diff options
Diffstat (limited to 'test')
2122 files changed, 172571 insertions, 8809 deletions
diff --git a/test/.clang-format b/test/.clang-format new file mode 100644 index 0000000..4799b66 --- /dev/null +++ b/test/.clang-format @@ -0,0 +1,2 @@ +BasedOnStyle: LLVM +ColumnLimit: 0 diff --git a/test/Analysis/BasicAA/full-store-partial-alias.ll b/test/Analysis/BasicAA/full-store-partial-alias.ll index 2c34fd5..4de2daf 100644 --- a/test/Analysis/BasicAA/full-store-partial-alias.ll +++ b/test/Analysis/BasicAA/full-store-partial-alias.ll @@ -29,7 +29,9 @@ entry: ret i32 %tmp5.lobit } -!0 = metadata !{metadata !"double", metadata !1} +!0 = metadata !{metadata !4, metadata !4, i64 0} !1 = metadata !{metadata !"omnipotent char", metadata !2} !2 = metadata !{metadata !"Simple C/C++ TBAA", null} -!3 = metadata !{metadata !"int", metadata !1} +!3 = metadata !{metadata !5, metadata !5, i64 0} +!4 = metadata !{metadata !"double", metadata !1} +!5 = metadata !{metadata !"int", metadata !1} diff --git a/test/Analysis/BasicAA/gep-alias.ll b/test/Analysis/BasicAA/gep-alias.ll index 9c2c7ee..2c0d467 100644 --- a/test/Analysis/BasicAA/gep-alias.ll +++ b/test/Analysis/BasicAA/gep-alias.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -basicaa -gvn -instcombine -S 2>&1 | FileCheck %s -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" ; Make sure that basicaa thinks R and r are must aliases. 
define i32 @test1(i8 * %P) { @@ -15,7 +15,7 @@ entry: %t = sub i32 %S, %s ret i32 %t -; CHECK: @test1 +; CHECK-LABEL: @test1( ; CHECK: ret i32 0 } @@ -32,7 +32,7 @@ entry: %t = sub i32 %S, %s ret i32 %t -; CHECK: @test2 +; CHECK-LABEL: @test2( ; CHECK: ret i32 0 } @@ -51,7 +51,7 @@ entry: %t = sub i32 %S, %s ret i32 %t -; CHECK: @test3 +; CHECK-LABEL: @test3( ; CHECK: ret i32 0 } @@ -68,7 +68,7 @@ entry: store i8* null, i8** %tmp3, align 8 %tmp4 = load i32* %tmp2, align 8 ret i32 %tmp4 -; CHECK: @test4 +; CHECK-LABEL: @test4( ; CHECK: ret i32 64 } @@ -82,7 +82,34 @@ define i32 @test5(i32* %p, i64 %i) { %y = load i32* %pi %z = sub i32 %x, %y ret i32 %z -; CHECK: @test5 +; CHECK-LABEL: @test5( +; CHECK: ret i32 0 +} + +define i32 @test5_as1_smaller_size(i32 addrspace(1)* %p, i8 %i) { + %pi = getelementptr i32 addrspace(1)* %p, i8 %i + %i.next = add i8 %i, 1 + %pi.next = getelementptr i32 addrspace(1)* %p, i8 %i.next + %x = load i32 addrspace(1)* %pi + store i32 42, i32 addrspace(1)* %pi.next + %y = load i32 addrspace(1)* %pi + %z = sub i32 %x, %y + ret i32 %z +; CHECK-LABEL: @test5_as1_smaller_size( +; CHECK: sext +; CHECK: ret i32 0 +} + +define i32 @test5_as1_same_size(i32 addrspace(1)* %p, i16 %i) { + %pi = getelementptr i32 addrspace(1)* %p, i16 %i + %i.next = add i16 %i, 1 + %pi.next = getelementptr i32 addrspace(1)* %p, i16 %i.next + %x = load i32 addrspace(1)* %pi + store i32 42, i32 addrspace(1)* %pi.next + %y = load i32 addrspace(1)* %pi + %z = sub i32 %x, %y + ret i32 %z +; CHECK-LABEL: @test5_as1_same_size( ; CHECK: ret i32 0 } @@ -97,7 +124,7 @@ define i32 @test6(i32* %p, i64 %i1) { %y = load i32* %pi %z = sub i32 %x, %y ret i32 %z -; CHECK: @test6 +; CHECK-LABEL: @test6( ; CHECK: ret i32 0 } @@ -111,7 +138,7 @@ define i32 @test7(i32* %p, i64 %i) { %y = load i32* %pi %z = sub i32 %x, %y ret i32 %z -; CHECK: @test7 +; CHECK-LABEL: @test7( ; CHECK: ret i32 0 } @@ -128,7 +155,7 @@ define i32 @test8(i32* %p, i16 %i) { %y = load i32* %pi %z = sub i32 %x, %y 
ret i32 %z -; CHECK: @test8 +; CHECK-LABEL: @test8( ; CHECK: ret i32 0 } @@ -139,7 +166,7 @@ define i8 @test9([4 x i8] *%P, i32 %i, i32 %j) { %P2 = getelementptr [4 x i8] *%P, i32 0, i32 %i3 %j2 = shl i32 %j, 2 - + ; P4 = P + 4*j %P4 = getelementptr [4 x i8]* %P, i32 0, i32 %j2 @@ -148,7 +175,7 @@ define i8 @test9([4 x i8] *%P, i32 %i, i32 %j) { %y = load i8* %P2 %z = sub i8 %x, %y ret i8 %z -; CHECK: @test9 +; CHECK-LABEL: @test9( ; CHECK: ret i8 0 } @@ -157,7 +184,7 @@ define i8 @test10([4 x i8] *%P, i32 %i) { %i3 = add i32 %i2, 4 ; P2 = P + 4 + 4*i %P2 = getelementptr [4 x i8] *%P, i32 0, i32 %i3 - + ; P4 = P + 4*i %P4 = getelementptr [4 x i8]* %P, i32 0, i32 %i2 @@ -166,7 +193,7 @@ define i8 @test10([4 x i8] *%P, i32 %i) { %y = load i8* %P2 %z = sub i8 %x, %y ret i8 %z -; CHECK: @test10 +; CHECK-LABEL: @test10( ; CHECK: ret i8 0 } @@ -182,8 +209,8 @@ define float @test11(i32 %indvar, [4 x [2 x float]]* %q) nounwind ssp { store i64 0, i64* %scevgep35, align 4 %tmp30 = load float* %y29, align 4 ret float %tmp30 - ; CHECK: @test11 - ; CHECK: ret float %tmp30 +; CHECK-LABEL: @test11( +; CHECK: ret float %tmp30 } ; (This was a miscompilation.) @@ -198,6 +225,6 @@ define i32 @test12(i32 %x, i32 %y, i8* %p) nounwind { store i32 0, i32* %castd %r = load i32* %castp ret i32 %r - ; CHECK: @test12 - ; CHECK: ret i32 %r +; CHECK-LABEL: @test12( +; CHECK: ret i32 %r } diff --git a/test/Analysis/BasicAA/global-size.ll b/test/Analysis/BasicAA/global-size.ll index a7e5aab..f081cb1 100644 --- a/test/Analysis/BasicAA/global-size.ll +++ b/test/Analysis/BasicAA/global-size.ll @@ -2,11 +2,11 @@ ; the global. 
; RUN: opt < %s -basicaa -gvn -S | FileCheck %s -target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" +target datalayout = "E-p:64:64:64-p1:16:16:16-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" @B = global i16 8 -; CHECK: @test1 +; CHECK-LABEL: @test1( define i16 @test1(i32* %P) { %X = load i16* @B store i32 7, i32* %P @@ -16,11 +16,23 @@ define i16 @test1(i32* %P) { ; CHECK: ret i16 0 } +@B_as1 = addrspace(1) global i16 8 + +define i16 @test1_as1(i32 addrspace(1)* %P) { +; CHECK-LABEL: @test1_as1( +; CHECK: ret i16 0 + %X = load i16 addrspace(1)* @B_as1 + store i32 7, i32 addrspace(1)* %P + %Y = load i16 addrspace(1)* @B_as1 + %Z = sub i16 %Y, %X + ret i16 %Z +} + ; Cannot know anything about the size of this global. ; rdar://8813415 @window = external global [0 x i8] -; CHECK: @test2 +; CHECK-LABEL: @test2( define i8 @test2(i32 %tmp79, i32 %w.2, i32 %indvar89) nounwind { %tmp92 = add i32 %tmp79, %indvar89 %arrayidx412 = getelementptr [0 x i8]* @window, i32 0, i32 %tmp92 diff --git a/test/Analysis/BasicAA/noalias-geps.ll b/test/Analysis/BasicAA/noalias-geps.ll index a93d778..f9ec713 100644 --- a/test/Analysis/BasicAA/noalias-geps.ll +++ b/test/Analysis/BasicAA/noalias-geps.ll @@ -4,6 +4,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 ; Check that geps with equal base offsets of noalias base pointers stay noalias. define i32 @test(i32* %p, i16 %i) { +; CHECK-LABEL: Function: test: %pi = getelementptr i32* %p, i32 0 %pi.next = getelementptr i32* %p, i32 1 %b = icmp eq i16 %i, 0 @@ -30,6 +31,7 @@ ret i32 0 ; Check that geps with equal indices of noalias base pointers stay noalias. 
define i32 @test2([2 x i32]* %p, i32 %i) { +; CHECK-LABEL: Function: test2: %pi = getelementptr [2 x i32]* %p, i32 0 %pi.next = getelementptr [2 x i32]* %p, i32 1 %b = icmp eq i32 %i, 0 diff --git a/test/Analysis/BlockFrequencyInfo/lit.local.cfg b/test/Analysis/BlockFrequencyInfo/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Analysis/BlockFrequencyInfo/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Analysis/BranchProbabilityInfo/basic.ll b/test/Analysis/BranchProbabilityInfo/basic.ll index c6e1bde..05cb31d 100644 --- a/test/Analysis/BranchProbabilityInfo/basic.ll +++ b/test/Analysis/BranchProbabilityInfo/basic.ll @@ -173,3 +173,42 @@ exit: %ret = phi i32 [ %val4, %then ], [ %val3, %else ] ret i32 %ret } + +define i32 @zero1(i32 %i, i32 %a, i32 %b) { +; CHECK: Printing analysis {{.*}} for function 'zero1' +entry: + %cond = icmp eq i32 %i, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 12 / 32 +; CHECK: edge entry -> else probability is 20 / 32 + +then: + br label %exit + +else: + br label %exit + +exit: + %result = phi i32 [ %a, %then ], [ %b, %else ] + ret i32 %result +} + +define i32 @zero2(i32 %i, i32 %a, i32 %b) { +; CHECK: Printing analysis {{.*}} for function 'zero2' +entry: + %cond = icmp ne i32 %i, -1 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 20 / 32 +; CHECK: edge entry -> else probability is 12 / 32 + +then: + br label %exit + +else: + br label %exit + +exit: + %result = phi i32 [ %a, %then ], [ %b, %else ] + ret i32 %result +} + diff --git a/test/Analysis/BranchProbabilityInfo/lit.local.cfg b/test/Analysis/BranchProbabilityInfo/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Analysis/BranchProbabilityInfo/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Analysis/CallGraph/lit.local.cfg 
b/test/Analysis/CallGraph/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Analysis/CallGraph/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Analysis/CostModel/ARM/lit.local.cfg b/test/Analysis/CostModel/ARM/lit.local.cfg index cb77b09..8a3ba96 100644 --- a/test/Analysis/CostModel/ARM/lit.local.cfg +++ b/test/Analysis/CostModel/ARM/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - targets = set(config.root.targets_to_build.split()) if not 'ARM' in targets: config.unsupported = True diff --git a/test/Analysis/CostModel/PowerPC/lit.local.cfg b/test/Analysis/CostModel/PowerPC/lit.local.cfg index 4019eca..2e46300 100644 --- a/test/Analysis/CostModel/PowerPC/lit.local.cfg +++ b/test/Analysis/CostModel/PowerPC/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - targets = set(config.root.targets_to_build.split()) if not 'PowerPC' in targets: config.unsupported = True diff --git a/test/Analysis/CostModel/X86/cast.ll b/test/Analysis/CostModel/X86/cast.ll index b69b3bf..f3c1283 100644 --- a/test/Analysis/CostModel/X86/cast.ll +++ b/test/Analysis/CostModel/X86/cast.ll @@ -38,6 +38,10 @@ define i32 @zext_sext(<8 x i1> %in) { ;CHECK: cost of 9 {{.*}} sext %S = sext <8 x i1> %in to <8 x i32> + ;CHECK: cost of 1 {{.*}} zext + %A1 = zext <16 x i8> undef to <16 x i16> + ;CHECK: cost of 1 {{.*}} sext + %A2 = sext <16 x i8> undef to <16 x i16> ;CHECK: cost of 1 {{.*}} sext %A = sext <8 x i16> undef to <8 x i32> ;CHECK: cost of 1 {{.*}} zext @@ -51,11 +55,13 @@ define i32 @zext_sext(<8 x i1> %in) { ;CHECK: cost of 1 {{.*}} zext %D = zext <4 x i32> undef to <4 x i64> - ;CHECK: cost of 1 {{.*}} trunc + ;CHECK: cost of 1 {{.*}} trunc %E = trunc <4 x i64> undef to <4 x i32> ;CHECK: cost of 1 {{.*}} trunc %F = trunc <8 x i32> undef to <8 x i16> + ;CHECK: cost of 2 {{.*}} trunc + %F1 = trunc <16 x i16> undef to <16 x i8> ;CHECK: cost of 3 {{.*}} trunc %G = trunc <8 x 
i64> undef to <8 x i32> diff --git a/test/Analysis/CostModel/X86/lit.local.cfg b/test/Analysis/CostModel/X86/lit.local.cfg index a8ad0f1..ba763cf 100644 --- a/test/Analysis/CostModel/X86/lit.local.cfg +++ b/test/Analysis/CostModel/X86/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - targets = set(config.root.targets_to_build.split()) if not 'X86' in targets: config.unsupported = True diff --git a/test/Analysis/CostModel/X86/reduction.ll b/test/Analysis/CostModel/X86/reduction.ll new file mode 100644 index 0000000..78e65ae --- /dev/null +++ b/test/Analysis/CostModel/X86/reduction.ll @@ -0,0 +1,365 @@ +; RUN: opt < %s -cost-model -costmodel-reduxcost=true -analyze -mcpu=core2 -mtriple=x86_64-apple-darwin | FileCheck %s +; RUN: opt < %s -cost-model -costmodel-reduxcost=true -analyze -mcpu=corei7 -mtriple=x86_64-apple-darwin | FileCheck %s --check-prefix=SSE3 +; RUN: opt < %s -cost-model -costmodel-reduxcost=true -analyze -mcpu=corei7-avx -mtriple=x86_64-apple-darwin | FileCheck %s --check-prefix=AVX +; RUN: opt < %s -cost-model -costmodel-reduxcost=true -analyze -mcpu=core-avx2 -mtriple=x86_64-apple-darwin | FileCheck %s --check-prefix=AVX2 + +define fastcc float @reduction_cost_float(<4 x float> %rdx) { + %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> + %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf + %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> + %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 + +; Check that we recognize the tree starting at the extractelement as a +; reduction. 
+; CHECK-LABEL: reduction_cost +; CHECK: cost of 9 {{.*}} extractelement + + %r = extractelement <4 x float> %bin.rdx8, i32 0 + ret float %r +} + +define fastcc i32 @reduction_cost_int(<8 x i32> %rdx) { + %rdx.shuf = shufflevector <8 x i32> %rdx, <8 x i32> undef, + <8 x i32> <i32 4 , i32 5, i32 6, i32 7, + i32 undef, i32 undef, i32 undef, i32 undef> + %bin.rdx = add <8 x i32> %rdx, %rdx.shuf + %rdx.shuf.2 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, + <8 x i32> <i32 2 , i32 3, i32 undef, i32 undef, + i32 undef, i32 undef, i32 undef, i32 undef> + %bin.rdx.2 = add <8 x i32> %bin.rdx, %rdx.shuf.2 + %rdx.shuf.3 = shufflevector <8 x i32> %bin.rdx.2, <8 x i32> undef, + <8 x i32> <i32 1 , i32 undef, i32 undef, i32 undef, + i32 undef, i32 undef, i32 undef, i32 undef> + %bin.rdx.3 = add <8 x i32> %bin.rdx.2, %rdx.shuf.3 + +; CHECK-LABEL: reduction_cost_int +; CHECK: cost of 23 {{.*}} extractelement + + %r = extractelement <8 x i32> %bin.rdx.3, i32 0 + ret i32 %r +} + +define fastcc float @pairwise_hadd(<4 x float> %rdx, float %f1) { + %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, + <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> + %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, + <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> + %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 + %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, + <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> + %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, + <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> + %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 + +; CHECK-LABEL: pairwise_hadd +; CHECK: cost of 11 {{.*}} extractelement + + %r = extractelement <4 x float> %bin.rdx.1, i32 0 + %r2 = fadd float %r, %f1 + ret float %r2 +} + +define fastcc float @pairwise_hadd_assoc(<4 x float> %rdx, float %f1) { + %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, + <4 x i32> <i32 
0, i32 2 , i32 undef, i32 undef> + %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, + <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> + %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.1, %rdx.shuf.0.0 + %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, + <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> + %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, + <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> + %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 + +; CHECK-LABEL: pairwise_hadd_assoc +; CHECK: cost of 11 {{.*}} extractelement + + %r = extractelement <4 x float> %bin.rdx.1, i32 0 + %r2 = fadd float %r, %f1 + ret float %r2 +} + +define fastcc float @pairwise_hadd_skip_first(<4 x float> %rdx, float %f1) { + %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, + <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> + %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, + <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> + %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 + %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, + <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> + %bin.rdx.1 = fadd <4 x float> %bin.rdx.0, %rdx.shuf.1.1 + +; CHECK-LABEL: pairwise_hadd_skip_first +; CHECK: cost of 11 {{.*}} extractelement + + %r = extractelement <4 x float> %bin.rdx.1, i32 0 + %r2 = fadd float %r, %f1 + ret float %r2 +} + +define fastcc double @no_pairwise_reduction2double(<2 x double> %rdx, double %f1) { + %rdx.shuf = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 1, i32 undef> + %bin.rdx = fadd <2 x double> %rdx, %rdx.shuf + +; SSE3: cost of 2 {{.*}} extractelement +; AVX: cost of 2 {{.*}} extractelement +; AVX2: cost of 2 {{.*}} extractelement + + %r = extractelement <2 x double> %bin.rdx, i32 0 + ret double %r +} + +define fastcc float @no_pairwise_reduction4float(<4 x float> %rdx, float %f1) { + %rdx.shuf = shufflevector <4 
x float> %rdx, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> + %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf + %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> + %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 + +; SSE3: cost of 4 {{.*}} extractelement +; AVX: cost of 3 {{.*}} extractelement +; AVX2: cost of 3 {{.*}} extractelement + + %r = extractelement <4 x float> %bin.rdx8, i32 0 + ret float %r +} + +define fastcc double @no_pairwise_reduction4double(<4 x double> %rdx, double %f1) { + %rdx.shuf = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> + %bin.rdx = fadd <4 x double> %rdx, %rdx.shuf + %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> + %bin.rdx8 = fadd <4 x double> %bin.rdx, %rdx.shuf7 + +; AVX: cost of 3 {{.*}} extractelement +; AVX2: cost of 3 {{.*}} extractelement + + %r = extractelement <4 x double> %bin.rdx8, i32 0 + ret double %r +} + +define fastcc float @no_pairwise_reduction8float(<8 x float> %rdx, float %f1) { + %rdx.shuf3 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7,i32 undef, i32 undef, i32 undef, i32 undef> + %bin.rdx4 = fadd <8 x float> %rdx, %rdx.shuf3 + %rdx.shuf = shufflevector <8 x float> %bin.rdx4, <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %bin.rdx = fadd <8 x float> %bin.rdx4, %rdx.shuf + %rdx.shuf7 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %bin.rdx8 = fadd <8 x float> %bin.rdx, %rdx.shuf7 + +; AVX: cost of 4 {{.*}} extractelement +; AVX2: cost of 4 {{.*}} extractelement + + %r = extractelement <8 x float> %bin.rdx8, i32 0 + ret float %r +} + +define fastcc i64 @no_pairwise_reduction2i64(<2 x i64> 
%rdx, i64 %f1) { + %rdx.shuf = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> <i32 1, i32 undef> + %bin.rdx = add <2 x i64> %rdx, %rdx.shuf + +; SSE3: cost of 2 {{.*}} extractelement +; AVX: cost of 1 {{.*}} extractelement +; AVX2: cost of 1 {{.*}} extractelement + + %r = extractelement <2 x i64> %bin.rdx, i32 0 + ret i64 %r +} + +define fastcc i32 @no_pairwise_reduction4i32(<4 x i32> %rdx, i32 %f1) { + %rdx.shuf = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> + %bin.rdx = add <4 x i32> %rdx, %rdx.shuf + %rdx.shuf7 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> + %bin.rdx8 = add <4 x i32> %bin.rdx, %rdx.shuf7 + +; SSE3: cost of 3 {{.*}} extractelement +; AVX: cost of 3 {{.*}} extractelement +; AVX2: cost of 3 {{.*}} extractelement + + %r = extractelement <4 x i32> %bin.rdx8, i32 0 + ret i32 %r +} + +define fastcc i64 @no_pairwise_reduction4i64(<4 x i64> %rdx, i64 %f1) { + %rdx.shuf = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> + %bin.rdx = add <4 x i64> %rdx, %rdx.shuf + %rdx.shuf7 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> + %bin.rdx8 = add <4 x i64> %bin.rdx, %rdx.shuf7 + +; AVX: cost of 3 {{.*}} extractelement +; AVX2: cost of 3 {{.*}} extractelement + + %r = extractelement <4 x i64> %bin.rdx8, i32 0 + ret i64 %r +} + +define fastcc i16 @no_pairwise_reduction8i16(<8 x i16> %rdx, i16 %f1) { + %rdx.shuf3 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7,i32 undef, i32 undef, i32 undef, i32 undef> + %bin.rdx4 = add <8 x i16> %rdx, %rdx.shuf3 + %rdx.shuf = shufflevector <8 x i16> %bin.rdx4, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %bin.rdx = add <8 x i16> %bin.rdx4, %rdx.shuf + %rdx.shuf7 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, 
<8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %bin.rdx8 = add <8 x i16> %bin.rdx, %rdx.shuf7 + +; SSE3: cost of 4 {{.*}} extractelement +; AVX: cost of 4 {{.*}} extractelement +; AVX2: cost of 4 {{.*}} extractelement + + %r = extractelement <8 x i16> %bin.rdx8, i32 0 + ret i16 %r +} + +define fastcc i32 @no_pairwise_reduction8i32(<8 x i32> %rdx, i32 %f1) { + %rdx.shuf3 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7,i32 undef, i32 undef, i32 undef, i32 undef> + %bin.rdx4 = add <8 x i32> %rdx, %rdx.shuf3 + %rdx.shuf = shufflevector <8 x i32> %bin.rdx4, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %bin.rdx = add <8 x i32> %bin.rdx4, %rdx.shuf + %rdx.shuf7 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %bin.rdx8 = add <8 x i32> %bin.rdx, %rdx.shuf7 + +; AVX: cost of 5 {{.*}} extractelement +; AVX2: cost of 5 {{.*}} extractelement + + %r = extractelement <8 x i32> %bin.rdx8, i32 0 + ret i32 %r +} + +define fastcc double @pairwise_reduction2double(<2 x double> %rdx, double %f1) { + %rdx.shuf.1.0 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 0, i32 undef> + %rdx.shuf.1.1 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 1, i32 undef> + %bin.rdx8 = fadd <2 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 + +; SSE3: cost of 2 {{.*}} extractelement +; AVX: cost of 2 {{.*}} extractelement +; AVX2: cost of 2 {{.*}} extractelement + + %r = extractelement <2 x double> %bin.rdx8, i32 0 + ret double %r +} + +define fastcc float @pairwise_reduction4float(<4 x float> %rdx, float %f1) { + %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef> + %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 
undef, i32 undef> + %bin.rdx = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 + %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> + %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> + %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 + +; SSE3: cost of 4 {{.*}} extractelement +; AVX: cost of 4 {{.*}} extractelement +; AVX2: cost of 4 {{.*}} extractelement + + %r = extractelement <4 x float> %bin.rdx8, i32 0 + ret float %r +} + +define fastcc double @pairwise_reduction4double(<4 x double> %rdx, double %f1) { + %rdx.shuf.0.0 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef> + %rdx.shuf.0.1 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> + %bin.rdx = fadd <4 x double> %rdx.shuf.0.0, %rdx.shuf.0.1 + %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> + %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> + %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 + +; AVX: cost of 5 {{.*}} extractelement +; AVX2: cost of 5 {{.*}} extractelement + + %r = extractelement <4 x double> %bin.rdx8, i32 0 + ret double %r +} + +define fastcc float @pairwise_reduction8float(<8 x float> %rdx, float %f1) { + %rdx.shuf.0.0 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6,i32 undef, i32 undef, i32 undef, i32 undef> + %rdx.shuf.0.1 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7,i32 undef, i32 undef, i32 undef, i32 undef> + %bin.rdx = fadd <8 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 + %rdx.shuf.1.0 = shufflevector <8 x float> %bin.rdx, <8 x float> undef,<8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 undef, 
i32 undef, i32 undef, i32 undef> + %rdx.shuf.1.1 = shufflevector <8 x float> %bin.rdx, <8 x float> undef,<8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %bin.rdx8 = fadd <8 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 + %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef,<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef,<8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %bin.rdx9 = fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1 + +; AVX: cost of 7 {{.*}} extractelement +; AVX2: cost of 7 {{.*}} extractelement + + %r = extractelement <8 x float> %bin.rdx9, i32 0 + ret float %r +} + +define fastcc i64 @pairwise_reduction2i64(<2 x i64> %rdx, i64 %f1) { + %rdx.shuf.1.0 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> <i32 0, i32 undef> + %rdx.shuf.1.1 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> <i32 1, i32 undef> + %bin.rdx8 = add <2 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1 + +; SSE3: cost of 2 {{.*}} extractelement +; AVX: cost of 1 {{.*}} extractelement +; AVX2: cost of 1 {{.*}} extractelement + + %r = extractelement <2 x i64> %bin.rdx8, i32 0 + ret i64 %r +} + +define fastcc i32 @pairwise_reduction4i32(<4 x i32> %rdx, i32 %f1) { + %rdx.shuf.0.0 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef> + %rdx.shuf.0.1 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> + %bin.rdx = add <4 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1 + %rdx.shuf.1.0 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> + %rdx.shuf.1.1 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> + %bin.rdx8 = add <4 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 + +; SSE3: cost of 3 {{.*}} 
extractelement +; AVX: cost of 3 {{.*}} extractelement +; AVX2: cost of 3 {{.*}} extractelement + + %r = extractelement <4 x i32> %bin.rdx8, i32 0 + ret i32 %r +} + +define fastcc i64 @pairwise_reduction4i64(<4 x i64> %rdx, i64 %f1) { + %rdx.shuf.0.0 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef> + %rdx.shuf.0.1 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> + %bin.rdx = add <4 x i64> %rdx.shuf.0.0, %rdx.shuf.0.1 + %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> + %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> + %bin.rdx8 = add <4 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1 + +; AVX: cost of 5 {{.*}} extractelement +; AVX2: cost of 5 {{.*}} extractelement + + %r = extractelement <4 x i64> %bin.rdx8, i32 0 + ret i64 %r +} + +define fastcc i16 @pairwise_reduction8i16(<8 x i16> %rdx, i16 %f1) { + %rdx.shuf.0.0 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6,i32 undef, i32 undef, i32 undef, i32 undef> + %rdx.shuf.0.1 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7,i32 undef, i32 undef, i32 undef, i32 undef> + %bin.rdx = add <8 x i16> %rdx.shuf.0.0, %rdx.shuf.0.1 + %rdx.shuf.1.0 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef,<8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %rdx.shuf.1.1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef,<8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %bin.rdx8 = add <8 x i16> %rdx.shuf.1.0, %rdx.shuf.1.1 + %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef,<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef,<8 x 
i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %bin.rdx9 = add <8 x i16> %rdx.shuf.2.0, %rdx.shuf.2.1 + +; SSE3: cost of 5 {{.*}} extractelement +; AVX: cost of 5 {{.*}} extractelement +; AVX2: cost of 5 {{.*}} extractelement + + %r = extractelement <8 x i16> %bin.rdx9, i32 0 + ret i16 %r +} + +define fastcc i32 @pairwise_reduction8i32(<8 x i32> %rdx, i32 %f1) { + %rdx.shuf.0.0 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6,i32 undef, i32 undef, i32 undef, i32 undef> + %rdx.shuf.0.1 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7,i32 undef, i32 undef, i32 undef, i32 undef> + %bin.rdx = add <8 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1 + %rdx.shuf.1.0 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef,<8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %rdx.shuf.1.1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef,<8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %bin.rdx8 = add <8 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 + %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef,<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef,<8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1 + +; AVX: cost of 5 {{.*}} extractelement +; AVX2: cost of 5 {{.*}} extractelement + + %r = extractelement <8 x i32> %bin.rdx9, i32 0 + ret i32 %r +} diff --git a/test/Analysis/CostModel/lit.local.cfg b/test/Analysis/CostModel/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Analysis/CostModel/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Analysis/Delinearization/a.ll 
b/test/Analysis/Delinearization/a.ll new file mode 100644 index 0000000..9308749 --- /dev/null +++ b/test/Analysis/Delinearization/a.ll @@ -0,0 +1,74 @@ +; RUN: opt < %s -analyze -delinearize | FileCheck %s +; +; void foo(long n, long m, long o, int A[n][m][o]) { +; for (long i = 0; i < n; i++) +; for (long j = 0; j < m; j++) +; for (long k = 0; k < o; k++) +; A[2*i+3][3*j-4][5*k+7] = 1; +; } + +; AddRec: {{{(28 + (4 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(12 * %o)}<%for.j>,+,20}<%for.k> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(i32) bytes. +; CHECK: ArrayRef[{3,+,2}<%for.i>][{-4,+,3}<%for.j>][{7,+,5}<%for.k>] + +; AddRec: {{(8 + ((4 + (12 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(12 * %o)}<%for.j> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][%o] with elements of sizeof(i32) bytes. +; CHECK: ArrayRef[{(1 + (3 * %m)),+,(2 * %m)}<%for.i>][{2,+,(3 * %o)}<%for.j>] + +; AddRec: {(8 + ((-8 + (24 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize] with elements of 2 bytes. 
+; CHECK: ArrayRef[{((1 + ((-1 + (3 * %m)) * %o)) * sizeof(i32)),+,(%m * %o * sizeof(i32))}<%for.i>] + +; Function Attrs: nounwind uwtable +define void @foo(i64 %n, i64 %m, i64 %o, i32* nocapture %A) #0 { +entry: + %cmp32 = icmp sgt i64 %n, 0 + br i1 %cmp32, label %for.cond1.preheader.lr.ph, label %for.end17 + +for.cond1.preheader.lr.ph: ; preds = %entry + %cmp230 = icmp sgt i64 %m, 0 + %cmp528 = icmp sgt i64 %o, 0 + br i1 %cmp230, label %for.i, label %for.end17 + +for.inc15.us: ; preds = %for.inc12.us.us, %for.i + %inc16.us = add nsw i64 %i.033.us, 1 + %exitcond55 = icmp eq i64 %inc16.us, %n + br i1 %exitcond55, label %for.end17, label %for.i + +for.i: ; preds = %for.cond1.preheader.lr.ph, %for.inc15.us + %i.033.us = phi i64 [ %inc16.us, %for.inc15.us ], [ 0, %for.cond1.preheader.lr.ph ] + %mul8.us = shl i64 %i.033.us, 1 + %add9.us = add nsw i64 %mul8.us, 3 + %0 = mul i64 %add9.us, %m + %sub.us = add i64 %0, -4 + br i1 %cmp528, label %for.j, label %for.inc15.us + +for.inc12.us.us: ; preds = %for.k + %inc13.us.us = add nsw i64 %j.031.us.us, 1 + %exitcond54 = icmp eq i64 %inc13.us.us, %m + br i1 %exitcond54, label %for.inc15.us, label %for.j + +for.j: ; preds = %for.i, %for.inc12.us.us + %j.031.us.us = phi i64 [ %inc13.us.us, %for.inc12.us.us ], [ 0, %for.i ] + %mul7.us.us = mul nsw i64 %j.031.us.us, 3 + %tmp.us.us = add i64 %sub.us, %mul7.us.us + %tmp27.us.us = mul i64 %tmp.us.us, %o + br label %for.k + +for.k: ; preds = %for.k, %for.j + %k.029.us.us = phi i64 [ 0, %for.j ], [ %inc.us.us, %for.k ] + %mul.us.us = mul nsw i64 %k.029.us.us, 5 + %arrayidx.sum.us.us = add i64 %mul.us.us, 7 + %arrayidx10.sum.us.us = add i64 %arrayidx.sum.us.us, %tmp27.us.us + %arrayidx11.us.us = getelementptr inbounds i32* %A, i64 %arrayidx10.sum.us.us + store i32 1, i32* %arrayidx11.us.us, align 4 + %inc.us.us = add nsw i64 %k.029.us.us, 1 + %exitcond = icmp eq i64 %inc.us.us, %o + br i1 %exitcond, label %for.inc12.us.us, label %for.k + +for.end17: ; preds = %for.inc15.us, 
%for.cond1.preheader.lr.ph, %entry + ret void +} diff --git a/test/Analysis/Delinearization/himeno_1.ll b/test/Analysis/Delinearization/himeno_1.ll new file mode 100644 index 0000000..9458bd2 --- /dev/null +++ b/test/Analysis/Delinearization/himeno_1.ll @@ -0,0 +1,102 @@ +; RUN: opt < %s -analyze -delinearize | FileCheck %s + +; #define MR(mt,n,r,c,d) mt->m[(n) * mt->mrows * mt->mcols * mt->mdeps + (r) * mt->mcols* mt->mdeps + (c) * mt->mdeps + (d)] +; +; struct Mat { +; float* m; +; int mnums; +; int mrows; +; int mcols; +; int mdeps; +; }; +; +; typedef struct Mat Matrix; +; +; void jacobi(int nn, Matrix* a, Matrix* p) +; { +; long i, j, k, max,jmax,kmax; +; +; p_rows_sub = p->mrows - 1; +; p_cols_sub = p->mcols - 1; +; p_deps_sub = p->mdeps - 1; +; +; for(i = 1; i < p_rows_sub; i++) +; for(j = 1; j < p_cols_sub; j++) +; for(k = 1; k < p_deps_sub; k++) +; MR(a,0,i,j,k) = i + j + k; +; } + +; AddRec: {{{(4 + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))}<%for.j>,+,4}<%for.k> +; CHECK: Base offset: %a.base +; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)] with elements of sizeof(float) bytes. +; CHECK: ArrayRef[{1,+,1}<nuw><nsw><%for.i>][{1,+,1}<nuw><nsw><%for.j>][{1,+,1}<nuw><nsw><%for.k>] + +; AddRec: {{(-4 + (4 * (sext i32 (-1 + %p.deps) to i64)) + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))}<%for.j> +; CHECK: Base offset: %a.base +; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.deps to i64)] with elements of sizeof(float) bytes. 
+; CHECK: ArrayRef[{(1 + (sext i32 %a.cols to i64)),+,(sext i32 %a.cols to i64)}<%for.i>][{(-1 + (sext i32 (-1 + %p.deps) to i64)),+,(sext i32 %a.deps to i64)}<%for.j>] + +; AddRec: {(-4 + (4 * (sext i32 (-1 + %p.deps) to i64)) + ((sext i32 %a.deps to i64) * (-4 + (4 * (sext i32 (-1 + %p.cols) to i64)) + (4 * (sext i32 %a.cols to i64)))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i> +; CHECK: Base offset: %a.base +; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(float) bytes. +; CHECK: ArrayRef[{(-1 + (sext i32 (-1 + %p.deps) to i64) + ((sext i32 %a.deps to i64) * (-1 + (sext i32 (-1 + %p.cols) to i64) + (sext i32 %a.cols to i64)))),+,((sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>] + +%struct.Mat = type { float*, i32, i32, i32, i32 } + +define void @jacobi(i32 %nn, %struct.Mat* nocapture %a, %struct.Mat* nocapture %p) nounwind uwtable { +entry: + %p.rows.ptr = getelementptr inbounds %struct.Mat* %p, i64 0, i32 2 + %p.rows = load i32* %p.rows.ptr + %p.rows.sub = add i32 %p.rows, -1 + %p.rows.sext = sext i32 %p.rows.sub to i64 + %p.cols.ptr = getelementptr inbounds %struct.Mat* %p, i64 0, i32 3 + %p.cols = load i32* %p.cols.ptr + %p.cols.sub = add i32 %p.cols, -1 + %p.cols.sext = sext i32 %p.cols.sub to i64 + %p.deps.ptr = getelementptr inbounds %struct.Mat* %p, i64 0, i32 4 + %p.deps = load i32* %p.deps.ptr + %p.deps.sub = add i32 %p.deps, -1 + %p.deps.sext = sext i32 %p.deps.sub to i64 + %a.cols.ptr = getelementptr inbounds %struct.Mat* %a, i64 0, i32 3 + %a.cols = load i32* %a.cols.ptr + %a.deps.ptr = getelementptr inbounds %struct.Mat* %a, i64 0, i32 4 + %a.deps = load i32* %a.deps.ptr + %a.base.ptr = getelementptr inbounds %struct.Mat* %a, i64 0, i32 0 + %a.base = load float** %a.base.ptr, align 8 + br label %for.i + +for.i: ; preds = %for.i.inc, %entry + %i = phi i64 [ %i.inc, %for.i.inc ], [ 1, %entry ] + br label %for.j + +for.j: ; preds = %for.j.inc, %for.i + %j = phi i64 [ %j.inc, %for.j.inc ], [ 
1, %for.i ] + %a.cols.sext = sext i32 %a.cols to i64 + %a.deps.sext = sext i32 %a.deps to i64 + br label %for.k + +for.k: ; preds = %for.k, %for.j + %k = phi i64 [ 1, %for.j ], [ %k.inc, %for.k ] + %tmp1 = mul nsw i64 %a.cols.sext, %i + %tmp2 = add i64 %tmp1, %j + %tmp3 = mul i64 %tmp2, %a.deps.sext + %tmp4 = add nsw i64 %k, %tmp3 + %arrayidx = getelementptr inbounds float* %a.base, i64 %tmp4 + store float 1.000000e+00, float* %arrayidx + %k.inc = add nsw i64 %k, 1 + %k.exitcond = icmp eq i64 %k.inc, %p.deps.sext + br i1 %k.exitcond, label %for.j.inc, label %for.k + +for.j.inc: ; preds = %for.k + %j.inc = add nsw i64 %j, 1 + %j.exitcond = icmp eq i64 %j.inc, %p.cols.sext + br i1 %j.exitcond, label %for.i.inc, label %for.j + +for.i.inc: ; preds = %for.j.inc + %i.inc = add nsw i64 %i, 1 + %i.exitcond = icmp eq i64 %i.inc, %p.rows.sext + br i1 %i.exitcond, label %end, label %for.i + +end: ; preds = %for.i.inc + ret void +} diff --git a/test/Analysis/Delinearization/himeno_2.ll b/test/Analysis/Delinearization/himeno_2.ll new file mode 100644 index 0000000..a290066 --- /dev/null +++ b/test/Analysis/Delinearization/himeno_2.ll @@ -0,0 +1,102 @@ +; RUN: opt < %s -analyze -delinearize | FileCheck %s + +; #define MR(mt,n,r,c,d) mt->m[(n) * mt->mrows * mt->mcols * mt->mdeps + (r) * mt->mcols* mt->mdeps + (c) * mt->mdeps + (d)] +; +; struct Mat { +; float* m; +; int mnums; +; int mrows; +; int mcols; +; int mdeps; +; }; +; +; typedef struct Mat Matrix; +; +; void jacobi(int nn, Matrix* a, Matrix* p) +; { +; long i, j, k, max,jmax,kmax; +; +; p_rows_sub = p->mrows - 1; +; p_cols_sub = p->mcols - 1; +; p_deps_sub = p->mdeps - 1; +; +; for(i = 1; i < p_rows_sub; i++) +; for(j = 1; j < p_cols_sub; j++) +; for(k = 1; k < p_deps_sub; k++) +; MR(a,0,i,j,k) = i + j + k; +; } + +; AddRec: {{{(4 + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to 
i64))}<%for.j>,+,4}<%for.k> +; CHECK: Base offset: %a.base +; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)] with elements of sizeof(float) bytes. +; CHECK: ArrayRef[{1,+,1}<nuw><nsw><%for.i>][{1,+,1}<nuw><nsw><%for.j>][{1,+,1}<nuw><nsw><%for.k>] + +; AddRec: {{(-4 + (4 * (sext i32 (-1 + %p.deps) to i64)) + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))}<%for.j> +; CHECK: Base offset: %a.base +; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.deps to i64)] with elements of sizeof(float) bytes. +; CHECK: ArrayRef[{(1 + (sext i32 %a.cols to i64)),+,(sext i32 %a.cols to i64)}<%for.i>][{(-1 + (sext i32 (-1 + %p.deps) to i64)),+,(sext i32 %a.deps to i64)}<%for.j>] + +; AddRec: {(-4 + (4 * (sext i32 (-1 + %p.deps) to i64)) + ((sext i32 %a.deps to i64) * (-4 + (4 * (sext i32 (-1 + %p.cols) to i64)) + (4 * (sext i32 %a.cols to i64)))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i> +; CHECK: Base offset: %a.base +; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(float) bytes. 
+; CHECK: ArrayRef[{(-1 + (sext i32 (-1 + %p.deps) to i64) + ((sext i32 %a.deps to i64) * (-1 + (sext i32 (-1 + %p.cols) to i64) + (sext i32 %a.cols to i64)))),+,((sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>] + +%struct.Mat = type { float*, i32, i32, i32, i32 } + +define void @jacobi(i32 %nn, %struct.Mat* nocapture %a, %struct.Mat* nocapture %p) nounwind uwtable { +entry: + %p.rows.ptr = getelementptr inbounds %struct.Mat* %p, i64 0, i32 2 + %p.rows = load i32* %p.rows.ptr + %p.rows.sub = add i32 %p.rows, -1 + %p.rows.sext = sext i32 %p.rows.sub to i64 + %p.cols.ptr = getelementptr inbounds %struct.Mat* %p, i64 0, i32 3 + %p.cols = load i32* %p.cols.ptr + %p.cols.sub = add i32 %p.cols, -1 + %p.cols.sext = sext i32 %p.cols.sub to i64 + %p.deps.ptr = getelementptr inbounds %struct.Mat* %p, i64 0, i32 4 + %p.deps = load i32* %p.deps.ptr + %p.deps.sub = add i32 %p.deps, -1 + %p.deps.sext = sext i32 %p.deps.sub to i64 + %a.cols.ptr = getelementptr inbounds %struct.Mat* %a, i64 0, i32 3 + %a.cols = load i32* %a.cols.ptr + %a.cols.sext = sext i32 %a.cols to i64 + %a.deps.ptr = getelementptr inbounds %struct.Mat* %a, i64 0, i32 4 + %a.deps = load i32* %a.deps.ptr + %a.deps.sext = sext i32 %a.deps to i64 + %a.base.ptr = getelementptr inbounds %struct.Mat* %a, i64 0, i32 0 + %a.base = load float** %a.base.ptr, align 8 + br label %for.i + +for.i: ; preds = %for.i.inc, %entry + %i = phi i64 [ %i.inc, %for.i.inc ], [ 1, %entry ] + br label %for.j + +for.j: ; preds = %for.j.inc, %for.i + %j = phi i64 [ %j.inc, %for.j.inc ], [ 1, %for.i ] + br label %for.k + +for.k: ; preds = %for.k, %for.j + %k = phi i64 [ 1, %for.j ], [ %k.inc, %for.k ] + %tmp1 = mul nsw i64 %a.cols.sext, %i + %tmp2 = add i64 %tmp1, %j + %tmp3 = mul i64 %tmp2, %a.deps.sext + %tmp4 = add nsw i64 %k, %tmp3 + %arrayidx = getelementptr inbounds float* %a.base, i64 %tmp4 + store float 1.000000e+00, float* %arrayidx + %k.inc = add nsw i64 %k, 1 + %k.exitcond = icmp eq i64 %k.inc, %p.deps.sext + br 
i1 %k.exitcond, label %for.j.inc, label %for.k + +for.j.inc: ; preds = %for.k + %j.inc = add nsw i64 %j, 1 + %j.exitcond = icmp eq i64 %j.inc, %p.cols.sext + br i1 %j.exitcond, label %for.i.inc, label %for.j + +for.i.inc: ; preds = %for.j.inc + %i.inc = add nsw i64 %i, 1 + %i.exitcond = icmp eq i64 %i.inc, %p.rows.sext + br i1 %i.exitcond, label %end, label %for.i + +end: ; preds = %for.i.inc + ret void +} diff --git a/test/Analysis/BasicAA/lit.local.cfg b/test/Analysis/Delinearization/lit.local.cfg index 19eebc0..19eebc0 100644 --- a/test/Analysis/BasicAA/lit.local.cfg +++ b/test/Analysis/Delinearization/lit.local.cfg diff --git a/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll b/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll new file mode 100644 index 0000000..82cab16 --- /dev/null +++ b/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll @@ -0,0 +1,68 @@ +; RUN: opt < %s -analyze -delinearize | FileCheck %s + +; void foo(long n, long m, long o, double A[n][m][o]) { +; +; for (long i = 0; i < n; i++) +; for (long j = 0; j < m; j++) +; for (long k = 0; k < o; k++) +; A[i+3][j-4][k+7] = 1.0; +; } + +; AddRec: {{{(56 + (8 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes. +; CHECK: ArrayRef[{3,+,1}<nw><%for.i>][{-4,+,1}<nw><%for.j>][{7,+,1}<nw><%for.k>] + +; AddRec: {{(48 + ((-24 + (24 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][%o] with elements of sizeof(double) bytes. +; CHECK: ArrayRef[{(-3 + (3 * %m)),+,%m}<%for.i>][{6,+,%o}<%for.j>] + +; AddRec: {(48 + ((-32 + (32 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes. 
+; CHECK: ArrayRef[{(6 + ((-4 + (4 * %m)) * %o)),+,(%m * %o)}<%for.i>] + +define void @foo(i64 %n, i64 %m, i64 %o, double* %A) { +entry: + br label %for.i + +for.i: + %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ] + br label %for.j + +for.j: + %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j.inc ] + br label %for.k + +for.k: + %k = phi i64 [ 0, %for.j ], [ %k.inc, %for.k.inc ] + %offset0 = add nsw i64 %i, 3 + %subscript0 = mul i64 %offset0, %m + %offset1 = add nsw i64 %j, -4 + %subscript1 = add i64 %offset1, %subscript0 + %subscript2 = mul i64 %subscript1, %o + %offset2 = add nsw i64 %k, 7 + %subscript = add i64 %subscript2, %offset2 + %idx = getelementptr inbounds double* %A, i64 %subscript + store double 1.0, double* %idx + br label %for.k.inc + +for.k.inc: + %k.inc = add nsw i64 %k, 1 + %k.exitcond = icmp eq i64 %k.inc, %o + br i1 %k.exitcond, label %for.j.inc, label %for.k + +for.j.inc: + %j.inc = add nsw i64 %j, 1 + %j.exitcond = icmp eq i64 %j.inc, %m + br i1 %j.exitcond, label %for.i.inc, label %for.j + +for.i.inc: + %i.inc = add nsw i64 %i, 1 + %i.exitcond = icmp eq i64 %i.inc, %n + br i1 %i.exitcond, label %end, label %for.i + +end: + ret void +} diff --git a/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll b/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll new file mode 100644 index 0000000..a1e779f --- /dev/null +++ b/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll @@ -0,0 +1,72 @@ +; RUN: opt < %s -analyze -delinearize | FileCheck %s + +; void foo(long n, long m, long o, long p, double A[n][m][o+p]) { +; +; for (long i = 0; i < n; i++) +; for (long j = 0; j < m; j++) +; for (long k = 0; k < o; k++) +; A[i+3][j-4][k+7] = 1.0; +; } + +; AddRec: {{{(56 + (8 * (-4 + (3 * %m)) * (%o + %p)) + %A),+,(8 * (%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us>,+,(8 * (%o + %p))}<%for.body6.lr.ph.us.us>,+,8}<%for.body6.us.us> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][%m][(%o + %p)] 
with elements of sizeof(double) bytes. +; CHECK: ArrayRef[{3,+,1}<nw><%for.cond4.preheader.lr.ph.us>][{-4,+,1}<nw><%for.body6.lr.ph.us.us>][{7,+,1}<nw><%for.body6.us.us>] + +; AddRec: {{(48 + (8 * %o) + (8 * (-4 + (3 * %m)) * (%o + %p)) + %A),+,(8 * (%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us>,+,(8 * (%o + %p))}<%for.body6.lr.ph.us.us> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][(%o + %p)] with elements of sizeof(double) bytes. +; CHECK: ArrayRef[{(-4 + (3 * %m)),+,%m}<%for.cond4.preheader.lr.ph.us>][{(6 + %o),+,(%o + %p)}<%for.body6.lr.ph.us.us>] + +; AddRec: {(48 + (8 * %o) + ((-40 + (32 * %m)) * (%o + %p)) + %A),+,(8 * (%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes. +; CHECK: ArrayRef[{(6 + ((-5 + (4 * %m)) * (%o + %p)) + %o),+,((%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us>] + +define void @foo(i64 %n, i64 %m, i64 %o, i64 %p, double* nocapture %A) nounwind uwtable { +entry: + %add = add nsw i64 %p, %o + %cmp22 = icmp sgt i64 %n, 0 + br i1 %cmp22, label %for.cond1.preheader.lr.ph, label %for.end16 + +for.cond1.preheader.lr.ph: ; preds = %entry + %cmp220 = icmp sgt i64 %m, 0 + %cmp518 = icmp sgt i64 %o, 0 + br i1 %cmp220, label %for.cond4.preheader.lr.ph.us, label %for.end16 + +for.inc14.us: ; preds = %for.cond4.preheader.lr.ph.us, %for.inc11.us.us + %inc15.us = add nsw i64 %i.023.us, 1 + %exitcond43 = icmp eq i64 %inc15.us, %n + br i1 %exitcond43, label %for.end16, label %for.cond4.preheader.lr.ph.us + +for.cond4.preheader.lr.ph.us: ; preds = %for.inc14.us, %for.cond1.preheader.lr.ph + %i.023.us = phi i64 [ %inc15.us, %for.inc14.us ], [ 0, %for.cond1.preheader.lr.ph ] + %add8.us = add nsw i64 %i.023.us, 3 + %0 = mul i64 %add8.us, %m + %sub.us = add i64 %0, -4 + br i1 %cmp518, label %for.body6.lr.ph.us.us, label %for.inc14.us + +for.inc11.us.us: ; preds = %for.body6.us.us + %inc12.us.us = add nsw i64 %j.021.us.us, 1 + %exitcond42 = icmp eq i64 
%inc12.us.us, %m + br i1 %exitcond42, label %for.inc14.us, label %for.body6.lr.ph.us.us + +for.body6.lr.ph.us.us: ; preds = %for.cond4.preheader.lr.ph.us, %for.inc11.us.us + %j.021.us.us = phi i64 [ %inc12.us.us, %for.inc11.us.us ], [ 0, %for.cond4.preheader.lr.ph.us ] + %tmp.us.us = add i64 %sub.us, %j.021.us.us + %tmp17.us.us = mul i64 %tmp.us.us, %add + br label %for.body6.us.us + +for.body6.us.us: ; preds = %for.body6.us.us, %for.body6.lr.ph.us.us + %k.019.us.us = phi i64 [ 0, %for.body6.lr.ph.us.us ], [ %inc.us.us, %for.body6.us.us ] + %arrayidx.sum.us.us = add i64 %k.019.us.us, 7 + %arrayidx9.sum.us.us = add i64 %arrayidx.sum.us.us, %tmp17.us.us + %arrayidx10.us.us = getelementptr inbounds double* %A, i64 %arrayidx9.sum.us.us + store double 1.000000e+00, double* %arrayidx10.us.us, align 8 + %inc.us.us = add nsw i64 %k.019.us.us, 1 + %exitcond = icmp eq i64 %inc.us.us, %o + br i1 %exitcond, label %for.inc11.us.us, label %for.body6.us.us + +for.end16: ; preds = %for.cond1.preheader.lr.ph, %for.inc14.us, %entry + ret void +} diff --git a/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll b/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll new file mode 100644 index 0000000..a52a4c9 --- /dev/null +++ b/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll @@ -0,0 +1,68 @@ +; RUN: opt < %s -analyze -delinearize | FileCheck %s + +; void foo(long n, long m, long o, double A[n][m][o], long p, long q, long r) { +; +; for (long i = 0; i < n; i++) +; for (long j = 0; j < m; j++) +; for (long k = 0; k < o; k++) +; A[i+p][j+q][k+r] = 1.0; +; } + +; AddRec: {{{((8 * ((((%m * %p) + %q) * %o) + %r)) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes. 
+; CHECK: ArrayRef[{%p,+,1}<nw><%for.i>][{%q,+,1}<nw><%for.j>][{%r,+,1}<nw><%for.k>] + +; AddRec: {{(-8 + (8 * ((((%m * %p) + %q) * %o) + %r)) + (8 * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][%o] with elements of sizeof(double) bytes. +; CHECK: ArrayRef[{(1 + (%m * %p) + %q),+,%m}<%for.i>][{(-1 + %r),+,%o}<%for.j>] + +; AddRec: {(-8 + (8 * ((((%m * %p) + %q) * %o) + %r)) + (8 * %m * %o) + %A),+,(8 * %m * %o)}<%for.i> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes. +; CHECK: ArrayRef[{(-1 + ((((1 + %p) * %m) + %q) * %o) + %r),+,(%m * %o)}<%for.i>] + +define void @foo(i64 %n, i64 %m, i64 %o, double* %A, i64 %p, i64 %q, i64 %r) { +entry: + br label %for.i + +for.i: + %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ] + br label %for.j + +for.j: + %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j.inc ] + br label %for.k + +for.k: + %k = phi i64 [ 0, %for.j ], [ %k.inc, %for.k.inc ] + %offset0 = add nsw i64 %i, %p + %subscript0 = mul i64 %offset0, %m + %offset1 = add nsw i64 %j, %q + %subscript1 = add i64 %offset1, %subscript0 + %subscript2 = mul i64 %subscript1, %o + %offset2 = add nsw i64 %k, %r + %subscript = add i64 %subscript2, %offset2 + %idx = getelementptr inbounds double* %A, i64 %subscript + store double 1.0, double* %idx + br label %for.k.inc + +for.k.inc: + %k.inc = add nsw i64 %k, 1 + %k.exitcond = icmp eq i64 %k.inc, %o + br i1 %k.exitcond, label %for.j.inc, label %for.k + +for.j.inc: + %j.inc = add nsw i64 %j, 1 + %j.exitcond = icmp eq i64 %j.inc, %m + br i1 %j.exitcond, label %for.i.inc, label %for.j + +for.i.inc: + %i.inc = add nsw i64 %i, 1 + %i.exitcond = icmp eq i64 %i.inc, %n + br i1 %i.exitcond, label %end, label %for.i + +end: + ret void +} diff --git a/test/Analysis/Delinearization/multidim_only_ivs_2d.ll b/test/Analysis/Delinearization/multidim_only_ivs_2d.ll new file mode 100644 index 0000000..d68a158 --- /dev/null +++ 
b/test/Analysis/Delinearization/multidim_only_ivs_2d.ll @@ -0,0 +1,46 @@ +; RUN: opt < %s -analyze -delinearize | FileCheck %s + +; Derived from the following code: +; +; void foo(long n, long m, double A[n][m]) { +; for (long i = 0; i < n; i++) +; for (long j = 0; j < m; j++) +; A[i][j] = 1.0; +; } + +; AddRec: {{%A,+,(8 * %m)}<%for.i>,+,8}<%for.j> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][%m] with elements of sizeof(double) bytes. +; CHECK: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>] + +; AddRec: {(-8 + (8 * %m) + %A),+,(8 * %m)}<%for.i> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes. +; CHECK: ArrayRef[{(-1 + %m),+,%m}<%for.i>] + +define void @foo(i64 %n, i64 %m, double* %A) { +entry: + br label %for.i + +for.i: + %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ] + %tmp = mul nsw i64 %i, %m + br label %for.j + +for.j: + %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j ] + %vlaarrayidx.sum = add i64 %j, %tmp + %arrayidx = getelementptr inbounds double* %A, i64 %vlaarrayidx.sum + store double 1.0, double* %arrayidx + %j.inc = add nsw i64 %j, 1 + %j.exitcond = icmp eq i64 %j.inc, %m + br i1 %j.exitcond, label %for.i.inc, label %for.j + +for.i.inc: + %i.inc = add nsw i64 %i, 1 + %i.exitcond = icmp eq i64 %i.inc, %n + br i1 %i.exitcond, label %end, label %for.i + +end: + ret void +} diff --git a/test/Analysis/Delinearization/multidim_only_ivs_2d_nested.ll b/test/Analysis/Delinearization/multidim_only_ivs_2d_nested.ll new file mode 100644 index 0000000..7207420 --- /dev/null +++ b/test/Analysis/Delinearization/multidim_only_ivs_2d_nested.ll @@ -0,0 +1,78 @@ +; RUN: opt < %s -analyze -delinearize | FileCheck %s + +; extern void bar(long n, long m, double A[n][m]); +; +; void foo(long a, long b) { +; for (long n = 1; n < a; ++n) +; for (long m = 1; m < b; ++m) { +; double A[n][m]; +; for (long i = 0; i < n; i++) +; for (long j = 0; j < m; j++) +; A[i][j] = 1.0; +; bar(n, m, A); +; } +; } 
+ +; AddRec: {{%vla.us,+,{8,+,8}<%for.cond7.preheader.lr.ph.split.us.us>}<%for.body9.lr.ph.us.us>,+,8}<%for.body9.us.us> +; CHECK: Base offset: %vla.us +; CHECK: ArrayDecl[UnknownSize][{1,+,1}<%for.cond7.preheader.lr.ph.split.us.us>] with elements of sizeof(double) bytes. +; CHECK: ArrayRef[{0,+,1}<nuw><nsw><%for.body9.lr.ph.us.us>][{0,+,1}<nuw><nsw><%for.body9.us.us>] + +define void @foo(i64 %a, i64 %b) nounwind uwtable { +entry: + %cmp43 = icmp sgt i64 %a, 1 + br i1 %cmp43, label %for.cond1.preheader.lr.ph, label %for.end19 + +for.cond1.preheader.lr.ph: ; preds = %entry + %cmp224 = icmp sgt i64 %b, 1 + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.inc17, %for.cond1.preheader.lr.ph + %indvars.iv51 = phi i64 [ 1, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next52, %for.inc17 ] + br i1 %cmp224, label %for.cond7.preheader.lr.ph.split.us.us, label %for.inc17 + +for.end13.us: ; preds = %for.inc11.us.us + call void @bar(i64 %indvars.iv51, i64 %indvars.iv48, double* %vla.us) nounwind + call void @llvm.stackrestore(i8* %1) + %indvars.iv.next49 = add i64 %indvars.iv48, 1 + %exitcond54 = icmp eq i64 %indvars.iv.next49, %b + br i1 %exitcond54, label %for.inc17, label %for.cond7.preheader.lr.ph.split.us.us + +for.inc11.us.us: ; preds = %for.body9.us.us + %inc12.us.us = add nsw i64 %i.023.us.us, 1 + %exitcond53 = icmp eq i64 %inc12.us.us, %indvars.iv51 + br i1 %exitcond53, label %for.end13.us, label %for.body9.lr.ph.us.us + +for.body9.lr.ph.us.us: ; preds = %for.cond7.preheader.lr.ph.split.us.us, %for.inc11.us.us + %i.023.us.us = phi i64 [ 0, %for.cond7.preheader.lr.ph.split.us.us ], [ %inc12.us.us, %for.inc11.us.us ] + %0 = mul nsw i64 %i.023.us.us, %indvars.iv48 + br label %for.body9.us.us + +for.body9.us.us: ; preds = %for.body9.us.us, %for.body9.lr.ph.us.us + %j.021.us.us = phi i64 [ 0, %for.body9.lr.ph.us.us ], [ %inc.us.us, %for.body9.us.us ] + %arrayidx.sum.us.us = add i64 %j.021.us.us, %0 + %arrayidx10.us.us = getelementptr inbounds double* 
%vla.us, i64 %arrayidx.sum.us.us + store double 1.000000e+00, double* %arrayidx10.us.us, align 8 + %inc.us.us = add nsw i64 %j.021.us.us, 1 + %exitcond50 = icmp eq i64 %inc.us.us, %indvars.iv48 + br i1 %exitcond50, label %for.inc11.us.us, label %for.body9.us.us + +for.cond7.preheader.lr.ph.split.us.us: ; preds = %for.cond1.preheader, %for.end13.us + %indvars.iv48 = phi i64 [ %indvars.iv.next49, %for.end13.us ], [ 1, %for.cond1.preheader ] + %1 = call i8* @llvm.stacksave() + %2 = mul nuw i64 %indvars.iv48, %indvars.iv51 + %vla.us = alloca double, i64 %2, align 16 + br label %for.body9.lr.ph.us.us + +for.inc17: ; preds = %for.end13.us, %for.cond1.preheader + %indvars.iv.next52 = add i64 %indvars.iv51, 1 + %exitcond55 = icmp eq i64 %indvars.iv.next52, %a + br i1 %exitcond55, label %for.end19, label %for.cond1.preheader + +for.end19: ; preds = %for.inc17, %entry + ret void +} + +declare i8* @llvm.stacksave() nounwind +declare void @bar(i64, i64, double*) +declare void @llvm.stackrestore(i8*) nounwind diff --git a/test/Analysis/Delinearization/multidim_only_ivs_3d.ll b/test/Analysis/Delinearization/multidim_only_ivs_3d.ll new file mode 100644 index 0000000..24f9583 --- /dev/null +++ b/test/Analysis/Delinearization/multidim_only_ivs_3d.ll @@ -0,0 +1,65 @@ +; RUN: opt < %s -analyze -delinearize | FileCheck %s + +; void foo(long n, long m, long o, double A[n][m][o]) { +; +; for (long i = 0; i < n; i++) +; for (long j = 0; j < m; j++) +; for (long k = 0; k < o; k++) +; A[i][j][k] = 1.0; +; } + +; AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes. +; CHECK: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>][{0,+,1}<nuw><nsw><%for.k>] + +; AddRec: {{(-8 + (8 * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][(%m * %o)] with elements of sizeof(double) bytes. 
+; CHECK: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{(-1 + %o),+,%o}<%for.j>] + +; AddRec: {(-8 + (8 * %m * %o) + %A),+,(8 * %m * %o)}<%for.i> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes. +; CHECK: ArrayRef[{(-1 + (%m * %o)),+,(%m * %o)}<%for.i>] + +define void @foo(i64 %n, i64 %m, i64 %o, double* %A) { +entry: + br label %for.i + +for.i: + %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ] + br label %for.j + +for.j: + %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j.inc ] + br label %for.k + +for.k: + %k = phi i64 [ 0, %for.j ], [ %k.inc, %for.k.inc ] + %subscript0 = mul i64 %i, %m + %subscript1 = add i64 %j, %subscript0 + %subscript2 = mul i64 %subscript1, %o + %subscript = add i64 %subscript2, %k + %idx = getelementptr inbounds double* %A, i64 %subscript + store double 1.0, double* %idx + br label %for.k.inc + +for.k.inc: + %k.inc = add nsw i64 %k, 1 + %k.exitcond = icmp eq i64 %k.inc, %o + br i1 %k.exitcond, label %for.j.inc, label %for.k + +for.j.inc: + %j.inc = add nsw i64 %j, 1 + %j.exitcond = icmp eq i64 %j.inc, %m + br i1 %j.exitcond, label %for.i.inc, label %for.j + +for.i.inc: + %i.inc = add nsw i64 %i, 1 + %i.exitcond = icmp eq i64 %i.inc, %n + br i1 %i.exitcond, label %end, label %for.i + +end: + ret void +} diff --git a/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll b/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll new file mode 100644 index 0000000..e151610 --- /dev/null +++ b/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll @@ -0,0 +1,75 @@ +; RUN: opt < %s -analyze -delinearize | FileCheck %s +; void foo(int n, int m, int o, double A[n][m][o]) { +; +; for (int i = 0; i < n; i++) +; for (int j = 0; j < m; j++) +; for (int k = 0; k < o; k++) +; A[i][j][k] = 1.0; +; } + +; AddRec: {{{%A,+,(8 * (zext i32 %m to i64) * (zext i32 %o to i64))}<%for.i>,+,(8 * (zext i32 %o to i64))}<%for.j>,+,8}<%for.k> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][(zext i32 %m to 
i64)][(zext i32 %o to i64)] with elements of 8 bytes. +; CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>] + +; AddRec: {{((8 * (zext i32 (-1 + %o) to i64)) + %A),+,(8 * (zext i32 %m to i64) * (zext i32 %o to i64))}<%for.i>,+,(8 * (zext i32 %o to i64))}<%for.j> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][((zext i32 %m to i64) * (zext i32 %o to i64))] with elements of 8 bytes. +; CHECK: ArrayRef[{0,+,1}<%for.i>][{(zext i32 (-1 + %o) to i64),+,(zext i32 %o to i64)}<%for.j>] + +; AddRec: {((8 * (zext i32 (-1 + %o) to i64)) + (8 * (zext i32 (-1 + %m) to i64) * (zext i32 %o to i64)) + %A),+,(8 * (zext i32 %m to i64) * (zext i32 %o to i64))}<%for.i> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize] with elements of 8 bytes. +; CHECK: ArrayRef[{((zext i32 (-1 + %o) to i64) + ((zext i32 (-1 + %m) to i64) * (zext i32 %o to i64))),+,((zext i32 %m to i64) * (zext i32 %o to i64))}<%for.i>] + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @foo(i32 %n, i32 %m, i32 %o, double* %A) { +entry: + %m_zext = zext i32 %m to i64 + %n_zext = zext i32 %o to i64 + br label %for.i + +for.i: + %i = phi i64 [ %i.inc, %for.i.inc ], [ 0, %entry ] + br label %for.j + +for.j: + %j = phi i64 [ %j.inc, %for.j.inc ], [ 0, %for.i ] + br label %for.k + +for.k: + %k = phi i64 [ %k.inc, %for.k.inc ], [ 0, %for.j ] + %tmp = mul i64 %i, %m_zext + %tmp1 = trunc i64 %j to i32 + %tmp2 = trunc i64 %i to i32 + %mul.us.us = mul nsw i32 %tmp1, %tmp2 + %tmp.us.us = add i64 %j, %tmp + %tmp17.us.us = mul i64 %tmp.us.us, %n_zext + %subscript = add i64 %tmp17.us.us, %k + %idx = getelementptr inbounds double* %A, i64 %subscript + store double 1.0, double* %idx + br label %for.k.inc + +for.k.inc: + %k.inc = add i64 %k, 1 + %k.inc.trunc = trunc i64 %k.inc to i32 + %k.exitcond = icmp eq i32 %k.inc.trunc, 
%o + br i1 %k.exitcond, label %for.j.inc, label %for.k + +for.j.inc: + %j.inc = add i64 %j, 1 + %j.inc.trunc = trunc i64 %j.inc to i32 + %j.exitcond = icmp eq i32 %j.inc.trunc, %m + br i1 %j.exitcond, label %for.i.inc, label %for.j + +for.i.inc: + %i.inc = add i64 %i, 1 + %i.inc.trunc = trunc i64 %i.inc to i32 + %i.exitcond = icmp eq i32 %i.inc.trunc, %n + br i1 %i.exitcond, label %end, label %for.i + +end: + ret void +} diff --git a/test/Analysis/DependenceAnalysis/Banerjee.ll b/test/Analysis/DependenceAnalysis/Banerjee.ll index 003ee03..09e8fd2 100644 --- a/test/Analysis/DependenceAnalysis/Banerjee.ll +++ b/test/Analysis/DependenceAnalysis/Banerjee.ll @@ -13,7 +13,7 @@ target triple = "x86_64-apple-macosx10.6.0" define void @banerjee0(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp { entry: br label %for.cond1.preheader - +; CHECK: 'Dependence Analysis' for function 'banerjee0': ; CHECK: da analyze - none! ; CHECK: da analyze - flow [<= <>]! ; CHECK: da analyze - confused! @@ -65,6 +65,7 @@ entry: %cmp4 = icmp sgt i64 %n, 0 br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end9 +; CHECK: 'Dependence Analysis' for function 'banerjee1': ; CHECK: da analyze - output [* *]! ; CHECK: da analyze - flow [* <>]! ; CHECK: da analyze - confused! @@ -131,6 +132,7 @@ define void @banerjee2(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp { entry: br label %for.cond1.preheader +; CHECK: 'Dependence Analysis' for function 'banerjee2': ; CHECK: da analyze - none! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! @@ -181,6 +183,7 @@ define void @banerjee3(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp { entry: br label %for.cond1.preheader +; CHECK: 'Dependence Analysis' for function 'banerjee3': ; CHECK: da analyze - none! ; CHECK: da analyze - flow [> >]! ; CHECK: da analyze - confused! 
@@ -231,6 +234,7 @@ define void @banerjee4(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp { entry: br label %for.cond1.preheader +; CHECK: 'Dependence Analysis' for function 'banerjee4': ; CHECK: da analyze - none! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! @@ -281,6 +285,7 @@ define void @banerjee5(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp { entry: br label %for.cond1.preheader +; CHECK: 'Dependence Analysis' for function 'banerjee5': ; CHECK: da analyze - none! ; CHECK: da analyze - flow [< <]! ; CHECK: da analyze - confused! @@ -331,6 +336,7 @@ define void @banerjee6(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp { entry: br label %for.cond1.preheader +; CHECK: 'Dependence Analysis' for function 'banerjee6': ; CHECK: da analyze - none! ; CHECK: da analyze - flow [=> <>]! ; CHECK: da analyze - confused! @@ -381,6 +387,7 @@ define void @banerjee7(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp { entry: br label %for.cond1.preheader +; CHECK: 'Dependence Analysis' for function 'banerjee7': ; CHECK: da analyze - none! ; CHECK: da analyze - flow [> <=]! ; CHECK: da analyze - confused! @@ -431,6 +438,7 @@ define void @banerjee8(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp { entry: br label %for.cond1.preheader +; CHECK: 'Dependence Analysis' for function 'banerjee8': ; CHECK: da analyze - none! ; CHECK: da analyze - flow [> <>]! ; CHECK: da analyze - confused! @@ -481,6 +489,7 @@ define void @banerjee9(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp { entry: br label %for.cond1.preheader +; CHECK: 'Dependence Analysis' for function 'banerjee9': ; CHECK: da analyze - output [* *]! ; CHECK: da analyze - flow [<= =|<]! ; CHECK: da analyze - confused! @@ -532,6 +541,7 @@ define void @banerjee10(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp { entry: br label %for.cond1.preheader +; CHECK: 'Dependence Analysis' for function 'banerjee10': ; CHECK: da analyze - none! 
; CHECK: da analyze - flow [<> =]! ; CHECK: da analyze - confused! @@ -582,6 +592,7 @@ define void @banerjee11(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp { entry: br label %for.cond1.preheader +; CHECK: 'Dependence Analysis' for function 'banerjee11': ; CHECK: da analyze - none! ; CHECK: da analyze - flow [<= <>]! ; CHECK: da analyze - confused! @@ -632,6 +643,7 @@ define void @banerjee12(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp { entry: br label %for.cond1.preheader +; CHECK: 'Dependence Analysis' for function 'banerjee12': ; CHECK: da analyze - none! ; CHECK: da analyze - flow [= <>]! ; CHECK: da analyze - confused! diff --git a/test/Analysis/DependenceAnalysis/GCD.ll b/test/Analysis/DependenceAnalysis/GCD.ll index a422124..bb31d11 100644 --- a/test/Analysis/DependenceAnalysis/GCD.ll +++ b/test/Analysis/DependenceAnalysis/GCD.ll @@ -14,6 +14,7 @@ define void @gcd0(i32* %A, i32* %B) nounwind uwtable ssp { entry: br label %for.cond1.preheader +; CHECK: 'Dependence Analysis' for function 'gcd0' ; CHECK: da analyze - output [* *]! ; CHECK: da analyze - flow [=> *|<]! ; CHECK: da analyze - confused! @@ -66,6 +67,7 @@ define void @gcd1(i32* %A, i32* %B) nounwind uwtable ssp { entry: br label %for.cond1.preheader +; CHECK: 'Dependence Analysis' for function 'gcd1' ; CHECK: da analyze - output [* *]! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! @@ -119,6 +121,7 @@ define void @gcd2(i32* %A, i32* %B) nounwind uwtable ssp { entry: br label %for.cond1.preheader +; CHECK: 'Dependence Analysis' for function 'gcd2' ; CHECK: da analyze - output [* *]! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! @@ -172,6 +175,7 @@ define void @gcd3(i32* %A, i32* %B) nounwind uwtable ssp { entry: br label %for.cond1.preheader +; CHECK: 'Dependence Analysis' for function 'gcd3' ; CHECK: da analyze - output [* *]! ; CHECK: da analyze - flow [<> *]! ; CHECK: da analyze - confused! 
@@ -223,6 +227,7 @@ define void @gcd4(i32* %A, i32* %B, i64 %M, i64 %N) nounwind uwtable ssp { entry: br label %for.cond1.preheader +; CHECK: 'Dependence Analysis' for function 'gcd4' ; CHECK: da analyze - output [* *]! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! @@ -284,6 +289,7 @@ define void @gcd5(i32* %A, i32* %B, i64 %M, i64 %N) nounwind uwtable ssp { entry: br label %for.cond1.preheader +; CHECK: 'Dependence Analysis' for function 'gcd5' ; CHECK: da analyze - output [* *]! ; CHECK: da analyze - flow [<> *]! ; CHECK: da analyze - confused! @@ -346,6 +352,7 @@ entry: %cmp4 = icmp sgt i64 %n, 0 br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end12 +; CHECK: 'Dependence Analysis' for function 'gcd6' ; CHECK: da analyze - output [* *]! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! @@ -417,6 +424,7 @@ entry: %cmp4 = icmp sgt i32 %n, 0 br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end15 +; CHECK: 'Dependence Analysis' for function 'gcd7' ; CHECK: da analyze - output [* *]! ; CHECK: da analyze - flow [* *|<]! ; CHECK: da analyze - confused! @@ -500,6 +508,7 @@ entry: %cmp4 = icmp sgt i32 %n, 0 br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end15 +; CHECK: 'Dependence Analysis' for function 'gcd8' ; CHECK: da analyze - output [* *]! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! @@ -578,6 +587,7 @@ entry: %cmp4 = icmp eq i32 %n, 0 br i1 %cmp4, label %for.end15, label %for.cond1.preheader.preheader +; CHECK: 'Dependence Analysis' for function 'gcd9' ; CHECK: da analyze - output [* *]! ; CHECK: da analyze - flow [* *|<]! ; CHECK: da analyze - confused! 
diff --git a/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll b/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll index 81e6189..5443909 100644 --- a/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll +++ b/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll @@ -15,6 +15,7 @@ entry: %cmp4 = icmp eq i64 %n1, 0 br i1 %cmp4, label %for.cond1.preheader, label %for.body.preheader +; CHECK: 'Dependence Analysis' for function 'symbolicrdiv0' ; CHECK: da analyze - none! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! @@ -77,6 +78,7 @@ entry: %cmp4 = icmp eq i64 %n1, 0 br i1 %cmp4, label %for.cond2.preheader, label %for.body.preheader +; CHECK: 'Dependence Analysis' for function 'symbolicrdiv1' ; CHECK: da analyze - none! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! @@ -141,6 +143,7 @@ entry: %cmp4 = icmp eq i64 %n1, 0 br i1 %cmp4, label %for.cond1.preheader, label %for.body.preheader +; CHECK: 'Dependence Analysis' for function 'symbolicrdiv2' ; CHECK: da analyze - none! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! @@ -203,6 +206,7 @@ entry: %cmp4 = icmp eq i64 %n1, 0 br i1 %cmp4, label %for.cond1.preheader, label %for.body.preheader +; CHECK: 'Dependence Analysis' for function 'symbolicrdiv3' ; CHECK: da analyze - none! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! @@ -263,6 +267,7 @@ entry: %cmp4 = icmp eq i64 %n1, 0 br i1 %cmp4, label %for.cond1.preheader, label %for.body.preheader +; CHECK: 'Dependence Analysis' for function 'symbolicrdiv4' ; CHECK: da analyze - none! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! @@ -324,6 +329,7 @@ entry: %cmp4 = icmp eq i64 %n1, 0 br i1 %cmp4, label %for.cond1.preheader, label %for.body.preheader +; CHECK: 'Dependence Analysis' for function 'symbolicrdiv5' ; CHECK: da analyze - none! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! 
@@ -385,6 +391,7 @@ entry: %cmp4 = icmp eq i64 %n1, 0 br i1 %cmp4, label %for.end7, label %for.cond1.preheader.preheader +; CHECK: 'Dependence Analysis' for function 'symbolicrdiv6' ; CHECK: da analyze - output [* *]! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! diff --git a/test/Analysis/Dominators/lit.local.cfg b/test/Analysis/Dominators/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Analysis/Dominators/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Analysis/GlobalsModRef/lit.local.cfg b/test/Analysis/GlobalsModRef/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Analysis/GlobalsModRef/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Analysis/Lint/check-zero-divide.ll b/test/Analysis/Lint/check-zero-divide.ll new file mode 100644 index 0000000..f4e79ed --- /dev/null +++ b/test/Analysis/Lint/check-zero-divide.ll @@ -0,0 +1,78 @@ +; RUN: opt -lint -disable-output %s 2>&1 | FileCheck %s + +define <2 x i32> @use_vector_sdiv(<2 x i32> %a) nounwind { + %b = sdiv <2 x i32> %a, <i32 5, i32 8> + ret <2 x i32> %b +} + +define <2 x i32> @use_vector_srem(<2 x i32> %a) nounwind { + %b = srem <2 x i32> %a, <i32 5, i32 8> + ret <2 x i32> %b +} + +define <2 x i32> @use_vector_udiv(<2 x i32> %a) nounwind { + %b = udiv <2 x i32> %a, <i32 5, i32 8> + ret <2 x i32> %b +} + +define <2 x i32> @use_vector_urem(<2 x i32> %a) nounwind { + %b = urem <2 x i32> %a, <i32 5, i32 8> + ret <2 x i32> %b +} + +define i32 @use_sdiv_by_zero(i32 %a) nounwind { +; CHECK: Undefined behavior: Division by zero +; CHECK-NEXT: %b = sdiv i32 %a, 0 + %b = sdiv i32 %a, 0 + ret i32 %b +} + +define i32 @use_sdiv_by_zeroinitializer(i32 %a) nounwind { +; CHECK: Undefined behavior: Division by zero +; CHECK-NEXT: %b = sdiv i32 %a, 0 + %b = sdiv i32 %a, zeroinitializer + ret i32 %b +} + +define <2 x i32> @use_vector_sdiv_by_zero_x(<2 x i32> %a) 
nounwind { +; CHECK: Undefined behavior: Division by zero +; CHECK-NEXT: %b = sdiv <2 x i32> %a, <i32 0, i32 5> + %b = sdiv <2 x i32> %a, <i32 0, i32 5> + ret <2 x i32> %b +} + +define <2 x i32> @use_vector_sdiv_by_zero_y(<2 x i32> %a) nounwind { +; CHECK: Undefined behavior: Division by zero +; CHECK-NEXT: %b = sdiv <2 x i32> %a, <i32 4, i32 0> + %b = sdiv <2 x i32> %a, <i32 4, i32 0> + ret <2 x i32> %b +} + +define <2 x i32> @use_vector_sdiv_by_zero_xy(<2 x i32> %a) nounwind { +; CHECK: Undefined behavior: Division by zero +; CHECK-NEXT: %b = sdiv <2 x i32> %a, zeroinitializer + %b = sdiv <2 x i32> %a, <i32 0, i32 0> + ret <2 x i32> %b +} + +define <2 x i32> @use_vector_sdiv_by_undef_x(<2 x i32> %a) nounwind { +; CHECK: Undefined behavior: Division by zero +; CHECK-NEXT: %b = sdiv <2 x i32> %a, <i32 undef, i32 5> + %b = sdiv <2 x i32> %a, <i32 undef, i32 5> + ret <2 x i32> %b +} + +define <2 x i32> @use_vector_sdiv_by_undef_y(<2 x i32> %a) nounwind { +; CHECK: Undefined behavior: Division by zero +; CHECK-NEXT: %b = sdiv <2 x i32> %a, <i32 5, i32 undef> + %b = sdiv <2 x i32> %a, <i32 5, i32 undef> + ret <2 x i32> %b +} + +define <2 x i32> @use_vector_sdiv_by_undef_xy(<2 x i32> %a) nounwind { +; CHECK: Undefined behavior: Division by zero +; CHECK-NEXT: %b = sdiv <2 x i32> %a, undef + %b = sdiv <2 x i32> %a, <i32 undef, i32 undef> + ret <2 x i32> %b +} + diff --git a/test/Analysis/DependenceAnalysis/lit.local.cfg b/test/Analysis/Lint/lit.local.cfg index c6106e4..c6106e4 100644 --- a/test/Analysis/DependenceAnalysis/lit.local.cfg +++ b/test/Analysis/Lint/lit.local.cfg diff --git a/test/Analysis/LoopInfo/2003-05-15-NestingProblem.ll b/test/Analysis/LoopInfo/2003-05-15-NestingProblem.ll index 7119007..a87bab7 100644 --- a/test/Analysis/LoopInfo/2003-05-15-NestingProblem.ll +++ b/test/Analysis/LoopInfo/2003-05-15-NestingProblem.ll @@ -1,8 +1,9 @@ ; This testcase was incorrectly computing that the loopentry.7 loop was ; not a child of the loopentry.6 loop. 
; -; RUN: opt < %s -analyze -loops | \ -; RUN: grep "^ Loop at depth 4 containing: %loopentry.7<header><latch><exiting>" +; RUN: opt < %s -analyze -loops | FileCheck %s + +; CHECK: Loop at depth 4 containing: %loopentry.7<header><latch><exiting> define void @getAndMoveToFrontDecode() { br label %endif.2 diff --git a/test/Analysis/LoopInfo/lit.local.cfg b/test/Analysis/LoopInfo/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Analysis/LoopInfo/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Analysis/MemoryDependenceAnalysis/lit.local.cfg b/test/Analysis/MemoryDependenceAnalysis/lit.local.cfg deleted file mode 100644 index c6106e4..0000000 --- a/test/Analysis/MemoryDependenceAnalysis/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll'] diff --git a/test/Analysis/PostDominators/lit.local.cfg b/test/Analysis/PostDominators/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Analysis/PostDominators/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Analysis/PostDominators/pr1098.ll b/test/Analysis/PostDominators/pr1098.ll index afb4776..2eed213 100644 --- a/test/Analysis/PostDominators/pr1098.ll +++ b/test/Analysis/PostDominators/pr1098.ll @@ -1,7 +1,8 @@ -; RUN: opt < %s -postdomtree -analyze | grep entry +; RUN: opt < %s -postdomtree -analyze | FileCheck %s ; PR932 define void @foo(i1 %x) { +; CHECK: entry entry: br i1 %x, label %bb1, label %bb0 bb0: ; preds = %entry, bb0 diff --git a/test/Analysis/Profiling/edge-profiling.ll b/test/Analysis/Profiling/edge-profiling.ll deleted file mode 100644 index cbaf476..0000000 --- a/test/Analysis/Profiling/edge-profiling.ll +++ /dev/null @@ -1,139 +0,0 @@ -; Test the edge profiling instrumentation. 
-; RUN: opt < %s -insert-edge-profiling -S | FileCheck %s - -; ModuleID = '<stdin>' - -@.str = private constant [12 x i8] c"hello world\00", align 1 ; <[12 x i8]*> [#uses=1] -@.str1 = private constant [6 x i8] c"franz\00", align 1 ; <[6 x i8]*> [#uses=1] -@.str2 = private constant [9 x i8] c"argc > 2\00", align 1 ; <[9 x i8]*> [#uses=1] -@.str3 = private constant [9 x i8] c"argc = 1\00", align 1 ; <[9 x i8]*> [#uses=1] -@.str4 = private constant [6 x i8] c"fritz\00", align 1 ; <[6 x i8]*> [#uses=1] -@.str5 = private constant [10 x i8] c"argc <= 1\00", align 1 ; <[10 x i8]*> [#uses=1] -; CHECK:@EdgeProfCounters -; CHECK:[19 x i32] -; CHECK:zeroinitializer - -define void @oneblock() nounwind { -entry: -; CHECK:entry: -; CHECK:%OldFuncCounter -; CHECK:load -; CHECK:getelementptr -; CHECK:@EdgeProfCounters -; CHECK:i32 0 -; CHECK:i32 0 -; CHECK:%NewFuncCounter -; CHECK:add -; CHECK:%OldFuncCounter -; CHECK:store -; CHECK:%NewFuncCounter -; CHECK:getelementptr -; CHECK:@EdgeProfCounters - %0 = call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @.str, i64 0, i64 0)) nounwind ; <i32> [#uses=0] - ret void -} - -declare i32 @puts(i8*) - -define i32 @main(i32 %argc, i8** %argv) nounwind { -entry: -; CHECK:entry: - %argc_addr = alloca i32 ; <i32*> [#uses=4] - %argv_addr = alloca i8** ; <i8***> [#uses=1] - %retval = alloca i32 ; <i32*> [#uses=2] - %j = alloca i32 ; <i32*> [#uses=4] - %i = alloca i32 ; <i32*> [#uses=4] - %0 = alloca i32 ; <i32*> [#uses=2] -; CHECK:call -; CHECK:@llvm_start_edge_profiling -; CHECK:@EdgeProfCounters - %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] - store i32 %argc, i32* %argc_addr - store i8** %argv, i8*** %argv_addr - store i32 0, i32* %i, align 4 - br label %bb10 - -bb: ; preds = %bb10 -; CHECK:bb: - %1 = load i32* %argc_addr, align 4 ; <i32> [#uses=1] - %2 = icmp sgt i32 %1, 1 ; <i1> [#uses=1] - br i1 %2, label %bb1, label %bb8 - -bb1: ; preds = %bb -; CHECK:bb1: - store i32 0, i32* %j, align 4 - br label %bb6 - -bb2: ; preds = 
%bb6 -; CHECK:bb2: - %3 = call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str1, i64 0, i64 0)) nounwind ; <i32> [#uses=0] - %4 = load i32* %argc_addr, align 4 ; <i32> [#uses=1] - %5 = icmp sgt i32 %4, 2 ; <i1> [#uses=1] - br i1 %5, label %bb3, label %bb4 - -bb3: ; preds = %bb2 -; CHECK:bb3: - %6 = call i32 @puts(i8* getelementptr inbounds ([9 x i8]* @.str2, i64 0, i64 0)) nounwind ; <i32> [#uses=0] - br label %bb5 - -bb4: ; preds = %bb2 -; CHECK:bb4: - %7 = call i32 @puts(i8* getelementptr inbounds ([9 x i8]* @.str3, i64 0, i64 0)) nounwind ; <i32> [#uses=0] - br label %bb11 - -bb5: ; preds = %bb3 -; CHECK:bb5: - %8 = call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str4, i64 0, i64 0)) nounwind ; <i32> [#uses=0] - %9 = load i32* %j, align 4 ; <i32> [#uses=1] - %10 = add nsw i32 %9, 1 ; <i32> [#uses=1] - store i32 %10, i32* %j, align 4 - br label %bb6 - -bb6: ; preds = %bb5, %bb1 -; CHECK:bb6: - %11 = load i32* %j, align 4 ; <i32> [#uses=1] - %12 = load i32* %argc_addr, align 4 ; <i32> [#uses=1] - %13 = icmp slt i32 %11, %12 ; <i1> [#uses=1] - br i1 %13, label %bb2, label %bb7 - -bb7: ; preds = %bb6 -; CHECK:bb7: - br label %bb9 - -bb8: ; preds = %bb -; CHECK:bb8: - %14 = call i32 @puts(i8* getelementptr inbounds ([10 x i8]* @.str5, i64 0, i64 0)) nounwind ; <i32> [#uses=0] - br label %bb9 - -bb9: ; preds = %bb8, %bb7 -; CHECK:bb9: - %15 = load i32* %i, align 4 ; <i32> [#uses=1] - %16 = add nsw i32 %15, 1 ; <i32> [#uses=1] - store i32 %16, i32* %i, align 4 - br label %bb10 - -bb10: ; preds = %bb9, %entry -; CHECK:bb10: - %17 = load i32* %i, align 4 ; <i32> [#uses=1] - %18 = icmp ne i32 %17, 3 ; <i1> [#uses=1] - br i1 %18, label %bb, label %bb11 -; CHECK:br -; CHECK:label %bb10.bb11_crit_edge - -; CHECK:bb10.bb11_crit_edge: -; CHECK:br -; CHECK:label %bb11 - -bb11: ; preds = %bb10, %bb4 -; CHECK:bb11: - call void @oneblock() nounwind - store i32 0, i32* %0, align 4 - %19 = load i32* %0, align 4 ; <i32> [#uses=1] - store i32 %19, i32* %retval, align 4 - 
br label %return - -return: ; preds = %bb11 -; CHECK:return: - %retval12 = load i32* %retval ; <i32> [#uses=1] - ret i32 %retval12 -} diff --git a/test/Analysis/Profiling/lit.local.cfg b/test/Analysis/Profiling/lit.local.cfg deleted file mode 100644 index d40fa4f..0000000 --- a/test/Analysis/Profiling/lit.local.cfg +++ /dev/null @@ -1,11 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - -def getRoot(config): - if not config.parent: - return config - return getRoot(config.parent) - -root = getRoot(config) - -if 'hexagon' in root.target_triple: - config.unsupported = True diff --git a/test/Analysis/Profiling/load-branch-weights-ifs.ll b/test/Analysis/Profiling/load-branch-weights-ifs.ll deleted file mode 100644 index 7ed090b..0000000 --- a/test/Analysis/Profiling/load-branch-weights-ifs.ll +++ /dev/null @@ -1,122 +0,0 @@ -; RUN: opt -insert-edge-profiling -o %t1 < %s -; RUN: rm -f %t1.prof_data -; RUN: lli %defaultjit -load %llvmshlibdir/libprofile_rt%shlibext %t1 \ -; RUN: -llvmprof-output %t1.prof_data -; RUN: opt -profile-file %t1.prof_data -profile-metadata-loader -S -o - < %s \ -; RUN: | FileCheck %s -; RUN: rm -f %t1.prof_data - -; FIXME: profile_rt.dll could be built on win32. -; REQUIRES: loadable_module - -;; func_mod - Branch taken 6 times in 7. -define i32 @func_mod(i32 %N) nounwind uwtable { -entry: - %retval = alloca i32, align 4 - %N.addr = alloca i32, align 4 - store i32 %N, i32* %N.addr, align 4 - %0 = load i32* %N.addr, align 4 - %rem = srem i32 %0, 7 - %tobool = icmp ne i32 %rem, 0 - br i1 %tobool, label %if.then, label %if.else -; CHECK: br i1 %tobool, label %if.then, label %if.else, !prof !0 - -if.then: - store i32 1, i32* %retval - br label %return - -if.else: - store i32 0, i32* %retval - br label %return - -return: - %1 = load i32* %retval - ret i32 %1 -} - -;; func_const_true - conditional branch which 100% taken probability. 
-define i32 @func_const_true(i32 %N) nounwind uwtable { -entry: - %retval = alloca i32, align 4 - %N.addr = alloca i32, align 4 - store i32 %N, i32* %N.addr, align 4 - %0 = load i32* %N.addr, align 4 - %cmp = icmp eq i32 %0, 1 - br i1 %cmp, label %if.then, label %if.end -; CHECK: br i1 %cmp, label %if.then, label %if.end, !prof !1 - -if.then: - store i32 1, i32* %retval - br label %return - -if.end: - store i32 0, i32* %retval - br label %return - -return: - %1 = load i32* %retval - ret i32 %1 -} - -;; func_const_true - conditional branch which 100% not-taken probability. -define i32 @func_const_false(i32 %N) nounwind uwtable { -entry: - %retval = alloca i32, align 4 - %N.addr = alloca i32, align 4 - store i32 %N, i32* %N.addr, align 4 - %0 = load i32* %N.addr, align 4 - %cmp = icmp eq i32 %0, 1 - br i1 %cmp, label %if.then, label %if.end -; CHECK: br i1 %cmp, label %if.then, label %if.end, !prof !2 - -if.then: - store i32 1, i32* %retval - br label %return - -if.end: - store i32 0, i32* %retval - br label %return - -return: - %1 = load i32* %retval - ret i32 %1 -} - -define i32 @main(i32 %argc, i8** %argv) nounwind uwtable { -entry: - %retval = alloca i32, align 4 - %argc.addr = alloca i32, align 4 - %argv.addr = alloca i8**, align 8 - %loop = alloca i32, align 4 - store i32 0, i32* %retval - store i32 0, i32* %loop, align 4 - br label %for.cond - -for.cond: - %0 = load i32* %loop, align 4 - %cmp = icmp slt i32 %0, 7000 - br i1 %cmp, label %for.body, label %for.end -; CHECK: br i1 %cmp, label %for.body, label %for.end, !prof !3 - -for.body: - %1 = load i32* %loop, align 4 - %call = call i32 @func_mod(i32 %1) - br label %for.inc - -for.inc: - %2 = load i32* %loop, align 4 - %inc = add nsw i32 %2, 1 - store i32 %inc, i32* %loop, align 4 - br label %for.cond - -for.end: - %call1 = call i32 @func_const_true(i32 1) - %call2 = call i32 @func_const_false(i32 0) - ret i32 0 -} - -; CHECK: !0 = metadata !{metadata !"branch_weights", i32 6000, i32 1000} -; CHECK: !1 = 
metadata !{metadata !"branch_weights", i32 1, i32 0} -; CHECK: !2 = metadata !{metadata !"branch_weights", i32 0, i32 1} -; CHECK: !3 = metadata !{metadata !"branch_weights", i32 7000, i32 1} -; CHECK-NOT: !4 diff --git a/test/Analysis/Profiling/load-branch-weights-loops.ll b/test/Analysis/Profiling/load-branch-weights-loops.ll deleted file mode 100644 index 9d1925a..0000000 --- a/test/Analysis/Profiling/load-branch-weights-loops.ll +++ /dev/null @@ -1,188 +0,0 @@ -; RUN: opt -insert-edge-profiling -o %t1 < %s -; RUN: rm -f %t1.prof_data -; RUN: lli %defaultjit -load %llvmshlibdir/libprofile_rt%shlibext %t1 \ -; RUN: -llvmprof-output %t1.prof_data -; RUN: opt -profile-file %t1.prof_data -profile-metadata-loader -S -o - < %s \ -; RUN: | FileCheck %s -; RUN: rm -f %t1.prof_data - -; FIXME: profile_rt.dll could be built on win32. -; REQUIRES: loadable_module - -;; func_for - Test branch probabilities for a vanilla for loop. -define i32 @func_for(i32 %N) nounwind uwtable { -entry: - %N.addr = alloca i32, align 4 - %ret = alloca i32, align 4 - %loop = alloca i32, align 4 - store i32 %N, i32* %N.addr, align 4 - store i32 0, i32* %ret, align 4 - store i32 0, i32* %loop, align 4 - br label %for.cond - -for.cond: - %0 = load i32* %loop, align 4 - %1 = load i32* %N.addr, align 4 - %cmp = icmp slt i32 %0, %1 - br i1 %cmp, label %for.body, label %for.end -; CHECK: br i1 %cmp, label %for.body, label %for.end, !prof !0 - -for.body: - %2 = load i32* %N.addr, align 4 - %3 = load i32* %ret, align 4 - %add = add nsw i32 %3, %2 - store i32 %add, i32* %ret, align 4 - br label %for.inc - -for.inc: - %4 = load i32* %loop, align 4 - %inc = add nsw i32 %4, 1 - store i32 %inc, i32* %loop, align 4 - br label %for.cond - -for.end: - %5 = load i32* %ret, align 4 - ret i32 %5 -} - -;; func_for_odd - Test branch probabilities for a for loop with a continue and -;; a break. 
-define i32 @func_for_odd(i32 %N) nounwind uwtable { -entry: - %N.addr = alloca i32, align 4 - %ret = alloca i32, align 4 - %loop = alloca i32, align 4 - store i32 %N, i32* %N.addr, align 4 - store i32 0, i32* %ret, align 4 - store i32 0, i32* %loop, align 4 - br label %for.cond - -for.cond: - %0 = load i32* %loop, align 4 - %1 = load i32* %N.addr, align 4 - %cmp = icmp slt i32 %0, %1 - br i1 %cmp, label %for.body, label %for.end -; CHECK: br i1 %cmp, label %for.body, label %for.end, !prof !1 - -for.body: - %2 = load i32* %loop, align 4 - %rem = srem i32 %2, 10 - %tobool = icmp ne i32 %rem, 0 - br i1 %tobool, label %if.then, label %if.end -; CHECK: br i1 %tobool, label %if.then, label %if.end, !prof !2 - -if.then: - br label %for.inc - -if.end: - %3 = load i32* %loop, align 4 - %cmp1 = icmp eq i32 %3, 500 - br i1 %cmp1, label %if.then2, label %if.end3 -; CHECK: br i1 %cmp1, label %if.then2, label %if.end3, !prof !3 - -if.then2: - br label %for.end - -if.end3: - %4 = load i32* %N.addr, align 4 - %5 = load i32* %ret, align 4 - %add = add nsw i32 %5, %4 - store i32 %add, i32* %ret, align 4 - br label %for.inc - -for.inc: - %6 = load i32* %loop, align 4 - %inc = add nsw i32 %6, 1 - store i32 %inc, i32* %loop, align 4 - br label %for.cond - -for.end: - %7 = load i32* %ret, align 4 - ret i32 %7 -} - -;; func_while - Test branch probability in a vanilla while loop. 
-define i32 @func_while(i32 %N) nounwind uwtable { -entry: - %N.addr = alloca i32, align 4 - %ret = alloca i32, align 4 - %loop = alloca i32, align 4 - store i32 %N, i32* %N.addr, align 4 - store i32 0, i32* %ret, align 4 - store i32 0, i32* %loop, align 4 - br label %while.cond - -while.cond: - %0 = load i32* %loop, align 4 - %1 = load i32* %N.addr, align 4 - %cmp = icmp slt i32 %0, %1 - br i1 %cmp, label %while.body, label %while.end -; CHECK: br i1 %cmp, label %while.body, label %while.end, !prof !0 - -while.body: - %2 = load i32* %N.addr, align 4 - %3 = load i32* %ret, align 4 - %add = add nsw i32 %3, %2 - store i32 %add, i32* %ret, align 4 - %4 = load i32* %loop, align 4 - %inc = add nsw i32 %4, 1 - store i32 %inc, i32* %loop, align 4 - br label %while.cond - -while.end: - %5 = load i32* %ret, align 4 - ret i32 %5 -} - -;; func_while - Test branch probability in a vanilla do-while loop. -define i32 @func_do_while(i32 %N) nounwind uwtable { -entry: - %N.addr = alloca i32, align 4 - %ret = alloca i32, align 4 - %loop = alloca i32, align 4 - store i32 %N, i32* %N.addr, align 4 - store i32 0, i32* %ret, align 4 - store i32 0, i32* %loop, align 4 - br label %do.body - -do.body: - %0 = load i32* %N.addr, align 4 - %1 = load i32* %ret, align 4 - %add = add nsw i32 %1, %0 - store i32 %add, i32* %ret, align 4 - %2 = load i32* %loop, align 4 - %inc = add nsw i32 %2, 1 - store i32 %inc, i32* %loop, align 4 - br label %do.cond - -do.cond: - %3 = load i32* %loop, align 4 - %4 = load i32* %N.addr, align 4 - %cmp = icmp slt i32 %3, %4 - br i1 %cmp, label %do.body, label %do.end -; CHECK: br i1 %cmp, label %do.body, label %do.end, !prof !4 - -do.end: - %5 = load i32* %ret, align 4 - ret i32 %5 -} - -define i32 @main(i32 %argc, i8** %argv) nounwind uwtable { -entry: - %retval = alloca i32, align 4 - %argc.addr = alloca i32, align 4 - %argv.addr = alloca i8**, align 8 - store i32 0, i32* %retval - %call = call i32 @func_for(i32 1000) - %call1 = call i32 @func_for_odd(i32 1000) 
- %call2 = call i32 @func_while(i32 1000) - %call3 = call i32 @func_do_while(i32 1000) - ret i32 0 -} - -!0 = metadata !{metadata !"branch_weights", i32 1000, i32 1} -!1 = metadata !{metadata !"branch_weights", i32 501, i32 0} -!2 = metadata !{metadata !"branch_weights", i32 450, i32 51} -!3 = metadata !{metadata !"branch_weights", i32 1, i32 50} -!4 = metadata !{metadata !"branch_weights", i32 999, i32 1} -; CHECK-NOT: !5 diff --git a/test/Analysis/Profiling/load-branch-weights-switches.ll b/test/Analysis/Profiling/load-branch-weights-switches.ll deleted file mode 100644 index 5587c71..0000000 --- a/test/Analysis/Profiling/load-branch-weights-switches.ll +++ /dev/null @@ -1,165 +0,0 @@ -; RUN: opt -insert-edge-profiling -o %t1 < %s -; RUN: rm -f %t1.prof_data -; RUN: lli %defaultjit -load %llvmshlibdir/libprofile_rt%shlibext %t1 \ -; RUN: -llvmprof-output %t1.prof_data -; RUN: opt -profile-file %t1.prof_data -profile-metadata-loader -S -o - < %s \ -; RUN: | FileCheck %s -; RUN: rm -f %t1.prof_data - -; FIXME: profile_rt.dll could be built on win32. -; REQUIRES: loadable_module - -;; func_switch - Test branch probabilities for a switch instruction with an -;; even chance of taking each case (or no case). -define i32 @func_switch(i32 %N) nounwind uwtable { -entry: - %retval = alloca i32, align 4 - %N.addr = alloca i32, align 4 - store i32 %N, i32* %N.addr, align 4 - %0 = load i32* %N.addr, align 4 - %rem = srem i32 %0, 4 - switch i32 %rem, label %sw.epilog [ - i32 0, label %sw.bb - i32 1, label %sw.bb1 - i32 2, label %sw.bb2 - ] -; CHECK: ], !prof !0 - -sw.bb: - store i32 5, i32* %retval - br label %return - -sw.bb1: - store i32 6, i32* %retval - br label %return - -sw.bb2: - store i32 7, i32* %retval - br label %return - -sw.epilog: - store i32 8, i32* %retval - br label %return - -return: - %1 = load i32* %retval - ret i32 %1 -} - -;; func_switch_switch - Test branch probabilities in a switch-instruction that -;; leads to further switch instructions. 
The first-tier switch occludes some -;; possibilities in the second-tier switches, leading to some branches having a -;; 0 probability. -define i32 @func_switch_switch(i32 %N) nounwind uwtable { -entry: - %retval = alloca i32, align 4 - %N.addr = alloca i32, align 4 - store i32 %N, i32* %N.addr, align 4 - %0 = load i32* %N.addr, align 4 - %rem = srem i32 %0, 2 - switch i32 %rem, label %sw.default11 [ - i32 0, label %sw.bb - i32 1, label %sw.bb5 - ] -; CHECK: ], !prof !1 - -sw.bb: - %1 = load i32* %N.addr, align 4 - %rem1 = srem i32 %1, 4 - switch i32 %rem1, label %sw.default [ - i32 0, label %sw.bb2 - i32 1, label %sw.bb3 - i32 2, label %sw.bb4 - ] -; CHECK: ], !prof !2 - -sw.bb2: - store i32 5, i32* %retval - br label %return - -sw.bb3: - store i32 6, i32* %retval - br label %return - -sw.bb4: - store i32 7, i32* %retval - br label %return - -sw.default: - store i32 8, i32* %retval - br label %return - -sw.bb5: - %2 = load i32* %N.addr, align 4 - %rem6 = srem i32 %2, 4 - switch i32 %rem6, label %sw.default10 [ - i32 0, label %sw.bb7 - i32 1, label %sw.bb8 - i32 2, label %sw.bb9 - ] -; CHECK: ], !prof !3 - -sw.bb7: - store i32 9, i32* %retval - br label %return - -sw.bb8: - store i32 10, i32* %retval - br label %return - -sw.bb9: - store i32 11, i32* %retval - br label %return - -sw.default10: - store i32 12, i32* %retval - br label %return - -sw.default11: - store i32 13, i32* %retval - br label %return - -return: - %3 = load i32* %retval - ret i32 %3 -} - -define i32 @main(i32 %argc, i8** %argv) nounwind uwtable { -entry: - %retval = alloca i32, align 4 - %argc.addr = alloca i32, align 4 - %argv.addr = alloca i8**, align 8 - %loop = alloca i32, align 4 - store i32 0, i32* %retval - store i32 0, i32* %loop, align 4 - br label %for.cond - -for.cond: - %0 = load i32* %loop, align 4 - %cmp = icmp slt i32 %0, 4000 - br i1 %cmp, label %for.body, label %for.end -; CHECK: br i1 %cmp, label %for.body, label %for.end, !prof !4 - -for.body: - %1 = load i32* %loop, align 4 - 
%call = call i32 @func_switch(i32 %1) - %2 = load i32* %loop, align 4 - %call1 = call i32 @func_switch_switch(i32 %2) - br label %for.inc - -for.inc: - %3 = load i32* %loop, align 4 - %inc = add nsw i32 %3, 1 - store i32 %inc, i32* %loop, align 4 - br label %for.cond - -for.end: - ret i32 0 -} - -; CHECK: !0 = metadata !{metadata !"branch_weights", i32 1000, i32 1000, i32 1000, i32 1000} -; CHECK: !1 = metadata !{metadata !"branch_weights", i32 0, i32 2000, i32 2000} -; CHECK: !2 = metadata !{metadata !"branch_weights", i32 0, i32 1000, i32 0, i32 1000} -; CHECK: !3 = metadata !{metadata !"branch_weights", i32 1000, i32 0, i32 1000, i32 0} -; CHECK: !4 = metadata !{metadata !"branch_weights", i32 4000, i32 1} -; CHECK-NOT: !5 diff --git a/test/Analysis/Profiling/profiling-tool-chain.ll b/test/Analysis/Profiling/profiling-tool-chain.ll deleted file mode 100644 index 9135a85..0000000 --- a/test/Analysis/Profiling/profiling-tool-chain.ll +++ /dev/null @@ -1,212 +0,0 @@ -; RUN: llvm-as %s -o %t1 - -; FIXME: The RUX parts of the test are disabled for now, they aren't working on -; llvm-gcc-x86_64-darwin10-selfhost. - -; Test the edge optimal profiling instrumentation. -; RUN: opt %t1 -insert-optimal-edge-profiling -o %t2 -; RUX: llvm-dis < %t2 | FileCheck --check-prefix=INST %s - -; Test the creation, reading and displaying of profile -; RUX: rm -f llvmprof.out -; RUX: lli -load %llvmshlibdir/profile_rt%shlibext %t2 -; RUX: lli -load %llvmshlibdir/profile_rt%shlibext %t2 1 2 -; RUX: llvm-prof -print-all-code %t1 | FileCheck --check-prefix=PROF %s - -; Test the loaded profile also with verifier. -; RUX opt %t1 -profile-loader -profile-verifier -o %t3 - -; Test profile estimator. -; RUN: opt %t1 -profile-estimator -profile-verifier -o %t3 - -; PROF: 1. 2/4 oneblock -; PROF: 2. 2/4 main -; PROF: 1. 15.7895% 12/76 main() - bb6 -; PROF: 2. 11.8421% 9/76 main() - bb2 -; PROF: 3. 11.8421% 9/76 main() - bb3 -; PROF: 4. 11.8421% 9/76 main() - bb5 -; PROF: 5. 
10.5263% 8/76 main() - bb10 -; PROF: 6. 7.89474% 6/76 main() - bb -; PROF: 7. 7.89474% 6/76 main() - bb9 -; PROF: 8. 3.94737% 3/76 main() - bb1 -; PROF: 9. 3.94737% 3/76 main() - bb7 -; PROF: 10. 3.94737% 3/76 main() - bb8 -; PROF: 11. 2.63158% 2/76 oneblock() - entry -; PROF: 12. 2.63158% 2/76 main() - entry -; PROF: 13. 2.63158% 2/76 main() - bb11 -; PROF: 14. 2.63158% 2/76 main() - return - -; ModuleID = '<stdin>' - -@.str = private constant [12 x i8] c"hello world\00", align 1 ; <[12 x i8]*> [#uses=1] -@.str1 = private constant [6 x i8] c"franz\00", align 1 ; <[6 x i8]*> [#uses=1] -@.str2 = private constant [9 x i8] c"argc > 2\00", align 1 ; <[9 x i8]*> [#uses=1] -@.str3 = private constant [9 x i8] c"argc = 1\00", align 1 ; <[9 x i8]*> [#uses=1] -@.str4 = private constant [6 x i8] c"fritz\00", align 1 ; <[6 x i8]*> [#uses=1] -@.str5 = private constant [10 x i8] c"argc <= 1\00", align 1 ; <[10 x i8]*> [#uses=1] -; INST:@OptEdgeProfCounters -; INST:[21 x i32] -; INST:[i32 0, -; INST:i32 -1, -; INST:i32 -1, -; INST:i32 -1, -; INST:i32 -1, -; INST:i32 -1, -; INST:i32 -1, -; INST:i32 -1, -; INST:i32 -1, -; INST:i32 0, -; INST:i32 0, -; INST:i32 -1, -; INST:i32 -1, -; INST:i32 -1, -; INST:i32 0, -; INST:i32 0, -; INST:i32 -1, -; INST:i32 -1, -; INST:i32 0, -; INST:i32 -1, -; INST:i32 -1] - -; PROF:;;; %oneblock called 2 times. -; PROF:;;; -define void @oneblock() nounwind { -entry: -; PROF:entry: -; PROF: ;;; Basic block executed 2 times. - %0 = call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @.str, i64 0, i64 0)) nounwind ; <i32> [#uses=0] - ret void -} - -declare i32 @puts(i8*) - -; PROF:;;; %main called 2 times. -; PROF:;;; -define i32 @main(i32 %argc, i8** %argv) nounwind { -entry: -; PROF:entry: -; PROF: ;;; Basic block executed 2 times. 
- %argc_addr = alloca i32 ; <i32*> [#uses=4] - %argv_addr = alloca i8** ; <i8***> [#uses=1] - %retval = alloca i32 ; <i32*> [#uses=2] - %j = alloca i32 ; <i32*> [#uses=4] - %i = alloca i32 ; <i32*> [#uses=4] - %0 = alloca i32 ; <i32*> [#uses=2] -; INST:call -; INST:@llvm_start_opt_edge_profiling -; INST:@OptEdgeProfCounters - %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] - store i32 %argc, i32* %argc_addr - store i8** %argv, i8*** %argv_addr - store i32 0, i32* %i, align 4 - br label %bb10 -; PROF: ;;; Out-edge counts: [2.000000e+00 -> bb10] - -bb: ; preds = %bb10 -; PROF:bb: -; PROF: ;;; Basic block executed 6 times. - %1 = load i32* %argc_addr, align 4 ; <i32> [#uses=1] - %2 = icmp sgt i32 %1, 1 ; <i1> [#uses=1] - br i1 %2, label %bb1, label %bb8 -; PROF: ;;; Out-edge counts: [3.000000e+00 -> bb1] [3.000000e+00 -> bb8] - -bb1: ; preds = %bb -; PROF:bb1: -; PROF: ;;; Basic block executed 3 times. - store i32 0, i32* %j, align 4 - br label %bb6 -; PROF: ;;; Out-edge counts: [3.000000e+00 -> bb6] - -bb2: ; preds = %bb6 -; PROF:bb2: -; PROF: ;;; Basic block executed 9 times. - %3 = call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str1, i64 0, i64 0)) nounwind ; <i32> [#uses=0] - %4 = load i32* %argc_addr, align 4 ; <i32> [#uses=1] - %5 = icmp sgt i32 %4, 2 ; <i1> [#uses=1] - br i1 %5, label %bb3, label %bb4 -; PROF: ;;; Out-edge counts: [9.000000e+00 -> bb3] - -bb3: ; preds = %bb2 -; PROF:bb3: -; PROF: ;;; Basic block executed 9 times. - %6 = call i32 @puts(i8* getelementptr inbounds ([9 x i8]* @.str2, i64 0, i64 0)) nounwind ; <i32> [#uses=0] - br label %bb5 -; PROF: ;;; Out-edge counts: [9.000000e+00 -> bb5] - -bb4: ; preds = %bb2 -; PROF:bb4: -; PROF: ;;; Never executed! - %7 = call i32 @puts(i8* getelementptr inbounds ([9 x i8]* @.str3, i64 0, i64 0)) nounwind ; <i32> [#uses=0] - br label %bb11 - -bb5: ; preds = %bb3 -; PROF:bb5: -; PROF: ;;; Basic block executed 9 times. 
- %8 = call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str4, i64 0, i64 0)) nounwind ; <i32> [#uses=0] - %9 = load i32* %j, align 4 ; <i32> [#uses=1] - %10 = add nsw i32 %9, 1 ; <i32> [#uses=1] - store i32 %10, i32* %j, align 4 - br label %bb6 -; PROF: ;;; Out-edge counts: [9.000000e+00 -> bb6] - -bb6: ; preds = %bb5, %bb1 -; PROF:bb6: -; PROF: ;;; Basic block executed 12 times. - %11 = load i32* %j, align 4 ; <i32> [#uses=1] - %12 = load i32* %argc_addr, align 4 ; <i32> [#uses=1] - %13 = icmp slt i32 %11, %12 ; <i1> [#uses=1] - br i1 %13, label %bb2, label %bb7 -; PROF: ;;; Out-edge counts: [9.000000e+00 -> bb2] [3.000000e+00 -> bb7] - -bb7: ; preds = %bb6 -; PROF:bb7: -; PROF: ;;; Basic block executed 3 times. - br label %bb9 -; PROF: ;;; Out-edge counts: [3.000000e+00 -> bb9] - -bb8: ; preds = %bb -; PROF:bb8: -; PROF: ;;; Basic block executed 3 times. - %14 = call i32 @puts(i8* getelementptr inbounds ([10 x i8]* @.str5, i64 0, i64 0)) nounwind ; <i32> [#uses=0] - br label %bb9 -; PROF: ;;; Out-edge counts: [3.000000e+00 -> bb9] - -bb9: ; preds = %bb8, %bb7 -; PROF:bb9: -; PROF: ;;; Basic block executed 6 times. - %15 = load i32* %i, align 4 ; <i32> [#uses=1] - %16 = add nsw i32 %15, 1 ; <i32> [#uses=1] - store i32 %16, i32* %i, align 4 - br label %bb10 -; PROF: ;;; Out-edge counts: [6.000000e+00 -> bb10] - -bb10: ; preds = %bb9, %entry -; PROF:bb10: -; PROF: ;;; Basic block executed 8 times. - %17 = load i32* %i, align 4 ; <i32> [#uses=1] - %18 = icmp ne i32 %17, 3 ; <i1> [#uses=1] - br i1 %18, label %bb, label %bb11 -; INST:br -; INST:label %bb10.bb11_crit_edge -; PROF: ;;; Out-edge counts: [6.000000e+00 -> bb] [2.000000e+00 -> bb11] - -; INST:bb10.bb11_crit_edge: -; INST:br -; INST:label %bb11 - -bb11: ; preds = %bb10, %bb4 -; PROF:bb11: -; PROF: ;;; Basic block executed 2 times. 
- call void @oneblock() nounwind - store i32 0, i32* %0, align 4 - %19 = load i32* %0, align 4 ; <i32> [#uses=1] - store i32 %19, i32* %retval, align 4 - br label %return -; PROF: ;;; Out-edge counts: [2.000000e+00 -> return] - -return: ; preds = %bb11 -; PROF:return: -; PROF: ;;; Basic block executed 2 times. - %retval12 = load i32* %retval ; <i32> [#uses=1] - ret i32 %retval12 -} diff --git a/test/Analysis/RegionInfo/lit.local.cfg b/test/Analysis/RegionInfo/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Analysis/RegionInfo/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Analysis/ScalarEvolution/2007-08-06-MisinterpretBranch.ll b/test/Analysis/ScalarEvolution/2007-08-06-MisinterpretBranch.ll index e67e4d0..fd09fd5 100644 --- a/test/Analysis/ScalarEvolution/2007-08-06-MisinterpretBranch.ll +++ b/test/Analysis/ScalarEvolution/2007-08-06-MisinterpretBranch.ll @@ -1,6 +1,8 @@ -; RUN: opt < %s -indvars -adce -simplifycfg -S | grep "icmp s" +; RUN: opt < %s -indvars -adce -simplifycfg -S | FileCheck %s ; PR1598 +; CHECK: icmp s + define i32 @f(i32 %a, i32 %b, i32 %x, i32 %y) { entry: %tmp3 = icmp eq i32 %a, %b ; <i1> [#uses=1] diff --git a/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll b/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll index 036abf5..9e19cca 100644 --- a/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll +++ b/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll @@ -1,6 +1,8 @@ -; RUN: opt < %s -scalar-evolution -analyze | grep "Loop %bb: backedge-taken count is (-1 + (-1 \* %x) + %y)" +; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s ; PR1597 +; CHECK: Loop %bb: backedge-taken count is (-1 + (-1 * %x) + %y) + define i32 @f(i32 %x, i32 %y) { entry: %tmp63 = icmp ult i32 %x, %y ; <i1> [#uses=1] diff --git a/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll b/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll index a3192b9..b65a525 100644 --- 
a/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll +++ b/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll @@ -1,7 +1,8 @@ -; RUN: opt < %s -analyze -scalar-evolution \ -; RUN: -scalar-evolution-max-iterations=0 | grep "backedge-taken count is 13" +; RUN: opt < %s -analyze -scalar-evolution -scalar-evolution-max-iterations=0 | FileCheck %s ; PR1706 +; CHECK: backedge-taken count is 13 + define i32 @f() { entry: br label %bb5 diff --git a/test/Analysis/ScalarEvolution/2007-11-14-SignedAddRec.ll b/test/Analysis/ScalarEvolution/2007-11-14-SignedAddRec.ll index 514920f..a2850d8 100644 --- a/test/Analysis/ScalarEvolution/2007-11-14-SignedAddRec.ll +++ b/test/Analysis/ScalarEvolution/2007-11-14-SignedAddRec.ll @@ -1,6 +1,8 @@ -; RUN: opt < %s -indvars -S | grep printd | grep 1206807378 +; RUN: opt < %s -indvars -S | FileCheck %s ; PR1798 +; CHECK: printd(i32 1206807378) + declare void @printd(i32) define i32 @test() { diff --git a/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll b/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll index d0644f7..6ebfa61 100644 --- a/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll +++ b/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll @@ -1,4 +1,6 @@ -; RUN: opt < %s -scalar-evolution -analyze | grep "Loop %header: backedge-taken count is (0 smax %n)" +; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s + +; CHECK: Loop %header: backedge-taken count is (0 smax %n) define void @foo(i32 %n) { entry: diff --git a/test/Analysis/ScalarEvolution/2008-02-15-UMax.ll b/test/Analysis/ScalarEvolution/2008-02-15-UMax.ll index 52c7985..527fd27 100644 --- a/test/Analysis/ScalarEvolution/2008-02-15-UMax.ll +++ b/test/Analysis/ScalarEvolution/2008-02-15-UMax.ll @@ -1,6 +1,8 @@ -; RUN: opt < %s -analyze -scalar-evolution | grep umax +; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s ; PR2003 +; CHECK: umax + define i32 @foo(i32 %n) { entry: br label %header diff --git 
a/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll b/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll index 41734d7..9a05d88 100644 --- a/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll +++ b/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll @@ -1,7 +1,8 @@ -; RUN: opt < %s -analyze -scalar-evolution \ -; RUN: -scalar-evolution-max-iterations=0 | grep "backedge-taken count is 61" +; RUN: opt < %s -analyze -scalar-evolution -scalar-evolution-max-iterations=0 | FileCheck %s ; PR2364 +; CHECK: backedge-taken count is 61 + define i32 @func_6() nounwind { entry: br label %bb5 diff --git a/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll b/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll index 5cf17a2..dcf8fc9 100644 --- a/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll +++ b/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll @@ -1,6 +1,9 @@ -; RUN: opt < %s -analyze -scalar-evolution 2>&1 | not grep smax +; RUN: opt < %s -analyze -scalar-evolution 2>&1 | FileCheck %s ; PR2261 +; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'foo' +; CHECK-NOT: smax + @lut = common global [256 x i8] zeroinitializer, align 32 ; <[256 x i8]*> [#uses=1] define void @foo(i32 %count, i32* %srcptr, i32* %dstptr) nounwind { diff --git a/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll b/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll index 195dfaa..c804bd9 100644 --- a/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll +++ b/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll @@ -1,6 +1,9 @@ -; RUN: opt < %s -analyze -scalar-evolution 2>&1 | not grep smax +; RUN: opt < %s -analyze -scalar-evolution 2>&1 | FileCheck %s ; PR2070 +; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'a' +; CHECK-NOT: smax + define i32 @a(i32 %x) nounwind { entry: icmp sgt i32 %x, 1 ; <i1>:0 [#uses=1] diff --git 
a/test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll b/test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll index 1865c05..ad34f6c 100644 --- a/test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll +++ b/test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll @@ -1,7 +1,8 @@ -; RUN: opt < %s -analyze -scalar-evolution \ -; RUN: -scalar-evolution-max-iterations=0 | grep Unpredictable +; RUN: opt < %s -analyze -scalar-evolution -scalar-evolution-max-iterations=0 | FileCheck %s ; PR2088 +; CHECK: Unpredictable + define void @fun() { entry: br label %loop diff --git a/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll b/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll index cbf200e..82b9d56 100644 --- a/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll +++ b/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll @@ -1,7 +1,8 @@ -; RUN: opt < %s -analyze -scalar-evolution \ -; RUN: -scalar-evolution-max-iterations=0 | grep "backedge-taken count is 113" +; RUN: opt < %s -analyze -scalar-evolution -scalar-evolution-max-iterations=0 | FileCheck %s ; PR2088 +; CHECK: backedge-taken count is 113 + define void @fun() { entry: br label %loop diff --git a/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll b/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll index c25e4a3..46c6c59 100644 --- a/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll +++ b/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll @@ -1,5 +1,6 @@ -; RUN: opt < %s -analyze -scalar-evolution 2>&1 | \ -; RUN: grep "Loop %bb: backedge-taken count is (7 + (-1 \* %argc))" +; RUN: opt < %s -analyze -scalar-evolution 2>&1 | FileCheck %s + +; CHECK: Loop %bb: backedge-taken count is (7 + (-1 * %argc)) define i32 @main(i32 %argc, i8** %argv) nounwind { entry: diff --git a/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll b/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll index 56a8343..7acf90c 100644 --- 
a/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll +++ b/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll @@ -1,5 +1,8 @@ -; RUN: opt < %s -analyze -scalar-evolution \ -; RUN: | grep "Loop %bb: Unpredictable backedge-taken count\." +; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s + +; CHECK: Loop %bb: backedge-taken count is ((-5 + %x) /u 3) +; CHECK: Loop %bb: max backedge-taken count is 1431655764 + ; ScalarEvolution can't compute a trip count because it doesn't know if ; dividing by the stride will have a remainder. This could theoretically diff --git a/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll b/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll index aaf6770..2b2296a 100644 --- a/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll +++ b/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll @@ -1,5 +1,8 @@ -; RUN: opt < %s -analyze -scalar-evolution 2>&1 | grep "/u 3" -; XFAIL: * +; RUN: opt < %s -analyze -scalar-evolution 2>&1 | FileCheck %s + +; CHECK: Loop %bb: backedge-taken count is ((999 + (-1 * %x)) /u 3) +; CHECK: Loop %bb: max backedge-taken count is 334 + ; This is a tricky testcase for unsigned wrap detection which ScalarEvolution ; doesn't yet know how to do. 
diff --git a/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll b/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll index a1b3b71..7a7a640 100644 --- a/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll +++ b/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll @@ -1,4 +1,6 @@ -; RUN: opt < %s -analyze -scalar-evolution | grep "backedge-taken count is 255" +; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s + +; CHECK: backedge-taken count is 255 define i32 @foo(i32 %x, i32 %y, i32* %lam, i32* %alp) nounwind { bb1.thread: diff --git a/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll b/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll index bb14919..95aa1fc 100644 --- a/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll +++ b/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll @@ -1,7 +1,8 @@ -; RUN: opt < %s -analyze -scalar-evolution 2>&1 | \ -; RUN: grep "(((-1 * %i0) + (100005 smax %i0)) /u 5)" +; RUN: opt < %s -analyze -scalar-evolution 2>&1 | FileCheck %s ; XFAIL: * +; CHECK: (((-1 * %i0) + (100005 smax %i0)) /u 5) + define i32 @foo0(i32 %i0) nounwind { entry: br label %bb1 diff --git a/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll b/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll index 7000626..70588bc 100644 --- a/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll +++ b/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll @@ -1,5 +1,6 @@ -; RUN: opt < %s -analyze -scalar-evolution 2>&1 | grep "/u 5" -; XFAIL: * +; RUN: opt < %s -analyze -scalar-evolution 2>&1 | FileCheck %s + +; CHECK: /u 5 define i8 @foo0(i8 %i0) nounwind { entry: diff --git a/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll b/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll index 82f2608..f19d18c 100644 --- a/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll +++ b/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll @@ -1,6 +1,9 @@ -; RUN: opt < %s 
-analyze -scalar-evolution | not grep "/u -1" +; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s ; PR3275 +; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'func_15' +; CHECK-NOT: /u -1 + @g_16 = external global i16 ; <i16*> [#uses=3] @.str = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] diff --git a/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll b/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll index ebd9f73..3dacfbb 100644 --- a/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll +++ b/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll @@ -1,35 +1,53 @@ ; RUN: opt < %s -analyze -scalar-evolution | grep "(trunc i" | not grep ext +; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'test1' +; CHECK-NOT: (trunc i{{.*}}ext + define i16 @test1(i8 %x) { %A = sext i8 %x to i32 %B = trunc i32 %A to i16 ret i16 %B } +; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'test2' +; CHECK-NOT: (trunc i{{.*}}ext + define i8 @test2(i16 %x) { %A = sext i16 %x to i32 %B = trunc i32 %A to i8 ret i8 %B } +; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'test3' +; CHECK-NOT: (trunc i{{.*}}ext + define i16 @test3(i16 %x) { %A = sext i16 %x to i32 %B = trunc i32 %A to i16 ret i16 %B } +; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'test4' +; CHECK-NOT: (trunc i{{.*}}ext + define i16 @test4(i8 %x) { %A = zext i8 %x to i32 %B = trunc i32 %A to i16 ret i16 %B } +; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'test5' +; CHECK-NOT: (trunc i{{.*}}ext + define i8 @test5(i16 %x) { %A = zext i16 %x to i32 %B = trunc i32 %A to i8 ret i8 %B } +; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'test6' +; CHECK-NOT: (trunc i{{.*}}ext + define i16 @test6(i16 %x) { %A = zext i16 %x to i32 %B = trunc i32 %A to i16 diff --git a/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll 
b/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll index 8a78043..5d1502d 100644 --- a/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll +++ b/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll @@ -1,5 +1,8 @@ -; RUN: opt < %s -analyze -scalar-evolution | grep "count is 2" +; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s ; PR3171 + +; CHECK: count is 2 + target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" %struct.Foo = type { i32 } diff --git a/test/Analysis/ScalarEvolution/and-xor.ll b/test/Analysis/ScalarEvolution/and-xor.ll index 06f4a85..404ab91 100644 --- a/test/Analysis/ScalarEvolution/and-xor.ll +++ b/test/Analysis/ScalarEvolution/and-xor.ll @@ -1,5 +1,8 @@ -; RUN: opt < %s -scalar-evolution -analyze \ -; RUN: | grep "\--> (zext" | count 2 +; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s + +; CHECK: --> (zext +; CHECK: --> (zext +; CHECK-NOT: --> (zext define i32 @foo(i32 %x) { %n = and i32 %x, 255 diff --git a/test/Analysis/ScalarEvolution/avoid-smax-0.ll b/test/Analysis/ScalarEvolution/avoid-smax-0.ll index 3d15c78..8abb430 100644 --- a/test/Analysis/ScalarEvolution/avoid-smax-0.ll +++ b/test/Analysis/ScalarEvolution/avoid-smax-0.ll @@ -1,4 +1,6 @@ -; RUN: opt < %s -scalar-evolution -analyze | grep "Loop %bb3: backedge-taken count is (-1 + %n)" +; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s + +; CHECK: Loop %bb3: backedge-taken count is (-1 + %n) ; We don't want to use a max in the trip count expression in ; this testcase. 
diff --git a/test/Analysis/ScalarEvolution/div-overflow.ll b/test/Analysis/ScalarEvolution/div-overflow.ll index 2846797..aca964a 100644 --- a/test/Analysis/ScalarEvolution/div-overflow.ll +++ b/test/Analysis/ScalarEvolution/div-overflow.ll @@ -1,5 +1,6 @@ -; RUN: opt < %s -scalar-evolution -analyze \ -; RUN: | grep "\--> ((-128 \* %a) /u -128)" +; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s + +; CHECK: --> ((-128 * %a) /u -128) ; Don't let ScalarEvolution fold this div away. diff --git a/test/Analysis/ScalarEvolution/do-loop.ll b/test/Analysis/ScalarEvolution/do-loop.ll index 6e3295a..e35ea7d 100644 --- a/test/Analysis/ScalarEvolution/do-loop.ll +++ b/test/Analysis/ScalarEvolution/do-loop.ll @@ -1,6 +1,8 @@ -; RUN: opt < %s -analyze -scalar-evolution | grep smax +; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s ; PR1614 +; CHECK: smax + define i32 @f(i32 %x, i32 %y) { entry: br label %bb diff --git a/test/Analysis/ScalarEvolution/lit.local.cfg b/test/Analysis/ScalarEvolution/lit.local.cfg deleted file mode 100644 index c6106e4..0000000 --- a/test/Analysis/ScalarEvolution/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll'] diff --git a/test/Analysis/ScalarEvolution/max-trip-count-address-space.ll b/test/Analysis/ScalarEvolution/max-trip-count-address-space.ll new file mode 100644 index 0000000..aa5254c --- /dev/null +++ b/test/Analysis/ScalarEvolution/max-trip-count-address-space.ll @@ -0,0 +1,68 @@ +; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s + +; ScalarEvolution should be able to understand the loop and eliminate the casts. 
+ +target datalayout = "e-p:32:32:32-p1:16:16:16-p2:8:8:8-p4:64:64:64-n16:32:64" + +; CHECK: {%d,+,4}<%bb> Exits: ((4 * (trunc i32 (-1 + %n) to i16)) + %d) + + +define void @foo(i32 addrspace(1)* nocapture %d, i32 %n) nounwind { +; CHECK: @foo +entry: + %0 = icmp sgt i32 %n, 0 ; <i1> [#uses=1] + br i1 %0, label %bb.nph, label %return + +bb.nph: ; preds = %entry + br label %bb + +bb: ; preds = %bb1, %bb.nph + %i.02 = phi i32 [ %5, %bb1 ], [ 0, %bb.nph ] ; <i32> [#uses=2] + %p.01 = phi i8 [ %4, %bb1 ], [ -1, %bb.nph ] ; <i8> [#uses=2] + %1 = sext i8 %p.01 to i32 ; <i32> [#uses=1] + %2 = sext i32 %i.02 to i64 ; <i64> [#uses=1] + %3 = getelementptr i32 addrspace(1)* %d, i64 %2 ; <i32*> [#uses=1] + store i32 %1, i32 addrspace(1)* %3, align 4 + %4 = add i8 %p.01, 1 ; <i8> [#uses=1] + %5 = add i32 %i.02, 1 ; <i32> [#uses=2] + br label %bb1 + +bb1: ; preds = %bb + %6 = icmp slt i32 %5, %n ; <i1> [#uses=1] + br i1 %6, label %bb, label %bb1.return_crit_edge + +bb1.return_crit_edge: ; preds = %bb1 + br label %return + +return: ; preds = %bb1.return_crit_edge, %entry + ret void +} + +define void @test(i8 addrspace(1)* %a, i32 %n) nounwind { +; CHECK: @test +entry: + %cmp1 = icmp sgt i32 %n, 0 + br i1 %cmp1, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + %tmp = zext i32 %n to i64 + br label %for.body + +for.body: ; preds = %for.body, %for.body.lr.ph + %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %for.body.lr.ph ] + %arrayidx = getelementptr i8 addrspace(1)* %a, i64 %indvar + store i8 0, i8 addrspace(1)* %arrayidx, align 1 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp ne i64 %indvar.next, %tmp + br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.cond.for.end_crit_edge, %entry + ret void +} + +; CHECK: Determining loop execution counts for: @test +; CHECK-NEXT: backedge-taken count is +; CHECK-NEXT: max backedge-taken 
count is -1 diff --git a/test/Analysis/ScalarEvolution/nsw.ll b/test/Analysis/ScalarEvolution/nsw.ll index 659cf4f..05992ea 100644 --- a/test/Analysis/ScalarEvolution/nsw.ll +++ b/test/Analysis/ScalarEvolution/nsw.ll @@ -62,11 +62,11 @@ for.body.lr.ph.i.i: ; preds = %entry for.body.i.i: ; preds = %for.body.i.i, %for.body.lr.ph.i.i %__first.addr.02.i.i = phi i32* [ %begin, %for.body.lr.ph.i.i ], [ %ptrincdec.i.i, %for.body.i.i ] ; CHECK: %__first.addr.02.i.i -; CHECK-NEXT: --> {%begin,+,4}<nw><%for.body.i.i> +; CHECK-NEXT: --> {%begin,+,4}<nuw><%for.body.i.i> store i32 0, i32* %__first.addr.02.i.i, align 4 %ptrincdec.i.i = getelementptr inbounds i32* %__first.addr.02.i.i, i64 1 ; CHECK: %ptrincdec.i.i -; CHECK-NEXT: --> {(4 + %begin),+,4}<nw><%for.body.i.i> +; CHECK-NEXT: --> {(4 + %begin),+,4}<nuw><%for.body.i.i> %cmp.i.i = icmp eq i32* %ptrincdec.i.i, %end br i1 %cmp.i.i, label %for.cond.for.end_crit_edge.i.i, label %for.body.i.i @@ -122,3 +122,39 @@ exit: %result = phi i32 [ %a, %entry ], [ %tmp2, %greater ] ret i32 %result } + +; TODO: This could fold down to '1' +; CHECK-LABEL: PR12375 +; CHECK: --> {(4 + %arg),+,4}<nuw><%bb1> Exits: (4 + (4 * ((-1 + (-1 * %arg) + ((4 + %arg) umax (8 + %arg)<nsw>)) /u 4)) + %arg) +define i32 @PR12375(i32* readnone %arg) { +bb: + %tmp = getelementptr inbounds i32* %arg, i64 2 + br label %bb1 + +bb1: ; preds = %bb1, %bb + %tmp2 = phi i32* [ %arg, %bb ], [ %tmp5, %bb1 ] + %tmp3 = phi i32 [ 0, %bb ], [ %tmp4, %bb1 ] + %tmp4 = add nsw i32 %tmp3, 1 + %tmp5 = getelementptr inbounds i32* %tmp2, i64 1 + %tmp6 = icmp ult i32* %tmp5, %tmp + br i1 %tmp6, label %bb1, label %bb7 + +bb7: ; preds = %bb1 + ret i32 %tmp4 +} + +; CHECK-LABEL: PR12376 +; CHECK: --> {(4 + %arg),+,4}<nuw><%bb2> Exits: (4 + (4 * ((3 + (-1 * %arg) + (%arg umax %arg1)) /u 4)) + %arg) +define void @PR12376(i32* nocapture %arg, i32* nocapture %arg1) { +bb: + br label %bb2 + +bb2: ; preds = %bb2, %bb + %tmp = phi i32* [ %arg, %bb ], [ %tmp4, %bb2 ] + %tmp3 = icmp ult i32* 
%tmp, %arg1 + %tmp4 = getelementptr inbounds i32* %tmp, i64 1 + br i1 %tmp3, label %bb2, label %bb5 + +bb5: ; preds = %bb2 + ret void +} diff --git a/test/Analysis/ScalarEvolution/trip-count11.ll b/test/Analysis/ScalarEvolution/trip-count11.ll index 7191503..e14af08 100644 --- a/test/Analysis/ScalarEvolution/trip-count11.ll +++ b/test/Analysis/ScalarEvolution/trip-count11.ll @@ -1,9 +1,11 @@ ; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @foo.a = internal constant [8 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7], align 16 +@foo.a_as1 = internal addrspace(1) constant [8 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7], align 16 + define i32 @foo() nounwind uwtable noinline { entry: @@ -27,3 +29,27 @@ for.inc: ; preds = %for.cond for.end: ; preds = %for.cond ret i32 %sum.0 } + +define i32 @foo_as1() nounwind uwtable noinline { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %sum.0 = phi i32 [ 0, %entry ], [ %add, %for.inc ] +; CHECK: --> %sum.0 Exits: 28 + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp ult i32 %i.0, 8 + br i1 %cmp, label %for.inc, label %for.end + +for.inc: ; preds = %for.cond + %idxprom = sext i32 %i.0 to i64 + %arrayidx = getelementptr inbounds [8 x i32] addrspace(1)* @foo.a_as1, i64 0, i64 %idxprom + %0 = load i32 addrspace(1)* %arrayidx, align 4 + %add = add nsw i32 %sum.0, %0 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret i32 %sum.0 +} + diff --git a/test/Analysis/ScalarEvolution/trip-count3.ll 
b/test/Analysis/ScalarEvolution/trip-count3.ll index 32c51bf..850e035 100644 --- a/test/Analysis/ScalarEvolution/trip-count3.ll +++ b/test/Analysis/ScalarEvolution/trip-count3.ll @@ -4,7 +4,8 @@ ; dividing by the stride will have a remainder. This could theoretically ; be teaching it how to use a more elaborate trip count computation. -; CHECK: Loop %bb3.i: Unpredictable backedge-taken count. +; CHECK: Loop %bb3.i: backedge-taken count is ((64 + (-64 smax (-1 + (-1 * %0))) + %0) /u 64) +; CHECK: Loop %bb3.i: max backedge-taken count is 33554431 %struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } %struct.SHA_INFO = type { [5 x i32], i32, i32, [16 x i32] } diff --git a/test/Analysis/ScalarEvolution/trip-count9.ll b/test/Analysis/ScalarEvolution/trip-count9.ll index 9180f2b..9a080b3 100644 --- a/test/Analysis/ScalarEvolution/trip-count9.ll +++ b/test/Analysis/ScalarEvolution/trip-count9.ll @@ -25,8 +25,8 @@ exit: } ; CHECK: Determining loop execution counts for: @step2 -; CHECK: Loop %loop: Unpredictable backedge-taken count. -; CHECK: Loop %loop: Unpredictable max backedge-taken count. +; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: Unpredictable max backedge-taken count. define void @step2(i4 %n) { entry: %s = icmp sgt i4 %n, 0 @@ -57,8 +57,8 @@ exit: } ; CHECK: Determining loop execution counts for: @start1_step2 -; CHECK: Loop %loop: Unpredictable backedge-taken count. -; CHECK: Loop %loop: Unpredictable max backedge-taken count. +; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: Unpredictable max backedge-taken count. define void @start1_step2(i4 %n) { entry: %s = icmp sgt i4 %n, 0 @@ -89,8 +89,8 @@ exit: } ; CHECK: Determining loop execution counts for: @startx_step2 -; CHECK: Loop %loop: Unpredictable backedge-taken count. 
-; CHECK: Loop %loop: Unpredictable max backedge-taken count. +; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: Unpredictable max backedge-taken count. define void @startx_step2(i4 %n, i4 %x) { entry: %s = icmp sgt i4 %n, 0 @@ -120,12 +120,18 @@ exit: ret void } -; Be careful with this one. If %n is INT4_MAX, %i.next will wrap. The nsw bit -; says that the result is undefined, but ScalarEvolution must respect that -; subsequent passes may result the undefined behavior in predictable ways. +; If %n is INT4_MAX, %i.next will wrap. The nsw bit says that the +; result is undefined. Therefore, after the loop's second iteration, +; we are free to assume that the loop exits. This is valid because: +; (a) %i.next is a poison value after the second iteration, which can +; also be considered an undef value. +; (b) the return instruction enacts a side effect that is control +; dependent on the poison value. +; +; CHECK-LABEL: nsw_step2 ; CHECK: Determining loop execution counts for: @nsw_step2 -; CHECK: Loop %loop: Unpredictable backedge-taken count. -; CHECK: Loop %loop: Unpredictable max backedge-taken count. +; CHECK: Loop %loop: backedge-taken count is ((-1 + %n) /u 2) +; CHECK: Loop %loop: max backedge-taken count is 2 define void @nsw_step2(i4 %n) { entry: %s = icmp sgt i4 %n, 0 @@ -139,6 +145,7 @@ exit: ret void } +; CHECK-LABEL: nsw_start1 ; CHECK: Determining loop execution counts for: @nsw_start1 ; CHECK: Loop %loop: backedge-taken count is (-2 + (2 smax %n)) ; CHECK: Loop %loop: max backedge-taken count is 5 @@ -156,8 +163,8 @@ exit: } ; CHECK: Determining loop execution counts for: @nsw_start1_step2 -; CHECK: Loop %loop: Unpredictable backedge-taken count. -; CHECK: Loop %loop: Unpredictable max backedge-taken count. 
+; CHECK: Loop %loop: backedge-taken count is ((-2 + (3 smax %n)) /u 2) +; CHECK: Loop %loop: max backedge-taken count is 2 define void @nsw_start1_step2(i4 %n) { entry: %s = icmp sgt i4 %n, 0 @@ -188,8 +195,8 @@ exit: } ; CHECK: Determining loop execution counts for: @nsw_startx_step2 -; CHECK: Loop %loop: Unpredictable backedge-taken count. -; CHECK: Loop %loop: Unpredictable max backedge-taken count. +; CHECK: Loop %loop: backedge-taken count is ((-1 + (-1 * %x) + ((2 + %x) smax %n)) /u 2) +; CHECK: Loop %loop: max backedge-taken count is 7 define void @nsw_startx_step2(i4 %n, i4 %x) { entry: %s = icmp sgt i4 %n, 0 @@ -221,7 +228,7 @@ exit: } ; CHECK: Determining loop execution counts for: @even_step2 -; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: backedge-taken count is ((-1 + (2 * %n)) /u 2) ; CHECK: Loop %loop: max backedge-taken count is 2 define void @even_step2(i4 %n) { entry: @@ -255,7 +262,7 @@ exit: } ; CHECK: Determining loop execution counts for: @even_start1_step2 -; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: backedge-taken count is ((-2 + (3 smax (2 * %n))) /u 2) ; CHECK: Loop %loop: max backedge-taken count is 2 define void @even_start1_step2(i4 %n) { entry: @@ -273,7 +280,7 @@ exit: ; CHECK: Determining loop execution counts for: @even_startx ; CHECK: Loop %loop: backedge-taken count is (-1 + (-1 * %x) + ((1 + %x) smax (2 * %n))) -; CHECK: Loop %loop: max backedge-taken count is -1 +; CHECK: Loop %loop: max backedge-taken count is -2 define void @even_startx(i4 %n, i4 %x) { entry: %m = shl i4 %n, 1 @@ -289,7 +296,7 @@ exit: } ; CHECK: Determining loop execution counts for: @even_startx_step2 -; CHECK: Loop %loop: Unpredictable backedge-taken count. 
+; CHECK: Loop %loop: backedge-taken count is ((-1 + (-1 * %x) + ((2 + %x) smax (2 * %n))) /u 2) ; CHECK: Loop %loop: max backedge-taken count is 7 define void @even_startx_step2(i4 %n, i4 %x) { entry: @@ -375,7 +382,7 @@ exit: ; CHECK: Determining loop execution counts for: @even_nsw_startx ; CHECK: Loop %loop: backedge-taken count is (-1 + (-1 * %x) + ((1 + %x) smax (2 * %n))) -; CHECK: Loop %loop: max backedge-taken count is -1 +; CHECK: Loop %loop: max backedge-taken count is -2 define void @even_nsw_startx(i4 %n, i4 %x) { entry: %m = shl i4 %n, 1 diff --git a/test/Analysis/TypeBasedAliasAnalysis/PR17620.ll b/test/Analysis/TypeBasedAliasAnalysis/PR17620.ll new file mode 100644 index 0000000..9051139 --- /dev/null +++ b/test/Analysis/TypeBasedAliasAnalysis/PR17620.ll @@ -0,0 +1,45 @@ +; RUN: opt < %s -tbaa -gvn -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +%structA = type { %structB } +%structB = type { i32*, %classT } +%classT = type { %classO, %classJ*, i8 } +%classO = type { i32 } +%classJ = type { i8 } +%classA = type { %classB } +%classB = type { i8 } +%classC = type { %classD, %structA } +%classD = type { %structA* } + +; Function Attrs: ssp uwtable +define %structA** @test(%classA* %this, i32** %p1) #0 align 2 { +entry: +; CHECK-LABEL: @test +; CHECK: load i32** %p1, align 8, !tbaa +; CHECK: load i32** getelementptr (%classC* null, i32 0, i32 1, i32 0, i32 0), align 8, !tbaa +; CHECK: call void @callee + %0 = load i32** %p1, align 8, !tbaa !1 + %1 = load i32** getelementptr (%classC* null, i32 0, i32 1, i32 0, i32 0), align 8, !tbaa !5 + call void @callee(i32* %0, i32* %1) + unreachable +} + +declare void @callee(i32*, i32*) #1 + +attributes #0 = { ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" 
"stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = metadata !{metadata !"clang version 3.4"} +!1 = metadata !{metadata !2, metadata !2, i64 0} +!2 = metadata !{metadata !"any pointer", metadata !3, i64 0} +!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0} +!4 = metadata !{metadata !"Simple C/C++ TBAA"} +!5 = metadata !{metadata !6, metadata !2, i64 8} +!6 = metadata !{metadata !"_ZTSN12_GLOBAL__N_11RINS_1FIPi8TreeIterN1I1S1LENS_1KINS_1DIKS2_S3_EEEEE1GEPSD_EE", metadata !7, i64 8} +!7 = metadata !{metadata !"_ZTSN12_GLOBAL__N_11FIPi8TreeIterN1I1S1LENS_1KINS_1DIKS1_S2_EEEEE1GE", metadata !8, i64 0} +!8 = metadata !{metadata !"_ZTSN12_GLOBAL__N_11DIKPi8TreeIterEE", metadata !2, i64 0, metadata !9, i64 8} +!9 = metadata !{metadata !"_ZTS8TreeIter", metadata !2, i64 8, metadata !10, i64 16} +!10 = metadata !{metadata !"bool", metadata !3, i64 0} diff --git a/test/Analysis/TypeBasedAliasAnalysis/aliastest.ll b/test/Analysis/TypeBasedAliasAnalysis/aliastest.ll index d59e392..76a88c8 100644 --- a/test/Analysis/TypeBasedAliasAnalysis/aliastest.ll +++ b/test/Analysis/TypeBasedAliasAnalysis/aliastest.ll @@ -47,16 +47,21 @@ define i8 @test1_no(i8* %a, i8* %b) nounwind { ; Root note. !0 = metadata !{ } ; Some type. -!1 = metadata !{ metadata !"foo", metadata !0 } +!1 = metadata !{metadata !7, metadata !7, i64 0} ; Some other non-aliasing type. -!2 = metadata !{ metadata !"bar", metadata !0 } +!2 = metadata !{metadata !8, metadata !8, i64 0} ; Some type. -!3 = metadata !{ metadata !"foo", metadata !0 } +!3 = metadata !{metadata !9, metadata !9, i64 0} ; Some type in a different type system. 
-!4 = metadata !{ metadata !"bar", metadata !"different" } +!4 = metadata !{metadata !10, metadata !10, i64 0} ; Invariant memory. -!5 = metadata !{ metadata !"qux", metadata !0, i1 1 } +!5 = metadata !{metadata !11, metadata !11, i64 0, i1 1} ; Not invariant memory. -!6 = metadata !{ metadata !"qux", metadata !0, i1 0 } +!6 = metadata !{metadata !11, metadata !11, i64 0, i1 0} +!7 = metadata !{ metadata !"foo", metadata !0 } +!8 = metadata !{ metadata !"bar", metadata !0 } +!9 = metadata !{ metadata !"foo", metadata !0 } +!10 = metadata !{ metadata !"bar", metadata !"different" } +!11 = metadata !{ metadata !"qux", metadata !0} diff --git a/test/Analysis/TypeBasedAliasAnalysis/argument-promotion.ll b/test/Analysis/TypeBasedAliasAnalysis/argument-promotion.ll index bb66e37..14bbeac 100644 --- a/test/Analysis/TypeBasedAliasAnalysis/argument-promotion.ll +++ b/test/Analysis/TypeBasedAliasAnalysis/argument-promotion.ll @@ -33,5 +33,7 @@ define i32 @callercaller(i32* %Q) { } !0 = metadata !{metadata !"test"} -!1 = metadata !{metadata !"green", metadata !0} -!2 = metadata !{metadata !"blue", metadata !0} +!1 = metadata !{metadata !3, metadata !3, i64 0} +!2 = metadata !{metadata !4, metadata !4, i64 0} +!3 = metadata !{metadata !"green", metadata !0} +!4 = metadata !{metadata !"blue", metadata !0} diff --git a/test/Analysis/TypeBasedAliasAnalysis/dse.ll b/test/Analysis/TypeBasedAliasAnalysis/dse.ll index 6b44eb6..bcf1f2c 100644 --- a/test/Analysis/TypeBasedAliasAnalysis/dse.ll +++ b/test/Analysis/TypeBasedAliasAnalysis/dse.ll @@ -51,16 +51,21 @@ define i8 @test1_no(i8* %a, i8* %b) nounwind { ; Root note. !0 = metadata !{ } ; Some type. -!1 = metadata !{ metadata !"foo", metadata !0 } +!1 = metadata !{metadata !7, metadata !7, i64 0} ; Some other non-aliasing type. -!2 = metadata !{ metadata !"bar", metadata !0 } +!2 = metadata !{metadata !8, metadata !8, i64 0} ; Some type. 
-!3 = metadata !{ metadata !"foo", metadata !0 } +!3 = metadata !{metadata !9, metadata !9, i64 0} ; Some type in a different type system. -!4 = metadata !{ metadata !"bar", metadata !"different" } +!4 = metadata !{metadata !10, metadata !10, i64 0} ; Invariant memory. -!5 = metadata !{ metadata !"qux", metadata !0, i1 1 } +!5 = metadata !{metadata !11, metadata !11, i64 0, i1 1} ; Not invariant memory. -!6 = metadata !{ metadata !"qux", metadata !0, i1 0 } +!6 = metadata !{metadata !11, metadata !11, i64 0, i1 0} +!7 = metadata !{ metadata !"foo", metadata !0 } +!8 = metadata !{ metadata !"bar", metadata !0 } +!9 = metadata !{ metadata !"foo", metadata !0 } +!10 = metadata !{ metadata !"bar", metadata !"different" } +!11 = metadata !{ metadata !"qux", metadata !0} diff --git a/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll b/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll index 52e394b..4dc4073 100644 --- a/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll +++ b/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll @@ -13,7 +13,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; CHECK: for.end: ; CHECK: %arrayidx31 = getelementptr inbounds %union.vector_t* %t, i64 0, i32 0, i64 1 -; CHECK: %tmp32 = load i64* %arrayidx31, align 8, !tbaa !3 +; CHECK: %tmp32 = load i64* %arrayidx31, align 8, !tbaa [[TAG:!.*]] define void @vrlh(%union.vector_t* %va, %union.vector_t* %vb, %union.vector_t* %vd) nounwind { entry: @@ -123,9 +123,15 @@ for.end: ; preds = %for.body ret float %tmp10 } -!0 = metadata !{metadata !"short", metadata !1} +; CHECK: [[TAG]] = metadata !{metadata [[TYPE_LL:!.*]], metadata [[TYPE_LL]], i64 0} +; CHECK: [[TYPE_LL]] = metadata !{metadata !"long long", metadata {{!.*}}} +!0 = metadata !{metadata !6, metadata !6, i64 0} !1 = metadata !{metadata !"omnipotent char", metadata !2} !2 = metadata !{metadata !"Simple C/C++ TBAA", null} -!3 = metadata !{metadata !"long long", metadata !1} -!4 = 
metadata !{metadata !"int", metadata !1} -!5 = metadata !{metadata !"float", metadata !1} +!3 = metadata !{metadata !7, metadata !7, i64 0} +!4 = metadata !{metadata !8, metadata !8, i64 0} +!5 = metadata !{metadata !9, metadata !9, i64 0} +!6 = metadata !{metadata !"short", metadata !1} +!7 = metadata !{metadata !"long long", metadata !1} +!8 = metadata !{metadata !"int", metadata !1} +!9 = metadata !{metadata !"float", metadata !1} diff --git a/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll b/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll index 0a30b30..e9fb941 100644 --- a/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll +++ b/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll @@ -80,6 +80,7 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) nounwind !0 = metadata !{ } ; Invariant memory. -!1 = metadata !{ metadata !"foo", metadata !0, i1 1 } +!1 = metadata !{metadata !3, metadata !3, i64 0, i1 1 } ; Not invariant memory. -!2 = metadata !{ metadata !"foo", metadata !0, i1 0 } +!2 = metadata !{metadata !3, metadata !3, i64 0, i1 0 } +!3 = metadata !{ metadata !"foo", metadata !0 } diff --git a/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll b/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll index eceaa2c..90e1abb 100644 --- a/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll +++ b/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll @@ -85,7 +85,11 @@ if.else: } !0 = metadata !{} -!1 = metadata !{metadata !"red", metadata !0} -!2 = metadata !{metadata !"blu", metadata !0} -!3 = metadata !{metadata !"outer space"} -!4 = metadata !{metadata !"brick red", metadata !1} +!1 = metadata !{metadata !5, metadata !5, i64 0} +!2 = metadata !{metadata !6, metadata !6, i64 0} +!3 = metadata !{metadata !7, metadata !7, i64 0} +!4 = metadata !{metadata !8, metadata !8, i64 0} +!5 = metadata !{metadata !"red", metadata !0} +!6 = metadata !{metadata !"blu", metadata !0} +!7 = 
metadata !{metadata !"outer space"} +!8 = metadata !{metadata !"brick red", metadata !5} diff --git a/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll b/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll index 6f1c22d..93b8e50 100644 --- a/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll +++ b/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll @@ -26,5 +26,7 @@ declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind ; CHECK: attributes [[NUW]] = { nounwind } !0 = metadata !{metadata !"tbaa root", null} -!1 = metadata !{metadata !"A", metadata !0} -!2 = metadata !{metadata !"B", metadata !0} +!1 = metadata !{metadata !3, metadata !3, i64 0} +!2 = metadata !{metadata !4, metadata !4, i64 0} +!3 = metadata !{metadata !"A", metadata !0} +!4 = metadata !{metadata !"B", metadata !0} diff --git a/test/Analysis/TypeBasedAliasAnalysis/licm.ll b/test/Analysis/TypeBasedAliasAnalysis/licm.ll index 12a9c1d..e45fc85 100644 --- a/test/Analysis/TypeBasedAliasAnalysis/licm.ll +++ b/test/Analysis/TypeBasedAliasAnalysis/licm.ll @@ -30,8 +30,8 @@ for.end: ; preds = %for.body, %entry } !0 = metadata !{metadata !"root", null} -!1 = metadata !{metadata !"pointer", metadata !0} -!2 = metadata !{metadata !"double", metadata !0} +!1 = metadata !{metadata !6, metadata !6, i64 0} +!2 = metadata !{metadata !7, metadata !7, i64 0} ; LICM shouldn't hoist anything here. 
@@ -56,6 +56,10 @@ loop: br label %loop } -!3 = metadata !{metadata !"pointer", metadata !4} -!4 = metadata !{metadata !"char", metadata !5} -!5 = metadata !{metadata !"root", null} +!3 = metadata !{metadata !"pointer", metadata !8} +!4 = metadata !{metadata !8, metadata !8, i64 0} +!5 = metadata !{metadata !9, metadata !9, i64 0} +!6 = metadata !{metadata !"pointer", metadata !0} +!7 = metadata !{metadata !"double", metadata !0} +!8 = metadata !{metadata !"char", metadata !9} +!9 = metadata !{metadata !"root", null} diff --git a/test/Analysis/TypeBasedAliasAnalysis/lit.local.cfg b/test/Analysis/TypeBasedAliasAnalysis/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Analysis/TypeBasedAliasAnalysis/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll b/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll index c2407df..6fd6eac 100644 --- a/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll +++ b/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll @@ -7,7 +7,7 @@ target datalayout = "e-p:64:64:64" ; CHECK: @foo ; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 16, i32 1, i1 false), !tbaa !0 -; CHECK-NEXT: store i8 2, i8* %s, align 1, !tbaa !2 +; CHECK-NEXT: store i8 2, i8* %s, align 1, !tbaa [[TAGA:!.*]] ; CHECK-NEXT: ret void define void @foo(i8* nocapture %p, i8* nocapture %q, i8* nocapture %s) nounwind { tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 16, i32 1, i1 false), !tbaa !2 @@ -18,6 +18,10 @@ define void @foo(i8* nocapture %p, i8* nocapture %q, i8* nocapture %s) nounwind declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +; CHECK [[TAGA]] = metadata !{metadata [[TYPEA:!.*]], metadata [[TYPEA]], i64 0} +; CHECK [[TYPEA]] = metadata !{metadata !"A", metadata !{{.*}}} !0 = metadata !{metadata !"tbaa root", null} -!1 = metadata !{metadata !"A", metadata !0} -!2 = 
metadata !{metadata !"B", metadata !0} +!1 = metadata !{metadata !3, metadata !3, i64 0} +!2 = metadata !{metadata !4, metadata !4, i64 0} +!3 = metadata !{metadata !"A", metadata !0} +!4 = metadata !{metadata !"B", metadata !0} diff --git a/test/Analysis/TypeBasedAliasAnalysis/placement-tbaa.ll b/test/Analysis/TypeBasedAliasAnalysis/placement-tbaa.ll index f1edb44..609e87c 100644 --- a/test/Analysis/TypeBasedAliasAnalysis/placement-tbaa.ll +++ b/test/Analysis/TypeBasedAliasAnalysis/placement-tbaa.ll @@ -18,7 +18,7 @@ ; Basic AA says MayAlias, TBAA says NoAlias ; CHECK: MayAlias: i64* %i5, i8** %p -; CHECK: NoAlias: store i64 %conv, i64* %i5, align 8, !tbaa !4 <-> store i8* null, i8** %p, align 8, !tbaa !3 +; CHECK: NoAlias: store i64 %conv, i64* %i5, align 8, !tbaa !6 <-> store i8* null, i8** %p, align 8, !tbaa !9 %struct.Foo = type { i64 } %struct.Bar = type { i8* } @@ -32,10 +32,10 @@ entry: store i32 %n, i32* %n.addr, align 4, !tbaa !0 %call = call noalias i8* @_Znwm(i64 8) %0 = bitcast i8* %call to %struct.Foo* - store %struct.Foo* %0, %struct.Foo** %f, align 8, !tbaa !3 - %1 = load %struct.Foo** %f, align 8, !tbaa !3 + store %struct.Foo* %0, %struct.Foo** %f, align 8, !tbaa !4 + %1 = load %struct.Foo** %f, align 8, !tbaa !4 %i = getelementptr inbounds %struct.Foo* %1, i32 0, i32 0 - store i64 1, i64* %i, align 8, !tbaa !4 + store i64 1, i64* %i, align 8, !tbaa !6 store i32 0, i32* %i1, align 4, !tbaa !0 br label %for.cond @@ -46,7 +46,7 @@ for.cond: br i1 %cmp, label %for.body, label %for.end for.body: - %4 = load %struct.Foo** %f, align 8, !tbaa !3 + %4 = load %struct.Foo** %f, align 8, !tbaa !4 %5 = bitcast %struct.Foo* %4 to i8* %new.isnull = icmp eq i8* %5, null br i1 %new.isnull, label %new.cont, label %new.notnull @@ -57,11 +57,11 @@ new.notnull: new.cont: %7 = phi %struct.Bar* [ %6, %new.notnull ], [ null, %for.body ] - store %struct.Bar* %7, %struct.Bar** %b, align 8, !tbaa !3 - %8 = load %struct.Bar** %b, align 8, !tbaa !3 + store %struct.Bar* %7, 
%struct.Bar** %b, align 8, !tbaa !4 + %8 = load %struct.Bar** %b, align 8, !tbaa !4 %p = getelementptr inbounds %struct.Bar* %8, i32 0, i32 0 - store i8* null, i8** %p, align 8, !tbaa !3 - %9 = load %struct.Foo** %f, align 8, !tbaa !3 + store i8* null, i8** %p, align 8, !tbaa !9 + %9 = load %struct.Foo** %f, align 8, !tbaa !4 %10 = bitcast %struct.Foo* %9 to i8* %new.isnull2 = icmp eq i8* %10, null br i1 %new.isnull2, label %new.cont4, label %new.notnull3 @@ -72,12 +72,12 @@ new.notnull3: new.cont4: %12 = phi %struct.Foo* [ %11, %new.notnull3 ], [ null, %new.cont ] - store %struct.Foo* %12, %struct.Foo** %f, align 8, !tbaa !3 + store %struct.Foo* %12, %struct.Foo** %f, align 8, !tbaa !4 %13 = load i32* %i1, align 4, !tbaa !0 %conv = sext i32 %13 to i64 - %14 = load %struct.Foo** %f, align 8, !tbaa !3 + %14 = load %struct.Foo** %f, align 8, !tbaa !4 %i5 = getelementptr inbounds %struct.Foo* %14, i32 0, i32 0 - store i64 %conv, i64* %i5, align 8, !tbaa !4 + store i64 %conv, i64* %i5, align 8, !tbaa !6 br label %for.inc for.inc: @@ -87,9 +87,9 @@ for.inc: br label %for.cond for.end: - %16 = load %struct.Foo** %f, align 8, !tbaa !3 + %16 = load %struct.Foo** %f, align 8, !tbaa !4 %i6 = getelementptr inbounds %struct.Foo* %16, i32 0, i32 0 - %17 = load i64* %i6, align 8, !tbaa !4 + %17 = load i64* %i6, align 8, !tbaa !6 ret i64 %17 } @@ -97,8 +97,14 @@ declare noalias i8* @_Znwm(i64) attributes #0 = { nounwind } -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} -!3 = metadata !{metadata !"any pointer", metadata !1} -!4 = metadata !{metadata !"long", metadata !1} +!0 = metadata !{metadata !1, metadata !1, i64 0} +!1 = metadata !{metadata !"int", metadata !2, i64 0} +!2 = metadata !{metadata !"omnipotent char", metadata !3, i64 0} +!3 = metadata !{metadata !"Simple C/C++ TBAA"} +!4 = metadata !{metadata !5, metadata !5, i64 0} +!5 = metadata !{metadata !"any pointer", 
metadata !2, i64 0} +!6 = metadata !{metadata !7, metadata !8, i64 0} +!7 = metadata !{metadata !"_ZTS3Foo", metadata !8, i64 0} +!8 = metadata !{metadata !"long", metadata !2, i64 0} +!9 = metadata !{metadata !10, metadata !5, i64 0} +!10 = metadata !{metadata !"_ZTS3Bar", metadata !5, i64 0} diff --git a/test/Analysis/TypeBasedAliasAnalysis/precedence.ll b/test/Analysis/TypeBasedAliasAnalysis/precedence.ll index 47cb5f2..b219ef1 100644 --- a/test/Analysis/TypeBasedAliasAnalysis/precedence.ll +++ b/test/Analysis/TypeBasedAliasAnalysis/precedence.ll @@ -39,8 +39,12 @@ entry: ret i64 %tmp3 } -!0 = metadata !{metadata !"int", metadata !1} +!0 = metadata !{metadata !2, metadata !2, i64 0} !1 = metadata !{metadata !"simple"} -!3 = metadata !{metadata !"float", metadata !1} -!4 = metadata !{metadata !"long", metadata !1} -!5 = metadata !{metadata !"small", metadata !1} +!2 = metadata !{metadata !"int", metadata !1} +!3 = metadata !{metadata !6, metadata !6, i64 0} +!4 = metadata !{metadata !7, metadata !7, i64 0} +!5 = metadata !{metadata !8, metadata !8, i64 0} +!6 = metadata !{metadata !"float", metadata !1} +!7 = metadata !{metadata !"long", metadata !1} +!8 = metadata !{metadata !"small", metadata !1} diff --git a/test/Analysis/TypeBasedAliasAnalysis/sink.ll b/test/Analysis/TypeBasedAliasAnalysis/sink.ll index fd32d6a..726da6c 100644 --- a/test/Analysis/TypeBasedAliasAnalysis/sink.ll +++ b/test/Analysis/TypeBasedAliasAnalysis/sink.ll @@ -1,7 +1,7 @@ ; RUN: opt -tbaa -sink -S < %s | FileCheck %s ; CHECK: a: -; CHECK: %f = load float* %p, !tbaa !2 +; CHECK: %f = load float* %p, !tbaa [[TAGA:!.*]] ; CHECK: store float %f, float* %q define void @foo(float* %p, i1 %c, float* %q, float* %r) { @@ -15,6 +15,10 @@ b: ret void } -!0 = metadata !{metadata !"A", metadata !2} -!1 = metadata !{metadata !"B", metadata !2} +; CHECK: [[TAGA]] = metadata !{metadata [[TYPEA:!.*]], metadata [[TYPEA]], i64 0} +; CHECK: [[TYPEA]] = metadata !{metadata !"A", metadata !{{.*}}} +!0 = 
metadata !{metadata !3, metadata !3, i64 0} +!1 = metadata !{metadata !4, metadata !4, i64 0} !2 = metadata !{metadata !"test"} +!3 = metadata !{metadata !"A", metadata !2} +!4 = metadata !{metadata !"B", metadata !2} diff --git a/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll b/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll index ee52763..0cd5c30 100644 --- a/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll +++ b/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -tbaa -basicaa -struct-path-tbaa -aa-eval -evaluate-tbaa -print-no-aliases -print-may-aliases -disable-output 2>&1 | FileCheck %s -; RUN: opt < %s -tbaa -basicaa -struct-path-tbaa -gvn -S | FileCheck %s --check-prefix=OPT +; RUN: opt < %s -tbaa -basicaa -aa-eval -evaluate-tbaa -print-no-aliases -print-may-aliases -disable-output 2>&1 | FileCheck %s +; RUN: opt < %s -tbaa -basicaa -gvn -S | FileCheck %s --check-prefix=OPT ; Generated from clang/test/CodeGen/tbaa.cpp with "-O1 -struct-path-tbaa -disable-llvm-optzns". 
%struct.StructA = type { i16, i32, i16, i32 } diff --git a/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll b/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll index a0d77fa..17dd745 100644 --- a/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll +++ b/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll @@ -25,9 +25,9 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !7 = metadata !{metadata !1} !6 = metadata !{i32 786449, metadata !8, i32 12, metadata !"clang version 3.0 (trunk 131941)", i1 true, metadata !"", i32 0, metadata !9, metadata !9, metadata !7, null, null, metadata !""} ; [ DW_TAG_compile_unit ] !0 = metadata !{i32 786688, metadata !1, metadata !"c", metadata !2, i32 2, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] -!1 = metadata !{i32 786478, metadata !8, metadata !2, metadata !"main", metadata !"main", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 ()* @main, null, null, null, i32 1} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786478, metadata !8, metadata !2, metadata !"main", metadata !"main", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, null, i32 1} ; [ DW_TAG_subprogram ] !2 = metadata !{i32 786473, metadata !8} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 786453, metadata !8, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !8, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, null, metadata !6, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !8 = metadata !{metadata !"/d/j/debug-test.c", metadata !"/Volumes/Data/b"} diff 
--git a/test/Assembler/ConstantExprFoldCast.ll b/test/Assembler/ConstantExprFoldCast.ll index 0ce6e84..161a4ca 100644 --- a/test/Assembler/ConstantExprFoldCast.ll +++ b/test/Assembler/ConstantExprFoldCast.ll @@ -12,3 +12,5 @@ @F = global i32* inttoptr (i32 add (i32 5, i32 -5) to i32*) @G = global i32* inttoptr (i32 sub (i32 5, i32 5) to i32*) +; Address space cast AS0 null-> AS1 null +@H = global i32 addrspace(1)* addrspacecast(i32* null to i32 addrspace(1)*) diff --git a/test/Assembler/ConstantExprNoFold.ll b/test/Assembler/ConstantExprNoFold.ll index 83e8909..b41959f 100644 --- a/test/Assembler/ConstantExprNoFold.ll +++ b/test/Assembler/ConstantExprNoFold.ll @@ -21,3 +21,6 @@ target datalayout = "p:32:32" ; CHECK: @D = global i1 icmp eq (i64* getelementptr inbounds (i64* @A, i64 1), i64* getelementptr inbounds (i64* @B, i64 2)) @D = global i1 icmp eq (i64* getelementptr inbounds (i64* @A, i64 1), i64* getelementptr inbounds (i64* @B, i64 2)) + +; CHECK: @E = global i64 addrspace(1)* addrspacecast (i64* @A to i64 addrspace(1)*) +@E = global i64 addrspace(1)* addrspacecast(i64* @A to i64 addrspace(1)*) diff --git a/test/Assembler/attribute-builtin.ll b/test/Assembler/attribute-builtin.ll index a7799f0..01c8a6b 100644 --- a/test/Assembler/attribute-builtin.ll +++ b/test/Assembler/attribute-builtin.ll @@ -8,7 +8,7 @@ ; RUN: llvm-dis | \ ; RUN: llvm-as -disable-verify | \ ; RUN: llvm-dis | \ -; RUN: FileCheck -check-prefix=ASSEMBLES %s +; RUN: FileCheck -check-prefix=CHECK-ASSEMBLES %s ; CHECK-ASSEMBLES: declare i8* @foo(i8*) [[NOBUILTIN:#[0-9]+]] ; CHECK-ASSEMBLES: call i8* @foo(i8* %x) [[BUILTIN:#[0-9]+]] @@ -26,10 +26,8 @@ define i8* @bar(i8* %x) { ; which do not have nobuiltin on them. ; rdar://13727199 -; RUN: not llvm-as <%s 2>&1 | FileCheck -check-prefix=BAD %s +; RUN: not llvm-as <%s 2>&1 | FileCheck -check-prefix=CHECK-BAD %s -; CHECK-BAD: Attribute 'builtin' can only be used in a call to a function with the 'nobuiltin' attribute. 
-; CHECK-BAD-NEXT: %y = call i8* @lar(i8* %x) #1 ; CHECK-BAD: Attribute 'builtin' can only be applied to a callsite. ; CHECK-BAD-NEXT: i8* (i8*)* @car ; CHECK-BAD: Attribute 'builtin' can only be applied to a callsite. diff --git a/test/Assembler/auto_upgrade_intrinsics.ll b/test/Assembler/auto_upgrade_intrinsics.ll index 7ad5cc3..8f655ce 100644 --- a/test/Assembler/auto_upgrade_intrinsics.ll +++ b/test/Assembler/auto_upgrade_intrinsics.ll @@ -6,6 +6,10 @@ declare i16 @llvm.ctlz.i16(i16) declare i32 @llvm.ctlz.i32(i32) declare i42 @llvm.ctlz.i42(i42) ; Not a power-of-2 + +declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly + + define void @test.ctlz(i8 %a, i16 %b, i32 %c, i42 %d) { ; CHECK: @test.ctlz @@ -42,3 +46,14 @@ entry: ret void } + + +@a = private global [60 x i8] zeroinitializer, align 1 + +define i32 @test.objectsize() { +; CHECK-LABEL: @test.objectsize( +; CHECK: @llvm.objectsize.i32.p0i8 +; CHECK-DAG: declare i32 @llvm.objectsize.i32.p0i8 + %s = call i32 @llvm.objectsize.i32(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i1 false) + ret i32 %s +} diff --git a/test/Assembler/functionlocal-metadata.ll b/test/Assembler/functionlocal-metadata.ll index 0f0ab4c..0d93bfd 100644 --- a/test/Assembler/functionlocal-metadata.ll +++ b/test/Assembler/functionlocal-metadata.ll @@ -3,7 +3,7 @@ define void @Foo(i32 %a, i32 %b) { entry: call void @llvm.dbg.value(metadata !{ i32* %1 }, i64 16, metadata !2) -; CHECK: call void @llvm.dbg.value(metadata !{i32* %1}, i64 16, metadata !2) +; CHECK: call void @llvm.dbg.value(metadata !{i32* %1}, i64 16, metadata ![[ID2:[0-9]+]]) %0 = add i32 %a, 1 ; <i32> [#uses=1] %two = add i32 %b, %0 ; <i32> [#uses=0] %1 = alloca i32 ; <i32*> [#uses=1] @@ -19,28 +19,38 @@ entry: call void @llvm.dbg.declare(metadata !{i32 %a}, metadata !{i32 %a, metadata !"foo"}) ; CHECK: metadata !{i32 %a}, metadata !{i32 %a, metadata !"foo"} call void @llvm.dbg.declare(metadata !{i32 %b}, metadata !{metadata !0, i32 %two}) -; CHECK: 
metadata !{i32 %b}, metadata !{metadata !0, i32 %two} +; CHECK: metadata !{i32 %b}, metadata !{metadata ![[ID0:[0-9]+]], i32 %two} call void @llvm.dbg.value(metadata !{ i32 %a }, i64 0, metadata !1) -; CHECK: metadata !{i32 %a}, i64 0, metadata !1 +; CHECK: metadata !{i32 %a}, i64 0, metadata ![[ID1:[0-9]+]] call void @llvm.dbg.value(metadata !{ i32 %0 }, i64 25, metadata !0) -; CHECK: metadata !{i32 %0}, i64 25, metadata !0 +; CHECK: metadata !{i32 %0}, i64 25, metadata ![[ID0]] call void @llvm.dbg.value(metadata !{ i32* %1 }, i64 16, metadata !3) -; CHECK: call void @llvm.dbg.value(metadata !{i32* %1}, i64 16, metadata !3) +; CHECK: call void @llvm.dbg.value(metadata !{i32* %1}, i64 16, metadata ![[ID3:[0-9]+]]) call void @llvm.dbg.value(metadata !3, i64 12, metadata !2) -; CHECK: metadata !3, i64 12, metadata !2 +; CHECK: metadata ![[ID3]], i64 12, metadata ![[ID2]] ret void, !foo !0, !bar !1 -; CHECK: ret void, !foo !0, !bar !1 +; CHECK: ret void, !foo ![[FOO:[0-9]+]], !bar ![[BAR:[0-9]+]] } +!llvm.module.flags = !{!4} + !0 = metadata !{i32 662302, i32 26, metadata !1, null} !1 = metadata !{i32 4, metadata !"foo"} !2 = metadata !{metadata !"bar"} !3 = metadata !{metadata !"foo"} +!4 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !foo = !{ !0 } !bar = !{ !1 } + +; CHECK: !foo = !{![[FOO]]} +; CHECK: !bar = !{![[BAR]]} +; CHECK: ![[ID0]] = metadata !{i32 662302, i32 26, metadata ![[ID1]], null} +; CHECK: ![[ID1]] = metadata !{i32 4, metadata !"foo"} +; CHECK: ![[ID2]] = metadata !{metadata !"bar"} +; CHECK; ![[ID3]] = metadata !{metadata !"foo"} diff --git a/test/Assembler/lit.local.cfg b/test/Assembler/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Assembler/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Bindings/Ocaml/analysis.ml 
b/test/Bindings/Ocaml/analysis.ml index 7df8e21..c02645c 100644 --- a/test/Bindings/Ocaml/analysis.ml +++ b/test/Bindings/Ocaml/analysis.ml @@ -1,4 +1,7 @@ -(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_analysis.cmxa %s -o %t +(* RUN: rm -rf %t.builddir + * RUN: mkdir -p %t.builddir + * RUN: cp %s %t.builddir + * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_analysis.cmxa %t.builddir/analysis.ml -o %t * RUN: %t * XFAIL: vg_leak *) diff --git a/test/Bindings/Ocaml/bitreader.ml b/test/Bindings/Ocaml/bitreader.ml index e5beccd..f1d202a 100644 --- a/test/Bindings/Ocaml/bitreader.ml +++ b/test/Bindings/Ocaml/bitreader.ml @@ -1,4 +1,7 @@ -(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_bitreader.cmxa llvm_bitwriter.cmxa %s -o %t +(* RUN: rm -rf %t.builddir + * RUN: mkdir -p %t.builddir + * RUN: cp %s %t.builddir + * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_bitreader.cmxa llvm_bitwriter.cmxa %t.builddir/bitreader.ml -o %t * RUN: %t %t.bc * RUN: llvm-dis < %t.bc * XFAIL: vg_leak diff --git a/test/Bindings/Ocaml/bitwriter.ml b/test/Bindings/Ocaml/bitwriter.ml index 1388760..ae456cf 100644 --- a/test/Bindings/Ocaml/bitwriter.ml +++ b/test/Bindings/Ocaml/bitwriter.ml @@ -1,4 +1,7 @@ -(* RUN: %ocamlopt -warn-error A unix.cmxa llvm.cmxa llvm_bitwriter.cmxa %s -o %t +(* RUN: rm -rf %t.builddir + * RUN: mkdir -p %t.builddir + * RUN: cp %s %t.builddir + * RUN: %ocamlopt -warn-error A unix.cmxa llvm.cmxa llvm_bitwriter.cmxa %t.builddir/bitwriter.ml -o %t * RUN: %t %t.bc * RUN: llvm-dis < %t.bc * XFAIL: vg_leak diff --git a/test/Bindings/Ocaml/executionengine.ml b/test/Bindings/Ocaml/executionengine.ml index f7a49bb..8e24949 100644 --- a/test/Bindings/Ocaml/executionengine.ml +++ b/test/Bindings/Ocaml/executionengine.ml @@ -1,4 +1,7 @@ -(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_target.cmxa llvm_executionengine.cmxa %s -o %t +(* RUN: rm -rf %t.builddir + * RUN: mkdir -p %t.builddir + * RUN: cp %s %t.builddir + * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_target.cmxa 
llvm_executionengine.cmxa %t.builddir/executionengine.ml -o %t * RUN: %t * XFAIL: vg_leak *) @@ -100,11 +103,11 @@ let test_executionengine () = (* run_static_dtors *) ExecutionEngine.run_static_dtors ee; - (* Show that the target data binding links and runs.*) - let td = ExecutionEngine.target_data ee in + (* Show that the data layout binding links and runs.*) + let dl = ExecutionEngine.data_layout ee in (* Demonstrate that a garbage pointer wasn't returned. *) - let ty = intptr_type td in + let ty = DataLayout.intptr_type context dl in if ty != i32_type && ty != i64_type then bomb "target_data did not work"; (* dispose *) diff --git a/test/Bindings/Ocaml/ext_exc.ml b/test/Bindings/Ocaml/ext_exc.ml index b4d2e6d..9afc3c3 100644 --- a/test/Bindings/Ocaml/ext_exc.ml +++ b/test/Bindings/Ocaml/ext_exc.ml @@ -1,4 +1,7 @@ -(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_bitreader.cmxa llvm_executionengine.cmxa %s -o %t +(* RUN: rm -rf %t.builddir + * RUN: mkdir -p %t.builddir + * RUN: cp %s %t.builddir + * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_bitreader.cmxa llvm_executionengine.cmxa %t.builddir/ext_exc.ml -o %t * RUN: %t </dev/null * XFAIL: vg_leak *) diff --git a/test/Bindings/Ocaml/ipo_opts.ml b/test/Bindings/Ocaml/ipo_opts.ml index d4537e4..e0bcbe5 100644 --- a/test/Bindings/Ocaml/ipo_opts.ml +++ b/test/Bindings/Ocaml/ipo_opts.ml @@ -1,4 +1,7 @@ -(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_ipo.cmxa llvm_target.cmxa %s -o %t +(* RUN: rm -rf %t.builddir + * RUN: mkdir -p %t.builddir + * RUN: cp %s %t.builddir + * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_ipo.cmxa llvm_target.cmxa %t.builddir/ipo_opts.ml -o %t * RUN: %t %t.bc * XFAIL: vg_leak *) @@ -43,15 +46,13 @@ let test_transforms () = ignore (build_ret (build_call fn [| |] "" b) b); end; - let td = DataLayout.create (target_triple m) in - ignore (PassManager.create () - ++ DataLayout.add td ++ add_argument_promotion ++ add_constant_merge ++ add_dead_arg_elimination ++ add_function_attrs ++ 
add_function_inlining + ++ add_always_inliner ++ add_global_dce ++ add_global_optimizer ++ add_ipc_propagation @@ -61,9 +62,7 @@ let test_transforms () = ++ add_strip_dead_prototypes ++ add_strip_symbols ++ PassManager.run_module m - ++ PassManager.dispose); - - DataLayout.dispose td + ++ PassManager.dispose) (*===-- Driver ------------------------------------------------------------===*) diff --git a/test/Bindings/Ocaml/irreader.ml b/test/Bindings/Ocaml/irreader.ml new file mode 100644 index 0000000..3511c2b --- /dev/null +++ b/test/Bindings/Ocaml/irreader.ml @@ -0,0 +1,59 @@ +(* RUN: rm -rf %t.builddir + * RUN: mkdir -p %t.builddir + * RUN: cp %s %t.builddir + * RUN: %ocamlopt -g -warn-error A llvm.cmxa llvm_irreader.cmxa %t.builddir/irreader.ml -o %t + * RUN: %t + * XFAIL: vg_leak + *) + +(* Note: It takes several seconds for ocamlopt to link an executable with + libLLVMCore.a, so it's better to write a big test than a bunch of + little ones. *) + +open Llvm +open Llvm_irreader + +let context = global_context () + +(* Tiny unit test framework - really just to help find which line is busted *) +let print_checkpoints = false + +let suite name f = + if print_checkpoints then + prerr_endline (name ^ ":"); + f () + +let _ = + Printexc.record_backtrace true + +let insist cond = + if not cond then failwith "insist" + + +(*===-- IR Reader ---------------------------------------------------------===*) + +let test_irreader () = + begin + let buf = MemoryBuffer.of_string "@foo = global i32 42" in + let m = parse_ir context buf in + match lookup_global "foo" m with + | Some foo -> + insist ((global_initializer foo) = (const_int (i32_type context) 42)) + | None -> + failwith "global" + end; + + begin + let buf = MemoryBuffer.of_string "@foo = global garble" in + try + ignore (parse_ir context buf); + failwith "parsed" + with Llvm_irreader.Error _ -> + () + end + + +(*===-- Driver ------------------------------------------------------------===*) + +let _ = + suite "irreader" 
test_irreader diff --git a/test/Bindings/Ocaml/linker.ml b/test/Bindings/Ocaml/linker.ml new file mode 100644 index 0000000..9359ae9 --- /dev/null +++ b/test/Bindings/Ocaml/linker.ml @@ -0,0 +1,63 @@ +(* RUN: rm -rf %t.builddir + * RUN: mkdir -p %t.builddir + * RUN: cp %s %t.builddir + * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_linker.cmxa %t.builddir/linker.ml -o %t + * RUN: %t + * XFAIL: vg_leak + *) + +(* Note: It takes several seconds for ocamlopt to link an executable with + libLLVMCore.a, so it's better to write a big test than a bunch of + little ones. *) + +open Llvm +open Llvm_linker + +let context = global_context () +let void_type = Llvm.void_type context + +(* Tiny unit test framework - really just to help find which line is busted *) +let print_checkpoints = false + +let suite name f = + if print_checkpoints then + prerr_endline (name ^ ":"); + f () + + +(*===-- Linker -----------------------------------------------------------===*) + +let test_linker () = + let fty = function_type void_type [| |] in + + let make_module name = + let m = create_module context name in + let fn = define_function ("fn_" ^ name) fty m in + ignore (build_ret_void (builder_at_end context (entry_block fn))); + m + in + + let m1 = make_module "one" + and m2 = make_module "two" in + link_modules m1 m2 Mode.PreserveSource; + dispose_module m1; + dispose_module m2; + + let m1 = make_module "one" + and m2 = make_module "two" in + link_modules m1 m2 Mode.DestroySource; + dispose_module m1; + + let m1 = make_module "one" + and m2 = make_module "one" in + try + link_modules m1 m2 Mode.PreserveSource; + failwith "must raise" + with Error _ -> + dispose_module m1; + dispose_module m2 + +(*===-- Driver ------------------------------------------------------------===*) + +let _ = + suite "linker" test_linker diff --git a/test/Bindings/Ocaml/lit.local.cfg b/test/Bindings/Ocaml/lit.local.cfg index 640c58d..c38d89a 100644 --- a/test/Bindings/Ocaml/lit.local.cfg +++ 
b/test/Bindings/Ocaml/lit.local.cfg @@ -1,6 +1,5 @@ -config.suffixes = ['.ll', '.c', '.cpp', '.ml'] +config.suffixes = ['.ml'] bindings = set([s.strip() for s in config.root.llvm_bindings.split(',')]) if not 'ocaml' in bindings: config.unsupported = True - diff --git a/test/Bindings/Ocaml/passmgr_builder.ml b/test/Bindings/Ocaml/passmgr_builder.ml new file mode 100644 index 0000000..1a3102f --- /dev/null +++ b/test/Bindings/Ocaml/passmgr_builder.ml @@ -0,0 +1,64 @@ +(* RUN: rm -rf %t.builddir + * RUN: mkdir -p %t.builddir + * RUN: cp %s %t.builddir + * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_passmgr_builder.cmxa %t.builddir/passmgr_builder.ml -o %t + * RUN: %t %t.bc + * XFAIL: vg_leak + *) + +(* Note: It takes several seconds for ocamlopt to link an executable with + libLLVMCore.a, so it's better to write a big test than a bunch of + little ones. *) + +open Llvm +open Llvm_passmgr_builder + +let context = global_context () +let void_type = Llvm.void_type context + +(* Tiny unit test framework - really just to help find which line is busted *) +let print_checkpoints = false + +let suite name f = + if print_checkpoints then + prerr_endline (name ^ ":"); + f () + + +(*===-- Fixture -----------------------------------------------------------===*) + +let filename = Sys.argv.(1) +let m = create_module context filename + + +(*===-- Pass Manager Builder ----------------------------------------------===*) + +let test_pmbuilder () = + let (++) x f = ignore (f x); x in + + let module_passmgr = PassManager.create () in + let func_passmgr = PassManager.create_function m in + let lto_passmgr = PassManager.create () in + + ignore (Llvm_passmgr_builder.create () + ++ set_opt_level 3 + ++ set_size_level 1 + ++ set_disable_unit_at_a_time false + ++ set_disable_unroll_loops false + ++ use_inliner_with_threshold 10 + ++ populate_function_pass_manager func_passmgr + ++ populate_module_pass_manager module_passmgr + ++ populate_lto_pass_manager lto_passmgr + ~internalize:false 
~run_inliner:false); + Gc.compact (); + + PassManager.dispose module_passmgr; + PassManager.dispose func_passmgr; + PassManager.dispose lto_passmgr + + +(*===-- Driver ------------------------------------------------------------===*) + +let _ = + suite "pass manager builder" test_pmbuilder; + dispose_module m diff --git a/test/Bindings/Ocaml/scalar_opts.ml b/test/Bindings/Ocaml/scalar_opts.ml index 0760dad..39913e4 100644 --- a/test/Bindings/Ocaml/scalar_opts.ml +++ b/test/Bindings/Ocaml/scalar_opts.ml @@ -1,4 +1,7 @@ -(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_scalar_opts.cmxa llvm_target.cmxa %s -o %t +(* RUN: rm -rf %t.builddir + * RUN: mkdir -p %t.builddir + * RUN: cp %s %t.builddir + * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_scalar_opts.cmxa llvm_target.cmxa %t.builddir/scalar_opts.ml -o %t * RUN: %t %t.bc * XFAIL: vg_leak *) @@ -38,10 +41,7 @@ let test_transforms () = let fn = define_function "fn" fty m in ignore (build_ret_void (builder_at_end context (entry_block fn))); - let td = DataLayout.create (target_triple m) in - ignore (PassManager.create_function m - ++ DataLayout.add td ++ add_verifier ++ add_constant_propagation ++ add_sccp @@ -72,13 +72,12 @@ let test_transforms () = ++ add_lower_expect_intrinsic ++ add_type_based_alias_analysis ++ add_basic_alias_analysis + ++ add_partially_inline_lib_calls ++ add_verifier ++ PassManager.initialize ++ PassManager.run_function fn ++ PassManager.finalize - ++ PassManager.dispose); - - DataLayout.dispose td + ++ PassManager.dispose) (*===-- Driver ------------------------------------------------------------===*) diff --git a/test/Bindings/Ocaml/target.ml b/test/Bindings/Ocaml/target.ml index 7a35a79..d69fb0e 100644 --- a/test/Bindings/Ocaml/target.ml +++ b/test/Bindings/Ocaml/target.ml @@ -1,5 +1,9 @@ -(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_target.cmxa %s -o %t +(* RUN: rm -rf %t.builddir + * RUN: mkdir -p %t.builddir + * RUN: cp %s %t.builddir + * RUN: %ocamlopt -g -warn-error A llvm.cmxa 
llvm_target.cmxa llvm_executionengine.cmxa %t.builddir/target.ml -o %t * RUN: %t %t.bc + * REQUIRES: native, object-emission * XFAIL: vg_leak *) @@ -10,6 +14,7 @@ open Llvm open Llvm_target +let _ = Llvm_executionengine.initialize_native_target () let context = global_context () let i32_type = Llvm.i32_type context @@ -18,10 +23,11 @@ let i64_type = Llvm.i64_type context (* Tiny unit test framework - really just to help find which line is busted *) let print_checkpoints = false -let suite name f = - if print_checkpoints then - prerr_endline (name ^ ":"); - f () +let _ = + Printexc.record_backtrace true + +let assert_equal a b = + if a <> b then failwith "assert_equal" (*===-- Fixture -----------------------------------------------------------===*) @@ -29,31 +35,83 @@ let suite name f = let filename = Sys.argv.(1) let m = create_module context filename +let target = Target.by_triple (Target.default_triple ()) -(*===-- Target Data -------------------------------------------------------===*) +let machine = TargetMachine.create (Target.default_triple ()) target + +(*===-- Data Layout -------------------------------------------------------===*) let test_target_data () = - let td = DataLayout.create (target_triple m) in - let sty = struct_type context [| i32_type; i64_type |] in - - ignore (DataLayout.as_string td); - ignore (byte_order td); - ignore (pointer_size td); - ignore (intptr_type td); - ignore (size_in_bits td sty); - ignore (store_size td sty); - ignore (abi_size td sty); - ignore (stack_align td sty); - ignore (preferred_align td sty); - ignore (preferred_align_of_global td (declare_global sty "g" m)); - ignore (element_at_offset td sty (Int64.of_int 1)); - ignore (offset_of_element td sty 1); + let module DL = DataLayout in + let layout = "e-p:32:32:32-S32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-" ^ + "f16:16:16-f32:32:32-f64:32:64-f128:128:128-v64:32:64-v128:32:128-" ^ + "a0:0:64-n32" in + let dl = DL.of_string layout in + let sty = struct_type context 
[| i32_type; i64_type |] in - DataLayout.dispose td + assert_equal (DL.as_string dl) layout; + assert_equal (DL.byte_order dl) Endian.Little; + assert_equal (DL.pointer_size dl) 4; + assert_equal (DL.intptr_type context dl) i32_type; + assert_equal (DL.qualified_pointer_size 0 dl) 4; + assert_equal (DL.qualified_intptr_type context 0 dl) i32_type; + assert_equal (DL.size_in_bits sty dl) (Int64.of_int 96); + assert_equal (DL.store_size sty dl) (Int64.of_int 12); + assert_equal (DL.abi_size sty dl) (Int64.of_int 12); + assert_equal (DL.stack_align sty dl) 4; + assert_equal (DL.preferred_align sty dl) 8; + assert_equal (DL.preferred_align_of_global (declare_global sty "g" m) dl) 8; + assert_equal (DL.element_at_offset sty (Int64.of_int 1) dl) 0; + assert_equal (DL.offset_of_element sty 1 dl) (Int64.of_int 4); + + let pm = PassManager.create () in + ignore (DL.add_to_pass_manager pm dl) + + +(*===-- Target ------------------------------------------------------------===*) + +let test_target () = + let module T = Target in + ignore (T.succ target); + ignore (T.name target); + ignore (T.description target); + ignore (T.has_jit target); + ignore (T.has_target_machine target); + ignore (T.has_asm_backend target) + + +(*===-- Target Machine ----------------------------------------------------===*) + +let test_target_machine () = + let module TM = TargetMachine in + assert_equal (TM.target machine) target; + assert_equal (TM.triple machine) (Target.default_triple ()); + assert_equal (TM.cpu machine) ""; + assert_equal (TM.features machine) ""; + ignore (TM.data_layout machine) + + +(*===-- Code Emission -----------------------------------------------------===*) + +let test_code_emission () = + TargetMachine.emit_to_file m CodeGenFileType.ObjectFile filename machine; + try + TargetMachine.emit_to_file m CodeGenFileType.ObjectFile + "/nonexistent/file" machine; + failwith "must raise" + with Llvm_target.Error _ -> + (); + + let buf = TargetMachine.emit_to_memory_buffer m 
CodeGenFileType.ObjectFile + machine in + Llvm.MemoryBuffer.dispose buf (*===-- Driver ------------------------------------------------------------===*) let _ = - suite "target data" test_target_data; + test_target_data (); + test_target (); + test_target_machine (); + (* test_code_emission (); *) (* broken without AsmParser support *) dispose_module m diff --git a/test/Bindings/Ocaml/vectorize_opts.ml b/test/Bindings/Ocaml/vectorize_opts.ml new file mode 100644 index 0000000..5ef985d --- /dev/null +++ b/test/Bindings/Ocaml/vectorize_opts.ml @@ -0,0 +1,56 @@ +(* RUN: rm -rf %t.builddir + * RUN: mkdir -p %t.builddir + * RUN: cp %s %t.builddir + * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_vectorize.cmxa llvm_target.cmxa %t.builddir/vectorize_opts.ml -o %t + * RUN: %t %t.bc + * XFAIL: vg_leak + *) + +(* Note: It takes several seconds for ocamlopt to link an executable with + libLLVMCore.a, so it's better to write a big test than a bunch of + little ones. *) + +open Llvm +open Llvm_vectorize +open Llvm_target + +let context = global_context () +let void_type = Llvm.void_type context + +(* Tiny unit test framework - really just to help find which line is busted *) +let print_checkpoints = false + +let suite name f = + if print_checkpoints then + prerr_endline (name ^ ":"); + f () + + +(*===-- Fixture -----------------------------------------------------------===*) + +let filename = Sys.argv.(1) +let m = create_module context filename + + +(*===-- Transforms --------------------------------------------------------===*) + +let test_transforms () = + let (++) x f = ignore (f x); x in + + let fty = function_type void_type [| |] in + let fn = define_function "fn" fty m in + ignore (build_ret_void (builder_at_end context (entry_block fn))); + + ignore (PassManager.create () + ++ add_bb_vectorize + ++ add_loop_vectorize + ++ add_slp_vectorize + ++ PassManager.run_module m + ++ PassManager.dispose) + + +(*===-- Driver 
------------------------------------------------------------===*) + +let _ = + suite "transforms" test_transforms; + dispose_module m diff --git a/test/Bindings/Ocaml/vmcore.ml b/test/Bindings/Ocaml/vmcore.ml index ccde1f0..167efce 100644 --- a/test/Bindings/Ocaml/vmcore.ml +++ b/test/Bindings/Ocaml/vmcore.ml @@ -1,4 +1,7 @@ -(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_analysis.cmxa llvm_bitwriter.cmxa %s -o %t +(* RUN: rm -rf %t.builddir + * RUN: mkdir -p %t.builddir + * RUN: cp %s %t.builddir + * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_analysis.cmxa llvm_bitwriter.cmxa %t.builddir/vmcore.ml -o %t * RUN: %t %t.bc * RUN: llvm-dis < %t.bc > %t.ll * RUN: FileCheck %s < %t.ll @@ -64,6 +67,14 @@ let filename = Sys.argv.(1) let m = create_module context filename +(*===-- Conversion --------------------------------------------------------===*) + +let test_conversion () = + insist ("i32" = (string_of_lltype i32_type)); + let c = const_int i32_type 42 in + insist ("i32 42" = (string_of_llvalue c)) + + (*===-- Target ------------------------------------------------------------===*) let test_target () = @@ -283,6 +294,7 @@ let test_constants () = * CHECK: const_ptrtoint{{.*}}ptrtoint * CHECK: const_inttoptr{{.*}}inttoptr * CHECK: const_bitcast{{.*}}bitcast + * CHECK: const_intcast{{.*}}zext *) let i128_type = integer_type context 128 in ignore (define_global "const_trunc" (const_trunc (const_add foldbomb five) @@ -302,6 +314,8 @@ let test_constants () = ignore (define_global "const_inttoptr" (const_inttoptr (const_add foldbomb five) void_ptr) m); ignore (define_global "const_bitcast" (const_bitcast ffoldbomb i64_type) m); + ignore (define_global "const_intcast" + (const_intcast foldbomb i128_type ~is_signed:false) m); group "misc constants"; (* CHECK: const_size_of{{.*}}getelementptr{{.*}}null @@ -402,7 +416,8 @@ let test_global_variables () = let fourty_two32 = const_int i32_type 42 in group "declarations"; begin - (* CHECK: GVar01{{.*}}external + (* CHECK: @GVar01 = 
external global i32 + * CHECK: @QGVar01 = external addrspace(3) global i32 *) insist (None == lookup_global "GVar01" m); let g = declare_global i32_type "GVar01" m in @@ -424,8 +439,10 @@ let test_global_variables () = end; group "definitions"; begin - (* CHECK: GVar02{{.*}}42 - * CHECK: GVar03{{.*}}42 + (* CHECK: @GVar02 = global i32 42 + * CHECK: @GVar03 = global i32 42 + * CHECK: @QGVar02 = addrspace(3) global i32 42 + * CHECK: @QGVar03 = addrspace(3) global i32 42 *) let g = define_global "GVar02" fourty_two32 m in let g2 = declare_global i32_type "GVar03" m ++ @@ -449,10 +466,24 @@ let test_global_variables () = set_thread_local true in insist (is_thread_local g); - (* CHECK-NOWHERE-NOT: GVar05 + (* CHECK: GVar05{{.*}}thread_local(initialexec) + *) + group "threadlocal_mode"; + let g = define_global "GVar05" fourty_two32 m ++ + set_thread_local_mode ThreadLocalMode.InitialExec in + insist ((thread_local_mode g) = ThreadLocalMode.InitialExec); + + (* CHECK: GVar06{{.*}}externally_initialized + *) + group "externally_initialized"; + let g = define_global "GVar06" fourty_two32 m ++ + set_externally_initialized true in + insist (is_externally_initialized g); + + (* CHECK-NOWHERE-NOT: GVar07 *) group "delete"; - let g = define_global "GVar05" fourty_two32 m in + let g = define_global "GVar07" fourty_two32 m in delete_global g; (* CHECK: ConstGlobalVar{{.*}}constant @@ -1000,8 +1031,8 @@ let test_builder () = end; group "metadata"; begin - (* CHECK: %metadata = add i32 %P1, %P2, !test !0 - * !0 is metadata emitted at EOF. + (* CHECK: %metadata = add i32 %P1, %P2, !test !1 + * !1 is metadata emitted at EOF. *) let i = build_add p1 p2 "metadata" atentry in insist ((has_metadata i) = false); @@ -1024,9 +1055,19 @@ let test_builder () = set_metadata i kind md end; + group "named metadata"; begin + (* !llvm.module.flags is emitted at EOF. 
*) + let n1 = const_int i32_type 1 in + let n2 = mdstring context "Debug Info Version" in + let md = mdnode context [| n1; n2; n1 |] in + add_named_metadata_operand m "llvm.module.flags" md; + + insist ((get_named_metadata m "llvm.module.flags") = [| md |]) + end; + group "dbg"; begin - (* CHECK: %dbg = add i32 %P1, %P2, !dbg !1 - * !1 is metadata emitted at EOF. + (* CHECK: %dbg = add i32 %P1, %P2, !dbg !2 + * !2 is metadata emitted at EOF. *) insist ((current_debug_location atentry) = None); @@ -1234,16 +1275,27 @@ let test_builder () = (* CHECK: %build_alloca = alloca i32 * CHECK: %build_array_alloca = alloca i32, i32 %P2 - * CHECK: %build_load = load i32* %build_array_alloca - * CHECK: store i32 %P2, i32* %build_alloca + * CHECK: %build_load = load volatile i32* %build_array_alloca, align 4 + * CHECK: store volatile i32 %P2, i32* %build_alloca, align 4 * CHECK: %build_gep = getelementptr i32* %build_array_alloca, i32 %P2 * CHECK: %build_in_bounds_gep = getelementptr inbounds i32* %build_array_alloca, i32 %P2 * CHECK: %build_struct_gep = getelementptr inbounds{{.*}}%build_alloca2, i32 0, i32 1 + * CHECK: %build_atomicrmw = atomicrmw xchg i8* %p, i8 42 seq_cst *) let alloca = build_alloca i32_type "build_alloca" b in let array_alloca = build_array_alloca i32_type p2 "build_array_alloca" b in - ignore(build_load array_alloca "build_load" b); - ignore(build_store p2 alloca b); + + let load = build_load array_alloca "build_load" b in + ignore(set_alignment 4 load); + ignore(set_volatile true load); + insist(true = is_volatile load); + insist(4 = alignment load); + + let store = build_store p2 alloca b in + ignore(set_volatile true store); + ignore(set_alignment 4 store); + insist(true = is_volatile store); + insist(4 = alignment store); ignore(build_gep array_alloca [| p2 |] "build_gep" b); ignore(build_in_bounds_gep array_alloca [| p2 |] "build_in_bounds_gep" b); @@ -1251,6 +1303,11 @@ let test_builder () = let alloca2 = build_alloca sty "build_alloca2" b in 
ignore(build_struct_gep alloca2 1 "build_struct_gep" b); + let p = build_alloca i8_type "p" b in + ignore(build_atomicrmw AtomicRMWBinOp.Xchg p (const_int i8_type 42) + AtomicOrdering.SequentiallyConsistent false "build_atomicrmw" + b); + ignore(build_unreachable b) end; @@ -1289,8 +1346,10 @@ let test_builder () = (* End-of-file checks for things like metdata and attributes. * CHECK: attributes #0 = {{.*}}uwtable{{.*}} - * CHECK: !0 = metadata !{i32 1, metadata !"metadata test"} - * CHECK: !1 = metadata !{i32 2, i32 3, metadata !2, metadata !2} + * CHECK: !llvm.module.flags = !{!0} + * CHECK: !0 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} + * CHECK: !1 = metadata !{i32 1, metadata !"metadata test"} + * CHECK: !2 = metadata !{i32 2, i32 3, metadata !3, metadata !3} *) (*===-- Pass Managers -----------------------------------------------------===*) @@ -1317,6 +1376,14 @@ let test_pass_manager () = end +(*===-- Memory Buffer -----------------------------------------------------===*) + +let test_memory_buffer () = + group "memory buffer"; + let buf = MemoryBuffer.of_string "foobar" in + insist ((MemoryBuffer.as_string buf) = "foobar") + + (*===-- Writer ------------------------------------------------------------===*) let test_writer () = @@ -1334,6 +1401,7 @@ let test_writer () = (*===-- Driver ------------------------------------------------------------===*) let _ = + suite "conversion" test_conversion; suite "target" test_target; suite "constants" test_constants; suite "global values" test_global_values; @@ -1347,5 +1415,6 @@ let _ = suite "instructions" test_instructions; suite "builder" test_builder; suite "pass manager" test_pass_manager; + suite "memory buffer" test_memory_buffer; suite "writer" test_writer; (* Keep this last; it disposes m. 
*) exit !exit_status diff --git a/test/Bindings/llvm-c/calc.test b/test/Bindings/llvm-c/calc.test new file mode 100644 index 0000000..36a76e6 --- /dev/null +++ b/test/Bindings/llvm-c/calc.test @@ -0,0 +1,15 @@ +; RUN: llvm-c-test --calc <%s | FileCheck %s + +; constant folding +test 100 200 + +;CHECK: ModuleID = 'test' +;CHECK: define i64 @test +;CHECK: { +;CHECK: ret i64 300 +;CHECK: } + +arg1 0 @ 0 @ * 1 @ 1 @ * + +;CHECK: ModuleID = 'arg1' +;CHECK: getelementptr +;CHECK: load +;CHECK: ret diff --git a/test/Bindings/llvm-c/disassemble.test b/test/Bindings/llvm-c/disassemble.test new file mode 100644 index 0000000..201e914 --- /dev/null +++ b/test/Bindings/llvm-c/disassemble.test @@ -0,0 +1,29 @@ +; RUN: llvm-c-test --disassemble < %s | FileCheck %s + + +arm-linux-android 44 26 1f e5 0c 10 4b e2 02 20 81 e0 +;CHECK: triple: arm-linux-android +;CHECK: ldr r2, [pc, #-1604] +;CHECK: sub r1, r11, #12 +;CHECK: 02 20 81 e0 +;CHECK: add r2, r1, r2 + +x86_64-linux-unknown 48 83 c4 38 5b 5d 41 5c 41 5d 41 5e 41 5f c3 +;CHECK: triple: x86_64-linux-unknown +;CHECK: addq $56, %rsp +;CHECK: popq %rbx +;CHECK: popq %rbp +;CHECK: popq %r12 +;CHECK: popq %r13 +;CHECK: popq %r14 +;CHECK: popq %r15 +;CHECK: ret + +i686-apple-darwin 0f b7 4c 24 0a e8 29 ce ff ff +;CHECK: movzwl 10(%esp), %ecx +;CHECK: calll -12759 + +i686-linux-unknown dd 44 24 04 d9 e1 c3 +;CHECK: fldl 4(%esp) +;CHECK: fabs +;CHECK: ret diff --git a/test/Bindings/llvm-c/functions.ll b/test/Bindings/llvm-c/functions.ll new file mode 100644 index 0000000..4503fb1 --- /dev/null +++ b/test/Bindings/llvm-c/functions.ll @@ -0,0 +1,31 @@ +; RUN: llvm-as < %s | llvm-c-test --module-list-functions | FileCheck %s + +define i32 @X() { +entry: + br label %l1 + +l1: + br label %l2 + +l2: + br label %l3 + +l3: + ret i32 1234 +} +;CHECK: FunctionDefinition: X [#bb=4] + + +define i32 @Z(i32 %a) { +entry: + %0 = tail call i32 @Y(i32 %a) + ret i32 %0 +} + +;CHECK: FunctionDefinition: Z [#bb=1] +;CHECK: calls: Y +;CHECK: #isn: 2 + 
+declare i32 @Y(i32) +;CHECK: FunctionDeclaration: Y + diff --git a/test/Bindings/llvm-c/globals.ll b/test/Bindings/llvm-c/globals.ll new file mode 100644 index 0000000..a38f08b --- /dev/null +++ b/test/Bindings/llvm-c/globals.ll @@ -0,0 +1,7 @@ +; RUN: llvm-as < %s | llvm-c-test --module-list-globals | FileCheck %s + +@foo = constant [7 x i8] c"foobar\00", align 1 +;CHECK: GlobalDefinition: foo [7 x i8]* + +@bar = common global i32 0, align 4 +;CHECK: GlobalDefinition: bar i32* diff --git a/test/Bindings/llvm-c/lit.local.cfg b/test/Bindings/llvm-c/lit.local.cfg new file mode 100644 index 0000000..d83ebee --- /dev/null +++ b/test/Bindings/llvm-c/lit.local.cfg @@ -0,0 +1,5 @@ +targets = set(config.root.targets_to_build.split()) +if not "X86" in targets: + config.unsupported = True +if not "ARM" in targets: + config.unsupported = True diff --git a/test/Bitcode/2012-05-07-SwitchInstRangesSupport.ll b/test/Bitcode/2012-05-07-SwitchInstRangesSupport.ll deleted file mode 100644 index 583b9a8..0000000 --- a/test/Bitcode/2012-05-07-SwitchInstRangesSupport.ll +++ /dev/null @@ -1,33 +0,0 @@ -; RUN: rm -f %t.bc -; RUN: rm -f %t.ll -; RUN: rm -f %t2.bc -; RUN: rm -f %t2.ll -; RUN: llvm-as %s -o %t.bc -; RUN: llvm-dis %t.bc -o - | tail -n +2 > %t.ll -; RUN: llvm-as %t.ll -o %t2.bc -; RUN: llvm-dis %t2.bc -o - | tail -n +2 > %t2.ll -; RUN: llvm-diff %t.ll %t2.ll - -define void @test() { - %mem = alloca i32 - store i32 2, i32* %mem - %c = load i32* %mem - switch i32 %c, label %exit [ - i32 1, label %exit - i32 2, label %exit - ] -exit: - ret void -} -define void @test_wide() { - %mem = alloca i256 - store i256 2, i256* %mem - %c = load i256* %mem - switch i256 %c, label %exit [ - i256 123456789012345678901234567890, label %exit - i256 2, label %exit - ] -exit: - ret void -} - diff --git a/test/Bitcode/attributes.ll b/test/Bitcode/attributes.ll index 92f892a..1789878 100644 --- a/test/Bitcode/attributes.ll +++ b/test/Bitcode/attributes.ll @@ -203,7 +203,13 @@ define void @f34() ; 
CHECK: define void @f34() { call void @nobuiltin() nobuiltin -; CHECK: call void @nobuiltin() #23 +; CHECK: call void @nobuiltin() #24 + ret void; +} + +define void @f35() optnone noinline +; CHECK: define void @f35() #23 +{ ret void; } @@ -230,4 +236,6 @@ define void @f34() ; CHECK: attributes #20 = { "cpu"="cortex-a8" } ; CHECK: attributes #21 = { sspstrong } ; CHECK: attributes #22 = { minsize } -; CHECK: attributes #23 = { nobuiltin } +; CHECK: attributes #23 = { noinline optnone } +; CHECK: attributes #24 = { nobuiltin } + diff --git a/test/Bitcode/case-ranges-3.3.ll b/test/Bitcode/case-ranges-3.3.ll new file mode 100644 index 0000000..6e1d0a6 --- /dev/null +++ b/test/Bitcode/case-ranges-3.3.ll @@ -0,0 +1,67 @@ +; RUN: llvm-dis < %s.bc| FileCheck %s + +; case-ranges.ll.bc was generated by passing this file to llvm-as from the 3.3 +; release of LLVM. This tests that the bitcode for switches from that release +; can still be read. + +define i32 @foo(i32 %x) nounwind ssp uwtable { +; CHECK: define i32 @foo + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + store i32 %x, i32* %2, align 4 + %3 = load i32* %2, align 4 + switch i32 %3, label %9 [ +; CHECK: switch i32 %3, label %9 + i32 -3, label %4 +; CHECK-NEXT: i32 -3, label %4 + i32 -2, label %4 +; CHECK-NEXT: i32 -2, label %4 + i32 -1, label %4 +; CHECK-NEXT: i32 -1, label %4 + i32 0, label %4 +; CHECK-NEXT: i32 0, label %4 + i32 1, label %4 +; CHECK-NEXT: i32 1, label %4 + i32 2, label %4 +; CHECK-NEXT: i32 2, label %4 + i32 4, label %5 +; CHECK-NEXT: i32 4, label %5 + i32 5, label %6 +; CHECK-NEXT: i32 5, label %6 + i32 6, label %7 +; CHECK-NEXT: i32 6, label %7 + i32 7, label %8 +; CHECK-NEXT: i32 7, label %8 + ] + +; <label>:4 + store i32 -1, i32* %1 + br label %11 + +; <label>:5 + store i32 2, i32* %1 + br label %11 + +; <label>:6 + store i32 1, i32* %1 + br label %11 + +; <label>:7 + store i32 4, i32* %1 + br label %11 + +; <label>:8 + store i32 3, i32* %1 + br label %11 + +; <label>:9 + br label %10 + 
+; <label>:10 + store i32 0, i32* %1 + br label %11 + +; <label>:11 + %12 = load i32* %1 + ret i32 %12 +} diff --git a/test/Bitcode/case-ranges-3.3.ll.bc b/test/Bitcode/case-ranges-3.3.ll.bc Binary files differnew file mode 100644 index 0000000..998f747 --- /dev/null +++ b/test/Bitcode/case-ranges-3.3.ll.bc diff --git a/test/Bitcode/drop-debug-info.ll b/test/Bitcode/drop-debug-info.ll new file mode 100644 index 0000000..da4ae0c --- /dev/null +++ b/test/Bitcode/drop-debug-info.ll @@ -0,0 +1,26 @@ +; RUN: llvm-as < %s | llvm-dis | FileCheck %s + +define i32 @main() { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + ret i32 0, !dbg !12 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!9} + +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 (trunk 195495) (llvm/trunk 195495:195504M)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/manmanren/llvm_gmail/release/../llvm/tools/clang/test/CodeGen/debug-info-version.c] [DW_LANG_C99] +!1 = metadata !{metadata !"../llvm/tools/clang/test/CodeGen/debug-info-version.c", metadata !"/Users/manmanren/llvm_gmail/release"} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [main] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/Users/manmanren/llvm_gmail/release/../llvm/tools/clang/test/CodeGen/debug-info-version.c] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{metadata !8} +!8 = metadata !{i32 786468, null, null, metadata 
!"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 2} +!12 = metadata !{i32 4, i32 0, metadata !4, null} + +; CHECK-NOT: !dbg +; CHECK-NOT: !llvm.dbg.cu diff --git a/test/Bitcode/extractelement.ll b/test/Bitcode/extractelement.ll index d88f811..8999c65 100644 --- a/test/Bitcode/extractelement.ll +++ b/test/Bitcode/extractelement.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -constprop | llvm-dis +; RUN: opt < %s -constprop | llvm-dis -disable-output ; PR3465 define double @test() { diff --git a/test/Bitcode/invalid.ll b/test/Bitcode/invalid.ll new file mode 100644 index 0000000..1d4a82b --- /dev/null +++ b/test/Bitcode/invalid.ll @@ -0,0 +1,7 @@ +; RUN: not llvm-dis < %s.bc 2>&1 | FileCheck %s + +; CHECK: llvm-dis{{(\.EXE|\.exe)?}}: Invalid value + +; invalid.ll.bc has an invalid attribute number. +; The test checks that LLVM reports the error and doesn't access freed memory +; in doing so. 
diff --git a/test/Bitcode/invalid.ll.bc b/test/Bitcode/invalid.ll.bc Binary files differnew file mode 100644 index 0000000..a85c364 --- /dev/null +++ b/test/Bitcode/invalid.ll.bc diff --git a/test/Bitcode/lit.local.cfg b/test/Bitcode/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Bitcode/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Bitcode/metadata-2.ll b/test/Bitcode/metadata-2.ll index dbf46b0..4055f92 100644 --- a/test/Bitcode/metadata-2.ll +++ b/test/Bitcode/metadata-2.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llvm-dis -o /dev/null +; RUN: llvm-as < %s | llvm-dis -disable-output %0 = type { %object.ModuleInfo.__vtbl*, i8*, %"byte[]", %1, %"ClassInfo[]", i32, void ()*, void ()*, void ()*, i8*, void ()* } ; type %0 %1 = type { i64, %object.ModuleInfo* } ; type %1 %2 = type { i32, void ()* } ; type %2 diff --git a/test/Bitcode/metadata.ll b/test/Bitcode/metadata.ll index 19db3ea..fc8a622 100644 --- a/test/Bitcode/metadata.ll +++ b/test/Bitcode/metadata.ll @@ -1,6 +1,5 @@ -; RUN: llvm-as < %s | llvm-dis -o /dev/null +; RUN: llvm-as < %s | llvm-dis -disable-output !llvm.foo = !{!0} !0 = metadata !{i32 42} @my.str = internal constant [4 x i8] c"foo\00" - diff --git a/test/Bitcode/null-type.ll b/test/Bitcode/null-type.ll index 8502b0d..a620fab 100644 --- a/test/Bitcode/null-type.ll +++ b/test/Bitcode/null-type.ll @@ -1,5 +1,4 @@ -; RUN: not llvm-dis < %s.bc > /dev/null 2> %t -; RUN: FileCheck %s < %t +; RUN: not llvm-dis < %s.bc 2>&1 | FileCheck %s ; PR8494 -; CHECK: Invalid MODULE_CODE_FUNCTION record +; CHECK: Invalid record diff --git a/test/Bitcode/select.ll b/test/Bitcode/select.ll new file mode 100644 index 0000000..71e669a --- /dev/null +++ b/test/Bitcode/select.ll @@ -0,0 +1,9 @@ +; RUN: llvm-as < %s | llvm-dis | FileCheck %s + +define <2 x i32> @main() { + ret <2 x i32> select (<2 x i1> <i1 false, i1 undef>, <2 x i32> zeroinitializer, <2 x i32> <i32 0, i32 undef>) +} + +; CHECK: 
define <2 x i32> @main() { +; CHECK: ret <2 x i32> select (<2 x i1> <i1 false, i1 undef>, <2 x i32> zeroinitializer, <2 x i32> <i32 0, i32 undef>) +; CHECK: } diff --git a/test/Bitcode/shuffle.ll b/test/Bitcode/shuffle.ll index c3c01c6..1495d8e 100644 --- a/test/Bitcode/shuffle.ll +++ b/test/Bitcode/shuffle.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llvm-dis +; RUN: llvm-as < %s | llvm-dis -disable-output ; <rdar://problem/8622574> ; tests the bitcodereader can handle the case where the reader will initially diff --git a/test/Bitcode/upgrade-tbaa.ll b/test/Bitcode/upgrade-tbaa.ll new file mode 100644 index 0000000..e738909 --- /dev/null +++ b/test/Bitcode/upgrade-tbaa.ll @@ -0,0 +1,23 @@ +; RUN: llvm-as < %s | llvm-dis | FileCheck %s + +; Function Attrs: nounwind +define void @_Z4testPiPf(i32* nocapture %pI, float* nocapture %pF) #0 { +entry: + store i32 0, i32* %pI, align 4, !tbaa !{metadata !"int", metadata !0} + ; CHECK: store i32 0, i32* %pI, align 4, !tbaa [[TAG_INT:!.*]] + store float 1.000000e+00, float* %pF, align 4, !tbaa !2 + ; CHECK: store float 1.000000e+00, float* %pF, align 4, !tbaa [[TAG_FLOAT:!.*]] + ret void +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA"} +!2 = metadata !{metadata !"float", metadata !0} + +; CHECK: [[TAG_INT]] = metadata !{metadata [[TYPE_INT:!.*]], metadata [[TYPE_INT]], i64 0} +; CHECK: [[TYPE_INT]] = metadata !{metadata !"int", metadata [[TYPE_CHAR:!.*]]} +; CHECK: [[TYPE_CHAR]] = metadata !{metadata !"omnipotent char", metadata !{{.*}} +; CHECK: [[TAG_FLOAT]] = metadata !{metadata [[TYPE_FLOAT:!.*]], metadata [[TYPE_FLOAT]], i64 0} +; CHECK: [[TYPE_FLOAT]] = metadata !{metadata !"float", metadata [[TYPE_CHAR]]} diff --git 
a/test/BugPoint/lit.local.cfg b/test/BugPoint/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/BugPoint/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/BugPoint/metadata.ll b/test/BugPoint/metadata.ll index 2ba1a9f..cc043f0 100644 --- a/test/BugPoint/metadata.ll +++ b/test/BugPoint/metadata.ll @@ -4,12 +4,12 @@ ; Bugpoint should keep the call's metadata attached to the call. -; CHECK: call void @foo(), !dbg !0, !attach !4 -; CHECK: !0 = metadata !{i32 104, i32 105, metadata !1, metadata !1} -; CHECK: !1 = metadata !{i32 458769, metadata !2, i32 0, metadata !"me", i1 true, metadata !"", i32 0, metadata !3, metadata !3, null, null, null, metadata !""} -; CHECK: !2 = metadata !{metadata !"source.c", metadata !"/dir"} -; CHECK: !3 = metadata !{i32 0} -; CHECK: !4 = metadata !{metadata !"the call to foo"} +; CHECK: call void @foo(), !dbg ![[LOC:[0-9]+]], !attach ![[CALL:[0-9]+]] +; CHECK: ![[LOC]] = metadata !{i32 104, i32 105, metadata ![[SCOPE:[0-9]+]], metadata ![[SCOPE]]} +; CHECK: ![[SCOPE]] = metadata !{i32 458769, metadata ![[FILE:[0-9]+]], i32 0, metadata !"me", i1 true, metadata !"", i32 0, metadata ![[LIST:[0-9]+]], metadata ![[LIST]], null, null, null, metadata !""} +; CHECK: ![[FILE]] = metadata !{metadata !"source.c", metadata !"/dir"} +; CHECK: ![[LIST]] = metadata !{i32 0} +; CHECK: ![[CALL]] = metadata !{metadata !"the call to foo"} %rust_task = type {} define void @test(i32* %a, i8* %b) { @@ -23,6 +23,8 @@ define void @test(i32* %a, i8* %b) { declare void @foo() +!llvm.module.flags = !{!17} + !0 = metadata !{metadata !"boring"} !1 = metadata !{metadata !"uninteresting"} !2 = metadata !{metadata !"the call to foo"} @@ -37,3 +39,4 @@ declare void @foo() !14 = metadata !{i32 108, i32 109, metadata !9, metadata !9} !15 = metadata !{metadata !"source.c", metadata !"/dir"} !16 = metadata !{i32 0} +!17 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git 
a/test/CMakeLists.txt b/test/CMakeLists.txt index 392f5f2..d6f7dab 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -13,21 +13,25 @@ if(NOT LLVM_BUILD_TOOLS) endif() # Set the depends list as a variable so that it can grow conditionally. +# NOTE: Sync the substitutions in test/lit.cfg when adding to this list. set(LLVM_TEST_DEPENDS UnitTests BugpointPasses LLVMHello llc lli + lli-child-target llvm-ar llvm-as llvm-bcanalyzer + llvm-c-test llvm-cov llvm-diff llvm-dis llvm-extract llvm-dwarfdump llvm-link + llvm-lto llvm-mc llvm-mcmarkup llvm-nm diff --git a/test/CodeGen/AArch64/adrp-relocation.ll b/test/CodeGen/AArch64/adrp-relocation.ll deleted file mode 100644 index 1e12d69..0000000 --- a/test/CodeGen/AArch64/adrp-relocation.ll +++ /dev/null @@ -1,27 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -filetype=obj < %s | llvm-readobj -s -r | FileCheck %s - -define i64 @testfn() nounwind { -entry: - ret i64 0 -} - -define i64 @foo() nounwind { -entry: - %bar = alloca i64 ()*, align 8 - store i64 ()* @testfn, i64 ()** %bar, align 8 - %call = call i64 @testfn() - ret i64 %call -} - -; The above should produce an ADRP/ADD pair to calculate the address of -; testfn. The important point is that LLVM shouldn't think it can deal with the -; relocation on the ADRP itself (even though it knows everything about the -; relative offsets of testfn and foo) because its value depends on where this -; object file's .text section gets relocated in memory. 
- -; CHECK: Relocations [ -; CHECK-NEXT: Section (2) .rela.text { -; CHECK-NEXT: 0x10 R_AARCH64_ADR_PREL_PG_HI21 testfn 0x0 -; CHECK-NEXT: 0x14 R_AARCH64_ADD_ABS_LO12_NC testfn 0x0 -; CHECK-NEXT: } -; CHECK-NEXT: ] diff --git a/test/CodeGen/AArch64/alloca.ll b/test/CodeGen/AArch64/alloca.ll index a84217f..1d3c0a0 100644 --- a/test/CodeGen/AArch64/alloca.ll +++ b/test/CodeGen/AArch64/alloca.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOFP %s declare void @use_addr(i8*) @@ -7,13 +8,13 @@ define void @test_simple_alloca(i64 %n) { %buf = alloca i8, i64 %n ; Make sure we align the stack change to 16 bytes: -; CHECK: add [[SPDELTA:x[0-9]+]], x0, #15 -; CHECK: and x0, [[SPDELTA]], #0xfffffffffffffff0 +; CHECK-DAG: add [[SPDELTA:x[0-9]+]], x0, #15 +; CHECK-DAG: and x0, [[SPDELTA]], #0xfffffffffffffff0 ; Make sure we change SP. It would be surprising if anything but x0 were used ; for the final sp, but it could be if it was then moved into x0. -; CHECK: mov [[TMP:x[0-9]+]], sp -; CHECK: sub x0, [[TMP]], [[SPDELTA]] +; CHECK-DAG: mov [[TMP:x[0-9]+]], sp +; CHECK-DAG: sub x0, [[TMP]], [[SPDELTA]] ; CHECK: mov sp, x0 call void @use_addr(i8* %buf) @@ -37,13 +38,13 @@ define i64 @test_alloca_with_local(i64 %n) { %loc = alloca i64 %buf = alloca i8, i64 %n ; Make sure we align the stack change to 16 bytes: -; CHECK: add [[SPDELTA:x[0-9]+]], x0, #15 -; CHECK: and x0, [[SPDELTA]], #0xfffffffffffffff0 +; CHECK-DAG: add [[SPDELTA:x[0-9]+]], x0, #15 +; CHECK-DAG: and x0, [[SPDELTA]], #0xfffffffffffffff0 ; Make sure we change SP. It would be surprising if anything but x0 were used ; for the final sp, but it could be if it was then moved into x0. 
-; CHECK: mov [[TMP:x[0-9]+]], sp -; CHECK: sub x0, [[TMP]], [[SPDELTA]] +; CHECK-DAG: mov [[TMP:x[0-9]+]], sp +; CHECK-DAG: sub x0, [[TMP]], [[SPDELTA]] ; CHECK: mov sp, x0 ; Obviously suboptimal code here, but it to get &local in x1 @@ -66,16 +67,22 @@ define i64 @test_alloca_with_local(i64 %n) { } define void @test_variadic_alloca(i64 %n, ...) { -; CHECK-LABEL: test_variadic_alloca: +; CHECK: test_variadic_alloca: ; CHECK: sub sp, sp, #208 ; CHECK: stp x29, x30, [sp, #192] ; CHECK: add x29, sp, #192 ; CHECK: sub [[TMP:x[0-9]+]], x29, #192 ; CHECK: add x8, [[TMP]], #0 -; CHECK: str q7, [x8, #112] +; CHECK-FP: str q7, [x8, #112] ; [...] -; CHECK: str q1, [x8, #16] +; CHECK-FP: str q1, [x8, #16] + +; CHECK-NOFP: sub sp, sp, #80 +; CHECK-NOFP: stp x29, x30, [sp, #64] +; CHECK-NOFP: add x29, sp, #64 +; CHECK-NOFP: sub [[TMP:x[0-9]+]], x29, #64 +; CHECK-NOFP: add x8, [[TMP]], #0 %addr = alloca i8, i64 %n @@ -86,6 +93,10 @@ define void @test_variadic_alloca(i64 %n, ...) { ; CHECK: sub sp, x29, #192 ; CHECK: ldp x29, x30, [sp, #192] ; CHECK: add sp, sp, #208 + +; CHECK-NOFP: sub sp, x29, #64 +; CHECK-NOFP: ldp x29, x30, [sp, #64] +; CHECK-NOFP: add sp, sp, #80 } define void @test_alloca_large_frame(i64 %n) { @@ -112,16 +123,16 @@ declare i8* @llvm.stacksave() declare void @llvm.stackrestore(i8*) define void @test_scoped_alloca(i64 %n) { -; CHECK: test_scoped_alloca +; CHECK-LABEL: test_scoped_alloca: ; CHECK: sub sp, sp, #32 %sp = call i8* @llvm.stacksave() ; CHECK: mov [[SAVED_SP:x[0-9]+]], sp +; CHECK: mov [[OLDSP:x[0-9]+]], sp %addr = alloca i8, i64 %n ; CHECK: and [[SPDELTA:x[0-9]+]], {{x[0-9]+}}, #0xfffffffffffffff0 -; CHECK: mov [[OLDSP:x[0-9]+]], sp -; CHECK: sub [[NEWSP:x[0-9]+]], [[OLDSP]], [[SPDELTA]] +; CHECK-DAG: sub [[NEWSP:x[0-9]+]], [[OLDSP]], [[SPDELTA]] ; CHECK: mov sp, [[NEWSP]] call void @use_addr(i8* %addr) diff --git a/test/CodeGen/AArch64/basic-pic.ll b/test/CodeGen/AArch64/basic-pic.ll index 1b14be2..682b7ba 100644 --- 
a/test/CodeGen/AArch64/basic-pic.ll +++ b/test/CodeGen/AArch64/basic-pic.ll @@ -1,10 +1,7 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic %s -o - | FileCheck %s -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -filetype=obj %s -o -| llvm-objdump -r - | FileCheck --check-prefix=CHECK-ELF %s @var = global i32 0 -; CHECK-ELF: RELOCATION RECORDS FOR [.rela.text] - define i32 @get_globalvar() { ; CHECK-LABEL: get_globalvar: @@ -13,8 +10,6 @@ define i32 @get_globalvar() { ; CHECK: ldr x[[GOTLOC:[0-9]+]], [x[[GOTHI]], #:got_lo12:var] ; CHECK: ldr w0, [x[[GOTLOC]]] -; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE var -; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC var ret i32 %val } @@ -25,8 +20,6 @@ define i32* @get_globalvaraddr() { ; CHECK: adrp x[[GOTHI:[0-9]+]], :got:var ; CHECK: ldr x0, [x[[GOTHI]], #:got_lo12:var] -; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE var -; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC var ret i32* @var } @@ -39,8 +32,6 @@ define i32 @get_hiddenvar() { ; CHECK: adrp x[[HI:[0-9]+]], hiddenvar ; CHECK: ldr w0, [x[[HI]], #:lo12:hiddenvar] -; CHECK-ELF: R_AARCH64_ADR_PREL_PG_HI21 hiddenvar -; CHECK-ELF: R_AARCH64_LDST32_ABS_LO12_NC hiddenvar ret i32 %val } @@ -51,8 +42,6 @@ define i32* @get_hiddenvaraddr() { ; CHECK: adrp [[HI:x[0-9]+]], hiddenvar ; CHECK: add x0, [[HI]], #:lo12:hiddenvar -; CHECK-ELF: R_AARCH64_ADR_PREL_PG_HI21 hiddenvar -; CHECK-ELF: R_AARCH64_ADD_ABS_LO12_NC hiddenvar ret i32* @hiddenvar } @@ -62,9 +51,4 @@ define void()* @get_func() { ret void()* bitcast(void()*()* @get_func to void()*) ; CHECK: adrp x[[GOTHI:[0-9]+]], :got:get_func ; CHECK: ldr x0, [x[[GOTHI]], #:got_lo12:get_func] - - ; Particularly important that the ADRP gets a relocation, LLVM tends to think - ; it can relax it because it knows where get_func is. It can't! 
-; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE get_func -; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC get_func } diff --git a/test/CodeGen/AArch64/cond-sel.ll b/test/CodeGen/AArch64/cond-sel.ll index 48c50a1..9c1dfeb 100644 --- a/test/CodeGen/AArch64/cond-sel.ll +++ b/test/CodeGen/AArch64/cond-sel.ll @@ -1,4 +1,5 @@ ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s @var32 = global i32 0 @var64 = global i64 0 @@ -9,16 +10,16 @@ define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) { %tst1 = icmp ugt i32 %lhs32, %rhs32 %val1 = select i1 %tst1, i32 42, i32 52 store i32 %val1, i32* @var32 -; CHECK: movz [[W52:w[0-9]+]], #52 -; CHECK: movz [[W42:w[0-9]+]], #42 +; CHECK-DAG: movz [[W52:w[0-9]+]], #52 +; CHECK-DAG: movz [[W42:w[0-9]+]], #42 ; CHECK: csel {{w[0-9]+}}, [[W42]], [[W52]], hi %rhs64 = sext i32 %rhs32 to i64 %tst2 = icmp sle i64 %lhs64, %rhs64 %val2 = select i1 %tst2, i64 %lhs64, i64 %rhs64 store i64 %val2, i64* @var64 -; CHECK: cmp [[LHS:x[0-9]+]], [[RHS:w[0-9]+]], sxtw -; CHECK: sxtw [[EXT_RHS:x[0-9]+]], [[RHS]] +; CHECK-DAG: cmp [[LHS:x[0-9]+]], [[RHS:w[0-9]+]], sxtw +; CHECK-DAG: sxtw [[EXT_RHS:x[0-9]+]], [[RHS]] ; CHECK: csel {{x[0-9]+}}, [[LHS]], [[EXT_RHS]], le ret void @@ -30,6 +31,7 @@ define void @test_floatcsel(float %lhs32, float %rhs32, double %lhs64, double %r %tst1 = fcmp one float %lhs32, %rhs32 ; CHECK: fcmp {{s[0-9]+}}, {{s[0-9]+}} +; CHECK-NOFP-NOT: fcmp %val1 = select i1 %tst1, i32 42, i32 52 store i32 %val1, i32* @var32 ; CHECK: movz [[W52:w[0-9]+]], #52 @@ -40,6 +42,7 @@ define void @test_floatcsel(float %lhs32, float %rhs32, double %lhs64, double %r %tst2 = fcmp ueq double %lhs64, %rhs64 ; CHECK: fcmp {{d[0-9]+}}, {{d[0-9]+}} +; CHECK-NOFP-NOT: fcmp %val2 = select i1 %tst2, i64 9, i64 15 store i64 %val2, i64* @var64 ; CHECK: movz [[CONST15:x[0-9]+]], #15 diff --git 
a/test/CodeGen/AArch64/directcond.ll b/test/CodeGen/AArch64/directcond.ll index 13f032d..12c7b6a 100644 --- a/test/CodeGen/AArch64/directcond.ll +++ b/test/CodeGen/AArch64/directcond.ll @@ -1,4 +1,5 @@ ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s define i32 @test_select_i32(i1 %bit, i32 %a, i32 %b) { ; CHECK-LABEL: test_select_i32: @@ -26,7 +27,7 @@ define float @test_select_float(i1 %bit, float %a, float %b) { ; CHECK: movz [[ONE:w[0-9]+]], #1 ; CHECK: tst w0, [[ONE]] ; CHECK-NEXT: fcsel s0, s0, s1, ne - +; CHECK-NOFP-NOT: fcsel ret float %val } @@ -36,6 +37,7 @@ define double @test_select_double(i1 %bit, double %a, double %b) { ; CHECK: movz [[ONE:w[0-9]+]], #1 ; CHECK: tst w0, [[ONE]] ; CHECK-NEXT: fcsel d0, d0, d1, ne +; CHECK-NOFP-NOT: fcsel ret double %val } @@ -56,6 +58,7 @@ define i1 @test_setcc_float(float %lhs, float %rhs) { %val = fcmp oeq float %lhs, %rhs ; CHECK: fcmp s0, s1 ; CHECK: csinc w0, wzr, wzr, ne +; CHECK-NOFP-NOT: fcmp ret i1 %val } @@ -64,6 +67,7 @@ define i1 @test_setcc_double(double %lhs, double %rhs) { %val = fcmp oeq double %lhs, %rhs ; CHECK: fcmp d0, d1 ; CHECK: csinc w0, wzr, wzr, ne +; CHECK-NOFP-NOT: fcmp ret i1 %val } diff --git a/test/CodeGen/AArch64/elf-extern.ll b/test/CodeGen/AArch64/elf-extern.ll deleted file mode 100644 index e09aa12..0000000 --- a/test/CodeGen/AArch64/elf-extern.ll +++ /dev/null @@ -1,17 +0,0 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -filetype=obj | llvm-readobj -r | FileCheck %s - -; External symbols are a different concept to global variables but should still -; get relocations and so on when used. 
- -declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) - -define i32 @check_extern() { - call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* undef, i32 undef, i32 4, i1 0) - ret i32 0 -} - -; CHECK: Relocations [ -; CHECK: Section (2) .rela.text { -; CHECK: 0x{{[0-9,A-F]+}} R_AARCH64_CALL26 memcpy -; CHECK: } -; CHECK: ] diff --git a/test/CodeGen/AArch64/fcvt-int.ll b/test/CodeGen/AArch64/fcvt-int.ll index 9afcfc4..b28eb3e 100644 --- a/test/CodeGen/AArch64/fcvt-int.ll +++ b/test/CodeGen/AArch64/fcvt-int.ll @@ -5,8 +5,8 @@ define i32 @test_floattoi32(float %in) { %signed = fptosi float %in to i32 %unsigned = fptoui float %in to i32 -; CHECK: fcvtzu [[UNSIG:w[0-9]+]], {{s[0-9]+}} -; CHECK: fcvtzs [[SIG:w[0-9]+]], {{s[0-9]+}} +; CHECK-DAG: fcvtzu [[UNSIG:w[0-9]+]], {{s[0-9]+}} +; CHECK-DAG: fcvtzs [[SIG:w[0-9]+]], {{s[0-9]+}} %res = sub i32 %signed, %unsigned ; CHECK: sub {{w[0-9]+}}, [[SIG]], [[UNSIG]] @@ -20,8 +20,8 @@ define i32 @test_doubletoi32(double %in) { %signed = fptosi double %in to i32 %unsigned = fptoui double %in to i32 -; CHECK: fcvtzu [[UNSIG:w[0-9]+]], {{d[0-9]+}} -; CHECK: fcvtzs [[SIG:w[0-9]+]], {{d[0-9]+}} +; CHECK-DAG: fcvtzu [[UNSIG:w[0-9]+]], {{d[0-9]+}} +; CHECK-DAG: fcvtzs [[SIG:w[0-9]+]], {{d[0-9]+}} %res = sub i32 %signed, %unsigned ; CHECK: sub {{w[0-9]+}}, [[SIG]], [[UNSIG]] @@ -35,8 +35,8 @@ define i64 @test_floattoi64(float %in) { %signed = fptosi float %in to i64 %unsigned = fptoui float %in to i64 -; CHECK: fcvtzu [[UNSIG:x[0-9]+]], {{s[0-9]+}} -; CHECK: fcvtzs [[SIG:x[0-9]+]], {{s[0-9]+}} +; CHECK-DAG: fcvtzu [[UNSIG:x[0-9]+]], {{s[0-9]+}} +; CHECK-DAG: fcvtzs [[SIG:x[0-9]+]], {{s[0-9]+}} %res = sub i64 %signed, %unsigned ; CHECK: sub {{x[0-9]+}}, [[SIG]], [[UNSIG]] @@ -50,8 +50,8 @@ define i64 @test_doubletoi64(double %in) { %signed = fptosi double %in to i64 %unsigned = fptoui double %in to i64 -; CHECK: fcvtzu [[UNSIG:x[0-9]+]], {{d[0-9]+}} -; CHECK: fcvtzs [[SIG:x[0-9]+]], {{d[0-9]+}} +; CHECK-DAG: fcvtzu 
[[UNSIG:x[0-9]+]], {{d[0-9]+}} +; CHECK-DAG: fcvtzs [[SIG:x[0-9]+]], {{d[0-9]+}} %res = sub i64 %signed, %unsigned ; CHECK: sub {{x[0-9]+}}, [[SIG]], [[UNSIG]] @@ -65,8 +65,8 @@ define float @test_i32tofloat(i32 %in) { %signed = sitofp i32 %in to float %unsigned = uitofp i32 %in to float -; CHECK: ucvtf [[UNSIG:s[0-9]+]], {{w[0-9]+}} -; CHECK: scvtf [[SIG:s[0-9]+]], {{w[0-9]+}} +; CHECK-DAG: ucvtf [[UNSIG:s[0-9]+]], {{w[0-9]+}} +; CHECK-DAG: scvtf [[SIG:s[0-9]+]], {{w[0-9]+}} %res = fsub float %signed, %unsigned ; CHECL: fsub {{s[0-9]+}}, [[SIG]], [[UNSIG]] @@ -79,8 +79,8 @@ define double @test_i32todouble(i32 %in) { %signed = sitofp i32 %in to double %unsigned = uitofp i32 %in to double -; CHECK: ucvtf [[UNSIG:d[0-9]+]], {{w[0-9]+}} -; CHECK: scvtf [[SIG:d[0-9]+]], {{w[0-9]+}} +; CHECK-DAG: ucvtf [[UNSIG:d[0-9]+]], {{w[0-9]+}} +; CHECK-DAG: scvtf [[SIG:d[0-9]+]], {{w[0-9]+}} %res = fsub double %signed, %unsigned ; CHECK: fsub {{d[0-9]+}}, [[SIG]], [[UNSIG]] @@ -93,8 +93,8 @@ define float @test_i64tofloat(i64 %in) { %signed = sitofp i64 %in to float %unsigned = uitofp i64 %in to float -; CHECK: ucvtf [[UNSIG:s[0-9]+]], {{x[0-9]+}} -; CHECK: scvtf [[SIG:s[0-9]+]], {{x[0-9]+}} +; CHECK-DAG: ucvtf [[UNSIG:s[0-9]+]], {{x[0-9]+}} +; CHECK-DAG: scvtf [[SIG:s[0-9]+]], {{x[0-9]+}} %res = fsub float %signed, %unsigned ; CHECK: fsub {{s[0-9]+}}, [[SIG]], [[UNSIG]] @@ -107,8 +107,8 @@ define double @test_i64todouble(i64 %in) { %signed = sitofp i64 %in to double %unsigned = uitofp i64 %in to double -; CHECK: ucvtf [[UNSIG:d[0-9]+]], {{x[0-9]+}} -; CHECK: scvtf [[SIG:d[0-9]+]], {{x[0-9]+}} +; CHECK-DAG: ucvtf [[UNSIG:d[0-9]+]], {{x[0-9]+}} +; CHECK-DAG: scvtf [[SIG:d[0-9]+]], {{x[0-9]+}} %res = fsub double %signed, %unsigned ; CHECK: sub {{d[0-9]+}}, [[SIG]], [[UNSIG]] diff --git a/test/CodeGen/AArch64/fp-dp3.ll b/test/CodeGen/AArch64/fp-dp3.ll index 3a9a6fc..590557f 100644 --- a/test/CodeGen/AArch64/fp-dp3.ll +++ b/test/CodeGen/AArch64/fp-dp3.ll @@ -129,8 +129,9 @@ define 
float @test_fnmsub_unfused(float %a, float %b, float %c) { %diff = fsub float %nega, %prod ; CHECK: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ; CHECK-NOFAST-NOT: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} -; CHECK-NOFAST: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} -; CHECK-NOFAST: fneg {{s[0-9]+}}, {{s[0-9]+}} -; CHECK-NOFAST: fsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +; CHECK-NOFAST-DAG: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +; CHECK-NOFAST-DAG: fneg {{s[0-9]+}}, {{s[0-9]+}} +; CHECK-NOFAST-DAG: fsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +; CHECK-NOFAST: ret ret float %diff } diff --git a/test/CodeGen/AArch64/fp128.ll b/test/CodeGen/AArch64/fp128.ll index 853c03d..c312bb1 100644 --- a/test/CodeGen/AArch64/fp128.ll +++ b/test/CodeGen/AArch64/fp128.ll @@ -150,14 +150,14 @@ define i1 @test_setcc2() { ; Technically, everything after the call to __letf2 is redundant, but we'll let ; LLVM have its fun for now. %val = fcmp ugt fp128 %lhs, %rhs -; CHECK: bl __unordtf2 -; CHECK: mov x[[UNORDERED:[0-9]+]], x0 - ; CHECK: bl __gttf2 ; CHECK: cmp w0, #0 ; CHECK: csinc [[GT:w[0-9]+]], wzr, wzr, le -; CHECK: cmp w[[UNORDERED]], #0 + +; CHECK: bl __unordtf2 +; CHECK: cmp w0, #0 ; CHECK: csinc [[UNORDERED:w[0-9]+]], wzr, wzr, eq + ; CHECK: orr w0, [[UNORDERED]], [[GT]] ret i1 %val @@ -174,15 +174,14 @@ define i32 @test_br_cc() { ; olt == !uge, which LLVM unfortunately "optimizes" this to. 
%cond = fcmp olt fp128 %lhs, %rhs -; CHECK: bl __unordtf2 -; CHECK: mov x[[UNORDERED:[0-9]+]], x0 - ; CHECK: bl __getf2 ; CHECK: cmp w0, #0 - ; CHECK: csinc [[OGE:w[0-9]+]], wzr, wzr, lt -; CHECK: cmp w[[UNORDERED]], #0 + +; CHECK: bl __unordtf2 +; CHECK: cmp w0, #0 ; CHECK: csinc [[UNORDERED:w[0-9]+]], wzr, wzr, eq + ; CHECK: orr [[UGE:w[0-9]+]], [[UNORDERED]], [[OGE]] ; CHECK: cbnz [[UGE]], [[RET29:.LBB[0-9]+_[0-9]+]] br i1 %cond, label %iftrue, label %iffalse diff --git a/test/CodeGen/AArch64/fpimm.ll b/test/CodeGen/AArch64/fpimm.ll index ccf7c8a..b8f7169 100644 --- a/test/CodeGen/AArch64/fpimm.ll +++ b/test/CodeGen/AArch64/fpimm.ll @@ -9,12 +9,13 @@ define void @check_float() { %val = load float* @varf32 %newval1 = fadd float %val, 8.5 store volatile float %newval1, float* @varf32 -; CHECK: fmov {{s[0-9]+}}, #8.5 +; CHECK-DAG: fmov [[EIGHT5:s[0-9]+]], #8.5 %newval2 = fadd float %val, 128.0 store volatile float %newval2, float* @varf32 -; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, #:lo12:.LCPI0_0 +; CHECK-DAG: ldr [[HARD:s[0-9]+]], [{{x[0-9]+}}, #:lo12:.LCPI0_0 +; CHECK: ret ret void } @@ -24,11 +25,12 @@ define void @check_double() { %val = load double* @varf64 %newval1 = fadd double %val, 8.5 store volatile double %newval1, double* @varf64 -; CHECK: fmov {{d[0-9]+}}, #8.5 +; CHECK-DAG: fmov {{d[0-9]+}}, #8.5 %newval2 = fadd double %val, 128.0 store volatile double %newval2, double* @varf64 -; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.LCPI1_0 +; CHECK-DAG: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.LCPI1_0 +; CHECK: ret ret void } diff --git a/test/CodeGen/AArch64/frameaddr.ll b/test/CodeGen/AArch64/frameaddr.ll new file mode 100644 index 0000000..182704b --- /dev/null +++ b/test/CodeGen/AArch64/frameaddr.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +define i8* @t() nounwind { +entry: +; CHECK-LABEL: t: +; CHECK: mov x0, x29 + %0 = call i8* @llvm.frameaddress(i32 0) + ret i8* %0 +} + +define i8* @t2() nounwind { +entry: +; 
CHECK-LABEL: t2: +; CHECK: ldr x[[reg:[0-9]+]], [x29] +; CHECK: ldr x[[reg]], [x[[reg]]] + %0 = call i8* @llvm.frameaddress(i32 2) + ret i8* %0 +} + +declare i8* @llvm.frameaddress(i32) nounwind readnone diff --git a/test/CodeGen/AArch64/func-argpassing.ll b/test/CodeGen/AArch64/func-argpassing.ll index 15f8e76..430d77f 100644 --- a/test/CodeGen/AArch64/func-argpassing.ll +++ b/test/CodeGen/AArch64/func-argpassing.ll @@ -1,4 +1,5 @@ ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s %myStruct = type { i64 , i8, i32 } @@ -23,6 +24,7 @@ define void @add_floats(float %val1, float %val2) { ; CHECK-LABEL: add_floats: %newval = fadd float %val1, %val2 ; CHECK: fadd [[ADDRES:s[0-9]+]], s0, s1 +; CHECK-NOFP-NOT: fadd store float %newval, float* @varfloat ; CHECK: str [[ADDRES]], [{{x[0-9]+}}, #:lo12:varfloat] ret void @@ -35,15 +37,15 @@ define void @take_struct(%myStruct* byval %structval) { %addr0 = getelementptr %myStruct* %structval, i64 0, i32 2 %addr1 = getelementptr %myStruct* %structval, i64 0, i32 0 - %val0 = load i32* %addr0 + %val0 = load volatile i32* %addr0 ; Some weird move means x0 is used for one access ; CHECK: ldr [[REG32:w[0-9]+]], [{{x[0-9]+|sp}}, #12] - store i32 %val0, i32* @var32 + store volatile i32 %val0, i32* @var32 ; CHECK: str [[REG32]], [{{x[0-9]+}}, #:lo12:var32] - %val1 = load i64* %addr1 + %val1 = load volatile i64* %addr1 ; CHECK: ldr [[REG64:x[0-9]+]], [{{x[0-9]+|sp}}] - store i64 %val1, i64* @var64 + store volatile i64 %val1, i64* @var64 ; CHECK: str [[REG64]], [{{x[0-9]+}}, #:lo12:var64] ret void @@ -56,14 +58,14 @@ define void @check_byval_align(i32* byval %ignore, %myStruct* byval align 16 %st %addr0 = getelementptr %myStruct* %structval, i64 0, i32 2 %addr1 = getelementptr %myStruct* %structval, i64 0, i32 0 - %val0 = load i32* %addr0 + %val0 = load volatile i32* %addr0 ; 
Some weird move means x0 is used for one access ; CHECK: add x[[STRUCTVAL_ADDR:[0-9]+]], sp, #16 ; CHECK: ldr [[REG32:w[0-9]+]], [x[[STRUCTVAL_ADDR]], #12] store i32 %val0, i32* @var32 ; CHECK: str [[REG32]], [{{x[0-9]+}}, #:lo12:var32] - %val1 = load i64* %addr1 + %val1 = load volatile i64* %addr1 ; CHECK: ldr [[REG64:x[0-9]+]], [sp, #16] store i64 %val1, i64* @var64 ; CHECK: str [[REG64]], [{{x[0-9]+}}, #:lo12:var64] @@ -84,6 +86,7 @@ define double @return_double() { ; CHECK-LABEL: return_double: ret double 3.14 ; CHECK: ldr d0, [{{x[0-9]+}}, #:lo12:.LCPI +; CHECK-NOFP-NOT: ldr d0, } ; This is the kind of IR clang will produce for returning a struct @@ -130,17 +133,18 @@ define i32 @struct_on_stack(i8 %var0, i16 %var1, i32 %var2, i64 %var3, i128 %var double %notstacked) { ; CHECK-LABEL: struct_on_stack: %addr = getelementptr %myStruct* %struct, i64 0, i32 0 - %val64 = load i64* %addr - store i64 %val64, i64* @var64 + %val64 = load volatile i64* %addr + store volatile i64 %val64, i64* @var64 ; Currently nothing on local stack, so struct should be at sp ; CHECK: ldr [[VAL64:x[0-9]+]], [sp] ; CHECK: str [[VAL64]], [{{x[0-9]+}}, #:lo12:var64] - store double %notstacked, double* @vardouble + store volatile double %notstacked, double* @vardouble ; CHECK-NOT: ldr d0 ; CHECK: str d0, [{{x[0-9]+}}, #:lo12:vardouble +; CHECK-NOFP-NOT: str d0, - %retval = load i32* %stacked + %retval = load volatile i32* %stacked ret i32 %retval ; CHECK: ldr w0, [sp, #16] } @@ -176,10 +180,10 @@ define void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3, ; CHECK: check_i128_stackalign store i128 %stack2, i128* @var128 ; Nothing local on stack in current codegen, so first stack is 16 away -; CHECK: ldr {{x[0-9]+}}, [sp, #16] +; CHECK: add x[[REG:[0-9]+]], sp, #16 +; CHECK: ldr {{x[0-9]+}}, [x[[REG]], #8] ; Important point is that we address sp+24 for second dword -; CHECK: add [[REG:x[0-9]+]], sp, #16 -; CHECK: ldr {{x[0-9]+}}, {{\[}}[[REG]], #8] +; CHECK: ldr {{x[0-9]+}}, 
[sp, #16] ret void } diff --git a/test/CodeGen/AArch64/func-calls.ll b/test/CodeGen/AArch64/func-calls.ll index b12130b..ac188bb 100644 --- a/test/CodeGen/AArch64/func-calls.ll +++ b/test/CodeGen/AArch64/func-calls.ll @@ -1,4 +1,5 @@ ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s %myStruct = type { i64 , i8, i32 } @@ -21,16 +22,18 @@ define void @simple_args() { %char1 = load i8* @var8 %char2 = load i8* @var8_2 call void @take_i8s(i8 %char1, i8 %char2) -; CHECK: ldrb w0, [{{x[0-9]+}}, #:lo12:var8] -; CHECK: ldrb w1, [{{x[0-9]+}}, #:lo12:var8_2] +; CHECK-DAG: ldrb w0, [{{x[0-9]+}}, #:lo12:var8] +; CHECK-DAG: ldrb w1, [{{x[0-9]+}}, #:lo12:var8_2] ; CHECK: bl take_i8s %float1 = load float* @varfloat %float2 = load float* @varfloat_2 call void @take_floats(float %float1, float %float2) -; CHECK: ldr s1, [{{x[0-9]+}}, #:lo12:varfloat_2] -; CHECK: ldr s0, [{{x[0-9]+}}, #:lo12:varfloat] +; CHECK-DAG: ldr s1, [{{x[0-9]+}}, #:lo12:varfloat_2] +; CHECK-DAG: ldr s0, [{{x[0-9]+}}, #:lo12:varfloat] ; CHECK: bl take_floats +; CHECK-NOFP-NOT: ldr s1, +; CHECK-NOFP-NOT: ldr s0, ret void } @@ -52,6 +55,7 @@ define void @simple_rets() { store double %dbl, double* @vardouble ; CHECK: bl return_double ; CHECK: str d0, [{{x[0-9]+}}, #:lo12:vardouble] +; CHECK-NOFP-NOT: str d0, %arr = call [2 x i64] @return_smallstruct() store [2 x i64] %arr, [2 x i64]* @varsmallstruct @@ -75,17 +79,19 @@ declare void @stacked_fpu(float %var0, double %var1, float %var2, float %var3, float %var8) define void @check_stack_args() { +; CHECK-LABEL: check_stack_args: call i32 @struct_on_stack(i8 0, i16 12, i32 42, i64 99, i128 1, i32* @var32, %myStruct* byval @varstruct, i32 999, double 1.0) ; Want to check that the final double is passed in registers and ; that varstruct is passed on the stack. 
Rather dependent on how a ; memcpy gets created, but the following works for now. -; CHECK: mov x0, sp -; CHECK: str {{w[0-9]+}}, [x0] -; CHECK: str {{w[0-9]+}}, [x0, #12] -; CHECK: fmov d0, +; CHECK: mov x[[SPREG:[0-9]+]], sp +; CHECK-DAG: str {{w[0-9]+}}, [x[[SPREG]]] +; CHECK-DAG: str {{w[0-9]+}}, [x[[SPREG]], #12] +; CHECK-DAG: fmov d0, ; CHECK: bl struct_on_stack +; CHECK-NOFP-NOT: fmov call void @stacked_fpu(float -1.0, double 1.0, float 4.0, float 2.0, float -2.0, float -8.0, float 16.0, float 1.0, diff --git a/test/CodeGen/AArch64/inline-asm-modifiers.ll b/test/CodeGen/AArch64/inline-asm-modifiers.ll index d1b21f8..b7f4d3c 100644 --- a/test/CodeGen/AArch64/inline-asm-modifiers.ll +++ b/test/CodeGen/AArch64/inline-asm-modifiers.ll @@ -1,5 +1,4 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-ELF %s @var_simple = hidden global i32 0 @var_got = global i32 0 @@ -23,12 +22,10 @@ define void @test_inline_modifier_L() nounwind { ; CHECK: ldr x0, [x0, #:gottprel_lo12:var_tlsie] ; CHECK: add x0, x0, #:tprel_lo12:var_tlsle -; CHECK-ELF: R_AARCH64_ADD_ABS_LO12_NC var_simple -; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC var_got -; CHECK-ELF: R_AARCH64_TLSDESC_ADD_LO12_NC var_tlsgd -; CHECK-ELF: R_AARCH64_TLSLD_ADD_DTPREL_LO12 var_tlsld -; CHECK-ELF: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC var_tlsie -; CHECK-ELF: R_AARCH64_TLSLE_ADD_TPREL_LO12 var_tlsle + call void asm sideeffect "add x0, x0, ${0:L}", "Si,~{x0}"(i32 64) + call void asm sideeffect "ldr x0, [x0, ${0:L}]", "Si,~{x0}"(i32 64) +; CHECK: add x0, x0, #64 +; CHECK: ldr x0, [x0, #64] ret void } @@ -40,9 +37,8 @@ define void @test_inline_modifier_G() nounwind { ; CHECK: add x0, x0, #:dtprel_hi12:var_tlsld, lsl #12 ; CHECK: add x0, x0, #:tprel_hi12:var_tlsle, lsl #12 -; CHECK-ELF: R_AARCH64_TLSLD_ADD_DTPREL_HI12 var_tlsld -; CHECK-ELF: 
R_AARCH64_TLSLE_ADD_TPREL_HI12 var_tlsle - + call void asm sideeffect "add x0, x0, ${0:G}", "Si,~{x0}"(i32 42) +; CHECK: add x0, x0, #42 ret void } @@ -58,10 +54,8 @@ define void @test_inline_modifier_A() nounwind { ; CHECK: adrp x0, :tlsdesc:var_tlsgd ; CHECK: adrp x0, :gottprel:var_tlsie -; CHECK-ELF: R_AARCH64_ADR_PREL_PG_HI21 var_simple -; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE var_got -; CHECK-ELF: R_AARCH64_TLSDESC_ADR_PAGE var_tlsgd -; CHECK-ELF: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 var_tlsie + call void asm sideeffect "adrp x0, ${0:A}", "Si,~{x0}"(i32 40) +; CHECK: adrp x0, #40 ret void } @@ -87,6 +81,12 @@ define void @test_inline_modifier_wx(i32 %small, i64 %big) nounwind { call i32 asm sideeffect "add ${0:x}, ${1:x}, ${1:x}", "=r,r"(i32 0) ; CHECK: add {{w[0-9]+}}, wzr, wzr ; CHECK: add {{x[0-9]+}}, xzr, xzr + + call i32 asm sideeffect "add ${0:w}, ${0:w}, ${1:w}", "=r,Ir,0"(i32 123, i32 %small) + call i64 asm sideeffect "add ${0:x}, ${0:x}, ${1:x}", "=r,Ir,0"(i32 456, i64 %big) +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #123 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #456 + ret void } @@ -113,6 +113,18 @@ define void @test_inline_modifier_bhsdq() nounwind { ; CHECK: ldr s0, [sp] ; CHECK: ldr d0, [sp] ; CHECK: ldr q0, [sp] + + call void asm sideeffect "fcmp b0, ${0:b}", "Yw"(float 0.0) + call void asm sideeffect "fcmp h0, ${0:h}", "Yw"(float 0.0) + call void asm sideeffect "fcmp s0, ${0:s}", "Yw"(float 0.0) + call void asm sideeffect "fcmp d0, ${0:d}", "Yw"(float 0.0) + call void asm sideeffect "fcmp q0, ${0:q}", "Yw"(float 0.0) +; CHECK: fcmp b0, #0 +; CHECK: fcmp h0, #0 +; CHECK: fcmp s0, #0 +; CHECK: fcmp d0, #0 +; CHECK: fcmp q0, #0 + ret void } @@ -123,3 +135,13 @@ define void @test_inline_modifier_c() nounwind { ret void } + +define void @test_inline_modifier_a() nounwind { +; CHECK-LABEL: test_inline_modifier_a: + call void asm sideeffect "prfm pldl1keep, ${0:a}", "r"(i32* @var_simple) +; CHECK: adrp [[VARHI:x[0-9]+]], var_simple +; CHECK: add 
x[[VARADDR:[0-9]+]], [[VARHI]], #:lo12:var_simple +; CHECK: prfm pldl1keep, [x[[VARADDR]]] + ret void +} + diff --git a/test/CodeGen/AArch64/jump-table.ll b/test/CodeGen/AArch64/jump-table.ll index 0f1e760..4bb0942 100644 --- a/test/CodeGen/AArch64/jump-table.ll +++ b/test/CodeGen/AArch64/jump-table.ll @@ -1,6 +1,5 @@ ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -code-model=large -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK-LARGE %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -filetype=obj | llvm-readobj -r | FileCheck %s -check-prefix=CHECK-ELF define i32 @test_jumptable(i32 %in) { ; CHECK: test_jumptable @@ -48,19 +47,3 @@ lbl4: ; CHECK-NEXT: .xword ; CHECK-NEXT: .xword ; CHECK-NEXT: .xword - -; ELF tests: - -; First make sure we get a page/lo12 pair in .text to pick up the jump-table - -; CHECK-ELF: Relocations [ -; CHECK-ELF: Section ({{[0-9]+}}) .rela.text { -; CHECK-ELF-NEXT: 0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21 .rodata -; CHECK-ELF-NEXT: 0x{{[0-9,A-F]+}} R_AARCH64_ADD_ABS_LO12_NC .rodata -; CHECK-ELF: } - -; Also check the targets in .rodata are relocated -; CHECK-ELF: Section ({{[0-9]+}}) .rela.rodata { -; CHECK-ELF-NEXT: 0x{{[0-9,A-F]+}} R_AARCH64_ABS64 .text -; CHECK-ELF: } -; CHECK-ELF: ] diff --git a/test/CodeGen/AArch64/ldst-regoffset.ll b/test/CodeGen/AArch64/ldst-regoffset.ll index c83fb52..db30fd9 100644 --- a/test/CodeGen/AArch64/ldst-regoffset.ll +++ b/test/CodeGen/AArch64/ldst-regoffset.ll @@ -1,4 +1,5 @@ ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s @var_8bit = global i8 0 @var_16bit = global i16 0 @@ -197,11 +198,13 @@ define void @ldst_float(float* %base, i32 %off32, i64 %off64) { %val_sxtwN = load volatile float* %addr_sxtwN store volatile float 
%val_sxtwN, float* @var_float ; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #2] +; CHECK-NOFP-NOT: ldr {{s[0-9]+}}, %addr_lslN = getelementptr float* %base, i64 %off64 %val_lslN = load volatile float* %addr_lslN store volatile float %val_lslN, float* @var_float ; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #2] +; CHECK-NOFP-NOT: ldr {{s[0-9]+}}, %addrint_uxtw = ptrtoint float* %base to i64 %offset_uxtw = zext i32 %off32 to i64 @@ -210,6 +213,7 @@ define void @ldst_float(float* %base, i32 %off32, i64 %off64) { %val_uxtw = load volatile float* %addr_uxtw store volatile float %val_uxtw, float* @var_float ; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw] +; CHECK-NOFP-NOT: ldr {{s[0-9]+}}, %base_sxtw = ptrtoint float* %base to i64 %offset_sxtw = sext i32 %off32 to i64 @@ -218,6 +222,7 @@ define void @ldst_float(float* %base, i32 %off32, i64 %off64) { %val64_sxtw = load volatile float* %addr_sxtw store volatile float %val64_sxtw, float* @var_float ; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw] +; CHECK-NOFP-NOT: ldr {{s[0-9]+}}, %base_lsl = ptrtoint float* %base to i64 %addrint_lsl = add i64 %base_lsl, %off64 @@ -225,6 +230,7 @@ define void @ldst_float(float* %base, i32 %off32, i64 %off64) { %val64_lsl = load volatile float* %addr_lsl store volatile float %val64_lsl, float* @var_float ; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}] +; CHECK-NOFP-NOT: ldr {{s[0-9]+}}, %base_uxtwN = ptrtoint float* %base to i64 %offset_uxtwN = zext i32 %off32 to i64 @@ -234,6 +240,7 @@ define void @ldst_float(float* %base, i32 %off32, i64 %off64) { %val64 = load volatile float* @var_float store volatile float %val64, float* %addr_uxtwN ; CHECK: str {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #2] +; CHECK-NOFP-NOT: ldr {{s[0-9]+}}, ret void } @@ -244,11 +251,13 @@ define void @ldst_double(double* %base, i32 %off32, i64 %off64) { %val_sxtwN = load volatile double* %addr_sxtwN store volatile double %val_sxtwN, double* @var_double ; CHECK: ldr 
{{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #3] +; CHECK-NOFP-NOT: ldr {{d[0-9]+}}, %addr_lslN = getelementptr double* %base, i64 %off64 %val_lslN = load volatile double* %addr_lslN store volatile double %val_lslN, double* @var_double ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #3] +; CHECK-NOFP-NOT: ldr {{d[0-9]+}}, %addrint_uxtw = ptrtoint double* %base to i64 %offset_uxtw = zext i32 %off32 to i64 @@ -257,6 +266,7 @@ define void @ldst_double(double* %base, i32 %off32, i64 %off64) { %val_uxtw = load volatile double* %addr_uxtw store volatile double %val_uxtw, double* @var_double ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw] +; CHECK-NOFP-NOT: ldr {{d[0-9]+}}, %base_sxtw = ptrtoint double* %base to i64 %offset_sxtw = sext i32 %off32 to i64 @@ -265,6 +275,7 @@ define void @ldst_double(double* %base, i32 %off32, i64 %off64) { %val64_sxtw = load volatile double* %addr_sxtw store volatile double %val64_sxtw, double* @var_double ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw] +; CHECK-NOFP-NOT: ldr {{d[0-9]+}}, %base_lsl = ptrtoint double* %base to i64 %addrint_lsl = add i64 %base_lsl, %off64 @@ -272,6 +283,7 @@ define void @ldst_double(double* %base, i32 %off32, i64 %off64) { %val64_lsl = load volatile double* %addr_lsl store volatile double %val64_lsl, double* @var_double ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}] +; CHECK-NOFP-NOT: ldr {{d[0-9]+}}, %base_uxtwN = ptrtoint double* %base to i64 %offset_uxtwN = zext i32 %off32 to i64 @@ -281,6 +293,7 @@ define void @ldst_double(double* %base, i32 %off32, i64 %off64) { %val64 = load volatile double* @var_double store volatile double %val64, double* %addr_uxtwN ; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #3] +; CHECK-NOFP-NOT: ldr {{d[0-9]+}}, ret void } @@ -292,11 +305,13 @@ define void @ldst_128bit(fp128* %base, i32 %off32, i64 %off64) { %val_sxtwN = load volatile fp128* %addr_sxtwN store volatile fp128 %val_sxtwN, fp128* %base ; CHECK: ldr {{q[0-9]+}}, 
[{{x[0-9]+}}, {{w[0-9]+}}, sxtw #4] +; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #4] %addr_lslN = getelementptr fp128* %base, i64 %off64 %val_lslN = load volatile fp128* %addr_lslN store volatile fp128 %val_lslN, fp128* %base ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #4] +; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #4] %addrint_uxtw = ptrtoint fp128* %base to i64 %offset_uxtw = zext i32 %off32 to i64 @@ -305,6 +320,7 @@ define void @ldst_128bit(fp128* %base, i32 %off32, i64 %off64) { %val_uxtw = load volatile fp128* %addr_uxtw store volatile fp128 %val_uxtw, fp128* %base ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw] +; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #4] %base_sxtw = ptrtoint fp128* %base to i64 %offset_sxtw = sext i32 %off32 to i64 @@ -313,6 +329,7 @@ define void @ldst_128bit(fp128* %base, i32 %off32, i64 %off64) { %val64_sxtw = load volatile fp128* %addr_sxtw store volatile fp128 %val64_sxtw, fp128* %base ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw] +; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #4] %base_lsl = ptrtoint fp128* %base to i64 %addrint_lsl = add i64 %base_lsl, %off64 @@ -320,6 +337,7 @@ define void @ldst_128bit(fp128* %base, i32 %off32, i64 %off64) { %val64_lsl = load volatile fp128* %addr_lsl store volatile fp128 %val64_lsl, fp128* %base ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}] +; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #4] %base_uxtwN = ptrtoint fp128* %base to i64 %offset_uxtwN = zext i32 %off32 to i64 @@ -329,5 +347,6 @@ define void @ldst_128bit(fp128* %base, i32 %off32, i64 %off64) { %val64 = load volatile fp128* %base store volatile fp128 %val64, fp128* %addr_uxtwN ; CHECK: str {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #4] +; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #4] ret void } diff --git a/test/CodeGen/AArch64/ldst-unscaledimm.ll 
b/test/CodeGen/AArch64/ldst-unscaledimm.ll index 03dedcc..bea5bb5 100644 --- a/test/CodeGen/AArch64/ldst-unscaledimm.ll +++ b/test/CodeGen/AArch64/ldst-unscaledimm.ll @@ -1,4 +1,5 @@ ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s @var_8bit = global i8 0 @var_16bit = global i16 0 @@ -194,9 +195,11 @@ define void @ldst_float() { %valfp = load volatile float* %addrfp ; CHECK: ldur {{s[0-9]+}}, [{{x[0-9]+}}, #-5] +; CHECK-NOFP-NOT: ldur {{s[0-9]+}}, store volatile float %valfp, float* %addrfp ; CHECK: stur {{s[0-9]+}}, [{{x[0-9]+}}, #-5] +; CHECK-NOFP-NOT: stur {{s[0-9]+}}, ret void } @@ -210,9 +213,11 @@ define void @ldst_double() { %valfp = load volatile double* %addrfp ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #4] +; CHECK-NOFP-NOT: ldur {{d[0-9]+}}, store volatile double %valfp, double* %addrfp ; CHECK: stur {{d[0-9]+}}, [{{x[0-9]+}}, #4] +; CHECK-NOFP-NOT: stur {{d[0-9]+}}, ret void } diff --git a/test/CodeGen/AArch64/ldst-unsignedimm.ll b/test/CodeGen/AArch64/ldst-unsignedimm.ll index 77cef4e..44c1586 100644 --- a/test/CodeGen/AArch64/ldst-unsignedimm.ll +++ b/test/CodeGen/AArch64/ldst-unsignedimm.ll @@ -1,4 +1,5 @@ ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s @var_8bit = global i8 0 @var_16bit = global i16 0 @@ -230,9 +231,11 @@ define void @ldst_float() { %valfp = load volatile float* @var_float ; CHECK: adrp {{x[0-9]+}}, var_float ; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_float] +; CHECK-NOFP-NOT: ldr {{s[0-9]+}}, store volatile float %valfp, float* @var_float ; CHECK: str {{s[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_float] +; CHECK-NOFP-NOT: str {{s[0-9]+}}, ret void } @@ -243,9 +246,11 @@ define void @ldst_double() { %valfp = 
load volatile double* @var_double ; CHECK: adrp {{x[0-9]+}}, var_double ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_double] +; CHECK-NOFP-NOT: ldr {{d[0-9]+}}, store volatile double %valfp, double* @var_double ; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_double] +; CHECK-NOFP-NOT: str {{d[0-9]+}}, ret void } diff --git a/test/CodeGen/AArch64/lit.local.cfg b/test/CodeGen/AArch64/lit.local.cfg index c5ce241..9a66a00 100644 --- a/test/CodeGen/AArch64/lit.local.cfg +++ b/test/CodeGen/AArch64/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - targets = set(config.root.targets_to_build.split()) if not 'AArch64' in targets: config.unsupported = True diff --git a/test/CodeGen/AArch64/literal_pools.ll b/test/CodeGen/AArch64/literal_pools.ll index b82f290..fc33aee 100644 --- a/test/CodeGen/AArch64/literal_pools.ll +++ b/test/CodeGen/AArch64/literal_pools.ll @@ -1,5 +1,7 @@ ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -code-model=large | FileCheck --check-prefix=CHECK-LARGE %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -code-model=large -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP-LARGE %s @var32 = global i32 0 @var64 = global i64 0 @@ -65,8 +67,8 @@ define void @floating_lits() { %floatval = load float* @varfloat %newfloat = fadd float %floatval, 128.0 ; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI1_[0-9]+]] -; CHECK: ldr {{s[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]] -; CHECK: fadd +; CHECK: ldr [[LIT128:s[0-9]+]], [x[[LITBASE]], #:lo12:[[CURLIT]]] +; CHECK-NOFP-NOT: ldr {{s[0-9]+}}, ; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI1_[0-9]+]] ; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]] @@ -74,20 +76,26 @@ define void @floating_lits() { ; 
CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]] ; CHECK-LARGE: ldr {{s[0-9]+}}, [x[[LITADDR]]] ; CHECK-LARGE: fadd +; CHECK-NOFP-LARGE-NOT: ldr {{s[0-9]+}}, +; CHECK-NOFP-LARGE-NOT: fadd store float %newfloat, float* @varfloat %doubleval = load double* @vardouble %newdouble = fadd double %doubleval, 129.0 ; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI1_[0-9]+]] -; CHECK: ldr {{d[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]] -; CHECK: fadd +; CHECK: ldr [[LIT129:d[0-9]+]], [x[[LITBASE]], #:lo12:[[CURLIT]]] +; CHECK: fadd {{s[0-9]+}}, {{s[0-9]+}}, [[LIT128]] +; CHECK: fadd {{d[0-9]+}}, {{d[0-9]+}}, [[LIT129]] +; CHECK-NOFP-NOT: ldr {{d[0-9]+}}, +; CHECK-NOFP-NOT: fadd ; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI1_[0-9]+]] ; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]] ; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]] ; CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]] ; CHECK-LARGE: ldr {{d[0-9]+}}, [x[[LITADDR]]] +; CHECK-NOFP-LARGE-NOT: ldr {{d[0-9]+}}, store double %newdouble, double* @vardouble diff --git a/test/CodeGen/AArch64/neon-2velem-high.ll b/test/CodeGen/AArch64/neon-2velem-high.ll new file mode 100644 index 0000000..97031d9 --- /dev/null +++ b/test/CodeGen/AArch64/neon-2velem-high.ll @@ -0,0 +1,331 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s + +declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) + +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) + +declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>) + +declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>) + +declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>) + +declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>) + +declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) + +declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) + +declare <2 x 
i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>) + +declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) + +declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>) + +declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) + +define <4 x i32> @test_vmull_high_n_s16(<8 x i16> %a, i16 %b) { +; CHECK: test_vmull_high_n_s16: +; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0 + %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 + %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 + %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 + %vmull15.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + ret <4 x i32> %vmull15.i.i +} + +define <2 x i64> @test_vmull_high_n_s32(<4 x i32> %a, i32 %b) { +; CHECK: test_vmull_high_n_s32: +; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 + %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 + %vmull9.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + ret <2 x i64> %vmull9.i.i +} + +define <4 x i32> @test_vmull_high_n_u16(<8 x i16> %a, i16 %b) { +; CHECK: test_vmull_high_n_u16: +; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0 + %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 + %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 + %vecinit3.i.i = 
insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 + %vmull15.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + ret <4 x i32> %vmull15.i.i +} + +define <2 x i64> @test_vmull_high_n_u32(<4 x i32> %a, i32 %b) { +; CHECK: test_vmull_high_n_u32: +; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 + %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 + %vmull9.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + ret <2 x i64> %vmull9.i.i +} + +define <4 x i32> @test_vqdmull_high_n_s16(<8 x i16> %a, i16 %b) { +; CHECK: test_vqdmull_high_n_s16: +; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0 + %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 + %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 + %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 + %vqdmull15.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + ret <4 x i32> %vqdmull15.i.i +} + +define <2 x i64> @test_vqdmull_high_n_s32(<4 x i32> %a, i32 %b) { +; CHECK: test_vqdmull_high_n_s32: +; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 + %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 + %vqdmull9.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + ret <2 x i64> %vqdmull9.i.i +} + +define 
<4 x i32> @test_vmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) { +; CHECK: test_vmlal_high_n_s16: +; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}] +entry: + %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 + %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 + %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 + %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 + %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %add.i.i = add <4 x i32> %vmull2.i.i.i, %a + ret <4 x i32> %add.i.i +} + +define <2 x i64> @test_vmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) { +; CHECK: test_vmlal_high_n_s32: +; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] +entry: + %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 + %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 + %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %add.i.i = add <2 x i64> %vmull2.i.i.i, %a + ret <2 x i64> %add.i.i +} + +define <4 x i32> @test_vmlal_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) { +; CHECK: test_vmlal_high_n_u16: +; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}] +entry: + %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 + %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 + %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 + %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 + %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x 
i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %add.i.i = add <4 x i32> %vmull2.i.i.i, %a + ret <4 x i32> %add.i.i +} + +define <2 x i64> @test_vmlal_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) { +; CHECK: test_vmlal_high_n_u32: +; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] +entry: + %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 + %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 + %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %add.i.i = add <2 x i64> %vmull2.i.i.i, %a + ret <2 x i64> %add.i.i +} + +define <4 x i32> @test_vqdmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) { +; CHECK: test_vqdmlal_high_n_s16: +; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}] +entry: + %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 + %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 + %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 + %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 + %vqdmlal15.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vqdmlal17.i.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i) + ret <4 x i32> %vqdmlal17.i.i +} + +define <2 x i64> @test_vqdmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) { +; CHECK: test_vqdmlal_high_n_s32: +; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] +entry: + %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 + %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 + %vqdmlal9.i.i = 
tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vqdmlal11.i.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i) + ret <2 x i64> %vqdmlal11.i.i +} + +define <4 x i32> @test_vmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) { +; CHECK: test_vmlsl_high_n_s16: +; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}] +entry: + %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 + %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 + %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 + %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 + %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i + ret <4 x i32> %sub.i.i +} + +define <2 x i64> @test_vmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) { +; CHECK: test_vmlsl_high_n_s32: +; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] +entry: + %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 + %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 + %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i + ret <2 x i64> %sub.i.i +} + +define <4 x i32> @test_vmlsl_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) { +; CHECK: test_vmlsl_high_n_u16: +; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}] +entry: + %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 + %vecinit1.i.i = insertelement <4 x 
i16> %vecinit.i.i, i16 %c, i32 1 + %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 + %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 + %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i + ret <4 x i32> %sub.i.i +} + +define <2 x i64> @test_vmlsl_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) { +; CHECK: test_vmlsl_high_n_u32: +; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] +entry: + %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 + %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 + %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i + ret <2 x i64> %sub.i.i +} + +define <4 x i32> @test_vqdmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) { +; CHECK: test_vqdmlsl_high_n_s16: +; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}] +entry: + %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 + %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 + %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 + %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 + %vqdmlsl15.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vqdmlsl17.i.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i) + ret <4 x i32> %vqdmlsl17.i.i +} + +define <2 x i64> @test_vqdmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) { +; CHECK: test_vqdmlsl_high_n_s32: +; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] 
+entry: + %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 + %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 + %vqdmlsl9.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vqdmlsl11.i.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i) + ret <2 x i64> %vqdmlsl11.i.i +} + +define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) { +; CHECK: test_vmul_n_f32: +; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %vecinit.i = insertelement <2 x float> undef, float %b, i32 0 + %vecinit1.i = insertelement <2 x float> %vecinit.i, float %b, i32 1 + %mul.i = fmul <2 x float> %vecinit1.i, %a + ret <2 x float> %mul.i +} + +define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) { +; CHECK: test_vmulq_n_f32: +; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %vecinit.i = insertelement <4 x float> undef, float %b, i32 0 + %vecinit1.i = insertelement <4 x float> %vecinit.i, float %b, i32 1 + %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %b, i32 2 + %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %b, i32 3 + %mul.i = fmul <4 x float> %vecinit3.i, %a + ret <4 x float> %mul.i +} + +define <2 x double> @test_vmulq_n_f64(<2 x double> %a, double %b) { +; CHECK: test_vmulq_n_f64: +; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +entry: + %vecinit.i = insertelement <2 x double> undef, double %b, i32 0 + %vecinit1.i = insertelement <2 x double> %vecinit.i, double %b, i32 1 + %mul.i = fmul <2 x double> %vecinit1.i, %a + ret <2 x double> %mul.i +} + +define <2 x float> @test_vfma_n_f32(<2 x float> %a, <2 x float> %b, float %n) { +; CHECK: test_vfma_n_f32: +; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}] +entry: + %vecinit.i = insertelement <2 x float> undef, float 
%n, i32 0 + %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1 + %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %vecinit1.i, <2 x float> %a) + ret <2 x float> %0 +} + +define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) { +; CHECK: test_vfmaq_n_f32: +; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] +entry: + %vecinit.i = insertelement <4 x float> undef, float %n, i32 0 + %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1 + %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2 + %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3 + %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %vecinit3.i, <4 x float> %a) + ret <4 x float> %0 +} + +define <2 x float> @test_vfms_n_f32(<2 x float> %a, <2 x float> %b, float %n) { +; CHECK: test_vfms_n_f32: +; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}] +entry: + %vecinit.i = insertelement <2 x float> undef, float %n, i32 0 + %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1 + %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b + %1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %0, <2 x float> %vecinit1.i, <2 x float> %a) + ret <2 x float> %1 +} + +define <4 x float> @test_vfmsq_n_f32(<4 x float> %a, <4 x float> %b, float %n) { +; CHECK: test_vfmsq_n_f32: +; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] +entry: + %vecinit.i = insertelement <4 x float> undef, float %n, i32 0 + %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1 + %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2 + %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3 + %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b + %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %0, <4 x float> %vecinit3.i, <4 x 
float> %a) + ret <4 x float> %1 +} diff --git a/test/CodeGen/AArch64/neon-2velem.ll b/test/CodeGen/AArch64/neon-2velem.ll new file mode 100644 index 0000000..9d61842 --- /dev/null +++ b/test/CodeGen/AArch64/neon-2velem.ll @@ -0,0 +1,2550 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s + +declare <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double>, <2 x double>) + +declare <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float>, <4 x float>) + +declare <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float>, <2 x float>) + +declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) + +declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>) + +declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>) + +declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>) + +declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>) + +declare <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32>, <2 x i32>) + +declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>) + +declare <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16>, <4 x i16>) + +declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>) + +declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>) + +declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>) + +declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>) + +declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) + +declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) + +declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>) + +declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) + +declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>) + +declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) + +define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { +; CHECK: 
test_vmla_lane_s16: +; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %mul = mul <4 x i16> %shuffle, %b + %add = add <4 x i16> %mul, %a + ret <4 x i16> %add +} + +define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { +; CHECK: test_vmlaq_lane_s16: +; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> + %mul = mul <8 x i16> %shuffle, %b + %add = add <8 x i16> %mul, %a + ret <8 x i16> %add +} + +define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { +; CHECK: test_vmla_lane_s32: +; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> + %mul = mul <2 x i32> %shuffle, %b + %add = add <2 x i32> %mul, %a + ret <2 x i32> %add +} + +define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { +; CHECK: test_vmlaq_lane_s32: +; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %mul = mul <4 x i32> %shuffle, %b + %add = add <4 x i32> %mul, %a + ret <4 x i32> %add +} + +define <4 x i16> @test_vmla_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { +; CHECK: test_vmla_laneq_s16: +; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> + %mul = mul <4 x i16> %shuffle, %b + %add = add <4 x i16> %mul, %a + ret <4 x i16> %add +} + +define <8 x i16> @test_vmlaq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { +; CHECK: test_vmlaq_laneq_s16: +; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +entry: + %shuffle = 
shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> + %mul = mul <8 x i16> %shuffle, %b + %add = add <8 x i16> %mul, %a + ret <8 x i16> %add +} + +define <2 x i32> @test_vmla_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { +; CHECK: test_vmla_laneq_s32: +; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> + %mul = mul <2 x i32> %shuffle, %b + %add = add <2 x i32> %mul, %a + ret <2 x i32> %add +} + +define <4 x i32> @test_vmlaq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { +; CHECK: test_vmlaq_laneq_s32: +; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %mul = mul <4 x i32> %shuffle, %b + %add = add <4 x i32> %mul, %a + ret <4 x i32> %add +} + +define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { +; CHECK: test_vmls_lane_s16: +; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %mul = mul <4 x i16> %shuffle, %b + %sub = sub <4 x i16> %a, %mul + ret <4 x i16> %sub +} + +define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { +; CHECK: test_vmlsq_lane_s16: +; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> + %mul = mul <8 x i16> %shuffle, %b + %sub = sub <8 x i16> %a, %mul + ret <8 x i16> %sub +} + +define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { +; CHECK: test_vmls_lane_s32: +; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> + %mul = 
mul <2 x i32> %shuffle, %b + %sub = sub <2 x i32> %a, %mul + ret <2 x i32> %sub +} + +define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { +; CHECK: test_vmlsq_lane_s32: +; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %mul = mul <4 x i32> %shuffle, %b + %sub = sub <4 x i32> %a, %mul + ret <4 x i32> %sub +} + +define <4 x i16> @test_vmls_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { +; CHECK: test_vmls_laneq_s16: +; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> + %mul = mul <4 x i16> %shuffle, %b + %sub = sub <4 x i16> %a, %mul + ret <4 x i16> %sub +} + +define <8 x i16> @test_vmlsq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { +; CHECK: test_vmlsq_laneq_s16: +; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> + %mul = mul <8 x i16> %shuffle, %b + %sub = sub <8 x i16> %a, %mul + ret <8 x i16> %sub +} + +define <2 x i32> @test_vmls_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { +; CHECK: test_vmls_laneq_s32: +; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> + %mul = mul <2 x i32> %shuffle, %b + %sub = sub <2 x i32> %a, %mul + ret <2 x i32> %sub +} + +define <4 x i32> @test_vmlsq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { +; CHECK: test_vmlsq_laneq_s32: +; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %mul = mul <4 x i32> %shuffle, %b + %sub = sub <4 x i32> %a, %mul + ret <4 x i32> %sub +} + +define <4 x 
i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %v) { +; CHECK: test_vmul_lane_s16: +; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %mul = mul <4 x i16> %shuffle, %a + ret <4 x i16> %mul +} + +define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %v) { +; CHECK: test_vmulq_lane_s16: +; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> + %mul = mul <8 x i16> %shuffle, %a + ret <8 x i16> %mul +} + +define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %v) { +; CHECK: test_vmul_lane_s32: +; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> + %mul = mul <2 x i32> %shuffle, %a + ret <2 x i32> %mul +} + +define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %v) { +; CHECK: test_vmulq_lane_s32: +; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %mul = mul <4 x i32> %shuffle, %a + ret <4 x i32> %mul +} + +define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %v) { +; CHECK: test_vmul_lane_u16: +; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %mul = mul <4 x i16> %shuffle, %a + ret <4 x i16> %mul +} + +define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %v) { +; CHECK: test_vmulq_lane_u16: +; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> + %mul = mul <8 x i16> %shuffle, %a + ret <8 x i16> %mul 
+} + +define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %v) { +; CHECK: test_vmul_lane_u32: +; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> + %mul = mul <2 x i32> %shuffle, %a + ret <2 x i32> %mul +} + +define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %v) { +; CHECK: test_vmulq_lane_u32: +; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %mul = mul <4 x i32> %shuffle, %a + ret <4 x i32> %mul +} + +define <4 x i16> @test_vmul_laneq_s16(<4 x i16> %a, <8 x i16> %v) { +; CHECK: test_vmul_laneq_s16: +; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> + %mul = mul <4 x i16> %shuffle, %a + ret <4 x i16> %mul +} + +define <8 x i16> @test_vmulq_laneq_s16(<8 x i16> %a, <8 x i16> %v) { +; CHECK: test_vmulq_laneq_s16: +; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> + %mul = mul <8 x i16> %shuffle, %a + ret <8 x i16> %mul +} + +define <2 x i32> @test_vmul_laneq_s32(<2 x i32> %a, <4 x i32> %v) { +; CHECK: test_vmul_laneq_s32: +; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> + %mul = mul <2 x i32> %shuffle, %a + ret <2 x i32> %mul +} + +define <4 x i32> @test_vmulq_laneq_s32(<4 x i32> %a, <4 x i32> %v) { +; CHECK: test_vmulq_laneq_s32: +; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %mul = mul <4 x i32> %shuffle, %a + ret <4 x i32> %mul +} + +define <4 x 
i16> @test_vmul_laneq_u16(<4 x i16> %a, <8 x i16> %v) { +; CHECK: test_vmul_laneq_u16: +; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> + %mul = mul <4 x i16> %shuffle, %a + ret <4 x i16> %mul +} + +define <8 x i16> @test_vmulq_laneq_u16(<8 x i16> %a, <8 x i16> %v) { +; CHECK: test_vmulq_laneq_u16: +; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> + %mul = mul <8 x i16> %shuffle, %a + ret <8 x i16> %mul +} + +define <2 x i32> @test_vmul_laneq_u32(<2 x i32> %a, <4 x i32> %v) { +; CHECK: test_vmul_laneq_u32: +; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> + %mul = mul <2 x i32> %shuffle, %a + ret <2 x i32> %mul +} + +define <4 x i32> @test_vmulq_laneq_u32(<4 x i32> %a, <4 x i32> %v) { +; CHECK: test_vmulq_laneq_u32: +; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %mul = mul <4 x i32> %shuffle, %a + ret <4 x i32> %mul +} + +define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) { +; CHECK: test_vfma_lane_f32: +; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +entry: + %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1> + %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) + ret <2 x float> %0 +} + +declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) + +define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) { +; CHECK: test_vfmaq_lane_f32: +; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +entry: + 
%lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) + ret <4 x float> %0 +} + +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) + +define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) { +; CHECK: test_vfma_laneq_f32: +; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +entry: + %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3> + %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) + ret <2 x float> %0 +} + +define <4 x float> @test_vfmaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) { +; CHECK: test_vfmaq_laneq_f32: +; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +entry: + %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) + ret <4 x float> %0 +} + +define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) { +; CHECK: test_vfms_lane_f32: +; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +entry: + %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v + %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> <i32 1, i32 1> + %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) + ret <2 x float> %0 +} + +define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) { +; CHECK: test_vfmsq_lane_f32: +; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +entry: + %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v + %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %0 = tail call <4 x float> @llvm.fma.v4f32(<4 
x float> %lane, <4 x float> %b, <4 x float> %a) + ret <4 x float> %0 +} + +define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) { +; CHECK: test_vfms_laneq_f32: +; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +entry: + %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v + %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> <i32 3, i32 3> + %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) + ret <2 x float> %0 +} + +define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) { +; CHECK: test_vfmsq_laneq_f32: +; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +entry: + %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v + %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) + ret <4 x float> %0 +} + +define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) { +; CHECK: test_vfmaq_lane_f64: +; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +entry: + %lane = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer + %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) + ret <2 x double> %0 +} + +declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) + +define <2 x double> @test_vfmaq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) { +; CHECK: test_vfmaq_laneq_f64: +; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] +entry: + %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1> + %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> 
%a) + ret <2 x double> %0 +} + +define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) { +; CHECK: test_vfmsq_lane_f64: +; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +entry: + %sub = fsub <1 x double> <double -0.000000e+00>, %v + %lane = shufflevector <1 x double> %sub, <1 x double> undef, <2 x i32> zeroinitializer + %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) + ret <2 x double> %0 +} + +define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) { +; CHECK: test_vfmsq_laneq_f64: +; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] +entry: + %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v + %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> <i32 1, i32 1> + %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) + ret <2 x double> %0 +} + +define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { +; CHECK: test_vmlal_lane_s16: +; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %add = add <4 x i32> %vmull2.i, %a + ret <4 x i32> %add +} + +define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { +; CHECK: test_vmlal_lane_s32: +; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %add = add <2 x i64> %vmull2.i, %a + ret <2 x i64> %add +} + +define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { +; CHECK: test_vmlal_laneq_s16: +; CHECK: mlal 
{{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %add = add <4 x i32> %vmull2.i, %a + ret <4 x i32> %add +} + +define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { +; CHECK: test_vmlal_laneq_s32: +; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %add = add <2 x i64> %vmull2.i, %a + ret <2 x i64> %add +} + +define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { +; CHECK: test_vmlal_high_lane_s16: +; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +entry: + %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %add = add <4 x i32> %vmull2.i, %a + ret <4 x i32> %add +} + +define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { +; CHECK: test_vmlal_high_lane_s32: +; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +entry: + %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %add = add <2 x i64> %vmull2.i, %a + ret <2 x i64> %add +} + +define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { +; CHECK: test_vmlal_high_laneq_s16: +; CHECK: mlal2 {{v[0-9]+}}.4s, 
{{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +entry: + %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %add = add <4 x i32> %vmull2.i, %a + ret <4 x i32> %add +} + +define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { +; CHECK: test_vmlal_high_laneq_s32: +; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +entry: + %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %add = add <2 x i64> %vmull2.i, %a + ret <2 x i64> %add +} + +define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { +; CHECK: test_vmlsl_lane_s16: +; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %sub = sub <4 x i32> %a, %vmull2.i + ret <4 x i32> %sub +} + +define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { +; CHECK: test_vmlsl_lane_s32: +; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %sub = sub <2 x i64> %a, %vmull2.i + ret <2 x i64> %sub +} + +define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { +; CHECK: test_vmlsl_laneq_s16: +; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +entry: + 
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %sub = sub <4 x i32> %a, %vmull2.i + ret <4 x i32> %sub +} + +define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { +; CHECK: test_vmlsl_laneq_s32: +; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %sub = sub <2 x i64> %a, %vmull2.i + ret <2 x i64> %sub +} + +define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { +; CHECK: test_vmlsl_high_lane_s16: +; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +entry: + %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %sub = sub <4 x i32> %a, %vmull2.i + ret <4 x i32> %sub +} + +define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { +; CHECK: test_vmlsl_high_lane_s32: +; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +entry: + %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %sub = sub <2 x i64> %a, %vmull2.i + ret <2 x i64> %sub +} + +define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { +; CHECK: test_vmlsl_high_laneq_s16: +; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +entry: + %shuffle.i = 
shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %sub = sub <4 x i32> %a, %vmull2.i + ret <4 x i32> %sub +} + +define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { +; CHECK: test_vmlsl_high_laneq_s32: +; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +entry: + %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %sub = sub <2 x i64> %a, %vmull2.i + ret <2 x i64> %sub +} + +define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { +; CHECK: test_vmlal_lane_u16: +; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %add = add <4 x i32> %vmull2.i, %a + ret <4 x i32> %add +} + +define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { +; CHECK: test_vmlal_lane_u32: +; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %add = add <2 x i64> %vmull2.i, %a + ret <2 x i64> %add +} + +define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { +; CHECK: test_vmlal_laneq_u16: +; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, 
<4 x i32> <i32 7, i32 7, i32 7, i32 7> + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %add = add <4 x i32> %vmull2.i, %a + ret <4 x i32> %add +} + +define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { +; CHECK: test_vmlal_laneq_u32: +; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %add = add <2 x i64> %vmull2.i, %a + ret <2 x i64> %add +} + +define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { +; CHECK: test_vmlal_high_lane_u16: +; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +entry: + %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %add = add <4 x i32> %vmull2.i, %a + ret <4 x i32> %add +} + +define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { +; CHECK: test_vmlal_high_lane_u32: +; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +entry: + %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %add = add <2 x i64> %vmull2.i, %a + ret <2 x i64> %add +} + +define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { +; CHECK: test_vmlal_high_laneq_u16: +; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +entry: + %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, 
i32 5, i32 6, i32 7> + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %add = add <4 x i32> %vmull2.i, %a + ret <4 x i32> %add +} + +define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { +; CHECK: test_vmlal_high_laneq_u32: +; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +entry: + %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %add = add <2 x i64> %vmull2.i, %a + ret <2 x i64> %add +} + +define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { +; CHECK: test_vmlsl_lane_u16: +; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %sub = sub <4 x i32> %a, %vmull2.i + ret <4 x i32> %sub +} + +define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { +; CHECK: test_vmlsl_lane_u32: +; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %sub = sub <2 x i64> %a, %vmull2.i + ret <2 x i64> %sub +} + +define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { +; CHECK: test_vmlsl_laneq_u16: +; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> + %vmull2.i = tail call 
<4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %sub = sub <4 x i32> %a, %vmull2.i + ret <4 x i32> %sub +} + +define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { +; CHECK: test_vmlsl_laneq_u32: +; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %sub = sub <2 x i64> %a, %vmull2.i + ret <2 x i64> %sub +} + +define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { +; CHECK: test_vmlsl_high_lane_u16: +; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +entry: + %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %sub = sub <4 x i32> %a, %vmull2.i + ret <4 x i32> %sub +} + +define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { +; CHECK: test_vmlsl_high_lane_u32: +; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +entry: + %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %sub = sub <2 x i64> %a, %vmull2.i + ret <2 x i64> %sub +} + +define <4 x i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { +; CHECK: test_vmlsl_high_laneq_u16: +; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +entry: + %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <8 x i16> %v, <8 
x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %sub = sub <4 x i32> %a, %vmull2.i + ret <4 x i32> %sub +} + +define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { +; CHECK: test_vmlsl_high_laneq_u32: +; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +entry: + %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %sub = sub <2 x i64> %a, %vmull2.i + ret <2 x i64> %sub +} + +define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) { +; CHECK: test_vmull_lane_s16: +; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + ret <4 x i32> %vmull2.i +} + +define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) { +; CHECK: test_vmull_lane_s32: +; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + ret <2 x i64> %vmull2.i +} + +define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) { +; CHECK: test_vmull_lane_u16: +; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + ret <4 x i32> %vmull2.i +} + +define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) { +; 
CHECK: test_vmull_lane_u32: +; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + ret <2 x i64> %vmull2.i +} + +define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) { +; CHECK: test_vmull_high_lane_s16: +; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + ret <4 x i32> %vmull2.i +} + +define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) { +; CHECK: test_vmull_high_lane_s32: +; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + ret <2 x i64> %vmull2.i +} + +define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) { +; CHECK: test_vmull_high_lane_u16: +; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + ret <4 x i32> %vmull2.i +} + +define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) { +; CHECK: test_vmull_high_lane_u32: +; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 
+entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + ret <2 x i64> %vmull2.i +} + +define <4 x i32> @test_vmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) { +; CHECK: test_vmull_laneq_s16: +; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + ret <4 x i32> %vmull2.i +} + +define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) { +; CHECK: test_vmull_laneq_s32: +; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + ret <2 x i64> %vmull2.i +} + +define <4 x i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) { +; CHECK: test_vmull_laneq_u16: +; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + ret <4 x i32> %vmull2.i +} + +define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) { +; CHECK: test_vmull_laneq_u32: +; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + ret <2 x i64> %vmull2.i +} + +define <4 x i32> @test_vmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) { +; CHECK: test_vmull_high_laneq_s16: +; CHECK: 
mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + ret <4 x i32> %vmull2.i +} + +define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) { +; CHECK: test_vmull_high_laneq_s32: +; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + ret <2 x i64> %vmull2.i +} + +define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) { +; CHECK: test_vmull_high_laneq_u16: +; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + ret <4 x i32> %vmull2.i +} + +define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) { +; CHECK: test_vmull_high_laneq_u32: +; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + ret <2 x i64> %vmull2.i +} + +define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { +; CHECK: 
test_vqdmlal_lane_s16: +; CHECK: qdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) + ret <4 x i32> %vqdmlal4.i +} + +define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { +; CHECK: test_vqdmlal_lane_s32: +; CHECK: qdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> + %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) + ret <2 x i64> %vqdmlal4.i +} + +define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { +; CHECK: test_vqdmlal_high_lane_s16: +; CHECK: qdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +entry: + %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) + ret <4 x i32> %vqdmlal4.i +} + +define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { +; CHECK: test_vqdmlal_high_lane_s32: +; CHECK: qdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +entry: + %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> + %vqdmlal2.i = tail call <2 x i64> 
@llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) + ret <2 x i64> %vqdmlal4.i +} + +define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { +; CHECK: test_vqdmlsl_lane_s16: +; CHECK: qdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) + ret <4 x i32> %vqdmlsl4.i +} + +define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { +; CHECK: test_vqdmlsl_lane_s32: +; CHECK: qdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> + %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) + ret <2 x i64> %vqdmlsl4.i +} + +define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { +; CHECK: test_vqdmlsl_high_lane_s16: +; CHECK: qdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +entry: + %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) + ret <4 x i32> %vqdmlsl4.i +} + +define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { +; CHECK: 
test_vqdmlsl_high_lane_s32: +; CHECK: qdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +entry: + %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> + %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) + ret <2 x i64> %vqdmlsl4.i +} + +define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) { +; CHECK: test_vqdmull_lane_s16: +; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + ret <4 x i32> %vqdmull2.i +} + +define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) { +; CHECK: test_vqdmull_lane_s32: +; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> + %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + ret <2 x i64> %vqdmull2.i +} + +define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) { +; CHECK: test_vqdmull_laneq_s16: +; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + ret <4 x i32> %vqdmull2.i +} + +define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) { +; CHECK: test_vqdmull_laneq_s32: +; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> + %vqdmull2.i = 
tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + ret <2 x i64> %vqdmull2.i +} + +define <4 x i32> @test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) { +; CHECK: test_vqdmull_high_lane_s16: +; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + ret <4 x i32> %vqdmull2.i +} + +define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) { +; CHECK: test_vqdmull_high_lane_s32: +; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> + %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + ret <2 x i64> %vqdmull2.i +} + +define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) { +; CHECK: test_vqdmull_high_laneq_s16: +; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> + %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + ret <4 x i32> %vqdmull2.i +} + +define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) { +; CHECK: test_vqdmull_high_laneq_s32: +; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, 
<2 x i32> <i32 3, i32 3> + %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + ret <2 x i64> %vqdmull2.i +} + +define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) { +; CHECK: test_vqdmulh_lane_s16: +; CHECK: qdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %vqdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) + ret <4 x i16> %vqdmulh2.i +} + +define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) { +; CHECK: test_vqdmulhq_lane_s16: +; CHECK: qdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> + %vqdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) + ret <8 x i16> %vqdmulh2.i +} + +define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) { +; CHECK: test_vqdmulh_lane_s32: +; CHECK: qdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> + %vqdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) + ret <2 x i32> %vqdmulh2.i +} + +define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) { +; CHECK: test_vqdmulhq_lane_s32: +; CHECK: qdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %vqdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) + ret <4 x i32> %vqdmulh2.i +} + +define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) { +; CHECK: test_vqrdmulh_lane_s16: +; CHECK: qrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +entry: + 
%shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %vqrdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) + ret <4 x i16> %vqrdmulh2.i +} + +define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) { +; CHECK: test_vqrdmulhq_lane_s16: +; CHECK: qrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> + %vqrdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) + ret <8 x i16> %vqrdmulh2.i +} + +define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) { +; CHECK: test_vqrdmulh_lane_s32: +; CHECK: qrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> + %vqrdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) + ret <2 x i32> %vqrdmulh2.i +} + +define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) { +; CHECK: test_vqrdmulhq_lane_s32: +; CHECK: qrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %vqrdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) + ret <4 x i32> %vqrdmulh2.i +} + +define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) { +; CHECK: test_vmul_lane_f32: +; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +entry: + %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1> + %mul = fmul <2 x float> %shuffle, %a + ret <2 x float> %mul +} + +define <1 x double> @test_vmul_lane_f64(<1 x double> %a, <1 x double> %v) { +; CHECK: test_vmul_lane_f64: +; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] +entry: + %0 = 
bitcast <1 x double> %a to <8 x i8> + %1 = bitcast <8 x i8> %0 to double + %extract = extractelement <1 x double> %v, i32 0 + %2 = fmul double %1, %extract + %3 = insertelement <1 x double> undef, double %2, i32 0 + ret <1 x double> %3 +} + +define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) { +; CHECK: test_vmulq_lane_f32: +; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +entry: + %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %mul = fmul <4 x float> %shuffle, %a + ret <4 x float> %mul +} + +define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) { +; CHECK: test_vmulq_lane_f64: +; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +entry: + %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer + %mul = fmul <2 x double> %shuffle, %a + ret <2 x double> %mul +} + +define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) { +; CHECK: test_vmul_laneq_f32: +; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +entry: + %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3> + %mul = fmul <2 x float> %shuffle, %a + ret <2 x float> %mul +} + +define <1 x double> @test_vmul_laneq_f64(<1 x double> %a, <2 x double> %v) { +; CHECK: test_vmul_laneq_f64: +; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] +entry: + %0 = bitcast <1 x double> %a to <8 x i8> + %1 = bitcast <8 x i8> %0 to double + %extract = extractelement <2 x double> %v, i32 1 + %2 = fmul double %1, %extract + %3 = insertelement <1 x double> undef, double %2, i32 0 + ret <1 x double> %3 +} + +define <4 x float> @test_vmulq_laneq_f32(<4 x float> %a, <4 x float> %v) { +; CHECK: test_vmulq_laneq_f32: +; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +entry: + %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %mul = fmul <4 x float> %shuffle, 
%a + ret <4 x float> %mul +} + +define <2 x double> @test_vmulq_laneq_f64(<2 x double> %a, <2 x double> %v) { +; CHECK: test_vmulq_laneq_f64: +; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] +entry: + %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1> + %mul = fmul <2 x double> %shuffle, %a + ret <2 x double> %mul +} + +define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) { +; CHECK: test_vmulx_lane_f32: +; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +entry: + %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1> + %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) + ret <2 x float> %vmulx2.i +} + +define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) { +; CHECK: test_vmulxq_lane_f32: +; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +entry: + %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) + ret <4 x float> %vmulx2.i +} + +define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) { +; CHECK: test_vmulxq_lane_f64: +; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +entry: + %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer + %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) + ret <2 x double> %vmulx2.i +} + +define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) { +; CHECK: test_vmulx_laneq_f32: +; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +entry: + %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3> + %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) + ret <2 x float> 
%vmulx2.i +} + +define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) { +; CHECK: test_vmulxq_laneq_f32: +; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +entry: + %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) + ret <4 x float> %vmulx2.i +} + +define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) { +; CHECK: test_vmulxq_laneq_f64: +; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] +entry: + %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1> + %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) + ret <2 x double> %vmulx2.i +} + +define <4 x i16> @test_vmla_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { +; CHECK: test_vmla_lane_s16_0: +; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer + %mul = mul <4 x i16> %shuffle, %b + %add = add <4 x i16> %mul, %a + ret <4 x i16> %add +} + +define <8 x i16> @test_vmlaq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { +; CHECK: test_vmlaq_lane_s16_0: +; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer + %mul = mul <8 x i16> %shuffle, %b + %add = add <8 x i16> %mul, %a + ret <8 x i16> %add +} + +define <2 x i32> @test_vmla_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { +; CHECK: test_vmla_lane_s32_0: +; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer + %mul = mul <2 x i32> %shuffle, %b + %add = add <2 x i32> %mul, %a + ret <2 x i32> %add +} + +define <4 x i32> @test_vmlaq_lane_s32_0(<4 x i32> 
%a, <4 x i32> %b, <2 x i32> %v) { +; CHECK: test_vmlaq_lane_s32_0: +; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer + %mul = mul <4 x i32> %shuffle, %b + %add = add <4 x i32> %mul, %a + ret <4 x i32> %add +} + +define <4 x i16> @test_vmla_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { +; CHECK: test_vmla_laneq_s16_0: +; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer + %mul = mul <4 x i16> %shuffle, %b + %add = add <4 x i16> %mul, %a + ret <4 x i16> %add +} + +define <8 x i16> @test_vmlaq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { +; CHECK: test_vmlaq_laneq_s16_0: +; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer + %mul = mul <8 x i16> %shuffle, %b + %add = add <8 x i16> %mul, %a + ret <8 x i16> %add +} + +define <2 x i32> @test_vmla_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { +; CHECK: test_vmla_laneq_s32_0: +; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer + %mul = mul <2 x i32> %shuffle, %b + %add = add <2 x i32> %mul, %a + ret <2 x i32> %add +} + +define <4 x i32> @test_vmlaq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { +; CHECK: test_vmlaq_laneq_s32_0: +; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer + %mul = mul <4 x i32> %shuffle, %b + %add = add <4 x i32> %mul, %a + ret <4 x i32> %add +} + +define <4 x i16> @test_vmls_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { +; CHECK: test_vmls_lane_s16_0: +; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +entry: + %shuffle = 
shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer + %mul = mul <4 x i16> %shuffle, %b + %sub = sub <4 x i16> %a, %mul + ret <4 x i16> %sub +} + +define <8 x i16> @test_vmlsq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { +; CHECK: test_vmlsq_lane_s16_0: +; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer + %mul = mul <8 x i16> %shuffle, %b + %sub = sub <8 x i16> %a, %mul + ret <8 x i16> %sub +} + +define <2 x i32> @test_vmls_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { +; CHECK: test_vmls_lane_s32_0: +; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer + %mul = mul <2 x i32> %shuffle, %b + %sub = sub <2 x i32> %a, %mul + ret <2 x i32> %sub +} + +define <4 x i32> @test_vmlsq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { +; CHECK: test_vmlsq_lane_s32_0: +; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer + %mul = mul <4 x i32> %shuffle, %b + %sub = sub <4 x i32> %a, %mul + ret <4 x i32> %sub +} + +define <4 x i16> @test_vmls_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { +; CHECK: test_vmls_laneq_s16_0: +; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer + %mul = mul <4 x i16> %shuffle, %b + %sub = sub <4 x i16> %a, %mul + ret <4 x i16> %sub +} + +define <8 x i16> @test_vmlsq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { +; CHECK: test_vmlsq_laneq_s16_0: +; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer + %mul = mul <8 x i16> %shuffle, %b + %sub = sub <8 x i16> %a, %mul + ret <8 x i16> %sub +} + 
+define <2 x i32> @test_vmls_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { +; CHECK: test_vmls_laneq_s32_0: +; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer + %mul = mul <2 x i32> %shuffle, %b + %sub = sub <2 x i32> %a, %mul + ret <2 x i32> %sub +} + +define <4 x i32> @test_vmlsq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { +; CHECK: test_vmlsq_laneq_s32_0: +; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer + %mul = mul <4 x i32> %shuffle, %b + %sub = sub <4 x i32> %a, %mul + ret <4 x i32> %sub +} + +define <4 x i16> @test_vmul_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { +; CHECK: test_vmul_lane_s16_0: +; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer + %mul = mul <4 x i16> %shuffle, %a + ret <4 x i16> %mul +} + +define <8 x i16> @test_vmulq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { +; CHECK: test_vmulq_lane_s16_0: +; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer + %mul = mul <8 x i16> %shuffle, %a + ret <8 x i16> %mul +} + +define <2 x i32> @test_vmul_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { +; CHECK: test_vmul_lane_s32_0: +; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer + %mul = mul <2 x i32> %shuffle, %a + ret <2 x i32> %mul +} + +define <4 x i32> @test_vmulq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { +; CHECK: test_vmulq_lane_s32_0: +; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer + %mul = mul <4 x i32> %shuffle, %a + ret <4 
x i32> %mul +} + +define <4 x i16> @test_vmul_lane_u16_0(<4 x i16> %a, <4 x i16> %v) { +; CHECK: test_vmul_lane_u16_0: +; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer + %mul = mul <4 x i16> %shuffle, %a + ret <4 x i16> %mul +} + +define <8 x i16> @test_vmulq_lane_u16_0(<8 x i16> %a, <4 x i16> %v) { +; CHECK: test_vmulq_lane_u16_0: +; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer + %mul = mul <8 x i16> %shuffle, %a + ret <8 x i16> %mul +} + +define <2 x i32> @test_vmul_lane_u32_0(<2 x i32> %a, <2 x i32> %v) { +; CHECK: test_vmul_lane_u32_0: +; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer + %mul = mul <2 x i32> %shuffle, %a + ret <2 x i32> %mul +} + +define <4 x i32> @test_vmulq_lane_u32_0(<4 x i32> %a, <2 x i32> %v) { +; CHECK: test_vmulq_lane_u32_0: +; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer + %mul = mul <4 x i32> %shuffle, %a + ret <4 x i32> %mul +} + +define <4 x i16> @test_vmul_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { +; CHECK: test_vmul_laneq_s16_0: +; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer + %mul = mul <4 x i16> %shuffle, %a + ret <4 x i16> %mul +} + +define <8 x i16> @test_vmulq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { +; CHECK: test_vmulq_laneq_s16_0: +; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer + %mul = mul <8 x i16> %shuffle, %a + ret <8 x i16> %mul +} + +define <2 x i32> @test_vmul_laneq_s32_0(<2 x i32> %a, <4 
x i32> %v) { +; CHECK: test_vmul_laneq_s32_0: +; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer + %mul = mul <2 x i32> %shuffle, %a + ret <2 x i32> %mul +} + +define <4 x i32> @test_vmulq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { +; CHECK: test_vmulq_laneq_s32_0: +; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer + %mul = mul <4 x i32> %shuffle, %a + ret <4 x i32> %mul +} + +define <4 x i16> @test_vmul_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) { +; CHECK: test_vmul_laneq_u16_0: +; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer + %mul = mul <4 x i16> %shuffle, %a + ret <4 x i16> %mul +} + +define <8 x i16> @test_vmulq_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) { +; CHECK: test_vmulq_laneq_u16_0: +; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer + %mul = mul <8 x i16> %shuffle, %a + ret <8 x i16> %mul +} + +define <2 x i32> @test_vmul_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) { +; CHECK: test_vmul_laneq_u32_0: +; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer + %mul = mul <2 x i32> %shuffle, %a + ret <2 x i32> %mul +} + +define <4 x i32> @test_vmulq_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) { +; CHECK: test_vmulq_laneq_u32_0: +; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer + %mul = mul <4 x i32> %shuffle, %a + ret <4 x i32> %mul +} + +define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) { +; CHECK: test_vfma_lane_f32_0: 
+; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer + %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) + ret <2 x float> %0 +} + +define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) { +; CHECK: test_vfmaq_lane_f32_0: +; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer + %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) + ret <4 x float> %0 +} + +define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) { +; CHECK: test_vfma_laneq_f32_0: +; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer + %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) + ret <2 x float> %0 +} + +define <4 x float> @test_vfmaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) { +; CHECK: test_vfmaq_laneq_f32_0: +; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer + %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) + ret <4 x float> %0 +} + +define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) { +; CHECK: test_vfms_lane_f32_0: +; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v + %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> zeroinitializer + %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) + ret <2 x float> %0 +} + +define <4 x float> 
@test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) { +; CHECK: test_vfmsq_lane_f32_0: +; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v + %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> zeroinitializer + %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) + ret <4 x float> %0 +} + +define <2 x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) { +; CHECK: test_vfms_laneq_f32_0: +; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v + %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> zeroinitializer + %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) + ret <2 x float> %0 +} + +define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) { +; CHECK: test_vfmsq_laneq_f32_0: +; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v + %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> zeroinitializer + %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) + ret <4 x float> %0 +} + +define <2 x double> @test_vfmaq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) { +; CHECK: test_vfmaq_laneq_f64_0: +; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +entry: + %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer + %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) + ret <2 x double> %0 +} + +define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> 
%b, <2 x double> %v) { +; CHECK: test_vfmsq_laneq_f64_0: +; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +entry: + %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v + %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> zeroinitializer + %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) + ret <2 x double> %0 +} + +define <4 x i32> @test_vmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { +; CHECK: test_vmlal_lane_s16_0: +; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %add = add <4 x i32> %vmull2.i, %a + ret <4 x i32> %add +} + +define <2 x i64> @test_vmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { +; CHECK: test_vmlal_lane_s32_0: +; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %add = add <2 x i64> %vmull2.i, %a + ret <2 x i64> %add +} + +define <4 x i32> @test_vmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { +; CHECK: test_vmlal_laneq_s16_0: +; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %add = add <4 x i32> %vmull2.i, %a + ret <4 x i32> %add +} + +define <2 x i64> @test_vmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { +; CHECK: test_vmlal_laneq_s32_0: +; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer + 
%vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %add = add <2 x i64> %vmull2.i, %a + ret <2 x i64> %add +} + +define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { +; CHECK: test_vmlal_high_lane_s16_0: +; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %add = add <4 x i32> %vmull2.i, %a + ret <4 x i32> %add +} + +define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { +; CHECK: test_vmlal_high_lane_s32_0: +; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %add = add <2 x i64> %vmull2.i, %a + ret <2 x i64> %add +} + +define <4 x i32> @test_vmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { +; CHECK: test_vmlal_high_laneq_s16_0: +; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %add = add <4 x i32> %vmull2.i, %a + ret <4 x i32> %add +} + +define <2 x i64> @test_vmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { +; CHECK: test_vmlal_high_laneq_s32_0: +; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 
+entry: + %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %add = add <2 x i64> %vmull2.i, %a + ret <2 x i64> %add +} + +define <4 x i32> @test_vmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { +; CHECK: test_vmlsl_lane_s16_0: +; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %sub = sub <4 x i32> %a, %vmull2.i + ret <4 x i32> %sub +} + +define <2 x i64> @test_vmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { +; CHECK: test_vmlsl_lane_s32_0: +; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %sub = sub <2 x i64> %a, %vmull2.i + ret <2 x i64> %sub +} + +define <4 x i32> @test_vmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { +; CHECK: test_vmlsl_laneq_s16_0: +; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %sub = sub <4 x i32> %a, %vmull2.i + ret <4 x i32> %sub +} + +define <2 x i64> @test_vmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { +; CHECK: test_vmlsl_laneq_s32_0: +; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 
x i32> %shuffle) + %sub = sub <2 x i64> %a, %vmull2.i + ret <2 x i64> %sub +} + +define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { +; CHECK: test_vmlsl_high_lane_s16_0: +; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %sub = sub <4 x i32> %a, %vmull2.i + ret <4 x i32> %sub +} + +define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { +; CHECK: test_vmlsl_high_lane_s32_0: +; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %sub = sub <2 x i64> %a, %vmull2.i + ret <2 x i64> %sub +} + +define <4 x i32> @test_vmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { +; CHECK: test_vmlsl_high_laneq_s16_0: +; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %sub = sub <4 x i32> %a, %vmull2.i + ret <4 x i32> %sub +} + +define <2 x i64> @test_vmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { +; CHECK: test_vmlsl_high_laneq_s32_0: +; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 
2, i32 3> + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %sub = sub <2 x i64> %a, %vmull2.i + ret <2 x i64> %sub +} + +define <4 x i32> @test_vmlal_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { +; CHECK: test_vmlal_lane_u16_0: +; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %add = add <4 x i32> %vmull2.i, %a + ret <4 x i32> %add +} + +define <2 x i64> @test_vmlal_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { +; CHECK: test_vmlal_lane_u32_0: +; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %add = add <2 x i64> %vmull2.i, %a + ret <2 x i64> %add +} + +define <4 x i32> @test_vmlal_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { +; CHECK: test_vmlal_laneq_u16_0: +; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %add = add <4 x i32> %vmull2.i, %a + ret <4 x i32> %add +} + +define <2 x i64> @test_vmlal_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { +; CHECK: test_vmlal_laneq_u32_0: +; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %add = add <2 x i64> %vmull2.i, %a + ret <2 x i64> %add +} + 
+define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { +; CHECK: test_vmlal_high_lane_u16_0: +; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %add = add <4 x i32> %vmull2.i, %a + ret <4 x i32> %add +} + +define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { +; CHECK: test_vmlal_high_lane_u32_0: +; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %add = add <2 x i64> %vmull2.i, %a + ret <2 x i64> %add +} + +define <4 x i32> @test_vmlal_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { +; CHECK: test_vmlal_high_laneq_u16_0: +; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %add = add <4 x i32> %vmull2.i, %a + ret <4 x i32> %add +} + +define <2 x i64> @test_vmlal_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { +; CHECK: test_vmlal_high_laneq_u32_0: +; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> 
zeroinitializer + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %add = add <2 x i64> %vmull2.i, %a + ret <2 x i64> %add +} + +define <4 x i32> @test_vmlsl_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { +; CHECK: test_vmlsl_lane_u16_0: +; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %sub = sub <4 x i32> %a, %vmull2.i + ret <4 x i32> %sub +} + +define <2 x i64> @test_vmlsl_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { +; CHECK: test_vmlsl_lane_u32_0: +; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %sub = sub <2 x i64> %a, %vmull2.i + ret <2 x i64> %sub +} + +define <4 x i32> @test_vmlsl_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { +; CHECK: test_vmlsl_laneq_u16_0: +; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %sub = sub <4 x i32> %a, %vmull2.i + ret <4 x i32> %sub +} + +define <2 x i64> @test_vmlsl_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { +; CHECK: test_vmlsl_laneq_u32_0: +; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %sub = sub <2 x i64> %a, %vmull2.i + ret <2 x i64> %sub +} + +define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x 
i16> %v) { +; CHECK: test_vmlsl_high_lane_u16_0: +; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %sub = sub <4 x i32> %a, %vmull2.i + ret <4 x i32> %sub +} + +define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { +; CHECK: test_vmlsl_high_lane_u32_0: +; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %sub = sub <2 x i64> %a, %vmull2.i + ret <2 x i64> %sub +} + +define <4 x i32> @test_vmlsl_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { +; CHECK: test_vmlsl_high_laneq_u16_0: +; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %sub = sub <4 x i32> %a, %vmull2.i + ret <4 x i32> %sub +} + +define <2 x i64> @test_vmlsl_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { +; CHECK: test_vmlsl_high_laneq_u32_0: +; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 
x i32> %shuffle.i, <2 x i32> %shuffle) + %sub = sub <2 x i64> %a, %vmull2.i + ret <2 x i64> %sub +} + +define <4 x i32> @test_vmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { +; CHECK: test_vmull_lane_s16_0: +; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + ret <4 x i32> %vmull2.i +} + +define <2 x i64> @test_vmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { +; CHECK: test_vmull_lane_s32_0: +; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + ret <2 x i64> %vmull2.i +} + +define <4 x i32> @test_vmull_lane_u16_0(<4 x i16> %a, <4 x i16> %v) { +; CHECK: test_vmull_lane_u16_0: +; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + ret <4 x i32> %vmull2.i +} + +define <2 x i64> @test_vmull_lane_u32_0(<2 x i32> %a, <2 x i32> %v) { +; CHECK: test_vmull_lane_u32_0: +; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + ret <2 x i64> %vmull2.i +} + +define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { +; CHECK: test_vmull_high_lane_s16_0: +; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x 
i32> zeroinitializer + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + ret <4 x i32> %vmull2.i +} + +define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { +; CHECK: test_vmull_high_lane_s32_0: +; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + ret <2 x i64> %vmull2.i +} + +define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) { +; CHECK: test_vmull_high_lane_u16_0: +; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + ret <4 x i32> %vmull2.i +} + +define <2 x i64> @test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) { +; CHECK: test_vmull_high_lane_u32_0: +; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + ret <2 x i64> %vmull2.i +} + +define <4 x i32> @test_vmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { +; CHECK: test_vmull_laneq_s16_0: +; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> 
%shuffle) + ret <4 x i32> %vmull2.i +} + +define <2 x i64> @test_vmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) { +; CHECK: test_vmull_laneq_s32_0: +; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + ret <2 x i64> %vmull2.i +} + +define <4 x i32> @test_vmull_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) { +; CHECK: test_vmull_laneq_u16_0: +; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + ret <4 x i32> %vmull2.i +} + +define <2 x i64> @test_vmull_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) { +; CHECK: test_vmull_laneq_u32_0: +; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + ret <2 x i64> %vmull2.i +} + +define <4 x i32> @test_vmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { +; CHECK: test_vmull_high_laneq_s16_0: +; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + ret <4 x i32> %vmull2.i +} + +define <2 x i64> @test_vmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { +; CHECK: test_vmull_high_laneq_s32_0: +; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle 
= shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + ret <2 x i64> %vmull2.i +} + +define <4 x i32> @test_vmull_high_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) { +; CHECK: test_vmull_high_laneq_u16_0: +; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + ret <4 x i32> %vmull2.i +} + +define <2 x i64> @test_vmull_high_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) { +; CHECK: test_vmull_high_laneq_u32_0: +; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + ret <2 x i64> %vmull2.i +} + +define <4 x i32> @test_vqdmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { +; CHECK: test_vqdmlal_lane_s16_0: +; CHECK: qdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer + %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) + ret <4 x i32> %vqdmlal4.i +} + +define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { +; CHECK: test_vqdmlal_lane_s32_0: +; CHECK: qdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> 
zeroinitializer + %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) + ret <2 x i64> %vqdmlal4.i +} + +define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { +; CHECK: test_vqdmlal_high_lane_s16_0: +; CHECK: qdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer + %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) + ret <4 x i32> %vqdmlal4.i +} + +define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { +; CHECK: test_vqdmlal_high_lane_s32_0: +; CHECK: qdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer + %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) + ret <2 x i64> %vqdmlal4.i +} + +define <4 x i32> @test_vqdmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { +; CHECK: test_vqdmlsl_lane_s16_0: +; CHECK: qdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer + %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) + %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) + ret <4 x i32> 
%vqdmlsl4.i +} + +define <2 x i64> @test_vqdmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { +; CHECK: test_vqdmlsl_lane_s32_0: +; CHECK: qdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer + %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) + %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) + ret <2 x i64> %vqdmlsl4.i +} + +define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { +; CHECK: test_vqdmlsl_high_lane_s16_0: +; CHECK: qdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer + %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) + ret <4 x i32> %vqdmlsl4.i +} + +define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { +; CHECK: test_vqdmlsl_high_lane_s32_0: +; CHECK: qdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer + %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) + ret <2 x i64> %vqdmlsl4.i +} + +define <4 x i32> @test_vqdmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { +; CHECK: test_vqdmull_lane_s16_0: +; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <4 x i16> 
%v, <4 x i16> undef, <4 x i32> zeroinitializer + %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + ret <4 x i32> %vqdmull2.i +} + +define <2 x i64> @test_vqdmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { +; CHECK: test_vqdmull_lane_s32_0: +; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer + %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + ret <2 x i64> %vqdmull2.i +} + +define <4 x i32> @test_vqdmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { +; CHECK: test_vqdmull_laneq_s16_0: +; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer + %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) + ret <4 x i32> %vqdmull2.i +} + +define <2 x i64> @test_vqdmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) { +; CHECK: test_vqdmull_laneq_s32_0: +; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer + %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) + ret <2 x i64> %vqdmull2.i +} + +define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { +; CHECK: test_vqdmull_high_lane_s16_0: +; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer + %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + ret <4 x i32> %vqdmull2.i +} + +define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { +; CHECK: 
test_vqdmull_high_lane_s32_0: +; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer + %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + ret <2 x i64> %vqdmull2.i +} + +define <4 x i32> @test_vqdmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { +; CHECK: test_vqdmull_high_laneq_s16_0: +; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer + %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) + ret <4 x i32> %vqdmull2.i +} + +define <2 x i64> @test_vqdmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { +; CHECK: test_vqdmull_high_laneq_s32_0: +; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer + %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) + ret <2 x i64> %vqdmull2.i +} + +define <4 x i16> @test_vqdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { +; CHECK: test_vqdmulh_lane_s16_0: +; CHECK: qdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer + %vqdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) + ret <4 x i16> %vqdmulh2.i +} + +define <8 x i16> @test_vqdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { +; CHECK: test_vqdmulhq_lane_s16_0: +; CHECK: qdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, 
{{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer + %vqdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) + ret <8 x i16> %vqdmulh2.i +} + +define <2 x i32> @test_vqdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { +; CHECK: test_vqdmulh_lane_s32_0: +; CHECK: qdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer + %vqdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) + ret <2 x i32> %vqdmulh2.i +} + +define <4 x i32> @test_vqdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { +; CHECK: test_vqdmulhq_lane_s32_0: +; CHECK: qdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer + %vqdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) + ret <4 x i32> %vqdmulh2.i +} + +define <4 x i16> @test_vqrdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { +; CHECK: test_vqrdmulh_lane_s16_0: +; CHECK: qrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer + %vqrdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) + ret <4 x i16> %vqrdmulh2.i +} + +define <8 x i16> @test_vqrdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { +; CHECK: test_vqrdmulhq_lane_s16_0: +; CHECK: qrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer + %vqrdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) + ret <8 x i16> %vqrdmulh2.i +} + +define <2 x i32> @test_vqrdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { +; CHECK: test_vqrdmulh_lane_s32_0: +; CHECK: qrdmulh {{v[0-9]+}}.2s, 
{{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer + %vqrdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) + ret <2 x i32> %vqrdmulh2.i +} + +define <4 x i32> @test_vqrdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { +; CHECK: test_vqrdmulhq_lane_s32_0: +; CHECK: qrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer + %vqrdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) + ret <4 x i32> %vqrdmulh2.i +} + +define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) { +; CHECK: test_vmul_lane_f32_0: +; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer + %mul = fmul <2 x float> %shuffle, %a + ret <2 x float> %mul +} + +define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) { +; CHECK: test_vmulq_lane_f32_0: +; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer + %mul = fmul <4 x float> %shuffle, %a + ret <4 x float> %mul +} + +define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x float> %v) { +; CHECK: test_vmul_laneq_f32_0: +; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer + %mul = fmul <2 x float> %shuffle, %a + ret <2 x float> %mul +} + +define <1 x double> @test_vmul_laneq_f64_0(<1 x double> %a, <2 x double> %v) { +; CHECK: test_vmul_laneq_f64_0: +; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] +entry: + %0 = bitcast <1 x double> %a to <8 x i8> + %1 = bitcast <8 x i8> %0 to double + %extract = extractelement <2 x double> %v, i32 0 + %2 = fmul 
double %1, %extract + %3 = insertelement <1 x double> undef, double %2, i32 0 + ret <1 x double> %3 +} + +define <4 x float> @test_vmulq_laneq_f32_0(<4 x float> %a, <4 x float> %v) { +; CHECK: test_vmulq_laneq_f32_0: +; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer + %mul = fmul <4 x float> %shuffle, %a + ret <4 x float> %mul +} + +define <2 x double> @test_vmulq_laneq_f64_0(<2 x double> %a, <2 x double> %v) { +; CHECK: test_vmulq_laneq_f64_0: +; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +entry: + %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer + %mul = fmul <2 x double> %shuffle, %a + ret <2 x double> %mul +} + +define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) { +; CHECK: test_vmulx_lane_f32_0: +; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer + %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) + ret <2 x float> %vmulx2.i +} + +define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) { +; CHECK: test_vmulxq_lane_f32_0: +; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer + %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) + ret <4 x float> %vmulx2.i +} + +define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) { +; CHECK: test_vmulxq_lane_f64_0: +; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +entry: + %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer + %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) + ret 
<2 x double> %vmulx2.i +} + +define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) { +; CHECK: test_vmulx_laneq_f32_0: +; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer + %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) + ret <2 x float> %vmulx2.i +} + +define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) { +; CHECK: test_vmulxq_laneq_f32_0: +; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer + %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) + ret <4 x float> %vmulx2.i +} + +define <2 x double> @test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) { +; CHECK: test_vmulxq_laneq_f64_0: +; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +entry: + %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer + %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) + ret <2 x double> %vmulx2.i +} + diff --git a/test/CodeGen/AArch64/neon-3vdiff.ll b/test/CodeGen/AArch64/neon-3vdiff.ll new file mode 100644 index 0000000..171e2b2 --- /dev/null +++ b/test/CodeGen/AArch64/neon-3vdiff.ll @@ -0,0 +1,1806 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +declare <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8>, <8 x i8>) + +declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>) + +declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>) + +declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>) + +declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>) + +declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x 
i64>) + +declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) + +declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>) + +declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) + +declare <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8>, <8 x i8>) + +declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>) + +declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) + +declare <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8>, <8 x i8>) + +declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>) + +declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>) + +declare <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>) + +declare <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32>, <2 x i32>) + +declare <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16>, <4 x i16>) + +declare <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8>, <8 x i8>) + +declare <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64>, <2 x i64>) + +declare <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32>, <4 x i32>) + +declare <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16>, <8 x i16>) + +declare <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64>, <2 x i64>) + +declare <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32>, <4 x i32>) + +declare <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16>, <8 x i16>) + +define <8 x i16> @test_vaddl_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vaddl_s8: +; CHECK: saddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vmovl.i.i = sext <8 x i8> %a to <8 x i16> + %vmovl.i2.i = sext <8 x i8> %b to <8 x i16> + %add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i + ret <8 x i16> %add.i +} + +define <4 x i32> @test_vaddl_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vaddl_s16: +; CHECK: saddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vmovl.i.i = sext <4 x i16> %a to <4 x i32> + %vmovl.i2.i = sext <4 x i16> %b to <4 x i32> + %add.i = add <4 x i32> %vmovl.i.i, %vmovl.i2.i + ret <4 x i32> %add.i +} + 
+define <2 x i64> @test_vaddl_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vaddl_s32: +; CHECK: saddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +entry: + %vmovl.i.i = sext <2 x i32> %a to <2 x i64> + %vmovl.i2.i = sext <2 x i32> %b to <2 x i64> + %add.i = add <2 x i64> %vmovl.i.i, %vmovl.i2.i + ret <2 x i64> %add.i +} + +define <8 x i16> @test_vaddl_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vaddl_u8: +; CHECK: uaddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vmovl.i.i = zext <8 x i8> %a to <8 x i16> + %vmovl.i2.i = zext <8 x i8> %b to <8 x i16> + %add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i + ret <8 x i16> %add.i +} + +define <4 x i32> @test_vaddl_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vaddl_u16: +; CHECK: uaddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vmovl.i.i = zext <4 x i16> %a to <4 x i32> + %vmovl.i2.i = zext <4 x i16> %b to <4 x i32> + %add.i = add <4 x i32> %vmovl.i.i, %vmovl.i2.i + ret <4 x i32> %add.i +} + +define <2 x i64> @test_vaddl_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vaddl_u32: +; CHECK: uaddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +entry: + %vmovl.i.i = zext <2 x i32> %a to <2 x i64> + %vmovl.i2.i = zext <2 x i32> %b to <2 x i64> + %add.i = add <2 x i64> %vmovl.i.i, %vmovl.i2.i + ret <2 x i64> %add.i +} + +define <8 x i16> @test_vaddl_high_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vaddl_high_s8: +; CHECK: saddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16> + %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %1 = sext <8 x i8> %shuffle.i.i2.i to <8 x i16> + %add.i = add <8 x i16> %0, %1 + ret <8 x i16> %add.i +} + +define <4 x i32> @test_vaddl_high_s16(<8 x i16> %a, <8 x i16> %b) { +; 
CHECK: test_vaddl_high_s16: +; CHECK: saddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32> + %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %1 = sext <4 x i16> %shuffle.i.i2.i to <4 x i32> + %add.i = add <4 x i32> %0, %1 + ret <4 x i32> %add.i +} + +define <2 x i64> @test_vaddl_high_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vaddl_high_s32: +; CHECK: saddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64> + %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %1 = sext <2 x i32> %shuffle.i.i2.i to <2 x i64> + %add.i = add <2 x i64> %0, %1 + ret <2 x i64> %add.i +} + +define <8 x i16> @test_vaddl_high_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vaddl_high_u8: +; CHECK: uaddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16> + %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %1 = zext <8 x i8> %shuffle.i.i2.i to <8 x i16> + %add.i = add <8 x i16> %0, %1 + ret <8 x i16> %add.i +} + +define <4 x i32> @test_vaddl_high_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vaddl_high_u16: +; CHECK: uaddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32> + %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 
5, i32 6, i32 7> + %1 = zext <4 x i16> %shuffle.i.i2.i to <4 x i32> + %add.i = add <4 x i32> %0, %1 + ret <4 x i32> %add.i +} + +define <2 x i64> @test_vaddl_high_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vaddl_high_u32: +; CHECK: uaddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64> + %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %1 = zext <2 x i32> %shuffle.i.i2.i to <2 x i64> + %add.i = add <2 x i64> %0, %1 + ret <2 x i64> %add.i +} + +define <8 x i16> @test_vaddw_s8(<8 x i16> %a, <8 x i8> %b) { +; CHECK: test_vaddw_s8: +; CHECK: saddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b +entry: + %vmovl.i.i = sext <8 x i8> %b to <8 x i16> + %add.i = add <8 x i16> %vmovl.i.i, %a + ret <8 x i16> %add.i +} + +define <4 x i32> @test_vaddw_s16(<4 x i32> %a, <4 x i16> %b) { +; CHECK: test_vaddw_s16: +; CHECK: saddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h +entry: + %vmovl.i.i = sext <4 x i16> %b to <4 x i32> + %add.i = add <4 x i32> %vmovl.i.i, %a + ret <4 x i32> %add.i +} + +define <2 x i64> @test_vaddw_s32(<2 x i64> %a, <2 x i32> %b) { +; CHECK: test_vaddw_s32: +; CHECK: saddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s +entry: + %vmovl.i.i = sext <2 x i32> %b to <2 x i64> + %add.i = add <2 x i64> %vmovl.i.i, %a + ret <2 x i64> %add.i +} + +define <8 x i16> @test_vaddw_u8(<8 x i16> %a, <8 x i8> %b) { +; CHECK: test_vaddw_u8: +; CHECK: uaddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b +entry: + %vmovl.i.i = zext <8 x i8> %b to <8 x i16> + %add.i = add <8 x i16> %vmovl.i.i, %a + ret <8 x i16> %add.i +} + +define <4 x i32> @test_vaddw_u16(<4 x i32> %a, <4 x i16> %b) { +; CHECK: test_vaddw_u16: +; CHECK: uaddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h +entry: + %vmovl.i.i = zext <4 x i16> %b to <4 x i32> + %add.i = add <4 x i32> %vmovl.i.i, %a + ret <4 x i32> 
%add.i +} + +define <2 x i64> @test_vaddw_u32(<2 x i64> %a, <2 x i32> %b) { +; CHECK: test_vaddw_u32: +; CHECK: uaddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s +entry: + %vmovl.i.i = zext <2 x i32> %b to <2 x i64> + %add.i = add <2 x i64> %vmovl.i.i, %a + ret <2 x i64> %add.i +} + +define <8 x i16> @test_vaddw_high_s8(<8 x i16> %a, <16 x i8> %b) { +; CHECK: test_vaddw_high_s8: +; CHECK: saddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b +entry: + %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16> + %add.i = add <8 x i16> %0, %a + ret <8 x i16> %add.i +} + +define <4 x i32> @test_vaddw_high_s16(<4 x i32> %a, <8 x i16> %b) { +; CHECK: test_vaddw_high_s16: +; CHECK: saddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h +entry: + %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32> + %add.i = add <4 x i32> %0, %a + ret <4 x i32> %add.i +} + +define <2 x i64> @test_vaddw_high_s32(<2 x i64> %a, <4 x i32> %b) { +; CHECK: test_vaddw_high_s32: +; CHECK: saddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s +entry: + %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64> + %add.i = add <2 x i64> %0, %a + ret <2 x i64> %add.i +} + +define <8 x i16> @test_vaddw_high_u8(<8 x i16> %a, <16 x i8> %b) { +; CHECK: test_vaddw_high_u8: +; CHECK: uaddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b +entry: + %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16> + %add.i = add <8 x i16> %0, %a + ret <8 x i16> %add.i +} + +define <4 x i32> @test_vaddw_high_u16(<4 x i32> %a, <8 x i16> %b) { +; CHECK: test_vaddw_high_u16: +; CHECK: uaddw2 
{{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h +entry: + %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32> + %add.i = add <4 x i32> %0, %a + ret <4 x i32> %add.i +} + +define <2 x i64> @test_vaddw_high_u32(<2 x i64> %a, <4 x i32> %b) { +; CHECK: test_vaddw_high_u32: +; CHECK: uaddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s +entry: + %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64> + %add.i = add <2 x i64> %0, %a + ret <2 x i64> %add.i +} + +define <8 x i16> @test_vsubl_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vsubl_s8: +; CHECK: ssubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vmovl.i.i = sext <8 x i8> %a to <8 x i16> + %vmovl.i2.i = sext <8 x i8> %b to <8 x i16> + %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i2.i + ret <8 x i16> %sub.i +} + +define <4 x i32> @test_vsubl_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vsubl_s16: +; CHECK: ssubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vmovl.i.i = sext <4 x i16> %a to <4 x i32> + %vmovl.i2.i = sext <4 x i16> %b to <4 x i32> + %sub.i = sub <4 x i32> %vmovl.i.i, %vmovl.i2.i + ret <4 x i32> %sub.i +} + +define <2 x i64> @test_vsubl_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vsubl_s32: +; CHECK: ssubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +entry: + %vmovl.i.i = sext <2 x i32> %a to <2 x i64> + %vmovl.i2.i = sext <2 x i32> %b to <2 x i64> + %sub.i = sub <2 x i64> %vmovl.i.i, %vmovl.i2.i + ret <2 x i64> %sub.i +} + +define <8 x i16> @test_vsubl_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vsubl_u8: +; CHECK: usubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vmovl.i.i = zext <8 x i8> %a to <8 x i16> + %vmovl.i2.i = zext <8 x i8> %b to <8 x i16> + %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i2.i + ret <8 x i16> %sub.i +} + +define <4 x i32> @test_vsubl_u16(<4 x i16> %a, 
<4 x i16> %b) { +; CHECK: test_vsubl_u16: +; CHECK: usubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vmovl.i.i = zext <4 x i16> %a to <4 x i32> + %vmovl.i2.i = zext <4 x i16> %b to <4 x i32> + %sub.i = sub <4 x i32> %vmovl.i.i, %vmovl.i2.i + ret <4 x i32> %sub.i +} + +define <2 x i64> @test_vsubl_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vsubl_u32: +; CHECK: usubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +entry: + %vmovl.i.i = zext <2 x i32> %a to <2 x i64> + %vmovl.i2.i = zext <2 x i32> %b to <2 x i64> + %sub.i = sub <2 x i64> %vmovl.i.i, %vmovl.i2.i + ret <2 x i64> %sub.i +} + +define <8 x i16> @test_vsubl_high_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vsubl_high_s8: +; CHECK: ssubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16> + %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %1 = sext <8 x i8> %shuffle.i.i2.i to <8 x i16> + %sub.i = sub <8 x i16> %0, %1 + ret <8 x i16> %sub.i +} + +define <4 x i32> @test_vsubl_high_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vsubl_high_s16: +; CHECK: ssubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32> + %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %1 = sext <4 x i16> %shuffle.i.i2.i to <4 x i32> + %sub.i = sub <4 x i32> %0, %1 + ret <4 x i32> %sub.i +} + +define <2 x i64> @test_vsubl_high_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vsubl_high_s32: +; CHECK: ssubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 
x i32> <i32 2, i32 3> + %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64> + %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %1 = sext <2 x i32> %shuffle.i.i2.i to <2 x i64> + %sub.i = sub <2 x i64> %0, %1 + ret <2 x i64> %sub.i +} + +define <8 x i16> @test_vsubl_high_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vsubl_high_u8: +; CHECK: usubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16> + %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %1 = zext <8 x i8> %shuffle.i.i2.i to <8 x i16> + %sub.i = sub <8 x i16> %0, %1 + ret <8 x i16> %sub.i +} + +define <4 x i32> @test_vsubl_high_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vsubl_high_u16: +; CHECK: usubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32> + %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %1 = zext <4 x i16> %shuffle.i.i2.i to <4 x i32> + %sub.i = sub <4 x i32> %0, %1 + ret <4 x i32> %sub.i +} + +define <2 x i64> @test_vsubl_high_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vsubl_high_u32: +; CHECK: usubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64> + %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %1 = zext <2 x i32> %shuffle.i.i2.i to <2 x i64> + %sub.i = sub <2 x i64> %0, %1 + ret <2 x i64> %sub.i +} + +define <8 x i16> @test_vsubw_s8(<8 x i16> %a, <8 x i8> %b) { +; 
CHECK: test_vsubw_s8: +; CHECK: ssubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b +entry: + %vmovl.i.i = sext <8 x i8> %b to <8 x i16> + %sub.i = sub <8 x i16> %a, %vmovl.i.i + ret <8 x i16> %sub.i +} + +define <4 x i32> @test_vsubw_s16(<4 x i32> %a, <4 x i16> %b) { +; CHECK: test_vsubw_s16: +; CHECK: ssubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h +entry: + %vmovl.i.i = sext <4 x i16> %b to <4 x i32> + %sub.i = sub <4 x i32> %a, %vmovl.i.i + ret <4 x i32> %sub.i +} + +define <2 x i64> @test_vsubw_s32(<2 x i64> %a, <2 x i32> %b) { +; CHECK: test_vsubw_s32: +; CHECK: ssubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s +entry: + %vmovl.i.i = sext <2 x i32> %b to <2 x i64> + %sub.i = sub <2 x i64> %a, %vmovl.i.i + ret <2 x i64> %sub.i +} + +define <8 x i16> @test_vsubw_u8(<8 x i16> %a, <8 x i8> %b) { +; CHECK: test_vsubw_u8: +; CHECK: usubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b +entry: + %vmovl.i.i = zext <8 x i8> %b to <8 x i16> + %sub.i = sub <8 x i16> %a, %vmovl.i.i + ret <8 x i16> %sub.i +} + +define <4 x i32> @test_vsubw_u16(<4 x i32> %a, <4 x i16> %b) { +; CHECK: test_vsubw_u16: +; CHECK: usubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h +entry: + %vmovl.i.i = zext <4 x i16> %b to <4 x i32> + %sub.i = sub <4 x i32> %a, %vmovl.i.i + ret <4 x i32> %sub.i +} + +define <2 x i64> @test_vsubw_u32(<2 x i64> %a, <2 x i32> %b) { +; CHECK: test_vsubw_u32: +; CHECK: usubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s +entry: + %vmovl.i.i = zext <2 x i32> %b to <2 x i64> + %sub.i = sub <2 x i64> %a, %vmovl.i.i + ret <2 x i64> %sub.i +} + +define <8 x i16> @test_vsubw_high_s8(<8 x i16> %a, <16 x i8> %b) { +; CHECK: test_vsubw_high_s8: +; CHECK: ssubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b +entry: + %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16> + %sub.i = sub <8 x i16> %a, %0 + ret <8 x i16> %sub.i +} + 
+define <4 x i32> @test_vsubw_high_s16(<4 x i32> %a, <8 x i16> %b) { +; CHECK: test_vsubw_high_s16: +; CHECK: ssubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h +entry: + %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32> + %sub.i = sub <4 x i32> %a, %0 + ret <4 x i32> %sub.i +} + +define <2 x i64> @test_vsubw_high_s32(<2 x i64> %a, <4 x i32> %b) { +; CHECK: test_vsubw_high_s32: +; CHECK: ssubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s +entry: + %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64> + %sub.i = sub <2 x i64> %a, %0 + ret <2 x i64> %sub.i +} + +define <8 x i16> @test_vsubw_high_u8(<8 x i16> %a, <16 x i8> %b) { +; CHECK: test_vsubw_high_u8: +; CHECK: usubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b +entry: + %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16> + %sub.i = sub <8 x i16> %a, %0 + ret <8 x i16> %sub.i +} + +define <4 x i32> @test_vsubw_high_u16(<4 x i32> %a, <8 x i16> %b) { +; CHECK: test_vsubw_high_u16: +; CHECK: usubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h +entry: + %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32> + %sub.i = sub <4 x i32> %a, %0 + ret <4 x i32> %sub.i +} + +define <2 x i64> @test_vsubw_high_u32(<2 x i64> %a, <4 x i32> %b) { +; CHECK: test_vsubw_high_u32: +; CHECK: usubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s +entry: + %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64> + %sub.i = sub <2 x i64> %a, %0 + ret <2 x i64> %sub.i +} + +define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) { +; 
CHECK: test_vaddhn_s16: +; CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vaddhn.i = add <8 x i16> %a, %b + %vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + %vaddhn2.i = trunc <8 x i16> %vaddhn1.i to <8 x i8> + ret <8 x i8> %vaddhn2.i +} + +define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vaddhn_s32: +; CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vaddhn.i = add <4 x i32> %a, %b + %vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16> + %vaddhn2.i = trunc <4 x i32> %vaddhn1.i to <4 x i16> + ret <4 x i16> %vaddhn2.i +} + +define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vaddhn_s64: +; CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +entry: + %vaddhn.i = add <2 x i64> %a, %b + %vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32> + %vaddhn2.i = trunc <2 x i64> %vaddhn1.i to <2 x i32> + ret <2 x i32> %vaddhn2.i +} + +define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vaddhn_u16: +; CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vaddhn.i = add <8 x i16> %a, %b + %vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + %vaddhn2.i = trunc <8 x i16> %vaddhn1.i to <8 x i8> + ret <8 x i8> %vaddhn2.i +} + +define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vaddhn_u32: +; CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vaddhn.i = add <4 x i32> %a, %b + %vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16> + %vaddhn2.i = trunc <4 x i32> %vaddhn1.i to <4 x i16> + ret <4 x i16> %vaddhn2.i +} + +define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vaddhn_u64: +; CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +entry: + %vaddhn.i = add <2 x i64> %a, %b + %vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 
32, i64 32> + %vaddhn2.i = trunc <2 x i64> %vaddhn1.i to <2 x i32> + ret <2 x i32> %vaddhn2.i +} + +define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vaddhn_high_s16: +; CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vaddhn.i.i = add <8 x i16> %a, %b + %vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + %vaddhn2.i.i = trunc <8 x i16> %vaddhn1.i.i to <8 x i8> + %0 = bitcast <8 x i8> %r to <1 x i64> + %1 = bitcast <8 x i8> %vaddhn2.i.i to <1 x i64> + %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> + %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> + ret <16 x i8> %2 +} + +define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vaddhn_high_s32: +; CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vaddhn.i.i = add <4 x i32> %a, %b + %vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16> + %vaddhn2.i.i = trunc <4 x i32> %vaddhn1.i.i to <4 x i16> + %0 = bitcast <4 x i16> %r to <1 x i64> + %1 = bitcast <4 x i16> %vaddhn2.i.i to <1 x i64> + %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> + %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> + ret <8 x i16> %2 +} + +define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vaddhn_high_s64: +; CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +entry: + %vaddhn.i.i = add <2 x i64> %a, %b + %vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32> + %vaddhn2.i.i = trunc <2 x i64> %vaddhn1.i.i to <2 x i32> + %0 = bitcast <2 x i32> %r to <1 x i64> + %1 = bitcast <2 x i32> %vaddhn2.i.i to <1 x i64> + %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> + %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> + ret <4 x i32> %2 +} + +define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 
x i16> %a, <8 x i16> %b) { +; CHECK: test_vaddhn_high_u16: +; CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vaddhn.i.i = add <8 x i16> %a, %b + %vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + %vaddhn2.i.i = trunc <8 x i16> %vaddhn1.i.i to <8 x i8> + %0 = bitcast <8 x i8> %r to <1 x i64> + %1 = bitcast <8 x i8> %vaddhn2.i.i to <1 x i64> + %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> + %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> + ret <16 x i8> %2 +} + +define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vaddhn_high_u32: +; CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vaddhn.i.i = add <4 x i32> %a, %b + %vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16> + %vaddhn2.i.i = trunc <4 x i32> %vaddhn1.i.i to <4 x i16> + %0 = bitcast <4 x i16> %r to <1 x i64> + %1 = bitcast <4 x i16> %vaddhn2.i.i to <1 x i64> + %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> + %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> + ret <8 x i16> %2 +} + +define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vaddhn_high_u64: +; CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +entry: + %vaddhn.i.i = add <2 x i64> %a, %b + %vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32> + %vaddhn2.i.i = trunc <2 x i64> %vaddhn1.i.i to <2 x i32> + %0 = bitcast <2 x i32> %r to <1 x i64> + %1 = bitcast <2 x i32> %vaddhn2.i.i to <1 x i64> + %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> + %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> + ret <4 x i32> %2 +} + +define <8 x i8> @test_vraddhn_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vraddhn_s16: +; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vraddhn2.i = tail call <8 x i8> 
@llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b) + ret <8 x i8> %vraddhn2.i +} + +define <4 x i16> @test_vraddhn_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vraddhn_s32: +; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vraddhn2.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b) + ret <4 x i16> %vraddhn2.i +} + +define <2 x i32> @test_vraddhn_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vraddhn_s64: +; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +entry: + %vraddhn2.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b) + ret <2 x i32> %vraddhn2.i +} + +define <8 x i8> @test_vraddhn_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vraddhn_u16: +; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vraddhn2.i = tail call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b) + ret <8 x i8> %vraddhn2.i +} + +define <4 x i16> @test_vraddhn_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vraddhn_u32: +; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vraddhn2.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b) + ret <4 x i16> %vraddhn2.i +} + +define <2 x i32> @test_vraddhn_u64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vraddhn_u64: +; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +entry: + %vraddhn2.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b) + ret <2 x i32> %vraddhn2.i +} + +define <16 x i8> @test_vraddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vraddhn_high_s16: +; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vraddhn2.i.i = tail call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b) + %0 = bitcast <8 x i8> %r to <1 x i64> + %1 = bitcast <8 x i8> %vraddhn2.i.i to <1 x i64> + %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 
1> + %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> + ret <16 x i8> %2 +} + +define <8 x i16> @test_vraddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vraddhn_high_s32: +; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vraddhn2.i.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b) + %0 = bitcast <4 x i16> %r to <1 x i64> + %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64> + %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> + %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> + ret <8 x i16> %2 +} + +define <4 x i32> @test_vraddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vraddhn_high_s64: +; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +entry: + %vraddhn2.i.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b) + %0 = bitcast <2 x i32> %r to <1 x i64> + %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64> + %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> + %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> + ret <4 x i32> %2 +} + +define <16 x i8> @test_vraddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vraddhn_high_u16: +; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vraddhn2.i.i = tail call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b) + %0 = bitcast <8 x i8> %r to <1 x i64> + %1 = bitcast <8 x i8> %vraddhn2.i.i to <1 x i64> + %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> + %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> + ret <16 x i8> %2 +} + +define <8 x i16> @test_vraddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vraddhn_high_u32: +; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vraddhn2.i.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b) + %0 = bitcast <4 x i16> 
%r to <1 x i64> + %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64> + %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> + %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> + ret <8 x i16> %2 +} + +define <4 x i32> @test_vraddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vraddhn_high_u64: +; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +entry: + %vraddhn2.i.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b) + %0 = bitcast <2 x i32> %r to <1 x i64> + %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64> + %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> + %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> + ret <4 x i32> %2 +} + +define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vsubhn_s16: +; CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vsubhn.i = sub <8 x i16> %a, %b + %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8> + ret <8 x i8> %vsubhn2.i +} + +define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vsubhn_s32: +; CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vsubhn.i = sub <4 x i32> %a, %b + %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16> + %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16> + ret <4 x i16> %vsubhn2.i +} + +define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vsubhn_s64: +; CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +entry: + %vsubhn.i = sub <2 x i64> %a, %b + %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32> + %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32> + ret <2 x i32> %vsubhn2.i +} + +define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vsubhn_u16: +; CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, 
{{v[0-9]+}}.8h +entry: + %vsubhn.i = sub <8 x i16> %a, %b + %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8> + ret <8 x i8> %vsubhn2.i +} + +define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vsubhn_u32: +; CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vsubhn.i = sub <4 x i32> %a, %b + %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16> + %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16> + ret <4 x i16> %vsubhn2.i +} + +define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vsubhn_u64: +; CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +entry: + %vsubhn.i = sub <2 x i64> %a, %b + %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32> + %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32> + ret <2 x i32> %vsubhn2.i +} + +define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vsubhn_high_s16: +; CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vsubhn.i.i = sub <8 x i16> %a, %b + %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + %vsubhn2.i.i = trunc <8 x i16> %vsubhn1.i.i to <8 x i8> + %0 = bitcast <8 x i8> %r to <1 x i64> + %1 = bitcast <8 x i8> %vsubhn2.i.i to <1 x i64> + %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> + %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> + ret <16 x i8> %2 +} + +define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vsubhn_high_s32: +; CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vsubhn.i.i = sub <4 x i32> %a, %b + %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16> + %vsubhn2.i.i = trunc <4 x i32> %vsubhn1.i.i to <4 x i16> + %0 = bitcast <4 x i16> %r to <1 x i64> + %1 = bitcast <4 x 
i16> %vsubhn2.i.i to <1 x i64> + %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> + %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> + ret <8 x i16> %2 +} + +define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vsubhn_high_s64: +; CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +entry: + %vsubhn.i.i = sub <2 x i64> %a, %b + %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32> + %vsubhn2.i.i = trunc <2 x i64> %vsubhn1.i.i to <2 x i32> + %0 = bitcast <2 x i32> %r to <1 x i64> + %1 = bitcast <2 x i32> %vsubhn2.i.i to <1 x i64> + %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> + %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> + ret <4 x i32> %2 +} + +define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vsubhn_high_u16: +; CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vsubhn.i.i = sub <8 x i16> %a, %b + %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + %vsubhn2.i.i = trunc <8 x i16> %vsubhn1.i.i to <8 x i8> + %0 = bitcast <8 x i8> %r to <1 x i64> + %1 = bitcast <8 x i8> %vsubhn2.i.i to <1 x i64> + %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> + %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> + ret <16 x i8> %2 +} + +define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vsubhn_high_u32: +; CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vsubhn.i.i = sub <4 x i32> %a, %b + %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16> + %vsubhn2.i.i = trunc <4 x i32> %vsubhn1.i.i to <4 x i16> + %0 = bitcast <4 x i16> %r to <1 x i64> + %1 = bitcast <4 x i16> %vsubhn2.i.i to <1 x i64> + %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> + %2 = bitcast <2 x i64> %shuffle.i.i to 
<8 x i16> + ret <8 x i16> %2 +} + +define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vsubhn_high_u64: +; CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +entry: + %vsubhn.i.i = sub <2 x i64> %a, %b + %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32> + %vsubhn2.i.i = trunc <2 x i64> %vsubhn1.i.i to <2 x i32> + %0 = bitcast <2 x i32> %r to <1 x i64> + %1 = bitcast <2 x i32> %vsubhn2.i.i to <1 x i64> + %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> + %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> + ret <4 x i32> %2 +} + +define <8 x i8> @test_vrsubhn_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vrsubhn_s16: +; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vrsubhn2.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) + ret <8 x i8> %vrsubhn2.i +} + +define <4 x i16> @test_vrsubhn_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vrsubhn_s32: +; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vrsubhn2.i = tail call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) + ret <4 x i16> %vrsubhn2.i +} + +define <2 x i32> @test_vrsubhn_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vrsubhn_s64: +; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +entry: + %vrsubhn2.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) + ret <2 x i32> %vrsubhn2.i +} + +define <8 x i8> @test_vrsubhn_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vrsubhn_u16: +; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vrsubhn2.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) + ret <8 x i8> %vrsubhn2.i +} + +define <4 x i16> @test_vrsubhn_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vrsubhn_u32: +; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vrsubhn2.i = tail call <4 x i16> 
@llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) + ret <4 x i16> %vrsubhn2.i +} + +define <2 x i32> @test_vrsubhn_u64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vrsubhn_u64: +; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +entry: + %vrsubhn2.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) + ret <2 x i32> %vrsubhn2.i +} + +define <16 x i8> @test_vrsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vrsubhn_high_s16: +; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vrsubhn2.i.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) + %0 = bitcast <8 x i8> %r to <1 x i64> + %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64> + %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> + %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> + ret <16 x i8> %2 +} + +define <8 x i16> @test_vrsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vrsubhn_high_s32: +; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vrsubhn2.i.i = tail call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) + %0 = bitcast <4 x i16> %r to <1 x i64> + %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64> + %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> + %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> + ret <8 x i16> %2 +} + +define <4 x i32> @test_vrsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vrsubhn_high_s64: +; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +entry: + %vrsubhn2.i.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) + %0 = bitcast <2 x i32> %r to <1 x i64> + %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64> + %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> + %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> + ret <4 x i32> %2 +} + +define 
<16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vrsubhn_high_u16: +; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vrsubhn2.i.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) + %0 = bitcast <8 x i8> %r to <1 x i64> + %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64> + %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> + %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> + ret <16 x i8> %2 +} + +define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vrsubhn_high_u32: +; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vrsubhn2.i.i = tail call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) + %0 = bitcast <4 x i16> %r to <1 x i64> + %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64> + %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> + %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> + ret <8 x i16> %2 +} + +define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vrsubhn_high_u64: +; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +entry: + %vrsubhn2.i.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) + %0 = bitcast <2 x i32> %r to <1 x i64> + %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64> + %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> + %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> + ret <4 x i32> %2 +} + +define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vabdl_s8: +; CHECK: sabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vabd.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b) + %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16> + ret <8 x i16> %vmovl.i.i +} + +define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) { +; 
CHECK: test_vabdl_s16: +; CHECK: sabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vabd2.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %a, <4 x i16> %b) + %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32> + ret <4 x i32> %vmovl.i.i +} + +define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vabdl_s32: +; CHECK: sabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +entry: + %vabd2.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %a, <2 x i32> %b) + %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64> + ret <2 x i64> %vmovl.i.i +} + +define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vabdl_u8: +; CHECK: uabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vabd.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b) + %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16> + ret <8 x i16> %vmovl.i.i +} + +define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vabdl_u16: +; CHECK: uabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vabd2.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %a, <4 x i16> %b) + %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32> + ret <4 x i32> %vmovl.i.i +} + +define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vabdl_u32: +; CHECK: uabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +entry: + %vabd2.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %a, <2 x i32> %b) + %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64> + ret <2 x i64> %vmovl.i.i +} + +define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { +; CHECK: test_vabal_s8: +; CHECK: sabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vabd.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c) + %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16> + %add.i = add <8 x i16> %vmovl.i.i.i, %a + ret <8 x i16> %add.i +} + +define 
<4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { +; CHECK: test_vabal_s16: +; CHECK: sabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vabd2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %b, <4 x i16> %c) + %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32> + %add.i = add <4 x i32> %vmovl.i.i.i, %a + ret <4 x i32> %add.i +} + +define <2 x i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { +; CHECK: test_vabal_s32: +; CHECK: sabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +entry: + %vabd2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %b, <2 x i32> %c) + %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64> + %add.i = add <2 x i64> %vmovl.i.i.i, %a + ret <2 x i64> %add.i +} + +define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { +; CHECK: test_vabal_u8: +; CHECK: uabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vabd.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c) + %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16> + %add.i = add <8 x i16> %vmovl.i.i.i, %a + ret <8 x i16> %add.i +} + +define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { +; CHECK: test_vabal_u16: +; CHECK: uabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vabd2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %b, <4 x i16> %c) + %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32> + %add.i = add <4 x i32> %vmovl.i.i.i, %a + ret <4 x i32> %add.i +} + +define <2 x i64> @test_vabal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { +; CHECK: test_vabal_u32: +; CHECK: uabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +entry: + %vabd2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %b, <2 x i32> %c) + %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64> + %add.i = add <2 x i64> %vmovl.i.i.i, %a + ret <2 x i64> %add.i +} + +define <8 x i16> @test_vabdl_high_s8(<16 x 
i8> %a, <16 x i8> %b) { +; CHECK: test_vabdl_high_s8: +; CHECK: sabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vabd.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16> + ret <8 x i16> %vmovl.i.i.i +} + +define <4 x i32> @test_vabdl_high_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vabdl_high_s16: +; CHECK: sabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vabd2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32> + ret <4 x i32> %vmovl.i.i.i +} + +define <2 x i64> @test_vabdl_high_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vabdl_high_s32: +; CHECK: sabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vabd2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64> + ret <2 x i64> %vmovl.i.i.i +} + +define <8 x i16> @test_vabdl_high_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vabdl_high_u8: +; CHECK: uabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 
11, i32 12, i32 13, i32 14, i32 15> + %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vabd.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16> + ret <8 x i16> %vmovl.i.i.i +} + +define <4 x i32> @test_vabdl_high_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vabdl_high_u16: +; CHECK: uabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vabd2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32> + ret <4 x i32> %vmovl.i.i.i +} + +define <2 x i64> @test_vabdl_high_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vabdl_high_u32: +; CHECK: uabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vabd2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64> + ret <2 x i64> %vmovl.i.i.i +} + +define <8 x i16> @test_vabal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK: test_vabal_high_s8: +; CHECK: sabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vabd.i.i.i.i = tail call <8 x i8> 
@llvm.arm.neon.vabds.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16> + %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a + ret <8 x i16> %add.i.i +} + +define <4 x i32> @test_vabal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { +; CHECK: test_vabal_high_s16: +; CHECK: sabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vabd2.i.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32> + %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a + ret <4 x i32> %add.i.i +} + +define <2 x i64> @test_vabal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK: test_vabal_high_s32: +; CHECK: sabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vabd2.i.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64> + %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a + ret <2 x i64> %add.i.i +} + +define <8 x i16> @test_vabal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK: test_vabal_high_u8: +; CHECK: uabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vabd.i.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> 
%shuffle.i.i, <8 x i8> %shuffle.i3.i) + %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16> + %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a + ret <8 x i16> %add.i.i +} + +define <4 x i32> @test_vabal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { +; CHECK: test_vabal_high_u16: +; CHECK: uabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vabd2.i.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32> + %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a + ret <4 x i32> %add.i.i +} + +define <2 x i64> @test_vabal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK: test_vabal_high_u32: +; CHECK: uabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vabd2.i.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64> + %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a + ret <2 x i64> %add.i.i +} + +define <8 x i16> @test_vmull_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vmull_s8: +; CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %a, <8 x i8> %b) + ret <8 x i16> %vmull.i +} + +define <4 x i32> @test_vmull_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vmull_s16: +; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %b) + ret <4 x i32> %vmull2.i +} + +define <2 x 
i64> @test_vmull_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vmull_s32: +; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +entry: + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %b) + ret <2 x i64> %vmull2.i +} + +define <8 x i16> @test_vmull_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vmull_u8: +; CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %a, <8 x i8> %b) + ret <8 x i16> %vmull.i +} + +define <4 x i32> @test_vmull_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vmull_u16: +; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %b) + ret <4 x i32> %vmull2.i +} + +define <2 x i64> @test_vmull_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vmull_u32: +; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +entry: + %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %b) + ret <2 x i64> %vmull2.i +} + +define <8 x i16> @test_vmull_high_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vmull_high_s8: +; CHECK: smull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + ret <8 x i16> %vmull.i.i +} + +define <4 x i32> @test_vmull_high_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vmull_high_s16: +; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle.i3.i = shufflevector <8 
x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + ret <4 x i32> %vmull2.i.i +} + +define <2 x i64> @test_vmull_high_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vmull_high_s32: +; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + ret <2 x i64> %vmull2.i.i +} + +define <8 x i16> @test_vmull_high_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vmull_high_u8: +; CHECK: umull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + ret <8 x i16> %vmull.i.i +} + +define <4 x i32> @test_vmull_high_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vmull_high_u16: +; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + ret <4 x i32> %vmull2.i.i +} + +define <2 x i64> @test_vmull_high_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vmull_high_u32: +; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i.i 
= shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + ret <2 x i64> %vmull2.i.i +} + +define <8 x i16> @test_vmlal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { +; CHECK: test_vmlal_s8: +; CHECK: smlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c) + %add.i = add <8 x i16> %vmull.i.i, %a + ret <8 x i16> %add.i +} + +define <4 x i32> @test_vmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { +; CHECK: test_vmlal_s16: +; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c) + %add.i = add <4 x i32> %vmull2.i.i, %a + ret <4 x i32> %add.i +} + +define <2 x i64> @test_vmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { +; CHECK: test_vmlal_s32: +; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +entry: + %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c) + %add.i = add <2 x i64> %vmull2.i.i, %a + ret <2 x i64> %add.i +} + +define <8 x i16> @test_vmlal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { +; CHECK: test_vmlal_u8: +; CHECK: umlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c) + %add.i = add <8 x i16> %vmull.i.i, %a + ret <8 x i16> %add.i +} + +define <4 x i32> @test_vmlal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { +; CHECK: test_vmlal_u16: +; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c) + %add.i = add <4 x i32> %vmull2.i.i, %a + ret <4 x i32> %add.i +} + +define <2 x i64> 
@test_vmlal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { +; CHECK: test_vmlal_u32: +; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +entry: + %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c) + %add.i = add <2 x i64> %vmull2.i.i, %a + ret <2 x i64> %add.i +} + +define <8 x i16> @test_vmlal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK: test_vmlal_high_s8: +; CHECK: smlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vmull.i.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + %add.i.i = add <8 x i16> %vmull.i.i.i, %a + ret <8 x i16> %add.i.i +} + +define <4 x i32> @test_vmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { +; CHECK: test_vmlal_high_s16: +; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %add.i.i = add <4 x i32> %vmull2.i.i.i, %a + ret <4 x i32> %add.i.i +} + +define <2 x i64> @test_vmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK: test_vmlal_high_s32: +; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x 
i32> %shuffle.i3.i) + %add.i.i = add <2 x i64> %vmull2.i.i.i, %a + ret <2 x i64> %add.i.i +} + +define <8 x i16> @test_vmlal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK: test_vmlal_high_u8: +; CHECK: umlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vmull.i.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + %add.i.i = add <8 x i16> %vmull.i.i.i, %a + ret <8 x i16> %add.i.i +} + +define <4 x i32> @test_vmlal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { +; CHECK: test_vmlal_high_u16: +; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %add.i.i = add <4 x i32> %vmull2.i.i.i, %a + ret <4 x i32> %add.i.i +} + +define <2 x i64> @test_vmlal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK: test_vmlal_high_u32: +; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %add.i.i = add <2 x i64> %vmull2.i.i.i, %a + ret <2 x i64> %add.i.i +} + +define <8 x i16> @test_vmlsl_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { +; CHECK: test_vmlsl_s8: +; CHECK: smlsl {{v[0-9]+}}.8h, 
{{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c) + %sub.i = sub <8 x i16> %a, %vmull.i.i + ret <8 x i16> %sub.i +} + +define <4 x i32> @test_vmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { +; CHECK: test_vmlsl_s16: +; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c) + %sub.i = sub <4 x i32> %a, %vmull2.i.i + ret <4 x i32> %sub.i +} + +define <2 x i64> @test_vmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { +; CHECK: test_vmlsl_s32: +; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +entry: + %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c) + %sub.i = sub <2 x i64> %a, %vmull2.i.i + ret <2 x i64> %sub.i +} + +define <8 x i16> @test_vmlsl_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { +; CHECK: test_vmlsl_u8: +; CHECK: umlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c) + %sub.i = sub <8 x i16> %a, %vmull.i.i + ret <8 x i16> %sub.i +} + +define <4 x i32> @test_vmlsl_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { +; CHECK: test_vmlsl_u16: +; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c) + %sub.i = sub <4 x i32> %a, %vmull2.i.i + ret <4 x i32> %sub.i +} + +define <2 x i64> @test_vmlsl_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { +; CHECK: test_vmlsl_u32: +; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +entry: + %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c) + %sub.i = sub <2 x i64> %a, %vmull2.i.i + ret <2 x i64> %sub.i +} + +define <8 x i16> @test_vmlsl_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK: test_vmlsl_high_s8: +; CHECK: smlsl2 
{{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vmull.i.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + %sub.i.i = sub <8 x i16> %a, %vmull.i.i.i + ret <8 x i16> %sub.i.i +} + +define <4 x i32> @test_vmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { +; CHECK: test_vmlsl_high_s16: +; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i + ret <4 x i32> %sub.i.i +} + +define <2 x i64> @test_vmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK: test_vmlsl_high_s32: +; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i + ret <2 x i64> %sub.i.i +} + +define <8 x i16> @test_vmlsl_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK: test_vmlsl_high_u8: +; CHECK: umlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %shuffle.i3.i = shufflevector 
<16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vmull.i.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + %sub.i.i = sub <8 x i16> %a, %vmull.i.i.i + ret <8 x i16> %sub.i.i +} + +define <4 x i32> @test_vmlsl_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { +; CHECK: test_vmlsl_high_u16: +; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i + ret <4 x i32> %sub.i.i +} + +define <2 x i64> @test_vmlsl_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK: test_vmlsl_high_u32: +; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i + ret <2 x i64> %sub.i.i +} + +define <4 x i32> @test_vqdmull_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vqdmull_s16: +; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %b) + ret <4 x i32> %vqdmull2.i +} + +define <2 x i64> @test_vqdmull_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vqdmull_s32: +; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +entry: + %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %b) + ret <2 x i64> 
%vqdmull2.i +} + +define <4 x i32> @test_vqdmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { +; CHECK: test_vqdmlal_s16: +; CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) + %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) + ret <4 x i32> %vqdmlal4.i +} + +define <2 x i64> @test_vqdmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { +; CHECK: test_vqdmlal_s32: +; CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +entry: + %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) + %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) + ret <2 x i64> %vqdmlal4.i +} + +define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { +; CHECK: test_vqdmlsl_s16: +; CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) + %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) + ret <4 x i32> %vqdmlsl4.i +} + +define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { +; CHECK: test_vqdmlsl_s32: +; CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +entry: + %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) + %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) + ret <2 x i64> %vqdmlsl4.i +} + +define <4 x i32> @test_vqdmull_high_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vqdmull_high_s16: +; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x 
i32> <i32 4, i32 5, i32 6, i32 7> + %vqdmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + ret <4 x i32> %vqdmull2.i.i +} + +define <2 x i64> @test_vqdmull_high_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vqdmull_high_s32: +; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vqdmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + ret <2 x i64> %vqdmull2.i.i +} + +define <4 x i32> @test_vqdmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { +; CHECK: test_vqdmlal_high_s16: +; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vqdmlal2.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %vqdmlal4.i.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i.i) + ret <4 x i32> %vqdmlal4.i.i +} + +define <2 x i64> @test_vqdmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK: test_vqdmlal_high_s32: +; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vqdmlal2.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vqdmlal4.i.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i.i) + ret <2 x i64> %vqdmlal4.i.i +} + +define <4 x i32> 
@test_vqdmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { +; CHECK: test_vqdmlsl_high_s16: +; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vqdmlsl2.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) + %vqdmlsl4.i.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i.i) + ret <4 x i32> %vqdmlsl4.i.i +} + +define <2 x i64> @test_vqdmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK: test_vqdmlsl_high_s32: +; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vqdmlsl2.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) + %vqdmlsl4.i.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i.i) + ret <2 x i64> %vqdmlsl4.i.i +} + +define <8 x i16> @test_vmull_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vmull_p8: +; CHECK: pmull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %a, <8 x i8> %b) + ret <8 x i16> %vmull.i +} + +define <8 x i16> @test_vmull_high_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vmull_high_p8: +; CHECK: pmull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vmull.i.i = tail call 
<8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) + ret <8 x i16> %vmull.i.i +} + diff --git a/test/CodeGen/AArch64/neon-aba-abd.ll b/test/CodeGen/AArch64/neon-aba-abd.ll index b423666..5400984 100644 --- a/test/CodeGen/AArch64/neon-aba-abd.ll +++ b/test/CodeGen/AArch64/neon-aba-abd.ll @@ -157,6 +157,16 @@ define <2 x i32> @test_sabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ret <2 x i32> %abd } +define <2 x i32> @test_sabd_v2i32_const() { +; CHECK: test_sabd_v2i32_const: +; CHECK: movi d1, #0xffffffff0000 +; CHECK-NEXT: sabd v0.2s, v0.2s, v1.2s + %1 = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32( + <2 x i32> <i32 -2147483648, i32 2147450880>, + <2 x i32> <i32 -65536, i32 65535>) + ret <2 x i32> %1 +} + define <2 x i32> @test_saba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK: test_saba_v2i32: %abd = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) @@ -223,4 +233,4 @@ define <2 x double> @test_fabd_v2f64(<2 x double> %lhs, <2 x double> %rhs) { %abd = call <2 x double> @llvm.arm.neon.vabds.v2f64(<2 x double> %lhs, <2 x double> %rhs) ; CHECK: fabd v0.2d, v0.2d, v1.2d ret <2 x double> %abd -}
\ No newline at end of file +} diff --git a/test/CodeGen/AArch64/neon-across.ll b/test/CodeGen/AArch64/neon-across.ll new file mode 100644 index 0000000..733db97 --- /dev/null +++ b/test/CodeGen/AArch64/neon-across.ll @@ -0,0 +1,476 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +declare <1 x float> @llvm.aarch64.neon.vminnmv.v1f32.v4f32(<4 x float>) + +declare <1 x float> @llvm.aarch64.neon.vmaxnmv.v1f32.v4f32(<4 x float>) + +declare <1 x float> @llvm.aarch64.neon.vminv.v1f32.v4f32(<4 x float>) + +declare <1 x float> @llvm.aarch64.neon.vmaxv.v1f32.v4f32(<4 x float>) + +declare <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v4i32(<4 x i32>) + +declare <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v8i16(<8 x i16>) + +declare <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v16i8(<16 x i8>) + +declare <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v4i16(<4 x i16>) + +declare <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v8i8(<8 x i8>) + +declare <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v4i32(<4 x i32>) + +declare <1 x i16> @llvm.aarch64.neon.uminv.v1i16.v8i16(<8 x i16>) + +declare <1 x i8> @llvm.aarch64.neon.uminv.v1i8.v16i8(<16 x i8>) + +declare <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v4i32(<4 x i32>) + +declare <1 x i16> @llvm.aarch64.neon.sminv.v1i16.v8i16(<8 x i16>) + +declare <1 x i8> @llvm.aarch64.neon.sminv.v1i8.v16i8(<16 x i8>) + +declare <1 x i16> @llvm.aarch64.neon.uminv.v1i16.v4i16(<4 x i16>) + +declare <1 x i8> @llvm.aarch64.neon.uminv.v1i8.v8i8(<8 x i8>) + +declare <1 x i16> @llvm.aarch64.neon.sminv.v1i16.v4i16(<4 x i16>) + +declare <1 x i8> @llvm.aarch64.neon.sminv.v1i8.v8i8(<8 x i8>) + +declare <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v4i32(<4 x i32>) + +declare <1 x i16> @llvm.aarch64.neon.umaxv.v1i16.v8i16(<8 x i16>) + +declare <1 x i8> @llvm.aarch64.neon.umaxv.v1i8.v16i8(<16 x i8>) + +declare <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v4i32(<4 x i32>) + +declare <1 x i16> @llvm.aarch64.neon.smaxv.v1i16.v8i16(<8 x i16>) + 
+declare <1 x i8> @llvm.aarch64.neon.smaxv.v1i8.v16i8(<16 x i8>) + +declare <1 x i16> @llvm.aarch64.neon.umaxv.v1i16.v4i16(<4 x i16>) + +declare <1 x i8> @llvm.aarch64.neon.umaxv.v1i8.v8i8(<8 x i8>) + +declare <1 x i16> @llvm.aarch64.neon.smaxv.v1i16.v4i16(<4 x i16>) + +declare <1 x i8> @llvm.aarch64.neon.smaxv.v1i8.v8i8(<8 x i8>) + +declare <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v4i32(<4 x i32>) + +declare <1 x i32> @llvm.aarch64.neon.uaddlv.v1i32.v8i16(<8 x i16>) + +declare <1 x i16> @llvm.aarch64.neon.uaddlv.v1i16.v16i8(<16 x i8>) + +declare <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v4i32(<4 x i32>) + +declare <1 x i32> @llvm.aarch64.neon.saddlv.v1i32.v8i16(<8 x i16>) + +declare <1 x i16> @llvm.aarch64.neon.saddlv.v1i16.v16i8(<16 x i8>) + +declare <1 x i32> @llvm.aarch64.neon.uaddlv.v1i32.v4i16(<4 x i16>) + +declare <1 x i16> @llvm.aarch64.neon.uaddlv.v1i16.v8i8(<8 x i8>) + +declare <1 x i32> @llvm.aarch64.neon.saddlv.v1i32.v4i16(<4 x i16>) + +declare <1 x i16> @llvm.aarch64.neon.saddlv.v1i16.v8i8(<8 x i8>) + +define i16 @test_vaddlv_s8(<8 x i8> %a) { +; CHECK: test_vaddlv_s8: +; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.8b +entry: + %saddlv.i = tail call <1 x i16> @llvm.aarch64.neon.saddlv.v1i16.v8i8(<8 x i8> %a) + %0 = extractelement <1 x i16> %saddlv.i, i32 0 + ret i16 %0 +} + +define i32 @test_vaddlv_s16(<4 x i16> %a) { +; CHECK: test_vaddlv_s16: +; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.4h +entry: + %saddlv.i = tail call <1 x i32> @llvm.aarch64.neon.saddlv.v1i32.v4i16(<4 x i16> %a) + %0 = extractelement <1 x i32> %saddlv.i, i32 0 + ret i32 %0 +} + +define i16 @test_vaddlv_u8(<8 x i8> %a) { +; CHECK: test_vaddlv_u8: +; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.8b +entry: + %uaddlv.i = tail call <1 x i16> @llvm.aarch64.neon.uaddlv.v1i16.v8i8(<8 x i8> %a) + %0 = extractelement <1 x i16> %uaddlv.i, i32 0 + ret i16 %0 +} + +define i32 @test_vaddlv_u16(<4 x i16> %a) { +; CHECK: test_vaddlv_u16: +; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.4h +entry: + %uaddlv.i = tail 
call <1 x i32> @llvm.aarch64.neon.uaddlv.v1i32.v4i16(<4 x i16> %a) + %0 = extractelement <1 x i32> %uaddlv.i, i32 0 + ret i32 %0 +} + +define i16 @test_vaddlvq_s8(<16 x i8> %a) { +; CHECK: test_vaddlvq_s8: +; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.16b +entry: + %saddlv.i = tail call <1 x i16> @llvm.aarch64.neon.saddlv.v1i16.v16i8(<16 x i8> %a) + %0 = extractelement <1 x i16> %saddlv.i, i32 0 + ret i16 %0 +} + +define i32 @test_vaddlvq_s16(<8 x i16> %a) { +; CHECK: test_vaddlvq_s16: +; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.8h +entry: + %saddlv.i = tail call <1 x i32> @llvm.aarch64.neon.saddlv.v1i32.v8i16(<8 x i16> %a) + %0 = extractelement <1 x i32> %saddlv.i, i32 0 + ret i32 %0 +} + +define i64 @test_vaddlvq_s32(<4 x i32> %a) { +; CHECK: test_vaddlvq_s32: +; CHECK: saddlv d{{[0-9]+}}, {{v[0-9]+}}.4s +entry: + %saddlv.i = tail call <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v4i32(<4 x i32> %a) + %0 = extractelement <1 x i64> %saddlv.i, i32 0 + ret i64 %0 +} + +define i16 @test_vaddlvq_u8(<16 x i8> %a) { +; CHECK: test_vaddlvq_u8: +; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.16b +entry: + %uaddlv.i = tail call <1 x i16> @llvm.aarch64.neon.uaddlv.v1i16.v16i8(<16 x i8> %a) + %0 = extractelement <1 x i16> %uaddlv.i, i32 0 + ret i16 %0 +} + +define i32 @test_vaddlvq_u16(<8 x i16> %a) { +; CHECK: test_vaddlvq_u16: +; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.8h +entry: + %uaddlv.i = tail call <1 x i32> @llvm.aarch64.neon.uaddlv.v1i32.v8i16(<8 x i16> %a) + %0 = extractelement <1 x i32> %uaddlv.i, i32 0 + ret i32 %0 +} + +define i64 @test_vaddlvq_u32(<4 x i32> %a) { +; CHECK: test_vaddlvq_u32: +; CHECK: uaddlv d{{[0-9]+}}, {{v[0-9]+}}.4s +entry: + %uaddlv.i = tail call <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v4i32(<4 x i32> %a) + %0 = extractelement <1 x i64> %uaddlv.i, i32 0 + ret i64 %0 +} + +define i8 @test_vmaxv_s8(<8 x i8> %a) { +; CHECK: test_vmaxv_s8: +; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.8b +entry: + %smaxv.i = tail call <1 x i8> @llvm.aarch64.neon.smaxv.v1i8.v8i8(<8 
x i8> %a) + %0 = extractelement <1 x i8> %smaxv.i, i32 0 + ret i8 %0 +} + +define i16 @test_vmaxv_s16(<4 x i16> %a) { +; CHECK: test_vmaxv_s16: +; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.4h +entry: + %smaxv.i = tail call <1 x i16> @llvm.aarch64.neon.smaxv.v1i16.v4i16(<4 x i16> %a) + %0 = extractelement <1 x i16> %smaxv.i, i32 0 + ret i16 %0 +} + +define i8 @test_vmaxv_u8(<8 x i8> %a) { +; CHECK: test_vmaxv_u8: +; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.8b +entry: + %umaxv.i = tail call <1 x i8> @llvm.aarch64.neon.umaxv.v1i8.v8i8(<8 x i8> %a) + %0 = extractelement <1 x i8> %umaxv.i, i32 0 + ret i8 %0 +} + +define i16 @test_vmaxv_u16(<4 x i16> %a) { +; CHECK: test_vmaxv_u16: +; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.4h +entry: + %umaxv.i = tail call <1 x i16> @llvm.aarch64.neon.umaxv.v1i16.v4i16(<4 x i16> %a) + %0 = extractelement <1 x i16> %umaxv.i, i32 0 + ret i16 %0 +} + +define i8 @test_vmaxvq_s8(<16 x i8> %a) { +; CHECK: test_vmaxvq_s8: +; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.16b +entry: + %smaxv.i = tail call <1 x i8> @llvm.aarch64.neon.smaxv.v1i8.v16i8(<16 x i8> %a) + %0 = extractelement <1 x i8> %smaxv.i, i32 0 + ret i8 %0 +} + +define i16 @test_vmaxvq_s16(<8 x i16> %a) { +; CHECK: test_vmaxvq_s16: +; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.8h +entry: + %smaxv.i = tail call <1 x i16> @llvm.aarch64.neon.smaxv.v1i16.v8i16(<8 x i16> %a) + %0 = extractelement <1 x i16> %smaxv.i, i32 0 + ret i16 %0 +} + +define i32 @test_vmaxvq_s32(<4 x i32> %a) { +; CHECK: test_vmaxvq_s32: +; CHECK: smaxv s{{[0-9]+}}, {{v[0-9]+}}.4s +entry: + %smaxv.i = tail call <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v4i32(<4 x i32> %a) + %0 = extractelement <1 x i32> %smaxv.i, i32 0 + ret i32 %0 +} + +define i8 @test_vmaxvq_u8(<16 x i8> %a) { +; CHECK: test_vmaxvq_u8: +; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.16b +entry: + %umaxv.i = tail call <1 x i8> @llvm.aarch64.neon.umaxv.v1i8.v16i8(<16 x i8> %a) + %0 = extractelement <1 x i8> %umaxv.i, i32 0 + ret i8 %0 +} + +define i16 @test_vmaxvq_u16(<8 x 
i16> %a) { +; CHECK: test_vmaxvq_u16: +; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.8h +entry: + %umaxv.i = tail call <1 x i16> @llvm.aarch64.neon.umaxv.v1i16.v8i16(<8 x i16> %a) + %0 = extractelement <1 x i16> %umaxv.i, i32 0 + ret i16 %0 +} + +define i32 @test_vmaxvq_u32(<4 x i32> %a) { +; CHECK: test_vmaxvq_u32: +; CHECK: umaxv s{{[0-9]+}}, {{v[0-9]+}}.4s +entry: + %umaxv.i = tail call <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v4i32(<4 x i32> %a) + %0 = extractelement <1 x i32> %umaxv.i, i32 0 + ret i32 %0 +} + +define i8 @test_vminv_s8(<8 x i8> %a) { +; CHECK: test_vminv_s8: +; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.8b +entry: + %sminv.i = tail call <1 x i8> @llvm.aarch64.neon.sminv.v1i8.v8i8(<8 x i8> %a) + %0 = extractelement <1 x i8> %sminv.i, i32 0 + ret i8 %0 +} + +define i16 @test_vminv_s16(<4 x i16> %a) { +; CHECK: test_vminv_s16: +; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.4h +entry: + %sminv.i = tail call <1 x i16> @llvm.aarch64.neon.sminv.v1i16.v4i16(<4 x i16> %a) + %0 = extractelement <1 x i16> %sminv.i, i32 0 + ret i16 %0 +} + +define i8 @test_vminv_u8(<8 x i8> %a) { +; CHECK: test_vminv_u8: +; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.8b +entry: + %uminv.i = tail call <1 x i8> @llvm.aarch64.neon.uminv.v1i8.v8i8(<8 x i8> %a) + %0 = extractelement <1 x i8> %uminv.i, i32 0 + ret i8 %0 +} + +define i16 @test_vminv_u16(<4 x i16> %a) { +; CHECK: test_vminv_u16: +; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.4h +entry: + %uminv.i = tail call <1 x i16> @llvm.aarch64.neon.uminv.v1i16.v4i16(<4 x i16> %a) + %0 = extractelement <1 x i16> %uminv.i, i32 0 + ret i16 %0 +} + +define i8 @test_vminvq_s8(<16 x i8> %a) { +; CHECK: test_vminvq_s8: +; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.16b +entry: + %sminv.i = tail call <1 x i8> @llvm.aarch64.neon.sminv.v1i8.v16i8(<16 x i8> %a) + %0 = extractelement <1 x i8> %sminv.i, i32 0 + ret i8 %0 +} + +define i16 @test_vminvq_s16(<8 x i16> %a) { +; CHECK: test_vminvq_s16: +; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.8h +entry: + %sminv.i = tail call <1 
x i16> @llvm.aarch64.neon.sminv.v1i16.v8i16(<8 x i16> %a) + %0 = extractelement <1 x i16> %sminv.i, i32 0 + ret i16 %0 +} + +define i32 @test_vminvq_s32(<4 x i32> %a) { +; CHECK: test_vminvq_s32: +; CHECK: sminv s{{[0-9]+}}, {{v[0-9]+}}.4s +entry: + %sminv.i = tail call <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v4i32(<4 x i32> %a) + %0 = extractelement <1 x i32> %sminv.i, i32 0 + ret i32 %0 +} + +define i8 @test_vminvq_u8(<16 x i8> %a) { +; CHECK: test_vminvq_u8: +; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.16b +entry: + %uminv.i = tail call <1 x i8> @llvm.aarch64.neon.uminv.v1i8.v16i8(<16 x i8> %a) + %0 = extractelement <1 x i8> %uminv.i, i32 0 + ret i8 %0 +} + +define i16 @test_vminvq_u16(<8 x i16> %a) { +; CHECK: test_vminvq_u16: +; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.8h +entry: + %uminv.i = tail call <1 x i16> @llvm.aarch64.neon.uminv.v1i16.v8i16(<8 x i16> %a) + %0 = extractelement <1 x i16> %uminv.i, i32 0 + ret i16 %0 +} + +define i32 @test_vminvq_u32(<4 x i32> %a) { +; CHECK: test_vminvq_u32: +; CHECK: uminv s{{[0-9]+}}, {{v[0-9]+}}.4s +entry: + %uminv.i = tail call <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v4i32(<4 x i32> %a) + %0 = extractelement <1 x i32> %uminv.i, i32 0 + ret i32 %0 +} + +define i8 @test_vaddv_s8(<8 x i8> %a) { +; CHECK: test_vaddv_s8: +; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b +entry: + %vaddv.i = tail call <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v8i8(<8 x i8> %a) + %0 = extractelement <1 x i8> %vaddv.i, i32 0 + ret i8 %0 +} + +define i16 @test_vaddv_s16(<4 x i16> %a) { +; CHECK: test_vaddv_s16: +; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h +entry: + %vaddv.i = tail call <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v4i16(<4 x i16> %a) + %0 = extractelement <1 x i16> %vaddv.i, i32 0 + ret i16 %0 +} + +define i8 @test_vaddv_u8(<8 x i8> %a) { +; CHECK: test_vaddv_u8: +; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b +entry: + %vaddv.i = tail call <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v8i8(<8 x i8> %a) + %0 = extractelement <1 x i8> %vaddv.i, i32 0 + ret i8 
%0 +} + +define i16 @test_vaddv_u16(<4 x i16> %a) { +; CHECK: test_vaddv_u16: +; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h +entry: + %vaddv.i = tail call <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v4i16(<4 x i16> %a) + %0 = extractelement <1 x i16> %vaddv.i, i32 0 + ret i16 %0 +} + +define i8 @test_vaddvq_s8(<16 x i8> %a) { +; CHECK: test_vaddvq_s8: +; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b +entry: + %vaddv.i = tail call <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v16i8(<16 x i8> %a) + %0 = extractelement <1 x i8> %vaddv.i, i32 0 + ret i8 %0 +} + +define i16 @test_vaddvq_s16(<8 x i16> %a) { +; CHECK: test_vaddvq_s16: +; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h +entry: + %vaddv.i = tail call <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v8i16(<8 x i16> %a) + %0 = extractelement <1 x i16> %vaddv.i, i32 0 + ret i16 %0 +} + +define i32 @test_vaddvq_s32(<4 x i32> %a) { +; CHECK: test_vaddvq_s32: +; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s +entry: + %vaddv.i = tail call <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v4i32(<4 x i32> %a) + %0 = extractelement <1 x i32> %vaddv.i, i32 0 + ret i32 %0 +} + +define i8 @test_vaddvq_u8(<16 x i8> %a) { +; CHECK: test_vaddvq_u8: +; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b +entry: + %vaddv.i = tail call <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v16i8(<16 x i8> %a) + %0 = extractelement <1 x i8> %vaddv.i, i32 0 + ret i8 %0 +} + +define i16 @test_vaddvq_u16(<8 x i16> %a) { +; CHECK: test_vaddvq_u16: +; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h +entry: + %vaddv.i = tail call <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v8i16(<8 x i16> %a) + %0 = extractelement <1 x i16> %vaddv.i, i32 0 + ret i16 %0 +} + +define i32 @test_vaddvq_u32(<4 x i32> %a) { +; CHECK: test_vaddvq_u32: +; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s +entry: + %vaddv.i = tail call <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v4i32(<4 x i32> %a) + %0 = extractelement <1 x i32> %vaddv.i, i32 0 + ret i32 %0 +} + +define float @test_vmaxvq_f32(<4 x float> %a) { +; CHECK: test_vmaxvq_f32: +; CHECK: fmaxv 
s{{[0-9]+}}, {{v[0-9]+}}.4s +entry: + %vmaxv.i = tail call <1 x float> @llvm.aarch64.neon.vmaxv.v1f32.v4f32(<4 x float> %a) + %0 = extractelement <1 x float> %vmaxv.i, i32 0 + ret float %0 +} + +define float @test_vminvq_f32(<4 x float> %a) { +; CHECK: test_vminvq_f32: +; CHECK: fminv s{{[0-9]+}}, {{v[0-9]+}}.4s +entry: + %vminv.i = tail call <1 x float> @llvm.aarch64.neon.vminv.v1f32.v4f32(<4 x float> %a) + %0 = extractelement <1 x float> %vminv.i, i32 0 + ret float %0 +} + +define float @test_vmaxnmvq_f32(<4 x float> %a) { +; CHECK: test_vmaxnmvq_f32: +; CHECK: fmaxnmv s{{[0-9]+}}, {{v[0-9]+}}.4s +entry: + %vmaxnmv.i = tail call <1 x float> @llvm.aarch64.neon.vmaxnmv.v1f32.v4f32(<4 x float> %a) + %0 = extractelement <1 x float> %vmaxnmv.i, i32 0 + ret float %0 +} + +define float @test_vminnmvq_f32(<4 x float> %a) { +; CHECK: test_vminnmvq_f32: +; CHECK: fminnmv s{{[0-9]+}}, {{v[0-9]+}}.4s +entry: + %vminnmv.i = tail call <1 x float> @llvm.aarch64.neon.vminnmv.v1f32.v4f32(<4 x float> %a) + %0 = extractelement <1 x float> %vminnmv.i, i32 0 + ret float %0 +} + diff --git a/test/CodeGen/AArch64/neon-add-sub.ll b/test/CodeGen/AArch64/neon-add-sub.ll index 65ec8a2..078ba14 100644 --- a/test/CodeGen/AArch64/neon-add-sub.ll +++ b/test/CodeGen/AArch64/neon-add-sub.ll @@ -118,15 +118,120 @@ define <2 x double> @sub2xdouble(<2 x double> %A, <2 x double> %B) { ret <2 x double> %tmp3 } -define <1 x i64> @add1xi64(<1 x i64> %A, <1 x i64> %B) { -;CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} - %tmp3 = add <1 x i64> %A, %B; - ret <1 x i64> %tmp3 +define <1 x double> @test_vadd_f64(<1 x double> %a, <1 x double> %b) { +; CHECK-LABEL: test_vadd_f64 +; CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} + %1 = fadd <1 x double> %a, %b + ret <1 x double> %1 } -define <1 x i64> @sub1xi64(<1 x i64> %A, <1 x i64> %B) { -;CHECK: sub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} - %tmp3 = sub <1 x i64> %A, %B; - ret <1 x i64> %tmp3 +define <1 x double> @test_vmul_f64(<1 x double> %a, <1 x 
double> %b) { +; CHECK-LABEL: test_vmul_f64 +; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} + %1 = fmul <1 x double> %a, %b + ret <1 x double> %1 } +define <1 x double> @test_vdiv_f64(<1 x double> %a, <1 x double> %b) { +; CHECK-LABEL: test_vdiv_f64 +; CHECK: fdiv d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} + %1 = fdiv <1 x double> %a, %b + ret <1 x double> %1 +} + +define <1 x double> @test_vmla_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) { +; CHECK-LABEL: test_vmla_f64 +; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +; CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} + %1 = fmul <1 x double> %b, %c + %2 = fadd <1 x double> %1, %a + ret <1 x double> %2 +} + +define <1 x double> @test_vmls_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) { +; CHECK-LABEL: test_vmls_f64 +; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} +; CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} + %1 = fmul <1 x double> %b, %c + %2 = fsub <1 x double> %a, %1 + ret <1 x double> %2 +} + +define <1 x double> @test_vfms_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) { +; CHECK-LABEL: test_vfms_f64 +; CHECK: fmsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} + %1 = fsub <1 x double> <double -0.000000e+00>, %b + %2 = tail call <1 x double> @llvm.fma.v1f64(<1 x double> %1, <1 x double> %c, <1 x double> %a) + ret <1 x double> %2 +} + +define <1 x double> @test_vfma_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) { +; CHECK-LABEL: test_vfma_f64 +; CHECK: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> %c, <1 x double> %a) + ret <1 x double> %1 +} + +define <1 x double> @test_vsub_f64(<1 x double> %a, <1 x double> %b) { +; CHECK-LABEL: test_vsub_f64 +; CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} + %1 = fsub <1 x double> %a, %b + ret <1 x double> %1 +} + +define <1 x double> @test_vabd_f64(<1 x double> %a, <1 x double> %b) { +; CHECK-LABEL: test_vabd_f64 +; CHECK: 
fabd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> @llvm.arm.neon.vabds.v1f64(<1 x double> %a, <1 x double> %b) + ret <1 x double> %1 +} + +define <1 x double> @test_vmax_f64(<1 x double> %a, <1 x double> %b) { +; CHECK-LABEL: test_vmax_f64 +; CHECK: fmax d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> @llvm.arm.neon.vmaxs.v1f64(<1 x double> %a, <1 x double> %b) + ret <1 x double> %1 +} + +define <1 x double> @test_vmin_f64(<1 x double> %a, <1 x double> %b) { +; CHECK-LABEL: test_vmin_f64 +; CHECK: fmin d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> @llvm.arm.neon.vmins.v1f64(<1 x double> %a, <1 x double> %b) + ret <1 x double> %1 +} + +define <1 x double> @test_vmaxnm_f64(<1 x double> %a, <1 x double> %b) { +; CHECK-LABEL: test_vmaxnm_f64 +; CHECK: fmaxnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> @llvm.aarch64.neon.vmaxnm.v1f64(<1 x double> %a, <1 x double> %b) + ret <1 x double> %1 +} + +define <1 x double> @test_vminnm_f64(<1 x double> %a, <1 x double> %b) { +; CHECK-LABEL: test_vminnm_f64 +; CHECK: fminnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> @llvm.aarch64.neon.vminnm.v1f64(<1 x double> %a, <1 x double> %b) + ret <1 x double> %1 +} + +define <1 x double> @test_vabs_f64(<1 x double> %a) { +; CHECK-LABEL: test_vabs_f64 +; CHECK: fabs d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> @llvm.fabs.v1f64(<1 x double> %a) + ret <1 x double> %1 +} + +define <1 x double> @test_vneg_f64(<1 x double> %a) { +; CHECK-LABEL: test_vneg_f64 +; CHECK: fneg d{{[0-9]+}}, d{{[0-9]+}} + %1 = fsub <1 x double> <double -0.000000e+00>, %a + ret <1 x double> %1 +} + +declare <1 x double> @llvm.fabs.v1f64(<1 x double>) +declare <1 x double> @llvm.aarch64.neon.vminnm.v1f64(<1 x double>, <1 x double>) +declare <1 x double> @llvm.aarch64.neon.vmaxnm.v1f64(<1 x double>, <1 x double>) +declare <1 x double> @llvm.arm.neon.vmins.v1f64(<1 x double>, <1 x double>) +declare 
<1 x double> @llvm.arm.neon.vmaxs.v1f64(<1 x double>, <1 x double>) +declare <1 x double> @llvm.arm.neon.vabds.v1f64(<1 x double>, <1 x double>) +declare <1 x double> @llvm.fma.v1f64(<1 x double>, <1 x double>, <1 x double>)
\ No newline at end of file diff --git a/test/CodeGen/AArch64/neon-bsl.ll b/test/CodeGen/AArch64/neon-bsl.ll new file mode 100644 index 0000000..6bd923d --- /dev/null +++ b/test/CodeGen/AArch64/neon-bsl.ll @@ -0,0 +1,222 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +declare <2 x double> @llvm.arm.neon.vbsl.v2f64(<2 x double>, <2 x double>, <2 x double>) + +declare <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) + +declare <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) + +declare <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float>, <4 x float>, <4 x float>) + +declare <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) + +declare <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) + +declare <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) + +declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) + +declare <1 x double> @llvm.arm.neon.vbsl.v1f64(<1 x double>, <1 x double>, <1 x double>) + +declare <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float>, <2 x float>, <2 x float>) + +declare <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64>, <1 x i64>, <1 x i64>) + +declare <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) + +define <8 x i8> @test_vbsl_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) { +; CHECK-LABEL: test_vbsl_s8: +; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) + ret <8 x i8> %vbsl.i +} + +define <8 x i8> @test_vbsl_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) { +; CHECK-LABEL: test_vbsl_s16: +; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) + %0 = bitcast <4 x i16> %vbsl3.i to <8 x i8> + ret <8 x i8> %0 +} + +define <2 x i32> 
@test_vbsl_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) { +; CHECK-LABEL: test_vbsl_s32: +; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vbsl3.i = tail call <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) + ret <2 x i32> %vbsl3.i +} + +define <1 x i64> @test_vbsl_s64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) { +; CHECK-LABEL: test_vbsl_s64: +; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) + ret <1 x i64> %vbsl3.i +} + +define <8 x i8> @test_vbsl_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) { +; CHECK-LABEL: test_vbsl_u8: +; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) + ret <8 x i8> %vbsl.i +} + +define <4 x i16> @test_vbsl_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) { +; CHECK-LABEL: test_vbsl_u16: +; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) + ret <4 x i16> %vbsl3.i +} + +define <2 x i32> @test_vbsl_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) { +; CHECK-LABEL: test_vbsl_u32: +; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vbsl3.i = tail call <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) + ret <2 x i32> %vbsl3.i +} + +define <1 x i64> @test_vbsl_u64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) { +; CHECK-LABEL: test_vbsl_u64: +; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) + ret <1 x i64> %vbsl3.i +} + +define <2 x float> @test_vbsl_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) { +; CHECK-LABEL: test_vbsl_f32: +; CHECK: bsl 
{{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vbsl3.i = tail call <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) + ret <2 x float> %vbsl3.i +} + +define <1 x double> @test_vbsl_f64(<1 x i64> %v1, <1 x double> %v2, <1 x double> %v3) { +; CHECK-LABEL: test_vbsl_f64: +; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vbsl.i = bitcast <1 x i64> %v1 to <1 x double> + %vbsl3.i = tail call <1 x double> @llvm.arm.neon.vbsl.v1f64(<1 x double> %vbsl.i, <1 x double> %v2, <1 x double> %v3) + ret <1 x double> %vbsl3.i +} + +define <8 x i8> @test_vbsl_p8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) { +; CHECK-LABEL: test_vbsl_p8: +; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) + ret <8 x i8> %vbsl.i +} + +define <4 x i16> @test_vbsl_p16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) { +; CHECK-LABEL: test_vbsl_p16: +; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) + ret <4 x i16> %vbsl3.i +} + +define <16 x i8> @test_vbslq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) { +; CHECK-LABEL: test_vbslq_s8: +; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) + ret <16 x i8> %vbsl.i +} + +define <8 x i16> @test_vbslq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) { +; CHECK-LABEL: test_vbslq_s16: +; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) + ret <8 x i16> %vbsl3.i +} + +define <4 x i32> @test_vbslq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { +; CHECK-LABEL: test_vbslq_s32: +; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, 
{{v[0-9]+}}.16b +entry: + %vbsl3.i = tail call <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) + ret <4 x i32> %vbsl3.i +} + +define <2 x i64> @test_vbslq_s64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) { +; CHECK-LABEL: test_vbslq_s64: +; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) + ret <2 x i64> %vbsl3.i +} + +define <16 x i8> @test_vbslq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) { +; CHECK-LABEL: test_vbslq_u8: +; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) + ret <16 x i8> %vbsl.i +} + +define <8 x i16> @test_vbslq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) { +; CHECK-LABEL: test_vbslq_u16: +; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) + ret <8 x i16> %vbsl3.i +} + +define <4 x i32> @test_vbslq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { +; CHECK-LABEL: test_vbslq_u32: +; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vbsl3.i = tail call <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) + ret <4 x i32> %vbsl3.i +} + +define <2 x i64> @test_vbslq_u64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) { +; CHECK-LABEL: test_vbslq_u64: +; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) + ret <2 x i64> %vbsl3.i +} + +define <4 x float> @test_vbslq_f32(<4 x i32> %v1, <4 x float> %v2, <4 x float> %v3) { +; CHECK-LABEL: test_vbslq_f32: +; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vbsl.i = bitcast <4 x i32> %v1 to <4 x float> + %vbsl3.i 
= tail call <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float> %vbsl.i, <4 x float> %v2, <4 x float> %v3) + ret <4 x float> %vbsl3.i +} + +define <16 x i8> @test_vbslq_p8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) { +; CHECK-LABEL: test_vbslq_p8: +; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) + ret <16 x i8> %vbsl.i +} + +define <8 x i16> @test_vbslq_p16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) { +; CHECK-LABEL: test_vbslq_p16: +; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) + ret <8 x i16> %vbsl3.i +} + +define <2 x double> @test_vbslq_f64(<2 x i64> %v1, <2 x double> %v2, <2 x double> %v3) { +; CHECK-LABEL: test_vbslq_f64: +; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vbsl.i = bitcast <2 x i64> %v1 to <2 x double> + %vbsl3.i = tail call <2 x double> @llvm.arm.neon.vbsl.v2f64(<2 x double> %vbsl.i, <2 x double> %v2, <2 x double> %v3) + ret <2 x double> %vbsl3.i +} + diff --git a/test/CodeGen/AArch64/neon-compare-instructions.ll b/test/CodeGen/AArch64/neon-compare-instructions.ll index 0848f9b..68f0342 100644 --- a/test/CodeGen/AArch64/neon-compare-instructions.ll +++ b/test/CodeGen/AArch64/neon-compare-instructions.ll @@ -51,8 +51,7 @@ define <2 x i64> @cmeq2xi64(<2 x i64> %A, <2 x i64> %B) { define <8 x i8> @cmne8xi8(<8 x i8> %A, <8 x i8> %B) { ;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = icmp ne <8 x i8> %A, %B; %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> ret <8 x i8> %tmp4 @@ -60,8 +59,7 @@ define <8 x i8> @cmne8xi8(<8 x i8> %A, <8 x i8> %B) { define <16 x i8> @cmne16xi8(<16 x i8> %A, <16 x i8> %B) { ;CHECK: 
cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = icmp ne <16 x i8> %A, %B; %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> ret <16 x i8> %tmp4 @@ -69,8 +67,7 @@ define <16 x i8> @cmne16xi8(<16 x i8> %A, <16 x i8> %B) { define <4 x i16> @cmne4xi16(<4 x i16> %A, <4 x i16> %B) { ;CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = icmp ne <4 x i16> %A, %B; %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> ret <4 x i16> %tmp4 @@ -78,8 +75,7 @@ define <4 x i16> @cmne4xi16(<4 x i16> %A, <4 x i16> %B) { define <8 x i16> @cmne8xi16(<8 x i16> %A, <8 x i16> %B) { ;CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = icmp ne <8 x i16> %A, %B; %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> ret <8 x i16> %tmp4 @@ -87,8 +83,7 @@ define <8 x i16> @cmne8xi16(<8 x i16> %A, <8 x i16> %B) { define <2 x i32> @cmne2xi32(<2 x i32> %A, <2 x i32> %B) { ;CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = icmp ne <2 x i32> %A, %B; %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -96,8 +91,7 @@ define <2 x i32> @cmne2xi32(<2 x i32> %A, <2 x i32> %B) { define <4 x i32> @cmne4xi32(<4 x i32> %A, <4 x i32> %B) { ;CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, 
{{v[0-9]+}}.16b %tmp3 = icmp ne <4 x i32> %A, %B; %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -105,8 +99,7 @@ define <4 x i32> @cmne4xi32(<4 x i32> %A, <4 x i32> %B) { define <2 x i64> @cmne2xi64(<2 x i64> %A, <2 x i64> %B) { ;CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = icmp ne <2 x i64> %A, %B; %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -867,8 +860,7 @@ define <2 x i64> @cmltz2xi64(<2 x i64> %A) { define <8 x i8> @cmneqz8xi8(<8 x i8> %A) { ;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x0 -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = icmp ne <8 x i8> %A, zeroinitializer; %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> ret <8 x i8> %tmp4 @@ -876,8 +868,7 @@ define <8 x i8> @cmneqz8xi8(<8 x i8> %A) { define <16 x i8> @cmneqz16xi8(<16 x i8> %A) { ;CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x0 -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = icmp ne <16 x i8> %A, zeroinitializer; %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> ret <16 x i8> %tmp4 @@ -885,8 +876,7 @@ define <16 x i8> @cmneqz16xi8(<16 x i8> %A) { define <4 x i16> @cmneqz4xi16(<4 x i16> %A) { ;CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0x0 -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = icmp ne <4 x i16> %A, zeroinitializer; %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> ret <4 x i16> %tmp4 @@ -894,8 +884,7 @@ define <4 x i16> @cmneqz4xi16(<4 x i16> %A) { define <8 x i16> @cmneqz8xi16(<8 x i16> %A) { ;CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0x0 
-;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = icmp ne <8 x i16> %A, zeroinitializer; %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> ret <8 x i16> %tmp4 @@ -903,8 +892,7 @@ define <8 x i16> @cmneqz8xi16(<8 x i16> %A) { define <2 x i32> @cmneqz2xi32(<2 x i32> %A) { ;CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0x0 -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = icmp ne <2 x i32> %A, zeroinitializer; %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -912,8 +900,7 @@ define <2 x i32> @cmneqz2xi32(<2 x i32> %A) { define <4 x i32> @cmneqz4xi32(<4 x i32> %A) { ;CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0x0 -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = icmp ne <4 x i32> %A, zeroinitializer; %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -921,8 +908,7 @@ define <4 x i32> @cmneqz4xi32(<4 x i32> %A) { define <2 x i64> @cmneqz2xi64(<2 x i64> %A) { ;CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0x0 -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = icmp ne <2 x i64> %A, zeroinitializer; %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -1369,8 +1355,7 @@ define <2 x i32> @fcmuno2xfloat(<2 x float> %A, <2 x float> %B) { ;CHECK: fcmge {{v[0-9]+}}.2s, v0.2s, v1.2s ;CHECK-NEXT: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s ;CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = fcmp uno <2 x float> %A, %B %tmp4 = 
sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -1382,8 +1367,7 @@ define <4 x i32> @fcmuno4xfloat(<4 x float> %A, <4 x float> %B) { ;CHECK: fcmge {{v[0-9]+}}.4s, v0.4s, v1.4s ;CHECK-NEXT: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s ;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp uno <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -1395,8 +1379,7 @@ define <2 x i64> @fcmuno2xdouble(<2 x double> %A, <2 x double> %B) { ;CHECK: fcmge {{v[0-9]+}}.2d, v0.2d, v1.2d ;CHECK-NEXT: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d ;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp uno <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -1408,8 +1391,7 @@ define <2 x i32> @fcmueq2xfloat(<2 x float> %A, <2 x float> %B) { ;CHECK: fcmgt {{v[0-9]+}}.2s, v0.2s, v1.2s ;CHECK-NEXT: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s ;CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = fcmp ueq <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -1421,8 +1403,7 @@ define <4 x i32> @fcmueq4xfloat(<4 x float> %A, <4 x float> %B) { ;CHECK: fcmgt {{v[0-9]+}}.4s, v0.4s, v1.4s ;CHECK-NEXT: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s ;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ueq <4 x float> %A, 
%B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -1434,8 +1415,7 @@ define <2 x i64> @fcmueq2xdouble(<2 x double> %A, <2 x double> %B) { ;CHECK: fcmgt {{v[0-9]+}}.2d, v0.2d, v1.2d ;CHECK-NEXT: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d ;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ueq <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -1445,8 +1425,7 @@ define <2 x i32> @fcmuge2xfloat(<2 x float> %A, <2 x float> %B) { ; Using registers other than v0, v1 are possible, but would be odd. ; UGE = ULE with swapped operands, ULE implemented as !OGT. ;CHECK: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = fcmp uge <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -1456,8 +1435,7 @@ define <4 x i32> @fcmuge4xfloat(<4 x float> %A, <4 x float> %B) { ; Using registers other than v0, v1 are possible, but would be odd. ; UGE = ULE with swapped operands, ULE implemented as !OGT. ;CHECK: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp uge <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -1467,8 +1445,7 @@ define <2 x i64> @fcmuge2xdouble(<2 x double> %A, <2 x double> %B) { ; Using registers other than v0, v1 are possible, but would be odd. ; UGE = ULE with swapped operands, ULE implemented as !OGT. 
;CHECK: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp uge <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -1478,8 +1455,7 @@ define <2 x i32> @fcmugt2xfloat(<2 x float> %A, <2 x float> %B) { ; Using registers other than v0, v1 are possible, but would be odd. ; UGT = ULT with swapped operands, ULT implemented as !OGE. ;CHECK: fcmge {{v[0-9]+}}.2s, v1.2s, v0.2s -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = fcmp ugt <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -1489,16 +1465,14 @@ define <4 x i32> @fcmugt4xfloat(<4 x float> %A, <4 x float> %B) { ; Using registers other than v0, v1 are possible, but would be odd. ; UGT = ULT with swapped operands, ULT implemented as !OGE. ;CHECK: fcmge {{v[0-9]+}}.4s, v1.4s, v0.4s -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ugt <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 } define <2 x i64> @fcmugt2xdouble(<2 x double> %A, <2 x double> %B) { ;CHECK: fcmge {{v[0-9]+}}.2d, v1.2d, v0.2d -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ugt <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -1508,8 +1482,7 @@ define <2 x i32> @fcmule2xfloat(<2 x float> %A, <2 x float> %B) { ; Using registers other than v0, v1 are possible, but would be odd. ; ULE implemented as !OGT. 
;CHECK: fcmgt {{v[0-9]+}}.2s, v0.2s, v1.2s -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = fcmp ule <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -1519,8 +1492,7 @@ define <4 x i32> @fcmule4xfloat(<4 x float> %A, <4 x float> %B) { ; Using registers other than v0, v1 are possible, but would be odd. ; ULE implemented as !OGT. ;CHECK: fcmgt {{v[0-9]+}}.4s, v0.4s, v1.4s -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ule <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -1529,8 +1501,7 @@ define <2 x i64> @fcmule2xdouble(<2 x double> %A, <2 x double> %B) { ; Using registers other than v0, v1 are possible, but would be odd. ; ULE implemented as !OGT. ;CHECK: fcmgt {{v[0-9]+}}.2d, v0.2d, v1.2d -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ule <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -1540,8 +1511,7 @@ define <2 x i32> @fcmult2xfloat(<2 x float> %A, <2 x float> %B) { ; Using registers other than v0, v1 are possible, but would be odd. ; ULT implemented as !OGE. ;CHECK: fcmge {{v[0-9]+}}.2s, v0.2s, v1.2s -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = fcmp ult <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -1551,8 +1521,7 @@ define <4 x i32> @fcmult4xfloat(<4 x float> %A, <4 x float> %B) { ; Using registers other than v0, v1 are possible, but would be odd. ; ULT implemented as !OGE. 
;CHECK: fcmge {{v[0-9]+}}.4s, v0.4s, v1.4s -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ult <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -1561,8 +1530,7 @@ define <2 x i64> @fcmult2xdouble(<2 x double> %A, <2 x double> %B) { ; Using registers other than v0, v1 are possible, but would be odd. ; ULT implemented as !OGE. ;CHECK: fcmge {{v[0-9]+}}.2d, v0.2d, v1.2d -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ult <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -1572,8 +1540,7 @@ define <2 x i32> @fcmune2xfloat(<2 x float> %A, <2 x float> %B) { ; Using registers other than v0, v1 are possible, but would be odd. ; UNE = !OEQ. ;CHECK: fcmeq {{v[0-9]+}}.2s, v0.2s, v1.2s -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = fcmp une <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -1583,8 +1550,7 @@ define <4 x i32> @fcmune4xfloat(<4 x float> %A, <4 x float> %B) { ; Using registers other than v0, v1 are possible, but would be odd. ; UNE = !OEQ. ;CHECK: fcmeq {{v[0-9]+}}.4s, v0.4s, v1.4s -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp une <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -1593,8 +1559,7 @@ define <2 x i64> @fcmune2xdouble(<2 x double> %A, <2 x double> %B) { ; Using registers other than v0, v1 are possible, but would be odd. ; UNE = !OEQ. 
;CHECK: fcmeq {{v[0-9]+}}.2d, v0.2d, v1.2d -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp une <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -1766,8 +1731,7 @@ define <2 x i32> @fcmueqz2xfloat(<2 x float> %A) { ;CHECK: fcmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0 ;CHECK-NEXT: fcmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0 ;CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = fcmp ueq <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -1778,8 +1742,7 @@ define <4 x i32> @fcmueqz4xfloat(<4 x float> %A) { ;CHECK: fcmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0 ;CHECK-NEXT: fcmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0 ;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ueq <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -1790,8 +1753,7 @@ define <2 x i64> @fcmueqz2xdouble(<2 x double> %A) { ;CHECK: fcmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0 ;CHECK-NEXT: fcmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0 ;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ueq <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -1800,8 +1762,7 @@ define <2 x i64> @fcmueqz2xdouble(<2 x double> %A) { define <2 x i32> @fcmugez2xfloat(<2 x float> %A) { ; UGE with zero = !OLT 
;CHECK: fcmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = fcmp uge <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -1810,8 +1771,7 @@ define <2 x i32> @fcmugez2xfloat(<2 x float> %A) { define <4 x i32> @fcmugez4xfloat(<4 x float> %A) { ; UGE with zero = !OLT ;CHECK: fcmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp uge <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -1819,8 +1779,7 @@ define <4 x i32> @fcmugez4xfloat(<4 x float> %A) { define <2 x i64> @fcmugez2xdouble(<2 x double> %A) { ; UGE with zero = !OLT ;CHECK: fcmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp uge <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -1829,8 +1788,7 @@ define <2 x i64> @fcmugez2xdouble(<2 x double> %A) { define <2 x i32> @fcmugtz2xfloat(<2 x float> %A) { ; UGT with zero = !OLE ;CHECK: fcmle {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = fcmp ugt <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -1839,8 +1797,7 @@ define <2 x i32> @fcmugtz2xfloat(<2 x float> %A) { define <4 x i32> @fcmugtz4xfloat(<4 x float> %A) { ; UGT with zero = !OLE ;CHECK: fcmle {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, 
{{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ugt <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -1848,8 +1805,7 @@ define <4 x i32> @fcmugtz4xfloat(<4 x float> %A) { define <2 x i64> @fcmugtz2xdouble(<2 x double> %A) { ; UGT with zero = !OLE ;CHECK: fcmle {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ugt <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -1858,8 +1814,7 @@ define <2 x i64> @fcmugtz2xdouble(<2 x double> %A) { define <2 x i32> @fcmultz2xfloat(<2 x float> %A) { ; ULT with zero = !OGE ;CHECK: fcmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = fcmp ult <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -1867,8 +1822,7 @@ define <2 x i32> @fcmultz2xfloat(<2 x float> %A) { define <4 x i32> @fcmultz4xfloat(<4 x float> %A) { ;CHECK: fcmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ult <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -1876,8 +1830,7 @@ define <4 x i32> @fcmultz4xfloat(<4 x float> %A) { define <2 x i64> @fcmultz2xdouble(<2 x double> %A) { ;CHECK: fcmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ult <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 
x i64> ret <2 x i64> %tmp4 @@ -1887,8 +1840,7 @@ define <2 x i64> @fcmultz2xdouble(<2 x double> %A) { define <2 x i32> @fcmulez2xfloat(<2 x float> %A) { ; ULE with zero = !OGT ;CHECK: fcmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = fcmp ule <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -1897,8 +1849,7 @@ define <2 x i32> @fcmulez2xfloat(<2 x float> %A) { define <4 x i32> @fcmulez4xfloat(<4 x float> %A) { ; ULE with zero = !OGT ;CHECK: fcmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ule <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -1907,8 +1858,7 @@ define <4 x i32> @fcmulez4xfloat(<4 x float> %A) { define <2 x i64> @fcmulez2xdouble(<2 x double> %A) { ; ULE with zero = !OGT ;CHECK: fcmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ule <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -1917,8 +1867,7 @@ define <2 x i64> @fcmulez2xdouble(<2 x double> %A) { define <2 x i32> @fcmunez2xfloat(<2 x float> %A) { ; UNE with zero = !OEQ with zero ;CHECK: fcmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = fcmp une <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -1927,8 +1876,7 @@ define <2 x i32> @fcmunez2xfloat(<2 x float> %A) { define <4 x i32> 
@fcmunez4xfloat(<4 x float> %A) { ; UNE with zero = !OEQ with zero ;CHECK: fcmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp une <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -1936,8 +1884,7 @@ define <4 x i32> @fcmunez4xfloat(<4 x float> %A) { define <2 x i64> @fcmunez2xdouble(<2 x double> %A) { ; UNE with zero = !OEQ with zero ;CHECK: fcmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp une <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -1949,8 +1896,7 @@ define <2 x i32> @fcmunoz2xfloat(<2 x float> %A) { ;CHECK: fcmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0 ;CHECK-NEXT: fcmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0 ;CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = fcmp uno <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -1961,8 +1907,7 @@ define <4 x i32> @fcmunoz4xfloat(<4 x float> %A) { ;CHECK: fcmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0 ;CHECK-NEXT: fcmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0 ;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp uno <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -1973,8 +1918,7 @@ define <2 x i64> @fcmunoz2xdouble(<2 x double> %A) { ;CHECK: fcmge 
{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0 ;CHECK-NEXT: fcmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0 ;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp uno <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 diff --git a/test/CodeGen/AArch64/neon-copy.ll b/test/CodeGen/AArch64/neon-copy.ll new file mode 100644 index 0000000..e18530e --- /dev/null +++ b/test/CodeGen/AArch64/neon-copy.ll @@ -0,0 +1,615 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s + + +define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) { +;CHECK: ins {{v[0-31]+}}.b[15], {{w[0-31]+}} + %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15 + ret <16 x i8> %tmp3 +} + +define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) { +;CHECK: ins {{v[0-31]+}}.h[6], {{w[0-31]+}} + %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6 + ret <8 x i16> %tmp3 +} + +define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) { +;CHECK: ins {{v[0-31]+}}.s[2], {{w[0-31]+}} + %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2 + ret <4 x i32> %tmp3 +} + +define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) { +;CHECK: ins {{v[0-31]+}}.d[1], {{x[0-31]+}} + %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1 + ret <2 x i64> %tmp3 +} + +define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) { +;CHECK: ins {{v[0-31]+}}.b[5], {{w[0-31]+}} + %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5 + ret <8 x i8> %tmp3 +} + +define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) { +;CHECK: ins {{v[0-31]+}}.h[3], {{w[0-31]+}} + %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3 + ret <4 x i16> %tmp3 +} + +define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) { +;CHECK: ins {{v[0-31]+}}.s[1], {{w[0-31]+}} + %tmp3 = insertelement <2 x i32> 
%tmp1, i32 %tmp2, i32 1 + ret <2 x i32> %tmp3 +} + +define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) { +;CHECK: ins {{v[0-31]+}}.b[15], {{v[0-31]+}}.b[2] + %tmp3 = extractelement <16 x i8> %tmp1, i32 2 + %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15 + ret <16 x i8> %tmp4 +} + +define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) { +;CHECK: ins {{v[0-31]+}}.h[7], {{v[0-31]+}}.h[2] + %tmp3 = extractelement <8 x i16> %tmp1, i32 2 + %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7 + ret <8 x i16> %tmp4 +} + +define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) { +;CHECK: ins {{v[0-31]+}}.s[1], {{v[0-31]+}}.s[2] + %tmp3 = extractelement <4 x i32> %tmp1, i32 2 + %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1 + ret <4 x i32> %tmp4 +} + +define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) { +;CHECK: ins {{v[0-31]+}}.d[1], {{v[0-31]+}}.d[0] + %tmp3 = extractelement <2 x i64> %tmp1, i32 0 + %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1 + ret <2 x i64> %tmp4 +} + +define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) { +;CHECK: ins {{v[0-31]+}}.s[1], {{v[0-31]+}}.s[2] + %tmp3 = extractelement <4 x float> %tmp1, i32 2 + %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1 + ret <4 x float> %tmp4 +} + +define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) { +;CHECK: ins {{v[0-31]+}}.d[1], {{v[0-31]+}}.d[0] + %tmp3 = extractelement <2 x double> %tmp1, i32 0 + %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1 + ret <2 x double> %tmp4 +} + +define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) { +;CHECK: ins {{v[0-31]+}}.b[15], {{v[0-31]+}}.b[2] + %tmp3 = extractelement <8 x i8> %tmp1, i32 2 + %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15 + ret <16 x i8> %tmp4 +} + +define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) { +;CHECK: ins {{v[0-31]+}}.h[7], {{v[0-31]+}}.h[2] + %tmp3 = extractelement <4 x i16> %tmp1, i32 2 + %tmp4 = insertelement <8 x i16> 
%tmp2, i16 %tmp3, i32 7 + ret <8 x i16> %tmp4 +} + +define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) { +;CHECK: ins {{v[0-31]+}}.s[1], {{v[0-31]+}}.s[1] + %tmp3 = extractelement <2 x i32> %tmp1, i32 1 + %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1 + ret <4 x i32> %tmp4 +} + +define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) { +;CHECK: ins {{v[0-31]+}}.d[1], {{v[0-31]+}}.d[0] + %tmp3 = extractelement <1 x i64> %tmp1, i32 0 + %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1 + ret <2 x i64> %tmp4 +} + +define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) { +;CHECK: ins {{v[0-31]+}}.s[1], {{v[0-31]+}}.s[1] + %tmp3 = extractelement <2 x float> %tmp1, i32 1 + %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1 + ret <4 x float> %tmp4 +} + +define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) { +;CHECK: ins {{v[0-31]+}}.d[1], {{v[0-31]+}}.d[0] + %tmp3 = extractelement <1 x double> %tmp1, i32 0 + %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1 + ret <2 x double> %tmp4 +} + +define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) { +;CHECK: ins {{v[0-31]+}}.b[7], {{v[0-31]+}}.b[2] + %tmp3 = extractelement <16 x i8> %tmp1, i32 2 + %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7 + ret <8 x i8> %tmp4 +} + +define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) { +;CHECK: ins {{v[0-31]+}}.h[3], {{v[0-31]+}}.h[2] + %tmp3 = extractelement <8 x i16> %tmp1, i32 2 + %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3 + ret <4 x i16> %tmp4 +} + +define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) { +;CHECK: ins {{v[0-31]+}}.s[1], {{v[0-31]+}}.s[2] + %tmp3 = extractelement <4 x i32> %tmp1, i32 2 + %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1 + ret <2 x i32> %tmp4 +} + +define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) { +;CHECK: ins {{v[0-31]+}}.d[0], {{v[0-31]+}}.d[0] + %tmp3 = extractelement <2 x i64> %tmp1, i32 0 + %tmp4 = insertelement <1 x i64> %tmp2, 
i64 %tmp3, i32 0 + ret <1 x i64> %tmp4 +} + +define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) { +;CHECK: ins {{v[0-31]+}}.s[1], {{v[0-31]+}}.s[2] + %tmp3 = extractelement <4 x float> %tmp1, i32 2 + %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1 + ret <2 x float> %tmp4 +} + +define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) { +;CHECK: ins {{v[0-31]+}}.d[0], {{v[0-31]+}}.d[0] + %tmp3 = extractelement <2 x double> %tmp1, i32 0 + %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0 + ret <1 x double> %tmp4 +} + +define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) { +;CHECK: ins {{v[0-31]+}}.b[4], {{v[0-31]+}}.b[2] + %tmp3 = extractelement <8 x i8> %tmp1, i32 2 + %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4 + ret <8 x i8> %tmp4 +} + +define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) { +;CHECK: ins {{v[0-31]+}}.h[3], {{v[0-31]+}}.h[2] + %tmp3 = extractelement <4 x i16> %tmp1, i32 2 + %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3 + ret <4 x i16> %tmp4 +} + +define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) { +;CHECK: ins {{v[0-31]+}}.s[1], {{v[0-31]+}}.s[0] + %tmp3 = extractelement <2 x i32> %tmp1, i32 0 + %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1 + ret <2 x i32> %tmp4 +} + +define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) { +;CHECK: ins {{v[0-31]+}}.d[0], {{v[0-31]+}}.d[0] + %tmp3 = extractelement <1 x i64> %tmp1, i32 0 + %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0 + ret <1 x i64> %tmp4 +} + +define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) { +;CHECK: ins {{v[0-31]+}}.s[1], {{v[0-31]+}}.s[0] + %tmp3 = extractelement <2 x float> %tmp1, i32 0 + %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1 + ret <2 x float> %tmp4 +} + +define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) { +;CHECK: ins {{v[0-31]+}}.d[0], {{v[0-31]+}}.d[0] + %tmp3 = extractelement <1 x double> %tmp1, i32 0 + %tmp4 = insertelement 
<1 x double> %tmp2, double %tmp3, i32 0 + ret <1 x double> %tmp4 +} + +define i32 @umovw16b(<16 x i8> %tmp1) { +;CHECK: umov {{w[0-31]+}}, {{v[0-31]+}}.b[8] + %tmp3 = extractelement <16 x i8> %tmp1, i32 8 + %tmp4 = zext i8 %tmp3 to i32 + ret i32 %tmp4 +} + +define i32 @umovw8h(<8 x i16> %tmp1) { +;CHECK: umov {{w[0-31]+}}, {{v[0-31]+}}.h[2] + %tmp3 = extractelement <8 x i16> %tmp1, i32 2 + %tmp4 = zext i16 %tmp3 to i32 + ret i32 %tmp4 +} + +define i32 @umovw4s(<4 x i32> %tmp1) { +;CHECK: umov {{w[0-31]+}}, {{v[0-31]+}}.s[2] + %tmp3 = extractelement <4 x i32> %tmp1, i32 2 + ret i32 %tmp3 +} + +define i64 @umovx2d(<2 x i64> %tmp1) { +;CHECK: umov {{x[0-31]+}}, {{v[0-31]+}}.d[0] + %tmp3 = extractelement <2 x i64> %tmp1, i32 0 + ret i64 %tmp3 +} + +define i32 @umovw8b(<8 x i8> %tmp1) { +;CHECK: umov {{w[0-31]+}}, {{v[0-31]+}}.b[7] + %tmp3 = extractelement <8 x i8> %tmp1, i32 7 + %tmp4 = zext i8 %tmp3 to i32 + ret i32 %tmp4 +} + +define i32 @umovw4h(<4 x i16> %tmp1) { +;CHECK: umov {{w[0-31]+}}, {{v[0-31]+}}.h[2] + %tmp3 = extractelement <4 x i16> %tmp1, i32 2 + %tmp4 = zext i16 %tmp3 to i32 + ret i32 %tmp4 +} + +define i32 @umovw2s(<2 x i32> %tmp1) { +;CHECK: umov {{w[0-31]+}}, {{v[0-31]+}}.s[1] + %tmp3 = extractelement <2 x i32> %tmp1, i32 1 + ret i32 %tmp3 +} + +define i64 @umovx1d(<1 x i64> %tmp1) { +;CHECK: fmov {{x[0-31]+}}, {{d[0-31]+}} + %tmp3 = extractelement <1 x i64> %tmp1, i32 0 + ret i64 %tmp3 +} + +define i32 @smovw16b(<16 x i8> %tmp1) { +;CHECK: smov {{w[0-31]+}}, {{v[0-31]+}}.b[8] + %tmp3 = extractelement <16 x i8> %tmp1, i32 8 + %tmp4 = sext i8 %tmp3 to i32 + %tmp5 = add i32 5, %tmp4 + ret i32 %tmp5 +} + +define i32 @smovw8h(<8 x i16> %tmp1) { +;CHECK: smov {{w[0-31]+}}, {{v[0-31]+}}.h[2] + %tmp3 = extractelement <8 x i16> %tmp1, i32 2 + %tmp4 = sext i16 %tmp3 to i32 + %tmp5 = add i32 5, %tmp4 + ret i32 %tmp5 +} + +define i32 @smovx16b(<16 x i8> %tmp1) { +;CHECK: smov {{x[0-31]+}}, {{v[0-31]+}}.b[8] + %tmp3 = extractelement <16 x i8> %tmp1, i32 8 + 
%tmp4 = sext i8 %tmp3 to i32 + ret i32 %tmp4 +} + +define i32 @smovx8h(<8 x i16> %tmp1) { +;CHECK: smov {{x[0-31]+}}, {{v[0-31]+}}.h[2] + %tmp3 = extractelement <8 x i16> %tmp1, i32 2 + %tmp4 = sext i16 %tmp3 to i32 + ret i32 %tmp4 +} + +define i64 @smovx4s(<4 x i32> %tmp1) { +;CHECK: smov {{x[0-31]+}}, {{v[0-31]+}}.s[2] + %tmp3 = extractelement <4 x i32> %tmp1, i32 2 + %tmp4 = sext i32 %tmp3 to i64 + ret i64 %tmp4 +} + +define i32 @smovw8b(<8 x i8> %tmp1) { +;CHECK: smov {{w[0-31]+}}, {{v[0-31]+}}.b[4] + %tmp3 = extractelement <8 x i8> %tmp1, i32 4 + %tmp4 = sext i8 %tmp3 to i32 + %tmp5 = add i32 5, %tmp4 + ret i32 %tmp5 +} + +define i32 @smovw4h(<4 x i16> %tmp1) { +;CHECK: smov {{w[0-31]+}}, {{v[0-31]+}}.h[2] + %tmp3 = extractelement <4 x i16> %tmp1, i32 2 + %tmp4 = sext i16 %tmp3 to i32 + %tmp5 = add i32 5, %tmp4 + ret i32 %tmp5 +} + +define i32 @smovx8b(<8 x i8> %tmp1) { +;CHECK: smov {{x[0-31]+}}, {{v[0-31]+}}.b[6] + %tmp3 = extractelement <8 x i8> %tmp1, i32 6 + %tmp4 = sext i8 %tmp3 to i32 + ret i32 %tmp4 +} + +define i32 @smovx4h(<4 x i16> %tmp1) { +;CHECK: smov {{x[0-31]+}}, {{v[0-31]+}}.h[2] + %tmp3 = extractelement <4 x i16> %tmp1, i32 2 + %tmp4 = sext i16 %tmp3 to i32 + ret i32 %tmp4 +} + +define i64 @smovx2s(<2 x i32> %tmp1) { +;CHECK: smov {{x[0-31]+}}, {{v[0-31]+}}.s[1] + %tmp3 = extractelement <2 x i32> %tmp1, i32 1 + %tmp4 = sext i32 %tmp3 to i64 + ret i64 %tmp4 +} + +define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) { +;CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] + %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7> + ret <8 x i8> %vset_lane +} + +define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) { +;CHECK: ins {{v[0-9]+}}.b[14], {{v[0-9]+}}.b[6] + %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15> + ret <16 x 
i8> %vset_lane +} + +define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) { +;CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[0] + %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0> + ret <8 x i8> %vset_lane +} + +define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) { +;CHECK: ins {{v[0-9]+}}.b[0], {{v[0-9]+}}.b[15] + %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> + ret <16 x i8> %vset_lane +} + +define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 { +;CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}} + %vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0 + %vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1 + %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2 + %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3 + %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4 + %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5 + %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6 + %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7 + ret <8 x i8> %vecinit7.i +} + +define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 { +;CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}} + %vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0 + %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1 + %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2 + %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3 + ret <4 x i16> %vecinit3.i +} + +define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 { +;CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}} + %vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0 + %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1 + ret <2 x i32> %vecinit1.i +} + +define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 { +;CHECK: fmov 
{{d[0-9]+}}, {{x[0-9]+}} + %vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0 + ret <1 x i64> %vecinit.i +} + +define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 { +;CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}} + %vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0 + %vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1 + %vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2 + %vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3 + %vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4 + %vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5 + %vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6 + %vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7 + %vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8 + %vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9 + %vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10 + %vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11 + %vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12 + %vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13 + %vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14 + %vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15 + ret <16 x i8> %vecinit15.i +} + +define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 { +;CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}} + %vecinit.i = insertelement <8 x i16> undef, i16 %v1, i32 0 + %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1 + %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2 + %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3 + %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4 + %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5 + %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6 + %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7 + ret <8 x i16> %vecinit7.i +} + +define <4 x 
i32> @test_vdupq_n_u32(i32 %v1) #0 { +;CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}} + %vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0 + %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1 + %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2 + %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3 + ret <4 x i32> %vecinit3.i +} + +define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 { +;CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}} + %vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0 + %vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1 + ret <2 x i64> %vecinit1.i +} + +define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 { +;CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5] + %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> + ret <8 x i8> %shuffle +} + +define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 { +;CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2] + %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2> + ret <4 x i16> %shuffle +} + +define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 { +;CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] + %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1> + ret <2 x i32> %shuffle +} + +define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 { +;CHECK: {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5] + %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> + ret <16 x i8> %shuffle +} + +define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 { +;CHECK: {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2] + %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> + ret <8 x i16> %shuffle +} + +define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 { +;CHECK: {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 
+ %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + ret <4 x i32> %shuffle +} + +define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 { +;CHECK: {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] + %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %shuffle +} + +define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 { +;CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5] + %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> + ret <8 x i8> %shuffle +} + +define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 { +;CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2] + %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2> + ret <4 x i16> %shuffle +} + +define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 { +;CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] + %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1> + ret <2 x i32> %shuffle +} + +define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 { +;CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5] + %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> + ret <16 x i8> %shuffle +} + +define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 { +;CHECK: {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2] + %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> + ret <8 x i16> %shuffle +} + +define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 { +;CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] + %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + ret <4 x i32> %shuffle +} + +define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 { +;CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] + %shuffle = shufflevector 
<2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %shuffle +} + +define i64 @test_bitcastv8i8toi64(<8 x i8> %in) { +; CHECK-LABEL: test_bitcastv8i8toi64: + %res = bitcast <8 x i8> %in to i64 +; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} + ret i64 %res +} + +define i64 @test_bitcastv4i16toi64(<4 x i16> %in) { +; CHECK-LABEL: test_bitcastv4i16toi64: + %res = bitcast <4 x i16> %in to i64 +; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} + ret i64 %res +} + +define i64 @test_bitcastv2i32toi64(<2 x i32> %in) { +; CHECK-LABEL: test_bitcastv2i32toi64: + %res = bitcast <2 x i32> %in to i64 +; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} + ret i64 %res +} + +define i64 @test_bitcastv2f32toi64(<2 x float> %in) { +; CHECK-LABEL: test_bitcastv2f32toi64: + %res = bitcast <2 x float> %in to i64 +; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} + ret i64 %res +} + +define i64 @test_bitcastv1i64toi64(<1 x i64> %in) { +; CHECK-LABEL: test_bitcastv1i64toi64: + %res = bitcast <1 x i64> %in to i64 +; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} + ret i64 %res +} + +define i64 @test_bitcastv1f64toi64(<1 x double> %in) { +; CHECK-LABEL: test_bitcastv1f64toi64: + %res = bitcast <1 x double> %in to i64 +; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} + ret i64 %res +} + +define <8 x i8> @test_bitcasti64tov8i8(i64 %in) { +; CHECK-LABEL: test_bitcasti64tov8i8: + %res = bitcast i64 %in to <8 x i8> +; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} + ret <8 x i8> %res +} + +define <4 x i16> @test_bitcasti64tov4i16(i64 %in) { +; CHECK-LABEL: test_bitcasti64tov4i16: + %res = bitcast i64 %in to <4 x i16> +; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} + ret <4 x i16> %res +} + +define <2 x i32> @test_bitcasti64tov2i32(i64 %in) { +; CHECK-LABEL: test_bitcasti64tov2i32: + %res = bitcast i64 %in to <2 x i32> +; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} + ret <2 x i32> %res +} + +define <2 x float> @test_bitcasti64tov2f32(i64 %in) { +; CHECK-LABEL: test_bitcasti64tov2f32: + %res = bitcast i64 %in to <2 x float> +; CHECK: fmov {{d[0-9]+}}, 
{{x[0-9]+}} + ret <2 x float> %res +} + +define <1 x i64> @test_bitcasti64tov1i64(i64 %in) { +; CHECK-LABEL: test_bitcasti64tov1i64: + %res = bitcast i64 %in to <1 x i64> +; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} + ret <1 x i64> %res +} + +define <1 x double> @test_bitcasti64tov1f64(i64 %in) { +; CHECK-LABEL: test_bitcasti64tov1f64: + %res = bitcast i64 %in to <1 x double> +; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} + ret <1 x double> %res +}
\ No newline at end of file diff --git a/test/CodeGen/AArch64/neon-crypto.ll b/test/CodeGen/AArch64/neon-crypto.ll new file mode 100644 index 0000000..0283e0e --- /dev/null +++ b/test/CodeGen/AArch64/neon-crypto.ll @@ -0,0 +1,149 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -mattr=+crypto | FileCheck %s +; RUN: not llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon 2>&1 | FileCheck --check-prefix=CHECK-NO-CRYPTO %s + +declare <4 x i32> @llvm.arm.neon.sha256su1.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1 + +declare <4 x i32> @llvm.arm.neon.sha256h2.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1 + +declare <4 x i32> @llvm.arm.neon.sha256h.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1 + +declare <4 x i32> @llvm.arm.neon.sha1su0.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1 + +declare <4 x i32> @llvm.aarch64.neon.sha1m(<4 x i32>, <1 x i32>, <4 x i32>) #1 + +declare <4 x i32> @llvm.aarch64.neon.sha1p(<4 x i32>, <1 x i32>, <4 x i32>) #1 + +declare <4 x i32> @llvm.aarch64.neon.sha1c(<4 x i32>, <1 x i32>, <4 x i32>) #1 + +declare <4 x i32> @llvm.arm.neon.sha256su0.v4i32(<4 x i32>, <4 x i32>) #1 + +declare <4 x i32> @llvm.arm.neon.sha1su1.v4i32(<4 x i32>, <4 x i32>) #1 + +declare <1 x i32> @llvm.arm.neon.sha1h.v1i32(<1 x i32>) #1 + +declare <16 x i8> @llvm.arm.neon.aesimc.v16i8(<16 x i8>) #1 + +declare <16 x i8> @llvm.arm.neon.aesmc.v16i8(<16 x i8>) #1 + +declare <16 x i8> @llvm.arm.neon.aesd.v16i8(<16 x i8>, <16 x i8>) #1 + +declare <16 x i8> @llvm.arm.neon.aese.v16i8(<16 x i8>, <16 x i8>) #1 + +define <16 x i8> @test_vaeseq_u8(<16 x i8> %data, <16 x i8> %key) { +; CHECK: test_vaeseq_u8: +; CHECK: aese {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK-NO-CRYPTO: Cannot select: intrinsic %llvm.arm.neon.aese +entry: + %aese.i = tail call <16 x i8> @llvm.arm.neon.aese.v16i8(<16 x i8> %data, <16 x i8> %key) + ret <16 x i8> %aese.i +} + +define <16 x i8> @test_vaesdq_u8(<16 x i8> %data, <16 x i8> %key) { +; CHECK: test_vaesdq_u8: +; 
CHECK: aesd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %aesd.i = tail call <16 x i8> @llvm.arm.neon.aesd.v16i8(<16 x i8> %data, <16 x i8> %key) + ret <16 x i8> %aesd.i +} + +define <16 x i8> @test_vaesmcq_u8(<16 x i8> %data) { +; CHECK: test_vaesmcq_u8: +; CHECK: aesmc {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %aesmc.i = tail call <16 x i8> @llvm.arm.neon.aesmc.v16i8(<16 x i8> %data) + ret <16 x i8> %aesmc.i +} + +define <16 x i8> @test_vaesimcq_u8(<16 x i8> %data) { +; CHECK: test_vaesimcq_u8: +; CHECK: aesimc {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %aesimc.i = tail call <16 x i8> @llvm.arm.neon.aesimc.v16i8(<16 x i8> %data) + ret <16 x i8> %aesimc.i +} + +define i32 @test_vsha1h_u32(i32 %hash_e) { +; CHECK: test_vsha1h_u32: +; CHECK: sha1h {{s[0-9]+}}, {{s[0-9]+}} +entry: + %sha1h.i = insertelement <1 x i32> undef, i32 %hash_e, i32 0 + %sha1h1.i = tail call <1 x i32> @llvm.arm.neon.sha1h.v1i32(<1 x i32> %sha1h.i) + %0 = extractelement <1 x i32> %sha1h1.i, i32 0 + ret i32 %0 +} + +define <4 x i32> @test_vsha1su1q_u32(<4 x i32> %tw0_3, <4 x i32> %w12_15) { +; CHECK: test_vsha1su1q_u32: +; CHECK: sha1su1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %sha1su12.i = tail call <4 x i32> @llvm.arm.neon.sha1su1.v4i32(<4 x i32> %tw0_3, <4 x i32> %w12_15) + ret <4 x i32> %sha1su12.i +} + +define <4 x i32> @test_vsha256su0q_u32(<4 x i32> %w0_3, <4 x i32> %w4_7) { +; CHECK: test_vsha256su0q_u32: +; CHECK: sha256su0 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %sha256su02.i = tail call <4 x i32> @llvm.arm.neon.sha256su0.v4i32(<4 x i32> %w0_3, <4 x i32> %w4_7) + ret <4 x i32> %sha256su02.i +} + +define <4 x i32> @test_vsha1cq_u32(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) { +; CHECK: test_vsha1cq_u32: +; CHECK: sha1c {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s +entry: + %sha1c.i = insertelement <1 x i32> undef, i32 %hash_e, i32 0 + %sha1c1.i = tail call <4 x i32> @llvm.aarch64.neon.sha1c(<4 x i32> %hash_abcd, <1 x i32> %sha1c.i, <4 x i32> %wk) + ret <4 x i32> %sha1c1.i +} + 
+define <4 x i32> @test_vsha1pq_u32(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) { +; CHECK: test_vsha1pq_u32: +; CHECK: sha1p {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s +entry: + %sha1p.i = insertelement <1 x i32> undef, i32 %hash_e, i32 0 + %sha1p1.i = tail call <4 x i32> @llvm.aarch64.neon.sha1p(<4 x i32> %hash_abcd, <1 x i32> %sha1p.i, <4 x i32> %wk) + ret <4 x i32> %sha1p1.i +} + +define <4 x i32> @test_vsha1mq_u32(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) { +; CHECK: test_vsha1mq_u32: +; CHECK: sha1m {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s +entry: + %sha1m.i = insertelement <1 x i32> undef, i32 %hash_e, i32 0 + %sha1m1.i = tail call <4 x i32> @llvm.aarch64.neon.sha1m(<4 x i32> %hash_abcd, <1 x i32> %sha1m.i, <4 x i32> %wk) + ret <4 x i32> %sha1m1.i +} + +define <4 x i32> @test_vsha1su0q_u32(<4 x i32> %w0_3, <4 x i32> %w4_7, <4 x i32> %w8_11) { +; CHECK: test_vsha1su0q_u32: +; CHECK: sha1su0 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %sha1su03.i = tail call <4 x i32> @llvm.arm.neon.sha1su0.v4i32(<4 x i32> %w0_3, <4 x i32> %w4_7, <4 x i32> %w8_11) + ret <4 x i32> %sha1su03.i +} + +define <4 x i32> @test_vsha256hq_u32(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk) { +; CHECK: test_vsha256hq_u32: +; CHECK: sha256h {{q[0-9]+}}, {{q[0-9]+}}, {{v[0-9]+}}.4s +entry: + %sha256h3.i = tail call <4 x i32> @llvm.arm.neon.sha256h.v4i32(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk) + ret <4 x i32> %sha256h3.i +} + +define <4 x i32> @test_vsha256h2q_u32(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk) { +; CHECK: test_vsha256h2q_u32: +; CHECK: sha256h2 {{q[0-9]+}}, {{q[0-9]+}}, {{v[0-9]+}}.4s +entry: + %sha256h23.i = tail call <4 x i32> @llvm.arm.neon.sha256h2.v4i32(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk) + ret <4 x i32> %sha256h23.i +} + +define <4 x i32> @test_vsha256su1q_u32(<4 x i32> %tw0_3, <4 x i32> %w8_11, <4 x i32> %w12_15) { +; CHECK: test_vsha256su1q_u32: +; CHECK: sha256su1 
{{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %sha256su13.i = tail call <4 x i32> @llvm.arm.neon.sha256su1.v4i32(<4 x i32> %tw0_3, <4 x i32> %w8_11, <4 x i32> %w12_15) + ret <4 x i32> %sha256su13.i +} + diff --git a/test/CodeGen/AArch64/neon-diagnostics.ll b/test/CodeGen/AArch64/neon-diagnostics.ll new file mode 100644 index 0000000..f546aa7 --- /dev/null +++ b/test/CodeGen/AArch64/neon-diagnostics.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) { +; CHECK: test_vfma_lane_f32: +; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}] +; CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +entry: + %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1> + %mul = fmul <2 x float> %shuffle, %b + %add = fadd <2 x float> %mul, %a + ret <2 x float> %add +} + +define <4 x i32> @test_vshrn_not_match(<2 x i32> %a, <2 x i64> %b) { +; CHECK: test_vshrn_not_match +; CHECK-NOT: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #35 + %1 = bitcast <2 x i32> %a to <1 x i64> + %2 = ashr <2 x i64> %b, <i64 35, i64 35> + %vshrn_n = trunc <2 x i64> %2 to <2 x i32> + %3 = bitcast <2 x i32> %vshrn_n to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1> + %4 = bitcast <2 x i64> %shuffle.i to <4 x i32> + ret <4 x i32> %4 +}
\ No newline at end of file diff --git a/test/CodeGen/AArch64/neon-extract.ll b/test/CodeGen/AArch64/neon-extract.ll new file mode 100644 index 0000000..5c52cd3 --- /dev/null +++ b/test/CodeGen/AArch64/neon-extract.ll @@ -0,0 +1,190 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +define <8 x i8> @test_vext_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vext_s8: +; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x2 +entry: + %vext = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9> + ret <8 x i8> %vext +} + +define <4 x i16> @test_vext_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vext_s16: +; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x6 +entry: + %vext = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6> + ret <4 x i16> %vext +} + +define <2 x i32> @test_vext_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vext_s32: +; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x4 +entry: + %vext = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 2> + ret <2 x i32> %vext +} + +define <1 x i64> @test_vext_s64(<1 x i64> %a, <1 x i64> %b) { +; CHECK: test_vext_s64: +entry: + %vext = shufflevector <1 x i64> %a, <1 x i64> %b, <1 x i32> <i32 0> + ret <1 x i64> %vext +} + +define <16 x i8> @test_vextq_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vextq_s8: +; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x2 +entry: + %vext = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17> + ret <16 x i8> %vext +} + +define <8 x i16> @test_vextq_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vextq_s16: +; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6 +entry: + %vext = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 
4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10> + ret <8 x i16> %vext +} + +define <4 x i32> @test_vextq_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vextq_s32: +; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x4 +entry: + %vext = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4> + ret <4 x i32> %vext +} + +define <2 x i64> @test_vextq_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vextq_s64: +; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x8 +entry: + %vext = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2> + ret <2 x i64> %vext +} + +define <8 x i8> @test_vext_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vext_u8: +; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x2 +entry: + %vext = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9> + ret <8 x i8> %vext +} + +define <4 x i16> @test_vext_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vext_u16: +; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x6 +entry: + %vext = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6> + ret <4 x i16> %vext +} + +define <2 x i32> @test_vext_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vext_u32: +; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x4 +entry: + %vext = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 2> + ret <2 x i32> %vext +} + +define <1 x i64> @test_vext_u64(<1 x i64> %a, <1 x i64> %b) { +; CHECK: test_vext_u64: +entry: + %vext = shufflevector <1 x i64> %a, <1 x i64> %b, <1 x i32> <i32 0> + ret <1 x i64> %vext +} + +define <16 x i8> @test_vextq_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vextq_u8: +; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x2 +entry: + %vext = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, 
i32 15, i32 16, i32 17> + ret <16 x i8> %vext +} + +define <8 x i16> @test_vextq_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vextq_u16: +; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6 +entry: + %vext = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10> + ret <8 x i16> %vext +} + +define <4 x i32> @test_vextq_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vextq_u32: +; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x4 +entry: + %vext = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4> + ret <4 x i32> %vext +} + +define <2 x i64> @test_vextq_u64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vextq_u64: +; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x8 +entry: + %vext = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2> + ret <2 x i64> %vext +} + +define <2 x float> @test_vext_f32(<2 x float> %a, <2 x float> %b) { +; CHECK: test_vext_f32: +; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x4 +entry: + %vext = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 2> + ret <2 x float> %vext +} + +define <1 x double> @test_vext_f64(<1 x double> %a, <1 x double> %b) { +; CHECK: test_vext_f64: +entry: + %vext = shufflevector <1 x double> %a, <1 x double> %b, <1 x i32> <i32 0> + ret <1 x double> %vext +} + +define <4 x float> @test_vextq_f32(<4 x float> %a, <4 x float> %b) { +; CHECK: test_vextq_f32: +; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x4 +entry: + %vext = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4> + ret <4 x float> %vext +} + +define <2 x double> @test_vextq_f64(<2 x double> %a, <2 x double> %b) { +; CHECK: test_vextq_f64: +; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x8 +entry: + %vext = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 2> + ret <2 x double> %vext +} 
+ +define <8 x i8> @test_vext_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vext_p8: +; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x2 +entry: + %vext = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9> + ret <8 x i8> %vext +} + +define <4 x i16> @test_vext_p16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vext_p16: +; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x6 +entry: + %vext = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6> + ret <4 x i16> %vext +} + +define <16 x i8> @test_vextq_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vextq_p8: +; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x2 +entry: + %vext = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17> + ret <16 x i8> %vext +} + +define <8 x i16> @test_vextq_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vextq_p16: +; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6 +entry: + %vext = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10> + ret <8 x i16> %vext +} diff --git a/test/CodeGen/AArch64/neon-misc-scalar.ll b/test/CodeGen/AArch64/neon-misc-scalar.ll new file mode 100644 index 0000000..cca8deb --- /dev/null +++ b/test/CodeGen/AArch64/neon-misc-scalar.ll @@ -0,0 +1,60 @@ +;RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +declare <1 x i64> @llvm.arm.neon.vqneg.v1i64(<1 x i64>) + +declare <1 x i64> @llvm.arm.neon.vqabs.v1i64(<1 x i64>) + +declare <1 x i64> @llvm.arm.neon.vabs.v1i64(<1 x i64>) + +declare <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64>, <1 x i64>) + +declare <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_vuqadd_s64(<1 x i64> %a, <1 x i64> %b) { +entry: + ; 
CHECK: test_vuqadd_s64 + %vuqadd2.i = tail call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %a, <1 x i64> %b) + ; CHECK: suqadd d{{[0-9]+}}, d{{[0-9]+}} + ret <1 x i64> %vuqadd2.i +} + +define <1 x i64> @test_vsqadd_u64(<1 x i64> %a, <1 x i64> %b) { +entry: + ; CHECK: test_vsqadd_u64 + %vsqadd2.i = tail call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> %a, <1 x i64> %b) + ; CHECK: usqadd d{{[0-9]+}}, d{{[0-9]+}} + ret <1 x i64> %vsqadd2.i +} + +define <1 x i64> @test_vabs_s64(<1 x i64> %a) { + ; CHECK: test_vabs_s64 +entry: + %vabs1.i = tail call <1 x i64> @llvm.arm.neon.vabs.v1i64(<1 x i64> %a) + ; CHECK: abs d{{[0-9]+}}, d{{[0-9]+}} + ret <1 x i64> %vabs1.i +} + +define <1 x i64> @test_vqabs_s64(<1 x i64> %a) { + ; CHECK: test_vqabs_s64 +entry: + %vqabs1.i = tail call <1 x i64> @llvm.arm.neon.vqabs.v1i64(<1 x i64> %a) + ; CHECK: sqabs d{{[0-9]+}}, d{{[0-9]+}} + ret <1 x i64> %vqabs1.i +} + +define <1 x i64> @test_vqneg_s64(<1 x i64> %a) { + ; CHECK: test_vqneg_s64 +entry: + %vqneg1.i = tail call <1 x i64> @llvm.arm.neon.vqneg.v1i64(<1 x i64> %a) + ; CHECK: sqneg d{{[0-9]+}}, d{{[0-9]+}} + ret <1 x i64> %vqneg1.i +} + +define <1 x i64> @test_vneg_s64(<1 x i64> %a) { + ; CHECK: test_vneg_s64 +entry: + %sub.i = sub <1 x i64> zeroinitializer, %a + ; CHECK: neg d{{[0-9]+}}, d{{[0-9]+}} + ret <1 x i64> %sub.i +} + diff --git a/test/CodeGen/AArch64/neon-misc.ll b/test/CodeGen/AArch64/neon-misc.ll new file mode 100644 index 0000000..9660bf2 --- /dev/null +++ b/test/CodeGen/AArch64/neon-misc.ll @@ -0,0 +1,1799 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s + + +define <8 x i8> @test_vrev16_s8(<8 x i8> %a) #0 { +; CHECK: rev16 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vrev16q_s8(<16 x i8> %a) #0 { +; CHECK: rev16 
v{{[0-9]+}}.16b, v{{[0-9]+}}.16b + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14> + ret <16 x i8> %shuffle.i +} + +define <8 x i8> @test_vrev32_s8(<8 x i8> %a) #0 { +; CHECK: rev32 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> + ret <8 x i8> %shuffle.i +} + +define <4 x i16> @test_vrev32_s16(<4 x i16> %a) #0 { +; CHECK: rev32 v{{[0-9]+}}.4h, v{{[0-9]+}}.4h + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> + ret <4 x i16> %shuffle.i +} + +define <16 x i8> @test_vrev32q_s8(<16 x i8> %a) #0 { +; CHECK: rev32 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12> + ret <16 x i8> %shuffle.i +} + +define <8 x i16> @test_vrev32q_s16(<8 x i16> %a) #0 { +; CHECK: rev32 v{{[0-9]+}}.8h, v{{[0-9]+}}.8h + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> + ret <8 x i16> %shuffle.i +} + +define <8 x i8> @test_vrev64_s8(<8 x i8> %a) #0 { +; CHECK: rev64 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + ret <8 x i8> %shuffle.i +} + +define <4 x i16> @test_vrev64_s16(<4 x i16> %a) #0 { +; CHECK: rev64 v{{[0-9]+}}.4h, v{{[0-9]+}}.4h + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + ret <4 x i16> %shuffle.i +} + +define <2 x i32> @test_vrev64_s32(<2 x i32> %a) #0 { +; CHECK: rev64 v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> undef, <2 x i32> <i32 1, i32 0> 
+ ret <2 x i32> %shuffle.i +} + +define <2 x float> @test_vrev64_f32(<2 x float> %a) #0 { +; CHECK: rev64 v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %shuffle.i = shufflevector <2 x float> %a, <2 x float> undef, <2 x i32> <i32 1, i32 0> + ret <2 x float> %shuffle.i +} + +define <16 x i8> @test_vrev64q_s8(<16 x i8> %a) #0 { +; CHECK: rev64 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8> + ret <16 x i8> %shuffle.i +} + +define <8 x i16> @test_vrev64q_s16(<8 x i16> %a) #0 { +; CHECK: rev64 v{{[0-9]+}}.8h, v{{[0-9]+}}.8h + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> + ret <8 x i16> %shuffle.i +} + +define <4 x i32> @test_vrev64q_s32(<4 x i32> %a) #0 { +; CHECK: rev64 v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> + ret <4 x i32> %shuffle.i +} + +define <4 x float> @test_vrev64q_f32(<4 x float> %a) #0 { +; CHECK: rev64 v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> + ret <4 x float> %shuffle.i +} + +define <4 x i16> @test_vpaddl_s8(<8 x i8> %a) #0 { +; CHECK: saddlp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b + %vpaddl.i = tail call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %a) #4 + ret <4 x i16> %vpaddl.i +} + +define <2 x i32> @test_vpaddl_s16(<4 x i16> %a) #0 { +; CHECK: saddlp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h + %vpaddl1.i = tail call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %a) #4 + ret <2 x i32> %vpaddl1.i +} + +define <1 x i64> @test_vpaddl_s32(<2 x i32> %a) #0 { +; CHECK: saddlp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s + %vpaddl1.i = tail call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %a) #4 + ret <1 x i64> %vpaddl1.i +} + +define <4 x i16> 
@test_vpaddl_u8(<8 x i8> %a) #0 { +; CHECK: uaddlp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b + %vpaddl.i = tail call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %a) #4 + ret <4 x i16> %vpaddl.i +} + +define <2 x i32> @test_vpaddl_u16(<4 x i16> %a) #0 { +; CHECK: uaddlp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h + %vpaddl1.i = tail call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %a) #4 + ret <2 x i32> %vpaddl1.i +} + +define <1 x i64> @test_vpaddl_u32(<2 x i32> %a) #0 { +; CHECK: uaddlp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s + %vpaddl1.i = tail call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %a) #4 + ret <1 x i64> %vpaddl1.i +} + +define <8 x i16> @test_vpaddlq_s8(<16 x i8> %a) #0 { +; CHECK: saddlp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b + %vpaddl.i = tail call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %a) #4 + ret <8 x i16> %vpaddl.i +} + +define <4 x i32> @test_vpaddlq_s16(<8 x i16> %a) #0 { +; CHECK: saddlp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h + %vpaddl1.i = tail call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %a) #4 + ret <4 x i32> %vpaddl1.i +} + +define <2 x i64> @test_vpaddlq_s32(<4 x i32> %a) #0 { +; CHECK: saddlp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s + %vpaddl1.i = tail call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %a) #4 + ret <2 x i64> %vpaddl1.i +} + +define <8 x i16> @test_vpaddlq_u8(<16 x i8> %a) #0 { +; CHECK: uaddlp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b + %vpaddl.i = tail call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %a) #4 + ret <8 x i16> %vpaddl.i +} + +define <4 x i32> @test_vpaddlq_u16(<8 x i16> %a) #0 { +; CHECK: uaddlp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h + %vpaddl1.i = tail call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %a) #4 + ret <4 x i32> %vpaddl1.i +} + +define <2 x i64> @test_vpaddlq_u32(<4 x i32> %a) #0 { +; CHECK: uaddlp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s + %vpaddl1.i = tail call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %a) #4 + ret <2 x i64> %vpaddl1.i +} + +define <4 x i16> 
@test_vpadal_s8(<4 x i16> %a, <8 x i8> %b) #0 { +; CHECK: sadalp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b + %vpadal1.i = tail call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b) #4 + ret <4 x i16> %vpadal1.i +} + +define <2 x i32> @test_vpadal_s16(<2 x i32> %a, <4 x i16> %b) #0 { +; CHECK: sadalp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h + %vpadal2.i = tail call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b) #4 + ret <2 x i32> %vpadal2.i +} + +define <1 x i64> @test_vpadal_s32(<1 x i64> %a, <2 x i32> %b) #0 { +; CHECK: sadalp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s + %vpadal2.i = tail call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b) #4 + ret <1 x i64> %vpadal2.i +} + +define <4 x i16> @test_vpadal_u8(<4 x i16> %a, <8 x i8> %b) #0 { +; CHECK: uadalp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b + %vpadal1.i = tail call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b) #4 + ret <4 x i16> %vpadal1.i +} + +define <2 x i32> @test_vpadal_u16(<2 x i32> %a, <4 x i16> %b) #0 { +; CHECK: uadalp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h + %vpadal2.i = tail call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b) #4 + ret <2 x i32> %vpadal2.i +} + +define <1 x i64> @test_vpadal_u32(<1 x i64> %a, <2 x i32> %b) #0 { +; CHECK: uadalp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s + %vpadal2.i = tail call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b) #4 + ret <1 x i64> %vpadal2.i +} + +define <8 x i16> @test_vpadalq_s8(<8 x i16> %a, <16 x i8> %b) #0 { +; CHECK: sadalp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b + %vpadal1.i = tail call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b) #4 + ret <8 x i16> %vpadal1.i +} + +define <4 x i32> @test_vpadalq_s16(<4 x i32> %a, <8 x i16> %b) #0 { +; CHECK: sadalp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h + %vpadal2.i = tail call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b) #4 + ret <4 x i32> %vpadal2.i +} + +define <2 x i64> 
@test_vpadalq_s32(<2 x i64> %a, <4 x i32> %b) #0 { +; CHECK: sadalp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s + %vpadal2.i = tail call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b) #4 + ret <2 x i64> %vpadal2.i +} + +define <8 x i16> @test_vpadalq_u8(<8 x i16> %a, <16 x i8> %b) #0 { +; CHECK: uadalp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b + %vpadal1.i = tail call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b) #4 + ret <8 x i16> %vpadal1.i +} + +define <4 x i32> @test_vpadalq_u16(<4 x i32> %a, <8 x i16> %b) #0 { +; CHECK: uadalp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h + %vpadal2.i = tail call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b) #4 + ret <4 x i32> %vpadal2.i +} + +define <2 x i64> @test_vpadalq_u32(<2 x i64> %a, <4 x i32> %b) #0 { +; CHECK: uadalp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s + %vpadal2.i = tail call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b) #4 + ret <2 x i64> %vpadal2.i +} + +define <8 x i8> @test_vqabs_s8(<8 x i8> %a) #0 { +; CHECK: sqabs v{{[0-9]+}}.8b, v{{[0-9]+}}.8b + %vqabs.i = tail call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %a) #4 + ret <8 x i8> %vqabs.i +} + +define <16 x i8> @test_vqabsq_s8(<16 x i8> %a) #0 { +; CHECK: sqabs v{{[0-9]+}}.16b, v{{[0-9]+}}.16b + %vqabs.i = tail call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %a) #4 + ret <16 x i8> %vqabs.i +} + +define <4 x i16> @test_vqabs_s16(<4 x i16> %a) #0 { +; CHECK: sqabs v{{[0-9]+}}.4h, v{{[0-9]+}}.4h + %vqabs1.i = tail call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %a) #4 + ret <4 x i16> %vqabs1.i +} + +define <8 x i16> @test_vqabsq_s16(<8 x i16> %a) #0 { +; CHECK: sqabs v{{[0-9]+}}.8h, v{{[0-9]+}}.8h + %vqabs1.i = tail call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %a) #4 + ret <8 x i16> %vqabs1.i +} + +define <2 x i32> @test_vqabs_s32(<2 x i32> %a) #0 { +; CHECK: sqabs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vqabs1.i = tail call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %a) #4 + ret <2 x 
i32> %vqabs1.i +} + +define <4 x i32> @test_vqabsq_s32(<4 x i32> %a) #0 { +; CHECK: sqabs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vqabs1.i = tail call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %a) #4 + ret <4 x i32> %vqabs1.i +} + +define <2 x i64> @test_vqabsq_s64(<2 x i64> %a) #0 { +; CHECK: sqabs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vqabs1.i = tail call <2 x i64> @llvm.arm.neon.vqabs.v2i64(<2 x i64> %a) #4 + ret <2 x i64> %vqabs1.i +} + +define <8 x i8> @test_vqneg_s8(<8 x i8> %a) #0 { +; CHECK: sqneg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b + %vqneg.i = tail call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %a) #4 + ret <8 x i8> %vqneg.i +} + +define <16 x i8> @test_vqnegq_s8(<16 x i8> %a) #0 { +; CHECK: sqneg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b + %vqneg.i = tail call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %a) #4 + ret <16 x i8> %vqneg.i +} + +define <4 x i16> @test_vqneg_s16(<4 x i16> %a) #0 { +; CHECK: sqneg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h + %vqneg1.i = tail call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %a) #4 + ret <4 x i16> %vqneg1.i +} + +define <8 x i16> @test_vqnegq_s16(<8 x i16> %a) #0 { +; CHECK: sqneg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h + %vqneg1.i = tail call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %a) #4 + ret <8 x i16> %vqneg1.i +} + +define <2 x i32> @test_vqneg_s32(<2 x i32> %a) #0 { +; CHECK: sqneg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vqneg1.i = tail call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %a) #4 + ret <2 x i32> %vqneg1.i +} + +define <4 x i32> @test_vqnegq_s32(<4 x i32> %a) #0 { +; CHECK: sqneg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vqneg1.i = tail call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %a) #4 + ret <4 x i32> %vqneg1.i +} + +define <2 x i64> @test_vqnegq_s64(<2 x i64> %a) #0 { +; CHECK: sqneg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vqneg1.i = tail call <2 x i64> @llvm.arm.neon.vqneg.v2i64(<2 x i64> %a) #4 + ret <2 x i64> %vqneg1.i +} + +define <8 x i8> @test_vneg_s8(<8 x i8> %a) #0 { +; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b + %sub.i 
= sub <8 x i8> zeroinitializer, %a + ret <8 x i8> %sub.i +} + +define <16 x i8> @test_vnegq_s8(<16 x i8> %a) #0 { +; CHECK: neg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b + %sub.i = sub <16 x i8> zeroinitializer, %a + ret <16 x i8> %sub.i +} + +define <4 x i16> @test_vneg_s16(<4 x i16> %a) #0 { +; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h + %sub.i = sub <4 x i16> zeroinitializer, %a + ret <4 x i16> %sub.i +} + +define <8 x i16> @test_vnegq_s16(<8 x i16> %a) #0 { +; CHECK: neg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h + %sub.i = sub <8 x i16> zeroinitializer, %a + ret <8 x i16> %sub.i +} + +define <2 x i32> @test_vneg_s32(<2 x i32> %a) #0 { +; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %sub.i = sub <2 x i32> zeroinitializer, %a + ret <2 x i32> %sub.i +} + +define <4 x i32> @test_vnegq_s32(<4 x i32> %a) #0 { +; CHECK: neg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %sub.i = sub <4 x i32> zeroinitializer, %a + ret <4 x i32> %sub.i +} + +define <2 x i64> @test_vnegq_s64(<2 x i64> %a) #0 { +; CHECK: neg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %sub.i = sub <2 x i64> zeroinitializer, %a + ret <2 x i64> %sub.i +} + +define <2 x float> @test_vneg_f32(<2 x float> %a) #0 { +; CHECK: fneg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a + ret <2 x float> %sub.i +} + +define <4 x float> @test_vnegq_f32(<4 x float> %a) #0 { +; CHECK: fneg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a + ret <4 x float> %sub.i +} + +define <2 x double> @test_vnegq_f64(<2 x double> %a) #0 { +; CHECK: fneg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a + ret <2 x double> %sub.i +} + +define <8 x i8> @test_vabs_s8(<8 x i8> %a) #0 { +; CHECK: abs v{{[0-9]+}}.8b, v{{[0-9]+}}.8b + %vabs.i = tail call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %a) #4 + ret <8 x i8> %vabs.i +} + +define <16 x i8> @test_vabsq_s8(<16 x i8> %a) 
#0 { +; CHECK: abs v{{[0-9]+}}.16b, v{{[0-9]+}}.16b + %vabs.i = tail call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %a) #4 + ret <16 x i8> %vabs.i +} + +define <4 x i16> @test_vabs_s16(<4 x i16> %a) #0 { +; CHECK: abs v{{[0-9]+}}.4h, v{{[0-9]+}}.4h + %vabs1.i = tail call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> %a) #4 + ret <4 x i16> %vabs1.i +} + +define <8 x i16> @test_vabsq_s16(<8 x i16> %a) #0 { +; CHECK: abs v{{[0-9]+}}.8h, v{{[0-9]+}}.8h + %vabs1.i = tail call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %a) #4 + ret <8 x i16> %vabs1.i +} + +define <2 x i32> @test_vabs_s32(<2 x i32> %a) #0 { +; CHECK: abs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vabs1.i = tail call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> %a) #4 + ret <2 x i32> %vabs1.i +} + +define <4 x i32> @test_vabsq_s32(<4 x i32> %a) #0 { +; CHECK: abs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vabs1.i = tail call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> %a) #4 + ret <4 x i32> %vabs1.i +} + +define <2 x i64> @test_vabsq_s64(<2 x i64> %a) #0 { +; CHECK: abs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vabs1.i = tail call <2 x i64> @llvm.arm.neon.vabs.v2i64(<2 x i64> %a) #4 + ret <2 x i64> %vabs1.i +} + +define <2 x float> @test_vabs_f32(<2 x float> %a) #1 { +; CHECK: fabs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vabs1.i = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> %a) #4 + ret <2 x float> %vabs1.i +} + +define <4 x float> @test_vabsq_f32(<4 x float> %a) #1 { +; CHECK: fabs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vabs1.i = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) #4 + ret <4 x float> %vabs1.i +} + +define <2 x double> @test_vabsq_f64(<2 x double> %a) #1 { +; CHECK: fabs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vabs1.i = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %a) #4 + ret <2 x double> %vabs1.i +} + +define <8 x i8> @test_vuqadd_s8(<8 x i8> %a, <8 x i8> %b) #0 { +; CHECK: suqadd v{{[0-9]+}}.8b, v{{[0-9]+}}.8b + %vuqadd.i = tail call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %a, <8 x i8> %b) 
#4 + ret <8 x i8> %vuqadd.i +} + +define <16 x i8> @test_vuqaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +; CHECK: suqadd v{{[0-9]+}}.16b, v{{[0-9]+}}.16b + %vuqadd.i = tail call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4 + ret <16 x i8> %vuqadd.i +} + +define <4 x i16> @test_vuqadd_s16(<4 x i16> %a, <4 x i16> %b) #0 { +; CHECK: suqadd v{{[0-9]+}}.4h, v{{[0-9]+}}.4h + %vuqadd2.i = tail call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %a, <4 x i16> %b) #4 + ret <4 x i16> %vuqadd2.i +} + +define <8 x i16> @test_vuqaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +; CHECK: suqadd v{{[0-9]+}}.8h, v{{[0-9]+}}.8h + %vuqadd2.i = tail call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %a, <8 x i16> %b) #4 + ret <8 x i16> %vuqadd2.i +} + +define <2 x i32> @test_vuqadd_s32(<2 x i32> %a, <2 x i32> %b) #0 { +; CHECK: suqadd v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vuqadd2.i = tail call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %a, <2 x i32> %b) #4 + ret <2 x i32> %vuqadd2.i +} + +define <4 x i32> @test_vuqaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +; CHECK: suqadd v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vuqadd2.i = tail call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %a, <4 x i32> %b) #4 + ret <4 x i32> %vuqadd2.i +} + +define <2 x i64> @test_vuqaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 { +; CHECK: suqadd v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vuqadd2.i = tail call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %a, <2 x i64> %b) #4 + ret <2 x i64> %vuqadd2.i +} + +define <8 x i8> @test_vcls_s8(<8 x i8> %a) #0 { +; CHECK: cls v{{[0-9]+}}.8b, v{{[0-9]+}}.8b + %vcls.i = tail call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %a) #4 + ret <8 x i8> %vcls.i +} + +define <16 x i8> @test_vclsq_s8(<16 x i8> %a) #0 { +; CHECK: cls v{{[0-9]+}}.16b, v{{[0-9]+}}.16b + %vcls.i = tail call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %a) #4 + ret <16 x i8> %vcls.i +} + +define <4 x i16> @test_vcls_s16(<4 x i16> %a) #0 { +; CHECK: cls v{{[0-9]+}}.4h, 
v{{[0-9]+}}.4h + %vcls1.i = tail call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %a) #4 + ret <4 x i16> %vcls1.i +} + +define <8 x i16> @test_vclsq_s16(<8 x i16> %a) #0 { +; CHECK: cls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h + %vcls1.i = tail call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %a) #4 + ret <8 x i16> %vcls1.i +} + +define <2 x i32> @test_vcls_s32(<2 x i32> %a) #0 { +; CHECK: cls v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vcls1.i = tail call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %a) #4 + ret <2 x i32> %vcls1.i +} + +define <4 x i32> @test_vclsq_s32(<4 x i32> %a) #0 { +; CHECK: cls v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vcls1.i = tail call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %a) #4 + ret <4 x i32> %vcls1.i +} + +define <8 x i8> @test_vclz_s8(<8 x i8> %a) #0 { +; CHECK: clz v{{[0-9]+}}.8b, v{{[0-9]+}}.8b + %vclz.i = tail call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) #4 + ret <8 x i8> %vclz.i +} + +define <16 x i8> @test_vclzq_s8(<16 x i8> %a) #0 { +; CHECK: clz v{{[0-9]+}}.16b, v{{[0-9]+}}.16b + %vclz.i = tail call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) #4 + ret <16 x i8> %vclz.i +} + +define <4 x i16> @test_vclz_s16(<4 x i16> %a) #0 { +; CHECK: clz v{{[0-9]+}}.4h, v{{[0-9]+}}.4h + %vclz1.i = tail call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) #4 + ret <4 x i16> %vclz1.i +} + +define <8 x i16> @test_vclzq_s16(<8 x i16> %a) #0 { +; CHECK: clz v{{[0-9]+}}.8h, v{{[0-9]+}}.8h + %vclz1.i = tail call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) #4 + ret <8 x i16> %vclz1.i +} + +define <2 x i32> @test_vclz_s32(<2 x i32> %a) #0 { +; CHECK: clz v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vclz1.i = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) #4 + ret <2 x i32> %vclz1.i +} + +define <4 x i32> @test_vclzq_s32(<4 x i32> %a) #0 { +; CHECK: clz v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vclz1.i = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) #4 + ret <4 x i32> %vclz1.i +} + +define <8 x i8> @test_vcnt_s8(<8 x i8> %a) #0 { 
+; CHECK: cnt v{{[0-9]+}}.8b, v{{[0-9]+}}.8b + %vctpop.i = tail call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #4 + ret <8 x i8> %vctpop.i +} + +define <16 x i8> @test_vcntq_s8(<16 x i8> %a) #0 { +; CHECK: cnt v{{[0-9]+}}.16b, v{{[0-9]+}}.16b + %vctpop.i = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #4 + ret <16 x i8> %vctpop.i +} + +define <8 x i8> @test_vmvn_s8(<8 x i8> %a) #0 { +; CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b + %neg.i = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> + ret <8 x i8> %neg.i +} + +define <16 x i8> @test_vmvnq_s8(<16 x i8> %a) #0 { +; CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b + %neg.i = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> + ret <16 x i8> %neg.i +} + +define <4 x i16> @test_vmvn_s16(<4 x i16> %a) #0 { +; CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b + %neg.i = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1> + ret <4 x i16> %neg.i +} + +define <8 x i16> @test_vmvnq_s16(<8 x i16> %a) #0 { +; CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b + %neg.i = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> + ret <8 x i16> %neg.i +} + +define <2 x i32> @test_vmvn_s32(<2 x i32> %a) #0 { +; CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b + %neg.i = xor <2 x i32> %a, <i32 -1, i32 -1> + ret <2 x i32> %neg.i +} + +define <4 x i32> @test_vmvnq_s32(<4 x i32> %a) #0 { +; CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b + %neg.i = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1> + ret <4 x i32> %neg.i +} + +define <8 x i8> @test_vrbit_s8(<8 x i8> %a) #0 { +; CHECK: rbit v{{[0-9]+}}.8b, v{{[0-9]+}}.8b + %vrbit.i = tail call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %a) #4 + ret <8 x i8> %vrbit.i +} + +define <16 x i8> @test_vrbitq_s8(<16 x i8> %a) #0 { +; CHECK: rbit v{{[0-9]+}}.16b, v{{[0-9]+}}.16b + %vrbit.i = tail call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %a) #4 + ret <16 x i8> %vrbit.i +} + 
+define <8 x i8> @test_vmovn_s16(<8 x i16> %a) #0 { +; CHECK: xtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h + %vmovn.i = trunc <8 x i16> %a to <8 x i8> + ret <8 x i8> %vmovn.i +} + +define <4 x i16> @test_vmovn_s32(<4 x i32> %a) #0 { +; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s + %vmovn.i = trunc <4 x i32> %a to <4 x i16> + ret <4 x i16> %vmovn.i +} + +define <2 x i32> @test_vmovn_s64(<2 x i64> %a) #0 { +; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d + %vmovn.i = trunc <2 x i64> %a to <2 x i32> + ret <2 x i32> %vmovn.i +} + +define <16 x i8> @test_vmovn_high_s16(<8 x i8> %a, <8 x i16> %b) #0 { +; CHECK: xtn2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h + %vmovn.i.i = trunc <8 x i16> %b to <8 x i8> + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vmovn.i.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + ret <16 x i8> %shuffle.i +} + +define <8 x i16> @test_vmovn_high_s32(<4 x i16> %a, <4 x i32> %b) #0 { +; CHECK: xtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s + %vmovn.i.i = trunc <4 x i32> %b to <4 x i16> + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vmovn.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + ret <8 x i16> %shuffle.i +} + +define <4 x i32> @test_vmovn_high_s64(<2 x i32> %a, <2 x i64> %b) #0 { +; CHECK: xtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d + %vmovn.i.i = trunc <2 x i64> %b to <2 x i32> + %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vmovn.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + ret <4 x i32> %shuffle.i +} + +define <8 x i8> @test_vqmovun_s16(<8 x i16> %a) #0 { +; CHECK: sqxtun v{{[0-9]+}}.8b, v{{[0-9]+}}.8h + %vqdmull1.i = tail call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %a) #4 + ret <8 x i8> %vqdmull1.i +} + +define <4 x i16> @test_vqmovun_s32(<4 x i32> %a) #0 { +; CHECK: sqxtun v{{[0-9]+}}.4h, v{{[0-9]+}}.4s + %vqdmull1.i = tail call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %a) #4 + ret <4 x i16> %vqdmull1.i +} + +define <2 x i32> 
@test_vqmovun_s64(<2 x i64> %a) #0 { +; CHECK: sqxtun v{{[0-9]+}}.2s, v{{[0-9]+}}.2d + %vqdmull1.i = tail call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %a) #4 + ret <2 x i32> %vqdmull1.i +} + +define <16 x i8> @test_vqmovun_high_s16(<8 x i8> %a, <8 x i16> %b) #0 { +; CHECK: sqxtun2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h + %vqdmull1.i.i = tail call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %b) #4 + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vqdmull1.i.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + ret <16 x i8> %shuffle.i +} + +define <8 x i16> @test_vqmovun_high_s32(<4 x i16> %a, <4 x i32> %b) #0 { +; CHECK: sqxtun2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s + %vqdmull1.i.i = tail call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %b) #4 + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vqdmull1.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + ret <8 x i16> %shuffle.i +} + +define <4 x i32> @test_vqmovun_high_s64(<2 x i32> %a, <2 x i64> %b) #0 { +; CHECK: sqxtun2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d + %vqdmull1.i.i = tail call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %b) #4 + %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vqdmull1.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + ret <4 x i32> %shuffle.i +} + +define <8 x i8> @test_vqmovn_s16(<8 x i16> %a) #0 { +; CHECK: sqxtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h + %vqmovn1.i = tail call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %a) #4 + ret <8 x i8> %vqmovn1.i +} + +define <4 x i16> @test_vqmovn_s32(<4 x i32> %a) #0 { +; CHECK: sqxtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s + %vqmovn1.i = tail call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %a) #4 + ret <4 x i16> %vqmovn1.i +} + +define <2 x i32> @test_vqmovn_s64(<2 x i64> %a) #0 { +; CHECK: sqxtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d + %vqmovn1.i = tail call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %a) #4 + ret <2 x i32> %vqmovn1.i +} + 
+define <16 x i8> @test_vqmovn_high_s16(<8 x i8> %a, <8 x i16> %b) #0 { +; CHECK: sqxtn2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h + %vqmovn1.i.i = tail call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %b) #4 + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vqmovn1.i.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + ret <16 x i8> %shuffle.i +} + +define <8 x i16> @test_vqmovn_high_s32(<4 x i16> %a, <4 x i32> %b) #0 { +; CHECK: test_vqmovn_high_s32 + %vqmovn1.i.i = tail call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %b) #4 + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vqmovn1.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + ret <8 x i16> %shuffle.i +} + +define <4 x i32> @test_vqmovn_high_s64(<2 x i32> %a, <2 x i64> %b) #0 { +; CHECK: test_vqmovn_high_s64 + %vqmovn1.i.i = tail call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %b) #4 + %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vqmovn1.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + ret <4 x i32> %shuffle.i +} + +define <8 x i8> @test_vqmovn_u16(<8 x i16> %a) #0 { +; CHECK: uqxtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h + %vqmovn1.i = tail call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %a) #4 + ret <8 x i8> %vqmovn1.i +} + +define <4 x i16> @test_vqmovn_u32(<4 x i32> %a) #0 { +; CHECK: uqxtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s + %vqmovn1.i = tail call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %a) #4 + ret <4 x i16> %vqmovn1.i +} + +define <2 x i32> @test_vqmovn_u64(<2 x i64> %a) #0 { +; CHECK: uqxtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d + %vqmovn1.i = tail call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %a) #4 + ret <2 x i32> %vqmovn1.i +} + +define <16 x i8> @test_vqmovn_high_u16(<8 x i8> %a, <8 x i16> %b) #0 { +; CHECK: uqxtn2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h + %vqmovn1.i.i = tail call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %b) #4 + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> 
%vqmovn1.i.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + ret <16 x i8> %shuffle.i +} + +define <8 x i16> @test_vqmovn_high_u32(<4 x i16> %a, <4 x i32> %b) #0 { +; CHECK: uqxtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s + %vqmovn1.i.i = tail call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %b) #4 + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vqmovn1.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + ret <8 x i16> %shuffle.i +} + +define <4 x i32> @test_vqmovn_high_u64(<2 x i32> %a, <2 x i64> %b) #0 { +; CHECK: uqxtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d + %vqmovn1.i.i = tail call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %b) #4 + %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vqmovn1.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + ret <4 x i32> %shuffle.i +} + +define <8 x i16> @test_vshll_n_s8(<8 x i8> %a) #0 { +; CHECK: shll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #8 + %1 = sext <8 x i8> %a to <8 x i16> + %vshll_n = shl <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + ret <8 x i16> %vshll_n +} + +define <4 x i32> @test_vshll_n_s16(<4 x i16> %a) #0 { +; CHECK: shll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #16 + %1 = sext <4 x i16> %a to <4 x i32> + %vshll_n = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16> + ret <4 x i32> %vshll_n +} + +define <2 x i64> @test_vshll_n_s32(<2 x i32> %a) #0 { +; CHECK: shll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #32 + %1 = sext <2 x i32> %a to <2 x i64> + %vshll_n = shl <2 x i64> %1, <i64 32, i64 32> + ret <2 x i64> %vshll_n +} + +define <8 x i16> @test_vshll_n_u8(<8 x i8> %a) #0 { +; CHECK: shll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #8 + %1 = zext <8 x i8> %a to <8 x i16> + %vshll_n = shl <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + ret <8 x i16> %vshll_n +} + +define <4 x i32> @test_vshll_n_u16(<4 x i16> %a) #0 { +; CHECK: shll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #16 + %1 = zext <4 x i16> %a to 
<4 x i32> + %vshll_n = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16> + ret <4 x i32> %vshll_n +} + +define <2 x i64> @test_vshll_n_u32(<2 x i32> %a) #0 { +; CHECK: shll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #32 + %1 = zext <2 x i32> %a to <2 x i64> + %vshll_n = shl <2 x i64> %1, <i64 32, i64 32> + ret <2 x i64> %vshll_n +} + +define <8 x i16> @test_vshll_high_n_s8(<16 x i8> %a) #0 { +; CHECK: shll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #8 + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %1 = sext <8 x i8> %shuffle.i to <8 x i16> + %vshll_n = shl <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + ret <8 x i16> %vshll_n +} + +define <4 x i32> @test_vshll_high_n_s16(<8 x i16> %a) #0 { +; CHECK: shll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #16 + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %1 = sext <4 x i16> %shuffle.i to <4 x i32> + %vshll_n = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16> + ret <4 x i32> %vshll_n +} + +define <2 x i64> @test_vshll_high_n_s32(<4 x i32> %a) #0 { +; CHECK: shll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #32 + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %1 = sext <2 x i32> %shuffle.i to <2 x i64> + %vshll_n = shl <2 x i64> %1, <i64 32, i64 32> + ret <2 x i64> %vshll_n +} + +define <8 x i16> @test_vshll_high_n_u8(<16 x i8> %a) #0 { +; CHECK: shll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #8 + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %1 = zext <8 x i8> %shuffle.i to <8 x i16> + %vshll_n = shl <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + ret <8 x i16> %vshll_n +} + +define <4 x i32> @test_vshll_high_n_u16(<8 x i16> %a) #0 { +; CHECK: shll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #16 + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 
4, i32 5, i32 6, i32 7> + %1 = zext <4 x i16> %shuffle.i to <4 x i32> + %vshll_n = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16> + ret <4 x i32> %vshll_n +} + +define <2 x i64> @test_vshll_high_n_u32(<4 x i32> %a) #0 { +; CHECK: shll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #32 + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %1 = zext <2 x i32> %shuffle.i to <2 x i64> + %vshll_n = shl <2 x i64> %1, <i64 32, i64 32> + ret <2 x i64> %vshll_n +} + +define <4 x i16> @test_vcvt_f16_f32(<4 x float> %a) #0 { +; CHECK: fcvtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s + %vcvt1.i = tail call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %a) #4 + ret <4 x i16> %vcvt1.i +} + +define <8 x i16> @test_vcvt_high_f16_f32(<4 x i16> %a, <4 x float> %b) #0 { +; CHECK: fcvtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s + %vcvt1.i.i = tail call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %b) #4 + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vcvt1.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + ret <8 x i16> %shuffle.i +} + +define <4 x float> @test_vcvt_f32_f16(<4 x i16> %a) #0 { +; CHECK: fcvtl v{{[0-9]+}}.4s, v{{[0-9]+}}.4h + %vcvt1.i = tail call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> %a) #4 + ret <4 x float> %vcvt1.i +} + +define <4 x float> @test_vcvt_high_f32_f16(<8 x i16> %a) #0 { +; CHECK: fcvtl2 v{{[0-9]+}}.4s, v{{[0-9]+}}.8h + %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vcvt1.i.i = tail call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> %shuffle.i.i) #4 + ret <4 x float> %vcvt1.i.i +} + +define <2 x float> @test_vcvt_f32_f64(<2 x double> %a) #0 { +; CHECK: fcvtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d + %vcvt.i = fptrunc <2 x double> %a to <2 x float> + ret <2 x float> %vcvt.i +} + +define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 { +; CHECK: fcvtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d + %vcvt.i.i = fptrunc <2 x double> %b to <2 x float> + %shuffle.i 
= shufflevector <2 x float> %a, <2 x float> %vcvt.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + ret <4 x float> %shuffle.i +} + +define <2 x float> @test_vcvtx_f32_f64(<2 x double> %a) #0 { +; CHECK: fcvtxn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d + %vcvtx_f32_f641.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %a) #4 + ret <2 x float> %vcvtx_f32_f641.i +} + +define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 { +; CHECK: fcvtxn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d + %vcvtx_f32_f641.i.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %b) #4 + %shuffle.i = shufflevector <2 x float> %a, <2 x float> %vcvtx_f32_f641.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + ret <4 x float> %shuffle.i +} + +define <2 x double> @test_vcvt_f64_f32(<2 x float> %a) #0 { +; CHECK: fcvtl v{{[0-9]+}}.2d, v{{[0-9]+}}.2s + %vcvt.i = fpext <2 x float> %a to <2 x double> + ret <2 x double> %vcvt.i +} + +define <2 x double> @test_vcvt_high_f64_f32(<4 x float> %a) #0 { +; CHECK: fcvtl2 v{{[0-9]+}}.2d, v{{[0-9]+}}.4s + %shuffle.i.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 2, i32 3> + %vcvt.i.i = fpext <2 x float> %shuffle.i.i to <2 x double> + ret <2 x double> %vcvt.i.i +} + +define <2 x float> @test_vrndn_f32(<2 x float> %a) #0 { +; CHECK: frintn v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vrndn1.i = tail call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> %a) #4 + ret <2 x float> %vrndn1.i +} + +define <4 x float> @test_vrndnq_f32(<4 x float> %a) #0 { +; CHECK: frintn v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vrndn1.i = tail call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> %a) #4 + ret <4 x float> %vrndn1.i +} + +define <2 x double> @test_vrndnq_f64(<2 x double> %a) #0 { +; CHECK: frintn v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vrndn1.i = tail call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> %a) #4 + ret <2 x double> %vrndn1.i +} + +define <2 x float> @test_vrnda_f32(<2 x float> %a) #0 { +; 
CHECK: frinta v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vrnda1.i = tail call <2 x float> @llvm.round.v2f32(<2 x float> %a) #4 + ret <2 x float> %vrnda1.i +} + +define <4 x float> @test_vrndaq_f32(<4 x float> %a) #0 { +; CHECK: frinta v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vrnda1.i = tail call <4 x float> @llvm.round.v4f32(<4 x float> %a) #4 + ret <4 x float> %vrnda1.i +} + +define <2 x double> @test_vrndaq_f64(<2 x double> %a) #0 { +; CHECK: frinta v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vrnda1.i = tail call <2 x double> @llvm.round.v2f64(<2 x double> %a) #4 + ret <2 x double> %vrnda1.i +} + +define <2 x float> @test_vrndp_f32(<2 x float> %a) #0 { +; CHECK: frintp v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vrndp1.i = tail call <2 x float> @llvm.ceil.v2f32(<2 x float> %a) #4 + ret <2 x float> %vrndp1.i +} + +define <4 x float> @test_vrndpq_f32(<4 x float> %a) #0 { +; CHECK: frintp v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vrndp1.i = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> %a) #4 + ret <4 x float> %vrndp1.i +} + +define <2 x double> @test_vrndpq_f64(<2 x double> %a) #0 { +; CHECK: frintp v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vrndp1.i = tail call <2 x double> @llvm.ceil.v2f64(<2 x double> %a) #4 + ret <2 x double> %vrndp1.i +} + +define <2 x float> @test_vrndm_f32(<2 x float> %a) #0 { +; CHECK: frintm v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vrndm1.i = tail call <2 x float> @llvm.floor.v2f32(<2 x float> %a) #4 + ret <2 x float> %vrndm1.i +} + +define <4 x float> @test_vrndmq_f32(<4 x float> %a) #0 { +; CHECK: frintm v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vrndm1.i = tail call <4 x float> @llvm.floor.v4f32(<4 x float> %a) #4 + ret <4 x float> %vrndm1.i +} + +define <2 x double> @test_vrndmq_f64(<2 x double> %a) #0 { +; CHECK: frintm v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vrndm1.i = tail call <2 x double> @llvm.floor.v2f64(<2 x double> %a) #4 + ret <2 x double> %vrndm1.i +} + +define <2 x float> @test_vrndx_f32(<2 x float> %a) #0 { +; CHECK: frintx v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vrndx1.i = tail call <2 x float> 
@llvm.rint.v2f32(<2 x float> %a) #4 + ret <2 x float> %vrndx1.i +} + +define <4 x float> @test_vrndxq_f32(<4 x float> %a) #0 { +; CHECK: frintx v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vrndx1.i = tail call <4 x float> @llvm.rint.v4f32(<4 x float> %a) #4 + ret <4 x float> %vrndx1.i +} + +define <2 x double> @test_vrndxq_f64(<2 x double> %a) #0 { +; CHECK: frintx v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vrndx1.i = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %a) #4 + ret <2 x double> %vrndx1.i +} + +define <2 x float> @test_vrnd_f32(<2 x float> %a) #0 { +; CHECK: frintz v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vrnd1.i = tail call <2 x float> @llvm.trunc.v2f32(<2 x float> %a) #4 + ret <2 x float> %vrnd1.i +} + +define <4 x float> @test_vrndq_f32(<4 x float> %a) #0 { +; CHECK: frintz v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vrnd1.i = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> %a) #4 + ret <4 x float> %vrnd1.i +} + +define <2 x double> @test_vrndq_f64(<2 x double> %a) #0 { +; CHECK: frintz v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vrnd1.i = tail call <2 x double> @llvm.trunc.v2f64(<2 x double> %a) #4 + ret <2 x double> %vrnd1.i +} + +define <2 x float> @test_vrndi_f32(<2 x float> %a) #0 { +; CHECK: frinti v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vrndi1.i = tail call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %a) #4 + ret <2 x float> %vrndi1.i +} + +define <4 x float> @test_vrndiq_f32(<4 x float> %a) #0 { +; CHECK: frinti v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vrndi1.i = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a) #4 + ret <4 x float> %vrndi1.i +} + +define <2 x double> @test_vrndiq_f64(<2 x double> %a) #0 { +; CHECK: frinti v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vrndi1.i = tail call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a) #4 + ret <2 x double> %vrndi1.i +} + +define <2 x i32> @test_vcvt_s32_f32(<2 x float> %a) #0 { +; CHECK: fcvtzs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vcvt.i = fptosi <2 x float> %a to <2 x i32> + ret <2 x i32> %vcvt.i +} + +define <4 x i32> 
@test_vcvtq_s32_f32(<4 x float> %a) #0 { +; CHECK: fcvtzs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vcvt.i = fptosi <4 x float> %a to <4 x i32> + ret <4 x i32> %vcvt.i +} + +define <2 x i64> @test_vcvtq_s64_f64(<2 x double> %a) #0 { +; CHECK: fcvtzs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vcvt.i = fptosi <2 x double> %a to <2 x i64> + ret <2 x i64> %vcvt.i +} + +define <2 x i32> @test_vcvt_u32_f32(<2 x float> %a) #0 { +; CHECK: fcvtzu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vcvt.i = fptoui <2 x float> %a to <2 x i32> + ret <2 x i32> %vcvt.i +} + +define <4 x i32> @test_vcvtq_u32_f32(<4 x float> %a) #0 { +; CHECK: fcvtzu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vcvt.i = fptoui <4 x float> %a to <4 x i32> + ret <4 x i32> %vcvt.i +} + +define <2 x i64> @test_vcvtq_u64_f64(<2 x double> %a) #0 { +; CHECK: fcvtzu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vcvt.i = fptoui <2 x double> %a to <2 x i64> + ret <2 x i64> %vcvt.i +} + +define <2 x i32> @test_vcvtn_s32_f32(<2 x float> %a) #0 { +; CHECK: fcvtns v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vcvtns_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtns.v2i32.v2f32(<2 x float> %a) #4 + ret <2 x i32> %vcvtns_f321.i +} + +define <4 x i32> @test_vcvtnq_s32_f32(<4 x float> %a) #0 { +; CHECK: fcvtns v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vcvtns_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtns.v4i32.v4f32(<4 x float> %a) #4 + ret <4 x i32> %vcvtns_f321.i +} + +define <2 x i64> @test_vcvtnq_s64_f64(<2 x double> %a) #0 { +; CHECK: fcvtns v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vcvtns_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtns.v2i64.v2f64(<2 x double> %a) #4 + ret <2 x i64> %vcvtns_f641.i +} + +define <2 x i32> @test_vcvtn_u32_f32(<2 x float> %a) #0 { +; CHECK: fcvtnu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vcvtnu_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtnu.v2i32.v2f32(<2 x float> %a) #4 + ret <2 x i32> %vcvtnu_f321.i +} + +define <4 x i32> @test_vcvtnq_u32_f32(<4 x float> %a) #0 { +; CHECK: fcvtnu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vcvtnu_f321.i = tail call <4 
x i32> @llvm.aarch64.neon.fcvtnu.v4i32.v4f32(<4 x float> %a) #4 + ret <4 x i32> %vcvtnu_f321.i +} + +define <2 x i64> @test_vcvtnq_u64_f64(<2 x double> %a) #0 { +; CHECK: fcvtnu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vcvtnu_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtnu.v2i64.v2f64(<2 x double> %a) #4 + ret <2 x i64> %vcvtnu_f641.i +} + +define <2 x i32> @test_vcvtp_s32_f32(<2 x float> %a) #0 { +; CHECK: fcvtps v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vcvtps_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtps.v2i32.v2f32(<2 x float> %a) #4 + ret <2 x i32> %vcvtps_f321.i +} + +define <4 x i32> @test_vcvtpq_s32_f32(<4 x float> %a) #0 { +; CHECK: fcvtps v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vcvtps_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtps.v4i32.v4f32(<4 x float> %a) #4 + ret <4 x i32> %vcvtps_f321.i +} + +define <2 x i64> @test_vcvtpq_s64_f64(<2 x double> %a) #0 { +; CHECK: fcvtps v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vcvtps_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtps.v2i64.v2f64(<2 x double> %a) #4 + ret <2 x i64> %vcvtps_f641.i +} + +define <2 x i32> @test_vcvtp_u32_f32(<2 x float> %a) #0 { +; CHECK: fcvtpu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vcvtpu_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtpu.v2i32.v2f32(<2 x float> %a) #4 + ret <2 x i32> %vcvtpu_f321.i +} + +define <4 x i32> @test_vcvtpq_u32_f32(<4 x float> %a) #0 { +; CHECK: fcvtpu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vcvtpu_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtpu.v4i32.v4f32(<4 x float> %a) #4 + ret <4 x i32> %vcvtpu_f321.i +} + +define <2 x i64> @test_vcvtpq_u64_f64(<2 x double> %a) #0 { +; CHECK: fcvtpu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vcvtpu_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtpu.v2i64.v2f64(<2 x double> %a) #4 + ret <2 x i64> %vcvtpu_f641.i +} + +define <2 x i32> @test_vcvtm_s32_f32(<2 x float> %a) #0 { +; CHECK: fcvtms v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vcvtms_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtms.v2i32.v2f32(<2 x float> %a) #4 + ret <2 x 
i32> %vcvtms_f321.i +} + +define <4 x i32> @test_vcvtmq_s32_f32(<4 x float> %a) #0 { +; CHECK: fcvtms v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vcvtms_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtms.v4i32.v4f32(<4 x float> %a) #4 + ret <4 x i32> %vcvtms_f321.i +} + +define <2 x i64> @test_vcvtmq_s64_f64(<2 x double> %a) #0 { +; CHECK: fcvtms v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vcvtms_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtms.v2i64.v2f64(<2 x double> %a) #4 + ret <2 x i64> %vcvtms_f641.i +} + +define <2 x i32> @test_vcvtm_u32_f32(<2 x float> %a) #0 { +; CHECK: fcvtmu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vcvtmu_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtmu.v2i32.v2f32(<2 x float> %a) #4 + ret <2 x i32> %vcvtmu_f321.i +} + +define <4 x i32> @test_vcvtmq_u32_f32(<4 x float> %a) #0 { +; CHECK: fcvtmu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vcvtmu_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtmu.v4i32.v4f32(<4 x float> %a) #4 + ret <4 x i32> %vcvtmu_f321.i +} + +define <2 x i64> @test_vcvtmq_u64_f64(<2 x double> %a) #0 { +; CHECK: fcvtmu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vcvtmu_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtmu.v2i64.v2f64(<2 x double> %a) #4 + ret <2 x i64> %vcvtmu_f641.i +} + +define <2 x i32> @test_vcvta_s32_f32(<2 x float> %a) #0 { +; CHECK: fcvtas v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vcvtas_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtas.v2i32.v2f32(<2 x float> %a) #4 + ret <2 x i32> %vcvtas_f321.i +} + +define <4 x i32> @test_vcvtaq_s32_f32(<4 x float> %a) #0 { +; CHECK: fcvtas v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vcvtas_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtas.v4i32.v4f32(<4 x float> %a) #4 + ret <4 x i32> %vcvtas_f321.i +} + +define <2 x i64> @test_vcvtaq_s64_f64(<2 x double> %a) #0 { +; CHECK: fcvtas v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vcvtas_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtas.v2i64.v2f64(<2 x double> %a) #4 + ret <2 x i64> %vcvtas_f641.i +} + +define <2 x i32> @test_vcvta_u32_f32(<2 x float> 
%a) #0 { +; CHECK: fcvtau v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vcvtau_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtau.v2i32.v2f32(<2 x float> %a) #4 + ret <2 x i32> %vcvtau_f321.i +} + +define <4 x i32> @test_vcvtaq_u32_f32(<4 x float> %a) #0 { +; CHECK: fcvtau v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vcvtau_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtau.v4i32.v4f32(<4 x float> %a) #4 + ret <4 x i32> %vcvtau_f321.i +} + +define <2 x i64> @test_vcvtaq_u64_f64(<2 x double> %a) #0 { +; CHECK: fcvtau v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vcvtau_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtau.v2i64.v2f64(<2 x double> %a) #4 + ret <2 x i64> %vcvtau_f641.i +} + +define <2 x float> @test_vrsqrte_f32(<2 x float> %a) #0 { +; CHECK: frsqrte v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vrsqrte1.i = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %a) #4 + ret <2 x float> %vrsqrte1.i +} + +define <4 x float> @test_vrsqrteq_f32(<4 x float> %a) #0 { +; CHECK: frsqrte v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vrsqrte1.i = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %a) #4 + ret <4 x float> %vrsqrte1.i +} + +define <2 x double> @test_vrsqrteq_f64(<2 x double> %a) #0 { +; CHECK: frsqrte v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vrsqrte1.i = tail call <2 x double> @llvm.arm.neon.vrsqrte.v2f64(<2 x double> %a) #4 + ret <2 x double> %vrsqrte1.i +} + +define <2 x float> @test_vrecpe_f32(<2 x float> %a) #0 { +; CHECK: frecpe v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vrecpe1.i = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %a) #4 + ret <2 x float> %vrecpe1.i +} + +define <4 x float> @test_vrecpeq_f32(<4 x float> %a) #0 { +; CHECK: frecpe v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vrecpe1.i = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %a) #4 + ret <4 x float> %vrecpe1.i +} + +define <2 x double> @test_vrecpeq_f64(<2 x double> %a) #0 { +; CHECK: frecpe v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vrecpe1.i = tail call <2 x double> @llvm.arm.neon.vrecpe.v2f64(<2 x 
double> %a) #4 + ret <2 x double> %vrecpe1.i +} + +define <2 x i32> @test_vrecpe_u32(<2 x i32> %a) #0 { +; CHECK: urecpe v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vrecpe1.i = tail call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %a) #4 + ret <2 x i32> %vrecpe1.i +} + +define <4 x i32> @test_vrecpeq_u32(<4 x i32> %a) #0 { +; CHECK: urecpe v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vrecpe1.i = tail call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %a) #4 + ret <4 x i32> %vrecpe1.i +} + +define <2 x float> @test_vsqrt_f32(<2 x float> %a) #0 { +; CHECK: fsqrt v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vsqrt1.i = tail call <2 x float> @llvm.sqrt.v2f32(<2 x float> %a) #4 + ret <2 x float> %vsqrt1.i +} + +define <4 x float> @test_vsqrtq_f32(<4 x float> %a) #0 { +; CHECK: fsqrt v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vsqrt1.i = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) #4 + ret <4 x float> %vsqrt1.i +} + +define <2 x double> @test_vsqrtq_f64(<2 x double> %a) #0 { +; CHECK: fsqrt v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vsqrt1.i = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) #4 + ret <2 x double> %vsqrt1.i +} + +define <2 x float> @test_vcvt_f32_s32(<2 x i32> %a) #0 { +; CHECK: scvtf v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vcvt.i = sitofp <2 x i32> %a to <2 x float> + ret <2 x float> %vcvt.i +} + +define <2 x float> @test_vcvt_f32_u32(<2 x i32> %a) #0 { +; CHECK: ucvtf v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %vcvt.i = uitofp <2 x i32> %a to <2 x float> + ret <2 x float> %vcvt.i +} + +define <4 x float> @test_vcvtq_f32_s32(<4 x i32> %a) #0 { +; CHECK: scvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vcvt.i = sitofp <4 x i32> %a to <4 x float> + ret <4 x float> %vcvt.i +} + +define <4 x float> @test_vcvtq_f32_u32(<4 x i32> %a) #0 { +; CHECK: ucvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %vcvt.i = uitofp <4 x i32> %a to <4 x float> + ret <4 x float> %vcvt.i +} + +define <2 x double> @test_vcvtq_f64_s64(<2 x i64> %a) #0 { +; CHECK: scvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vcvt.i = sitofp <2 x i64> %a to <2 x 
double> + ret <2 x double> %vcvt.i +} + +define <2 x double> @test_vcvtq_f64_u64(<2 x i64> %a) #0 { +; CHECK: ucvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %vcvt.i = uitofp <2 x i64> %a to <2 x double> + ret <2 x double> %vcvt.i +} + +declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #2 + +declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #2 + +declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #2 + +declare <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32>) #2 + +declare <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32>) #2 + +declare <2 x double> @llvm.arm.neon.vrecpe.v2f64(<2 x double>) #2 + +declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) #2 + +declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) #2 + +declare <2 x double> @llvm.arm.neon.vrsqrte.v2f64(<2 x double>) #2 + +declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) #2 + +declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) #2 + +declare <2 x i64> @llvm.aarch64.neon.fcvtau.v2i64.v2f64(<2 x double>) #2 + +declare <4 x i32> @llvm.aarch64.neon.fcvtau.v4i32.v4f32(<4 x float>) #2 + +declare <2 x i32> @llvm.aarch64.neon.fcvtau.v2i32.v2f32(<2 x float>) #2 + +declare <2 x i64> @llvm.aarch64.neon.fcvtas.v2i64.v2f64(<2 x double>) #2 + +declare <4 x i32> @llvm.aarch64.neon.fcvtas.v4i32.v4f32(<4 x float>) #2 + +declare <2 x i32> @llvm.aarch64.neon.fcvtas.v2i32.v2f32(<2 x float>) #2 + +declare <2 x i64> @llvm.aarch64.neon.fcvtmu.v2i64.v2f64(<2 x double>) #2 + +declare <4 x i32> @llvm.aarch64.neon.fcvtmu.v4i32.v4f32(<4 x float>) #2 + +declare <2 x i32> @llvm.aarch64.neon.fcvtmu.v2i32.v2f32(<2 x float>) #2 + +declare <2 x i64> @llvm.aarch64.neon.fcvtms.v2i64.v2f64(<2 x double>) #2 + +declare <4 x i32> @llvm.aarch64.neon.fcvtms.v4i32.v4f32(<4 x float>) #2 + +declare <2 x i32> @llvm.aarch64.neon.fcvtms.v2i32.v2f32(<2 x float>) #2 + +declare <2 x i64> @llvm.aarch64.neon.fcvtpu.v2i64.v2f64(<2 x double>) #2 + +declare <4 x i32> @llvm.aarch64.neon.fcvtpu.v4i32.v4f32(<4 x float>) #2 + +declare <2 
x i32> @llvm.aarch64.neon.fcvtpu.v2i32.v2f32(<2 x float>) #2 + +declare <2 x i64> @llvm.aarch64.neon.fcvtps.v2i64.v2f64(<2 x double>) #2 + +declare <4 x i32> @llvm.aarch64.neon.fcvtps.v4i32.v4f32(<4 x float>) #2 + +declare <2 x i32> @llvm.aarch64.neon.fcvtps.v2i32.v2f32(<2 x float>) #2 + +declare <2 x i64> @llvm.aarch64.neon.fcvtnu.v2i64.v2f64(<2 x double>) #2 + +declare <4 x i32> @llvm.aarch64.neon.fcvtnu.v4i32.v4f32(<4 x float>) #2 + +declare <2 x i32> @llvm.aarch64.neon.fcvtnu.v2i32.v2f32(<2 x float>) #2 + +declare <2 x i64> @llvm.aarch64.neon.fcvtns.v2i64.v2f64(<2 x double>) #2 + +declare <4 x i32> @llvm.aarch64.neon.fcvtns.v4i32.v4f32(<4 x float>) #2 + +declare <2 x i32> @llvm.aarch64.neon.fcvtns.v2i32.v2f32(<2 x float>) #2 + +declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) #3 + +declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) #3 + +declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>) #3 + +declare <2 x double> @llvm.trunc.v2f64(<2 x double>) #3 + +declare <4 x float> @llvm.trunc.v4f32(<4 x float>) #3 + +declare <2 x float> @llvm.trunc.v2f32(<2 x float>) #3 + +declare <2 x double> @llvm.rint.v2f64(<2 x double>) #3 + +declare <4 x float> @llvm.rint.v4f32(<4 x float>) #3 + +declare <2 x float> @llvm.rint.v2f32(<2 x float>) #3 + +declare <2 x double> @llvm.floor.v2f64(<2 x double>) #3 + +declare <4 x float> @llvm.floor.v4f32(<4 x float>) #3 + +declare <2 x float> @llvm.floor.v2f32(<2 x float>) #3 + +declare <2 x double> @llvm.ceil.v2f64(<2 x double>) #3 + +declare <4 x float> @llvm.ceil.v4f32(<4 x float>) #3 + +declare <2 x float> @llvm.ceil.v2f32(<2 x float>) #3 + +declare <2 x double> @llvm.round.v2f64(<2 x double>) #3 + +declare <4 x float> @llvm.round.v4f32(<4 x float>) #3 + +declare <2 x float> @llvm.round.v2f32(<2 x float>) #3 + +declare <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double>) #2 + +declare <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float>) #2 + +declare <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float>) #2 + 
+declare <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double>) #2 + +declare <2 x float> @llvm.aarch64.neon.fcvtn.v2f32.v2f64(<2 x double>) #2 + +declare <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64>) #2 + +declare <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32>) #2 + +declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) #2 + +declare <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64>) #2 + +declare <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32>) #2 + +declare <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16>) #2 + +declare <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64>) #2 + +declare <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32>) #2 + +declare <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16>) #2 + +declare <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8>) #2 + +declare <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8>) #2 + +declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) #2 + +declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>) #2 + +declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) #2 + +declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) #2 + +declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) #2 + +declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) #2 + +declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) #2 + +declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1) #2 + +declare <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32>) #2 + +declare <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32>) #2 + +declare <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16>) #2 + +declare <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16>) #2 + +declare <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8>) #2 + +declare <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8>) #2 + +declare <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64>, <2 x i64>) #2 + +declare <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32>, <4 x i32>) #2 + +declare <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32>, <2 x i32>) #2 + +declare <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16>, <8 x i16>) #2 + +declare <4 x i16> 
@llvm.aarch64.neon.suqadd.v4i16(<4 x i16>, <4 x i16>) #2 + +declare <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8>, <16 x i8>) #2 + +declare <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8>, <8 x i8>) #2 + +declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #3 + +declare <4 x float> @llvm.fabs.v4f32(<4 x float>) #3 + +declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #3 + +declare <2 x i64> @llvm.arm.neon.vabs.v2i64(<2 x i64>) #2 + +declare <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32>) #2 + +declare <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32>) #2 + +declare <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16>) #2 + +declare <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16>) #2 + +declare <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8>) #2 + +declare <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8>) #2 + +declare <2 x i64> @llvm.arm.neon.vqneg.v2i64(<2 x i64>) #2 + +declare <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32>) #2 + +declare <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32>) #2 + +declare <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16>) #2 + +declare <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16>) #2 + +declare <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8>) #2 + +declare <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8>) #2 + +declare <2 x i64> @llvm.arm.neon.vqabs.v2i64(<2 x i64>) #2 + +declare <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32>) #2 + +declare <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32>) #2 + +declare <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16>) #2 + +declare <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16>) #2 + +declare <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8>) #2 + +declare <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8>) #2 + +declare <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64>, <4 x i32>) #2 + +declare <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32>, <8 x i16>) #2 + +declare <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16>, <16 x i8>) #2 + +declare <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64>, <4 x i32>) #2 + +declare 
<4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32>, <8 x i16>) #2 + +declare <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16>, <16 x i8>) #2 + +declare <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64>, <2 x i32>) #2 + +declare <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32>, <4 x i16>) #2 + +declare <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16>, <8 x i8>) #2 + +declare <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64>, <2 x i32>) #2 + +declare <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32>, <4 x i16>) #2 + +declare <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16>, <8 x i8>) #2 + +declare <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32>) #2 + +declare <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16>) #2 + +declare <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8>) #2 + +declare <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32>) #2 + +declare <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16>) #2 + +declare <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8>) #2 + +declare <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32>) #2 + +declare <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16>) #2 + +declare <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8>) #2 + +declare <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32>) #2 + +declare <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16>) #2 + +declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) #2 + +declare <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16>) #2 + +declare <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float>) #2 + + +define <1 x i64> @test_vcvt_s64_f64(<1 x double> %a) { +; CHECK-LABEL: test_vcvt_s64_f64 +; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}} + %1 = fptosi <1 x double> %a to <1 x i64> + ret <1 x i64> %1 +} + +define <1 x i64> @test_vcvt_u64_f64(<1 x double> %a) { +; CHECK-LABEL: test_vcvt_u64_f64 +; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}} + %1 = fptoui <1 x double> %a to <1 x i64> + ret <1 x i64> %1 +} + 
+define <1 x i64> @test_vcvtn_s64_f64(<1 x double> %a) { +; CHECK-LABEL: test_vcvtn_s64_f64 +; CHECK: fcvtns d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> %a) + ret <1 x i64> %1 +} + +define <1 x i64> @test_vcvtn_u64_f64(<1 x double> %a) { +; CHECK-LABEL: test_vcvtn_u64_f64 +; CHECK: fcvtnu d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> %a) + ret <1 x i64> %1 +} + +define <1 x i64> @test_vcvtp_s64_f64(<1 x double> %a) { +; CHECK-LABEL: test_vcvtp_s64_f64 +; CHECK: fcvtps d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> %a) + ret <1 x i64> %1 +} + +define <1 x i64> @test_vcvtp_u64_f64(<1 x double> %a) { +; CHECK-LABEL: test_vcvtp_u64_f64 +; CHECK: fcvtpu d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> %a) + ret <1 x i64> %1 +} + +define <1 x i64> @test_vcvtm_s64_f64(<1 x double> %a) { +; CHECK-LABEL: test_vcvtm_s64_f64 +; CHECK: fcvtms d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> %a) + ret <1 x i64> %1 +} + +define <1 x i64> @test_vcvtm_u64_f64(<1 x double> %a) { +; CHECK-LABEL: test_vcvtm_u64_f64 +; CHECK: fcvtmu d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> %a) + ret <1 x i64> %1 +} + +define <1 x i64> @test_vcvta_s64_f64(<1 x double> %a) { +; CHECK-LABEL: test_vcvta_s64_f64 +; CHECK: fcvtas d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> %a) + ret <1 x i64> %1 +} + +define <1 x i64> @test_vcvta_u64_f64(<1 x double> %a) { +; CHECK-LABEL: test_vcvta_u64_f64 +; CHECK: fcvtau d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> %a) + ret <1 x i64> %1 +} + +define <1 x double> @test_vcvt_f64_s64(<1 x i64> %a) { +; CHECK-LABEL: 
test_vcvt_f64_s64 +; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}} + %1 = sitofp <1 x i64> %a to <1 x double> + ret <1 x double> %1 +} + +define <1 x double> @test_vcvt_f64_u64(<1 x i64> %a) { +; CHECK-LABEL: test_vcvt_f64_u64 +; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}} + %1 = uitofp <1 x i64> %a to <1 x double> + ret <1 x double> %1 +} + +declare <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double>) + +define <1 x double> @test_vrndn_f64(<1 x double> %a) { +; CHECK-LABEL: test_vrndn_f64 +; CHECK: frintn d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double> %a) + ret <1 x double> %1 +} + +define <1 x double> @test_vrnda_f64(<1 x double> %a) { +; CHECK-LABEL: test_vrnda_f64 +; CHECK: frinta d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> @llvm.round.v1f64(<1 x double> %a) + ret <1 x double> %1 +} + +define <1 x double> @test_vrndp_f64(<1 x double> %a) { +; CHECK-LABEL: test_vrndp_f64 +; CHECK: frintp d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> @llvm.ceil.v1f64(<1 x double> %a) + ret <1 x double> %1 +} + +define <1 x double> @test_vrndm_f64(<1 x double> %a) { +; CHECK-LABEL: test_vrndm_f64 +; CHECK: frintm d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> @llvm.floor.v1f64(<1 x double> %a) + ret <1 x double> %1 +} + +define <1 x double> @test_vrndx_f64(<1 x double> %a) { +; CHECK-LABEL: test_vrndx_f64 +; CHECK: frintx d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> @llvm.rint.v1f64(<1 x double> %a) + ret <1 
x double> %1 +} + +define <1 x double> @test_vrnd_f64(<1 x double> %a) { +; CHECK-LABEL: test_vrnd_f64 +; CHECK: frintz d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> @llvm.trunc.v1f64(<1 x double> %a) + ret <1 x double> %1 +} + +define <1 x double> @test_vrndi_f64(<1 x double> %a) { +; CHECK-LABEL: test_vrndi_f64 +; CHECK: frinti d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %a) + ret <1 x double> %1 +} + +declare <1 x double> @llvm.nearbyint.v1f64(<1 x double>) +declare <1 x double> @llvm.trunc.v1f64(<1 x double>) +declare <1 x double> @llvm.rint.v1f64(<1 x double>) +declare <1 x double> @llvm.floor.v1f64(<1 x double>) +declare <1 x double> @llvm.ceil.v1f64(<1 x double>) +declare <1 x double> @llvm.round.v1f64(<1 x double>) +declare <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double>) + +define <1 x double> @test_vrsqrte_f64(<1 x double> %a) { +; CHECK-LABEL: test_vrsqrte_f64 +; CHECK: frsqrte d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double> %a) + ret <1 x double> %1 +} + +define <1 x double> @test_vrecpe_f64(<1 x double> %a) { +; CHECK-LABEL: test_vrecpe_f64 +; CHECK: frecpe d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double> %a) + ret <1 x double> %1 +} + +define <1 x double> @test_vsqrt_f64(<1 x double> %a) { +; CHECK-LABEL: test_vsqrt_f64 +; CHECK: fsqrt d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> @llvm.sqrt.v1f64(<1 x double> %a) + ret <1 x double> %1 +} + +define <1 x double> @test_vrecps_f64(<1 x double> %a, <1 x double> %b) { +; CHECK-LABEL: test_vrecps_f64 +; CHECK: frecps d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double> %a, <1 x double> %b) + ret <1 x double> %1 +} + +define <1 x double> @test_vrsqrts_f64(<1 x double> %a, <1 x double> %b) { +; CHECK-LABEL: test_vrsqrts_f64 +; CHECK: frsqrts d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} + %1 = 
tail call <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double> %a, <1 x double> %b) + ret <1 x double> %1 +} + +declare <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double>, <1 x double>) +declare <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double>, <1 x double>) +declare <1 x double> @llvm.sqrt.v1f64(<1 x double>) +declare <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double>) +declare <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double>)
\ No newline at end of file diff --git a/test/CodeGen/AArch64/neon-mov.ll b/test/CodeGen/AArch64/neon-mov.ll index 42f6a89..60b13b8 100644 --- a/test/CodeGen/AArch64/neon-mov.ll +++ b/test/CodeGen/AArch64/neon-mov.ll @@ -202,4 +202,16 @@ define <2 x double> @fmov2d() { ret <2 x double> < double -1.2e1, double -1.2e1> } +define <2 x i32> @movi1d_1() { +; CHECK: movi d0, #0xffffffff0000 + ret <2 x i32> < i32 -65536, i32 65535> +} + + +declare <2 x i32> @test_movi1d(<2 x i32>, <2 x i32>) +define <2 x i32> @movi1d() { +; CHECK: movi d1, #0xffffffff0000 + %1 = tail call <2 x i32> @test_movi1d(<2 x i32> <i32 -2147483648, i32 2147450880>, <2 x i32> <i32 -65536, i32 65535>) + ret <2 x i32> %1 +} diff --git a/test/CodeGen/AArch64/neon-perm.ll b/test/CodeGen/AArch64/neon-perm.ll new file mode 100644 index 0000000..fa4d54d --- /dev/null +++ b/test/CodeGen/AArch64/neon-perm.ll @@ -0,0 +1,1693 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +%struct.int8x8x2_t = type { [2 x <8 x i8>] } +%struct.int16x4x2_t = type { [2 x <4 x i16>] } +%struct.int32x2x2_t = type { [2 x <2 x i32>] } +%struct.uint8x8x2_t = type { [2 x <8 x i8>] } +%struct.uint16x4x2_t = type { [2 x <4 x i16>] } +%struct.uint32x2x2_t = type { [2 x <2 x i32>] } +%struct.float32x2x2_t = type { [2 x <2 x float>] } +%struct.poly8x8x2_t = type { [2 x <8 x i8>] } +%struct.poly16x4x2_t = type { [2 x <4 x i16>] } +%struct.int8x16x2_t = type { [2 x <16 x i8>] } +%struct.int16x8x2_t = type { [2 x <8 x i16>] } +%struct.int32x4x2_t = type { [2 x <4 x i32>] } +%struct.uint8x16x2_t = type { [2 x <16 x i8>] } +%struct.uint16x8x2_t = type { [2 x <8 x i16>] } +%struct.uint32x4x2_t = type { [2 x <4 x i32>] } +%struct.float32x4x2_t = type { [2 x <4 x float>] } +%struct.poly8x16x2_t = type { [2 x <16 x i8>] } +%struct.poly16x8x2_t = type { [2 x <8 x i16>] } + +define <8 x i8> @test_vuzp1_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vuzp1_s8: +; CHECK: uzp1 {{v[0-9]+}}.8b, 
{{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vuzp1q_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vuzp1q_s8: +; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vuzp1_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vuzp1_s16: +; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vuzp1q_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vuzp1q_s16: +; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + ret <8 x i16> %shuffle.i +} + +define <2 x i32> @test_vuzp1_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vuzp1_s32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2> + ret <2 x i32> %shuffle.i +} + +define <4 x i32> @test_vuzp1q_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vuzp1q_s32: +; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + ret <4 x i32> %shuffle.i +} + +define <2 x i64> @test_vuzp1q_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vuzp1q_s64: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2> + ret <2 x i64> %shuffle.i 
+} + +define <8 x i8> @test_vuzp1_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vuzp1_u8: +; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vuzp1q_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vuzp1q_u8: +; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vuzp1_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vuzp1_u16: +; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vuzp1q_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vuzp1q_u16: +; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + ret <8 x i16> %shuffle.i +} + +define <2 x i32> @test_vuzp1_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vuzp1_u32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2> + ret <2 x i32> %shuffle.i +} + +define <4 x i32> @test_vuzp1q_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vuzp1q_u32: +; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + ret <4 x i32> %shuffle.i +} + +define <2 x i64> @test_vuzp1q_u64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vuzp1q_u64: +; CHECK: ins {{v[0-9]+}}.d[1], 
{{v[0-9]+}}.d[0] +entry: + %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2> + ret <2 x i64> %shuffle.i +} + +define <2 x float> @test_vuzp1_f32(<2 x float> %a, <2 x float> %b) { +; CHECK: test_vuzp1_f32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2> + ret <2 x float> %shuffle.i +} + +define <4 x float> @test_vuzp1q_f32(<4 x float> %a, <4 x float> %b) { +; CHECK: test_vuzp1q_f32: +; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + ret <4 x float> %shuffle.i +} + +define <2 x double> @test_vuzp1q_f64(<2 x double> %a, <2 x double> %b) { +; CHECK: test_vuzp1q_f64: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2> + ret <2 x double> %shuffle.i +} + +define <8 x i8> @test_vuzp1_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vuzp1_p8: +; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vuzp1q_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vuzp1q_p8: +; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vuzp1_p16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vuzp1_p16: +; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> 
@test_vuzp1q_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vuzp1q_p16: +; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + ret <8 x i16> %shuffle.i +} + +define <8 x i8> @test_vuzp2_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vuzp2_s8: +; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vuzp2q_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vuzp2q_s8: +; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vuzp2_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vuzp2_s16: +; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vuzp2q_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vuzp2q_s16: +; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + ret <8 x i16> %shuffle.i +} + +define <2 x i32> @test_vuzp2_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vuzp2_s32: +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3> + ret <2 x i32> %shuffle.i +} + +define <4 x i32> @test_vuzp2q_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vuzp2q_s32: +; CHECK: uzp2 {{v[0-9]+}}.4s, 
{{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + ret <4 x i32> %shuffle.i +} + +define <2 x i64> @test_vuzp2q_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vuzp2q_s64: +; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3> + ret <2 x i64> %shuffle.i +} + +define <8 x i8> @test_vuzp2_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vuzp2_u8: +; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vuzp2q_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vuzp2q_u8: +; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vuzp2_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vuzp2_u16: +; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vuzp2q_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vuzp2q_u16: +; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + ret <8 x i16> %shuffle.i +} + +define <2 x i32> @test_vuzp2_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vuzp2_u32: +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %shuffle.i = shufflevector <2 x i32> %a, <2 
x i32> %b, <2 x i32> <i32 1, i32 3> + ret <2 x i32> %shuffle.i +} + +define <4 x i32> @test_vuzp2q_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vuzp2q_u32: +; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + ret <4 x i32> %shuffle.i +} + +define <2 x i64> @test_vuzp2q_u64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vuzp2q_u64: +; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3> + ret <2 x i64> %shuffle.i +} + +define <2 x float> @test_vuzp2_f32(<2 x float> %a, <2 x float> %b) { +; CHECK: test_vuzp2_f32: +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3> + ret <2 x float> %shuffle.i +} + +define <4 x float> @test_vuzp2q_f32(<4 x float> %a, <4 x float> %b) { +; CHECK: test_vuzp2q_f32: +; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + ret <4 x float> %shuffle.i +} + +define <2 x double> @test_vuzp2q_f64(<2 x double> %a, <2 x double> %b) { +; CHECK: test_vuzp2q_f64: +; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3> + ret <2 x double> %shuffle.i +} + +define <8 x i8> @test_vuzp2_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vuzp2_p8: +; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vuzp2q_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: 
test_vuzp2q_p8: +; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vuzp2_p16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vuzp2_p16: +; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vuzp2q_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vuzp2q_p16: +; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + ret <8 x i16> %shuffle.i +} + +define <8 x i8> @test_vzip1_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vzip1_s8: +; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vzip1q_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vzip1q_s8: +; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vzip1_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vzip1_s16: +; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vzip1q_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vzip1q_s16: +; 
CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> + ret <8 x i16> %shuffle.i +} + +define <2 x i32> @test_vzip1_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vzip1_s32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2> + ret <2 x i32> %shuffle.i +} + +define <4 x i32> @test_vzip1q_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vzip1q_s32: +; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> + ret <4 x i32> %shuffle.i +} + +define <2 x i64> @test_vzip1q_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vzip1q_s64: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2> + ret <2 x i64> %shuffle.i +} + +define <8 x i8> @test_vzip1_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vzip1_u8: +; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vzip1q_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vzip1q_u8: +; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vzip1_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vzip1_u16: +; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> + ret <4 x 
i16> %shuffle.i +} + +define <8 x i16> @test_vzip1q_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vzip1q_u16: +; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> + ret <8 x i16> %shuffle.i +} + +define <2 x i32> @test_vzip1_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vzip1_u32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2> + ret <2 x i32> %shuffle.i +} + +define <4 x i32> @test_vzip1q_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vzip1q_u32: +; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> + ret <4 x i32> %shuffle.i +} + +define <2 x i64> @test_vzip1q_u64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vzip1q_u64: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2> + ret <2 x i64> %shuffle.i +} + +define <2 x float> @test_vzip1_f32(<2 x float> %a, <2 x float> %b) { +; CHECK: test_vzip1_f32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2> + ret <2 x float> %shuffle.i +} + +define <4 x float> @test_vzip1q_f32(<4 x float> %a, <4 x float> %b) { +; CHECK: test_vzip1q_f32: +; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> + ret <4 x float> %shuffle.i +} + +define <2 x double> @test_vzip1q_f64(<2 x double> %a, <2 x double> %b) { +; CHECK: test_vzip1q_f64: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2> + ret <2 x double> 
%shuffle.i +} + +define <8 x i8> @test_vzip1_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vzip1_p8: +; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vzip1q_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vzip1q_p8: +; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vzip1_p16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vzip1_p16: +; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vzip1q_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vzip1q_p16: +; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> + ret <8 x i16> %shuffle.i +} + +define <8 x i8> @test_vzip2_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vzip2_s8: +; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vzip2q_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vzip2q_s8: +; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> + ret <16 x 
i8> %shuffle.i +} + +define <4 x i16> @test_vzip2_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vzip2_s16: +; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vzip2q_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vzip2q_s16: +; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> + ret <8 x i16> %shuffle.i +} + +define <2 x i32> @test_vzip2_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vzip2_s32: +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3> + ret <2 x i32> %shuffle.i +} + +define <4 x i32> @test_vzip2q_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vzip2q_s32: +; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7> + ret <4 x i32> %shuffle.i +} + +define <2 x i64> @test_vzip2q_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vzip2q_s64: +; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +entry: + %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3> + ret <2 x i64> %shuffle.i +} + +define <8 x i8> @test_vzip2_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vzip2_u8: +; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vzip2q_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vzip2q_u8: +; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 
24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vzip2_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vzip2_u16: +; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vzip2q_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vzip2q_u16: +; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> + ret <8 x i16> %shuffle.i +} + +define <2 x i32> @test_vzip2_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vzip2_u32: +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3> + ret <2 x i32> %shuffle.i +} + +define <4 x i32> @test_vzip2q_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vzip2q_u32: +; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7> + ret <4 x i32> %shuffle.i +} + +define <2 x i64> @test_vzip2q_u64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vzip2q_u64: +; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +entry: + %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3> + ret <2 x i64> %shuffle.i +} + +define <2 x float> @test_vzip2_f32(<2 x float> %a, <2 x float> %b) { +; CHECK: test_vzip2_f32: +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3> + ret <2 x float> %shuffle.i +} + +define <4 x float> @test_vzip2q_f32(<4 x float> %a, <4 x float> %b) { +; CHECK: test_vzip2q_f32: +; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, 
{{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7> + ret <4 x float> %shuffle.i +} + +define <2 x double> @test_vzip2q_f64(<2 x double> %a, <2 x double> %b) { +; CHECK: test_vzip2q_f64: +; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +entry: + %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3> + ret <2 x double> %shuffle.i +} + +define <8 x i8> @test_vzip2_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vzip2_p8: +; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vzip2q_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vzip2q_p8: +; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vzip2_p16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vzip2_p16: +; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vzip2q_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vzip2q_p16: +; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> + ret <8 x i16> %shuffle.i +} + +define <8 x i8> @test_vtrn1_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vtrn1_s8: +; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, 
i32 4, i32 12, i32 6, i32 14> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vtrn1q_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vtrn1q_s8: +; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vtrn1_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vtrn1_s16: +; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vtrn1q_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vtrn1q_s16: +; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> + ret <8 x i16> %shuffle.i +} + +define <2 x i32> @test_vtrn1_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vtrn1_s32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2> + ret <2 x i32> %shuffle.i +} + +define <4 x i32> @test_vtrn1q_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vtrn1q_s32: +; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + ret <4 x i32> %shuffle.i +} + +define <2 x i64> @test_vtrn1q_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vtrn1q_s64: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2> + ret <2 x i64> %shuffle.i +} + +define <8 x i8> @test_vtrn1_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vtrn1_u8: +; CHECK: trn1 {{v[0-9]+}}.8b, 
{{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vtrn1q_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vtrn1q_u8: +; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vtrn1_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vtrn1_u16: +; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vtrn1q_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vtrn1q_u16: +; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> + ret <8 x i16> %shuffle.i +} + +define <2 x i32> @test_vtrn1_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vtrn1_u32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2> + ret <2 x i32> %shuffle.i +} + +define <4 x i32> @test_vtrn1q_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vtrn1q_u32: +; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + ret <4 x i32> %shuffle.i +} + +define <2 x i64> @test_vtrn1q_u64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vtrn1q_u64: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2> + ret <2 x i64> %shuffle.i 
+} + +define <2 x float> @test_vtrn1_f32(<2 x float> %a, <2 x float> %b) { +; CHECK: test_vtrn1_f32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2> + ret <2 x float> %shuffle.i +} + +define <4 x float> @test_vtrn1q_f32(<4 x float> %a, <4 x float> %b) { +; CHECK: test_vtrn1q_f32: +; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + ret <4 x float> %shuffle.i +} + +define <2 x double> @test_vtrn1q_f64(<2 x double> %a, <2 x double> %b) { +; CHECK: test_vtrn1q_f64: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2> + ret <2 x double> %shuffle.i +} + +define <8 x i8> @test_vtrn1_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vtrn1_p8: +; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vtrn1q_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vtrn1q_p8: +; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vtrn1_p16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vtrn1_p16: +; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vtrn1q_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vtrn1q_p16: +; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, 
{{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> + ret <8 x i16> %shuffle.i +} + +define <8 x i8> @test_vtrn2_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vtrn2_s8: +; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vtrn2q_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vtrn2q_s8: +; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vtrn2_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vtrn2_s16: +; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vtrn2q_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vtrn2q_s16: +; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> + ret <8 x i16> %shuffle.i +} + +define <2 x i32> @test_vtrn2_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vtrn2_s32: +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3> + ret <2 x i32> %shuffle.i +} + +define <4 x i32> @test_vtrn2q_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vtrn2q_s32: +; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 
3, i32 7> + ret <4 x i32> %shuffle.i +} + +define <2 x i64> @test_vtrn2q_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vtrn2q_s64: +; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +entry: + %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3> + ret <2 x i64> %shuffle.i +} + +define <8 x i8> @test_vtrn2_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vtrn2_u8: +; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vtrn2q_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vtrn2q_u8: +; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vtrn2_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vtrn2_u16: +; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vtrn2q_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vtrn2q_u16: +; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> + ret <8 x i16> %shuffle.i +} + +define <2 x i32> @test_vtrn2_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vtrn2_u32: +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3> + ret <2 x i32> %shuffle.i +} + +define <4 x i32> @test_vtrn2q_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vtrn2q_u32: +; CHECK: trn2 {{v[0-9]+}}.4s, 
{{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + ret <4 x i32> %shuffle.i +} + +define <2 x i64> @test_vtrn2q_u64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vtrn2q_u64: +; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +entry: + %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3> + ret <2 x i64> %shuffle.i +} + +define <2 x float> @test_vtrn2_f32(<2 x float> %a, <2 x float> %b) { +; CHECK: test_vtrn2_f32: +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3> + ret <2 x float> %shuffle.i +} + +define <4 x float> @test_vtrn2q_f32(<4 x float> %a, <4 x float> %b) { +; CHECK: test_vtrn2q_f32: +; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + ret <4 x float> %shuffle.i +} + +define <2 x double> @test_vtrn2q_f64(<2 x double> %a, <2 x double> %b) { +; CHECK: test_vtrn2q_f64: +; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +entry: + %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3> + ret <2 x double> %shuffle.i +} + +define <8 x i8> @test_vtrn2_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vtrn2_p8: +; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vtrn2q_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vtrn2q_p8: +; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> + ret <16 x i8> %shuffle.i +} + +define <4 x 
i16> @test_vtrn2_p16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vtrn2_p16: +; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vtrn2q_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vtrn2q_p16: +; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> + ret <8 x i16> %shuffle.i +} + +define %struct.int8x8x2_t @test_vuzp_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vuzp_s8: +; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vuzp.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + %vuzp1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1 + ret %struct.int8x8x2_t %.fca.0.1.insert +} + +define %struct.int16x4x2_t @test_vuzp_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vuzp_s16: +; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vuzp.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %vuzp1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vuzp1.i, 0, 1 + ret %struct.int16x4x2_t %.fca.0.1.insert +} + +define %struct.int32x2x2_t @test_vuzp_s32(<2 x i32> %a, <2 x 
i32> %b) { +; CHECK: test_vuzp_s32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %vuzp.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2> + %vuzp1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3> + %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vuzp1.i, 0, 1 + ret %struct.int32x2x2_t %.fca.0.1.insert +} + +define %struct.uint8x8x2_t @test_vuzp_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vuzp_u8: +; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vuzp.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + %vuzp1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + %.fca.0.0.insert = insertvalue %struct.uint8x8x2_t undef, <8 x i8> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1 + ret %struct.uint8x8x2_t %.fca.0.1.insert +} + +define %struct.uint16x4x2_t @test_vuzp_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vuzp_u16: +; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vuzp.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %vuzp1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %.fca.0.0.insert = insertvalue %struct.uint16x4x2_t undef, <4 x i16> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint16x4x2_t %.fca.0.0.insert, <4 x i16> %vuzp1.i, 0, 1 + ret %struct.uint16x4x2_t %.fca.0.1.insert +} + +define %struct.uint32x2x2_t @test_vuzp_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vuzp_u32: +; CHECK: ins 
{{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %vuzp.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2> + %vuzp1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3> + %.fca.0.0.insert = insertvalue %struct.uint32x2x2_t undef, <2 x i32> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint32x2x2_t %.fca.0.0.insert, <2 x i32> %vuzp1.i, 0, 1 + ret %struct.uint32x2x2_t %.fca.0.1.insert +} + +define %struct.float32x2x2_t @test_vuzp_f32(<2 x float> %a, <2 x float> %b) { +; CHECK: test_vuzp_f32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %vuzp.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2> + %vuzp1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3> + %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vuzp1.i, 0, 1 + ret %struct.float32x2x2_t %.fca.0.1.insert +} + +define %struct.poly8x8x2_t @test_vuzp_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vuzp_p8: +; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vuzp.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + %vuzp1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + %.fca.0.0.insert = insertvalue %struct.poly8x8x2_t undef, <8 x i8> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1 + ret %struct.poly8x8x2_t %.fca.0.1.insert +} + +define %struct.poly16x4x2_t @test_vuzp_p16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vuzp_p16: +; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK: uzp2 {{v[0-9]+}}.4h, 
{{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vuzp.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %vuzp1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %.fca.0.0.insert = insertvalue %struct.poly16x4x2_t undef, <4 x i16> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly16x4x2_t %.fca.0.0.insert, <4 x i16> %vuzp1.i, 0, 1 + ret %struct.poly16x4x2_t %.fca.0.1.insert +} + +define %struct.int8x16x2_t @test_vuzpq_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vuzpq_s8: +; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vuzp.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> + %vuzp1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> + %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vuzp1.i, 0, 1 + ret %struct.int8x16x2_t %.fca.0.1.insert +} + +define %struct.int16x8x2_t @test_vuzpq_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vuzpq_s16: +; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vuzp.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + %vuzp1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vuzp1.i, 0, 1 + ret %struct.int16x8x2_t 
%.fca.0.1.insert +} + +define %struct.int32x4x2_t @test_vuzpq_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vuzpq_s32: +; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vuzp.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %vuzp1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vuzp1.i, 0, 1 + ret %struct.int32x4x2_t %.fca.0.1.insert +} + +define %struct.uint8x16x2_t @test_vuzpq_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vuzpq_u8: +; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vuzp.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> + %vuzp1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> + %.fca.0.0.insert = insertvalue %struct.uint8x16x2_t undef, <16 x i8> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint8x16x2_t %.fca.0.0.insert, <16 x i8> %vuzp1.i, 0, 1 + ret %struct.uint8x16x2_t %.fca.0.1.insert +} + +define %struct.uint16x8x2_t @test_vuzpq_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vuzpq_u16: +; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vuzp.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + %vuzp1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + 
%.fca.0.0.insert = insertvalue %struct.uint16x8x2_t undef, <8 x i16> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint16x8x2_t %.fca.0.0.insert, <8 x i16> %vuzp1.i, 0, 1 + ret %struct.uint16x8x2_t %.fca.0.1.insert +} + +define %struct.uint32x4x2_t @test_vuzpq_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vuzpq_u32: +; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vuzp.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %vuzp1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %.fca.0.0.insert = insertvalue %struct.uint32x4x2_t undef, <4 x i32> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint32x4x2_t %.fca.0.0.insert, <4 x i32> %vuzp1.i, 0, 1 + ret %struct.uint32x4x2_t %.fca.0.1.insert +} + +define %struct.float32x4x2_t @test_vuzpq_f32(<4 x float> %a, <4 x float> %b) { +; CHECK: test_vuzpq_f32: +; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vuzp.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %vuzp1.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vuzp1.i, 0, 1 + ret %struct.float32x4x2_t %.fca.0.1.insert +} + +define %struct.poly8x16x2_t @test_vuzpq_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vuzpq_p8: +; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vuzp.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> + %vuzp1.i = 
shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> + %.fca.0.0.insert = insertvalue %struct.poly8x16x2_t undef, <16 x i8> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly8x16x2_t %.fca.0.0.insert, <16 x i8> %vuzp1.i, 0, 1 + ret %struct.poly8x16x2_t %.fca.0.1.insert +} + +define %struct.poly16x8x2_t @test_vuzpq_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vuzpq_p16: +; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vuzp.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + %vuzp1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + %.fca.0.0.insert = insertvalue %struct.poly16x8x2_t undef, <8 x i16> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly16x8x2_t %.fca.0.0.insert, <8 x i16> %vuzp1.i, 0, 1 + ret %struct.poly16x8x2_t %.fca.0.1.insert +} + +define %struct.int8x8x2_t @test_vzip_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vzip_s8: +; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vzip.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> + %vzip1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> + %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vzip1.i, 0, 1 + ret %struct.int8x8x2_t %.fca.0.1.insert +} + +define %struct.int16x4x2_t @test_vzip_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vzip_s16: +; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK: zip2 
{{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vzip.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> + %vzip1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7> + %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vzip1.i, 0, 1 + ret %struct.int16x4x2_t %.fca.0.1.insert +} + +define %struct.int32x2x2_t @test_vzip_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vzip_s32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %vzip.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2> + %vzip1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3> + %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vzip1.i, 0, 1 + ret %struct.int32x2x2_t %.fca.0.1.insert +} + +define %struct.uint8x8x2_t @test_vzip_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vzip_u8: +; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vzip.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> + %vzip1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> + %.fca.0.0.insert = insertvalue %struct.uint8x8x2_t undef, <8 x i8> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vzip1.i, 0, 1 + ret %struct.uint8x8x2_t %.fca.0.1.insert +} + +define %struct.uint16x4x2_t @test_vzip_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vzip_u16: +; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vzip.i 
= shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> + %vzip1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7> + %.fca.0.0.insert = insertvalue %struct.uint16x4x2_t undef, <4 x i16> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint16x4x2_t %.fca.0.0.insert, <4 x i16> %vzip1.i, 0, 1 + ret %struct.uint16x4x2_t %.fca.0.1.insert +} + +define %struct.uint32x2x2_t @test_vzip_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vzip_u32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %vzip.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2> + %vzip1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3> + %.fca.0.0.insert = insertvalue %struct.uint32x2x2_t undef, <2 x i32> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint32x2x2_t %.fca.0.0.insert, <2 x i32> %vzip1.i, 0, 1 + ret %struct.uint32x2x2_t %.fca.0.1.insert +} + +define %struct.float32x2x2_t @test_vzip_f32(<2 x float> %a, <2 x float> %b) { +; CHECK: test_vzip_f32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %vzip.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2> + %vzip1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3> + %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vzip1.i, 0, 1 + ret %struct.float32x2x2_t %.fca.0.1.insert +} + +define %struct.poly8x8x2_t @test_vzip_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vzip_p8: +; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vzip.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> + %vzip1.i = shufflevector <8 x 
i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> + %.fca.0.0.insert = insertvalue %struct.poly8x8x2_t undef, <8 x i8> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly8x8x2_t %.fca.0.0.insert, <8 x i8> %vzip1.i, 0, 1 + ret %struct.poly8x8x2_t %.fca.0.1.insert +} + +define %struct.poly16x4x2_t @test_vzip_p16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vzip_p16: +; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vzip.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> + %vzip1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7> + %.fca.0.0.insert = insertvalue %struct.poly16x4x2_t undef, <4 x i16> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly16x4x2_t %.fca.0.0.insert, <4 x i16> %vzip1.i, 0, 1 + ret %struct.poly16x4x2_t %.fca.0.1.insert +} + +define %struct.int8x16x2_t @test_vzipq_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vzipq_s8: +; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vzip.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> + %vzip1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> + %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vzip1.i, 0, 1 + ret %struct.int8x16x2_t %.fca.0.1.insert +} + +define %struct.int16x8x2_t @test_vzipq_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vzipq_s16: +; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: zip2 
{{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vzip.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> + %vzip1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> + %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vzip1.i, 0, 1 + ret %struct.int16x8x2_t %.fca.0.1.insert +} + +define %struct.int32x4x2_t @test_vzipq_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vzipq_s32: +; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vzip.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> + %vzip1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7> + %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vzip1.i, 0, 1 + ret %struct.int32x4x2_t %.fca.0.1.insert +} + +define %struct.uint8x16x2_t @test_vzipq_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vzipq_u8: +; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vzip.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> + %vzip1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> + %.fca.0.0.insert = insertvalue %struct.uint8x16x2_t undef, <16 x i8> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint8x16x2_t %.fca.0.0.insert, <16 x i8> %vzip1.i, 0, 1 + ret 
%struct.uint8x16x2_t %.fca.0.1.insert +} + +define %struct.uint16x8x2_t @test_vzipq_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vzipq_u16: +; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vzip.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> + %vzip1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> + %.fca.0.0.insert = insertvalue %struct.uint16x8x2_t undef, <8 x i16> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint16x8x2_t %.fca.0.0.insert, <8 x i16> %vzip1.i, 0, 1 + ret %struct.uint16x8x2_t %.fca.0.1.insert +} + +define %struct.uint32x4x2_t @test_vzipq_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vzipq_u32: +; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vzip.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> + %vzip1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7> + %.fca.0.0.insert = insertvalue %struct.uint32x4x2_t undef, <4 x i32> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint32x4x2_t %.fca.0.0.insert, <4 x i32> %vzip1.i, 0, 1 + ret %struct.uint32x4x2_t %.fca.0.1.insert +} + +define %struct.float32x4x2_t @test_vzipq_f32(<4 x float> %a, <4 x float> %b) { +; CHECK: test_vzipq_f32: +; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vzip.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> + %vzip1.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7> + %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x4x2_t 
%.fca.0.0.insert, <4 x float> %vzip1.i, 0, 1 + ret %struct.float32x4x2_t %.fca.0.1.insert +} + +define %struct.poly8x16x2_t @test_vzipq_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vzipq_p8: +; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vzip.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> + %vzip1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> + %.fca.0.0.insert = insertvalue %struct.poly8x16x2_t undef, <16 x i8> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly8x16x2_t %.fca.0.0.insert, <16 x i8> %vzip1.i, 0, 1 + ret %struct.poly8x16x2_t %.fca.0.1.insert +} + +define %struct.poly16x8x2_t @test_vzipq_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vzipq_p16: +; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vzip.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> + %vzip1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> + %.fca.0.0.insert = insertvalue %struct.poly16x8x2_t undef, <8 x i16> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly16x8x2_t %.fca.0.0.insert, <8 x i16> %vzip1.i, 0, 1 + ret %struct.poly16x8x2_t %.fca.0.1.insert +} + +define %struct.int8x8x2_t @test_vtrn_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vtrn_s8: +; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vtrn.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 
14> + %vtrn1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> + %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vtrn1.i, 0, 1 + ret %struct.int8x8x2_t %.fca.0.1.insert +} + +define %struct.int16x4x2_t @test_vtrn_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vtrn_s16: +; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vtrn.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + %vtrn1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vtrn1.i, 0, 1 + ret %struct.int16x4x2_t %.fca.0.1.insert +} + +define %struct.int32x2x2_t @test_vtrn_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vtrn_s32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %vtrn.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2> + %vtrn1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3> + %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vtrn1.i, 0, 1 + ret %struct.int32x2x2_t %.fca.0.1.insert +} + +define %struct.uint8x8x2_t @test_vtrn_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vtrn_u8: +; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vtrn.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> + %vtrn1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> 
<i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> + %.fca.0.0.insert = insertvalue %struct.uint8x8x2_t undef, <8 x i8> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vtrn1.i, 0, 1 + ret %struct.uint8x8x2_t %.fca.0.1.insert +} + +define %struct.uint16x4x2_t @test_vtrn_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vtrn_u16: +; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vtrn.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + %vtrn1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + %.fca.0.0.insert = insertvalue %struct.uint16x4x2_t undef, <4 x i16> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint16x4x2_t %.fca.0.0.insert, <4 x i16> %vtrn1.i, 0, 1 + ret %struct.uint16x4x2_t %.fca.0.1.insert +} + +define %struct.uint32x2x2_t @test_vtrn_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vtrn_u32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %vtrn.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2> + %vtrn1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3> + %.fca.0.0.insert = insertvalue %struct.uint32x2x2_t undef, <2 x i32> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint32x2x2_t %.fca.0.0.insert, <2 x i32> %vtrn1.i, 0, 1 + ret %struct.uint32x2x2_t %.fca.0.1.insert +} + +define %struct.float32x2x2_t @test_vtrn_f32(<2 x float> %a, <2 x float> %b) { +; CHECK: test_vtrn_f32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %vtrn.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2> + %vtrn1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3> + %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vtrn.i, 0, 0 + 
%.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vtrn1.i, 0, 1 + ret %struct.float32x2x2_t %.fca.0.1.insert +} + +define %struct.poly8x8x2_t @test_vtrn_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vtrn_p8: +; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vtrn.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> + %vtrn1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> + %.fca.0.0.insert = insertvalue %struct.poly8x8x2_t undef, <8 x i8> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly8x8x2_t %.fca.0.0.insert, <8 x i8> %vtrn1.i, 0, 1 + ret %struct.poly8x8x2_t %.fca.0.1.insert +} + +define %struct.poly16x4x2_t @test_vtrn_p16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vtrn_p16: +; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vtrn.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + %vtrn1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + %.fca.0.0.insert = insertvalue %struct.poly16x4x2_t undef, <4 x i16> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly16x4x2_t %.fca.0.0.insert, <4 x i16> %vtrn1.i, 0, 1 + ret %struct.poly16x4x2_t %.fca.0.1.insert +} + +define %struct.int8x16x2_t @test_vtrnq_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vtrnq_s8: +; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vtrn.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> + %vtrn1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 
17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> + %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vtrn1.i, 0, 1 + ret %struct.int8x16x2_t %.fca.0.1.insert +} + +define %struct.int16x8x2_t @test_vtrnq_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vtrnq_s16: +; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vtrn.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> + %vtrn1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> + %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vtrn1.i, 0, 1 + ret %struct.int16x8x2_t %.fca.0.1.insert +} + +define %struct.int32x4x2_t @test_vtrnq_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vtrnq_s32: +; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vtrn.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + %vtrn1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vtrn1.i, 0, 1 + ret %struct.int32x4x2_t %.fca.0.1.insert +} + +define %struct.uint8x16x2_t @test_vtrnq_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vtrnq_u8: +; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vtrn.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> 
<i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> + %vtrn1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> + %.fca.0.0.insert = insertvalue %struct.uint8x16x2_t undef, <16 x i8> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint8x16x2_t %.fca.0.0.insert, <16 x i8> %vtrn1.i, 0, 1 + ret %struct.uint8x16x2_t %.fca.0.1.insert +} + +define %struct.uint16x8x2_t @test_vtrnq_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vtrnq_u16: +; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vtrn.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> + %vtrn1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> + %.fca.0.0.insert = insertvalue %struct.uint16x8x2_t undef, <8 x i16> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint16x8x2_t %.fca.0.0.insert, <8 x i16> %vtrn1.i, 0, 1 + ret %struct.uint16x8x2_t %.fca.0.1.insert +} + +define %struct.uint32x4x2_t @test_vtrnq_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vtrnq_u32: +; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vtrn.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + %vtrn1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + %.fca.0.0.insert = insertvalue %struct.uint32x4x2_t undef, <4 x i32> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint32x4x2_t %.fca.0.0.insert, <4 x i32> %vtrn1.i, 0, 1 + ret %struct.uint32x4x2_t %.fca.0.1.insert +} + +define %struct.float32x4x2_t @test_vtrnq_f32(<4 x float> %a, <4 x float> %b) { +; CHECK: 
test_vtrnq_f32: +; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vtrn.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + %vtrn1.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vtrn1.i, 0, 1 + ret %struct.float32x4x2_t %.fca.0.1.insert +} + +define %struct.poly8x16x2_t @test_vtrnq_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vtrnq_p8: +; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vtrn.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> + %vtrn1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> + %.fca.0.0.insert = insertvalue %struct.poly8x16x2_t undef, <16 x i8> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly8x16x2_t %.fca.0.0.insert, <16 x i8> %vtrn1.i, 0, 1 + ret %struct.poly8x16x2_t %.fca.0.1.insert +} + +define %struct.poly16x8x2_t @test_vtrnq_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vtrnq_p16: +; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vtrn.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> + %vtrn1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> + %.fca.0.0.insert = insertvalue %struct.poly16x8x2_t undef, <8 x i16> %vtrn.i, 0, 0 + 
%.fca.0.1.insert = insertvalue %struct.poly16x8x2_t %.fca.0.0.insert, <8 x i16> %vtrn1.i, 0, 1 + ret %struct.poly16x8x2_t %.fca.0.1.insert +} + +define %struct.uint8x8x2_t @test_uzp(<16 x i8> %y) { +; CHECK: test_uzp: + + %vuzp.i = shufflevector <16 x i8> %y, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + %vuzp1.i = shufflevector <16 x i8> %y, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + %.fca.0.0.insert = insertvalue %struct.uint8x8x2_t undef, <8 x i8> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1 + ret %struct.uint8x8x2_t %.fca.0.1.insert + +; CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1] +; CHECK-NEXT: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK-NEXT: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} diff --git a/test/CodeGen/AArch64/neon-rounding-shift.ll b/test/CodeGen/AArch64/neon-rounding-shift.ll index 404e491..5b4ec28 100644 --- a/test/CodeGen/AArch64/neon-rounding-shift.ll +++ b/test/CodeGen/AArch64/neon-rounding-shift.ll @@ -102,23 +102,6 @@ define <4 x i32> @test_srshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ret <4 x i32> %tmp1 } -declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_urshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_urshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: urshl d0, d0, d1 - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_srshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_srshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: srshl d0, d0, d1 - ret <1 x i64> %tmp1 -} - declare <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64>, <2 x i64>) declare <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64>, <2 x i64>) diff --git 
a/test/CodeGen/AArch64/neon-saturating-add-sub.ll b/test/CodeGen/AArch64/neon-saturating-add-sub.ll index b2fac1f..fc60d90 100644 --- a/test/CodeGen/AArch64/neon-saturating-add-sub.ll +++ b/test/CodeGen/AArch64/neon-saturating-add-sub.ll @@ -102,22 +102,7 @@ define <4 x i32> @test_sqadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ret <4 x i32> %tmp1 } -declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_uqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_uqadd_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: uqadd d0, d0, d1 - ret <1 x i64> %tmp1 -} -define <1 x i64> @test_sqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sqadd_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: sqadd d0, d0, d1 - ret <1 x i64> %tmp1 -} declare <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64>, <2 x i64>) declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) @@ -254,21 +239,3 @@ define <2 x i64> @test_sqsub_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; CHECK: sqsub v0.2d, v0.2d, v1.2d ret <2 x i64> %tmp1 } - -declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_uqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_uqsub_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: uqsub d0, d0, d1 - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_sqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sqsub_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: sqsub d0, d0, d1 - ret <1 x i64> %tmp1 -} - diff --git a/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll b/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll index 
05d8dfe..d89262c 100644 --- a/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll +++ b/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll @@ -102,23 +102,6 @@ define <4 x i32> @test_sqrshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ret <4 x i32> %tmp1 } -declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_uqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_uqrshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: uqrshl d0, d0, d1 - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_sqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sqrshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: sqrshl d0, d0, d1 - ret <1 x i64> %tmp1 -} - declare <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64>, <2 x i64>) declare <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64>, <2 x i64>) diff --git a/test/CodeGen/AArch64/neon-saturating-shift.ll b/test/CodeGen/AArch64/neon-saturating-shift.ll index 3b7f78c..11009fb 100644 --- a/test/CodeGen/AArch64/neon-saturating-shift.ll +++ b/test/CodeGen/AArch64/neon-saturating-shift.ll @@ -102,23 +102,6 @@ define <4 x i32> @test_sqshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ret <4 x i32> %tmp1 } -declare <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_uqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_uqshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: uqshl d0, d0, d1 - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_sqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sqshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: sqshl d0, d0, d1 - ret <1 x i64> 
%tmp1 -} - declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>) declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>) diff --git a/test/CodeGen/AArch64/neon-scalar-abs.ll b/test/CodeGen/AArch64/neon-scalar-abs.ll new file mode 100644 index 0000000..03a89e04 --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-abs.ll @@ -0,0 +1,61 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +define i64 @test_vabsd_s64(i64 %a) { +; CHECK: test_vabsd_s64 +; CHECK: abs {{d[0-9]+}}, {{d[0-9]+}} +entry: + %vabs.i = insertelement <1 x i64> undef, i64 %a, i32 0 + %vabs1.i = tail call <1 x i64> @llvm.aarch64.neon.vabs(<1 x i64> %vabs.i) + %0 = extractelement <1 x i64> %vabs1.i, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vabs(<1 x i64>) + +define i8 @test_vqabsb_s8(i8 %a) { +; CHECK: test_vqabsb_s8 +; CHECK: sqabs {{b[0-9]+}}, {{b[0-9]+}} +entry: + %vqabs.i = insertelement <1 x i8> undef, i8 %a, i32 0 + %vqabs1.i = call <1 x i8> @llvm.arm.neon.vqabs.v1i8(<1 x i8> %vqabs.i) + %0 = extractelement <1 x i8> %vqabs1.i, i32 0 + ret i8 %0 +} + +declare <1 x i8> @llvm.arm.neon.vqabs.v1i8(<1 x i8>) + +define i16 @test_vqabsh_s16(i16 %a) { +; CHECK: test_vqabsh_s16 +; CHECK: sqabs {{h[0-9]+}}, {{h[0-9]+}} +entry: + %vqabs.i = insertelement <1 x i16> undef, i16 %a, i32 0 + %vqabs1.i = call <1 x i16> @llvm.arm.neon.vqabs.v1i16(<1 x i16> %vqabs.i) + %0 = extractelement <1 x i16> %vqabs1.i, i32 0 + ret i16 %0 +} + +declare <1 x i16> @llvm.arm.neon.vqabs.v1i16(<1 x i16>) + +define i32 @test_vqabss_s32(i32 %a) { +; CHECK: test_vqabss_s32 +; CHECK: sqabs {{s[0-9]+}}, {{s[0-9]+}} +entry: + %vqabs.i = insertelement <1 x i32> undef, i32 %a, i32 0 + %vqabs1.i = call <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32> %vqabs.i) + %0 = extractelement <1 x i32> %vqabs1.i, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32>) + +define i64 @test_vqabsd_s64(i64 %a) { +; CHECK: 
test_vqabsd_s64 +; CHECK: sqabs {{d[0-9]+}}, {{d[0-9]+}} +entry: + %vqabs.i = insertelement <1 x i64> undef, i64 %a, i32 0 + %vqabs1.i = call <1 x i64> @llvm.arm.neon.vqabs.v1i64(<1 x i64> %vqabs.i) + %0 = extractelement <1 x i64> %vqabs1.i, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.arm.neon.vqabs.v1i64(<1 x i64>) diff --git a/test/CodeGen/AArch64/neon-scalar-add-sub.ll b/test/CodeGen/AArch64/neon-scalar-add-sub.ll new file mode 100644 index 0000000..09ca880 --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-add-sub.ll @@ -0,0 +1,50 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +define <1 x i64> @add1xi64(<1 x i64> %A, <1 x i64> %B) { +;CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + %tmp3 = add <1 x i64> %A, %B; + ret <1 x i64> %tmp3 +} + +define <1 x i64> @sub1xi64(<1 x i64> %A, <1 x i64> %B) { +;CHECK: sub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + %tmp3 = sub <1 x i64> %A, %B; + ret <1 x i64> %tmp3 +} + +declare <1 x i64> @llvm.aarch64.neon.vaddds(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.aarch64.neon.vadddu(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_add_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_add_v1i64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vaddds(<1 x i64> %lhs, <1 x i64> %rhs) +; CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_uadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_uadd_v1i64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vadddu(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +declare <1 x i64> @llvm.aarch64.neon.vsubds(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.aarch64.neon.vsubdu(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_sub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sub_v1i64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vsubds(<1 x i64> %lhs, <1 x i64> %rhs) +; CHECK: sub {{d[0-31]+}}, 
{{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_usub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_usub_v1i64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vsubdu(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: sub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + + + diff --git a/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll b/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll new file mode 100644 index 0000000..8ce42de --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll @@ -0,0 +1,108 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s + +declare float @llvm.fma.f32(float, float, float) +declare double @llvm.fma.f64(double, double, double) + +define float @test_fmla_ss4S(float %a, float %b, <4 x float> %v) { + ; CHECK: test_fmla_ss4S + ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-31]+}}.s[3] + %tmp1 = extractelement <4 x float> %v, i32 3 + %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a) + ret float %tmp2 +} + +define float @test_fmla_ss4S_swap(float %a, float %b, <4 x float> %v) { + ; CHECK: test_fmla_ss4S_swap + ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-31]+}}.s[3] + %tmp1 = extractelement <4 x float> %v, i32 3 + %tmp2 = call float @llvm.fma.f32(float %tmp1, float %a, float %a) + ret float %tmp2 +} + +define float @test_fmla_ss2S(float %a, float %b, <2 x float> %v) { + ; CHECK: test_fmla_ss2S + ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-31]+}}.s[1] + %tmp1 = extractelement <2 x float> %v, i32 1 + %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a) + ret float %tmp2 +} + +define double @test_fmla_ddD(double %a, double %b, <1 x double> %v) { + ; CHECK: test_fmla_ddD + ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-31]+}}.d[0] + %tmp1 = extractelement <1 x double> %v, i32 0 + %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a) + ret double %tmp2 +} + +define double 
@test_fmla_dd2D(double %a, double %b, <2 x double> %v) { + ; CHECK: test_fmla_dd2D + ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-31]+}}.d[1] + %tmp1 = extractelement <2 x double> %v, i32 1 + %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a) + ret double %tmp2 +} + +define double @test_fmla_dd2D_swap(double %a, double %b, <2 x double> %v) { + ; CHECK: test_fmla_dd2D_swap + ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-31]+}}.d[1] + %tmp1 = extractelement <2 x double> %v, i32 1 + %tmp2 = call double @llvm.fma.f64(double %tmp1, double %b, double %a) + ret double %tmp2 +} + +define float @test_fmls_ss4S(float %a, float %b, <4 x float> %v) { + ; CHECK: test_fmls_ss4S + ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-31]+}}.s[3] + %tmp1 = extractelement <4 x float> %v, i32 3 + %tmp2 = fsub float -0.0, %tmp1 + %tmp3 = call float @llvm.fma.f32(float %tmp2, float %tmp1, float %a) + ret float %tmp3 +} + +define float @test_fmls_ss4S_swap(float %a, float %b, <4 x float> %v) { + ; CHECK: test_fmls_ss4S_swap + ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-31]+}}.s[3] + %tmp1 = extractelement <4 x float> %v, i32 3 + %tmp2 = fsub float -0.0, %tmp1 + %tmp3 = call float @llvm.fma.f32(float %tmp1, float %tmp2, float %a) + ret float %tmp3 +} + + +define float @test_fmls_ss2S(float %a, float %b, <2 x float> %v) { + ; CHECK: test_fmls_ss2S + ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-31]+}}.s[1] + %tmp1 = extractelement <2 x float> %v, i32 1 + %tmp2 = fsub float -0.0, %tmp1 + %tmp3 = call float @llvm.fma.f32(float %tmp2, float %tmp1, float %a) + ret float %tmp3 +} + +define double @test_fmls_ddD(double %a, double %b, <1 x double> %v) { + ; CHECK: test_fmls_ddD + ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-31]+}}.d[0] + %tmp1 = extractelement <1 x double> %v, i32 0 + %tmp2 = fsub double -0.0, %tmp1 + %tmp3 = call double @llvm.fma.f64(double %tmp2, double %tmp1, double %a) + ret double %tmp3 +} + +define double @test_fmls_dd2D(double %a, double %b, <2 x double> 
%v) { + ; CHECK: test_fmls_dd2D + ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-31]+}}.d[1] + %tmp1 = extractelement <2 x double> %v, i32 1 + %tmp2 = fsub double -0.0, %tmp1 + %tmp3 = call double @llvm.fma.f64(double %tmp2, double %tmp1, double %a) + ret double %tmp3 +} + +define double @test_fmls_dd2D_swap(double %a, double %b, <2 x double> %v) { + ; CHECK: test_fmls_dd2D_swap + ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-31]+}}.d[1] + %tmp1 = extractelement <2 x double> %v, i32 1 + %tmp2 = fsub double -0.0, %tmp1 + %tmp3 = call double @llvm.fma.f64(double %tmp1, double %tmp2, double %a) + ret double %tmp3 +} + diff --git a/test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll b/test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll new file mode 100644 index 0000000..968ad3e --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll @@ -0,0 +1,124 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s + +define float @test_fmul_lane_ss2S(float %a, <2 x float> %v) { + ; CHECK: test_fmul_lane_ss2S + ; CHECK: fmul {{s[0-31]+}}, {{s[0-31]+}}, {{v[0-31]+}}.s[1] + %tmp1 = extractelement <2 x float> %v, i32 1 + %tmp2 = fmul float %a, %tmp1; + ret float %tmp2; +} + +define float @test_fmul_lane_ss2S_swap(float %a, <2 x float> %v) { + ; CHECK: test_fmul_lane_ss2S_swap + ; CHECK: fmul {{s[0-31]+}}, {{s[0-31]+}}, {{v[0-31]+}}.s[1] + %tmp1 = extractelement <2 x float> %v, i32 1 + %tmp2 = fmul float %tmp1, %a; + ret float %tmp2; +} + + +define float @test_fmul_lane_ss4S(float %a, <4 x float> %v) { + ; CHECK: test_fmul_lane_ss4S + ; CHECK: fmul {{s[0-31]+}}, {{s[0-31]+}}, {{v[0-31]+}}.s[3] + %tmp1 = extractelement <4 x float> %v, i32 3 + %tmp2 = fmul float %a, %tmp1; + ret float %tmp2; +} + +define float @test_fmul_lane_ss4S_swap(float %a, <4 x float> %v) { + ; CHECK: test_fmul_lane_ss4S_swap + ; CHECK: fmul {{s[0-31]+}}, {{s[0-31]+}}, {{v[0-31]+}}.s[3] + %tmp1 = extractelement <4 x float> %v, i32 3 + %tmp2 = fmul 
float %tmp1, %a; + ret float %tmp2; +} + + +define double @test_fmul_lane_ddD(double %a, <1 x double> %v) { + ; CHECK: test_fmul_lane_ddD + ; CHECK: fmul {{d[0-31]+}}, {{d[0-31]+}}, {{v[0-31]+}}.d[0] + %tmp1 = extractelement <1 x double> %v, i32 0 + %tmp2 = fmul double %a, %tmp1; + ret double %tmp2; +} + + + +define double @test_fmul_lane_dd2D(double %a, <2 x double> %v) { + ; CHECK: test_fmul_lane_dd2D + ; CHECK: fmul {{d[0-31]+}}, {{d[0-31]+}}, {{v[0-31]+}}.d[1] + %tmp1 = extractelement <2 x double> %v, i32 1 + %tmp2 = fmul double %a, %tmp1; + ret double %tmp2; +} + + +define double @test_fmul_lane_dd2D_swap(double %a, <2 x double> %v) { + ; CHECK: test_fmul_lane_dd2D_swap + ; CHECK: fmul {{d[0-31]+}}, {{d[0-31]+}}, {{v[0-31]+}}.d[1] + %tmp1 = extractelement <2 x double> %v, i32 1 + %tmp2 = fmul double %tmp1, %a; + ret double %tmp2; +} + +declare float @llvm.aarch64.neon.vmulx.f32(float, float) + +define float @test_fmulx_lane_f32(float %a, <2 x float> %v) { + ; CHECK: test_fmulx_lane_f32 + ; CHECK: fmulx {{s[0-31]+}}, {{s[0-31]+}}, {{v[0-31]+}}.s[1] + %tmp1 = extractelement <2 x float> %v, i32 1 + %tmp2 = call float @llvm.aarch64.neon.vmulx.f32(float %a, float %tmp1) + ret float %tmp2; +} + +define float @test_fmulx_laneq_f32(float %a, <4 x float> %v) { + ; CHECK: test_fmulx_laneq_f32 + ; CHECK: fmulx {{s[0-31]+}}, {{s[0-31]+}}, {{v[0-31]+}}.s[3] + %tmp1 = extractelement <4 x float> %v, i32 3 + %tmp2 = call float @llvm.aarch64.neon.vmulx.f32(float %a, float %tmp1) + ret float %tmp2; +} + +define float @test_fmulx_laneq_f32_swap(float %a, <4 x float> %v) { + ; CHECK: test_fmulx_laneq_f32_swap + ; CHECK: fmulx {{s[0-31]+}}, {{s[0-31]+}}, {{v[0-31]+}}.s[3] + %tmp1 = extractelement <4 x float> %v, i32 3 + %tmp2 = call float @llvm.aarch64.neon.vmulx.f32(float %tmp1, float %a) + ret float %tmp2; +} + +declare double @llvm.aarch64.neon.vmulx.f64(double, double) + +define double @test_fmulx_lane_f64(double %a, <1 x double> %v) { + ; CHECK: test_fmulx_lane_f64 + ; CHECK: 
fmulx {{d[0-31]+}}, {{d[0-31]+}}, {{v[0-31]+}}.d[0] + %tmp1 = extractelement <1 x double> %v, i32 0 + %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %tmp1) + ret double %tmp2; +} + +define double @test_fmulx_laneq_f64_0(double %a, <2 x double> %v) { + ; CHECK: test_fmulx_laneq_f64_0 + ; CHECK: fmulx {{d[0-31]+}}, {{d[0-31]+}}, {{v[0-31]+}}.d[0] + %tmp1 = extractelement <2 x double> %v, i32 0 + %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %tmp1) + ret double %tmp2; +} + + +define double @test_fmulx_laneq_f64_1(double %a, <2 x double> %v) { + ; CHECK: test_fmulx_laneq_f64_1 + ; CHECK: fmulx {{d[0-31]+}}, {{d[0-31]+}}, {{v[0-31]+}}.d[1] + %tmp1 = extractelement <2 x double> %v, i32 1 + %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %tmp1) + ret double %tmp2; +} + +define double @test_fmulx_laneq_f64_1_swap(double %a, <2 x double> %v) { + ; CHECK: test_fmulx_laneq_f64_1_swap + ; CHECK: fmulx {{d[0-31]+}}, {{d[0-31]+}}, {{v[0-31]+}}.d[1] + %tmp1 = extractelement <2 x double> %v, i32 1 + %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %tmp1, double %a) + ret double %tmp2; +} + diff --git a/test/CodeGen/AArch64/neon-scalar-compare.ll b/test/CodeGen/AArch64/neon-scalar-compare.ll new file mode 100644 index 0000000..5f10cbb --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-compare.ll @@ -0,0 +1,343 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s + +;; Scalar Integer Compare + +define i64 @test_vceqd(i64 %a, i64 %b) { +; CHECK: test_vceqd +; CHECK: cmeq {{d[0-9]+}}, {{d[0-9]}}, {{d[0-9]}} +entry: + %vceq.i = insertelement <1 x i64> undef, i64 %a, i32 0 + %vceq1.i = insertelement <1 x i64> undef, i64 %b, i32 0 + %vceq2.i = call <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1i64.v1i64(<1 x i64> %vceq.i, <1 x i64> %vceq1.i) + %0 = extractelement <1 x i64> %vceq2.i, i32 0 + ret i64 %0 +} + +define i64 @test_vceqzd(i64 %a) { +; CHECK: test_vceqzd +; CHECK: cmeq {{d[0-9]}}, 
{{d[0-9]}}, #0x0 +entry: + %vceqz.i = insertelement <1 x i64> undef, i64 %a, i32 0 + %vceqz1.i = call <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1i64.v1i64(<1 x i64> %vceqz.i, <1 x i64> zeroinitializer) + %0 = extractelement <1 x i64> %vceqz1.i, i32 0 + ret i64 %0 +} + +define i64 @test_vcged(i64 %a, i64 %b) { +; CHECK: test_vcged +; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} +entry: + %vcge.i = insertelement <1 x i64> undef, i64 %a, i32 0 + %vcge1.i = insertelement <1 x i64> undef, i64 %b, i32 0 + %vcge2.i = call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1i64.v1i64(<1 x i64> %vcge.i, <1 x i64> %vcge1.i) + %0 = extractelement <1 x i64> %vcge2.i, i32 0 + ret i64 %0 +} + +define i64 @test_vcgezd(i64 %a) { +; CHECK: test_vcgezd +; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, #0x0 +entry: + %vcgez.i = insertelement <1 x i64> undef, i64 %a, i32 0 + %vcgez1.i = call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1i64.v1i64(<1 x i64> %vcgez.i, <1 x i64> zeroinitializer) + %0 = extractelement <1 x i64> %vcgez1.i, i32 0 + ret i64 %0 +} + +define i64 @test_vcgtd(i64 %a, i64 %b) { +; CHECK: test_vcgtd +; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} +entry: + %vcgt.i = insertelement <1 x i64> undef, i64 %a, i32 0 + %vcgt1.i = insertelement <1 x i64> undef, i64 %b, i32 0 + %vcgt2.i = call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1i64.v1i64(<1 x i64> %vcgt.i, <1 x i64> %vcgt1.i) + %0 = extractelement <1 x i64> %vcgt2.i, i32 0 + ret i64 %0 +} + +define i64 @test_vcgtzd(i64 %a) { +; CHECK: test_vcgtzd +; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, #0x0 +entry: + %vcgtz.i = insertelement <1 x i64> undef, i64 %a, i32 0 + %vcgtz1.i = call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1i64.v1i64(<1 x i64> %vcgtz.i, <1 x i64> zeroinitializer) + %0 = extractelement <1 x i64> %vcgtz1.i, i32 0 + ret i64 %0 +} + +define i64 @test_vcled(i64 %a, i64 %b) { +; CHECK: test_vcled +; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} +entry: + %vcgt.i = insertelement <1 x i64> undef, i64 %b, i32 0 + %vcgt1.i = insertelement 
<1 x i64> undef, i64 %a, i32 0 + %vcgt2.i = call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1i64.v1i64(<1 x i64> %vcgt.i, <1 x i64> %vcgt1.i) + %0 = extractelement <1 x i64> %vcgt2.i, i32 0 + ret i64 %0 +} + +define i64 @test_vclezd(i64 %a) { +; CHECK: test_vclezd +; CHECK: cmle {{d[0-9]}}, {{d[0-9]}}, #0x0 +entry: + %vclez.i = insertelement <1 x i64> undef, i64 %a, i32 0 + %vclez1.i = call <1 x i64> @llvm.aarch64.neon.vclez.v1i64.v1i64.v1i64(<1 x i64> %vclez.i, <1 x i64> zeroinitializer) + %0 = extractelement <1 x i64> %vclez1.i, i32 0 + ret i64 %0 +} + +define i64 @test_vcltd(i64 %a, i64 %b) { +; CHECK: test_vcltd +; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} +entry: + %vcge.i = insertelement <1 x i64> undef, i64 %b, i32 0 + %vcge1.i = insertelement <1 x i64> undef, i64 %a, i32 0 + %vcge2.i = call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1i64.v1i64(<1 x i64> %vcge.i, <1 x i64> %vcge1.i) + %0 = extractelement <1 x i64> %vcge2.i, i32 0 + ret i64 %0 +} + +define i64 @test_vcltzd(i64 %a) { +; CHECK: test_vcltzd +; CHECK: cmlt {{d[0-9]}}, {{d[0-9]}}, #0x0 +entry: + %vcltz.i = insertelement <1 x i64> undef, i64 %a, i32 0 + %vcltz1.i = call <1 x i64> @llvm.aarch64.neon.vcltz.v1i64.v1i64.v1i64(<1 x i64> %vcltz.i, <1 x i64> zeroinitializer) + %0 = extractelement <1 x i64> %vcltz1.i, i32 0 + ret i64 %0 +} + +define i64 @test_vtstd(i64 %a, i64 %b) { +; CHECK: test_vtstd +; CHECK: cmtst {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} +entry: + %vtst.i = insertelement <1 x i64> undef, i64 %a, i32 0 + %vtst1.i = insertelement <1 x i64> undef, i64 %b, i32 0 + %vtst2.i = call <1 x i64> @llvm.aarch64.neon.vtstd.v1i64.v1i64.v1i64(<1 x i64> %vtst.i, <1 x i64> %vtst1.i) + %0 = extractelement <1 x i64> %vtst2.i, i32 0 + ret i64 %0 +} + + +define <1 x i64> @test_vcage_f64(<1 x double> %a, <1 x double> %b) #0 { +; CHECK: test_vcage_f64 +; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %vcage2.i = tail call <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double> %a, <1 x double> %b) 
#2 + ret <1 x i64> %vcage2.i +} + +define <1 x i64> @test_vcagt_f64(<1 x double> %a, <1 x double> %b) #0 { +; CHECK: test_vcagt_f64 +; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %vcagt2.i = tail call <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double> %a, <1 x double> %b) #2 + ret <1 x i64> %vcagt2.i +} + +define <1 x i64> @test_vcale_f64(<1 x double> %a, <1 x double> %b) #0 { +; CHECK: test_vcale_f64 +; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %vcage2.i = tail call <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double> %b, <1 x double> %a) #2 + ret <1 x i64> %vcage2.i +} + +define <1 x i64> @test_vcalt_f64(<1 x double> %a, <1 x double> %b) #0 { +; CHECK: test_vcalt_f64 +; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %vcagt2.i = tail call <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double> %b, <1 x double> %a) #2 + ret <1 x i64> %vcagt2.i +} + +define <1 x i64> @test_vceq_s64(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK: test_vceq_s64 +; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %cmp.i = icmp eq <1 x i64> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> @test_vceq_u64(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK: test_vceq_u64 +; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %cmp.i = icmp eq <1 x i64> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> @test_vceq_f64(<1 x double> %a, <1 x double> %b) #0 { +; CHECK: test_vceq_f64 +; CHECK: fcmeq {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %cmp.i = fcmp oeq <1 x double> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> @test_vcge_s64(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK: test_vcge_s64 +; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %cmp.i = icmp sge <1 x i64> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> @test_vcge_u64(<1 x i64> %a, <1 x i64> %b) #0 { 
+; CHECK: test_vcge_u64 +; CHECK: cmhs {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %cmp.i = icmp uge <1 x i64> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> @test_vcge_f64(<1 x double> %a, <1 x double> %b) #0 { +; CHECK: test_vcge_f64 +; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %cmp.i = fcmp oge <1 x double> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> @test_vcle_s64(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK: test_vcle_s64 +; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %cmp.i = icmp sle <1 x i64> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> @test_vcle_u64(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK: test_vcle_u64 +; CHECK: cmhs {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %cmp.i = icmp ule <1 x i64> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> @test_vcle_f64(<1 x double> %a, <1 x double> %b) #0 { +; CHECK: test_vcle_f64 +; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %cmp.i = fcmp ole <1 x double> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> @test_vcgt_s64(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK: test_vcgt_s64 +; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %cmp.i = icmp sgt <1 x i64> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> @test_vcgt_u64(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK: test_vcgt_u64 +; CHECK: cmhi {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %cmp.i = icmp ugt <1 x i64> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> @test_vcgt_f64(<1 x double> %a, <1 x double> %b) #0 { +; CHECK: test_vcgt_f64 +; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %cmp.i = fcmp ogt <1 x double> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> 
@test_vclt_s64(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK: test_vclt_s64 +; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %cmp.i = icmp slt <1 x i64> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> @test_vclt_u64(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK: test_vclt_u64 +; CHECK: cmhi {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %cmp.i = icmp ult <1 x i64> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> @test_vclt_f64(<1 x double> %a, <1 x double> %b) #0 { +; CHECK: test_vclt_f64 +; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %cmp.i = fcmp olt <1 x double> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> @test_vceqz_s64(<1 x i64> %a) #0 { +; CHECK: test_vceqz_s64 +; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, #0x0 + %1 = icmp eq <1 x i64> %a, zeroinitializer + %vceqz.i = zext <1 x i1> %1 to <1 x i64> + ret <1 x i64> %vceqz.i +} + +define <1 x i64> @test_vceqz_u64(<1 x i64> %a) #0 { +; CHECK: test_vceqz_u64 +; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, #0x0 + %1 = icmp eq <1 x i64> %a, zeroinitializer + %vceqz.i = zext <1 x i1> %1 to <1 x i64> + ret <1 x i64> %vceqz.i +} + +define <1 x i64> @test_vceqz_p64(<1 x i64> %a) #0 { +; CHECK: test_vceqz_p64 +; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, #0x0 + %1 = icmp eq <1 x i64> %a, zeroinitializer + %vceqz.i = zext <1 x i1> %1 to <1 x i64> + ret <1 x i64> %vceqz.i +} + +define <2 x i64> @test_vceqzq_p64(<2 x i64> %a) #0 { +; CHECK: test_vceqzq_p64 +; CHECK: cmeq {{v[0-9]}}.2d, {{v[0-9]}}.2d, #0 + %1 = icmp eq <2 x i64> %a, zeroinitializer + %vceqz.i = zext <2 x i1> %1 to <2 x i64> + ret <2 x i64> %vceqz.i +} + +define <1 x i64> @test_vcgez_s64(<1 x i64> %a) #0 { +; CHECK: test_vcgez_s64 +; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, #0x0 + %1 = icmp sge <1 x i64> %a, zeroinitializer + %vcgez.i = zext <1 x i1> %1 to <1 x i64> + ret <1 x i64> %vcgez.i +} + +define <1 x i64> @test_vclez_s64(<1 x i64> 
%a) #0 { +; CHECK: test_vclez_s64 +; CHECK: cmle {{d[0-9]}}, {{d[0-9]}}, #0x0 + %1 = icmp sle <1 x i64> %a, zeroinitializer + %vclez.i = zext <1 x i1> %1 to <1 x i64> + ret <1 x i64> %vclez.i +} + +define <1 x i64> @test_vcgtz_s64(<1 x i64> %a) #0 { +; CHECK: test_vcgtz_s64 +; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, #0x0 + %1 = icmp sgt <1 x i64> %a, zeroinitializer + %vcgtz.i = zext <1 x i1> %1 to <1 x i64> + ret <1 x i64> %vcgtz.i +} + +define <1 x i64> @test_vcltz_s64(<1 x i64> %a) #0 { +; CHECK: test_vcltz_s64 +; CHECK: cmlt {{d[0-9]}}, {{d[0-9]}}, #0 + %1 = icmp slt <1 x i64> %a, zeroinitializer + %vcltz.i = zext <1 x i1> %1 to <1 x i64> + ret <1 x i64> %vcltz.i +} + +declare <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double>, <1 x double>) +declare <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double>, <1 x double>) +declare <1 x i64> @llvm.aarch64.neon.vtstd.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.aarch64.neon.vcltz.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.aarch64.neon.vchs.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.aarch64.neon.vclez.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.aarch64.neon.vchi.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) diff --git a/test/CodeGen/AArch64/neon-scalar-copy.ll b/test/CodeGen/AArch64/neon-scalar-copy.ll new file mode 100644 index 0000000..d433ff5 --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-copy.ll @@ -0,0 +1,88 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s + +define float @test_dup_sv2S(<2 x float> %v) { + ;CHECK: test_dup_sv2S + ;CHECK: dup {{s[0-31]+}}, {{v[0-31]+}}.s[1] + %tmp1 = extractelement <2 x float> %v, i32 1 + ret float 
%tmp1 +} + +define float @test_dup_sv4S(<4 x float> %v) { + ;CHECK: test_dup_sv4S + ;CHECK: dup {{s[0-31]+}}, {{v[0-31]+}}.s[0] + %tmp1 = extractelement <4 x float> %v, i32 0 + ret float %tmp1 +} + +define double @test_dup_dvD(<1 x double> %v) { + ;CHECK: test_dup_dvD + ;CHECK-NOT: dup {{d[0-31]+}}, {{v[0-31]+}}.d[0] + ;CHECK: ret + %tmp1 = extractelement <1 x double> %v, i32 0 + ret double %tmp1 +} + +define double @test_dup_dv2D(<2 x double> %v) { + ;CHECK: test_dup_dv2D + ;CHECK: dup {{d[0-31]+}}, {{v[0-31]+}}.d[1] + %tmp1 = extractelement <2 x double> %v, i32 1 + ret double %tmp1 +} + +define <1 x i8> @test_vector_dup_bv16B(<16 x i8> %v1) { + ;CHECK: test_vector_dup_bv16B + ;CHECK: dup {{b[0-31]+}}, {{v[0-31]+}}.b[14] + %shuffle.i = shufflevector <16 x i8> %v1, <16 x i8> undef, <1 x i32> <i32 14> + ret <1 x i8> %shuffle.i +} + +define <1 x i8> @test_vector_dup_bv8B(<8 x i8> %v1) { + ;CHECK: test_vector_dup_bv8B + ;CHECK: dup {{b[0-31]+}}, {{v[0-31]+}}.b[7] + %shuffle.i = shufflevector <8 x i8> %v1, <8 x i8> undef, <1 x i32> <i32 7> + ret <1 x i8> %shuffle.i +} + +define <1 x i16> @test_vector_dup_hv8H(<8 x i16> %v1) { + ;CHECK: test_vector_dup_hv8H + ;CHECK: dup {{h[0-31]+}}, {{v[0-31]+}}.h[7] + %shuffle.i = shufflevector <8 x i16> %v1, <8 x i16> undef, <1 x i32> <i32 7> + ret <1 x i16> %shuffle.i +} + +define <1 x i16> @test_vector_dup_hv4H(<4 x i16> %v1) { + ;CHECK: test_vector_dup_hv4H + ;CHECK: dup {{h[0-31]+}}, {{v[0-31]+}}.h[3] + %shuffle.i = shufflevector <4 x i16> %v1, <4 x i16> undef, <1 x i32> <i32 3> + ret <1 x i16> %shuffle.i +} + +define <1 x i32> @test_vector_dup_sv4S(<4 x i32> %v1) { + ;CHECK: test_vector_dup_sv4S + ;CHECK: dup {{s[0-31]+}}, {{v[0-31]+}}.s[3] + %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <1 x i32> <i32 3> + ret <1 x i32> %shuffle +} + +define <1 x i32> @test_vector_dup_sv2S(<2 x i32> %v1) { + ;CHECK: test_vector_dup_sv2S + ;CHECK: dup {{s[0-31]+}}, {{v[0-31]+}}.s[1] + %shuffle = shufflevector <2 x i32> %v1, <2 x i32> 
undef, <1 x i32> <i32 1> + ret <1 x i32> %shuffle +} + +define <1 x i64> @test_vector_dup_dv2D(<2 x i64> %v1) { + ;CHECK: test_vector_dup_dv2D + ;CHECK: dup {{d[0-31]+}}, {{v[0-31]+}}.d[1] + %shuffle.i = shufflevector <2 x i64> %v1, <2 x i64> undef, <1 x i32> <i32 1> + ret <1 x i64> %shuffle.i +} + +define <1 x i64> @test_vector_copy_dup_dv2D(<1 x i64> %a, <2 x i64> %c) { + ;CHECK: test_vector_copy_dup_dv2D + ;CHECK: dup {{d[0-31]+}}, {{v[0-31]+}}.d[1] + %vget_lane = extractelement <2 x i64> %c, i32 1 + %vset_lane = insertelement <1 x i64> undef, i64 %vget_lane, i32 0 + ret <1 x i64> %vset_lane +} + diff --git a/test/CodeGen/AArch64/neon-scalar-cvt.ll b/test/CodeGen/AArch64/neon-scalar-cvt.ll new file mode 100644 index 0000000..a06d5d6 --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-cvt.ll @@ -0,0 +1,137 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s + +define float @test_vcvts_f32_s32(i32 %a) { +; CHECK: test_vcvts_f32_s32 +; CHECK: scvtf {{s[0-9]+}}, {{s[0-9]+}} +entry: + %vcvtf.i = insertelement <1 x i32> undef, i32 %a, i32 0 + %0 = call float @llvm.aarch64.neon.vcvtf32.s32(<1 x i32> %vcvtf.i) + ret float %0 +} + +declare float @llvm.aarch64.neon.vcvtf32.s32(<1 x i32>) + +define double @test_vcvtd_f64_s64(i64 %a) { +; CHECK: test_vcvtd_f64_s64 +; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}} +entry: + %vcvtf.i = insertelement <1 x i64> undef, i64 %a, i32 0 + %0 = call double @llvm.aarch64.neon.vcvtf64.s64(<1 x i64> %vcvtf.i) + ret double %0 +} + +declare double @llvm.aarch64.neon.vcvtf64.s64(<1 x i64>) + +define float @test_vcvts_f32_u32(i32 %a) { +; CHECK: test_vcvts_f32_u32 +; CHECK: ucvtf {{s[0-9]+}}, {{s[0-9]+}} +entry: + %vcvtf.i = insertelement <1 x i32> undef, i32 %a, i32 0 + %0 = call float @llvm.aarch64.neon.vcvtf32.u32(<1 x i32> %vcvtf.i) + ret float %0 +} + +declare float @llvm.aarch64.neon.vcvtf32.u32(<1 x i32>) + +define double @test_vcvtd_f64_u64(i64 %a) { +; CHECK: test_vcvtd_f64_u64 +; CHECK: ucvtf {{d[0-9]+}}, 
{{d[0-9]+}} +entry: + %vcvtf.i = insertelement <1 x i64> undef, i64 %a, i32 0 + %0 = call double @llvm.aarch64.neon.vcvtf64.u64(<1 x i64> %vcvtf.i) + ret double %0 +} + +declare double @llvm.aarch64.neon.vcvtf64.u64(<1 x i64>) + +define float @test_vcvts_n_f32_s32(i32 %a) { +; CHECK: test_vcvts_n_f32_s32 +; CHECK: scvtf {{s[0-9]+}}, {{s[0-9]+}}, #1 +entry: + %vcvtf = insertelement <1 x i32> undef, i32 %a, i32 0 + %0 = call float @llvm.aarch64.neon.vcvtf32.n.s32(<1 x i32> %vcvtf, i32 1) + ret float %0 +} + +declare float @llvm.aarch64.neon.vcvtf32.n.s32(<1 x i32>, i32) + +define double @test_vcvtd_n_f64_s64(i64 %a) { +; CHECK: test_vcvtd_n_f64_s64 +; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}}, #1 +entry: + %vcvtf = insertelement <1 x i64> undef, i64 %a, i32 0 + %0 = call double @llvm.aarch64.neon.vcvtf64.n.s64(<1 x i64> %vcvtf, i32 1) + ret double %0 +} + +declare double @llvm.aarch64.neon.vcvtf64.n.s64(<1 x i64>, i32) + +define float @test_vcvts_n_f32_u32(i32 %a) { +; CHECK: test_vcvts_n_f32_u32 +; CHECK: ucvtf {{s[0-9]+}}, {{s[0-9]+}}, #1 +entry: + %vcvtf = insertelement <1 x i32> undef, i32 %a, i32 0 + %0 = call float @llvm.aarch64.neon.vcvtf32.n.u32(<1 x i32> %vcvtf, i32 1) + ret float %0 +} + +declare float @llvm.aarch64.neon.vcvtf32.n.u32(<1 x i32>, i32) + +define double @test_vcvtd_n_f64_u64(i64 %a) { +; CHECK: test_vcvtd_n_f64_u64 +; CHECK: ucvtf {{d[0-9]+}}, {{d[0-9]+}}, #1 +entry: + %vcvtf = insertelement <1 x i64> undef, i64 %a, i32 0 + %0 = call double @llvm.aarch64.neon.vcvtf64.n.u64(<1 x i64> %vcvtf, i32 1) + ret double %0 +} + +declare double @llvm.aarch64.neon.vcvtf64.n.u64(<1 x i64>, i32) + +define i32 @test_vcvts_n_s32_f32(float %a) { +; CHECK: test_vcvts_n_s32_f32 +; CHECK: fcvtzs {{s[0-9]+}}, {{s[0-9]+}}, #1 +entry: + %fcvtzs = insertelement <1 x float> undef, float %a, i32 0 + %fcvtzs1 = call <1 x i32> @llvm.aarch64.neon.vcvts.n.s32.f32(<1 x float> %fcvtzs, i32 1) + %0 = extractelement <1 x i32> %fcvtzs1, i32 0 + ret i32 %0 +} + +declare <1 x i32> 
@llvm.aarch64.neon.vcvts.n.s32.f32(<1 x float>, i32) + +define i64 @test_vcvtd_n_s64_f64(double %a) { +; CHECK: test_vcvtd_n_s64_f64 +; CHECK: fcvtzs {{d[0-9]+}}, {{d[0-9]+}}, #1 +entry: + %fcvtzs = insertelement <1 x double> undef, double %a, i32 0 + %fcvtzs1 = call <1 x i64> @llvm.aarch64.neon.vcvtd.n.s64.f64(<1 x double> %fcvtzs, i32 1) + %0 = extractelement <1 x i64> %fcvtzs1, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vcvtd.n.s64.f64(<1 x double>, i32) + +define i32 @test_vcvts_n_u32_f32(float %a) { +; CHECK: test_vcvts_n_u32_f32 +; CHECK: fcvtzu {{s[0-9]+}}, {{s[0-9]+}}, #32 +entry: + %fcvtzu = insertelement <1 x float> undef, float %a, i32 0 + %fcvtzu1 = call <1 x i32> @llvm.aarch64.neon.vcvts.n.u32.f32(<1 x float> %fcvtzu, i32 32) + %0 = extractelement <1 x i32> %fcvtzu1, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.vcvts.n.u32.f32(<1 x float>, i32) + +define i64 @test_vcvtd_n_u64_f64(double %a) { +; CHECK: test_vcvtd_n_u64_f64 +; CHECK: fcvtzu {{d[0-9]+}}, {{d[0-9]+}}, #64 +entry: + %fcvtzu = insertelement <1 x double> undef, double %a, i32 0 + %fcvtzu1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtd.n.u64.f64(<1 x double> %fcvtzu, i32 64) + %0 = extractelement <1 x i64> %fcvtzu1, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vcvtd.n.u64.f64(<1 x double>, i32) diff --git a/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll b/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll new file mode 100644 index 0000000..faf521b --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll @@ -0,0 +1,104 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s + +define i8 @test_vqmovunh_s16(i16 %a) { +; CHECK: test_vqmovunh_s16 +; CHECK: sqxtun {{b[0-9]+}}, {{h[0-9]+}} +entry: + %vqmovun.i = insertelement <1 x i16> undef, i16 %a, i32 0 + %vqmovun1.i = call <1 x i8> @llvm.arm.neon.vqmovnsu.v1i8(<1 x i16> %vqmovun.i) + %0 = extractelement <1 x i8> %vqmovun1.i, i32 0 + ret i8 %0 +} + 
+define i16 @test_vqmovuns_s32(i32 %a) { +; CHECK: test_vqmovuns_s32 +; CHECK: sqxtun {{h[0-9]+}}, {{s[0-9]+}} +entry: + %vqmovun.i = insertelement <1 x i32> undef, i32 %a, i32 0 + %vqmovun1.i = call <1 x i16> @llvm.arm.neon.vqmovnsu.v1i16(<1 x i32> %vqmovun.i) + %0 = extractelement <1 x i16> %vqmovun1.i, i32 0 + ret i16 %0 +} + +define i32 @test_vqmovund_s64(i64 %a) { +; CHECK: test_vqmovund_s64 +; CHECK: sqxtun {{s[0-9]+}}, {{d[0-9]+}} +entry: + %vqmovun.i = insertelement <1 x i64> undef, i64 %a, i32 0 + %vqmovun1.i = call <1 x i32> @llvm.arm.neon.vqmovnsu.v1i32(<1 x i64> %vqmovun.i) + %0 = extractelement <1 x i32> %vqmovun1.i, i32 0 + ret i32 %0 +} + +declare <1 x i8> @llvm.arm.neon.vqmovnsu.v1i8(<1 x i16>) +declare <1 x i16> @llvm.arm.neon.vqmovnsu.v1i16(<1 x i32>) +declare <1 x i32> @llvm.arm.neon.vqmovnsu.v1i32(<1 x i64>) + +define i8 @test_vqmovnh_s16(i16 %a) { +; CHECK: test_vqmovnh_s16 +; CHECK: sqxtn {{b[0-9]+}}, {{h[0-9]+}} +entry: + %vqmovn.i = insertelement <1 x i16> undef, i16 %a, i32 0 + %vqmovn1.i = call <1 x i8> @llvm.arm.neon.vqmovns.v1i8(<1 x i16> %vqmovn.i) + %0 = extractelement <1 x i8> %vqmovn1.i, i32 0 + ret i8 %0 +} + +define i16 @test_vqmovns_s32(i32 %a) { +; CHECK: test_vqmovns_s32 +; CHECK: sqxtn {{h[0-9]+}}, {{s[0-9]+}} +entry: + %vqmovn.i = insertelement <1 x i32> undef, i32 %a, i32 0 + %vqmovn1.i = call <1 x i16> @llvm.arm.neon.vqmovns.v1i16(<1 x i32> %vqmovn.i) + %0 = extractelement <1 x i16> %vqmovn1.i, i32 0 + ret i16 %0 +} + +define i32 @test_vqmovnd_s64(i64 %a) { +; CHECK: test_vqmovnd_s64 +; CHECK: sqxtn {{s[0-9]+}}, {{d[0-9]+}} +entry: + %vqmovn.i = insertelement <1 x i64> undef, i64 %a, i32 0 + %vqmovn1.i = call <1 x i32> @llvm.arm.neon.vqmovns.v1i32(<1 x i64> %vqmovn.i) + %0 = extractelement <1 x i32> %vqmovn1.i, i32 0 + ret i32 %0 +} + +declare <1 x i8> @llvm.arm.neon.vqmovns.v1i8(<1 x i16>) +declare <1 x i16> @llvm.arm.neon.vqmovns.v1i16(<1 x i32>) +declare <1 x i32> @llvm.arm.neon.vqmovns.v1i32(<1 x i64>) + +define i8 
@test_vqmovnh_u16(i16 %a) { +; CHECK: test_vqmovnh_u16 +; CHECK: uqxtn {{b[0-9]+}}, {{h[0-9]+}} +entry: + %vqmovn.i = insertelement <1 x i16> undef, i16 %a, i32 0 + %vqmovn1.i = call <1 x i8> @llvm.arm.neon.vqmovnu.v1i8(<1 x i16> %vqmovn.i) + %0 = extractelement <1 x i8> %vqmovn1.i, i32 0 + ret i8 %0 +} + + +define i16 @test_vqmovns_u32(i32 %a) { +; CHECK: test_vqmovns_u32 +; CHECK: uqxtn {{h[0-9]+}}, {{s[0-9]+}} +entry: + %vqmovn.i = insertelement <1 x i32> undef, i32 %a, i32 0 + %vqmovn1.i = call <1 x i16> @llvm.arm.neon.vqmovnu.v1i16(<1 x i32> %vqmovn.i) + %0 = extractelement <1 x i16> %vqmovn1.i, i32 0 + ret i16 %0 +} + +define i32 @test_vqmovnd_u64(i64 %a) { +; CHECK: test_vqmovnd_u64 +; CHECK: uqxtn {{s[0-9]+}}, {{d[0-9]+}} +entry: + %vqmovn.i = insertelement <1 x i64> undef, i64 %a, i32 0 + %vqmovn1.i = call <1 x i32> @llvm.arm.neon.vqmovnu.v1i32(<1 x i64> %vqmovn.i) + %0 = extractelement <1 x i32> %vqmovn1.i, i32 0 + ret i32 %0 +} + +declare <1 x i8> @llvm.arm.neon.vqmovnu.v1i8(<1 x i16>) +declare <1 x i16> @llvm.arm.neon.vqmovnu.v1i16(<1 x i32>) +declare <1 x i32> @llvm.arm.neon.vqmovnu.v1i32(<1 x i64>) diff --git a/test/CodeGen/AArch64/neon-scalar-fabd.ll b/test/CodeGen/AArch64/neon-scalar-fabd.ll new file mode 100644 index 0000000..75686d3 --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-fabd.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +define float @test_vabds_f32(float %a, float %b) { +; CHECK-LABEL: test_vabds_f32 +; CHECK: fabd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +entry: + %vabd.i = insertelement <1 x float> undef, float %a, i32 0 + %vabd1.i = insertelement <1 x float> undef, float %b, i32 0 + %vabd2.i = call <1 x float> @llvm.aarch64.neon.vabd.v1f32(<1 x float> %vabd.i, <1 x float> %vabd1.i) + %0 = extractelement <1 x float> %vabd2.i, i32 0 + ret float %0 +} + +define double @test_vabdd_f64(double %a, double %b) { +; CHECK-LABEL: test_vabdd_f64 +; CHECK: fabd 
{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +entry: + %vabd.i = insertelement <1 x double> undef, double %a, i32 0 + %vabd1.i = insertelement <1 x double> undef, double %b, i32 0 + %vabd2.i = call <1 x double> @llvm.aarch64.neon.vabd.v1f64(<1 x double> %vabd.i, <1 x double> %vabd1.i) + %0 = extractelement <1 x double> %vabd2.i, i32 0 + ret double %0 +} + +declare <1 x double> @llvm.aarch64.neon.vabd.v1f64(<1 x double>, <1 x double>) +declare <1 x float> @llvm.aarch64.neon.vabd.v1f32(<1 x float>, <1 x float>) diff --git a/test/CodeGen/AArch64/neon-scalar-fcvt.ll b/test/CodeGen/AArch64/neon-scalar-fcvt.ll new file mode 100644 index 0000000..d7b84fa --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-fcvt.ll @@ -0,0 +1,255 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s + +;; Scalar Floating-point Convert + +define float @test_vcvtxn(double %a) { +; CHECK: test_vcvtxn +; CHECK: fcvtxn {{s[0-9]}}, {{d[0-9]}} +entry: + %vcvtf.i = insertelement <1 x double> undef, double %a, i32 0 + %vcvtf1.i = tail call <1 x float> @llvm.aarch64.neon.fcvtxn.v1f32.v1f64(<1 x double> %vcvtf.i) + %0 = extractelement <1 x float> %vcvtf1.i, i32 0 + ret float %0 +} + +declare <1 x float> @llvm.aarch64.neon.fcvtxn.v1f32.v1f64(<1 x double>) + +define i32 @test_vcvtass(float %a) { +; CHECK: test_vcvtass +; CHECK: fcvtas {{s[0-9]}}, {{s[0-9]}} +entry: + %vcvtas.i = insertelement <1 x float> undef, float %a, i32 0 + %vcvtas1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.v1f32(<1 x float> %vcvtas.i) + %0 = extractelement <1 x i32> %vcvtas1.i, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.v1f32(<1 x float>) + +define i64 @test_test_vcvtasd(double %a) { +; CHECK: test_test_vcvtasd +; CHECK: fcvtas {{d[0-9]}}, {{d[0-9]}} +entry: + %vcvtas.i = insertelement <1 x double> undef, double %a, i32 0 + %vcvtas1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> %vcvtas.i) + %0 = extractelement <1 x i64> %vcvtas1.i, i32 0 
+ ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double>) + +define i32 @test_vcvtaus(float %a) { +; CHECK: test_vcvtaus +; CHECK: fcvtau {{s[0-9]}}, {{s[0-9]}} +entry: + %vcvtau.i = insertelement <1 x float> undef, float %a, i32 0 + %vcvtau1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.v1f32(<1 x float> %vcvtau.i) + %0 = extractelement <1 x i32> %vcvtau1.i, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.v1f32(<1 x float>) + +define i64 @test_vcvtaud(double %a) { +; CHECK: test_vcvtaud +; CHECK: fcvtau {{d[0-9]}}, {{d[0-9]}} +entry: + %vcvtau.i = insertelement <1 x double> undef, double %a, i32 0 + %vcvtau1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> %vcvtau.i) + %0 = extractelement <1 x i64> %vcvtau1.i, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double>) + +define i32 @test_vcvtmss(float %a) { +; CHECK: test_vcvtmss +; CHECK: fcvtms {{s[0-9]}}, {{s[0-9]}} +entry: + %vcvtms.i = insertelement <1 x float> undef, float %a, i32 0 + %vcvtms1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.v1f32(<1 x float> %vcvtms.i) + %0 = extractelement <1 x i32> %vcvtms1.i, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.v1f32(<1 x float>) + +define i64 @test_vcvtmd_s64_f64(double %a) { +; CHECK: test_vcvtmd_s64_f64 +; CHECK: fcvtms {{d[0-9]}}, {{d[0-9]}} +entry: + %vcvtms.i = insertelement <1 x double> undef, double %a, i32 0 + %vcvtms1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> %vcvtms.i) + %0 = extractelement <1 x i64> %vcvtms1.i, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double>) + +define i32 @test_vcvtmus(float %a) { +; CHECK: test_vcvtmus +; CHECK: fcvtmu {{s[0-9]}}, {{s[0-9]}} +entry: + %vcvtmu.i = insertelement <1 x float> undef, float %a, i32 0 + %vcvtmu1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.v1f32(<1 x 
float> %vcvtmu.i) + %0 = extractelement <1 x i32> %vcvtmu1.i, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.v1f32(<1 x float>) + +define i64 @test_vcvtmud(double %a) { +; CHECK: test_vcvtmud +; CHECK: fcvtmu {{d[0-9]}}, {{d[0-9]}} +entry: + %vcvtmu.i = insertelement <1 x double> undef, double %a, i32 0 + %vcvtmu1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> %vcvtmu.i) + %0 = extractelement <1 x i64> %vcvtmu1.i, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double>) + +define i32 @test_vcvtnss(float %a) { +; CHECK: test_vcvtnss +; CHECK: fcvtns {{s[0-9]}}, {{s[0-9]}} +entry: + %vcvtns.i = insertelement <1 x float> undef, float %a, i32 0 + %vcvtns1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.v1f32(<1 x float> %vcvtns.i) + %0 = extractelement <1 x i32> %vcvtns1.i, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.v1f32(<1 x float>) + +define i64 @test_vcvtnd_s64_f64(double %a) { +; CHECK: test_vcvtnd_s64_f64 +; CHECK: fcvtns {{d[0-9]}}, {{d[0-9]}} +entry: + %vcvtns.i = insertelement <1 x double> undef, double %a, i32 0 + %vcvtns1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> %vcvtns.i) + %0 = extractelement <1 x i64> %vcvtns1.i, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double>) + +define i32 @test_vcvtnus(float %a) { +; CHECK: test_vcvtnus +; CHECK: fcvtnu {{s[0-9]}}, {{s[0-9]}} +entry: + %vcvtnu.i = insertelement <1 x float> undef, float %a, i32 0 + %vcvtnu1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.v1f32(<1 x float> %vcvtnu.i) + %0 = extractelement <1 x i32> %vcvtnu1.i, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.v1f32(<1 x float>) + +define i64 @test_vcvtnud(double %a) { +; CHECK: test_vcvtnud +; CHECK: fcvtnu {{d[0-9]}}, {{d[0-9]}} +entry: + %vcvtnu.i = insertelement <1 x double> undef, double %a, i32 0 + 
%vcvtnu1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> %vcvtnu.i) + %0 = extractelement <1 x i64> %vcvtnu1.i, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double>) + +define i32 @test_vcvtpss(float %a) { +; CHECK: test_vcvtpss +; CHECK: fcvtps {{s[0-9]}}, {{s[0-9]}} +entry: + %vcvtps.i = insertelement <1 x float> undef, float %a, i32 0 + %vcvtps1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.v1f32(<1 x float> %vcvtps.i) + %0 = extractelement <1 x i32> %vcvtps1.i, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.v1f32(<1 x float>) + +define i64 @test_vcvtpd_s64_f64(double %a) { +; CHECK: test_vcvtpd_s64_f64 +; CHECK: fcvtps {{d[0-9]}}, {{d[0-9]}} +entry: + %vcvtps.i = insertelement <1 x double> undef, double %a, i32 0 + %vcvtps1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> %vcvtps.i) + %0 = extractelement <1 x i64> %vcvtps1.i, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double>) + +define i32 @test_vcvtpus(float %a) { +; CHECK: test_vcvtpus +; CHECK: fcvtpu {{s[0-9]}}, {{s[0-9]}} +entry: + %vcvtpu.i = insertelement <1 x float> undef, float %a, i32 0 + %vcvtpu1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.v1f32(<1 x float> %vcvtpu.i) + %0 = extractelement <1 x i32> %vcvtpu1.i, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.v1f32(<1 x float>) + +define i64 @test_vcvtpud(double %a) { +; CHECK: test_vcvtpud +; CHECK: fcvtpu {{d[0-9]}}, {{d[0-9]}} +entry: + %vcvtpu.i = insertelement <1 x double> undef, double %a, i32 0 + %vcvtpu1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> %vcvtpu.i) + %0 = extractelement <1 x i64> %vcvtpu1.i, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double>) + +define i32 @test_vcvtss(float %a) { +; CHECK: test_vcvtss +; CHECK: fcvtzs {{s[0-9]}}, {{s[0-9]}} +entry: 
+ %vcvtzs.i = insertelement <1 x float> undef, float %a, i32 0 + %vcvtzs1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.v1f32(<1 x float> %vcvtzs.i) + %0 = extractelement <1 x i32> %vcvtzs1.i, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.v1f32(<1 x float>) + +define i64 @test_vcvtd_s64_f64(double %a) { +; CHECK: test_vcvtd_s64_f64 +; CHECK: fcvtzs {{d[0-9]}}, {{d[0-9]}} +entry: + %vcvzs.i = insertelement <1 x double> undef, double %a, i32 0 + %vcvzs1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double> %vcvzs.i) + %0 = extractelement <1 x i64> %vcvzs1.i, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double>) + +define i32 @test_vcvtus(float %a) { +; CHECK: test_vcvtus +; CHECK: fcvtzu {{s[0-9]}}, {{s[0-9]}} +entry: + %vcvtzu.i = insertelement <1 x float> undef, float %a, i32 0 + %vcvtzu1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.v1f32(<1 x float> %vcvtzu.i) + %0 = extractelement <1 x i32> %vcvtzu1.i, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.v1f32(<1 x float>) + +define i64 @test_vcvtud(double %a) { +; CHECK: test_vcvtud +; CHECK: fcvtzu {{d[0-9]}}, {{d[0-9]}} +entry: + %vcvtzu.i = insertelement <1 x double> undef, double %a, i32 0 + %vcvtzu1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double> %vcvtzu.i) + %0 = extractelement <1 x i64> %vcvtzu1.i, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double>) diff --git a/test/CodeGen/AArch64/neon-scalar-fp-compare.ll b/test/CodeGen/AArch64/neon-scalar-fp-compare.ll new file mode 100644 index 0000000..a6e5859 --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-fp-compare.ll @@ -0,0 +1,328 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s + +;; Scalar Floating-point Compare + +define i32 @test_vceqs_f32(float %a, float %b) { +; CHECK: test_vceqs_f32 +; CHECK: fcmeq {{s[0-9]}}, 
{{s[0-9]}}, {{s[0-9]}} +entry: + %vceq.i = insertelement <1 x float> undef, float %a, i32 0 + %vceq1.i = insertelement <1 x float> undef, float %b, i32 0 + %vceq2.i = call <1 x i32> @llvm.aarch64.neon.vceq.v1i32.v1f32.v1f32(<1 x float> %vceq.i, <1 x float> %vceq1.i) + %0 = extractelement <1 x i32> %vceq2.i, i32 0 + ret i32 %0 +} + +define i64 @test_vceqd_f64(double %a, double %b) { +; CHECK: test_vceqd_f64 +; CHECK: fcmeq {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} +entry: + %vceq.i = insertelement <1 x double> undef, double %a, i32 0 + %vceq1.i = insertelement <1 x double> undef, double %b, i32 0 + %vceq2.i = call <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1f64.v1f64(<1 x double> %vceq.i, <1 x double> %vceq1.i) + %0 = extractelement <1 x i64> %vceq2.i, i32 0 + ret i64 %0 +} + +define <1 x i64> @test_vceqz_f64(<1 x double> %a) #0 { +; CHECK: test_vceqz_f64 +; CHECK: fcmeq {{d[0-9]+}}, {{d[0-9]+}}, #0.0 +entry: + %0 = fcmp oeq <1 x double> %a, zeroinitializer + %vceqz.i = zext <1 x i1> %0 to <1 x i64> + ret <1 x i64> %vceqz.i +} + +define i32 @test_vceqzs_f32(float %a) { +; CHECK: test_vceqzs_f32 +; CHECK: fcmeq {{s[0-9]}}, {{s[0-9]}}, #0.0 +entry: + %vceq.i = insertelement <1 x float> undef, float %a, i32 0 + %vceq1.i = call <1 x i32> @llvm.aarch64.neon.vceq.v1i32.v1f32.v1f32(<1 x float> %vceq.i, <1 x float> zeroinitializer) + %0 = extractelement <1 x i32> %vceq1.i, i32 0 + ret i32 %0 +} + +define i64 @test_vceqzd_f64(double %a) { +; CHECK: test_vceqzd_f64 +; CHECK: fcmeq {{d[0-9]}}, {{d[0-9]}}, #0.0 +entry: + %vceq.i = insertelement <1 x double> undef, double %a, i32 0 + %vceq1.i = tail call <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1f64.v1f32(<1 x double> %vceq.i, <1 x float> zeroinitializer) #5 + %0 = extractelement <1 x i64> %vceq1.i, i32 0 + ret i64 %0 +} + +define i32 @test_vcges_f32(float %a, float %b) { +; CHECK: test_vcges_f32 +; CHECK: fcmge {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} +entry: + %vcge.i = insertelement <1 x float> undef, float %a, i32 0 + %vcge1.i = 
insertelement <1 x float> undef, float %b, i32 0 + %vcge2.i = call <1 x i32> @llvm.aarch64.neon.vcge.v1i32.v1f32.v1f32(<1 x float> %vcge.i, <1 x float> %vcge1.i) + %0 = extractelement <1 x i32> %vcge2.i, i32 0 + ret i32 %0 +} + +define i64 @test_vcged_f64(double %a, double %b) { +; CHECK: test_vcged_f64 +; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} +entry: + %vcge.i = insertelement <1 x double> undef, double %a, i32 0 + %vcge1.i = insertelement <1 x double> undef, double %b, i32 0 + %vcge2.i = call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1f64.v1f64(<1 x double> %vcge.i, <1 x double> %vcge1.i) + %0 = extractelement <1 x i64> %vcge2.i, i32 0 + ret i64 %0 +} + +define i32 @test_vcgezs_f32(float %a) { +; CHECK: test_vcgezs_f32 +; CHECK: fcmge {{s[0-9]}}, {{s[0-9]}}, #0.0 +entry: + %vcge.i = insertelement <1 x float> undef, float %a, i32 0 + %vcge1.i = call <1 x i32> @llvm.aarch64.neon.vcge.v1i32.v1f32.v1f32(<1 x float> %vcge.i, <1 x float> zeroinitializer) + %0 = extractelement <1 x i32> %vcge1.i, i32 0 + ret i32 %0 +} + +define i64 @test_vcgezd_f64(double %a) { +; CHECK: test_vcgezd_f64 +; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, #0.0 +entry: + %vcge.i = insertelement <1 x double> undef, double %a, i32 0 + %vcge1.i = tail call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1f64.v1f32(<1 x double> %vcge.i, <1 x float> zeroinitializer) #5 + %0 = extractelement <1 x i64> %vcge1.i, i32 0 + ret i64 %0 +} + +define i32 @test_vcgts_f32(float %a, float %b) { +; CHECK: test_vcgts_f32 +; CHECK: fcmgt {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} +entry: + %vcgt.i = insertelement <1 x float> undef, float %a, i32 0 + %vcgt1.i = insertelement <1 x float> undef, float %b, i32 0 + %vcgt2.i = call <1 x i32> @llvm.aarch64.neon.vcgt.v1i32.v1f32.v1f32(<1 x float> %vcgt.i, <1 x float> %vcgt1.i) + %0 = extractelement <1 x i32> %vcgt2.i, i32 0 + ret i32 %0 +} + +define i64 @test_vcgtd_f64(double %a, double %b) { +; CHECK: test_vcgtd_f64 +; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} +entry: + %vcgt.i 
= insertelement <1 x double> undef, double %a, i32 0 + %vcgt1.i = insertelement <1 x double> undef, double %b, i32 0 + %vcgt2.i = call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1f64.v1f64(<1 x double> %vcgt.i, <1 x double> %vcgt1.i) + %0 = extractelement <1 x i64> %vcgt2.i, i32 0 + ret i64 %0 +} + +define i32 @test_vcgtzs_f32(float %a) { +; CHECK: test_vcgtzs_f32 +; CHECK: fcmgt {{s[0-9]}}, {{s[0-9]}}, #0.0 +entry: + %vcgt.i = insertelement <1 x float> undef, float %a, i32 0 + %vcgt1.i = call <1 x i32> @llvm.aarch64.neon.vcgt.v1i32.v1f32.v1f32(<1 x float> %vcgt.i, <1 x float> zeroinitializer) + %0 = extractelement <1 x i32> %vcgt1.i, i32 0 + ret i32 %0 +} + +define i64 @test_vcgtzd_f64(double %a) { +; CHECK: test_vcgtzd_f64 +; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, #0.0 +entry: + %vcgt.i = insertelement <1 x double> undef, double %a, i32 0 + %vcgt1.i = tail call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1f64.v1f32(<1 x double> %vcgt.i, <1 x float> zeroinitializer) #5 + %0 = extractelement <1 x i64> %vcgt1.i, i32 0 + ret i64 %0 +} + +define i32 @test_vcles_f32(float %a, float %b) { +; CHECK: test_vcles_f32 +; CHECK: fcmge {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} +entry: + %vcge.i = insertelement <1 x float> undef, float %a, i32 0 + %vcge1.i = insertelement <1 x float> undef, float %b, i32 0 + %vcge2.i = call <1 x i32> @llvm.aarch64.neon.vcge.v1i32.v1f32.v1f32(<1 x float> %vcge.i, <1 x float> %vcge1.i) + %0 = extractelement <1 x i32> %vcge2.i, i32 0 + ret i32 %0 +} + +define i64 @test_vcled_f64(double %a, double %b) { +; CHECK: test_vcled_f64 +; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} +entry: + %vcge.i = insertelement <1 x double> undef, double %a, i32 0 + %vcge1.i = insertelement <1 x double> undef, double %b, i32 0 + %vcge2.i = call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1f64.v1f64(<1 x double> %vcge.i, <1 x double> %vcge1.i) + %0 = extractelement <1 x i64> %vcge2.i, i32 0 + ret i64 %0 +} + +define i32 @test_vclezs_f32(float %a) { +; CHECK: test_vclezs_f32 +; 
CHECK: fcmle {{s[0-9]}}, {{s[0-9]}}, #0.0 +entry: + %vcle.i = insertelement <1 x float> undef, float %a, i32 0 + %vcle1.i = call <1 x i32> @llvm.aarch64.neon.vclez.v1i32.v1f32.v1f32(<1 x float> %vcle.i, <1 x float> zeroinitializer) + %0 = extractelement <1 x i32> %vcle1.i, i32 0 + ret i32 %0 +} + +define i64 @test_vclezd_f64(double %a) { +; CHECK: test_vclezd_f64 +; CHECK: fcmle {{d[0-9]}}, {{d[0-9]}}, #0.0 +entry: + %vcle.i = insertelement <1 x double> undef, double %a, i32 0 + %vcle1.i = tail call <1 x i64> @llvm.aarch64.neon.vclez.v1i64.v1f64.v1f32(<1 x double> %vcle.i, <1 x float> zeroinitializer) #5 + %0 = extractelement <1 x i64> %vcle1.i, i32 0 + ret i64 %0 +} + +define i32 @test_vclts_f32(float %a, float %b) { +; CHECK: test_vclts_f32 +; CHECK: fcmgt {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} +entry: + %vcgt.i = insertelement <1 x float> undef, float %b, i32 0 + %vcgt1.i = insertelement <1 x float> undef, float %a, i32 0 + %vcgt2.i = call <1 x i32> @llvm.aarch64.neon.vcgt.v1i32.v1f32.v1f32(<1 x float> %vcgt.i, <1 x float> %vcgt1.i) + %0 = extractelement <1 x i32> %vcgt2.i, i32 0 + ret i32 %0 +} + +define i64 @test_vcltd_f64(double %a, double %b) { +; CHECK: test_vcltd_f64 +; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} +entry: + %vcgt.i = insertelement <1 x double> undef, double %b, i32 0 + %vcgt1.i = insertelement <1 x double> undef, double %a, i32 0 + %vcgt2.i = call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1f64.v1f64(<1 x double> %vcgt.i, <1 x double> %vcgt1.i) + %0 = extractelement <1 x i64> %vcgt2.i, i32 0 + ret i64 %0 +} + +define i32 @test_vcltzs_f32(float %a) { +; CHECK: test_vcltzs_f32 +; CHECK: fcmlt {{s[0-9]}}, {{s[0-9]}}, #0.0 +entry: + %vclt.i = insertelement <1 x float> undef, float %a, i32 0 + %vclt1.i = call <1 x i32> @llvm.aarch64.neon.vcltz.v1i32.v1f32.v1f32(<1 x float> %vclt.i, <1 x float> zeroinitializer) + %0 = extractelement <1 x i32> %vclt1.i, i32 0 + ret i32 %0 +} + +define i64 @test_vcltzd_f64(double %a) { +; CHECK: test_vcltzd_f64 +; 
CHECK: fcmlt {{d[0-9]}}, {{d[0-9]}}, #0.0 +entry: + %vclt.i = insertelement <1 x double> undef, double %a, i32 0 + %vclt1.i = tail call <1 x i64> @llvm.aarch64.neon.vcltz.v1i64.v1f64.v1f32(<1 x double> %vclt.i, <1 x float> zeroinitializer) #5 + %0 = extractelement <1 x i64> %vclt1.i, i32 0 + ret i64 %0 +} + +define i32 @test_vcages_f32(float %a, float %b) { +; CHECK: test_vcages_f32 +; CHECK: facge {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} +entry: + %vcage.i = insertelement <1 x float> undef, float %a, i32 0 + %vcage1.i = insertelement <1 x float> undef, float %b, i32 0 + %vcage2.i = call <1 x i32> @llvm.aarch64.neon.vcage.v1i32.v1f32.v1f32(<1 x float> %vcage.i, <1 x float> %vcage1.i) + %0 = extractelement <1 x i32> %vcage2.i, i32 0 + ret i32 %0 +} + +define i64 @test_vcaged_f64(double %a, double %b) { +; CHECK: test_vcaged_f64 +; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} +entry: + %vcage.i = insertelement <1 x double> undef, double %a, i32 0 + %vcage1.i = insertelement <1 x double> undef, double %b, i32 0 + %vcage2.i = call <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double> %vcage.i, <1 x double> %vcage1.i) + %0 = extractelement <1 x i64> %vcage2.i, i32 0 + ret i64 %0 +} + +define i32 @test_vcagts_f32(float %a, float %b) { +; CHECK: test_vcagts_f32 +; CHECK: facgt {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} +entry: + %vcagt.i = insertelement <1 x float> undef, float %a, i32 0 + %vcagt1.i = insertelement <1 x float> undef, float %b, i32 0 + %vcagt2.i = call <1 x i32> @llvm.aarch64.neon.vcagt.v1i32.v1f32.v1f32(<1 x float> %vcagt.i, <1 x float> %vcagt1.i) + %0 = extractelement <1 x i32> %vcagt2.i, i32 0 + ret i32 %0 +} + +define i64 @test_vcagtd_f64(double %a, double %b) { +; CHECK: test_vcagtd_f64 +; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} +entry: + %vcagt.i = insertelement <1 x double> undef, double %a, i32 0 + %vcagt1.i = insertelement <1 x double> undef, double %b, i32 0 + %vcagt2.i = call <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x 
double> %vcagt.i, <1 x double> %vcagt1.i) + %0 = extractelement <1 x i64> %vcagt2.i, i32 0 + ret i64 %0 +} + +define i32 @test_vcales_f32(float %a, float %b) { +; CHECK: test_vcales_f32 +; CHECK: facge {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} +entry: + %vcage.i = insertelement <1 x float> undef, float %b, i32 0 + %vcage1.i = insertelement <1 x float> undef, float %a, i32 0 + %vcage2.i = call <1 x i32> @llvm.aarch64.neon.vcage.v1i32.v1f32.v1f32(<1 x float> %vcage.i, <1 x float> %vcage1.i) + %0 = extractelement <1 x i32> %vcage2.i, i32 0 + ret i32 %0 +} + +define i64 @test_vcaled_f64(double %a, double %b) { +; CHECK: test_vcaled_f64 +; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} +entry: + %vcage.i = insertelement <1 x double> undef, double %b, i32 0 + %vcage1.i = insertelement <1 x double> undef, double %a, i32 0 + %vcage2.i = call <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double> %vcage.i, <1 x double> %vcage1.i) + %0 = extractelement <1 x i64> %vcage2.i, i32 0 + ret i64 %0 +} + +define i32 @test_vcalts_f32(float %a, float %b) { +; CHECK: test_vcalts_f32 +; CHECK: facgt {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} +entry: + %vcalt.i = insertelement <1 x float> undef, float %b, i32 0 + %vcalt1.i = insertelement <1 x float> undef, float %a, i32 0 + %vcalt2.i = call <1 x i32> @llvm.aarch64.neon.vcagt.v1i32.v1f32.v1f32(<1 x float> %vcalt.i, <1 x float> %vcalt1.i) + %0 = extractelement <1 x i32> %vcalt2.i, i32 0 + ret i32 %0 +} + +define i64 @test_vcaltd_f64(double %a, double %b) { +; CHECK: test_vcaltd_f64 +; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} +entry: + %vcalt.i = insertelement <1 x double> undef, double %b, i32 0 + %vcalt1.i = insertelement <1 x double> undef, double %a, i32 0 + %vcalt2.i = call <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double> %vcalt.i, <1 x double> %vcalt1.i) + %0 = extractelement <1 x i64> %vcalt2.i, i32 0 + ret i64 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.vceq.v1i32.v1f32.v1f32(<1 x float>, <1 x float>) 
+declare <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1f64.v1f32(<1 x double>, <1 x float>) +declare <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1f64.v1f64(<1 x double>, <1 x double>) +declare <1 x i32> @llvm.aarch64.neon.vcge.v1i32.v1f32.v1f32(<1 x float>, <1 x float>) +declare <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1f64.v1f32(<1 x double>, <1 x float>) +declare <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1f64.v1f64(<1 x double>, <1 x double>) +declare <1 x i32> @llvm.aarch64.neon.vclez.v1i32.v1f32.v1f32(<1 x float>, <1 x float>) +declare <1 x i64> @llvm.aarch64.neon.vclez.v1i64.v1f64.v1f32(<1 x double>, <1 x float>) +declare <1 x i32> @llvm.aarch64.neon.vcgt.v1i32.v1f32.v1f32(<1 x float>, <1 x float>) +declare <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1f64.v1f32(<1 x double>, <1 x float>) +declare <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1f64.v1f64(<1 x double>, <1 x double>) +declare <1 x i32> @llvm.aarch64.neon.vcltz.v1i32.v1f32.v1f32(<1 x float>, <1 x float>) +declare <1 x i64> @llvm.aarch64.neon.vcltz.v1i64.v1f64.v1f32(<1 x double>, <1 x float>) +declare <1 x i32> @llvm.aarch64.neon.vcage.v1i32.v1f32.v1f32(<1 x float>, <1 x float>) +declare <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double>, <1 x double>) +declare <1 x i32> @llvm.aarch64.neon.vcagt.v1i32.v1f32.v1f32(<1 x float>, <1 x float>) +declare <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double>, <1 x double>) diff --git a/test/CodeGen/AArch64/neon-scalar-mul.ll b/test/CodeGen/AArch64/neon-scalar-mul.ll new file mode 100644 index 0000000..991037f --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-mul.ll @@ -0,0 +1,143 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s + +define i16 @test_vqdmulhh_s16(i16 %a, i16 %b) { +; CHECK: test_vqdmulhh_s16 +; CHECK: sqdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} + %1 = insertelement <1 x i16> undef, i16 %a, i32 0 + %2 = insertelement <1 x i16> undef, i16 %b, i32 0 + %3 = call <1 x i16> 
@llvm.arm.neon.vqdmulh.v1i16(<1 x i16> %1, <1 x i16> %2) + %4 = extractelement <1 x i16> %3, i32 0 + ret i16 %4 +} + +define i32 @test_vqdmulhs_s32(i32 %a, i32 %b) { +; CHECK: test_vqdmulhs_s32 +; CHECK: sqdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + %1 = insertelement <1 x i32> undef, i32 %a, i32 0 + %2 = insertelement <1 x i32> undef, i32 %b, i32 0 + %3 = call <1 x i32> @llvm.arm.neon.vqdmulh.v1i32(<1 x i32> %1, <1 x i32> %2) + %4 = extractelement <1 x i32> %3, i32 0 + ret i32 %4 +} + +declare <1 x i16> @llvm.arm.neon.vqdmulh.v1i16(<1 x i16>, <1 x i16>) +declare <1 x i32> @llvm.arm.neon.vqdmulh.v1i32(<1 x i32>, <1 x i32>) + +define i16 @test_vqrdmulhh_s16(i16 %a, i16 %b) { +; CHECK: test_vqrdmulhh_s16 +; CHECK: sqrdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} + %1 = insertelement <1 x i16> undef, i16 %a, i32 0 + %2 = insertelement <1 x i16> undef, i16 %b, i32 0 + %3 = call <1 x i16> @llvm.arm.neon.vqrdmulh.v1i16(<1 x i16> %1, <1 x i16> %2) + %4 = extractelement <1 x i16> %3, i32 0 + ret i16 %4 +} + +define i32 @test_vqrdmulhs_s32(i32 %a, i32 %b) { +; CHECK: test_vqrdmulhs_s32 +; CHECK: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + %1 = insertelement <1 x i32> undef, i32 %a, i32 0 + %2 = insertelement <1 x i32> undef, i32 %b, i32 0 + %3 = call <1 x i32> @llvm.arm.neon.vqrdmulh.v1i32(<1 x i32> %1, <1 x i32> %2) + %4 = extractelement <1 x i32> %3, i32 0 + ret i32 %4 +} + +declare <1 x i16> @llvm.arm.neon.vqrdmulh.v1i16(<1 x i16>, <1 x i16>) +declare <1 x i32> @llvm.arm.neon.vqrdmulh.v1i32(<1 x i32>, <1 x i32>) + +define float @test_vmulxs_f32(float %a, float %b) { +; CHECK: test_vmulxs_f32 +; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + %1 = call float @llvm.aarch64.neon.vmulx.f32(float %a, float %b) + ret float %1 +} + +define double @test_vmulxd_f64(double %a, double %b) { +; CHECK: test_vmulxd_f64 +; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + %1 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %b) + ret double %1 +} + +declare 
float @llvm.aarch64.neon.vmulx.f32(float, float) +declare double @llvm.aarch64.neon.vmulx.f64(double, double) + +define i32 @test_vqdmlalh_s16(i32 %a, i16 %b, i16 %c) { +; CHECK: test_vqdmlalh_s16 +; CHECK: sqdmlal {{s[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} +entry: + %vqdmlal.i = insertelement <1 x i32> undef, i32 %a, i32 0 + %vqdmlal1.i = insertelement <1 x i16> undef, i16 %b, i32 0 + %vqdmlal2.i = insertelement <1 x i16> undef, i16 %c, i32 0 + %vqdmlal3.i = call <1 x i32> @llvm.aarch64.neon.vqdmlal.v1i32(<1 x i32> %vqdmlal.i, <1 x i16> %vqdmlal1.i, <1 x i16> %vqdmlal2.i) + %0 = extractelement <1 x i32> %vqdmlal3.i, i32 0 + ret i32 %0 +} + +define i64 @test_vqdmlals_s32(i64 %a, i32 %b, i32 %c) { +; CHECK: test_vqdmlals_s32 +; CHECK: sqdmlal {{d[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +entry: + %vqdmlal.i = insertelement <1 x i64> undef, i64 %a, i32 0 + %vqdmlal1.i = insertelement <1 x i32> undef, i32 %b, i32 0 + %vqdmlal2.i = insertelement <1 x i32> undef, i32 %c, i32 0 + %vqdmlal3.i = call <1 x i64> @llvm.aarch64.neon.vqdmlal.v1i64(<1 x i64> %vqdmlal.i, <1 x i32> %vqdmlal1.i, <1 x i32> %vqdmlal2.i) + %0 = extractelement <1 x i64> %vqdmlal3.i, i32 0 + ret i64 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.vqdmlal.v1i32(<1 x i32>, <1 x i16>, <1 x i16>) +declare <1 x i64> @llvm.aarch64.neon.vqdmlal.v1i64(<1 x i64>, <1 x i32>, <1 x i32>) + +define i32 @test_vqdmlslh_s16(i32 %a, i16 %b, i16 %c) { +; CHECK: test_vqdmlslh_s16 +; CHECK: sqdmlsl {{s[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} +entry: + %vqdmlsl.i = insertelement <1 x i32> undef, i32 %a, i32 0 + %vqdmlsl1.i = insertelement <1 x i16> undef, i16 %b, i32 0 + %vqdmlsl2.i = insertelement <1 x i16> undef, i16 %c, i32 0 + %vqdmlsl3.i = call <1 x i32> @llvm.aarch64.neon.vqdmlsl.v1i32(<1 x i32> %vqdmlsl.i, <1 x i16> %vqdmlsl1.i, <1 x i16> %vqdmlsl2.i) + %0 = extractelement <1 x i32> %vqdmlsl3.i, i32 0 + ret i32 %0 +} + +define i64 @test_vqdmlsls_s32(i64 %a, i32 %b, i32 %c) { +; CHECK: test_vqdmlsls_s32 +; CHECK: sqdmlsl {{d[0-9]+}}, 
{{s[0-9]+}}, {{s[0-9]+}} +entry: + %vqdmlsl.i = insertelement <1 x i64> undef, i64 %a, i32 0 + %vqdmlsl1.i = insertelement <1 x i32> undef, i32 %b, i32 0 + %vqdmlsl2.i = insertelement <1 x i32> undef, i32 %c, i32 0 + %vqdmlsl3.i = call <1 x i64> @llvm.aarch64.neon.vqdmlsl.v1i64(<1 x i64> %vqdmlsl.i, <1 x i32> %vqdmlsl1.i, <1 x i32> %vqdmlsl2.i) + %0 = extractelement <1 x i64> %vqdmlsl3.i, i32 0 + ret i64 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.vqdmlsl.v1i32(<1 x i32>, <1 x i16>, <1 x i16>) +declare <1 x i64> @llvm.aarch64.neon.vqdmlsl.v1i64(<1 x i64>, <1 x i32>, <1 x i32>) + +define i32 @test_vqdmullh_s16(i16 %a, i16 %b) { +; CHECK: test_vqdmullh_s16 +; CHECK: sqdmull {{s[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} +entry: + %vqdmull.i = insertelement <1 x i16> undef, i16 %a, i32 0 + %vqdmull1.i = insertelement <1 x i16> undef, i16 %b, i32 0 + %vqdmull2.i = call <1 x i32> @llvm.arm.neon.vqdmull.v1i32(<1 x i16> %vqdmull.i, <1 x i16> %vqdmull1.i) + %0 = extractelement <1 x i32> %vqdmull2.i, i32 0 + ret i32 %0 +} + +define i64 @test_vqdmulls_s32(i32 %a, i32 %b) { +; CHECK: test_vqdmulls_s32 +; CHECK: sqdmull {{d[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +entry: + %vqdmull.i = insertelement <1 x i32> undef, i32 %a, i32 0 + %vqdmull1.i = insertelement <1 x i32> undef, i32 %b, i32 0 + %vqdmull2.i = call <1 x i64> @llvm.arm.neon.vqdmull.v1i64(<1 x i32> %vqdmull.i, <1 x i32> %vqdmull1.i) + %0 = extractelement <1 x i64> %vqdmull2.i, i32 0 + ret i64 %0 +} + +declare <1 x i32> @llvm.arm.neon.vqdmull.v1i32(<1 x i16>, <1 x i16>) +declare <1 x i64> @llvm.arm.neon.vqdmull.v1i64(<1 x i32>, <1 x i32>) diff --git a/test/CodeGen/AArch64/neon-scalar-neg.ll b/test/CodeGen/AArch64/neon-scalar-neg.ll new file mode 100644 index 0000000..4dc9d51 --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-neg.ll @@ -0,0 +1,61 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +define i64 @test_vnegd_s64(i64 %a) { +; CHECK: test_vnegd_s64 +; CHECK: neg 
{{d[0-9]+}}, {{d[0-9]+}} +entry: + %vneg.i = insertelement <1 x i64> undef, i64 %a, i32 0 + %vneg1.i = tail call <1 x i64> @llvm.aarch64.neon.vneg(<1 x i64> %vneg.i) + %0 = extractelement <1 x i64> %vneg1.i, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vneg(<1 x i64>) + +define i8 @test_vqnegb_s8(i8 %a) { +; CHECK: test_vqnegb_s8 +; CHECK: sqneg {{b[0-9]+}}, {{b[0-9]+}} +entry: + %vqneg.i = insertelement <1 x i8> undef, i8 %a, i32 0 + %vqneg1.i = call <1 x i8> @llvm.arm.neon.vqneg.v1i8(<1 x i8> %vqneg.i) + %0 = extractelement <1 x i8> %vqneg1.i, i32 0 + ret i8 %0 +} + +declare <1 x i8> @llvm.arm.neon.vqneg.v1i8(<1 x i8>) + +define i16 @test_vqnegh_s16(i16 %a) { +; CHECK: test_vqnegh_s16 +; CHECK: sqneg {{h[0-9]+}}, {{h[0-9]+}} +entry: + %vqneg.i = insertelement <1 x i16> undef, i16 %a, i32 0 + %vqneg1.i = call <1 x i16> @llvm.arm.neon.vqneg.v1i16(<1 x i16> %vqneg.i) + %0 = extractelement <1 x i16> %vqneg1.i, i32 0 + ret i16 %0 +} + +declare <1 x i16> @llvm.arm.neon.vqneg.v1i16(<1 x i16>) + +define i32 @test_vqnegs_s32(i32 %a) { +; CHECK: test_vqnegs_s32 +; CHECK: sqneg {{s[0-9]+}}, {{s[0-9]+}} +entry: + %vqneg.i = insertelement <1 x i32> undef, i32 %a, i32 0 + %vqneg1.i = call <1 x i32> @llvm.arm.neon.vqneg.v1i32(<1 x i32> %vqneg.i) + %0 = extractelement <1 x i32> %vqneg1.i, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.arm.neon.vqneg.v1i32(<1 x i32>) + +define i64 @test_vqnegd_s64(i64 %a) { +; CHECK: test_vqnegd_s64 +; CHECK: sqneg {{d[0-9]+}}, {{d[0-9]+}} +entry: + %vqneg.i = insertelement <1 x i64> undef, i64 %a, i32 0 + %vqneg1.i = call <1 x i64> @llvm.arm.neon.vqneg.v1i64(<1 x i64> %vqneg.i) + %0 = extractelement <1 x i64> %vqneg1.i, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.arm.neon.vqneg.v1i64(<1 x i64>)
\ No newline at end of file diff --git a/test/CodeGen/AArch64/neon-scalar-recip.ll b/test/CodeGen/AArch64/neon-scalar-recip.ll new file mode 100644 index 0000000..f21c27b --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-recip.ll @@ -0,0 +1,116 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s + +define float @test_vrecpss_f32(float %a, float %b) { +; CHECK: test_vrecpss_f32 +; CHECK: frecps {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + %1 = insertelement <1 x float> undef, float %a, i32 0 + %2 = insertelement <1 x float> undef, float %b, i32 0 + %3 = call <1 x float> @llvm.arm.neon.vrecps.v1f32(<1 x float> %1, <1 x float> %2) + %4 = extractelement <1 x float> %3, i32 0 + ret float %4 +} + +define double @test_vrecpsd_f64(double %a, double %b) { +; CHECK: test_vrecpsd_f64 +; CHECK: frecps {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + %1 = insertelement <1 x double> undef, double %a, i32 0 + %2 = insertelement <1 x double> undef, double %b, i32 0 + %3 = call <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double> %1, <1 x double> %2) + %4 = extractelement <1 x double> %3, i32 0 + ret double %4 +} + +declare <1 x float> @llvm.arm.neon.vrecps.v1f32(<1 x float>, <1 x float>) +declare <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double>, <1 x double>) + +define float @test_vrsqrtss_f32(float %a, float %b) { +; CHECK: test_vrsqrtss_f32 +; CHECK: frsqrts {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + %1 = insertelement <1 x float> undef, float %a, i32 0 + %2 = insertelement <1 x float> undef, float %b, i32 0 + %3 = call <1 x float> @llvm.arm.neon.vrsqrts.v1f32(<1 x float> %1, <1 x float> %2) + %4 = extractelement <1 x float> %3, i32 0 + ret float %4 +} + +define double @test_vrsqrtsd_f64(double %a, double %b) { +; CHECK: test_vrsqrtsd_f64 +; CHECK: frsqrts {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + %1 = insertelement <1 x double> undef, double %a, i32 0 + %2 = insertelement <1 x double> undef, double %b, i32 0 + %3 = call <1 x double> 
@llvm.arm.neon.vrsqrts.v1f64(<1 x double> %1, <1 x double> %2) + %4 = extractelement <1 x double> %3, i32 0 + ret double %4 +} + +declare <1 x float> @llvm.arm.neon.vrsqrts.v1f32(<1 x float>, <1 x float>) +declare <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double>, <1 x double>) + +define float @test_vrecpes_f32(float %a) { +; CHECK: test_vrecpes_f32 +; CHECK: frecpe {{s[0-9]+}}, {{s[0-9]+}} +entry: + %vrecpe.i = insertelement <1 x float> undef, float %a, i32 0 + %vrecpe1.i = tail call <1 x float> @llvm.arm.neon.vrecpe.v1f32(<1 x float> %vrecpe.i) + %0 = extractelement <1 x float> %vrecpe1.i, i32 0 + ret float %0 +} + +define double @test_vrecped_f64(double %a) { +; CHECK: test_vrecped_f64 +; CHECK: frecpe {{d[0-9]+}}, {{d[0-9]+}} +entry: + %vrecpe.i = insertelement <1 x double> undef, double %a, i32 0 + %vrecpe1.i = tail call <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double> %vrecpe.i) + %0 = extractelement <1 x double> %vrecpe1.i, i32 0 + ret double %0 +} + +declare <1 x float> @llvm.arm.neon.vrecpe.v1f32(<1 x float>) +declare <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double>) + +define float @test_vrecpxs_f32(float %a) { +; CHECK: test_vrecpxs_f32 +; CHECK: frecpx {{s[0-9]+}}, {{s[0-9]+}} +entry: + %vrecpx.i = insertelement <1 x float> undef, float %a, i32 0 + %vrecpx1.i = tail call <1 x float> @llvm.aarch64.neon.vrecpx.v1f32(<1 x float> %vrecpx.i) + %0 = extractelement <1 x float> %vrecpx1.i, i32 0 + ret float %0 +} + +define double @test_vrecpxd_f64(double %a) { +; CHECK: test_vrecpxd_f64 +; CHECK: frecpx {{d[0-9]+}}, {{d[0-9]+}} +entry: + %vrecpx.i = insertelement <1 x double> undef, double %a, i32 0 + %vrecpx1.i = tail call <1 x double> @llvm.aarch64.neon.vrecpx.v1f64(<1 x double> %vrecpx.i) + %0 = extractelement <1 x double> %vrecpx1.i, i32 0 + ret double %0 +} + +declare <1 x float> @llvm.aarch64.neon.vrecpx.v1f32(<1 x float>) +declare <1 x double> @llvm.aarch64.neon.vrecpx.v1f64(<1 x double>) + +define float @test_vrsqrtes_f32(float %a) { +; 
CHECK: test_vrsqrtes_f32 +; CHECK: frsqrte {{s[0-9]+}}, {{s[0-9]+}} +entry: + %vrsqrte.i = insertelement <1 x float> undef, float %a, i32 0 + %vrsqrte1.i = tail call <1 x float> @llvm.arm.neon.vrsqrte.v1f32(<1 x float> %vrsqrte.i) + %0 = extractelement <1 x float> %vrsqrte1.i, i32 0 + ret float %0 +} + +define double @test_vrsqrted_f64(double %a) { +; CHECK: test_vrsqrted_f64 +; CHECK: frsqrte {{d[0-9]+}}, {{d[0-9]+}} +entry: + %vrsqrte.i = insertelement <1 x double> undef, double %a, i32 0 + %vrsqrte1.i = tail call <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double> %vrsqrte.i) + %0 = extractelement <1 x double> %vrsqrte1.i, i32 0 + ret double %0 +} + +declare <1 x float> @llvm.arm.neon.vrsqrte.v1f32(<1 x float>) +declare <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double>) diff --git a/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll b/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll new file mode 100644 index 0000000..80e8dc3 --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll @@ -0,0 +1,247 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s + +declare <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64>) + +define <1 x i64> @test_addp_v1i64(<2 x i64> %a) { +; CHECK: test_addp_v1i64: + %val = call <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64> %a) +; CHECK: addp d0, v0.2d + ret <1 x i64> %val +} + +declare <1 x float> @llvm.aarch64.neon.vpfadd(<2 x float>) + +define <1 x float> @test_faddp_v1f32(<2 x float> %a) { +; CHECK: test_faddp_v1f32: + %val = call <1 x float> @llvm.aarch64.neon.vpfadd(<2 x float> %a) +; CHECK: faddp s0, v0.2s + ret <1 x float> %val +} + +declare <1 x double> @llvm.aarch64.neon.vpfaddq(<2 x double>) + +define <1 x double> @test_faddp_v1f64(<2 x double> %a) { +; CHECK: test_faddp_v1f64: + %val = call <1 x double> @llvm.aarch64.neon.vpfaddq(<2 x double> %a) +; CHECK: faddp d0, v0.2d + ret <1 x double> %val +} + + +declare <1 x float> @llvm.aarch64.neon.vpmax(<2 x float>) + +define <1 x 
float> @test_fmaxp_v1f32(<2 x float> %a) { +; CHECK: test_fmaxp_v1f32: + %val = call <1 x float> @llvm.aarch64.neon.vpmax(<2 x float> %a) +; CHECK: fmaxp s0, v0.2s + ret <1 x float> %val +} + +declare <1 x double> @llvm.aarch64.neon.vpmaxq(<2 x double>) + +define <1 x double> @test_fmaxp_v1f64(<2 x double> %a) { +; CHECK: test_fmaxp_v1f64: + %val = call <1 x double> @llvm.aarch64.neon.vpmaxq(<2 x double> %a) +; CHECK: fmaxp d0, v0.2d + ret <1 x double> %val +} + + +declare <1 x float> @llvm.aarch64.neon.vpmin(<2 x float>) + +define <1 x float> @test_fminp_v1f32(<2 x float> %a) { +; CHECK: test_fminp_v1f32: + %val = call <1 x float> @llvm.aarch64.neon.vpmin(<2 x float> %a) +; CHECK: fminp s0, v0.2s + ret <1 x float> %val +} + +declare <1 x double> @llvm.aarch64.neon.vpminq(<2 x double>) + +define <1 x double> @test_fminp_v1f64(<2 x double> %a) { +; CHECK: test_fminp_v1f64: + %val = call <1 x double> @llvm.aarch64.neon.vpminq(<2 x double> %a) +; CHECK: fminp d0, v0.2d + ret <1 x double> %val +} + +declare <1 x float> @llvm.aarch64.neon.vpfmaxnm(<2 x float>) + +define <1 x float> @test_fmaxnmp_v1f32(<2 x float> %a) { +; CHECK: test_fmaxnmp_v1f32: + %val = call <1 x float> @llvm.aarch64.neon.vpfmaxnm(<2 x float> %a) +; CHECK: fmaxnmp s0, v0.2s + ret <1 x float> %val +} + +declare <1 x double> @llvm.aarch64.neon.vpfmaxnmq(<2 x double>) + +define <1 x double> @test_fmaxnmp_v1f64(<2 x double> %a) { +; CHECK: test_fmaxnmp_v1f64: + %val = call <1 x double> @llvm.aarch64.neon.vpfmaxnmq(<2 x double> %a) +; CHECK: fmaxnmp d0, v0.2d + ret <1 x double> %val +} + +declare <1 x float> @llvm.aarch64.neon.vpfminnm(<2 x float>) + +define <1 x float> @test_fminnmp_v1f32(<2 x float> %a) { +; CHECK: test_fminnmp_v1f32: + %val = call <1 x float> @llvm.aarch64.neon.vpfminnm(<2 x float> %a) +; CHECK: fminnmp s0, v0.2s + ret <1 x float> %val +} + +declare <1 x double> @llvm.aarch64.neon.vpfminnmq(<2 x double>) + +define <1 x double> @test_fminnmp_v1f64(<2 x double> %a) { +; CHECK: 
test_fminnmp_v1f64: + %val = call <1 x double> @llvm.aarch64.neon.vpfminnmq(<2 x double> %a) +; CHECK: fminnmp d0, v0.2d + ret <1 x double> %val +} + +define float @test_vaddv_f32(<2 x float> %a) { +; CHECK-LABEL: test_vaddv_f32 +; CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s + %1 = tail call <1 x float> @llvm.aarch64.neon.vaddv.v1f32.v2f32(<2 x float> %a) + %2 = extractelement <1 x float> %1, i32 0 + ret float %2 +} + +define float @test_vaddvq_f32(<4 x float> %a) { +; CHECK-LABEL: test_vaddvq_f32 +; CHECK: faddp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s + %1 = tail call <1 x float> @llvm.aarch64.neon.vaddv.v1f32.v4f32(<4 x float> %a) + %2 = extractelement <1 x float> %1, i32 0 + ret float %2 +} + +define double @test_vaddvq_f64(<2 x double> %a) { +; CHECK-LABEL: test_vaddvq_f64 +; CHECK: faddp {{d[0-9]+}}, {{v[0-9]+}}.2d + %1 = tail call <1 x double> @llvm.aarch64.neon.vaddv.v1f64.v2f64(<2 x double> %a) + %2 = extractelement <1 x double> %1, i32 0 + ret double %2 +} + +define float @test_vmaxv_f32(<2 x float> %a) { +; CHECK-LABEL: test_vmaxv_f32 +; CHECK: fmaxp {{s[0-9]+}}, {{v[0-9]+}}.2s + %1 = tail call <1 x float> @llvm.aarch64.neon.vmaxv.v1f32.v2f32(<2 x float> %a) + %2 = extractelement <1 x float> %1, i32 0 + ret float %2 +} + +define double @test_vmaxvq_f64(<2 x double> %a) { +; CHECK-LABEL: test_vmaxvq_f64 +; CHECK: fmaxp {{d[0-9]+}}, {{v[0-9]+}}.2d + %1 = tail call <1 x double> @llvm.aarch64.neon.vmaxv.v1f64.v2f64(<2 x double> %a) + %2 = extractelement <1 x double> %1, i32 0 + ret double %2 +} + +define float @test_vminv_f32(<2 x float> %a) { +; CHECK-LABEL: test_vminv_f32 +; CHECK: fminp {{s[0-9]+}}, {{v[0-9]+}}.2s + %1 = tail call <1 x float> @llvm.aarch64.neon.vminv.v1f32.v2f32(<2 x float> %a) + %2 = extractelement <1 x float> %1, i32 0 + ret float %2 +} + +define double @test_vminvq_f64(<2 x double> %a) { +; CHECK-LABEL: test_vminvq_f64 +; CHECK: fminp {{d[0-9]+}}, {{v[0-9]+}}.2d + %1 = tail call <1 x 
double> @llvm.aarch64.neon.vminv.v1f64.v2f64(<2 x double> %a) + %2 = extractelement <1 x double> %1, i32 0 + ret double %2 +} + +define double @test_vmaxnmvq_f64(<2 x double> %a) { +; CHECK-LABEL: test_vmaxnmvq_f64 +; CHECK: fmaxnmp {{d[0-9]+}}, {{v[0-9]+}}.2d + %1 = tail call <1 x double> @llvm.aarch64.neon.vmaxnmv.v1f64.v2f64(<2 x double> %a) + %2 = extractelement <1 x double> %1, i32 0 + ret double %2 +} + +define float @test_vmaxnmv_f32(<2 x float> %a) { +; CHECK-LABEL: test_vmaxnmv_f32 +; CHECK: fmaxnmp {{s[0-9]+}}, {{v[0-9]+}}.2s + %1 = tail call <1 x float> @llvm.aarch64.neon.vmaxnmv.v1f32.v2f32(<2 x float> %a) + %2 = extractelement <1 x float> %1, i32 0 + ret float %2 +} + +define double @test_vminnmvq_f64(<2 x double> %a) { +; CHECK-LABEL: test_vminnmvq_f64 +; CHECK: fminnmp {{d[0-9]+}}, {{v[0-9]+}}.2d + %1 = tail call <1 x double> @llvm.aarch64.neon.vminnmv.v1f64.v2f64(<2 x double> %a) + %2 = extractelement <1 x double> %1, i32 0 + ret double %2 +} + +define float @test_vminnmv_f32(<2 x float> %a) { +; CHECK-LABEL: test_vminnmv_f32 +; CHECK: fminnmp {{s[0-9]+}}, {{v[0-9]+}}.2s + %1 = tail call <1 x float> @llvm.aarch64.neon.vminnmv.v1f32.v2f32(<2 x float> %a) + %2 = extractelement <1 x float> %1, i32 0 + ret float %2 +} + +define <2 x i64> @test_vpaddq_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vpaddq_s64 +; CHECK: addp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d + %1 = tail call <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpaddq_u64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vpaddq_u64 +; CHECK: addp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d + %1 = tail call <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %1 +} + +define i64 @test_vaddvq_s64(<2 x i64> %a) { +; CHECK-LABEL: test_vaddvq_s64 +; CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d + %1 = tail call <1 x i64> @llvm.aarch64.neon.vaddv.v1i64.v2i64(<2 x i64> %a) + %2 = 
extractelement <1 x i64> %1, i32 0 + ret i64 %2 +} + +define i64 @test_vaddvq_u64(<2 x i64> %a) { +; CHECK-LABEL: test_vaddvq_u64 +; CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d + %1 = tail call <1 x i64> @llvm.aarch64.neon.vaddv.v1i64.v2i64(<2 x i64> %a) + %2 = extractelement <1 x i64> %1, i32 0 + ret i64 %2 +} + +declare <1 x i64> @llvm.aarch64.neon.vaddv.v1i64.v2i64(<2 x i64>) + +declare <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64>, <2 x i64>) + +declare <1 x float> @llvm.aarch64.neon.vminnmv.v1f32.v2f32(<2 x float>) + +declare <1 x double> @llvm.aarch64.neon.vminnmv.v1f64.v2f64(<2 x double>) + +declare <1 x float> @llvm.aarch64.neon.vmaxnmv.v1f32.v2f32(<2 x float>) + +declare <1 x double> @llvm.aarch64.neon.vmaxnmv.v1f64.v2f64(<2 x double>) + +declare <1 x double> @llvm.aarch64.neon.vminv.v1f64.v2f64(<2 x double>) + +declare <1 x float> @llvm.aarch64.neon.vminv.v1f32.v2f32(<2 x float>) + +declare <1 x double> @llvm.aarch64.neon.vmaxv.v1f64.v2f64(<2 x double>) + +declare <1 x float> @llvm.aarch64.neon.vmaxv.v1f32.v2f32(<2 x float>) + +declare <1 x double> @llvm.aarch64.neon.vaddv.v1f64.v2f64(<2 x double>) + +declare <1 x float> @llvm.aarch64.neon.vaddv.v1f32.v4f32(<4 x float>) + +declare <1 x float> @llvm.aarch64.neon.vaddv.v1f32.v2f32(<2 x float>)
\ No newline at end of file diff --git a/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll b/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll new file mode 100644 index 0000000..83ceb4e --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll @@ -0,0 +1,39 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s + + +declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_urshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_urshl_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: urshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_srshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_srshl_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: srshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +declare <1 x i64> @llvm.aarch64.neon.vrshldu(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.aarch64.neon.vrshlds(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_urshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_urshl_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vrshldu(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: urshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_srshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_srshl_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vrshlds(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: srshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + + + diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll b/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll new file mode 100644 index 0000000..bd66f80 --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll @@ -0,0 
+1,242 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s + +declare <1 x i8> @llvm.arm.neon.vqaddu.v1i8(<1 x i8>, <1 x i8>) +declare <1 x i8> @llvm.arm.neon.vqadds.v1i8(<1 x i8>, <1 x i8>) + +define <1 x i8> @test_uqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { +; CHECK: test_uqadd_v1i8_aarch64: + %tmp1 = call <1 x i8> @llvm.arm.neon.vqaddu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) +;CHECK: uqadd {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} + ret <1 x i8> %tmp1 +} + +define <1 x i8> @test_sqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { +; CHECK: test_sqadd_v1i8_aarch64: + %tmp1 = call <1 x i8> @llvm.arm.neon.vqadds.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) +;CHECK: sqadd {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} + ret <1 x i8> %tmp1 +} + +declare <1 x i8> @llvm.arm.neon.vqsubu.v1i8(<1 x i8>, <1 x i8>) +declare <1 x i8> @llvm.arm.neon.vqsubs.v1i8(<1 x i8>, <1 x i8>) + +define <1 x i8> @test_uqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { +; CHECK: test_uqsub_v1i8_aarch64: + %tmp1 = call <1 x i8> @llvm.arm.neon.vqsubu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) +;CHECK: uqsub {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} + ret <1 x i8> %tmp1 +} + +define <1 x i8> @test_sqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { +; CHECK: test_sqsub_v1i8_aarch64: + %tmp1 = call <1 x i8> @llvm.arm.neon.vqsubs.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) +;CHECK: sqsub {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} + ret <1 x i8> %tmp1 +} + +declare <1 x i16> @llvm.arm.neon.vqaddu.v1i16(<1 x i16>, <1 x i16>) +declare <1 x i16> @llvm.arm.neon.vqadds.v1i16(<1 x i16>, <1 x i16>) + +define <1 x i16> @test_uqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { +; CHECK: test_uqadd_v1i16_aarch64: + %tmp1 = call <1 x i16> @llvm.arm.neon.vqaddu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) +;CHECK: uqadd {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} + ret <1 x i16> %tmp1 +} + +define <1 x i16> @test_sqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { +; CHECK: test_sqadd_v1i16_aarch64: + %tmp1 = call <1 x 
i16> @llvm.arm.neon.vqadds.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) +;CHECK: sqadd {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} + ret <1 x i16> %tmp1 +} + +declare <1 x i16> @llvm.arm.neon.vqsubu.v1i16(<1 x i16>, <1 x i16>) +declare <1 x i16> @llvm.arm.neon.vqsubs.v1i16(<1 x i16>, <1 x i16>) + +define <1 x i16> @test_uqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { +; CHECK: test_uqsub_v1i16_aarch64: + %tmp1 = call <1 x i16> @llvm.arm.neon.vqsubu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) +;CHECK: uqsub {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} + ret <1 x i16> %tmp1 +} + +define <1 x i16> @test_sqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { +; CHECK: test_sqsub_v1i16_aarch64: + %tmp1 = call <1 x i16> @llvm.arm.neon.vqsubs.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) +;CHECK: sqsub {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} + ret <1 x i16> %tmp1 +} + +declare <1 x i32> @llvm.arm.neon.vqaddu.v1i32(<1 x i32>, <1 x i32>) +declare <1 x i32> @llvm.arm.neon.vqadds.v1i32(<1 x i32>, <1 x i32>) + +define <1 x i32> @test_uqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { +; CHECK: test_uqadd_v1i32_aarch64: + %tmp1 = call <1 x i32> @llvm.arm.neon.vqaddu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) +;CHECK: uqadd {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} + ret <1 x i32> %tmp1 +} + +define <1 x i32> @test_sqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { +; CHECK: test_sqadd_v1i32_aarch64: + %tmp1 = call <1 x i32> @llvm.arm.neon.vqadds.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) +;CHECK: sqadd {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} + ret <1 x i32> %tmp1 +} + +declare <1 x i32> @llvm.arm.neon.vqsubu.v1i32(<1 x i32>, <1 x i32>) +declare <1 x i32> @llvm.arm.neon.vqsubs.v1i32(<1 x i32>, <1 x i32>) + +define <1 x i32> @test_uqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { +; CHECK: test_uqsub_v1i32_aarch64: + %tmp1 = call <1 x i32> @llvm.arm.neon.vqsubu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) +;CHECK: uqsub {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} + ret <1 x i32> %tmp1 +} + + +define <1 x i32> 
@test_sqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { +; CHECK: test_sqsub_v1i32_aarch64: + %tmp1 = call <1 x i32> @llvm.arm.neon.vqsubs.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) +;CHECK: sqsub {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} + ret <1 x i32> %tmp1 +} + +declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_uqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_uqadd_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: uqadd {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_sqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sqadd_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: sqadd {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_uqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_uqsub_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: uqsub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_sqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sqsub_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: sqsub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +define i8 @test_vuqaddb_s8(i8 %a, i8 %b) { +; CHECK: test_vuqaddb_s8 +; CHECK: suqadd {{b[0-9]+}}, {{b[0-9]+}} +entry: + %vuqadd.i = insertelement <1 x i8> undef, i8 %a, i32 0 + %vuqadd1.i = insertelement <1 x i8> undef, i8 %b, i32 0 + %vuqadd2.i = call <1 x i8> @llvm.aarch64.neon.vuqadd.v1i8(<1 x i8> %vuqadd.i, <1 x i8> 
%vuqadd1.i) + %0 = extractelement <1 x i8> %vuqadd2.i, i32 0 + ret i8 %0 +} + +declare <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8>, <1 x i8>) + +define i16 @test_vuqaddh_s16(i16 %a, i16 %b) { +; CHECK: test_vuqaddh_s16 +; CHECK: suqadd {{h[0-9]+}}, {{h[0-9]+}} +entry: + %vuqadd.i = insertelement <1 x i16> undef, i16 %a, i32 0 + %vuqadd1.i = insertelement <1 x i16> undef, i16 %b, i32 0 + %vuqadd2.i = call <1 x i16> @llvm.aarch64.neon.vuqadd.v1i16(<1 x i16> %vuqadd.i, <1 x i16> %vuqadd1.i) + %0 = extractelement <1 x i16> %vuqadd2.i, i32 0 + ret i16 %0 +} + +declare <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16>, <1 x i16>) + +define i32 @test_vuqadds_s32(i32 %a, i32 %b) { +; CHECK: test_vuqadds_s32 +; CHECK: suqadd {{s[0-9]+}}, {{s[0-9]+}} +entry: + %vuqadd.i = insertelement <1 x i32> undef, i32 %a, i32 0 + %vuqadd1.i = insertelement <1 x i32> undef, i32 %b, i32 0 + %vuqadd2.i = call <1 x i32> @llvm.aarch64.neon.vuqadd.v1i32(<1 x i32> %vuqadd.i, <1 x i32> %vuqadd1.i) + %0 = extractelement <1 x i32> %vuqadd2.i, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.vsqadd.v1i32(<1 x i32>, <1 x i32>) + +define i64 @test_vuqaddd_s64(i64 %a, i64 %b) { +; CHECK: test_vuqaddd_s64 +; CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}} +entry: + %vuqadd.i = insertelement <1 x i64> undef, i64 %a, i32 0 + %vuqadd1.i = insertelement <1 x i64> undef, i64 %b, i32 0 + %vuqadd2.i = call <1 x i64> @llvm.aarch64.neon.vuqadd.v1i64(<1 x i64> %vuqadd.i, <1 x i64> %vuqadd1.i) + %0 = extractelement <1 x i64> %vuqadd2.i, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vsqadd.v1i64(<1 x i64>, <1 x i64>) + +define i8 @test_vsqaddb_u8(i8 %a, i8 %b) { +; CHECK: test_vsqaddb_u8 +; CHECK: usqadd {{b[0-9]+}}, {{b[0-9]+}} +entry: + %vsqadd.i = insertelement <1 x i8> undef, i8 %a, i32 0 + %vsqadd1.i = insertelement <1 x i8> undef, i8 %b, i32 0 + %vsqadd2.i = call <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8> %vsqadd.i, <1 x i8> %vsqadd1.i) + %0 = extractelement <1 x i8> 
%vsqadd2.i, i32 0 + ret i8 %0 +} + +declare <1 x i8> @llvm.aarch64.neon.vuqadd.v1i8(<1 x i8>, <1 x i8>) + +define i16 @test_vsqaddh_u16(i16 %a, i16 %b) { +; CHECK: test_vsqaddh_u16 +; CHECK: usqadd {{h[0-9]+}}, {{h[0-9]+}} +entry: + %vsqadd.i = insertelement <1 x i16> undef, i16 %a, i32 0 + %vsqadd1.i = insertelement <1 x i16> undef, i16 %b, i32 0 + %vsqadd2.i = call <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16> %vsqadd.i, <1 x i16> %vsqadd1.i) + %0 = extractelement <1 x i16> %vsqadd2.i, i32 0 + ret i16 %0 +} + +declare <1 x i16> @llvm.aarch64.neon.vuqadd.v1i16(<1 x i16>, <1 x i16>) + +define i32 @test_vsqadds_u32(i32 %a, i32 %b) { +; CHECK: test_vsqadds_u32 +; CHECK: usqadd {{s[0-9]+}}, {{s[0-9]+}} +entry: + %vsqadd.i = insertelement <1 x i32> undef, i32 %a, i32 0 + %vsqadd1.i = insertelement <1 x i32> undef, i32 %b, i32 0 + %vsqadd2.i = call <1 x i32> @llvm.aarch64.neon.vsqadd.v1i32(<1 x i32> %vsqadd.i, <1 x i32> %vsqadd1.i) + %0 = extractelement <1 x i32> %vsqadd2.i, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.vuqadd.v1i32(<1 x i32>, <1 x i32>) + +define i64 @test_vsqaddd_u64(i64 %a, i64 %b) { +; CHECK: test_vsqaddd_u64 +; CHECK: usqadd {{d[0-9]+}}, {{d[0-9]+}} +entry: + %vsqadd.i = insertelement <1 x i64> undef, i64 %a, i32 0 + %vsqadd1.i = insertelement <1 x i64> undef, i64 %b, i32 0 + %vsqadd2.i = call <1 x i64> @llvm.aarch64.neon.vsqadd.v1i64(<1 x i64> %vsqadd.i, <1 x i64> %vsqadd1.i) + %0 = extractelement <1 x i64> %vsqadd2.i, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vuqadd.v1i64(<1 x i64>, <1 x i64>) diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll b/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll new file mode 100644 index 0000000..0fd67df --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll @@ -0,0 +1,94 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s + +declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x 
i64>, <1 x i64>) +declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_uqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_uqrshl_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: uqrshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_sqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sqrshl_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: sqrshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +declare <1 x i8> @llvm.aarch64.neon.vqrshlu.v1i8(<1 x i8>, <1 x i8>) +declare <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8>, <1 x i8>) + +define <1 x i8> @test_uqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { +; CHECK: test_uqrshl_v1i8_aarch64: + %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqrshlu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) +;CHECK: uqrshl {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} + + ret <1 x i8> %tmp1 +} + +define <1 x i8> @test_sqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { +; CHECK: test_sqrshl_v1i8_aarch64: + %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) +;CHECK: sqrshl {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} + ret <1 x i8> %tmp1 +} + +declare <1 x i16> @llvm.aarch64.neon.vqrshlu.v1i16(<1 x i16>, <1 x i16>) +declare <1 x i16> @llvm.aarch64.neon.vqrshls.v1i16(<1 x i16>, <1 x i16>) + +define <1 x i16> @test_uqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { +; CHECK: test_uqrshl_v1i16_aarch64: + %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqrshlu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) +;CHECK: uqrshl {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} + + ret <1 x i16> %tmp1 +} + +define <1 x i16> @test_sqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { +; CHECK: test_sqrshl_v1i16_aarch64: + %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqrshls.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) +;CHECK: 
sqrshl {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} + ret <1 x i16> %tmp1 +} + +declare <1 x i32> @llvm.aarch64.neon.vqrshlu.v1i32(<1 x i32>, <1 x i32>) +declare <1 x i32> @llvm.aarch64.neon.vqrshls.v1i32(<1 x i32>, <1 x i32>) + +define <1 x i32> @test_uqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { +; CHECK: test_uqrshl_v1i32_aarch64: + %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqrshlu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) +;CHECK: uqrshl {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} + + ret <1 x i32> %tmp1 +} + +define <1 x i32> @test_sqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { +; CHECK: test_sqrshl_v1i32_aarch64: + %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqrshls.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) +;CHECK: sqrshl {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} + ret <1 x i32> %tmp1 +} + +declare <1 x i64> @llvm.aarch64.neon.vqrshlu.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.aarch64.neon.vqrshls.v1i64(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_uqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_uqrshl_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqrshlu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: uqrshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_sqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sqrshl_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqrshls.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: sqrshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + + + diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll b/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll new file mode 100644 index 0000000..8fdea24 --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll @@ -0,0 +1,88 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s + +declare <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 
x i64>, <1 x i64>) + +define <1 x i64> @test_uqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_uqshl_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: uqshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_sqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sqshl_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: sqshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +declare <1 x i8> @llvm.aarch64.neon.vqshlu.v1i8(<1 x i8>, <1 x i8>) +declare <1 x i8> @llvm.aarch64.neon.vqshls.v1i8(<1 x i8>, <1 x i8>) + +define <1 x i8> @test_uqshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { +; CHECK: test_uqshl_v1i8_aarch64: + %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqshlu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) +;CHECK: uqshl {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} + ret <1 x i8> %tmp1 +} + +define <1 x i8> @test_sqshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { +; CHECK: test_sqshl_v1i8_aarch64: + %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqshls.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) +;CHECK: sqshl {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} + ret <1 x i8> %tmp1 +} + +declare <1 x i16> @llvm.aarch64.neon.vqshlu.v1i16(<1 x i16>, <1 x i16>) +declare <1 x i16> @llvm.aarch64.neon.vqshls.v1i16(<1 x i16>, <1 x i16>) + +define <1 x i16> @test_uqshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { +; CHECK: test_uqshl_v1i16_aarch64: + %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqshlu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) +;CHECK: uqshl {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} + ret <1 x i16> %tmp1 +} + +define <1 x i16> @test_sqshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { +; CHECK: test_sqshl_v1i16_aarch64: + %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqshls.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) +;CHECK: sqshl {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} + ret <1 x i16> %tmp1 +} + +declare <1 x i32> 
@llvm.aarch64.neon.vqshlu.v1i32(<1 x i32>, <1 x i32>) +declare <1 x i32> @llvm.aarch64.neon.vqshls.v1i32(<1 x i32>, <1 x i32>) + +define <1 x i32> @test_uqshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { +; CHECK: test_uqshl_v1i32_aarch64: + %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqshlu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) +;CHECK: uqshl {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} + ret <1 x i32> %tmp1 +} + +define <1 x i32> @test_sqshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { +; CHECK: test_sqshl_v1i32_aarch64: + %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqshls.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) +;CHECK: sqshl {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} + ret <1 x i32> %tmp1 +} + +declare <1 x i64> @llvm.aarch64.neon.vqshlu.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.aarch64.neon.vqshls.v1i64(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_uqshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_uqshl_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqshlu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: uqshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_sqshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sqshl_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqshls.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: sqshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + + diff --git a/test/CodeGen/AArch64/neon-scalar-shift-imm.ll b/test/CodeGen/AArch64/neon-scalar-shift-imm.ll new file mode 100644 index 0000000..6224361 --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-shift-imm.ll @@ -0,0 +1,531 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +define i64 @test_vshrd_n_s64(i64 %a) { +; CHECK: test_vshrd_n_s64 +; CHECK: sshr {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vsshr = insertelement <1 x i64> undef, i64 %a, i32 0 + %vsshr1 = call <1 x i64> @llvm.aarch64.neon.vshrds.n(<1 x i64> 
%vsshr, i32 63) + %0 = extractelement <1 x i64> %vsshr1, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vshrds.n(<1 x i64>, i32) + +define i64 @test_vshrd_n_u64(i64 %a) { +; CHECK: test_vshrd_n_u64 +; CHECK: ushr {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vushr = insertelement <1 x i64> undef, i64 %a, i32 0 + %vushr1 = call <1 x i64> @llvm.aarch64.neon.vshrdu.n(<1 x i64> %vushr, i32 63) + %0 = extractelement <1 x i64> %vushr1, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vshrdu.n(<1 x i64>, i32) + +define i64 @test_vrshrd_n_s64(i64 %a) { +; CHECK: test_vrshrd_n_s64 +; CHECK: srshr {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vsrshr = insertelement <1 x i64> undef, i64 %a, i32 0 + %vsrshr1 = call <1 x i64> @llvm.aarch64.neon.vsrshr.v1i64(<1 x i64> %vsrshr, i32 63) + %0 = extractelement <1 x i64> %vsrshr1, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vsrshr.v1i64(<1 x i64>, i32) + +define i64 @test_vrshrd_n_u64(i64 %a) { +; CHECK: test_vrshrd_n_u64 +; CHECK: urshr {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vurshr = insertelement <1 x i64> undef, i64 %a, i32 0 + %vurshr1 = call <1 x i64> @llvm.aarch64.neon.vurshr.v1i64(<1 x i64> %vurshr, i32 63) + %0 = extractelement <1 x i64> %vurshr1, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vurshr.v1i64(<1 x i64>, i32) + +define i64 @test_vsrad_n_s64(i64 %a, i64 %b) { +; CHECK: test_vsrad_n_s64 +; CHECK: ssra {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vssra = insertelement <1 x i64> undef, i64 %a, i32 0 + %vssra1 = insertelement <1 x i64> undef, i64 %b, i32 0 + %vssra2 = call <1 x i64> @llvm.aarch64.neon.vsrads.n(<1 x i64> %vssra, <1 x i64> %vssra1, i32 63) + %0 = extractelement <1 x i64> %vssra2, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vsrads.n(<1 x i64>, <1 x i64>, i32) + +define i64 @test_vsrad_n_u64(i64 %a, i64 %b) { +; CHECK: test_vsrad_n_u64 +; CHECK: usra {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vusra = insertelement <1 x i64> undef, i64 %a, 
i32 0 + %vusra1 = insertelement <1 x i64> undef, i64 %b, i32 0 + %vusra2 = call <1 x i64> @llvm.aarch64.neon.vsradu.n(<1 x i64> %vusra, <1 x i64> %vusra1, i32 63) + %0 = extractelement <1 x i64> %vusra2, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vsradu.n(<1 x i64>, <1 x i64>, i32) + +define i64 @test_vrsrad_n_s64(i64 %a, i64 %b) { +; CHECK: test_vrsrad_n_s64 +; CHECK: srsra {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vsrsra = insertelement <1 x i64> undef, i64 %a, i32 0 + %vsrsra1 = insertelement <1 x i64> undef, i64 %b, i32 0 + %vsrsra2 = call <1 x i64> @llvm.aarch64.neon.vrsrads.n(<1 x i64> %vsrsra, <1 x i64> %vsrsra1, i32 63) + %0 = extractelement <1 x i64> %vsrsra2, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vrsrads.n(<1 x i64>, <1 x i64>, i32) + +define i64 @test_vrsrad_n_u64(i64 %a, i64 %b) { +; CHECK: test_vrsrad_n_u64 +; CHECK: ursra {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vursra = insertelement <1 x i64> undef, i64 %a, i32 0 + %vursra1 = insertelement <1 x i64> undef, i64 %b, i32 0 + %vursra2 = call <1 x i64> @llvm.aarch64.neon.vrsradu.n(<1 x i64> %vursra, <1 x i64> %vursra1, i32 63) + %0 = extractelement <1 x i64> %vursra2, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vrsradu.n(<1 x i64>, <1 x i64>, i32) + +define i64 @test_vshld_n_s64(i64 %a) { +; CHECK: test_vshld_n_s64 +; CHECK: shl {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vshl = insertelement <1 x i64> undef, i64 %a, i32 0 + %vshl1 = call <1 x i64> @llvm.aarch64.neon.vshld.n(<1 x i64> %vshl, i32 63) + %0 = extractelement <1 x i64> %vshl1, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vshld.n(<1 x i64>, i32) + +define i64 @test_vshld_n_u64(i64 %a) { +; CHECK: test_vshld_n_u64 +; CHECK: shl {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vshl = insertelement <1 x i64> undef, i64 %a, i32 0 + %vshl1 = call <1 x i64> @llvm.aarch64.neon.vshld.n(<1 x i64> %vshl, i32 63) + %0 = extractelement <1 x i64> %vshl1, i32 0 + ret i64 %0 +} + +define i8 
@test_vqshlb_n_s8(i8 %a) { +; CHECK: test_vqshlb_n_s8 +; CHECK: sqshl {{b[0-9]+}}, {{b[0-9]+}}, #7 +entry: + %vsqshl = insertelement <1 x i8> undef, i8 %a, i32 0 + %vsqshl1 = call <1 x i8> @llvm.aarch64.neon.vqshls.n.v1i8(<1 x i8> %vsqshl, i32 7) + %0 = extractelement <1 x i8> %vsqshl1, i32 0 + ret i8 %0 +} + +declare <1 x i8> @llvm.aarch64.neon.vqshls.n.v1i8(<1 x i8>, i32) + +define i16 @test_vqshlh_n_s16(i16 %a) { +; CHECK: test_vqshlh_n_s16 +; CHECK: sqshl {{h[0-9]+}}, {{h[0-9]+}}, #15 +entry: + %vsqshl = insertelement <1 x i16> undef, i16 %a, i32 0 + %vsqshl1 = call <1 x i16> @llvm.aarch64.neon.vqshls.n.v1i16(<1 x i16> %vsqshl, i32 15) + %0 = extractelement <1 x i16> %vsqshl1, i32 0 + ret i16 %0 +} + +declare <1 x i16> @llvm.aarch64.neon.vqshls.n.v1i16(<1 x i16>, i32) + +define i32 @test_vqshls_n_s32(i32 %a) { +; CHECK: test_vqshls_n_s32 +; CHECK: sqshl {{s[0-9]+}}, {{s[0-9]+}}, #31 +entry: + %vsqshl = insertelement <1 x i32> undef, i32 %a, i32 0 + %vsqshl1 = call <1 x i32> @llvm.aarch64.neon.vqshls.n.v1i32(<1 x i32> %vsqshl, i32 31) + %0 = extractelement <1 x i32> %vsqshl1, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.vqshls.n.v1i32(<1 x i32>, i32) + +define i64 @test_vqshld_n_s64(i64 %a) { +; CHECK: test_vqshld_n_s64 +; CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vsqshl = insertelement <1 x i64> undef, i64 %a, i32 0 + %vsqshl1 = call <1 x i64> @llvm.aarch64.neon.vqshls.n.v1i64(<1 x i64> %vsqshl, i32 63) + %0 = extractelement <1 x i64> %vsqshl1, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vqshls.n.v1i64(<1 x i64>, i32) + +define i8 @test_vqshlb_n_u8(i8 %a) { +; CHECK: test_vqshlb_n_u8 +; CHECK: uqshl {{b[0-9]+}}, {{b[0-9]+}}, #7 +entry: + %vuqshl = insertelement <1 x i8> undef, i8 %a, i32 0 + %vuqshl1 = call <1 x i8> @llvm.aarch64.neon.vqshlu.n.v1i8(<1 x i8> %vuqshl, i32 7) + %0 = extractelement <1 x i8> %vuqshl1, i32 0 + ret i8 %0 +} + +declare <1 x i8> @llvm.aarch64.neon.vqshlu.n.v1i8(<1 x i8>, i32) + +define i16 
@test_vqshlh_n_u16(i16 %a) { +; CHECK: test_vqshlh_n_u16 +; CHECK: uqshl {{h[0-9]+}}, {{h[0-9]+}}, #15 +entry: + %vuqshl = insertelement <1 x i16> undef, i16 %a, i32 0 + %vuqshl1 = call <1 x i16> @llvm.aarch64.neon.vqshlu.n.v1i16(<1 x i16> %vuqshl, i32 15) + %0 = extractelement <1 x i16> %vuqshl1, i32 0 + ret i16 %0 +} + +declare <1 x i16> @llvm.aarch64.neon.vqshlu.n.v1i16(<1 x i16>, i32) + +define i32 @test_vqshls_n_u32(i32 %a) { +; CHECK: test_vqshls_n_u32 +; CHECK: uqshl {{s[0-9]+}}, {{s[0-9]+}}, #31 +entry: + %vuqshl = insertelement <1 x i32> undef, i32 %a, i32 0 + %vuqshl1 = call <1 x i32> @llvm.aarch64.neon.vqshlu.n.v1i32(<1 x i32> %vuqshl, i32 31) + %0 = extractelement <1 x i32> %vuqshl1, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.vqshlu.n.v1i32(<1 x i32>, i32) + +define i64 @test_vqshld_n_u64(i64 %a) { +; CHECK: test_vqshld_n_u64 +; CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vuqshl = insertelement <1 x i64> undef, i64 %a, i32 0 + %vuqshl1 = call <1 x i64> @llvm.aarch64.neon.vqshlu.n.v1i64(<1 x i64> %vuqshl, i32 63) + %0 = extractelement <1 x i64> %vuqshl1, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vqshlu.n.v1i64(<1 x i64>, i32) + +define i8 @test_vqshlub_n_s8(i8 %a) { +; CHECK: test_vqshlub_n_s8 +; CHECK: sqshlu {{b[0-9]+}}, {{b[0-9]+}}, #7 +entry: + %vsqshlu = insertelement <1 x i8> undef, i8 %a, i32 0 + %vsqshlu1 = call <1 x i8> @llvm.aarch64.neon.vsqshlu.v1i8(<1 x i8> %vsqshlu, i32 7) + %0 = extractelement <1 x i8> %vsqshlu1, i32 0 + ret i8 %0 +} + +declare <1 x i8> @llvm.aarch64.neon.vsqshlu.v1i8(<1 x i8>, i32) + +define i16 @test_vqshluh_n_s16(i16 %a) { +; CHECK: test_vqshluh_n_s16 +; CHECK: sqshlu {{h[0-9]+}}, {{h[0-9]+}}, #15 +entry: + %vsqshlu = insertelement <1 x i16> undef, i16 %a, i32 0 + %vsqshlu1 = call <1 x i16> @llvm.aarch64.neon.vsqshlu.v1i16(<1 x i16> %vsqshlu, i32 15) + %0 = extractelement <1 x i16> %vsqshlu1, i32 0 + ret i16 %0 +} + +declare <1 x i16> @llvm.aarch64.neon.vsqshlu.v1i16(<1 x 
i16>, i32) + +define i32 @test_vqshlus_n_s32(i32 %a) { +; CHECK: test_vqshlus_n_s32 +; CHECK: sqshlu {{s[0-9]+}}, {{s[0-9]+}}, #31 +entry: + %vsqshlu = insertelement <1 x i32> undef, i32 %a, i32 0 + %vsqshlu1 = call <1 x i32> @llvm.aarch64.neon.vsqshlu.v1i32(<1 x i32> %vsqshlu, i32 31) + %0 = extractelement <1 x i32> %vsqshlu1, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.vsqshlu.v1i32(<1 x i32>, i32) + +define i64 @test_vqshlud_n_s64(i64 %a) { +; CHECK: test_vqshlud_n_s64 +; CHECK: sqshlu {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vsqshlu = insertelement <1 x i64> undef, i64 %a, i32 0 + %vsqshlu1 = call <1 x i64> @llvm.aarch64.neon.vsqshlu.v1i64(<1 x i64> %vsqshlu, i32 63) + %0 = extractelement <1 x i64> %vsqshlu1, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vsqshlu.v1i64(<1 x i64>, i32) + +define i64 @test_vsrid_n_s64(i64 %a, i64 %b) { +; CHECK: test_vsrid_n_s64 +; CHECK: sri {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vsri = insertelement <1 x i64> undef, i64 %a, i32 0 + %vsri1 = insertelement <1 x i64> undef, i64 %b, i32 0 + %vsri2 = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> %vsri, <1 x i64> %vsri1, i32 63) + %0 = extractelement <1 x i64> %vsri2, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64>, <1 x i64>, i32) + +define i64 @test_vsrid_n_u64(i64 %a, i64 %b) { +; CHECK: test_vsrid_n_u64 +; CHECK: sri {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vsri = insertelement <1 x i64> undef, i64 %a, i32 0 + %vsri1 = insertelement <1 x i64> undef, i64 %b, i32 0 + %vsri2 = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> %vsri, <1 x i64> %vsri1, i32 63) + %0 = extractelement <1 x i64> %vsri2, i32 0 + ret i64 %0 +} + +define i64 @test_vslid_n_s64(i64 %a, i64 %b) { +; CHECK: test_vslid_n_s64 +; CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vsli = insertelement <1 x i64> undef, i64 %a, i32 0 + %vsli1 = insertelement <1 x i64> undef, i64 %b, i32 0 + %vsli2 = call <1 x i64> 
@llvm.aarch64.neon.vsli.v1i64(<1 x i64> %vsli, <1 x i64> %vsli1, i32 63) + %0 = extractelement <1 x i64> %vsli2, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32) + +define i64 @test_vslid_n_u64(i64 %a, i64 %b) { +; CHECK: test_vslid_n_u64 +; CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vsli = insertelement <1 x i64> undef, i64 %a, i32 0 + %vsli1 = insertelement <1 x i64> undef, i64 %b, i32 0 + %vsli2 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %vsli, <1 x i64> %vsli1, i32 63) + %0 = extractelement <1 x i64> %vsli2, i32 0 + ret i64 %0 +} + +define i8 @test_vqshrnh_n_s16(i16 %a) { +; CHECK: test_vqshrnh_n_s16 +; CHECK: sqshrn {{b[0-9]+}}, {{h[0-9]+}}, #8 +entry: + %vsqshrn = insertelement <1 x i16> undef, i16 %a, i32 0 + %vsqshrn1 = call <1 x i8> @llvm.aarch64.neon.vsqshrn.v1i8(<1 x i16> %vsqshrn, i32 8) + %0 = extractelement <1 x i8> %vsqshrn1, i32 0 + ret i8 %0 +} + +declare <1 x i8> @llvm.aarch64.neon.vsqshrn.v1i8(<1 x i16>, i32) + +define i16 @test_vqshrns_n_s32(i32 %a) { +; CHECK: test_vqshrns_n_s32 +; CHECK: sqshrn {{h[0-9]+}}, {{s[0-9]+}}, #16 +entry: + %vsqshrn = insertelement <1 x i32> undef, i32 %a, i32 0 + %vsqshrn1 = call <1 x i16> @llvm.aarch64.neon.vsqshrn.v1i16(<1 x i32> %vsqshrn, i32 16) + %0 = extractelement <1 x i16> %vsqshrn1, i32 0 + ret i16 %0 +} + +declare <1 x i16> @llvm.aarch64.neon.vsqshrn.v1i16(<1 x i32>, i32) + +define i32 @test_vqshrnd_n_s64(i64 %a) { +; CHECK: test_vqshrnd_n_s64 +; CHECK: sqshrn {{s[0-9]+}}, {{d[0-9]+}}, #32 +entry: + %vsqshrn = insertelement <1 x i64> undef, i64 %a, i32 0 + %vsqshrn1 = call <1 x i32> @llvm.aarch64.neon.vsqshrn.v1i32(<1 x i64> %vsqshrn, i32 32) + %0 = extractelement <1 x i32> %vsqshrn1, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.vsqshrn.v1i32(<1 x i64>, i32) + +define i8 @test_vqshrnh_n_u16(i16 %a) { +; CHECK: test_vqshrnh_n_u16 +; CHECK: uqshrn {{b[0-9]+}}, {{h[0-9]+}}, #8 +entry: + %vuqshrn = insertelement <1 x 
i16> undef, i16 %a, i32 0 + %vuqshrn1 = call <1 x i8> @llvm.aarch64.neon.vuqshrn.v1i8(<1 x i16> %vuqshrn, i32 8) + %0 = extractelement <1 x i8> %vuqshrn1, i32 0 + ret i8 %0 +} + +declare <1 x i8> @llvm.aarch64.neon.vuqshrn.v1i8(<1 x i16>, i32) + +define i16 @test_vqshrns_n_u32(i32 %a) { +; CHECK: test_vqshrns_n_u32 +; CHECK: uqshrn {{h[0-9]+}}, {{s[0-9]+}}, #16 +entry: + %vuqshrn = insertelement <1 x i32> undef, i32 %a, i32 0 + %vuqshrn1 = call <1 x i16> @llvm.aarch64.neon.vuqshrn.v1i16(<1 x i32> %vuqshrn, i32 16) + %0 = extractelement <1 x i16> %vuqshrn1, i32 0 + ret i16 %0 +} + +declare <1 x i16> @llvm.aarch64.neon.vuqshrn.v1i16(<1 x i32>, i32) + +define i32 @test_vqshrnd_n_u64(i64 %a) { +; CHECK: test_vqshrnd_n_u64 +; CHECK: uqshrn {{s[0-9]+}}, {{d[0-9]+}}, #32 +entry: + %vuqshrn = insertelement <1 x i64> undef, i64 %a, i32 0 + %vuqshrn1 = call <1 x i32> @llvm.aarch64.neon.vuqshrn.v1i32(<1 x i64> %vuqshrn, i32 32) + %0 = extractelement <1 x i32> %vuqshrn1, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.vuqshrn.v1i32(<1 x i64>, i32) + +define i8 @test_vqrshrnh_n_s16(i16 %a) { +; CHECK: test_vqrshrnh_n_s16 +; CHECK: sqrshrn {{b[0-9]+}}, {{h[0-9]+}}, #8 +entry: + %vsqrshrn = insertelement <1 x i16> undef, i16 %a, i32 0 + %vsqrshrn1 = call <1 x i8> @llvm.aarch64.neon.vsqrshrn.v1i8(<1 x i16> %vsqrshrn, i32 8) + %0 = extractelement <1 x i8> %vsqrshrn1, i32 0 + ret i8 %0 +} + +declare <1 x i8> @llvm.aarch64.neon.vsqrshrn.v1i8(<1 x i16>, i32) + +define i16 @test_vqrshrns_n_s32(i32 %a) { +; CHECK: test_vqrshrns_n_s32 +; CHECK: sqrshrn {{h[0-9]+}}, {{s[0-9]+}}, #16 +entry: + %vsqrshrn = insertelement <1 x i32> undef, i32 %a, i32 0 + %vsqrshrn1 = call <1 x i16> @llvm.aarch64.neon.vsqrshrn.v1i16(<1 x i32> %vsqrshrn, i32 16) + %0 = extractelement <1 x i16> %vsqrshrn1, i32 0 + ret i16 %0 +} + +declare <1 x i16> @llvm.aarch64.neon.vsqrshrn.v1i16(<1 x i32>, i32) + +define i32 @test_vqrshrnd_n_s64(i64 %a) { +; CHECK: test_vqrshrnd_n_s64 +; CHECK: sqrshrn 
{{s[0-9]+}}, {{d[0-9]+}}, #32 +entry: + %vsqrshrn = insertelement <1 x i64> undef, i64 %a, i32 0 + %vsqrshrn1 = call <1 x i32> @llvm.aarch64.neon.vsqrshrn.v1i32(<1 x i64> %vsqrshrn, i32 32) + %0 = extractelement <1 x i32> %vsqrshrn1, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.vsqrshrn.v1i32(<1 x i64>, i32) + +define i8 @test_vqrshrnh_n_u16(i16 %a) { +; CHECK: test_vqrshrnh_n_u16 +; CHECK: uqrshrn {{b[0-9]+}}, {{h[0-9]+}}, #8 +entry: + %vuqrshrn = insertelement <1 x i16> undef, i16 %a, i32 0 + %vuqrshrn1 = call <1 x i8> @llvm.aarch64.neon.vuqrshrn.v1i8(<1 x i16> %vuqrshrn, i32 8) + %0 = extractelement <1 x i8> %vuqrshrn1, i32 0 + ret i8 %0 +} + +declare <1 x i8> @llvm.aarch64.neon.vuqrshrn.v1i8(<1 x i16>, i32) + +define i16 @test_vqrshrns_n_u32(i32 %a) { +; CHECK: test_vqrshrns_n_u32 +; CHECK: uqrshrn {{h[0-9]+}}, {{s[0-9]+}}, #16 +entry: + %vuqrshrn = insertelement <1 x i32> undef, i32 %a, i32 0 + %vuqrshrn1 = call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %vuqrshrn, i32 16) + %0 = extractelement <1 x i16> %vuqrshrn1, i32 0 + ret i16 %0 +} + +declare <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32>, i32) + +define i32 @test_vqrshrnd_n_u64(i64 %a) { +; CHECK: test_vqrshrnd_n_u64 +; CHECK: uqrshrn {{s[0-9]+}}, {{d[0-9]+}}, #32 +entry: + %vuqrshrn = insertelement <1 x i64> undef, i64 %a, i32 0 + %vuqrshrn1 = call <1 x i32> @llvm.aarch64.neon.vuqrshrn.v1i32(<1 x i64> %vuqrshrn, i32 32) + %0 = extractelement <1 x i32> %vuqrshrn1, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.vuqrshrn.v1i32(<1 x i64>, i32) + +define i8 @test_vqshrunh_n_s16(i16 %a) { +; CHECK: test_vqshrunh_n_s16 +; CHECK: sqshrun {{b[0-9]+}}, {{h[0-9]+}}, #8 +entry: + %vsqshrun = insertelement <1 x i16> undef, i16 %a, i32 0 + %vsqshrun1 = call <1 x i8> @llvm.aarch64.neon.vsqshrun.v1i8(<1 x i16> %vsqshrun, i32 8) + %0 = extractelement <1 x i8> %vsqshrun1, i32 0 + ret i8 %0 +} + +declare <1 x i8> @llvm.aarch64.neon.vsqshrun.v1i8(<1 x i16>, i32) + +define 
i16 @test_vqshruns_n_s32(i32 %a) { +; CHECK: test_vqshruns_n_s32 +; CHECK: sqshrun {{h[0-9]+}}, {{s[0-9]+}}, #16 +entry: + %vsqshrun = insertelement <1 x i32> undef, i32 %a, i32 0 + %vsqshrun1 = call <1 x i16> @llvm.aarch64.neon.vsqshrun.v1i16(<1 x i32> %vsqshrun, i32 16) + %0 = extractelement <1 x i16> %vsqshrun1, i32 0 + ret i16 %0 +} + +declare <1 x i16> @llvm.aarch64.neon.vsqshrun.v1i16(<1 x i32>, i32) + +define i32 @test_vqshrund_n_s64(i64 %a) { +; CHECK: test_vqshrund_n_s64 +; CHECK: sqshrun {{s[0-9]+}}, {{d[0-9]+}}, #32 +entry: + %vsqshrun = insertelement <1 x i64> undef, i64 %a, i32 0 + %vsqshrun1 = call <1 x i32> @llvm.aarch64.neon.vsqshrun.v1i32(<1 x i64> %vsqshrun, i32 32) + %0 = extractelement <1 x i32> %vsqshrun1, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.vsqshrun.v1i32(<1 x i64>, i32) + +define i8 @test_vqrshrunh_n_s16(i16 %a) { +; CHECK: test_vqrshrunh_n_s16 +; CHECK: sqrshrun {{b[0-9]+}}, {{h[0-9]+}}, #8 +entry: + %vsqrshrun = insertelement <1 x i16> undef, i16 %a, i32 0 + %vsqrshrun1 = call <1 x i8> @llvm.aarch64.neon.vsqrshrun.v1i8(<1 x i16> %vsqrshrun, i32 8) + %0 = extractelement <1 x i8> %vsqrshrun1, i32 0 + ret i8 %0 +} + +declare <1 x i8> @llvm.aarch64.neon.vsqrshrun.v1i8(<1 x i16>, i32) + +define i16 @test_vqrshruns_n_s32(i32 %a) { +; CHECK: test_vqrshruns_n_s32 +; CHECK: sqrshrun {{h[0-9]+}}, {{s[0-9]+}}, #16 +entry: + %vsqrshrun = insertelement <1 x i32> undef, i32 %a, i32 0 + %vsqrshrun1 = call <1 x i16> @llvm.aarch64.neon.vsqrshrun.v1i16(<1 x i32> %vsqrshrun, i32 16) + %0 = extractelement <1 x i16> %vsqrshrun1, i32 0 + ret i16 %0 +} + +declare <1 x i16> @llvm.aarch64.neon.vsqrshrun.v1i16(<1 x i32>, i32) + +define i32 @test_vqrshrund_n_s64(i64 %a) { +; CHECK: test_vqrshrund_n_s64 +; CHECK: sqrshrun {{s[0-9]+}}, {{d[0-9]+}}, #32 +entry: + %vsqrshrun = insertelement <1 x i64> undef, i64 %a, i32 0 + %vsqrshrun1 = call <1 x i32> @llvm.aarch64.neon.vsqrshrun.v1i32(<1 x i64> %vsqrshrun, i32 32) + %0 = extractelement <1 x 
i32> %vsqrshrun1, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.vsqrshrun.v1i32(<1 x i64>, i32) diff --git a/test/CodeGen/AArch64/neon-scalar-shift.ll b/test/CodeGen/AArch64/neon-scalar-shift.ll new file mode 100644 index 0000000..1222be5 --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-shift.ll @@ -0,0 +1,38 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +declare <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_ushl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_ushl_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +; CHECK: ushl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_sshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sshl_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +; CHECK: sshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +declare <1 x i64> @llvm.aarch64.neon.vshldu(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_ushl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_ushl_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vshldu(<1 x i64> %lhs, <1 x i64> %rhs) +; CHECK: ushl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_sshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sshl_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64> %lhs, <1 x i64> %rhs) +; CHECK: sshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + + diff --git a/test/CodeGen/AArch64/neon-shift-left-long.ll b/test/CodeGen/AArch64/neon-shift-left-long.ll new file mode 100644 index 0000000..d45c476 --- /dev/null +++ 
b/test/CodeGen/AArch64/neon-shift-left-long.ll @@ -0,0 +1,193 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +define <8 x i16> @test_sshll_v8i8(<8 x i8> %a) { +; CHECK: test_sshll_v8i8: +; CHECK: sshll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #3 + %1 = sext <8 x i8> %a to <8 x i16> + %tmp = shl <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> + ret <8 x i16> %tmp +} + +define <4 x i32> @test_sshll_v4i16(<4 x i16> %a) { +; CHECK: test_sshll_v4i16: +; CHECK: sshll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #9 + %1 = sext <4 x i16> %a to <4 x i32> + %tmp = shl <4 x i32> %1, <i32 9, i32 9, i32 9, i32 9> + ret <4 x i32> %tmp +} + +define <2 x i64> @test_sshll_v2i32(<2 x i32> %a) { +; CHECK: test_sshll_v2i32: +; CHECK: sshll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #19 + %1 = sext <2 x i32> %a to <2 x i64> + %tmp = shl <2 x i64> %1, <i64 19, i64 19> + ret <2 x i64> %tmp +} + +define <8 x i16> @test_ushll_v8i8(<8 x i8> %a) { +; CHECK: test_ushll_v8i8: +; CHECK: ushll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #3 + %1 = zext <8 x i8> %a to <8 x i16> + %tmp = shl <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> + ret <8 x i16> %tmp +} + +define <4 x i32> @test_ushll_v4i16(<4 x i16> %a) { +; CHECK: test_ushll_v4i16: +; CHECK: ushll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #9 + %1 = zext <4 x i16> %a to <4 x i32> + %tmp = shl <4 x i32> %1, <i32 9, i32 9, i32 9, i32 9> + ret <4 x i32> %tmp +} + +define <2 x i64> @test_ushll_v2i32(<2 x i32> %a) { +; CHECK: test_ushll_v2i32: +; CHECK: ushll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #19 + %1 = zext <2 x i32> %a to <2 x i64> + %tmp = shl <2 x i64> %1, <i64 19, i64 19> + ret <2 x i64> %tmp +} + +define <8 x i16> @test_sshll2_v16i8(<16 x i8> %a) { +; CHECK: test_sshll2_v16i8: +; CHECK: sshll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #3 + %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %2 = sext <8 x i8> %1 to <8 x i16> + 
%tmp = shl <8 x i16> %2, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> + ret <8 x i16> %tmp +} + +define <4 x i32> @test_sshll2_v8i16(<8 x i16> %a) { +; CHECK: test_sshll2_v8i16: +; CHECK: sshll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #9 + %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %2 = sext <4 x i16> %1 to <4 x i32> + %tmp = shl <4 x i32> %2, <i32 9, i32 9, i32 9, i32 9> + ret <4 x i32> %tmp +} + +define <2 x i64> @test_sshll2_v4i32(<4 x i32> %a) { +; CHECK: test_sshll2_v4i32: +; CHECK: sshll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #19 + %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %2 = sext <2 x i32> %1 to <2 x i64> + %tmp = shl <2 x i64> %2, <i64 19, i64 19> + ret <2 x i64> %tmp +} + +define <8 x i16> @test_ushll2_v16i8(<16 x i8> %a) { +; CHECK: test_ushll2_v16i8: +; CHECK: ushll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #3 + %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %2 = zext <8 x i8> %1 to <8 x i16> + %tmp = shl <8 x i16> %2, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> + ret <8 x i16> %tmp +} + +define <4 x i32> @test_ushll2_v8i16(<8 x i16> %a) { +; CHECK: test_ushll2_v8i16: +; CHECK: ushll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #9 + %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %2 = zext <4 x i16> %1 to <4 x i32> + %tmp = shl <4 x i32> %2, <i32 9, i32 9, i32 9, i32 9> + ret <4 x i32> %tmp +} + +define <2 x i64> @test_ushll2_v4i32(<4 x i32> %a) { +; CHECK: test_ushll2_v4i32: +; CHECK: ushll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #19 + %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %2 = zext <2 x i32> %1 to <2 x i64> + %tmp = shl <2 x i64> %2, <i64 19, i64 19> + ret <2 x i64> %tmp +} + +define <8 x i16> @test_sshll_shl0_v8i8(<8 x i8> %a) { +; CHECK: test_sshll_shl0_v8i8: +; CHECK: sshll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #0 + %tmp = sext <8 
x i8> %a to <8 x i16> + ret <8 x i16> %tmp +} + +define <4 x i32> @test_sshll_shl0_v4i16(<4 x i16> %a) { +; CHECK: test_sshll_shl0_v4i16: +; CHECK: sshll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #0 + %tmp = sext <4 x i16> %a to <4 x i32> + ret <4 x i32> %tmp +} + +define <2 x i64> @test_sshll_shl0_v2i32(<2 x i32> %a) { +; CHECK: test_sshll_shl0_v2i32: +; CHECK: sshll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #0 + %tmp = sext <2 x i32> %a to <2 x i64> + ret <2 x i64> %tmp +} + +define <8 x i16> @test_ushll_shl0_v8i8(<8 x i8> %a) { +; CHECK: test_ushll_shl0_v8i8: +; CHECK: ushll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #0 + %tmp = zext <8 x i8> %a to <8 x i16> + ret <8 x i16> %tmp +} + +define <4 x i32> @test_ushll_shl0_v4i16(<4 x i16> %a) { +; CHECK: test_ushll_shl0_v4i16: +; CHECK: ushll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #0 + %tmp = zext <4 x i16> %a to <4 x i32> + ret <4 x i32> %tmp +} + +define <2 x i64> @test_ushll_shl0_v2i32(<2 x i32> %a) { +; CHECK: test_ushll_shl0_v2i32: +; CHECK: ushll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #0 + %tmp = zext <2 x i32> %a to <2 x i64> + ret <2 x i64> %tmp +} + +define <8 x i16> @test_sshll2_shl0_v16i8(<16 x i8> %a) { +; CHECK: test_sshll2_shl0_v16i8: +; CHECK: sshll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #0 + %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %tmp = sext <8 x i8> %1 to <8 x i16> + ret <8 x i16> %tmp +} + +define <4 x i32> @test_sshll2_shl0_v8i16(<8 x i16> %a) { +; CHECK: test_sshll2_shl0_v8i16: +; CHECK: sshll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #0 + %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %tmp = sext <4 x i16> %1 to <4 x i32> + ret <4 x i32> %tmp +} + +define <2 x i64> @test_sshll2_shl0_v4i32(<4 x i32> %a) { +; CHECK: test_sshll2_shl0_v4i32: +; CHECK: sshll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #0 + %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %tmp = sext <2 x i32> %1 to <2 x i64> + ret <2 x i64> %tmp +} 
+ +define <8 x i16> @test_ushll2_shl0_v16i8(<16 x i8> %a) { +; CHECK: test_ushll2_shl0_v16i8: +; CHECK: ushll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #0 + %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %tmp = zext <8 x i8> %1 to <8 x i16> + ret <8 x i16> %tmp +} + +define <4 x i32> @test_ushll2_shl0_v8i16(<8 x i16> %a) { +; CHECK: test_ushll2_shl0_v8i16: +; CHECK: ushll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #0 + %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %tmp = zext <4 x i16> %1 to <4 x i32> + ret <4 x i32> %tmp +} + +define <2 x i64> @test_ushll2_shl0_v4i32(<4 x i32> %a) { +; CHECK: test_ushll2_shl0_v4i32: +; CHECK: ushll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #0 + %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %tmp = zext <2 x i32> %1 to <2 x i64> + ret <2 x i64> %tmp +} diff --git a/test/CodeGen/AArch64/neon-shift.ll b/test/CodeGen/AArch64/neon-shift.ll index 45a2605..33b04ce 100644 --- a/test/CodeGen/AArch64/neon-shift.ll +++ b/test/CodeGen/AArch64/neon-shift.ll @@ -102,23 +102,6 @@ define <4 x i32> @test_sshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ret <4 x i32> %tmp1 } -declare <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_ushl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_ushl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: ushl d0, d0, d1 - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_sshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: sshl d0, d0, d1 - ret <1 x i64> %tmp1 -} - declare <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64>, <2 x i64>) declare <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64>, <2 x i64>) @@ -137,4 +120,52 
@@ define <2 x i64> @test_sshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { } +define <8 x i8> @test_shl_v8i8(<8 x i8> %a) { +; CHECK: test_shl_v8i8: +; CHECK: shl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %tmp = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + ret <8 x i8> %tmp +} + +define <4 x i16> @test_shl_v4i16(<4 x i16> %a) { +; CHECK: test_shl_v4i16: +; CHECK: shl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %tmp = shl <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3> + ret <4 x i16> %tmp +} + +define <2 x i32> @test_shl_v2i32(<2 x i32> %a) { +; CHECK: test_shl_v2i32: +; CHECK: shl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %tmp = shl <2 x i32> %a, <i32 3, i32 3> + ret <2 x i32> %tmp +} + +define <16 x i8> @test_shl_v16i8(<16 x i8> %a) { +; CHECK: test_shl_v16i8: +; CHECK: shl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %tmp = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + ret <16 x i8> %tmp +} + +define <8 x i16> @test_shl_v8i16(<8 x i16> %a) { +; CHECK: test_shl_v8i16: +; CHECK: shl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %tmp = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> + ret <8 x i16> %tmp +} + +define <4 x i32> @test_shl_v4i32(<4 x i32> %a) { +; CHECK: test_shl_v4i32: +; CHECK: shl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %tmp = shl <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3> + ret <4 x i32> %tmp +} + +define <2 x i64> @test_shl_v2i64(<2 x i64> %a) { +; CHECK: test_shl_v2i64: +; CHECK: shl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #63 + %tmp = shl <2 x i64> %a, <i64 63, i64 63> + ret <2 x i64> %tmp +} diff --git a/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll b/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll new file mode 100644 index 0000000..d5557c0 --- /dev/null +++ b/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll @@ -0,0 +1,2314 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +define void @test_ldst1_v16i8(<16 x i8>* 
%ptr, <16 x i8>* %ptr2) { +; CHECK-LABEL: test_ldst1_v16i8: +; CHECK: ld1 {v{{[0-9]+}}.16b}, [x{{[0-9]+|sp}}] +; CHECK: st1 {v{{[0-9]+}}.16b}, [x{{[0-9]+|sp}}] + %tmp = load <16 x i8>* %ptr + store <16 x i8> %tmp, <16 x i8>* %ptr2 + ret void +} + +define void @test_ldst1_v8i16(<8 x i16>* %ptr, <8 x i16>* %ptr2) { +; CHECK-LABEL: test_ldst1_v8i16: +; CHECK: ld1 {v{{[0-9]+}}.8h}, [x{{[0-9]+|sp}}] +; CHECK: st1 {v{{[0-9]+}}.8h}, [x{{[0-9]+|sp}}] + %tmp = load <8 x i16>* %ptr + store <8 x i16> %tmp, <8 x i16>* %ptr2 + ret void +} + +define void @test_ldst1_v4i32(<4 x i32>* %ptr, <4 x i32>* %ptr2) { +; CHECK-LABEL: test_ldst1_v4i32: +; CHECK: ld1 {v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}] +; CHECK: st1 {v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}] + %tmp = load <4 x i32>* %ptr + store <4 x i32> %tmp, <4 x i32>* %ptr2 + ret void +} + +define void @test_ldst1_v2i64(<2 x i64>* %ptr, <2 x i64>* %ptr2) { +; CHECK-LABEL: test_ldst1_v2i64: +; CHECK: ld1 {v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}] +; CHECK: st1 {v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}] + %tmp = load <2 x i64>* %ptr + store <2 x i64> %tmp, <2 x i64>* %ptr2 + ret void +} + +define void @test_ldst1_v8i8(<8 x i8>* %ptr, <8 x i8>* %ptr2) { +; CHECK-LABEL: test_ldst1_v8i8: +; CHECK: ld1 {v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}] +; CHECK: st1 {v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}] + %tmp = load <8 x i8>* %ptr + store <8 x i8> %tmp, <8 x i8>* %ptr2 + ret void +} + +define void @test_ldst1_v4i16(<4 x i16>* %ptr, <4 x i16>* %ptr2) { +; CHECK-LABEL: test_ldst1_v4i16: +; CHECK: ld1 {v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}] +; CHECK: st1 {v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}] + %tmp = load <4 x i16>* %ptr + store <4 x i16> %tmp, <4 x i16>* %ptr2 + ret void +} + +define void @test_ldst1_v2i32(<2 x i32>* %ptr, <2 x i32>* %ptr2) { +; CHECK-LABEL: test_ldst1_v2i32: +; CHECK: ld1 {v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}] +; CHECK: st1 {v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}] + %tmp = load <2 x i32>* %ptr + store <2 x i32> %tmp, <2 x i32>* %ptr2 + ret void +} + +define void @test_ldst1_v1i64(<1 x 
i64>* %ptr, <1 x i64>* %ptr2) { +; CHECK-LABEL: test_ldst1_v1i64: +; CHECK: ld1 {v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}] +; CHECK: st1 {v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}] + %tmp = load <1 x i64>* %ptr + store <1 x i64> %tmp, <1 x i64>* %ptr2 + ret void +} + +%struct.int8x16x2_t = type { [2 x <16 x i8>] } +%struct.int16x8x2_t = type { [2 x <8 x i16>] } +%struct.int32x4x2_t = type { [2 x <4 x i32>] } +%struct.int64x2x2_t = type { [2 x <2 x i64>] } +%struct.float32x4x2_t = type { [2 x <4 x float>] } +%struct.float64x2x2_t = type { [2 x <2 x double>] } +%struct.int8x8x2_t = type { [2 x <8 x i8>] } +%struct.int16x4x2_t = type { [2 x <4 x i16>] } +%struct.int32x2x2_t = type { [2 x <2 x i32>] } +%struct.int64x1x2_t = type { [2 x <1 x i64>] } +%struct.float32x2x2_t = type { [2 x <2 x float>] } +%struct.float64x1x2_t = type { [2 x <1 x double>] } +%struct.int8x16x3_t = type { [3 x <16 x i8>] } +%struct.int16x8x3_t = type { [3 x <8 x i16>] } +%struct.int32x4x3_t = type { [3 x <4 x i32>] } +%struct.int64x2x3_t = type { [3 x <2 x i64>] } +%struct.float32x4x3_t = type { [3 x <4 x float>] } +%struct.float64x2x3_t = type { [3 x <2 x double>] } +%struct.int8x8x3_t = type { [3 x <8 x i8>] } +%struct.int16x4x3_t = type { [3 x <4 x i16>] } +%struct.int32x2x3_t = type { [3 x <2 x i32>] } +%struct.int64x1x3_t = type { [3 x <1 x i64>] } +%struct.float32x2x3_t = type { [3 x <2 x float>] } +%struct.float64x1x3_t = type { [3 x <1 x double>] } +%struct.int8x16x4_t = type { [4 x <16 x i8>] } +%struct.int16x8x4_t = type { [4 x <8 x i16>] } +%struct.int32x4x4_t = type { [4 x <4 x i32>] } +%struct.int64x2x4_t = type { [4 x <2 x i64>] } +%struct.float32x4x4_t = type { [4 x <4 x float>] } +%struct.float64x2x4_t = type { [4 x <2 x double>] } +%struct.int8x8x4_t = type { [4 x <8 x i8>] } +%struct.int16x4x4_t = type { [4 x <4 x i16>] } +%struct.int32x2x4_t = type { [4 x <2 x i32>] } +%struct.int64x1x4_t = type { [4 x <1 x i64>] } +%struct.float32x2x4_t = type { [4 x <2 x float>] } +%struct.float64x1x4_t = 
type { [4 x <1 x double>] } + + +define <16 x i8> @test_vld1q_s8(i8* readonly %a) { +; CHECK-LABEL: test_vld1q_s8 +; CHECK: ld1 {v{{[0-9]+}}.16b}, [x{{[0-9]+|sp}}] + %vld1 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %a, i32 1) + ret <16 x i8> %vld1 +} + +define <8 x i16> @test_vld1q_s16(i16* readonly %a) { +; CHECK-LABEL: test_vld1q_s16 +; CHECK: ld1 {v{{[0-9]+}}.8h}, [x{{[0-9]+|sp}}] + %1 = bitcast i16* %a to i8* + %vld1 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %1, i32 2) + ret <8 x i16> %vld1 +} + +define <4 x i32> @test_vld1q_s32(i32* readonly %a) { +; CHECK-LABEL: test_vld1q_s32 +; CHECK: ld1 {v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}] + %1 = bitcast i32* %a to i8* + %vld1 = tail call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %1, i32 4) + ret <4 x i32> %vld1 +} + +define <2 x i64> @test_vld1q_s64(i64* readonly %a) { +; CHECK-LABEL: test_vld1q_s64 +; CHECK: ld1 {v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}] + %1 = bitcast i64* %a to i8* + %vld1 = tail call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %1, i32 8) + ret <2 x i64> %vld1 +} + +define <4 x float> @test_vld1q_f32(float* readonly %a) { +; CHECK-LABEL: test_vld1q_f32 +; CHECK: ld1 {v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}] + %1 = bitcast float* %a to i8* + %vld1 = tail call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %1, i32 4) + ret <4 x float> %vld1 +} + +define <2 x double> @test_vld1q_f64(double* readonly %a) { +; CHECK-LABEL: test_vld1q_f64 +; CHECK: ld1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] + %1 = bitcast double* %a to i8* + %vld1 = tail call <2 x double> @llvm.arm.neon.vld1.v2f64(i8* %1, i32 8) + ret <2 x double> %vld1 +} + +define <8 x i8> @test_vld1_s8(i8* readonly %a) { +; CHECK-LABEL: test_vld1_s8 +; CHECK: ld1 {v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}] + %vld1 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %a, i32 1) + ret <8 x i8> %vld1 +} + +define <4 x i16> @test_vld1_s16(i16* readonly %a) { +; CHECK-LABEL: test_vld1_s16 +; CHECK: ld1 {v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}] + %1 = bitcast i16* %a to i8* + %vld1 = tail 
call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %1, i32 2) + ret <4 x i16> %vld1 +} + +define <2 x i32> @test_vld1_s32(i32* readonly %a) { +; CHECK-LABEL: test_vld1_s32 +; CHECK: ld1 {v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}] + %1 = bitcast i32* %a to i8* + %vld1 = tail call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %1, i32 4) + ret <2 x i32> %vld1 +} + +define <1 x i64> @test_vld1_s64(i64* readonly %a) { +; CHECK-LABEL: test_vld1_s64 +; CHECK: ld1 {v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}] + %1 = bitcast i64* %a to i8* + %vld1 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %1, i32 8) + ret <1 x i64> %vld1 +} + +define <2 x float> @test_vld1_f32(float* readonly %a) { +; CHECK-LABEL: test_vld1_f32 +; CHECK: ld1 {v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}] + %1 = bitcast float* %a to i8* + %vld1 = tail call <2 x float> @llvm.arm.neon.vld1.v2f32(i8* %1, i32 4) + ret <2 x float> %vld1 +} + +define <1 x double> @test_vld1_f64(double* readonly %a) { +; CHECK-LABEL: test_vld1_f64 +; CHECK: ld1 {v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}] + %1 = bitcast double* %a to i8* + %vld1 = tail call <1 x double> @llvm.arm.neon.vld1.v1f64(i8* %1, i32 8) + ret <1 x double> %vld1 +} + +define <8 x i8> @test_vld1_p8(i8* readonly %a) { +; CHECK-LABEL: test_vld1_p8 +; CHECK: ld1 {v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}] + %vld1 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %a, i32 1) + ret <8 x i8> %vld1 +} + +define <4 x i16> @test_vld1_p16(i16* readonly %a) { +; CHECK-LABEL: test_vld1_p16 +; CHECK: ld1 {v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}] + %1 = bitcast i16* %a to i8* + %vld1 = tail call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %1, i32 2) + ret <4 x i16> %vld1 +} + +define %struct.int8x16x2_t @test_vld2q_s8(i8* readonly %a) { +; CHECK-LABEL: test_vld2q_s8 +; CHECK: ld2 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [x{{[0-9]+|sp}}] + %vld2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8* %a, i32 1) + %vld2.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 0 + %vld2.fca.1.extract = extractvalue { <16 x i8>, 
<16 x i8> } %vld2, 1 + %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vld2.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vld2.fca.1.extract, 0, 1 + ret %struct.int8x16x2_t %.fca.0.1.insert +} + +define %struct.int16x8x2_t @test_vld2q_s16(i16* readonly %a) { +; CHECK-LABEL: test_vld2q_s16 +; CHECK: ld2 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [x{{[0-9]+|sp}}] + %1 = bitcast i16* %a to i8* + %vld2 = tail call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16(i8* %1, i32 2) + %vld2.fca.0.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2, 0 + %vld2.fca.1.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2, 1 + %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vld2.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vld2.fca.1.extract, 0, 1 + ret %struct.int16x8x2_t %.fca.0.1.insert +} + +define %struct.int32x4x2_t @test_vld2q_s32(i32* readonly %a) { +; CHECK-LABEL: test_vld2q_s32 +; CHECK: ld2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}] + %1 = bitcast i32* %a to i8* + %vld2 = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8* %1, i32 4) + %vld2.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2, 0 + %vld2.fca.1.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2, 1 + %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vld2.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vld2.fca.1.extract, 0, 1 + ret %struct.int32x4x2_t %.fca.0.1.insert +} + +define %struct.int64x2x2_t @test_vld2q_s64(i64* readonly %a) { +; CHECK-LABEL: test_vld2q_s64 +; CHECK: ld2 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}] + %1 = bitcast i64* %a to i8* + %vld2 = tail call { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2.v2i64(i8* %1, i32 8) + %vld2.fca.0.extract = extractvalue { <2 x i64>, <2 x i64> } %vld2, 0 + %vld2.fca.1.extract = extractvalue { <2 
x i64>, <2 x i64> } %vld2, 1 + %.fca.0.0.insert = insertvalue %struct.int64x2x2_t undef, <2 x i64> %vld2.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int64x2x2_t %.fca.0.0.insert, <2 x i64> %vld2.fca.1.extract, 0, 1 + ret %struct.int64x2x2_t %.fca.0.1.insert +} + +define %struct.float32x4x2_t @test_vld2q_f32(float* readonly %a) { +; CHECK-LABEL: test_vld2q_f32 +; CHECK: ld2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}] + %1 = bitcast float* %a to i8* + %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %1, i32 4) + %vld2.fca.0.extract = extractvalue { <4 x float>, <4 x float> } %vld2, 0 + %vld2.fca.1.extract = extractvalue { <4 x float>, <4 x float> } %vld2, 1 + %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vld2.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vld2.fca.1.extract, 0, 1 + ret %struct.float32x4x2_t %.fca.0.1.insert +} + +define %struct.float64x2x2_t @test_vld2q_f64(double* readonly %a) { +; CHECK-LABEL: test_vld2q_f64 +; CHECK: ld2 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}] + %1 = bitcast double* %a to i8* + %vld2 = tail call { <2 x double>, <2 x double> } @llvm.arm.neon.vld2.v2f64(i8* %1, i32 8) + %vld2.fca.0.extract = extractvalue { <2 x double>, <2 x double> } %vld2, 0 + %vld2.fca.1.extract = extractvalue { <2 x double>, <2 x double> } %vld2, 1 + %.fca.0.0.insert = insertvalue %struct.float64x2x2_t undef, <2 x double> %vld2.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float64x2x2_t %.fca.0.0.insert, <2 x double> %vld2.fca.1.extract, 0, 1 + ret %struct.float64x2x2_t %.fca.0.1.insert +} + +define %struct.int8x8x2_t @test_vld2_s8(i8* readonly %a) { +; CHECK-LABEL: test_vld2_s8 +; CHECK: ld2 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}] + %vld2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8* %a, i32 1) + %vld2.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 0 + 
%vld2.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 1 + %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vld2.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vld2.fca.1.extract, 0, 1 + ret %struct.int8x8x2_t %.fca.0.1.insert +} + +define %struct.int16x4x2_t @test_vld2_s16(i16* readonly %a) { +; CHECK-LABEL: test_vld2_s16 +; CHECK: ld2 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}] + %1 = bitcast i16* %a to i8* + %vld2 = tail call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16(i8* %1, i32 2) + %vld2.fca.0.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2, 0 + %vld2.fca.1.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2, 1 + %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vld2.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vld2.fca.1.extract, 0, 1 + ret %struct.int16x4x2_t %.fca.0.1.insert +} + +define %struct.int32x2x2_t @test_vld2_s32(i32* readonly %a) { +; CHECK-LABEL: test_vld2_s32 +; CHECK: ld2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}] + %1 = bitcast i32* %a to i8* + %vld2 = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8* %1, i32 4) + %vld2.fca.0.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2, 0 + %vld2.fca.1.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2, 1 + %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vld2.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vld2.fca.1.extract, 0, 1 + ret %struct.int32x2x2_t %.fca.0.1.insert +} + +define %struct.int64x1x2_t @test_vld2_s64(i64* readonly %a) { +; CHECK-LABEL: test_vld2_s64 +; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}] + %1 = bitcast i64* %a to i8* + %vld2 = tail call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8* %1, i32 8) + %vld2.fca.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2, 0 + 
%vld2.fca.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2, 1 + %.fca.0.0.insert = insertvalue %struct.int64x1x2_t undef, <1 x i64> %vld2.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int64x1x2_t %.fca.0.0.insert, <1 x i64> %vld2.fca.1.extract, 0, 1 + ret %struct.int64x1x2_t %.fca.0.1.insert +} + +define %struct.float32x2x2_t @test_vld2_f32(float* readonly %a) { +; CHECK-LABEL: test_vld2_f32 +; CHECK: ld2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}] + %1 = bitcast float* %a to i8* + %vld2 = tail call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8* %1, i32 4) + %vld2.fca.0.extract = extractvalue { <2 x float>, <2 x float> } %vld2, 0 + %vld2.fca.1.extract = extractvalue { <2 x float>, <2 x float> } %vld2, 1 + %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vld2.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vld2.fca.1.extract, 0, 1 + ret %struct.float32x2x2_t %.fca.0.1.insert +} + +define %struct.float64x1x2_t @test_vld2_f64(double* readonly %a) { +; CHECK-LABEL: test_vld2_f64 +; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}] + %1 = bitcast double* %a to i8* + %vld2 = tail call { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8* %1, i32 8) + %vld2.fca.0.extract = extractvalue { <1 x double>, <1 x double> } %vld2, 0 + %vld2.fca.1.extract = extractvalue { <1 x double>, <1 x double> } %vld2, 1 + %.fca.0.0.insert = insertvalue %struct.float64x1x2_t undef, <1 x double> %vld2.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float64x1x2_t %.fca.0.0.insert, <1 x double> %vld2.fca.1.extract, 0, 1 + ret %struct.float64x1x2_t %.fca.0.1.insert +} + +define %struct.int8x16x3_t @test_vld3q_s8(i8* readonly %a) { +; CHECK-LABEL: test_vld3q_s8 +; CHECK: ld3 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [x{{[0-9]+|sp}}] + %vld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %a, i32 1) + 
%vld3.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 0 + %vld3.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 1 + %vld3.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 2 + %.fca.0.0.insert = insertvalue %struct.int8x16x3_t undef, <16 x i8> %vld3.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x16x3_t %.fca.0.0.insert, <16 x i8> %vld3.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int8x16x3_t %.fca.0.1.insert, <16 x i8> %vld3.fca.2.extract, 0, 2 + ret %struct.int8x16x3_t %.fca.0.2.insert +} + +define %struct.int16x8x3_t @test_vld3q_s16(i16* readonly %a) { +; CHECK-LABEL: test_vld3q_s16 +; CHECK: ld3 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [x{{[0-9]+|sp}}] + %1 = bitcast i16* %a to i8* + %vld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16(i8* %1, i32 2) + %vld3.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 0 + %vld3.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 1 + %vld3.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 2 + %.fca.0.0.insert = insertvalue %struct.int16x8x3_t undef, <8 x i16> %vld3.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x8x3_t %.fca.0.0.insert, <8 x i16> %vld3.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int16x8x3_t %.fca.0.1.insert, <8 x i16> %vld3.fca.2.extract, 0, 2 + ret %struct.int16x8x3_t %.fca.0.2.insert +} + +define %struct.int32x4x3_t @test_vld3q_s32(i32* readonly %a) { +; CHECK-LABEL: test_vld3q_s32 +; CHECK: ld3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}] + %1 = bitcast i32* %a to i8* + %vld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8* %1, i32 4) + %vld3.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 0 + %vld3.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 1 + %vld3.fca.2.extract = 
extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 2 + %.fca.0.0.insert = insertvalue %struct.int32x4x3_t undef, <4 x i32> %vld3.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x4x3_t %.fca.0.0.insert, <4 x i32> %vld3.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int32x4x3_t %.fca.0.1.insert, <4 x i32> %vld3.fca.2.extract, 0, 2 + ret %struct.int32x4x3_t %.fca.0.2.insert +} + +define %struct.int64x2x3_t @test_vld3q_s64(i64* readonly %a) { +; CHECK-LABEL: test_vld3q_s64 +; CHECK: ld3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}] + %1 = bitcast i64* %a to i8* + %vld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3.v2i64(i8* %1, i32 8) + %vld3.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 0 + %vld3.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 1 + %vld3.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 2 + %.fca.0.0.insert = insertvalue %struct.int64x2x3_t undef, <2 x i64> %vld3.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int64x2x3_t %.fca.0.0.insert, <2 x i64> %vld3.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int64x2x3_t %.fca.0.1.insert, <2 x i64> %vld3.fca.2.extract, 0, 2 + ret %struct.int64x2x3_t %.fca.0.2.insert +} + +define %struct.float32x4x3_t @test_vld3q_f32(float* readonly %a) { +; CHECK-LABEL: test_vld3q_f32 +; CHECK: ld3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}] + %1 = bitcast float* %a to i8* + %vld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8* %1, i32 4) + %vld3.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 0 + %vld3.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 1 + %vld3.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 2 + %.fca.0.0.insert = insertvalue %struct.float32x4x3_t undef, <4 x float> %vld3.fca.0.extract, 0, 
0 + %.fca.0.1.insert = insertvalue %struct.float32x4x3_t %.fca.0.0.insert, <4 x float> %vld3.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.float32x4x3_t %.fca.0.1.insert, <4 x float> %vld3.fca.2.extract, 0, 2 + ret %struct.float32x4x3_t %.fca.0.2.insert +} + +define %struct.float64x2x3_t @test_vld3q_f64(double* readonly %a) { +; CHECK-LABEL: test_vld3q_f64 +; CHECK: ld3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}] + %1 = bitcast double* %a to i8* + %vld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3.v2f64(i8* %1, i32 8) + %vld3.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 0 + %vld3.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 1 + %vld3.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 2 + %.fca.0.0.insert = insertvalue %struct.float64x2x3_t undef, <2 x double> %vld3.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float64x2x3_t %.fca.0.0.insert, <2 x double> %vld3.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.float64x2x3_t %.fca.0.1.insert, <2 x double> %vld3.fca.2.extract, 0, 2 + ret %struct.float64x2x3_t %.fca.0.2.insert +} + +define %struct.int8x8x3_t @test_vld3_s8(i8* readonly %a) { +; CHECK-LABEL: test_vld3_s8 +; CHECK: ld3 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}] + %vld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8* %a, i32 1) + %vld3.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 0 + %vld3.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 1 + %vld3.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 2 + %.fca.0.0.insert = insertvalue %struct.int8x8x3_t undef, <8 x i8> %vld3.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x8x3_t %.fca.0.0.insert, <8 x i8> %vld3.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int8x8x3_t 
%.fca.0.1.insert, <8 x i8> %vld3.fca.2.extract, 0, 2 + ret %struct.int8x8x3_t %.fca.0.2.insert +} + +define %struct.int16x4x3_t @test_vld3_s16(i16* readonly %a) { +; CHECK-LABEL: test_vld3_s16 +; CHECK: ld3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}] + %1 = bitcast i16* %a to i8* + %vld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8* %1, i32 2) + %vld3.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 0 + %vld3.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 1 + %vld3.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 2 + %.fca.0.0.insert = insertvalue %struct.int16x4x3_t undef, <4 x i16> %vld3.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x4x3_t %.fca.0.0.insert, <4 x i16> %vld3.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int16x4x3_t %.fca.0.1.insert, <4 x i16> %vld3.fca.2.extract, 0, 2 + ret %struct.int16x4x3_t %.fca.0.2.insert +} + +define %struct.int32x2x3_t @test_vld3_s32(i32* readonly %a) { +; CHECK-LABEL: test_vld3_s32 +; CHECK: ld3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}] + %1 = bitcast i32* %a to i8* + %vld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32(i8* %1, i32 4) + %vld3.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 0 + %vld3.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 1 + %vld3.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 2 + %.fca.0.0.insert = insertvalue %struct.int32x2x3_t undef, <2 x i32> %vld3.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x2x3_t %.fca.0.0.insert, <2 x i32> %vld3.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int32x2x3_t %.fca.0.1.insert, <2 x i32> %vld3.fca.2.extract, 0, 2 + ret %struct.int32x2x3_t %.fca.0.2.insert +} + +define %struct.int64x1x3_t @test_vld3_s64(i64* readonly %a) { +; CHECK-LABEL: 
test_vld3_s64 +; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}] + %1 = bitcast i64* %a to i8* + %vld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8* %1, i32 8) + %vld3.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 0 + %vld3.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 1 + %vld3.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 2 + %.fca.0.0.insert = insertvalue %struct.int64x1x3_t undef, <1 x i64> %vld3.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int64x1x3_t %.fca.0.0.insert, <1 x i64> %vld3.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int64x1x3_t %.fca.0.1.insert, <1 x i64> %vld3.fca.2.extract, 0, 2 + ret %struct.int64x1x3_t %.fca.0.2.insert +} + +define %struct.float32x2x3_t @test_vld3_f32(float* readonly %a) { +; CHECK-LABEL: test_vld3_f32 +; CHECK: ld3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}] + %1 = bitcast float* %a to i8* + %vld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3.v2f32(i8* %1, i32 4) + %vld3.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 0 + %vld3.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 1 + %vld3.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 2 + %.fca.0.0.insert = insertvalue %struct.float32x2x3_t undef, <2 x float> %vld3.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x2x3_t %.fca.0.0.insert, <2 x float> %vld3.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.float32x2x3_t %.fca.0.1.insert, <2 x float> %vld3.fca.2.extract, 0, 2 + ret %struct.float32x2x3_t %.fca.0.2.insert +} + +define %struct.float64x1x3_t @test_vld3_f64(double* readonly %a) { +; CHECK-LABEL: test_vld3_f64 +; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}] + %1 = bitcast double* %a to i8* + %vld3 = 
tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8* %1, i32 8) + %vld3.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 0 + %vld3.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 1 + %vld3.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 2 + %.fca.0.0.insert = insertvalue %struct.float64x1x3_t undef, <1 x double> %vld3.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float64x1x3_t %.fca.0.0.insert, <1 x double> %vld3.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.float64x1x3_t %.fca.0.1.insert, <1 x double> %vld3.fca.2.extract, 0, 2 + ret %struct.float64x1x3_t %.fca.0.2.insert +} + +define %struct.int8x16x4_t @test_vld4q_s8(i8* readonly %a) { +; CHECK-LABEL: test_vld4q_s8 +; CHECK: ld4 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [x{{[0-9]+|sp}}] + %vld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8* %a, i32 1) + %vld4.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 0 + %vld4.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 1 + %vld4.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 2 + %vld4.fca.3.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 3 + %.fca.0.0.insert = insertvalue %struct.int8x16x4_t undef, <16 x i8> %vld4.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x16x4_t %.fca.0.0.insert, <16 x i8> %vld4.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int8x16x4_t %.fca.0.1.insert, <16 x i8> %vld4.fca.2.extract, 0, 2 + %.fca.0.3.insert = insertvalue %struct.int8x16x4_t %.fca.0.2.insert, <16 x i8> %vld4.fca.3.extract, 0, 3 + ret %struct.int8x16x4_t %.fca.0.3.insert +} + +define %struct.int16x8x4_t @test_vld4q_s16(i16* readonly %a) { +; CHECK-LABEL: test_vld4q_s16 +; CHECK: ld4 
{v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [x{{[0-9]+|sp}}] + %1 = bitcast i16* %a to i8* + %vld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8* %1, i32 2) + %vld4.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 0 + %vld4.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 1 + %vld4.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 2 + %vld4.fca.3.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 3 + %.fca.0.0.insert = insertvalue %struct.int16x8x4_t undef, <8 x i16> %vld4.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x8x4_t %.fca.0.0.insert, <8 x i16> %vld4.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int16x8x4_t %.fca.0.1.insert, <8 x i16> %vld4.fca.2.extract, 0, 2 + %.fca.0.3.insert = insertvalue %struct.int16x8x4_t %.fca.0.2.insert, <8 x i16> %vld4.fca.3.extract, 0, 3 + ret %struct.int16x8x4_t %.fca.0.3.insert +} + +define %struct.int32x4x4_t @test_vld4q_s32(i32* readonly %a) { +; CHECK-LABEL: test_vld4q_s32 +; CHECK: ld4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}] + %1 = bitcast i32* %a to i8* + %vld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32(i8* %1, i32 4) + %vld4.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 0 + %vld4.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 1 + %vld4.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 2 + %vld4.fca.3.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 3 + %.fca.0.0.insert = insertvalue %struct.int32x4x4_t undef, <4 x i32> %vld4.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x4x4_t %.fca.0.0.insert, <4 x i32> %vld4.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue 
%struct.int32x4x4_t %.fca.0.1.insert, <4 x i32> %vld4.fca.2.extract, 0, 2 + %.fca.0.3.insert = insertvalue %struct.int32x4x4_t %.fca.0.2.insert, <4 x i32> %vld4.fca.3.extract, 0, 3 + ret %struct.int32x4x4_t %.fca.0.3.insert +} + +define %struct.int64x2x4_t @test_vld4q_s64(i64* readonly %a) { +; CHECK-LABEL: test_vld4q_s64 +; CHECK: ld4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}] + %1 = bitcast i64* %a to i8* + %vld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4.v2i64(i8* %1, i32 8) + %vld4.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 0 + %vld4.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 1 + %vld4.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 2 + %vld4.fca.3.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 3 + %.fca.0.0.insert = insertvalue %struct.int64x2x4_t undef, <2 x i64> %vld4.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int64x2x4_t %.fca.0.0.insert, <2 x i64> %vld4.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int64x2x4_t %.fca.0.1.insert, <2 x i64> %vld4.fca.2.extract, 0, 2 + %.fca.0.3.insert = insertvalue %struct.int64x2x4_t %.fca.0.2.insert, <2 x i64> %vld4.fca.3.extract, 0, 3 + ret %struct.int64x2x4_t %.fca.0.3.insert +} + +define %struct.float32x4x4_t @test_vld4q_f32(float* readonly %a) { +; CHECK-LABEL: test_vld4q_f32 +; CHECK: ld4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}] + %1 = bitcast float* %a to i8* + %vld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32(i8* %1, i32 4) + %vld4.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 0 + %vld4.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 1 + %vld4.fca.2.extract = extractvalue { <4 x float>, <4 x 
float>, <4 x float>, <4 x float> } %vld4, 2 + %vld4.fca.3.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 3 + %.fca.0.0.insert = insertvalue %struct.float32x4x4_t undef, <4 x float> %vld4.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x4x4_t %.fca.0.0.insert, <4 x float> %vld4.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.float32x4x4_t %.fca.0.1.insert, <4 x float> %vld4.fca.2.extract, 0, 2 + %.fca.0.3.insert = insertvalue %struct.float32x4x4_t %.fca.0.2.insert, <4 x float> %vld4.fca.3.extract, 0, 3 + ret %struct.float32x4x4_t %.fca.0.3.insert +} + +define %struct.float64x2x4_t @test_vld4q_f64(double* readonly %a) { +; CHECK-LABEL: test_vld4q_f64 +; CHECK: ld4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}] + %1 = bitcast double* %a to i8* + %vld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4.v2f64(i8* %1, i32 8) + %vld4.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 0 + %vld4.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 1 + %vld4.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 2 + %vld4.fca.3.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 3 + %.fca.0.0.insert = insertvalue %struct.float64x2x4_t undef, <2 x double> %vld4.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float64x2x4_t %.fca.0.0.insert, <2 x double> %vld4.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.float64x2x4_t %.fca.0.1.insert, <2 x double> %vld4.fca.2.extract, 0, 2 + %.fca.0.3.insert = insertvalue %struct.float64x2x4_t %.fca.0.2.insert, <2 x double> %vld4.fca.3.extract, 0, 3 + ret %struct.float64x2x4_t %.fca.0.3.insert +} + +define %struct.int8x8x4_t @test_vld4_s8(i8* readonly %a) { +; CHECK-LABEL: test_vld4_s8 +; CHECK: ld4 
{v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}] + %vld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8* %a, i32 1) + %vld4.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 0 + %vld4.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 1 + %vld4.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 2 + %vld4.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 3 + %.fca.0.0.insert = insertvalue %struct.int8x8x4_t undef, <8 x i8> %vld4.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x8x4_t %.fca.0.0.insert, <8 x i8> %vld4.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int8x8x4_t %.fca.0.1.insert, <8 x i8> %vld4.fca.2.extract, 0, 2 + %.fca.0.3.insert = insertvalue %struct.int8x8x4_t %.fca.0.2.insert, <8 x i8> %vld4.fca.3.extract, 0, 3 + ret %struct.int8x8x4_t %.fca.0.3.insert +} + +define %struct.int16x4x4_t @test_vld4_s16(i16* readonly %a) { +; CHECK-LABEL: test_vld4_s16 +; CHECK: ld4 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}] + %1 = bitcast i16* %a to i8* + %vld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8* %1, i32 2) + %vld4.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 0 + %vld4.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 1 + %vld4.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 2 + %vld4.fca.3.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 3 + %.fca.0.0.insert = insertvalue %struct.int16x4x4_t undef, <4 x i16> %vld4.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x4x4_t %.fca.0.0.insert, <4 x i16> %vld4.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int16x4x4_t %.fca.0.1.insert, <4 x i16> 
%vld4.fca.2.extract, 0, 2 + %.fca.0.3.insert = insertvalue %struct.int16x4x4_t %.fca.0.2.insert, <4 x i16> %vld4.fca.3.extract, 0, 3 + ret %struct.int16x4x4_t %.fca.0.3.insert +} + +define %struct.int32x2x4_t @test_vld4_s32(i32* readonly %a) { +; CHECK-LABEL: test_vld4_s32 +; CHECK: ld4 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}] + %1 = bitcast i32* %a to i8* + %vld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32(i8* %1, i32 4) + %vld4.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 0 + %vld4.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 1 + %vld4.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 2 + %vld4.fca.3.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 3 + %.fca.0.0.insert = insertvalue %struct.int32x2x4_t undef, <2 x i32> %vld4.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x2x4_t %.fca.0.0.insert, <2 x i32> %vld4.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int32x2x4_t %.fca.0.1.insert, <2 x i32> %vld4.fca.2.extract, 0, 2 + %.fca.0.3.insert = insertvalue %struct.int32x2x4_t %.fca.0.2.insert, <2 x i32> %vld4.fca.3.extract, 0, 3 + ret %struct.int32x2x4_t %.fca.0.3.insert +} + +define %struct.int64x1x4_t @test_vld4_s64(i64* readonly %a) { +; CHECK-LABEL: test_vld4_s64 +; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}] + %1 = bitcast i64* %a to i8* + %vld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8* %1, i32 8) + %vld4.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 0 + %vld4.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 1 + %vld4.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 2 + %vld4.fca.3.extract = extractvalue { <1 x 
i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 3 + %.fca.0.0.insert = insertvalue %struct.int64x1x4_t undef, <1 x i64> %vld4.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int64x1x4_t %.fca.0.0.insert, <1 x i64> %vld4.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int64x1x4_t %.fca.0.1.insert, <1 x i64> %vld4.fca.2.extract, 0, 2 + %.fca.0.3.insert = insertvalue %struct.int64x1x4_t %.fca.0.2.insert, <1 x i64> %vld4.fca.3.extract, 0, 3 + ret %struct.int64x1x4_t %.fca.0.3.insert +} + +define %struct.float32x2x4_t @test_vld4_f32(float* readonly %a) { +; CHECK-LABEL: test_vld4_f32 +; CHECK: ld4 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}] + %1 = bitcast float* %a to i8* + %vld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4.v2f32(i8* %1, i32 4) + %vld4.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 0 + %vld4.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 1 + %vld4.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 2 + %vld4.fca.3.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 3 + %.fca.0.0.insert = insertvalue %struct.float32x2x4_t undef, <2 x float> %vld4.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x2x4_t %.fca.0.0.insert, <2 x float> %vld4.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.float32x2x4_t %.fca.0.1.insert, <2 x float> %vld4.fca.2.extract, 0, 2 + %.fca.0.3.insert = insertvalue %struct.float32x2x4_t %.fca.0.2.insert, <2 x float> %vld4.fca.3.extract, 0, 3 + ret %struct.float32x2x4_t %.fca.0.3.insert +} + +define %struct.float64x1x4_t @test_vld4_f64(double* readonly %a) { +; CHECK-LABEL: test_vld4_f64 +; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}] + %1 = bitcast double* %a to i8* + %vld4 = tail call { <1 x 
double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8* %1, i32 8) + %vld4.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 0 + %vld4.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 1 + %vld4.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 2 + %vld4.fca.3.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 3 + %.fca.0.0.insert = insertvalue %struct.float64x1x4_t undef, <1 x double> %vld4.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float64x1x4_t %.fca.0.0.insert, <1 x double> %vld4.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.float64x1x4_t %.fca.0.1.insert, <1 x double> %vld4.fca.2.extract, 0, 2 + %.fca.0.3.insert = insertvalue %struct.float64x1x4_t %.fca.0.2.insert, <1 x double> %vld4.fca.3.extract, 0, 3 + ret %struct.float64x1x4_t %.fca.0.3.insert +} + +declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*, i32) +declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) +declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*, i32) +declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32) +declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) +declare <2 x double> @llvm.arm.neon.vld1.v2f64(i8*, i32) +declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*, i32) +declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*, i32) +declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32) +declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32) +declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*, i32) +declare <1 x double> @llvm.arm.neon.vld1.v1f64(i8*, i32) +declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8*, i32) +declare { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16(i8*, i32) +declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8*, i32) +declare { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2.v2i64(i8*, i32) +declare { <4 x float>, <4 x float> } 
@llvm.arm.neon.vld2.v4f32(i8*, i32) +declare { <2 x double>, <2 x double> } @llvm.arm.neon.vld2.v2f64(i8*, i32) +declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8*, i32) +declare { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16(i8*, i32) +declare { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8*, i32) +declare { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8*, i32) +declare { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8*, i32) +declare { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8*, i32) +declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8*, i32) +declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16(i8*, i32) +declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8*, i32) +declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3.v2i64(i8*, i32) +declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8*, i32) +declare { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3.v2f64(i8*, i32) +declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8*, i32) +declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8*, i32) +declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32(i8*, i32) +declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8*, i32) +declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3.v2f32(i8*, i32) +declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8*, i32) +declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8*, i32) +declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8*, i32) +declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32(i8*, i32) +declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4.v2i64(i8*, i32) +declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32(i8*, 
i32) +declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4.v2f64(i8*, i32) +declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8*, i32) +declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8*, i32) +declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32(i8*, i32) +declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8*, i32) +declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4.v2f32(i8*, i32) +declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8*, i32) + +define void @test_vst1q_s8(i8* %a, <16 x i8> %b) { +; CHECK-LABEL: test_vst1q_s8 +; CHECK: st1 {v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] + tail call void @llvm.arm.neon.vst1.v16i8(i8* %a, <16 x i8> %b, i32 1) + ret void +} + +define void @test_vst1q_s16(i16* %a, <8 x i16> %b) { +; CHECK-LABEL: test_vst1q_s16 +; CHECK: st1 {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] + %1 = bitcast i16* %a to i8* + tail call void @llvm.arm.neon.vst1.v8i16(i8* %1, <8 x i16> %b, i32 2) + ret void +} + +define void @test_vst1q_s32(i32* %a, <4 x i32> %b) { +; CHECK-LABEL: test_vst1q_s32 +; CHECK: st1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] + %1 = bitcast i32* %a to i8* + tail call void @llvm.arm.neon.vst1.v4i32(i8* %1, <4 x i32> %b, i32 4) + ret void +} + +define void @test_vst1q_s64(i64* %a, <2 x i64> %b) { +; CHECK-LABEL: test_vst1q_s64 +; CHECK: st1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] + %1 = bitcast i64* %a to i8* + tail call void @llvm.arm.neon.vst1.v2i64(i8* %1, <2 x i64> %b, i32 8) + ret void +} + +define void @test_vst1q_f32(float* %a, <4 x float> %b) { +; CHECK-LABEL: test_vst1q_f32 +; CHECK: st1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] + %1 = bitcast float* %a to i8* + tail call void @llvm.arm.neon.vst1.v4f32(i8* %1, <4 x float> %b, i32 4) + ret void +} + +define void @test_vst1q_f64(double* %a, <2 x double> %b) { +; CHECK-LABEL: 
test_vst1q_f64 +; CHECK: st1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] + %1 = bitcast double* %a to i8* + tail call void @llvm.arm.neon.vst1.v2f64(i8* %1, <2 x double> %b, i32 8) + ret void +} + +define void @test_vst1_s8(i8* %a, <8 x i8> %b) { +; CHECK-LABEL: test_vst1_s8 +; CHECK: st1 {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] + tail call void @llvm.arm.neon.vst1.v8i8(i8* %a, <8 x i8> %b, i32 1) + ret void +} + +define void @test_vst1_s16(i16* %a, <4 x i16> %b) { +; CHECK-LABEL: test_vst1_s16 +; CHECK: st1 {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] + %1 = bitcast i16* %a to i8* + tail call void @llvm.arm.neon.vst1.v4i16(i8* %1, <4 x i16> %b, i32 2) + ret void +} + +define void @test_vst1_s32(i32* %a, <2 x i32> %b) { +; CHECK-LABEL: test_vst1_s32 +; CHECK: st1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] + %1 = bitcast i32* %a to i8* + tail call void @llvm.arm.neon.vst1.v2i32(i8* %1, <2 x i32> %b, i32 4) + ret void +} + +define void @test_vst1_s64(i64* %a, <1 x i64> %b) { +; CHECK-LABEL: test_vst1_s64 +; CHECK: st1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] + %1 = bitcast i64* %a to i8* + tail call void @llvm.arm.neon.vst1.v1i64(i8* %1, <1 x i64> %b, i32 8) + ret void +} + +define void @test_vst1_f32(float* %a, <2 x float> %b) { +; CHECK-LABEL: test_vst1_f32 +; CHECK: st1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] + %1 = bitcast float* %a to i8* + tail call void @llvm.arm.neon.vst1.v2f32(i8* %1, <2 x float> %b, i32 4) + ret void +} + +define void @test_vst1_f64(double* %a, <1 x double> %b) { +; CHECK-LABEL: test_vst1_f64 +; CHECK: st1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] + %1 = bitcast double* %a to i8* + tail call void @llvm.arm.neon.vst1.v1f64(i8* %1, <1 x double> %b, i32 8) + ret void +} + +define void @test_vst2q_s8(i8* %a, [2 x <16 x i8>] %b.coerce) { +; CHECK-LABEL: test_vst2q_s8 +; CHECK: st2 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %b.coerce, 1 + tail call void 
@llvm.arm.neon.vst2.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, i32 1) + ret void +} + +define void @test_vst2q_s16(i16* %a, [2 x <8 x i16>] %b.coerce) { +; CHECK-LABEL: test_vst2q_s16 +; CHECK: st2 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [2 x <8 x i16>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <8 x i16>] %b.coerce, 1 + %1 = bitcast i16* %a to i8* + tail call void @llvm.arm.neon.vst2.v8i16(i8* %1, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, i32 2) + ret void +} + +define void @test_vst2q_s32(i32* %a, [2 x <4 x i32>] %b.coerce) { +; CHECK-LABEL: test_vst2q_s32 +; CHECK: st2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %b.coerce, 1 + %1 = bitcast i32* %a to i8* + tail call void @llvm.arm.neon.vst2.v4i32(i8* %1, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, i32 4) + ret void +} + +define void @test_vst2q_s64(i64* %a, [2 x <2 x i64>] %b.coerce) { +; CHECK-LABEL: test_vst2q_s64 +; CHECK: st2 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [2 x <2 x i64>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <2 x i64>] %b.coerce, 1 + %1 = bitcast i64* %a to i8* + tail call void @llvm.arm.neon.vst2.v2i64(i8* %1, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, i32 8) + ret void +} + +define void @test_vst2q_f32(float* %a, [2 x <4 x float>] %b.coerce) { +; CHECK-LABEL: test_vst2q_f32 +; CHECK: st2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [2 x <4 x float>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <4 x float>] %b.coerce, 1 + %1 = bitcast float* %a to i8* + tail call void @llvm.arm.neon.vst2.v4f32(i8* %1, <4 x float> %b.coerce.fca.0.extract, <4 x 
float> %b.coerce.fca.1.extract, i32 4) + ret void +} + +define void @test_vst2q_f64(double* %a, [2 x <2 x double>] %b.coerce) { +; CHECK-LABEL: test_vst2q_f64 +; CHECK: st2 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [2 x <2 x double>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <2 x double>] %b.coerce, 1 + %1 = bitcast double* %a to i8* + tail call void @llvm.arm.neon.vst2.v2f64(i8* %1, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, i32 8) + ret void +} + +define void @test_vst2_s8(i8* %a, [2 x <8 x i8>] %b.coerce) { +; CHECK-LABEL: test_vst2_s8 +; CHECK: st2 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [2 x <8 x i8>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <8 x i8>] %b.coerce, 1 + tail call void @llvm.arm.neon.vst2.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, i32 1) + ret void +} + +define void @test_vst2_s16(i16* %a, [2 x <4 x i16>] %b.coerce) { +; CHECK-LABEL: test_vst2_s16 +; CHECK: st2 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [2 x <4 x i16>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <4 x i16>] %b.coerce, 1 + %1 = bitcast i16* %a to i8* + tail call void @llvm.arm.neon.vst2.v4i16(i8* %1, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, i32 2) + ret void +} + +define void @test_vst2_s32(i32* %a, [2 x <2 x i32>] %b.coerce) { +; CHECK-LABEL: test_vst2_s32 +; CHECK: st2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [2 x <2 x i32>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <2 x i32>] %b.coerce, 1 + %1 = bitcast i32* %a to i8* + tail call void @llvm.arm.neon.vst2.v2i32(i8* %1, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, i32 4) + ret void +} + +define void @test_vst2_s64(i64* %a, [2 x <1 x i64>] 
%b.coerce) { +; CHECK-LABEL: test_vst2_s64 +; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [2 x <1 x i64>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <1 x i64>] %b.coerce, 1 + %1 = bitcast i64* %a to i8* + tail call void @llvm.arm.neon.vst2.v1i64(i8* %1, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, i32 8) + ret void +} + +define void @test_vst2_f32(float* %a, [2 x <2 x float>] %b.coerce) { +; CHECK-LABEL: test_vst2_f32 +; CHECK: st2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [2 x <2 x float>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <2 x float>] %b.coerce, 1 + %1 = bitcast float* %a to i8* + tail call void @llvm.arm.neon.vst2.v2f32(i8* %1, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, i32 4) + ret void +} + +define void @test_vst2_f64(double* %a, [2 x <1 x double>] %b.coerce) { +; CHECK-LABEL: test_vst2_f64 +; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [2 x <1 x double>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <1 x double>] %b.coerce, 1 + %1 = bitcast double* %a to i8* + tail call void @llvm.arm.neon.vst2.v1f64(i8* %1, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, i32 8) + ret void +} + +define void @test_vst3q_s8(i8* %a, [3 x <16 x i8>] %b.coerce) { +; CHECK-LABEL: test_vst3q_s8 +; CHECK: st3 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [3 x <16 x i8>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <16 x i8>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <16 x i8>] %b.coerce, 2 + tail call void @llvm.arm.neon.vst3.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, i32 1) + ret void +} + +define void 
@test_vst3q_s16(i16* %a, [3 x <8 x i16>] %b.coerce) { +; CHECK-LABEL: test_vst3q_s16 +; CHECK: st3 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [3 x <8 x i16>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <8 x i16>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <8 x i16>] %b.coerce, 2 + %1 = bitcast i16* %a to i8* + tail call void @llvm.arm.neon.vst3.v8i16(i8* %1, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, i32 2) + ret void +} + +define void @test_vst3q_s32(i32* %a, [3 x <4 x i32>] %b.coerce) { +; CHECK-LABEL: test_vst3q_s32 +; CHECK: st3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [3 x <4 x i32>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <4 x i32>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <4 x i32>] %b.coerce, 2 + %1 = bitcast i32* %a to i8* + tail call void @llvm.arm.neon.vst3.v4i32(i8* %1, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, i32 4) + ret void +} + +define void @test_vst3q_s64(i64* %a, [3 x <2 x i64>] %b.coerce) { +; CHECK-LABEL: test_vst3q_s64 +; CHECK: st3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [3 x <2 x i64>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <2 x i64>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <2 x i64>] %b.coerce, 2 + %1 = bitcast i64* %a to i8* + tail call void @llvm.arm.neon.vst3.v2i64(i8* %1, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, i32 8) + ret void +} + +define void @test_vst3q_f32(float* %a, [3 x <4 x float>] %b.coerce) { +; CHECK-LABEL: test_vst3q_f32 +; CHECK: st3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = 
extractvalue [3 x <4 x float>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <4 x float>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <4 x float>] %b.coerce, 2 + %1 = bitcast float* %a to i8* + tail call void @llvm.arm.neon.vst3.v4f32(i8* %1, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, i32 4) + ret void +} + +define void @test_vst3q_f64(double* %a, [3 x <2 x double>] %b.coerce) { +; CHECK-LABEL: test_vst3q_f64 +; CHECK: st3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [3 x <2 x double>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <2 x double>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <2 x double>] %b.coerce, 2 + %1 = bitcast double* %a to i8* + tail call void @llvm.arm.neon.vst3.v2f64(i8* %1, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, i32 8) + ret void +} + +define void @test_vst3_s8(i8* %a, [3 x <8 x i8>] %b.coerce) { +; CHECK-LABEL: test_vst3_s8 +; CHECK: st3 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [3 x <8 x i8>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <8 x i8>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <8 x i8>] %b.coerce, 2 + tail call void @llvm.arm.neon.vst3.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, i32 1) + ret void +} + +define void @test_vst3_s16(i16* %a, [3 x <4 x i16>] %b.coerce) { +; CHECK-LABEL: test_vst3_s16 +; CHECK: st3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [3 x <4 x i16>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <4 x i16>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <4 x i16>] %b.coerce, 2 + %1 = bitcast i16* %a to 
i8* + tail call void @llvm.arm.neon.vst3.v4i16(i8* %1, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, i32 2) + ret void +} + +define void @test_vst3_s32(i32* %a, [3 x <2 x i32>] %b.coerce) { +; CHECK-LABEL: test_vst3_s32 +; CHECK: st3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [3 x <2 x i32>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <2 x i32>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <2 x i32>] %b.coerce, 2 + %1 = bitcast i32* %a to i8* + tail call void @llvm.arm.neon.vst3.v2i32(i8* %1, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, i32 4) + ret void +} + +define void @test_vst3_s64(i64* %a, [3 x <1 x i64>] %b.coerce) { +; CHECK-LABEL: test_vst3_s64 +; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [3 x <1 x i64>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <1 x i64>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <1 x i64>] %b.coerce, 2 + %1 = bitcast i64* %a to i8* + tail call void @llvm.arm.neon.vst3.v1i64(i8* %1, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, i32 8) + ret void +} + +define void @test_vst3_f32(float* %a, [3 x <2 x float>] %b.coerce) { +; CHECK-LABEL: test_vst3_f32 +; CHECK: st3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [3 x <2 x float>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <2 x float>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <2 x float>] %b.coerce, 2 + %1 = bitcast float* %a to i8* + tail call void @llvm.arm.neon.vst3.v2f32(i8* %1, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, i32 4) + ret void +} + +define 
void @test_vst3_f64(double* %a, [3 x <1 x double>] %b.coerce) { +; CHECK-LABEL: test_vst3_f64 +; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [3 x <1 x double>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <1 x double>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <1 x double>] %b.coerce, 2 + %1 = bitcast double* %a to i8* + tail call void @llvm.arm.neon.vst3.v1f64(i8* %1, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, i32 8) + ret void +} + +define void @test_vst4q_s8(i8* %a, [4 x <16 x i8>] %b.coerce) { +; CHECK-LABEL: test_vst4q_s8 +; CHECK: st4 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [4 x <16 x i8>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <16 x i8>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <16 x i8>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <16 x i8>] %b.coerce, 3 + tail call void @llvm.arm.neon.vst4.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, <16 x i8> %b.coerce.fca.3.extract, i32 1) + ret void +} + +define void @test_vst4q_s16(i16* %a, [4 x <8 x i16>] %b.coerce) { +; CHECK-LABEL: test_vst4q_s16 +; CHECK: st4 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [4 x <8 x i16>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <8 x i16>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <8 x i16>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <8 x i16>] %b.coerce, 3 + %1 = bitcast i16* %a to i8* + tail call void @llvm.arm.neon.vst4.v8i16(i8* %1, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, <8 x i16> 
%b.coerce.fca.3.extract, i32 2) + ret void +} + +define void @test_vst4q_s32(i32* %a, [4 x <4 x i32>] %b.coerce) { +; CHECK-LABEL: test_vst4q_s32 +; CHECK: st4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [4 x <4 x i32>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <4 x i32>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <4 x i32>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <4 x i32>] %b.coerce, 3 + %1 = bitcast i32* %a to i8* + tail call void @llvm.arm.neon.vst4.v4i32(i8* %1, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, <4 x i32> %b.coerce.fca.3.extract, i32 4) + ret void +} + +define void @test_vst4q_s64(i64* %a, [4 x <2 x i64>] %b.coerce) { +; CHECK-LABEL: test_vst4q_s64 +; CHECK: st4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [4 x <2 x i64>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <2 x i64>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <2 x i64>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <2 x i64>] %b.coerce, 3 + %1 = bitcast i64* %a to i8* + tail call void @llvm.arm.neon.vst4.v2i64(i8* %1, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, <2 x i64> %b.coerce.fca.3.extract, i32 8) + ret void +} + +define void @test_vst4q_f32(float* %a, [4 x <4 x float>] %b.coerce) { +; CHECK-LABEL: test_vst4q_f32 +; CHECK: st4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [4 x <4 x float>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <4 x float>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <4 x float>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <4 x float>] %b.coerce, 3 + %1 = bitcast float* %a 
to i8* + tail call void @llvm.arm.neon.vst4.v4f32(i8* %1, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, <4 x float> %b.coerce.fca.3.extract, i32 4) + ret void +} + +define void @test_vst4q_f64(double* %a, [4 x <2 x double>] %b.coerce) { +; CHECK-LABEL: test_vst4q_f64 +; CHECK: st4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [4 x <2 x double>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <2 x double>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <2 x double>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <2 x double>] %b.coerce, 3 + %1 = bitcast double* %a to i8* + tail call void @llvm.arm.neon.vst4.v2f64(i8* %1, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, <2 x double> %b.coerce.fca.3.extract, i32 8) + ret void +} + +define void @test_vst4_s8(i8* %a, [4 x <8 x i8>] %b.coerce) { +; CHECK-LABEL: test_vst4_s8 +; CHECK: st4 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [4 x <8 x i8>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <8 x i8>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <8 x i8>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <8 x i8>] %b.coerce, 3 + tail call void @llvm.arm.neon.vst4.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, <8 x i8> %b.coerce.fca.3.extract, i32 1) + ret void +} + +define void @test_vst4_s16(i16* %a, [4 x <4 x i16>] %b.coerce) { +; CHECK-LABEL: test_vst4_s16 +; CHECK: st4 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [4 x <4 x i16>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <4 x i16>] %b.coerce, 1 + 
%b.coerce.fca.2.extract = extractvalue [4 x <4 x i16>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <4 x i16>] %b.coerce, 3 + %1 = bitcast i16* %a to i8* + tail call void @llvm.arm.neon.vst4.v4i16(i8* %1, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, <4 x i16> %b.coerce.fca.3.extract, i32 2) + ret void +} + +define void @test_vst4_s32(i32* %a, [4 x <2 x i32>] %b.coerce) { +; CHECK-LABEL: test_vst4_s32 +; CHECK: st4 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [4 x <2 x i32>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <2 x i32>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <2 x i32>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <2 x i32>] %b.coerce, 3 + %1 = bitcast i32* %a to i8* + tail call void @llvm.arm.neon.vst4.v2i32(i8* %1, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, <2 x i32> %b.coerce.fca.3.extract, i32 4) + ret void +} + +define void @test_vst4_s64(i64* %a, [4 x <1 x i64>] %b.coerce) { +; CHECK-LABEL: test_vst4_s64 +; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [4 x <1 x i64>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <1 x i64>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <1 x i64>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <1 x i64>] %b.coerce, 3 + %1 = bitcast i64* %a to i8* + tail call void @llvm.arm.neon.vst4.v1i64(i8* %1, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, <1 x i64> %b.coerce.fca.3.extract, i32 8) + ret void +} + +define void @test_vst4_f32(float* %a, [4 x <2 x float>] %b.coerce) { +; CHECK-LABEL: test_vst4_f32 +; CHECK: st4 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, 
[{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [4 x <2 x float>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <2 x float>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <2 x float>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <2 x float>] %b.coerce, 3 + %1 = bitcast float* %a to i8* + tail call void @llvm.arm.neon.vst4.v2f32(i8* %1, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, <2 x float> %b.coerce.fca.3.extract, i32 4) + ret void +} + +define void @test_vst4_f64(double* %a, [4 x <1 x double>] %b.coerce) { +; CHECK-LABEL: test_vst4_f64 +; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] + %b.coerce.fca.0.extract = extractvalue [4 x <1 x double>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <1 x double>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <1 x double>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <1 x double>] %b.coerce, 3 + %1 = bitcast double* %a to i8* + tail call void @llvm.arm.neon.vst4.v1f64(i8* %1, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, <1 x double> %b.coerce.fca.3.extract, i32 8) + ret void +} + +declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32) +declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) +declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>, i32) +declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>, i32) +declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) +declare void @llvm.arm.neon.vst1.v2f64(i8*, <2 x double>, i32) +declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) +declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32) +declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>, i32) +declare void @llvm.arm.neon.vst1.v1i64(i8*, <1 x i64>, i32) +declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32) +declare void 
@llvm.arm.neon.vst1.v1f64(i8*, <1 x double>, i32) +declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>, i32) +declare void @llvm.arm.neon.vst2.v8i16(i8*, <8 x i16>, <8 x i16>, i32) +declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>, i32) +declare void @llvm.arm.neon.vst2.v2i64(i8*, <2 x i64>, <2 x i64>, i32) +declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) +declare void @llvm.arm.neon.vst2.v2f64(i8*, <2 x double>, <2 x double>, i32) +declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) +declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>, i32) +declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32) +declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>, i32) +declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>, i32) +declare void @llvm.arm.neon.vst2.v1f64(i8*, <1 x double>, <1 x double>, i32) +declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32) +declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) +declare void @llvm.arm.neon.vst3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) +declare void @llvm.arm.neon.vst3.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32) +declare void @llvm.arm.neon.vst3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) +declare void @llvm.arm.neon.vst3.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32) +declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) +declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) +declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) +declare void @llvm.arm.neon.vst3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32) +declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) +declare void @llvm.arm.neon.vst3.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32) +declare void @llvm.arm.neon.vst4.v16i8(i8*, 
<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32) +declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) +declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) +declare void @llvm.arm.neon.vst4.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32) +declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) +declare void @llvm.arm.neon.vst4.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32) +declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) +declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) +declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) +declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32) +declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) +declare void @llvm.arm.neon.vst4.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32) + +define %struct.int8x16x2_t @test_vld1q_s8_x2(i8* %a) { +; CHECK-LABEL: test_vld1q_s8_x2 +; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] + %1 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x2.v16i8(i8* %a, i32 1) + %2 = extractvalue { <16 x i8>, <16 x i8> } %1, 0 + %3 = extractvalue { <16 x i8>, <16 x i8> } %1, 1 + %4 = insertvalue %struct.int8x16x2_t undef, <16 x i8> %2, 0, 0 + %5 = insertvalue %struct.int8x16x2_t %4, <16 x i8> %3, 0, 1 + ret %struct.int8x16x2_t %5 +} + +define %struct.int16x8x2_t @test_vld1q_s16_x2(i16* %a) { +; CHECK-LABEL: test_vld1q_s16_x2 +; CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] + %1 = bitcast i16* %a to i8* + %2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x2.v8i16(i8* %1, i32 2) + %3 = extractvalue { <8 x i16>, <8 x i16> } %2, 0 + %4 = extractvalue { <8 x i16>, <8 x i16> } 
%2, 1 + %5 = insertvalue %struct.int16x8x2_t undef, <8 x i16> %3, 0, 0 + %6 = insertvalue %struct.int16x8x2_t %5, <8 x i16> %4, 0, 1 + ret %struct.int16x8x2_t %6 +} + +define %struct.int32x4x2_t @test_vld1q_s32_x2(i32* %a) { +; CHECK-LABEL: test_vld1q_s32_x2 +; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] + %1 = bitcast i32* %a to i8* + %2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x2.v4i32(i8* %1, i32 4) + %3 = extractvalue { <4 x i32>, <4 x i32> } %2, 0 + %4 = extractvalue { <4 x i32>, <4 x i32> } %2, 1 + %5 = insertvalue %struct.int32x4x2_t undef, <4 x i32> %3, 0, 0 + %6 = insertvalue %struct.int32x4x2_t %5, <4 x i32> %4, 0, 1 + ret %struct.int32x4x2_t %6 +} + +define %struct.int64x2x2_t @test_vld1q_s64_x2(i64* %a) { +; CHECK-LABEL: test_vld1q_s64_x2 +; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] + %1 = bitcast i64* %a to i8* + %2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x2.v2i64(i8* %1, i32 8) + %3 = extractvalue { <2 x i64>, <2 x i64> } %2, 0 + %4 = extractvalue { <2 x i64>, <2 x i64> } %2, 1 + %5 = insertvalue %struct.int64x2x2_t undef, <2 x i64> %3, 0, 0 + %6 = insertvalue %struct.int64x2x2_t %5, <2 x i64> %4, 0, 1 + ret %struct.int64x2x2_t %6 +} + +define %struct.float32x4x2_t @test_vld1q_f32_x2(float* %a) { +; CHECK-LABEL: test_vld1q_f32_x2 +; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] + %1 = bitcast float* %a to i8* + %2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x2.v4f32(i8* %1, i32 4) + %3 = extractvalue { <4 x float>, <4 x float> } %2, 0 + %4 = extractvalue { <4 x float>, <4 x float> } %2, 1 + %5 = insertvalue %struct.float32x4x2_t undef, <4 x float> %3, 0, 0 + %6 = insertvalue %struct.float32x4x2_t %5, <4 x float> %4, 0, 1 + ret %struct.float32x4x2_t %6 +} + + +define %struct.float64x2x2_t @test_vld1q_f64_x2(double* %a) { +; CHECK-LABEL: test_vld1q_f64_x2 +; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] + %1 = bitcast double* 
%a to i8* + %2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x2.v2f64(i8* %1, i32 8) + %3 = extractvalue { <2 x double>, <2 x double> } %2, 0 + %4 = extractvalue { <2 x double>, <2 x double> } %2, 1 + %5 = insertvalue %struct.float64x2x2_t undef, <2 x double> %3, 0, 0 + %6 = insertvalue %struct.float64x2x2_t %5, <2 x double> %4, 0, 1 + ret %struct.float64x2x2_t %6 +} + +define %struct.int8x8x2_t @test_vld1_s8_x2(i8* %a) { +; CHECK-LABEL: test_vld1_s8_x2 +; CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] + %1 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x2.v8i8(i8* %a, i32 1) + %2 = extractvalue { <8 x i8>, <8 x i8> } %1, 0 + %3 = extractvalue { <8 x i8>, <8 x i8> } %1, 1 + %4 = insertvalue %struct.int8x8x2_t undef, <8 x i8> %2, 0, 0 + %5 = insertvalue %struct.int8x8x2_t %4, <8 x i8> %3, 0, 1 + ret %struct.int8x8x2_t %5 +} + +define %struct.int16x4x2_t @test_vld1_s16_x2(i16* %a) { +; CHECK-LABEL: test_vld1_s16_x2 +; CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] + %1 = bitcast i16* %a to i8* + %2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x2.v4i16(i8* %1, i32 2) + %3 = extractvalue { <4 x i16>, <4 x i16> } %2, 0 + %4 = extractvalue { <4 x i16>, <4 x i16> } %2, 1 + %5 = insertvalue %struct.int16x4x2_t undef, <4 x i16> %3, 0, 0 + %6 = insertvalue %struct.int16x4x2_t %5, <4 x i16> %4, 0, 1 + ret %struct.int16x4x2_t %6 +} + +define %struct.int32x2x2_t @test_vld1_s32_x2(i32* %a) { +; CHECK-LABEL: test_vld1_s32_x2 +; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] + %1 = bitcast i32* %a to i8* + %2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x2.v2i32(i8* %1, i32 4) + %3 = extractvalue { <2 x i32>, <2 x i32> } %2, 0 + %4 = extractvalue { <2 x i32>, <2 x i32> } %2, 1 + %5 = insertvalue %struct.int32x2x2_t undef, <2 x i32> %3, 0, 0 + %6 = insertvalue %struct.int32x2x2_t %5, <2 x i32> %4, 0, 1 + ret %struct.int32x2x2_t %6 +} + +define %struct.int64x1x2_t 
@test_vld1_s64_x2(i64* %a) { +; CHECK-LABEL: test_vld1_s64_x2 +; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] + %1 = bitcast i64* %a to i8* + %2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x2.v1i64(i8* %1, i32 8) + %3 = extractvalue { <1 x i64>, <1 x i64> } %2, 0 + %4 = extractvalue { <1 x i64>, <1 x i64> } %2, 1 + %5 = insertvalue %struct.int64x1x2_t undef, <1 x i64> %3, 0, 0 + %6 = insertvalue %struct.int64x1x2_t %5, <1 x i64> %4, 0, 1 + ret %struct.int64x1x2_t %6 +} + +define %struct.float32x2x2_t @test_vld1_f32_x2(float* %a) { +; CHECK-LABEL: test_vld1_f32_x2 +; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] + %1 = bitcast float* %a to i8* + %2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x2.v2f32(i8* %1, i32 4) + %3 = extractvalue { <2 x float>, <2 x float> } %2, 0 + %4 = extractvalue { <2 x float>, <2 x float> } %2, 1 + %5 = insertvalue %struct.float32x2x2_t undef, <2 x float> %3, 0, 0 + %6 = insertvalue %struct.float32x2x2_t %5, <2 x float> %4, 0, 1 + ret %struct.float32x2x2_t %6 +} + +define %struct.float64x1x2_t @test_vld1_f64_x2(double* %a) { +; CHECK-LABEL: test_vld1_f64_x2 +; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] + %1 = bitcast double* %a to i8* + %2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x2.v1f64(i8* %1, i32 8) + %3 = extractvalue { <1 x double>, <1 x double> } %2, 0 + %4 = extractvalue { <1 x double>, <1 x double> } %2, 1 + %5 = insertvalue %struct.float64x1x2_t undef, <1 x double> %3, 0, 0 + %6 = insertvalue %struct.float64x1x2_t %5, <1 x double> %4, 0, 1 + ret %struct.float64x1x2_t %6 +} + +define %struct.int8x16x3_t @test_vld1q_s8_x3(i8* %a) { +; CHECK-LABEL: test_vld1q_s8_x3 +; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, +; [{{x[0-9]+|sp}}] + %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x3.v16i8(i8* %a, i32 1) + %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %1, 0 + %3 = 
extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %1, 1 + %4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %1, 2 + %5 = insertvalue %struct.int8x16x3_t undef, <16 x i8> %2, 0, 0 + %6 = insertvalue %struct.int8x16x3_t %5, <16 x i8> %3, 0, 1 + %7 = insertvalue %struct.int8x16x3_t %6, <16 x i8> %4, 0, 2 + ret %struct.int8x16x3_t %7 +} + +define %struct.int16x8x3_t @test_vld1q_s16_x3(i16* %a) { +; CHECK-LABEL: test_vld1q_s16_x3 +; CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, +; [{{x[0-9]+|sp}}] + %1 = bitcast i16* %a to i8* + %2 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x3.v8i16(i8* %1, i32 2) + %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 0 + %4 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 1 + %5 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 2 + %6 = insertvalue %struct.int16x8x3_t undef, <8 x i16> %3, 0, 0 + %7 = insertvalue %struct.int16x8x3_t %6, <8 x i16> %4, 0, 1 + %8 = insertvalue %struct.int16x8x3_t %7, <8 x i16> %5, 0, 2 + ret %struct.int16x8x3_t %8 +} + +define %struct.int32x4x3_t @test_vld1q_s32_x3(i32* %a) { +; CHECK-LABEL: test_vld1q_s32_x3 +; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, +; [{{x[0-9]+|sp}}] + %1 = bitcast i32* %a to i8* + %2 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x3.v4i32(i8* %1, i32 4) + %3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %2, 0 + %4 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %2, 1 + %5 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %2, 2 + %6 = insertvalue %struct.int32x4x3_t undef, <4 x i32> %3, 0, 0 + %7 = insertvalue %struct.int32x4x3_t %6, <4 x i32> %4, 0, 1 + %8 = insertvalue %struct.int32x4x3_t %7, <4 x i32> %5, 0, 2 + ret %struct.int32x4x3_t %8 +} + +define %struct.int64x2x3_t @test_vld1q_s64_x3(i64* %a) { +; CHECK-LABEL: test_vld1q_s64_x3 +; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, +; [{{x[0-9]+|sp}}] + %1 = bitcast i64* %a to i8* + %2 = tail call { <2 x 
i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x3.v2i64(i8* %1, i32 8) + %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 0 + %4 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 1 + %5 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 2 + %6 = insertvalue %struct.int64x2x3_t undef, <2 x i64> %3, 0, 0 + %7 = insertvalue %struct.int64x2x3_t %6, <2 x i64> %4, 0, 1 + %8 = insertvalue %struct.int64x2x3_t %7, <2 x i64> %5, 0, 2 + ret %struct.int64x2x3_t %8 +} + +define %struct.float32x4x3_t @test_vld1q_f32_x3(float* %a) { +; CHECK-LABEL: test_vld1q_f32_x3 +; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, +; [{{x[0-9]+|sp}}] + %1 = bitcast float* %a to i8* + %2 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x3.v4f32(i8* %1, i32 4) + %3 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %2, 0 + %4 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %2, 1 + %5 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %2, 2 + %6 = insertvalue %struct.float32x4x3_t undef, <4 x float> %3, 0, 0 + %7 = insertvalue %struct.float32x4x3_t %6, <4 x float> %4, 0, 1 + %8 = insertvalue %struct.float32x4x3_t %7, <4 x float> %5, 0, 2 + ret %struct.float32x4x3_t %8 +} + + +define %struct.float64x2x3_t @test_vld1q_f64_x3(double* %a) { +; CHECK-LABEL: test_vld1q_f64_x3 +; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, +; [{{x[0-9]+|sp}}] + %1 = bitcast double* %a to i8* + %2 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x3.v2f64(i8* %1, i32 8) + %3 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %2, 0 + %4 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %2, 1 + %5 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %2, 2 + %6 = insertvalue %struct.float64x2x3_t undef, <2 x double> %3, 0, 0 + %7 = insertvalue %struct.float64x2x3_t %6, <2 x double> %4, 0, 1 + %8 = insertvalue %struct.float64x2x3_t %7, <2 x double> %5, 0, 2 + ret 
%struct.float64x2x3_t %8 +} + +define %struct.int8x8x3_t @test_vld1_s8_x3(i8* %a) { +; CHECK-LABEL: test_vld1_s8_x3 +; CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, +; [{{x[0-9]+|sp}}] + %1 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x3.v8i8(i8* %a, i32 1) + %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 0 + %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 1 + %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 2 + %5 = insertvalue %struct.int8x8x3_t undef, <8 x i8> %2, 0, 0 + %6 = insertvalue %struct.int8x8x3_t %5, <8 x i8> %3, 0, 1 + %7 = insertvalue %struct.int8x8x3_t %6, <8 x i8> %4, 0, 2 + ret %struct.int8x8x3_t %7 +} + +define %struct.int16x4x3_t @test_vld1_s16_x3(i16* %a) { +; CHECK-LABEL: test_vld1_s16_x3 +; CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, +; [{{x[0-9]+|sp}}] + %1 = bitcast i16* %a to i8* + %2 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x3.v4i16(i8* %1, i32 2) + %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 0 + %4 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 1 + %5 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 2 + %6 = insertvalue %struct.int16x4x3_t undef, <4 x i16> %3, 0, 0 + %7 = insertvalue %struct.int16x4x3_t %6, <4 x i16> %4, 0, 1 + %8 = insertvalue %struct.int16x4x3_t %7, <4 x i16> %5, 0, 2 + ret %struct.int16x4x3_t %8 +} + +define %struct.int32x2x3_t @test_vld1_s32_x3(i32* %a) { + %1 = bitcast i32* %a to i8* +; CHECK-LABEL: test_vld1_s32_x3 +; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, +; [{{x[0-9]+|sp}}] + %2 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x3.v2i32(i8* %1, i32 4) + %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %2, 0 + %4 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %2, 1 + %5 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %2, 2 + %6 = insertvalue %struct.int32x2x3_t undef, <2 x i32> %3, 0, 0 + %7 = insertvalue %struct.int32x2x3_t %6, 
<2 x i32> %4, 0, 1 + %8 = insertvalue %struct.int32x2x3_t %7, <2 x i32> %5, 0, 2 + ret %struct.int32x2x3_t %8 +} + +define %struct.int64x1x3_t @test_vld1_s64_x3(i64* %a) { +; CHECK-LABEL: test_vld1_s64_x3 +; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, +; [{{x[0-9]+|sp}}] + %1 = bitcast i64* %a to i8* + %2 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x3.v1i64(i8* %1, i32 8) + %3 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %2, 0 + %4 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %2, 1 + %5 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %2, 2 + %6 = insertvalue %struct.int64x1x3_t undef, <1 x i64> %3, 0, 0 + %7 = insertvalue %struct.int64x1x3_t %6, <1 x i64> %4, 0, 1 + %8 = insertvalue %struct.int64x1x3_t %7, <1 x i64> %5, 0, 2 + ret %struct.int64x1x3_t %8 +} + +define %struct.float32x2x3_t @test_vld1_f32_x3(float* %a) { +; CHECK-LABEL: test_vld1_f32_x3 +; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, +; [{{x[0-9]+|sp}}] + %1 = bitcast float* %a to i8* + %2 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x3.v2f32(i8* %1, i32 4) + %3 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %2, 0 + %4 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %2, 1 + %5 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %2, 2 + %6 = insertvalue %struct.float32x2x3_t undef, <2 x float> %3, 0, 0 + %7 = insertvalue %struct.float32x2x3_t %6, <2 x float> %4, 0, 1 + %8 = insertvalue %struct.float32x2x3_t %7, <2 x float> %5, 0, 2 + ret %struct.float32x2x3_t %8 +} + + +define %struct.float64x1x3_t @test_vld1_f64_x3(double* %a) { +; CHECK-LABEL: test_vld1_f64_x3 +; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, +; [{{x[0-9]+|sp}}] + %1 = bitcast double* %a to i8* + %2 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x3.v1f64(i8* %1, i32 8) + %3 = extractvalue { <1 x double>, <1 x double>, <1 x double> } %2, 0 + %4 = extractvalue 
{ <1 x double>, <1 x double>, <1 x double> } %2, 1 + %5 = extractvalue { <1 x double>, <1 x double>, <1 x double> } %2, 2 + %6 = insertvalue %struct.float64x1x3_t undef, <1 x double> %3, 0, 0 + %7 = insertvalue %struct.float64x1x3_t %6, <1 x double> %4, 0, 1 + %8 = insertvalue %struct.float64x1x3_t %7, <1 x double> %5, 0, 2 + ret %struct.float64x1x3_t %8 +} + +define %struct.int8x16x4_t @test_vld1q_s8_x4(i8* %a) { +; CHECK-LABEL: test_vld1q_s8_x4 +; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, +; v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] + %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x4.v16i8(i8* %a, i32 1) + %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 0 + %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 1 + %4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 2 + %5 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 3 + %6 = insertvalue %struct.int8x16x4_t undef, <16 x i8> %2, 0, 0 + %7 = insertvalue %struct.int8x16x4_t %6, <16 x i8> %3, 0, 1 + %8 = insertvalue %struct.int8x16x4_t %7, <16 x i8> %4, 0, 2 + %9 = insertvalue %struct.int8x16x4_t %8, <16 x i8> %5, 0, 3 + ret %struct.int8x16x4_t %9 +} + +define %struct.int16x8x4_t @test_vld1q_s16_x4(i16* %a) { +; CHECK-LABEL: test_vld1q_s16_x4 +; CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, +; v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] + %1 = bitcast i16* %a to i8* + %2 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x4.v8i16(i8* %1, i32 2) + %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 0 + %4 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 1 + %5 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 2 + %6 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 3 + %7 = insertvalue %struct.int16x8x4_t undef, <8 x i16> %3, 0, 0 + %8 = insertvalue %struct.int16x8x4_t %7, <8 x i16> %4, 0, 1 + %9 
= insertvalue %struct.int16x8x4_t %8, <8 x i16> %5, 0, 2 + %10 = insertvalue %struct.int16x8x4_t %9, <8 x i16> %6, 0, 3 + ret %struct.int16x8x4_t %10 +} + +define %struct.int32x4x4_t @test_vld1q_s32_x4(i32* %a) { +; CHECK-LABEL: test_vld1q_s32_x4 +; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, +; v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] + %1 = bitcast i32* %a to i8* + %2 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x4.v4i32(i8* %1, i32 4) + %3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 0 + %4 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 1 + %5 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 2 + %6 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 3 + %7 = insertvalue %struct.int32x4x4_t undef, <4 x i32> %3, 0, 0 + %8 = insertvalue %struct.int32x4x4_t %7, <4 x i32> %4, 0, 1 + %9 = insertvalue %struct.int32x4x4_t %8, <4 x i32> %5, 0, 2 + %10 = insertvalue %struct.int32x4x4_t %9, <4 x i32> %6, 0, 3 + ret %struct.int32x4x4_t %10 +} + +define %struct.int64x2x4_t @test_vld1q_s64_x4(i64* %a) { +; CHECK-LABEL: test_vld1q_s64_x4 +; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, +; v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] + %1 = bitcast i64* %a to i8* + %2 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x4.v2i64(i8* %1, i32 8) + %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 0 + %4 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 1 + %5 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 2 + %6 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 3 + %7 = insertvalue %struct.int64x2x4_t undef, <2 x i64> %3, 0, 0 + %8 = insertvalue %struct.int64x2x4_t %7, <2 x i64> %4, 0, 1 + %9 = insertvalue %struct.int64x2x4_t %8, <2 x i64> %5, 0, 2 + %10 = insertvalue %struct.int64x2x4_t %9, <2 x i64> %6, 0, 3 + ret %struct.int64x2x4_t %10 +} + +define 
%struct.float32x4x4_t @test_vld1q_f32_x4(float* %a) { +; CHECK-LABEL: test_vld1q_f32_x4 +; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, +; v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] + %1 = bitcast float* %a to i8* + %2 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x4.v4f32(i8* %1, i32 4) + %3 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 0 + %4 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 1 + %5 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 2 + %6 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 3 + %7 = insertvalue %struct.float32x4x4_t undef, <4 x float> %3, 0, 0 + %8 = insertvalue %struct.float32x4x4_t %7, <4 x float> %4, 0, 1 + %9 = insertvalue %struct.float32x4x4_t %8, <4 x float> %5, 0, 2 + %10 = insertvalue %struct.float32x4x4_t %9, <4 x float> %6, 0, 3 + ret %struct.float32x4x4_t %10 +} + +define %struct.float64x2x4_t @test_vld1q_f64_x4(double* %a) { +; CHECK-LABEL: test_vld1q_f64_x4 +; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, +; v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] + %1 = bitcast double* %a to i8* + %2 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x4.v2f64(i8* %1, i32 8) + %3 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 0 + %4 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 1 + %5 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 2 + %6 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 3 + %7 = insertvalue %struct.float64x2x4_t undef, <2 x double> %3, 0, 0 + %8 = insertvalue %struct.float64x2x4_t %7, <2 x double> %4, 0, 1 + %9 = insertvalue %struct.float64x2x4_t %8, <2 x double> %5, 0, 2 + %10 = insertvalue %struct.float64x2x4_t %9, <2 x double> %6, 0, 3 + ret %struct.float64x2x4_t %10 +} + +define %struct.int8x8x4_t 
@test_vld1_s8_x4(i8* %a) { +; CHECK-LABEL: test_vld1_s8_x4 +; CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, +; v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] + %1 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x4.v8i8(i8* %a, i32 1) + %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0 + %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 1 + %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 2 + %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 3 + %6 = insertvalue %struct.int8x8x4_t undef, <8 x i8> %2, 0, 0 + %7 = insertvalue %struct.int8x8x4_t %6, <8 x i8> %3, 0, 1 + %8 = insertvalue %struct.int8x8x4_t %7, <8 x i8> %4, 0, 2 + %9 = insertvalue %struct.int8x8x4_t %8, <8 x i8> %5, 0, 3 + ret %struct.int8x8x4_t %9 +} + +define %struct.int16x4x4_t @test_vld1_s16_x4(i16* %a) { +; CHECK-LABEL: test_vld1_s16_x4 +; CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, +; v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] + %1 = bitcast i16* %a to i8* + %2 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x4.v4i16(i8* %1, i32 2) + %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 0 + %4 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 1 + %5 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 2 + %6 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 3 + %7 = insertvalue %struct.int16x4x4_t undef, <4 x i16> %3, 0, 0 + %8 = insertvalue %struct.int16x4x4_t %7, <4 x i16> %4, 0, 1 + %9 = insertvalue %struct.int16x4x4_t %8, <4 x i16> %5, 0, 2 + %10 = insertvalue %struct.int16x4x4_t %9, <4 x i16> %6, 0, 3 + ret %struct.int16x4x4_t %10 +} + +define %struct.int32x2x4_t @test_vld1_s32_x4(i32* %a) { +; CHECK-LABEL: test_vld1_s32_x4 +; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, +; v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] + %1 = bitcast i32* %a to i8* + %2 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, 
<2 x i32> } @llvm.aarch64.neon.vld1x4.v2i32(i8* %1, i32 4) + %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 0 + %4 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 1 + %5 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 2 + %6 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 3 + %7 = insertvalue %struct.int32x2x4_t undef, <2 x i32> %3, 0, 0 + %8 = insertvalue %struct.int32x2x4_t %7, <2 x i32> %4, 0, 1 + %9 = insertvalue %struct.int32x2x4_t %8, <2 x i32> %5, 0, 2 + %10 = insertvalue %struct.int32x2x4_t %9, <2 x i32> %6, 0, 3 + ret %struct.int32x2x4_t %10 +} + +define %struct.int64x1x4_t @test_vld1_s64_x4(i64* %a) { +; CHECK-LABEL: test_vld1_s64_x4 +; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, +; v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] + %1 = bitcast i64* %a to i8* + %2 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x4.v1i64(i8* %1, i32 8) + %3 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 0 + %4 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 1 + %5 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 2 + %6 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 3 + %7 = insertvalue %struct.int64x1x4_t undef, <1 x i64> %3, 0, 0 + %8 = insertvalue %struct.int64x1x4_t %7, <1 x i64> %4, 0, 1 + %9 = insertvalue %struct.int64x1x4_t %8, <1 x i64> %5, 0, 2 + %10 = insertvalue %struct.int64x1x4_t %9, <1 x i64> %6, 0, 3 + ret %struct.int64x1x4_t %10 +} + +define %struct.float32x2x4_t @test_vld1_f32_x4(float* %a) { +; CHECK-LABEL: test_vld1_f32_x4 +; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, +; v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] + %1 = bitcast float* %a to i8* + %2 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x4.v2f32(i8* %1, i32 4) + %3 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 0 + %4 = extractvalue { <2 
x float>, <2 x float>, <2 x float>, <2 x float> } %2, 1 + %5 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 2 + %6 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 3 + %7 = insertvalue %struct.float32x2x4_t undef, <2 x float> %3, 0, 0 + %8 = insertvalue %struct.float32x2x4_t %7, <2 x float> %4, 0, 1 + %9 = insertvalue %struct.float32x2x4_t %8, <2 x float> %5, 0, 2 + %10 = insertvalue %struct.float32x2x4_t %9, <2 x float> %6, 0, 3 + ret %struct.float32x2x4_t %10 +} + + +define %struct.float64x1x4_t @test_vld1_f64_x4(double* %a) { +; CHECK-LABEL: test_vld1_f64_x4 +; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, +; v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] + %1 = bitcast double* %a to i8* + %2 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x4.v1f64(i8* %1, i32 8) + %3 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 0 + %4 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 1 + %5 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 2 + %6 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 3 + %7 = insertvalue %struct.float64x1x4_t undef, <1 x double> %3, 0, 0 + %8 = insertvalue %struct.float64x1x4_t %7, <1 x double> %4, 0, 1 + %9 = insertvalue %struct.float64x1x4_t %8, <1 x double> %5, 0, 2 + %10 = insertvalue %struct.float64x1x4_t %9, <1 x double> %6, 0, 3 + ret %struct.float64x1x4_t %10 +} + +define void @test_vst1q_s8_x2(i8* %a, [2 x <16 x i8>] %b) { +; CHECK-LABEL: test_vst1q_s8_x2 +; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] + %1 = extractvalue [2 x <16 x i8>] %b, 0 + %2 = extractvalue [2 x <16 x i8>] %b, 1 + tail call void @llvm.aarch64.neon.vst1x2.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, i32 1) + ret void +} + +define void @test_vst1q_s16_x2(i16* %a, [2 x <8 x i16>] %b) { +; CHECK-LABEL: test_vst1q_s16_x2 +; CHECK: st1 
{v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] + %1 = extractvalue [2 x <8 x i16>] %b, 0 + %2 = extractvalue [2 x <8 x i16>] %b, 1 + %3 = bitcast i16* %a to i8* + tail call void @llvm.aarch64.neon.vst1x2.v8i16(i8* %3, <8 x i16> %1, <8 x i16> %2, i32 2) + ret void +} + +define void @test_vst1q_s32_x2(i32* %a, [2 x <4 x i32>] %b) { +; CHECK-LABEL: test_vst1q_s32_x2 +; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] + %1 = extractvalue [2 x <4 x i32>] %b, 0 + %2 = extractvalue [2 x <4 x i32>] %b, 1 + %3 = bitcast i32* %a to i8* + tail call void @llvm.aarch64.neon.vst1x2.v4i32(i8* %3, <4 x i32> %1, <4 x i32> %2, i32 4) + ret void +} + +define void @test_vst1q_s64_x2(i64* %a, [2 x <2 x i64>] %b) { +; CHECK-LABEL: test_vst1q_s64_x2 +; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] + %1 = extractvalue [2 x <2 x i64>] %b, 0 + %2 = extractvalue [2 x <2 x i64>] %b, 1 + %3 = bitcast i64* %a to i8* + tail call void @llvm.aarch64.neon.vst1x2.v2i64(i8* %3, <2 x i64> %1, <2 x i64> %2, i32 8) + ret void +} + +define void @test_vst1q_f32_x2(float* %a, [2 x <4 x float>] %b) { +; CHECK-LABEL: test_vst1q_f32_x2 +; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] + %1 = extractvalue [2 x <4 x float>] %b, 0 + %2 = extractvalue [2 x <4 x float>] %b, 1 + %3 = bitcast float* %a to i8* + tail call void @llvm.aarch64.neon.vst1x2.v4f32(i8* %3, <4 x float> %1, <4 x float> %2, i32 4) + ret void +} + + +define void @test_vst1q_f64_x2(double* %a, [2 x <2 x double>] %b) { +; CHECK-LABEL: test_vst1q_f64_x2 +; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] + %1 = extractvalue [2 x <2 x double>] %b, 0 + %2 = extractvalue [2 x <2 x double>] %b, 1 + %3 = bitcast double* %a to i8* + tail call void @llvm.aarch64.neon.vst1x2.v2f64(i8* %3, <2 x double> %1, <2 x double> %2, i32 8) + ret void +} + +define void @test_vst1_s8_x2(i8* %a, [2 x <8 x i8>] %b) { +; CHECK-LABEL: test_vst1_s8_x2 +; CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, 
[{{x[0-9]+|sp}}] + %1 = extractvalue [2 x <8 x i8>] %b, 0 + %2 = extractvalue [2 x <8 x i8>] %b, 1 + tail call void @llvm.aarch64.neon.vst1x2.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, i32 1) + ret void +} + +define void @test_vst1_s16_x2(i16* %a, [2 x <4 x i16>] %b) { +; CHECK-LABEL: test_vst1_s16_x2 +; CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] + %1 = extractvalue [2 x <4 x i16>] %b, 0 + %2 = extractvalue [2 x <4 x i16>] %b, 1 + %3 = bitcast i16* %a to i8* + tail call void @llvm.aarch64.neon.vst1x2.v4i16(i8* %3, <4 x i16> %1, <4 x i16> %2, i32 2) + ret void +} + +define void @test_vst1_s32_x2(i32* %a, [2 x <2 x i32>] %b) { +; CHECK-LABEL: test_vst1_s32_x2 +; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] + %1 = extractvalue [2 x <2 x i32>] %b, 0 + %2 = extractvalue [2 x <2 x i32>] %b, 1 + %3 = bitcast i32* %a to i8* + tail call void @llvm.aarch64.neon.vst1x2.v2i32(i8* %3, <2 x i32> %1, <2 x i32> %2, i32 4) + ret void +} + +define void @test_vst1_s64_x2(i64* %a, [2 x <1 x i64>] %b) { +; CHECK-LABEL: test_vst1_s64_x2 +; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] + %1 = extractvalue [2 x <1 x i64>] %b, 0 + %2 = extractvalue [2 x <1 x i64>] %b, 1 + %3 = bitcast i64* %a to i8* + tail call void @llvm.aarch64.neon.vst1x2.v1i64(i8* %3, <1 x i64> %1, <1 x i64> %2, i32 8) + ret void +} + +define void @test_vst1_f32_x2(float* %a, [2 x <2 x float>] %b) { +; CHECK-LABEL: test_vst1_f32_x2 +; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] + %1 = extractvalue [2 x <2 x float>] %b, 0 + %2 = extractvalue [2 x <2 x float>] %b, 1 + %3 = bitcast float* %a to i8* + tail call void @llvm.aarch64.neon.vst1x2.v2f32(i8* %3, <2 x float> %1, <2 x float> %2, i32 4) + ret void +} + +define void @test_vst1_f64_x2(double* %a, [2 x <1 x double>] %b) { +; CHECK-LABEL: test_vst1_f64_x2 +; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] + %1 = extractvalue [2 x <1 x double>] %b, 0 + %2 = extractvalue [2 x <1 x double>] %b, 
1 + %3 = bitcast double* %a to i8* + tail call void @llvm.aarch64.neon.vst1x2.v1f64(i8* %3, <1 x double> %1, <1 x double> %2, i32 8) + ret void +} + +define void @test_vst1q_s8_x3(i8* %a, [3 x <16 x i8>] %b) { +; CHECK-LABEL: test_vst1q_s8_x3 +; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, +; [{{x[0-9]+|sp}}] + %1 = extractvalue [3 x <16 x i8>] %b, 0 + %2 = extractvalue [3 x <16 x i8>] %b, 1 + %3 = extractvalue [3 x <16 x i8>] %b, 2 + tail call void @llvm.aarch64.neon.vst1x3.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, <16 x i8> %3, i32 1) + ret void +} + +define void @test_vst1q_s16_x3(i16* %a, [3 x <8 x i16>] %b) { +; CHECK-LABEL: test_vst1q_s16_x3 +; CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, +; [{{x[0-9]+|sp}}] + %1 = extractvalue [3 x <8 x i16>] %b, 0 + %2 = extractvalue [3 x <8 x i16>] %b, 1 + %3 = extractvalue [3 x <8 x i16>] %b, 2 + %4 = bitcast i16* %a to i8* + tail call void @llvm.aarch64.neon.vst1x3.v8i16(i8* %4, <8 x i16> %1, <8 x i16> %2, <8 x i16> %3, i32 2) + ret void +} + +define void @test_vst1q_s32_x3(i32* %a, [3 x <4 x i32>] %b) { +; CHECK-LABEL: test_vst1q_s32_x3 +; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, +; [{{x[0-9]+|sp}}] + %1 = extractvalue [3 x <4 x i32>] %b, 0 + %2 = extractvalue [3 x <4 x i32>] %b, 1 + %3 = extractvalue [3 x <4 x i32>] %b, 2 + %4 = bitcast i32* %a to i8* + tail call void @llvm.aarch64.neon.vst1x3.v4i32(i8* %4, <4 x i32> %1, <4 x i32> %2, <4 x i32> %3, i32 4) + ret void +} + +define void @test_vst1q_s64_x3(i64* %a, [3 x <2 x i64>] %b) { +; CHECK-LABEL: test_vst1q_s64_x3 +; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, +; [{{x[0-9]+|sp}}] + %1 = extractvalue [3 x <2 x i64>] %b, 0 + %2 = extractvalue [3 x <2 x i64>] %b, 1 + %3 = extractvalue [3 x <2 x i64>] %b, 2 + %4 = bitcast i64* %a to i8* + tail call void @llvm.aarch64.neon.vst1x3.v2i64(i8* %4, <2 x i64> %1, <2 x i64> %2, <2 x i64> %3, i32 8) + ret void +} + +define void @test_vst1q_f32_x3(float* %a, 
[3 x <4 x float>] %b) { +; CHECK-LABEL: test_vst1q_f32_x3 +; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, +; [{{x[0-9]+|sp}}] + %1 = extractvalue [3 x <4 x float>] %b, 0 + %2 = extractvalue [3 x <4 x float>] %b, 1 + %3 = extractvalue [3 x <4 x float>] %b, 2 + %4 = bitcast float* %a to i8* + tail call void @llvm.aarch64.neon.vst1x3.v4f32(i8* %4, <4 x float> %1, <4 x float> %2, <4 x float> %3, i32 4) + ret void +} + +define void @test_vst1q_f64_x3(double* %a, [3 x <2 x double>] %b) { +; CHECK-LABEL: test_vst1q_f64_x3 +; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, +; [{{x[0-9]+|sp}}] + %1 = extractvalue [3 x <2 x double>] %b, 0 + %2 = extractvalue [3 x <2 x double>] %b, 1 + %3 = extractvalue [3 x <2 x double>] %b, 2 + %4 = bitcast double* %a to i8* + tail call void @llvm.aarch64.neon.vst1x3.v2f64(i8* %4, <2 x double> %1, <2 x double> %2, <2 x double> %3, i32 8) + ret void +} + +define void @test_vst1_s8_x3(i8* %a, [3 x <8 x i8>] %b) { +; CHECK-LABEL: test_vst1_s8_x3 +; CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, +; [{{x[0-9]+|sp}}] + %1 = extractvalue [3 x <8 x i8>] %b, 0 + %2 = extractvalue [3 x <8 x i8>] %b, 1 + %3 = extractvalue [3 x <8 x i8>] %b, 2 + tail call void @llvm.aarch64.neon.vst1x3.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, i32 1) + ret void +} + +define void @test_vst1_s16_x3(i16* %a, [3 x <4 x i16>] %b) { +; CHECK-LABEL: test_vst1_s16_x3 +; CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, +; [{{x[0-9]+|sp}}] + %1 = extractvalue [3 x <4 x i16>] %b, 0 + %2 = extractvalue [3 x <4 x i16>] %b, 1 + %3 = extractvalue [3 x <4 x i16>] %b, 2 + %4 = bitcast i16* %a to i8* + tail call void @llvm.aarch64.neon.vst1x3.v4i16(i8* %4, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 2) + ret void +} + +define void @test_vst1_s32_x3(i32* %a, [3 x <2 x i32>] %b) { +; CHECK-LABEL: test_vst1_s32_x3 +; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, +; [{{x[0-9]+|sp}}] + %1 = extractvalue [3 x <2 
x i32>] %b, 0 + %2 = extractvalue [3 x <2 x i32>] %b, 1 + %3 = extractvalue [3 x <2 x i32>] %b, 2 + %4 = bitcast i32* %a to i8* + tail call void @llvm.aarch64.neon.vst1x3.v2i32(i8* %4, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, i32 4) + ret void +} + +define void @test_vst1_s64_x3(i64* %a, [3 x <1 x i64>] %b) { +; CHECK-LABEL: test_vst1_s64_x3 +; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, +; [{{x[0-9]+|sp}}] + %1 = extractvalue [3 x <1 x i64>] %b, 0 + %2 = extractvalue [3 x <1 x i64>] %b, 1 + %3 = extractvalue [3 x <1 x i64>] %b, 2 + %4 = bitcast i64* %a to i8* + tail call void @llvm.aarch64.neon.vst1x3.v1i64(i8* %4, <1 x i64> %1, <1 x i64> %2, <1 x i64> %3, i32 8) + ret void +} + +define void @test_vst1_f32_x3(float* %a, [3 x <2 x float>] %b) { +; CHECK-LABEL: test_vst1_f32_x3 +; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, +; [{{x[0-9]+|sp}}] + %1 = extractvalue [3 x <2 x float>] %b, 0 + %2 = extractvalue [3 x <2 x float>] %b, 1 + %3 = extractvalue [3 x <2 x float>] %b, 2 + %4 = bitcast float* %a to i8* + tail call void @llvm.aarch64.neon.vst1x3.v2f32(i8* %4, <2 x float> %1, <2 x float> %2, <2 x float> %3, i32 4) + ret void +} + +define void @test_vst1_f64_x3(double* %a, [3 x <1 x double>] %b) { +; CHECK-LABEL: test_vst1_f64_x3 +; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, +; [{{x[0-9]+|sp}}] + %1 = extractvalue [3 x <1 x double>] %b, 0 + %2 = extractvalue [3 x <1 x double>] %b, 1 + %3 = extractvalue [3 x <1 x double>] %b, 2 + %4 = bitcast double* %a to i8* + tail call void @llvm.aarch64.neon.vst1x3.v1f64(i8* %4, <1 x double> %1, <1 x double> %2, <1 x double> %3, i32 8) + ret void +} + +define void @test_vst1q_s8_x4(i8* %a, [4 x <16 x i8>] %b) { +; CHECK-LABEL: test_vst1q_s8_x4 +; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, +; v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] + %1 = extractvalue [4 x <16 x i8>] %b, 0 + %2 = extractvalue [4 x <16 x i8>] %b, 1 + %3 = extractvalue [4 x <16 x i8>] %b, 2 + %4 = 
extractvalue [4 x <16 x i8>] %b, 3 + tail call void @llvm.aarch64.neon.vst1x4.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, <16 x i8> %3, <16 x i8> %4, i32 1) + ret void +} + +define void @test_vst1q_s16_x4(i16* %a, [4 x <8 x i16>] %b) { +; CHECK-LABEL: test_vst1q_s16_x4 +; CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, +; v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] + %1 = extractvalue [4 x <8 x i16>] %b, 0 + %2 = extractvalue [4 x <8 x i16>] %b, 1 + %3 = extractvalue [4 x <8 x i16>] %b, 2 + %4 = extractvalue [4 x <8 x i16>] %b, 3 + %5 = bitcast i16* %a to i8* + tail call void @llvm.aarch64.neon.vst1x4.v8i16(i8* %5, <8 x i16> %1, <8 x i16> %2, <8 x i16> %3, <8 x i16> %4, i32 2) + ret void +} + +define void @test_vst1q_s32_x4(i32* %a, [4 x <4 x i32>] %b) { +; CHECK-LABEL: test_vst1q_s32_x4 +; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, +; v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] + %1 = extractvalue [4 x <4 x i32>] %b, 0 + %2 = extractvalue [4 x <4 x i32>] %b, 1 + %3 = extractvalue [4 x <4 x i32>] %b, 2 + %4 = extractvalue [4 x <4 x i32>] %b, 3 + %5 = bitcast i32* %a to i8* + tail call void @llvm.aarch64.neon.vst1x4.v4i32(i8* %5, <4 x i32> %1, <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, i32 4) + ret void +} + +define void @test_vst1q_s64_x4(i64* %a, [4 x <2 x i64>] %b) { +; CHECK-LABEL: test_vst1q_s64_x4 +; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, +; v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] + %1 = extractvalue [4 x <2 x i64>] %b, 0 + %2 = extractvalue [4 x <2 x i64>] %b, 1 + %3 = extractvalue [4 x <2 x i64>] %b, 2 + %4 = extractvalue [4 x <2 x i64>] %b, 3 + %5 = bitcast i64* %a to i8* + tail call void @llvm.aarch64.neon.vst1x4.v2i64(i8* %5, <2 x i64> %1, <2 x i64> %2, <2 x i64> %3, <2 x i64> %4, i32 8) + ret void +} + +define void @test_vst1q_f32_x4(float* %a, [4 x <4 x float>] %b) { +; CHECK-LABEL: test_vst1q_f32_x4 +; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, +; v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] + %1 = extractvalue [4 x <4 x 
float>] %b, 0 + %2 = extractvalue [4 x <4 x float>] %b, 1 + %3 = extractvalue [4 x <4 x float>] %b, 2 + %4 = extractvalue [4 x <4 x float>] %b, 3 + %5 = bitcast float* %a to i8* + tail call void @llvm.aarch64.neon.vst1x4.v4f32(i8* %5, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4, i32 4) + ret void +} + +define void @test_vst1q_f64_x4(double* %a, [4 x <2 x double>] %b) { +; CHECK-LABEL: test_vst1q_f64_x4 +; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, +; v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] + %1 = extractvalue [4 x <2 x double>] %b, 0 + %2 = extractvalue [4 x <2 x double>] %b, 1 + %3 = extractvalue [4 x <2 x double>] %b, 2 + %4 = extractvalue [4 x <2 x double>] %b, 3 + %5 = bitcast double* %a to i8* + tail call void @llvm.aarch64.neon.vst1x4.v2f64(i8* %5, <2 x double> %1, <2 x double> %2, <2 x double> %3, <2 x double> %4, i32 8) + ret void +} + +define void @test_vst1_s8_x4(i8* %a, [4 x <8 x i8>] %b) { +; CHECK-LABEL: test_vst1_s8_x4 +; CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, +; v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] + %1 = extractvalue [4 x <8 x i8>] %b, 0 + %2 = extractvalue [4 x <8 x i8>] %b, 1 + %3 = extractvalue [4 x <8 x i8>] %b, 2 + %4 = extractvalue [4 x <8 x i8>] %b, 3 + tail call void @llvm.aarch64.neon.vst1x4.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, <8 x i8> %4, i32 1) + ret void +} + +define void @test_vst1_s16_x4(i16* %a, [4 x <4 x i16>] %b) { +; CHECK-LABEL: test_vst1_s16_x4 +; CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, +; v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] + %1 = extractvalue [4 x <4 x i16>] %b, 0 + %2 = extractvalue [4 x <4 x i16>] %b, 1 + %3 = extractvalue [4 x <4 x i16>] %b, 2 + %4 = extractvalue [4 x <4 x i16>] %b, 3 + %5 = bitcast i16* %a to i8* + tail call void @llvm.aarch64.neon.vst1x4.v4i16(i8* %5, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, <4 x i16> %4, i32 2) + ret void +} + +define void @test_vst1_s32_x4(i32* %a, [4 x <2 x i32>] %b) { +; CHECK-LABEL: test_vst1_s32_x4 +; 
CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, +; v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] + %1 = extractvalue [4 x <2 x i32>] %b, 0 + %2 = extractvalue [4 x <2 x i32>] %b, 1 + %3 = extractvalue [4 x <2 x i32>] %b, 2 + %4 = extractvalue [4 x <2 x i32>] %b, 3 + %5 = bitcast i32* %a to i8* + tail call void @llvm.aarch64.neon.vst1x4.v2i32(i8* %5, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, <2 x i32> %4, i32 4) + ret void +} + +define void @test_vst1_s64_x4(i64* %a, [4 x <1 x i64>] %b) { +; CHECK-LABEL: test_vst1_s64_x4 +; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, +; v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] + %1 = extractvalue [4 x <1 x i64>] %b, 0 + %2 = extractvalue [4 x <1 x i64>] %b, 1 + %3 = extractvalue [4 x <1 x i64>] %b, 2 + %4 = extractvalue [4 x <1 x i64>] %b, 3 + %5 = bitcast i64* %a to i8* + tail call void @llvm.aarch64.neon.vst1x4.v1i64(i8* %5, <1 x i64> %1, <1 x i64> %2, <1 x i64> %3, <1 x i64> %4, i32 8) + ret void +} + +define void @test_vst1_f32_x4(float* %a, [4 x <2 x float>] %b) { +; CHECK-LABEL: test_vst1_f32_x4 +; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, +; v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] + %1 = extractvalue [4 x <2 x float>] %b, 0 + %2 = extractvalue [4 x <2 x float>] %b, 1 + %3 = extractvalue [4 x <2 x float>] %b, 2 + %4 = extractvalue [4 x <2 x float>] %b, 3 + %5 = bitcast float* %a to i8* + tail call void @llvm.aarch64.neon.vst1x4.v2f32(i8* %5, <2 x float> %1, <2 x float> %2, <2 x float> %3, <2 x float> %4, i32 4) + ret void +} + +define void @test_vst1_f64_x4(double* %a, [4 x <1 x double>] %b) { +; CHECK-LABEL: test_vst1_f64_x4 +; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, +; v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] + %1 = extractvalue [4 x <1 x double>] %b, 0 + %2 = extractvalue [4 x <1 x double>] %b, 1 + %3 = extractvalue [4 x <1 x double>] %b, 2 + %4 = extractvalue [4 x <1 x double>] %b, 3 + %5 = bitcast double* %a to i8* + tail call void @llvm.aarch64.neon.vst1x4.v1f64(i8* %5, <1 x double> %1, 
<1 x double> %2, <1 x double> %3, <1 x double> %4, i32 8) + ret void +} + +declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x2.v16i8(i8*, i32) +declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x2.v8i16(i8*, i32) +declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x2.v4i32(i8*, i32) +declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x2.v2i64(i8*, i32) +declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x2.v4f32(i8*, i32) +declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x2.v2f64(i8*, i32) +declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x2.v8i8(i8*, i32) +declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x2.v4i16(i8*, i32) +declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x2.v2i32(i8*, i32) +declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x2.v1i64(i8*, i32) +declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x2.v2f32(i8*, i32) +declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x2.v1f64(i8*, i32) +declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x3.v16i8(i8*, i32) +declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x3.v8i16(i8*, i32) +declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x3.v4i32(i8*, i32) +declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x3.v2i64(i8*, i32) +declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x3.v4f32(i8*, i32) +declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x3.v2f64(i8*, i32) +declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x3.v8i8(i8*, i32) +declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x3.v4i16(i8*, i32) +declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x3.v2i32(i8*, i32) +declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x3.v1i64(i8*, i32) +declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x3.v2f32(i8*, i32) +declare { <1 x 
double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x3.v1f64(i8*, i32) +declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x4.v16i8(i8*, i32) +declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x4.v8i16(i8*, i32) +declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x4.v4i32(i8*, i32) +declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x4.v2i64(i8*, i32) +declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x4.v4f32(i8*, i32) +declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x4.v2f64(i8*, i32) +declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x4.v8i8(i8*, i32) +declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x4.v4i16(i8*, i32) +declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x4.v2i32(i8*, i32) +declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x4.v1i64(i8*, i32) +declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x4.v2f32(i8*, i32) +declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x4.v1f64(i8*, i32) +declare void @llvm.aarch64.neon.vst1x2.v16i8(i8*, <16 x i8>, <16 x i8>, i32) +declare void @llvm.aarch64.neon.vst1x2.v8i16(i8*, <8 x i16>, <8 x i16>, i32) +declare void @llvm.aarch64.neon.vst1x2.v4i32(i8*, <4 x i32>, <4 x i32>, i32) +declare void @llvm.aarch64.neon.vst1x2.v2i64(i8*, <2 x i64>, <2 x i64>, i32) +declare void @llvm.aarch64.neon.vst1x2.v4f32(i8*, <4 x float>, <4 x float>, i32) +declare void @llvm.aarch64.neon.vst1x2.v2f64(i8*, <2 x double>, <2 x double>, i32) +declare void @llvm.aarch64.neon.vst1x2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) +declare void @llvm.aarch64.neon.vst1x2.v4i16(i8*, <4 x i16>, <4 x i16>, i32) +declare void @llvm.aarch64.neon.vst1x2.v2i32(i8*, <2 x i32>, <2 x i32>, i32) 
+declare void @llvm.aarch64.neon.vst1x2.v1i64(i8*, <1 x i64>, <1 x i64>, i32) +declare void @llvm.aarch64.neon.vst1x2.v2f32(i8*, <2 x float>, <2 x float>, i32) +declare void @llvm.aarch64.neon.vst1x2.v1f64(i8*, <1 x double>, <1 x double>, i32) +declare void @llvm.aarch64.neon.vst1x3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32) +declare void @llvm.aarch64.neon.vst1x3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) +declare void @llvm.aarch64.neon.vst1x3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) +declare void @llvm.aarch64.neon.vst1x3.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32) +declare void @llvm.aarch64.neon.vst1x3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) +declare void @llvm.aarch64.neon.vst1x3.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32) +declare void @llvm.aarch64.neon.vst1x3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) +declare void @llvm.aarch64.neon.vst1x3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) +declare void @llvm.aarch64.neon.vst1x3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) +declare void @llvm.aarch64.neon.vst1x3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32) +declare void @llvm.aarch64.neon.vst1x3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) +declare void @llvm.aarch64.neon.vst1x3.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32) +declare void @llvm.aarch64.neon.vst1x4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32) +declare void @llvm.aarch64.neon.vst1x4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) +declare void @llvm.aarch64.neon.vst1x4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) +declare void @llvm.aarch64.neon.vst1x4.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32) +declare void @llvm.aarch64.neon.vst1x4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) +declare void @llvm.aarch64.neon.vst1x4.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32) +declare void 
@llvm.aarch64.neon.vst1x4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) +declare void @llvm.aarch64.neon.vst1x4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) +declare void @llvm.aarch64.neon.vst1x4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) +declare void @llvm.aarch64.neon.vst1x4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32) +declare void @llvm.aarch64.neon.vst1x4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) +declare void @llvm.aarch64.neon.vst1x4.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32) diff --git a/test/CodeGen/AArch64/neon-simd-ldst-one.ll b/test/CodeGen/AArch64/neon-simd-ldst-one.ll new file mode 100644 index 0000000..3f28320 --- /dev/null +++ b/test/CodeGen/AArch64/neon-simd-ldst-one.ll @@ -0,0 +1,2113 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +%struct.int8x16x2_t = type { [2 x <16 x i8>] } +%struct.int16x8x2_t = type { [2 x <8 x i16>] } +%struct.int32x4x2_t = type { [2 x <4 x i32>] } +%struct.int64x2x2_t = type { [2 x <2 x i64>] } +%struct.float32x4x2_t = type { [2 x <4 x float>] } +%struct.float64x2x2_t = type { [2 x <2 x double>] } +%struct.int8x8x2_t = type { [2 x <8 x i8>] } +%struct.int16x4x2_t = type { [2 x <4 x i16>] } +%struct.int32x2x2_t = type { [2 x <2 x i32>] } +%struct.int64x1x2_t = type { [2 x <1 x i64>] } +%struct.float32x2x2_t = type { [2 x <2 x float>] } +%struct.float64x1x2_t = type { [2 x <1 x double>] } +%struct.int8x16x3_t = type { [3 x <16 x i8>] } +%struct.int16x8x3_t = type { [3 x <8 x i16>] } +%struct.int32x4x3_t = type { [3 x <4 x i32>] } +%struct.int64x2x3_t = type { [3 x <2 x i64>] } +%struct.float32x4x3_t = type { [3 x <4 x float>] } +%struct.float64x2x3_t = type { [3 x <2 x double>] } +%struct.int8x8x3_t = type { [3 x <8 x i8>] } +%struct.int16x4x3_t = type { [3 x <4 x i16>] } +%struct.int32x2x3_t = type { [3 x <2 x i32>] } +%struct.int64x1x3_t = type { [3 x <1 x 
i64>] } +%struct.float32x2x3_t = type { [3 x <2 x float>] } +%struct.float64x1x3_t = type { [3 x <1 x double>] } +%struct.int8x16x4_t = type { [4 x <16 x i8>] } +%struct.int16x8x4_t = type { [4 x <8 x i16>] } +%struct.int32x4x4_t = type { [4 x <4 x i32>] } +%struct.int64x2x4_t = type { [4 x <2 x i64>] } +%struct.float32x4x4_t = type { [4 x <4 x float>] } +%struct.float64x2x4_t = type { [4 x <2 x double>] } +%struct.int8x8x4_t = type { [4 x <8 x i8>] } +%struct.int16x4x4_t = type { [4 x <4 x i16>] } +%struct.int32x2x4_t = type { [4 x <2 x i32>] } +%struct.int64x1x4_t = type { [4 x <1 x i64>] } +%struct.float32x2x4_t = type { [4 x <2 x float>] } +%struct.float64x1x4_t = type { [4 x <1 x double>] } + +define <16 x i8> @test_vld1q_dup_s8(i8* %a) { +; CHECK-LABEL: test_vld1q_dup_s8 +; CHECK: ld1r {{{v[0-9]+}}.16b}, [x0] +entry: + %0 = load i8* %a, align 1 + %1 = insertelement <16 x i8> undef, i8 %0, i32 0 + %lane = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer + ret <16 x i8> %lane +} + +define <8 x i16> @test_vld1q_dup_s16(i16* %a) { +; CHECK-LABEL: test_vld1q_dup_s16 +; CHECK: ld1r {{{v[0-9]+}}.8h}, [x0] +entry: + %0 = load i16* %a, align 2 + %1 = insertelement <8 x i16> undef, i16 %0, i32 0 + %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer + ret <8 x i16> %lane +} + +define <4 x i32> @test_vld1q_dup_s32(i32* %a) { +; CHECK-LABEL: test_vld1q_dup_s32 +; CHECK: ld1r {{{v[0-9]+}}.4s}, [x0] +entry: + %0 = load i32* %a, align 4 + %1 = insertelement <4 x i32> undef, i32 %0, i32 0 + %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer + ret <4 x i32> %lane +} + +define <2 x i64> @test_vld1q_dup_s64(i64* %a) { +; CHECK-LABEL: test_vld1q_dup_s64 +; CHECK: ld1r {{{v[0-9]+}}.2d}, [x0] +entry: + %0 = load i64* %a, align 8 + %1 = insertelement <2 x i64> undef, i64 %0, i32 0 + %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %lane +} + +define <4 x float> 
@test_vld1q_dup_f32(float* %a) { +; CHECK-LABEL: test_vld1q_dup_f32 +; CHECK: ld1r {{{v[0-9]+}}.4s}, [x0] +entry: + %0 = load float* %a, align 4 + %1 = insertelement <4 x float> undef, float %0, i32 0 + %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer + ret <4 x float> %lane +} + +define <2 x double> @test_vld1q_dup_f64(double* %a) { +; CHECK-LABEL: test_vld1q_dup_f64 +; CHECK: ld1r {{{v[0-9]+}}.2d}, [x0] +entry: + %0 = load double* %a, align 8 + %1 = insertelement <2 x double> undef, double %0, i32 0 + %lane = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> zeroinitializer + ret <2 x double> %lane +} + +define <8 x i8> @test_vld1_dup_s8(i8* %a) { +; CHECK-LABEL: test_vld1_dup_s8 +; CHECK: ld1r {{{v[0-9]+}}.8b}, [x0] +entry: + %0 = load i8* %a, align 1 + %1 = insertelement <8 x i8> undef, i8 %0, i32 0 + %lane = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer + ret <8 x i8> %lane +} + +define <4 x i16> @test_vld1_dup_s16(i16* %a) { +; CHECK-LABEL: test_vld1_dup_s16 +; CHECK: ld1r {{{v[0-9]+}}.4h}, [x0] +entry: + %0 = load i16* %a, align 2 + %1 = insertelement <4 x i16> undef, i16 %0, i32 0 + %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer + ret <4 x i16> %lane +} + +define <2 x i32> @test_vld1_dup_s32(i32* %a) { +; CHECK-LABEL: test_vld1_dup_s32 +; CHECK: ld1r {{{v[0-9]+}}.2s}, [x0] +entry: + %0 = load i32* %a, align 4 + %1 = insertelement <2 x i32> undef, i32 %0, i32 0 + %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer + ret <2 x i32> %lane +} + +define <1 x i64> @test_vld1_dup_s64(i64* %a) { +; CHECK-LABEL: test_vld1_dup_s64 +; CHECK: ld1r {{{v[0-9]+}}.1d}, [x0] +entry: + %0 = load i64* %a, align 8 + %1 = insertelement <1 x i64> undef, i64 %0, i32 0 + ret <1 x i64> %1 +} + +define <2 x float> @test_vld1_dup_f32(float* %a) { +; CHECK-LABEL: test_vld1_dup_f32 +; CHECK: ld1r {{{v[0-9]+}}.2s}, [x0] +entry: + %0 = load float* %a, align 4 + %1 
= insertelement <2 x float> undef, float %0, i32 0 + %lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> zeroinitializer + ret <2 x float> %lane +} + +define <1 x double> @test_vld1_dup_f64(double* %a) { +; CHECK-LABEL: test_vld1_dup_f64 +; CHECK: ld1r {{{v[0-9]+}}.1d}, [x0] +entry: + %0 = load double* %a, align 8 + %1 = insertelement <1 x double> undef, double %0, i32 0 + ret <1 x double> %1 +} + +define %struct.int8x16x2_t @test_vld2q_dup_s8(i8* %a) { +; CHECK-LABEL: test_vld2q_dup_s8 +; CHECK: ld2r {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, [x0] +entry: + %vld_dup = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %a, <16 x i8> undef, <16 x i8> undef, i32 0, i32 1) + %0 = extractvalue { <16 x i8>, <16 x i8> } %vld_dup, 0 + %lane = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer + %1 = extractvalue { <16 x i8>, <16 x i8> } %vld_dup, 1 + %lane1 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer + %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %lane1, 0, 1 + ret %struct.int8x16x2_t %.fca.0.1.insert +} + +define %struct.int16x8x2_t @test_vld2q_dup_s16(i16* %a) { +; CHECK-LABEL: test_vld2q_dup_s16 +; CHECK: ld2r {{{v[0-9]+}}.8h, {{v[0-9]+}}.8h}, [x0] +entry: + %0 = bitcast i16* %a to i8* + %vld_dup = tail call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16(i8* %0, <8 x i16> undef, <8 x i16> undef, i32 0, i32 2) + %1 = extractvalue { <8 x i16>, <8 x i16> } %vld_dup, 0 + %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer + %2 = extractvalue { <8 x i16>, <8 x i16> } %vld_dup, 1 + %lane1 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> zeroinitializer + %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %lane1, 0, 1 + ret 
%struct.int16x8x2_t %.fca.0.1.insert +} + +define %struct.int32x4x2_t @test_vld2q_dup_s32(i32* %a) { +; CHECK-LABEL: test_vld2q_dup_s32 +; CHECK: ld2r {{{v[0-9]+}}.4s, {{v[0-9]+}}.4s}, [x0] +entry: + %0 = bitcast i32* %a to i8* + %vld_dup = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8* %0, <4 x i32> undef, <4 x i32> undef, i32 0, i32 4) + %1 = extractvalue { <4 x i32>, <4 x i32> } %vld_dup, 0 + %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer + %2 = extractvalue { <4 x i32>, <4 x i32> } %vld_dup, 1 + %lane1 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer + %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %lane1, 0, 1 + ret %struct.int32x4x2_t %.fca.0.1.insert +} + +define %struct.int64x2x2_t @test_vld2q_dup_s64(i64* %a) { +; CHECK-LABEL: test_vld2q_dup_s64 +; CHECK: ld2r {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [x0] +entry: + %0 = bitcast i64* %a to i8* + %vld_dup = tail call { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2lane.v2i64(i8* %0, <2 x i64> undef, <2 x i64> undef, i32 0, i32 8) + %1 = extractvalue { <2 x i64>, <2 x i64> } %vld_dup, 0 + %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer + %2 = extractvalue { <2 x i64>, <2 x i64> } %vld_dup, 1 + %lane1 = shufflevector <2 x i64> %2, <2 x i64> undef, <2 x i32> zeroinitializer + %.fca.0.0.insert = insertvalue %struct.int64x2x2_t undef, <2 x i64> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int64x2x2_t %.fca.0.0.insert, <2 x i64> %lane1, 0, 1 + ret %struct.int64x2x2_t %.fca.0.1.insert +} + +define %struct.float32x4x2_t @test_vld2q_dup_f32(float* %a) { +; CHECK-LABEL: test_vld2q_dup_f32 +; CHECK: ld2r {{{v[0-9]+}}.4s, {{v[0-9]+}}.4s}, [x0] +entry: + %0 = bitcast float* %a to i8* + %vld_dup = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2lane.v4f32(i8* %0, <4 x float> undef, <4 x float> 
undef, i32 0, i32 4) + %1 = extractvalue { <4 x float>, <4 x float> } %vld_dup, 0 + %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer + %2 = extractvalue { <4 x float>, <4 x float> } %vld_dup, 1 + %lane1 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> zeroinitializer + %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %lane1, 0, 1 + ret %struct.float32x4x2_t %.fca.0.1.insert +} + +define %struct.float64x2x2_t @test_vld2q_dup_f64(double* %a) { +; CHECK-LABEL: test_vld2q_dup_f64 +; CHECK: ld2r {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [x0] +entry: + %0 = bitcast double* %a to i8* + %vld_dup = tail call { <2 x double>, <2 x double> } @llvm.arm.neon.vld2lane.v2f64(i8* %0, <2 x double> undef, <2 x double> undef, i32 0, i32 8) + %1 = extractvalue { <2 x double>, <2 x double> } %vld_dup, 0 + %lane = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> zeroinitializer + %2 = extractvalue { <2 x double>, <2 x double> } %vld_dup, 1 + %lane1 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer + %.fca.0.0.insert = insertvalue %struct.float64x2x2_t undef, <2 x double> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float64x2x2_t %.fca.0.0.insert, <2 x double> %lane1, 0, 1 + ret %struct.float64x2x2_t %.fca.0.1.insert +} + +define %struct.int8x8x2_t @test_vld2_dup_s8(i8* %a) { +; CHECK-LABEL: test_vld2_dup_s8 +; CHECK: ld2r {{{v[0-9]+}}.8b, {{v[0-9]+}}.8b}, [x0] +entry: + %vld_dup = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8* %a, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) + %0 = extractvalue { <8 x i8>, <8 x i8> } %vld_dup, 0 + %lane = shufflevector <8 x i8> %0, <8 x i8> undef, <8 x i32> zeroinitializer + %1 = extractvalue { <8 x i8>, <8 x i8> } %vld_dup, 1 + %lane1 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer + %.fca.0.0.insert = 
insertvalue %struct.int8x8x2_t undef, <8 x i8> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %lane1, 0, 1 + ret %struct.int8x8x2_t %.fca.0.1.insert +} + +define %struct.int16x4x2_t @test_vld2_dup_s16(i16* %a) { +; CHECK-LABEL: test_vld2_dup_s16 +; CHECK: ld2r {{{v[0-9]+}}.4h, {{v[0-9]+}}.4h}, [x0] +entry: + %0 = bitcast i16* %a to i8* + %vld_dup = tail call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16(i8* %0, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) + %1 = extractvalue { <4 x i16>, <4 x i16> } %vld_dup, 0 + %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer + %2 = extractvalue { <4 x i16>, <4 x i16> } %vld_dup, 1 + %lane1 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> zeroinitializer + %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %lane1, 0, 1 + ret %struct.int16x4x2_t %.fca.0.1.insert +} + +define %struct.int32x2x2_t @test_vld2_dup_s32(i32* %a) { +; CHECK-LABEL: test_vld2_dup_s32 +; CHECK: ld2r {{{v[0-9]+}}.2s, {{v[0-9]+}}.2s}, [x0] +entry: + %0 = bitcast i32* %a to i8* + %vld_dup = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32(i8* %0, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4) + %1 = extractvalue { <2 x i32>, <2 x i32> } %vld_dup, 0 + %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer + %2 = extractvalue { <2 x i32>, <2 x i32> } %vld_dup, 1 + %lane1 = shufflevector <2 x i32> %2, <2 x i32> undef, <2 x i32> zeroinitializer + %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %lane1, 0, 1 + ret %struct.int32x2x2_t %.fca.0.1.insert +} + +define %struct.int64x1x2_t @test_vld2_dup_s64(i64* %a) { +; CHECK-LABEL: test_vld2_dup_s64 +; CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [x0] +entry: + %0 = 
bitcast i64* %a to i8* + %vld_dup = tail call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8* %0, i32 8) + %vld_dup.fca.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld_dup, 0 + %vld_dup.fca.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld_dup, 1 + %.fca.0.0.insert = insertvalue %struct.int64x1x2_t undef, <1 x i64> %vld_dup.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int64x1x2_t %.fca.0.0.insert, <1 x i64> %vld_dup.fca.1.extract, 0, 1 + ret %struct.int64x1x2_t %.fca.0.1.insert +} + +define %struct.float32x2x2_t @test_vld2_dup_f32(float* %a) { +; CHECK-LABEL: test_vld2_dup_f32 +; CHECK: ld2r {{{v[0-9]+}}.2s, {{v[0-9]+}}.2s}, [x0] +entry: + %0 = bitcast float* %a to i8* + %vld_dup = tail call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2lane.v2f32(i8* %0, <2 x float> undef, <2 x float> undef, i32 0, i32 4) + %1 = extractvalue { <2 x float>, <2 x float> } %vld_dup, 0 + %lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> zeroinitializer + %2 = extractvalue { <2 x float>, <2 x float> } %vld_dup, 1 + %lane1 = shufflevector <2 x float> %2, <2 x float> undef, <2 x i32> zeroinitializer + %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %lane1, 0, 1 + ret %struct.float32x2x2_t %.fca.0.1.insert +} + +define %struct.float64x1x2_t @test_vld2_dup_f64(double* %a) { +; CHECK-LABEL: test_vld2_dup_f64 +; CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [x0] +entry: + %0 = bitcast double* %a to i8* + %vld_dup = tail call { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8* %0, i32 8) + %vld_dup.fca.0.extract = extractvalue { <1 x double>, <1 x double> } %vld_dup, 0 + %vld_dup.fca.1.extract = extractvalue { <1 x double>, <1 x double> } %vld_dup, 1 + %.fca.0.0.insert = insertvalue %struct.float64x1x2_t undef, <1 x double> %vld_dup.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float64x1x2_t 
%.fca.0.0.insert, <1 x double> %vld_dup.fca.1.extract, 0, 1 + ret %struct.float64x1x2_t %.fca.0.1.insert +} + +define %struct.int8x16x3_t @test_vld3q_dup_s8(i8* %a) { +; CHECK-LABEL: test_vld3q_dup_s8 +; CHECK: ld3r {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, [x0] +entry: + %vld_dup = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3lane.v16i8(i8* %a, <16 x i8> undef, <16 x i8> undef, <16 x i8> undef, i32 0, i32 1) + %0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 0 + %lane = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer + %1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 1 + %lane1 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer + %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 2 + %lane2 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer + %.fca.0.0.insert = insertvalue %struct.int8x16x3_t undef, <16 x i8> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x16x3_t %.fca.0.0.insert, <16 x i8> %lane1, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int8x16x3_t %.fca.0.1.insert, <16 x i8> %lane2, 0, 2 + ret %struct.int8x16x3_t %.fca.0.2.insert +} + +define %struct.int16x8x3_t @test_vld3q_dup_s16(i16* %a) { +; CHECK-LABEL: test_vld3q_dup_s16 +; CHECK: ld3r {{{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h}, [x0] +entry: + %0 = bitcast i16* %a to i8* + %vld_dup = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16(i8* %0, <8 x i16> undef, <8 x i16> undef, <8 x i16> undef, i32 0, i32 2) + %1 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 0 + %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer + %2 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 1 + %lane1 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> zeroinitializer + %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 2 + %lane2 = shufflevector <8 x i16> %3, <8 x i16> 
undef, <8 x i32> zeroinitializer + %.fca.0.0.insert = insertvalue %struct.int16x8x3_t undef, <8 x i16> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x8x3_t %.fca.0.0.insert, <8 x i16> %lane1, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int16x8x3_t %.fca.0.1.insert, <8 x i16> %lane2, 0, 2 + ret %struct.int16x8x3_t %.fca.0.2.insert +} + +define %struct.int32x4x3_t @test_vld3q_dup_s32(i32* %a) { +; CHECK-LABEL: test_vld3q_dup_s32 +; CHECK: ld3r {{{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s}, [x0] +entry: + %0 = bitcast i32* %a to i8* + %vld_dup = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8* %0, <4 x i32> undef, <4 x i32> undef, <4 x i32> undef, i32 0, i32 4) + %1 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 0 + %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer + %2 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 1 + %lane1 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer + %3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 2 + %lane2 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer + %.fca.0.0.insert = insertvalue %struct.int32x4x3_t undef, <4 x i32> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x4x3_t %.fca.0.0.insert, <4 x i32> %lane1, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int32x4x3_t %.fca.0.1.insert, <4 x i32> %lane2, 0, 2 + ret %struct.int32x4x3_t %.fca.0.2.insert +} + +define %struct.int64x2x3_t @test_vld3q_dup_s64(i64* %a) { +; CHECK-LABEL: test_vld3q_dup_s64 +; CHECK: ld3r {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [x0] +entry: + %0 = bitcast i64* %a to i8* + %vld_dup = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3lane.v2i64(i8* %0, <2 x i64> undef, <2 x i64> undef, <2 x i64> undef, i32 0, i32 8) + %1 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 0 + %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer + %2 
= extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 1 + %lane1 = shufflevector <2 x i64> %2, <2 x i64> undef, <2 x i32> zeroinitializer + %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 2 + %lane2 = shufflevector <2 x i64> %3, <2 x i64> undef, <2 x i32> zeroinitializer + %.fca.0.0.insert = insertvalue %struct.int64x2x3_t undef, <2 x i64> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int64x2x3_t %.fca.0.0.insert, <2 x i64> %lane1, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int64x2x3_t %.fca.0.1.insert, <2 x i64> %lane2, 0, 2 + ret %struct.int64x2x3_t %.fca.0.2.insert +} + +define %struct.float32x4x3_t @test_vld3q_dup_f32(float* %a) { +; CHECK-LABEL: test_vld3q_dup_f32 +; CHECK: ld3r {{{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s}, [x0] +entry: + %0 = bitcast float* %a to i8* + %vld_dup = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3lane.v4f32(i8* %0, <4 x float> undef, <4 x float> undef, <4 x float> undef, i32 0, i32 4) + %1 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld_dup, 0 + %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer + %2 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld_dup, 1 + %lane1 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> zeroinitializer + %3 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld_dup, 2 + %lane2 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> zeroinitializer + %.fca.0.0.insert = insertvalue %struct.float32x4x3_t undef, <4 x float> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x4x3_t %.fca.0.0.insert, <4 x float> %lane1, 0, 1 + %.fca.0.2.insert = insertvalue %struct.float32x4x3_t %.fca.0.1.insert, <4 x float> %lane2, 0, 2 + ret %struct.float32x4x3_t %.fca.0.2.insert +} + +define %struct.float64x2x3_t @test_vld3q_dup_f64(double* %a) { +; CHECK-LABEL: test_vld3q_dup_f64 +; CHECK: ld3r {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [x0] +entry: + %0 = bitcast 
double* %a to i8* + %vld_dup = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3lane.v2f64(i8* %0, <2 x double> undef, <2 x double> undef, <2 x double> undef, i32 0, i32 8) + %1 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld_dup, 0 + %lane = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> zeroinitializer + %2 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld_dup, 1 + %lane1 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer + %3 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld_dup, 2 + %lane2 = shufflevector <2 x double> %3, <2 x double> undef, <2 x i32> zeroinitializer + %.fca.0.0.insert = insertvalue %struct.float64x2x3_t undef, <2 x double> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float64x2x3_t %.fca.0.0.insert, <2 x double> %lane1, 0, 1 + %.fca.0.2.insert = insertvalue %struct.float64x2x3_t %.fca.0.1.insert, <2 x double> %lane2, 0, 2 + ret %struct.float64x2x3_t %.fca.0.2.insert +} + +define %struct.int8x8x3_t @test_vld3_dup_s8(i8* %a) { +; CHECK-LABEL: test_vld3_dup_s8 +; CHECK: ld3r {{{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b}, [x0] +entry: + %vld_dup = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) + %0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 0 + %lane = shufflevector <8 x i8> %0, <8 x i8> undef, <8 x i32> zeroinitializer + %1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 1 + %lane1 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer + %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 2 + %lane2 = shufflevector <8 x i8> %2, <8 x i8> undef, <8 x i32> zeroinitializer + %.fca.0.0.insert = insertvalue %struct.int8x8x3_t undef, <8 x i8> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x8x3_t %.fca.0.0.insert, <8 x i8> %lane1, 0, 1 + %.fca.0.2.insert = insertvalue 
%struct.int8x8x3_t %.fca.0.1.insert, <8 x i8> %lane2, 0, 2 + ret %struct.int8x8x3_t %.fca.0.2.insert +} + +define %struct.int16x4x3_t @test_vld3_dup_s16(i16* %a) { +; CHECK-LABEL: test_vld3_dup_s16 +; CHECK: ld3r {{{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h}, [x0] +entry: + %0 = bitcast i16* %a to i8* + %vld_dup = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8* %0, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) + %1 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 0 + %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer + %2 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 1 + %lane1 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> zeroinitializer + %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 2 + %lane2 = shufflevector <4 x i16> %3, <4 x i16> undef, <4 x i32> zeroinitializer + %.fca.0.0.insert = insertvalue %struct.int16x4x3_t undef, <4 x i16> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x4x3_t %.fca.0.0.insert, <4 x i16> %lane1, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int16x4x3_t %.fca.0.1.insert, <4 x i16> %lane2, 0, 2 + ret %struct.int16x4x3_t %.fca.0.2.insert +} + +define %struct.int32x2x3_t @test_vld3_dup_s32(i32* %a) { +; CHECK-LABEL: test_vld3_dup_s32 +; CHECK: ld3r {{{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s}, [x0] +entry: + %0 = bitcast i32* %a to i8* + %vld_dup = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32(i8* %0, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4) + %1 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 0 + %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer + %2 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 1 + %lane1 = shufflevector <2 x i32> %2, <2 x i32> undef, <2 x i32> zeroinitializer + %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 2 + %lane2 = shufflevector <2 x i32> 
%3, <2 x i32> undef, <2 x i32> zeroinitializer + %.fca.0.0.insert = insertvalue %struct.int32x2x3_t undef, <2 x i32> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x2x3_t %.fca.0.0.insert, <2 x i32> %lane1, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int32x2x3_t %.fca.0.1.insert, <2 x i32> %lane2, 0, 2 + ret %struct.int32x2x3_t %.fca.0.2.insert +} + +define %struct.int64x1x3_t @test_vld3_dup_s64(i64* %a) { +; CHECK-LABEL: test_vld3_dup_s64 +; CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [x0] +entry: + %0 = bitcast i64* %a to i8* + %vld_dup = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8* %0, i32 8) + %vld_dup.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 0 + %vld_dup.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 1 + %vld_dup.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 2 + %.fca.0.0.insert = insertvalue %struct.int64x1x3_t undef, <1 x i64> %vld_dup.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int64x1x3_t %.fca.0.0.insert, <1 x i64> %vld_dup.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int64x1x3_t %.fca.0.1.insert, <1 x i64> %vld_dup.fca.2.extract, 0, 2 + ret %struct.int64x1x3_t %.fca.0.2.insert +} + +define %struct.float32x2x3_t @test_vld3_dup_f32(float* %a) { +; CHECK-LABEL: test_vld3_dup_f32 +; CHECK: ld3r {{{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s}, [x0] +entry: + %0 = bitcast float* %a to i8* + %vld_dup = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8* %0, <2 x float> undef, <2 x float> undef, <2 x float> undef, i32 0, i32 4) + %1 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld_dup, 0 + %lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> zeroinitializer + %2 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld_dup, 1 + %lane1 = shufflevector <2 x float> %2, <2 x float> undef, <2 x i32> zeroinitializer + %3 
= extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld_dup, 2 + %lane2 = shufflevector <2 x float> %3, <2 x float> undef, <2 x i32> zeroinitializer + %.fca.0.0.insert = insertvalue %struct.float32x2x3_t undef, <2 x float> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x2x3_t %.fca.0.0.insert, <2 x float> %lane1, 0, 1 + %.fca.0.2.insert = insertvalue %struct.float32x2x3_t %.fca.0.1.insert, <2 x float> %lane2, 0, 2 + ret %struct.float32x2x3_t %.fca.0.2.insert +} + +define %struct.float64x1x3_t @test_vld3_dup_f64(double* %a) { +; CHECK-LABEL: test_vld3_dup_f64 +; CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [x0] +entry: + %0 = bitcast double* %a to i8* + %vld_dup = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8* %0, i32 8) + %vld_dup.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld_dup, 0 + %vld_dup.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld_dup, 1 + %vld_dup.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld_dup, 2 + %.fca.0.0.insert = insertvalue %struct.float64x1x3_t undef, <1 x double> %vld_dup.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float64x1x3_t %.fca.0.0.insert, <1 x double> %vld_dup.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.float64x1x3_t %.fca.0.1.insert, <1 x double> %vld_dup.fca.2.extract, 0, 2 + ret %struct.float64x1x3_t %.fca.0.2.insert +} + +define %struct.int8x16x4_t @test_vld4q_dup_s8(i8* %a) { +; CHECK-LABEL: test_vld4q_dup_s8 +; CHECK: ld4r {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, [x0] +entry: + %vld_dup = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4lane.v16i8(i8* %a, <16 x i8> undef, <16 x i8> undef, <16 x i8> undef, <16 x i8> undef, i32 0, i32 1) + %0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 0 + %lane = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x 
i32> zeroinitializer + %1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 1 + %lane1 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer + %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 2 + %lane2 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer + %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 3 + %lane3 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> zeroinitializer + %.fca.0.0.insert = insertvalue %struct.int8x16x4_t undef, <16 x i8> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x16x4_t %.fca.0.0.insert, <16 x i8> %lane1, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int8x16x4_t %.fca.0.1.insert, <16 x i8> %lane2, 0, 2 + %.fca.0.3.insert = insertvalue %struct.int8x16x4_t %.fca.0.2.insert, <16 x i8> %lane3, 0, 3 + ret %struct.int8x16x4_t %.fca.0.3.insert +} + +define %struct.int16x8x4_t @test_vld4q_dup_s16(i16* %a) { +; CHECK-LABEL: test_vld4q_dup_s16 +; CHECK: ld4r {{{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h}, [x0] +entry: + %0 = bitcast i16* %a to i8* + %vld_dup = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16(i8* %0, <8 x i16> undef, <8 x i16> undef, <8 x i16> undef, <8 x i16> undef, i32 0, i32 2) + %1 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 0 + %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer + %2 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 1 + %lane1 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> zeroinitializer + %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 2 + %lane2 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer + %4 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 3 + %lane3 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> zeroinitializer + %.fca.0.0.insert = 
insertvalue %struct.int16x8x4_t undef, <8 x i16> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x8x4_t %.fca.0.0.insert, <8 x i16> %lane1, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int16x8x4_t %.fca.0.1.insert, <8 x i16> %lane2, 0, 2 + %.fca.0.3.insert = insertvalue %struct.int16x8x4_t %.fca.0.2.insert, <8 x i16> %lane3, 0, 3 + ret %struct.int16x8x4_t %.fca.0.3.insert +} + +define %struct.int32x4x4_t @test_vld4q_dup_s32(i32* %a) { +; CHECK-LABEL: test_vld4q_dup_s32 +; CHECK: ld4r {{{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s}, [x0] +entry: + %0 = bitcast i32* %a to i8* + %vld_dup = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8* %0, <4 x i32> undef, <4 x i32> undef, <4 x i32> undef, <4 x i32> undef, i32 0, i32 4) + %1 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 0 + %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer + %2 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 1 + %lane1 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer + %3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 2 + %lane2 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer + %4 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 3 + %lane3 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> zeroinitializer + %.fca.0.0.insert = insertvalue %struct.int32x4x4_t undef, <4 x i32> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x4x4_t %.fca.0.0.insert, <4 x i32> %lane1, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int32x4x4_t %.fca.0.1.insert, <4 x i32> %lane2, 0, 2 + %.fca.0.3.insert = insertvalue %struct.int32x4x4_t %.fca.0.2.insert, <4 x i32> %lane3, 0, 3 + ret %struct.int32x4x4_t %.fca.0.3.insert +} + +define %struct.int64x2x4_t @test_vld4q_dup_s64(i64* %a) { +; CHECK-LABEL: test_vld4q_dup_s64 +; CHECK: ld4r {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, 
{{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [x0] +entry: + %0 = bitcast i64* %a to i8* + %vld_dup = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4lane.v2i64(i8* %0, <2 x i64> undef, <2 x i64> undef, <2 x i64> undef, <2 x i64> undef, i32 0, i32 8) + %1 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 0 + %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer + %2 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 1 + %lane1 = shufflevector <2 x i64> %2, <2 x i64> undef, <2 x i32> zeroinitializer + %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 2 + %lane2 = shufflevector <2 x i64> %3, <2 x i64> undef, <2 x i32> zeroinitializer + %4 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 3 + %lane3 = shufflevector <2 x i64> %4, <2 x i64> undef, <2 x i32> zeroinitializer + %.fca.0.0.insert = insertvalue %struct.int64x2x4_t undef, <2 x i64> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int64x2x4_t %.fca.0.0.insert, <2 x i64> %lane1, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int64x2x4_t %.fca.0.1.insert, <2 x i64> %lane2, 0, 2 + %.fca.0.3.insert = insertvalue %struct.int64x2x4_t %.fca.0.2.insert, <2 x i64> %lane3, 0, 3 + ret %struct.int64x2x4_t %.fca.0.3.insert +} + +define %struct.float32x4x4_t @test_vld4q_dup_f32(float* %a) { +; CHECK-LABEL: test_vld4q_dup_f32 +; CHECK: ld4r {{{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s}, [x0] +entry: + %0 = bitcast float* %a to i8* + %vld_dup = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4lane.v4f32(i8* %0, <4 x float> undef, <4 x float> undef, <4 x float> undef, <4 x float> undef, i32 0, i32 4) + %1 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld_dup, 0 + %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer + %2 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } 
%vld_dup, 1 + %lane1 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> zeroinitializer + %3 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld_dup, 2 + %lane2 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> zeroinitializer + %4 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld_dup, 3 + %lane3 = shufflevector <4 x float> %4, <4 x float> undef, <4 x i32> zeroinitializer + %.fca.0.0.insert = insertvalue %struct.float32x4x4_t undef, <4 x float> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x4x4_t %.fca.0.0.insert, <4 x float> %lane1, 0, 1 + %.fca.0.2.insert = insertvalue %struct.float32x4x4_t %.fca.0.1.insert, <4 x float> %lane2, 0, 2 + %.fca.0.3.insert = insertvalue %struct.float32x4x4_t %.fca.0.2.insert, <4 x float> %lane3, 0, 3 + ret %struct.float32x4x4_t %.fca.0.3.insert +} + +define %struct.float64x2x4_t @test_vld4q_dup_f64(double* %a) { +; CHECK-LABEL: test_vld4q_dup_f64 +; CHECK: ld4r {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [x0] +entry: + %0 = bitcast double* %a to i8* + %vld_dup = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8* %0, <2 x double> undef, <2 x double> undef, <2 x double> undef, <2 x double> undef, i32 0, i32 8) + %1 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld_dup, 0 + %lane = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> zeroinitializer + %2 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld_dup, 1 + %lane1 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer + %3 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld_dup, 2 + %lane2 = shufflevector <2 x double> %3, <2 x double> undef, <2 x i32> zeroinitializer + %4 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld_dup, 3 + %lane3 = shufflevector <2 x double> %4, <2 x double> 
undef, <2 x i32> zeroinitializer + %.fca.0.0.insert = insertvalue %struct.float64x2x4_t undef, <2 x double> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float64x2x4_t %.fca.0.0.insert, <2 x double> %lane1, 0, 1 + %.fca.0.2.insert = insertvalue %struct.float64x2x4_t %.fca.0.1.insert, <2 x double> %lane2, 0, 2 + %.fca.0.3.insert = insertvalue %struct.float64x2x4_t %.fca.0.2.insert, <2 x double> %lane3, 0, 3 + ret %struct.float64x2x4_t %.fca.0.3.insert +} + +define %struct.int8x8x4_t @test_vld4_dup_s8(i8* %a) { +; CHECK-LABEL: test_vld4_dup_s8 +; CHECK: ld4r {{{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b}, [x0] +entry: + %vld_dup = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) + %0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 0 + %lane = shufflevector <8 x i8> %0, <8 x i8> undef, <8 x i32> zeroinitializer + %1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 1 + %lane1 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer + %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 2 + %lane2 = shufflevector <8 x i8> %2, <8 x i8> undef, <8 x i32> zeroinitializer + %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 3 + %lane3 = shufflevector <8 x i8> %3, <8 x i8> undef, <8 x i32> zeroinitializer + %.fca.0.0.insert = insertvalue %struct.int8x8x4_t undef, <8 x i8> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x8x4_t %.fca.0.0.insert, <8 x i8> %lane1, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int8x8x4_t %.fca.0.1.insert, <8 x i8> %lane2, 0, 2 + %.fca.0.3.insert = insertvalue %struct.int8x8x4_t %.fca.0.2.insert, <8 x i8> %lane3, 0, 3 + ret %struct.int8x8x4_t %.fca.0.3.insert +} + +define %struct.int16x4x4_t @test_vld4_dup_s16(i16* %a) { +; CHECK-LABEL: test_vld4_dup_s16 +; CHECK: ld4r {{{v[0-9]+}}.4h, {{v[0-9]+}}.4h, 
{{v[0-9]+}}.4h, {{v[0-9]+}}.4h}, [x0] +entry: + %0 = bitcast i16* %a to i8* + %vld_dup = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16(i8* %0, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) + %1 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 0 + %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer + %2 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 1 + %lane1 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> zeroinitializer + %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 2 + %lane2 = shufflevector <4 x i16> %3, <4 x i16> undef, <4 x i32> zeroinitializer + %4 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 3 + %lane3 = shufflevector <4 x i16> %4, <4 x i16> undef, <4 x i32> zeroinitializer + %.fca.0.0.insert = insertvalue %struct.int16x4x4_t undef, <4 x i16> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x4x4_t %.fca.0.0.insert, <4 x i16> %lane1, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int16x4x4_t %.fca.0.1.insert, <4 x i16> %lane2, 0, 2 + %.fca.0.3.insert = insertvalue %struct.int16x4x4_t %.fca.0.2.insert, <4 x i16> %lane3, 0, 3 + ret %struct.int16x4x4_t %.fca.0.3.insert +} + +define %struct.int32x2x4_t @test_vld4_dup_s32(i32* %a) { +; CHECK-LABEL: test_vld4_dup_s32 +; CHECK: ld4r {{{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s}, [x0] +entry: + %0 = bitcast i32* %a to i8* + %vld_dup = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8* %0, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4) + %1 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 0 + %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer + %2 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 1 + %lane1 = shufflevector <2 x 
i32> %2, <2 x i32> undef, <2 x i32> zeroinitializer + %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 2 + %lane2 = shufflevector <2 x i32> %3, <2 x i32> undef, <2 x i32> zeroinitializer + %4 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 3 + %lane3 = shufflevector <2 x i32> %4, <2 x i32> undef, <2 x i32> zeroinitializer + %.fca.0.0.insert = insertvalue %struct.int32x2x4_t undef, <2 x i32> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x2x4_t %.fca.0.0.insert, <2 x i32> %lane1, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int32x2x4_t %.fca.0.1.insert, <2 x i32> %lane2, 0, 2 + %.fca.0.3.insert = insertvalue %struct.int32x2x4_t %.fca.0.2.insert, <2 x i32> %lane3, 0, 3 + ret %struct.int32x2x4_t %.fca.0.3.insert +} + +define %struct.int64x1x4_t @test_vld4_dup_s64(i64* %a) { +; CHECK-LABEL: test_vld4_dup_s64 +; CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [x0] +entry: + %0 = bitcast i64* %a to i8* + %vld_dup = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8* %0, i32 8) + %vld_dup.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 0 + %vld_dup.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 1 + %vld_dup.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 2 + %vld_dup.fca.3.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 3 + %.fca.0.0.insert = insertvalue %struct.int64x1x4_t undef, <1 x i64> %vld_dup.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int64x1x4_t %.fca.0.0.insert, <1 x i64> %vld_dup.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int64x1x4_t %.fca.0.1.insert, <1 x i64> %vld_dup.fca.2.extract, 0, 2 + %.fca.0.3.insert = insertvalue %struct.int64x1x4_t %.fca.0.2.insert, <1 x i64> %vld_dup.fca.3.extract, 0, 3 + ret %struct.int64x1x4_t %.fca.0.3.insert +} + +define 
%struct.float32x2x4_t @test_vld4_dup_f32(float* %a) { +; CHECK-LABEL: test_vld4_dup_f32 +; CHECK: ld4r {{{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s}, [x0] +entry: + %0 = bitcast float* %a to i8* + %vld_dup = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4lane.v2f32(i8* %0, <2 x float> undef, <2 x float> undef, <2 x float> undef, <2 x float> undef, i32 0, i32 4) + %1 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld_dup, 0 + %lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> zeroinitializer + %2 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld_dup, 1 + %lane1 = shufflevector <2 x float> %2, <2 x float> undef, <2 x i32> zeroinitializer + %3 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld_dup, 2 + %lane2 = shufflevector <2 x float> %3, <2 x float> undef, <2 x i32> zeroinitializer + %4 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld_dup, 3 + %lane3 = shufflevector <2 x float> %4, <2 x float> undef, <2 x i32> zeroinitializer + %.fca.0.0.insert = insertvalue %struct.float32x2x4_t undef, <2 x float> %lane, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x2x4_t %.fca.0.0.insert, <2 x float> %lane1, 0, 1 + %.fca.0.2.insert = insertvalue %struct.float32x2x4_t %.fca.0.1.insert, <2 x float> %lane2, 0, 2 + %.fca.0.3.insert = insertvalue %struct.float32x2x4_t %.fca.0.2.insert, <2 x float> %lane3, 0, 3 + ret %struct.float32x2x4_t %.fca.0.3.insert +} + +define %struct.float64x1x4_t @test_vld4_dup_f64(double* %a) { +; CHECK-LABEL: test_vld4_dup_f64 +; CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [x0] +entry: + %0 = bitcast double* %a to i8* + %vld_dup = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8* %0, i32 8) + %vld_dup.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld_dup, 0 + 
%vld_dup.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld_dup, 1 + %vld_dup.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld_dup, 2 + %vld_dup.fca.3.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld_dup, 3 + %.fca.0.0.insert = insertvalue %struct.float64x1x4_t undef, <1 x double> %vld_dup.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float64x1x4_t %.fca.0.0.insert, <1 x double> %vld_dup.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.float64x1x4_t %.fca.0.1.insert, <1 x double> %vld_dup.fca.2.extract, 0, 2 + %.fca.0.3.insert = insertvalue %struct.float64x1x4_t %.fca.0.2.insert, <1 x double> %vld_dup.fca.3.extract, 0, 3 + ret %struct.float64x1x4_t %.fca.0.3.insert +} + +define <16 x i8> @test_vld1q_lane_s8(i8* %a, <16 x i8> %b) { +; CHECK-LABEL: test_vld1q_lane_s8 +; CHECK: ld1 {{{v[0-9]+}}.b}[{{[0-9]+}}], [x0] +entry: + %0 = load i8* %a, align 1 + %vld1_lane = insertelement <16 x i8> %b, i8 %0, i32 15 + ret <16 x i8> %vld1_lane +} + +define <8 x i16> @test_vld1q_lane_s16(i16* %a, <8 x i16> %b) { +; CHECK-LABEL: test_vld1q_lane_s16 +; CHECK: ld1 {{{v[0-9]+}}.h}[{{[0-9]+}}], [x0] +entry: + %0 = load i16* %a, align 2 + %vld1_lane = insertelement <8 x i16> %b, i16 %0, i32 7 + ret <8 x i16> %vld1_lane +} + +define <4 x i32> @test_vld1q_lane_s32(i32* %a, <4 x i32> %b) { +; CHECK-LABEL: test_vld1q_lane_s32 +; CHECK: ld1 {{{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %0 = load i32* %a, align 4 + %vld1_lane = insertelement <4 x i32> %b, i32 %0, i32 3 + ret <4 x i32> %vld1_lane +} + +define <2 x i64> @test_vld1q_lane_s64(i64* %a, <2 x i64> %b) { +; CHECK-LABEL: test_vld1q_lane_s64 +; CHECK: ld1 {{{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %0 = load i64* %a, align 8 + %vld1_lane = insertelement <2 x i64> %b, i64 %0, i32 1 + ret <2 x i64> %vld1_lane +} + +define <4 x float> @test_vld1q_lane_f32(float* %a, <4 x float> %b) { +; 
CHECK-LABEL: test_vld1q_lane_f32 +; CHECK: ld1 {{{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %0 = load float* %a, align 4 + %vld1_lane = insertelement <4 x float> %b, float %0, i32 3 + ret <4 x float> %vld1_lane +} + +define <2 x double> @test_vld1q_lane_f64(double* %a, <2 x double> %b) { +; CHECK-LABEL: test_vld1q_lane_f64 +; CHECK: ld1 {{{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %0 = load double* %a, align 8 + %vld1_lane = insertelement <2 x double> %b, double %0, i32 1 + ret <2 x double> %vld1_lane +} + +define <8 x i8> @test_vld1_lane_s8(i8* %a, <8 x i8> %b) { +; CHECK-LABEL: test_vld1_lane_s8 +; CHECK: ld1 {{{v[0-9]+}}.b}[{{[0-9]+}}], [x0] +entry: + %0 = load i8* %a, align 1 + %vld1_lane = insertelement <8 x i8> %b, i8 %0, i32 7 + ret <8 x i8> %vld1_lane +} + +define <4 x i16> @test_vld1_lane_s16(i16* %a, <4 x i16> %b) { +; CHECK-LABEL: test_vld1_lane_s16 +; CHECK: ld1 {{{v[0-9]+}}.h}[{{[0-9]+}}], [x0] +entry: + %0 = load i16* %a, align 2 + %vld1_lane = insertelement <4 x i16> %b, i16 %0, i32 3 + ret <4 x i16> %vld1_lane +} + +define <2 x i32> @test_vld1_lane_s32(i32* %a, <2 x i32> %b) { +; CHECK-LABEL: test_vld1_lane_s32 +; CHECK: ld1 {{{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %0 = load i32* %a, align 4 + %vld1_lane = insertelement <2 x i32> %b, i32 %0, i32 1 + ret <2 x i32> %vld1_lane +} + +define <1 x i64> @test_vld1_lane_s64(i64* %a, <1 x i64> %b) { +; CHECK-LABEL: test_vld1_lane_s64 +; CHECK: ld1r {{{v[0-9]+}}.1d}, [x0] +entry: + %0 = load i64* %a, align 8 + %vld1_lane = insertelement <1 x i64> undef, i64 %0, i32 0 + ret <1 x i64> %vld1_lane +} + +define <2 x float> @test_vld1_lane_f32(float* %a, <2 x float> %b) { +; CHECK-LABEL: test_vld1_lane_f32 +; CHECK: ld1 {{{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %0 = load float* %a, align 4 + %vld1_lane = insertelement <2 x float> %b, float %0, i32 1 + ret <2 x float> %vld1_lane +} + +define <1 x double> @test_vld1_lane_f64(double* %a, <1 x double> %b) { +; CHECK-LABEL: test_vld1_lane_f64 +; CHECK: ld1r 
{{{v[0-9]+}}.1d}, [x0] +entry: + %0 = load double* %a, align 8 + %vld1_lane = insertelement <1 x double> undef, double %0, i32 0 + ret <1 x double> %vld1_lane +} + +define %struct.int16x8x2_t @test_vld2q_lane_s16(i16* %a, [2 x <8 x i16>] %b.coerce) { +; CHECK-LABEL: test_vld2q_lane_s16 +; CHECK: ld2 {{{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [2 x <8 x i16>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <8 x i16>] %b.coerce, 1 + %0 = bitcast i16* %a to i8* + %vld2_lane = tail call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, i32 7, i32 2) + %vld2_lane.fca.0.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2_lane, 0 + %vld2_lane.fca.1.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2_lane, 1 + %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vld2_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vld2_lane.fca.1.extract, 0, 1 + ret %struct.int16x8x2_t %.fca.0.1.insert +} + +define %struct.int32x4x2_t @test_vld2q_lane_s32(i32* %a, [2 x <4 x i32>] %b.coerce) { +; CHECK-LABEL: test_vld2q_lane_s32 +; CHECK: ld2 {{{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %b.coerce, 1 + %0 = bitcast i32* %a to i8* + %vld2_lane = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, i32 3, i32 4) + %vld2_lane.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2_lane, 0 + %vld2_lane.fca.1.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2_lane, 1 + %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vld2_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x 
i32> %vld2_lane.fca.1.extract, 0, 1 + ret %struct.int32x4x2_t %.fca.0.1.insert +} + +define %struct.int64x2x2_t @test_vld2q_lane_s64(i64* %a, [2 x <2 x i64>] %b.coerce) { +; CHECK-LABEL: test_vld2q_lane_s64 +; CHECK: ld2 {{{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [2 x <2 x i64>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <2 x i64>] %b.coerce, 1 + %0 = bitcast i64* %a to i8* + %vld2_lane = tail call { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, i32 1, i32 8) + %vld2_lane.fca.0.extract = extractvalue { <2 x i64>, <2 x i64> } %vld2_lane, 0 + %vld2_lane.fca.1.extract = extractvalue { <2 x i64>, <2 x i64> } %vld2_lane, 1 + %.fca.0.0.insert = insertvalue %struct.int64x2x2_t undef, <2 x i64> %vld2_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int64x2x2_t %.fca.0.0.insert, <2 x i64> %vld2_lane.fca.1.extract, 0, 1 + ret %struct.int64x2x2_t %.fca.0.1.insert +} + +define %struct.float32x4x2_t @test_vld2q_lane_f32(float* %a, [2 x <4 x float>] %b.coerce) { +; CHECK-LABEL: test_vld2q_lane_f32 +; CHECK: ld2 {{{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [2 x <4 x float>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <4 x float>] %b.coerce, 1 + %0 = bitcast float* %a to i8* + %vld2_lane = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, i32 3, i32 4) + %vld2_lane.fca.0.extract = extractvalue { <4 x float>, <4 x float> } %vld2_lane, 0 + %vld2_lane.fca.1.extract = extractvalue { <4 x float>, <4 x float> } %vld2_lane, 1 + %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vld2_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vld2_lane.fca.1.extract, 0, 1 + ret 
%struct.float32x4x2_t %.fca.0.1.insert +} + +define %struct.float64x2x2_t @test_vld2q_lane_f64(double* %a, [2 x <2 x double>] %b.coerce) { +; CHECK-LABEL: test_vld2q_lane_f64 +; CHECK: ld2 {{{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [2 x <2 x double>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <2 x double>] %b.coerce, 1 + %0 = bitcast double* %a to i8* + %vld2_lane = tail call { <2 x double>, <2 x double> } @llvm.arm.neon.vld2lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, i32 1, i32 8) + %vld2_lane.fca.0.extract = extractvalue { <2 x double>, <2 x double> } %vld2_lane, 0 + %vld2_lane.fca.1.extract = extractvalue { <2 x double>, <2 x double> } %vld2_lane, 1 + %.fca.0.0.insert = insertvalue %struct.float64x2x2_t undef, <2 x double> %vld2_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float64x2x2_t %.fca.0.0.insert, <2 x double> %vld2_lane.fca.1.extract, 0, 1 + ret %struct.float64x2x2_t %.fca.0.1.insert +} + +define %struct.int8x8x2_t @test_vld2_lane_s8(i8* %a, [2 x <8 x i8>] %b.coerce) { +; CHECK-LABEL: test_vld2_lane_s8 +; CHECK: ld2 {{{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [2 x <8 x i8>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <8 x i8>] %b.coerce, 1 + %vld2_lane = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, i32 7, i32 1) + %vld2_lane.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2_lane, 0 + %vld2_lane.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2_lane, 1 + %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vld2_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vld2_lane.fca.1.extract, 0, 1 + ret %struct.int8x8x2_t %.fca.0.1.insert +} + +define %struct.int16x4x2_t 
@test_vld2_lane_s16(i16* %a, [2 x <4 x i16>] %b.coerce) { +; CHECK-LABEL: test_vld2_lane_s16 +; CHECK: ld2 {{{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [2 x <4 x i16>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <4 x i16>] %b.coerce, 1 + %0 = bitcast i16* %a to i8* + %vld2_lane = tail call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16(i8* %0, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, i32 3, i32 2) + %vld2_lane.fca.0.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2_lane, 0 + %vld2_lane.fca.1.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2_lane, 1 + %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vld2_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vld2_lane.fca.1.extract, 0, 1 + ret %struct.int16x4x2_t %.fca.0.1.insert +} + +define %struct.int32x2x2_t @test_vld2_lane_s32(i32* %a, [2 x <2 x i32>] %b.coerce) { +; CHECK-LABEL: test_vld2_lane_s32 +; CHECK: ld2 {{{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [2 x <2 x i32>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <2 x i32>] %b.coerce, 1 + %0 = bitcast i32* %a to i8* + %vld2_lane = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, i32 1, i32 4) + %vld2_lane.fca.0.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2_lane, 0 + %vld2_lane.fca.1.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2_lane, 1 + %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vld2_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vld2_lane.fca.1.extract, 0, 1 + ret %struct.int32x2x2_t %.fca.0.1.insert +} + +define %struct.int64x1x2_t @test_vld2_lane_s64(i64* %a, [2 x <1 x i64>] %b.coerce) { +; CHECK-LABEL: 
test_vld2_lane_s64 +; CHECK: ld2 {{{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [2 x <1 x i64>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <1 x i64>] %b.coerce, 1 + %0 = bitcast i64* %a to i8* + %vld2_lane = tail call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, i32 0, i32 8) + %vld2_lane.fca.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane, 0 + %vld2_lane.fca.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane, 1 + %.fca.0.0.insert = insertvalue %struct.int64x1x2_t undef, <1 x i64> %vld2_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int64x1x2_t %.fca.0.0.insert, <1 x i64> %vld2_lane.fca.1.extract, 0, 1 + ret %struct.int64x1x2_t %.fca.0.1.insert +} + +define %struct.float32x2x2_t @test_vld2_lane_f32(float* %a, [2 x <2 x float>] %b.coerce) { +; CHECK-LABEL: test_vld2_lane_f32 +; CHECK: ld2 {{{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [2 x <2 x float>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <2 x float>] %b.coerce, 1 + %0 = bitcast float* %a to i8* + %vld2_lane = tail call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, i32 1, i32 4) + %vld2_lane.fca.0.extract = extractvalue { <2 x float>, <2 x float> } %vld2_lane, 0 + %vld2_lane.fca.1.extract = extractvalue { <2 x float>, <2 x float> } %vld2_lane, 1 + %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vld2_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vld2_lane.fca.1.extract, 0, 1 + ret %struct.float32x2x2_t %.fca.0.1.insert +} + +define %struct.float64x1x2_t @test_vld2_lane_f64(double* %a, [2 x <1 x double>] %b.coerce) { +; CHECK-LABEL: test_vld2_lane_f64 +; CHECK: ld2 
{{{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [2 x <1 x double>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <1 x double>] %b.coerce, 1 + %0 = bitcast double* %a to i8* + %vld2_lane = tail call { <1 x double>, <1 x double> } @llvm.arm.neon.vld2lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, i32 0, i32 8) + %vld2_lane.fca.0.extract = extractvalue { <1 x double>, <1 x double> } %vld2_lane, 0 + %vld2_lane.fca.1.extract = extractvalue { <1 x double>, <1 x double> } %vld2_lane, 1 + %.fca.0.0.insert = insertvalue %struct.float64x1x2_t undef, <1 x double> %vld2_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float64x1x2_t %.fca.0.0.insert, <1 x double> %vld2_lane.fca.1.extract, 0, 1 + ret %struct.float64x1x2_t %.fca.0.1.insert +} + +define %struct.int16x8x3_t @test_vld3q_lane_s16(i16* %a, [3 x <8 x i16>] %b.coerce) { +; CHECK-LABEL: test_vld3q_lane_s16 +; CHECK: ld3 {{{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [3 x <8 x i16>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <8 x i16>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <8 x i16>] %b.coerce, 2 + %0 = bitcast i16* %a to i8* + %vld3_lane = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, i32 7, i32 2) + %vld3_lane.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 0 + %vld3_lane.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 1 + %vld3_lane.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 2 + %.fca.0.0.insert = insertvalue %struct.int16x8x3_t undef, <8 x i16> %vld3_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x8x3_t %.fca.0.0.insert, <8 x i16> 
%vld3_lane.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int16x8x3_t %.fca.0.1.insert, <8 x i16> %vld3_lane.fca.2.extract, 0, 2 + ret %struct.int16x8x3_t %.fca.0.2.insert +} + +define %struct.int32x4x3_t @test_vld3q_lane_s32(i32* %a, [3 x <4 x i32>] %b.coerce) { +; CHECK-LABEL: test_vld3q_lane_s32 +; CHECK: ld3 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [3 x <4 x i32>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <4 x i32>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <4 x i32>] %b.coerce, 2 + %0 = bitcast i32* %a to i8* + %vld3_lane = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, i32 3, i32 4) + %vld3_lane.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 0 + %vld3_lane.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 1 + %vld3_lane.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 2 + %.fca.0.0.insert = insertvalue %struct.int32x4x3_t undef, <4 x i32> %vld3_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x4x3_t %.fca.0.0.insert, <4 x i32> %vld3_lane.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int32x4x3_t %.fca.0.1.insert, <4 x i32> %vld3_lane.fca.2.extract, 0, 2 + ret %struct.int32x4x3_t %.fca.0.2.insert +} + +define %struct.int64x2x3_t @test_vld3q_lane_s64(i64* %a, [3 x <2 x i64>] %b.coerce) { +; CHECK-LABEL: test_vld3q_lane_s64 +; CHECK: ld3 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [3 x <2 x i64>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <2 x i64>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <2 x i64>] %b.coerce, 2 + %0 = bitcast i64* %a to i8* + %vld3_lane = tail call { <2 x i64>, <2 x 
i64>, <2 x i64> } @llvm.arm.neon.vld3lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, i32 1, i32 8) + %vld3_lane.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 0 + %vld3_lane.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 1 + %vld3_lane.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 2 + %.fca.0.0.insert = insertvalue %struct.int64x2x3_t undef, <2 x i64> %vld3_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int64x2x3_t %.fca.0.0.insert, <2 x i64> %vld3_lane.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int64x2x3_t %.fca.0.1.insert, <2 x i64> %vld3_lane.fca.2.extract, 0, 2 + ret %struct.int64x2x3_t %.fca.0.2.insert +} + +define %struct.float32x4x3_t @test_vld3q_lane_f32(float* %a, [3 x <4 x float>] %b.coerce) { +; CHECK-LABEL: test_vld3q_lane_f32 +; CHECK: ld3 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [3 x <4 x float>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <4 x float>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <4 x float>] %b.coerce, 2 + %0 = bitcast float* %a to i8* + %vld3_lane = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, i32 3, i32 4) + %vld3_lane.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 0 + %vld3_lane.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 1 + %vld3_lane.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 2 + %.fca.0.0.insert = insertvalue %struct.float32x4x3_t undef, <4 x float> %vld3_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x4x3_t %.fca.0.0.insert, <4 x 
float> %vld3_lane.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.float32x4x3_t %.fca.0.1.insert, <4 x float> %vld3_lane.fca.2.extract, 0, 2 + ret %struct.float32x4x3_t %.fca.0.2.insert +} + +define %struct.float64x2x3_t @test_vld3q_lane_f64(double* %a, [3 x <2 x double>] %b.coerce) { +; CHECK-LABEL: test_vld3q_lane_f64 +; CHECK: ld3 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [3 x <2 x double>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <2 x double>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <2 x double>] %b.coerce, 2 + %0 = bitcast double* %a to i8* + %vld3_lane = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, i32 1, i32 8) + %vld3_lane.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 0 + %vld3_lane.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 1 + %vld3_lane.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 2 + %.fca.0.0.insert = insertvalue %struct.float64x2x3_t undef, <2 x double> %vld3_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float64x2x3_t %.fca.0.0.insert, <2 x double> %vld3_lane.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.float64x2x3_t %.fca.0.1.insert, <2 x double> %vld3_lane.fca.2.extract, 0, 2 + ret %struct.float64x2x3_t %.fca.0.2.insert +} + +define %struct.int8x8x3_t @test_vld3_lane_s8(i8* %a, [3 x <8 x i8>] %b.coerce) { +; CHECK-LABEL: test_vld3_lane_s8 +; CHECK: ld3 {{{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [3 x <8 x i8>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <8 x i8>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <8 x i8>] 
%b.coerce, 2 + %vld3_lane = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, i32 7, i32 1) + %vld3_lane.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 0 + %vld3_lane.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 1 + %vld3_lane.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 2 + %.fca.0.0.insert = insertvalue %struct.int8x8x3_t undef, <8 x i8> %vld3_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x8x3_t %.fca.0.0.insert, <8 x i8> %vld3_lane.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int8x8x3_t %.fca.0.1.insert, <8 x i8> %vld3_lane.fca.2.extract, 0, 2 + ret %struct.int8x8x3_t %.fca.0.2.insert +} + +define %struct.int16x4x3_t @test_vld3_lane_s16(i16* %a, [3 x <4 x i16>] %b.coerce) { +; CHECK-LABEL: test_vld3_lane_s16 +; CHECK: ld3 {{{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [3 x <4 x i16>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <4 x i16>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <4 x i16>] %b.coerce, 2 + %0 = bitcast i16* %a to i8* + %vld3_lane = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8* %0, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, i32 3, i32 2) + %vld3_lane.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 0 + %vld3_lane.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 1 + %vld3_lane.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 2 + %.fca.0.0.insert = insertvalue %struct.int16x4x3_t undef, <4 x i16> %vld3_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x4x3_t %.fca.0.0.insert, <4 x i16> 
%vld3_lane.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int16x4x3_t %.fca.0.1.insert, <4 x i16> %vld3_lane.fca.2.extract, 0, 2 + ret %struct.int16x4x3_t %.fca.0.2.insert +} + +define %struct.int32x2x3_t @test_vld3_lane_s32(i32* %a, [3 x <2 x i32>] %b.coerce) { +; CHECK-LABEL: test_vld3_lane_s32 +; CHECK: ld3 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [3 x <2 x i32>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <2 x i32>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <2 x i32>] %b.coerce, 2 + %0 = bitcast i32* %a to i8* + %vld3_lane = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, i32 1, i32 4) + %vld3_lane.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 0 + %vld3_lane.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 1 + %vld3_lane.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 2 + %.fca.0.0.insert = insertvalue %struct.int32x2x3_t undef, <2 x i32> %vld3_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x2x3_t %.fca.0.0.insert, <2 x i32> %vld3_lane.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int32x2x3_t %.fca.0.1.insert, <2 x i32> %vld3_lane.fca.2.extract, 0, 2 + ret %struct.int32x2x3_t %.fca.0.2.insert +} + +define %struct.int64x1x3_t @test_vld3_lane_s64(i64* %a, [3 x <1 x i64>] %b.coerce) { +; CHECK-LABEL: test_vld3_lane_s64 +; CHECK: ld3 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [3 x <1 x i64>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <1 x i64>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <1 x i64>] %b.coerce, 2 + %0 = bitcast i64* %a to i8* + %vld3_lane = tail call { <1 x i64>, <1 x 
i64>, <1 x i64> } @llvm.arm.neon.vld3lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, i32 0, i32 8) + %vld3_lane.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 0 + %vld3_lane.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 1 + %vld3_lane.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 2 + %.fca.0.0.insert = insertvalue %struct.int64x1x3_t undef, <1 x i64> %vld3_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int64x1x3_t %.fca.0.0.insert, <1 x i64> %vld3_lane.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int64x1x3_t %.fca.0.1.insert, <1 x i64> %vld3_lane.fca.2.extract, 0, 2 + ret %struct.int64x1x3_t %.fca.0.2.insert +} + +define %struct.float32x2x3_t @test_vld3_lane_f32(float* %a, [3 x <2 x float>] %b.coerce) { +; CHECK-LABEL: test_vld3_lane_f32 +; CHECK: ld3 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [3 x <2 x float>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <2 x float>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <2 x float>] %b.coerce, 2 + %0 = bitcast float* %a to i8* + %vld3_lane = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, i32 1, i32 4) + %vld3_lane.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 0 + %vld3_lane.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 1 + %vld3_lane.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 2 + %.fca.0.0.insert = insertvalue %struct.float32x2x3_t undef, <2 x float> %vld3_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x2x3_t %.fca.0.0.insert, <2 x float> 
%vld3_lane.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.float32x2x3_t %.fca.0.1.insert, <2 x float> %vld3_lane.fca.2.extract, 0, 2 + ret %struct.float32x2x3_t %.fca.0.2.insert +} + +define %struct.float64x1x3_t @test_vld3_lane_f64(double* %a, [3 x <1 x double>] %b.coerce) { +; CHECK-LABEL: test_vld3_lane_f64 +; CHECK: ld3 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [3 x <1 x double>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <1 x double>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <1 x double>] %b.coerce, 2 + %0 = bitcast double* %a to i8* + %vld3_lane = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, i32 0, i32 8) + %vld3_lane.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 0 + %vld3_lane.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 1 + %vld3_lane.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 2 + %.fca.0.0.insert = insertvalue %struct.float64x1x3_t undef, <1 x double> %vld3_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float64x1x3_t %.fca.0.0.insert, <1 x double> %vld3_lane.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.float64x1x3_t %.fca.0.1.insert, <1 x double> %vld3_lane.fca.2.extract, 0, 2 + ret %struct.float64x1x3_t %.fca.0.2.insert +} + +define %struct.int8x16x4_t @test_vld4q_lane_s8(i8* %a, [4 x <16 x i8>] %b.coerce) { +; CHECK-LABEL: test_vld4q_lane_s8 +; CHECK: ld4 {{{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [4 x <16 x i8>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <16 x i8>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x 
<16 x i8>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <16 x i8>] %b.coerce, 3 + %vld3_lane = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4lane.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, <16 x i8> %b.coerce.fca.3.extract, i32 15, i32 1) + %vld3_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 0 + %vld3_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 1 + %vld3_lane.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 2 + %vld3_lane.fca.3.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 3 + %.fca.0.0.insert = insertvalue %struct.int8x16x4_t undef, <16 x i8> %vld3_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x16x4_t %.fca.0.0.insert, <16 x i8> %vld3_lane.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int8x16x4_t %.fca.0.1.insert, <16 x i8> %vld3_lane.fca.2.extract, 0, 2 + %.fca.0.3.insert = insertvalue %struct.int8x16x4_t %.fca.0.2.insert, <16 x i8> %vld3_lane.fca.3.extract, 0, 3 + ret %struct.int8x16x4_t %.fca.0.3.insert +} + +define %struct.int16x8x4_t @test_vld4q_lane_s16(i16* %a, [4 x <8 x i16>] %b.coerce) { +; CHECK-LABEL: test_vld4q_lane_s16 +; CHECK: ld4 {{{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [4 x <8 x i16>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <8 x i16>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <8 x i16>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <8 x i16>] %b.coerce, 3 + %0 = bitcast i16* %a to i8* + %vld3_lane = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> 
%b.coerce.fca.2.extract, <8 x i16> %b.coerce.fca.3.extract, i32 7, i32 2) + %vld3_lane.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 0 + %vld3_lane.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 1 + %vld3_lane.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 2 + %vld3_lane.fca.3.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 3 + %.fca.0.0.insert = insertvalue %struct.int16x8x4_t undef, <8 x i16> %vld3_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x8x4_t %.fca.0.0.insert, <8 x i16> %vld3_lane.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int16x8x4_t %.fca.0.1.insert, <8 x i16> %vld3_lane.fca.2.extract, 0, 2 + %.fca.0.3.insert = insertvalue %struct.int16x8x4_t %.fca.0.2.insert, <8 x i16> %vld3_lane.fca.3.extract, 0, 3 + ret %struct.int16x8x4_t %.fca.0.3.insert +} + +define %struct.int32x4x4_t @test_vld4q_lane_s32(i32* %a, [4 x <4 x i32>] %b.coerce) { +; CHECK-LABEL: test_vld4q_lane_s32 +; CHECK: ld4 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [4 x <4 x i32>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <4 x i32>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <4 x i32>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <4 x i32>] %b.coerce, 3 + %0 = bitcast i32* %a to i8* + %vld3_lane = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, <4 x i32> %b.coerce.fca.3.extract, i32 3, i32 4) + %vld3_lane.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 0 + %vld3_lane.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 1 + 
%vld3_lane.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 2 + %vld3_lane.fca.3.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 3 + %.fca.0.0.insert = insertvalue %struct.int32x4x4_t undef, <4 x i32> %vld3_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x4x4_t %.fca.0.0.insert, <4 x i32> %vld3_lane.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int32x4x4_t %.fca.0.1.insert, <4 x i32> %vld3_lane.fca.2.extract, 0, 2 + %.fca.0.3.insert = insertvalue %struct.int32x4x4_t %.fca.0.2.insert, <4 x i32> %vld3_lane.fca.3.extract, 0, 3 + ret %struct.int32x4x4_t %.fca.0.3.insert +} + +define %struct.int64x2x4_t @test_vld4q_lane_s64(i64* %a, [4 x <2 x i64>] %b.coerce) { +; CHECK-LABEL: test_vld4q_lane_s64 +; CHECK: ld4 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [4 x <2 x i64>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <2 x i64>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <2 x i64>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <2 x i64>] %b.coerce, 3 + %0 = bitcast i64* %a to i8* + %vld3_lane = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, <2 x i64> %b.coerce.fca.3.extract, i32 1, i32 8) + %vld3_lane.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 0 + %vld3_lane.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 1 + %vld3_lane.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 2 + %vld3_lane.fca.3.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 3 + %.fca.0.0.insert = insertvalue %struct.int64x2x4_t undef, <2 x i64> %vld3_lane.fca.0.extract, 0, 0 
+ %.fca.0.1.insert = insertvalue %struct.int64x2x4_t %.fca.0.0.insert, <2 x i64> %vld3_lane.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int64x2x4_t %.fca.0.1.insert, <2 x i64> %vld3_lane.fca.2.extract, 0, 2 + %.fca.0.3.insert = insertvalue %struct.int64x2x4_t %.fca.0.2.insert, <2 x i64> %vld3_lane.fca.3.extract, 0, 3 + ret %struct.int64x2x4_t %.fca.0.3.insert +} + +define %struct.float32x4x4_t @test_vld4q_lane_f32(float* %a, [4 x <4 x float>] %b.coerce) { +; CHECK-LABEL: test_vld4q_lane_f32 +; CHECK: ld4 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [4 x <4 x float>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <4 x float>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <4 x float>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <4 x float>] %b.coerce, 3 + %0 = bitcast float* %a to i8* + %vld3_lane = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, <4 x float> %b.coerce.fca.3.extract, i32 3, i32 4) + %vld3_lane.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 0 + %vld3_lane.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 1 + %vld3_lane.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 2 + %vld3_lane.fca.3.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 3 + %.fca.0.0.insert = insertvalue %struct.float32x4x4_t undef, <4 x float> %vld3_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x4x4_t %.fca.0.0.insert, <4 x float> %vld3_lane.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.float32x4x4_t %.fca.0.1.insert, <4 x float> %vld3_lane.fca.2.extract, 0, 2 + 
%.fca.0.3.insert = insertvalue %struct.float32x4x4_t %.fca.0.2.insert, <4 x float> %vld3_lane.fca.3.extract, 0, 3 + ret %struct.float32x4x4_t %.fca.0.3.insert +} + +define %struct.float64x2x4_t @test_vld4q_lane_f64(double* %a, [4 x <2 x double>] %b.coerce) { +; CHECK-LABEL: test_vld4q_lane_f64 +; CHECK: ld4 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [4 x <2 x double>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <2 x double>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <2 x double>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <2 x double>] %b.coerce, 3 + %0 = bitcast double* %a to i8* + %vld3_lane = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, <2 x double> %b.coerce.fca.3.extract, i32 1, i32 8) + %vld3_lane.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 0 + %vld3_lane.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 1 + %vld3_lane.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 2 + %vld3_lane.fca.3.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 3 + %.fca.0.0.insert = insertvalue %struct.float64x2x4_t undef, <2 x double> %vld3_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float64x2x4_t %.fca.0.0.insert, <2 x double> %vld3_lane.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.float64x2x4_t %.fca.0.1.insert, <2 x double> %vld3_lane.fca.2.extract, 0, 2 + %.fca.0.3.insert = insertvalue %struct.float64x2x4_t %.fca.0.2.insert, <2 x double> %vld3_lane.fca.3.extract, 0, 3 + ret %struct.float64x2x4_t %.fca.0.3.insert +} + +define 
%struct.int8x8x4_t @test_vld4_lane_s8(i8* %a, [4 x <8 x i8>] %b.coerce) { +; CHECK-LABEL: test_vld4_lane_s8 +; CHECK: ld4 {{{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [4 x <8 x i8>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <8 x i8>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <8 x i8>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <8 x i8>] %b.coerce, 3 + %vld3_lane = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, <8 x i8> %b.coerce.fca.3.extract, i32 7, i32 1) + %vld3_lane.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 0 + %vld3_lane.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 1 + %vld3_lane.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 2 + %vld3_lane.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 3 + %.fca.0.0.insert = insertvalue %struct.int8x8x4_t undef, <8 x i8> %vld3_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x8x4_t %.fca.0.0.insert, <8 x i8> %vld3_lane.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int8x8x4_t %.fca.0.1.insert, <8 x i8> %vld3_lane.fca.2.extract, 0, 2 + %.fca.0.3.insert = insertvalue %struct.int8x8x4_t %.fca.0.2.insert, <8 x i8> %vld3_lane.fca.3.extract, 0, 3 + ret %struct.int8x8x4_t %.fca.0.3.insert +} + +define %struct.int16x4x4_t @test_vld4_lane_s16(i16* %a, [4 x <4 x i16>] %b.coerce) { +; CHECK-LABEL: test_vld4_lane_s16 +; CHECK: ld4 {{{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [4 x <4 x i16>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <4 x i16>] %b.coerce, 1 + %b.coerce.fca.2.extract 
= extractvalue [4 x <4 x i16>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <4 x i16>] %b.coerce, 3 + %0 = bitcast i16* %a to i8* + %vld3_lane = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16(i8* %0, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, <4 x i16> %b.coerce.fca.3.extract, i32 3, i32 2) + %vld3_lane.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 0 + %vld3_lane.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 1 + %vld3_lane.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 2 + %vld3_lane.fca.3.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 3 + %.fca.0.0.insert = insertvalue %struct.int16x4x4_t undef, <4 x i16> %vld3_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x4x4_t %.fca.0.0.insert, <4 x i16> %vld3_lane.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int16x4x4_t %.fca.0.1.insert, <4 x i16> %vld3_lane.fca.2.extract, 0, 2 + %.fca.0.3.insert = insertvalue %struct.int16x4x4_t %.fca.0.2.insert, <4 x i16> %vld3_lane.fca.3.extract, 0, 3 + ret %struct.int16x4x4_t %.fca.0.3.insert +} + +define %struct.int32x2x4_t @test_vld4_lane_s32(i32* %a, [4 x <2 x i32>] %b.coerce) { +; CHECK-LABEL: test_vld4_lane_s32 +; CHECK: ld4 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [4 x <2 x i32>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <2 x i32>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <2 x i32>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <2 x i32>] %b.coerce, 3 + %0 = bitcast i32* %a to i8* + %vld3_lane = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> 
%b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, <2 x i32> %b.coerce.fca.3.extract, i32 1, i32 4) + %vld3_lane.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 0 + %vld3_lane.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 1 + %vld3_lane.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 2 + %vld3_lane.fca.3.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 3 + %.fca.0.0.insert = insertvalue %struct.int32x2x4_t undef, <2 x i32> %vld3_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x2x4_t %.fca.0.0.insert, <2 x i32> %vld3_lane.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int32x2x4_t %.fca.0.1.insert, <2 x i32> %vld3_lane.fca.2.extract, 0, 2 + %.fca.0.3.insert = insertvalue %struct.int32x2x4_t %.fca.0.2.insert, <2 x i32> %vld3_lane.fca.3.extract, 0, 3 + ret %struct.int32x2x4_t %.fca.0.3.insert +} + +define %struct.int64x1x4_t @test_vld4_lane_s64(i64* %a, [4 x <1 x i64>] %b.coerce) { +; CHECK-LABEL: test_vld4_lane_s64 +; CHECK: ld4 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [4 x <1 x i64>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <1 x i64>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <1 x i64>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <1 x i64>] %b.coerce, 3 + %0 = bitcast i64* %a to i8* + %vld3_lane = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, <1 x i64> %b.coerce.fca.3.extract, i32 0, i32 8) + %vld3_lane.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 0 + %vld3_lane.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } 
%vld3_lane, 1 + %vld3_lane.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 2 + %vld3_lane.fca.3.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 3 + %.fca.0.0.insert = insertvalue %struct.int64x1x4_t undef, <1 x i64> %vld3_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int64x1x4_t %.fca.0.0.insert, <1 x i64> %vld3_lane.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int64x1x4_t %.fca.0.1.insert, <1 x i64> %vld3_lane.fca.2.extract, 0, 2 + %.fca.0.3.insert = insertvalue %struct.int64x1x4_t %.fca.0.2.insert, <1 x i64> %vld3_lane.fca.3.extract, 0, 3 + ret %struct.int64x1x4_t %.fca.0.3.insert +} + +define %struct.float32x2x4_t @test_vld4_lane_f32(float* %a, [4 x <2 x float>] %b.coerce) { +; CHECK-LABEL: test_vld4_lane_f32 +; CHECK: ld4 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [4 x <2 x float>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <2 x float>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <2 x float>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <2 x float>] %b.coerce, 3 + %0 = bitcast float* %a to i8* + %vld3_lane = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, <2 x float> %b.coerce.fca.3.extract, i32 1, i32 4) + %vld3_lane.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 0 + %vld3_lane.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 1 + %vld3_lane.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 2 + %vld3_lane.fca.3.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 3 + %.fca.0.0.insert = 
insertvalue %struct.float32x2x4_t undef, <2 x float> %vld3_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x2x4_t %.fca.0.0.insert, <2 x float> %vld3_lane.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.float32x2x4_t %.fca.0.1.insert, <2 x float> %vld3_lane.fca.2.extract, 0, 2 + %.fca.0.3.insert = insertvalue %struct.float32x2x4_t %.fca.0.2.insert, <2 x float> %vld3_lane.fca.3.extract, 0, 3 + ret %struct.float32x2x4_t %.fca.0.3.insert +} + +define %struct.float64x1x4_t @test_vld4_lane_f64(double* %a, [4 x <1 x double>] %b.coerce) { +; CHECK-LABEL: test_vld4_lane_f64 +; CHECK: ld4 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [4 x <1 x double>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <1 x double>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <1 x double>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <1 x double>] %b.coerce, 3 + %0 = bitcast double* %a to i8* + %vld3_lane = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, <1 x double> %b.coerce.fca.3.extract, i32 0, i32 8) + %vld3_lane.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 0 + %vld3_lane.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 1 + %vld3_lane.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 2 + %vld3_lane.fca.3.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 3 + %.fca.0.0.insert = insertvalue %struct.float64x1x4_t undef, <1 x double> %vld3_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float64x1x4_t %.fca.0.0.insert, <1 x double> 
%vld3_lane.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.float64x1x4_t %.fca.0.1.insert, <1 x double> %vld3_lane.fca.2.extract, 0, 2 + %.fca.0.3.insert = insertvalue %struct.float64x1x4_t %.fca.0.2.insert, <1 x double> %vld3_lane.fca.3.extract, 0, 3 + ret %struct.float64x1x4_t %.fca.0.3.insert +} + +define void @test_vst1q_lane_s8(i8* %a, <16 x i8> %b) { +; CHECK-LABEL: test_vst1q_lane_s8 +; CHECK: st1 {{{v[0-9]+}}.b}[{{[0-9]+}}], [x0] +entry: + %0 = extractelement <16 x i8> %b, i32 15 + store i8 %0, i8* %a, align 1 + ret void +} + +define void @test_vst1q_lane_s16(i16* %a, <8 x i16> %b) { +; CHECK-LABEL: test_vst1q_lane_s16 +; CHECK: st1 {{{v[0-9]+}}.h}[{{[0-9]+}}], [x0] +entry: + %0 = extractelement <8 x i16> %b, i32 7 + store i16 %0, i16* %a, align 2 + ret void +} + +define void @test_vst1q_lane_s32(i32* %a, <4 x i32> %b) { +; CHECK-LABEL: test_vst1q_lane_s32 +; CHECK: st1 {{{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %0 = extractelement <4 x i32> %b, i32 3 + store i32 %0, i32* %a, align 4 + ret void +} + +define void @test_vst1q_lane_s64(i64* %a, <2 x i64> %b) { +; CHECK-LABEL: test_vst1q_lane_s64 +; CHECK: st1 {{{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %0 = extractelement <2 x i64> %b, i32 1 + store i64 %0, i64* %a, align 8 + ret void +} + +define void @test_vst1q_lane_f32(float* %a, <4 x float> %b) { +; CHECK-LABEL: test_vst1q_lane_f32 +; CHECK: st1 {{{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %0 = extractelement <4 x float> %b, i32 3 + store float %0, float* %a, align 4 + ret void +} + +define void @test_vst1q_lane_f64(double* %a, <2 x double> %b) { +; CHECK-LABEL: test_vst1q_lane_f64 +; CHECK: st1 {{{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %0 = extractelement <2 x double> %b, i32 1 + store double %0, double* %a, align 8 + ret void +} + +define void @test_vst1_lane_s8(i8* %a, <8 x i8> %b) { +; CHECK-LABEL: test_vst1_lane_s8 +; CHECK: st1 {{{v[0-9]+}}.b}[{{[0-9]+}}], [x0] +entry: + %0 = extractelement <8 x i8> %b, i32 7 + store i8 %0, i8* %a, 
align 1 + ret void +} + +define void @test_vst1_lane_s16(i16* %a, <4 x i16> %b) { +; CHECK-LABEL: test_vst1_lane_s16 +; CHECK: st1 {{{v[0-9]+}}.h}[{{[0-9]+}}], [x0] +entry: + %0 = extractelement <4 x i16> %b, i32 3 + store i16 %0, i16* %a, align 2 + ret void +} + +define void @test_vst1_lane_s32(i32* %a, <2 x i32> %b) { +; CHECK-LABEL: test_vst1_lane_s32 +; CHECK: st1 {{{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %0 = extractelement <2 x i32> %b, i32 1 + store i32 %0, i32* %a, align 4 + ret void +} + +define void @test_vst1_lane_s64(i64* %a, <1 x i64> %b) { +; CHECK-LABEL: test_vst1_lane_s64 +; CHECK: st1 {{{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %0 = extractelement <1 x i64> %b, i32 0 + store i64 %0, i64* %a, align 8 + ret void +} + +define void @test_vst1_lane_f32(float* %a, <2 x float> %b) { +; CHECK-LABEL: test_vst1_lane_f32 +; CHECK: st1 {{{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %0 = extractelement <2 x float> %b, i32 1 + store float %0, float* %a, align 4 + ret void +} + +define void @test_vst1_lane_f64(double* %a, <1 x double> %b) { +; CHECK-LABEL: test_vst1_lane_f64 +; CHECK: st1 {{{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %0 = extractelement <1 x double> %b, i32 0 + store double %0, double* %a, align 8 + ret void +} + +define void @test_vst2q_lane_s8(i8* %a, [2 x <16 x i8>] %b.coerce) { +; CHECK-LABEL: test_vst2q_lane_s8 +; CHECK: st2 {{{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %b.coerce, 1 + tail call void @llvm.arm.neon.vst2lane.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, i32 15, i32 1) + ret void +} + +define void @test_vst2q_lane_s16(i16* %a, [2 x <8 x i16>] %b.coerce) { +; CHECK-LABEL: test_vst2q_lane_s16 +; CHECK: st2 {{{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [2 x <8 x i16>] %b.coerce, 0 + %b.coerce.fca.1.extract = 
extractvalue [2 x <8 x i16>] %b.coerce, 1 + %0 = bitcast i16* %a to i8* + tail call void @llvm.arm.neon.vst2lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, i32 7, i32 2) + ret void +} + +define void @test_vst2q_lane_s32(i32* %a, [2 x <4 x i32>] %b.coerce) { +; CHECK-LABEL: test_vst2q_lane_s32 +; CHECK: st2 {{{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %b.coerce, 1 + %0 = bitcast i32* %a to i8* + tail call void @llvm.arm.neon.vst2lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, i32 3, i32 4) + ret void +} + +define void @test_vst2q_lane_s64(i64* %a, [2 x <2 x i64>] %b.coerce) { +; CHECK-LABEL: test_vst2q_lane_s64 +; CHECK: st2 {{{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [2 x <2 x i64>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <2 x i64>] %b.coerce, 1 + %0 = bitcast i64* %a to i8* + tail call void @llvm.arm.neon.vst2lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, i32 1, i32 8) + ret void +} + +define void @test_vst2q_lane_f32(float* %a, [2 x <4 x float>] %b.coerce) { +; CHECK-LABEL: test_vst2q_lane_f32 +; CHECK: st2 {{{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [2 x <4 x float>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <4 x float>] %b.coerce, 1 + %0 = bitcast float* %a to i8* + tail call void @llvm.arm.neon.vst2lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, i32 3, i32 4) + ret void +} + +define void @test_vst2q_lane_f64(double* %a, [2 x <2 x double>] %b.coerce) { +; CHECK-LABEL: test_vst2q_lane_f64 +; CHECK: st2 {{{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [2 x <2 x double>] 
%b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <2 x double>] %b.coerce, 1 + %0 = bitcast double* %a to i8* + tail call void @llvm.arm.neon.vst2lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, i32 1, i32 8) + ret void +} + +define void @test_vst2_lane_s8(i8* %a, [2 x <8 x i8>] %b.coerce) { +; CHECK-LABEL: test_vst2_lane_s8 +; CHECK: st2 {{{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [2 x <8 x i8>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <8 x i8>] %b.coerce, 1 + tail call void @llvm.arm.neon.vst2lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, i32 7, i32 1) + ret void +} + +define void @test_vst2_lane_s16(i16* %a, [2 x <4 x i16>] %b.coerce) { +; CHECK-LABEL: test_vst2_lane_s16 +; CHECK: st2 {{{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [2 x <4 x i16>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <4 x i16>] %b.coerce, 1 + %0 = bitcast i16* %a to i8* + tail call void @llvm.arm.neon.vst2lane.v4i16(i8* %0, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, i32 3, i32 2) + ret void +} + +define void @test_vst2_lane_s32(i32* %a, [2 x <2 x i32>] %b.coerce) { +; CHECK-LABEL: test_vst2_lane_s32 +; CHECK: st2 {{{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [2 x <2 x i32>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <2 x i32>] %b.coerce, 1 + %0 = bitcast i32* %a to i8* + tail call void @llvm.arm.neon.vst2lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, i32 1, i32 4) + ret void +} + +define void @test_vst2_lane_s64(i64* %a, [2 x <1 x i64>] %b.coerce) { +; CHECK-LABEL: test_vst2_lane_s64 +; CHECK: st2 {{{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [2 x <1 x i64>] %b.coerce, 0 + 
%b.coerce.fca.1.extract = extractvalue [2 x <1 x i64>] %b.coerce, 1 + %0 = bitcast i64* %a to i8* + tail call void @llvm.arm.neon.vst2lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, i32 0, i32 8) + ret void +} + +define void @test_vst2_lane_f32(float* %a, [2 x <2 x float>] %b.coerce) { +; CHECK-LABEL: test_vst2_lane_f32 +; CHECK: st2 {{{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [2 x <2 x float>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <2 x float>] %b.coerce, 1 + %0 = bitcast float* %a to i8* + tail call void @llvm.arm.neon.vst2lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, i32 1, i32 4) + ret void +} + +define void @test_vst2_lane_f64(double* %a, [2 x <1 x double>] %b.coerce) { +; CHECK-LABEL: test_vst2_lane_f64 +; CHECK: st2 {{{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [2 x <1 x double>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [2 x <1 x double>] %b.coerce, 1 + %0 = bitcast double* %a to i8* + tail call void @llvm.arm.neon.vst2lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, i32 0, i32 8) + ret void +} + +define void @test_vst3q_lane_s8(i8* %a, [3 x <16 x i8>] %b.coerce) { +; CHECK-LABEL: test_vst3q_lane_s8 +; CHECK: st3 {{{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [3 x <16 x i8>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <16 x i8>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <16 x i8>] %b.coerce, 2 + tail call void @llvm.arm.neon.vst3lane.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, i32 15, i32 1) + ret void +} + +define void @test_vst3q_lane_s16(i16* %a, [3 x <8 x i16>] %b.coerce) { +; CHECK-LABEL: test_vst3q_lane_s16 +; 
CHECK: st3 {{{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [3 x <8 x i16>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <8 x i16>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <8 x i16>] %b.coerce, 2 + %0 = bitcast i16* %a to i8* + tail call void @llvm.arm.neon.vst3lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, i32 7, i32 2) + ret void +} + +define void @test_vst3q_lane_s32(i32* %a, [3 x <4 x i32>] %b.coerce) { +; CHECK-LABEL: test_vst3q_lane_s32 +; CHECK: st3 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [3 x <4 x i32>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <4 x i32>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <4 x i32>] %b.coerce, 2 + %0 = bitcast i32* %a to i8* + tail call void @llvm.arm.neon.vst3lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, i32 3, i32 4) + ret void +} + +define void @test_vst3q_lane_s64(i64* %a, [3 x <2 x i64>] %b.coerce) { +; CHECK-LABEL: test_vst3q_lane_s64 +; CHECK: st3 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [3 x <2 x i64>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <2 x i64>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <2 x i64>] %b.coerce, 2 + %0 = bitcast i64* %a to i8* + tail call void @llvm.arm.neon.vst3lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, i32 1, i32 8) + ret void +} + +define void @test_vst3q_lane_f32(float* %a, [3 x <4 x float>] %b.coerce) { +; CHECK-LABEL: test_vst3q_lane_f32 +; CHECK: st3 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue 
[3 x <4 x float>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <4 x float>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <4 x float>] %b.coerce, 2 + %0 = bitcast float* %a to i8* + tail call void @llvm.arm.neon.vst3lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, i32 3, i32 4) + ret void +} + +define void @test_vst3q_lane_f64(double* %a, [3 x <2 x double>] %b.coerce) { +; CHECK-LABEL: test_vst3q_lane_f64 +; CHECK: st3 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [3 x <2 x double>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <2 x double>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <2 x double>] %b.coerce, 2 + %0 = bitcast double* %a to i8* + tail call void @llvm.arm.neon.vst3lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, i32 1, i32 8) + ret void +} + +define void @test_vst3_lane_s8(i8* %a, [3 x <8 x i8>] %b.coerce) { +; CHECK-LABEL: test_vst3_lane_s8 +; CHECK: st3 {{{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [3 x <8 x i8>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <8 x i8>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <8 x i8>] %b.coerce, 2 + tail call void @llvm.arm.neon.vst3lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, i32 7, i32 1) + ret void +} + +define void @test_vst3_lane_s16(i16* %a, [3 x <4 x i16>] %b.coerce) { +; CHECK-LABEL: test_vst3_lane_s16 +; CHECK: st3 {{{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [3 x <4 x i16>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <4 x i16>] %b.coerce, 1 + %b.coerce.fca.2.extract = 
extractvalue [3 x <4 x i16>] %b.coerce, 2 + %0 = bitcast i16* %a to i8* + tail call void @llvm.arm.neon.vst3lane.v4i16(i8* %0, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, i32 3, i32 2) + ret void +} + +define void @test_vst3_lane_s32(i32* %a, [3 x <2 x i32>] %b.coerce) { +; CHECK-LABEL: test_vst3_lane_s32 +; CHECK: st3 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [3 x <2 x i32>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <2 x i32>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <2 x i32>] %b.coerce, 2 + %0 = bitcast i32* %a to i8* + tail call void @llvm.arm.neon.vst3lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, i32 1, i32 4) + ret void +} + +define void @test_vst3_lane_s64(i64* %a, [3 x <1 x i64>] %b.coerce) { +; CHECK-LABEL: test_vst3_lane_s64 +; CHECK: st3 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [3 x <1 x i64>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <1 x i64>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <1 x i64>] %b.coerce, 2 + %0 = bitcast i64* %a to i8* + tail call void @llvm.arm.neon.vst3lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, i32 0, i32 8) + ret void +} + +define void @test_vst3_lane_f32(float* %a, [3 x <2 x float>] %b.coerce) { +; CHECK-LABEL: test_vst3_lane_f32 +; CHECK: st3 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [3 x <2 x float>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <2 x float>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <2 x float>] %b.coerce, 2 + %0 = bitcast float* %a to i8* + tail call void 
@llvm.arm.neon.vst3lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, i32 1, i32 4) + ret void +} + +define void @test_vst3_lane_f64(double* %a, [3 x <1 x double>] %b.coerce) { +; CHECK-LABEL: test_vst3_lane_f64 +; CHECK: st3 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [3 x <1 x double>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [3 x <1 x double>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [3 x <1 x double>] %b.coerce, 2 + %0 = bitcast double* %a to i8* + tail call void @llvm.arm.neon.vst3lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, i32 0, i32 8) + ret void +} + +define void @test_vst4q_lane_s8(i16* %a, [4 x <16 x i8>] %b.coerce) { +; CHECK-LABEL: test_vst4q_lane_s8 +; CHECK: st4 {{{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [4 x <16 x i8>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <16 x i8>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <16 x i8>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <16 x i8>] %b.coerce, 3 + %0 = bitcast i16* %a to i8* + tail call void @llvm.arm.neon.vst4lane.v16i8(i8* %0, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, <16 x i8> %b.coerce.fca.3.extract, i32 15, i32 2) + ret void +} + +define void @test_vst4q_lane_s16(i16* %a, [4 x <8 x i16>] %b.coerce) { +; CHECK-LABEL: test_vst4q_lane_s16 +; CHECK: st4 {{{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [4 x <8 x i16>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <8 x i16>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <8 x i16>] %b.coerce, 2 + 
%b.coerce.fca.3.extract = extractvalue [4 x <8 x i16>] %b.coerce, 3 + %0 = bitcast i16* %a to i8* + tail call void @llvm.arm.neon.vst4lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, <8 x i16> %b.coerce.fca.3.extract, i32 7, i32 2) + ret void +} + +define void @test_vst4q_lane_s32(i32* %a, [4 x <4 x i32>] %b.coerce) { +; CHECK-LABEL: test_vst4q_lane_s32 +; CHECK: st4 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [4 x <4 x i32>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <4 x i32>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <4 x i32>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <4 x i32>] %b.coerce, 3 + %0 = bitcast i32* %a to i8* + tail call void @llvm.arm.neon.vst4lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, <4 x i32> %b.coerce.fca.3.extract, i32 3, i32 4) + ret void +} + +define void @test_vst4q_lane_s64(i64* %a, [4 x <2 x i64>] %b.coerce) { +; CHECK-LABEL: test_vst4q_lane_s64 +; CHECK: st4 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [4 x <2 x i64>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <2 x i64>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <2 x i64>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <2 x i64>] %b.coerce, 3 + %0 = bitcast i64* %a to i8* + tail call void @llvm.arm.neon.vst4lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, <2 x i64> %b.coerce.fca.3.extract, i32 1, i32 8) + ret void +} + +define void @test_vst4q_lane_f32(float* %a, [4 x <4 x float>] %b.coerce) { +; CHECK-LABEL: test_vst4q_lane_f32 +; CHECK: st4 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, 
{{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [4 x <4 x float>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <4 x float>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <4 x float>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <4 x float>] %b.coerce, 3 + %0 = bitcast float* %a to i8* + tail call void @llvm.arm.neon.vst4lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, <4 x float> %b.coerce.fca.3.extract, i32 3, i32 4) + ret void +} + +define void @test_vst4q_lane_f64(double* %a, [4 x <2 x double>] %b.coerce) { +; CHECK-LABEL: test_vst4q_lane_f64 +; CHECK: st4 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [4 x <2 x double>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <2 x double>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <2 x double>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <2 x double>] %b.coerce, 3 + %0 = bitcast double* %a to i8* + tail call void @llvm.arm.neon.vst4lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, <2 x double> %b.coerce.fca.3.extract, i32 1, i32 8) + ret void +} + +define void @test_vst4_lane_s8(i8* %a, [4 x <8 x i8>] %b.coerce) { +; CHECK-LABEL: test_vst4_lane_s8 +; CHECK: st4 {{{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [4 x <8 x i8>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <8 x i8>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <8 x i8>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <8 x i8>] %b.coerce, 3 + tail call void @llvm.arm.neon.vst4lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> 
%b.coerce.fca.2.extract, <8 x i8> %b.coerce.fca.3.extract, i32 7, i32 1) + ret void +} + +define void @test_vst4_lane_s16(i16* %a, [4 x <4 x i16>] %b.coerce) { +; CHECK-LABEL: test_vst4_lane_s16 +; CHECK: st4 {{{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [4 x <4 x i16>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <4 x i16>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <4 x i16>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <4 x i16>] %b.coerce, 3 + %0 = bitcast i16* %a to i8* + tail call void @llvm.arm.neon.vst4lane.v4i16(i8* %0, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, <4 x i16> %b.coerce.fca.3.extract, i32 3, i32 2) + ret void +} + +define void @test_vst4_lane_s32(i32* %a, [4 x <2 x i32>] %b.coerce) { +; CHECK-LABEL: test_vst4_lane_s32 +; CHECK: st4 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [4 x <2 x i32>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <2 x i32>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <2 x i32>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <2 x i32>] %b.coerce, 3 + %0 = bitcast i32* %a to i8* + tail call void @llvm.arm.neon.vst4lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, <2 x i32> %b.coerce.fca.3.extract, i32 1, i32 4) + ret void +} + +define void @test_vst4_lane_s64(i64* %a, [4 x <1 x i64>] %b.coerce) { +; CHECK-LABEL: test_vst4_lane_s64 +; CHECK: st4 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [4 x <1 x i64>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <1 x i64>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <1 x i64>] %b.coerce, 2 + 
%b.coerce.fca.3.extract = extractvalue [4 x <1 x i64>] %b.coerce, 3 + %0 = bitcast i64* %a to i8* + tail call void @llvm.arm.neon.vst4lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, <1 x i64> %b.coerce.fca.3.extract, i32 0, i32 8) + ret void +} + +define void @test_vst4_lane_f32(float* %a, [4 x <2 x float>] %b.coerce) { +; CHECK-LABEL: test_vst4_lane_f32 +; CHECK: st4 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [4 x <2 x float>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <2 x float>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <2 x float>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <2 x float>] %b.coerce, 3 + %0 = bitcast float* %a to i8* + tail call void @llvm.arm.neon.vst4lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, <2 x float> %b.coerce.fca.3.extract, i32 1, i32 4) + ret void +} + +define void @test_vst4_lane_f64(double* %a, [4 x <1 x double>] %b.coerce) { +; CHECK-LABEL: test_vst4_lane_f64 +; CHECK: st4 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0] +entry: + %b.coerce.fca.0.extract = extractvalue [4 x <1 x double>] %b.coerce, 0 + %b.coerce.fca.1.extract = extractvalue [4 x <1 x double>] %b.coerce, 1 + %b.coerce.fca.2.extract = extractvalue [4 x <1 x double>] %b.coerce, 2 + %b.coerce.fca.3.extract = extractvalue [4 x <1 x double>] %b.coerce, 3 + %0 = bitcast double* %a to i8* + tail call void @llvm.arm.neon.vst4lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, <1 x double> %b.coerce.fca.3.extract, i32 0, i32 8) + ret void +} + +declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8*, <16 x i8>, <16 x i8>, i32, i32) +declare { <8 x i16>, <8 x i16> } 
@llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) +declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) +declare { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2lane.v2i64(i8*, <2 x i64>, <2 x i64>, i32, i32) +declare { <4 x float>, <4 x float> } @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) +declare { <2 x double>, <2 x double> } @llvm.arm.neon.vld2lane.v2f64(i8*, <2 x double>, <2 x double>, i32, i32) +declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) +declare { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) +declare { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) +declare { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8*, i32) +declare { <2 x float>, <2 x float> } @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) +declare { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8*, i32) +declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3lane.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32, i32) +declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) +declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) +declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3lane.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32, i32) +declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) +declare { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32, i32) +declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) +declare { <4 x i16>, <4 x i16>, <4 x i16> } 
@llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) +declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) +declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8*, i32) +declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) +declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8*, i32) +declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4lane.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32, i32) +declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) +declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) +declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4lane.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32, i32) +declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) +declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32, i32) +declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) +declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) +declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) +declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8*, i32) +declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } 
@llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) +declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8*, i32) +declare { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2lane.v1i64(i8*, <1 x i64>, <1 x i64>, i32, i32) +declare { <1 x double>, <1 x double> } @llvm.arm.neon.vld2lane.v1f64(i8*, <1 x double>, <1 x double>, i32, i32) +declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32) +declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32, i32) +declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32) +declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32, i32) +declare void @llvm.arm.neon.vst2lane.v16i8(i8*, <16 x i8>, <16 x i8>, i32, i32) +declare void @llvm.arm.neon.vst2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) +declare void @llvm.arm.neon.vst2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) +declare void @llvm.arm.neon.vst2lane.v2i64(i8*, <2 x i64>, <2 x i64>, i32, i32) +declare void @llvm.arm.neon.vst2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) +declare void @llvm.arm.neon.vst2lane.v2f64(i8*, <2 x double>, <2 x double>, i32, i32) +declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) +declare void @llvm.arm.neon.vst2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) +declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) +declare void @llvm.arm.neon.vst2lane.v1i64(i8*, <1 x i64>, <1 x i64>, i32, i32) +declare void @llvm.arm.neon.vst2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) +declare void @llvm.arm.neon.vst2lane.v1f64(i8*, <1 x double>, <1 x double>, i32, i32) +declare void 
@llvm.arm.neon.vst3lane.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32, i32) +declare void @llvm.arm.neon.vst3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) +declare void @llvm.arm.neon.vst3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) +declare void @llvm.arm.neon.vst3lane.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32, i32) +declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) +declare void @llvm.arm.neon.vst3lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32, i32) +declare void @llvm.arm.neon.vst3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) +declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) +declare void @llvm.arm.neon.vst3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) +declare void @llvm.arm.neon.vst3lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32) +declare void @llvm.arm.neon.vst3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) +declare void @llvm.arm.neon.vst3lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32, i32) +declare void @llvm.arm.neon.vst4lane.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32, i32) +declare void @llvm.arm.neon.vst4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) +declare void @llvm.arm.neon.vst4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) +declare void @llvm.arm.neon.vst4lane.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32, i32) +declare void @llvm.arm.neon.vst4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) +declare void @llvm.arm.neon.vst4lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32, i32) +declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) +declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) +declare void 
@llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) +declare void @llvm.arm.neon.vst4lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32) +declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) +declare void @llvm.arm.neon.vst4lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32, i32)
\ No newline at end of file diff --git a/test/CodeGen/AArch64/neon-simd-ldst.ll b/test/CodeGen/AArch64/neon-simd-ldst.ll new file mode 100644 index 0000000..afc0901 --- /dev/null +++ b/test/CodeGen/AArch64/neon-simd-ldst.ll @@ -0,0 +1,164 @@ +; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +define void @test_ldstq_4v(i8* noalias %io, i32 %count) { +; CHECK-LABEL: test_ldstq_4v +; CHECK: ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] +; CHECK: st4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] +entry: + %tobool62 = icmp eq i32 %count, 0 + br i1 %tobool62, label %while.end, label %while.body + +while.body: ; preds = %entry, %while.body + %count.addr.063 = phi i32 [ %dec, %while.body ], [ %count, %entry ] + %dec = add i32 %count.addr.063, -1 + %vld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8* %io, i32 1) + %vld4.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 0 + %vld4.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 1 + %vld4.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 2 + %vld4.fca.3.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 3 + tail call void @llvm.arm.neon.vst4.v16i8(i8* %io, <16 x i8> %vld4.fca.0.extract, <16 x i8> %vld4.fca.1.extract, <16 x i8> %vld4.fca.2.extract, <16 x i8> %vld4.fca.3.extract, i32 1) + %tobool = icmp eq i32 %dec, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: ; preds = %while.body, %entry + ret void +} + +declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8*, i32) + +declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32) + +define void @test_ldstq_3v(i8* noalias %io, i32 %count) { +; CHECK-LABEL: test_ldstq_3v +; CHECK: ld3 {v0.16b, v1.16b, v2.16b}, [x0] +; CHECK: st3 {v0.16b, v1.16b, v2.16b}, [x0] +entry: + %tobool47 = icmp 
eq i32 %count, 0 + br i1 %tobool47, label %while.end, label %while.body + +while.body: ; preds = %entry, %while.body + %count.addr.048 = phi i32 [ %dec, %while.body ], [ %count, %entry ] + %dec = add i32 %count.addr.048, -1 + %vld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %io, i32 1) + %vld3.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 0 + %vld3.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 1 + %vld3.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 2 + tail call void @llvm.arm.neon.vst3.v16i8(i8* %io, <16 x i8> %vld3.fca.0.extract, <16 x i8> %vld3.fca.1.extract, <16 x i8> %vld3.fca.2.extract, i32 1) + %tobool = icmp eq i32 %dec, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: ; preds = %while.body, %entry + ret void +} + +declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8*, i32) + +declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32) + +define void @test_ldstq_2v(i8* noalias %io, i32 %count) { +; CHECK-LABEL: test_ldstq_2v +; CHECK: ld2 {v0.16b, v1.16b}, [x0] +; CHECK: st2 {v0.16b, v1.16b}, [x0] +entry: + %tobool22 = icmp eq i32 %count, 0 + br i1 %tobool22, label %while.end, label %while.body + +while.body: ; preds = %entry, %while.body + %count.addr.023 = phi i32 [ %dec, %while.body ], [ %count, %entry ] + %dec = add i32 %count.addr.023, -1 + %vld2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8* %io, i32 1) + %vld2.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 0 + %vld2.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 1 + tail call void @llvm.arm.neon.vst2.v16i8(i8* %io, <16 x i8> %vld2.fca.0.extract, <16 x i8> %vld2.fca.1.extract, i32 1) + %tobool = icmp eq i32 %dec, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: ; preds = %while.body, %entry + ret void +} + +declare { <16 x i8>, <16 x i8> } 
@llvm.arm.neon.vld2.v16i8(i8*, i32) + +declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>, i32) + +define void @test_ldst_4v(i8* noalias %io, i32 %count) { +; CHECK-LABEL: test_ldst_4v +; CHECK: ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] +; CHECK: st4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] +entry: + %tobool42 = icmp eq i32 %count, 0 + br i1 %tobool42, label %while.end, label %while.body + +while.body: ; preds = %entry, %while.body + %count.addr.043 = phi i32 [ %dec, %while.body ], [ %count, %entry ] + %dec = add i32 %count.addr.043, -1 + %vld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8* %io, i32 1) + %vld4.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 0 + %vld4.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 1 + %vld4.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 2 + %vld4.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 3 + tail call void @llvm.arm.neon.vst4.v8i8(i8* %io, <8 x i8> %vld4.fca.0.extract, <8 x i8> %vld4.fca.1.extract, <8 x i8> %vld4.fca.2.extract, <8 x i8> %vld4.fca.3.extract, i32 1) + %tobool = icmp eq i32 %dec, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: ; preds = %while.body, %entry + ret void +} + +declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8*, i32) + +declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) + +define void @test_ldst_3v(i8* noalias %io, i32 %count) { +; CHECK-LABEL: test_ldst_3v +; CHECK: ld3 {v0.8b, v1.8b, v2.8b}, [x0] +; CHECK: st3 {v0.8b, v1.8b, v2.8b}, [x0] +entry: + %tobool32 = icmp eq i32 %count, 0 + br i1 %tobool32, label %while.end, label %while.body + +while.body: ; preds = %entry, %while.body + %count.addr.033 = phi i32 [ %dec, %while.body ], [ %count, %entry ] + %dec = add i32 %count.addr.033, -1 + %vld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } 
@llvm.arm.neon.vld3.v8i8(i8* %io, i32 1) + %vld3.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 0 + %vld3.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 1 + %vld3.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 2 + tail call void @llvm.arm.neon.vst3.v8i8(i8* %io, <8 x i8> %vld3.fca.0.extract, <8 x i8> %vld3.fca.1.extract, <8 x i8> %vld3.fca.2.extract, i32 1) + %tobool = icmp eq i32 %dec, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: ; preds = %while.body, %entry + ret void +} + +declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8*, i32) + +declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) + +define void @test_ldst_2v(i8* noalias %io, i32 %count) { +; CHECK-LABEL: test_ldst_2v +; CHECK: ld2 {v0.8b, v1.8b}, [x0] +; CHECK: st2 {v0.8b, v1.8b}, [x0] +entry: + %tobool22 = icmp eq i32 %count, 0 + br i1 %tobool22, label %while.end, label %while.body + +while.body: ; preds = %entry, %while.body + %count.addr.023 = phi i32 [ %dec, %while.body ], [ %count, %entry ] + %dec = add i32 %count.addr.023, -1 + %vld2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8* %io, i32 1) + %vld2.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 0 + %vld2.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 1 + tail call void @llvm.arm.neon.vst2.v8i8(i8* %io, <8 x i8> %vld2.fca.0.extract, <8 x i8> %vld2.fca.1.extract, i32 1) + %tobool = icmp eq i32 %dec, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: ; preds = %while.body, %entry + ret void +} + +declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8*, i32) + +declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) + diff --git a/test/CodeGen/AArch64/neon-simd-post-ldst-multi-elem.ll b/test/CodeGen/AArch64/neon-simd-post-ldst-multi-elem.ll new file mode 100644 index 0000000..156fe1d --- /dev/null +++ 
b/test/CodeGen/AArch64/neon-simd-post-ldst-multi-elem.ll @@ -0,0 +1,354 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +;Check for a post-increment updating load. +define <4 x i16> @test_vld1_fx_update(i16** %ptr) nounwind { +; CHECK: test_vld1_fx_update +; CHECK: ld1 {v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}], #8 + %A = load i16** %ptr + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 2) + %tmp2 = getelementptr i16* %A, i32 4 + store i16* %tmp2, i16** %ptr + ret <4 x i16> %tmp1 +} + +;Check for a post-increment updating load with register increment. +define <2 x i32> @test_vld1_reg_update(i32** %ptr, i32 %inc) nounwind { +; CHECK: test_vld1_reg_update +; CHECK: ld1 {v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}], x{{[0-9]+}} + %A = load i32** %ptr + %tmp0 = bitcast i32* %A to i8* + %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 4) + %tmp2 = getelementptr i32* %A, i32 %inc + store i32* %tmp2, i32** %ptr + ret <2 x i32> %tmp1 +} + +define <2 x float> @test_vld2_fx_update(float** %ptr) nounwind { +; CHECK: test_vld2_fx_update +; CHECK: ld2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}], #16 + %A = load float** %ptr + %tmp0 = bitcast float* %A to i8* + %tmp1 = call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8* %tmp0, i32 4) + %tmp2 = extractvalue { <2 x float>, <2 x float> } %tmp1, 0 + %tmp3 = getelementptr float* %A, i32 4 + store float* %tmp3, float** %ptr + ret <2 x float> %tmp2 +} + +define <16 x i8> @test_vld2_reg_update(i8** %ptr, i32 %inc) nounwind { +; CHECK: test_vld2_reg_update +; CHECK: ld2 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [x{{[0-9]+|sp}}], x{{[0-9]+}} + %A = load i8** %ptr + %tmp0 = call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8* %A, i32 1) + %tmp1 = extractvalue { <16 x i8>, <16 x i8> } %tmp0, 0 + %tmp2 = getelementptr i8* %A, i32 %inc + store i8* %tmp2, i8** %ptr + ret <16 x i8> %tmp1 +} + +define <4 x i32> 
@test_vld3_fx_update(i32** %ptr) nounwind { +; CHECK: test_vld3_fx_update +; CHECK: ld3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}], #48 + %A = load i32** %ptr + %tmp0 = bitcast i32* %A to i8* + %tmp1 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8* %tmp0, i32 4) + %tmp2 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %tmp1, 0 + %tmp3 = getelementptr i32* %A, i32 12 + store i32* %tmp3, i32** %ptr + ret <4 x i32> %tmp2 +} + +define <4 x i16> @test_vld3_reg_update(i16** %ptr, i32 %inc) nounwind { +; CHECK: test_vld3_reg_update +; CHECK: ld3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}], x{{[0-9]+}} + %A = load i16** %ptr + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 2) + %tmp2 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %tmp1, 0 + %tmp3 = getelementptr i16* %A, i32 %inc + store i16* %tmp3, i16** %ptr + ret <4 x i16> %tmp2 +} + +define <8 x i16> @test_vld4_fx_update(i16** %ptr) nounwind { +; CHECK: test_vld4_fx_update +; CHECK: ld4 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [x{{[0-9]+|sp}}], #64 + %A = load i16** %ptr + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8* %tmp0, i32 8) + %tmp2 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %tmp1, 0 + %tmp3 = getelementptr i16* %A, i32 32 + store i16* %tmp3, i16** %ptr + ret <8 x i16> %tmp2 +} + +define <8 x i8> @test_vld4_reg_update(i8** %ptr, i32 %inc) nounwind { +; CHECK: test_vld4_reg_update +; CHECK: ld4 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}], x{{[0-9]+}} + %A = load i8** %ptr + %tmp0 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8* %A, i32 1) + %tmp1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %tmp0, 0 + %tmp2 = getelementptr i8* %A, i32 %inc + store i8* %tmp2, i8** 
%ptr + ret <8 x i8> %tmp1 +} + +define void @test_vst1_fx_update(float** %ptr, <2 x float> %B) nounwind { +; CHECK: test_vst1_fx_update +; CHECK: st1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}], #8 + %A = load float** %ptr + %tmp0 = bitcast float* %A to i8* + call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %B, i32 4) + %tmp2 = getelementptr float* %A, i32 2 + store float* %tmp2, float** %ptr + ret void +} + +define void @test_vst1_reg_update(i16** %ptr, <8 x i16> %B, i32 %inc) nounwind { +; CHECK: test_vst1_reg_update +; CHECK: st1 {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}], x{{[0-9]+}} + %A = load i16** %ptr + %tmp0 = bitcast i16* %A to i8* + call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %B, i32 2) + %tmp1 = getelementptr i16* %A, i32 %inc + store i16* %tmp1, i16** %ptr + ret void +} + +define void @test_vst2_fx_update(i64** %ptr, <1 x i64> %B) nounwind { +; CHECK: test_vst2_fx_update +; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}], #16 + %A = load i64** %ptr + %tmp0 = bitcast i64* %A to i8* + call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %B, <1 x i64> %B, i32 8) + %tmp1 = getelementptr i64* %A, i32 2 + store i64* %tmp1, i64** %ptr + ret void +} + +define void @test_vst2_reg_update(i8** %ptr, <8 x i8> %B, i32 %inc) nounwind { +; CHECK: test_vst2_reg_update +; CHECK: st2 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}], x{{[0-9]+}} + %A = load i8** %ptr + call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %B, <8 x i8> %B, i32 4) + %tmp0 = getelementptr i8* %A, i32 %inc + store i8* %tmp0, i8** %ptr + ret void +} + +define void @test_vst3_fx_update(i32** %ptr, <2 x i32> %B) nounwind { +; CHECK: test_vst3_fx_update +; CHECK: st3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}], #24 + %A = load i32** %ptr + %tmp0 = bitcast i32* %A to i8* + call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %B, <2 x i32> %B, <2 x i32> %B, i32 4) + %tmp1 = getelementptr i32* %A, i32 6 + store i32* %tmp1, i32** %ptr + ret void +} 
+ +define void @test_vst3_reg_update(i16** %ptr, <8 x i16> %B, i32 %inc) nounwind { +; CHECK: test_vst3_reg_update +; CHECK: st3 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}], x{{[0-9]+}} + %A = load i16** %ptr + %tmp0 = bitcast i16* %A to i8* + call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %B, <8 x i16> %B, <8 x i16> %B, i32 2) + %tmp1 = getelementptr i16* %A, i32 %inc + store i16* %tmp1, i16** %ptr + ret void +} + +define void @test_vst4_fx_update(float** %ptr, <4 x float> %B) nounwind { +; CHECK: test_vst4_fx_update +; CHECK: st4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}], #64 + %A = load float** %ptr + %tmp0 = bitcast float* %A to i8* + call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %B, <4 x float> %B, <4 x float> %B, <4 x float> %B, i32 4) + %tmp1 = getelementptr float* %A, i32 16 + store float* %tmp1, float** %ptr + ret void +} + +define void @test_vst4_reg_update(i8** %ptr, <8 x i8> %B, i32 %inc) nounwind { +; CHECK: test_vst4_reg_update +; CHECK: st4 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}], x{{[0-9]+}} + %A = load i8** %ptr + call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %B, <8 x i8> %B, <8 x i8> %B, <8 x i8> %B, i32 1) + %tmp0 = getelementptr i8* %A, i32 %inc + store i8* %tmp0, i8** %ptr + ret void +} + + +declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*, i32) +declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32) +declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8*, i32) +declare { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8*, i32) +declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8*, i32) +declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8*, i32) +declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8*, i32) +declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8*, i32) + +declare void 
@llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32) +declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) +declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>, i32) +declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) +declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) +declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) +declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) +declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) + +define <16 x i8> @test_vld1x2_fx_update(i8* %a, i8** %ptr) { +; CHECK: test_vld1x2_fx_update +; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [x{{[0-9]+|sp}}], #32 + %1 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x2.v16i8(i8* %a, i32 1) + %2 = extractvalue { <16 x i8>, <16 x i8> } %1, 0 + %tmp1 = getelementptr i8* %a, i32 32 + store i8* %tmp1, i8** %ptr + ret <16 x i8> %2 +} + +define <8 x i16> @test_vld1x2_reg_update(i16* %a, i16** %ptr, i32 %inc) { +; CHECK: test_vld1x2_reg_update +; CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [x{{[0-9]+|sp}}], x{{[0-9]+}} + %1 = bitcast i16* %a to i8* + %2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x2.v8i16(i8* %1, i32 2) + %3 = extractvalue { <8 x i16>, <8 x i16> } %2, 0 + %tmp1 = getelementptr i16* %a, i32 %inc + store i16* %tmp1, i16** %ptr + ret <8 x i16> %3 +} + +define <2 x i64> @test_vld1x3_fx_update(i64* %a, i64** %ptr) { +; CHECK: test_vld1x3_fx_update +; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}], #48 + %1 = bitcast i64* %a to i8* + %2 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x3.v2i64(i8* %1, i32 8) + %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 0 + %tmp1 = getelementptr i64* %a, i32 6 + store i64* %tmp1, i64** %ptr + ret <2 x i64> %3 +} + +define <8 x i16> @test_vld1x3_reg_update(i16* %a, i16** %ptr, i32 %inc) { 
+; CHECK: test_vld1x3_reg_update +; CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [x{{[0-9]+|sp}}], x{{[0-9]+}} + %1 = bitcast i16* %a to i8* + %2 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x3.v8i16(i8* %1, i32 2) + %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 0 + %tmp1 = getelementptr i16* %a, i32 %inc + store i16* %tmp1, i16** %ptr + ret <8 x i16> %3 +} + +define <4 x float> @test_vld1x4_fx_update(float* %a, float** %ptr) { +; CHECK: test_vld1x4_fx_update +; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}], #64 + %1 = bitcast float* %a to i8* + %2 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x4.v4f32(i8* %1, i32 4) + %3 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 0 + %tmp1 = getelementptr float* %a, i32 16 + store float* %tmp1, float** %ptr + ret <4 x float> %3 +} + +define <8 x i8> @test_vld1x4_reg_update(i8* readonly %a, i8** %ptr, i32 %inc) #0 { +; CHECK: test_vld1x4_reg_update +; CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}], x{{[0-9]+}} + %1 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x4.v8i8(i8* %a, i32 1) + %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0 + %tmp1 = getelementptr i8* %a, i32 %inc + store i8* %tmp1, i8** %ptr + ret <8 x i8> %2 +} + +define void @test_vst1x2_fx_update(i8* %a, [2 x <16 x i8>] %b.coerce, i8** %ptr) #2 { +; CHECK: test_vst1x2_fx_update +; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [x{{[0-9]+|sp}}], #32 + %1 = extractvalue [2 x <16 x i8>] %b.coerce, 0 + %2 = extractvalue [2 x <16 x i8>] %b.coerce, 1 + tail call void @llvm.aarch64.neon.vst1x2.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, i32 1) + %tmp1 = getelementptr i8* %a, i32 32 + store i8* %tmp1, i8** %ptr + ret void +} + +define void @test_vst1x2_reg_update(i16* %a, [2 x <8 x i16>] %b.coerce, i16** %ptr, 
i32 %inc) #2 { +; CHECK: test_vst1x2_reg_update +; CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [x{{[0-9]+|sp}}], x{{[0-9]+}} + %1 = extractvalue [2 x <8 x i16>] %b.coerce, 0 + %2 = extractvalue [2 x <8 x i16>] %b.coerce, 1 + %3 = bitcast i16* %a to i8* + tail call void @llvm.aarch64.neon.vst1x2.v8i16(i8* %3, <8 x i16> %1, <8 x i16> %2, i32 2) + %tmp1 = getelementptr i16* %a, i32 %inc + store i16* %tmp1, i16** %ptr + ret void +} + +define void @test_vst1x3_fx_update(i32* %a, [3 x <2 x i32>] %b.coerce, i32** %ptr) #2 { +; CHECK: test_vst1x3_fx_update +; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}], #24 + %1 = extractvalue [3 x <2 x i32>] %b.coerce, 0 + %2 = extractvalue [3 x <2 x i32>] %b.coerce, 1 + %3 = extractvalue [3 x <2 x i32>] %b.coerce, 2 + %4 = bitcast i32* %a to i8* + tail call void @llvm.aarch64.neon.vst1x3.v2i32(i8* %4, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, i32 4) + %tmp1 = getelementptr i32* %a, i32 6 + store i32* %tmp1, i32** %ptr + ret void +} + +define void @test_vst1x3_reg_update(i64* %a, [3 x <1 x i64>] %b.coerce, i64** %ptr, i32 %inc) #2 { +; CHECK: test_vst1x3_reg_update +; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}], x{{[0-9]+}} + %1 = extractvalue [3 x <1 x i64>] %b.coerce, 0 + %2 = extractvalue [3 x <1 x i64>] %b.coerce, 1 + %3 = extractvalue [3 x <1 x i64>] %b.coerce, 2 + %4 = bitcast i64* %a to i8* + tail call void @llvm.aarch64.neon.vst1x3.v1i64(i8* %4, <1 x i64> %1, <1 x i64> %2, <1 x i64> %3, i32 8) + %tmp1 = getelementptr i64* %a, i32 %inc + store i64* %tmp1, i64** %ptr + ret void +} + +define void @test_vst1x4_fx_update(float* %a, [4 x <4 x float>] %b.coerce, float** %ptr) #2 { +; CHECK: test_vst1x4_fx_update +; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}], #64 + %1 = extractvalue [4 x <4 x float>] %b.coerce, 0 + %2 = extractvalue [4 x <4 x float>] %b.coerce, 1 + %3 = extractvalue [4 x <4 x float>] %b.coerce, 2 + %4 = 
extractvalue [4 x <4 x float>] %b.coerce, 3 + %5 = bitcast float* %a to i8* + tail call void @llvm.aarch64.neon.vst1x4.v4f32(i8* %5, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4, i32 4) + %tmp1 = getelementptr float* %a, i32 16 + store float* %tmp1, float** %ptr + ret void +} + +define void @test_vst1x4_reg_update(double* %a, [4 x <2 x double>] %b.coerce, double** %ptr, i32 %inc) #2 { +; CHECK: test_vst1x4_reg_update +; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}], x{{[0-9]+}} + %1 = extractvalue [4 x <2 x double>] %b.coerce, 0 + %2 = extractvalue [4 x <2 x double>] %b.coerce, 1 + %3 = extractvalue [4 x <2 x double>] %b.coerce, 2 + %4 = extractvalue [4 x <2 x double>] %b.coerce, 3 + %5 = bitcast double* %a to i8* + tail call void @llvm.aarch64.neon.vst1x4.v2f64(i8* %5, <2 x double> %1, <2 x double> %2, <2 x double> %3, <2 x double> %4, i32 8) + %tmp1 = getelementptr double* %a, i32 %inc + store double* %tmp1, double** %ptr + ret void +} + +declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x2.v16i8(i8*, i32) +declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x2.v8i16(i8*, i32) +declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x3.v2i64(i8*, i32) +declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x3.v8i16(i8*, i32) +declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x4.v4f32(i8*, i32) +declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x4.v8i8(i8*, i32) +declare void @llvm.aarch64.neon.vst1x2.v16i8(i8*, <16 x i8>, <16 x i8>, i32) +declare void @llvm.aarch64.neon.vst1x2.v8i16(i8*, <8 x i16>, <8 x i16>, i32) +declare void @llvm.aarch64.neon.vst1x3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) +declare void @llvm.aarch64.neon.vst1x3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32) +declare void @llvm.aarch64.neon.vst1x4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) #3 +declare 
void @llvm.aarch64.neon.vst1x4.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32) #3 diff --git a/test/CodeGen/AArch64/neon-simd-post-ldst-one.ll b/test/CodeGen/AArch64/neon-simd-post-ldst-one.ll new file mode 100644 index 0000000..80a9347 --- /dev/null +++ b/test/CodeGen/AArch64/neon-simd-post-ldst-one.ll @@ -0,0 +1,319 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +define { [2 x <16 x i8>] } @test_vld2q_dup_fx_update(i8* %a, i8** %ptr) { +; CHECK-LABEL: test_vld2q_dup_fx_update +; CHECK: ld2r {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [x{{[0-9]+|sp}}], #2 + %1 = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %a, <16 x i8> undef, <16 x i8> undef, i32 0, i32 1) + %2 = extractvalue { <16 x i8>, <16 x i8> } %1, 0 + %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer + %4 = extractvalue { <16 x i8>, <16 x i8> } %1, 1 + %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> zeroinitializer + %6 = insertvalue { [2 x <16 x i8>] } undef, <16 x i8> %3, 0, 0 + %7 = insertvalue { [2 x <16 x i8>] } %6, <16 x i8> %5, 0, 1 + %tmp1 = getelementptr i8* %a, i32 2 + store i8* %tmp1, i8** %ptr + ret { [2 x <16 x i8>] } %7 +} + +define { [2 x <4 x i32>] } @test_vld2q_dup_reg_update(i32* %a, i32** %ptr, i32 %inc) { +; CHECK-LABEL: test_vld2q_dup_reg_update +; CHECK: ld2r {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}], x{{[0-9]+}} + %1 = bitcast i32* %a to i8* + %2 = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8* %1, <4 x i32> undef, <4 x i32> undef, i32 0, i32 4) + %3 = extractvalue { <4 x i32>, <4 x i32> } %2, 0 + %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer + %5 = extractvalue { <4 x i32>, <4 x i32> } %2, 1 + %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <4 x i32> zeroinitializer + %7 = insertvalue { [2 x <4 x i32>] } undef, <4 x i32> %4, 0, 0 + %8 = insertvalue { [2 x <4 x i32>] } %7, <4 x i32> %6, 
0, 1 + %tmp1 = getelementptr i32* %a, i32 %inc + store i32* %tmp1, i32** %ptr + ret { [2 x <4 x i32>] } %8 +} + +define { [3 x <4 x i16>] } @test_vld3_dup_fx_update(i16* %a, i16** %ptr) { +; CHECK-LABEL: test_vld3_dup_fx_update +; CHECK: ld3r {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}], #6 + %1 = bitcast i16* %a to i8* + %2 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8* %1, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) + %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 0 + %4 = shufflevector <4 x i16> %3, <4 x i16> undef, <4 x i32> zeroinitializer + %5 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 1 + %6 = shufflevector <4 x i16> %5, <4 x i16> undef, <4 x i32> zeroinitializer + %7 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 2 + %8 = shufflevector <4 x i16> %7, <4 x i16> undef, <4 x i32> zeroinitializer + %9 = insertvalue { [3 x <4 x i16>] } undef, <4 x i16> %4, 0, 0 + %10 = insertvalue { [3 x <4 x i16>] } %9, <4 x i16> %6, 0, 1 + %11 = insertvalue { [3 x <4 x i16>] } %10, <4 x i16> %8, 0, 2 + %tmp1 = getelementptr i16* %a, i32 3 + store i16* %tmp1, i16** %ptr + ret { [3 x <4 x i16>] } %11 +} + +define { [3 x <8 x i8>] } @test_vld3_dup_reg_update(i8* %a, i8** %ptr, i32 %inc) { +; CHECK-LABEL: test_vld3_dup_reg_update +; CHECK: ld3r {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}], x{{[0-9]+}} + %1 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) + %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 0 + %3 = shufflevector <8 x i8> %2, <8 x i8> undef, <8 x i32> zeroinitializer + %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 1 + %5 = shufflevector <8 x i8> %4, <8 x i8> undef, <8 x i32> zeroinitializer + %6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 2 + %7 = shufflevector <8 x i8> %6, <8 x i8> undef, <8 x i32> zeroinitializer + %8 = 
insertvalue { [3 x <8 x i8>] } undef, <8 x i8> %3, 0, 0 + %9 = insertvalue { [3 x <8 x i8>] } %8, <8 x i8> %5, 0, 1 + %10 = insertvalue { [3 x <8 x i8>] } %9, <8 x i8> %7, 0, 2 + %tmp1 = getelementptr i8* %a, i32 %inc + store i8* %tmp1, i8** %ptr + ret { [3 x <8 x i8>] }%10 +} + +define { [4 x <2 x i32>] } @test_vld4_dup_fx_update(i32* %a, i32** %ptr) #0 { +; CHECK-LABEL: test_vld4_dup_fx_update +; CHECK: ld4r {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}], #16 + %1 = bitcast i32* %a to i8* + %2 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8* %1, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4) + %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 0 + %4 = shufflevector <2 x i32> %3, <2 x i32> undef, <2 x i32> zeroinitializer + %5 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 1 + %6 = shufflevector <2 x i32> %5, <2 x i32> undef, <2 x i32> zeroinitializer + %7 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 2 + %8 = shufflevector <2 x i32> %7, <2 x i32> undef, <2 x i32> zeroinitializer + %9 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 3 + %10 = shufflevector <2 x i32> %9, <2 x i32> undef, <2 x i32> zeroinitializer + %11 = insertvalue { [4 x <2 x i32>] } undef, <2 x i32> %4, 0, 0 + %12 = insertvalue { [4 x <2 x i32>] } %11, <2 x i32> %6, 0, 1 + %13 = insertvalue { [4 x <2 x i32>] } %12, <2 x i32> %8, 0, 2 + %14 = insertvalue { [4 x <2 x i32>] } %13, <2 x i32> %10, 0, 3 + %tmp1 = getelementptr i32* %a, i32 4 + store i32* %tmp1, i32** %ptr + ret { [4 x <2 x i32>] } %14 +} + +define { [4 x <2 x double>] } @test_vld4_dup_reg_update(double* %a, double** %ptr, i32 %inc) { +; CHECK-LABEL: test_vld4_dup_reg_update +; CHECK: ld4r {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}], x{{[0-9]+}} + %1 = bitcast double* %a to i8* + %2 = tail call { <2 x double>, 
<2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8* %1, <2 x double> undef, <2 x double> undef, <2 x double> undef, <2 x double> undef, i32 0, i32 8) + %3 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 0 + %4 = shufflevector <2 x double> %3, <2 x double> undef, <2 x i32> zeroinitializer + %5 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 1 + %6 = shufflevector <2 x double> %5, <2 x double> undef, <2 x i32> zeroinitializer + %7 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 2 + %8 = shufflevector <2 x double> %7, <2 x double> undef, <2 x i32> zeroinitializer + %9 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 3 + %10 = shufflevector <2 x double> %9, <2 x double> undef, <2 x i32> zeroinitializer + %11 = insertvalue { [4 x <2 x double>] } undef, <2 x double> %4, 0, 0 + %12 = insertvalue { [4 x <2 x double>] } %11, <2 x double> %6, 0, 1 + %13 = insertvalue { [4 x <2 x double>] } %12, <2 x double> %8, 0, 2 + %14 = insertvalue { [4 x <2 x double>] } %13, <2 x double> %10, 0, 3 + %tmp1 = getelementptr double* %a, i32 %inc + store double* %tmp1, double** %ptr + ret { [4 x <2 x double>] } %14 +} + +define { [2 x <8 x i8>] } @test_vld2_lane_fx_update(i8* %a, [2 x <8 x i8>] %b, i8** %ptr) { +; CHECK-LABEL: test_vld2_lane_fx_update +; CHECK: ld2 {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [x{{[0-9]+|sp}}], #2 + %1 = extractvalue [2 x <8 x i8>] %b, 0 + %2 = extractvalue [2 x <8 x i8>] %b, 1 + %3 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, i32 7, i32 1) + %4 = extractvalue { <8 x i8>, <8 x i8> } %3, 0 + %5 = extractvalue { <8 x i8>, <8 x i8> } %3, 1 + %6 = insertvalue { [2 x <8 x i8>] } undef, <8 x i8> %4, 0, 0 + %7 = insertvalue { [2 x <8 x i8>] } %6, <8 x i8> %5, 0, 1 + %tmp1 = getelementptr i8* %a, i32 2 + store i8* %tmp1, i8** %ptr + ret { [2 x <8 x i8>] } %7 +} + +define { [2 x <8 x 
i8>] } @test_vld2_lane_reg_update(i8* %a, [2 x <8 x i8>] %b, i8** %ptr, i32 %inc) { +; CHECK-LABEL: test_vld2_lane_reg_update +; CHECK: ld2 {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[6], [x{{[0-9]+|sp}}], x{{[0-9]+}} + %1 = extractvalue [2 x <8 x i8>] %b, 0 + %2 = extractvalue [2 x <8 x i8>] %b, 1 + %3 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, i32 6, i32 1) + %4 = extractvalue { <8 x i8>, <8 x i8> } %3, 0 + %5 = extractvalue { <8 x i8>, <8 x i8> } %3, 1 + %6 = insertvalue { [2 x <8 x i8>] } undef, <8 x i8> %4, 0, 0 + %7 = insertvalue { [2 x <8 x i8>] } %6, <8 x i8> %5, 0, 1 + %tmp1 = getelementptr i8* %a, i32 %inc + store i8* %tmp1, i8** %ptr + ret { [2 x <8 x i8>] } %7 +} + +define { [3 x <2 x float>] } @test_vld3_lane_fx_update(float* %a, [3 x <2 x float>] %b, float** %ptr) { +; CHECK-LABEL: test_vld3_lane_fx_update +; CHECK: ld3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [x{{[0-9]+|sp}}], #12 + %1 = extractvalue [3 x <2 x float>] %b, 0 + %2 = extractvalue [3 x <2 x float>] %b, 1 + %3 = extractvalue [3 x <2 x float>] %b, 2 + %4 = bitcast float* %a to i8* + %5 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8* %4, <2 x float> %1, <2 x float> %2, <2 x float> %3, i32 1, i32 4) + %6 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %5, 0 + %7 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %5, 1 + %8 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %5, 2 + %9 = insertvalue { [3 x <2 x float>] } undef, <2 x float> %6, 0, 0 + %10 = insertvalue { [3 x <2 x float>] } %9, <2 x float> %7, 0, 1 + %11 = insertvalue { [3 x <2 x float>] } %10, <2 x float> %8, 0, 2 + %tmp1 = getelementptr float* %a, i32 3 + store float* %tmp1, float** %ptr + ret { [3 x <2 x float>] } %11 +} + +define { [3 x <4 x i16>] } @test_vld3_lane_reg_update(i16* %a, [3 x <4 x i16>] %b, i16** %ptr, i32 %inc) { +; CHECK-LABEL: test_vld3_lane_reg_update +; CHECK: ld3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, 
v{{[0-9]+}}.h}[3], [x{{[0-9]+|sp}}], x{{[0-9]+}} + %1 = extractvalue [3 x <4 x i16>] %b, 0 + %2 = extractvalue [3 x <4 x i16>] %b, 1 + %3 = extractvalue [3 x <4 x i16>] %b, 2 + %4 = bitcast i16* %a to i8* + %5 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8* %4, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 3, i32 2) + %6 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %5, 0 + %7 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %5, 1 + %8 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %5, 2 + %9 = insertvalue { [3 x <4 x i16>] } undef, <4 x i16> %6, 0, 0 + %10 = insertvalue { [3 x <4 x i16>] } %9, <4 x i16> %7, 0, 1 + %11 = insertvalue { [3 x <4 x i16>] } %10, <4 x i16> %8, 0, 2 + %tmp1 = getelementptr i16* %a, i32 %inc + store i16* %tmp1, i16** %ptr + ret { [3 x <4 x i16>] } %11 +} + +define { [4 x <2 x i32>] } @test_vld4_lane_fx_update(i32* readonly %a, [4 x <2 x i32>] %b, i32** %ptr) { +; CHECK-LABEL: test_vld4_lane_fx_update +; CHECK: ld4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [x{{[0-9]+|sp}}], #16 + %1 = extractvalue [4 x <2 x i32>] %b, 0 + %2 = extractvalue [4 x <2 x i32>] %b, 1 + %3 = extractvalue [4 x <2 x i32>] %b, 2 + %4 = extractvalue [4 x <2 x i32>] %b, 3 + %5 = bitcast i32* %a to i8* + %6 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8* %5, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, <2 x i32> %4, i32 1, i32 4) + %7 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %6, 0 + %8 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %6, 1 + %9 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %6, 2 + %10 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %6, 3 + %11 = insertvalue { [4 x <2 x i32>] } undef, <2 x i32> %7, 0, 0 + %12 = insertvalue { [4 x <2 x i32>] } %11, <2 x i32> %8, 0, 1 + %13 = insertvalue { [4 x <2 x i32>] } %12, <2 x i32> %9, 0, 2 + %14 = insertvalue { [4 x <2 x i32>] } %13, <2 x i32> 
%10, 0, 3 + %tmp1 = getelementptr i32* %a, i32 4 + store i32* %tmp1, i32** %ptr + ret { [4 x <2 x i32>] } %14 +} + +define { [4 x <2 x double>] } @test_vld4_lane_reg_update(double* readonly %a, [4 x <2 x double>] %b, double** %ptr, i32 %inc) { +; CHECK-LABEL: test_vld4_lane_reg_update +; CHECK: ld4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [x{{[0-9]+|sp}}], x{{[0-9]+}} + %1 = extractvalue [4 x <2 x double>] %b, 0 + %2 = extractvalue [4 x <2 x double>] %b, 1 + %3 = extractvalue [4 x <2 x double>] %b, 2 + %4 = extractvalue [4 x <2 x double>] %b, 3 + %5 = bitcast double* %a to i8* + %6 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8* %5, <2 x double> %1, <2 x double> %2, <2 x double> %3, <2 x double> %4, i32 1, i32 8) + %7 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %6, 0 + %8 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %6, 1 + %9 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %6, 2 + %10 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %6, 3 + %11 = insertvalue { [4 x <2 x double>] } undef, <2 x double> %7, 0, 0 + %12 = insertvalue { [4 x <2 x double>] } %11, <2 x double> %8, 0, 1 + %13 = insertvalue { [4 x <2 x double>] } %12, <2 x double> %9, 0, 2 + %14 = insertvalue { [4 x <2 x double>] } %13, <2 x double> %10, 0, 3 + %tmp1 = getelementptr double* %a, i32 %inc + store double* %tmp1, double** %ptr + ret { [4 x <2 x double>] } %14 +} + +define void @test_vst2_lane_fx_update(i8* %a, [2 x <8 x i8>] %b, i8** %ptr) { +; CHECK-LABEL: test_vst2_lane_fx_update +; CHECK: st2 {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [x{{[0-9]+|sp}}], #2 + %1 = extractvalue [2 x <8 x i8>] %b, 0 + %2 = extractvalue [2 x <8 x i8>] %b, 1 + call void @llvm.arm.neon.vst2lane.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, i32 7, i32 1) + %tmp1 = getelementptr i8* %a, i32 2 + store i8* %tmp1, i8** %ptr + ret void +} + +define 
void @test_vst2_lane_reg_update(i32* %a, [2 x <2 x i32>] %b.coerce, i32** %ptr, i32 %inc) { +; CHECK-LABEL: test_vst2_lane_reg_update +; CHECK: st2 {v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [x{{[0-9]+|sp}}], x{{[0-9]+}} + %1 = extractvalue [2 x <2 x i32>] %b.coerce, 0 + %2 = extractvalue [2 x <2 x i32>] %b.coerce, 1 + %3 = bitcast i32* %a to i8* + tail call void @llvm.arm.neon.vst2lane.v2i32(i8* %3, <2 x i32> %1, <2 x i32> %2, i32 1, i32 4) + %tmp1 = getelementptr i32* %a, i32 %inc + store i32* %tmp1, i32** %ptr + ret void +} + +define void @test_vst3_lane_fx_update(float* %a, [3 x <4 x float>] %b, float** %ptr) { +; CHECK-LABEL: test_vst3_lane_fx_update +; CHECK: st3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [x{{[0-9]+|sp}}], #12 + %1 = extractvalue [3 x <4 x float>] %b, 0 + %2 = extractvalue [3 x <4 x float>] %b, 1 + %3 = extractvalue [3 x <4 x float>] %b, 2 + %4 = bitcast float* %a to i8* + call void @llvm.arm.neon.vst3lane.v4f32(i8* %4, <4 x float> %1, <4 x float> %2, <4 x float> %3, i32 3, i32 4) + %tmp1 = getelementptr float* %a, i32 3 + store float* %tmp1, float** %ptr + ret void +} + +; Function Attrs: nounwind +define void @test_vst3_lane_reg_update(i16* %a, [3 x <4 x i16>] %b, i16** %ptr, i32 %inc) { +; CHECK-LABEL: test_vst3_lane_reg_update +; CHECK: st3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [x{{[0-9]+|sp}}], x{{[0-9]+}} + %1 = extractvalue [3 x <4 x i16>] %b, 0 + %2 = extractvalue [3 x <4 x i16>] %b, 1 + %3 = extractvalue [3 x <4 x i16>] %b, 2 + %4 = bitcast i16* %a to i8* + tail call void @llvm.arm.neon.vst3lane.v4i16(i8* %4, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 3, i32 2) + %tmp1 = getelementptr i16* %a, i32 %inc + store i16* %tmp1, i16** %ptr + ret void +} + +define void @test_vst4_lane_fx_update(double* %a, [4 x <2 x double>] %b.coerce, double** %ptr) { +; CHECK-LABEL: test_vst4_lane_fx_update +; CHECK: st4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [x{{[0-9]+|sp}}], #32 + %1 = extractvalue [4 x <2 x 
double>] %b.coerce, 0 + %2 = extractvalue [4 x <2 x double>] %b.coerce, 1 + %3 = extractvalue [4 x <2 x double>] %b.coerce, 2 + %4 = extractvalue [4 x <2 x double>] %b.coerce, 3 + %5 = bitcast double* %a to i8* + tail call void @llvm.arm.neon.vst4lane.v2f64(i8* %5, <2 x double> %1, <2 x double> %2, <2 x double> %3, <2 x double> %4, i32 1, i32 8) + %tmp1 = getelementptr double* %a, i32 4 + store double* %tmp1, double** %ptr + ret void +} + + +define void @test_vst4_lane_reg_update(float* %a, [4 x <2 x float>] %b.coerce, float** %ptr, i32 %inc) { +; CHECK-LABEL: test_vst4_lane_reg_update +; CHECK: st4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [x{{[0-9]+|sp}}], x{{[0-9]+}} + %1 = extractvalue [4 x <2 x float>] %b.coerce, 0 + %2 = extractvalue [4 x <2 x float>] %b.coerce, 1 + %3 = extractvalue [4 x <2 x float>] %b.coerce, 2 + %4 = extractvalue [4 x <2 x float>] %b.coerce, 3 + %5 = bitcast float* %a to i8* + tail call void @llvm.arm.neon.vst4lane.v2f32(i8* %5, <2 x float> %1, <2 x float> %2, <2 x float> %3, <2 x float> %4, i32 1, i32 4) + %tmp1 = getelementptr float* %a, i32 %inc + store float* %tmp1, float** %ptr + ret void +} + +declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) +declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8*, <16 x i8>, <16 x i8>, i32, i32) +declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) +declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) +declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) +declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) +declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, 
i32, i32) +declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) +declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) +declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) +declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) +declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) +declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) +declare void @llvm.arm.neon.vst4lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32, i32) diff --git a/test/CodeGen/AArch64/neon-simd-shift.ll b/test/CodeGen/AArch64/neon-simd-shift.ll new file mode 100644 index 0000000..fd76265 --- /dev/null +++ b/test/CodeGen/AArch64/neon-simd-shift.ll @@ -0,0 +1,1556 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) { +; CHECK: test_vshr_n_s8 +; CHECK: sshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vshr_n = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + ret <8 x i8> %vshr_n +} + +define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) { +; CHECK: test_vshr_n_s16 +; CHECK: sshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vshr_n = ashr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3> + ret <4 x i16> %vshr_n +} + +define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) { +; CHECK: test_vshr_n_s32 +; CHECK: sshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vshr_n = ashr <2 x i32> %a, <i32 3, i32 3> + ret <2 x i32> %vshr_n +} + +define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) { +; CHECK: test_vshrq_n_s8 +; CHECK: sshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vshr_n = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + ret <16 x i8> %vshr_n +} + +define <8 x 
i16> @test_vshrq_n_s16(<8 x i16> %a) { +; CHECK: test_vshrq_n_s16 +; CHECK: sshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vshr_n = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> + ret <8 x i16> %vshr_n +} + +define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) { +; CHECK: test_vshrq_n_s32 +; CHECK: sshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vshr_n = ashr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3> + ret <4 x i32> %vshr_n +} + +define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) { +; CHECK: test_vshrq_n_s64 +; CHECK: sshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vshr_n = ashr <2 x i64> %a, <i64 3, i64 3> + ret <2 x i64> %vshr_n +} + +define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) { +; CHECK: test_vshr_n_u8 +; CHECK: ushr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vshr_n = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + ret <8 x i8> %vshr_n +} + +define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) { +; CHECK: test_vshr_n_u16 +; CHECK: ushr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vshr_n = lshr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3> + ret <4 x i16> %vshr_n +} + +define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) { +; CHECK: test_vshr_n_u32 +; CHECK: ushr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vshr_n = lshr <2 x i32> %a, <i32 3, i32 3> + ret <2 x i32> %vshr_n +} + +define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) { +; CHECK: test_vshrq_n_u8 +; CHECK: ushr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vshr_n = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + ret <16 x i8> %vshr_n +} + +define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) { +; CHECK: test_vshrq_n_u16 +; CHECK: ushr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vshr_n = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> + ret <8 x i16> %vshr_n +} + +define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) { +; CHECK: test_vshrq_n_u32 +; CHECK: ushr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vshr_n = lshr <4 x i32> %a, <i32 
3, i32 3, i32 3, i32 3> + ret <4 x i32> %vshr_n +} + +define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) { +; CHECK: test_vshrq_n_u64 +; CHECK: ushr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vshr_n = lshr <2 x i64> %a, <i64 3, i64 3> + ret <2 x i64> %vshr_n +} + +define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vsra_n_s8 +; CHECK: ssra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vsra_n = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + %1 = add <8 x i8> %vsra_n, %a + ret <8 x i8> %1 +} + +define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vsra_n_s16 +; CHECK: ssra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vsra_n = ashr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3> + %1 = add <4 x i16> %vsra_n, %a + ret <4 x i16> %1 +} + +define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vsra_n_s32 +; CHECK: ssra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vsra_n = ashr <2 x i32> %b, <i32 3, i32 3> + %1 = add <2 x i32> %vsra_n, %a + ret <2 x i32> %1 +} + +define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vsraq_n_s8 +; CHECK: ssra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vsra_n = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + %1 = add <16 x i8> %vsra_n, %a + ret <16 x i8> %1 +} + +define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vsraq_n_s16 +; CHECK: ssra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vsra_n = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> + %1 = add <8 x i16> %vsra_n, %a + ret <8 x i16> %1 +} + +define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vsraq_n_s32 +; CHECK: ssra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vsra_n = ashr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3> + %1 = add <4 x i32> %vsra_n, %a + ret <4 x i32> %1 +} + +define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vsraq_n_s64 
+; CHECK: ssra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vsra_n = ashr <2 x i64> %b, <i64 3, i64 3> + %1 = add <2 x i64> %vsra_n, %a + ret <2 x i64> %1 +} + +define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vsra_n_u8 +; CHECK: usra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vsra_n = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + %1 = add <8 x i8> %vsra_n, %a + ret <8 x i8> %1 +} + +define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vsra_n_u16 +; CHECK: usra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vsra_n = lshr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3> + %1 = add <4 x i16> %vsra_n, %a + ret <4 x i16> %1 +} + +define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vsra_n_u32 +; CHECK: usra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vsra_n = lshr <2 x i32> %b, <i32 3, i32 3> + %1 = add <2 x i32> %vsra_n, %a + ret <2 x i32> %1 +} + +define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vsraq_n_u8 +; CHECK: usra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vsra_n = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + %1 = add <16 x i8> %vsra_n, %a + ret <16 x i8> %1 +} + +define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vsraq_n_u16 +; CHECK: usra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vsra_n = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> + %1 = add <8 x i16> %vsra_n, %a + ret <8 x i16> %1 +} + +define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vsraq_n_u32 +; CHECK: usra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vsra_n = lshr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3> + %1 = add <4 x i32> %vsra_n, %a + ret <4 x i32> %1 +} + +define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vsraq_n_u64 +; CHECK: usra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vsra_n = lshr <2 x i64> %b, <i64 3, i64 3> + %1 = 
add <2 x i64> %vsra_n, %a + ret <2 x i64> %1 +} + +define <8 x i8> @test_vrshr_n_s8(<8 x i8> %a) { +; CHECK: test_vrshr_n_s8 +; CHECK: srshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vrshr_n = tail call <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8> %a, i32 3) + ret <8 x i8> %vrshr_n +} + + +define <4 x i16> @test_vrshr_n_s16(<4 x i16> %a) { +; CHECK: test_vrshr_n_s16 +; CHECK: srshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vrshr_n = tail call <4 x i16> @llvm.aarch64.neon.vsrshr.v4i16(<4 x i16> %a, i32 3) + ret <4 x i16> %vrshr_n +} + + +define <2 x i32> @test_vrshr_n_s32(<2 x i32> %a) { +; CHECK: test_vrshr_n_s32 +; CHECK: srshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vrshr_n = tail call <2 x i32> @llvm.aarch64.neon.vsrshr.v2i32(<2 x i32> %a, i32 3) + ret <2 x i32> %vrshr_n +} + + +define <16 x i8> @test_vrshrq_n_s8(<16 x i8> %a) { +; CHECK: test_vrshrq_n_s8 +; CHECK: srshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vrshr_n = tail call <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8> %a, i32 3) + ret <16 x i8> %vrshr_n +} + + +define <8 x i16> @test_vrshrq_n_s16(<8 x i16> %a) { +; CHECK: test_vrshrq_n_s16 +; CHECK: srshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vrshr_n = tail call <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16> %a, i32 3) + ret <8 x i16> %vrshr_n +} + + +define <4 x i32> @test_vrshrq_n_s32(<4 x i32> %a) { +; CHECK: test_vrshrq_n_s32 +; CHECK: srshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vrshr_n = tail call <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32> %a, i32 3) + ret <4 x i32> %vrshr_n +} + + +define <2 x i64> @test_vrshrq_n_s64(<2 x i64> %a) { +; CHECK: test_vrshrq_n_s64 +; CHECK: srshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vrshr_n = tail call <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64> %a, i32 3) + ret <2 x i64> %vrshr_n +} + + +define <8 x i8> @test_vrshr_n_u8(<8 x i8> %a) { +; CHECK: test_vrshr_n_u8 +; CHECK: urshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vrshr_n = tail call <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8> %a, i32 3) + 
ret <8 x i8> %vrshr_n +} + + +define <4 x i16> @test_vrshr_n_u16(<4 x i16> %a) { +; CHECK: test_vrshr_n_u16 +; CHECK: urshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vrshr_n = tail call <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16> %a, i32 3) + ret <4 x i16> %vrshr_n +} + + +define <2 x i32> @test_vrshr_n_u32(<2 x i32> %a) { +; CHECK: test_vrshr_n_u32 +; CHECK: urshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vrshr_n = tail call <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32> %a, i32 3) + ret <2 x i32> %vrshr_n +} + + +define <16 x i8> @test_vrshrq_n_u8(<16 x i8> %a) { +; CHECK: test_vrshrq_n_u8 +; CHECK: urshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vrshr_n = tail call <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8> %a, i32 3) + ret <16 x i8> %vrshr_n +} + + +define <8 x i16> @test_vrshrq_n_u16(<8 x i16> %a) { +; CHECK: test_vrshrq_n_u16 +; CHECK: urshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vrshr_n = tail call <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16> %a, i32 3) + ret <8 x i16> %vrshr_n +} + + +define <4 x i32> @test_vrshrq_n_u32(<4 x i32> %a) { +; CHECK: test_vrshrq_n_u32 +; CHECK: urshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vrshr_n = tail call <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32> %a, i32 3) + ret <4 x i32> %vrshr_n +} + + +define <2 x i64> @test_vrshrq_n_u64(<2 x i64> %a) { +; CHECK: test_vrshrq_n_u64 +; CHECK: urshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vrshr_n = tail call <2 x i64> @llvm.aarch64.neon.vurshr.v2i64(<2 x i64> %a, i32 3) + ret <2 x i64> %vrshr_n +} + + +define <8 x i8> @test_vrsra_n_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vrsra_n_s8 +; CHECK: srsra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %1 = tail call <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8> %b, i32 3) + %vrsra_n = add <8 x i8> %1, %a + ret <8 x i8> %vrsra_n +} + +define <4 x i16> @test_vrsra_n_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vrsra_n_s16 +; CHECK: srsra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %1 = tail call <4 x i16> 
@llvm.aarch64.neon.vsrshr.v4i16(<4 x i16> %b, i32 3) + %vrsra_n = add <4 x i16> %1, %a + ret <4 x i16> %vrsra_n +} + +define <2 x i32> @test_vrsra_n_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vrsra_n_s32 +; CHECK: srsra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %1 = tail call <2 x i32> @llvm.aarch64.neon.vsrshr.v2i32(<2 x i32> %b, i32 3) + %vrsra_n = add <2 x i32> %1, %a + ret <2 x i32> %vrsra_n +} + +define <16 x i8> @test_vrsraq_n_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vrsraq_n_s8 +; CHECK: srsra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %1 = tail call <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8> %b, i32 3) + %vrsra_n = add <16 x i8> %1, %a + ret <16 x i8> %vrsra_n +} + +define <8 x i16> @test_vrsraq_n_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vrsraq_n_s16 +; CHECK: srsra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %1 = tail call <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16> %b, i32 3) + %vrsra_n = add <8 x i16> %1, %a + ret <8 x i16> %vrsra_n +} + +define <4 x i32> @test_vrsraq_n_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vrsraq_n_s32 +; CHECK: srsra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %1 = tail call <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32> %b, i32 3) + %vrsra_n = add <4 x i32> %1, %a + ret <4 x i32> %vrsra_n +} + +define <2 x i64> @test_vrsraq_n_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vrsraq_n_s64 +; CHECK: srsra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %1 = tail call <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64> %b, i32 3) + %vrsra_n = add <2 x i64> %1, %a + ret <2 x i64> %vrsra_n +} + +define <8 x i8> @test_vrsra_n_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vrsra_n_u8 +; CHECK: ursra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %1 = tail call <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8> %b, i32 3) + %vrsra_n = add <8 x i8> %1, %a + ret <8 x i8> %vrsra_n +} + +define <4 x i16> @test_vrsra_n_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vrsra_n_u16 +; CHECK: ursra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %1 = tail 
call <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16> %b, i32 3) + %vrsra_n = add <4 x i16> %1, %a + ret <4 x i16> %vrsra_n +} + +define <2 x i32> @test_vrsra_n_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vrsra_n_u32 +; CHECK: ursra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %1 = tail call <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32> %b, i32 3) + %vrsra_n = add <2 x i32> %1, %a + ret <2 x i32> %vrsra_n +} + +define <16 x i8> @test_vrsraq_n_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vrsraq_n_u8 +; CHECK: ursra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %1 = tail call <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8> %b, i32 3) + %vrsra_n = add <16 x i8> %1, %a + ret <16 x i8> %vrsra_n +} + +define <8 x i16> @test_vrsraq_n_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vrsraq_n_u16 +; CHECK: ursra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %1 = tail call <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16> %b, i32 3) + %vrsra_n = add <8 x i16> %1, %a + ret <8 x i16> %vrsra_n +} + +define <4 x i32> @test_vrsraq_n_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vrsraq_n_u32 +; CHECK: ursra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %1 = tail call <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32> %b, i32 3) + %vrsra_n = add <4 x i32> %1, %a + ret <4 x i32> %vrsra_n +} + +define <2 x i64> @test_vrsraq_n_u64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vrsraq_n_u64 +; CHECK: ursra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %1 = tail call <2 x i64> @llvm.aarch64.neon.vurshr.v2i64(<2 x i64> %b, i32 3) + %vrsra_n = add <2 x i64> %1, %a + ret <2 x i64> %vrsra_n +} + +define <8 x i8> @test_vsri_n_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vsri_n_s8 +; CHECK: sri {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vsri_n = tail call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) + ret <8 x i8> %vsri_n +} + + +define <4 x i16> @test_vsri_n_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vsri_n_s16 +; CHECK: sri {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vsri = tail call 
<4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> %a, <4 x i16> %b, i32 3) + ret <4 x i16> %vsri +} + + +define <2 x i32> @test_vsri_n_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vsri_n_s32 +; CHECK: sri {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vsri = tail call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> %a, <2 x i32> %b, i32 3) + ret <2 x i32> %vsri +} + + +define <16 x i8> @test_vsriq_n_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vsriq_n_s8 +; CHECK: sri {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vsri_n = tail call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) + ret <16 x i8> %vsri_n +} + + +define <8 x i16> @test_vsriq_n_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vsriq_n_s16 +; CHECK: sri {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vsri = tail call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> %a, <8 x i16> %b, i32 3) + ret <8 x i16> %vsri +} + + +define <4 x i32> @test_vsriq_n_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vsriq_n_s32 +; CHECK: sri {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vsri = tail call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> %a, <4 x i32> %b, i32 3) + ret <4 x i32> %vsri +} + + +define <2 x i64> @test_vsriq_n_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vsriq_n_s64 +; CHECK: sri {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vsri = tail call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> %a, <2 x i64> %b, i32 3) + ret <2 x i64> %vsri +} + +define <8 x i8> @test_vsri_n_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vsri_n_p8 +; CHECK: sri {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vsri_n = tail call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) + ret <8 x i8> %vsri_n +} + +define <4 x i16> @test_vsri_n_p16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vsri_n_p16 +; CHECK: sri {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #15 + %vsri = tail call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> %a, <4 x i16> %b, i32 15) + ret <4 x i16> %vsri +} + +define <16 x i8> @test_vsriq_n_p8(<16 x i8> 
%a, <16 x i8> %b) { +; CHECK: test_vsriq_n_p8 +; CHECK: sri {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vsri_n = tail call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) + ret <16 x i8> %vsri_n +} + +define <8 x i16> @test_vsriq_n_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vsriq_n_p16 +; CHECK: sri {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #15 + %vsri = tail call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> %a, <8 x i16> %b, i32 15) + ret <8 x i16> %vsri +} + +define <8 x i8> @test_vsli_n_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vsli_n_s8 +; CHECK: sli {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vsli_n = tail call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) + ret <8 x i8> %vsli_n +} + +define <4 x i16> @test_vsli_n_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vsli_n_s16 +; CHECK: sli {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vsli = tail call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %a, <4 x i16> %b, i32 3) + ret <4 x i16> %vsli +} + +define <2 x i32> @test_vsli_n_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vsli_n_s32 +; CHECK: sli {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vsli = tail call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %a, <2 x i32> %b, i32 3) + ret <2 x i32> %vsli +} + +define <16 x i8> @test_vsliq_n_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vsliq_n_s8 +; CHECK: sli {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vsli_n = tail call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) + ret <16 x i8> %vsli_n +} + +define <8 x i16> @test_vsliq_n_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vsliq_n_s16 +; CHECK: sli {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vsli = tail call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %a, <8 x i16> %b, i32 3) + ret <8 x i16> %vsli +} + +define <4 x i32> @test_vsliq_n_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vsliq_n_s32 +; CHECK: sli {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vsli = tail call <4 x i32> 
@llvm.aarch64.neon.vsli.v4i32(<4 x i32> %a, <4 x i32> %b, i32 3) + ret <4 x i32> %vsli +} + +define <2 x i64> @test_vsliq_n_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vsliq_n_s64 +; CHECK: sli {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vsli = tail call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %a, <2 x i64> %b, i32 3) + ret <2 x i64> %vsli +} + +define <8 x i8> @test_vsli_n_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vsli_n_p8 +; CHECK: sli {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vsli_n = tail call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) + ret <8 x i8> %vsli_n +} + +define <4 x i16> @test_vsli_n_p16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vsli_n_p16 +; CHECK: sli {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #15 + %vsli = tail call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %a, <4 x i16> %b, i32 15) + ret <4 x i16> %vsli +} + +define <16 x i8> @test_vsliq_n_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vsliq_n_p8 +; CHECK: sli {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vsli_n = tail call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) + ret <16 x i8> %vsli_n +} + +define <8 x i16> @test_vsliq_n_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vsliq_n_p16 +; CHECK: sli {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #15 + %vsli = tail call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %a, <8 x i16> %b, i32 15) + ret <8 x i16> %vsli +} + +define <8 x i8> @test_vqshl_n_s8(<8 x i8> %a) { +; CHECK: test_vqshl_n_s8 +; CHECK: sqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vqshl = tail call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>) + ret <8 x i8> %vqshl +} + + +define <4 x i16> @test_vqshl_n_s16(<4 x i16> %a) { +; CHECK: test_vqshl_n_s16 +; CHECK: sqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vqshl = tail call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %a, <4 x i16> <i16 3, i16 3, i16 3, i16 3>) + ret <4 x i16> %vqshl +} + + +define <2 x i32> 
@test_vqshl_n_s32(<2 x i32> %a) { +; CHECK: test_vqshl_n_s32 +; CHECK: sqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vqshl = tail call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %a, <2 x i32> <i32 3, i32 3>) + ret <2 x i32> %vqshl +} + + +define <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) { +; CHECK: test_vqshlq_n_s8 +; CHECK: sqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>) + ret <16 x i8> %vqshl_n +} + + +define <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) { +; CHECK: test_vqshlq_n_s16 +; CHECK: sqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vqshl = tail call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %a, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>) + ret <8 x i16> %vqshl +} + + +define <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) { +; CHECK: test_vqshlq_n_s32 +; CHECK: sqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vqshl = tail call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>) + ret <4 x i32> %vqshl +} + + +define <2 x i64> @test_vqshlq_n_s64(<2 x i64> %a) { +; CHECK: test_vqshlq_n_s64 +; CHECK: sqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vqshl = tail call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %a, <2 x i64> <i64 3, i64 3>) + ret <2 x i64> %vqshl +} + + +define <8 x i8> @test_vqshl_n_u8(<8 x i8> %a) { +; CHECK: test_vqshl_n_u8 +; CHECK: uqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vqshl_n = tail call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>) + ret <8 x i8> %vqshl_n +} + + +define <4 x i16> @test_vqshl_n_u16(<4 x i16> %a) { +; CHECK: test_vqshl_n_u16 +; CHECK: uqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vqshl = tail call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %a, <4 x i16> <i16 3, i16 3, i16 3, i16 3>) + ret <4 x i16> %vqshl +} + 
+ +define <2 x i32> @test_vqshl_n_u32(<2 x i32> %a) { +; CHECK: test_vqshl_n_u32 +; CHECK: uqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vqshl = tail call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %a, <2 x i32> <i32 3, i32 3>) + ret <2 x i32> %vqshl +} + + +define <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) { +; CHECK: test_vqshlq_n_u8 +; CHECK: uqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>) + ret <16 x i8> %vqshl_n +} + + +define <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) { +; CHECK: test_vqshlq_n_u16 +; CHECK: uqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vqshl = tail call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %a, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>) + ret <8 x i16> %vqshl +} + + +define <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) { +; CHECK: test_vqshlq_n_u32 +; CHECK: uqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vqshl = tail call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>) + ret <4 x i32> %vqshl +} + + +define <2 x i64> @test_vqshlq_n_u64(<2 x i64> %a) { +; CHECK: test_vqshlq_n_u64 +; CHECK: uqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vqshl = tail call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %a, <2 x i64> <i64 3, i64 3>) + ret <2 x i64> %vqshl +} + +define <8 x i8> @test_vqshlu_n_s8(<8 x i8> %a) { +; CHECK: test_vqshlu_n_s8 +; CHECK: sqshlu {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vqshlu = tail call <8 x i8> @llvm.aarch64.neon.vsqshlu.v8i8(<8 x i8> %a, i32 3) + ret <8 x i8> %vqshlu +} + + +define <4 x i16> @test_vqshlu_n_s16(<4 x i16> %a) { +; CHECK: test_vqshlu_n_s16 +; CHECK: sqshlu {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vqshlu = tail call <4 x i16> @llvm.aarch64.neon.vsqshlu.v4i16(<4 x i16> %a, i32 3) + ret <4 x i16> %vqshlu +} + + +define <2 x i32> @test_vqshlu_n_s32(<2 x i32> %a) { 
+; CHECK: test_vqshlu_n_s32 +; CHECK: sqshlu {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vqshlu = tail call <2 x i32> @llvm.aarch64.neon.vsqshlu.v2i32(<2 x i32> %a, i32 3) + ret <2 x i32> %vqshlu +} + + +define <16 x i8> @test_vqshluq_n_s8(<16 x i8> %a) { +; CHECK: test_vqshluq_n_s8 +; CHECK: sqshlu {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vqshlu = tail call <16 x i8> @llvm.aarch64.neon.vsqshlu.v16i8(<16 x i8> %a, i32 3) + ret <16 x i8> %vqshlu +} + + +define <8 x i16> @test_vqshluq_n_s16(<8 x i16> %a) { +; CHECK: test_vqshluq_n_s16 +; CHECK: sqshlu {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vqshlu = tail call <8 x i16> @llvm.aarch64.neon.vsqshlu.v8i16(<8 x i16> %a, i32 3) + ret <8 x i16> %vqshlu +} + + +define <4 x i32> @test_vqshluq_n_s32(<4 x i32> %a) { +; CHECK: test_vqshluq_n_s32 +; CHECK: sqshlu {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vqshlu = tail call <4 x i32> @llvm.aarch64.neon.vsqshlu.v4i32(<4 x i32> %a, i32 3) + ret <4 x i32> %vqshlu +} + + +define <2 x i64> @test_vqshluq_n_s64(<2 x i64> %a) { +; CHECK: test_vqshluq_n_s64 +; CHECK: sqshlu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vqshlu = tail call <2 x i64> @llvm.aarch64.neon.vsqshlu.v2i64(<2 x i64> %a, i32 3) + ret <2 x i64> %vqshlu +} + + +define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) { +; CHECK: test_vshrn_n_s16 +; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 + %1 = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> + %vshrn_n = trunc <8 x i16> %1 to <8 x i8> + ret <8 x i8> %vshrn_n +} + +define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) { +; CHECK: test_vshrn_n_s32 +; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 + %1 = ashr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9> + %vshrn_n = trunc <4 x i32> %1 to <4 x i16> + ret <4 x i16> %vshrn_n +} + +define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) { +; CHECK: test_vshrn_n_s64 +; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 + %1 = ashr <2 x i64> %a, <i64 19, i64 19> + %vshrn_n = trunc <2 x i64> %1 to <2 x i32> + ret <2 x i32> %vshrn_n +} + 
+define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) { +; CHECK: test_vshrn_n_u16 +; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 + %1 = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> + %vshrn_n = trunc <8 x i16> %1 to <8 x i8> + ret <8 x i8> %vshrn_n +} + +define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) { +; CHECK: test_vshrn_n_u32 +; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 + %1 = lshr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9> + %vshrn_n = trunc <4 x i32> %1 to <4 x i16> + ret <4 x i16> %vshrn_n +} + +define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) { +; CHECK: test_vshrn_n_u64 +; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 + %1 = lshr <2 x i64> %a, <i64 19, i64 19> + %vshrn_n = trunc <2 x i64> %1 to <2 x i32> + ret <2 x i32> %vshrn_n +} + +define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { +; CHECK: test_vshrn_high_n_s16 +; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 + %1 = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> + %vshrn_n = trunc <8 x i16> %1 to <8 x i8> + %2 = bitcast <8 x i8> %a to <1 x i64> + %3 = bitcast <8 x i8> %vshrn_n to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1> + %4 = bitcast <2 x i64> %shuffle.i to <16 x i8> + ret <16 x i8> %4 +} + +define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { +; CHECK: test_vshrn_high_n_s32 +; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 + %1 = ashr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9> + %vshrn_n = trunc <4 x i32> %1 to <4 x i16> + %2 = bitcast <4 x i16> %a to <1 x i64> + %3 = bitcast <4 x i16> %vshrn_n to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1> + %4 = bitcast <2 x i64> %shuffle.i to <8 x i16> + ret <8 x i16> %4 +} + +define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { +; CHECK: test_vshrn_high_n_s64 +; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 + %1 = bitcast <2 x i32> %a to <1 
x i64> + %2 = ashr <2 x i64> %b, <i64 19, i64 19> + %vshrn_n = trunc <2 x i64> %2 to <2 x i32> + %3 = bitcast <2 x i32> %vshrn_n to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1> + %4 = bitcast <2 x i64> %shuffle.i to <4 x i32> + ret <4 x i32> %4 +} + +define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) { +; CHECK: test_vshrn_high_n_u16 +; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 + %1 = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> + %vshrn_n = trunc <8 x i16> %1 to <8 x i8> + %2 = bitcast <8 x i8> %a to <1 x i64> + %3 = bitcast <8 x i8> %vshrn_n to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1> + %4 = bitcast <2 x i64> %shuffle.i to <16 x i8> + ret <16 x i8> %4 +} + +define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) { +; CHECK: test_vshrn_high_n_u32 +; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 + %1 = lshr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9> + %vshrn_n = trunc <4 x i32> %1 to <4 x i16> + %2 = bitcast <4 x i16> %a to <1 x i64> + %3 = bitcast <4 x i16> %vshrn_n to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1> + %4 = bitcast <2 x i64> %shuffle.i to <8 x i16> + ret <8 x i16> %4 +} + +define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) { +; CHECK: test_vshrn_high_n_u64 +; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 + %1 = bitcast <2 x i32> %a to <1 x i64> + %2 = lshr <2 x i64> %b, <i64 19, i64 19> + %vshrn_n = trunc <2 x i64> %2 to <2 x i32> + %3 = bitcast <2 x i32> %vshrn_n to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1> + %4 = bitcast <2 x i64> %shuffle.i to <4 x i32> + ret <4 x i32> %4 +} + +define <8 x i8> @test_vqshrun_n_s16(<8 x i16> %a) { +; CHECK: test_vqshrun_n_s16 +; CHECK: sqshrun {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 + %vqshrun = tail call <8 x i8> 
@llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16> %a, i32 3) + ret <8 x i8> %vqshrun +} + + +define <4 x i16> @test_vqshrun_n_s32(<4 x i32> %a) { +; CHECK: test_vqshrun_n_s32 +; CHECK: sqshrun {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 + %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32> %a, i32 9) + ret <4 x i16> %vqshrun +} + +define <2 x i32> @test_vqshrun_n_s64(<2 x i64> %a) { +; CHECK: test_vqshrun_n_s64 +; CHECK: sqshrun {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 + %vqshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64> %a, i32 19) + ret <2 x i32> %vqshrun +} + +define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) { +; CHECK: test_vqshrun_high_n_s16 +; CHECK: sqshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 + %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16> %b, i32 3) + %1 = bitcast <8 x i8> %a to <1 x i64> + %2 = bitcast <8 x i8> %vqshrun to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> + ret <16 x i8> %3 +} + +define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) { +; CHECK: test_vqshrun_high_n_s32 +; CHECK: sqshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 + %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32> %b, i32 9) + %1 = bitcast <4 x i16> %a to <1 x i64> + %2 = bitcast <4 x i16> %vqshrun to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> + ret <8 x i16> %3 +} + +define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) { +; CHECK: test_vqshrun_high_n_s64 +; CHECK: sqshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 + %1 = bitcast <2 x i32> %a to <1 x i64> + %vqshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64> %b, i32 19) + %2 = bitcast <2 x i32> %vqshrun to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + 
%3 = bitcast <2 x i64> %shuffle.i to <4 x i32> + ret <4 x i32> %3 +} + +define <8 x i8> @test_vrshrn_n_s16(<8 x i16> %a) { +; CHECK: test_vrshrn_n_s16 +; CHECK: rshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 + %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16> %a, i32 3) + ret <8 x i8> %vrshrn +} + + +define <4 x i16> @test_vrshrn_n_s32(<4 x i32> %a) { +; CHECK: test_vrshrn_n_s32 +; CHECK: rshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 + %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32> %a, i32 9) + ret <4 x i16> %vrshrn +} + + +define <2 x i32> @test_vrshrn_n_s64(<2 x i64> %a) { +; CHECK: test_vrshrn_n_s64 +; CHECK: rshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 + %vrshrn = tail call <2 x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64> %a, i32 19) + ret <2 x i32> %vrshrn +} + +define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { +; CHECK: test_vrshrn_high_n_s16 +; CHECK: rshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 + %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16> %b, i32 3) + %1 = bitcast <8 x i8> %a to <1 x i64> + %2 = bitcast <8 x i8> %vrshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> + ret <16 x i8> %3 +} + +define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { +; CHECK: test_vrshrn_high_n_s32 +; CHECK: rshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 + %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32> %b, i32 9) + %1 = bitcast <4 x i16> %a to <1 x i64> + %2 = bitcast <4 x i16> %vrshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> + ret <8 x i16> %3 +} + +define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { +; CHECK: test_vrshrn_high_n_s64 +; CHECK: rshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 + %1 = bitcast <2 x i32> %a to <1 x i64> + %vrshrn = tail call <2 
x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64> %b, i32 19) + %2 = bitcast <2 x i32> %vrshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> + ret <4 x i32> %3 +} + +define <8 x i8> @test_vqrshrun_n_s16(<8 x i16> %a) { +; CHECK: test_vqrshrun_n_s16 +; CHECK: sqrshrun {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 + %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16> %a, i32 3) + ret <8 x i8> %vqrshrun +} + +define <4 x i16> @test_vqrshrun_n_s32(<4 x i32> %a) { +; CHECK: test_vqrshrun_n_s32 +; CHECK: sqrshrun {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 + %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32> %a, i32 9) + ret <4 x i16> %vqrshrun +} + +define <2 x i32> @test_vqrshrun_n_s64(<2 x i64> %a) { +; CHECK: test_vqrshrun_n_s64 +; CHECK: sqrshrun {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 + %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64> %a, i32 19) + ret <2 x i32> %vqrshrun +} + +define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) { +; CHECK: test_vqrshrun_high_n_s16 +; CHECK: sqrshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 + %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16> %b, i32 3) + %1 = bitcast <8 x i8> %a to <1 x i64> + %2 = bitcast <8 x i8> %vqrshrun to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> + ret <16 x i8> %3 +} + +define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) { +; CHECK: test_vqrshrun_high_n_s32 +; CHECK: sqrshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 + %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32> %b, i32 9) + %1 = bitcast <4 x i16> %a to <1 x i64> + %2 = bitcast <4 x i16> %vqrshrun to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to 
<8 x i16> + ret <8 x i16> %3 +} + +define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) { +; CHECK: test_vqrshrun_high_n_s64 +; CHECK: sqrshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 + %1 = bitcast <2 x i32> %a to <1 x i64> + %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64> %b, i32 19) + %2 = bitcast <2 x i32> %vqrshrun to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> + ret <4 x i32> %3 +} + +define <8 x i8> @test_vqshrn_n_s16(<8 x i16> %a) { +; CHECK: test_vqshrn_n_s16 +; CHECK: sqshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 + %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16> %a, i32 3) + ret <8 x i8> %vqshrn +} + + +define <4 x i16> @test_vqshrn_n_s32(<4 x i32> %a) { +; CHECK: test_vqshrn_n_s32 +; CHECK: sqshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 + %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32> %a, i32 9) + ret <4 x i16> %vqshrn +} + + +define <2 x i32> @test_vqshrn_n_s64(<2 x i64> %a) { +; CHECK: test_vqshrn_n_s64 +; CHECK: sqshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 + %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64> %a, i32 19) + ret <2 x i32> %vqshrn +} + + +define <8 x i8> @test_vqshrn_n_u16(<8 x i16> %a) { +; CHECK: test_vqshrn_n_u16 +; CHECK: uqshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 + %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16> %a, i32 3) + ret <8 x i8> %vqshrn +} + + +define <4 x i16> @test_vqshrn_n_u32(<4 x i32> %a) { +; CHECK: test_vqshrn_n_u32 +; CHECK: uqshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 + %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32> %a, i32 9) + ret <4 x i16> %vqshrn +} + + +define <2 x i32> @test_vqshrn_n_u64(<2 x i64> %a) { +; CHECK: test_vqshrn_n_u64 +; CHECK: uqshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 + %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64> %a, i32 19) 
+ ret <2 x i32> %vqshrn +} + + +define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { +; CHECK: test_vqshrn_high_n_s16 +; CHECK: sqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 + %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16> %b, i32 3) + %1 = bitcast <8 x i8> %a to <1 x i64> + %2 = bitcast <8 x i8> %vqshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> + ret <16 x i8> %3 +} + +define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { +; CHECK: test_vqshrn_high_n_s32 +; CHECK: sqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 + %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32> %b, i32 9) + %1 = bitcast <4 x i16> %a to <1 x i64> + %2 = bitcast <4 x i16> %vqshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> + ret <8 x i16> %3 +} + +define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { +; CHECK: test_vqshrn_high_n_s64 +; CHECK: sqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 + %1 = bitcast <2 x i32> %a to <1 x i64> + %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64> %b, i32 19) + %2 = bitcast <2 x i32> %vqshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> + ret <4 x i32> %3 +} + +define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) { +; CHECK: test_vqshrn_high_n_u16 +; CHECK: uqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 + %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16> %b, i32 3) + %1 = bitcast <8 x i8> %a to <1 x i64> + %2 = bitcast <8 x i8> %vqshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> + ret <16 x i8> %3 +} + +define <8 x i16> 
@test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) { +; CHECK: test_vqshrn_high_n_u32 +; CHECK: uqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 + %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32> %b, i32 9) + %1 = bitcast <4 x i16> %a to <1 x i64> + %2 = bitcast <4 x i16> %vqshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> + ret <8 x i16> %3 +} + +define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) { +; CHECK: test_vqshrn_high_n_u64 +; CHECK: uqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 + %1 = bitcast <2 x i32> %a to <1 x i64> + %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64> %b, i32 19) + %2 = bitcast <2 x i32> %vqshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> + ret <4 x i32> %3 +} + +define <8 x i8> @test_vqrshrn_n_s16(<8 x i16> %a) { +; CHECK: test_vqrshrn_n_s16 +; CHECK: sqrshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 + %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16> %a, i32 3) + ret <8 x i8> %vqrshrn +} + + +define <4 x i16> @test_vqrshrn_n_s32(<4 x i32> %a) { +; CHECK: test_vqrshrn_n_s32 +; CHECK: sqrshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 + %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32> %a, i32 9) + ret <4 x i16> %vqrshrn +} + + +define <2 x i32> @test_vqrshrn_n_s64(<2 x i64> %a) { +; CHECK: test_vqrshrn_n_s64 +; CHECK: sqrshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 + %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64> %a, i32 19) + ret <2 x i32> %vqrshrn +} + + +define <8 x i8> @test_vqrshrn_n_u16(<8 x i16> %a) { +; CHECK: test_vqrshrn_n_u16 +; CHECK: uqrshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 + %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16> %a, i32 3) + ret <8 x i8> %vqrshrn +} + + +define <4 x i16> 
@test_vqrshrn_n_u32(<4 x i32> %a) { +; CHECK: test_vqrshrn_n_u32 +; CHECK: uqrshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 + %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32> %a, i32 9) + ret <4 x i16> %vqrshrn +} + + +define <2 x i32> @test_vqrshrn_n_u64(<2 x i64> %a) { +; CHECK: test_vqrshrn_n_u64 +; CHECK: uqrshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 + %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64> %a, i32 19) + ret <2 x i32> %vqrshrn +} + + +define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { +; CHECK: test_vqrshrn_high_n_s16 +; CHECK: sqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 + %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16> %b, i32 3) + %1 = bitcast <8 x i8> %a to <1 x i64> + %2 = bitcast <8 x i8> %vqrshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> + ret <16 x i8> %3 +} + +define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { +; CHECK: test_vqrshrn_high_n_s32 +; CHECK: sqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 + %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32> %b, i32 9) + %1 = bitcast <4 x i16> %a to <1 x i64> + %2 = bitcast <4 x i16> %vqrshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> + ret <8 x i16> %3 +} + +define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { +; CHECK: test_vqrshrn_high_n_s64 +; CHECK: sqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 + %1 = bitcast <2 x i32> %a to <1 x i64> + %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64> %b, i32 19) + %2 = bitcast <2 x i32> %vqrshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> + ret <4 x i32> %3 +} + +define <16 x i8> 
@test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) { +; CHECK: test_vqrshrn_high_n_u16 +; CHECK: uqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 + %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16> %b, i32 3) + %1 = bitcast <8 x i8> %a to <1 x i64> + %2 = bitcast <8 x i8> %vqrshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> + ret <16 x i8> %3 +} + +define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) { +; CHECK: test_vqrshrn_high_n_u32 +; CHECK: uqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 + %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32> %b, i32 9) + %1 = bitcast <4 x i16> %a to <1 x i64> + %2 = bitcast <4 x i16> %vqrshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> + ret <8 x i16> %3 +} + +define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) { +; CHECK: test_vqrshrn_high_n_u64 +; CHECK: uqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 + %1 = bitcast <2 x i32> %a to <1 x i64> + %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64> %b, i32 19) + %2 = bitcast <2 x i32> %vqrshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> + ret <4 x i32> %3 +} + +define <2 x float> @test_vcvt_n_f32_s32(<2 x i32> %a) { +; CHECK: test_vcvt_n_f32_s32 +; CHECK: scvtf {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31 + %vcvt = tail call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %a, i32 31) + ret <2 x float> %vcvt +} + +define <4 x float> @test_vcvtq_n_f32_s32(<4 x i32> %a) { +; CHECK: test_vcvtq_n_f32_s32 +; CHECK: scvtf {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31 + %vcvt = tail call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %a, i32 31) + ret <4 x float> %vcvt +} + +define <2 x double> 
@test_vcvtq_n_f64_s64(<2 x i64> %a) { +; CHECK: test_vcvtq_n_f64_s64 +; CHECK: scvtf {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50 + %vcvt = tail call <2 x double> @llvm.arm.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> %a, i32 50) + ret <2 x double> %vcvt +} + +define <2 x float> @test_vcvt_n_f32_u32(<2 x i32> %a) { +; CHECK: test_vcvt_n_f32_u32 +; CHECK: ucvtf {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31 + %vcvt = tail call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %a, i32 31) + ret <2 x float> %vcvt +} + +define <4 x float> @test_vcvtq_n_f32_u32(<4 x i32> %a) { +; CHECK: test_vcvtq_n_f32_u32 +; CHECK: ucvtf {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31 + %vcvt = tail call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %a, i32 31) + ret <4 x float> %vcvt +} + +define <2 x double> @test_vcvtq_n_f64_u64(<2 x i64> %a) { +; CHECK: test_vcvtq_n_f64_u64 +; CHECK: ucvtf {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50 + %vcvt = tail call <2 x double> @llvm.arm.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> %a, i32 50) + ret <2 x double> %vcvt +} + +define <2 x i32> @test_vcvt_n_s32_f32(<2 x float> %a) { +; CHECK: test_vcvt_n_s32_f32 +; CHECK: fcvtzs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31 + %vcvt = tail call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %a, i32 31) + ret <2 x i32> %vcvt +} + +define <4 x i32> @test_vcvtq_n_s32_f32(<4 x float> %a) { +; CHECK: test_vcvtq_n_s32_f32 +; CHECK: fcvtzs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31 + %vcvt = tail call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %a, i32 31) + ret <4 x i32> %vcvt +} + +define <2 x i64> @test_vcvtq_n_s64_f64(<2 x double> %a) { +; CHECK: test_vcvtq_n_s64_f64 +; CHECK: fcvtzs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50 + %vcvt = tail call <2 x i64> @llvm.arm.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> %a, i32 50) + ret <2 x i64> %vcvt +} + +define <2 x i32> @test_vcvt_n_u32_f32(<2 x float> %a) { +; CHECK: test_vcvt_n_u32_f32 +; CHECK: fcvtzu {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31 + %vcvt = tail call <2 x i32> 
@llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %a, i32 31) + ret <2 x i32> %vcvt +} + +define <4 x i32> @test_vcvtq_n_u32_f32(<4 x float> %a) { +; CHECK: test_vcvt_n_u32_f32 +; CHECK: fcvtzu {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31 + %vcvt = tail call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %a, i32 31) + ret <4 x i32> %vcvt +} + +define <2 x i64> @test_vcvtq_n_u64_f64(<2 x double> %a) { +; CHECK: test_vcvtq_n_u64_f64 +; CHECK: fcvtzu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50 + %vcvt = tail call <2 x i64> @llvm.arm.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> %a, i32 50) + ret <2 x i64> %vcvt +} + +declare <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vsrshr.v4i16(<4 x i16>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vsrshr.v2i32(<2 x i32>, i32) + +declare <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8>, i32) + +declare <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16>, i32) + +declare <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32>, i32) + +declare <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64>, i32) + +declare <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32>, i32) + +declare <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8>, i32) + +declare <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16>, i32) + +declare <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32>, i32) + +declare <2 x i64> @llvm.aarch64.neon.vurshr.v2i64(<2 x i64>, i32) + +declare <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8>, <8 x i8>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16>, <4 x i16>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32>, <2 x i32>, i32) + +declare <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8>, <16 x i8>, i32) + +declare <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16>, <8 x i16>, i32) + +declare <4 x i32> 
@llvm.aarch64.neon.vsri.v4i32(<4 x i32>, <4 x i32>, i32) + +declare <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64>, <2 x i64>, i32) + +declare <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32) + +declare <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32) + +declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32) + +declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) + +declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) + +declare <8 x i8> @llvm.aarch64.neon.vsqshlu.v8i8(<8 x i8>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vsqshlu.v4i16(<4 x i16>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vsqshlu.v2i32(<2 x i32>, i32) + +declare <16 x i8> @llvm.aarch64.neon.vsqshlu.v16i8(<16 x i8>, i32) + +declare <8 x i16> @llvm.aarch64.neon.vsqshlu.v8i16(<8 x i16>, i32) + +declare <4 x i32> @llvm.aarch64.neon.vsqshlu.v4i32(<4 x i32>, i32) + +declare <2 x i64> @llvm.aarch64.neon.vsqshlu.v2i64(<2 x i64>, i32) + +declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>) + +declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>) + +declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>) + +declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>) + +declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>) + +declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>) + +declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>) + +declare <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>) + +declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>) + +declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>) + +declare <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>) + +declare <8 x i16> 
@llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>) + +declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>) + +declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>) + +declare <8 x i8> @llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64>, i32) + +declare <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64>, i32) + +declare <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64>, i32) + +declare <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64>, i32) + +declare <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64>, i32) + +declare <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64>, i32) + +declare <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64>, i32) + +declare <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) + +declare <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) + +declare <2 x double> @llvm.arm.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32) + +declare <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) + +declare <4 x float> 
@llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) + +declare <2 x double> @llvm.arm.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32) + +declare <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32) + +declare <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32) + +declare <2 x i64> @llvm.arm.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32) + +declare <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32) + +declare <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) + +declare <2 x i64> @llvm.arm.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32) + +define <1 x i64> @test_vcvt_n_s64_f64(<1 x double> %a) { +; CHECK-LABEL: test_vcvt_n_s64_f64 +; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}, #64 + %1 = tail call <1 x i64> @llvm.arm.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> %a, i32 64) + ret <1 x i64> %1 +} + +define <1 x i64> @test_vcvt_n_u64_f64(<1 x double> %a) { +; CHECK-LABEL: test_vcvt_n_u64_f64 +; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}, #64 + %1 = tail call <1 x i64> @llvm.arm.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> %a, i32 64) + ret <1 x i64> %1 +} + +define <1 x double> @test_vcvt_n_f64_s64(<1 x i64> %a) { +; CHECK-LABEL: test_vcvt_n_f64_s64 +; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}, #64 + %1 = tail call <1 x double> @llvm.arm.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> %a, i32 64) + ret <1 x double> %1 +} + +define <1 x double> @test_vcvt_n_f64_u64(<1 x i64> %a) { +; CHECK-LABEL: test_vcvt_n_f64_u64 +; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}, #64 + %1 = tail call <1 x double> @llvm.arm.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> %a, i32 64) + ret <1 x double> %1 +} + +declare <1 x i64> @llvm.arm.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double>, i32) +declare <1 x i64> @llvm.arm.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double>, i32) +declare <1 x double> @llvm.arm.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64>, i32) +declare <1 x double> @llvm.arm.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64>, i32)
\ No newline at end of file diff --git a/test/CodeGen/AArch64/neon-simd-tbl.ll b/test/CodeGen/AArch64/neon-simd-tbl.ll new file mode 100644 index 0000000..8eac1e8 --- /dev/null +++ b/test/CodeGen/AArch64/neon-simd-tbl.ll @@ -0,0 +1,828 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +declare <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) + +declare <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) + +declare <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) + +declare <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) + +declare <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8.v16i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) + +declare <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8.v16i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) + +declare <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) + +declare <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8>, <16 x i8>, <8 x i8>) + +declare <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8>, <16 x i8>, <8 x i8>) + +declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) + +declare <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8>, <8 x i8>) + +declare <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) + +declare <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) + +declare <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) + +declare <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8.v16i8(<16 x i8>, <16 x i8>) + +declare <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) + +declare <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8.v16i8(<16 x i8>, <16 x i8>, <16 
x i8>, <8 x i8>) + +define <8 x i8> @test_vtbl1_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vtbl1_s8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %vtbl1.i = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b) + ret <8 x i8> %vtbl11.i +} + +define <8 x i8> @test_vqtbl1_s8(<16 x i8> %a, <8 x i8> %b) { +; CHECK: test_vqtbl1_s8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %vtbl1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %a, <8 x i8> %b) + ret <8 x i8> %vtbl1.i +} + +define <8 x i8> @test_vtbl2_s8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vtbl2_s8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 1 + %vtbl1.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbl17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b) + ret <8 x i8> %vtbl17.i +} + +define <8 x i8> @test_vqtbl2_s8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vqtbl2_s8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 + %vtbl2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <8 x i8> %b) + ret <8 x i8> %vtbl2.i +} + 
+define <8 x i8> @test_vtbl3_s8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vtbl3_s8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 2 + %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbl211.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %b) + ret <8 x i8> %vtbl212.i +} + +define <8 x i8> @test_vqtbl3_s8([3 x <16 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vqtbl3_s8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2 + %vtbl3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %b) + ret <8 x i8> %vtbl3.i +} + +define <8 x i8> @test_vtbl4_s8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vtbl4_s8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] 
%a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 2 + %__a.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 3 + %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbl215.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %__a.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbl216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl215.i, <8 x i8> %b) + ret <8 x i8> %vtbl216.i +} + +define <8 x i8> @test_vqtbl4_s8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vqtbl4_s8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 + %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 + %vtbl4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <8 x i8> %b) + ret <8 x i8> %vtbl4.i +} + +define <16 x i8> @test_vqtbl1q_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vqtbl1q_s8: +; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %vtbl1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %vtbl1.i +} + +define <16 x i8> @test_vqtbl2q_s8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) { +; CHECK: test_vqtbl2q_s8: +; CHECK: tbl {{v[0-9]+}}.16b, 
{{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 + %vtbl2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %b) + ret <16 x i8> %vtbl2.i +} + +define <16 x i8> @test_vqtbl3q_s8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) { +; CHECK: test_vqtbl3q_s8: +; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2 + %vtbl3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %b) + ret <16 x i8> %vtbl3.i +} + +define <16 x i8> @test_vqtbl4q_s8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) { +; CHECK: test_vqtbl4q_s8: +; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 + %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 + %vtbl4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <16 x i8> %b) + ret <16 x i8> %vtbl4.i +} + +define <8 x i8> @test_vtbx1_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { +; CHECK: test_vtbx1_s8: +; CHECK: tbl {{v[0-9]+}}.8b, 
{{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %vtbl1.i = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %c) + %0 = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8> + %1 = sext <8 x i1> %0 to <8 x i8> + %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl11.i) + ret <8 x i8> %vbsl.i +} + +define <8 x i8> @test_vtbx2_s8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vtbx2_s8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 1 + %vtbx1.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbx17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx1.i, <8 x i8> %c) + ret <8 x i8> %vtbx17.i +} + +define <8 x i8> @test_vtbx3_s8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vtbx3_s8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 2 + %vtbl2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbl211.i = shufflevector <8 x i8> 
%__b.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %c) + %0 = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24> + %1 = sext <8 x i1> %0 to <8 x i8> + %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl212.i) + ret <8 x i8> %vbsl.i +} + +define <8 x i8> @test_vtbx4_s8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vtbx4_s8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 2 + %__b.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 3 + %vtbx2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbx215.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %__b.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbx216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx2.i, <16 x i8> %vtbx215.i, <8 x i8> %c) + ret <8 x i8> %vtbx216.i +} + +define <8 x i8> @test_vqtbx1_s8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) { +; CHECK: test_vqtbx1_s8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %vtbx1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) + ret <8 x i8> %vtbx1.i +} 
+ +define <8 x i8> @test_vqtbx2_s8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vqtbx2_s8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 + %vtbx2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <8 x i8> %c) + ret <8 x i8> %vtbx2.i +} + +define <8 x i8> @test_vqtbx3_s8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vqtbx3_s8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 + %vtbx3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %c) + ret <8 x i8> %vtbx3.i +} + +define <8 x i8> @test_vqtbx4_s8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vqtbx4_s8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2 + %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 + %vtbx4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> 
%__b.coerce.fca.3.extract.i, <8 x i8> %c) + ret <8 x i8> %vtbx4.i +} + +define <16 x i8> @test_vqtbx1q_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK: test_vqtbx1q_s8: +; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %vtbx1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) + ret <16 x i8> %vtbx1.i +} + +define <16 x i8> @test_vqtbx2q_s8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) { +; CHECK: test_vqtbx2q_s8: +; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 + %vtbx2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %c) + ret <16 x i8> %vtbx2.i +} + +define <16 x i8> @test_vqtbx3q_s8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) { +; CHECK: test_vqtbx3q_s8: +; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 + %vtbx3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %c) + ret <16 x i8> %vtbx3.i +} + +define <16 x i8> @test_vqtbx4q_s8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) { +; CHECK: test_vqtbx4q_s8: +; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i 
= extractvalue [4 x <16 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2 + %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 + %vtbx4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <16 x i8> %c) + ret <16 x i8> %vtbx4.i +} + +define <8 x i8> @test_vtbl1_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vtbl1_u8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %vtbl1.i = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b) + ret <8 x i8> %vtbl11.i +} + +define <8 x i8> @test_vqtbl1_u8(<16 x i8> %a, <8 x i8> %b) { +; CHECK: test_vqtbl1_u8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %vtbl1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %a, <8 x i8> %b) + ret <8 x i8> %vtbl1.i +} + +define <8 x i8> @test_vtbl2_u8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vtbl2_u8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 1 + %vtbl1.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbl17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b) + ret <8 x i8> %vtbl17.i +} + +define <8 x i8> @test_vqtbl2_u8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) 
{ +; CHECK: test_vqtbl2_u8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 + %vtbl2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <8 x i8> %b) + ret <8 x i8> %vtbl2.i +} + +define <8 x i8> @test_vtbl3_u8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vtbl3_u8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 2 + %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbl211.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %b) + ret <8 x i8> %vtbl212.i +} + +define <8 x i8> @test_vqtbl3_u8([3 x <16 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vqtbl3_u8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2 + %vtbl3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8.v16i8(<16 x i8> 
%__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %b) + ret <8 x i8> %vtbl3.i +} + +define <8 x i8> @test_vtbl4_u8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vtbl4_u8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 2 + %__a.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 3 + %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbl215.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %__a.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbl216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl215.i, <8 x i8> %b) + ret <8 x i8> %vtbl216.i +} + +define <8 x i8> @test_vqtbl4_u8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vqtbl4_u8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 + %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 + %vtbl4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <8 
x i8> %b) + ret <8 x i8> %vtbl4.i +} + +define <16 x i8> @test_vqtbl1q_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vqtbl1q_u8: +; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %vtbl1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %vtbl1.i +} + +define <16 x i8> @test_vqtbl2q_u8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) { +; CHECK: test_vqtbl2q_u8: +; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 + %vtbl2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %b) + ret <16 x i8> %vtbl2.i +} + +define <16 x i8> @test_vqtbl3q_u8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) { +; CHECK: test_vqtbl3q_u8: +; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2 + %vtbl3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %b) + ret <16 x i8> %vtbl3.i +} + +define <16 x i8> @test_vqtbl4q_u8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) { +; CHECK: test_vqtbl4q_u8: +; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 + 
%__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 + %vtbl4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <16 x i8> %b) + ret <16 x i8> %vtbl4.i +} + +define <8 x i8> @test_vtbx1_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { +; CHECK: test_vtbx1_u8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %vtbl1.i = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %c) + %0 = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8> + %1 = sext <8 x i1> %0 to <8 x i8> + %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl11.i) + ret <8 x i8> %vbsl.i +} + +define <8 x i8> @test_vtbx2_u8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vtbx2_u8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 1 + %vtbx1.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbx17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx1.i, <8 x i8> %c) + ret <8 x i8> %vtbx17.i +} + +define <8 x i8> @test_vtbx3_u8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vtbx3_u8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = 
extractvalue [3 x <8 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 2 + %vtbl2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbl211.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %c) + %0 = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24> + %1 = sext <8 x i1> %0 to <8 x i8> + %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl212.i) + ret <8 x i8> %vbsl.i +} + +define <8 x i8> @test_vtbx4_u8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vtbx4_u8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 2 + %__b.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 3 + %vtbx2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbx215.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %__b.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbx216.i = 
tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx2.i, <16 x i8> %vtbx215.i, <8 x i8> %c) + ret <8 x i8> %vtbx216.i +} + +define <8 x i8> @test_vqtbx1_u8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) { +; CHECK: test_vqtbx1_u8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %vtbx1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) + ret <8 x i8> %vtbx1.i +} + +define <8 x i8> @test_vqtbx2_u8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vqtbx2_u8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 + %vtbx2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <8 x i8> %c) + ret <8 x i8> %vtbx2.i +} + +define <8 x i8> @test_vqtbx3_u8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vqtbx3_u8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 + %vtbx3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %c) + ret <8 x i8> %vtbx3.i +} + +define <8 x i8> @test_vqtbx4_u8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vqtbx4_u8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] 
%b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2 + %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 + %vtbx4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <8 x i8> %c) + ret <8 x i8> %vtbx4.i +} + +define <16 x i8> @test_vqtbx1q_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK: test_vqtbx1q_u8: +; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %vtbx1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) + ret <16 x i8> %vtbx1.i +} + +define <16 x i8> @test_vqtbx2q_u8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) { +; CHECK: test_vqtbx2q_u8: +; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 + %vtbx2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %c) + ret <16 x i8> %vtbx2.i +} + +define <16 x i8> @test_vqtbx3q_u8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) { +; CHECK: test_vqtbx3q_u8: +; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 + %vtbx3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> 
%__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %c) + ret <16 x i8> %vtbx3.i +} + +define <16 x i8> @test_vqtbx4q_u8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) { +; CHECK: test_vqtbx4q_u8: +; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2 + %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 + %vtbx4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <16 x i8> %c) + ret <16 x i8> %vtbx4.i +} + +define <8 x i8> @test_vtbl1_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vtbl1_p8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %vtbl1.i = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b) + ret <8 x i8> %vtbl11.i +} + +define <8 x i8> @test_vqtbl1_p8(<16 x i8> %a, <8 x i8> %b) { +; CHECK: test_vqtbl1_p8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %vtbl1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %a, <8 x i8> %b) + ret <8 x i8> %vtbl1.i +} + +define <8 x i8> @test_vtbl2_p8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vtbl2_p8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [2 x 
<8 x i8>] %a.coerce, 1 + %vtbl1.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbl17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b) + ret <8 x i8> %vtbl17.i +} + +define <8 x i8> @test_vqtbl2_p8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vqtbl2_p8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 + %vtbl2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <8 x i8> %b) + ret <8 x i8> %vtbl2.i +} + +define <8 x i8> @test_vtbl3_p8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vtbl3_p8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 2 + %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbl211.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %b) + ret <8 x i8> %vtbl212.i +} + +define <8 x i8> @test_vqtbl3_p8([3 x <16 x i8>] %a.coerce, <8 x 
i8> %b) { +; CHECK: test_vqtbl3_p8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2 + %vtbl3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %b) + ret <8 x i8> %vtbl3.i +} + +define <8 x i8> @test_vtbl4_p8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vtbl4_p8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 2 + %__a.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 3 + %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbl215.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %__a.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbl216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl215.i, <8 x i8> %b) + ret <8 x i8> %vtbl216.i +} + +define <8 x i8> @test_vqtbl4_p8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) { +; CHECK: test_vqtbl4_p8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0 + 
%__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 + %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 + %vtbl4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <8 x i8> %b) + ret <8 x i8> %vtbl4.i +} + +define <16 x i8> @test_vqtbl1q_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vqtbl1q_p8: +; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %vtbl1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %vtbl1.i +} + +define <16 x i8> @test_vqtbl2q_p8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) { +; CHECK: test_vqtbl2q_p8: +; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 + %vtbl2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %b) + ret <16 x i8> %vtbl2.i +} + +define <16 x i8> @test_vqtbl3q_p8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) { +; CHECK: test_vqtbl3q_p8: +; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2 + %vtbl3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %b) + ret <16 x i8> %vtbl3.i +} + +define <16 x i8> 
@test_vqtbl4q_p8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) { +; CHECK: test_vqtbl4q_p8: +; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0 + %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1 + %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 + %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 + %vtbl4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <16 x i8> %b) + ret <16 x i8> %vtbl4.i +} + +define <8 x i8> @test_vtbx1_p8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { +; CHECK: test_vtbx1_p8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %vtbl1.i = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %c) + %0 = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8> + %1 = sext <8 x i1> %0 to <8 x i8> + %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl11.i) + ret <8 x i8> %vbsl.i +} + +define <8 x i8> @test_vtbx2_p8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vtbx2_p8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 1 + %vtbx1.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, 
i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbx17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx1.i, <8 x i8> %c) + ret <8 x i8> %vtbx17.i +} + +define <8 x i8> @test_vtbx3_p8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vtbx3_p8: +; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 2 + %vtbl2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbl211.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %c) + %0 = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24> + %1 = sext <8 x i1> %0 to <8 x i8> + %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl212.i) + ret <8 x i8> %vbsl.i +} + +define <8 x i8> @test_vtbx4_p8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vtbx4_p8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 2 + %__b.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 3 + %vtbx2.i = shufflevector <8 x i8> 
%__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbx215.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %__b.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %vtbx216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx2.i, <16 x i8> %vtbx215.i, <8 x i8> %c) + ret <8 x i8> %vtbx216.i +} + +define <8 x i8> @test_vqtbx1_p8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) { +; CHECK: test_vqtbx1_p8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %vtbx1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) + ret <8 x i8> %vtbx1.i +} + +define <8 x i8> @test_vqtbx2_p8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vqtbx2_p8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 + %vtbx2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <8 x i8> %c) + ret <8 x i8> %vtbx2.i +} + +define <8 x i8> @test_vqtbx3_p8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vqtbx3_p8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 + %vtbx3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8.v16i8(<8 x i8> %a, <16 x i8> 
%__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %c) + ret <8 x i8> %vtbx3.i +} + +define <8 x i8> @test_vqtbx4_p8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) { +; CHECK: test_vqtbx4_p8: +; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2 + %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 + %vtbx4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <8 x i8> %c) + ret <8 x i8> %vtbx4.i +} + +define <16 x i8> @test_vqtbx1q_p8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK: test_vqtbx1q_p8: +; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %vtbx1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) + ret <16 x i8> %vtbx1.i +} + +define <16 x i8> @test_vqtbx2q_p8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) { +; CHECK: test_vqtbx2q_p8: +; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 + %vtbx2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %c) + ret <16 x i8> %vtbx2.i +} + +define <16 x i8> @test_vqtbx3q_p8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) { +; CHECK: test_vqtbx3q_p8: +; CHECK: tbx {{v[0-9]+}}.16b, 
{{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 + %vtbx3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %c) + ret <16 x i8> %vtbx3.i +} + +define <16 x i8> @test_vqtbx4q_p8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) { +; CHECK: test_vqtbx4q_p8: +; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b +entry: + %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0 + %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1 + %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2 + %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 + %vtbx4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <16 x i8> %c) + ret <16 x i8> %vtbx4.i +} + diff --git a/test/CodeGen/AArch64/neon-simd-vget.ll b/test/CodeGen/AArch64/neon-simd-vget.ll new file mode 100644 index 0000000..6474499 --- /dev/null +++ b/test/CodeGen/AArch64/neon-simd-vget.ll @@ -0,0 +1,225 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +define <8 x i8> @test_vget_high_s8(<16 x i8> %a) { +; CHECK-LABEL: test_vget_high_s8: +; CHECK: dup d0, {{v[0-9]+}}.d[1] +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + ret <8 x i8> %shuffle.i +} + +define <4 x i16> 
@test_vget_high_s16(<8 x i16> %a) { +; CHECK-LABEL: test_vget_high_s16: +; CHECK: dup d0, {{v[0-9]+}}.d[1] +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + ret <4 x i16> %shuffle.i +} + +define <2 x i32> @test_vget_high_s32(<4 x i32> %a) { +; CHECK-LABEL: test_vget_high_s32: +; CHECK: dup d0, {{v[0-9]+}}.d[1] +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + ret <2 x i32> %shuffle.i +} + +define <1 x i64> @test_vget_high_s64(<2 x i64> %a) { +; CHECK-LABEL: test_vget_high_s64: +; CHECK: dup d0, {{v[0-9]+}}.d[1] +entry: + %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1> + ret <1 x i64> %shuffle.i +} + +define <8 x i8> @test_vget_high_u8(<16 x i8> %a) { +; CHECK-LABEL: test_vget_high_u8: +; CHECK: dup d0, {{v[0-9]+}}.d[1] +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + ret <8 x i8> %shuffle.i +} + +define <4 x i16> @test_vget_high_u16(<8 x i16> %a) { +; CHECK-LABEL: test_vget_high_u16: +; CHECK: dup d0, {{v[0-9]+}}.d[1] +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + ret <4 x i16> %shuffle.i +} + +define <2 x i32> @test_vget_high_u32(<4 x i32> %a) { +; CHECK-LABEL: test_vget_high_u32: +; CHECK: dup d0, {{v[0-9]+}}.d[1] +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + ret <2 x i32> %shuffle.i +} + +define <1 x i64> @test_vget_high_u64(<2 x i64> %a) { +; CHECK-LABEL: test_vget_high_u64: +; CHECK: dup d0, {{v[0-9]+}}.d[1] +entry: + %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1> + ret <1 x i64> %shuffle.i +} + +define <1 x i64> @test_vget_high_p64(<2 x i64> %a) { +; CHECK-LABEL: test_vget_high_p64: +; CHECK: dup d0, {{v[0-9]+}}.d[1] +entry: + %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 
1> + ret <1 x i64> %shuffle.i +} + +define <4 x i16> @test_vget_high_f16(<8 x i16> %a) { +; CHECK-LABEL: test_vget_high_f16: +; CHECK: dup d0, {{v[0-9]+}}.d[1] +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + ret <4 x i16> %shuffle.i +} + +define <2 x float> @test_vget_high_f32(<4 x float> %a) { +; CHECK-LABEL: test_vget_high_f32: +; CHECK: dup d0, {{v[0-9]+}}.d[1] +entry: + %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 2, i32 3> + ret <2 x float> %shuffle.i +} + +define <8 x i8> @test_vget_high_p8(<16 x i8> %a) { +; CHECK-LABEL: test_vget_high_p8: +; CHECK: dup d0, {{v[0-9]+}}.d[1] +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + ret <8 x i8> %shuffle.i +} + +define <4 x i16> @test_vget_high_p16(<8 x i16> %a) { +; CHECK-LABEL: test_vget_high_p16: +; CHECK: dup d0, {{v[0-9]+}}.d[1] +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + ret <4 x i16> %shuffle.i +} + +define <1 x double> @test_vget_high_f64(<2 x double> %a) { +; CHECK-LABEL: test_vget_high_f64: +; CHECK: dup d0, {{v[0-9]+}}.d[1] +entry: + %shuffle.i = shufflevector <2 x double> %a, <2 x double> undef, <1 x i32> <i32 1> + ret <1 x double> %shuffle.i +} + +define <8 x i8> @test_vget_low_s8(<16 x i8> %a) { +; CHECK-LABEL: test_vget_low_s8: +; CHECK: ret +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + ret <8 x i8> %shuffle.i +} + +define <4 x i16> @test_vget_low_s16(<8 x i16> %a) { +; CHECK-LABEL: test_vget_low_s16: +; CHECK: ret +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + ret <4 x i16> %shuffle.i +} + +define <2 x i32> @test_vget_low_s32(<4 x i32> %a) { +; CHECK-LABEL: test_vget_low_s32: +; CHECK: ret +entry: + 
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1> + ret <2 x i32> %shuffle.i +} + +define <1 x i64> @test_vget_low_s64(<2 x i64> %a) { +; CHECK-LABEL: test_vget_low_s64: +; CHECK: ret +entry: + %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer + ret <1 x i64> %shuffle.i +} + +define <8 x i8> @test_vget_low_u8(<16 x i8> %a) { +; CHECK-LABEL: test_vget_low_u8: +; CHECK: ret +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + ret <8 x i8> %shuffle.i +} + +define <4 x i16> @test_vget_low_u16(<8 x i16> %a) { +; CHECK-LABEL: test_vget_low_u16: +; CHECK: ret +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + ret <4 x i16> %shuffle.i +} + +define <2 x i32> @test_vget_low_u32(<4 x i32> %a) { +; CHECK-LABEL: test_vget_low_u32: +; CHECK: ret +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1> + ret <2 x i32> %shuffle.i +} + +define <1 x i64> @test_vget_low_u64(<2 x i64> %a) { +; CHECK-LABEL: test_vget_low_u64: +; CHECK: ret +entry: + %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer + ret <1 x i64> %shuffle.i +} + +define <1 x i64> @test_vget_low_p64(<2 x i64> %a) { +; CHECK-LABEL: test_vget_low_p64: +; CHECK: ret +entry: + %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer + ret <1 x i64> %shuffle.i +} + +define <4 x i16> @test_vget_low_f16(<8 x i16> %a) { +; CHECK-LABEL: test_vget_low_f16: +; CHECK: ret +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + ret <4 x i16> %shuffle.i +} + +define <2 x float> @test_vget_low_f32(<4 x float> %a) { +; CHECK-LABEL: test_vget_low_f32: +; CHECK: ret +entry: + %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1> + ret <2 x float> 
%shuffle.i +} + +define <8 x i8> @test_vget_low_p8(<16 x i8> %a) { +; CHECK-LABEL: test_vget_low_p8: +; CHECK: ret +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + ret <8 x i8> %shuffle.i +} + +define <4 x i16> @test_vget_low_p16(<8 x i16> %a) { +; CHECK-LABEL: test_vget_low_p16: +; CHECK: ret +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + ret <4 x i16> %shuffle.i +} + +define <1 x double> @test_vget_low_f64(<2 x double> %a) { +; CHECK-LABEL: test_vget_low_f64: +; CHECK: ret +entry: + %shuffle.i = shufflevector <2 x double> %a, <2 x double> undef, <1 x i32> zeroinitializer + ret <1 x double> %shuffle.i +} diff --git a/test/CodeGen/AArch64/regress-fp128-livein.ll b/test/CodeGen/AArch64/regress-fp128-livein.ll new file mode 100644 index 0000000..cb8432a --- /dev/null +++ b/test/CodeGen/AArch64/regress-fp128-livein.ll @@ -0,0 +1,17 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s + +; Regression test for NZCV reg live-in not being added to fp128csel IfTrue BB, +; causing a crash during live range calc. 
+define void @fp128_livein(i64 %a) { + %tobool = icmp ne i64 %a, 0 + %conv = zext i1 %tobool to i32 + %conv2 = sitofp i32 %conv to fp128 + %conv6 = sitofp i32 %conv to double + %call3 = tail call i32 @g(fp128 %conv2) + %call8 = tail call i32 @h(double %conv6) + ret void +} + +declare i32 @f() +declare i32 @g(fp128) +declare i32 @h(double) diff --git a/test/CodeGen/AArch64/returnaddr.ll b/test/CodeGen/AArch64/returnaddr.ll new file mode 100644 index 0000000..c85f9ec --- /dev/null +++ b/test/CodeGen/AArch64/returnaddr.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +define i8* @rt0(i32 %x) nounwind readnone { +entry: +; CHECK-LABEL: rt0: +; CHECK: mov x0, x30 + %0 = tail call i8* @llvm.returnaddress(i32 0) + ret i8* %0 +} + +define i8* @rt2() nounwind readnone { +entry: +; CHECK-LABEL: rt2: +; CHECK: ldr x[[reg:[0-9]+]], [x29] +; CHECK: ldr x[[reg]], [x[[reg]]] +; CHECK: ldr x0, [x[[reg]], #8] + %0 = tail call i8* @llvm.returnaddress(i32 2) + ret i8* %0 +} + +declare i8* @llvm.returnaddress(i32) nounwind readnone diff --git a/test/CodeGen/AArch64/tls-dynamics.ll b/test/CodeGen/AArch64/tls-dynamics.ll index 887d2f8..68c481c 100644 --- a/test/CodeGen/AArch64/tls-dynamics.ll +++ b/test/CodeGen/AArch64/tls-dynamics.ll @@ -10,8 +10,8 @@ define i32 @test_generaldynamic() { ret i32 %val ; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var -; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var -; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var] +; CHECK-DAG: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var +; CHECK-DAG: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var] ; CHECK: .tlsdesccall general_dynamic_var ; CHECK-NEXT: blr [[CALLEE]] @@ -19,8 +19,8 @@ define i32 @test_generaldynamic() { ; CHECK: ldr w0, [x[[TP]], x0] ; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE -; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC -; CHECK-RELOC: 
R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_LD64_LO12_NC ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL } @@ -31,8 +31,8 @@ define i32* @test_generaldynamic_addr() { ret i32* @general_dynamic_var ; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var -; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var -; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var] +; CHECK-DAG: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var +; CHECK-DAG: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var] ; CHECK: .tlsdesccall general_dynamic_var ; CHECK-NEXT: blr [[CALLEE]] @@ -40,8 +40,8 @@ define i32* @test_generaldynamic_addr() { ; CHECK: add x0, [[TP]], x0 ; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE -; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC -; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_LD64_LO12_NC ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL } @@ -55,8 +55,8 @@ define i32 @test_localdynamic() { ret i32 %val ; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ -; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_ -; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_] +; CHECK-DAG: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_ +; CHECK-DAG: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_] ; CHECK: .tlsdesccall _TLS_MODULE_BASE_ ; CHECK-NEXT: blr [[CALLEE]] @@ -66,8 +66,8 @@ define i32 @test_localdynamic() { ; CHECK: ldr w0, [x0, [[DTP_OFFSET]]] ; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE -; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC -; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_LD64_LO12_NC ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL } @@ -78,8 +78,8 @@ define i32* 
@test_localdynamic_addr() { ret i32* @local_dynamic_var ; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ -; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_ -; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_] +; CHECK-DAG: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_ +; CHECK-DAG: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_] ; CHECK: .tlsdesccall _TLS_MODULE_BASE_ ; CHECK-NEXT: blr [[CALLEE]] @@ -89,8 +89,8 @@ define i32* @test_localdynamic_addr() { ; CHECK: add x0, x0, [[DTP_OFFSET]] ; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE -; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC -; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_LD64_LO12_NC ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL } @@ -110,8 +110,8 @@ define i32 @test_localdynamic_deduplicate() { ret i32 %sum ; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ -; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_ -; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_] +; CHECK-DAG: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_ +; CHECK-DAG: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_] ; CHECK: .tlsdesccall _TLS_MODULE_BASE_ ; CHECK-NEXT: blr [[CALLEE]] diff --git a/test/CodeGen/AArch64/variadic.ll b/test/CodeGen/AArch64/variadic.ll index 1c7e3bf..f3d376b 100644 --- a/test/CodeGen/AArch64/variadic.ll +++ b/test/CodeGen/AArch64/variadic.ll @@ -1,4 +1,5 @@ ; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 < %s | FileCheck --check-prefix=CHECK-NOFP %s %va_list = type {i8*, i8*, i8*, i32, i32} @@ -9,19 +10,28 @@ declare void @llvm.va_start(i8*) define void @test_simple(i32 %n, ...) 
{ ; CHECK-LABEL: test_simple: ; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]] +; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var ; CHECK: mov x[[FPRBASE:[0-9]+]], sp ; CHECK: str q7, [x[[FPRBASE]], #112] ; CHECK: add x[[GPRBASE:[0-9]+]], sp, #[[GPRFROMSP:[0-9]+]] ; CHECK: str x7, [x[[GPRBASE]], #48] +; CHECK-NOFP: sub sp, sp, #[[STACKSIZE:[0-9]+]] +; CHECK-NOFP: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var +; CHECK-NOFP: add x[[GPRBASE:[0-9]+]], sp, #[[GPRFROMSP:[0-9]+]] +; CHECK-NOFP: str x7, [x[[GPRBASE]], #48] +; CHECK-NOFP-NOT: str q7, +; CHECK-NOFP: str x1, [sp, #[[GPRFROMSP]]] + ; Omit the middle ones ; CHECK: str q0, [sp] ; CHECK: str x1, [sp, #[[GPRFROMSP]]] +; CHECK-NOFP-NOT: str q0, [sp] + %addr = bitcast %va_list* @var to i8* call void @llvm.va_start(i8* %addr) -; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var ; CHECK: movn [[VR_OFFS:w[0-9]+]], #127 ; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28] ; CHECK: movn [[GR_OFFS:w[0-9]+]], #55 @@ -33,6 +43,14 @@ define void @test_simple(i32 %n, ...) { ; CHECK: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]] ; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] +; CHECK-NOFP: str wzr, [x[[VA_LIST]], #28] +; CHECK-NOFP: movn [[GR_OFFS:w[0-9]+]], #55 +; CHECK-NOFP: str [[GR_OFFS]], [x[[VA_LIST]], #24] +; CHECK-NOFP: add [[GR_TOP:x[0-9]+]], x[[GPRBASE]], #56 +; CHECK-NOFP: str [[GR_TOP]], [x[[VA_LIST]], #8] +; CHECK-NOFP: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]] +; CHECK-NOFP: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] + ret void } @@ -44,11 +62,19 @@ define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) 
{ ; CHECK: add x[[GPRBASE:[0-9]+]], sp, #[[GPRFROMSP:[0-9]+]] ; CHECK: str x7, [x[[GPRBASE]], #32] +; CHECK-NOFP: sub sp, sp, #[[STACKSIZE:[0-9]+]] +; CHECK-NOFP-NOT: str q7, +; CHECK-NOFP: mov x[[GPRBASE:[0-9]+]], sp +; CHECK-NOFP: str x7, [x[[GPRBASE]], #24] + ; Omit the middle ones ; CHECK: str q1, [sp] ; CHECK: str x3, [sp, #[[GPRFROMSP]]] +; CHECK-NOFP-NOT: str q1, [sp] +; CHECK-NOFP: str x4, [sp] + %addr = bitcast %va_list* @var to i8* call void @llvm.va_start(i8* %addr) ; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var @@ -63,6 +89,15 @@ define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) { ; CHECK: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]] ; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] +; CHECK-NOFP: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var +; CHECK-NOFP: str wzr, [x[[VA_LIST]], #28] +; CHECK-NOFP: movn [[GR_OFFS:w[0-9]+]], #31 +; CHECK-NOFP: str [[GR_OFFS]], [x[[VA_LIST]], #24] +; CHECK-NOFP: add [[GR_TOP:x[0-9]+]], x[[GPRBASE]], #32 +; CHECK-NOFP: str [[GR_TOP]], [x[[VA_LIST]], #8] +; CHECK-NOFP: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]] +; CHECK-NOFP: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] + ret void } @@ -75,6 +110,9 @@ define void @test_nospare([8 x i64], [8 x float], ...) { ; CHECK: mov [[STACK:x[0-9]+]], sp ; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] +; CHECK-NOFP-NOT: sub sp, sp +; CHECK-NOFP: add [[STACK:x[0-9]+]], sp, #64 +; CHECK-NOFP: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] ret void } @@ -87,6 +125,10 @@ define void @test_offsetstack([10 x i64], [3 x float], ...) { ; CHECK: str q7, [x[[FPRBASE]], #64] ; CHECK-NOT: str x{{[0-9]+}}, + +; CHECK-NOFP-NOT: str q7, +; CHECK-NOT: str x7, + ; Omit the middle ones ; CHECK: str q3, [sp] @@ -102,6 +144,11 @@ define void @test_offsetstack([10 x i64], [3 x float], ...) 
{ ; CHECK: add [[STACK:x[0-9]+]], sp, #96 ; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] +; CHECK-NOFP: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var +; CHECK-NOFP: add [[STACK:x[0-9]+]], sp, #40 +; CHECK-NOFP: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] +; CHECK-NOFP: str wzr, [x[[VA_LIST]], #28] +; CHECK-NOFP: str wzr, [x[[VA_LIST]], #24] ret void } @@ -110,12 +157,14 @@ declare void @llvm.va_end(i8*) define void @test_va_end() nounwind { ; CHECK-LABEL: test_va_end: ; CHECK-NEXT: BB#0 +; CHECK-NOFP: BB#0 %addr = bitcast %va_list* @var to i8* call void @llvm.va_end(i8* %addr) ret void ; CHECK-NEXT: ret +; CHECK-NOFP-NEXT: ret } declare void @llvm.va_copy(i8* %dest, i8* %src) @@ -131,14 +180,25 @@ define void @test_va_copy() { ; Check beginning and end again: ; CHECK: ldr [[BLOCK:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var] +; CHECK: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var +; CHECK-NOFP: ldr [[BLOCK:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var] +; CHECK-NOFP: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var + ; CHECK: str [[BLOCK]], [{{x[0-9]+}}, #:lo12:second_list] +; CHECK: ldr [[BLOCK:x[0-9]+]], [x[[SRC_LIST]], #24] ; CHECK: add x[[DEST_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:second_list -; CHECK: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var -; CHECK: ldr [[BLOCK:x[0-9]+]], [x[[SRC_LIST]], #24] ; CHECK: str [[BLOCK]], [x[[DEST_LIST]], #24] +; CHECK-NOFP: str [[BLOCK]], [{{x[0-9]+}}, #:lo12:second_list] + +; CHECK-NOFP: ldr [[BLOCK:x[0-9]+]], [x[[SRC_LIST]], #24] +; CHECK-NOFP: add x[[DEST_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:second_list + +; CHECK-NOFP: str [[BLOCK]], [x[[DEST_LIST]], #24] + ret void ; CHECK: ret +; CHECK-NOFP: ret } diff --git a/test/CodeGen/ARM/2009-10-16-Scope.ll b/test/CodeGen/ARM/2009-10-16-Scope.ll index dd08b56..570fcf9 100644 --- a/test/CodeGen/ARM/2009-10-16-Scope.ll +++ b/test/CodeGen/ARM/2009-10-16-Scope.ll @@ -24,8 +24,7 @@ declare i32 @foo(i32) ssp !0 = metadata !{i32 5, i32 2, metadata !1, null} !1 = metadata !{i32 458763, null, 
metadata !2, i32 1, i32 1, i32 0}; [DW_TAG_lexical_block ] -!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"bar", metadata !"bar", metadata !"bar", i32 4, null, i1 false, i1 true, - i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0}; [DW_TAG_subprogram ] +!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"bar", metadata !"bar", metadata !"bar", i32 4, null, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0}; [DW_TAG_subprogram ] !3 = metadata !{i32 458769, metadata !8, i32 12, metadata !"clang 1.1", i1 true, metadata !"", i32 0, null, metadata !9, null, null, null, metadata !""}; [DW_TAG_compile_unit ] !4 = metadata !{i32 459008, metadata !5, metadata !"count_", metadata !3, i32 5, metadata !6}; [ DW_TAG_auto_variable ] !5 = metadata !{i32 458763, null, metadata !1, i32 1, i32 1, i32 0}; [DW_TAG_lexical_block ] diff --git a/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll b/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll index 89f468a..35739d7 100644 --- a/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll +++ b/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll @@ -13,12 +13,13 @@ entry: declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!3} +!llvm.module.flags = !{!15} !0 = metadata !{i32 524545, metadata !1, metadata !"b", metadata !2, i32 93, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] !1 = metadata !{i32 524334, metadata !12, null, metadata !"__addvsi3", metadata !"__addvsi3", metadata !"__addvsi3", i32 94, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !2 = metadata !{i32 524329, metadata !12} ; [ DW_TAG_file_type ] !12 = metadata !{metadata !"libgcc2.c", metadata !"/Users/bwilson/local/nightly/test-2010-04-14/build/llvmgcc.roots/llvmgcc~obj/src/gcc"} !3 = metadata !{i32 524305, metadata !12, i32 1, metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build 00)", i1 true, metadata !"", i32 0, metadata !13, metadata !13, metadata !14, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!4 = metadata !{i32 524309, metadata !12, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{i32 524309, metadata !12, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !5 = metadata !{metadata !6, metadata !6, metadata !6} !6 = metadata !{i32 524310, metadata !12, null, metadata !"SItype", i32 152, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ] !7 = metadata !{i32 524329, metadata !"libgcc2.h", metadata !"/Users/bwilson/local/nightly/test-2010-04-14/build/llvmgcc.roots/llvmgcc~obj/src/gcc", metadata !3} ; [ DW_TAG_file_type ] @@ -28,3 +29,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !11 = metadata !{i32 100, i32 0, metadata !10, null} !13 = metadata !{i32 0} !14 = metadata !{metadata !1} +!15 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/ARM/2010-08-04-StackVariable.ll b/test/CodeGen/ARM/2010-08-04-StackVariable.ll index f4ad4bc..7aacd1a 100644 --- a/test/CodeGen/ARM/2010-08-04-StackVariable.ll +++ b/test/CodeGen/ARM/2010-08-04-StackVariable.ll @@ -75,9 +75,10 @@ return: ; preds = %entry declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!3} +!llvm.module.flags = !{!49} !0 = metadata !{i32 786478, metadata !48, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 786451, metadata !48, null, metadata !"SVal", i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_structure_type ] +!1 = 
metadata !{i32 786451, metadata !48, null, metadata !"SVal", i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [SVal] [line 1, size 128, align 64, offset 0] [def] [from ] !2 = metadata !{i32 786473, metadata !48} ; [ DW_TAG_file_type ] !3 = metadata !{i32 786449, metadata !48, i32 4, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !47, metadata !47, metadata !46, metadata !47, metadata !47, metadata !""} ; [ DW_TAG_compile_unit ] !4 = metadata !{metadata !5, metadata !7, metadata !0, metadata !9} @@ -86,18 +87,18 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !7 = metadata !{i32 786445, metadata !48, metadata !1, metadata !"Kind", i32 8, i64 32, i64 32, i64 64, i32 0, metadata !8} ; [ DW_TAG_member ] !8 = metadata !{i32 786468, metadata !48, null, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] !9 = metadata !{i32 786478, metadata !48, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!10 = metadata !{i32 786453, metadata !48, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ] +!10 = metadata !{i32 786453, metadata !48, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !11 = metadata !{null, metadata !12, metadata !13} !12 = metadata !{i32 786447, metadata !48, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ] !13 = metadata !{i32 786468, metadata !48, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!14 = metadata !{i32 786453, metadata !48, null, 
metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_subroutine_type ] +!14 = metadata !{i32 786453, metadata !48, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !15 = metadata !{null, metadata !12} !16 = metadata !{i32 786478, metadata !48, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !17 = metadata !{i32 786478, metadata !48, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!18 = metadata !{i32 786453, metadata !48, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null} ; [ DW_TAG_subroutine_type ] +!18 = metadata !{i32 786453, metadata !48, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !19 = metadata !{metadata !13, metadata !13, metadata !1} !20 = metadata !{i32 786478, metadata !48, metadata !2, metadata !"main", metadata !"main", metadata !"main", i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!21 = metadata !{i32 786453, metadata !48, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_subroutine_type ] +!21 = metadata !{i32 786453, metadata !48, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] 
[from ] !22 = metadata !{metadata !13} !23 = metadata !{i32 786689, metadata !17, metadata !"i", metadata !2, i32 16, metadata !13, i32 0, i32 0} ; [ DW_TAG_arg_variable ] !24 = metadata !{i32 16, i32 0, metadata !17, null} @@ -125,3 +126,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !46 = metadata !{metadata !0, metadata !9, metadata !16, metadata !17, metadata !20} !47 = metadata !{i32 0} !48 = metadata !{metadata !"small.cc", metadata !"/Users/manav/R8248330"} +!49 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll b/test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll index e6d1518..3053694 100644 --- a/test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll +++ b/test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll @@ -1,10 +1,79 @@ +; This tests that MC/asm header conversion is smooth and that the +; build attributes are correct + +; RUN: llc < %s -mtriple=armv6-linux-gnueabi | FileCheck %s --check-prefix=V6 +; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi | FileCheck %s --check-prefix=V6M +; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mcpu=arm1156t2f-s | FileCheck %s --check-prefix=ARM1156T2F-S +; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi | FileCheck %s --check-prefix=V7M ; RUN: llc < %s -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=V7 ; RUN: llc < %s -mtriple=armv8-linux-gnueabi | FileCheck %s --check-prefix=V8 ; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi | FileCheck %s --check-prefix=Vt8 -; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=+v8fp | FileCheck %s --check-prefix=V8-V8FP -; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=+neon | FileCheck %s --check-prefix=V8-NEON -; This tests that MC/asm header conversion is smooth -; +; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=-neon,-crypto | FileCheck %s --check-prefix=V8-FPARMv8 +; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=-fp-armv8,-crypto | FileCheck %s --check-prefix=V8-NEON +; 
RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=-crypto | FileCheck %s --check-prefix=V8-FPARMv8-NEON +; RUN: llc < %s -mtriple=armv8-linux-gnueabi | FileCheck %s --check-prefix=V8-FPARMv8-NEON-CRYPTO +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -float-abi=soft | FileCheck %s --check-prefix=CORTEX-A9-SOFT +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -float-abi=hard | FileCheck %s --check-prefix=CORTEX-A9-HARD +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9-mp | FileCheck %s --check-prefix=CORTEX-A9-MP +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a15 | FileCheck %s --check-prefix=CORTEX-A15 +; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 | FileCheck %s --check-prefix=CORTEX-M0 +; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=soft | FileCheck %s --check-prefix=CORTEX-M4-SOFT +; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=hard | FileCheck %s --check-prefix=CORTEX-M4-HARD +; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r5 | FileCheck %s --check-prefix=CORTEX-R5 +; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a53 | FileCheck %s --check-prefix=CORTEX-A53 +; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a57 | FileCheck %s --check-prefix=CORTEX-A57 + +; V6: .eabi_attribute 6, 6 +; V6: .eabi_attribute 8, 1 +; V6: .eabi_attribute 24, 1 +; V6: .eabi_attribute 25, 1 +; V6-NOT: .eabi_attribute 27 +; V6-NOT: .eabi_attribute 28 +; V6-NOT: .eabi_attribute 36 +; V6-NOT: .eabi_attribute 42 +; V6-NOT: .eabi_attribute 68 + +; V6M: .eabi_attribute 6, 12 +; V6M: .eabi_attribute 7, 77 +; V6M: .eabi_attribute 8, 0 +; V6M: .eabi_attribute 9, 1 +; V6M: .eabi_attribute 24, 1 +; V6M: .eabi_attribute 25, 1 +; V6M-NOT: .eabi_attribute 27 +; V6M-NOT: .eabi_attribute 28 +; V6M-NOT: .eabi_attribute 36 +; V6M-NOT: .eabi_attribute 42 +; V6M-NOT: .eabi_attribute 68 + +; ARM1156T2F-S: .cpu arm1156t2f-s +; ARM1156T2F-S: .eabi_attribute 
6, 8 +; ARM1156T2F-S: .eabi_attribute 8, 1 +; ARM1156T2F-S: .eabi_attribute 9, 2 +; ARM1156T2F-S: .fpu vfpv2 +; ARM1156T2F-S: .eabi_attribute 20, 1 +; ARM1156T2F-S: .eabi_attribute 21, 1 +; ARM1156T2F-S: .eabi_attribute 23, 3 +; ARM1156T2F-S: .eabi_attribute 24, 1 +; ARM1156T2F-S: .eabi_attribute 25, 1 +; ARM1156T2F-S-NOT: .eabi_attribute 27 +; ARM1156T2F-S-NOT: .eabi_attribute 28 +; ARM1156T2F-S-NOT: .eabi_attribute 36 +; ARM1156T2F-S-NOT: .eabi_attribute 42 +; ARM1156T2F-S-NOT: .eabi_attribute 68 + +; V7M: .eabi_attribute 6, 10 +; V7M: .eabi_attribute 7, 77 +; V7M: .eabi_attribute 8, 0 +; V7M: .eabi_attribute 9, 2 +; V7M: .eabi_attribute 24, 1 +; V7M: .eabi_attribute 25, 1 +; V7M-NOT: .eabi_attribute 27 +; V7M-NOT: .eabi_attribute 28 +; V7M-NOT: .eabi_attribute 36 +; V7M-NOT: .eabi_attribute 42 +; V7M: .eabi_attribute 44, 0 +; V7M-NOT: .eabi_attribute 68 + ; V7: .syntax unified ; V7: .eabi_attribute 6, 10 ; V7: .eabi_attribute 20, 1 @@ -12,6 +81,11 @@ ; V7: .eabi_attribute 23, 3 ; V7: .eabi_attribute 24, 1 ; V7: .eabi_attribute 25, 1 +; V7-NOT: .eabi_attribute 27 +; V7-NOT: .eabi_attribute 28 +; V7-NOT: .eabi_attribute 36 +; V7-NOT: .eabi_attribute 42 +; V7-NOT: .eabi_attribute 68 ; V8: .syntax unified ; V8: .eabi_attribute 6, 14 @@ -19,14 +93,193 @@ ; Vt8: .syntax unified ; Vt8: .eabi_attribute 6, 14 -; V8-V8FP: .syntax unified -; V8-V8FP: .eabi_attribute 6, 14 -; V8-V8FP: .eabi_attribute 10, 7 +; V8-FPARMv8: .syntax unified +; V8-FPARMv8: .eabi_attribute 6, 14 +; V8-FPARMv8: .fpu fp-armv8 ; V8-NEON: .syntax unified ; V8-NEON: .eabi_attribute 6, 14 +; V8-NEON: .fpu neon ; V8-NEON: .eabi_attribute 12, 3 +; V8-FPARMv8-NEON: .syntax unified +; V8-FPARMv8-NEON: .eabi_attribute 6, 14 +; V8-FPARMv8-NEON: .fpu neon-fp-armv8 +; V8-FPARMv8-NEON: .eabi_attribute 12, 3 + +; V8-FPARMv8-NEON-CRYPTO: .syntax unified +; V8-FPARMv8-NEON-CRYPTO: .eabi_attribute 6, 14 +; V8-FPARMv8-NEON-CRYPTO: .fpu crypto-neon-fp-armv8 +; V8-FPARMv8-NEON-CRYPTO: .eabi_attribute 12, 3 + +; 
CORTEX-A9-SOFT: .cpu cortex-a9 +; CORTEX-A9-SOFT: .eabi_attribute 6, 10 +; CORTEX-A9-SOFT: .eabi_attribute 7, 65 +; CORTEX-A9-SOFT: .eabi_attribute 8, 1 +; CORTEX-A9-SOFT: .eabi_attribute 9, 2 +; CORTEX-A9-SOFT: .fpu neon +; CORTEX-A9-SOFT: .eabi_attribute 20, 1 +; CORTEX-A9-SOFT: .eabi_attribute 21, 1 +; CORTEX-A9-SOFT: .eabi_attribute 23, 3 +; CORTEX-A9-SOFT: .eabi_attribute 24, 1 +; CORTEX-A9-SOFT: .eabi_attribute 25, 1 +; CORTEX-A9-SOFT-NOT: .eabi_attribute 27 +; CORTEX-A9-SOFT-NOT: .eabi_attribute 28 +; CORTEX-A9-SOFT: .eabi_attribute 36, 1 +; CORTEX-A9-SOFT-NOT: .eabi_attribute 42 +; CORTEX-A9-SOFT: .eabi_attribute 68, 1 + +; CORTEX-A9-HARD: .cpu cortex-a9 +; CORTEX-A9-HARD: .eabi_attribute 6, 10 +; CORTEX-A9-HARD: .eabi_attribute 7, 65 +; CORTEX-A9-HARD: .eabi_attribute 8, 1 +; CORTEX-A9-HARD: .eabi_attribute 9, 2 +; CORTEX-A9-HARD: .fpu neon +; CORTEX-A9-HARD: .eabi_attribute 20, 1 +; CORTEX-A9-HARD: .eabi_attribute 21, 1 +; CORTEX-A9-HARD: .eabi_attribute 23, 3 +; CORTEX-A9-HARD: .eabi_attribute 24, 1 +; CORTEX-A9-HARD: .eabi_attribute 25, 1 +; CORTEX-A9-HARD-NOT: .eabi_attribute 27 +; CORTEX-A9-HARD: .eabi_attribute 28, 1 +; CORTEX-A9-HARD: .eabi_attribute 36, 1 +; CORTEX-A9-HARD-NOT: .eabi_attribute 42 +; CORTEX-A9-HARD: .eabi_attribute 68, 1 + +; CORTEX-A9-MP: .cpu cortex-a9-mp +; CORTEX-A9-MP: .eabi_attribute 6, 10 +; CORTEX-A9-MP: .eabi_attribute 7, 65 +; CORTEX-A9-MP: .eabi_attribute 8, 1 +; CORTEX-A9-MP: .eabi_attribute 9, 2 +; CORTEX-A9-MP: .fpu neon +; CORTEX-A9-MP: .eabi_attribute 20, 1 +; CORTEX-A9-MP: .eabi_attribute 21, 1 +; CORTEX-A9-MP: .eabi_attribute 23, 3 +; CORTEX-A9-MP: .eabi_attribute 24, 1 +; CORTEX-A9-MP: .eabi_attribute 25, 1 +; CORTEX-A9-NOT: .eabi_attribute 27 +; CORTEX-A9-NOT: .eabi_attribute 28 +; CORTEX-A9-MP: .eabi_attribute 36, 1 +; CORTEX-A9-MP: .eabi_attribute 42, 1 +; CORTEX-A9-MP: .eabi_attribute 68, 1 + +; CORTEX-A15: .cpu cortex-a15 +; CORTEX-A15: .eabi_attribute 6, 10 +; CORTEX-A15: .eabi_attribute 7, 65 +; CORTEX-A15: 
.eabi_attribute 8, 1 +; CORTEX-A15: .eabi_attribute 9, 2 +; CORTEX-A15: .fpu neon-vfpv4 +; CORTEX-A15: .eabi_attribute 20, 1 +; CORTEX-A15: .eabi_attribute 21, 1 +; CORTEX-A15: .eabi_attribute 23, 3 +; CORTEX-A15: .eabi_attribute 24, 1 +; CORTEX-A15: .eabi_attribute 25, 1 +; CORTEX-A15-NOT: .eabi_attribute 27 +; CORTEX-A15-NOT: .eabi_attribute 28 +; CORTEX-A15: .eabi_attribute 36, 1 +; CORTEX-A15: .eabi_attribute 42, 1 +; CORTEX-A15: .eabi_attribute 44, 2 +; CORTEX-A15: .eabi_attribute 68, 3 + +; CORTEX-M0: .cpu cortex-m0 +; CORTEX-M0: .eabi_attribute 6, 12 +; CORTEX-M0: .eabi_attribute 7, 77 +; CORTEX-M0: .eabi_attribute 8, 0 +; CORTEX-M0: .eabi_attribute 9, 1 +; CORTEX-M0: .eabi_attribute 24, 1 +; CORTEX-M0: .eabi_attribute 25, 1 +; CORTEX-M0-NOT: .eabi_attribute 27 +; CORTEX-M0-NOT: .eabi_attribute 28 +; CORTEX-M0-NOT: .eabi_attribute 36 +; CORTEX-M0-NOT: .eabi_attribute 42 +; CORTEX-M0-NOT: .eabi_attribute 68 + +; CORTEX-M4-SOFT: .cpu cortex-m4 +; CORTEX-M4-SOFT: .eabi_attribute 6, 13 +; CORTEX-M4-SOFT: .eabi_attribute 7, 77 +; CORTEX-M4-SOFT: .eabi_attribute 8, 0 +; CORTEX-M4-SOFT: .eabi_attribute 9, 2 +; CORTEX-M4-SOFT: .fpu vfpv4-d16 +; CORTEX-M4-SOFT: .eabi_attribute 20, 1 +; CORTEX-M4-SOFT: .eabi_attribute 21, 1 +; CORTEX-M4-SOFT: .eabi_attribute 23, 3 +; CORTEX-M4-SOFT: .eabi_attribute 24, 1 +; CORTEX-M4-SOFT: .eabi_attribute 25, 1 +; CORTEX-M4-SOFT: .eabi_attribute 27, 1 +; CORTEX-M4-SOFT-NOT: .eabi_attribute 28 +; CORTEX-M4-SOFT: .eabi_attribute 36, 1 +; CORTEX-M4-SOFT-NOT: .eabi_attribute 42 +; CORTEX-M4-SOFT: .eabi_attribute 44, 0 +; CORTEX-M4-SOFT-NOT: .eabi_attribute 68 + +; CORTEX-M4-HARD: .cpu cortex-m4 +; CORTEX-M4-HARD: .eabi_attribute 6, 13 +; CORTEX-M4-HARD: .eabi_attribute 7, 77 +; CORTEX-M4-HARD: .eabi_attribute 8, 0 +; CORTEX-M4-HARD: .eabi_attribute 9, 2 +; CORTEX-M4-HARD: .fpu vfpv4-d16 +; CORTEX-M4-HARD: .eabi_attribute 20, 1 +; CORTEX-M4-HARD: .eabi_attribute 21, 1 +; CORTEX-M4-HARD: .eabi_attribute 23, 3 +; CORTEX-M4-HARD: 
.eabi_attribute 24, 1 +; CORTEX-M4-HARD: .eabi_attribute 25, 1 +; CORTEX-M4-HARD: .eabi_attribute 27, 1 +; CORTEX-M4-HARD: .eabi_attribute 28, 1 +; CORTEX-M4-HARD: .eabi_attribute 36, 1 +; CORTEX-M4-HARD-NOT: .eabi_attribute 42 +; CORTEX-M4-HARD: .eabi_attribute 44, 0 +; CORTEX-M4-HRAD-NOT: .eabi_attribute 68 + +; CORTEX-R5: .cpu cortex-r5 +; CORTEX-R5: .eabi_attribute 6, 10 +; CORTEX-R5: .eabi_attribute 7, 82 +; CORTEX-R5: .eabi_attribute 8, 1 +; CORTEX-R5: .eabi_attribute 9, 2 +; CORTEX-R5: .fpu vfpv3-d16 +; CORTEX-R5: .eabi_attribute 20, 1 +; CORTEX-R5: .eabi_attribute 21, 1 +; CORTEX-R5: .eabi_attribute 23, 3 +; CORTEX-R5: .eabi_attribute 24, 1 +; CORTEX-R5: .eabi_attribute 25, 1 +; CORTEX-R5: .eabi_attribute 27, 1 +; CORTEX-R5-NOT: .eabi_attribute 28 +; CORTEX-R5-NOT: .eabi_attribute 36 +; CORTEX-R5-NOT: .eabi_attribute 42 +; CORTEX-R5: .eabi_attribute 44, 2 +; CORTEX-R5-NOT: .eabi_attribute 68 + +; CORTEX-A53: .cpu cortex-a53 +; CORTEX-A53: .eabi_attribute 6, 14 +; CORTEX-A53: .eabi_attribute 7, 65 +; CORTEX-A53: .eabi_attribute 8, 1 +; CORTEX-A53: .eabi_attribute 9, 2 +; CORTEX-A53: .fpu crypto-neon-fp-armv8 +; CORTEX-A53: .eabi_attribute 12, 3 +; CORTEX-A53: .eabi_attribute 24, 1 +; CORTEX-A53: .eabi_attribute 25, 1 +; CORTEX-A53-NOT: .eabi_attribute 27 +; CORTEX-A53-NOT: .eabi_attribute 28 +; CORTEX-A53: .eabi_attribute 36, 1 +; CORTEX-A53: .eabi_attribute 42, 1 +; CORTEX-A53: .eabi_attribute 44, 2 +; CORTEX-A53: .eabi_attribute 68, 3 + +; CORTEX-A57: .cpu cortex-a57 +; CORTEX-A57: .eabi_attribute 6, 14 +; CORTEX-A57: .eabi_attribute 7, 65 +; CORTEX-A57: .eabi_attribute 8, 1 +; CORTEX-A57: .eabi_attribute 9, 2 +; CORTEX-A57: .fpu crypto-neon-fp-armv8 +; CORTEX-A57: .eabi_attribute 12, 3 +; CORTEX-A57: .eabi_attribute 24, 1 +; CORTEX-A57: .eabi_attribute 25, 1 +; CORTEX-A57-NOT: .eabi_attribute 27 +; CORTEX-A57-NOT: .eabi_attribute 28 +; CORTEX-A57: .eabi_attribute 36, 1 +; CORTEX-A57: .eabi_attribute 42, 1 +; CORTEX-A57: .eabi_attribute 44, 2 +; 
CORTEX-A57: .eabi_attribute 68, 3 + define i32 @f(i64 %z) { ret i32 0 } diff --git a/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll b/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll deleted file mode 100644 index d19adcc..0000000 --- a/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll +++ /dev/null @@ -1,47 +0,0 @@ -; RUN: llc %s -mtriple=arm-linux-gnueabi -filetype=obj -o - | \ -; RUN: llvm-readobj -s -sd | FileCheck -check-prefix=BASIC %s -; RUN: llc %s -mtriple=armv7-linux-gnueabi -march=arm -mcpu=cortex-a8 \ -; RUN: -mattr=-neon,-vfp3,+vfp2 \ -; RUN: -arm-reserve-r9 -filetype=obj -o - | \ -; RUN: llvm-readobj -s -sd | FileCheck -check-prefix=CORTEXA8 %s - - -; This tests that the extpected ARM attributes are emitted. -; -; BASIC: Section { -; BASIC: Name: .ARM.attributes -; BASIC-NEXT: Type: SHT_ARM_ATTRIBUTES -; BASIC-NEXT: Flags [ (0x0) -; BASIC-NEXT: ] -; BASIC-NEXT: Address: 0x0 -; BASIC-NEXT: Offset: 0x3C -; BASIC-NEXT: Size: 28 -; BASIC-NEXT: Link: 0 -; BASIC-NEXT: Info: 0 -; BASIC-NEXT: AddressAlignment: 1 -; BASIC-NEXT: EntrySize: 0 -; BASIC-NEXT: SectionData ( -; BASIC-NEXT: 0000: 411B0000 00616561 62690001 11000000 -; BASIC-NEXT: 0010: 06011401 15011703 18011901 -; BASIC-NEXT: ) - -; CORTEXA8: Name: .ARM.attributes -; CORTEXA8-NEXT: Type: SHT_ARM_ATTRIBUTES -; CORTEXA8-NEXT: Flags [ (0x0) -; CORTEXA8-NEXT: ] -; CORTEXA8-NEXT: Address: 0x0 -; CORTEXA8-NEXT: Offset: 0x3C -; CORTEXA8-NEXT: Size: 47 -; CORTEXA8-NEXT: Link: 0 -; CORTEXA8-NEXT: Info: 0 -; CORTEXA8-NEXT: AddressAlignment: 1 -; CORTEXA8-NEXT: EntrySize: 0 -; CORTEXA8-NEXT: SectionData ( -; CORTEXA8-NEXT: 0000: 412E0000 00616561 62690001 24000000 -; CORTEXA8-NEXT: 0010: 05434F52 5445582D 41380006 0A074108 -; CORTEXA8-NEXT: 0020: 0109020A 02140115 01170318 011901 -; CORTEXA8-NEXT: ) - -define i32 @f(i64 %z) { - ret i32 0 -} diff --git a/test/CodeGen/ARM/2010-11-30-reloc-movt.ll b/test/CodeGen/ARM/2010-11-30-reloc-movt.ll deleted file mode 100644 index 6bea7b8..0000000 --- 
a/test/CodeGen/ARM/2010-11-30-reloc-movt.ll +++ /dev/null @@ -1,27 +0,0 @@ -; RUN: llc %s -mtriple=armv7-linux-gnueabi -filetype=obj -o - | \ -; RUN: llvm-readobj -s -sr -sd | FileCheck -check-prefix=OBJ %s - -target triple = "armv7-none-linux-gnueabi" - -@a = external global i8 - -define arm_aapcs_vfpcc i32 @barf() nounwind { -entry: - %0 = tail call arm_aapcs_vfpcc i32 @foo(i8* @a) nounwind - ret i32 %0 -; OBJ: Section { -; OBJ: Name: .text -; OBJ: SectionData ( -; OBJ-NEXT: 0000: 00482DE9 000000E3 000040E3 FEFFFFEB -; OBJ-NEXT: 0010: 0088BDE8 -; OBJ-NEXT: ) -; OBJ: Relocations [ -; OBJ-NEXT: 0x4 R_ARM_MOVW_ABS_NC a -; OBJ-NEXT: 0x8 R_ARM_MOVT_ABS -; OBJ-NEXT: 0xC R_ARM_CALL foo -; OBJ-NEXT: ] - -} - -declare arm_aapcs_vfpcc i32 @foo(i8*) - diff --git a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll index 626d121..f57411b 100644 --- a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll +++ b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll @@ -76,11 +76,12 @@ entry: } !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!49} !0 = metadata !{i32 786478, metadata !47, metadata !1, metadata !"get1", metadata !"get1", metadata !"get1", i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get1, null, null, metadata !42, i32 4} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 786473, metadata !47} ; [ DW_TAG_file_type ] !2 = metadata !{i32 786449, metadata !47, i32 1, metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build 2369.8)", i1 true, metadata !"", i32 0, metadata !48, metadata !48, metadata !40, metadata !41, metadata !41, metadata !""} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !47, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !47, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5, metadata !5} !5 = metadata !{i32 786468, metadata !47, metadata !1, metadata !"_Bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] !6 = metadata !{i32 786478, metadata !47, metadata !1, metadata !"get2", metadata !"get2", metadata !"get2", i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get2, null, null, metadata !43, i32 7} ; [ DW_TAG_subprogram ] @@ -126,3 +127,4 @@ entry: !46 = metadata !{metadata !27, metadata !28} !47 = metadata !{metadata !"foo.c", metadata !"/tmp/"} !48 = metadata !{i32 0} +!49 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll index f689d49..bc72e12 100644 --- a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll +++ b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll @@ -21,8 +21,8 @@ for.body: ; preds = %_Z14printIsNotZeroi %x = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %i.022, i32 0 %y = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %i.022, i32 1 %inc = add i32 %i.022, 1 - %tmp8 = load i32* %x, align 4, !tbaa !0 - %tmp11 = load i32* %y, align 4, !tbaa !0 + %tmp8 = load i32* %x, align 4 + %tmp11 = load i32* %y, align 4 %mul = mul nsw i32 %tmp11, %tmp8 %tobool.i14 = icmp eq i32 %mul, 0 br i1 %tobool.i14, label %_Z14printIsNotZeroi.exit17, label %if.then.i16 @@ -35,15 
+35,10 @@ _Z14printIsNotZeroi.exit17: ; preds = %_Z14printIsNotZeroi _Z14printIsNotZeroi.exit17.for.body_crit_edge: ; preds = %_Z14printIsNotZeroi.exit17 %b.phi.trans.insert = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %inc, i32 3 - %tmp3.pre = load i8* %b.phi.trans.insert, align 1, !tbaa !3 + %tmp3.pre = load i8* %b.phi.trans.insert, align 1 %phitmp27 = icmp eq i8 undef, 0 br label %for.body for.end: ; preds = %_Z14printIsNotZeroi.exit17 ret void } - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA", null} -!3 = metadata !{metadata !"bool", metadata !1} diff --git a/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll b/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll index 348ec9f..e30c9c6 100644 --- a/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll +++ b/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll @@ -15,15 +15,14 @@ for.cond: for.body: ; CHECK: %for. -; CHECK: movs r{{[0-9]+}}, #{{[01]}} +; CHECK: mov{{.*}} r{{[0-9]+}}, #{{[01]}} +; CHECK: mov{{.*}} r{{[0-9]+}}, #{{[01]}} +; CHECK-NOT: mov r{{[0-9]+}}, #{{[01]}} %arrayidx = getelementptr i32* %A, i32 %0 %tmp4 = load i32* %arrayidx, align 4 %cmp6 = icmp eq i32 %tmp4, %value br i1 %cmp6, label %return, label %for.inc -; CHECK: %for. 
-; CHECK: movs r{{[0-9]+}}, #{{[01]}} - for.inc: %inc = add i32 %0, 1 br label %for.cond diff --git a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll index 33826f8..bb78707 100644 --- a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll +++ b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll @@ -73,17 +73,18 @@ define i32 @get5(i32 %a) nounwind optsize ssp { declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!49} !0 = metadata !{i32 786449, metadata !47, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, metadata !48, metadata !48, metadata !40, metadata !41, metadata !41, null} ; [ DW_TAG_compile_unit ] -!1 = metadata !{i32 786478, metadata !47, metadata !2, metadata !"get1", metadata !"get1", metadata !"", i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get1, null, null, metadata !42, i32 5} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786478, metadata !47, metadata !2, metadata !"get1", metadata !"get1", metadata !"", i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @get1, null, null, metadata !42, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [get1] !2 = metadata !{i32 786473, metadata !47} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 786453, metadata !47, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !47, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 786478, metadata !47, metadata !2, metadata !"get2", metadata !"get2", metadata !"", i32 8, 
metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get2, null, null, metadata !43, i32 8} ; [ DW_TAG_subprogram ] -!7 = metadata !{i32 786478, metadata !47, metadata !2, metadata !"get3", metadata !"get3", metadata !"", i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get3, null, null, metadata !44, i32 11} ; [ DW_TAG_subprogram ] -!8 = metadata !{i32 786478, metadata !47, metadata !2, metadata !"get4", metadata !"get4", metadata !"", i32 14, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get4, null, null, metadata !45, i32 14} ; [ DW_TAG_subprogram ] -!9 = metadata !{i32 786478, metadata !47, metadata !2, metadata !"get5", metadata !"get5", metadata !"", i32 17, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get5, null, null, metadata !46, i32 17} ; [ DW_TAG_subprogram ] +!6 = metadata !{i32 786478, metadata !47, metadata !2, metadata !"get2", metadata !"get2", metadata !"", i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @get2, null, null, metadata !43, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [get2] +!7 = metadata !{i32 786478, metadata !47, metadata !2, metadata !"get3", metadata !"get3", metadata !"", i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @get3, null, null, metadata !44, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [get3] +!8 = metadata !{i32 786478, metadata !47, metadata !2, metadata !"get4", metadata !"get4", metadata !"", i32 14, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @get4, null, null, metadata !45, i32 14} ; [ DW_TAG_subprogram ] [line 14] [def] [get4] +!9 = metadata !{i32 786478, metadata !47, metadata !2, metadata !"get5", metadata !"get5", metadata !"", i32 17, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @get5, null, null, metadata 
!46, i32 17} ; [ DW_TAG_subprogram ] [line 17] [def] [get5] !10 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 16777221, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] !11 = metadata !{i32 786688, metadata !12, metadata !"b", metadata !2, i32 5, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] !12 = metadata !{i32 786443, metadata !47, metadata !1, i32 5, i32 19, i32 0} ; [ DW_TAG_lexical_block ] @@ -123,3 +124,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !46 = metadata !{metadata !27, metadata !28} !47 = metadata !{metadata !"ss3.c", metadata !"/private/tmp"} !48 = metadata !{i32 0} +!49 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll b/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll index 91de08a..9163166 100644 --- a/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll +++ b/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll @@ -42,7 +42,7 @@ if.then: ; preds = %land.lhs.true ; If-convert the return ; CHECK: it ne ; Fold the CSR+return into a pop -; CHECK: pop {r4, r5, r6, r7, pc} +; CHECK: pop {r4, r5, r7, pc} sw.bb18: %call20 = tail call i32 @bar(i32 %in2) nounwind switch i32 %call20, label %sw.default56 [ diff --git a/test/CodeGen/ARM/2011-10-26-memset-inline.ll b/test/CodeGen/ARM/2011-10-26-memset-inline.ll index ff049c8..03614ed 100644 --- a/test/CodeGen/ARM/2011-10-26-memset-inline.ll +++ b/test/CodeGen/ARM/2011-10-26-memset-inline.ll @@ -10,8 +10,8 @@ target triple = "thumbv7-apple-ios5.0.0" ; CHECK-GENERIT-NEXT: strb ; CHECK-GENERIT-NEXT: strb ; CHECK-GENERIT-NEXT: strb -; CHECK-UNALIGNED: strb -; CHECK-UNALIGNED-NEXT: str +; CHECK-UNALIGNED: strb +; CHECK-UNALIGNED: str define void @foo(i8* nocapture %c) nounwind optsize { entry: call void @llvm.memset.p0i8.i64(i8* %c, i8 -1, i64 5, i32 1, i1 false) diff --git a/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll index f563eee..850c511 100644 --- 
a/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll +++ b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll @@ -1,8 +1,8 @@ ; RUN: llc -march=arm -mcpu=cortex-a8 < %s | FileCheck %s ; Trigger multiple NEON stores. -; CHECK: vst1.64 -; CHECK-NEXT: vst1.64 +; CHECK: vst1.64 +; CHECK: vst1.64 define void @f_0_40(i8* nocapture %c) nounwind optsize { entry: call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 40, i32 16, i1 false) diff --git a/test/CodeGen/ARM/2012-08-30-select.ll b/test/CodeGen/ARM/2012-08-30-select.ll index 2fd8df4..e78bbde 100644 --- a/test/CodeGen/ARM/2012-08-30-select.ll +++ b/test/CodeGen/ARM/2012-08-30-select.ll @@ -5,14 +5,11 @@ ;CHECK: it ne ;CHECK-NEXT: vmovne.i32 ;CHECK: bx -define <16 x i8> @select_s_v_v(i32 %avail, i8* %bar) { +define <16 x i8> @select_s_v_v(<16 x i8> %vec, i32 %avail) { entry: - %vld1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %bar, i32 1) %and = and i32 %avail, 1 %tobool = icmp eq i32 %and, 0 - %vld1. = select i1 %tobool, <16 x i8> %vld1, <16 x i8> zeroinitializer - ret <16 x i8> %vld1. 
+ %ret = select i1 %tobool, <16 x i8> %vec, <16 x i8> zeroinitializer + ret <16 x i8> %ret } -declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* , i32 ) - diff --git a/test/CodeGen/ARM/2013-02-27-expand-vfma.ll b/test/CodeGen/ARM/2013-02-27-expand-vfma.ll index 135b144..f812118 100644 --- a/test/CodeGen/ARM/2013-02-27-expand-vfma.ll +++ b/test/CodeGen/ARM/2013-02-27-expand-vfma.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s -; RUN: llc < %s -mtriple=armv7s-apple-darwin | FileCheck %s -check-prefix=VFP4 +; RUN: llc < %s -mtriple=armv7s-apple-darwin | FileCheck %s -check-prefix=CHECK-VFP4 define <4 x float> @muladd(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind { ; CHECK-LABEL: muladd: diff --git a/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll b/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll index 2eeebac..c4f5f54 100644 --- a/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll +++ b/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll @@ -1,4 +1,6 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv8 | FileCheck -check-prefix=CHECK-V8 %s +; RUN: llc < %s -mtriple=thumbv7 -arm-restrict-it | FileCheck -check-prefix=CHECK-V8 %s ; rdar://13782395 define i32 @t1(i32 %a, i32 %b, i8** %retaddr) { @@ -81,7 +83,7 @@ KBBlockZero.exit: ; preds = %bb2.i ; <rdar://problem/14379453> ; Hard-coded registers comes from the ABI. 
-; CHECK: wrapDistance: +; CHECK-LABEL: wrapDistance: ; CHECK: cmp r1, #59 ; CHECK-NEXT: itt le ; CHECK-NEXT: suble r0, r2, #1 @@ -100,6 +102,27 @@ KBBlockZero.exit: ; preds = %bb2.i ; CHECK: [[LABEL]]: ; CHECK-NEXT: subs r0, r1, r0 ; CHECK-NEXT: bx lr + +; CHECK-V8-LABEL: wrapDistance: +; CHECK-V8: cmp r1, #59 +; CHECK-V8-NEXT: bgt +; CHECK-V8-NEXT: %if.then +; CHECK-V8-NEXT: subs r0, r2, #1 +; CHECK-V8-NEXT: bx lr +; CHECK-V8-NEXT: %if.else +; CHECK-V8-NEXT: subs [[REG:r[0-9]+]], #120 +; CHECK-V8-NEXT: cmp [[REG]], r1 +; CHECK-V8-NEXT: bge +; CHECK-V8-NEXT: %if.else +; CHECK-V8-NEXT: cmp r0, #119 +; CHECK-V8-NEXT: bgt +; CHECK-V8-NEXT: %if.then4 +; CHECK-V8-NEXT: adds r0, r1, #1 +; CHECK-V8-NEXT: bx lr +; CHECK-V8-NEXT: %if.end5 +; CHECK-V8-NEXT: subs r0, r1, r0 +; CHECK-V8-NEXT: bx lr + define i32 @wrapDistance(i32 %tx, i32 %sx, i32 %w) { entry: %cmp = icmp slt i32 %sx, 60 diff --git a/test/CodeGen/ARM/2013-05-07-ByteLoadSameAddress.ll b/test/CodeGen/ARM/2013-05-07-ByteLoadSameAddress.ll new file mode 100644 index 0000000..defb946 --- /dev/null +++ b/test/CodeGen/ARM/2013-05-07-ByteLoadSameAddress.ll @@ -0,0 +1,64 @@ +; RUN: llc < %s -march=thumb -mattr=+v7,+thumb2 | FileCheck %s + +define i8 @f1(i8* %call1, i8* %call3, i32 %h, i32 %w, i32 %Width) { +; CHECK: f1: +entry: + %mul17 = mul nsw i32 %Width, %h + %add = add nsw i32 %mul17, %w + %sub19 = sub i32 %add, %Width + %sub20 = add i32 %sub19, -1 + %arrayidx21 = getelementptr inbounds i8* %call1, i32 %sub20 + %0 = load i8* %arrayidx21, align 1 + %conv22 = zext i8 %0 to i32 + %arrayidx25 = getelementptr inbounds i8* %call1, i32 %sub19 + %1 = load i8* %arrayidx25, align 1 + %conv26 = zext i8 %1 to i32 + %mul23189 = add i32 %conv26, %conv22 + %add30 = add i32 %sub19, 1 + %arrayidx31 = getelementptr inbounds i8* %call1, i32 %add30 + %2 = load i8* %arrayidx31, align 1 + %conv32 = zext i8 %2 to i32 +; CHECK: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #-1] +; CHECK-NEXT: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #1] + %add28190 = add i32 
%mul23189, %conv32 + %sub35 = add i32 %add, -1 + %arrayidx36 = getelementptr inbounds i8* %call1, i32 %sub35 + %3 = load i8* %arrayidx36, align 1 + %conv37 = zext i8 %3 to i32 + %add34191 = add i32 %add28190, %conv37 + %arrayidx40 = getelementptr inbounds i8* %call1, i32 %add + %4 = load i8* %arrayidx40, align 1 + %conv41 = zext i8 %4 to i32 + %mul42 = mul nsw i32 %conv41, 255 + %add44 = add i32 %add, 1 + %arrayidx45 = getelementptr inbounds i8* %call1, i32 %add44 + %5 = load i8* %arrayidx45, align 1 + %conv46 = zext i8 %5 to i32 +; CHECK: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #-1] +; CHECK-NEXT: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #1] + %add49 = add i32 %add, %Width + %sub50 = add i32 %add49, -1 + %arrayidx51 = getelementptr inbounds i8* %call1, i32 %sub50 + %6 = load i8* %arrayidx51, align 1 + %conv52 = zext i8 %6 to i32 + %arrayidx56 = getelementptr inbounds i8* %call1, i32 %add49 + %7 = load i8* %arrayidx56, align 1 + %conv57 = zext i8 %7 to i32 + %add61 = add i32 %add49, 1 + %arrayidx62 = getelementptr inbounds i8* %call1, i32 %add61 + %8 = load i8* %arrayidx62, align 1 + %conv63 = zext i8 %8 to i32 +; CHECK: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #-1] +; CHECK-NEXT: ldrb{{[.w]*}} r{{[0-9]*}}, [r{{[0-9]*}}, #1] + %tmp = add i32 %add34191, %conv46 + %tmp193 = add i32 %tmp, %conv52 + %tmp194 = add i32 %tmp193, %conv57 + %tmp195 = add i32 %tmp194, %conv63 + %tmp196 = mul i32 %tmp195, -28 + %add65 = add i32 %tmp196, %mul42 + %9 = lshr i32 %add65, 8 + %conv68 = trunc i32 %9 to i8 + %arrayidx69 = getelementptr inbounds i8* %call3, i32 %add + store i8 %conv68, i8* %arrayidx69, align 1 + ret i8 %conv68 +} diff --git a/test/CodeGen/ARM/2013-10-11-select-stalls.ll b/test/CodeGen/ARM/2013-10-11-select-stalls.ll new file mode 100644 index 0000000..33c0587 --- /dev/null +++ b/test/CodeGen/ARM/2013-10-11-select-stalls.ll @@ -0,0 +1,16 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=thumbv7-apple-ios -stats 2>&1 | not grep "Number of pipeline stalls" +; Evaluate the two vld1.8 instructions 
in separate MBB's, +; instead of stalling on one and conditionally overwriting its result. + +define <16 x i8> @multiselect(i32 %avail, i8* %foo, i8* %bar) { +entry: + %vld1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %foo, i32 1) + %vld2 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %bar, i32 1) + %and = and i32 %avail, 1 + %tobool = icmp eq i32 %and, 0 + %retv = select i1 %tobool, <16 x i8> %vld1, <16 x i8> %vld2 + ret <16 x i8> %retv +} + +declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* , i32 ) diff --git a/test/CodeGen/ARM/2013-11-08-inline-asm-neon-array.ll b/test/CodeGen/ARM/2013-11-08-inline-asm-neon-array.ll new file mode 100644 index 0000000..5a86477 --- /dev/null +++ b/test/CodeGen/ARM/2013-11-08-inline-asm-neon-array.ll @@ -0,0 +1,16 @@ +;RUN: not llc -mtriple=arm-linux-gnueabihf < %s 2>&1 | FileCheck %s + +; ModuleID = 'bug.c' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" +target triple = "armv7--" + +%struct.uint8x8x4_t = type { [4 x <8 x i8>] } + +define void @foo() #0 { + %vsrc = alloca %struct.uint8x8x4_t, align 8 + %ptr = alloca i8; + %1 = call i8* asm sideeffect "vld4.u8 ${0:h}, [$1], $2", "=*w,=r,r,1"(%struct.uint8x8x4_t* %vsrc, i32 0, i8* %ptr) + ret void +} + +; CHECK: error: couldn't allocate output register for constraint 'w' diff --git a/test/CodeGen/ARM/a15-SD-dep.ll b/test/CodeGen/ARM/a15-SD-dep.ll index df921e0..019ff61 100644 --- a/test/CodeGen/ARM/a15-SD-dep.ll +++ b/test/CodeGen/ARM/a15-SD-dep.ll @@ -1,5 +1,5 @@ -; RUN: llc -O1 -mcpu=cortex-a15 -mtriple=armv7-linux-gnueabi -disable-a15-sd-optimization -verify-machineinstrs < %s | FileCheck -check-prefix=DISABLED %s -; RUN: llc -O1 -mcpu=cortex-a15 -mtriple=armv7-linux-gnueabi -verify-machineinstrs < %s | FileCheck -check-prefix=ENABLED %s +; RUN: llc -O1 -mcpu=cortex-a15 -mtriple=armv7-linux-gnueabi -disable-a15-sd-optimization -verify-machineinstrs < %s | FileCheck 
-check-prefix=CHECK-DISABLED %s +; RUN: llc -O1 -mcpu=cortex-a15 -mtriple=armv7-linux-gnueabi -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK-ENABLED %s ; CHECK-ENABLED-LABEL: t1: ; CHECK-DISABLED-LABEL: t1: diff --git a/test/CodeGen/ARM/addrspacecast.ll b/test/CodeGen/ARM/addrspacecast.ll new file mode 100644 index 0000000..2e98ba5 --- /dev/null +++ b/test/CodeGen/ARM/addrspacecast.ll @@ -0,0 +1,7 @@ +; RUN: llc < %s -march=arm + +; Check that codegen for an addrspace cast succeeds without error. +define <4 x i32 addrspace(1)*> @f (<4 x i32*> %x) { + %1 = addrspacecast <4 x i32*> %x to <4 x i32 addrspace(1)*> + ret <4 x i32 addrspace(1)*> %1 +} diff --git a/test/CodeGen/ARM/aliases.ll b/test/CodeGen/ARM/aliases.ll index d668334..f55ae10 100644 --- a/test/CodeGen/ARM/aliases.ll +++ b/test/CodeGen/ARM/aliases.ll @@ -1,15 +1,30 @@ -; RUN: llc < %s -mtriple=arm-linux-gnueabi -o %t -; RUN: grep " = " %t | count 5 -; RUN: grep globl %t | count 4 -; RUN: grep weak %t | count 1 +; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -@bar = external global i32 +; CHECK: .globl test + +; CHECK: .globl foo1 +; CHECK: foo1 = bar + +; CHECK: .globl foo2 +; CHECK: foo2 = bar + +; CHECK: .weak bar_f +; CHECK: bar_f = foo_f + +; CHECK: bar_i = bar + +; CHECK: .globl A +; CHECK: A = bar + +@bar = global i32 42 @foo1 = alias i32* @bar @foo2 = alias i32* @bar %FunTy = type i32() -declare i32 @foo_f() +define i32 @foo_f() { + ret i32 0 +} @bar_f = alias weak %FunTy* @foo_f @bar_i = alias internal i32* @bar diff --git a/test/CodeGen/ARM/arm-and-tst-peephole.ll b/test/CodeGen/ARM/arm-and-tst-peephole.ll index 0762070..88d797e 100644 --- a/test/CodeGen/ARM/arm-and-tst-peephole.ll +++ b/test/CodeGen/ARM/arm-and-tst-peephole.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -march=arm | FileCheck -check-prefix=ARM %s ; RUN: llc < %s -march=thumb | FileCheck -check-prefix=THUMB %s ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck -check-prefix=T2 %s +; RUN: llc < %s -mtriple=thumbv8 
| FileCheck -check-prefix=V8 %s ; FIXME: The -march=thumb test doesn't change if -disable-peephole is specified. @@ -39,6 +40,17 @@ tailrecurse: ; preds = %sw.bb, %entry br i1 %tst, label %sw.bb, label %tailrecurse.switch tailrecurse.switch: ; preds = %tailrecurse +; V8-LABEL: %tailrecurse.switch +; V8: cmp +; V8-NEXT: beq +; V8-NEXT: %tailrecurse.switch +; V8: cmp +; V8-NEXT: beq +; V8-NEXT: %tailrecurse.switch +; V8: cmp +; V8-NEXT: beq +; V8-NEXT: b +; The trailing space in the last line checks that the branch is unconditional switch i32 %and, label %sw.epilog [ i32 1, label %sw.bb i32 3, label %sw.bb6 @@ -73,6 +85,7 @@ sw.epilog: ; preds = %tailrecurse.switch ; ARM: bar ; THUMB: bar ; T2: bar +; V8-LABEL: bar: define internal zeroext i8 @bar(%struct.S* %x, %struct.S* nocapture %y) nounwind readonly { entry: %0 = getelementptr inbounds %struct.S* %x, i32 0, i32 1, i32 0 @@ -81,22 +94,32 @@ entry: ; ARM: ands ; THUMB: ands ; T2: ands +; V8: ands +; V8-NEXT: beq %3 = and i32 %2, 112 %4 = icmp eq i32 %3, 0 br i1 %4, label %return, label %bb bb: ; preds = %entry +; V8-NEXT: %bb %5 = getelementptr inbounds %struct.S* %y, i32 0, i32 1, i32 0 %6 = load i8* %5, align 1 %7 = zext i8 %6 to i32 ; ARM: andsne ; THUMB: ands ; T2: andsne +; V8: ands +; V8-NEXT: beq %8 = and i32 %7, 112 %9 = icmp eq i32 %8, 0 br i1 %9, label %return, label %bb2 bb2: ; preds = %bb +; V8-NEXT: %bb2 +; V8-NEXT: cmp +; V8-NEXT: it ne +; V8-NEXT: cmpne +; V8-NEXT: bne %10 = icmp eq i32 %3, 16 %11 = icmp eq i32 %8, 16 %or.cond = or i1 %10, %11 diff --git a/test/CodeGen/ARM/atomic-64bit.ll b/test/CodeGen/ARM/atomic-64bit.ll index 8ec829c..0477d4f 100644 --- a/test/CodeGen/ARM/atomic-64bit.ll +++ b/test/CodeGen/ARM/atomic-64bit.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s -; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabihf | FileCheck %s --check-prefix=CHECK-THUMB +; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabihf -verify-machineinstrs | FileCheck %s 
--check-prefix=CHECK-THUMB define i64 @test1(i64* %ptr, i64 %val) { ; CHECK-LABEL: test1: @@ -175,28 +175,14 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) { ret i64 %r } -; Compiles down to cmpxchg -; FIXME: Should compile to a single ldrexd +; Compiles down to a single ldrexd define i64 @test8(i64* %ptr) { ; CHECK-LABEL: test8: ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] -; CHECK: cmp [[REG1]] -; CHECK: cmpeq [[REG2]] -; CHECK: bne -; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}} -; CHECK: cmp -; CHECK: bne ; CHECK: dmb {{ish$}} ; CHECK-THUMB-LABEL: test8: ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] -; CHECK-THUMB: cmp [[REG1]] -; CHECK-THUMB: it eq -; CHECK-THUMB: cmpeq [[REG2]] -; CHECK-THUMB: bne -; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}} -; CHECK-THUMB: cmp -; CHECK-THUMB: bne ; CHECK-THUMB: dmb {{ish$}} %r = load atomic i64* %ptr seq_cst, align 8 diff --git a/test/CodeGen/ARM/atomic-load-store.ll b/test/CodeGen/ARM/atomic-load-store.ll index 476b3dd..53c7184 100644 --- a/test/CodeGen/ARM/atomic-load-store.ll +++ b/test/CodeGen/ARM/atomic-load-store.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s -check-prefix=ARM ; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s -check-prefix=ARM -; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s -check-prefix=THUMBTWO +; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s -check-prefix=THUMBTWO ; RUN: llc < %s -mtriple=thumbv6-apple-ios | FileCheck %s -check-prefix=THUMBONE ; RUN llc < %s -mtriple=armv4-apple-ios | FileCheck %s -check-prefix=ARMV4 diff --git a/test/CodeGen/ARM/atomic-op.ll b/test/CodeGen/ARM/atomic-op.ll index 6e6b363..9a79c9f 100644 --- a/test/CodeGen/ARM/atomic-op.ll +++ b/test/CodeGen/ARM/atomic-op.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s ; RUN: llc < %s 
-mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=thumbv6-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-T1 +; RUN: llc < %s -mtriple=thumbv6-apple-ios -verify-machineinstrs -mcpu=cortex-m0 | FileCheck %s --check-prefix=CHECK-T1 define void @func(i32 %argc, i8** %argv) nounwind { entry: @@ -24,78 +26,93 @@ entry: ; CHECK: ldrex ; CHECK: add ; CHECK: strex + ; CHECK-T1: blx ___sync_fetch_and_add_4 %0 = atomicrmw add i32* %val1, i32 %tmp monotonic store i32 %0, i32* %old ; CHECK: ldrex ; CHECK: sub ; CHECK: strex + ; CHECK-T1: blx ___sync_fetch_and_sub_4 %1 = atomicrmw sub i32* %val2, i32 30 monotonic store i32 %1, i32* %old ; CHECK: ldrex ; CHECK: add ; CHECK: strex + ; CHECK-T1: blx ___sync_fetch_and_add_4 %2 = atomicrmw add i32* %val2, i32 1 monotonic store i32 %2, i32* %old ; CHECK: ldrex ; CHECK: sub ; CHECK: strex + ; CHECK-T1: blx ___sync_fetch_and_sub_4 %3 = atomicrmw sub i32* %val2, i32 1 monotonic store i32 %3, i32* %old ; CHECK: ldrex ; CHECK: and ; CHECK: strex + ; CHECK-T1: blx ___sync_fetch_and_and_4 %4 = atomicrmw and i32* %andt, i32 4080 monotonic store i32 %4, i32* %old ; CHECK: ldrex ; CHECK: or ; CHECK: strex + ; CHECK-T1: blx ___sync_fetch_and_or_4 %5 = atomicrmw or i32* %ort, i32 4080 monotonic store i32 %5, i32* %old ; CHECK: ldrex ; CHECK: eor ; CHECK: strex + ; CHECK-T1: blx ___sync_fetch_and_xor_4 %6 = atomicrmw xor i32* %xort, i32 4080 monotonic store i32 %6, i32* %old ; CHECK: ldrex ; CHECK: cmp ; CHECK: strex + ; CHECK-T1: blx ___sync_fetch_and_min_4 %7 = atomicrmw min i32* %val2, i32 16 monotonic store i32 %7, i32* %old %neg = sub i32 0, 1 ; CHECK: ldrex ; CHECK: cmp ; CHECK: strex + ; CHECK-T1: blx ___sync_fetch_and_min_4 %8 = atomicrmw min i32* %val2, i32 %neg monotonic store i32 %8, i32* %old ; CHECK: ldrex ; CHECK: cmp ; CHECK: strex + ; CHECK-T1: blx ___sync_fetch_and_max_4 %9 = atomicrmw max i32* %val2, i32 1 monotonic store i32 %9, i32* %old ; CHECK: ldrex ; CHECK: cmp ; 
CHECK: strex + ; CHECK-T1: blx ___sync_fetch_and_max_4 %10 = atomicrmw max i32* %val2, i32 0 monotonic store i32 %10, i32* %old ; CHECK: ldrex ; CHECK: cmp ; CHECK: strex + ; CHECK-T1: blx ___sync_fetch_and_umin_4 %11 = atomicrmw umin i32* %val2, i32 16 monotonic store i32 %11, i32* %old %uneg = sub i32 0, 1 ; CHECK: ldrex ; CHECK: cmp ; CHECK: strex + ; CHECK-T1: blx ___sync_fetch_and_umin_4 %12 = atomicrmw umin i32* %val2, i32 %uneg monotonic store i32 %12, i32* %old ; CHECK: ldrex ; CHECK: cmp ; CHECK: strex + ; CHECK-T1: blx ___sync_fetch_and_umax_4 %13 = atomicrmw umax i32* %val2, i32 1 monotonic store i32 %13, i32* %old ; CHECK: ldrex ; CHECK: cmp ; CHECK: strex + ; CHECK-T1: blx ___sync_fetch_and_umax_4 %14 = atomicrmw umax i32* %val2, i32 0 monotonic store i32 %14, i32* %old @@ -110,22 +127,26 @@ entry: ; CHECK: ldrex ; CHECK: cmp ; CHECK: strex + ; CHECK-T1: blx ___sync_fetch_and_umin_2 %0 = atomicrmw umin i16* %val, i16 16 monotonic store i16 %0, i16* %old %uneg = sub i16 0, 1 ; CHECK: ldrex ; CHECK: cmp ; CHECK: strex + ; CHECK-T1: blx ___sync_fetch_and_umin_2 %1 = atomicrmw umin i16* %val, i16 %uneg monotonic store i16 %1, i16* %old ; CHECK: ldrex ; CHECK: cmp ; CHECK: strex + ; CHECK-T1: blx ___sync_fetch_and_umax_2 %2 = atomicrmw umax i16* %val, i16 1 monotonic store i16 %2, i16* %old ; CHECK: ldrex ; CHECK: cmp ; CHECK: strex + ; CHECK-T1: blx ___sync_fetch_and_umax_2 %3 = atomicrmw umax i16* %val, i16 0 monotonic store i16 %3, i16* %old ret void @@ -139,22 +160,26 @@ entry: ; CHECK: ldrex ; CHECK: cmp ; CHECK: strex + ; CHECK-T1: blx ___sync_fetch_and_umin_1 %0 = atomicrmw umin i8* %val, i8 16 monotonic store i8 %0, i8* %old ; CHECK: ldrex ; CHECK: cmp ; CHECK: strex + ; CHECK-T1: blx ___sync_fetch_and_umin_1 %uneg = sub i8 0, 1 %1 = atomicrmw umin i8* %val, i8 %uneg monotonic store i8 %1, i8* %old ; CHECK: ldrex ; CHECK: cmp ; CHECK: strex + ; CHECK-T1: blx ___sync_fetch_and_umax_1 %2 = atomicrmw umax i8* %val, i8 1 monotonic store i8 %2, i8* %old 
; CHECK: ldrex ; CHECK: cmp ; CHECK: strex + ; CHECK-T1: blx ___sync_fetch_and_umax_1 %3 = atomicrmw umax i8* %val, i8 0 monotonic store i8 %3, i8* %old ret void diff --git a/test/CodeGen/ARM/atomic-ops-v8.ll b/test/CodeGen/ARM/atomic-ops-v8.ll new file mode 100644 index 0000000..3f93929 --- /dev/null +++ b/test/CodeGen/ARM/atomic-ops-v8.ll @@ -0,0 +1,1344 @@ +; RUN: llc -mtriple=armv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=thumbv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s + +@var8 = global i8 0 +@var16 = global i16 0 +@var32 = global i32 0 +@var64 = global i64 0 + +define i8 @test_atomic_load_add_i8(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_add_i8: + %old = atomicrmw add i8* @var8, i8 %offset seq_cst +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK: movt r[[ADDR]], :upper16:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_add_i16(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_add_i16: + %old = atomicrmw add i16* @var16, i16 %offset acquire +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 +; CHECK: movt r[[ADDR]], :upper16:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_add_i32(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_add_i32: + %old = atomicrmw add i32* @var32, i32 %offset release +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 +; CHECK: movt r[[ADDR]], :upper16:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_add_i64(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_add_i64: + %old = atomicrmw add i64* @var64, i64 %offset monotonic +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 +; CHECK: movt r[[ADDR]], :upper16:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] + ; r0, r1 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: adds [[NEW1:r[0-9]+]], r[[OLD1]], r0 +; CHECK-NEXT: adc{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1 +; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD1]] +; CHECK-NEXT: mov r1, r[[OLD2]] + ret i64 %old +} + +define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_sub_i8: + %old = atomicrmw sub i8* @var8, i8 %offset monotonic +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK: movt r[[ADDR]], :upper16:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: sub{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_sub_i16: + %old = atomicrmw sub i16* @var16, i16 %offset release +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 +; CHECK: movt r[[ADDR]], :upper16:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: sub{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_sub_i32: + %old = atomicrmw sub i32* @var32, i32 %offset acquire +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 +; CHECK: movt r[[ADDR]], :upper16:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: sub{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: strex [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_sub_i64: + %old = atomicrmw sub i64* @var64, i64 %offset seq_cst +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 +; CHECK: movt r[[ADDR]], :upper16:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] + ; r0, r1 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: subs [[NEW1:r[0-9]+]], r[[OLD1]], r0 +; CHECK-NEXT: sbc{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1 +; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD1]] +; CHECK-NEXT: mov r1, r[[OLD2]] + ret i64 %old +} + +define i8 @test_atomic_load_and_i8(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_and_i8: + %old = atomicrmw and i8* @var8, i8 %offset release +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK: movt r[[ADDR]], :upper16:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: and{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_and_i16(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_and_i16: + %old = atomicrmw and i16* @var16, i16 %offset monotonic +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 +; CHECK: movt r[[ADDR]], :upper16:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: and{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_and_i32(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_and_i32: + %old = atomicrmw and i32* @var32, i32 %offset seq_cst +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 +; CHECK: movt r[[ADDR]], :upper16:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: and{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_and_i64(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_and_i64: + %old = atomicrmw and i64* @var64, i64 %offset acquire +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 +; CHECK: movt r[[ADDR]], :upper16:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] + ; r0, r1 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: and{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0 +; CHECK-NEXT: and{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1 +; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD1]] +; CHECK-NEXT: mov r1, r[[OLD2]] + ret i64 %old +} + +define i8 @test_atomic_load_or_i8(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_or_i8: + %old = atomicrmw or i8* @var8, i8 %offset seq_cst +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK: movt r[[ADDR]], :upper16:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: orr{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_or_i16(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_or_i16: + %old = atomicrmw or i16* @var16, i16 %offset monotonic +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 +; CHECK: movt r[[ADDR]], :upper16:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: orr{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_or_i32(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_or_i32: + %old = atomicrmw or i32* @var32, i32 %offset acquire +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 +; CHECK: movt r[[ADDR]], :upper16:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: orr{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: strex [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_or_i64(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_or_i64: + %old = atomicrmw or i64* @var64, i64 %offset release +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 +; CHECK: movt r[[ADDR]], :upper16:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] + ; r0, r1 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: orr{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0 +; CHECK-NEXT: orr{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1 +; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD1]] +; CHECK-NEXT: mov r1, r[[OLD2]] + ret i64 %old +} + +define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_xor_i8: + %old = atomicrmw xor i8* @var8, i8 %offset acquire +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK: movt r[[ADDR]], :upper16:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: eor{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_xor_i16: + %old = atomicrmw xor i16* @var16, i16 %offset release +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 +; CHECK: movt r[[ADDR]], :upper16:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: eor{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_xor_i32: + %old = atomicrmw xor i32* @var32, i32 %offset seq_cst +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 +; CHECK: movt r[[ADDR]], :upper16:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: eor{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_xor_i64: + %old = atomicrmw xor i64* @var64, i64 %offset monotonic +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 +; CHECK: movt r[[ADDR]], :upper16:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] + ; r0, r1 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: eor{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0 +; CHECK-NEXT: eor{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1 +; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD1]] +; CHECK-NEXT: mov r1, r[[OLD2]] + ret i64 %old +} + +define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_xchg_i8: + %old = atomicrmw xchg i8* @var8, i8 %offset monotonic +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK: movt r[[ADDR]], :upper16:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r0, [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_xchg_i16: + %old = atomicrmw xchg i16* @var16, i16 %offset seq_cst +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 +; CHECK: movt r[[ADDR]], :upper16:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r0, [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_xchg_i32: + %old = atomicrmw xchg i32* @var32, i32 %offset release +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 +; CHECK: movt r[[ADDR]], :upper16:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r0, [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_xchg_i64: + %old = atomicrmw xchg i64* @var64, i64 %offset acquire +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 +; CHECK: movt r[[ADDR]], :upper16:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] + ; r0, r1 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD1]] +; CHECK-NEXT: mov r1, r[[OLD2]] + ret i64 %old +} + +define i8 @test_atomic_load_min_i8(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_min_i8: + %old = atomicrmw min i8* @var8, i8 %offset acquire +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK: movt r[[ADDR]], :upper16:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK-NEXT: sxtb r[[OLDX:[0-9]+]], r[[OLD]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp r[[OLDX]], r0 +; Thumb mode: it ge +; CHECK: movge r[[OLDX]], r0 +; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_min_i16(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_min_i16: + %old = atomicrmw min i16* @var16, i16 %offset release +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 +; CHECK: movt r[[ADDR]], :upper16:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK-NEXT: sxth r[[OLDX:[0-9]+]], r[[OLD]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: cmp r[[OLDX]], r0 +; Thumb mode: it ge +; CHECK: movge r[[OLDX]], r0 +; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_min_i32(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_min_i32: + %old = atomicrmw min i32* @var32, i32 %offset monotonic +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 +; CHECK: movt r[[ADDR]], :upper16:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 +; CHECK-NEXT: cmp r[[OLD]], r0 +; Thumb mode: it lt +; CHECK: movlt r[[NEW]], r[[OLD]] +; CHECK-NEXT: strex [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_min_i64(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_min_i64: + %old = atomicrmw min i64* @var64, i64 %offset seq_cst +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 +; CHECK: movt r[[ADDR]], :upper16:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] + ; r0, r1 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0 +; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1 +; CHECK-NEXT: blt .LBB{{[0-9]+}}_3 +; CHECK-NEXT: BB#2: +; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD1]] +; CHECK-NEXT: mov r1, r[[OLD2]] + ret i64 %old +} + +define i8 @test_atomic_load_max_i8(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_max_i8: + %old = atomicrmw max i8* @var8, i8 %offset seq_cst +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK: movt r[[ADDR]], :upper16:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK-NEXT: sxtb r[[OLDX:[0-9]+]], r[[OLD]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp r[[OLDX]], r0 +; Thumb mode: it le +; CHECK: movle r[[OLDX]], r0 +; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_max_i16(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_max_i16: + %old = atomicrmw max i16* @var16, i16 %offset acquire +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 +; CHECK: movt r[[ADDR]], :upper16:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK-NEXT: sxth r[[OLDX:[0-9]+]], r[[OLD]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: cmp r[[OLDX]], r0 +; Thumb mode: it le +; CHECK: movle r[[OLDX]], r0 +; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_max_i32(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_max_i32: + %old = atomicrmw max i32* @var32, i32 %offset release +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 +; CHECK: movt r[[ADDR]], :upper16:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 +; CHECK-NEXT: cmp r[[OLD]], r0 +; Thumb mode: it gt +; CHECK: movgt r[[NEW]], r[[OLD]] +; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_max_i64(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_max_i64: + %old = atomicrmw max i64* @var64, i64 %offset monotonic +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 +; CHECK: movt r[[ADDR]], :upper16:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] + ; r0, r1 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0 +; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1 +; CHECK-NEXT: bge .LBB{{[0-9]+}}_3 +; CHECK-NEXT: BB#2: +; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD1]] +; CHECK-NEXT: mov r1, r[[OLD2]] + ret i64 %old +} + +define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_umin_i8: + %old = atomicrmw umin i8* @var8, i8 %offset monotonic +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK: movt r[[ADDR]], :upper16:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 +; CHECK-NEXT: cmp r[[OLD]], r0 +; Thumb mode: it lo +; CHECK: movlo r[[NEW]], r[[OLD]] +; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_umin_i16: + %old = atomicrmw umin i16* @var16, i16 %offset acquire +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 +; CHECK: movt r[[ADDR]], :upper16:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 +; CHECK-NEXT: cmp r[[OLD]], r0 +; Thumb mode: it lo +; CHECK: movlo r[[NEW]], r[[OLD]] +; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_umin_i32: + %old = atomicrmw umin i32* @var32, i32 %offset seq_cst +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 +; CHECK: movt r[[ADDR]], :upper16:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 +; CHECK-NEXT: cmp r[[OLD]], r0 +; Thumb mode: it lo +; CHECK: movlo r[[NEW]], r[[OLD]] +; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_umin_i64: + %old = atomicrmw umin i64* @var64, i64 %offset acq_rel +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 +; CHECK: movt r[[ADDR]], :upper16:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] + ; r0, r1 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0 +; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1 +; CHECK-NEXT: blo .LBB{{[0-9]+}}_3 +; CHECK-NEXT: BB#2: +; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD1]] +; CHECK-NEXT: mov r1, r[[OLD2]] + ret i64 %old +} + +define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_umax_i8: + %old = atomicrmw umax i8* @var8, i8 %offset acq_rel +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK: movt r[[ADDR]], :upper16:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 +; CHECK-NEXT: cmp r[[OLD]], r0 +; Thumb mode: it hi +; CHECK: movhi r[[NEW]], r[[OLD]] +; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_umax_i16: + %old = atomicrmw umax i16* @var16, i16 %offset monotonic +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 +; CHECK: movt r[[ADDR]], :upper16:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 +; CHECK-NEXT: cmp r[[OLD]], r0 +; Thumb mode: it hi +; CHECK: movhi r[[NEW]], r[[OLD]] +; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_umax_i32: + %old = atomicrmw umax i32* @var32, i32 %offset seq_cst +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 +; CHECK: movt r[[ADDR]], :upper16:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 +; CHECK-NEXT: cmp r[[OLD]], r0 +; Thumb mode: it hi +; CHECK: movhi r[[NEW]], r[[OLD]] +; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_umax_i64: + %old = atomicrmw umax i64* @var64, i64 %offset release +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 +; CHECK: movt r[[ADDR]], :upper16:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] + ; r0, r1 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0 +; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1 +; CHECK-NEXT: bhs .LBB{{[0-9]+}}_3 +; CHECK-NEXT: BB#2: +; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD1]] +; CHECK-NEXT: mov r1, r[[OLD2]] + ret i64 %old +} + +define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind { +; CHECK-LABEL: test_atomic_cmpxchg_i8: + %old = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK: movt r[[ADDR]], :upper16:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp r[[OLD]], r0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_3 +; CHECK-NEXT: BB#2: + ; As above, r1 is a reasonable guess. +; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r1, [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind { +; CHECK-LABEL: test_atomic_cmpxchg_i16: + %old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 +; CHECK: movt r[[ADDR]], :upper16:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp r[[OLD]], r0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_3 +; CHECK-NEXT: BB#2: + ; As above, r1 is a reasonable guess. 
+; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r1, [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { +; CHECK-LABEL: test_atomic_cmpxchg_i32: + %old = cmpxchg i32* @var32, i32 %wanted, i32 %new release +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 +; CHECK: movt r[[ADDR]], :upper16:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp r[[OLD]], r0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_3 +; CHECK-NEXT: BB#2: + ; As above, r1 is a reasonable guess. +; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r1, [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind { +; CHECK-LABEL: test_atomic_cmpxchg_i64: + %old = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 +; CHECK: movt r[[ADDR]], :upper16:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]] + ; r0, r1 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp [[OLD1]], r0 +; Thumb mode: it eq +; CHECK: cmpeq [[OLD2]], r1 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_3 +; CHECK-NEXT: BB#2: + ; As above, r2, r3 is a reasonable guess. 
+; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], r2, r3, [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, [[OLD1]] +; CHECK-NEXT: mov r1, [[OLD2]] + ret i64 %old +} + +define i8 @test_atomic_load_monotonic_i8() nounwind { +; CHECK-LABEL: test_atomic_load_monotonic_i8: + %val = load atomic i8* @var8 monotonic, align 1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK: movt r[[ADDR]], :upper16:var8 +; CHECK: ldrb r0, [r[[ADDR]]] +; CHECK-NOT: dmb +; CHECK-NOT: mcr + + ret i8 %val +} + +define i8 @test_atomic_load_monotonic_regoff_i8(i64 %base, i64 %off) nounwind { +; CHECK-LABEL: test_atomic_load_monotonic_regoff_i8: + %addr_int = add i64 %base, %off + %addr = inttoptr i64 %addr_int to i8* + + %val = load atomic i8* %addr monotonic, align 1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: ldrb r0, [r0, r2] +; CHECK-NOT: dmb +; CHECK-NOT: mcr + + ret i8 %val +} + +define i8 @test_atomic_load_acquire_i8() nounwind { +; CHECK-LABEL: test_atomic_load_acquire_i8: + %val = load atomic i8* @var8 acquire, align 1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movt r[[ADDR]], :upper16:var8 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: ldab r0, [r[[ADDR]]] +; CHECK-NOT: dmb +; CHECK-NOT: mcr + ret i8 %val +} + +define i8 @test_atomic_load_seq_cst_i8() nounwind { +; CHECK-LABEL: test_atomic_load_seq_cst_i8: + %val = load atomic i8* @var8 seq_cst, align 1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movt r[[ADDR]], :upper16:var8 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: ldab r0, [r[[ADDR]]] +; CHECK-NOT: dmb +; CHECK-NOT: mcr + ret i8 %val +} + +define i16 @test_atomic_load_monotonic_i16() nounwind { +; CHECK-LABEL: test_atomic_load_monotonic_i16: + %val = load atomic i16* @var16 monotonic, 
align 2 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movt r[[ADDR]], :upper16:var16 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: ldrh r0, [r[[ADDR]]] +; CHECK-NOT: dmb +; CHECK-NOT: mcr + + ret i16 %val +} + +define i32 @test_atomic_load_monotonic_regoff_i32(i64 %base, i64 %off) nounwind { +; CHECK-LABEL: test_atomic_load_monotonic_regoff_i32: + %addr_int = add i64 %base, %off + %addr = inttoptr i64 %addr_int to i32* + + %val = load atomic i32* %addr monotonic, align 4 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: ldr r0, [r0, r2] +; CHECK-NOT: dmb +; CHECK-NOT: mcr + + ret i32 %val +} + +define i64 @test_atomic_load_seq_cst_i64() nounwind { +; CHECK-LABEL: test_atomic_load_seq_cst_i64: + %val = load atomic i64* @var64 seq_cst, align 8 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movt r[[ADDR]], :upper16:var64 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: ldaexd r0, r1, [r[[ADDR]]] +; CHECK-NOT: dmb +; CHECK-NOT: mcr + ret i64 %val +} + +define void @test_atomic_store_monotonic_i8(i8 %val) nounwind { +; CHECK-LABEL: test_atomic_store_monotonic_i8: + store atomic i8 %val, i8* @var8 monotonic, align 1 +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK: movt r[[ADDR]], :upper16:var8 +; CHECK: strb r0, [r[[ADDR]]] + + ret void +} + +define void @test_atomic_store_monotonic_regoff_i8(i64 %base, i64 %off, i8 %val) nounwind { +; CHECK-LABEL: test_atomic_store_monotonic_regoff_i8: + + %addr_int = add i64 %base, %off + %addr = inttoptr i64 %addr_int to i8* + + store atomic i8 %val, i8* %addr monotonic, align 1 +; CHECK: ldrb{{(\.w)?}} [[VAL:r[0-9]+]], [sp] +; CHECK: strb [[VAL]], [r0, r2] + + ret void +} + +define void @test_atomic_store_release_i8(i8 %val) nounwind { +; CHECK-LABEL: test_atomic_store_release_i8: + store atomic i8 %val, i8* @var8 release, align 1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; 
CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movt r[[ADDR]], :upper16:var8 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: stlb r0, [r[[ADDR]]] +; CHECK-NOT: dmb +; CHECK-NOT: mcr + ret void +} + +define void @test_atomic_store_seq_cst_i8(i8 %val) nounwind { +; CHECK-LABEL: test_atomic_store_seq_cst_i8: + store atomic i8 %val, i8* @var8 seq_cst, align 1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movt r[[ADDR]], :upper16:var8 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: stlb r0, [r[[ADDR]]] +; CHECK-NOT: dmb +; CHECK-NOT: mcr + ret void +} + +define void @test_atomic_store_monotonic_i16(i16 %val) nounwind { +; CHECK-LABEL: test_atomic_store_monotonic_i16: + store atomic i16 %val, i16* @var16 monotonic, align 2 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movt r[[ADDR]], :upper16:var16 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: strh r0, [r[[ADDR]]] +; CHECK-NOT: dmb +; CHECK-NOT: mcr + ret void +} + +define void @test_atomic_store_monotonic_regoff_i32(i64 %base, i64 %off, i32 %val) nounwind { +; CHECK-LABEL: test_atomic_store_monotonic_regoff_i32: + + %addr_int = add i64 %base, %off + %addr = inttoptr i64 %addr_int to i32* + + store atomic i32 %val, i32* %addr monotonic, align 4 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: ldr [[VAL:r[0-9]+]], [sp] +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: str [[VAL]], [r0, r2] +; CHECK-NOT: dmb +; CHECK-NOT: mcr + + ret void +} + +define void @test_atomic_store_release_i64(i64 %val) nounwind { +; CHECK-LABEL: test_atomic_store_release_i64: + store atomic i64 %val, i64* @var64 release, align 8 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 +; CHECK: movt r[[ADDR]], :upper16:var64 + +; CHECK: .LBB{{[0-9]+}}_1: + ; r0, r1 below is a reasonable guess but could change: it certainly 
comes into the + ; function there. +; CHECK: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + + ret void +} + +define i32 @not.barriers(i32* %var, i1 %cond) { +; CHECK-LABEL: not.barriers: + br i1 %cond, label %atomic_ver, label %simple_ver +simple_ver: + %oldval = load i32* %var + %newval = add nsw i32 %oldval, -1 + store i32 %newval, i32* %var + br label %somewhere +atomic_ver: + fence seq_cst + %val = atomicrmw add i32* %var, i32 -1 monotonic + fence seq_cst + br label %somewhere +; CHECK: dmb +; CHECK: ldrex +; CHECK: dmb + ; The key point here is that the second dmb isn't immediately followed by the + ; simple_ver basic block, which LLVM attempted to do when DMB had been marked + ; with isBarrier. For now, look for something that looks like "somewhere". +; CHECK-NEXT: mov +somewhere: + %combined = phi i32 [ %val, %atomic_ver ], [ %newval, %simple_ver] + ret i32 %combined +} diff --git a/test/CodeGen/ARM/build-attributes-encoding.s b/test/CodeGen/ARM/build-attributes-encoding.s new file mode 100644 index 0000000..5ad51b2 --- /dev/null +++ b/test/CodeGen/ARM/build-attributes-encoding.s @@ -0,0 +1,85 @@ +// This tests that ARM attributes are properly encoded. 
+ +// RUN: llvm-mc < %s -triple=arm-linux-gnueabi -filetype=obj -o - \ +// RUN: | llvm-readobj -s -sd | FileCheck %s + +// Tag_CPU_name (=5) +.cpu Cortex-A8 + +// Tag_CPU_arch (=6) +.eabi_attribute 6, 10 + +// Tag_arch_profile (=7) +.eabi_attribute 7, 'A' + +// Tag_ARM_ISA_use (=8) +.eabi_attribute 8, 1 + +// Tag_THUMB_ISA_use (=9) +.eabi_attribute 9, 2 + +// Tag_FP_arch (=10) +.fpu vfpv3 + +// Tag_Advanced_SIMD_arch (=12) +.eabi_attribute 12, 2 + +// Tag_ABI_FP_denormal (=20) +.eabi_attribute 20, 1 + +// Tag_ABI_FP_exceptions (=21) +.eabi_attribute 21, 1 + +// Tag_ABI_FP_number_model (=23) +.eabi_attribute 23, 1 + +// Tag_ABI_align_needed (=24) +.eabi_attribute 24, 1 + +// Tag_ABI_align_preserved (=25) +.eabi_attribute 25, 1 + +// Tag_ABI_HardFP_use (=27) +.eabi_attribute 27, 0 + +// Tag_ABI_VFP_args (=28) +.eabi_attribute 28, 1 + +// Tag_FP_HP_extension (=36) +.eabi_attribute 36, 1 + +// Tag_MPextension_use (=42) +.eabi_attribute 42, 1 + +// Tag_DIV_use (=44) +.eabi_attribute 44, 2 + +// Tag_Virtualization_use (=68) +.eabi_attribute 68, 3 + +// Check that values > 128 are encoded properly +.eabi_attribute 110, 160 + +// Check that tags > 128 are encoded properly +.eabi_attribute 129, 1 +.eabi_attribute 250, 1 + +// CHECK: Section { +// CHECK: Name: .ARM.attributes +// CHECK-NEXT: Type: SHT_ARM_ATTRIBUTES +// CHECK-NEXT: Flags [ (0x0) +// CHECK-NEXT: ] +// CHECK-NEXT: Address: 0x0 +// CHECK-NEXT: Offset: 0x34 +// CHECK-NEXT: Size: 70 +// CHECK-NEXT: Link: 0 +// CHECK-NEXT: Info: 0 +// CHECK-NEXT: AddressAlignment: 1 +// CHECK-NEXT: EntrySize: 0 +// CHECK-NEXT: SectionData ( +// CHECK-NEXT: 0000: 41450000 00616561 62690001 3B000000 +// CHECK-NEXT: 0010: 05434F52 5445582D 41380006 0A074108 +// CHECK-NEXT: 0020: 0109020A 030C0214 01150117 01180119 +// CHECK-NEXT: 0030: 011B001C 0124012A 012C0244 036EA001 +// CHECK-NEXT: 0040: 810101FA 0101 +// CHECK-NEXT: ) diff --git a/test/CodeGen/ARM/byval_load_align.ll b/test/CodeGen/ARM/byval_load_align.ll new file mode 100644 
index 0000000..2c0910c --- /dev/null +++ b/test/CodeGen/ARM/byval_load_align.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s -mtriple thumbv7-apple-ios -O1 | FileCheck %s + +; rdar://15144402 +; Make sure we don't assume 4-byte alignment when loading from a byval argument +; with alignment of 2. +; CHECK: ldr r1, [r[[REG:[0-9]+]]] +; CHECK: ldr r2, [r[[REG]], #4] +; CHECK: ldr r3, [r[[REG]], #8] +; CHECK-NOT: ldm +; CHECK: .align 1 @ @sID + +%struct.ModuleID = type { [32 x i8], [32 x i8], i16 } + +@sID = internal constant %struct.ModuleID { [32 x i8] c"TEST\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", [32 x i8] c"1.0\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", i16 23 }, align 2 + +; Function Attrs: nounwind ssp +define void @Client() #0 { +entry: + tail call void @Logger(i8 signext 97, %struct.ModuleID* byval @sID) #2 + ret void +} + +declare void @Logger(i8 signext, %struct.ModuleID* byval) #1 + +attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } diff --git a/test/CodeGen/ARM/coalesce-dbgvalue.ll b/test/CodeGen/ARM/coalesce-dbgvalue.ll index d4be6ee..86106a0 100644 --- a/test/CodeGen/ARM/coalesce-dbgvalue.ll +++ b/test/CodeGen/ARM/coalesce-dbgvalue.ll @@ -17,7 +17,7 @@ target triple = "thumbv7-apple-ios3.0.0" ; Function Attrs: nounwind ssp define i32 @pr16110() #0 { for.cond1.preheader: - store i32 0, i32* @c, align 4, !dbg !21, !tbaa !23 + store i32 0, i32* @c, align 4, !dbg !21 br label %for.cond1.outer, 
!dbg !26 for.cond1: ; preds = %for.end9, %for.cond1.outer @@ -26,9 +26,9 @@ for.cond1: ; preds = %for.end9, %for.cond br i1 %cmp, label %for.body2, label %for.end9, !dbg !26 for.body2: ; preds = %for.cond1 - store i32 %storemerge11, i32* @b, align 4, !dbg !26, !tbaa !23 + store i32 %storemerge11, i32* @b, align 4, !dbg !26 tail call void @llvm.dbg.value(metadata !27, i64 0, metadata !11), !dbg !28 - %0 = load i64* @a, align 8, !dbg !29, !tbaa !30 + %0 = load i64* @a, align 8, !dbg !29 %xor = xor i64 %0, %e.1.ph, !dbg !29 %conv3 = trunc i64 %xor to i32, !dbg !29 tail call void @llvm.dbg.value(metadata !{i32 %conv3}, i64 0, metadata !10), !dbg !29 @@ -44,7 +44,7 @@ land.end: ; preds = %land.rhs, %for.body %1 = phi i1 [ false, %for.body2 ], [ %tobool5, %land.rhs ] %land.ext = zext i1 %1 to i32 %call6 = tail call i32 bitcast (i32 (...)* @fn2 to i32 (i32, i32*)*)(i32 %land.ext, i32* null) #3 - %2 = load i32* @b, align 4, !dbg !26, !tbaa !23 + %2 = load i32* @b, align 4, !dbg !26 %inc8 = add nsw i32 %2, 1, !dbg !26 %phitmp = and i64 %xor, 4294967295, !dbg !26 br label %for.cond1.outer, !dbg !26 @@ -52,7 +52,7 @@ land.end: ; preds = %land.rhs, %for.body for.cond1.outer: ; preds = %land.end, %for.cond1.preheader %storemerge11.ph = phi i32 [ %inc8, %land.end ], [ 0, %for.cond1.preheader ] %e.1.ph = phi i64 [ %phitmp, %land.end ], [ 0, %for.cond1.preheader ] - %3 = load i32* @d, align 4, !dbg !31, !tbaa !23 + %3 = load i32* @d, align 4, !dbg !31 %tobool10 = icmp eq i32 %3, 0, !dbg !31 br label %for.cond1 @@ -60,7 +60,7 @@ for.end9: ; preds = %for.cond1 br i1 %tobool10, label %if.end, label %for.cond1, !dbg !31 if.end: ; preds = %for.end9 - store i32 %storemerge11, i32* @b, align 4, !dbg !26, !tbaa !23 + store i32 %storemerge11, i32* @b, align 4, !dbg !26 ret i32 0, !dbg !32 } @@ -71,12 +71,13 @@ declare i32 @fn3(...) 
#1 ; Function Attrs: nounwind readnone declare void @llvm.dbg.value(metadata, i64, metadata) #2 -attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #2 = { nounwind readnone } attributes #3 = { nounwind } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!33} !0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 182024) (llvm/trunk 182023)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !15, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/d/b/pr16110.c] [DW_LANG_C99] !1 = metadata !{metadata !"pr16110.c", metadata !"/d/b"} @@ -84,7 +85,7 @@ attributes #3 = { nounwind } !3 = metadata !{metadata !4} !4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"pr16110", metadata !"pr16110", metadata !"", i32 7, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @pr16110, null, null, metadata !9, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [pr16110] !5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/d/b/pr16110.c] -!6 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, 
metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !7 = metadata !{metadata !8} !8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !9 = metadata !{metadata !10, metadata !11} @@ -101,13 +102,10 @@ attributes #3 = { nounwind } !20 = metadata !{i32 786484, i32 0, null, metadata !"d", metadata !"d", metadata !"", metadata !5, i32 4, metadata !8, i32 0, i32 1, i32* @d, null} ; [ DW_TAG_variable ] [d] [line 4] [def] !21 = metadata !{i32 10, i32 0, metadata !22, null} !22 = metadata !{i32 786443, metadata !1, metadata !4, i32 10, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/d/b/pr16110.c] -!23 = metadata !{metadata !"int", metadata !24} -!24 = metadata !{metadata !"omnipotent char", metadata !25} -!25 = metadata !{metadata !"Simple C/C++ TBAA"} !26 = metadata !{i32 12, i32 0, metadata !13, null} !27 = metadata !{i32* null} !28 = metadata !{i32 13, i32 0, metadata !12, null} !29 = metadata !{i32 14, i32 0, metadata !12, null} -!30 = metadata !{metadata !"long long", metadata !24} !31 = metadata !{i32 16, i32 0, metadata !4, null} !32 = metadata !{i32 18, i32 0, metadata !4, null} +!33 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/ARM/constantfp.ll b/test/CodeGen/ARM/constantfp.ll new file mode 100644 index 0000000..974bdd7 --- /dev/null +++ b/test/CodeGen/ARM/constantfp.ll @@ -0,0 +1,68 @@ +; RUN: llc -mtriple=armv7 -mattr=+neon -mcpu=swift %s -o - | FileCheck %s +; RUN: llc -mtriple=armv7 -mattr=+neon -mcpu=cortex-a8 %s -o - | FileCheck --check-prefix=CHECK-NONEONFP %s +; RUN: llc -mtriple=armv7 -mattr=-neon -mcpu=cortex-a8 %s -o - | FileCheck 
--check-prefix=CHECK-NONEON %s + +define arm_aapcs_vfpcc float @test_vmov_f32() { +; CHECK-LABEL: test_vmov_f32: +; CHECK: vmov.f32 d0, #1.0 + +; CHECK-NONEONFP: vmov.f32 s0, #1.0 + ret float 1.0 +} + +define arm_aapcs_vfpcc float @test_vmov_imm() { +; CHECK-LABEL: test_vmov_imm: +; CHECK: vmov.i32 d0, #0 + +; CHECK-NONEON-LABEL: test_vmov_imm: +; CHECK_NONEON: vldr s0, {{.?LCPI[0-9]+_[0-9]+}} + ret float 0.0 +} + +define arm_aapcs_vfpcc float @test_vmvn_imm() { +; CHECK-LABEL: test_vmvn_imm: +; CHECK: vmvn.i32 d0, #0xb0000000 + +; CHECK-NONEON-LABEL: test_vmvn_imm: +; CHECK_NONEON: vldr s0, {{.?LCPI[0-9]+_[0-9]+}} + ret float 8589934080.0 +} + +define arm_aapcs_vfpcc double @test_vmov_f64() { +; CHECK-LABEL: test_vmov_f64: +; CHECK: vmov.f64 d0, #1.0 + +; CHECK-NONEON-LABEL: test_vmov_f64: +; CHECK_NONEON: vmov.f64 d0, #1.0 + + ret double 1.0 +} + +define arm_aapcs_vfpcc double @test_vmov_double_imm() { +; CHECK-LABEL: test_vmov_double_imm: +; CHECK: vmov.i32 d0, #0 + +; CHECK-NONEON-LABEL: test_vmov_double_imm: +; CHECK_NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}} + ret double 0.0 +} + +define arm_aapcs_vfpcc double @test_vmvn_double_imm() { +; CHECK-LABEL: test_vmvn_double_imm: +; CHECK: vmvn.i32 d0, #0xb0000000 + +; CHECK-NONEON-LABEL: test_vmvn_double_imm: +; CHECK_NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}} + ret double 0x4fffffff4fffffff +} + +; Make sure we don't ignore the high half of 64-bit values when deciding whether +; a vmov/vmvn is possible. 
+define arm_aapcs_vfpcc double @test_notvmvn_double_imm() { +; CHECK-LABEL: test_notvmvn_double_imm: +; CHECK: vldr d0, {{.?LCPI[0-9]+_[0-9]+}} + +; CHECK-NONEON-LABEL: test_notvmvn_double_imm: +; CHECK_NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}} + ret double 0x4fffffffffffffff +} diff --git a/test/CodeGen/ARM/dagcombine-concatvector.ll b/test/CodeGen/ARM/dagcombine-concatvector.ll index d8c6c64..2927ea2 100644 --- a/test/CodeGen/ARM/dagcombine-concatvector.ll +++ b/test/CodeGen/ARM/dagcombine-concatvector.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7s-apple-ios3.0.0 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7s-apple-ios3.0.0 -mcpu=generic | FileCheck %s ; PR15525 ; CHECK-LABEL: test1: diff --git a/test/CodeGen/ARM/darwin-eabi.ll b/test/CodeGen/ARM/darwin-eabi.ll new file mode 100644 index 0000000..f2cde71 --- /dev/null +++ b/test/CodeGen/ARM/darwin-eabi.ll @@ -0,0 +1,24 @@ +; RUN: llc -mtriple=thumbv7m-apple-darwin -mcpu=cortex-m3 < %s | FileCheck %s --check-prefix=CHECK-M3 +; RUN: llc -mtriple=thumbv7em-apple-darwin -mcpu=cortex-m4 < %s | FileCheck %s --check-prefix=CHECK-M4 +; RUN: llc -mtriple=thumbv7-apple-darwin -mcpu=cortex-m3 < %s | FileCheck %s --check-prefix=CHECK-M3 +; RUN: llc -mtriple=thumbv7-apple-darwin -mcpu=cortex-m4 < %s | FileCheck %s --check-prefix=CHECK-M4 + +define float @float_op(float %lhs, float %rhs) { + %sum = fadd float %lhs, %rhs + ret float %sum +; CHECK-M3-LABEL: float_op: +; CHECK-M3: blx ___addsf3 + +; CHECK-M4-LABEL: float_op: +; CHECK-M4: vadd.f32 +} + +define double @double_op(double %lhs, double %rhs) { + %sum = fadd double %lhs, %rhs + ret double %sum +; CHECK-M3-LABEL: double_op: +; CHECK-M3: blx ___adddf3 + +; CHECK-M4-LABEL: double_op: +; CHECK-M4: blx ___adddf3 +} diff --git a/test/CodeGen/ARM/debug-info-arg.ll b/test/CodeGen/ARM/debug-info-arg.ll index 89ccb20..e8bf3ba 100644 --- a/test/CodeGen/ARM/debug-info-arg.ll +++ b/test/CodeGen/ARM/debug-info-arg.ll @@ -30,15 +30,16 @@ declare void @foobar(i64, i64) declare 
void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!33} !0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !30, null, null, null} ; [ DW_TAG_compile_unit ] -!1 = metadata !{i32 786478, metadata !2, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void (%struct.tag_s*, %struct.tag_s*, i64, i64, %struct.tag_s*, %struct.tag_s*)* @foo, null, null, metadata !31, i32 11} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786478, metadata !2, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (%struct.tag_s*, %struct.tag_s*, i64, i64, %struct.tag_s*, %struct.tag_s*)* @foo, null, null, metadata !31, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [foo] !2 = metadata !{i32 786473, metadata !32} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 786453, metadata !32, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !32, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{null} !5 = metadata !{i32 786689, metadata !1, metadata !"this", metadata !2, i32 16777227, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] !6 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ] -!7 = metadata !{i32 786451, metadata !32, metadata !0, metadata !"tag_s", i32 5, i64 96, i64 32, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!7 = metadata !{i32 786451, 
metadata !32, metadata !0, metadata !"tag_s", i32 5, i64 96, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [tag_s] [line 5, size 96, align 32, offset 0] [def] [from ] !8 = metadata !{metadata !9, metadata !11, metadata !12} !9 = metadata !{i32 786445, metadata !32, metadata !7, metadata !"x", i32 6, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ] !10 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] @@ -64,3 +65,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !30 = metadata !{metadata !1} !31 = metadata !{metadata !5, metadata !13, metadata !14, metadata !17, metadata !18, metadata!19} !32 = metadata !{metadata !"one.c", metadata !"/Volumes/Athwagate/R10048772"} +!33 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/ARM/debug-info-blocks.ll b/test/CodeGen/ARM/debug-info-blocks.ll index bd55786..6cbe4b4 100644 --- a/test/CodeGen/ARM/debug-info-blocks.ll +++ b/test/CodeGen/ARM/debug-info-blocks.ll @@ -93,37 +93,38 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!162} !0 = metadata !{i32 786449, metadata !153, i32 16, metadata !"Apple clang version 2.1", i1 false, metadata !"", i32 2, metadata !147, metadata !26, metadata !148, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!1 = metadata !{i32 786433, metadata !160, metadata !0, metadata !"", i32 248, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !3, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] +!1 = metadata !{i32 786436, metadata !160, metadata !0, metadata !"", i32 248, i64 32, i64 32, i32 0, i32 0, null, metadata !3, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [line 248, size 32, align 32, offset 0] [def] [from ] !2 = metadata !{i32 786473, metadata !160} ; [ DW_TAG_file_type ] !3 = metadata !{metadata !4} !4 = 
metadata !{i32 786472, metadata !"Ver1", i64 0} ; [ DW_TAG_enumerator ] -!5 = metadata !{i32 786433, metadata !160, metadata !0, metadata !"Mode", i32 79, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !7, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] +!5 = metadata !{i32 786436, metadata !160, metadata !0, metadata !"Mode", i32 79, i64 32, i64 32, i32 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [Mode] [line 79, size 32, align 32, offset 0] [def] [from ] !6 = metadata !{i32 786473, metadata !161} ; [ DW_TAG_file_type ] !7 = metadata !{metadata !8} !8 = metadata !{i32 786472, metadata !"One", i64 0} ; [ DW_TAG_enumerator ] -!9 = metadata !{i32 786433, metadata !149, metadata !0, metadata !"", i32 15, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !11, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] +!9 = metadata !{i32 786436, metadata !149, metadata !0, metadata !"", i32 15, i64 32, i64 32, i32 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [line 15, size 32, align 32, offset 0] [def] [from ] !10 = metadata !{i32 786473, metadata !149} ; [ DW_TAG_file_type ] !11 = metadata !{metadata !12, metadata !13} !12 = metadata !{i32 786472, metadata !"Unknown", i64 0} ; [ DW_TAG_enumerator ] !13 = metadata !{i32 786472, metadata !"Known", i64 1} ; [ DW_TAG_enumerator ] -!14 = metadata !{i32 786433, metadata !150, metadata !0, metadata !"", i32 20, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !16, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] +!14 = metadata !{i32 786436, metadata !150, metadata !0, metadata !"", i32 20, i64 32, i64 32, i32 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [line 20, size 32, align 32, offset 0] [def] [from ] !15 = metadata !{i32 786473, metadata !150} ; [ DW_TAG_file_type ] !16 = metadata !{metadata !17, metadata !18} !17 = metadata !{i32 786472, metadata !"Single", i64 0} ; [ DW_TAG_enumerator ] !18 = metadata !{i32 786472, metadata 
!"Double", i64 1} ; [ DW_TAG_enumerator ] -!19 = metadata !{i32 786433, metadata !151, metadata !0, metadata !"", i32 14, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !21, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] +!19 = metadata !{i32 786436, metadata !151, metadata !0, metadata !"", i32 14, i64 32, i64 32, i32 0, i32 0, null, metadata !21, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [line 14, size 32, align 32, offset 0] [def] [from ] !20 = metadata !{i32 786473, metadata !151} ; [ DW_TAG_file_type ] !21 = metadata !{metadata !22} !22 = metadata !{i32 786472, metadata !"Eleven", i64 0} ; [ DW_TAG_enumerator ] -!23 = metadata !{i32 786478, metadata !152, metadata !24, metadata !"foobar_func_block_invoke_0", metadata !"foobar_func_block_invoke_0", metadata !"", i32 609, metadata !25, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (i8*, %0*, [4 x i32], [4 x i32])* @foobar_func_block_invoke_0, null, null, null, i32 609} ; [ DW_TAG_subprogram ] +!23 = metadata !{i32 786478, metadata !152, metadata !24, metadata !"foobar_func_block_invoke_0", metadata !"foobar_func_block_invoke_0", metadata !"", i32 609, metadata !25, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*, %0*, [4 x i32], [4 x i32])* @foobar_func_block_invoke_0, null, null, null, i32 609} ; [ DW_TAG_subprogram ] [line 609] [local] [def] [foobar_func_block_invoke_0] !24 = metadata !{i32 786473, metadata !152} ; [ DW_TAG_file_type ] -!25 = metadata !{i32 786453, metadata !152, metadata !24, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !26, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!25 = metadata !{i32 786453, metadata !152, metadata !24, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !26, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !26 = metadata !{null} !27 = metadata !{i32 786689, metadata !23, metadata !".block_descriptor", metadata !24, i32 16777825, metadata !28, i32 
64, null} ; [ DW_TAG_arg_variable ] !28 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 0, i64 0, i32 0, metadata !29} ; [ DW_TAG_pointer_type ] -!29 = metadata !{i32 786451, metadata !152, metadata !24, metadata !"__block_literal_14", i32 609, i64 256, i64 32, i32 0, i32 0, i32 0, metadata !30, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!29 = metadata !{i32 786451, metadata !152, metadata !24, metadata !"__block_literal_14", i32 609, i64 256, i64 32, i32 0, i32 0, null, metadata !30, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_literal_14] [line 609, size 256, align 32, offset 0] [def] [from ] !30 = metadata !{metadata !31, metadata !33, metadata !35, metadata !36, metadata !37, metadata !48, metadata !89, metadata !124} !31 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__isa", i32 609, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ] !32 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] @@ -133,7 +134,7 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load !36 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__FuncPtr", i32 609, i64 32, i64 32, i64 96, i32 0, metadata !32} ; [ DW_TAG_member ] !37 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__descriptor", i32 609, i64 32, i64 32, i64 128, i32 0, metadata !38} ; [ DW_TAG_member ] !38 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !39} ; [ DW_TAG_pointer_type ] -!39 = metadata !{i32 786451, metadata !153, metadata !0, metadata !"__block_descriptor_withcopydispose", i32 307, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !41, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!39 = metadata !{i32 786451, metadata !153, metadata !0, metadata !"__block_descriptor_withcopydispose", i32 307, i64 128, i64 32, i32 0, i32 0, null, metadata !41, i32 0, 
null, null, null} ; [ DW_TAG_structure_type ] [__block_descriptor_withcopydispose] [line 307, size 128, align 32, offset 0] [def] [from ] !40 = metadata !{i32 786473, metadata !153} ; [ DW_TAG_file_type ] !41 = metadata !{metadata !42, metadata !44, metadata !45, metadata !47} !42 = metadata !{i32 786445, metadata !153, metadata !40, metadata !"reserved", i32 307, i64 32, i64 32, i64 0, i32 0, metadata !43} ; [ DW_TAG_member ] @@ -144,7 +145,7 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load !47 = metadata !{i32 786445, metadata !153, metadata !40, metadata !"DestroyFuncPtr", i32 307, i64 32, i64 32, i64 96, i32 0, metadata !46} ; [ DW_TAG_member ] !48 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"mydata", i32 609, i64 32, i64 32, i64 160, i32 0, metadata !49} ; [ DW_TAG_member ] !49 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 0, i64 0, i32 0, metadata !50} ; [ DW_TAG_pointer_type ] -!50 = metadata !{i32 786451, metadata !152, metadata !24, metadata !"", i32 0, i64 224, i64 0, i32 0, i32 16, i32 0, metadata !51, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!50 = metadata !{i32 786451, metadata !152, metadata !24, metadata !"", i32 0, i64 224, i64 0, i32 0, i32 16, null, metadata !51, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [line 0, size 224, align 0, offset 0] [def] [from ] !51 = metadata !{metadata !52, metadata !53, metadata !54, metadata !55, metadata !56, metadata !57, metadata !58} !52 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__isa", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ] !53 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__forwarding", i32 0, i64 32, i64 32, i64 32, i32 0, metadata !32} ; [ DW_TAG_member ] @@ -154,17 +155,17 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load !57 = metadata !{i32 786445, metadata !152, metadata !24, metadata 
!"__destroy_helper", i32 0, i64 32, i64 32, i64 160, i32 0, metadata !32} ; [ DW_TAG_member ] !58 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"mydata", i32 0, i64 32, i64 32, i64 192, i32 0, metadata !59} ; [ DW_TAG_member ] !59 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !60} ; [ DW_TAG_pointer_type ] -!60 = metadata !{i32 786451, metadata !154, metadata !24, metadata !"UIMydata", i32 26, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !62, i32 16, i32 0} ; [ DW_TAG_structure_type ] +!60 = metadata !{i32 786451, metadata !154, metadata !24, metadata !"UIMydata", i32 26, i64 128, i64 32, i32 0, i32 0, null, metadata !62, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [UIMydata] [line 26, size 128, align 32, offset 0] [def] [from ] !61 = metadata !{i32 786473, metadata !154} ; [ DW_TAG_file_type ] !62 = metadata !{metadata !63, metadata !71, metadata !75, metadata !79} !63 = metadata !{i32 786460, metadata !60, null, metadata !61, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ] -!64 = metadata !{i32 786451, metadata !155, metadata !40, metadata !"NSO", i32 66, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !66, i32 16, i32 0} ; [ DW_TAG_structure_type ] +!64 = metadata !{i32 786451, metadata !155, metadata !40, metadata !"NSO", i32 66, i64 32, i64 32, i32 0, i32 0, null, metadata !66, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [NSO] [line 66, size 32, align 32, offset 0] [def] [from ] !65 = metadata !{i32 786473, metadata !155} ; [ DW_TAG_file_type ] !66 = metadata !{metadata !67} !67 = metadata !{i32 786445, metadata !155, metadata !65, metadata !"isa", i32 67, i64 32, i64 32, i64 0, i32 2, metadata !68, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] !68 = metadata !{i32 786454, metadata !153, metadata !0, metadata !"Class", i32 197, i64 0, i64 0, i64 0, i32 0, metadata !69} ; [ DW_TAG_typedef ] !69 = metadata !{i32 786447, 
null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !70} ; [ DW_TAG_pointer_type ] -!70 = metadata !{i32 786451, metadata !153, metadata !0, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!70 = metadata !{i32 786451, metadata !153, metadata !0, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [decl] [from ] !71 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"_mydataRef", i32 28, i64 32, i64 32, i64 32, i32 0, metadata !72, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] !72 = metadata !{i32 786454, metadata !152, metadata !0, metadata !"CFTypeRef", i32 313, i64 0, i64 0, i64 0, i32 0, metadata !73} ; [ DW_TAG_typedef ] !73 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !74} ; [ DW_TAG_pointer_type ] @@ -174,7 +175,7 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load !77 = metadata !{i32 786473, metadata !156} ; [ DW_TAG_file_type ] !78 = metadata !{i32 786468, null, metadata !0, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] !79 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"_mydataFlags", i32 37, i64 8, i64 8, i64 96, i32 0, metadata !80, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] -!80 = metadata !{i32 786451, metadata !154, metadata !0, metadata !"", i32 30, i64 8, i64 8, i32 0, i32 0, i32 0, metadata !81, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!80 = metadata !{i32 786451, metadata !154, metadata !0, metadata !"", i32 30, i64 8, i64 8, i32 0, i32 0, null, metadata !81, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [line 30, size 8, align 8, offset 0] [def] [from ] !81 = metadata !{metadata !82, metadata !84, metadata !85, metadata 
!86, metadata !87, metadata !88} !82 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"named", i32 31, i64 1, i64 32, i64 0, i32 0, metadata !83} ; [ DW_TAG_member ] !83 = metadata !{i32 786468, null, metadata !0, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] @@ -185,10 +186,10 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load !88 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"isCIMydata", i32 36, i64 1, i64 32, i64 7, i32 0, metadata !83} ; [ DW_TAG_member ] !89 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"self", i32 609, i64 32, i64 32, i64 192, i32 0, metadata !90} ; [ DW_TAG_member ] !90 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !91} ; [ DW_TAG_pointer_type ] -!91 = metadata !{i32 786451, metadata !152, metadata !40, metadata !"MyWork", i32 36, i64 384, i64 32, i32 0, i32 0, i32 0, metadata !92, i32 16, i32 0} ; [ DW_TAG_structure_type ] +!91 = metadata !{i32 786451, metadata !152, metadata !40, metadata !"MyWork", i32 36, i64 384, i64 32, i32 0, i32 0, null, metadata !92, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [MyWork] [line 36, size 384, align 32, offset 0] [def] [from ] !92 = metadata !{metadata !93, metadata !98, metadata !101, metadata !107, metadata !123} !93 = metadata !{i32 786460, metadata !152, metadata !91, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !94} ; [ DW_TAG_inheritance ] -!94 = metadata !{i32 786451, metadata !157, metadata !40, metadata !"twork", i32 43, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !96, i32 16, i32 0} ; [ DW_TAG_structure_type ] +!94 = metadata !{i32 786451, metadata !157, metadata !40, metadata !"twork", i32 43, i64 32, i64 32, i32 0, i32 0, null, metadata !96, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [twork] [line 43, size 32, align 32, offset 0] [def] [from ] !95 = metadata !{i32 786473, 
metadata !157} ; [ DW_TAG_file_type ] !96 = metadata !{metadata !97} !97 = metadata !{i32 786460, metadata !94, null, metadata !95, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ] @@ -197,23 +198,23 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load !100 = metadata !{i32 786468, null, metadata !0, metadata !"long long unsigned int", i32 0, i64 64, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] !101 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"_library", i32 39, i64 32, i64 32, i64 96, i32 1, metadata !102, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] !102 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !103} ; [ DW_TAG_pointer_type ] -!103 = metadata !{i32 786451, metadata !158, metadata !40, metadata !"MyLibrary2", i32 22, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !105, i32 16, i32 0} ; [ DW_TAG_structure_type ] +!103 = metadata !{i32 786451, metadata !158, metadata !40, metadata !"MyLibrary2", i32 22, i64 32, i64 32, i32 0, i32 0, null, metadata !105, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [MyLibrary2] [line 22, size 32, align 32, offset 0] [def] [from ] !104 = metadata !{i32 786473, metadata !158} ; [ DW_TAG_file_type ] !105 = metadata !{metadata !106} !106 = metadata !{i32 786460, metadata !103, null, metadata !104, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ] !107 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"_bounds", i32 40, i64 128, i64 32, i64 128, i32 1, metadata !108, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] !108 = metadata !{i32 786454, metadata !153, metadata !0, metadata !"CR", i32 33, i64 0, i64 0, i64 0, i32 0, metadata !109} ; [ DW_TAG_typedef ] -!109 = metadata !{i32 786451, metadata !156, metadata !0, metadata !"CR", i32 29, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !110, i32 0, i32 0} ; 
[ DW_TAG_structure_type ] +!109 = metadata !{i32 786451, metadata !156, metadata !0, metadata !"CR", i32 29, i64 128, i64 32, i32 0, i32 0, null, metadata !110, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [CR] [line 29, size 128, align 32, offset 0] [def] [from ] !110 = metadata !{metadata !111, metadata !117} !111 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"origin", i32 30, i64 64, i64 32, i64 0, i32 0, metadata !112} ; [ DW_TAG_member ] !112 = metadata !{i32 786454, metadata !156, metadata !0, metadata !"CP", i32 17, i64 0, i64 0, i64 0, i32 0, metadata !113} ; [ DW_TAG_typedef ] -!113 = metadata !{i32 786451, metadata !156, metadata !0, metadata !"CP", i32 13, i64 64, i64 32, i32 0, i32 0, i32 0, metadata !114, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!113 = metadata !{i32 786451, metadata !156, metadata !0, metadata !"CP", i32 13, i64 64, i64 32, i32 0, i32 0, null, metadata !114, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [CP] [line 13, size 64, align 32, offset 0] [def] [from ] !114 = metadata !{metadata !115, metadata !116} !115 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"x", i32 14, i64 32, i64 32, i64 0, i32 0, metadata !76} ; [ DW_TAG_member ] !116 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"y", i32 15, i64 32, i64 32, i64 32, i32 0, metadata !76} ; [ DW_TAG_member ] !117 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"size", i32 31, i64 64, i64 32, i64 64, i32 0, metadata !118} ; [ DW_TAG_member ] !118 = metadata !{i32 786454, metadata !156, metadata !0, metadata !"Size", i32 25, i64 0, i64 0, i64 0, i32 0, metadata !119} ; [ DW_TAG_typedef ] -!119 = metadata !{i32 786451, metadata !156, metadata !0, metadata !"Size", i32 21, i64 64, i64 32, i32 0, i32 0, i32 0, metadata !120, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!119 = metadata !{i32 786451, metadata !156, metadata !0, metadata !"Size", i32 21, i64 64, i64 32, i32 0, i32 0, null, metadata !120, i32 
0, null, null, null} ; [ DW_TAG_structure_type ] [Size] [line 21, size 64, align 32, offset 0] [def] [from ] !120 = metadata !{metadata !121, metadata !122} !121 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"width", i32 22, i64 32, i64 32, i64 0, i32 0, metadata !76} ; [ DW_TAG_member ] !122 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"height", i32 23, i64 32, i64 32, i64 32, i32 0, metadata !76} ; [ DW_TAG_member ] @@ -221,7 +222,7 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load !124 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"semi", i32 609, i64 32, i64 32, i64 224, i32 0, metadata !125} ; [ DW_TAG_member ] !125 = metadata !{i32 786454, metadata !152, metadata !0, metadata !"d_t", i32 35, i64 0, i64 0, i64 0, i32 0, metadata !126} ; [ DW_TAG_typedef ] !126 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !127} ; [ DW_TAG_pointer_type ] -!127 = metadata !{i32 786451, metadata !159, metadata !0, metadata !"my_struct", i32 49, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!127 = metadata !{i32 786451, metadata !159, metadata !0, metadata !"my_struct", i32 49, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [my_struct] [line 49, size 0, align 0, offset 0] [decl] [from ] !128 = metadata !{i32 786473, metadata !159} ; [ DW_TAG_file_type ] !129 = metadata !{i32 609, i32 144, metadata !23, null} !130 = metadata !{i32 786689, metadata !23, metadata !"loadedMydata", metadata !24, i32 33555041, metadata !59, i32 0, null} ; [ DW_TAG_arg_variable ] @@ -256,3 +257,4 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load !159 = metadata !{metadata !"header15.h", metadata !"/Volumes/Sandbox/llvm"} !160 = metadata !{metadata !"header.h", metadata !"/Volumes/Sandbox/llvm"} !161 = metadata !{metadata !"header2.h", metadata 
!"/Volumes/Sandbox/llvm"} +!162 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/ARM/debug-info-branch-folding.ll b/test/CodeGen/ARM/debug-info-branch-folding.ll index 052fd22..8505f53 100644 --- a/test/CodeGen/ARM/debug-info-branch-folding.ll +++ b/test/CodeGen/ARM/debug-info-branch-folding.ll @@ -38,23 +38,25 @@ declare i32 @printf(i8* nocapture, ...) nounwind declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone +!llvm.module.flags = !{!56} + !0 = metadata !{i32 786478, metadata !54, null, metadata !"test0001", metadata !"test0001", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null, null, metadata !51, i32 0} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 786473, metadata !54} ; [ DW_TAG_file_type ] !2 = metadata !{i32 786449, metadata !54, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, metadata !17, metadata !17, metadata !50, null, null, null} ; [ DW_TAG_compile_unit ] !3 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786454, metadata !54, metadata !2, metadata !"v4f32", i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ] -!6 = metadata !{i32 786433, metadata !54, metadata !2, metadata !"", i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_vector_type ] +!6 = metadata !{i32 786433, metadata !54, metadata !2, metadata !"", i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 128, offset 0] [from float] !7 = metadata !{i32 786468, null, metadata !2, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] !8 = metadata !{metadata !9} !9 = metadata !{i32 786465, i64 
0, i64 4} ; [ DW_TAG_subrange_type ] -!10 = metadata !{i32 786478, metadata !54, null, metadata !"main", metadata !"main", metadata !"", i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**, i1)* @main, null, null, metadata !52, i32 0} ; [ DW_TAG_subprogram ] -!11 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!10 = metadata !{i32 786478, metadata !54, null, metadata !"main", metadata !"main", metadata !"", i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**, i1)* @main, null, null, metadata !52, i32 0} ; [ DW_TAG_subprogram ] [line 59] [def] [scope 0] [main] +!11 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !12 = metadata !{metadata !13} !13 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!14 = metadata !{i32 786478, metadata !55, null, metadata !"printFV", metadata !"printFV", metadata !"", i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null, null, metadata !53, i32 0} ; [ DW_TAG_subprogram ] +!14 = metadata !{i32 786478, metadata !55, null, metadata !"printFV", metadata !"printFV", metadata !"", i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !53, i32 0} ; [ DW_TAG_subprogram ] [line 41] [local] [def] [scope 0] [printFV] !15 = metadata !{i32 786473, metadata !55} ; [ DW_TAG_file_type ] -!16 = metadata !{i32 786453, metadata !55, metadata !15, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!16 = metadata !{i32 786453, metadata !55, metadata !15, metadata 
!"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !17, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !17 = metadata !{null} !18 = metadata !{i32 786689, metadata !0, metadata !"a", metadata !1, i32 16777219, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ] !19 = metadata !{i32 786689, metadata !10, metadata !"argc", metadata !1, i32 16777275, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ] @@ -94,3 +96,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !53 = metadata !{metadata !30} !54 = metadata !{metadata !"build2.c", metadata !"/private/tmp"} !55 = metadata !{metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", metadata !"/private/tmp"} +!56 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/ARM/debug-info-d16-reg.ll b/test/CodeGen/ARM/debug-info-d16-reg.ll index 11631ae..30a3e2d 100644 --- a/test/CodeGen/ARM/debug-info-d16-reg.ll +++ b/test/CodeGen/ARM/debug-info-d16-reg.ll @@ -57,11 +57,12 @@ entry: declare i32 @puts(i8* nocapture) nounwind !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!48} !0 = metadata !{i32 786478, metadata !46, metadata !1, metadata !"printer", metadata !"printer", metadata !"printer", i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @printer, null, null, metadata !43, i32 12} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 786473, metadata !46} ; [ DW_TAG_file_type ] !2 = metadata !{i32 786449, metadata !46, i32 1, metadata !"(LLVM build 00)", i1 true, metadata !"", i32 0, metadata !47, metadata !47, metadata !42, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !46, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !46, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, 
i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5, metadata !6, metadata !7, metadata !8} !5 = metadata !{i32 786468, metadata !46, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !6 = metadata !{i32 786447, metadata !46, metadata !1, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] @@ -69,7 +70,7 @@ declare i32 @puts(i8* nocapture) nounwind !8 = metadata !{i32 786468, metadata !46, metadata !1, metadata !"unsigned char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ] !9 = metadata !{i32 786478, metadata !46, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"inlineprinter", i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @inlineprinter, null, null, metadata !44, i32 5} ; [ DW_TAG_subprogram ] !10 = metadata !{i32 786478, metadata !46, metadata !1, metadata !"main", metadata !"main", metadata !"main", i32 18, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !45, i32 18} ; [ DW_TAG_subprogram ] -!11 = metadata !{i32 786453, metadata !46, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ] +!11 = metadata !{i32 786453, metadata !46, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !12 = metadata !{metadata !5, metadata !5, metadata !13} !13 = metadata !{i32 786447, metadata !46, metadata !1, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ] !14 = metadata !{i32 786447, metadata !46, metadata !1, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !15} ; [ DW_TAG_pointer_type ] 
@@ -106,3 +107,4 @@ declare i32 @puts(i8* nocapture) nounwind !45 = metadata !{metadata !22, metadata !23, metadata !24} !46 = metadata !{metadata !"a.c", metadata !"/tmp/"} !47 = metadata !{i32 0} +!48 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/ARM/debug-info-qreg.ll b/test/CodeGen/ARM/debug-info-qreg.ll index af61f6c..ee515fd5 100644 --- a/test/CodeGen/ARM/debug-info-qreg.ll +++ b/test/CodeGen/ARM/debug-info-qreg.ll @@ -36,24 +36,25 @@ declare i32 @printf(i8* nocapture, ...) nounwind declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!56} -!0 = metadata !{i32 786478, metadata !54, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null, null, metadata !51, i32 3} ; [ DW_TAG_subprogram ] +!0 = metadata !{i32 786478, metadata !54, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, <4 x float> (float)* @test0001, null, null, metadata !51, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [test0001] !1 = metadata !{i32 786473, metadata !54} ; [ DW_TAG_file_type ] !2 = metadata !{i32 786449, metadata !54, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, metadata !17, metadata !17, metadata !50, null, null, null} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786454, metadata !54, metadata !2, 
metadata !"v4f32", i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ] -!6 = metadata !{i32 786433, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_vector_type ] +!6 = metadata !{i32 786433, metadata !2, null, metadata !2, i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 128, offset 0] [from float] !7 = metadata !{i32 786468, null, metadata !2, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] !8 = metadata !{metadata !9} !9 = metadata !{i32 786465, i64 0, i64 4} ; [ DW_TAG_subrange_type ] -!10 = metadata !{i32 786478, metadata !54, metadata !1, metadata !"main", metadata !"main", metadata !"", i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !52, i32 59} ; [ DW_TAG_subprogram ] -!11 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!10 = metadata !{i32 786478, metadata !54, metadata !1, metadata !"main", metadata !"main", metadata !"", i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !52, i32 59} ; [ DW_TAG_subprogram ] [line 59] [def] [main] +!11 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !12 = metadata !{metadata !13} !13 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!14 = metadata !{i32 786478, metadata !55, metadata !15, metadata !"printFV", metadata !"printFV", metadata !"", i32 41, metadata !16, i1 true, i1 true, i32 0, 
i32 0, i32 0, i32 256, i1 true, null, null, null, metadata !53, i32 41} ; [ DW_TAG_subprogram ] +!14 = metadata !{i32 786478, metadata !55, metadata !15, metadata !"printFV", metadata !"printFV", metadata !"", i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !53, i32 41} ; [ DW_TAG_subprogram ] [line 41] [local] [def] [printFV] !15 = metadata !{i32 786473, metadata !55} ; [ DW_TAG_file_type ] -!16 = metadata !{i32 786453, metadata !55, metadata !15, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!16 = metadata !{i32 786453, metadata !55, metadata !15, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !17, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !17 = metadata !{null} !18 = metadata !{i32 786689, metadata !0, metadata !"a", metadata !1, i32 16777219, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ] !19 = metadata !{i32 786689, metadata !10, metadata !"argc", metadata !1, i32 16777275, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ] @@ -70,7 +71,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !30 = metadata !{i32 786689, metadata !14, metadata !"F", metadata !15, i32 16777257, metadata !31, i32 0, null} ; [ DW_TAG_arg_variable ] !31 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ] !32 = metadata !{i32 786454, metadata !55, metadata !2, metadata !"FV", i32 25, i64 0, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_typedef ] -!33 = metadata !{i32 786455, metadata !55, metadata !2, metadata !"", i32 22, i64 128, i64 128, i64 0, i32 0, i32 0, metadata !34, i32 0, i32 0} ; [ DW_TAG_union_type ] +!33 = metadata !{i32 786455, metadata !55, metadata !2, metadata !"", i32 22, i64 128, i64 128, i64 0, i32 0, i32 0, metadata !34, i32 0, null} ; [ DW_TAG_union_type ] 
!34 = metadata !{metadata !35, metadata !37} !35 = metadata !{i32 786445, metadata !55, metadata !15, metadata !"V", i32 23, i64 128, i64 128, i64 0, i32 0, metadata !36} ; [ DW_TAG_member ] !36 = metadata !{i32 786454, metadata !55, metadata !2, metadata !"v4sf", i32 3, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ] @@ -93,3 +94,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !53 = metadata !{metadata !30} !54 = metadata !{metadata !"build2.c", metadata !"/private/tmp"} !55 = metadata !{metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", metadata !"/private/tmp"} +!56 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/ARM/debug-info-s16-reg.ll b/test/CodeGen/ARM/debug-info-s16-reg.ll index 83e7dac..e92d977 100644 --- a/test/CodeGen/ARM/debug-info-s16-reg.ll +++ b/test/CodeGen/ARM/debug-info-s16-reg.ll @@ -62,15 +62,16 @@ declare i32 @puts(i8* nocapture) nounwind optsize declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!53} -!0 = metadata !{i32 786478, metadata !51, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"", i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @inlineprinter, null, null, metadata !48, i32 5} ; [ DW_TAG_subprogram ] +!0 = metadata !{i32 786478, metadata !51, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"", i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, float, i8)* @inlineprinter, null, null, metadata !48, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [inlineprinter] !1 = metadata !{i32 786473, metadata !51} ; [ DW_TAG_file_type ] !2 = metadata !{i32 786449, metadata !51, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, metadata !52, metadata !52, metadata !47, null, null, 
null} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !51, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !51, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 786478, metadata !51, metadata !1, metadata !"printer", metadata !"printer", metadata !"", i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @printer, null, null, metadata !49, i32 12} ; [ DW_TAG_subprogram ] -!7 = metadata !{i32 786478, metadata !51, metadata !1, metadata !"main", metadata !"main", metadata !"", i32 18, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !50, i32 18} ; [ DW_TAG_subprogram ] +!6 = metadata !{i32 786478, metadata !51, metadata !1, metadata !"printer", metadata !"printer", metadata !"", i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, float, i8)* @printer, null, null, metadata !49, i32 12} ; [ DW_TAG_subprogram ] [line 12] [def] [printer] +!7 = metadata !{i32 786478, metadata !51, metadata !1, metadata !"main", metadata !"main", metadata !"", i32 18, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !50, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [main] !8 = metadata !{i32 786689, metadata !0, metadata !"ptr", metadata !1, i32 16777220, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ] !9 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] !10 = 
metadata !{i32 786689, metadata !0, metadata !"val", metadata !1, i32 33554436, metadata !11, i32 0, null} ; [ DW_TAG_arg_variable ] @@ -116,3 +117,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !50 = metadata !{metadata !17, metadata !18, metadata !22} !51 = metadata !{metadata !"a.c", metadata !"/private/tmp"} !52 = metadata !{i32 0} +!53 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/ARM/debug-info-sreg2.ll b/test/CodeGen/ARM/debug-info-sreg2.ll index cc2e831..854fcab 100644 --- a/test/CodeGen/ARM/debug-info-sreg2.ll +++ b/test/CodeGen/ARM/debug-info-sreg2.ll @@ -40,11 +40,12 @@ declare float @_Z2f3f(float) optsize declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!20} !0 = metadata !{i32 786449, metadata !18, i32 4, metadata !"clang version 3.0 (trunk 130845)", i1 true, metadata !"", i32 0, metadata !19, metadata !19, metadata !16, null, null, null} ; [ DW_TAG_compile_unit ] -!1 = metadata !{i32 786478, metadata !18, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3foov", i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @_Z3foov, null, null, metadata !17, i32 5} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786478, metadata !18, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3foov", i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @_Z3foov, null, null, metadata !17, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [foo] !2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 786453, metadata !18, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !18, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, 
size 0, align 0, offset 0] [from ] !4 = metadata !{null} !5 = metadata !{i32 786688, metadata !6, metadata !"k", metadata !2, i32 6, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ] !6 = metadata !{i32 786443, metadata !18, metadata !1, i32 5, i32 12, i32 0} ; [ DW_TAG_lexical_block ] @@ -61,3 +62,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !17 = metadata !{metadata !5, metadata !8} !18 = metadata !{metadata !"k.cc", metadata !"/private/tmp"} !19 = metadata !{i32 0} +!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/ARM/divmod.ll b/test/CodeGen/ARM/divmod.ll index 06d6172..7be0c79 100644 --- a/test/CodeGen/ARM/divmod.ll +++ b/test/CodeGen/ARM/divmod.ll @@ -60,7 +60,7 @@ bb: %3 = load i32* @tabsize, align 4 %4 = srem i32 %cols, %3 %5 = sdiv i32 %cols, %3 - %6 = tail call i32 @llvm.objectsize.i32(i8* null, i1 false) + %6 = tail call i32 @llvm.objectsize.i32.p0i8(i8* null, i1 false) %7 = tail call i8* @__memset_chk(i8* null, i32 9, i32 %5, i32 %6) nounwind br label %bb1 @@ -71,7 +71,7 @@ bb1: ret void } -declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readnone +declare i32 @llvm.objectsize.i32.p0i8(i8*, i1) nounwind readnone declare i8* @__memset_chk(i8*, i32, i32, i32) nounwind ; rdar://11714607 diff --git a/test/CodeGen/ARM/fast-isel-align.ll b/test/CodeGen/ARM/fast-isel-align.ll index 4e28a10..9c9a188 100644 --- a/test/CodeGen/ARM/fast-isel-align.ll +++ b/test/CodeGen/ARM/fast-isel-align.ll @@ -1,22 +1,22 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB -; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM-STRICT-ALIGN -; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic 
-mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN - -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-linux-gnueabi | FileCheck %s --check-prefix=THUMB -; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM-STRICT-ALIGN -; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=thumbv7-linux-gnueabi | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN - -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-nacl | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-nacl | FileCheck %s --check-prefix=ARM-STRICT-ALIGN - -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown | FileCheck %s --check-prefix=ARM-STRICT-ALIGN -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN -; RUN: llc < %s -O0 -arm-no-strict-align -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -arm-no-strict-align -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown | FileCheck %s --check-prefix=THUMB -; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown | FileCheck %s --check-prefix=ARM-STRICT-ALIGN -; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 
-fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN +; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN + +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN +; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=thumbv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN + +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-nacl -verify-machineinstrs | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-nacl -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN + +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN +; RUN: llc < %s -O0 -arm-no-strict-align -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=ARM +; RUN: llc < 
%s -O0 -arm-no-strict-align -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN +; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN ; Check unaligned stores %struct.anon = type <{ float }> diff --git a/test/CodeGen/ARM/fast-isel-binary.ll b/test/CodeGen/ARM/fast-isel-binary.ll index 3159627..e1a2a4f 100644 --- a/test/CodeGen/ARM/fast-isel-binary.ll +++ b/test/CodeGen/ARM/fast-isel-binary.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB ; Test add with non-legal types diff --git a/test/CodeGen/ARM/fast-isel-br-phi.ll b/test/CodeGen/ARM/fast-isel-br-phi.ll index a0aba69..3b9d465 100644 --- a/test/CodeGen/ARM/fast-isel-br-phi.ll +++ b/test/CodeGen/ARM/fast-isel-br-phi.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios +; RUN: llc < %s -O0 
-verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios ; This test ensures HandlePHINodesInSuccessorBlocks() is able to promote basic ; non-legal integer types (i.e., i1, i8, i16). diff --git a/test/CodeGen/ARM/fast-isel-call.ll b/test/CodeGen/ARM/fast-isel-call.ll index d10a381..917a15d 100644 --- a/test/CodeGen/ARM/fast-isel-call.ll +++ b/test/CodeGen/ARM/fast-isel-call.ll @@ -1,12 +1,14 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=THUMB-LONG -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=THUMB-NOVFP +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic 
-mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=THUMB-LONG +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=THUMB-NOVFP + +; XFAIL: vg_leak ; Note that some of these tests assume that relocations are either ; movw/movt or constant pool loads. 
Different platforms will select diff --git a/test/CodeGen/ARM/fast-isel-cmp-imm.ll b/test/CodeGen/ARM/fast-isel-cmp-imm.ll index 45ef4ed..55baf48 100644 --- a/test/CodeGen/ARM/fast-isel-cmp-imm.ll +++ b/test/CodeGen/ARM/fast-isel-cmp-imm.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB define void @t1a(float %a) uwtable ssp { entry: diff --git a/test/CodeGen/ARM/fast-isel-conversion.ll b/test/CodeGen/ARM/fast-isel-conversion.ll index e40891a..5983493 100644 --- a/test/CodeGen/ARM/fast-isel-conversion.ll +++ b/test/CodeGen/ARM/fast-isel-conversion.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -verify-machineinstrs -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -verify-machineinstrs -O0 
-fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -verify-machineinstrs -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB ; Test sitofp diff --git a/test/CodeGen/ARM/fast-isel-ext.ll b/test/CodeGen/ARM/fast-isel-ext.ll index 15d0d3c..de0dd19 100644 --- a/test/CodeGen/ARM/fast-isel-ext.ll +++ b/test/CodeGen/ARM/fast-isel-ext.ll @@ -1,10 +1,10 @@ -; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=v7 -; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=v7 -; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv4t-apple-ios | FileCheck %s --check-prefix=prev6 -; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv4t-linux-gnueabi | FileCheck %s --check-prefix=prev6 -; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv5-apple-ios | FileCheck %s --check-prefix=prev6 -; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv5-linux-gnueabi | FileCheck %s --check-prefix=prev6 -; RUN: llc < %s -O0 -fast-isel-abort -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=v7 +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=v7 +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=v7 +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv4t-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=prev6 +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv4t-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=prev6 +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv5-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=prev6 +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv5-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=prev6 +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=thumbv7-apple-ios 
-verify-machineinstrs | FileCheck %s --check-prefix=v7 ; Can't test pre-ARMv6 Thumb because ARM FastISel currently only supports ; Thumb2. The ARMFastISel::ARMEmitIntExt code should work for Thumb by always diff --git a/test/CodeGen/ARM/fast-isel-icmp.ll b/test/CodeGen/ARM/fast-isel-icmp.ll index 3dc1109..85f449e 100644 --- a/test/CodeGen/ARM/fast-isel-icmp.ll +++ b/test/CodeGen/ARM/fast-isel-icmp.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB define i32 @icmp_i16_signed(i16 %a, i16 %b) nounwind { entry: diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll index 572ac3a..b08b72b 100644 --- a/test/CodeGen/ARM/fast-isel-intrinsic.ll +++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll @@ -1,9 +1,11 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB -; RUN: llc < %s -O0 
-fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=THUMB-LONG +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls -verify-machineinstrs | FileCheck %s --check-prefix=ARM-LONG +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -arm-long-calls -verify-machineinstrs | FileCheck %s --check-prefix=ARM-LONG +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-LONG + +; XFAIL: vg_leak ; Note that some of these tests assume that relocations are either ; movw/movt or constant pool loads. 
Different platforms will select diff --git a/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll b/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll index 2a88678..d9c9cc4 100644 --- a/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll +++ b/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll @@ -1,10 +1,10 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB define i32 @t1(i32* nocapture %ptr) nounwind readonly { entry: ; THUMB: t1 %add.ptr = getelementptr inbounds i32* %ptr, i32 -1 - %0 = load i32* %add.ptr, align 4, !tbaa !0 + %0 = load i32* %add.ptr, align 4 ; THUMB: ldr r{{[0-9]}}, [r0, #-4] ret i32 %0 } @@ -13,7 +13,7 @@ define i32 @t2(i32* nocapture %ptr) nounwind readonly { entry: ; THUMB: t2 %add.ptr = getelementptr inbounds i32* %ptr, i32 -63 - %0 = load i32* %add.ptr, align 4, !tbaa !0 + %0 = load i32* %add.ptr, align 4 ; THUMB: ldr r{{[0-9]}}, [r0, #-252] ret i32 %0 } @@ -22,7 +22,7 @@ define i32 @t3(i32* nocapture %ptr) nounwind readonly { entry: ; THUMB: t3 %add.ptr = getelementptr inbounds i32* %ptr, i32 -64 - %0 = load i32* %add.ptr, align 4, !tbaa !0 + %0 = load i32* %add.ptr, align 4 ; THUMB: ldr r{{[0-9]}}, [r0] ret i32 %0 } @@ -31,7 +31,7 @@ define zeroext i16 @t4(i16* nocapture %ptr) nounwind readonly { entry: ; THUMB: t4 %add.ptr = getelementptr inbounds i16* %ptr, i32 -1 - %0 = load i16* %add.ptr, align 2, !tbaa !3 + %0 = load i16* %add.ptr, align 2 ; THUMB: ldrh r{{[0-9]}}, [r0, #-2] ret i16 %0 } @@ -40,7 +40,7 @@ define zeroext i16 @t5(i16* nocapture %ptr) nounwind readonly { entry: ; THUMB: t5 %add.ptr = getelementptr inbounds i16* %ptr, i32 -127 - %0 = load i16* %add.ptr, align 2, !tbaa !3 + %0 = load i16* %add.ptr, align 2 ; THUMB: ldrh r{{[0-9]}}, [r0, #-254] ret i16 %0 } @@ -49,7 +49,7 @@ 
define zeroext i16 @t6(i16* nocapture %ptr) nounwind readonly { entry: ; THUMB: t6 %add.ptr = getelementptr inbounds i16* %ptr, i32 -128 - %0 = load i16* %add.ptr, align 2, !tbaa !3 + %0 = load i16* %add.ptr, align 2 ; THUMB: ldrh r{{[0-9]}}, [r0] ret i16 %0 } @@ -58,7 +58,7 @@ define zeroext i8 @t7(i8* nocapture %ptr) nounwind readonly { entry: ; THUMB: t7 %add.ptr = getelementptr inbounds i8* %ptr, i32 -1 - %0 = load i8* %add.ptr, align 1, !tbaa !1 + %0 = load i8* %add.ptr, align 1 ; THUMB: ldrb r{{[0-9]}}, [r0, #-1] ret i8 %0 } @@ -67,7 +67,7 @@ define zeroext i8 @t8(i8* nocapture %ptr) nounwind readonly { entry: ; THUMB: t8 %add.ptr = getelementptr inbounds i8* %ptr, i32 -255 - %0 = load i8* %add.ptr, align 1, !tbaa !1 + %0 = load i8* %add.ptr, align 1 ; THUMB: ldrb r{{[0-9]}}, [r0, #-255] ret i8 %0 } @@ -76,7 +76,7 @@ define zeroext i8 @t9(i8* nocapture %ptr) nounwind readonly { entry: ; THUMB: t9 %add.ptr = getelementptr inbounds i8* %ptr, i32 -256 - %0 = load i8* %add.ptr, align 1, !tbaa !1 + %0 = load i8* %add.ptr, align 1 ; THUMB: ldrb r{{[0-9]}}, [r0] ret i8 %0 } @@ -85,7 +85,7 @@ define void @t10(i32* nocapture %ptr) nounwind { entry: ; THUMB: t10 %add.ptr = getelementptr inbounds i32* %ptr, i32 -1 - store i32 0, i32* %add.ptr, align 4, !tbaa !0 + store i32 0, i32* %add.ptr, align 4 ; THUMB: str r{{[0-9]}}, [r0, #-4] ret void } @@ -94,7 +94,7 @@ define void @t11(i32* nocapture %ptr) nounwind { entry: ; THUMB: t11 %add.ptr = getelementptr inbounds i32* %ptr, i32 -63 - store i32 0, i32* %add.ptr, align 4, !tbaa !0 + store i32 0, i32* %add.ptr, align 4 ; THUMB: str r{{[0-9]}}, [r0, #-252] ret void } @@ -103,7 +103,7 @@ define void @t12(i32* nocapture %ptr) nounwind { entry: ; THUMB: t12 %add.ptr = getelementptr inbounds i32* %ptr, i32 -64 - store i32 0, i32* %add.ptr, align 4, !tbaa !0 + store i32 0, i32* %add.ptr, align 4 ; THUMB: str r{{[0-9]}}, [r0] ret void } @@ -112,7 +112,7 @@ define void @t13(i16* nocapture %ptr) nounwind { entry: ; THUMB: t13 
%add.ptr = getelementptr inbounds i16* %ptr, i32 -1 - store i16 0, i16* %add.ptr, align 2, !tbaa !3 + store i16 0, i16* %add.ptr, align 2 ; THUMB: strh r{{[0-9]}}, [r0, #-2] ret void } @@ -121,7 +121,7 @@ define void @t14(i16* nocapture %ptr) nounwind { entry: ; THUMB: t14 %add.ptr = getelementptr inbounds i16* %ptr, i32 -127 - store i16 0, i16* %add.ptr, align 2, !tbaa !3 + store i16 0, i16* %add.ptr, align 2 ; THUMB: strh r{{[0-9]}}, [r0, #-254] ret void } @@ -130,7 +130,7 @@ define void @t15(i16* nocapture %ptr) nounwind { entry: ; THUMB: t15 %add.ptr = getelementptr inbounds i16* %ptr, i32 -128 - store i16 0, i16* %add.ptr, align 2, !tbaa !3 + store i16 0, i16* %add.ptr, align 2 ; THUMB: strh r{{[0-9]}}, [r0] ret void } @@ -139,7 +139,7 @@ define void @t16(i8* nocapture %ptr) nounwind { entry: ; THUMB: t16 %add.ptr = getelementptr inbounds i8* %ptr, i32 -1 - store i8 0, i8* %add.ptr, align 1, !tbaa !1 + store i8 0, i8* %add.ptr, align 1 ; THUMB: strb r{{[0-9]}}, [r0, #-1] ret void } @@ -148,7 +148,7 @@ define void @t17(i8* nocapture %ptr) nounwind { entry: ; THUMB: t17 %add.ptr = getelementptr inbounds i8* %ptr, i32 -255 - store i8 0, i8* %add.ptr, align 1, !tbaa !1 + store i8 0, i8* %add.ptr, align 1 ; THUMB: strb r{{[0-9]}}, [r0, #-255] ret void } @@ -157,12 +157,7 @@ define void @t18(i8* nocapture %ptr) nounwind { entry: ; THUMB: t18 %add.ptr = getelementptr inbounds i8* %ptr, i32 -256 - store i8 0, i8* %add.ptr, align 1, !tbaa !1 + store i8 0, i8* %add.ptr, align 1 ; THUMB: strb r{{[0-9]}}, [r0] ret void } - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA", null} -!3 = metadata !{metadata !"short", metadata !1} diff --git a/test/CodeGen/ARM/fast-isel-pic.ll b/test/CodeGen/ARM/fast-isel-pic.ll index ad0f159..838c103 100644 --- a/test/CodeGen/ARM/fast-isel-pic.ll +++ b/test/CodeGen/ARM/fast-isel-pic.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -O0 
-verify-machineinstrs -fast-isel-abort -relocation-model=pic -mtriple=arm-apple-ios | FileCheck %s --check-prefix=ARM ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARMv7 ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=pic -mtriple=thumbv7-none-linux-gnueabi | FileCheck %s --check-prefix=THUMB-ELF -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=armv7-none-linux-gnueabi | FileCheck %s --check-prefix=ARMv7-ELF +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=pic -mtriple=armv7-none-linux-gnueabi | FileCheck %s --check-prefix=ARMv7-ELF @g = global i32 0, align 4 @@ -25,6 +25,8 @@ entry: ; ARMv7: add [[reg2]], pc, [[reg2]] ; ARMv7-ELF: LoadGV ; ARMv7-ELF: ldr r[[reg2:[0-9]+]], +; ARMv7-ELF: .LPC +; ARMv7-ELF-NEXT: add r[[reg2]], pc ; ARMv7-ELF: ldr r[[reg3:[0-9]+]], ; ARMv7-ELF: ldr r[[reg2]], [r[[reg3]], r[[reg2]]] %tmp = load i32* @g @@ -54,6 +56,8 @@ entry: ; ARMv7: ldr r[[reg5]], [r[[reg5]]] ; ARMv7-ELF: LoadIndirectSymbol ; ARMv7-ELF: ldr r[[reg5:[0-9]+]], +; ARMv7-ELF: .LPC +; ARMv7-ELF-NEXT: add r[[reg5]], pc ; ARMv7-ELF: ldr r[[reg6:[0-9]+]], ; ARMv7-ELF: ldr r[[reg5]], [r[[reg6]], r[[reg5]]] %tmp = load i32* @i diff --git a/test/CodeGen/ARM/fast-isel-ret.ll b/test/CodeGen/ARM/fast-isel-ret.ll index ba5412c..8a68309 100644 --- a/test/CodeGen/ARM/fast-isel-ret.ll +++ b/test/CodeGen/ARM/fast-isel-ret.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s +; RUN: llc < %s 
-O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s ; Sign-extend of i1 currently not supported by fast-isel ;define signext i1 @ret0(i1 signext %a) nounwind uwtable ssp { diff --git a/test/CodeGen/ARM/fast-isel-select.ll b/test/CodeGen/ARM/fast-isel-select.ll index bb88814..40f8807 100644 --- a/test/CodeGen/ARM/fast-isel-select.ll +++ b/test/CodeGen/ARM/fast-isel-select.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv8-apple-ios | FileCheck %s --check-prefix=THUMB define i32 @t1(i1 %c) nounwind readnone { entry: @@ -39,15 +40,16 @@ define i32 @t3(i1 %c, i32 %a, i32 %b) nounwind readnone { entry: ; ARM: t3 ; ARM: cmp r0, #0 -; ARM: movne r{{[1-9]}}, r{{[1-9]}} -; ARM: mov r0, r{{[1-9]}} +; ARM: movne r2, r1 +; ARM: add r0, r2, r1 ; THUMB: t3 ; THUMB: cmp r0, #0 ; THUMB: it ne -; THUMB: movne r{{[1-9]}}, r{{[1-9]}} -; THUMB: mov r0, r{{[1-9]}} +; THUMB: movne r2, r1 +; THUMB: add.w r0, r2, r1 %0 = select i1 %c, i32 %a, i32 %b - ret i32 %0 + %1 = add i32 %0, %a + ret i32 %1 } define i32 @t4(i1 %c) nounwind readnone { diff --git a/test/CodeGen/ARM/fast-isel-shifter.ll b/test/CodeGen/ARM/fast-isel-shifter.ll index 
dbb1ce2..eb4b2b2 100644 --- a/test/CodeGen/ARM/fast-isel-shifter.ll +++ b/test/CodeGen/ARM/fast-isel-shifter.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM define i32 @shl() nounwind ssp { entry: diff --git a/test/CodeGen/ARM/fast-isel-static.ll b/test/CodeGen/ARM/fast-isel-static.ll index 7d86cb9..93c14a0 100644 --- a/test/CodeGen/ARM/fast-isel-static.ll +++ b/test/CodeGen/ARM/fast-isel-static.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static -arm-long-calls | FileCheck -check-prefix=LONG %s -; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static -arm-long-calls | FileCheck -check-prefix=LONG %s -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static | FileCheck -check-prefix=NORM %s -; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static | FileCheck -check-prefix=NORM %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static -arm-long-calls | FileCheck -check-prefix=CHECK-LONG %s +; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static -arm-long-calls | FileCheck -check-prefix=CHECK-LONG %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 
-verify-machineinstrs -fast-isel-abort -relocation-model=static | FileCheck -check-prefix=CHECK-NORM %s +; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static | FileCheck -check-prefix=CHECK-NORM %s define void @myadd(float* %sum, float* %addend) nounwind { entry: diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll index 0cebc90..5981cab 100644 --- a/test/CodeGen/ARM/fast-isel.ll +++ b/test/CodeGen/ARM/fast-isel.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB ; Very basic fast-isel functionality. 
define i32 @test0(i32 %a, i32 %b) nounwind { diff --git a/test/CodeGen/ARM/fastisel-gep-promote-before-add.ll b/test/CodeGen/ARM/fastisel-gep-promote-before-add.ll new file mode 100644 index 0000000..a32ab6d --- /dev/null +++ b/test/CodeGen/ARM/fastisel-gep-promote-before-add.ll @@ -0,0 +1,18 @@ +; fastisel should not fold add with non-pointer bitwidth +; sext(a) + sext(b) != sext(a + b) +; RUN: llc -mtriple=armv7-apple-ios %s -O0 -o - | FileCheck %s + +define zeroext i8 @gep_promotion(i8* %ptr) nounwind uwtable ssp { +entry: + %ptr.addr = alloca i8*, align 8 + %add = add i8 64, 64 ; 0x40 + 0x40 + %0 = load i8** %ptr.addr, align 8 + + ; CHECK-LABEL: _gep_promotion: + ; CHECK: ldrb {{r[0-9]+}}, {{\[r[0-9]+\]}} + %arrayidx = getelementptr inbounds i8* %0, i8 %add + + %1 = load i8* %arrayidx, align 1 + ret i8 %1 +} + diff --git a/test/CodeGen/ARM/fold-stack-adjust.ll b/test/CodeGen/ARM/fold-stack-adjust.ll new file mode 100644 index 0000000..67fd129 --- /dev/null +++ b/test/CodeGen/ARM/fold-stack-adjust.ll @@ -0,0 +1,164 @@ +; RUN: llc -mtriple=thumbv7-apple-darwin-eabi < %s | FileCheck %s +; RUN: llc -mtriple=thumbv6m-apple-darwin-eabi -disable-fp-elim < %s | FileCheck %s --check-prefix=CHECK-T1 +; RUN: llc -mtriple=thumbv7-apple-darwin-ios -disable-fp-elim < %s | FileCheck %s --check-prefix=CHECK-IOS + + +declare void @bar(i8*) + +%bigVec = type [2 x double] + +@var = global %bigVec zeroinitializer + +define void @check_simple() minsize { +; CHECK-LABEL: check_simple: +; CHECK: push.w {r7, r8, r9, r10, r11, lr} +; CHECK-NOT: sub sp, sp, +; ... +; CHECK-NOT: add sp, sp, +; CHECK: pop.w {r0, r1, r2, r3, r11, pc} + +; CHECK-T1-LABEL: check_simple: +; CHECK-T1: push {r3, r4, r5, r6, r7, lr} +; CHECK-T1: add r7, sp, #16 +; CHECK-T1-NOT: sub sp, sp, +; ... +; CHECK-T1-NOT: add sp, sp, +; CHECK-T1: pop {r0, r1, r2, r3, r7, pc} + + ; iOS always has a frame pointer and messing with the push affects + ; how it's set in the prologue. Make sure we get that right. 
+; CHECK-IOS-LABEL: check_simple: +; CHECK-IOS: push {r3, r4, r5, r6, r7, lr} +; CHECK-NOT: sub sp, +; CHECK-IOS: add r7, sp, #16 +; CHECK-NOT: sub sp, +; ... +; CHECK-NOT: add sp, +; CHEC: pop {r3, r4, r5, r6, r7, pc} + + %var = alloca i8, i32 16 + call void @bar(i8* %var) + ret void +} + +define void @check_simple_too_big() minsize { +; CHECK-LABEL: check_simple_too_big: +; CHECK: push.w {r11, lr} +; CHECK: sub sp, +; ... +; CHECK: add sp, +; CHECK: pop.w {r11, pc} + %var = alloca i8, i32 64 + call void @bar(i8* %var) + ret void +} + +define void @check_vfp_fold() minsize { +; CHECK-LABEL: check_vfp_fold: +; CHECK: push {r[[GLOBREG:[0-9]+]], lr} +; CHECK: vpush {d6, d7, d8, d9} +; CHECK-NOT: sub sp, +; ... +; CHECK: vldmia r[[GLOBREG]], {d8, d9} +; ... +; CHECK-NOT: add sp, +; CHECK: vpop {d6, d7, d8, d9} +; CHECKL pop {r[[GLOBREG]], pc} + + ; iOS uses aligned NEON stores here, which is convenient since we + ; want to make sure that works too. +; CHECK-IOS-LABEL: check_vfp_fold: +; CHECK-IOS: push {r0, r1, r2, r3, r4, r7, lr} +; CHECK-IOS: sub.w r4, sp, #16 +; CHECK-IOS: bic r4, r4, #15 +; CHECK-IOS: mov sp, r4 +; CHECK-IOS: vst1.64 {d8, d9}, [r4:128] +; ... +; CHECK-IOS: add r4, sp, #16 +; CHECK-IOS: vld1.64 {d8, d9}, [r4:128] +; CHECK-IOS: mov sp, r4 +; CHECK-IOS: pop {r4, r7, pc} + + %var = alloca i8, i32 16 + + %tmp = load %bigVec* @var + call void @bar(i8* %var) + store %bigVec %tmp, %bigVec* @var + + ret void +} + +; This function should use just enough space that the "add sp, sp, ..." could be +; folded in except that doing so would clobber the value being returned. +define i64 @check_no_return_clobber() minsize { +; CHECK-LABEL: check_no_return_clobber: +; CHECK: push.w {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NOT: sub sp, +; ... 
+; CHECK: add sp, #40 +; CHECK: pop.w {r11, pc} + + ; Just to keep iOS FileCheck within previous function: +; CHECK-IOS-LABEL: check_no_return_clobber: + + %var = alloca i8, i32 40 + call void @bar(i8* %var) + ret i64 0 +} + +define arm_aapcs_vfpcc double @check_vfp_no_return_clobber() minsize { +; CHECK-LABEL: check_vfp_no_return_clobber: +; CHECK: push {r[[GLOBREG:[0-9]+]], lr} +; CHECK: vpush {d0, d1, d2, d3, d4, d5, d6, d7, d8, d9} +; CHECK-NOT: sub sp, +; ... +; CHECK: add sp, #64 +; CHECK: vpop {d8, d9} +; CHECK: pop {r[[GLOBREG]], pc} + + %var = alloca i8, i32 64 + + %tmp = load %bigVec* @var + call void @bar(i8* %var) + store %bigVec %tmp, %bigVec* @var + + ret double 1.0 +} + +@dbl = global double 0.0 + +; PR18136: there was a bug determining where the first eligible pop in a +; basic-block was when the entire block was epilogue code. +define void @test_fold_point(i1 %tst) minsize { +; CHECK-LABEL: test_fold_point: + + ; Important to check for beginning of basic block, because if it gets + ; if-converted the test is probably no longer checking what it should. +; CHECK: {{LBB[0-9]+_2}}: +; CHECK-NEXT: vpop {d7, d8} +; CHECK-NEXT: pop {r4, pc} + + ; With a guaranteed frame-pointer, we want to make sure that its offset in the + ; push block is correct, even if a few registers have been tacked onto a later + ; vpush (PR18160). +; CHECK-IOS-LABEL: test_fold_point: +; CHECK-IOS: push {r4, r7, lr} +; CHECK-IOS-NEXT: add r7, sp, #4 +; CHECK-IOS-NEXT: vpush {d7, d8} + + ; We want some memory so there's a stack adjustment to fold... + %var = alloca i8, i32 8 + + ; We want a long-lived floating register so that a callee-saved dN is used and + ; there's both a vpop and a pop. 
+ %live_val = load double* @dbl + br i1 %tst, label %true, label %end +true: + call void @bar(i8* %var) + store double %live_val, double* @dbl + br label %end +end: + ; We want the epilogue to be the only thing in a basic block so that we hit + ; the correct edge-case (first inst in block is correct one to adjust). + ret void +}
\ No newline at end of file diff --git a/test/CodeGen/ARM/ifconv-kills.ll b/test/CodeGen/ARM/ifconv-kills.ll new file mode 100644 index 0000000..bf54ba2 --- /dev/null +++ b/test/CodeGen/ARM/ifconv-kills.ll @@ -0,0 +1,30 @@ +; RUN: llc < %s -march arm -mcpu swift -verify-machineinstrs + +declare i32 @f(i32 %p0, i32 %p1) + +define i32 @foo(i32* %ptr) { +entry: + %cmp = icmp ne i32* %ptr, null + br i1 %cmp, label %if.then, label %if.else + +; present something which can be easily if-converted +if.then: + ; %R0 should be killed here + %valt = load i32* %ptr, align 4 + br label %return + +if.else: + ; %R0 should be killed here, however after if-conversion the %R0 kill + ; has to be removed because if.then will follow after this and still + ; read it. + %addr = getelementptr inbounds i32* %ptr, i32 4 + %vale = load i32* %addr, align 4 + br label %return + +return: + %phival = phi i32 [ %valt, %if.then ], [ %vale, %if.else ] + ; suggest to bring %phival/%valt/%vale into %R1 (because otherwise there + ; will be no kills in if.then/if.else) + %retval = call i32 @f (i32 0, i32 %phival) + ret i32 %retval +} diff --git a/test/CodeGen/ARM/ifconv-regmask.ll b/test/CodeGen/ARM/ifconv-regmask.ll new file mode 100644 index 0000000..d45f65f --- /dev/null +++ b/test/CodeGen/ARM/ifconv-regmask.ll @@ -0,0 +1,35 @@ +; RUN: llc < %s -mtriple=thumbv7s-apple-ios6.0.0 -verify-machineinstrs + +%union.opcode = type { i32 } + +@opcode = external global %union.opcode, align 4 + +; Function Attrs: nounwind ssp +define i32 @sfu() { +entry: + %bf.load = load i32* getelementptr inbounds (%union.opcode* @opcode, i32 0, i32 0), align 4 + %bf.lshr = lshr i32 %bf.load, 26 + %bf.clear = and i32 %bf.lshr, 7 + switch i32 %bf.clear, label %return [ + i32 0, label %sw.bb + i32 1, label %sw.bb1 + ] + +sw.bb: ; preds = %entry + %call = tail call i32 @func0() + br label %return + +sw.bb1: ; preds = %entry + %call2 = tail call i32 @func1() + br label %return + +return: ; preds = %sw.bb1, %sw.bb, %entry + 
%retval.0 = phi i32 [ %call2, %sw.bb1 ], [ %call, %sw.bb ], [ -1, %entry ] + ret i32 %retval.0 +} + +; Function Attrs: nounwind ssp +declare i32 @func0() + +; Function Attrs: nounwind ssp +declare i32 @func1() diff --git a/test/CodeGen/ARM/indirectbr.ll b/test/CodeGen/ARM/indirectbr.ll index 99e84a6..1aeeb91 100644 --- a/test/CodeGen/ARM/indirectbr.ll +++ b/test/CodeGen/ARM/indirectbr.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -relocation-model=pic -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=ARM ; RUN: llc < %s -relocation-model=pic -mtriple=thumbv6-apple-darwin | FileCheck %s -check-prefix=THUMB ; RUN: llc < %s -relocation-model=static -mtriple=thumbv7-apple-darwin | FileCheck %s -check-prefix=THUMB2 +; RUN: llc < %s -relocation-model=static -mtriple=thumbv8-apple-darwin | FileCheck %s -check-prefix=THUMB2 @nextaddr = global i8* null ; <i8**> [#uses=2] @C.0.2070 = private constant [5 x i8*] [i8* blockaddress(@foo, %L1), i8* blockaddress(@foo, %L2), i8* blockaddress(@foo, %L3), i8* blockaddress(@foo, %L4), i8* blockaddress(@foo, %L5)] ; <[5 x i8*]*> [#uses=1] @@ -48,14 +49,17 @@ L2: ; preds = %L3, %bb2 L1: ; preds = %L2, %bb2 %res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ] ; <i32> [#uses=1] +; ARM-LABEL: %L1 ; ARM: ldr [[R1:r[0-9]+]], LCPI ; ARM: add [[R1b:r[0-9]+]], pc, [[R1]] ; ARM: str [[R1b]] +; THUMB-LABEL: %L1 ; THUMB: ldr ; THUMB: add ; THUMB: ldr [[R2:r[0-9]+]], LCPI ; THUMB: add [[R2]], pc ; THUMB: str [[R2]] +; THUMB2-LABEL: %L1 ; THUMB2: ldr [[R2:r[0-9]+]], LCPI ; THUMB2-NEXT: str{{(.w)?}} [[R2]] store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4 diff --git a/test/CodeGen/ARM/inlineasm-64bit.ll b/test/CodeGen/ARM/inlineasm-64bit.ll index b23db10..683a0c4 100644 --- a/test/CodeGen/ARM/inlineasm-64bit.ll +++ b/test/CodeGen/ARM/inlineasm-64bit.ll @@ -85,3 +85,22 @@ define void @strd_test(i64* %p, i32 %lo, i32 %hi) nounwind { tail call void asm sideeffect "strd $0, ${0:H}, [$1]", "r,r"(i64 %4, i64* %p) nounwind ret void } + +; Make sure we don't 
untie operands by mistake. +define i64 @tied_64bit_test(i64 %in) nounwind { +; CHECK-LABEL: tied_64bit_test: +; CHECK: OUT([[OUTREG:r[0-9]+]]), IN([[OUTREG]]) + %addr = alloca i64 + call void asm "OUT($0), IN($1)", "=*rm,0"(i64* %addr, i64 %in) + ret i64 %in +} + +; If we explicitly name a tied operand, then the code should lookup the operand +; we were tied to for information about register class and so on. +define i64 @tied_64bit_lookback_test(i64 %in) nounwind { +; CHECK-LABEL: tied_64bit_lookback_test: +; CHECK: OUTLO([[LO:r[0-9]+]]) OUTHI([[HI:r[0-9]+]]) INLO([[LO]]) INHI([[HI]]) + %vars = call {i64, i32, i64} asm "OUTLO(${2:Q}) OUTHI(${2:R}) INLO(${3:Q}) INHI(${3:R})", "=r,=r,=r,2"(i64 %in) + %res = extractvalue {i64, i32, i64} %vars, 2 + ret i64 %res +} diff --git a/test/CodeGen/ARM/interrupt-attr.ll b/test/CodeGen/ARM/interrupt-attr.ll new file mode 100644 index 0000000..217fd69 --- /dev/null +++ b/test/CodeGen/ARM/interrupt-attr.ll @@ -0,0 +1,130 @@ +; RUN: llc -mtriple=arm-none-none-eabi -mcpu=cortex-a15 -o - %s | FileCheck --check-prefix=CHECK-A %s +; RUN: llc -mtriple=thumb-none-none-eabi -mcpu=cortex-a15 -o - %s | FileCheck --check-prefix=CHECK-A-THUMB %s +; RUN: llc -mtriple=thumb-apple-darwin -mcpu=cortex-m3 -o - %s | FileCheck --check-prefix=CHECK-M %s + +declare arm_aapcscc void @bar() + +@bigvar = global [16 x i32] zeroinitializer + +define arm_aapcscc void @irq_fn() alignstack(8) "interrupt"="IRQ" { + ; Must save all registers except banked sp and lr (we save lr anyway because + ; we actually need it at the end to execute the return ourselves). + + ; Also need special function return setting pc and CPSR simultaneously. 
+; CHECK-A-LABEL: irq_fn: +; CHECK-A: push {r0, r1, r2, r3, r11, lr} +; CHECK-A: add r11, sp, #16 +; CHECK-A: sub sp, sp, #{{[0-9]+}} +; CHECK-A: bic sp, sp, #7 +; CHECK-A: bl bar +; CHECK-A: sub sp, r11, #16 +; CHECK-A: pop {r0, r1, r2, r3, r11, lr} +; CHECK-A: subs pc, lr, #4 + +; CHECK-A-THUMB-LABEL: irq_fn: +; CHECK-A-THUMB: push {r0, r1, r2, r3, r4, r7, lr} +; CHECK-A-THUMB: mov r4, sp +; CHECK-A-THUMB: add r7, sp, #20 +; CHECK-A-THUMB: bic r4, r4, #7 +; CHECK-A-THUMB: bl bar +; CHECK-A-THUMB: sub.w r4, r7, #20 +; CHECK-A-THUMB: mov sp, r4 +; CHECK-A-THUMB: pop.w {r0, r1, r2, r3, r4, r7, lr} +; CHECK-A-THUMB: subs pc, lr, #4 + + ; Normal AAPCS function (r0-r3 pushed onto stack by hardware, lr set to + ; appropriate sentinel so no special return needed). +; CHECK-M: push {r4, r7, lr} +; CHECK-M: add r7, sp, #4 +; CHECK-M: sub sp, #4 +; CHECK-M: mov r4, sp +; CHECK-M: mov sp, r4 +; CHECK-M: blx _bar +; CHECK-M: subs r4, r7, #4 +; CHECK-M: mov sp, r4 +; CHECK-M: pop {r4, r7, pc} + + call arm_aapcscc void @bar() + ret void +} + +define arm_aapcscc void @fiq_fn() alignstack(8) "interrupt"="FIQ" { +; CHECK-A-LABEL: fiq_fn: +; CHECK-A: push {r0, r1, r2, r3, r4, r5, r6, r7, r11, lr} + ; 32 to get past r0, r1, ..., r7 +; CHECK-A: add r11, sp, #32 +; CHECK-A: sub sp, sp, #{{[0-9]+}} +; CHECK-A: bic sp, sp, #7 +; [...] + ; 32 must match above +; CHECK-A: sub sp, r11, #32 +; CHECK-A: pop {r0, r1, r2, r3, r4, r5, r6, r7, r11, lr} +; CHECK-A: subs pc, lr, #4 + + %val = load volatile [16 x i32]* @bigvar + store volatile [16 x i32] %val, [16 x i32]* @bigvar + ret void +} + +define arm_aapcscc void @swi_fn() alignstack(8) "interrupt"="SWI" { +; CHECK-A-LABEL: swi_fn: +; CHECK-A: push {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-A: add r11, sp, #44 +; CHECK-A: sub sp, sp, #{{[0-9]+}} +; CHECK-A: bic sp, sp, #7 +; [...] 
+; CHECK-A: sub sp, r11, #44 +; CHECK-A: pop {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-A: subs pc, lr, #0 + + %val = load volatile [16 x i32]* @bigvar + store volatile [16 x i32] %val, [16 x i32]* @bigvar + ret void +} + +define arm_aapcscc void @undef_fn() alignstack(8) "interrupt"="UNDEF" { +; CHECK-A-LABEL: undef_fn: +; CHECK-A: push {r0, r1, r2, r3, r11, lr} +; CHECK-A: add r11, sp, #16 +; CHECK-A: sub sp, sp, #{{[0-9]+}} +; CHECK-A: bic sp, sp, #7 +; [...] +; CHECK-A: sub sp, r11, #16 +; CHECK-A: pop {r0, r1, r2, r3, r11, lr} +; CHECK-A: subs pc, lr, #0 + + call void @bar() + ret void +} + +define arm_aapcscc void @abort_fn() alignstack(8) "interrupt"="ABORT" { +; CHECK-A-LABEL: abort_fn: +; CHECK-A: push {r0, r1, r2, r3, r11, lr} +; CHECK-A: add r11, sp, #16 +; CHECK-A: sub sp, sp, #{{[0-9]+}} +; CHECK-A: bic sp, sp, #7 +; [...] +; CHECK-A: sub sp, r11, #16 +; CHECK-A: pop {r0, r1, r2, r3, r11, lr} +; CHECK-A: subs pc, lr, #4 + + call void @bar() + ret void +} + +@var = global double 0.0 + +; We don't save VFP regs, since it would be a massive overhead in the general +; case. 
+define arm_aapcscc void @floating_fn() alignstack(8) "interrupt"="IRQ" { +; CHECK-A-LABEL: floating_fn: +; CHECK-A-NOT: vpush +; CHECK-A-NOT: vstr +; CHECK-A-NOT: vstm +; CHECK-A: vadd.f64 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + %lhs = load volatile double* @var + %rhs = load volatile double* @var + %sum = fadd double %lhs, %rhs + store double %sum, double* @var + ret void +} diff --git a/test/CodeGen/ARM/intrinsics-crypto.ll b/test/CodeGen/ARM/intrinsics-crypto.ll new file mode 100644 index 0000000..c038fe6 --- /dev/null +++ b/test/CodeGen/ARM/intrinsics-crypto.ll @@ -0,0 +1,57 @@ +; RUN: llc < %s -mtriple=armv8 -mattr=+crypto | FileCheck %s + +define arm_aapcs_vfpcc <16 x i8> @test_aesde(<16 x i8>* %a, <16 x i8> *%b) { + %tmp = load <16 x i8>* %a + %tmp2 = load <16 x i8>* %b + %tmp3 = call <16 x i8> @llvm.arm.neon.aesd.v16i8(<16 x i8> %tmp, <16 x i8> %tmp2) + ; CHECK: aesd.8 q{{[0-9]+}}, q{{[0-9]+}} + %tmp4 = call <16 x i8> @llvm.arm.neon.aese.v16i8(<16 x i8> %tmp3, <16 x i8> %tmp2) + ; CHECK: aese.8 q{{[0-9]+}}, q{{[0-9]+}} + %tmp5 = call <16 x i8> @llvm.arm.neon.aesimc.v16i8(<16 x i8> %tmp4) + ; CHECK: aesimc.8 q{{[0-9]+}}, q{{[0-9]+}} + %tmp6 = call <16 x i8> @llvm.arm.neon.aesmc.v16i8(<16 x i8> %tmp5) + ; CHECK: aesmc.8 q{{[0-9]+}}, q{{[0-9]+}} + ret <16 x i8> %tmp6 +} + +define arm_aapcs_vfpcc <4 x i32> @test_sha(<4 x i32> *%a, <4 x i32> *%b, <4 x i32> *%c) { + %tmp = load <4 x i32>* %a + %tmp2 = load <4 x i32>* %b + %tmp3 = load <4 x i32>* %c + %res1 = call <4 x i32> @llvm.arm.neon.sha1h.v4i32(<4 x i32> %tmp) + ; CHECK: sha1h.32 q{{[0-9]+}}, q{{[0-9]+}} + %res2 = call <4 x i32> @llvm.arm.neon.sha1c.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3, <4 x i32> %res1) + ; CHECK: sha1c.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} + %res3 = call <4 x i32> @llvm.arm.neon.sha1m.v4i32(<4 x i32> %res2, <4 x i32> %tmp3, <4 x i32> %res1) + ; CHECK: sha1m.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} + %res4 = call <4 x i32> @llvm.arm.neon.sha1p.v4i32(<4 x i32> %res3, <4 x i32> %tmp3, <4 
x i32> %res1) + ; CHECK: sha1p.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} + %res5 = call <4 x i32> @llvm.arm.neon.sha1su0.v4i32(<4 x i32> %res4, <4 x i32> %tmp3, <4 x i32> %res1) + ; CHECK: sha1su0.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} + %res6 = call <4 x i32> @llvm.arm.neon.sha1su1.v4i32(<4 x i32> %res5, <4 x i32> %res1) + ; CHECK: sha1su1.32 q{{[0-9]+}}, q{{[0-9]+}} + %res7 = call <4 x i32> @llvm.arm.neon.sha256h.v4i32(<4 x i32> %res6, <4 x i32> %tmp3, <4 x i32> %res1) + ; CHECK: sha256h.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} + %res8 = call <4 x i32> @llvm.arm.neon.sha256h2.v4i32(<4 x i32> %res7, <4 x i32> %tmp3, <4 x i32> %res1) + ; CHECK: sha256h2.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} + %res9 = call <4 x i32> @llvm.arm.neon.sha256su1.v4i32(<4 x i32> %res8, <4 x i32> %tmp3, <4 x i32> %res1) + ; CHECK: sha256su1.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} + %res10 = call <4 x i32> @llvm.arm.neon.sha256su0.v4i32(<4 x i32> %res9, <4 x i32> %tmp3) + ; CHECK: sha256su0.32 q{{[0-9]+}}, q{{[0-9]+}} + ret <4 x i32> %res10 +} + +declare <16 x i8> @llvm.arm.neon.aesd.v16i8(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.arm.neon.aese.v16i8(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.arm.neon.aesimc.v16i8(<16 x i8>) +declare <16 x i8> @llvm.arm.neon.aesmc.v16i8(<16 x i8>) +declare <4 x i32> @llvm.arm.neon.sha1h.v4i32(<4 x i32>) +declare <4 x i32> @llvm.arm.neon.sha1c.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.arm.neon.sha1m.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.arm.neon.sha1p.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.arm.neon.sha1su0.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.arm.neon.sha256h.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.arm.neon.sha256h2.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.arm.neon.sha256su1.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.arm.neon.sha256su0.v4i32(<4 x i32>, <4 x i32>) +declare <4 x 
i32> @llvm.arm.neon.sha1su1.v4i32(<4 x i32>, <4 x i32>) diff --git a/test/CodeGen/ARM/intrinsics-v8.ll b/test/CodeGen/ARM/intrinsics-v8.ll new file mode 100644 index 0000000..247bfc1 --- /dev/null +++ b/test/CodeGen/ARM/intrinsics-v8.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -mtriple=armv8 -mattr=+db | FileCheck %s + +define void @test() { + ; CHECK: dmb sy + call void @llvm.arm.dmb(i32 15) + ; CHECK: dmb osh + call void @llvm.arm.dmb(i32 3) + ; CHECK: dsb sy + call void @llvm.arm.dsb(i32 15) + ; CHECK: dsb ishld + call void @llvm.arm.dsb(i32 9) + ; CHECK: sevl + tail call void @llvm.arm.sevl() nounwind + ret void +} + +declare void @llvm.arm.dmb(i32) +declare void @llvm.arm.dsb(i32) +declare void @llvm.arm.sevl() nounwind diff --git a/test/CodeGen/ARM/lit.local.cfg b/test/CodeGen/ARM/lit.local.cfg index 4d75f58..8a3ba96 100644 --- a/test/CodeGen/ARM/lit.local.cfg +++ b/test/CodeGen/ARM/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp', '.test'] - targets = set(config.root.targets_to_build.split()) if not 'ARM' in targets: config.unsupported = True diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll index a99a7ec..3e986d80 100644 --- a/test/CodeGen/ARM/long_shift.ll +++ b/test/CodeGen/ARM/long_shift.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=arm | FileCheck %s define i64 @f0(i64 %A, i64 %B) { -; CHECK: f0 +; CHECK-LABEL: f0: ; CHECK: lsrs r3, r3, #1 ; CHECK-NEXT: rrx r2, r2 ; CHECK-NEXT: subs r0, r0, r2 @@ -13,7 +13,7 @@ define i64 @f0(i64 %A, i64 %B) { } define i32 @f1(i64 %x, i64 %y) { -; CHECK: f1 +; CHECK-LABEL: f1: ; CHECK: lsl{{.*}}r2 %a = shl i64 %x, %y %b = trunc i64 %a to i32 @@ -21,7 +21,7 @@ define i32 @f1(i64 %x, i64 %y) { } define i32 @f2(i64 %x, i64 %y) { -; CHECK: f2 +; CHECK-LABEL: f2: ; CHECK: lsr{{.*}}r2 ; CHECK-NEXT: rsb r3, r2, #32 ; CHECK-NEXT: sub r2, r2, #32 @@ -34,7 +34,7 @@ define i32 @f2(i64 %x, i64 %y) { } define i32 @f3(i64 %x, i64 %y) { -; CHECK: f3 +; CHECK-LABEL: f3: ; CHECK: lsr{{.*}}r2 ; 
CHECK-NEXT: rsb r3, r2, #32 ; CHECK-NEXT: sub r2, r2, #32 diff --git a/test/CodeGen/ARM/misched-copy-arm.ll b/test/CodeGen/ARM/misched-copy-arm.ll index c274545..5da335f 100644 --- a/test/CodeGen/ARM/misched-copy-arm.ll +++ b/test/CodeGen/ARM/misched-copy-arm.ll @@ -65,7 +65,7 @@ if.end28: ; preds = %if.then24, %while.c %dst.1 = phi %struct.rtx_def* [ undef, %if.then24 ], [ %dst.0, %while.cond ], [ %dst.0, %while.cond ] %arrayidx30 = getelementptr inbounds %struct.rtx_def* %dst.1, i32 0, i32 1, i32 0 %rtx31 = bitcast %union.rtunion_def* %arrayidx30 to %struct.rtx_def** - %0 = load %struct.rtx_def** %rtx31, align 4, !tbaa !0 + %0 = load %struct.rtx_def** %rtx31, align 4 br label %while.cond if.then46: ; preds = %while.cond @@ -77,7 +77,3 @@ if.end47: ; preds = %while.cond } attributes #0 = { nounwind ssp } - -!0 = metadata !{metadata !"any pointer", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/ARM/neon-spfp.ll b/test/CodeGen/ARM/neon-spfp.ll index 5385668..dd2e67f 100644 --- a/test/CodeGen/ARM/neon-spfp.ll +++ b/test/CodeGen/ARM/neon-spfp.ll @@ -1,20 +1,20 @@ -; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a5 | FileCheck %s -check-prefix=LINUXA5 -; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a8 | FileCheck %s -check-prefix=LINUXA8 -; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a9 | FileCheck %s -check-prefix=LINUXA9 -; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 | FileCheck %s -check-prefix=LINUXA15 -; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift | FileCheck %s -check-prefix=LINUXSWIFT +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a5 | FileCheck %s -check-prefix=CHECK-LINUXA5 +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK-LINUXA8 +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf 
-mcpu=cortex-a9 | FileCheck %s -check-prefix=CHECK-LINUXA9 +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 | FileCheck %s -check-prefix=CHECK-LINUXA15 +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift | FileCheck %s -check-prefix=CHECK-LINUXSWIFT -; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a5 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA5 -; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA8 -; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a9 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA9 -; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA15 -; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFESWIFT +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a5 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFEA5 +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFEA8 +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a9 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFEA9 +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFEA15 +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFESWIFT -; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a5 | FileCheck %s -check-prefix=DARWINA5 -; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=DARWINA8 -; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a9 | FileCheck %s -check-prefix=DARWINA9 -; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a15 | 
FileCheck %s -check-prefix=DARWINA15 -; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=swift | FileCheck %s -check-prefix=DARWINSWIFT +; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a5 | FileCheck %s -check-prefix=CHECK-DARWINA5 +; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK-DARWINA8 +; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a9 | FileCheck %s -check-prefix=CHECK-DARWINA9 +; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a15 | FileCheck %s -check-prefix=CHECK-DARWINA15 +; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=swift | FileCheck %s -check-prefix=CHECK-DARWINSWIFT ; This test makes sure we're not lowering VMUL.f32 D* (aka. NEON) for single-prec. FP ops, since ; NEON is not fully IEEE 754 compliant, unless unsafe-math is selected. diff --git a/test/CodeGen/ARM/neon_spill.ll b/test/CodeGen/ARM/neon_spill.ll index 277bd05..d286d16 100644 --- a/test/CodeGen/ARM/neon_spill.ll +++ b/test/CodeGen/ARM/neon_spill.ll @@ -24,7 +24,7 @@ declare arm_aapcs_vfpcc %2** @func4() define arm_aapcs_vfpcc void @foo(%3* nocapture) nounwind align 2 { call void @llvm.arm.neon.vst4.v4i32(i8* undef, <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>, <4 x i32> <i32 1082130432, i32 1084227584, i32 1086324736, i32 1088421888>, <4 x i32> <i32 1090519040, i32 1091567616, i32 1092616192, i32 1093664768>, <4 x i32> <i32 1094713344, i32 1095761920, i32 1096810496, i32 1097859072>, i32 16) nounwind %2 = call arm_aapcs_vfpcc %0** @func2() nounwind - %3 = load %0** %2, align 4, !tbaa !0 + %3 = load %0** %2, align 4 store float 0.000000e+00, float* undef, align 4 %4 = call arm_aapcs_vfpcc %2* @func3(%2* undef, %2* undef, i32 2956) nounwind call arm_aapcs_vfpcc void @func1(%0* %3, float* undef, float* undef, %2* undef) @@ -35,11 +35,11 @@ define arm_aapcs_vfpcc void @foo(%3* nocapture) nounwind align 2 { %6 = call arm_aapcs_vfpcc %2** @func4() nounwind %7 = call arm_aapcs_vfpcc %2* @func3(%2* undef, 
%2* undef, i32 2971) nounwind %8 = fadd float undef, -1.000000e+05 - store float %8, float* undef, align 16, !tbaa !3 + store float %8, float* undef, align 16 %9 = call arm_aapcs_vfpcc i32 @rand() nounwind %10 = fmul float undef, 2.000000e+05 %11 = fadd float %10, -1.000000e+05 - store float %11, float* undef, align 4, !tbaa !3 + store float %11, float* undef, align 4 call void @llvm.arm.neon.vst4.v4i32(i8* undef, <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>, <4 x i32> <i32 1082130432, i32 1084227584, i32 1086324736, i32 1088421888>, <4 x i32> <i32 1090519040, i32 1091567616, i32 1092616192, i32 1093664768>, <4 x i32> <i32 1094713344, i32 1095761920, i32 1096810496, i32 1097859072>, i32 16) nounwind ret void } @@ -47,8 +47,3 @@ define arm_aapcs_vfpcc void @foo(%3* nocapture) nounwind align 2 { declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind declare arm_aapcs_vfpcc i32 @rand() - -!0 = metadata !{metadata !"any pointer", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA", null} -!3 = metadata !{metadata !"float", metadata !1} diff --git a/test/CodeGen/ARM/no-fpu.ll b/test/CodeGen/ARM/no-fpu.ll new file mode 100644 index 0000000..fff4bcc --- /dev/null +++ b/test/CodeGen/ARM/no-fpu.ll @@ -0,0 +1,33 @@ +; RUN: llc < %s -mtriple=armv7-none-gnueabi -mattr=-neon,-vfp2 | FileCheck --check-prefix=NONEON-NOVFP %s +; RUN: llc < %s -mtriple=armv7-none-gnueabi -mattr=-neon | FileCheck --check-prefix=NONEON %s +; RUN: llc < %s -mtriple=armv7-none-gnueabi -mattr=-vfp2 | FileCheck --check-prefix=NOVFP %s +; RUN: llc < %s -mtriple=armv7-none-gnueabi -mattr=-neon,+vfp2 | FileCheck --check-prefix=NONEON-VFP %s + +; Check no NEON instructions are selected when feature is disabled. 
+define void @neonop(i64* nocapture readonly %a, i64* nocapture %b) #0 { + %1 = bitcast i64* %a to <2 x i64>* + %wide.load = load <2 x i64>* %1, align 8 + ; NONEON-NOVFP-NOT: vld1.64 + ; NONEON-NOT: vld1.64 + %add = add <2 x i64> %wide.load, %wide.load + ; NONEON-NOVFP-NOT: vadd.i64 + ; NONEON-NOT: vadd.i64 + %2 = bitcast i64* %b to <2 x i64>* + store <2 x i64> %add, <2 x i64>* %2, align 8 + ; NONEON-NOVFP-NOT: vst1.64 + ; NONEON-NOT: vst1.64 + ret void +} + +; Likewise with VFP instructions. +define double @fpmult(double %a, double %b) { + %res = fmul double %a, %b + ; NONEON-NOVFP-NOT: vmov + ; NONEON-NOVFP-NOT: vmul.f64 + ; NOVFP-NOT: vmov + ; NOVFP-NOT: vmul.f64 + ; NONEON-VFP: vmov + ; NONEON-VFP: vmul.f64 + ret double %res +} + diff --git a/test/CodeGen/ARM/noreturn.ll b/test/CodeGen/ARM/noreturn.ll new file mode 100644 index 0000000..4c876ce --- /dev/null +++ b/test/CodeGen/ARM/noreturn.ll @@ -0,0 +1,50 @@ +; RUN: llc -O3 -o - %s | FileCheck %s +; Test case from PR16882. +target triple = "thumbv7s-apple-ios" + +define i32 @test1() { +; CHECK-LABEL: @test1 +; CHECK-NOT: push +entry: + tail call void @overflow() #0 + unreachable +} + +; Function Attrs: noreturn nounwind +declare void @overflow() #0 + +define i32 @test2(i32 %x, i32 %y) { +; CHECK-LABEL: @test2 +; CHECK-NOT: push +; CHECK-NOT: pop +entry: + %conv = sext i32 %x to i64 + %conv1 = sext i32 %y to i64 + %mul = mul nsw i64 %conv1, %conv + %conv2 = trunc i64 %mul to i32 + %conv3 = sext i32 %conv2 to i64 + %cmp = icmp eq i64 %mul, %conv3 + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @overflow() #0 + unreachable + +if.end: ; preds = %entry + ret i32 %conv2 +} + +; Test case for PR17825. 
+define i32 @test3() { +; CHECK-LABEL: @test3 +; CHECK: push +entry: + tail call void @overflow_with_unwind() #1 + unreachable +} + +; Function Attrs: noreturn +declare void @overflow_with_unwind() #1 + +attributes #0 = { noreturn nounwind } +attributes #1 = { noreturn } diff --git a/test/CodeGen/ARM/optselect-regclass.ll b/test/CodeGen/ARM/optselect-regclass.ll new file mode 100644 index 0000000..1aa4520 --- /dev/null +++ b/test/CodeGen/ARM/optselect-regclass.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -march=arm -mcpu=swift -verify-machineinstrs +%union.opcode.0.2.5.8.15.28 = type { i32 } + +@opcode = external global %union.opcode.0.2.5.8.15.28, align 4 +@operands = external hidden global [50 x i8], align 4 +@.str86 = external hidden unnamed_addr constant [13 x i8], align 1 + +; Function Attrs: nounwind ssp +define void @xfr() { +entry: + %bf.load4 = load i32* getelementptr inbounds (%union.opcode.0.2.5.8.15.28* @opcode, i32 0, i32 0), align 4 + %bf.clear10 = and i32 %bf.load4, 65535 + %and11 = and i32 %bf.load4, 32768 + %tobool12 = icmp ne i32 %and11, 0 + %cond13 = select i1 %tobool12, i32 1073676288, i32 0 + %or = or i32 %cond13, %bf.clear10 + %shl = shl nuw i32 %or, 2 + %add = add i32 0, %shl + tail call void (i8*, i32, i32, i8*, ...)* @__sprintf_chk(i8* getelementptr inbounds ([50 x i8]* @operands, i32 0, i32 0), i32 0, i32 50, i8* getelementptr inbounds ([13 x i8]* @.str86, i32 0, i32 0), i32 undef, i32 undef, i32 %add) + ret void +} + +declare void @__sprintf_chk(i8*, i32, i32, i8*, ...) diff --git a/test/CodeGen/ARM/pic.ll b/test/CodeGen/ARM/pic.ll new file mode 100644 index 0000000..9fc7a63 --- /dev/null +++ b/test/CodeGen/ARM/pic.ll @@ -0,0 +1,23 @@ +; Check the function call in PIC relocation model. + +; If the relocation model is PIC, then the "bl" instruction for the function +; call to the external function should come with PLT fixup type. 
+ +; RUN: llc < %s -mtriple=armv7-unknown-linux-gnueabi \ +; RUN: -relocation-model=pic -fast-isel -verify-machineinstrs \ +; RUN: | FileCheck %s + +define void @test() { +entry: + + %0 = call i32 @get() +; CHECK: bl get(PLT) + + call void @put(i32 %0) +; CHECK: bl put(PLT) + + ret void +} + +declare i32 @get() +declare void @put(i32) diff --git a/test/CodeGen/ARM/prefetch-thumb.ll b/test/CodeGen/ARM/prefetch-thumb.ll new file mode 100644 index 0000000..e6f6ae8 --- /dev/null +++ b/test/CodeGen/ARM/prefetch-thumb.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -march=thumb -mattr=+v7 | FileCheck %s -check-prefix=THUMB2 +; TODO: This test case will be merged back into prefetch.ll when ARM mode issue is solved. + +declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind + +define void @t6() { +entry: +;ARM: t6: +;ARM: pld [sp] +;ARM: pld [sp, #50] + +;THUMB2: t6: +;THUMB2: pld [sp] +;THUMB2: pld [sp, #50] + +%red = alloca [100 x i8], align 1 +%0 = getelementptr inbounds [100 x i8]* %red, i32 0, i32 0 +%1 = getelementptr inbounds [100 x i8]* %red, i32 0, i32 50 +call void @llvm.prefetch(i8* %0, i32 0, i32 3, i32 1) +call void @llvm.prefetch(i8* %1, i32 0, i32 3, i32 1) +ret void +} diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll index 3fe2bb8..25484f4 100644 --- a/test/CodeGen/ARM/reg_sequence.ll +++ b/test/CodeGen/ARM/reg_sequence.ll @@ -239,10 +239,9 @@ bb14: ; preds = %bb6 ; PR7157 define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind { ; CHECK-LABEL: t9: -; CHECK: vldr -; CHECK-NOT: vmov d{{.*}}, d16 -; CHECK: vmov.i32 d17 +; CHECK: vmov.i32 d16, #0x0 ; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128] +; CHECK-NEXT: vorr d17, d16, d16 ; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128] %3 = bitcast double 0.000000e+00 to <2 x float> ; <<2 x float>> [#uses=2] %4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1] diff --git a/test/CodeGen/ARM/select-imm.ll 
b/test/CodeGen/ARM/select-imm.ll index 5e7506a..6f4bfb8 100644 --- a/test/CodeGen/ARM/select-imm.ll +++ b/test/CodeGen/ARM/select-imm.ll @@ -11,7 +11,7 @@ entry: ; ARMT2-LABEL: t1: ; ARMT2: movw [[R:r[0-1]]], #357 -; ARMT2: movgt [[R]], #123 +; ARMT2: movwgt [[R]], #123 ; THUMB2-LABEL: t1: ; THUMB2: movw [[R:r[0-1]]], #357 @@ -25,9 +25,9 @@ entry: define i32 @t2(i32 %c) nounwind readnone { entry: ; ARM-LABEL: t2: -; ARM: mov [[R:r[0-1]]], #123 -; ARM: movgt [[R]], #101 -; ARM: orrgt [[R]], [[R]], #256 +; ARM: mov [[R:r[0-9]+]], #101 +; ARM: orr [[R]], [[R]], #256 +; ARM: movle [[R]], #123 ; ARMT2-LABEL: t2: ; ARMT2: mov [[R:r[0-1]]], #123 @@ -50,7 +50,7 @@ entry: ; ARMT2-LABEL: t3: ; ARMT2: mov [[R:r[0-1]]], #0 -; ARMT2: moveq [[R]], #1 +; ARMT2: movweq [[R]], #1 ; THUMB2-LABEL: t3: ; THUMB2: mov{{(s|\.w)}} [[R:r[0-1]]], #0 diff --git a/test/CodeGen/ARM/select-undef.ll b/test/CodeGen/ARM/select-undef.ll new file mode 100644 index 0000000..23f7eb8 --- /dev/null +++ b/test/CodeGen/ARM/select-undef.ll @@ -0,0 +1,7 @@ +; RUN: llc < %s -march=arm -mcpu=swift -verify-machineinstrs +define i32 @func(i32 %arg0, i32 %arg1) { +entry: + %cmp = icmp slt i32 %arg0, 10 + %v = select i1 %cmp, i32 undef, i32 %arg1 + ret i32 %v +} diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll index d5c3a27..ed006d6 100644 --- a/test/CodeGen/ARM/select.ll +++ b/test/CodeGen/ARM/select.ll @@ -59,7 +59,7 @@ entry: define double @f7(double %a, double %b) { ;CHECK-LABEL: f7: ;CHECK: movlt -;CHECK: movlt +;CHECK: movge ;CHECK-VFP-LABEL: f7: ;CHECK-VFP: vmovmi %tmp = fcmp olt double %a, 1.234e+00 @@ -75,7 +75,7 @@ define double @f7(double %a, double %b) { ; into the constant pool based on the value of the "icmp". 
If we have one "it" ; block generated, odds are good that we have close to the ideal code for this: ; -; CHECK-NEON: _f8: +; CHECK-NEON-LABEL: f8: ; CHECK-NEON: movw [[R3:r[0-9]+]], #1123 ; CHECK-NEON: adr [[R2:r[0-9]+]], LCPI7_0 ; CHECK-NEON-NEXT: cmp r0, [[R3]] @@ -113,7 +113,7 @@ entry: ret void } -; CHECK: f10 +; CHECK-LABEL: f10: define float @f10(i32 %a, i32 %b) nounwind uwtable readnone ssp { ; CHECK-NOT: floatsisf %1 = icmp eq i32 %a, %b @@ -122,7 +122,7 @@ define float @f10(i32 %a, i32 %b) nounwind uwtable readnone ssp { ret float %3 } -; CHECK: f11 +; CHECK-LABEL: f11: define float @f11(i32 %a, i32 %b) nounwind uwtable readnone ssp { ; CHECK-NOT: floatsisf %1 = icmp eq i32 %a, %b @@ -130,7 +130,7 @@ define float @f11(i32 %a, i32 %b) nounwind uwtable readnone ssp { ret float %2 } -; CHECK: f12 +; CHECK-LABEL: f12: define float @f12(i32 %a, i32 %b) nounwind uwtable readnone ssp { ; CHECK-NOT: floatunsisf %1 = icmp eq i32 %a, %b diff --git a/test/CodeGen/ARM/setcc-sentinals.ll b/test/CodeGen/ARM/setcc-sentinals.ll new file mode 100644 index 0000000..8878f9b --- /dev/null +++ b/test/CodeGen/ARM/setcc-sentinals.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -mcpu=cortex-a8 -march=arm -asm-verbose=false | FileCheck %s + +define zeroext i1 @test0(i32 %x) nounwind { +; CHECK-LABEL: test0: +; CHECK-NEXT: add [[REG:(r[0-9]+)|(lr)]], r0, #1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: cmp [[REG]], #1 +; CHECK-NEXT: movwhi r0, #1 +; CHECK-NEXT: bx lr + %cmp1 = icmp ne i32 %x, -1 + %not.cmp = icmp ne i32 %x, 0 + %.cmp1 = and i1 %cmp1, %not.cmp + ret i1 %.cmp1 +} diff --git a/test/CodeGen/ARM/sincos.ll b/test/CodeGen/ARM/sincos.ll new file mode 100644 index 0000000..30b2664 --- /dev/null +++ b/test/CodeGen/ARM/sincos.ll @@ -0,0 +1,38 @@ +; RUN: llc < %s -mtriple=armv7-apple-ios6 -mcpu=cortex-a8 | FileCheck %s --check-prefix=NOOPT +; RUN: llc < %s -mtriple=armv7-apple-ios7 -mcpu=cortex-a8 | FileCheck %s --check-prefix=SINCOS + +; Combine sin / cos into a single call. 
+; rdar://12856873 + +define float @test1(float %x) nounwind { +entry: +; SINCOS-LABEL: test1: +; SINCOS: bl ___sincosf_stret + +; NOOPT-LABEL: test1: +; NOOPT: bl _sinf +; NOOPT: bl _cosf + %call = tail call float @sinf(float %x) nounwind readnone + %call1 = tail call float @cosf(float %x) nounwind readnone + %add = fadd float %call, %call1 + ret float %add +} + +define double @test2(double %x) nounwind { +entry: +; SINCOS-LABEL: test2: +; SINCOS: bl ___sincos_stret + +; NOOPT-LABEL: test2: +; NOOPT: bl _sin +; NOOPT: bl _cos + %call = tail call double @sin(double %x) nounwind readnone + %call1 = tail call double @cos(double %x) nounwind readnone + %add = fadd double %call, %call1 + ret double %add +} + +declare float @sinf(float) readonly +declare double @sin(double) readonly +declare float @cosf(float) readonly +declare double @cos(double) readonly diff --git a/test/CodeGen/ARM/stack-protector-bmovpcb_call.ll b/test/CodeGen/ARM/stack-protector-bmovpcb_call.ll new file mode 100644 index 0000000..f5cda14 --- /dev/null +++ b/test/CodeGen/ARM/stack-protector-bmovpcb_call.ll @@ -0,0 +1,32 @@ +; RUN: llc -O3 -mcpu=swift -mtriple=armv7s-apple-ios6.0.0 %s -o /dev/null +; rdar://14811848 + +; Make sure that we do not emit the BMOVPCB_CALL instruction for now or if we +; fix the assumptions in its implementation that we do not crash when doing it. 
+ +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" +target triple = "armv7s-apple-ios6.0.0" + +@main.title = private unnamed_addr constant [15 x i8] c"foo and stuff\0A\00", align 1 +@.str = private unnamed_addr constant [3 x i8] c"%s\00", align 1 + +; Function Attrs: nounwind optsize ssp +define i32 @main() #0 { +entry: + %title = alloca [15 x i8], align 1 + %0 = getelementptr inbounds [15 x i8]* %title, i32 0, i32 0 + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* getelementptr inbounds ([15 x i8]* @main.title, i32 0, i32 0), i32 15, i32 1, i1 false) + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i8* %0) #3 + ret i32 0 +} + +; Function Attrs: nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1 + +; Function Attrs: nounwind optsize +declare i32 @printf(i8* nocapture readonly, ...) #2 + +attributes #0 = { nounwind optsize ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind } +attributes #2 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind optsize } diff --git a/test/CodeGen/ARM/struct-byval-frame-index.ll b/test/CodeGen/ARM/struct-byval-frame-index.ll index ae68ce5..465ee12 100644 --- a/test/CodeGen/ARM/struct-byval-frame-index.ll +++ b/test/CodeGen/ARM/struct-byval-frame-index.ll @@ -72,10 +72,10 @@ declare void @SetMotionVectorsMB(%structK* nocapture, i32) #1 ; Function Attrs: nounwind define void 
@set_stored_macroblock_parameters() #1 { entry: - %0 = load %structB** @img, align 4, !tbaa !0 - %1 = load i32* undef, align 4, !tbaa !3 + %0 = load %structB** @img, align 4 + %1 = load i32* undef, align 4 %mb_data = getelementptr inbounds %structB* %0, i32 0, i32 61 - %2 = load %structK** %mb_data, align 4, !tbaa !0 + %2 = load %structK** %mb_data, align 4 br label %for.body for.body: ; preds = %for.body, %entry @@ -109,7 +109,7 @@ for.body119: ; preds = %for.body119, %for.c br i1 undef, label %for.body119, label %if.end164 if.end164: ; preds = %for.body119, %for.cond47.preheader, %if.end43 - store i32*** null, i32**** @cofDC, align 4, !tbaa !0 + store i32*** null, i32**** @cofDC, align 4 %mb_type = getelementptr inbounds %structK* %2, i32 %1, i32 8 br i1 undef, label %if.end230, label %if.then169 @@ -134,7 +134,7 @@ if.then233: ; preds = %if.end230 if.end236: ; preds = %if.end230 %cmp242 = icmp ne i16 undef, 8 - %4 = load i32* @luma_transform_size_8x8_flag, align 4, !tbaa !3 + %4 = load i32* @luma_transform_size_8x8_flag, align 4 %tobool245 = icmp ne i32 %4, 0 %or.cond812 = or i1 %cmp242, %tobool245 br i1 %or.cond812, label %if.end249, label %land.lhs.true246 @@ -150,11 +150,11 @@ if.then248: ; preds = %land.lhs.true246 br label %if.end249 if.end249: ; preds = %if.then248, %land.lhs.true246, %if.end236 - %5 = load i32* @luma_transform_size_8x8_flag, align 4, !tbaa !3 - %6 = load %structA** @rdopt, align 4, !tbaa !0 + %5 = load i32* @luma_transform_size_8x8_flag, align 4 + %6 = load %structA** @rdopt, align 4 %luma_transform_size_8x8_flag264 = getelementptr inbounds %structA* %6, i32 0, i32 21 - store i32 %5, i32* %luma_transform_size_8x8_flag264, align 4, !tbaa !3 - %7 = load i32* undef, align 4, !tbaa !3 + store i32 %5, i32* %luma_transform_size_8x8_flag264, align 4 + %7 = load i32* undef, align 4 %add281 = add nsw i32 %7, 0 br label %for.body285 @@ -162,36 +162,36 @@ for.body285: ; preds = %for.inc503, %if.end %8 = phi %structB* [ undef, %if.end249 ], [ 
%.pre1155, %for.inc503 ] %i.21103 = phi i32 [ 0, %if.end249 ], [ %inc504, %for.inc503 ] %block_x286 = getelementptr inbounds %structB* %8, i32 0, i32 37 - %9 = load i32* %block_x286, align 4, !tbaa !3 + %9 = load i32* %block_x286, align 4 %add287 = add nsw i32 %9, %i.21103 %shr289 = ashr i32 %i.21103, 1 %add290 = add nsw i32 %shr289, 0 %arrayidx292 = getelementptr inbounds %structK* %2, i32 %1, i32 15, i32 %add290 - %10 = load %structM** @enc_picture, align 4, !tbaa !0 + %10 = load %structM** @enc_picture, align 4 %ref_idx = getelementptr inbounds %structM* %10, i32 0, i32 35 - %11 = load i8**** %ref_idx, align 4, !tbaa !0 - %12 = load i8*** %11, align 4, !tbaa !0 + %11 = load i8**** %ref_idx, align 4 + %12 = load i8*** %11, align 4 %arrayidx313 = getelementptr inbounds i8** %12, i32 %add281 - %13 = load i8** %arrayidx313, align 4, !tbaa !0 + %13 = load i8** %arrayidx313, align 4 %arrayidx314 = getelementptr inbounds i8* %13, i32 %add287 - store i8 -1, i8* %arrayidx314, align 1, !tbaa !1 - %14 = load %structB** @img, align 4, !tbaa !0 + store i8 -1, i8* %arrayidx314, align 1 + %14 = load %structB** @img, align 4 %MbaffFrameFlag327 = getelementptr inbounds %structB* %14, i32 0, i32 100 - %15 = load i32* %MbaffFrameFlag327, align 4, !tbaa !3 + %15 = load i32* %MbaffFrameFlag327, align 4 %tobool328 = icmp eq i32 %15, 0 br i1 %tobool328, label %if.end454, label %if.then329 if.then329: ; preds = %for.body285 - %16 = load %structA** @rdopt, align 4, !tbaa !0 + %16 = load %structA** @rdopt, align 4 br label %if.end454 if.end454: ; preds = %if.then329, %for.body285 - %17 = load i32* %arrayidx292, align 4, !tbaa !3 + %17 = load i32* %arrayidx292, align 4 %cmp457 = icmp eq i32 %17, 0 br i1 %cmp457, label %if.then475, label %lor.lhs.false459 lor.lhs.false459: ; preds = %if.end454 - %18 = load i32* %mb_type, align 4, !tbaa !3 + %18 = load i32* %mb_type, align 4 switch i32 %18, label %for.inc503 [ i32 9, label %if.then475 i32 10, label %if.then475 @@ -200,12 +200,12 @@ 
lor.lhs.false459: ; preds = %if.end454 ] if.then475: ; preds = %lor.lhs.false459, %lor.lhs.false459, %lor.lhs.false459, %lor.lhs.false459, %if.end454 - store i16 0, i16* undef, align 2, !tbaa !4 + store i16 0, i16* undef, align 2 br label %for.inc503 for.inc503: ; preds = %if.then475, %lor.lhs.false459 %inc504 = add nsw i32 %i.21103, 1 - %.pre1155 = load %structB** @img, align 4, !tbaa !0 + %.pre1155 = load %structB** @img, align 4 br label %for.body285 } @@ -216,10 +216,4 @@ declare void @update_offset_params(i32, i32) #1 declare void @RestoreMVBlock8x8(i32, i32, %structN* byval nocapture, i32) #1 attributes #0 = { nounwind } -attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } - -!0 = metadata !{metadata !"any pointer", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} -!3 = metadata !{metadata !"int", metadata !1} -!4 = metadata !{metadata !"short", metadata !1} +attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/ARM/struct_byval.ll b/test/CodeGen/ARM/struct_byval.ll index 012b994..130925a 100644 --- a/test/CodeGen/ARM/struct_byval.ll +++ b/test/CodeGen/ARM/struct_byval.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=armv7-apple-ios6.0 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-ios6.0 | FileCheck %s -check-prefix=THUMB ; rdar://9877866 %struct.SmallStruct = type { i32, [8 x i32], [37 x i8] } @@ -10,6 +11,10 @@ entry: ; CHECK: ldr ; CHECK: str ; CHECK-NOT:bne +; THUMB-LABEL: f: +; THUMB: ldr +; THUMB: str +; THUMB-NOT:bne %st = alloca %struct.SmallStruct, align 4 %call = call i32 @e1(%struct.SmallStruct* byval %st) ret i32 0 @@ -23,6 +28,11 @@ 
entry: ; CHECK: sub ; CHECK: str ; CHECK: bne +; THUMB-LABEL: g: +; THUMB: ldr +; THUMB: sub +; THUMB: str +; THUMB: bne %st = alloca %struct.LargeStruct, align 4 %call = call i32 @e2(%struct.LargeStruct* byval %st) ret i32 0 @@ -36,6 +46,11 @@ entry: ; CHECK: sub ; CHECK: vst1 ; CHECK: bne +; THUMB-LABEL: h: +; THUMB: vld1 +; THUMB: sub +; THUMB: vst1 +; THUMB: bne %st = alloca %struct.LargeStruct, align 16 %call = call i32 @e3(%struct.LargeStruct* byval align 16 %st) ret i32 0 @@ -49,8 +64,10 @@ declare i32 @e3(%struct.LargeStruct* nocapture byval align 16 %in) nounwind ; We can't do tail call since address of s is passed to the callee and part of ; s is in caller's local frame. define void @f3(%struct.SmallStruct* nocapture byval %s) nounwind optsize { -; CHECK: f3 +; CHECK-LABEL: f3 ; CHECK: bl _consumestruct +; THUMB-LABEL: f3 +; THUMB: blx _consumestruct entry: %0 = bitcast %struct.SmallStruct* %s to i8* tail call void @consumestruct(i8* %0, i32 80) optsize @@ -58,8 +75,10 @@ entry: } define void @f4(%struct.SmallStruct* nocapture byval %s) nounwind optsize { -; CHECK: f4 +; CHECK-LABEL: f4 ; CHECK: bl _consumestruct +; THUMB-LABEL: f4 +; THUMB: blx _consumestruct entry: %addr = getelementptr inbounds %struct.SmallStruct* %s, i32 0, i32 0 %0 = bitcast i32* %addr to i8* @@ -69,8 +88,10 @@ entry: ; We can do tail call here since s is in the incoming argument area. 
define void @f5(i32 %a, i32 %b, i32 %c, i32 %d, %struct.SmallStruct* nocapture byval %s) nounwind optsize { -; CHECK: f5 +; CHECK-LABEL: f5 ; CHECK: b _consumestruct +; THUMB-LABEL: f5 +; THUMB: b.w _consumestruct entry: %0 = bitcast %struct.SmallStruct* %s to i8* tail call void @consumestruct(i8* %0, i32 80) optsize @@ -78,8 +99,10 @@ entry: } define void @f6(i32 %a, i32 %b, i32 %c, i32 %d, %struct.SmallStruct* nocapture byval %s) nounwind optsize { -; CHECK: f6 +; CHECK-LABEL: f6 ; CHECK: b _consumestruct +; THUMB-LABEL: f6 +; THUMB: b.w _consumestruct entry: %addr = getelementptr inbounds %struct.SmallStruct* %s, i32 0, i32 0 %0 = bitcast i32* %addr to i8* @@ -88,3 +111,19 @@ entry: } declare void @consumestruct(i8* nocapture %structp, i32 %structsize) nounwind + +; PR17309 +%struct.I.8 = type { [10 x i32], [3 x i8] } + +declare void @use_I(%struct.I.8* byval) +define void @test_I_16() { +; CHECK-LABEL: test_I_16 +; CHECK: ldrb +; CHECK: strb +; THUMB-LABEL: test_I_16 +; THUMB: ldrb +; THUMB: strb +entry: + call void @use_I(%struct.I.8* byval align 16 undef) + ret void +} diff --git a/test/CodeGen/ARM/struct_byval_arm_t1_t2.ll b/test/CodeGen/ARM/struct_byval_arm_t1_t2.ll new file mode 100644 index 0000000..1899269 --- /dev/null +++ b/test/CodeGen/ARM/struct_byval_arm_t1_t2.ll @@ -0,0 +1,1523 @@ +;RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=+neon -verify-machineinstrs -filetype=obj | llvm-objdump -triple armv7-none-linux-gnueabi -disassemble - | FileCheck %s --check-prefix=ARM +;RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabi -mattr=+neon -verify-machineinstrs -filetype=obj | llvm-objdump -triple thumbv7-none-linux-gnueabi -disassemble - | FileCheck %s --check-prefix=THUMB2 +;RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=-neon -verify-machineinstrs -filetype=obj | llvm-objdump -triple armv7-none-linux-gnueabi -disassemble - | FileCheck %s --check-prefix=NO_NEON +;We want to have both positive and negative checks for thumb1. 
These checks +;are not easy to do in a single pass so we generate the output once to a +;temp file and run filecheck twice with different prefixes. +;RUN: llc < %s -mtriple=thumbv5-none-linux-gnueabi -verify-machineinstrs -filetype=obj | llvm-objdump -triple thumbv5-none-linux-gnueabi -disassemble - > %t +;RUN: cat %t | FileCheck %s --check-prefix=THUMB1 +;RUN: cat %t | FileCheck %s --check-prefix=T1POST + +;This file contains auto generated tests for the lowering of passing structs +;byval in the arm backend. We have tests for both packed and unpacked +;structs at varying alignments. Each test is run for arm, thumb2 and thumb1. +;We check for the strings in the generated object code using llvm-objdump +;because it provides better assurance that we are generating instructions +;for the correct architecture. Otherwise we could accidentally generate an +;ARM instruction for THUMB1 and wouldn't detect it because the assembly +;code representation is the same, but the object code would be generated +;incorrectly. 
For each test we check for the label, a load instruction of the +;correct form, a branch if it will be generated with a loop, and the leftover +;cleanup if the number of bytes does not divide evenly by the store size + +%struct.A = type <{ [ 10 x i32 ] }> ; 40 bytes +declare void @use_A(%struct.A* byval) +%struct.B = type <{ [ 10 x i32 ], i8 }> ; 41 bytes +declare void @use_B(%struct.B* byval) +%struct.C = type <{ [ 10 x i32 ], [ 3 x i8 ] }> ; 43 bytes +declare void @use_C(%struct.C* byval) +%struct.D = type <{ [ 100 x i32 ] }> ; 400 bytes +declare void @use_D(%struct.D* byval) +%struct.E = type <{ [ 100 x i32 ], i8 }> ; 401 bytes +declare void @use_E(%struct.E* byval) +%struct.F = type <{ [ 100 x i32 ], [ 3 x i8 ] }> ; 403 bytes +declare void @use_F(%struct.F* byval) +%struct.G = type { [ 10 x i32 ] } ; 40 bytes +declare void @use_G(%struct.G* byval) +%struct.H = type { [ 10 x i32 ], i8 } ; 41 bytes +declare void @use_H(%struct.H* byval) +%struct.I = type { [ 10 x i32 ], [ 3 x i8 ] } ; 43 bytes +declare void @use_I(%struct.I* byval) +%struct.J = type { [ 100 x i32 ] } ; 400 bytes +declare void @use_J(%struct.J* byval) +%struct.K = type { [ 100 x i32 ], i8 } ; 401 bytes +declare void @use_K(%struct.K* byval) +%struct.L = type { [ 100 x i32 ], [ 3 x i8 ] } ; 403 bytes +declare void @use_L(%struct.L* byval) + +;ARM-LABEL: test_A_1: +;THUMB2-LABEL: test_A_1: +;NO_NEON-LABEL:test_A_1: +;THUMB1-LABEL: test_A_1: +;T1POST-LABEL: test_A_1: + define void @test_A_1() { +;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #1 + +;T1POST-NOT: ldrb r{{[0-9]+}}, [{{.*}}], #1 + entry: + %a = alloca %struct.A, align 1 + call void @use_A(%struct.A* byval align 1 %a) + ret void + } +;ARM-LABEL: test_A_2: +;THUMB2-LABEL: test_A_2: +;NO_NEON-LABEL:test_A_2: +;THUMB1-LABEL: test_A_2: +;T1POST-LABEL: test_A_2: + define void 
@test_A_2() { +;ARM: ldrh r{{[0-9]+}}, [{{.*}}], #2 + +;THUMB2: ldrh r{{[0-9]+}}, [{{.*}}], #2 + +;NO_NEON: ldrh r{{[0-9]+}}, [{{.*}}], #2 + +;THUMB1: ldrh r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #2 + +;T1POST-NOT: ldrh r{{[0-9]+}}, [{{.*}}], #2 + entry: + %a = alloca %struct.A, align 2 + call void @use_A(%struct.A* byval align 2 %a) + ret void + } +;ARM-LABEL: test_A_4: +;THUMB2-LABEL: test_A_4: +;NO_NEON-LABEL:test_A_4: +;THUMB1-LABEL: test_A_4: +;T1POST-LABEL: test_A_4: + define void @test_A_4() { +;ARM: ldr r{{[0-9]+}}, [{{.*}}], #4 + +;THUMB2: ldr r{{[0-9]+}}, [{{.*}}], #4 + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 + +;T1POST-NOT: ldr r{{[0-9]+}}, [{{.*}}], #4 + entry: + %a = alloca %struct.A, align 4 + call void @use_A(%struct.A* byval align 4 %a) + ret void + } +;ARM-LABEL: test_A_8: +;THUMB2-LABEL: test_A_8: +;NO_NEON-LABEL:test_A_8: +;THUMB1-LABEL: test_A_8: +;T1POST-LABEL: test_A_8: + define void @test_A_8() { +;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + +;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 + +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + entry: + %a = alloca %struct.A, align 8 + call void @use_A(%struct.A* byval align 8 %a) + ret void + } +;ARM-LABEL: test_A_16: +;THUMB2-LABEL: test_A_16: +;NO_NEON-LABEL:test_A_16: +;THUMB1-LABEL: test_A_16: +;T1POST-LABEL: test_A_16: + define void @test_A_16() { +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! 
+ +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 + +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! + entry: + %a = alloca %struct.A, align 16 + call void @use_A(%struct.A* byval align 16 %a) + ret void + } +;ARM-LABEL: test_B_1: +;THUMB2-LABEL: test_B_1: +;NO_NEON-LABEL:test_B_1: +;THUMB1-LABEL: test_B_1: +;T1POST-LABEL: test_B_1: + define void @test_B_1() { +;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #1 + +;T1POST-NOT: ldrb r{{[0-9]+}}, [{{.*}}], #1 + entry: + %a = alloca %struct.B, align 1 + call void @use_B(%struct.B* byval align 1 %a) + ret void + } +;ARM-LABEL: test_B_2: +;THUMB2-LABEL: test_B_2: +;NO_NEON-LABEL:test_B_2: +;THUMB1-LABEL: test_B_2: +;T1POST-LABEL: test_B_2: + define void @test_B_2() { +;ARM: ldrh r{{[0-9]+}}, [{{.*}}], #2 +;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB2: ldrh r{{[0-9]+}}, [{{.*}}], #2 +;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;NO_NEON: ldrh r{{[0-9]+}}, [{{.*}}], #2 +;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB1: ldrh r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #2 +;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} + +;T1POST-NOT: ldrh r{{[0-9]+}}, [{{.*}}], #2 + entry: + %a = alloca %struct.B, align 2 + call void @use_B(%struct.B* byval align 2 %a) + ret void + } +;ARM-LABEL: test_B_4: +;THUMB2-LABEL: test_B_4: +;NO_NEON-LABEL:test_B_4: +;THUMB1-LABEL: test_B_4: +;T1POST-LABEL: test_B_4: + define void @test_B_4() { +;ARM: ldr r{{[0-9]+}}, [{{.*}}], #4 +;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB2: ldr r{{[0-9]+}}, [{{.*}}], #4 +;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 +;THUMB1: ldrb r{{[0-9]+}}, 
{{\[}}[[BASE:r[0-9]+]]{{\]}} + +;T1POST-NOT: ldr r{{[0-9]+}}, [{{.*}}], #4 + entry: + %a = alloca %struct.B, align 4 + call void @use_B(%struct.B* byval align 4 %a) + ret void + } +;ARM-LABEL: test_B_8: +;THUMB2-LABEL: test_B_8: +;NO_NEON-LABEL:test_B_8: +;THUMB1-LABEL: test_B_8: +;T1POST-LABEL: test_B_8: + define void @test_B_8() { +;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 +;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} + +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + entry: + %a = alloca %struct.B, align 8 + call void @use_B(%struct.B* byval align 8 %a) + ret void + } +;ARM-LABEL: test_B_16: +;THUMB2-LABEL: test_B_16: +;NO_NEON-LABEL:test_B_16: +;THUMB1-LABEL: test_B_16: +;T1POST-LABEL: test_B_16: + define void @test_B_16() { +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 +;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} + +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! 
+ entry: + %a = alloca %struct.B, align 16 + call void @use_B(%struct.B* byval align 16 %a) + ret void + } +;ARM-LABEL: test_C_1: +;THUMB2-LABEL: test_C_1: +;NO_NEON-LABEL:test_C_1: +;THUMB1-LABEL: test_C_1: +;T1POST-LABEL: test_C_1: + define void @test_C_1() { +;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #1 + +;T1POST-NOT: ldrb r{{[0-9]+}}, [{{.*}}], #1 + entry: + %a = alloca %struct.C, align 1 + call void @use_C(%struct.C* byval align 1 %a) + ret void + } +;ARM-LABEL: test_C_2: +;THUMB2-LABEL: test_C_2: +;NO_NEON-LABEL:test_C_2: +;THUMB1-LABEL: test_C_2: +;T1POST-LABEL: test_C_2: + define void @test_C_2() { +;ARM: ldrh r{{[0-9]+}}, [{{.*}}], #2 +;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB2: ldrh r{{[0-9]+}}, [{{.*}}], #2 +;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;NO_NEON: ldrh r{{[0-9]+}}, [{{.*}}], #2 +;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB1: ldrh r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #2 +;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} + +;T1POST-NOT: ldrh r{{[0-9]+}}, [{{.*}}], #2 + entry: + %a = alloca %struct.C, align 2 + call void @use_C(%struct.C* byval align 2 %a) + ret void + } +;ARM-LABEL: test_C_4: +;THUMB2-LABEL: test_C_4: +;NO_NEON-LABEL:test_C_4: +;THUMB1-LABEL: test_C_4: +;T1POST-LABEL: test_C_4: + define void @test_C_4() { +;ARM: ldr r{{[0-9]+}}, [{{.*}}], #4 +;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB2: ldr r{{[0-9]+}}, [{{.*}}], #4 +;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 +;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #1 + +;T1POST-NOT: ldr r{{[0-9]+}}, [{{.*}}], #4 + entry: + %a = alloca %struct.C, align 4 + call void 
@use_C(%struct.C* byval align 4 %a) + ret void + } +;ARM-LABEL: test_C_8: +;THUMB2-LABEL: test_C_8: +;NO_NEON-LABEL:test_C_8: +;THUMB1-LABEL: test_C_8: +;T1POST-LABEL: test_C_8: + define void @test_C_8() { +;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 +;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #1 + +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + entry: + %a = alloca %struct.C, align 8 + call void @use_C(%struct.C* byval align 8 %a) + ret void + } +;ARM-LABEL: test_C_16: +;THUMB2-LABEL: test_C_16: +;NO_NEON-LABEL:test_C_16: +;THUMB1-LABEL: test_C_16: +;T1POST-LABEL: test_C_16: + define void @test_C_16() { +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 +;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #1 + +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! 
+ entry: + %a = alloca %struct.C, align 16 + call void @use_C(%struct.C* byval align 16 %a) + ret void + } +;ARM-LABEL: test_D_1: +;THUMB2-LABEL: test_D_1: +;NO_NEON-LABEL:test_D_1: +;THUMB1-LABEL: test_D_1: +;T1POST-LABEL: test_D_1: + define void @test_D_1() { +;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 +;ARM: bne + +;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 +;THUMB2: bne + +;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 +;NO_NEON: bne + +;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #1 +;THUMB1: bne + +;T1POST-NOT: ldrb r{{[0-9]+}}, [{{.*}}], #1 + entry: + %a = alloca %struct.D, align 1 + call void @use_D(%struct.D* byval align 1 %a) + ret void + } +;ARM-LABEL: test_D_2: +;THUMB2-LABEL: test_D_2: +;NO_NEON-LABEL:test_D_2: +;THUMB1-LABEL: test_D_2: +;T1POST-LABEL: test_D_2: + define void @test_D_2() { +;ARM: ldrh r{{[0-9]+}}, [{{.*}}], #2 +;ARM: bne + +;THUMB2: ldrh r{{[0-9]+}}, [{{.*}}], #2 +;THUMB2: bne + +;NO_NEON: ldrh r{{[0-9]+}}, [{{.*}}], #2 +;NO_NEON: bne + +;THUMB1: ldrh r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #2 +;THUMB1: bne + +;T1POST-NOT: ldrh r{{[0-9]+}}, [{{.*}}], #2 + entry: + %a = alloca %struct.D, align 2 + call void @use_D(%struct.D* byval align 2 %a) + ret void + } +;ARM-LABEL: test_D_4: +;THUMB2-LABEL: test_D_4: +;NO_NEON-LABEL:test_D_4: +;THUMB1-LABEL: test_D_4: +;T1POST-LABEL: test_D_4: + define void @test_D_4() { +;ARM: ldr r{{[0-9]+}}, [{{.*}}], #4 +;ARM: bne + +;THUMB2: ldr r{{[0-9]+}}, [{{.*}}], #4 +;THUMB2: bne + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON: bne + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 +;THUMB1: bne + +;T1POST-NOT: ldr r{{[0-9]+}}, [{{.*}}], #4 + entry: + %a = alloca %struct.D, align 4 + call void @use_D(%struct.D* byval align 4 %a) + ret void + } +;ARM-LABEL: test_D_8: +;THUMB2-LABEL: test_D_8: +;NO_NEON-LABEL:test_D_8: +;THUMB1-LABEL: test_D_8: +;T1POST-LABEL: test_D_8: + define void @test_D_8() { +;ARM: vld1.32 
{d{{[0-9]+}}}, [r{{.*}}]! +;ARM: bne + +;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: bne + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON: bne +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 +;THUMB1: bne + +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + entry: + %a = alloca %struct.D, align 8 + call void @use_D(%struct.D* byval align 8 %a) + ret void + } +;ARM-LABEL: test_D_16: +;THUMB2-LABEL: test_D_16: +;NO_NEON-LABEL:test_D_16: +;THUMB1-LABEL: test_D_16: +;T1POST-LABEL: test_D_16: + define void @test_D_16() { +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: bne + +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: bne + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON: bne +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 +;THUMB1: bne + +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! 
+ entry: + %a = alloca %struct.D, align 16 + call void @use_D(%struct.D* byval align 16 %a) + ret void + } +;ARM-LABEL: test_E_1: +;THUMB2-LABEL: test_E_1: +;NO_NEON-LABEL:test_E_1: +;THUMB1-LABEL: test_E_1: +;T1POST-LABEL: test_E_1: + define void @test_E_1() { +;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 +;ARM: bne + +;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 +;THUMB2: bne + +;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 +;NO_NEON: bne + +;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #1 +;THUMB1: bne + +;T1POST-NOT: ldrb r{{[0-9]+}}, [{{.*}}], #1 + entry: + %a = alloca %struct.E, align 1 + call void @use_E(%struct.E* byval align 1 %a) + ret void + } +;ARM-LABEL: test_E_2: +;THUMB2-LABEL: test_E_2: +;NO_NEON-LABEL:test_E_2: +;THUMB1-LABEL: test_E_2: +;T1POST-LABEL: test_E_2: + define void @test_E_2() { +;ARM: ldrh r{{[0-9]+}}, [{{.*}}], #2 +;ARM: bne +;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB2: ldrh r{{[0-9]+}}, [{{.*}}], #2 +;THUMB2: bne +;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;NO_NEON: ldrh r{{[0-9]+}}, [{{.*}}], #2 +;NO_NEON: bne +;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB1: ldrh r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #2 +;THUMB1: bne +;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} + +;T1POST-NOT: ldrh r{{[0-9]+}}, [{{.*}}], #2 + entry: + %a = alloca %struct.E, align 2 + call void @use_E(%struct.E* byval align 2 %a) + ret void + } +;ARM-LABEL: test_E_4: +;THUMB2-LABEL: test_E_4: +;NO_NEON-LABEL:test_E_4: +;THUMB1-LABEL: test_E_4: +;T1POST-LABEL: test_E_4: + define void @test_E_4() { +;ARM: ldr r{{[0-9]+}}, [{{.*}}], #4 +;ARM: bne +;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB2: ldr r{{[0-9]+}}, [{{.*}}], #4 +;THUMB2: bne +;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON: bne +;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 +;THUMB1: bne +;THUMB1: ldrb r{{[0-9]+}}, 
{{\[}}[[BASE:r[0-9]+]]{{\]}} + +;T1POST-NOT: ldr r{{[0-9]+}}, [{{.*}}], #4 + entry: + %a = alloca %struct.E, align 4 + call void @use_E(%struct.E* byval align 4 %a) + ret void + } +;ARM-LABEL: test_E_8: +;THUMB2-LABEL: test_E_8: +;NO_NEON-LABEL:test_E_8: +;THUMB1-LABEL: test_E_8: +;T1POST-LABEL: test_E_8: + define void @test_E_8() { +;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;ARM: bne +;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: bne +;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON: bne +;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 +;THUMB1: bne +;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} + +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + entry: + %a = alloca %struct.E, align 8 + call void @use_E(%struct.E* byval align 8 %a) + ret void + } +;ARM-LABEL: test_E_16: +;THUMB2-LABEL: test_E_16: +;NO_NEON-LABEL:test_E_16: +;THUMB1-LABEL: test_E_16: +;T1POST-LABEL: test_E_16: + define void @test_E_16() { +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: bne +;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: bne +;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON: bne +;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 +;THUMB1: bne +;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} + +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! 
+ entry: + %a = alloca %struct.E, align 16 + call void @use_E(%struct.E* byval align 16 %a) + ret void + } +;ARM-LABEL: test_F_1: +;THUMB2-LABEL: test_F_1: +;NO_NEON-LABEL:test_F_1: +;THUMB1-LABEL: test_F_1: +;T1POST-LABEL: test_F_1: + define void @test_F_1() { +;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 +;ARM: bne + +;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 +;THUMB2: bne + +;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 +;NO_NEON: bne + +;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #1 +;THUMB1: bne + +;T1POST-NOT: ldrb r{{[0-9]+}}, [{{.*}}], #1 + entry: + %a = alloca %struct.F, align 1 + call void @use_F(%struct.F* byval align 1 %a) + ret void + } +;ARM-LABEL: test_F_2: +;THUMB2-LABEL: test_F_2: +;NO_NEON-LABEL:test_F_2: +;THUMB1-LABEL: test_F_2: +;T1POST-LABEL: test_F_2: + define void @test_F_2() { +;ARM: ldrh r{{[0-9]+}}, [{{.*}}], #2 +;ARM: bne +;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB2: ldrh r{{[0-9]+}}, [{{.*}}], #2 +;THUMB2: bne +;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;NO_NEON: ldrh r{{[0-9]+}}, [{{.*}}], #2 +;NO_NEON: bne +;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB1: ldrh r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #2 +;THUMB1: bne +;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} + +;T1POST-NOT: ldrh r{{[0-9]+}}, [{{.*}}], #2 + entry: + %a = alloca %struct.F, align 2 + call void @use_F(%struct.F* byval align 2 %a) + ret void + } +;ARM-LABEL: test_F_4: +;THUMB2-LABEL: test_F_4: +;NO_NEON-LABEL:test_F_4: +;THUMB1-LABEL: test_F_4: +;T1POST-LABEL: test_F_4: + define void @test_F_4() { +;ARM: ldr r{{[0-9]+}}, [{{.*}}], #4 +;ARM: bne +;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB2: ldr r{{[0-9]+}}, [{{.*}}], #4 +;THUMB2: bne +;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON: bne +;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 +;THUMB1: bne +;THUMB1: ldrb r{{[0-9]+}}, 
{{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #1 + +;T1POST-NOT: ldr r{{[0-9]+}}, [{{.*}}], #4 + entry: + %a = alloca %struct.F, align 4 + call void @use_F(%struct.F* byval align 4 %a) + ret void + } +;ARM-LABEL: test_F_8: +;THUMB2-LABEL: test_F_8: +;NO_NEON-LABEL:test_F_8: +;THUMB1-LABEL: test_F_8: +;T1POST-LABEL: test_F_8: + define void @test_F_8() { +;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;ARM: bne +;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: bne +;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON: bne +;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 +;THUMB1: bne +;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #1 + +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + entry: + %a = alloca %struct.F, align 8 + call void @use_F(%struct.F* byval align 8 %a) + ret void + } +;ARM-LABEL: test_F_16: +;THUMB2-LABEL: test_F_16: +;NO_NEON-LABEL:test_F_16: +;THUMB1-LABEL: test_F_16: +;T1POST-LABEL: test_F_16: + define void @test_F_16() { +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: bne +;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: bne +;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON: bne +;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 +;THUMB1: bne +;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #1 + +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! 
+ entry: + %a = alloca %struct.F, align 16 + call void @use_F(%struct.F* byval align 16 %a) + ret void + } +;ARM-LABEL: test_G_1: +;THUMB2-LABEL: test_G_1: +;NO_NEON-LABEL:test_G_1: +;THUMB1-LABEL: test_G_1: +;T1POST-LABEL: test_G_1: + define void @test_G_1() { +;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #1 + +;T1POST-NOT: ldrb r{{[0-9]+}}, [{{.*}}], #1 + entry: + %a = alloca %struct.G, align 1 + call void @use_G(%struct.G* byval align 1 %a) + ret void + } +;ARM-LABEL: test_G_2: +;THUMB2-LABEL: test_G_2: +;NO_NEON-LABEL:test_G_2: +;THUMB1-LABEL: test_G_2: +;T1POST-LABEL: test_G_2: + define void @test_G_2() { +;ARM: ldrh r{{[0-9]+}}, [{{.*}}], #2 + +;THUMB2: ldrh r{{[0-9]+}}, [{{.*}}], #2 + +;NO_NEON: ldrh r{{[0-9]+}}, [{{.*}}], #2 + +;THUMB1: ldrh r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #2 + +;T1POST-NOT: ldrh r{{[0-9]+}}, [{{.*}}], #2 + entry: + %a = alloca %struct.G, align 2 + call void @use_G(%struct.G* byval align 2 %a) + ret void + } +;ARM-LABEL: test_G_4: +;THUMB2-LABEL: test_G_4: +;NO_NEON-LABEL:test_G_4: +;THUMB1-LABEL: test_G_4: +;T1POST-LABEL: test_G_4: + define void @test_G_4() { +;ARM: ldr r{{[0-9]+}}, [{{.*}}], #4 + +;THUMB2: ldr r{{[0-9]+}}, [{{.*}}], #4 + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 + +;T1POST-NOT: ldr r{{[0-9]+}}, [{{.*}}], #4 + entry: + %a = alloca %struct.G, align 4 + call void @use_G(%struct.G* byval align 4 %a) + ret void + } +;ARM-LABEL: test_G_8: +;THUMB2-LABEL: test_G_8: +;NO_NEON-LABEL:test_G_8: +;THUMB1-LABEL: test_G_8: +;T1POST-LABEL: test_G_8: + define void @test_G_8() { +;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + +;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! 
+ +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 + +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + entry: + %a = alloca %struct.G, align 8 + call void @use_G(%struct.G* byval align 8 %a) + ret void + } +;ARM-LABEL: test_G_16: +;THUMB2-LABEL: test_G_16: +;NO_NEON-LABEL:test_G_16: +;THUMB1-LABEL: test_G_16: +;T1POST-LABEL: test_G_16: + define void @test_G_16() { +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! + +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 + +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! + entry: + %a = alloca %struct.G, align 16 + call void @use_G(%struct.G* byval align 16 %a) + ret void + } +;ARM-LABEL: test_H_1: +;THUMB2-LABEL: test_H_1: +;NO_NEON-LABEL:test_H_1: +;THUMB1-LABEL: test_H_1: +;T1POST-LABEL: test_H_1: + define void @test_H_1() { +;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #1 + +;T1POST-NOT: ldrb r{{[0-9]+}}, [{{.*}}], #1 + entry: + %a = alloca %struct.H, align 1 + call void @use_H(%struct.H* byval align 1 %a) + ret void + } +;ARM-LABEL: test_H_2: +;THUMB2-LABEL: test_H_2: +;NO_NEON-LABEL:test_H_2: +;THUMB1-LABEL: test_H_2: +;T1POST-LABEL: test_H_2: + define void @test_H_2() { +;ARM: ldrh r{{[0-9]+}}, [{{.*}}], #2 + +;THUMB2: ldrh r{{[0-9]+}}, [{{.*}}], #2 + +;NO_NEON: ldrh r{{[0-9]+}}, [{{.*}}], #2 + +;THUMB1: ldrh r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #2 + +;T1POST-NOT: ldrh r{{[0-9]+}}, [{{.*}}], #2 + entry: + %a = alloca %struct.H, align 2 + call void @use_H(%struct.H* byval align 2 %a) + ret void + } +;ARM-LABEL: test_H_4: +;THUMB2-LABEL: test_H_4: +;NO_NEON-LABEL:test_H_4: 
+;THUMB1-LABEL: test_H_4: +;T1POST-LABEL: test_H_4: + define void @test_H_4() { +;ARM: ldr r{{[0-9]+}}, [{{.*}}], #4 + +;THUMB2: ldr r{{[0-9]+}}, [{{.*}}], #4 + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 + +;T1POST-NOT: ldr r{{[0-9]+}}, [{{.*}}], #4 + entry: + %a = alloca %struct.H, align 4 + call void @use_H(%struct.H* byval align 4 %a) + ret void + } +;ARM-LABEL: test_H_8: +;THUMB2-LABEL: test_H_8: +;NO_NEON-LABEL:test_H_8: +;THUMB1-LABEL: test_H_8: +;T1POST-LABEL: test_H_8: + define void @test_H_8() { +;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + +;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 + +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + entry: + %a = alloca %struct.H, align 8 + call void @use_H(%struct.H* byval align 8 %a) + ret void + } +;ARM-LABEL: test_H_16: +;THUMB2-LABEL: test_H_16: +;NO_NEON-LABEL:test_H_16: +;THUMB1-LABEL: test_H_16: +;T1POST-LABEL: test_H_16: + define void @test_H_16() { +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! + +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 + +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! 
+ entry: + %a = alloca %struct.H, align 16 + call void @use_H(%struct.H* byval align 16 %a) + ret void + } +;ARM-LABEL: test_I_1: +;THUMB2-LABEL: test_I_1: +;NO_NEON-LABEL:test_I_1: +;THUMB1-LABEL: test_I_1: +;T1POST-LABEL: test_I_1: + define void @test_I_1() { +;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 + +;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #1 + +;T1POST-NOT: ldrb r{{[0-9]+}}, [{{.*}}], #1 + entry: + %a = alloca %struct.I, align 1 + call void @use_I(%struct.I* byval align 1 %a) + ret void + } +;ARM-LABEL: test_I_2: +;THUMB2-LABEL: test_I_2: +;NO_NEON-LABEL:test_I_2: +;THUMB1-LABEL: test_I_2: +;T1POST-LABEL: test_I_2: + define void @test_I_2() { +;ARM: ldrh r{{[0-9]+}}, [{{.*}}], #2 + +;THUMB2: ldrh r{{[0-9]+}}, [{{.*}}], #2 + +;NO_NEON: ldrh r{{[0-9]+}}, [{{.*}}], #2 + +;THUMB1: ldrh r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #2 + +;T1POST-NOT: ldrh r{{[0-9]+}}, [{{.*}}], #2 + entry: + %a = alloca %struct.I, align 2 + call void @use_I(%struct.I* byval align 2 %a) + ret void + } +;ARM-LABEL: test_I_4: +;THUMB2-LABEL: test_I_4: +;NO_NEON-LABEL:test_I_4: +;THUMB1-LABEL: test_I_4: +;T1POST-LABEL: test_I_4: + define void @test_I_4() { +;ARM: ldr r{{[0-9]+}}, [{{.*}}], #4 + +;THUMB2: ldr r{{[0-9]+}}, [{{.*}}], #4 + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 + +;T1POST-NOT: ldr r{{[0-9]+}}, [{{.*}}], #4 + entry: + %a = alloca %struct.I, align 4 + call void @use_I(%struct.I* byval align 4 %a) + ret void + } +;ARM-LABEL: test_I_8: +;THUMB2-LABEL: test_I_8: +;NO_NEON-LABEL:test_I_8: +;THUMB1-LABEL: test_I_8: +;T1POST-LABEL: test_I_8: + define void @test_I_8() { +;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + +;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! 
+ +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 + +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + entry: + %a = alloca %struct.I, align 8 + call void @use_I(%struct.I* byval align 8 %a) + ret void + } +;ARM-LABEL: test_I_16: +;THUMB2-LABEL: test_I_16: +;NO_NEON-LABEL:test_I_16: +;THUMB1-LABEL: test_I_16: +;T1POST-LABEL: test_I_16: + define void @test_I_16() { +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! + +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 + +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! + entry: + %a = alloca %struct.I, align 16 + call void @use_I(%struct.I* byval align 16 %a) + ret void + } +;ARM-LABEL: test_J_1: +;THUMB2-LABEL: test_J_1: +;NO_NEON-LABEL:test_J_1: +;THUMB1-LABEL: test_J_1: +;T1POST-LABEL: test_J_1: + define void @test_J_1() { +;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 +;ARM: bne + +;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 +;THUMB2: bne + +;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 +;NO_NEON: bne + +;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #1 +;THUMB1: bne + +;T1POST-NOT: ldrb r{{[0-9]+}}, [{{.*}}], #1 + entry: + %a = alloca %struct.J, align 1 + call void @use_J(%struct.J* byval align 1 %a) + ret void + } +;ARM-LABEL: test_J_2: +;THUMB2-LABEL: test_J_2: +;NO_NEON-LABEL:test_J_2: +;THUMB1-LABEL: test_J_2: +;T1POST-LABEL: test_J_2: + define void @test_J_2() { +;ARM: ldrh r{{[0-9]+}}, [{{.*}}], #2 +;ARM: bne + +;THUMB2: ldrh r{{[0-9]+}}, [{{.*}}], #2 +;THUMB2: bne + +;NO_NEON: ldrh r{{[0-9]+}}, [{{.*}}], #2 +;NO_NEON: bne + +;THUMB1: ldrh r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #2 +;THUMB1: bne + +;T1POST-NOT: ldrh r{{[0-9]+}}, [{{.*}}], #2 + entry: + %a = alloca %struct.J, align 2 + call void @use_J(%struct.J* byval 
align 2 %a) + ret void + } +;ARM-LABEL: test_J_4: +;THUMB2-LABEL: test_J_4: +;NO_NEON-LABEL:test_J_4: +;THUMB1-LABEL: test_J_4: +;T1POST-LABEL: test_J_4: + define void @test_J_4() { +;ARM: ldr r{{[0-9]+}}, [{{.*}}], #4 +;ARM: bne + +;THUMB2: ldr r{{[0-9]+}}, [{{.*}}], #4 +;THUMB2: bne + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON: bne + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 +;THUMB1: bne + +;T1POST-NOT: ldr r{{[0-9]+}}, [{{.*}}], #4 + entry: + %a = alloca %struct.J, align 4 + call void @use_J(%struct.J* byval align 4 %a) + ret void + } +;ARM-LABEL: test_J_8: +;THUMB2-LABEL: test_J_8: +;NO_NEON-LABEL:test_J_8: +;THUMB1-LABEL: test_J_8: +;T1POST-LABEL: test_J_8: + define void @test_J_8() { +;ARM: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;ARM: bne + +;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: bne + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON: bne +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 +;THUMB1: bne + +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + entry: + %a = alloca %struct.J, align 8 + call void @use_J(%struct.J* byval align 8 %a) + ret void + } +;ARM-LABEL: test_J_16: +;THUMB2-LABEL: test_J_16: +;NO_NEON-LABEL:test_J_16: +;THUMB1-LABEL: test_J_16: +;T1POST-LABEL: test_J_16: + define void @test_J_16() { +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: bne + +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: bne + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON: bne +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 +;THUMB1: bne + +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! 
+ entry: + %a = alloca %struct.J, align 16 + call void @use_J(%struct.J* byval align 16 %a) + ret void + } +;ARM-LABEL: test_K_1: +;THUMB2-LABEL: test_K_1: +;NO_NEON-LABEL:test_K_1: +;THUMB1-LABEL: test_K_1: +;T1POST-LABEL: test_K_1: + define void @test_K_1() { +;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 +;ARM: bne + +;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 +;THUMB2: bne + +;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 +;NO_NEON: bne + +;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #1 +;THUMB1: bne + +;T1POST-NOT: ldrb r{{[0-9]+}}, [{{.*}}], #1 + entry: + %a = alloca %struct.K, align 1 + call void @use_K(%struct.K* byval align 1 %a) + ret void + } +;ARM-LABEL: test_K_2: +;THUMB2-LABEL: test_K_2: +;NO_NEON-LABEL:test_K_2: +;THUMB1-LABEL: test_K_2: +;T1POST-LABEL: test_K_2: + define void @test_K_2() { +;ARM: ldrh r{{[0-9]+}}, [{{.*}}], #2 +;ARM: bne + +;THUMB2: ldrh r{{[0-9]+}}, [{{.*}}], #2 +;THUMB2: bne + +;NO_NEON: ldrh r{{[0-9]+}}, [{{.*}}], #2 +;NO_NEON: bne + +;THUMB1: ldrh r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #2 +;THUMB1: bne + +;T1POST-NOT: ldrh r{{[0-9]+}}, [{{.*}}], #2 + entry: + %a = alloca %struct.K, align 2 + call void @use_K(%struct.K* byval align 2 %a) + ret void + } +;ARM-LABEL: test_K_4: +;THUMB2-LABEL: test_K_4: +;NO_NEON-LABEL:test_K_4: +;THUMB1-LABEL: test_K_4: +;T1POST-LABEL: test_K_4: + define void @test_K_4() { +;ARM: ldr r{{[0-9]+}}, [{{.*}}], #4 +;ARM: bne + +;THUMB2: ldr r{{[0-9]+}}, [{{.*}}], #4 +;THUMB2: bne + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON: bne + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 +;THUMB1: bne + +;T1POST-NOT: ldr r{{[0-9]+}}, [{{.*}}], #4 + entry: + %a = alloca %struct.K, align 4 + call void @use_K(%struct.K* byval align 4 %a) + ret void + } +;ARM-LABEL: test_K_8: +;THUMB2-LABEL: test_K_8: +;NO_NEON-LABEL:test_K_8: +;THUMB1-LABEL: test_K_8: +;T1POST-LABEL: test_K_8: + define void @test_K_8() { +;ARM: vld1.32 
{d{{[0-9]+}}}, [r{{.*}}]! +;ARM: bne + +;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: bne + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON: bne +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 +;THUMB1: bne + +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + entry: + %a = alloca %struct.K, align 8 + call void @use_K(%struct.K* byval align 8 %a) + ret void + } +;ARM-LABEL: test_K_16: +;THUMB2-LABEL: test_K_16: +;NO_NEON-LABEL:test_K_16: +;THUMB1-LABEL: test_K_16: +;T1POST-LABEL: test_K_16: + define void @test_K_16() { +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: bne + +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: bne + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON: bne +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 +;THUMB1: bne + +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! 
+ entry: + %a = alloca %struct.K, align 16 + call void @use_K(%struct.K* byval align 16 %a) + ret void + } +;ARM-LABEL: test_L_1: +;THUMB2-LABEL: test_L_1: +;NO_NEON-LABEL:test_L_1: +;THUMB1-LABEL: test_L_1: +;T1POST-LABEL: test_L_1: + define void @test_L_1() { +;ARM: ldrb r{{[0-9]+}}, [{{.*}}], #1 +;ARM: bne + +;THUMB2: ldrb r{{[0-9]+}}, [{{.*}}], #1 +;THUMB2: bne + +;NO_NEON: ldrb r{{[0-9]+}}, [{{.*}}], #1 +;NO_NEON: bne + +;THUMB1: ldrb r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #1 +;THUMB1: bne + +;T1POST-NOT: ldrb r{{[0-9]+}}, [{{.*}}], #1 + entry: + %a = alloca %struct.L, align 1 + call void @use_L(%struct.L* byval align 1 %a) + ret void + } +;ARM-LABEL: test_L_2: +;THUMB2-LABEL: test_L_2: +;NO_NEON-LABEL:test_L_2: +;THUMB1-LABEL: test_L_2: +;T1POST-LABEL: test_L_2: + define void @test_L_2() { +;ARM: ldrh r{{[0-9]+}}, [{{.*}}], #2 +;ARM: bne + +;THUMB2: ldrh r{{[0-9]+}}, [{{.*}}], #2 +;THUMB2: bne + +;NO_NEON: ldrh r{{[0-9]+}}, [{{.*}}], #2 +;NO_NEON: bne + +;THUMB1: ldrh r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #2 +;THUMB1: bne + +;T1POST-NOT: ldrh r{{[0-9]+}}, [{{.*}}], #2 + entry: + %a = alloca %struct.L, align 2 + call void @use_L(%struct.L* byval align 2 %a) + ret void + } +;ARM-LABEL: test_L_4: +;THUMB2-LABEL: test_L_4: +;NO_NEON-LABEL:test_L_4: +;THUMB1-LABEL: test_L_4: +;T1POST-LABEL: test_L_4: + define void @test_L_4() { +;ARM: ldr r{{[0-9]+}}, [{{.*}}], #4 +;ARM: bne + +;THUMB2: ldr r{{[0-9]+}}, [{{.*}}], #4 +;THUMB2: bne + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON: bne + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 +;THUMB1: bne + +;T1POST-NOT: ldr r{{[0-9]+}}, [{{.*}}], #4 + entry: + %a = alloca %struct.L, align 4 + call void @use_L(%struct.L* byval align 4 %a) + ret void + } +;ARM-LABEL: test_L_8: +;THUMB2-LABEL: test_L_8: +;NO_NEON-LABEL:test_L_8: +;THUMB1-LABEL: test_L_8: +;T1POST-LABEL: test_L_8: + define void @test_L_8() { +;ARM: vld1.32 
{d{{[0-9]+}}}, [r{{.*}}]! +;ARM: bne + +;THUMB2: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: bne + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON: bne +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 +;THUMB1: bne + +;T1POST-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]! + entry: + %a = alloca %struct.L, align 8 + call void @use_L(%struct.L* byval align 8 %a) + ret void + } +;ARM-LABEL: test_L_16: +;THUMB2-LABEL: test_L_16: +;NO_NEON-LABEL:test_L_16: +;THUMB1-LABEL: test_L_16: +;T1POST-LABEL: test_L_16: + define void @test_L_16() { +;ARM: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;ARM: bne + +;THUMB2: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! +;THUMB2: bne + +;NO_NEON: ldr r{{[0-9]+}}, [{{.*}}], #4 +;NO_NEON: bne +;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! + +;THUMB1: ldr r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}} +;THUMB1: adds [[BASE]], #4 +;THUMB1: bne + +;T1POST-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]! 
+ entry: + %a = alloca %struct.L, align 16 + call void @use_L(%struct.L* byval align 16 %a) + ret void + } diff --git a/test/CodeGen/ARM/sub-cmp-peephole.ll b/test/CodeGen/ARM/sub-cmp-peephole.ll index 1b411e3..19727da 100644 --- a/test/CodeGen/ARM/sub-cmp-peephole.ll +++ b/test/CodeGen/ARM/sub-cmp-peephole.ll @@ -1,4 +1,7 @@ ; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s --check-prefix=V7 +; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi | FileCheck %s -check-prefix=V8 + define i32 @f(i32 %a, i32 %b) nounwind ssp { entry: @@ -84,3 +87,60 @@ land.lhs.true: ; preds = %num2long.exit if.end11: ; preds = %num2long.exit ret i32 23 } + +define float @float_sel(i32 %a, i32 %b, float %x, float %y) { +entry: +; CHECK-LABEL: float_sel: +; CHECK-NOT: cmp +; V8-LABEL: float_sel: +; V8-NOT: cmp +; V8: vseleq.f32 + %sub = sub i32 %a, %b + %cmp = icmp eq i32 %sub, 0 + %ret = select i1 %cmp, float %x, float %y + ret float %ret +} + +define double @double_sel(i32 %a, i32 %b, double %x, double %y) { +entry: +; CHECK-LABEL: double_sel: +; CHECK-NOT: cmp +; V8-LABEL: double_sel: +; V8-NOT: cmp +; V8: vseleq.f64 + %sub = sub i32 %a, %b + %cmp = icmp eq i32 %sub, 0 + %ret = select i1 %cmp, double %x, double %y + ret double %ret +} + +@t = common global i32 0 +define double @double_sub(i32 %a, i32 %b, double %x, double %y) { +entry: +; CHECK-LABEL: double_sub: +; CHECK: subs +; CHECK-NOT: cmp +; V8-LABEL: double_sub: +; V8: vsel + %cmp = icmp sgt i32 %a, %b + %sub = sub i32 %a, %b + store i32 %sub, i32* @t + %ret = select i1 %cmp, double %x, double %y + ret double %ret +} + +define double @double_sub_swap(i32 %a, i32 %b, double %x, double %y) { +entry: +; V7-LABEL: double_sub_swap: +; V7-NOT: cmp +; V7: subs +; V8-LABEL: double_sub_swap: +; V8-NOT: subs +; V8: cmp +; V8: vsel + %cmp = icmp sgt i32 %a, %b + %sub = sub i32 %b, %a + %ret = select i1 %cmp, double %x, double %y + store i32 %sub, i32* @t + ret double %ret +} 
diff --git a/test/CodeGen/ARM/swift-vldm.ll b/test/CodeGen/ARM/swift-vldm.ll new file mode 100644 index 0000000..67ae00a --- /dev/null +++ b/test/CodeGen/ARM/swift-vldm.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -mcpu=swift -mtriple=armv7s-apple-ios | FileCheck %s + +; Check that we avoid producing vldm instructions using d registers that +; begin in the most-significant half of a q register. These require more +; micro-ops on swift and so aren't worth combining. + +; CHECK-LABEL: test_vldm +; CHECK: vldmia r{{[0-9]+}}, {d2, d3, d4} +; CHECK-NOT: vldmia r{{[0-9]+}}, {d1, d2, d3, d4} + +declare fastcc void @force_register(double %d0, double %d1, double %d2, double %d3, double %d4) + +define void @test_vldm(double* %x, double * %y) { +entry: + %addr1 = getelementptr double * %x, i32 1 + %addr2 = getelementptr double * %x, i32 2 + %addr3 = getelementptr double * %x, i32 3 + %d0 = load double * %y + %d1 = load double * %x + %d2 = load double * %addr1 + %d3 = load double * %addr2 + %d4 = load double * %addr3 + ; We are trying to force x[0-3] in registers d1 to d4 so that we can test we + ; don't form a "vldmia rX, {d1, d2, d3, d4}". + ; We are relying on the calling convention and that register allocation + ; properly coalesces registers. 
+ call fastcc void @force_register(double %d0, double %d1, double %d2, double %d3, double %d4) + ret void +} diff --git a/test/CodeGen/ARM/thumb2-it-block.ll b/test/CodeGen/ARM/thumb2-it-block.ll index a25352c..47c5dcc 100644 --- a/test/CodeGen/ARM/thumb2-it-block.ll +++ b/test/CodeGen/ARM/thumb2-it-block.ll @@ -1,14 +1,15 @@ ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv8 | FileCheck %s ; PR11107 define i32 @test(i32 %a, i32 %b) { entry: ; CHECK: cmp ; CHECK-NEXT: it mi -; CHECK-NEXT: rsbmi +; CHECK-NEXT: rsb{{s?}}mi ; CHECK-NEXT: cmp ; CHECK-NEXT: it mi -; CHECK-NEXT: rsbmi +; CHECK-NEXT: rsb{{s?}}mi %cmp1 = icmp slt i32 %a, 0 %sub1 = sub nsw i32 0, %a %abs1 = select i1 %cmp1, i32 %sub1, i32 %a diff --git a/test/CodeGen/ARM/trap.ll b/test/CodeGen/ARM/trap.ll index db88a03..6cb26e3 100644 --- a/test/CodeGen/ARM/trap.ll +++ b/test/CodeGen/ARM/trap.ll @@ -9,13 +9,13 @@ ; RUN: llc -mtriple=armv7 -mattr=+nacl-trap -filetype=obj %s -o - \ ; RUN: | llvm-objdump -disassemble -triple armv7 -mattr=+nacl-trap - \ ; RUN: | FileCheck %s -check-prefix=ENCODING-NACL -; RUN: llc -fast-isel -mtriple=armv7-unknown-nacl -filetype=obj %s -o - \ +; RUN: llc -verify-machineinstrs -fast-isel -mtriple=armv7-unknown-nacl -filetype=obj %s -o - \ ; RUN: | llvm-objdump -disassemble -triple armv7-unknown-nacl - \ ; RUN: | FileCheck %s -check-prefix=ENCODING-NACL ; RUN: llc -mtriple=armv7 -filetype=obj %s -o - \ ; RUN: | llvm-objdump -disassemble -triple armv7 - \ ; RUN: | FileCheck %s -check-prefix=ENCODING-ALL -; RUN: llc -fast-isel -mtriple=armv7 -filetype=obj %s -o - \ +; RUN: llc -verify-machineinstrs -fast-isel -mtriple=armv7 -filetype=obj %s -o - \ ; RUN: | llvm-objdump -disassemble -triple armv7 - \ ; RUN: | FileCheck %s -check-prefix=ENCODING-ALL ; rdar://7961298 diff --git a/test/CodeGen/ARM/vadd.ll b/test/CodeGen/ARM/vadd.ll index a1ad37b..fcb5408 100644 --- a/test/CodeGen/ARM/vadd.ll +++ b/test/CodeGen/ARM/vadd.ll @@ -90,37 +90,6 @@ 
define <4 x float> @vaddQf32(<4 x float>* %A, <4 x float>* %B) nounwind { ret <4 x float> %tmp3 } -define <8 x i8> @vaddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: vaddhni16: -;CHECK: vaddhn.i16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i8> @llvm.arm.neon.vaddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2) - ret <8 x i8> %tmp3 -} - -define <4 x i16> @vaddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: vaddhni32: -;CHECK: vaddhn.i32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i16> @llvm.arm.neon.vaddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2) - ret <4 x i16> %tmp3 -} - -define <2 x i32> @vaddhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind { -;CHECK-LABEL: vaddhni64: -;CHECK: vaddhn.i64 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i32> @llvm.arm.neon.vaddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2) - ret <2 x i32> %tmp3 -} - -declare <8 x i8> @llvm.arm.neon.vaddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i16> @llvm.arm.neon.vaddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i32> @llvm.arm.neon.vaddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone - define <8 x i8> @vraddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vraddhni16: ;CHECK: vraddhn.i16 @@ -152,6 +121,33 @@ declare <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16>, <8 x i16>) nounwind rea declare <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone declare <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone +define <8 x i8> @vaddhni16_natural(<8 x i16> %A, <8 x i16> %B) nounwind { +; CHECK-LABEL: vaddhni16_natural: +; CHECK: vaddhn.i16 + %sum = add <8 x i16> %A, %B + %shift = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + %trunc = trunc <8 x i16> %shift to <8 x i8> + ret <8 x i8> %trunc +} + +define <4 x i16> @vaddhni32_natural(<4 x i32> %A, <4 x i32> %B) 
nounwind { +; CHECK-LABEL: vaddhni32_natural: +; CHECK: vaddhn.i32 + %sum = add <4 x i32> %A, %B + %shift = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> + %trunc = trunc <4 x i32> %shift to <4 x i16> + ret <4 x i16> %trunc +} + +define <2 x i32> @vaddhni64_natural(<2 x i64> %A, <2 x i64> %B) nounwind { +; CHECK-LABEL: vaddhni64_natural: +; CHECK: vaddhn.i64 + %sum = add <2 x i64> %A, %B + %shift = lshr <2 x i64> %sum, <i64 32, i64 32> + %trunc = trunc <2 x i64> %shift to <2 x i32> + ret <2 x i32> %trunc +} + define <8 x i16> @vaddls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vaddls8: ;CHECK: vaddl.s8 diff --git a/test/CodeGen/ARM/vector-DAGCombine.ll b/test/CodeGen/ARM/vector-DAGCombine.ll index 4221c98..759da22 100644 --- a/test/CodeGen/ARM/vector-DAGCombine.ll +++ b/test/CodeGen/ARM/vector-DAGCombine.ll @@ -29,7 +29,7 @@ entry: ; Radar 8407927: Make sure that VMOVRRD gets optimized away when the result is ; converted back to be used as a vector type. -; CHECK: test_vmovrrd_combine +; CHECK-LABEL: test_vmovrrd_combine: define <4 x i32> @test_vmovrrd_combine() nounwind { entry: br i1 undef, label %bb1, label %bb2 @@ -136,7 +136,7 @@ define i16 @foldBuildVectors() { ; Test that we are generating vrev and vext for reverse shuffles of v8i16 ; shuffles. -; CHECK: reverse_v8i16 +; CHECK-LABEL: reverse_v8i16: define void @reverse_v8i16(<8 x i16>* %loadaddr, <8 x i16>* %storeaddr) { %v0 = load <8 x i16>* %loadaddr ; CHECK: vrev64.16 @@ -149,7 +149,7 @@ define void @reverse_v8i16(<8 x i16>* %loadaddr, <8 x i16>* %storeaddr) { ; Test that we are generating vrev and vext for reverse shuffles of v16i8 ; shuffles. -; CHECK: reverse_v16i8 +; CHECK-LABEL: reverse_v16i8: define void @reverse_v16i8(<16 x i8>* %loadaddr, <16 x i8>* %storeaddr) { %v0 = load <16 x i8>* %loadaddr ; CHECK: vrev64.8 @@ -165,7 +165,7 @@ define void @reverse_v16i8(<16 x i8>* %loadaddr, <16 x i8>* %storeaddr) { ; vldr cannot handle unaligned loads. 
; Fall back to vld1.32, which can, instead of using the general purpose loads ; followed by a costly sequence of instructions to build the vector register. -; CHECK: t3 +; CHECK-LABEL: t3: ; CHECK: vld1.32 {[[REG:d[0-9]+]][0]} ; CHECK: vld1.32 {[[REG]][1]} ; CHECK: vmull.u8 q{{[0-9]+}}, [[REG]], [[REG]] @@ -188,7 +188,7 @@ declare <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8>, <8 x i8>) ; Check that (insert_vector_elt (load)) => (vector_load). ; Thus, check that scalar_to_vector do not interfer with that. define <8 x i16> @t4(i8* nocapture %sp0) { -; CHECK: t4 +; CHECK-LABEL: t4: ; CHECK: vld1.32 {{{d[0-9]+}}[0]}, [r0] entry: %pix_sp0.0.cast = bitcast i8* %sp0 to i32* @@ -202,7 +202,7 @@ entry: ; Make sure vector load is used for all three loads. ; Lowering to build vector was breaking the single use property of the load of ; %pix_sp0.0.copyload. -; CHECK: t5 +; CHECK-LABEL: t5: ; CHECK: vld1.32 {[[REG1:d[0-9]+]][1]}, [r0] ; CHECK: vorr [[REG2:d[0-9]+]], [[REG1]], [[REG1]] ; CHECK: vld1.32 {[[REG1]][0]}, [r1] @@ -224,3 +224,23 @@ entry: %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %0, <8 x i8> %1) ret <8 x i16> %vmull.i } + +; <rdar://problem/14989896> Make sure we manage to truncate a vector from an +; illegal type to a legal type. +define <2 x i8> @test_truncate(<2 x i128> %in) { +; CHECK-LABEL: test_truncate: +; CHECK: mov [[BASE:r[0-9]+]], sp +; CHECK-NEXT: vld1.32 {[[REG1:d[0-9]+]][0]}, {{\[}}[[BASE]]:32] +; CHECK-NEXT: add [[BASE2:r[0-9]+]], [[BASE]], #4 +; CHECK-NEXT: vld1.32 {[[REG1]][1]}, {{\[}}[[BASE2]]:32] +; REG2 Should map on the same Q register as REG1, i.e., REG2 = REG1 - 1, but we +; cannot express that. +; CHECK-NEXT: vmov.32 [[REG2:d[0-9]+]][0], r0 +; CHECK-NEXT: vmov.32 [[REG2]][1], r1 +; The Q register used here should match floor(REG1/2), but we cannot express that. 
+; CHECK-NEXT: vmovn.i64 [[RES:d[0-9]+]], q{{[0-9]+}} +; CHECK-NEXT: vmov r0, r1, [[RES]] +entry: + %res = trunc <2 x i128> %in to <2 x i8> + ret <2 x i8> %res +} diff --git a/test/CodeGen/ARM/vldm-liveness.ll b/test/CodeGen/ARM/vldm-liveness.ll new file mode 100644 index 0000000..751f447 --- /dev/null +++ b/test/CodeGen/ARM/vldm-liveness.ll @@ -0,0 +1,40 @@ +; RUN: llc -mtriple thumbv7-apple-ios -verify-machineinstrs -o - %s | FileCheck %s + +; ARM load store optimizer was dealing with a sequence like: +; s1 = VLDRS [r0, 1], Q0<imp-def> +; s3 = VLDRS [r0, 2], Q0<imp-use,kill>, Q0<imp-def> +; s0 = VLDRS [r0, 0], Q0<imp-use,kill>, Q0<imp-def> +; s2 = VLDRS [r0, 4], Q0<imp-use,kill>, Q0<imp-def> +; +; It decided to combine the {s0, s1} loads into a single instruction in the +; third position. However, this leaves the instruction defining s3 with a stray +; imp-use of Q0, which is undefined. +; +; The verifier catches this, so this test just makes sure that appropriate +; liveness flags are added. +; +; I believe the change will be tested as long as the vldmia is not the first of +; the loads. Earlier optimisations may perturb the output over time, but +; fiddling the indices should be sufficient to restore the test. 
+ +define arm_aapcs_vfpcc <4 x float> @foo(float* %ptr) { +; CHECK-LABEL: foo: +; CHECK: vldr s3, [r0, #8] +; CHECK: vldmia r0, {s0, s1} +; CHECK: vldr s2, [r0, #16] + %off0 = getelementptr float* %ptr, i32 0 + %val0 = load float* %off0 + %off1 = getelementptr float* %ptr, i32 1 + %val1 = load float* %off1 + %off4 = getelementptr float* %ptr, i32 4 + %val4 = load float* %off4 + %off2 = getelementptr float* %ptr, i32 2 + %val2 = load float* %off2 + + %vec1 = insertelement <4 x float> undef, float %val0, i32 0 + %vec2 = insertelement <4 x float> %vec1, float %val1, i32 1 + %vec3 = insertelement <4 x float> %vec2, float %val4, i32 2 + %vec4 = insertelement <4 x float> %vec3, float %val2, i32 3 + + ret <4 x float> %vec4 +} diff --git a/test/CodeGen/ARM/vldm-sched-a9.ll b/test/CodeGen/ARM/vldm-sched-a9.ll new file mode 100644 index 0000000..d0a9ac6 --- /dev/null +++ b/test/CodeGen/ARM/vldm-sched-a9.ll @@ -0,0 +1,71 @@ +; RUN: llc < %s -march=arm -mtriple=armv7-linux-gnueabihf -float-abi=hard -mcpu=cortex-a9 -O3 | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32-S64" + +; This test will generate spills/fills using vldmia instructions that access 64 bytes of memory. +; Check that we don't crash when we generate these instructions on Cortex-A9. 
+ +; CHECK: test: +; CHECK: vstmia +; CHECK: vldmia +define void @test(i64* %src) #0 { +entry: + %arrayidx39 = getelementptr inbounds i64* %src, i32 13 + %vecinit285 = shufflevector <16 x i64> undef, <16 x i64> <i64 15, i64 16, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17> + store <16 x i64> %vecinit285, <16 x i64>* undef, align 128 + %0 = load i64* undef, align 8 + %vecinit379 = insertelement <16 x i64> undef, i64 %0, i32 9 + %1 = load i64* undef, align 8 + %vecinit419 = insertelement <16 x i64> undef, i64 %1, i32 15 + store <16 x i64> %vecinit419, <16 x i64>* undef, align 128 + %vecinit579 = insertelement <16 x i64> undef, i64 0, i32 4 + %vecinit582 = shufflevector <16 x i64> %vecinit579, <16 x i64> <i64 6, i64 7, i64 8, i64 9, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %vecinit584 = insertelement <16 x i64> %vecinit582, i64 undef, i32 9 + %vecinit586 = insertelement <16 x i64> %vecinit584, i64 0, i32 10 + %vecinit589 = shufflevector <16 x i64> %vecinit586, <16 x i64> <i64 12, i64 13, i64 14, i64 15, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 16, i32 17, i32 18, i32 19, i32 undef> + %2 = load i64* undef, align 8 + %vecinit591 = insertelement <16 x i64> %vecinit589, i64 %2, i32 15 + store <16 x i64> %vecinit591, <16 x i64>* undef, align 128 + %vecinit694 = shufflevector <16 x i64> undef, <16 x i64> <i64 
13, i64 14, i64 15, i64 16, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19> + store <16 x i64> %vecinit694, <16 x i64>* undef, align 128 + %3 = load i64* undef, align 8 + %vecinit1331 = insertelement <16 x i64> undef, i64 %3, i32 14 + %4 = load i64* undef, align 8 + %vecinit1468 = insertelement <16 x i64> undef, i64 %4, i32 11 + %vecinit1471 = shufflevector <16 x i64> %vecinit1468, <16 x i64> <i64 13, i64 14, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 undef, i32 undef> + %vecinit1474 = shufflevector <16 x i64> %vecinit1471, <16 x i64> <i64 15, i64 16, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17> + store <16 x i64> %vecinit1474, <16 x i64>* undef, align 128 + %vecinit1552 = shufflevector <16 x i64> undef, <16 x i64> <i64 10, i64 11, i64 12, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 16, i32 17, i32 18, i32 undef, i32 undef, i32 undef, i32 undef> + %vecinit1555 = shufflevector <16 x i64> %vecinit1552, <16 x i64> <i64 13, i64 14, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 
3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 undef, i32 undef> + %vecinit1558 = shufflevector <16 x i64> %vecinit1555, <16 x i64> <i64 15, i64 16, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17> + store <16 x i64> %vecinit1558, <16 x i64>* undef, align 128 + %vecinit1591 = shufflevector <16 x i64> undef, <16 x i64> <i64 3, i64 4, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %vecinit1594 = shufflevector <16 x i64> %vecinit1591, <16 x i64> <i64 5, i64 6, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %vecinit1597 = shufflevector <16 x i64> %vecinit1594, <16 x i64> <i64 7, i64 8, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %vecinit1599 = insertelement <16 x i64> %vecinit1597, i64 undef, i32 8 + %vecinit1601 = insertelement <16 x i64> %vecinit1599, i64 undef, i32 9 + %vecinit1603 = insertelement <16 x i64> %vecinit1601, i64 undef, i32 10 + %5 = load i64* undef, align 8 + %vecinit1605 
= insertelement <16 x i64> %vecinit1603, i64 %5, i32 11 + %vecinit1608 = shufflevector <16 x i64> %vecinit1605, <16 x i64> <i64 13, i64 14, i64 15, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 undef> + %6 = load i64* undef, align 8 + %vecinit1610 = insertelement <16 x i64> %vecinit1608, i64 %6, i32 15 + store <16 x i64> %vecinit1610, <16 x i64>* undef, align 128 + %vecinit2226 = shufflevector <16 x i64> undef, <16 x i64> <i64 6, i64 7, i64 8, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 16, i32 17, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %7 = load i64* undef, align 8 + %vecinit2228 = insertelement <16 x i64> %vecinit2226, i64 %7, i32 8 + %vecinit2230 = insertelement <16 x i64> %vecinit2228, i64 undef, i32 9 + %vecinit2233 = shufflevector <16 x i64> %vecinit2230, <16 x i64> <i64 11, i64 12, i64 13, i64 14, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef> + %vecinit2236 = shufflevector <16 x i64> %vecinit2233, <16 x i64> <i64 15, i64 16, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17> + store <16 x i64> %vecinit2236, <16 x i64>* undef, align 128 + %vecinit2246 = shufflevector <16 x i64> undef, 
<16 x i64> <i64 4, i64 5, i64 6, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 16, i32 17, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %vecinit2249 = shufflevector <16 x i64> %vecinit2246, <16 x i64> <i64 7, i64 8, i64 9, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %vecinit2252 = shufflevector <16 x i64> %vecinit2249, <16 x i64> <i64 10, i64 11, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 16, i32 17, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %vecinit2255 = shufflevector <16 x i64> %vecinit2252, <16 x i64> <i64 12, i64 13, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 16, i32 17, i32 undef, i32 undef, i32 undef> + %8 = load i64* %arrayidx39, align 8 + %vecinit2257 = insertelement <16 x i64> %vecinit2255, i64 %8, i32 13 + %vecinit2260 = shufflevector <16 x i64> %vecinit2257, <16 x i64> <i64 15, i64 16, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17> + store <16 x i64> 
%vecinit2260, <16 x i64>* null, align 128 + ret void +} +attributes #0 = { noredzone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/ARM/vminmaxnm.ll b/test/CodeGen/ARM/vminmaxnm.ll index afa73b9..f6ce64c 100644 --- a/test/CodeGen/ARM/vminmaxnm.ll +++ b/test/CodeGen/ARM/vminmaxnm.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple armv8 -mattr=+neon | FileCheck %s +; RUN: llc < %s -mtriple armv8 -mattr=+neon,+fp-armv8 -enable-unsafe-fp-math | FileCheck %s --check-prefix=CHECK-FAST define <4 x float> @vmaxnmq(<4 x float>* %A, <4 x float>* %B) nounwind { ; CHECK: vmaxnmq @@ -36,6 +37,51 @@ define <2 x float> @vminnmd(<2 x float>* %A, <2 x float>* %B) nounwind { ret <2 x float> %tmp3 } +define float @fp-armv8_vminnm_o(float %a, float %b) { +; CHECK-FAST: fp-armv8_vminnm_o +; CHECK-FAST-NOT: vcmp +; CHECK-FAST: vminnm.f32 +; CHECK: fp-armv8_vminnm_o +; CHECK-NOT: vminnm.f32 + %cmp = fcmp olt float %a, %b + %cond = select i1 %cmp, float %a, float %b + ret float %cond +} + +define float @fp-armv8_vminnm_u(float %a, float %b) { +; CHECK-FAST: fp-armv8_vminnm_u +; CHECK-FAST-NOT: vcmp +; CHECK-FAST: vminnm.f32 +; CHECK: fp-armv8_vminnm_u +; CHECK-NOT: vminnm.f32 + %cmp = fcmp ult float %a, %b + %cond = select i1 %cmp, float %a, float %b + ret float %cond +} + +define float @fp-armv8_vmaxnm_o(float %a, float %b) { +; CHECK-FAST: fp-armv8_vmaxnm_o +; CHECK-FAST-NOT: vcmp +; CHECK-FAST: vmaxnm.f32 +; CHECK: fp-armv8_vmaxnm_o +; CHECK-NOT: vmaxnm.f32 + %cmp = fcmp ogt float %a, %b + %cond = select i1 %cmp, float %a, float %b + ret float %cond +} + +define float @fp-armv8_vmaxnm_u(float %a, float %b) { +; CHECK-FAST: fp-armv8_vmaxnm_u +; CHECK-FAST-NOT: vcmp +; CHECK-FAST: vmaxnm.f32 +; CHECK: fp-armv8_vmaxnm_u +; CHECK-NOT: vmaxnm.f32 + %cmp = fcmp ugt float %a, %b + %cond = select i1 %cmp, float %a, 
float %b + ret float %cond +} + + declare <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float>, <4 x float>) nounwind readnone declare <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float>, <2 x float>) nounwind readnone declare <4 x float> @llvm.arm.neon.vmaxnm.v4f32(<4 x float>, <4 x float>) nounwind readnone diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll index 6210ad3..de329ac 100644 --- a/test/CodeGen/ARM/vmul.ll +++ b/test/CodeGen/ARM/vmul.ll @@ -515,6 +515,17 @@ entry: ret void } +define <8 x i8> @no_distribute(<8 x i8> %a, <8 x i8> %b) nounwind { +entry: +; CHECK: no_distribute +; CHECK: vadd.i8 +; CHECK: vmul.i8 +; CHECK-NOT: vmla.i8 + %0 = add <8 x i8> %a, %b + %1 = mul <8x i8> %0, %0 + ret <8 x i8> %1 +} + ; If one operand has a zero-extend and the other a sign-extend, vmull ; cannot be used. define i16 @vmullWithInconsistentExtensions(<8 x i8> %vec) { @@ -623,3 +634,21 @@ entry: store <4 x i32> %predphi290.v.i, <4 x i32>* undef, align 4 ret void } + +define void @foo(<4 x float> * %a, <4 x float>* nocapture %dst, float* nocapture readonly %src) nounwind { +; Look for doing a normal scalar FP load rather than an to-all-lanes load. +; e.g., "ldr s0, [r2]" rathern than "vld1.32 {d18[], d19[]}, [r2:32]" +; Then check that the vector multiply has folded the splat to all lanes +; and used a vector * scalar instruction. 
+; CHECK: vldr {{s[0-9]+}}, [r2] +; CHECK: vmul.f32 q8, q8, d0[0] + %tmp = load float* %src, align 4 + %tmp5 = load <4 x float>* %a, align 4 + %tmp6 = insertelement <4 x float> undef, float %tmp, i32 0 + %tmp7 = insertelement <4 x float> %tmp6, float %tmp, i32 1 + %tmp8 = insertelement <4 x float> %tmp7, float %tmp, i32 2 + %tmp9 = insertelement <4 x float> %tmp8, float %tmp, i32 3 + %tmp10 = fmul <4 x float> %tmp9, %tmp5 + store <4 x float> %tmp10, <4 x float>* %dst, align 4 + ret void +} diff --git a/test/CodeGen/ARM/vqdmul.ll b/test/CodeGen/ARM/vqdmul.ll index a28cae9..d298167 100644 --- a/test/CodeGen/ARM/vqdmul.ll +++ b/test/CodeGen/ARM/vqdmul.ll @@ -197,84 +197,92 @@ entry: declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone -define <4 x i32> @vqdmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { -;CHECK-LABEL: vqdmlals16: +define <4 x i32> @vqdmlals16_natural(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { +;CHECK-LABEL: vqdmlals16_natural: ;CHECK: vqdmlal.s16 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i16>* %B - %tmp3 = load <4 x i16>* %C - %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) - ret <4 x i32> %tmp4 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = load <4 x i16>* %C + %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp2, <4 x i16> %tmp3) + %tmp5 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp4) + ret <4 x i32> %tmp5 } -define <2 x i64> @vqdmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { -;CHECK-LABEL: vqdmlals32: +define <2 x i64> @vqdmlals32_natural(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +;CHECK-LABEL: vqdmlals32_natural: ;CHECK: vqdmlal.s32 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i32>* %B - %tmp3 = load <2 x i32>* %C - %tmp4 = call <2 x i64> 
@llvm.arm.neon.vqdmlal.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3) - ret <2 x i64> %tmp4 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = load <2 x i32>* %C + %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp2, <2 x i32> %tmp3) + %tmp5 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp4) + ret <2 x i64> %tmp5 } -define arm_aapcs_vfpcc <4 x i32> @test_vqdmlal_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone { +define arm_aapcs_vfpcc <4 x i32> @test_vqdmlal_lanes16_natural(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone { entry: -; CHECK: test_vqdmlal_lanes16 +; CHECK-LABEL: test_vqdmlal_lanes16_natural: ; CHECK: vqdmlal.s16 q0, d2, d3[1] %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] - %1 = tail call <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %1 + %1 = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %arg1_int16x4_t, <4 x i16> %0) + %2 = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %1) + ret <4 x i32> %2 } -define arm_aapcs_vfpcc <2 x i64> @test_vqdmlal_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone { +define arm_aapcs_vfpcc <2 x i64> @test_vqdmlal_lanes32_natural(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone { entry: -; CHECK: test_vqdmlal_lanes32 +; CHECK-LABEL: test_vqdmlal_lanes32_natural: ; CHECK: vqdmlal.s32 q0, d2, d3[1] %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] - %1 = tail call <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, 
<2 x i32> %0) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %1 + %1 = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %arg1_int32x2_t, <2 x i32> %0) + %2 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i64> %1) + ret <2 x i64> %2 } -declare <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) nounwind readnone -define <4 x i32> @vqdmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { -;CHECK-LABEL: vqdmlsls16: +define <4 x i32> @vqdmlsls16_natural(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { +;CHECK-LABEL: vqdmlsls16_natural: ;CHECK: vqdmlsl.s16 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i16>* %B - %tmp3 = load <4 x i16>* %C - %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3) - ret <4 x i32> %tmp4 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = load <4 x i16>* %C + %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp2, <4 x i16> %tmp3) + %tmp5 = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp4) + ret <4 x i32> %tmp5 } -define <2 x i64> @vqdmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { -;CHECK-LABEL: vqdmlsls32: +define <2 x i64> @vqdmlsls32_natural(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +;CHECK-LABEL: vqdmlsls32_natural: ;CHECK: vqdmlsl.s32 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i32>* %B - %tmp3 = load <2 x i32>* %C - %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3) - ret <2 x i64> %tmp4 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = load <2 x i32>* %C + %tmp4 = call <2 x i64> 
@llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp2, <2 x i32> %tmp3) + %tmp5 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp4) + ret <2 x i64> %tmp5 } -define arm_aapcs_vfpcc <4 x i32> @test_vqdmlsl_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone { +define arm_aapcs_vfpcc <4 x i32> @test_vqdmlsl_lanes16_natural(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone { entry: -; CHECK: test_vqdmlsl_lanes16 +; CHECK-LABEL: test_vqdmlsl_lanes16_natural: ; CHECK: vqdmlsl.s16 q0, d2, d3[1] %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1] - %1 = tail call <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %1 + %1 = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %arg1_int16x4_t, <4 x i16> %0) + %2 = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %1) + ret <4 x i32> %2 } -define arm_aapcs_vfpcc <2 x i64> @test_vqdmlsl_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone { +define arm_aapcs_vfpcc <2 x i64> @test_vqdmlsl_lanes32_natural(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone { entry: -; CHECK: test_vqdmlsl_lanes32 +; CHECK-LABEL: test_vqdmlsl_lanes32_natural: ; CHECK: vqdmlsl.s32 q0, d2, d3[1] %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1] - %1 = tail call <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %1 + %1 = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %arg1_int32x2_t, <2 x i32> %0) + %2 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> 
%arg0_int64x2_t, <2 x i64> %1) + ret <2 x i64> %2 } -declare <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>) nounwind readnone diff --git a/test/CodeGen/ARM/vsel.ll b/test/CodeGen/ARM/vsel.ll new file mode 100644 index 0000000..7e1f714 --- /dev/null +++ b/test/CodeGen/ARM/vsel.ll @@ -0,0 +1,309 @@ +; RUN: llc < %s -mtriple=armv8-linux-gnueabihf -mattr=+fp-armv8 -float-abi=hard | FileCheck %s +@varfloat = global float 0.0 +@vardouble = global double 0.0 +define void @test_vsel32sgt(i32 %lhs32, i32 %rhs32, float %a, float %b) { +; CHECK: test_vsel32sgt + %tst1 = icmp sgt i32 %lhs32, %rhs32 + %val1 = select i1 %tst1, float %a, float %b + store float %val1, float* @varfloat +; CHECK: cmp r0, r1 +; CHECK: vselgt.f32 s0, s0, s1 + ret void +} +define void @test_vsel64sgt(i32 %lhs32, i32 %rhs32, double %a, double %b) { +; CHECK: test_vsel64sgt + %tst1 = icmp sgt i32 %lhs32, %rhs32 + %val1 = select i1 %tst1, double %a, double %b + store double %val1, double* @vardouble +; CHECK: cmp r0, r1 +; CHECK: vselgt.f64 d16, d0, d1 + ret void +} +define void @test_vsel32sge(i32 %lhs32, i32 %rhs32, float %a, float %b) { +; CHECK: test_vsel32sge + %tst1 = icmp sge i32 %lhs32, %rhs32 + %val1 = select i1 %tst1, float %a, float %b + store float %val1, float* @varfloat +; CHECK: cmp r0, r1 +; CHECK: vselge.f32 s0, s0, s1 + ret void +} +define void @test_vsel64sge(i32 %lhs32, i32 %rhs32, double %a, double %b) { +; CHECK: test_vsel64sge + %tst1 = icmp sge i32 %lhs32, %rhs32 + %val1 = select i1 %tst1, double %a, double %b + store double %val1, double* @vardouble +; CHECK: cmp r0, r1 +; CHECK: vselge.f64 d16, d0, d1 + ret void +} +define void @test_vsel32eq(i32 %lhs32, i32 %rhs32, float %a, float %b) { 
+; CHECK: test_vsel32eq + %tst1 = icmp eq i32 %lhs32, %rhs32 + %val1 = select i1 %tst1, float %a, float %b + store float %val1, float* @varfloat +; CHECK: cmp r0, r1 +; CHECK: vseleq.f32 s0, s0, s1 + ret void +} +define void @test_vsel64eq(i32 %lhs32, i32 %rhs32, double %a, double %b) { +; CHECK: test_vsel64eq + %tst1 = icmp eq i32 %lhs32, %rhs32 + %val1 = select i1 %tst1, double %a, double %b + store double %val1, double* @vardouble +; CHECK: cmp r0, r1 +; CHECK: vseleq.f64 d16, d0, d1 + ret void +} +define void @test_vsel32slt(i32 %lhs32, i32 %rhs32, float %a, float %b) { +; CHECK: test_vsel32slt + %tst1 = icmp slt i32 %lhs32, %rhs32 + %val1 = select i1 %tst1, float %a, float %b + store float %val1, float* @varfloat +; CHECK: cmp r0, r1 +; CHECK: vselgt.f32 s0, s1, s0 + ret void +} +define void @test_vsel64slt(i32 %lhs32, i32 %rhs32, double %a, double %b) { +; CHECK: test_vsel64slt + %tst1 = icmp slt i32 %lhs32, %rhs32 + %val1 = select i1 %tst1, double %a, double %b + store double %val1, double* @vardouble +; CHECK: cmp r0, r1 +; CHECK: vselgt.f64 d16, d1, d0 + ret void +} +define void @test_vsel32sle(i32 %lhs32, i32 %rhs32, float %a, float %b) { +; CHECK: test_vsel32sle + %tst1 = icmp sle i32 %lhs32, %rhs32 + %val1 = select i1 %tst1, float %a, float %b + store float %val1, float* @varfloat +; CHECK: cmp r0, r1 +; CHECK: vselge.f32 s0, s1, s0 + ret void +} +define void @test_vsel64sle(i32 %lhs32, i32 %rhs32, double %a, double %b) { +; CHECK: test_vsel64sle + %tst1 = icmp sle i32 %lhs32, %rhs32 + %val1 = select i1 %tst1, double %a, double %b + store double %val1, double* @vardouble +; CHECK: cmp r0, r1 +; CHECK: vselge.f64 d16, d1, d0 + ret void +} +define void @test_vsel32ogt(float %lhs32, float %rhs32, float %a, float %b) { +; CHECK: test_vsel32ogt + %tst1 = fcmp ogt float %lhs32, %rhs32 + %val1 = select i1 %tst1, float %a, float %b + store float %val1, float* @varfloat +; CHECK: vcmpe.f32 s0, s1 +; CHECK: vselgt.f32 s0, s2, s3 + ret void +} +define void 
@test_vsel64ogt(float %lhs32, float %rhs32, double %a, double %b) { +; CHECK: test_vsel64ogt + %tst1 = fcmp ogt float %lhs32, %rhs32 + %val1 = select i1 %tst1, double %a, double %b + store double %val1, double* @vardouble +; CHECK: vcmpe.f32 s0, s1 +; CHECK: vselgt.f64 d16, d1, d2 + ret void +} +define void @test_vsel32oge(float %lhs32, float %rhs32, float %a, float %b) { +; CHECK: test_vsel32oge + %tst1 = fcmp oge float %lhs32, %rhs32 + %val1 = select i1 %tst1, float %a, float %b + store float %val1, float* @varfloat +; CHECK: vcmpe.f32 s0, s1 +; CHECK: vselge.f32 s0, s2, s3 + ret void +} +define void @test_vsel64oge(float %lhs32, float %rhs32, double %a, double %b) { +; CHECK: test_vsel64oge + %tst1 = fcmp oge float %lhs32, %rhs32 + %val1 = select i1 %tst1, double %a, double %b + store double %val1, double* @vardouble +; CHECK: vcmpe.f32 s0, s1 +; CHECK: vselge.f64 d16, d1, d2 + ret void +} +define void @test_vsel32oeq(float %lhs32, float %rhs32, float %a, float %b) { +; CHECK: test_vsel32oeq + %tst1 = fcmp oeq float %lhs32, %rhs32 + %val1 = select i1 %tst1, float %a, float %b + store float %val1, float* @varfloat +; CHECK: vcmpe.f32 s0, s1 +; CHECK: vseleq.f32 s0, s2, s3 + ret void +} +define void @test_vsel64oeq(float %lhs32, float %rhs32, double %a, double %b) { +; CHECK: test_vsel64oeq + %tst1 = fcmp oeq float %lhs32, %rhs32 + %val1 = select i1 %tst1, double %a, double %b + store double %val1, double* @vardouble +; CHECK: vcmpe.f32 s0, s1 +; CHECK: vseleq.f64 d16, d1, d2 + ret void +} +define void @test_vsel32ugt(float %lhs32, float %rhs32, float %a, float %b) { +; CHECK: test_vsel32ugt + %tst1 = fcmp ugt float %lhs32, %rhs32 + %val1 = select i1 %tst1, float %a, float %b + store float %val1, float* @varfloat +; CHECK: vcmpe.f32 s1, s0 +; CHECK: vselge.f32 s0, s3, s2 + ret void +} +define void @test_vsel64ugt(float %lhs32, float %rhs32, double %a, double %b) { +; CHECK: test_vsel64ugt + %tst1 = fcmp ugt float %lhs32, %rhs32 + %val1 = select i1 %tst1, double 
%a, double %b + store double %val1, double* @vardouble +; CHECK: vcmpe.f32 s1, s0 +; CHECK: vselge.f64 d16, d2, d1 + ret void +} +define void @test_vsel32uge(float %lhs32, float %rhs32, float %a, float %b) { +; CHECK: test_vsel32uge + %tst1 = fcmp uge float %lhs32, %rhs32 + %val1 = select i1 %tst1, float %a, float %b + store float %val1, float* @varfloat +; CHECK: vcmpe.f32 s1, s0 +; CHECK: vselgt.f32 s0, s3, s2 + ret void +} +define void @test_vsel64uge(float %lhs32, float %rhs32, double %a, double %b) { +; CHECK: test_vsel64uge + %tst1 = fcmp uge float %lhs32, %rhs32 + %val1 = select i1 %tst1, double %a, double %b + store double %val1, double* @vardouble +; CHECK: vcmpe.f32 s1, s0 +; CHECK: vselgt.f64 d16, d2, d1 + ret void +} +define void @test_vsel32olt(float %lhs32, float %rhs32, float %a, float %b) { +; CHECK: test_vsel32olt + %tst1 = fcmp olt float %lhs32, %rhs32 + %val1 = select i1 %tst1, float %a, float %b + store float %val1, float* @varfloat +; CHECK: vcmpe.f32 s1, s0 +; CHECK: vselgt.f32 s0, s2, s3 + ret void +} +define void @test_vsel64olt(float %lhs32, float %rhs32, double %a, double %b) { +; CHECK: test_vsel64olt + %tst1 = fcmp olt float %lhs32, %rhs32 + %val1 = select i1 %tst1, double %a, double %b + store double %val1, double* @vardouble +; CHECK: vcmpe.f32 s1, s0 +; CHECK: vselgt.f64 d16, d1, d2 + ret void +} +define void @test_vsel32ult(float %lhs32, float %rhs32, float %a, float %b) { +; CHECK: test_vsel32ult + %tst1 = fcmp ult float %lhs32, %rhs32 + %val1 = select i1 %tst1, float %a, float %b + store float %val1, float* @varfloat +; CHECK: vcmpe.f32 s0, s1 +; CHECK: vselge.f32 s0, s3, s2 + ret void +} +define void @test_vsel64ult(float %lhs32, float %rhs32, double %a, double %b) { +; CHECK: test_vsel64ult + %tst1 = fcmp ult float %lhs32, %rhs32 + %val1 = select i1 %tst1, double %a, double %b + store double %val1, double* @vardouble +; CHECK: vcmpe.f32 s0, s1 +; CHECK: vselge.f64 d16, d2, d1 + ret void +} +define void @test_vsel32ole(float 
%lhs32, float %rhs32, float %a, float %b) { +; CHECK: test_vsel32ole + %tst1 = fcmp ole float %lhs32, %rhs32 + %val1 = select i1 %tst1, float %a, float %b + store float %val1, float* @varfloat +; CHECK: vcmpe.f32 s1, s0 +; CHECK: vselge.f32 s0, s2, s3 + ret void +} +define void @test_vsel64ole(float %lhs32, float %rhs32, double %a, double %b) { +; CHECK: test_vsel64ole + %tst1 = fcmp ole float %lhs32, %rhs32 + %val1 = select i1 %tst1, double %a, double %b + store double %val1, double* @vardouble +; CHECK: vcmpe.f32 s1, s0 +; CHECK: vselge.f64 d16, d1, d2 + ret void +} +define void @test_vsel32ule(float %lhs32, float %rhs32, float %a, float %b) { +; CHECK: test_vsel32ule + %tst1 = fcmp ule float %lhs32, %rhs32 + %val1 = select i1 %tst1, float %a, float %b + store float %val1, float* @varfloat +; CHECK: vcmpe.f32 s0, s1 +; CHECK: vselgt.f32 s0, s3, s2 + ret void +} +define void @test_vsel64ule(float %lhs32, float %rhs32, double %a, double %b) { +; CHECK: test_vsel64ule + %tst1 = fcmp ule float %lhs32, %rhs32 + %val1 = select i1 %tst1, double %a, double %b + store double %val1, double* @vardouble +; CHECK: vcmpe.f32 s0, s1 +; CHECK: vselgt.f64 d16, d2, d1 + ret void +} +define void @test_vsel32ord(float %lhs32, float %rhs32, float %a, float %b) { +; CHECK: test_vsel32ord + %tst1 = fcmp ord float %lhs32, %rhs32 + %val1 = select i1 %tst1, float %a, float %b + store float %val1, float* @varfloat +; CHECK: vcmpe.f32 s0, s1 +; CHECK: vselvs.f32 s0, s3, s2 + ret void +} +define void @test_vsel64ord(float %lhs32, float %rhs32, double %a, double %b) { +; CHECK: test_vsel64ord + %tst1 = fcmp ord float %lhs32, %rhs32 + %val1 = select i1 %tst1, double %a, double %b + store double %val1, double* @vardouble +; CHECK: vcmpe.f32 s0, s1 +; CHECK: vselvs.f64 d16, d2, d1 + ret void +} +define void @test_vsel32une(float %lhs32, float %rhs32, float %a, float %b) { +; CHECK: test_vsel32une + %tst1 = fcmp une float %lhs32, %rhs32 + %val1 = select i1 %tst1, float %a, float %b + store float 
%val1, float* @varfloat +; CHECK: vcmpe.f32 s0, s1 +; CHECK: vseleq.f32 s0, s3, s2 + ret void +} +define void @test_vsel64une(float %lhs32, float %rhs32, double %a, double %b) { +; CHECK: test_vsel64une + %tst1 = fcmp une float %lhs32, %rhs32 + %val1 = select i1 %tst1, double %a, double %b + store double %val1, double* @vardouble +; CHECK: vcmpe.f32 s0, s1 +; CHECK: vseleq.f64 d16, d2, d1 + ret void +} +define void @test_vsel32uno(float %lhs32, float %rhs32, float %a, float %b) { +; CHECK: test_vsel32uno + %tst1 = fcmp uno float %lhs32, %rhs32 + %val1 = select i1 %tst1, float %a, float %b + store float %val1, float* @varfloat +; CHECK: vcmpe.f32 s0, s1 +; CHECK: vselvs.f32 s0, s2, s3 + ret void +} +define void @test_vsel64uno(float %lhs32, float %rhs32, double %a, double %b) { +; CHECK: test_vsel64uno + %tst1 = fcmp uno float %lhs32, %rhs32 + %val1 = select i1 %tst1, double %a, double %b + store double %val1, double* @vardouble +; CHECK: vcmpe.f32 s0, s1 +; CHECK: vselvs.f64 d16, d1, d2 + ret void +} diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll index 651b6d5..34c5c70 100644 --- a/test/CodeGen/ARM/vstlane.ll +++ b/test/CodeGen/ARM/vstlane.ll @@ -13,7 +13,7 @@ define void @vst1lanei8(i8* %A, <8 x i8>* %B) nounwind { ;Check for a post-increment updating store. define void @vst1lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vst1lanei8_update: -;CHECK: vst1.8 {d16[3]}, [r2]! +;CHECK: vst1.8 {d16[3]}, [{{r[0-9]}}]! 
%A = load i8** %ptr %tmp1 = load <8 x i8>* %B %tmp2 = extractelement <8 x i8> %tmp1, i32 3 diff --git a/test/CodeGen/ARM/vsub.ll b/test/CodeGen/ARM/vsub.ll index 89c3095..6b95b97 100644 --- a/test/CodeGen/ARM/vsub.ll +++ b/test/CodeGen/ARM/vsub.ll @@ -90,37 +90,33 @@ define <4 x float> @vsubQf32(<4 x float>* %A, <4 x float>* %B) nounwind { ret <4 x float> %tmp3 } -define <8 x i8> @vsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: vsubhni16: -;CHECK: vsubhn.i16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B - %tmp3 = call <8 x i8> @llvm.arm.neon.vsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2) - ret <8 x i8> %tmp3 +define <8 x i8> @vsubhni16_natural(<8 x i16> %A, <8 x i16> %B) nounwind { +; CHECK-LABEL: vsubhni16_natural: +; CHECK: vsubhn.i16 + %sum = sub <8 x i16> %A, %B + %shift = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + %trunc = trunc <8 x i16> %shift to <8 x i8> + ret <8 x i8> %trunc } -define <4 x i16> @vsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: vsubhni32: -;CHECK: vsubhn.i32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B - %tmp3 = call <4 x i16> @llvm.arm.neon.vsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2) - ret <4 x i16> %tmp3 +define <4 x i16> @vsubhni32_natural(<4 x i32> %A, <4 x i32> %B) nounwind { +; CHECK-LABEL: vsubhni32_natural: +; CHECK: vsubhn.i32 + %sum = sub <4 x i32> %A, %B + %shift = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> + %trunc = trunc <4 x i32> %shift to <4 x i16> + ret <4 x i16> %trunc } -define <2 x i32> @vsubhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind { -;CHECK-LABEL: vsubhni64: -;CHECK: vsubhn.i64 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B - %tmp3 = call <2 x i32> @llvm.arm.neon.vsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2) - ret <2 x i32> %tmp3 +define <2 x i32> @vsubhni64_natural(<2 x i64> %A, <2 x i64> %B) nounwind { +; CHECK-LABEL: vsubhni64_natural: +; CHECK: vsubhn.i64 + %sum = sub <2 x i64> %A, %B + 
%shift = lshr <2 x i64> %sum, <i64 32, i64 32> + %trunc = trunc <2 x i64> %shift to <2 x i32> + ret <2 x i32> %trunc } -declare <8 x i8> @llvm.arm.neon.vsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i16> @llvm.arm.neon.vsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i32> @llvm.arm.neon.vsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone - define <8 x i8> @vrsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vrsubhni16: ;CHECK: vrsubhn.i16 diff --git a/test/CodeGen/CPP/lit.local.cfg b/test/CodeGen/CPP/lit.local.cfg index 4d4b4a4..4063dd1 100644 --- a/test/CodeGen/CPP/lit.local.cfg +++ b/test/CodeGen/CPP/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - targets = set(config.root.targets_to_build.split()) if not 'CppBackend' in targets: config.unsupported = True diff --git a/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll b/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll index 6281ada..3f17ce1 100644 --- a/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll +++ b/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll @@ -63,30 +63,58 @@ bb47: ; preds = %bb46, %bb44 br label %bb44 } -declare i32 @pthread_once(i32*, void ()*) +define i32 @pthread_once(i32*, void ()*) { + ret i32 0 +} -declare i8* @pthread_getspecific(i32) +define i8* @pthread_getspecific(i32) { + ret i8* null +} -declare i32 @pthread_setspecific(i32, i8*) +define i32 @pthread_setspecific(i32, i8*) { + ret i32 0 +} -declare i32 @pthread_create(i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*) +define i32 @pthread_create(i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*) { + ret i32 0 +} -declare i32 @pthread_cancel(i64) +define i32 @pthread_cancel(i64) { + ret i32 0 +} -declare i32 @pthread_mutex_lock(%struct.pthread_mutex_t*) +define i32 @pthread_mutex_lock(%struct.pthread_mutex_t*) { + ret i32 0 +} -declare i32 @pthread_mutex_trylock(%struct.pthread_mutex_t*) +define i32 @pthread_mutex_trylock(%struct.pthread_mutex_t*) { + ret i32 0 +} -declare i32 
@pthread_mutex_unlock(%struct.pthread_mutex_t*) +define i32 @pthread_mutex_unlock(%struct.pthread_mutex_t*) { + ret i32 0 +} -declare i32 @pthread_mutex_init(%struct.pthread_mutex_t*, %struct.Alignment*) +define i32 @pthread_mutex_init(%struct.pthread_mutex_t*, %struct.Alignment*) { + ret i32 0 +} -declare i32 @pthread_key_create(i32*, void (i8*)*) +define i32 @pthread_key_create(i32*, void (i8*)*) { + ret i32 0 +} -declare i32 @pthread_key_delete(i32) +define i32 @pthread_key_delete(i32) { + ret i32 0 +} -declare i32 @pthread_mutexattr_init(%struct.Alignment*) +define i32 @pthread_mutexattr_init(%struct.Alignment*) { + ret i32 0 +} -declare i32 @pthread_mutexattr_settype(%struct.Alignment*, i32) +define i32 @pthread_mutexattr_settype(%struct.Alignment*, i32) { + ret i32 0 +} -declare i32 @pthread_mutexattr_destroy(%struct.Alignment*) +define i32 @pthread_mutexattr_destroy(%struct.Alignment*) { + ret i32 0 +} diff --git a/test/CodeGen/Generic/crash.ll b/test/CodeGen/Generic/crash.ll index d3fc204..8de6b0d 100644 --- a/test/CodeGen/Generic/crash.ll +++ b/test/CodeGen/Generic/crash.ll @@ -23,7 +23,7 @@ bb32: ; preds = %bb6 %3 = load double* %1, align 4 %4 = load double* %0, align 4 call void @Parse_Vector(double* %0) nounwind -%5 = call i32 @llvm.objectsize.i32(i8* undef, i1 false) +%5 = call i32 @llvm.objectsize.i32.p0i8(i8* undef, i1 false) %6 = icmp eq i32 %5, -1 br i1 %6, label %bb34, label %bb33 @@ -36,7 +36,7 @@ unreachable } declare void @Parse_Vector(double*) -declare i32 @llvm.objectsize.i32(i8*, i1) +declare i32 @llvm.objectsize.i32.p0i8(i8*, i1) ; PR9578 diff --git a/test/CodeGen/Generic/lit.local.cfg b/test/CodeGen/Generic/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/CodeGen/Generic/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/CodeGen/Hexagon/BranchPredict.ll b/test/CodeGen/Hexagon/BranchPredict.ll index 716e85d..4ab1966 100644 --- a/test/CodeGen/Hexagon/BranchPredict.ll 
+++ b/test/CodeGen/Hexagon/BranchPredict.ll @@ -53,7 +53,7 @@ return: ; preds = %if.else, %if.then define i32 @foo_bar(i32 %a, i16 signext %b) nounwind { ; CHECK: if{{ *}}(!cmp.eq(r{{[0-9]*}}.new, #0)) jump:nt entry: - %0 = load i32* @j, align 4, !tbaa !2 + %0 = load i32* @j, align 4 %tobool = icmp eq i32 %0, 0 br i1 %tobool, label %if.else, label %if.then, !prof !0 @@ -74,6 +74,3 @@ return: ; preds = %if.else, %if.then !0 = metadata !{metadata !"branch_weights", i32 64, i32 4} !1 = metadata !{metadata !"branch_weights", i32 4, i32 64} -!2 = metadata !{metadata !"int", metadata !3} -!3 = metadata !{metadata !"omnipotent char", metadata !4} -!4 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/Hexagon/combine_ir.ll b/test/CodeGen/Hexagon/combine_ir.ll index 8b99ef7..e100cf7 100644 --- a/test/CodeGen/Hexagon/combine_ir.ll +++ b/test/CodeGen/Hexagon/combine_ir.ll @@ -4,7 +4,7 @@ define void @word(i32* nocapture %a) nounwind { entry: - %0 = load i32* %a, align 4, !tbaa !0 + %0 = load i32* %a, align 4 %1 = zext i32 %0 to i64 tail call void @bar(i64 %1) nounwind ret void @@ -17,10 +17,10 @@ declare void @bar(i64) define void @halfword(i16* nocapture %a) nounwind { entry: - %0 = load i16* %a, align 2, !tbaa !3 + %0 = load i16* %a, align 2 %1 = zext i16 %0 to i64 %add.ptr = getelementptr inbounds i16* %a, i32 1 - %2 = load i16* %add.ptr, align 2, !tbaa !3 + %2 = load i16* %add.ptr, align 2 %3 = zext i16 %2 to i64 %4 = shl nuw nsw i64 %3, 16 %ins = or i64 %4, %1 @@ -33,18 +33,13 @@ entry: define void @byte(i8* nocapture %a) nounwind { entry: - %0 = load i8* %a, align 1, !tbaa !1 + %0 = load i8* %a, align 1 %1 = zext i8 %0 to i64 %add.ptr = getelementptr inbounds i8* %a, i32 1 - %2 = load i8* %add.ptr, align 1, !tbaa !1 + %2 = load i8* %add.ptr, align 1 %3 = zext i8 %2 to i64 %4 = shl nuw nsw i64 %3, 8 %ins = or i64 %4, %1 tail call void @bar(i64 %ins) nounwind ret void } - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata 
!"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} -!3 = metadata !{metadata !"short", metadata !1} diff --git a/test/CodeGen/Hexagon/hwloop-dbg.ll b/test/CodeGen/Hexagon/hwloop-dbg.ll index fce6d19..bfdd813 100644 --- a/test/CodeGen/Hexagon/hwloop-dbg.ll +++ b/test/CodeGen/Hexagon/hwloop-dbg.ll @@ -35,13 +35,14 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!29} !0 = metadata !{i32 786449, metadata !28, i32 12, metadata !"QuIC LLVM Hexagon Clang version 6.1-pre-unknown, (git://git-hexagon-aus.quicinc.com/llvm/clang-mainline.git e9382867661454cdf44addb39430741578e9765c) (llvm/llvm-mainline.git 36412bb1fcf03ed426d4437b41198bae066675ac)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c] [DW_LANG_C99] !2 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 786478, metadata !28, null, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32*)* @foo, null, null, metadata !11, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo] !6 = metadata !{i32 786473, metadata !28} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{null, metadata !9, metadata !9} !9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from int] !10 = metadata !{i32 786468, null, null, metadata 
!"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] @@ -60,3 +61,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !26 = metadata !{i32 3, i32 23, metadata !20, null} !27 = metadata !{i32 6, i32 1, metadata !16, null} !28 = metadata !{metadata !"hwloop-dbg.c", metadata !"/usr2/kparzysz/s.hex/t"} +!29 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/Hexagon/lit.local.cfg b/test/CodeGen/Hexagon/lit.local.cfg index 24324b2..e96bab8 100644 --- a/test/CodeGen/Hexagon/lit.local.cfg +++ b/test/CodeGen/Hexagon/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - targets = set(config.root.targets_to_build.split()) if not 'Hexagon' in targets: config.unsupported = True diff --git a/test/CodeGen/Hexagon/memops.ll b/test/CodeGen/Hexagon/memops.ll index 5498848..fca1a73 100644 --- a/test/CodeGen/Hexagon/memops.ll +++ b/test/CodeGen/Hexagon/memops.ll @@ -4,11 +4,11 @@ define void @memop_unsigned_char_add5(i8* nocapture %p) nounwind { entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 - %0 = load i8* %p, align 1, !tbaa !0 + %0 = load i8* %p, align 1 %conv = zext i8 %0 to i32 %add = add nsw i32 %conv, 5 %conv1 = trunc i32 %add to i8 - store i8 %conv1, i8* %p, align 1, !tbaa !0 + store i8 %conv1, i8* %p, align 1 ret void } @@ -16,11 +16,11 @@ define void @memop_unsigned_char_add(i8* nocapture %p, i8 zeroext %x) nounwind { entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} %conv = zext i8 %x to i32 - %0 = load i8* %p, align 1, !tbaa !0 + %0 = load i8* %p, align 1 %conv1 = zext i8 %0 to i32 %add = add nsw i32 %conv1, %conv %conv2 = trunc i32 %add to i8 - store i8 %conv2, i8* %p, align 1, !tbaa !0 + store i8 %conv2, i8* %p, align 1 ret void } @@ -28,51 +28,51 @@ define void @memop_unsigned_char_sub(i8* nocapture %p, i8 zeroext %x) nounwind { entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ 
*}}#0){{ *}}-={{ *}}r{{[0-9]+}} %conv = zext i8 %x to i32 - %0 = load i8* %p, align 1, !tbaa !0 + %0 = load i8* %p, align 1 %conv1 = zext i8 %0 to i32 %sub = sub nsw i32 %conv1, %conv %conv2 = trunc i32 %sub to i8 - store i8 %conv2, i8* %p, align 1, !tbaa !0 + store i8 %conv2, i8* %p, align 1 ret void } define void @memop_unsigned_char_or(i8* nocapture %p, i8 zeroext %x) nounwind { entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} - %0 = load i8* %p, align 1, !tbaa !0 + %0 = load i8* %p, align 1 %or3 = or i8 %0, %x - store i8 %or3, i8* %p, align 1, !tbaa !0 + store i8 %or3, i8* %p, align 1 ret void } define void @memop_unsigned_char_and(i8* nocapture %p, i8 zeroext %x) nounwind { entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} - %0 = load i8* %p, align 1, !tbaa !0 + %0 = load i8* %p, align 1 %and3 = and i8 %0, %x - store i8 %and3, i8* %p, align 1, !tbaa !0 + store i8 %and3, i8* %p, align 1 ret void } define void @memop_unsigned_char_clrbit(i8* nocapture %p) nounwind { entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) - %0 = load i8* %p, align 1, !tbaa !0 + %0 = load i8* %p, align 1 %conv = zext i8 %0 to i32 %and = and i32 %conv, 223 %conv1 = trunc i32 %and to i8 - store i8 %conv1, i8* %p, align 1, !tbaa !0 + store i8 %conv1, i8* %p, align 1 ret void } define void @memop_unsigned_char_setbit(i8* nocapture %p) nounwind { entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) - %0 = load i8* %p, align 1, !tbaa !0 + %0 = load i8* %p, align 1 %conv = zext i8 %0 to i32 %or = or i32 %conv, 128 %conv1 = trunc i32 %or to i8 - store i8 %conv1, i8* %p, align 1, !tbaa !0 + store i8 %conv1, i8* %p, align 1 ret void } @@ -80,11 +80,11 @@ define void @memop_unsigned_char_add5_index(i8* nocapture %p, i32 %i) nounwind { entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 %add.ptr = getelementptr inbounds i8* %p, i32 %i - %0 = load i8* %add.ptr, align 1, 
!tbaa !0 + %0 = load i8* %add.ptr, align 1 %conv = zext i8 %0 to i32 %add = add nsw i32 %conv, 5 %conv1 = trunc i32 %add to i8 - store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + store i8 %conv1, i8* %add.ptr, align 1 ret void } @@ -93,11 +93,11 @@ entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} %conv = zext i8 %x to i32 %add.ptr = getelementptr inbounds i8* %p, i32 %i - %0 = load i8* %add.ptr, align 1, !tbaa !0 + %0 = load i8* %add.ptr, align 1 %conv1 = zext i8 %0 to i32 %add = add nsw i32 %conv1, %conv %conv2 = trunc i32 %add to i8 - store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0 + store i8 %conv2, i8* %add.ptr, align 1 ret void } @@ -106,11 +106,11 @@ entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} %conv = zext i8 %x to i32 %add.ptr = getelementptr inbounds i8* %p, i32 %i - %0 = load i8* %add.ptr, align 1, !tbaa !0 + %0 = load i8* %add.ptr, align 1 %conv1 = zext i8 %0 to i32 %sub = sub nsw i32 %conv1, %conv %conv2 = trunc i32 %sub to i8 - store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0 + store i8 %conv2, i8* %add.ptr, align 1 ret void } @@ -118,9 +118,9 @@ define void @memop_unsigned_char_or_index(i8* nocapture %p, i32 %i, i8 zeroext % entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i8* %p, i32 %i - %0 = load i8* %add.ptr, align 1, !tbaa !0 + %0 = load i8* %add.ptr, align 1 %or3 = or i8 %0, %x - store i8 %or3, i8* %add.ptr, align 1, !tbaa !0 + store i8 %or3, i8* %add.ptr, align 1 ret void } @@ -128,9 +128,9 @@ define void @memop_unsigned_char_and_index(i8* nocapture %p, i32 %i, i8 zeroext entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i8* %p, i32 %i - %0 = load i8* %add.ptr, align 1, !tbaa !0 + %0 = load i8* %add.ptr, align 1 %and3 = and i8 %0, %x - store i8 %and3, i8* %add.ptr, align 1, !tbaa !0 + store i8 %and3, i8* %add.ptr, align 1 ret void } @@ -138,11 +138,11 @@ define void 
@memop_unsigned_char_clrbit_index(i8* nocapture %p, i32 %i) nounwind entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) %add.ptr = getelementptr inbounds i8* %p, i32 %i - %0 = load i8* %add.ptr, align 1, !tbaa !0 + %0 = load i8* %add.ptr, align 1 %conv = zext i8 %0 to i32 %and = and i32 %conv, 223 %conv1 = trunc i32 %and to i8 - store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + store i8 %conv1, i8* %add.ptr, align 1 ret void } @@ -150,11 +150,11 @@ define void @memop_unsigned_char_setbit_index(i8* nocapture %p, i32 %i) nounwind entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) %add.ptr = getelementptr inbounds i8* %p, i32 %i - %0 = load i8* %add.ptr, align 1, !tbaa !0 + %0 = load i8* %add.ptr, align 1 %conv = zext i8 %0 to i32 %or = or i32 %conv, 128 %conv1 = trunc i32 %or to i8 - store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + store i8 %conv1, i8* %add.ptr, align 1 ret void } @@ -162,11 +162,11 @@ define void @memop_unsigned_char_add5_index5(i8* nocapture %p) nounwind { entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}#5 %add.ptr = getelementptr inbounds i8* %p, i32 5 - %0 = load i8* %add.ptr, align 1, !tbaa !0 + %0 = load i8* %add.ptr, align 1 %conv = zext i8 %0 to i32 %add = add nsw i32 %conv, 5 %conv1 = trunc i32 %add to i8 - store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + store i8 %conv1, i8* %add.ptr, align 1 ret void } @@ -175,11 +175,11 @@ entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}r{{[0-9]+}} %conv = zext i8 %x to i32 %add.ptr = getelementptr inbounds i8* %p, i32 5 - %0 = load i8* %add.ptr, align 1, !tbaa !0 + %0 = load i8* %add.ptr, align 1 %conv1 = zext i8 %0 to i32 %add = add nsw i32 %conv1, %conv %conv2 = trunc i32 %add to i8 - store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0 + store i8 %conv2, i8* %add.ptr, align 1 ret void } @@ -188,11 +188,11 @@ entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}-={{ *}}r{{[0-9]+}} %conv = zext i8 %x to i32 
%add.ptr = getelementptr inbounds i8* %p, i32 5 - %0 = load i8* %add.ptr, align 1, !tbaa !0 + %0 = load i8* %add.ptr, align 1 %conv1 = zext i8 %0 to i32 %sub = sub nsw i32 %conv1, %conv %conv2 = trunc i32 %sub to i8 - store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0 + store i8 %conv2, i8* %add.ptr, align 1 ret void } @@ -200,9 +200,9 @@ define void @memop_unsigned_char_or_index5(i8* nocapture %p, i8 zeroext %x) noun entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}|={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i8* %p, i32 5 - %0 = load i8* %add.ptr, align 1, !tbaa !0 + %0 = load i8* %add.ptr, align 1 %or3 = or i8 %0, %x - store i8 %or3, i8* %add.ptr, align 1, !tbaa !0 + store i8 %or3, i8* %add.ptr, align 1 ret void } @@ -210,9 +210,9 @@ define void @memop_unsigned_char_and_index5(i8* nocapture %p, i8 zeroext %x) nou entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}&={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i8* %p, i32 5 - %0 = load i8* %add.ptr, align 1, !tbaa !0 + %0 = load i8* %add.ptr, align 1 %and3 = and i8 %0, %x - store i8 %and3, i8* %add.ptr, align 1, !tbaa !0 + store i8 %and3, i8* %add.ptr, align 1 ret void } @@ -220,11 +220,11 @@ define void @memop_unsigned_char_clrbit_index5(i8* nocapture %p) nounwind { entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) %add.ptr = getelementptr inbounds i8* %p, i32 5 - %0 = load i8* %add.ptr, align 1, !tbaa !0 + %0 = load i8* %add.ptr, align 1 %conv = zext i8 %0 to i32 %and = and i32 %conv, 223 %conv1 = trunc i32 %and to i8 - store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + store i8 %conv1, i8* %add.ptr, align 1 ret void } @@ -232,22 +232,22 @@ define void @memop_unsigned_char_setbit_index5(i8* nocapture %p) nounwind { entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) %add.ptr = getelementptr inbounds i8* %p, i32 5 - %0 = load i8* %add.ptr, align 1, !tbaa !0 + %0 = load i8* %add.ptr, align 1 %conv = zext i8 %0 to i32 %or = or 
i32 %conv, 128 %conv1 = trunc i32 %or to i8 - store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + store i8 %conv1, i8* %add.ptr, align 1 ret void } define void @memop_signed_char_add5(i8* nocapture %p) nounwind { entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 - %0 = load i8* %p, align 1, !tbaa !0 + %0 = load i8* %p, align 1 %conv2 = zext i8 %0 to i32 %add = add nsw i32 %conv2, 5 %conv1 = trunc i32 %add to i8 - store i8 %conv1, i8* %p, align 1, !tbaa !0 + store i8 %conv1, i8* %p, align 1 ret void } @@ -255,11 +255,11 @@ define void @memop_signed_char_add(i8* nocapture %p, i8 signext %x) nounwind { entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} %conv4 = zext i8 %x to i32 - %0 = load i8* %p, align 1, !tbaa !0 + %0 = load i8* %p, align 1 %conv13 = zext i8 %0 to i32 %add = add nsw i32 %conv13, %conv4 %conv2 = trunc i32 %add to i8 - store i8 %conv2, i8* %p, align 1, !tbaa !0 + store i8 %conv2, i8* %p, align 1 ret void } @@ -267,51 +267,51 @@ define void @memop_signed_char_sub(i8* nocapture %p, i8 signext %x) nounwind { entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} %conv4 = zext i8 %x to i32 - %0 = load i8* %p, align 1, !tbaa !0 + %0 = load i8* %p, align 1 %conv13 = zext i8 %0 to i32 %sub = sub nsw i32 %conv13, %conv4 %conv2 = trunc i32 %sub to i8 - store i8 %conv2, i8* %p, align 1, !tbaa !0 + store i8 %conv2, i8* %p, align 1 ret void } define void @memop_signed_char_or(i8* nocapture %p, i8 signext %x) nounwind { entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} - %0 = load i8* %p, align 1, !tbaa !0 + %0 = load i8* %p, align 1 %or3 = or i8 %0, %x - store i8 %or3, i8* %p, align 1, !tbaa !0 + store i8 %or3, i8* %p, align 1 ret void } define void @memop_signed_char_and(i8* nocapture %p, i8 signext %x) nounwind { entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} - %0 = load i8* %p, align 1, !tbaa !0 + %0 = load i8* %p, align 1 %and3 = and i8 %0, %x - store 
i8 %and3, i8* %p, align 1, !tbaa !0 + store i8 %and3, i8* %p, align 1 ret void } define void @memop_signed_char_clrbit(i8* nocapture %p) nounwind { entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) - %0 = load i8* %p, align 1, !tbaa !0 + %0 = load i8* %p, align 1 %conv2 = zext i8 %0 to i32 %and = and i32 %conv2, 223 %conv1 = trunc i32 %and to i8 - store i8 %conv1, i8* %p, align 1, !tbaa !0 + store i8 %conv1, i8* %p, align 1 ret void } define void @memop_signed_char_setbit(i8* nocapture %p) nounwind { entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) - %0 = load i8* %p, align 1, !tbaa !0 + %0 = load i8* %p, align 1 %conv2 = zext i8 %0 to i32 %or = or i32 %conv2, 128 %conv1 = trunc i32 %or to i8 - store i8 %conv1, i8* %p, align 1, !tbaa !0 + store i8 %conv1, i8* %p, align 1 ret void } @@ -319,11 +319,11 @@ define void @memop_signed_char_add5_index(i8* nocapture %p, i32 %i) nounwind { entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 %add.ptr = getelementptr inbounds i8* %p, i32 %i - %0 = load i8* %add.ptr, align 1, !tbaa !0 + %0 = load i8* %add.ptr, align 1 %conv2 = zext i8 %0 to i32 %add = add nsw i32 %conv2, 5 %conv1 = trunc i32 %add to i8 - store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + store i8 %conv1, i8* %add.ptr, align 1 ret void } @@ -332,11 +332,11 @@ entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} %conv4 = zext i8 %x to i32 %add.ptr = getelementptr inbounds i8* %p, i32 %i - %0 = load i8* %add.ptr, align 1, !tbaa !0 + %0 = load i8* %add.ptr, align 1 %conv13 = zext i8 %0 to i32 %add = add nsw i32 %conv13, %conv4 %conv2 = trunc i32 %add to i8 - store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0 + store i8 %conv2, i8* %add.ptr, align 1 ret void } @@ -345,11 +345,11 @@ entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} %conv4 = zext i8 %x to i32 %add.ptr = getelementptr inbounds i8* %p, i32 %i - %0 = load i8* %add.ptr, align 1, 
!tbaa !0 + %0 = load i8* %add.ptr, align 1 %conv13 = zext i8 %0 to i32 %sub = sub nsw i32 %conv13, %conv4 %conv2 = trunc i32 %sub to i8 - store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0 + store i8 %conv2, i8* %add.ptr, align 1 ret void } @@ -357,9 +357,9 @@ define void @memop_signed_char_or_index(i8* nocapture %p, i32 %i, i8 signext %x) entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i8* %p, i32 %i - %0 = load i8* %add.ptr, align 1, !tbaa !0 + %0 = load i8* %add.ptr, align 1 %or3 = or i8 %0, %x - store i8 %or3, i8* %add.ptr, align 1, !tbaa !0 + store i8 %or3, i8* %add.ptr, align 1 ret void } @@ -367,9 +367,9 @@ define void @memop_signed_char_and_index(i8* nocapture %p, i32 %i, i8 signext %x entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i8* %p, i32 %i - %0 = load i8* %add.ptr, align 1, !tbaa !0 + %0 = load i8* %add.ptr, align 1 %and3 = and i8 %0, %x - store i8 %and3, i8* %add.ptr, align 1, !tbaa !0 + store i8 %and3, i8* %add.ptr, align 1 ret void } @@ -377,11 +377,11 @@ define void @memop_signed_char_clrbit_index(i8* nocapture %p, i32 %i) nounwind { entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) %add.ptr = getelementptr inbounds i8* %p, i32 %i - %0 = load i8* %add.ptr, align 1, !tbaa !0 + %0 = load i8* %add.ptr, align 1 %conv2 = zext i8 %0 to i32 %and = and i32 %conv2, 223 %conv1 = trunc i32 %and to i8 - store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + store i8 %conv1, i8* %add.ptr, align 1 ret void } @@ -389,11 +389,11 @@ define void @memop_signed_char_setbit_index(i8* nocapture %p, i32 %i) nounwind { entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) %add.ptr = getelementptr inbounds i8* %p, i32 %i - %0 = load i8* %add.ptr, align 1, !tbaa !0 + %0 = load i8* %add.ptr, align 1 %conv2 = zext i8 %0 to i32 %or = or i32 %conv2, 128 %conv1 = trunc i32 %or to i8 - store i8 %conv1, 
i8* %add.ptr, align 1, !tbaa !0 + store i8 %conv1, i8* %add.ptr, align 1 ret void } @@ -401,11 +401,11 @@ define void @memop_signed_char_add5_index5(i8* nocapture %p) nounwind { entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}#5 %add.ptr = getelementptr inbounds i8* %p, i32 5 - %0 = load i8* %add.ptr, align 1, !tbaa !0 + %0 = load i8* %add.ptr, align 1 %conv2 = zext i8 %0 to i32 %add = add nsw i32 %conv2, 5 %conv1 = trunc i32 %add to i8 - store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + store i8 %conv1, i8* %add.ptr, align 1 ret void } @@ -414,11 +414,11 @@ entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}r{{[0-9]+}} %conv4 = zext i8 %x to i32 %add.ptr = getelementptr inbounds i8* %p, i32 5 - %0 = load i8* %add.ptr, align 1, !tbaa !0 + %0 = load i8* %add.ptr, align 1 %conv13 = zext i8 %0 to i32 %add = add nsw i32 %conv13, %conv4 %conv2 = trunc i32 %add to i8 - store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0 + store i8 %conv2, i8* %add.ptr, align 1 ret void } @@ -427,11 +427,11 @@ entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}-={{ *}}r{{[0-9]+}} %conv4 = zext i8 %x to i32 %add.ptr = getelementptr inbounds i8* %p, i32 5 - %0 = load i8* %add.ptr, align 1, !tbaa !0 + %0 = load i8* %add.ptr, align 1 %conv13 = zext i8 %0 to i32 %sub = sub nsw i32 %conv13, %conv4 %conv2 = trunc i32 %sub to i8 - store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0 + store i8 %conv2, i8* %add.ptr, align 1 ret void } @@ -439,9 +439,9 @@ define void @memop_signed_char_or_index5(i8* nocapture %p, i8 signext %x) nounwi entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}|={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i8* %p, i32 5 - %0 = load i8* %add.ptr, align 1, !tbaa !0 + %0 = load i8* %add.ptr, align 1 %or3 = or i8 %0, %x - store i8 %or3, i8* %add.ptr, align 1, !tbaa !0 + store i8 %or3, i8* %add.ptr, align 1 ret void } @@ -449,9 +449,9 @@ define void @memop_signed_char_and_index5(i8* nocapture %p, i8 signext %x) nounw entry: ; CHECK: 
memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}&={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i8* %p, i32 5 - %0 = load i8* %add.ptr, align 1, !tbaa !0 + %0 = load i8* %add.ptr, align 1 %and3 = and i8 %0, %x - store i8 %and3, i8* %add.ptr, align 1, !tbaa !0 + store i8 %and3, i8* %add.ptr, align 1 ret void } @@ -459,11 +459,11 @@ define void @memop_signed_char_clrbit_index5(i8* nocapture %p) nounwind { entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) %add.ptr = getelementptr inbounds i8* %p, i32 5 - %0 = load i8* %add.ptr, align 1, !tbaa !0 + %0 = load i8* %add.ptr, align 1 %conv2 = zext i8 %0 to i32 %and = and i32 %conv2, 223 %conv1 = trunc i32 %and to i8 - store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + store i8 %conv1, i8* %add.ptr, align 1 ret void } @@ -471,22 +471,22 @@ define void @memop_signed_char_setbit_index5(i8* nocapture %p) nounwind { entry: ; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) %add.ptr = getelementptr inbounds i8* %p, i32 5 - %0 = load i8* %add.ptr, align 1, !tbaa !0 + %0 = load i8* %add.ptr, align 1 %conv2 = zext i8 %0 to i32 %or = or i32 %conv2, 128 %conv1 = trunc i32 %or to i8 - store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + store i8 %conv1, i8* %add.ptr, align 1 ret void } define void @memop_unsigned_short_add5(i16* nocapture %p) nounwind { entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 - %0 = load i16* %p, align 2, !tbaa !2 + %0 = load i16* %p, align 2 %conv = zext i16 %0 to i32 %add = add nsw i32 %conv, 5 %conv1 = trunc i32 %add to i16 - store i16 %conv1, i16* %p, align 2, !tbaa !2 + store i16 %conv1, i16* %p, align 2 ret void } @@ -494,11 +494,11 @@ define void @memop_unsigned_short_add(i16* nocapture %p, i16 zeroext %x) nounwin entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} %conv = zext i16 %x to i32 - %0 = load i16* %p, align 2, !tbaa !2 + %0 = load i16* %p, align 2 %conv1 = zext i16 %0 to i32 %add = add nsw i32 %conv1, 
%conv %conv2 = trunc i32 %add to i16 - store i16 %conv2, i16* %p, align 2, !tbaa !2 + store i16 %conv2, i16* %p, align 2 ret void } @@ -506,51 +506,51 @@ define void @memop_unsigned_short_sub(i16* nocapture %p, i16 zeroext %x) nounwin entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} %conv = zext i16 %x to i32 - %0 = load i16* %p, align 2, !tbaa !2 + %0 = load i16* %p, align 2 %conv1 = zext i16 %0 to i32 %sub = sub nsw i32 %conv1, %conv %conv2 = trunc i32 %sub to i16 - store i16 %conv2, i16* %p, align 2, !tbaa !2 + store i16 %conv2, i16* %p, align 2 ret void } define void @memop_unsigned_short_or(i16* nocapture %p, i16 zeroext %x) nounwind { entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} - %0 = load i16* %p, align 2, !tbaa !2 + %0 = load i16* %p, align 2 %or3 = or i16 %0, %x - store i16 %or3, i16* %p, align 2, !tbaa !2 + store i16 %or3, i16* %p, align 2 ret void } define void @memop_unsigned_short_and(i16* nocapture %p, i16 zeroext %x) nounwind { entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} - %0 = load i16* %p, align 2, !tbaa !2 + %0 = load i16* %p, align 2 %and3 = and i16 %0, %x - store i16 %and3, i16* %p, align 2, !tbaa !2 + store i16 %and3, i16* %p, align 2 ret void } define void @memop_unsigned_short_clrbit(i16* nocapture %p) nounwind { entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) - %0 = load i16* %p, align 2, !tbaa !2 + %0 = load i16* %p, align 2 %conv = zext i16 %0 to i32 %and = and i32 %conv, 65503 %conv1 = trunc i32 %and to i16 - store i16 %conv1, i16* %p, align 2, !tbaa !2 + store i16 %conv1, i16* %p, align 2 ret void } define void @memop_unsigned_short_setbit(i16* nocapture %p) nounwind { entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) - %0 = load i16* %p, align 2, !tbaa !2 + %0 = load i16* %p, align 2 %conv = zext i16 %0 to i32 %or = or i32 %conv, 128 %conv1 = trunc i32 %or to i16 - store i16 %conv1, 
i16* %p, align 2, !tbaa !2 + store i16 %conv1, i16* %p, align 2 ret void } @@ -558,11 +558,11 @@ define void @memop_unsigned_short_add5_index(i16* nocapture %p, i32 %i) nounwind entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 %add.ptr = getelementptr inbounds i16* %p, i32 %i - %0 = load i16* %add.ptr, align 2, !tbaa !2 + %0 = load i16* %add.ptr, align 2 %conv = zext i16 %0 to i32 %add = add nsw i32 %conv, 5 %conv1 = trunc i32 %add to i16 - store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + store i16 %conv1, i16* %add.ptr, align 2 ret void } @@ -571,11 +571,11 @@ entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} %conv = zext i16 %x to i32 %add.ptr = getelementptr inbounds i16* %p, i32 %i - %0 = load i16* %add.ptr, align 2, !tbaa !2 + %0 = load i16* %add.ptr, align 2 %conv1 = zext i16 %0 to i32 %add = add nsw i32 %conv1, %conv %conv2 = trunc i32 %add to i16 - store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2 + store i16 %conv2, i16* %add.ptr, align 2 ret void } @@ -584,11 +584,11 @@ entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} %conv = zext i16 %x to i32 %add.ptr = getelementptr inbounds i16* %p, i32 %i - %0 = load i16* %add.ptr, align 2, !tbaa !2 + %0 = load i16* %add.ptr, align 2 %conv1 = zext i16 %0 to i32 %sub = sub nsw i32 %conv1, %conv %conv2 = trunc i32 %sub to i16 - store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2 + store i16 %conv2, i16* %add.ptr, align 2 ret void } @@ -596,9 +596,9 @@ define void @memop_unsigned_short_or_index(i16* nocapture %p, i32 %i, i16 zeroex entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i16* %p, i32 %i - %0 = load i16* %add.ptr, align 2, !tbaa !2 + %0 = load i16* %add.ptr, align 2 %or3 = or i16 %0, %x - store i16 %or3, i16* %add.ptr, align 2, !tbaa !2 + store i16 %or3, i16* %add.ptr, align 2 ret void } @@ -606,9 +606,9 @@ define void @memop_unsigned_short_and_index(i16* nocapture %p, i32 %i, i16 zeroe 
entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i16* %p, i32 %i - %0 = load i16* %add.ptr, align 2, !tbaa !2 + %0 = load i16* %add.ptr, align 2 %and3 = and i16 %0, %x - store i16 %and3, i16* %add.ptr, align 2, !tbaa !2 + store i16 %and3, i16* %add.ptr, align 2 ret void } @@ -616,11 +616,11 @@ define void @memop_unsigned_short_clrbit_index(i16* nocapture %p, i32 %i) nounwi entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) %add.ptr = getelementptr inbounds i16* %p, i32 %i - %0 = load i16* %add.ptr, align 2, !tbaa !2 + %0 = load i16* %add.ptr, align 2 %conv = zext i16 %0 to i32 %and = and i32 %conv, 65503 %conv1 = trunc i32 %and to i16 - store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + store i16 %conv1, i16* %add.ptr, align 2 ret void } @@ -628,11 +628,11 @@ define void @memop_unsigned_short_setbit_index(i16* nocapture %p, i32 %i) nounwi entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) %add.ptr = getelementptr inbounds i16* %p, i32 %i - %0 = load i16* %add.ptr, align 2, !tbaa !2 + %0 = load i16* %add.ptr, align 2 %conv = zext i16 %0 to i32 %or = or i32 %conv, 128 %conv1 = trunc i32 %or to i16 - store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + store i16 %conv1, i16* %add.ptr, align 2 ret void } @@ -640,11 +640,11 @@ define void @memop_unsigned_short_add5_index5(i16* nocapture %p) nounwind { entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}#5 %add.ptr = getelementptr inbounds i16* %p, i32 5 - %0 = load i16* %add.ptr, align 2, !tbaa !2 + %0 = load i16* %add.ptr, align 2 %conv = zext i16 %0 to i32 %add = add nsw i32 %conv, 5 %conv1 = trunc i32 %add to i16 - store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + store i16 %conv1, i16* %add.ptr, align 2 ret void } @@ -653,11 +653,11 @@ entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}r{{[0-9]+}} %conv = zext i16 %x to i32 %add.ptr = getelementptr inbounds i16* %p, 
i32 5 - %0 = load i16* %add.ptr, align 2, !tbaa !2 + %0 = load i16* %add.ptr, align 2 %conv1 = zext i16 %0 to i32 %add = add nsw i32 %conv1, %conv %conv2 = trunc i32 %add to i16 - store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2 + store i16 %conv2, i16* %add.ptr, align 2 ret void } @@ -666,11 +666,11 @@ entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}r{{[0-9]+}} %conv = zext i16 %x to i32 %add.ptr = getelementptr inbounds i16* %p, i32 5 - %0 = load i16* %add.ptr, align 2, !tbaa !2 + %0 = load i16* %add.ptr, align 2 %conv1 = zext i16 %0 to i32 %sub = sub nsw i32 %conv1, %conv %conv2 = trunc i32 %sub to i16 - store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2 + store i16 %conv2, i16* %add.ptr, align 2 ret void } @@ -678,9 +678,9 @@ define void @memop_unsigned_short_or_index5(i16* nocapture %p, i16 zeroext %x) n entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}|={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i16* %p, i32 5 - %0 = load i16* %add.ptr, align 2, !tbaa !2 + %0 = load i16* %add.ptr, align 2 %or3 = or i16 %0, %x - store i16 %or3, i16* %add.ptr, align 2, !tbaa !2 + store i16 %or3, i16* %add.ptr, align 2 ret void } @@ -688,9 +688,9 @@ define void @memop_unsigned_short_and_index5(i16* nocapture %p, i16 zeroext %x) entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}&={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i16* %p, i32 5 - %0 = load i16* %add.ptr, align 2, !tbaa !2 + %0 = load i16* %add.ptr, align 2 %and3 = and i16 %0, %x - store i16 %and3, i16* %add.ptr, align 2, !tbaa !2 + store i16 %and3, i16* %add.ptr, align 2 ret void } @@ -698,11 +698,11 @@ define void @memop_unsigned_short_clrbit_index5(i16* nocapture %p) nounwind { entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) %add.ptr = getelementptr inbounds i16* %p, i32 5 - %0 = load i16* %add.ptr, align 2, !tbaa !2 + %0 = load i16* %add.ptr, align 2 %conv = zext i16 %0 to i32 %and = and i32 %conv, 65503 %conv1 = trunc i32 %and to i16 - 
store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + store i16 %conv1, i16* %add.ptr, align 2 ret void } @@ -710,22 +710,22 @@ define void @memop_unsigned_short_setbit_index5(i16* nocapture %p) nounwind { entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) %add.ptr = getelementptr inbounds i16* %p, i32 5 - %0 = load i16* %add.ptr, align 2, !tbaa !2 + %0 = load i16* %add.ptr, align 2 %conv = zext i16 %0 to i32 %or = or i32 %conv, 128 %conv1 = trunc i32 %or to i16 - store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + store i16 %conv1, i16* %add.ptr, align 2 ret void } define void @memop_signed_short_add5(i16* nocapture %p) nounwind { entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 - %0 = load i16* %p, align 2, !tbaa !2 + %0 = load i16* %p, align 2 %conv2 = zext i16 %0 to i32 %add = add nsw i32 %conv2, 5 %conv1 = trunc i32 %add to i16 - store i16 %conv1, i16* %p, align 2, !tbaa !2 + store i16 %conv1, i16* %p, align 2 ret void } @@ -733,11 +733,11 @@ define void @memop_signed_short_add(i16* nocapture %p, i16 signext %x) nounwind entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} %conv4 = zext i16 %x to i32 - %0 = load i16* %p, align 2, !tbaa !2 + %0 = load i16* %p, align 2 %conv13 = zext i16 %0 to i32 %add = add nsw i32 %conv13, %conv4 %conv2 = trunc i32 %add to i16 - store i16 %conv2, i16* %p, align 2, !tbaa !2 + store i16 %conv2, i16* %p, align 2 ret void } @@ -745,51 +745,51 @@ define void @memop_signed_short_sub(i16* nocapture %p, i16 signext %x) nounwind entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} %conv4 = zext i16 %x to i32 - %0 = load i16* %p, align 2, !tbaa !2 + %0 = load i16* %p, align 2 %conv13 = zext i16 %0 to i32 %sub = sub nsw i32 %conv13, %conv4 %conv2 = trunc i32 %sub to i16 - store i16 %conv2, i16* %p, align 2, !tbaa !2 + store i16 %conv2, i16* %p, align 2 ret void } define void @memop_signed_short_or(i16* nocapture %p, i16 signext %x) nounwind { 
entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} - %0 = load i16* %p, align 2, !tbaa !2 + %0 = load i16* %p, align 2 %or3 = or i16 %0, %x - store i16 %or3, i16* %p, align 2, !tbaa !2 + store i16 %or3, i16* %p, align 2 ret void } define void @memop_signed_short_and(i16* nocapture %p, i16 signext %x) nounwind { entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} - %0 = load i16* %p, align 2, !tbaa !2 + %0 = load i16* %p, align 2 %and3 = and i16 %0, %x - store i16 %and3, i16* %p, align 2, !tbaa !2 + store i16 %and3, i16* %p, align 2 ret void } define void @memop_signed_short_clrbit(i16* nocapture %p) nounwind { entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) - %0 = load i16* %p, align 2, !tbaa !2 + %0 = load i16* %p, align 2 %conv2 = zext i16 %0 to i32 %and = and i32 %conv2, 65503 %conv1 = trunc i32 %and to i16 - store i16 %conv1, i16* %p, align 2, !tbaa !2 + store i16 %conv1, i16* %p, align 2 ret void } define void @memop_signed_short_setbit(i16* nocapture %p) nounwind { entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) - %0 = load i16* %p, align 2, !tbaa !2 + %0 = load i16* %p, align 2 %conv2 = zext i16 %0 to i32 %or = or i32 %conv2, 128 %conv1 = trunc i32 %or to i16 - store i16 %conv1, i16* %p, align 2, !tbaa !2 + store i16 %conv1, i16* %p, align 2 ret void } @@ -797,11 +797,11 @@ define void @memop_signed_short_add5_index(i16* nocapture %p, i32 %i) nounwind { entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 %add.ptr = getelementptr inbounds i16* %p, i32 %i - %0 = load i16* %add.ptr, align 2, !tbaa !2 + %0 = load i16* %add.ptr, align 2 %conv2 = zext i16 %0 to i32 %add = add nsw i32 %conv2, 5 %conv1 = trunc i32 %add to i16 - store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + store i16 %conv1, i16* %add.ptr, align 2 ret void } @@ -810,11 +810,11 @@ entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} %conv4 = zext 
i16 %x to i32 %add.ptr = getelementptr inbounds i16* %p, i32 %i - %0 = load i16* %add.ptr, align 2, !tbaa !2 + %0 = load i16* %add.ptr, align 2 %conv13 = zext i16 %0 to i32 %add = add nsw i32 %conv13, %conv4 %conv2 = trunc i32 %add to i16 - store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2 + store i16 %conv2, i16* %add.ptr, align 2 ret void } @@ -823,11 +823,11 @@ entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} %conv4 = zext i16 %x to i32 %add.ptr = getelementptr inbounds i16* %p, i32 %i - %0 = load i16* %add.ptr, align 2, !tbaa !2 + %0 = load i16* %add.ptr, align 2 %conv13 = zext i16 %0 to i32 %sub = sub nsw i32 %conv13, %conv4 %conv2 = trunc i32 %sub to i16 - store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2 + store i16 %conv2, i16* %add.ptr, align 2 ret void } @@ -835,9 +835,9 @@ define void @memop_signed_short_or_index(i16* nocapture %p, i32 %i, i16 signext entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i16* %p, i32 %i - %0 = load i16* %add.ptr, align 2, !tbaa !2 + %0 = load i16* %add.ptr, align 2 %or3 = or i16 %0, %x - store i16 %or3, i16* %add.ptr, align 2, !tbaa !2 + store i16 %or3, i16* %add.ptr, align 2 ret void } @@ -845,9 +845,9 @@ define void @memop_signed_short_and_index(i16* nocapture %p, i32 %i, i16 signext entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i16* %p, i32 %i - %0 = load i16* %add.ptr, align 2, !tbaa !2 + %0 = load i16* %add.ptr, align 2 %and3 = and i16 %0, %x - store i16 %and3, i16* %add.ptr, align 2, !tbaa !2 + store i16 %and3, i16* %add.ptr, align 2 ret void } @@ -855,11 +855,11 @@ define void @memop_signed_short_clrbit_index(i16* nocapture %p, i32 %i) nounwind entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) %add.ptr = getelementptr inbounds i16* %p, i32 %i - %0 = load i16* %add.ptr, align 2, !tbaa !2 + %0 = load i16* %add.ptr, align 2 %conv2 = zext i16 
%0 to i32 %and = and i32 %conv2, 65503 %conv1 = trunc i32 %and to i16 - store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + store i16 %conv1, i16* %add.ptr, align 2 ret void } @@ -867,11 +867,11 @@ define void @memop_signed_short_setbit_index(i16* nocapture %p, i32 %i) nounwind entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) %add.ptr = getelementptr inbounds i16* %p, i32 %i - %0 = load i16* %add.ptr, align 2, !tbaa !2 + %0 = load i16* %add.ptr, align 2 %conv2 = zext i16 %0 to i32 %or = or i32 %conv2, 128 %conv1 = trunc i32 %or to i16 - store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + store i16 %conv1, i16* %add.ptr, align 2 ret void } @@ -879,11 +879,11 @@ define void @memop_signed_short_add5_index5(i16* nocapture %p) nounwind { entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}#5 %add.ptr = getelementptr inbounds i16* %p, i32 5 - %0 = load i16* %add.ptr, align 2, !tbaa !2 + %0 = load i16* %add.ptr, align 2 %conv2 = zext i16 %0 to i32 %add = add nsw i32 %conv2, 5 %conv1 = trunc i32 %add to i16 - store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + store i16 %conv1, i16* %add.ptr, align 2 ret void } @@ -892,11 +892,11 @@ entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}r{{[0-9]+}} %conv4 = zext i16 %x to i32 %add.ptr = getelementptr inbounds i16* %p, i32 5 - %0 = load i16* %add.ptr, align 2, !tbaa !2 + %0 = load i16* %add.ptr, align 2 %conv13 = zext i16 %0 to i32 %add = add nsw i32 %conv13, %conv4 %conv2 = trunc i32 %add to i16 - store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2 + store i16 %conv2, i16* %add.ptr, align 2 ret void } @@ -905,11 +905,11 @@ entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}r{{[0-9]+}} %conv4 = zext i16 %x to i32 %add.ptr = getelementptr inbounds i16* %p, i32 5 - %0 = load i16* %add.ptr, align 2, !tbaa !2 + %0 = load i16* %add.ptr, align 2 %conv13 = zext i16 %0 to i32 %sub = sub nsw i32 %conv13, %conv4 %conv2 = trunc i32 %sub to i16 - store i16 %conv2, i16* 
%add.ptr, align 2, !tbaa !2 + store i16 %conv2, i16* %add.ptr, align 2 ret void } @@ -917,9 +917,9 @@ define void @memop_signed_short_or_index5(i16* nocapture %p, i16 signext %x) nou entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}|={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i16* %p, i32 5 - %0 = load i16* %add.ptr, align 2, !tbaa !2 + %0 = load i16* %add.ptr, align 2 %or3 = or i16 %0, %x - store i16 %or3, i16* %add.ptr, align 2, !tbaa !2 + store i16 %or3, i16* %add.ptr, align 2 ret void } @@ -927,9 +927,9 @@ define void @memop_signed_short_and_index5(i16* nocapture %p, i16 signext %x) no entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}&={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i16* %p, i32 5 - %0 = load i16* %add.ptr, align 2, !tbaa !2 + %0 = load i16* %add.ptr, align 2 %and3 = and i16 %0, %x - store i16 %and3, i16* %add.ptr, align 2, !tbaa !2 + store i16 %and3, i16* %add.ptr, align 2 ret void } @@ -937,11 +937,11 @@ define void @memop_signed_short_clrbit_index5(i16* nocapture %p) nounwind { entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) %add.ptr = getelementptr inbounds i16* %p, i32 5 - %0 = load i16* %add.ptr, align 2, !tbaa !2 + %0 = load i16* %add.ptr, align 2 %conv2 = zext i16 %0 to i32 %and = and i32 %conv2, 65503 %conv1 = trunc i32 %and to i16 - store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + store i16 %conv1, i16* %add.ptr, align 2 ret void } @@ -949,74 +949,74 @@ define void @memop_signed_short_setbit_index5(i16* nocapture %p) nounwind { entry: ; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) %add.ptr = getelementptr inbounds i16* %p, i32 5 - %0 = load i16* %add.ptr, align 2, !tbaa !2 + %0 = load i16* %add.ptr, align 2 %conv2 = zext i16 %0 to i32 %or = or i32 %conv2, 128 %conv1 = trunc i32 %or to i16 - store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + store i16 %conv1, i16* %add.ptr, align 2 ret void } define void @memop_signed_int_add5(i32* 
nocapture %p) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 - %0 = load i32* %p, align 4, !tbaa !3 + %0 = load i32* %p, align 4 %add = add i32 %0, 5 - store i32 %add, i32* %p, align 4, !tbaa !3 + store i32 %add, i32* %p, align 4 ret void } define void @memop_signed_int_add(i32* nocapture %p, i32 %x) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} - %0 = load i32* %p, align 4, !tbaa !3 + %0 = load i32* %p, align 4 %add = add i32 %0, %x - store i32 %add, i32* %p, align 4, !tbaa !3 + store i32 %add, i32* %p, align 4 ret void } define void @memop_signed_int_sub(i32* nocapture %p, i32 %x) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} - %0 = load i32* %p, align 4, !tbaa !3 + %0 = load i32* %p, align 4 %sub = sub i32 %0, %x - store i32 %sub, i32* %p, align 4, !tbaa !3 + store i32 %sub, i32* %p, align 4 ret void } define void @memop_signed_int_or(i32* nocapture %p, i32 %x) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} - %0 = load i32* %p, align 4, !tbaa !3 + %0 = load i32* %p, align 4 %or = or i32 %0, %x - store i32 %or, i32* %p, align 4, !tbaa !3 + store i32 %or, i32* %p, align 4 ret void } define void @memop_signed_int_and(i32* nocapture %p, i32 %x) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} - %0 = load i32* %p, align 4, !tbaa !3 + %0 = load i32* %p, align 4 %and = and i32 %0, %x - store i32 %and, i32* %p, align 4, !tbaa !3 + store i32 %and, i32* %p, align 4 ret void } define void @memop_signed_int_clrbit(i32* nocapture %p) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) - %0 = load i32* %p, align 4, !tbaa !3 + %0 = load i32* %p, align 4 %and = and i32 %0, -33 - store i32 %and, i32* %p, align 4, !tbaa !3 + store i32 %and, i32* %p, align 4 ret void } define void @memop_signed_int_setbit(i32* nocapture %p) nounwind { entry: ; CHECK: 
memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) - %0 = load i32* %p, align 4, !tbaa !3 + %0 = load i32* %p, align 4 %or = or i32 %0, 128 - store i32 %or, i32* %p, align 4, !tbaa !3 + store i32 %or, i32* %p, align 4 ret void } @@ -1024,9 +1024,9 @@ define void @memop_signed_int_add5_index(i32* nocapture %p, i32 %i) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 %add.ptr = getelementptr inbounds i32* %p, i32 %i - %0 = load i32* %add.ptr, align 4, !tbaa !3 + %0 = load i32* %add.ptr, align 4 %add = add i32 %0, 5 - store i32 %add, i32* %add.ptr, align 4, !tbaa !3 + store i32 %add, i32* %add.ptr, align 4 ret void } @@ -1034,9 +1034,9 @@ define void @memop_signed_int_add_index(i32* nocapture %p, i32 %i, i32 %x) nounw entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i32* %p, i32 %i - %0 = load i32* %add.ptr, align 4, !tbaa !3 + %0 = load i32* %add.ptr, align 4 %add = add i32 %0, %x - store i32 %add, i32* %add.ptr, align 4, !tbaa !3 + store i32 %add, i32* %add.ptr, align 4 ret void } @@ -1044,9 +1044,9 @@ define void @memop_signed_int_sub_index(i32* nocapture %p, i32 %i, i32 %x) nounw entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i32* %p, i32 %i - %0 = load i32* %add.ptr, align 4, !tbaa !3 + %0 = load i32* %add.ptr, align 4 %sub = sub i32 %0, %x - store i32 %sub, i32* %add.ptr, align 4, !tbaa !3 + store i32 %sub, i32* %add.ptr, align 4 ret void } @@ -1054,9 +1054,9 @@ define void @memop_signed_int_or_index(i32* nocapture %p, i32 %i, i32 %x) nounwi entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i32* %p, i32 %i - %0 = load i32* %add.ptr, align 4, !tbaa !3 + %0 = load i32* %add.ptr, align 4 %or = or i32 %0, %x - store i32 %or, i32* %add.ptr, align 4, !tbaa !3 + store i32 %or, i32* %add.ptr, align 4 ret void } @@ -1064,9 +1064,9 @@ define void 
@memop_signed_int_and_index(i32* nocapture %p, i32 %i, i32 %x) nounw entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i32* %p, i32 %i - %0 = load i32* %add.ptr, align 4, !tbaa !3 + %0 = load i32* %add.ptr, align 4 %and = and i32 %0, %x - store i32 %and, i32* %add.ptr, align 4, !tbaa !3 + store i32 %and, i32* %add.ptr, align 4 ret void } @@ -1074,9 +1074,9 @@ define void @memop_signed_int_clrbit_index(i32* nocapture %p, i32 %i) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) %add.ptr = getelementptr inbounds i32* %p, i32 %i - %0 = load i32* %add.ptr, align 4, !tbaa !3 + %0 = load i32* %add.ptr, align 4 %and = and i32 %0, -33 - store i32 %and, i32* %add.ptr, align 4, !tbaa !3 + store i32 %and, i32* %add.ptr, align 4 ret void } @@ -1084,9 +1084,9 @@ define void @memop_signed_int_setbit_index(i32* nocapture %p, i32 %i) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) %add.ptr = getelementptr inbounds i32* %p, i32 %i - %0 = load i32* %add.ptr, align 4, !tbaa !3 + %0 = load i32* %add.ptr, align 4 %or = or i32 %0, 128 - store i32 %or, i32* %add.ptr, align 4, !tbaa !3 + store i32 %or, i32* %add.ptr, align 4 ret void } @@ -1094,9 +1094,9 @@ define void @memop_signed_int_add5_index5(i32* nocapture %p) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}#5 %add.ptr = getelementptr inbounds i32* %p, i32 5 - %0 = load i32* %add.ptr, align 4, !tbaa !3 + %0 = load i32* %add.ptr, align 4 %add = add i32 %0, 5 - store i32 %add, i32* %add.ptr, align 4, !tbaa !3 + store i32 %add, i32* %add.ptr, align 4 ret void } @@ -1104,9 +1104,9 @@ define void @memop_signed_int_add_index5(i32* nocapture %p, i32 %x) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i32* %p, i32 5 - %0 = load i32* %add.ptr, align 4, !tbaa !3 + %0 = load i32* %add.ptr, align 4 
%add = add i32 %0, %x - store i32 %add, i32* %add.ptr, align 4, !tbaa !3 + store i32 %add, i32* %add.ptr, align 4 ret void } @@ -1114,9 +1114,9 @@ define void @memop_signed_int_sub_index5(i32* nocapture %p, i32 %x) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i32* %p, i32 5 - %0 = load i32* %add.ptr, align 4, !tbaa !3 + %0 = load i32* %add.ptr, align 4 %sub = sub i32 %0, %x - store i32 %sub, i32* %add.ptr, align 4, !tbaa !3 + store i32 %sub, i32* %add.ptr, align 4 ret void } @@ -1124,9 +1124,9 @@ define void @memop_signed_int_or_index5(i32* nocapture %p, i32 %x) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}|={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i32* %p, i32 5 - %0 = load i32* %add.ptr, align 4, !tbaa !3 + %0 = load i32* %add.ptr, align 4 %or = or i32 %0, %x - store i32 %or, i32* %add.ptr, align 4, !tbaa !3 + store i32 %or, i32* %add.ptr, align 4 ret void } @@ -1134,9 +1134,9 @@ define void @memop_signed_int_and_index5(i32* nocapture %p, i32 %x) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}&={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i32* %p, i32 5 - %0 = load i32* %add.ptr, align 4, !tbaa !3 + %0 = load i32* %add.ptr, align 4 %and = and i32 %0, %x - store i32 %and, i32* %add.ptr, align 4, !tbaa !3 + store i32 %and, i32* %add.ptr, align 4 ret void } @@ -1144,9 +1144,9 @@ define void @memop_signed_int_clrbit_index5(i32* nocapture %p) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) %add.ptr = getelementptr inbounds i32* %p, i32 5 - %0 = load i32* %add.ptr, align 4, !tbaa !3 + %0 = load i32* %add.ptr, align 4 %and = and i32 %0, -33 - store i32 %and, i32* %add.ptr, align 4, !tbaa !3 + store i32 %and, i32* %add.ptr, align 4 ret void } @@ -1154,72 +1154,72 @@ define void @memop_signed_int_setbit_index5(i32* nocapture %p) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ 
*}}={{ *}}setbit({{ *}}#7{{ *}}) %add.ptr = getelementptr inbounds i32* %p, i32 5 - %0 = load i32* %add.ptr, align 4, !tbaa !3 + %0 = load i32* %add.ptr, align 4 %or = or i32 %0, 128 - store i32 %or, i32* %add.ptr, align 4, !tbaa !3 + store i32 %or, i32* %add.ptr, align 4 ret void } define void @memop_unsigned_int_add5(i32* nocapture %p) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 - %0 = load i32* %p, align 4, !tbaa !3 + %0 = load i32* %p, align 4 %add = add nsw i32 %0, 5 - store i32 %add, i32* %p, align 4, !tbaa !3 + store i32 %add, i32* %p, align 4 ret void } define void @memop_unsigned_int_add(i32* nocapture %p, i32 %x) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} - %0 = load i32* %p, align 4, !tbaa !3 + %0 = load i32* %p, align 4 %add = add nsw i32 %0, %x - store i32 %add, i32* %p, align 4, !tbaa !3 + store i32 %add, i32* %p, align 4 ret void } define void @memop_unsigned_int_sub(i32* nocapture %p, i32 %x) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} - %0 = load i32* %p, align 4, !tbaa !3 + %0 = load i32* %p, align 4 %sub = sub nsw i32 %0, %x - store i32 %sub, i32* %p, align 4, !tbaa !3 + store i32 %sub, i32* %p, align 4 ret void } define void @memop_unsigned_int_or(i32* nocapture %p, i32 %x) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} - %0 = load i32* %p, align 4, !tbaa !3 + %0 = load i32* %p, align 4 %or = or i32 %0, %x - store i32 %or, i32* %p, align 4, !tbaa !3 + store i32 %or, i32* %p, align 4 ret void } define void @memop_unsigned_int_and(i32* nocapture %p, i32 %x) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} - %0 = load i32* %p, align 4, !tbaa !3 + %0 = load i32* %p, align 4 %and = and i32 %0, %x - store i32 %and, i32* %p, align 4, !tbaa !3 + store i32 %and, i32* %p, align 4 ret void } define void @memop_unsigned_int_clrbit(i32* nocapture %p) nounwind { entry: ; 
CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) - %0 = load i32* %p, align 4, !tbaa !3 + %0 = load i32* %p, align 4 %and = and i32 %0, -33 - store i32 %and, i32* %p, align 4, !tbaa !3 + store i32 %and, i32* %p, align 4 ret void } define void @memop_unsigned_int_setbit(i32* nocapture %p) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) - %0 = load i32* %p, align 4, !tbaa !3 + %0 = load i32* %p, align 4 %or = or i32 %0, 128 - store i32 %or, i32* %p, align 4, !tbaa !3 + store i32 %or, i32* %p, align 4 ret void } @@ -1227,9 +1227,9 @@ define void @memop_unsigned_int_add5_index(i32* nocapture %p, i32 %i) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 %add.ptr = getelementptr inbounds i32* %p, i32 %i - %0 = load i32* %add.ptr, align 4, !tbaa !3 + %0 = load i32* %add.ptr, align 4 %add = add nsw i32 %0, 5 - store i32 %add, i32* %add.ptr, align 4, !tbaa !3 + store i32 %add, i32* %add.ptr, align 4 ret void } @@ -1237,9 +1237,9 @@ define void @memop_unsigned_int_add_index(i32* nocapture %p, i32 %i, i32 %x) nou entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i32* %p, i32 %i - %0 = load i32* %add.ptr, align 4, !tbaa !3 + %0 = load i32* %add.ptr, align 4 %add = add nsw i32 %0, %x - store i32 %add, i32* %add.ptr, align 4, !tbaa !3 + store i32 %add, i32* %add.ptr, align 4 ret void } @@ -1247,9 +1247,9 @@ define void @memop_unsigned_int_sub_index(i32* nocapture %p, i32 %i, i32 %x) nou entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i32* %p, i32 %i - %0 = load i32* %add.ptr, align 4, !tbaa !3 + %0 = load i32* %add.ptr, align 4 %sub = sub nsw i32 %0, %x - store i32 %sub, i32* %add.ptr, align 4, !tbaa !3 + store i32 %sub, i32* %add.ptr, align 4 ret void } @@ -1257,9 +1257,9 @@ define void @memop_unsigned_int_or_index(i32* nocapture %p, i32 %i, i32 %x) noun entry: ; 
CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i32* %p, i32 %i - %0 = load i32* %add.ptr, align 4, !tbaa !3 + %0 = load i32* %add.ptr, align 4 %or = or i32 %0, %x - store i32 %or, i32* %add.ptr, align 4, !tbaa !3 + store i32 %or, i32* %add.ptr, align 4 ret void } @@ -1267,9 +1267,9 @@ define void @memop_unsigned_int_and_index(i32* nocapture %p, i32 %i, i32 %x) nou entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i32* %p, i32 %i - %0 = load i32* %add.ptr, align 4, !tbaa !3 + %0 = load i32* %add.ptr, align 4 %and = and i32 %0, %x - store i32 %and, i32* %add.ptr, align 4, !tbaa !3 + store i32 %and, i32* %add.ptr, align 4 ret void } @@ -1277,9 +1277,9 @@ define void @memop_unsigned_int_clrbit_index(i32* nocapture %p, i32 %i) nounwind entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) %add.ptr = getelementptr inbounds i32* %p, i32 %i - %0 = load i32* %add.ptr, align 4, !tbaa !3 + %0 = load i32* %add.ptr, align 4 %and = and i32 %0, -33 - store i32 %and, i32* %add.ptr, align 4, !tbaa !3 + store i32 %and, i32* %add.ptr, align 4 ret void } @@ -1287,9 +1287,9 @@ define void @memop_unsigned_int_setbit_index(i32* nocapture %p, i32 %i) nounwind entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) %add.ptr = getelementptr inbounds i32* %p, i32 %i - %0 = load i32* %add.ptr, align 4, !tbaa !3 + %0 = load i32* %add.ptr, align 4 %or = or i32 %0, 128 - store i32 %or, i32* %add.ptr, align 4, !tbaa !3 + store i32 %or, i32* %add.ptr, align 4 ret void } @@ -1297,9 +1297,9 @@ define void @memop_unsigned_int_add5_index5(i32* nocapture %p) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}#5 %add.ptr = getelementptr inbounds i32* %p, i32 5 - %0 = load i32* %add.ptr, align 4, !tbaa !3 + %0 = load i32* %add.ptr, align 4 %add = add nsw i32 %0, 5 - store i32 %add, i32* %add.ptr, align 4, !tbaa !3 + 
store i32 %add, i32* %add.ptr, align 4 ret void } @@ -1307,9 +1307,9 @@ define void @memop_unsigned_int_add_index5(i32* nocapture %p, i32 %x) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i32* %p, i32 5 - %0 = load i32* %add.ptr, align 4, !tbaa !3 + %0 = load i32* %add.ptr, align 4 %add = add nsw i32 %0, %x - store i32 %add, i32* %add.ptr, align 4, !tbaa !3 + store i32 %add, i32* %add.ptr, align 4 ret void } @@ -1317,9 +1317,9 @@ define void @memop_unsigned_int_sub_index5(i32* nocapture %p, i32 %x) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i32* %p, i32 5 - %0 = load i32* %add.ptr, align 4, !tbaa !3 + %0 = load i32* %add.ptr, align 4 %sub = sub nsw i32 %0, %x - store i32 %sub, i32* %add.ptr, align 4, !tbaa !3 + store i32 %sub, i32* %add.ptr, align 4 ret void } @@ -1327,9 +1327,9 @@ define void @memop_unsigned_int_or_index5(i32* nocapture %p, i32 %x) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}|={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i32* %p, i32 5 - %0 = load i32* %add.ptr, align 4, !tbaa !3 + %0 = load i32* %add.ptr, align 4 %or = or i32 %0, %x - store i32 %or, i32* %add.ptr, align 4, !tbaa !3 + store i32 %or, i32* %add.ptr, align 4 ret void } @@ -1337,9 +1337,9 @@ define void @memop_unsigned_int_and_index5(i32* nocapture %p, i32 %x) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}&={{ *}}r{{[0-9]+}} %add.ptr = getelementptr inbounds i32* %p, i32 5 - %0 = load i32* %add.ptr, align 4, !tbaa !3 + %0 = load i32* %add.ptr, align 4 %and = and i32 %0, %x - store i32 %and, i32* %add.ptr, align 4, !tbaa !3 + store i32 %and, i32* %add.ptr, align 4 ret void } @@ -1347,9 +1347,9 @@ define void @memop_unsigned_int_clrbit_index5(i32* nocapture %p) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) %add.ptr = getelementptr inbounds 
i32* %p, i32 5 - %0 = load i32* %add.ptr, align 4, !tbaa !3 + %0 = load i32* %add.ptr, align 4 %and = and i32 %0, -33 - store i32 %and, i32* %add.ptr, align 4, !tbaa !3 + store i32 %and, i32* %add.ptr, align 4 ret void } @@ -1357,13 +1357,8 @@ define void @memop_unsigned_int_setbit_index5(i32* nocapture %p) nounwind { entry: ; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) %add.ptr = getelementptr inbounds i32* %p, i32 5 - %0 = load i32* %add.ptr, align 4, !tbaa !3 + %0 = load i32* %add.ptr, align 4 %or = or i32 %0, 128 - store i32 %or, i32* %add.ptr, align 4, !tbaa !3 + store i32 %or, i32* %add.ptr, align 4 ret void } - -!0 = metadata !{metadata !"omnipotent char", metadata !1} -!1 = metadata !{metadata !"Simple C/C++ TBAA"} -!2 = metadata !{metadata !"short", metadata !0} -!3 = metadata !{metadata !"int", metadata !0} diff --git a/test/CodeGen/Hexagon/union-1.ll b/test/CodeGen/Hexagon/union-1.ll index 7c6da74..fe79f95 100644 --- a/test/CodeGen/Hexagon/union-1.ll +++ b/test/CodeGen/Hexagon/union-1.ll @@ -5,10 +5,10 @@ define void @word(i32* nocapture %a) nounwind { entry: - %0 = load i32* %a, align 4, !tbaa !0 + %0 = load i32* %a, align 4 %1 = zext i32 %0 to i64 %add.ptr = getelementptr inbounds i32* %a, i32 1 - %2 = load i32* %add.ptr, align 4, !tbaa !0 + %2 = load i32* %add.ptr, align 4 %3 = zext i32 %2 to i64 %4 = shl nuw i64 %3, 32 %ins = or i64 %4, %1 @@ -17,7 +17,3 @@ entry: } declare void @bar(i64) - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/Inputs/DbgValueOtherTargets.ll b/test/CodeGen/Inputs/DbgValueOtherTargets.ll index f35a5d1..953e576 100644 --- a/test/CodeGen/Inputs/DbgValueOtherTargets.ll +++ b/test/CodeGen/Inputs/DbgValueOtherTargets.ll @@ -12,11 +12,12 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone declare void @llvm.dbg.value(metadata, i64, metadata) nounwind 
readnone !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!13} !0 = metadata !{i32 786478, metadata !12, metadata !1, metadata !"main", metadata !"main", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ] !2 = metadata !{i32 786449, metadata !12, i32 12, metadata !"clang version 2.9 (trunk 120996)", i1 false, metadata !"", i32 0, metadata !6, metadata !6, metadata !11, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !12, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !12, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, metadata !12, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !6 = metadata !{i32 0} @@ -26,3 +27,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !10 = metadata !{i32 4, i32 2, metadata !8, null} !11 = metadata !{metadata !0} !12 = metadata !{metadata !"/tmp/x.c", metadata !"/Users/manav"} +!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/MSP430/cc_args.ll b/test/CodeGen/MSP430/cc_args.ll new file mode 100644 index 0000000..39e99e2 --- /dev/null +++ b/test/CodeGen/MSP430/cc_args.ll @@ -0,0 +1,118 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16-a0:16:16" +target triple = "msp430---elf" + +define void @test() #0 { +entry: +; CHECK: test: + +; CHECK: mov.w #1, r15 +; CHECK: call #f_i16 + call void @f_i16(i16 1) + +; CHECK: mov.w #772, r14 +; CHECK: mov.w #258, r15 +; CHECK: call 
#f_i32 + call void @f_i32(i32 16909060) + +; CHECK: mov.w #1800, r12 +; CHECK: mov.w #1286, r13 +; CHECK: mov.w #772, r14 +; CHECK: mov.w #258, r15 +; CHECK: call #f_i64 + call void @f_i64(i64 72623859790382856) + +; CHECK: mov.w #772, r14 +; CHECK: mov.w #258, r15 +; CHECK: mov.w #1800, r12 +; CHECK: mov.w #1286, r13 +; CHECK: call #f_i32_i32 + call void @f_i32_i32(i32 16909060, i32 84281096) + +; CHECK: mov.w #1, r15 +; CHECK: mov.w #772, r13 +; CHECK: mov.w #258, r14 +; CHECK: mov.w #2, r12 +; CHECK: call #f_i16_i32_i16 + call void @f_i16_i32_i16(i16 1, i32 16909060, i16 2) + +; CHECK: mov.w #2, 8(r1) +; CHECK: mov.w #258, 6(r1) +; CHECK: mov.w #772, 4(r1) +; CHECK: mov.w #1286, 2(r1) +; CHECK: mov.w #1800, 0(r1) +; CHECK: mov.w #1, r15 +; CHECK: call #f_i16_i64_i16 + call void @f_i16_i64_i16(i16 1, i64 72623859790382856, i16 2) + + ret void +} + +@g_i16 = common global i16 0, align 2 +@g_i32 = common global i32 0, align 2 +@g_i64 = common global i64 0, align 2 + +define void @f_i16(i16 %a) #0 { +; CHECK: f_i16: +; CHECK: mov.w r15, &g_i16 + store volatile i16 %a, i16* @g_i16, align 2 + ret void +} + +define void @f_i32(i32 %a) #0 { +; CHECK: f_i32: +; CHECK: mov.w r15, &g_i32+2 +; CHECK: mov.w r14, &g_i32 + store volatile i32 %a, i32* @g_i32, align 2 + ret void +} + +define void @f_i64(i64 %a) #0 { +; CHECK: f_i64: +; CHECK: mov.w r15, &g_i64+6 +; CHECK: mov.w r14, &g_i64+4 +; CHECK: mov.w r13, &g_i64+2 +; CHECK: mov.w r12, &g_i64 + store volatile i64 %a, i64* @g_i64, align 2 + ret void +} + +define void @f_i32_i32(i32 %a, i32 %b) #0 { +; CHECK: f_i32_i32: +; CHECK: mov.w r15, &g_i32+2 +; CHECK: mov.w r14, &g_i32 + store volatile i32 %a, i32* @g_i32, align 2 +; CHECK: mov.w r13, &g_i32+2 +; CHECK: mov.w r12, &g_i32 + store volatile i32 %b, i32* @g_i32, align 2 + ret void +} + +define void @f_i16_i32_i16(i16 %a, i32 %b, i16 %c) #0 { +; CHECK: f_i16_i32_i16: +; CHECK: mov.w r15, &g_i16 + store volatile i16 %a, i16* @g_i16, align 2 +; CHECK: mov.w r14, &g_i32+2 +; 
CHECK: mov.w r13, &g_i32 + store volatile i32 %b, i32* @g_i32, align 2 +; CHECK: mov.w r12, &g_i16 + store volatile i16 %c, i16* @g_i16, align 2 + ret void +} + +define void @f_i16_i64_i16(i16 %a, i64 %b, i16 %c) #0 { +; CHECK: f_i16_i64_i16: +; CHECK: mov.w r15, &g_i16 + store volatile i16 %a, i16* @g_i16, align 2 +;CHECK: mov.w 10(r4), &g_i64+6 +;CHECK: mov.w 8(r4), &g_i64+4 +;CHECK: mov.w 6(r4), &g_i64+2 +;CHECK: mov.w 4(r4), &g_i64 + store volatile i64 %b, i64* @g_i64, align 2 +;CHECK: mov.w 12(r4), &g_i16 + store volatile i16 %c, i16* @g_i16, align 2 + ret void +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/MSP430/cc_ret.ll b/test/CodeGen/MSP430/cc_ret.ll new file mode 100644 index 0000000..c2a9ae6 --- /dev/null +++ b/test/CodeGen/MSP430/cc_ret.ll @@ -0,0 +1,61 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16-a0:16:16" +target triple = "msp430---elf" + +define void @test() #0 { +entry: +; CHECK: test: + +; CHECK: call #f_i16 +; CHECK: mov.w r15, &g_i16 + %0 = call i16 @f_i16() + store volatile i16 %0, i16* @g_i16 + +; CHECK: call #f_i32 +; CHECK: mov.w r15, &g_i32+2 +; CHECK: mov.w r14, &g_i32 + %1 = call i32 @f_i32() + store volatile i32 %1, i32* @g_i32 + +; CHECK: call #f_i64 +; CHECK: mov.w r15, &g_i64+6 +; CHECK: mov.w r14, &g_i64+4 +; CHECK: mov.w r13, &g_i64+2 +; CHECK: mov.w r12, &g_i64 + %2 = call i64 @f_i64() + store volatile i64 %2, i64* @g_i64 + + ret void +} + +@g_i16 = common global i16 0, align 2 +@g_i32 = common global i32 0, align 2 +@g_i64 = common global i64 0, align 2 + +define i16 @f_i16() #0 { +; CHECK: f_i16: +; CHECK: mov.w #1, r15 +; CHECK: ret + ret i16 1 +} + +define i32 @f_i32() #0 { +; CHECK: f_i32: +; CHECK: mov.w #772, r14 +; CHECK: mov.w #258, r15 +; CHECK: ret + 
ret i32 16909060 +} + +define i64 @f_i64() #0 { +; CHECK: f_i64: +; CHECK: mov.w #1800, r12 +; CHECK: mov.w #1286, r13 +; CHECK: mov.w #772, r14 +; CHECK: mov.w #258, r15 +; CHECK: ret + ret i64 72623859790382856 +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/MSP430/lit.local.cfg b/test/CodeGen/MSP430/lit.local.cfg index 0ca9fc9..a18fe6f 100644 --- a/test/CodeGen/MSP430/lit.local.cfg +++ b/test/CodeGen/MSP430/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp', '.test'] - targets = set(config.root.targets_to_build.split()) if not 'MSP430' in targets: config.unsupported = True diff --git a/test/CodeGen/MSP430/transient-stack-alignment.ll b/test/CodeGen/MSP430/transient-stack-alignment.ll new file mode 100644 index 0000000..cca8350 --- /dev/null +++ b/test/CodeGen/MSP430/transient-stack-alignment.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16-a0:16:16" +target triple = "msp430---elf" + +define void @test() #0 { +; CHECK-LABEL: test: +; CHECK: sub.w #2, r1 + %1 = alloca i8, align 1 +; CHECK-NEXT: mov.b #0, 1(r1) + store i8 0, i8* %1, align 1 +; CHECK-NEXT: add.w #2, r1 +; CHECK-NEXT: ret + ret void +} + +attributes #0 = { nounwind "no-frame-pointer-elim"="false" } diff --git a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll index 8479ad2..3381143 100644 --- a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll +++ b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s ; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32r2 -mattr=+mips16 -soft-float -mips16-hard-float < %s | FileCheck %s define 
signext i8 @A(i8 %e.0, i8 signext %sum) nounwind { entry: diff --git a/test/CodeGen/Mips/2008-08-01-AsmInline.ll b/test/CodeGen/Mips/2008-08-01-AsmInline.ll index dbde742..e274bc0 100644 --- a/test/CodeGen/Mips/2008-08-01-AsmInline.ll +++ b/test/CodeGen/Mips/2008-08-01-AsmInline.ll @@ -51,3 +51,21 @@ entry: ret void } +; Check that RA doesn't allocate registers in the clobber list. +; CHECK-LABEL: foo4: +; CHECK: #APP +; CHECK-NOT: ulh $2 +; CHECK: #NO_APP +; CHECK: #APP +; CHECK-NOT: $f0 +; CHECK: #NO_APP + +define void @foo4() { +entry: + %0 = tail call i32 asm sideeffect "ulh $0,16($$sp)\0A\09", "=r,~{$2}"() + store i32 %0, i32* @gi2, align 4 + %1 = load float* @gf0, align 4 + %2 = tail call double asm sideeffect "cvt.d.s $0, $1\0A\09", "=f,f,~{$f0}"(float %1) + store double %2, double* @gd0, align 8 + ret void +} diff --git a/test/CodeGen/Mips/2013-11-18-fp64-const0.ll b/test/CodeGen/Mips/2013-11-18-fp64-const0.ll new file mode 100644 index 0000000..f8390d9 --- /dev/null +++ b/test/CodeGen/Mips/2013-11-18-fp64-const0.ll @@ -0,0 +1,31 @@ +; RUN: llc -march=mips -mattr=-fp64 < %s | FileCheck -check-prefix=CHECK-FP32 %s +; RUN: llc -march=mips -mattr=+fp64 < %s | FileCheck -check-prefix=CHECK-FP64 %s + +; This test case is a simplified version of an llvm-stress generated test with +; seed=3718491962. +; It originally failed on MIPS32 with FP64 with the following error: +; LLVM ERROR: ran out of registers during register allocation +; This was caused by impossible register class restrictions caused by the use +; of BuildPairF64 instead of BuildPairF64_64. + +define void @autogen_SD3718491962() { +BB: + ; CHECK-FP32: mtc1 $zero, $f{{[0-3]*[02468]}} + ; CHECK-FP32: mtc1 $zero, $f{{[0-3]*[13579]}} + + ; CHECK-FP64: mtc1 $zero, $f{{[0-9]+}} + ; CHECK-FP64-NOT: mtc1 $zero, + ; FIXME: A redundant mthc1 is currently emitted. 
Add a -NOT when it is + ; eliminated + + %Cmp = fcmp ule double 0.000000e+00, undef + %Cmp11 = fcmp ueq double 0xFDBD965CF1BB7FDA, undef + br label %CF88 + +CF88: ; preds = %CF86 + %Sl18 = select i1 %Cmp, i1 %Cmp11, i1 %Cmp + br i1 %Sl18, label %CF88, label %CF85 + +CF85: ; preds = %CF88 + ret void +} diff --git a/test/CodeGen/Mips/beqzc.ll b/test/CodeGen/Mips/beqzc.ll new file mode 100644 index 0000000..4a294c2 --- /dev/null +++ b/test/CodeGen/Mips/beqzc.ll @@ -0,0 +1,20 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=cond-b-short + +@i = global i32 0, align 4 +@j = common global i32 0, align 4 + +; Function Attrs: nounwind optsize +define i32 @main() #0 { +entry: + %0 = load i32* @i, align 4 + %cmp = icmp eq i32 %0, 0 + %. = select i1 %cmp, i32 10, i32 55 + store i32 %., i32* @j, align 4 +; cond-b-short: beqz ${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]+}} # 16 bit inst + ret i32 0 +} + +attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } + + + diff --git a/test/CodeGen/Mips/beqzc1.ll b/test/CodeGen/Mips/beqzc1.ll new file mode 100644 index 0000000..8f929a8 --- /dev/null +++ b/test/CodeGen/Mips/beqzc1.ll @@ -0,0 +1,24 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=cond-b-short + +@i = global i32 0, align 4 +@j = common global i32 0, align 4 + +; Function Attrs: nounwind optsize +define i32 @main() #0 { +entry: + %0 = load i32* @i, align 4 + %cmp = icmp eq i32 %0, 0 + br i1 %cmp, label %if.then, label %if.end + +; cond-b-short: bnez ${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]+}} # 16 bit inst +if.then: ; preds = %entry + store i32 10, i32* @j, 
align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret i32 0 +} + +attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } + + diff --git a/test/CodeGen/Mips/blockaddr.ll b/test/CodeGen/Mips/blockaddr.ll index 7de7fa6..beab65f 100644 --- a/test/CodeGen/Mips/blockaddr.ll +++ b/test/CodeGen/Mips/blockaddr.ll @@ -4,6 +4,8 @@ ; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N32 ; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N64 ; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N64 +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 -mattr=+mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-MIPS16-1 +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 -mattr=+mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-MIPS16-2 @reg = common global i8* null, align 4 @@ -36,6 +38,14 @@ entry: ; STATIC-N64: daddiu ${{[0-9]+}}, $[[R2]], %got_ofst($tmp[[T2]]) ; STATIC-N64: ld $[[R3:[0-9]+]], %got_page($tmp[[T3:[0-9]+]]) ; STATIC-N64: daddiu ${{[0-9]+}}, $[[R3]], %got_ofst($tmp[[T3]]) +; STATIC-MIPS16-1: .ent f +; STATIC-MIPS16-2: .ent f +; STATIC-MIPS16-1: li $[[R1_16:[0-9]+]], %hi($tmp[[TI_16:[0-9]+]]) +; STATIC-MIPS16-1: sll ${{[0-9]+}}, $[[R1_16]], 16 +; STATIC-MIPS16-2: li ${{[0-9]+}}, %lo($tmp{{[0-9]+}}) +; STATIC-MIPS16-1 jal dummy +; STATIC-MIPS16-2 jal dummy + define void @f() nounwind { entry: %call = tail call i8* @dummy(i8* blockaddress(@f, %baz)) diff --git a/test/CodeGen/Mips/brdelayslot.ll b/test/CodeGen/Mips/brdelayslot.ll index 869ecd9..68341c1 
100644 --- a/test/CodeGen/Mips/brdelayslot.ll +++ b/test/CodeGen/Mips/brdelayslot.ll @@ -160,7 +160,14 @@ for.end: ; preds = %for.body, %entry ; ; SUCCBB-LABEL: succbbs_br1: ; SUCCBB: beqz ${{[0-9]+}}, $BB -; SUCCBB-NEXT: lw $25, %call16(foo100) +; SUCCBB-NEXT: lw ${{[0-9]+}}, %got(foo101)(${{[0-9]+}}) + +define internal fastcc void @foo101() { +entry: + tail call void @foo100() + tail call void @foo100() + ret void +} define void @succbbs_br1(i32 %a) { entry: @@ -168,7 +175,7 @@ entry: br i1 %tobool, label %if.end, label %if.then if.then: ; preds = %entry - tail call void @foo100() #1 + tail call fastcc void @foo101() br label %if.end if.end: ; preds = %entry, %if.then diff --git a/test/CodeGen/Mips/brsize3.ll b/test/CodeGen/Mips/brsize3.ll new file mode 100644 index 0000000..7b1f440 --- /dev/null +++ b/test/CodeGen/Mips/brsize3.ll @@ -0,0 +1,33 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=b-no-short + +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=b-long + +; ModuleID = 'brsize3.c' +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64" +target triple = "mips--linux-gnu" + +; Function Attrs: noreturn nounwind optsize +define void @foo() #0 { +entry: + br label %x + +x: ; preds = %x, %entry + tail call void asm sideeffect ".space 60000", ""() #1, !srcloc !1 + br label %x +; b-long: $BB0_1: +; b-long: #APP +; b-long: .space 60000 +; b-long: #NO_APP +; b-long: b $BB0_1 +; b-no-short: $BB0_1: +; b-no-short: #APP +; b-no-short: .space 60000 +; b-no-short: #NO_APP +; b-no-short-NOT: b $BB0_1 # 16 bit inst + +} + +attributes #0 = { noreturn nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" 
"no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } +attributes #1 = { nounwind } + +!1 = metadata !{i32 45} diff --git a/test/CodeGen/Mips/brsize3a.ll b/test/CodeGen/Mips/brsize3a.ll new file mode 100644 index 0000000..6382fa2 --- /dev/null +++ b/test/CodeGen/Mips/brsize3a.ll @@ -0,0 +1,26 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=b-short + +; ModuleID = 'brsize3.c' +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64" +target triple = "mips--linux-gnu" + +; Function Attrs: noreturn nounwind optsize +define void @foo() #0 { +entry: + br label %x + +x: ; preds = %x, %entry + tail call void asm sideeffect ".space 200", ""() #1, !srcloc !1 + br label %x +; b-short: $BB0_1: +; b-short: #APP +; b-short: .space 200 +; b-short: #NO_APP +; b-short: b $BB0_1 # 16 bit inst + +} + +attributes #0 = { noreturn nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } +attributes #1 = { nounwind } + +!1 = metadata !{i32 45} diff --git a/test/CodeGen/Mips/bswap.ll b/test/CodeGen/Mips/bswap.ll index 0da2d2b..f17b91a 100644 --- a/test/CodeGen/Mips/bswap.ll +++ b/test/CodeGen/Mips/bswap.ll @@ -1,11 +1,13 @@ ; RUN: llc < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=MIPS32 ; RUN: llc < %s -march=mips64el -mcpu=mips64r2 | FileCheck %s -check-prefix=MIPS64 +; RUN: llc < %s -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32r2 -mattr=+mips16 -soft-float -mips16-hard-float | FileCheck %s -check-prefix=mips16 define i32 @bswap32(i32 %x) nounwind readnone { entry: ; MIPS32-LABEL: bswap32: ; MIPS32: wsbh $[[R0:[0-9]+]] ; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16 +; 
mips16: .ent bswap32 %or.3 = call i32 @llvm.bswap.i32(i32 %x) ret i32 %or.3 } @@ -15,6 +17,7 @@ entry: ; MIPS64-LABEL: bswap64: ; MIPS64: dsbh $[[R0:[0-9]+]] ; MIPS64: dshd ${{[0-9]+}}, $[[R0]] +; mips16: .ent bswap64 %or.7 = call i64 @llvm.bswap.i64(i64 %x) ret i64 %or.7 } diff --git a/test/CodeGen/Mips/buildpairextractelementf64.ll b/test/CodeGen/Mips/buildpairextractelementf64.ll index 585bc25..490d427 100644 --- a/test/CodeGen/Mips/buildpairextractelementf64.ll +++ b/test/CodeGen/Mips/buildpairextractelementf64.ll @@ -1,20 +1,31 @@ -; RUN: llc < %s -march=mipsel | FileCheck %s -; RUN: llc < %s -march=mips | FileCheck %s +; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=FP32 +; RUN: llc -march=mips < %s | FileCheck %s -check-prefix=FP32 +; RUN: llc -march=mipsel -mattr=+fp64 < %s | FileCheck %s -check-prefix=FP64 +; RUN: llc -march=mips -mattr=+fp64 < %s | FileCheck %s -check-prefix=FP64 + @a = external global i32 +; CHECK-LABEL: f: +; FP32: mtc1 +; FP32: mtc1 +; FP64-DAG: mtc1 +; FP64-DAG: mthc1 + define double @f(i32 %a1, double %d) nounwind { entry: -; CHECK: mtc1 -; CHECK: mtc1 store i32 %a1, i32* @a, align 4 %add = fadd double %d, 2.000000e+00 ret double %add } +; CHECK-LABEL: f3: +; FP32: mfc1 +; FP32: mfc1 +; FP64-DAG: mfc1 +; FP64-DAG: mfhc1 + define void @f3(double %d, i32 %a1) nounwind { entry: -; CHECK: mfc1 -; CHECK: mfc1 tail call void @f2(i32 %a1, double %d) nounwind ret void } diff --git a/test/CodeGen/Mips/cmplarge.ll b/test/CodeGen/Mips/cmplarge.ll index b082fa3..2a3d30a 100644 --- a/test/CodeGen/Mips/cmplarge.ll +++ b/test/CodeGen/Mips/cmplarge.ll @@ -10,7 +10,7 @@ target triple = "mipsel--linux-gnu" define void @getSubImagesLuma(%struct.StorablePicture* nocapture %s) #0 { entry: %size_y = getelementptr inbounds %struct.StorablePicture* %s, i32 0, i32 1 - %0 = load i32* %size_y, align 4, !tbaa !0 + %0 = load i32* %size_y, align 4 %sub = add nsw i32 %0, -1 %add5 = add nsw i32 %0, 20 %cmp6 = icmp sgt i32 %add5, -20 @@ -20,7 +20,7 @@ 
for.body: ; preds = %entry, %for.body %j.07 = phi i32 [ %inc, %for.body ], [ -20, %entry ] %call = tail call i32 bitcast (i32 (...)* @iClip3 to i32 (i32, i32, i32)*)(i32 0, i32 %sub, i32 %j.07) #2 %inc = add nsw i32 %j.07, 1 - %1 = load i32* %size_y, align 4, !tbaa !0 + %1 = load i32* %size_y, align 4 %add = add nsw i32 %1, 20 %cmp = icmp slt i32 %inc, %add br i1 %cmp, label %for.body, label %for.end @@ -33,10 +33,6 @@ for.end: ; preds = %for.body, %entry ; cmp16: .end getSubImagesLuma declare i32 @iClip3(...) #1 -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #2 = { nounwind } - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/Mips/const1.ll b/test/CodeGen/Mips/const1.ll new file mode 100644 index 0000000..cb2baca --- /dev/null +++ b/test/CodeGen/Mips/const1.ll @@ -0,0 +1,35 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands < %s | FileCheck %s + +; ModuleID = 'const1.c' +target datalayout = 
"e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64" +target triple = "mipsel-unknown-linux" + +@i = common global i32 0, align 4 +@j = common global i32 0, align 4 +@k = common global i32 0, align 4 +@l = common global i32 0, align 4 + +; Function Attrs: nounwind +define void @t() #0 { +entry: + store i32 -559023410, i32* @i, align 4 + store i32 -559023410, i32* @j, align 4 + store i32 -87105875, i32* @k, align 4 + store i32 262991277, i32* @l, align 4 + ret void +; CHECK: lw ${{[0-9]+}}, $CPI0_0 +; CHECK: lw ${{[0-9]+}}, $CPI0_1 +; CHECK: lw ${{[0-9]+}}, $CPI0_2 +; CHECK: $CPI0_0: +; CHECK: .4byte 3735943886 +; CHECK: $CPI0_1: +; CHECK: .4byte 4207861421 +; CHECK: $CPI0_2: +; CHECK: .4byte 262991277 +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } + +!llvm.ident = !{!0} + +!0 = metadata !{metadata !"clang version 3.4 (gitosis@dmz-portal.mips.com:clang.git b754974ec32ab712ea7d8b52cd8037b24e7d6ed3) (gitosis@dmz-portal.mips.com:llvm.git 8e211187b501bc73edb938fde0019c9a20bcffd5)"} diff --git a/test/CodeGen/Mips/const4a.ll b/test/CodeGen/Mips/const4a.ll new file mode 100644 index 0000000..0332327 --- /dev/null +++ b/test/CodeGen/Mips/const4a.ll @@ -0,0 +1,180 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands -mips-constant-islands-no-load-relaxation < %s | FileCheck %s -check-prefix=no-load-relax + +; ModuleID = 'const4.c' +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64" +target triple = "mips--linux-gnu" + +@i = common global i32 0, align 4 +@b = common global i32 0, align 4 +@j = common global i32 0, align 4 +@k = common global i32 0, align 4 +@l = 
common global i32 0, align 4 + +; Function Attrs: nounwind +define void @t() #0 { +entry: + store i32 -559023410, i32* @i, align 4 + %0 = load i32* @b, align 4 +; no-load-relax lw ${{[0-9]+}}, $CPI0_1 # 16 bit inst + %tobool = icmp ne i32 %0, 0 + br i1 %tobool, label %if.then, label %if.else +; no-load-relax: beqz ${{[0-9]+}}, $BB0_3 +; no-load-relax: lw ${{[0-9]+}}, %call16(foo)(${{[0-9]+}}) +; no-load-relax: b $BB0_4 +; no-load-relax: .align 2 +; no-load-relax: $CPI0_0: +; no-load-relax: .4byte 3735943886 +; no-load-relax: $BB0_3: +; no-load-relax: lw ${{[0-9]+}}, %call16(goo)(${{[0-9]+}}) +if.then: ; preds = %entry + call void bitcast (void (...)* @foo to void ()*)() + br label %if.end + +if.else: ; preds = %entry + call void bitcast (void (...)* @goo to void ()*)() + br label %if.end + +if.end: ; preds = %if.else, %if.then + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void 
()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void 
(...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void 
bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + ret void +} + +declare void @foo(...) #1 + +declare void @goo(...) #1 + +declare void @hoo(...) 
#1 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } + +!llvm.ident = !{!0} + +!0 = metadata !{metadata !"clang version 3.4 (gitosis@dmz-portal.mips.com:clang.git b310439121c875937d78cc49cc969bc1197fc025) (gitosis@dmz-portal.mips.com:llvm.git 7fc0ca9656ebec8dad61f72f5a5ddfb232c070fd)"} diff --git a/test/CodeGen/Mips/const6.ll b/test/CodeGen/Mips/const6.ll new file mode 100644 index 0000000..20cdc09 --- /dev/null +++ b/test/CodeGen/Mips/const6.ll @@ -0,0 +1,164 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=load-relax + +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands -mips-constant-islands-no-load-relaxation < %s | FileCheck %s -check-prefix=no-load-relax + +; ModuleID = 'const6.c' +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64" +target triple = "mips--linux-gnu" + +@i = common global i32 0, align 4 +@j = common global i32 0, align 4 +@k = common global i32 0, align 4 +@l = common global i32 0, align 4 +@b = common global i32 0, align 4 + +; Function Attrs: nounwind +define void @t() #0 { +entry: + store i32 -559023410, i32* @i, align 4 +; load-relax: lw ${{[0-9]+}}, $CPI0_0 +; load-relax: jrc $ra +; load-relax: .align 2 +; load-relax: $CPI0_0: +; load-relax: .4byte 3735943886 +; load-relax: .end t + +; no-load-relax: lw 
${{[0-9]+}}, $CPI0_1 # 16 bit inst +; no-load-relax: jalrc ${{[0-9]+}} +; no-load-relax: b $BB0_2 +; no-load-relax: .align 2 +; no-load-relax: $CPI0_0: +; no-load-relax: .4byte 3735943886 +; no-load-relax: $BB0_2: + + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* 
@hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void 
bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void 
()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + call void bitcast (void (...)* @hoo to void ()*)() + ret void +} + +declare void @hoo(...) #1 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } + +!llvm.ident = !{!0} + +!0 = metadata !{metadata !"clang version 3.4 (gitosis@dmz-portal.mips.com:clang.git b310439121c875937d78cc49cc969bc1197fc025) (gitosis@dmz-portal.mips.com:llvm.git 7fc0ca9656ebec8dad61f72f5a5ddfb232c070fd)"} + + diff --git a/test/CodeGen/Mips/const6a.ll b/test/CodeGen/Mips/const6a.ll new file mode 100644 index 0000000..8b402ac --- /dev/null +++ b/test/CodeGen/Mips/const6a.ll @@ -0,0 +1,29 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=load-relax1 + +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=load-relax + +; ModuleID = 'const6a.c' +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64" +target triple = 
"mips--linux-gnu" + +@i = common global i32 0, align 4 + +; Function Attrs: nounwind +define void @t() #0 { +entry: + store i32 -559023410, i32* @i, align 4 +; load-relax-NOT: lw ${{[0-9]+}}, $CPI0_0 # 16 bit inst +; load-relax1: lw ${{[0-9]+}}, $CPI0_0 +; load-relax: jrc $ra +; load-relax: .align 2 +; load-relax: $CPI0_0: +; load-relax: .4byte 3735943886 +; load-relax: .end t + call void asm sideeffect ".space 40000", ""() #1, !srcloc !1 + ret void +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } +attributes #1 = { nounwind } + +!1 = metadata !{i32 121} diff --git a/test/CodeGen/Mips/ctlz.ll b/test/CodeGen/Mips/ctlz.ll new file mode 100644 index 0000000..2ddb727 --- /dev/null +++ b/test/CodeGen/Mips/ctlz.ll @@ -0,0 +1,27 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 -mattr=+mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=static + +@x = global i32 28912, align 4 +@y = common global i32 0, align 4 + + +; Function Attrs: nounwind +define i32 @main() #0 { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + %0 = load i32* @x, align 4 + %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 true) + store i32 %1, i32* @y, align 4 + ret i32 0 +} + +; static: .end main + +; Function Attrs: nounwind readnone +declare i32 @llvm.ctlz.i32(i32, i1) #1 + + + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } +attributes #1 = { nounwind readnone } + diff --git a/test/CodeGen/Mips/disable-tail-merge.ll b/test/CodeGen/Mips/disable-tail-merge.ll new file mode 100644 index 0000000..b4c093a --- 
/dev/null +++ b/test/CodeGen/Mips/disable-tail-merge.ll @@ -0,0 +1,33 @@ +; RUN: llc -march=mipsel < %s | FileCheck %s + +@g0 = common global i32 0, align 4 +@g1 = common global i32 0, align 4 + +; CHECK: addiu ${{[0-9]+}}, ${{[0-9]+}}, 23 +; CHECK: addiu ${{[0-9]+}}, ${{[0-9]+}}, 23 + +define i32 @test1(i32 %a) { +entry: + %tobool = icmp eq i32 %a, 0 + %0 = load i32* @g0, align 4 + br i1 %tobool, label %if.else, label %if.then + +if.then: + %add = add nsw i32 %0, 1 + store i32 %add, i32* @g0, align 4 + %1 = load i32* @g1, align 4 + %add1 = add nsw i32 %1, 23 + br label %if.end + +if.else: + %add2 = add nsw i32 %0, 11 + store i32 %add2, i32* @g0, align 4 + %2 = load i32* @g1, align 4 + %add3 = add nsw i32 %2, 23 + br label %if.end + +if.end: + %storemerge = phi i32 [ %add3, %if.else ], [ %add1, %if.then ] + store i32 %storemerge, i32* @g1, align 4 + ret i32 %storemerge +} diff --git a/test/CodeGen/Mips/divrem.ll b/test/CodeGen/Mips/divrem.ll index a983c46..b631c3b 100644 --- a/test/CodeGen/Mips/divrem.ll +++ b/test/CodeGen/Mips/divrem.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=mips < %s | FileCheck %s -check-prefix=TRAP +; RUN: llc -march=mips -verify-machineinstrs < %s |\ +; RUN: FileCheck %s -check-prefix=TRAP ; RUN: llc -march=mips -mno-check-zero-division < %s |\ ; RUN: FileCheck %s -check-prefix=NOCHECK @@ -11,6 +12,9 @@ ; NOCHECK-NOT: teq ; NOCHECK: .end sdiv1 +@g0 = common global i32 0, align 4 +@g1 = common global i32 0, align 4 + define i32 @sdiv1(i32 %a0, i32 %a1) nounwind readnone { entry: %div = sdiv i32 %a0, %a1 @@ -67,3 +71,11 @@ entry: %div = udiv i32 %a0, %a1 ret i32 %div } + +define i32 @killFlags() { +entry: + %0 = load i32* @g0, align 4 + %1 = load i32* @g1, align 4 + %div = sdiv i32 %0, %1 + ret i32 %div +} diff --git a/test/CodeGen/Mips/extins.ll b/test/CodeGen/Mips/extins.ll index a164f70..efaeeea 100644 --- a/test/CodeGen/Mips/extins.ll +++ b/test/CodeGen/Mips/extins.ll @@ -1,8 +1,10 @@ -; RUN: llc -march=mips -mcpu=mips32r2 < %s | FileCheck %s +; 
RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2 +; RUN: llc < %s -march=mips -mcpu=mips16 | FileCheck %s -check-prefix=16 define i32 @ext0_5_9(i32 %s, i32 %pos, i32 %sz) nounwind readnone { entry: -; CHECK: ext ${{[0-9]+}}, $4, 5, 9 +; 32R2: ext ${{[0-9]+}}, $4, 5, 9 +; 16-NOT: ext ${{[0-9]+}} %shr = lshr i32 %s, 5 %and = and i32 %shr, 511 ret i32 %and @@ -10,7 +12,8 @@ entry: define void @ins2_5_9(i32 %s, i32* nocapture %d) nounwind { entry: -; CHECK: ins ${{[0-9]+}}, $4, 5, 9 +; 32R2: ins ${{[0-9]+}}, $4, 5, 9 +; 16-NOT: ins ${{[0-9]+}} %and = shl i32 %s, 5 %shl = and i32 %and, 16352 %tmp3 = load i32* %d, align 4 diff --git a/test/CodeGen/Mips/f16abs.ll b/test/CodeGen/Mips/f16abs.ll new file mode 100644 index 0000000..928914f --- /dev/null +++ b/test/CodeGen/Mips/f16abs.ll @@ -0,0 +1,37 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=static + +@y = global double -1.450000e+00, align 8 +@x = common global double 0.000000e+00, align 8 + +@y1 = common global float 0.000000e+00, align 4 +@x1 = common global float 0.000000e+00, align 4 + + + +; Function Attrs: nounwind optsize +define i32 @main() #0 { +entry: + %0 = load double* @y, align 8 + %call = tail call double @fabs(double %0) #2 + store double %call, double* @x, align 8 +; static-NOT: .ent __call_stub_fp_fabs +; static-NOT: jal fabs + %1 = load float* @y1, align 4 + %call2 = tail call float @fabsf(float %1) #2 + store float %call2, float* @x1, align 4 +; static-NOT: .ent __call_stub_fp_fabsf +; static-NOT: jal fabsf + ret i32 0 +} + +; Function Attrs: nounwind optsize readnone +declare double @fabs(double) #1 + +declare float @fabsf(float) #1 + +attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" 
"unsafe-fp-math"="false" "use-soft-float"="true" } +attributes #1 = { nounwind optsize readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } +attributes #2 = { nounwind optsize readnone } + + + diff --git a/test/CodeGen/Mips/fixdfsf.ll b/test/CodeGen/Mips/fixdfsf.ll new file mode 100644 index 0000000..b08eefd --- /dev/null +++ b/test/CodeGen/Mips/fixdfsf.ll @@ -0,0 +1,18 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic < %s | FileCheck %s -check-prefix=pic1 +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic < %s | FileCheck %s -check-prefix=pic2 + +@x = common global double 0.000000e+00, align 8 +@y = common global i32 0, align 4 + +; Function Attrs: nounwind optsize +define void @foo() { +entry: + %0 = load double* @x, align 8 + %conv = fptoui double %0 to i32 + store i32 %conv, i32* @y, align 4 +; pic1: lw ${{[0-9]+}}, %call16(__fixunsdfsi)(${{[0-9]+}}) +; pic2: lw ${{[0-9]+}}, %got(__mips16_call_stub_2)(${{[0-9]+}}) + ret void +} + + diff --git a/test/CodeGen/Mips/fp16instrinsmc.ll b/test/CodeGen/Mips/fp16instrinsmc.ll index 3c01d56..bb43d27 100644 --- a/test/CodeGen/Mips/fp16instrinsmc.ll +++ b/test/CodeGen/Mips/fp16instrinsmc.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic < %s | FileCheck %s -check-prefix=pic +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips32-function-mask=1010111 -mips-os16 < %s | FileCheck %s -check-prefix=fmask @x = global float 1.500000e+00, align 4 @xn = global float -1.900000e+01, align 4 @@ -13,6 +14,14 @@ ; Function Attrs: nounwind define void @foo1() #0 
{ +; fmask: .ent foo1 +; fmask: .set noreorder +; fmask: .set nomacro +; fmask: .set noat +; fmask: .set at +; fmask: .set macro +; fmask: .set reorder +; fmask: .end foo1 entry: %0 = load float* @x, align 4 %1 = load float* @one, align 4 @@ -26,6 +35,9 @@ declare float @copysignf(float, float) #1 ; Function Attrs: nounwind define void @foo2() #0 { +; fmask: .ent foo2 +; fmask: save {{.*}} +; fmask: .end foo2 entry: %0 = load float* @x, align 4 %1 = load float* @negone, align 4 @@ -37,6 +49,14 @@ entry: ; Function Attrs: nounwind define void @foo3() #0 { entry: +; fmask: .ent foo3 +; fmask: .set noreorder +; fmask: .set nomacro +; fmask: .set noat +; fmask: .set at +; fmask: .set macro +; fmask: .set reorder +; fmask: .end foo3 %0 = load double* @xd, align 8 %1 = load float* @oned, align 4 %conv = fpext float %1 to double @@ -51,6 +71,9 @@ declare double @copysign(double, double) #1 ; Function Attrs: nounwind define void @foo4() #0 { entry: +; fmask: .ent foo4 +; fmask: save {{.*}} +; fmask: .end foo4 %0 = load double* @xd, align 8 %1 = load double* @negoned, align 8 %call = call double @copysign(double %0, double %1) #2 @@ -362,7 +385,7 @@ entry: ; Function Attrs: nounwind declare double @exp2(double) #0 -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } -attributes #1 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" 
"use-soft-float"="true" } +attributes #1 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } attributes #2 = { nounwind readnone } attributes #3 = { nounwind } diff --git a/test/CodeGen/Mips/fp16mix.ll b/test/CodeGen/Mips/fp16mix.ll new file mode 100644 index 0000000..8d85099 --- /dev/null +++ b/test/CodeGen/Mips/fp16mix.ll @@ -0,0 +1,92 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips32-function-mask=10 -mips-os16 < %s | FileCheck %s -check-prefix=fmask1 + +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips32-function-mask=01 -mips-os16 < %s | FileCheck %s -check-prefix=fmask2 + +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips32-function-mask=10. 
-mips-os16 < %s | FileCheck %s -check-prefix=fmask1nr + +; Function Attrs: nounwind optsize readnone +define void @foo1() { +entry: + ret void +; fmask1: .ent foo1 +; fmask1: .set noreorder +; fmask1: .set nomacro +; fmask1: .set noat +; fmask1: .set at +; fmask1: .set macro +; fmask1: .set reorder +; fmask1: .end foo1 +; fmask2: .ent foo1 +; fmask2: save {{.*}} +; fmask2: .end foo1 +; fmask1nr: .ent foo1 +; fmask1nr: .set noreorder +; fmask1nr: .set nomacro +; fmask1nr: .set noat +; fmask1nr: .set at +; fmask1nr: .set macro +; fmask1nr: .set reorder +; fmask1nr: .end foo1 +} + +; Function Attrs: nounwind optsize readnone +define void @foo2() { +entry: + ret void +; fmask2: .ent foo2 +; fmask2: .set noreorder +; fmask2: .set nomacro +; fmask2: .set noat +; fmask2: .set at +; fmask2: .set macro +; fmask2: .set reorder +; fmask2: .end foo2 +; fmask1: .ent foo2 +; fmask1: save {{.*}} +; fmask1: .end foo2 +; fmask1nr: .ent foo2 +; fmask1nr: save {{.*}} +; fmask1nr: .end foo2 +} + +; Function Attrs: nounwind optsize readnone +define void @foo3() { +entry: + ret void +; fmask1: .ent foo3 +; fmask1: .set noreorder +; fmask1: .set nomacro +; fmask1: .set noat +; fmask1: .set at +; fmask1: .set macro +; fmask1: .set reorder +; fmask1: .end foo3 +; fmask2: .ent foo3 +; fmask2: save {{.*}} +; fmask2: .end foo3 +; fmask1r: .ent foo3 +; fmask1r: save {{.*}} +; fmask1r: .end foo3 +} + +; Function Attrs: nounwind optsize readnone +define void @foo4() { +entry: + ret void +; fmask2: .ent foo4 +; fmask2: .set noreorder +; fmask2: .set nomacro +; fmask2: .set noat +; fmask2: .set at +; fmask2: .set macro +; fmask2: .set reorder +; fmask2: .end foo4 +; fmask1: .ent foo4 +; fmask1: save {{.*}} +; fmask1: .end foo4 +; fmask1nr: .ent foo4 +; fmask1nr: save {{.*}} +; fmask1nr: .end foo4 +} + + diff --git a/test/CodeGen/Mips/fpneeded.ll b/test/CodeGen/Mips/fpneeded.ll index 623883a..dcdebb9 100644 --- a/test/CodeGen/Mips/fpneeded.ll +++ b/test/CodeGen/Mips/fpneeded.ll @@ -131,7 +131,7 @@ 
entry: ; 32: .set reorder ; 32: .end foo3 -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } define void @vv() #0 { entry: diff --git a/test/CodeGen/Mips/fpnotneeded.ll b/test/CodeGen/Mips/fpnotneeded.ll index dc2ec10..b4fab64 100644 --- a/test/CodeGen/Mips/fpnotneeded.ll +++ b/test/CodeGen/Mips/fpnotneeded.ll @@ -57,7 +57,7 @@ entry: ; 32: restore {{.+}} ; 32: .end foo -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } define float @fv() #0 { diff --git a/test/CodeGen/Mips/fptr2.ll b/test/CodeGen/Mips/fptr2.ll new file mode 100644 index 0000000..77028db --- /dev/null +++ b/test/CodeGen/Mips/fptr2.ll @@ -0,0 +1,20 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=static16 + +; Function Attrs: nounwind +define double @my_mul(double %a, double %b) #0 { +entry: + %a.addr = alloca double, align 8 + %b.addr = alloca double, align 8 + store double %a, double* %a.addr, align 8 + store double %b, double* %b.addr, align 8 + %0 = load double* %a.addr, align 8 + %1 = load double* %b.addr, align 8 + %mul = fmul double %0, %1 + ret double %mul +} + +; static16: .ent 
__fn_stub_my_mul +; static16: .set reorder +; static16-NEXT: #NO_APP +; static16: .end __fn_stub_my_mul +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } diff --git a/test/CodeGen/Mips/helloworld.ll b/test/CodeGen/Mips/helloworld.ll index 83c88ae..058a041 100644 --- a/test/CodeGen/Mips/helloworld.ll +++ b/test/CodeGen/Mips/helloworld.ll @@ -1,11 +1,11 @@ -; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=C1 -; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=C2 -; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=PE -; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s | FileCheck %s -check-prefix=ST1 -; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s | FileCheck %s -check-prefix=ST2 +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=C1 +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=C2 +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=PE +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s | FileCheck %s -check-prefix=ST1 +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s | FileCheck %s -check-prefix=ST2 ; -; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=SR -; RUN: llc -march=mipsel -mcpu=mips32 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=SR32 +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel 
-mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=SR +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=SR32 @.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00", align 1 @@ -26,9 +26,11 @@ entry: ; SR32: .set nomacro ; SR32: .set noat ; SR: save $ra, $s0, $s1, $s2, [[FS:[0-9]+]] -; PE: li $[[T1:[0-9]+]], %hi(_gp_disp) -; PE: addiu $[[T2:[0-9]+]], $pc, %lo(_gp_disp) -; PE: sll $[[T3:[0-9]+]], $[[T1]], 16 +; PE: .ent main +; PE: .align 2 +; PE-NEXT: li $[[T1:[0-9]+]], %hi(_gp_disp) +; PE-NEXT: addiu $[[T2:[0-9]+]], $pc, %lo(_gp_disp) +; PE: sll $[[T3:[0-9]+]], $[[T1]], 16 ; C1: lw ${{[0-9]+}}, %got($.str)(${{[0-9]+}}) ; C2: lw ${{[0-9]+}}, %call16(printf)(${{[0-9]+}}) ; C1: addiu ${{[0-9]+}}, %lo($.str) diff --git a/test/CodeGen/Mips/hf16call32.ll b/test/CodeGen/Mips/hf16call32.ll index 934cf06..461438e 100644 --- a/test/CodeGen/Mips/hf16call32.ll +++ b/test/CodeGen/Mips/hf16call32.ll @@ -1026,5 +1026,5 @@ declare { double, double } @dc_sf(float) #1 ; stel: jr $18 ; stel: .end __call_stub_fp_dc_sf -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" 
"use-soft-float"="false" } diff --git a/test/CodeGen/Mips/hf16call32_body.ll b/test/CodeGen/Mips/hf16call32_body.ll index 793b771..34bae26 100644 --- a/test/CodeGen/Mips/hf16call32_body.ll +++ b/test/CodeGen/Mips/hf16call32_body.ll @@ -20,7 +20,7 @@ entry: } ; stel: .section .mips16.fn.v_sf,"ax",@progbits ; stel: .ent __fn_stub_v_sf -; stel: la $25, v_sf +; stel: la $25,v_sf ; stel: mfc1 $4,$f12 ; stel: jr $25 ; stel: __fn_local_v_sf = v_sf @@ -40,7 +40,7 @@ entry: ; stel: .section .mips16.fn.v_df,"ax",@progbits ; stel: .ent __fn_stub_v_df -; stel: la $25, v_df +; stel: la $25,v_df ; stel: mfc1 $4,$f12 ; stel: mfc1 $5,$f13 ; stel: jr $25 @@ -63,7 +63,7 @@ entry: ; stel: .section .mips16.fn.v_sf_sf,"ax",@progbits ; stel: .ent __fn_stub_v_sf_sf -; stel: la $25, v_sf_sf +; stel: la $25,v_sf_sf ; stel: mfc1 $4,$f12 ; stel: mfc1 $5,$f14 ; stel: jr $25 @@ -86,7 +86,7 @@ entry: ; stel: .section .mips16.fn.v_sf_df,"ax",@progbits ; stel: .ent __fn_stub_v_sf_df -; stel: la $25, v_sf_df +; stel: la $25,v_sf_df ; stel: mfc1 $4,$f12 ; stel: mfc1 $6,$f14 ; stel: mfc1 $7,$f15 @@ -110,7 +110,7 @@ entry: ; stel: .section .mips16.fn.v_df_sf,"ax",@progbits ; stel: .ent __fn_stub_v_df_sf -; stel: la $25, v_df_sf +; stel: la $25,v_df_sf ; stel: mfc1 $4,$f12 ; stel: mfc1 $5,$f13 ; stel: mfc1 $6,$f14 @@ -134,7 +134,7 @@ entry: ; stel: .section .mips16.fn.v_df_df,"ax",@progbits ; stel: .ent __fn_stub_v_df_df -; stel: la $25, v_df_df +; stel: la $25,v_df_df ; stel: mfc1 $4,$f12 ; stel: mfc1 $5,$f13 ; stel: mfc1 $6,$f14 @@ -164,7 +164,7 @@ entry: ; stel: .section .mips16.fn.sf_sf,"ax",@progbits ; stel: .ent __fn_stub_sf_sf -; stel: la $25, sf_sf +; stel: la $25,sf_sf ; stel: mfc1 $4,$f12 ; stel: jr $25 ; stel: __fn_local_sf_sf = sf_sf @@ -184,7 +184,7 @@ entry: ; stel: .section .mips16.fn.sf_df,"ax",@progbits ; stel: .ent __fn_stub_sf_df -; stel: la $25, sf_df +; stel: la $25,sf_df ; stel: mfc1 $4,$f12 ; stel: mfc1 $5,$f13 ; stel: jr $25 @@ -208,7 +208,7 @@ entry: ; stel: .section 
.mips16.fn.sf_sf_sf,"ax",@progbits ; stel: .ent __fn_stub_sf_sf_sf -; stel: la $25, sf_sf_sf +; stel: la $25,sf_sf_sf ; stel: mfc1 $4,$f12 ; stel: mfc1 $5,$f14 ; stel: jr $25 @@ -232,7 +232,7 @@ entry: ; stel: .section .mips16.fn.sf_sf_df,"ax",@progbits ; stel: .ent __fn_stub_sf_sf_df -; stel: la $25, sf_sf_df +; stel: la $25,sf_sf_df ; stel: mfc1 $4,$f12 ; stel: mfc1 $6,$f14 ; stel: mfc1 $7,$f15 @@ -257,7 +257,7 @@ entry: ; stel: .section .mips16.fn.sf_df_sf,"ax",@progbits ; stel: .ent __fn_stub_sf_df_sf -; stel: la $25, sf_df_sf +; stel: la $25,sf_df_sf ; stel: mfc1 $4,$f12 ; stel: mfc1 $5,$f13 ; stel: mfc1 $6,$f14 @@ -282,7 +282,7 @@ entry: ; stel: .section .mips16.fn.sf_df_df,"ax",@progbits ; stel: .ent __fn_stub_sf_df_df -; stel: la $25, sf_df_df +; stel: la $25,sf_df_df ; stel: mfc1 $4,$f12 ; stel: mfc1 $5,$f13 ; stel: mfc1 $6,$f14 @@ -291,4 +291,4 @@ entry: ; stel: __fn_local_sf_df_df = sf_df_df ; stel: .end __fn_stub_sf_df_df -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/Mips/hf1_body.ll b/test/CodeGen/Mips/hf1_body.ll new file mode 100644 index 0000000..b2cce92 --- /dev/null +++ b/test/CodeGen/Mips/hf1_body.ll @@ -0,0 +1,21 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic -soft-float -mips16-hard-float < %s | FileCheck %s -check-prefix=picfp16 + +@x = external global float + +; Function Attrs: nounwind +define void @v_sf(float %p) #0 { +entry: + %p.addr = alloca float, align 4 + store float %p, float* %p.addr, align 4 + %0 = load float* %p.addr, align 4 + store float %0, float* @x, align 
4 + ret void +} +; picfp16: .ent __fn_stub_v_sf +; picfp16: .cpload $25 +; picfp16: .set reorder +; picfp16: .reloc 0,R_MIPS_NONE,v_sf +; picfp16: la $25,$__fn_local_v_sf +; picfp16: mfc1 $4,$f12 +; picfp16: jr $25 +; picfp16: .end __fn_stub_v_sf diff --git a/test/CodeGen/Mips/hfptrcall.ll b/test/CodeGen/Mips/hfptrcall.ll index b1d36c0..25639da 100644 --- a/test/CodeGen/Mips/hfptrcall.ll +++ b/test/CodeGen/Mips/hfptrcall.ll @@ -118,8 +118,8 @@ entry: declare i32 @printf(i8*, ...) #1 -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="true" } -attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="true" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="true" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="true" } diff --git a/test/CodeGen/Mips/i32k.ll b/test/CodeGen/Mips/i32k.ll index c6da8b1..f4dd1eb 100644 --- a/test/CodeGen/Mips/i32k.ll +++ b/test/CodeGen/Mips/i32k.ll @@ -1,16 +1,23 @@ -; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16a -; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16b +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 @.str = private unnamed_addr constant [4 x i8] c"%i\0A\00", align 1 define i32 @main() nounwind { entry: 
%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 1075344593) nounwind -; 16a: li ${{[0-9]+}}, 29905 -; 16b: li ${{[0-9]+}}, 16408 +; 16: lw ${{[0-9]+}}, 1f +; 16: b 2f +; 16: .align 2 +; 16: 1: .word 1075344593 +; 16: 2: + %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 -1075344593) nounwind -; 16a: li ${{[0-9]+}}, 49127 -; 16b: li ${{[0-9]+}}, 35631 + +; 16: lw ${{[0-9]+}}, 1f +; 16: b 2f +; 16: .align 2 +; 16: 1: .word -1075344593 +; 16: 2: ret i32 0 } diff --git a/test/CodeGen/Mips/i64arg.ll b/test/CodeGen/Mips/i64arg.ll index 0b16424..5b2d135 100644 --- a/test/CodeGen/Mips/i64arg.ll +++ b/test/CodeGen/Mips/i64arg.ll @@ -2,18 +2,18 @@ define void @f1(i64 %ll1, float %f, i64 %ll, i32 %i, float %f2) nounwind { entry: -; CHECK: move $[[R1:[0-9]+]], $5 -; CHECK: move $[[R0:[0-9]+]], $4 -; CHECK: ori $6, ${{[0-9]+}}, 3855 -; CHECK: ori $7, ${{[0-9]+}}, 22136 -; CHECK: lw $25, %call16(ff1) +; CHECK-DAG: lw $[[R2:[0-9]+]], 80($sp) +; CHECK-DAG: lw $[[R3:[0-9]+]], 84($sp) +; CHECK-DAG: move $[[R1:[0-9]+]], $5 +; CHECK-DAG: move $[[R0:[0-9]+]], $4 +; CHECK-DAG: ori $6, ${{[0-9]+}}, 3855 +; CHECK-DAG: ori $7, ${{[0-9]+}}, 22136 +; CHECK-DAG: lw $25, %call16(ff1) ; CHECK: jalr tail call void @ff1(i32 %i, i64 1085102592623924856) nounwind -; CHECK: lw $25, %call16(ff2) -; CHECK: lw $[[R2:[0-9]+]], 80($sp) -; CHECK: lw $[[R3:[0-9]+]], 84($sp) -; CHECK: move $4, $[[R2]] -; CHECK: move $5, $[[R3]] +; CHECK-DAG: lw $25, %call16(ff2) +; CHECK-DAG: move $4, $[[R2]] +; CHECK-DAG: move $5, $[[R3]] ; CHECK: jalr $25 tail call void @ff2(i64 %ll, double 3.000000e+00) nounwind %sub = add nsw i32 %i, -1 diff --git a/test/CodeGen/Mips/largeimmprinting.ll b/test/CodeGen/Mips/largeimmprinting.ll index 1e96346..09fee3d 100644 --- a/test/CodeGen/Mips/largeimmprinting.ll +++ b/test/CodeGen/Mips/largeimmprinting.ll @@ -18,11 +18,11 @@ entry: ; 64: dsll $[[R0]], $[[R0]], 48 ; 64: 
daddiu $[[R0]], $[[R0]], -1 ; 64: dsll $[[R0]], $[[R0]], 16 -; 64: daddiu $[[R0]], $[[R0]], -48 +; 64: daddiu $[[R0]], $[[R0]], -32 ; 64: daddu $sp, $sp, $[[R0]] ; 64: lui $[[R1:[0-9]+]], 1 ; 64: daddu $[[R1]], $sp, $[[R1]] -; 64: sd $ra, 40($[[R1]]) +; 64: sd $ra, 24($[[R1]]) %agg.tmp = alloca %struct.S1, align 1 %tmp = getelementptr inbounds %struct.S1* %agg.tmp, i32 0, i32 0, i32 0 diff --git a/test/CodeGen/Mips/lazy-binding.ll b/test/CodeGen/Mips/lazy-binding.ll new file mode 100644 index 0000000..839155a --- /dev/null +++ b/test/CodeGen/Mips/lazy-binding.ll @@ -0,0 +1,41 @@ +; RUN: llc -march=mipsel < %s | FileCheck %s + +; CHECK-LABEL: foo6: +; CHECK: %while.body +; CHECK: lw $25, %call16(foo2)(${{[0-9]+}}) +; CHECK: jalr $25 +; CHECK: %while.end + +define void @foo6(i32 %n) { +entry: + %tobool1 = icmp eq i32 %n, 0 + br i1 %tobool1, label %while.end, label %while.body + +while.body: ; preds = %entry, %while.body + %n.addr.02 = phi i32 [ %dec, %while.body ], [ %n, %entry ] + %dec = add nsw i32 %n.addr.02, -1 + tail call void @foo2() + %tobool = icmp eq i32 %dec, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: ; preds = %while.body, %entry + ret void +} + +declare void @foo2() + +; CHECK-LABEL: foo1: +; CHECK: lw $25, %call16(foo2)(${{[0-9]+}}) +; CHECK: jalr $25 +; CHECK: lw $25, %call16(foo2)(${{[0-9]+}}) +; CHECK: jalr $25 +; CHECK: lw $25, %call16(foo2)(${{[0-9]+}}) +; CHECK: jalr $25 + +define void @foo1() { +entry: + tail call void @foo2() + tail call void @foo2() + tail call void @foo2() + ret void +} diff --git a/test/CodeGen/Mips/lit.local.cfg b/test/CodeGen/Mips/lit.local.cfg index e157c54..1fa54b4 100644 --- a/test/CodeGen/Mips/lit.local.cfg +++ b/test/CodeGen/Mips/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp', '.test'] - targets = set(config.root.targets_to_build.split()) if not 'Mips' in targets: config.unsupported = True diff --git a/test/CodeGen/Mips/longbranch.ll b/test/CodeGen/Mips/longbranch.ll 
index 1a4f79c..af192d0 100644 --- a/test/CodeGen/Mips/longbranch.ll +++ b/test/CodeGen/Mips/longbranch.ll @@ -1,13 +1,17 @@ -; RUN: llc -march=mipsel -force-mips-long-branch < %s | FileCheck %s -check-prefix=O32 -; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 -force-mips-long-branch < %s | FileCheck %s -check-prefix=N64 +; RUN: llc -march=mipsel -force-mips-long-branch -disable-mips-delay-filler < %s | FileCheck %s -check-prefix=O32 +; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 -force-mips-long-branch -disable-mips-delay-filler < %s | FileCheck %s -check-prefix=N64 @g0 = external global i32 define void @foo1(i32 %s) nounwind { entry: +; O32: nop +; O32: addiu $sp, $sp, -8 ; O32: bal ; O32: lui $1, 0 ; O32: addiu $1, $1, {{[0-9]+}} +; N64: nop +; N64: daddiu $sp, $sp, -16 ; N64: lui $1, 0 ; N64: daddiu $1, $1, 0 ; N64: dsll $1, $1, 16 diff --git a/test/CodeGen/Mips/mips16_32_1.ll b/test/CodeGen/Mips/mips16_32_1.ll index 6f4826e..e156641 100644 --- a/test/CodeGen/Mips/mips16_32_1.ll +++ b/test/CodeGen/Mips/mips16_32_1.ll @@ -11,4 +11,4 @@ entry: ; CHECK: save {{.+}} ; CHECK: restore {{.+}} ; CHECK: .end foo -attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/Mips/mips16_32_10.ll b/test/CodeGen/Mips/mips16_32_10.ll index 330dbfe..7c017b8 100644 --- a/test/CodeGen/Mips/mips16_32_10.ll +++ b/test/CodeGen/Mips/mips16_32_10.ll @@ -54,6 +54,6 @@ entry: -attributes #0 = { nounwind "less-precise-fpmad"="false" "nomips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" 
"no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind "less-precise-fpmad"="false" "nomips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "nomips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind "less-precise-fpmad"="false" "nomips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/Mips/mips16_32_3.ll b/test/CodeGen/Mips/mips16_32_3.ll index 8874a88..dd94ec1 100644 --- a/test/CodeGen/Mips/mips16_32_3.ll +++ b/test/CodeGen/Mips/mips16_32_3.ll @@ -65,6 +65,6 @@ entry: ; 32: .set reorder ; 32: .end main -attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" } 
-attributes #2 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/Mips/mips16_32_4.ll b/test/CodeGen/Mips/mips16_32_4.ll index cdaed6c..5e49071 100644 --- a/test/CodeGen/Mips/mips16_32_4.ll +++ b/test/CodeGen/Mips/mips16_32_4.ll @@ -60,6 +60,6 @@ entry: ; 32: .end main -attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" 
"no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/Mips/mips16_32_5.ll b/test/CodeGen/Mips/mips16_32_5.ll index 45e0bf4..17900a2 100644 --- a/test/CodeGen/Mips/mips16_32_5.ll +++ b/test/CodeGen/Mips/mips16_32_5.ll @@ -75,6 +75,6 @@ entry: -attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind "less-precise-fpmad"="false" "nomips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" 
"use-soft-float"="false" } +attributes #2 = { nounwind "less-precise-fpmad"="false" "nomips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/Mips/mips16_32_6.ll b/test/CodeGen/Mips/mips16_32_6.ll index f4b8e7a..a77031a 100644 --- a/test/CodeGen/Mips/mips16_32_6.ll +++ b/test/CodeGen/Mips/mips16_32_6.ll @@ -81,6 +81,6 @@ entry: -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind "less-precise-fpmad"="false" "nomips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind "less-precise-fpmad"="false" "nomips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/Mips/mips16_32_7.ll b/test/CodeGen/Mips/mips16_32_7.ll index 
f8726ea..895b5d4 100644 --- a/test/CodeGen/Mips/mips16_32_7.ll +++ b/test/CodeGen/Mips/mips16_32_7.ll @@ -71,6 +71,6 @@ entry: -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/Mips/mips16_32_8.ll b/test/CodeGen/Mips/mips16_32_8.ll index e51f296..4152d68 100644 --- a/test/CodeGen/Mips/mips16_32_8.ll +++ b/test/CodeGen/Mips/mips16_32_8.ll @@ -68,7 +68,7 @@ entry: ; 32: .set reorder ; 32: .end main -attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" 
"unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #3 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/Mips/mips16_32_9.ll b/test/CodeGen/Mips/mips16_32_9.ll index f5ff368..c9b494f 100644 --- a/test/CodeGen/Mips/mips16_32_9.ll +++ b/test/CodeGen/Mips/mips16_32_9.ll @@ -46,6 +46,6 @@ entry: -attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" 
"no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/Mips/mips64instrs.ll b/test/CodeGen/Mips/mips64instrs.ll index 7b06c2d..2894d69 100644 --- a/test/CodeGen/Mips/mips64instrs.ll +++ b/test/CodeGen/Mips/mips64instrs.ll @@ -1,4 +1,7 @@ -; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s +; RUN: llc -march=mips64el -mcpu=mips64 -verify-machineinstrs < %s | FileCheck %s + +@gll0 = common global i64 0, align 8 +@gll1 = common global i64 0, align 8 define i64 @f0(i64 %a0, i64 %a1) nounwind readnone { entry: @@ -90,17 +93,21 @@ entry: ; CHECK: ddiv $zero, ${{[0-9]+}}, $[[R0:[0-9]+]] ; CHECK: teq $[[R0]], $zero, 7 ; CHECK: mflo - %div = sdiv i64 %a, %b + %0 = load i64* @gll0, align 8 + %1 = load 
i64* @gll1, align 8 + %div = sdiv i64 %0, %1 ret i64 %div } -define i64 @f15(i64 %a, i64 %b) nounwind readnone { +define i64 @f15() nounwind readnone { entry: ; CHECK-LABEL: f15: ; CHECK: ddivu $zero, ${{[0-9]+}}, $[[R0:[0-9]+]] ; CHECK: teq $[[R0]], $zero, 7 ; CHECK: mflo - %div = udiv i64 %a, %b + %0 = load i64* @gll0, align 8 + %1 = load i64* @gll1, align 8 + %div = udiv i64 %0, %1 ret i64 %div } @@ -148,4 +155,3 @@ entry: %neg = xor i64 %or, -1 ret i64 %neg } - diff --git a/test/CodeGen/Mips/mno-ldc1-sdc1.ll b/test/CodeGen/Mips/mno-ldc1-sdc1.ll index be9d0b6..f4854f8 100644 --- a/test/CodeGen/Mips/mno-ldc1-sdc1.ll +++ b/test/CodeGen/Mips/mno-ldc1-sdc1.ll @@ -1,22 +1,31 @@ -; RUN: llc -march=mipsel -relocation-model=pic -mno-ldc1-sdc1 < %s | \ -; RUN: FileCheck %s -check-prefix=LE-PIC +; RUN: llc -march=mipsel -relocation-model=pic -mno-ldc1-sdc1 -mcpu=mips32r2 \ +; RUN: < %s | FileCheck %s -check-prefix=LE-PIC ; RUN: llc -march=mipsel -relocation-model=static -mno-ldc1-sdc1 < %s | \ ; RUN: FileCheck %s -check-prefix=LE-STATIC ; RUN: llc -march=mips -relocation-model=pic -mno-ldc1-sdc1 < %s | \ ; RUN: FileCheck %s -check-prefix=BE-PIC -; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=CHECK-LDC1-SDC1 +; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | \ +; RUN: FileCheck %s -check-prefix=CHECK-LDC1-SDC1 @g0 = common global double 0.000000e+00, align 8 ; LE-PIC-LABEL: test_ldc1: -; LE-PIC: lwc1 $f0, 0(${{[0-9]+}}) -; LE-PIC: lwc1 $f1, 4(${{[0-9]+}}) +; LE-PIC-DAG: lw $[[R0:[0-9]+]], 0(${{[0-9]+}}) +; LE-PIC-DAG: lw $[[R1:[0-9]+]], 4(${{[0-9]+}}) +; LE-PIC-DAG: mtc1 $[[R0]], $f0 +; LE-PIC-DAG: mtc1 $[[R1]], $f1 ; LE-STATIC-LABEL: test_ldc1: -; LE-STATIC: lwc1 $f0, %lo(g0)(${{[0-9]+}}) -; LE-STATIC: lwc1 $f1, %lo(g0+4)(${{[0-9]+}}) +; LE-STATIC-DAG: lui $[[R0:[0-9]+]], %hi(g0) +; LE-STATIC-DAG: lw $[[R1:[0-9]+]], %lo(g0)($[[R0]]) +; LE-STATIC-DAG: addiu $[[R2:[0-9]+]], $[[R0]], %lo(g0) +; LE-STATIC-DAG: lw $[[R3:[0-9]+]], 4($[[R2]]) +; LE-STATIC-DAG: mtc1 
$[[R1]], $f0 +; LE-STATIC-DAG: mtc1 $[[R3]], $f1 ; BE-PIC-LABEL: test_ldc1: -; BE-PIC: lwc1 $f1, 0(${{[0-9]+}}) -; BE-PIC: lwc1 $f0, 4(${{[0-9]+}}) +; BE-PIC-DAG: lw $[[R0:[0-9]+]], 0(${{[0-9]+}}) +; BE-PIC-DAG: lw $[[R1:[0-9]+]], 4(${{[0-9]+}}) +; BE-PIC-DAG: mtc1 $[[R1]], $f0 +; BE-PIC-DAG: mtc1 $[[R0]], $f1 ; CHECK-LDC1-SDC1-LABEL: test_ldc1: ; CHECK-LDC1-SDC1: ldc1 $f{{[0-9]+}} @@ -27,14 +36,22 @@ entry: } ; LE-PIC-LABEL: test_sdc1: -; LE-PIC: swc1 $f12, 0(${{[0-9]+}}) -; LE-PIC: swc1 $f13, 4(${{[0-9]+}}) +; LE-PIC-DAG: mfc1 $[[R0:[0-9]+]], $f12 +; LE-PIC-DAG: mfc1 $[[R1:[0-9]+]], $f13 +; LE-PIC-DAG: sw $[[R0]], 0(${{[0-9]+}}) +; LE-PIC-DAG: sw $[[R1]], 4(${{[0-9]+}}) ; LE-STATIC-LABEL: test_sdc1: -; LE-STATIC: swc1 $f12, %lo(g0)(${{[0-9]+}}) -; LE-STATIC: swc1 $f13, %lo(g0+4)(${{[0-9]+}}) +; LE-STATIC-DAG: mfc1 $[[R0:[0-9]+]], $f12 +; LE-STATIC-DAG: mfc1 $[[R1:[0-9]+]], $f13 +; LE-STATIC-DAG: lui $[[R2:[0-9]+]], %hi(g0) +; LE-STATIC-DAG: sw $[[R0]], %lo(g0)($[[R2]]) +; LE-STATIC-DAG: addiu $[[R3:[0-9]+]], $[[R2]], %lo(g0) +; LE-STATIC-DAG: sw $[[R1]], 4($[[R3]]) ; BE-PIC-LABEL: test_sdc1: -; BE-PIC: swc1 $f13, 0(${{[0-9]+}}) -; BE-PIC: swc1 $f12, 4(${{[0-9]+}}) +; BE-PIC-DAG: mfc1 $[[R0:[0-9]+]], $f12 +; BE-PIC-DAG: mfc1 $[[R1:[0-9]+]], $f13 +; BE-PIC-DAG: sw $[[R1]], 0(${{[0-9]+}}) +; BE-PIC-DAG: sw $[[R0]], 4(${{[0-9]+}}) ; CHECK-LDC1-SDC1-LABEL: test_sdc1: ; CHECK-LDC1-SDC1: sdc1 $f{{[0-9]+}} @@ -43,3 +60,34 @@ entry: store double %a, double* @g0, align 8 ret void } + + +; LE-PIC-LABEL: test_ldxc1: +; LE-PIC-DAG: lw $[[R0:[0-9]+]], 0(${{[0-9]+}}) +; LE-PIC-DAG: lw $[[R1:[0-9]+]], 4(${{[0-9]+}}) +; LE-PIC-DAG: mtc1 $[[R0]], $f0 +; LE-PIC-DAG: mtc1 $[[R1]], $f1 +; CHECK-LDC1-SDC1-LABEL: test_ldxc1: +; CHECK-LDC1-SDC1: ldxc1 $f{{[0-9]+}} + +define double @test_ldxc1(double* nocapture readonly %a, i32 %i) { +entry: + %arrayidx = getelementptr inbounds double* %a, i32 %i + %0 = load double* %arrayidx, align 8 + ret double %0 +} + +; LE-PIC-LABEL: test_sdxc1: +; 
LE-PIC-DAG: mfc1 $[[R0:[0-9]+]], $f12 +; LE-PIC-DAG: mfc1 $[[R1:[0-9]+]], $f13 +; LE-PIC-DAG: sw $[[R0]], 0(${{[0-9]+}}) +; LE-PIC-DAG: sw $[[R1]], 4(${{[0-9]+}}) +; CHECK-LDC1-SDC1-LABEL: test_sdxc1: +; CHECK-LDC1-SDC1: sdxc1 $f{{[0-9]+}} + +define void @test_sdxc1(double %b, double* nocapture %a, i32 %i) { +entry: + %arrayidx = getelementptr inbounds double* %a, i32 %i + store double %b, double* %arrayidx, align 8 + ret void +} diff --git a/test/CodeGen/Mips/msa/2r.ll b/test/CodeGen/Mips/msa/2r.ll new file mode 100644 index 0000000..da35ad8 --- /dev/null +++ b/test/CodeGen/Mips/msa/2r.ll @@ -0,0 +1,257 @@ +; Test the MSA intrinsics that are encoded with the 2R instruction format. + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_nloc_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_nloc_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_nloc_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_nloc_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.nloc.b(<16 x i8> %0) + store <16 x i8> %1, <16 x i8>* @llvm_mips_nloc_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.nloc.b(<16 x i8>) nounwind + +; CHECK: llvm_mips_nloc_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_nloc_b_ARG1) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: nloc.b [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_nloc_b_RES) +; CHECK-DAG: st.b [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_nloc_b_test +; +@llvm_mips_nloc_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_nloc_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void 
@llvm_mips_nloc_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_nloc_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.nloc.h(<8 x i16> %0) + store <8 x i16> %1, <8 x i16>* @llvm_mips_nloc_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.nloc.h(<8 x i16>) nounwind + +; CHECK: llvm_mips_nloc_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_nloc_h_ARG1) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: nloc.h [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_nloc_h_RES) +; CHECK-DAG: st.h [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_nloc_h_test +; +@llvm_mips_nloc_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_nloc_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_nloc_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_nloc_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.nloc.w(<4 x i32> %0) + store <4 x i32> %1, <4 x i32>* @llvm_mips_nloc_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.nloc.w(<4 x i32>) nounwind + +; CHECK: llvm_mips_nloc_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_nloc_w_ARG1) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: nloc.w [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_nloc_w_RES) +; CHECK-DAG: st.w [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_nloc_w_test +; +@llvm_mips_nloc_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_nloc_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_nloc_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_nloc_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.nloc.d(<2 x i64> %0) + store <2 x i64> %1, <2 x i64>* @llvm_mips_nloc_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.nloc.d(<2 x i64>) nounwind + +; CHECK: llvm_mips_nloc_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_nloc_d_ARG1) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: nloc.d [[WD:\$w[0-9]+]], 
[[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_nloc_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_nloc_d_test +; +@llvm_mips_nlzc_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_nlzc_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_nlzc_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_nlzc_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.nlzc.b(<16 x i8> %0) + store <16 x i8> %1, <16 x i8>* @llvm_mips_nlzc_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.nlzc.b(<16 x i8>) nounwind + +; CHECK: llvm_mips_nlzc_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_nlzc_b_ARG1) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: nlzc.b [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_nlzc_b_RES) +; CHECK-DAG: st.b [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_nlzc_b_test +; +@llvm_mips_nlzc_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_nlzc_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_nlzc_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_nlzc_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.nlzc.h(<8 x i16> %0) + store <8 x i16> %1, <8 x i16>* @llvm_mips_nlzc_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.nlzc.h(<8 x i16>) nounwind + +; CHECK: llvm_mips_nlzc_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_nlzc_h_ARG1) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: nlzc.h [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_nlzc_h_RES) +; CHECK-DAG: st.h [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_nlzc_h_test +; +@llvm_mips_nlzc_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 
+@llvm_mips_nlzc_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_nlzc_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_nlzc_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.nlzc.w(<4 x i32> %0) + store <4 x i32> %1, <4 x i32>* @llvm_mips_nlzc_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.nlzc.w(<4 x i32>) nounwind + +; CHECK: llvm_mips_nlzc_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_nlzc_w_ARG1) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: nlzc.w [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_nlzc_w_RES) +; CHECK-DAG: st.w [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_nlzc_w_test +; +@llvm_mips_nlzc_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_nlzc_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_nlzc_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_nlzc_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.nlzc.d(<2 x i64> %0) + store <2 x i64> %1, <2 x i64>* @llvm_mips_nlzc_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.nlzc.d(<2 x i64>) nounwind + +; CHECK: llvm_mips_nlzc_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_nlzc_d_ARG1) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: nlzc.d [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_nlzc_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_nlzc_d_test +; +@llvm_mips_pcnt_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_pcnt_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_pcnt_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_pcnt_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.pcnt.b(<16 x i8> %0) + store <16 x i8> %1, <16 x i8>* @llvm_mips_pcnt_b_RES + ret void +} + 
+declare <16 x i8> @llvm.mips.pcnt.b(<16 x i8>) nounwind + +; CHECK: llvm_mips_pcnt_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_pcnt_b_ARG1) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: pcnt.b [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_pcnt_b_RES) +; CHECK-DAG: st.b [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_pcnt_b_test +; +@llvm_mips_pcnt_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_pcnt_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_pcnt_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_pcnt_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.pcnt.h(<8 x i16> %0) + store <8 x i16> %1, <8 x i16>* @llvm_mips_pcnt_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.pcnt.h(<8 x i16>) nounwind + +; CHECK: llvm_mips_pcnt_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_pcnt_h_ARG1) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: pcnt.h [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_pcnt_h_RES) +; CHECK-DAG: st.h [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_pcnt_h_test +; +@llvm_mips_pcnt_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_pcnt_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_pcnt_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_pcnt_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.pcnt.w(<4 x i32> %0) + store <4 x i32> %1, <4 x i32>* @llvm_mips_pcnt_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.pcnt.w(<4 x i32>) nounwind + +; CHECK: llvm_mips_pcnt_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_pcnt_w_ARG1) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: pcnt.w [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_pcnt_w_RES) +; CHECK-DAG: st.w [[WD]], 0([[R2]]) +; CHECK: .size 
llvm_mips_pcnt_w_test +; +@llvm_mips_pcnt_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_pcnt_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_pcnt_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_pcnt_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.pcnt.d(<2 x i64> %0) + store <2 x i64> %1, <2 x i64>* @llvm_mips_pcnt_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.pcnt.d(<2 x i64>) nounwind + +; CHECK: llvm_mips_pcnt_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_pcnt_d_ARG1) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: pcnt.d [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_pcnt_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_pcnt_d_test +; diff --git a/test/CodeGen/Mips/msa/2r_vector_scalar.ll b/test/CodeGen/Mips/msa/2r_vector_scalar.ll new file mode 100644 index 0000000..6f6e1b9 --- /dev/null +++ b/test/CodeGen/Mips/msa/2r_vector_scalar.ll @@ -0,0 +1,87 @@ +; Test the MSA intrinsics that are encoded with the 2R instruction format and +; convert scalars to vectors. 
+ +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_fill_b_ARG1 = global i32 23, align 16 +@llvm_mips_fill_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_fill_b_test() nounwind { +entry: + %0 = load i32* @llvm_mips_fill_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.fill.b(i32 %0) + store <16 x i8> %1, <16 x i8>* @llvm_mips_fill_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.fill.b(i32) nounwind + +; CHECK: llvm_mips_fill_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], +; CHECK-DAG: fill.b [[R2:\$w[0-9]+]], [[R1]] +; CHECK-DAG: st.b [[R2]], +; CHECK: .size llvm_mips_fill_b_test +; +@llvm_mips_fill_h_ARG1 = global i32 23, align 16 +@llvm_mips_fill_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_fill_h_test() nounwind { +entry: + %0 = load i32* @llvm_mips_fill_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.fill.h(i32 %0) + store <8 x i16> %1, <8 x i16>* @llvm_mips_fill_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.fill.h(i32) nounwind + +; CHECK: llvm_mips_fill_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], +; CHECK-DAG: fill.h [[R2:\$w[0-9]+]], [[R1]] +; CHECK-DAG: st.h [[R2]], +; CHECK: .size llvm_mips_fill_h_test +; +@llvm_mips_fill_w_ARG1 = global i32 23, align 16 +@llvm_mips_fill_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_fill_w_test() nounwind { +entry: + %0 = load i32* @llvm_mips_fill_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.fill.w(i32 %0) + store <4 x i32> %1, <4 x i32>* @llvm_mips_fill_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.fill.w(i32) nounwind + +; CHECK: llvm_mips_fill_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], +; CHECK-DAG: fill.w [[R2:\$w[0-9]+]], [[R1]] +; CHECK-DAG: st.w [[R2]], +; CHECK: .size llvm_mips_fill_w_test +; 
+@llvm_mips_fill_d_ARG1 = global i64 23, align 16 +@llvm_mips_fill_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_fill_d_test() nounwind { +entry: + %0 = load i64* @llvm_mips_fill_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.fill.d(i64 %0) + store <2 x i64> %1, <2 x i64>* @llvm_mips_fill_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.fill.d(i64) nounwind + +; CHECK: llvm_mips_fill_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], 0( +; CHECK-DAG: lw [[R2:\$[0-9]+]], 4( +; CHECK-DAG: ldi.b [[R3:\$w[0-9]+]], 0 +; CHECK-DAG: insert.w [[R3]][0], [[R1]] +; CHECK-DAG: insert.w [[R3]][1], [[R2]] +; CHECK-DAG: insert.w [[R3]][2], [[R1]] +; CHECK-DAG: insert.w [[R3]][3], [[R2]] +; CHECK-DAG: st.w [[R3]], +; CHECK: .size llvm_mips_fill_d_test +; diff --git a/test/CodeGen/Mips/msa/2rf.ll b/test/CodeGen/Mips/msa/2rf.ll new file mode 100644 index 0000000..b361ef5 --- /dev/null +++ b/test/CodeGen/Mips/msa/2rf.ll @@ -0,0 +1,323 @@ +; Test the MSA intrinsics that are encoded with the 2RF instruction format. 
+ +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_flog2_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_flog2_w_RES = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16 + +define void @llvm_mips_flog2_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_flog2_w_ARG1 + %1 = tail call <4 x float> @llvm.mips.flog2.w(<4 x float> %0) + store <4 x float> %1, <4 x float>* @llvm_mips_flog2_w_RES + ret void +} + +declare <4 x float> @llvm.mips.flog2.w(<4 x float>) nounwind + +; CHECK: llvm_mips_flog2_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_flog2_w_ARG1) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: flog2.w [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_flog2_w_RES) +; CHECK-DAG: st.w [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_flog2_w_test +; +@llvm_mips_flog2_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_flog2_d_RES = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16 + +define void @llvm_mips_flog2_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_flog2_d_ARG1 + %1 = tail call <2 x double> @llvm.mips.flog2.d(<2 x double> %0) + store <2 x double> %1, <2 x double>* @llvm_mips_flog2_d_RES + ret void +} + +declare <2 x double> @llvm.mips.flog2.d(<2 x double>) nounwind + +; CHECK: llvm_mips_flog2_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_flog2_d_ARG1) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: flog2.d [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_flog2_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_flog2_d_test + +define void @flog2_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_flog2_w_ARG1 + %1 
= tail call <4 x float> @llvm.log2.v4f32(<4 x float> %0) + store <4 x float> %1, <4 x float>* @llvm_mips_flog2_w_RES + ret void +} + +declare <4 x float> @llvm.log2.v4f32(<4 x float> %val) + +; CHECK: flog2_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_flog2_w_ARG1) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: flog2.w [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_flog2_w_RES) +; CHECK-DAG: st.w [[WD]], 0([[R2]]) +; CHECK: .size flog2_w_test + +define void @flog2_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_flog2_d_ARG1 + %1 = tail call <2 x double> @llvm.log2.v2f64(<2 x double> %0) + store <2 x double> %1, <2 x double>* @llvm_mips_flog2_d_RES + ret void +} + +declare <2 x double> @llvm.log2.v2f64(<2 x double> %val) + +; CHECK: flog2_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_flog2_d_ARG1) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: flog2.d [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_flog2_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R2]]) +; CHECK: .size flog2_d_test +; +@llvm_mips_frint_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_frint_w_RES = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16 + +define void @llvm_mips_frint_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_frint_w_ARG1 + %1 = tail call <4 x float> @llvm.mips.frint.w(<4 x float> %0) + store <4 x float> %1, <4 x float>* @llvm_mips_frint_w_RES + ret void +} + +declare <4 x float> @llvm.mips.frint.w(<4 x float>) nounwind + +; CHECK: llvm_mips_frint_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_frint_w_ARG1) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: frint.w [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_frint_w_RES) +; CHECK-DAG: st.w [[WD]], 0([[R2]]) +; CHECK: 
.size llvm_mips_frint_w_test +; +@llvm_mips_frint_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_frint_d_RES = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16 + +define void @llvm_mips_frint_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_frint_d_ARG1 + %1 = tail call <2 x double> @llvm.mips.frint.d(<2 x double> %0) + store <2 x double> %1, <2 x double>* @llvm_mips_frint_d_RES + ret void +} + +declare <2 x double> @llvm.mips.frint.d(<2 x double>) nounwind + +; CHECK: llvm_mips_frint_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_frint_d_ARG1) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: frint.d [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_frint_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_frint_d_test + +define void @frint_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_frint_w_ARG1 + %1 = tail call <4 x float> @llvm.rint.v4f32(<4 x float> %0) + store <4 x float> %1, <4 x float>* @llvm_mips_frint_w_RES + ret void +} + +declare <4 x float> @llvm.rint.v4f32(<4 x float>) nounwind + +; CHECK: frint_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_frint_w_ARG1) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: frint.w [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_frint_w_RES) +; CHECK-DAG: st.w [[WD]], 0([[R2]]) +; CHECK: .size frint_w_test + +define void @frint_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_frint_d_ARG1 + %1 = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %0) + store <2 x double> %1, <2 x double>* @llvm_mips_frint_d_RES + ret void +} + +declare <2 x double> @llvm.rint.v2f64(<2 x double>) nounwind + +; CHECK: frint_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_frint_d_ARG1) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: frint.d [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw 
[[R2:\$[0-9]+]], %got(llvm_mips_frint_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R2]]) +; CHECK: .size frint_d_test +; +@llvm_mips_frcp_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_frcp_w_RES = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16 + +define void @llvm_mips_frcp_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_frcp_w_ARG1 + %1 = tail call <4 x float> @llvm.mips.frcp.w(<4 x float> %0) + store <4 x float> %1, <4 x float>* @llvm_mips_frcp_w_RES + ret void +} + +declare <4 x float> @llvm.mips.frcp.w(<4 x float>) nounwind + +; CHECK: llvm_mips_frcp_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_frcp_w_ARG1) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: frcp.w [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_frcp_w_RES) +; CHECK-DAG: st.w [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_frcp_w_test +; +@llvm_mips_frcp_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_frcp_d_RES = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16 + +define void @llvm_mips_frcp_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_frcp_d_ARG1 + %1 = tail call <2 x double> @llvm.mips.frcp.d(<2 x double> %0) + store <2 x double> %1, <2 x double>* @llvm_mips_frcp_d_RES + ret void +} + +declare <2 x double> @llvm.mips.frcp.d(<2 x double>) nounwind + +; CHECK: llvm_mips_frcp_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_frcp_d_ARG1) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: frcp.d [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_frcp_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_frcp_d_test +; +@llvm_mips_frsqrt_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 
+@llvm_mips_frsqrt_w_RES = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16 + +define void @llvm_mips_frsqrt_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_frsqrt_w_ARG1 + %1 = tail call <4 x float> @llvm.mips.frsqrt.w(<4 x float> %0) + store <4 x float> %1, <4 x float>* @llvm_mips_frsqrt_w_RES + ret void +} + +declare <4 x float> @llvm.mips.frsqrt.w(<4 x float>) nounwind + +; CHECK: llvm_mips_frsqrt_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_frsqrt_w_ARG1) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: frsqrt.w [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_frsqrt_w_RES) +; CHECK-DAG: st.w [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_frsqrt_w_test +; +@llvm_mips_frsqrt_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_frsqrt_d_RES = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16 + +define void @llvm_mips_frsqrt_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_frsqrt_d_ARG1 + %1 = tail call <2 x double> @llvm.mips.frsqrt.d(<2 x double> %0) + store <2 x double> %1, <2 x double>* @llvm_mips_frsqrt_d_RES + ret void +} + +declare <2 x double> @llvm.mips.frsqrt.d(<2 x double>) nounwind + +; CHECK: llvm_mips_frsqrt_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_frsqrt_d_ARG1) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: frsqrt.d [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_frsqrt_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_frsqrt_d_test +; +@llvm_mips_fsqrt_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fsqrt_w_RES = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16 + +define void @llvm_mips_fsqrt_w_test() nounwind { +entry: + %0 = load 
<4 x float>* @llvm_mips_fsqrt_w_ARG1 + %1 = tail call <4 x float> @llvm.mips.fsqrt.w(<4 x float> %0) + store <4 x float> %1, <4 x float>* @llvm_mips_fsqrt_w_RES + ret void +} + +declare <4 x float> @llvm.mips.fsqrt.w(<4 x float>) nounwind + +; CHECK: llvm_mips_fsqrt_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_fsqrt_w_ARG1) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: fsqrt.w [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_fsqrt_w_RES) +; CHECK-DAG: st.w [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_fsqrt_w_test +; +@llvm_mips_fsqrt_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fsqrt_d_RES = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16 + +define void @llvm_mips_fsqrt_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fsqrt_d_ARG1 + %1 = tail call <2 x double> @llvm.mips.fsqrt.d(<2 x double> %0) + store <2 x double> %1, <2 x double>* @llvm_mips_fsqrt_d_RES + ret void +} + +declare <2 x double> @llvm.mips.fsqrt.d(<2 x double>) nounwind + +; CHECK: llvm_mips_fsqrt_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_fsqrt_d_ARG1) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: fsqrt.d [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_fsqrt_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_fsqrt_d_test + +define void @fsqrt_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fsqrt_w_ARG1 + %1 = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %0) + store <4 x float> %1, <4 x float>* @llvm_mips_fsqrt_w_RES + ret void +} + +declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) nounwind + +; CHECK: fsqrt_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_fsqrt_w_ARG1) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: fsqrt.w [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_fsqrt_w_RES) +; CHECK-DAG: st.w [[WD]], 0([[R2]]) 
+; CHECK: .size fsqrt_w_test + +define void @fsqrt_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fsqrt_d_ARG1 + %1 = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> %0) + store <2 x double> %1, <2 x double>* @llvm_mips_fsqrt_d_RES + ret void +} + +declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) nounwind + +; CHECK: fsqrt_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_fsqrt_d_ARG1) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: fsqrt.d [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_fsqrt_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R2]]) +; CHECK: .size fsqrt_d_test +; diff --git a/test/CodeGen/Mips/msa/2rf_exup.ll b/test/CodeGen/Mips/msa/2rf_exup.ll new file mode 100644 index 0000000..8d7cc36 --- /dev/null +++ b/test/CodeGen/Mips/msa/2rf_exup.ll @@ -0,0 +1,82 @@ +; Test the MSA floating point conversion intrinsics (e.g. float->double) that +; are encoded with the 2RF instruction format. + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_fexupl_w_ARG1 = global <8 x half> <half 0.000000e+00, half 1.000000e+00, half 2.000000e+00, half 3.000000e+00, half 4.000000e+00, half 5.000000e+00, half 6.000000e+00, half 7.000000e+00>, align 16 +@llvm_mips_fexupl_w_RES = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16 + +define void @llvm_mips_fexupl_w_test() nounwind { +entry: + %0 = load <8 x half>* @llvm_mips_fexupl_w_ARG1 + %1 = tail call <4 x float> @llvm.mips.fexupl.w(<8 x half> %0) + store <4 x float> %1, <4 x float>* @llvm_mips_fexupl_w_RES + ret void +} + +declare <4 x float> @llvm.mips.fexupl.w(<8 x half>) nounwind + +; CHECK: llvm_mips_fexupl_w_test: +; CHECK: ld.h +; CHECK: fexupl.w +; CHECK: st.w +; CHECK: .size llvm_mips_fexupl_w_test +; +@llvm_mips_fexupl_d_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, 
float 3.000000e+00>, align 16 +@llvm_mips_fexupl_d_RES = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16 + +define void @llvm_mips_fexupl_d_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fexupl_d_ARG1 + %1 = tail call <2 x double> @llvm.mips.fexupl.d(<4 x float> %0) + store <2 x double> %1, <2 x double>* @llvm_mips_fexupl_d_RES + ret void +} + +declare <2 x double> @llvm.mips.fexupl.d(<4 x float>) nounwind + +; CHECK: llvm_mips_fexupl_d_test: +; CHECK: ld.w +; CHECK: fexupl.d +; CHECK: st.d +; CHECK: .size llvm_mips_fexupl_d_test +; +@llvm_mips_fexupr_w_ARG1 = global <8 x half> <half 0.000000e+00, half 1.000000e+00, half 2.000000e+00, half 3.000000e+00, half 4.000000e+00, half 5.000000e+00, half 6.000000e+00, half 7.000000e+00>, align 16 +@llvm_mips_fexupr_w_RES = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16 + +define void @llvm_mips_fexupr_w_test() nounwind { +entry: + %0 = load <8 x half>* @llvm_mips_fexupr_w_ARG1 + %1 = tail call <4 x float> @llvm.mips.fexupr.w(<8 x half> %0) + store <4 x float> %1, <4 x float>* @llvm_mips_fexupr_w_RES + ret void +} + +declare <4 x float> @llvm.mips.fexupr.w(<8 x half>) nounwind + +; CHECK: llvm_mips_fexupr_w_test: +; CHECK: ld.h +; CHECK: fexupr.w +; CHECK: st.w +; CHECK: .size llvm_mips_fexupr_w_test +; +@llvm_mips_fexupr_d_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fexupr_d_RES = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16 + +define void @llvm_mips_fexupr_d_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fexupr_d_ARG1 + %1 = tail call <2 x double> @llvm.mips.fexupr.d(<4 x float> %0) + store <2 x double> %1, <2 x double>* @llvm_mips_fexupr_d_RES + ret void +} + +declare <2 x double> @llvm.mips.fexupr.d(<4 x float>) nounwind + +; CHECK: llvm_mips_fexupr_d_test: +; CHECK: ld.w +; CHECK: fexupr.d +; 
CHECK: st.d +; CHECK: .size llvm_mips_fexupr_d_test +; diff --git a/test/CodeGen/Mips/msa/2rf_float_int.ll b/test/CodeGen/Mips/msa/2rf_float_int.ll new file mode 100644 index 0000000..3b5dfda --- /dev/null +++ b/test/CodeGen/Mips/msa/2rf_float_int.ll @@ -0,0 +1,90 @@ +; Test the MSA integer to floating point conversion intrinsics that are encoded +; with the 2RF instruction format. + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_ffint_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_ffint_s_w_RES = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16 + +define void @llvm_mips_ffint_s_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_ffint_s_w_ARG1 + %1 = tail call <4 x float> @llvm.mips.ffint.s.w(<4 x i32> %0) + store <4 x float> %1, <4 x float>* @llvm_mips_ffint_s_w_RES + ret void +} + +declare <4 x float> @llvm.mips.ffint.s.w(<4 x i32>) nounwind + +; CHECK: llvm_mips_ffint_s_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ffint_s_w_ARG1) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ffint_s.w [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ffint_s_w_RES) +; CHECK-DAG: st.w [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_ffint_s_w_test +; +@llvm_mips_ffint_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_ffint_s_d_RES = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16 + +define void @llvm_mips_ffint_s_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_ffint_s_d_ARG1 + %1 = tail call <2 x double> @llvm.mips.ffint.s.d(<2 x i64> %0) + store <2 x double> %1, <2 x double>* @llvm_mips_ffint_s_d_RES + ret void +} + +declare <2 x double> @llvm.mips.ffint.s.d(<2 x i64>) nounwind + +; CHECK: llvm_mips_ffint_s_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ffint_s_d_ARG1) +; 
CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ffint_s.d [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ffint_s_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_ffint_s_d_test +; +@llvm_mips_ffint_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_ffint_u_w_RES = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16 + +define void @llvm_mips_ffint_u_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_ffint_u_w_ARG1 + %1 = tail call <4 x float> @llvm.mips.ffint.u.w(<4 x i32> %0) + store <4 x float> %1, <4 x float>* @llvm_mips_ffint_u_w_RES + ret void +} + +declare <4 x float> @llvm.mips.ffint.u.w(<4 x i32>) nounwind + +; CHECK: llvm_mips_ffint_u_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ffint_u_w_ARG1) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ffint_u.w [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ffint_u_w_RES) +; CHECK-DAG: st.w [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_ffint_u_w_test +; +@llvm_mips_ffint_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_ffint_u_d_RES = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16 + +define void @llvm_mips_ffint_u_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_ffint_u_d_ARG1 + %1 = tail call <2 x double> @llvm.mips.ffint.u.d(<2 x i64> %0) + store <2 x double> %1, <2 x double>* @llvm_mips_ffint_u_d_RES + ret void +} + +declare <2 x double> @llvm.mips.ffint.u.d(<2 x i64>) nounwind + +; CHECK: llvm_mips_ffint_u_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ffint_u_d_ARG1) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ffint_u.d [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ffint_u_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_ffint_u_d_test +; diff --git a/test/CodeGen/Mips/msa/2rf_fq.ll 
b/test/CodeGen/Mips/msa/2rf_fq.ll new file mode 100644 index 0000000..021dd93 --- /dev/null +++ b/test/CodeGen/Mips/msa/2rf_fq.ll @@ -0,0 +1,82 @@ +; Test the MSA fixed-point to floating point conversion intrinsics that are +; encoded with the 2RF instruction format. + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_ffql_w_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_ffql_w_RES = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16 + +define void @llvm_mips_ffql_w_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_ffql_w_ARG1 + %1 = tail call <4 x float> @llvm.mips.ffql.w(<8 x i16> %0) + store <4 x float> %1, <4 x float>* @llvm_mips_ffql_w_RES + ret void +} + +declare <4 x float> @llvm.mips.ffql.w(<8 x i16>) nounwind + +; CHECK: llvm_mips_ffql_w_test: +; CHECK: ld.h +; CHECK: ffql.w +; CHECK: st.w +; CHECK: .size llvm_mips_ffql_w_test +; +@llvm_mips_ffql_d_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_ffql_d_RES = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16 + +define void @llvm_mips_ffql_d_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_ffql_d_ARG1 + %1 = tail call <2 x double> @llvm.mips.ffql.d(<4 x i32> %0) + store <2 x double> %1, <2 x double>* @llvm_mips_ffql_d_RES + ret void +} + +declare <2 x double> @llvm.mips.ffql.d(<4 x i32>) nounwind + +; CHECK: llvm_mips_ffql_d_test: +; CHECK: ld.w +; CHECK: ffql.d +; CHECK: st.d +; CHECK: .size llvm_mips_ffql_d_test +; +@llvm_mips_ffqr_w_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_ffqr_w_RES = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16 + +define void @llvm_mips_ffqr_w_test() nounwind { +entry: + %0 = load <8 x i16>* 
@llvm_mips_ffqr_w_ARG1 + %1 = tail call <4 x float> @llvm.mips.ffqr.w(<8 x i16> %0) + store <4 x float> %1, <4 x float>* @llvm_mips_ffqr_w_RES + ret void +} + +declare <4 x float> @llvm.mips.ffqr.w(<8 x i16>) nounwind + +; CHECK: llvm_mips_ffqr_w_test: +; CHECK: ld.h +; CHECK: ffqr.w +; CHECK: st.w +; CHECK: .size llvm_mips_ffqr_w_test +; +@llvm_mips_ffqr_d_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_ffqr_d_RES = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16 + +define void @llvm_mips_ffqr_d_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_ffqr_d_ARG1 + %1 = tail call <2 x double> @llvm.mips.ffqr.d(<4 x i32> %0) + store <2 x double> %1, <2 x double>* @llvm_mips_ffqr_d_RES + ret void +} + +declare <2 x double> @llvm.mips.ffqr.d(<4 x i32>) nounwind + +; CHECK: llvm_mips_ffqr_d_test: +; CHECK: ld.w +; CHECK: ffqr.d +; CHECK: st.d +; CHECK: .size llvm_mips_ffqr_d_test +; diff --git a/test/CodeGen/Mips/msa/2rf_int_float.ll b/test/CodeGen/Mips/msa/2rf_int_float.ll new file mode 100644 index 0000000..4665ae0 --- /dev/null +++ b/test/CodeGen/Mips/msa/2rf_int_float.ll @@ -0,0 +1,217 @@ +; Test the MSA floating point to integer intrinsics that are encoded with the +; 2RF instruction format. This includes conversions but other instructions such +; as fclass are also here. 
+ +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_fclass_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fclass_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_fclass_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fclass_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.fclass.w(<4 x float> %0) + store <4 x i32> %1, <4 x i32>* @llvm_mips_fclass_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.fclass.w(<4 x float>) nounwind + +; CHECK: llvm_mips_fclass_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_fclass_w_ARG1) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: fclass.w [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_fclass_w_RES) +; CHECK-DAG: st.w [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_fclass_w_test +; +@llvm_mips_fclass_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fclass_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_fclass_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fclass_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.fclass.d(<2 x double> %0) + store <2 x i64> %1, <2 x i64>* @llvm_mips_fclass_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.fclass.d(<2 x double>) nounwind + +; CHECK: llvm_mips_fclass_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_fclass_d_ARG1) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: fclass.d [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_fclass_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_fclass_d_test +; +@llvm_mips_ftrunc_s_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_ftrunc_s_w_RES = global <4 x 
i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_ftrunc_s_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_ftrunc_s_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.ftrunc.s.w(<4 x float> %0) + store <4 x i32> %1, <4 x i32>* @llvm_mips_ftrunc_s_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.ftrunc.s.w(<4 x float>) nounwind + +; CHECK: llvm_mips_ftrunc_s_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ftrunc_s_w_ARG1) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ftrunc_s.w [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ftrunc_s_w_RES) +; CHECK-DAG: st.w [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_ftrunc_s_w_test +; +@llvm_mips_ftrunc_s_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_ftrunc_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_ftrunc_s_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_ftrunc_s_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.ftrunc.s.d(<2 x double> %0) + store <2 x i64> %1, <2 x i64>* @llvm_mips_ftrunc_s_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.ftrunc.s.d(<2 x double>) nounwind + +; CHECK: llvm_mips_ftrunc_s_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ftrunc_s_d_ARG1) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ftrunc_s.d [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ftrunc_s_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_ftrunc_s_d_test +; +@llvm_mips_ftrunc_u_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_ftrunc_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_ftrunc_u_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_ftrunc_u_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.ftrunc.u.w(<4 x float> %0) + store <4 x i32> %1, <4 x i32>* 
@llvm_mips_ftrunc_u_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.ftrunc.u.w(<4 x float>) nounwind + +; CHECK: llvm_mips_ftrunc_u_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ftrunc_u_w_ARG1) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ftrunc_u.w [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ftrunc_u_w_RES) +; CHECK-DAG: st.w [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_ftrunc_u_w_test +; +@llvm_mips_ftrunc_u_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_ftrunc_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_ftrunc_u_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_ftrunc_u_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.ftrunc.u.d(<2 x double> %0) + store <2 x i64> %1, <2 x i64>* @llvm_mips_ftrunc_u_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.ftrunc.u.d(<2 x double>) nounwind + +; CHECK: llvm_mips_ftrunc_u_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ftrunc_u_d_ARG1) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ftrunc_u.d [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ftrunc_u_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_ftrunc_u_d_test +; +@llvm_mips_ftint_s_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_ftint_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_ftint_s_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_ftint_s_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.ftint.s.w(<4 x float> %0) + store <4 x i32> %1, <4 x i32>* @llvm_mips_ftint_s_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.ftint.s.w(<4 x float>) nounwind + +; CHECK: llvm_mips_ftint_s_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ftint_s_w_ARG1) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: 
ftint_s.w [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ftint_s_w_RES) +; CHECK-DAG: st.w [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_ftint_s_w_test +; +@llvm_mips_ftint_s_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_ftint_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_ftint_s_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_ftint_s_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.ftint.s.d(<2 x double> %0) + store <2 x i64> %1, <2 x i64>* @llvm_mips_ftint_s_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.ftint.s.d(<2 x double>) nounwind + +; CHECK: llvm_mips_ftint_s_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ftint_s_d_ARG1) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ftint_s.d [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ftint_s_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_ftint_s_d_test +; +@llvm_mips_ftint_u_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_ftint_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_ftint_u_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_ftint_u_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.ftint.u.w(<4 x float> %0) + store <4 x i32> %1, <4 x i32>* @llvm_mips_ftint_u_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.ftint.u.w(<4 x float>) nounwind + +; CHECK: llvm_mips_ftint_u_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ftint_u_w_ARG1) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ftint_u.w [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ftint_u_w_RES) +; CHECK-DAG: st.w [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_ftint_u_w_test +; +@llvm_mips_ftint_u_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 
+@llvm_mips_ftint_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_ftint_u_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_ftint_u_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.ftint.u.d(<2 x double> %0) + store <2 x i64> %1, <2 x i64>* @llvm_mips_ftint_u_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.ftint.u.d(<2 x double>) nounwind + +; CHECK: llvm_mips_ftint_u_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ftint_u_d_ARG1) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ftint_u.d [[WD:\$w[0-9]+]], [[WS]] +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ftint_u_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R2]]) +; CHECK: .size llvm_mips_ftint_u_d_test +; diff --git a/test/CodeGen/Mips/msa/2rf_tq.ll b/test/CodeGen/Mips/msa/2rf_tq.ll new file mode 100644 index 0000000..6f3c508 --- /dev/null +++ b/test/CodeGen/Mips/msa/2rf_tq.ll @@ -0,0 +1,50 @@ +; Test the MSA floating-point to fixed-point conversion intrinsics that are +; encoded with the 2RF instruction format. 
+ +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_ftq_h_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_ftq_h_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_ftq_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_ftq_h_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_ftq_h_ARG1 + %1 = load <4 x float>* @llvm_mips_ftq_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.ftq.h(<4 x float> %0, <4 x float> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_ftq_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.ftq.h(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_ftq_h_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: ftq.h +; CHECK: st.h +; CHECK: .size llvm_mips_ftq_h_test +; +@llvm_mips_ftq_w_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_ftq_w_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_ftq_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_ftq_w_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_ftq_w_ARG1 + %1 = load <2 x double>* @llvm_mips_ftq_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.ftq.w(<2 x double> %0, <2 x double> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_ftq_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.ftq.w(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_ftq_w_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: ftq.w +; CHECK: st.w +; CHECK: .size llvm_mips_ftq_w_test +; diff --git a/test/CodeGen/Mips/msa/3r-a.ll b/test/CodeGen/Mips/msa/3r-a.ll new file mode 100644 index 0000000..dab15b6 --- /dev/null +++ b/test/CodeGen/Mips/msa/3r-a.ll @@ -0,0 +1,1191 @@ +; Test the 
MSA intrinsics that are encoded with the 3R instruction format. +; There are lots of these so this covers those beginning with 'a' + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +; It should fail to compile without fp64. +; RUN: not llc -march=mips -mattr=+msa < %s 2>&1 | \ +; RUN: FileCheck -check-prefix=FP32ERROR %s +; FP32ERROR: LLVM ERROR: MSA requires a 64-bit FPU register file (FR=1 mode). + +@llvm_mips_add_a_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_add_a_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_add_a_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_add_a_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_add_a_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_add_a_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.add.a.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_add_a_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.add.a.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_add_a_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_add_a_b_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_add_a_b_ARG2) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: add_a.b [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_add_a_b_RES) +; CHECK-DAG: st.b [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_add_a_b_test +; +@llvm_mips_add_a_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_add_a_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, 
align 16 +@llvm_mips_add_a_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_add_a_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_add_a_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_add_a_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.add.a.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_add_a_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.add.a.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_add_a_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_add_a_h_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_add_a_h_ARG2) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: add_a.h [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_add_a_h_RES) +; CHECK-DAG: st.h [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_add_a_h_test +; +@llvm_mips_add_a_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_add_a_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_add_a_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_add_a_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_add_a_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_add_a_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.add.a.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_add_a_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.add.a.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_add_a_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_add_a_w_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_add_a_w_ARG2) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: add_a.w [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_add_a_w_RES) +; CHECK-DAG: st.w [[WD]], 0([[R3]]) +; CHECK: .size 
llvm_mips_add_a_w_test +; +@llvm_mips_add_a_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_add_a_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_add_a_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_add_a_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_add_a_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_add_a_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.add.a.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_add_a_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.add.a.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_add_a_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_add_a_d_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_add_a_d_ARG2) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: add_a.d [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_add_a_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_add_a_d_test +; +@llvm_mips_adds_a_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_adds_a_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_adds_a_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_adds_a_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_adds_a_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_adds_a_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.adds.a.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_adds_a_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.adds.a.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_adds_a_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], 
%got(llvm_mips_adds_a_b_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_adds_a_b_ARG2) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: adds_a.b [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_adds_a_b_RES) +; CHECK-DAG: st.b [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_adds_a_b_test +; +@llvm_mips_adds_a_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_adds_a_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_adds_a_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_adds_a_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_adds_a_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_adds_a_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.adds.a.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_adds_a_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.adds.a.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_adds_a_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_adds_a_h_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_adds_a_h_ARG2) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: adds_a.h [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_adds_a_h_RES) +; CHECK-DAG: st.h [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_adds_a_h_test +; +@llvm_mips_adds_a_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_adds_a_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_adds_a_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_adds_a_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_adds_a_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_adds_a_w_ARG2 + %2 = tail call <4 x i32> 
@llvm.mips.adds.a.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_adds_a_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.adds.a.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_adds_a_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_adds_a_w_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_adds_a_w_ARG2) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: adds_a.w [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_adds_a_w_RES) +; CHECK-DAG: st.w [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_adds_a_w_test +; +@llvm_mips_adds_a_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_adds_a_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_adds_a_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_adds_a_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_adds_a_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_adds_a_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.adds.a.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_adds_a_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.adds.a.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_adds_a_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_adds_a_d_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_adds_a_d_ARG2) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: adds_a.d [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_adds_a_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_adds_a_d_test +; +@llvm_mips_adds_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_adds_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, 
i8 31>, align 16 +@llvm_mips_adds_s_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_adds_s_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_adds_s_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_adds_s_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.adds.s.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_adds_s_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.adds.s.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_adds_s_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_adds_s_b_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_adds_s_b_ARG2) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: adds_s.b [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_adds_s_b_RES) +; CHECK-DAG: st.b [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_adds_s_b_test +; +@llvm_mips_adds_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_adds_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_adds_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_adds_s_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_adds_s_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_adds_s_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.adds.s.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_adds_s_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.adds.s.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_adds_s_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_adds_s_h_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_adds_s_h_ARG2) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: adds_s.h 
[[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_adds_s_h_RES) +; CHECK-DAG: st.h [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_adds_s_h_test +; +@llvm_mips_adds_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_adds_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_adds_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_adds_s_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_adds_s_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_adds_s_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.adds.s.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_adds_s_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.adds.s.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_adds_s_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_adds_s_w_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_adds_s_w_ARG2) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: adds_s.w [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_adds_s_w_RES) +; CHECK-DAG: st.w [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_adds_s_w_test +; +@llvm_mips_adds_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_adds_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_adds_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_adds_s_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_adds_s_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_adds_s_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.adds.s.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_adds_s_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.adds.s.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_adds_s_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_adds_s_d_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], 
%got(llvm_mips_adds_s_d_ARG2) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: adds_s.d [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_adds_s_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_adds_s_d_test +; +@llvm_mips_adds_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_adds_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_adds_u_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_adds_u_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_adds_u_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_adds_u_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.adds.u.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_adds_u_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.adds.u.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_adds_u_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_adds_u_b_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_adds_u_b_ARG2) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: adds_u.b [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_adds_u_b_RES) +; CHECK-DAG: st.b [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_adds_u_b_test +; +@llvm_mips_adds_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_adds_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_adds_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void 
@llvm_mips_adds_u_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_adds_u_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_adds_u_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.adds.u.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_adds_u_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.adds.u.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_adds_u_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_adds_u_h_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_adds_u_h_ARG2) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: adds_u.h [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_adds_u_h_RES) +; CHECK-DAG: st.h [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_adds_u_h_test +; +@llvm_mips_adds_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_adds_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_adds_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_adds_u_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_adds_u_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_adds_u_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.adds.u.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_adds_u_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.adds.u.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_adds_u_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_adds_u_w_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_adds_u_w_ARG2) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: adds_u.w [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_adds_u_w_RES) +; CHECK-DAG: st.w [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_adds_u_w_test +; +@llvm_mips_adds_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_adds_u_d_ARG2 = 
global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_adds_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_adds_u_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_adds_u_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_adds_u_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.adds.u.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_adds_u_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.adds.u.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_adds_u_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_adds_u_d_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_adds_u_d_ARG2) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: adds_u.d [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_adds_u_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_adds_u_d_test +; +@llvm_mips_addv_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_addv_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_addv_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_addv_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_addv_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_addv_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_addv_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.addv.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_addv_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_addv_b_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_addv_b_ARG2) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: 
ld.b [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: addv.b [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_addv_b_RES) +; CHECK-DAG: st.b [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_addv_b_test +; +@llvm_mips_addv_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_addv_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_addv_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_addv_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_addv_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_addv_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_addv_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.addv.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_addv_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_addv_h_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_addv_h_ARG2) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: addv.h [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_addv_h_RES) +; CHECK-DAG: st.h [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_addv_h_test +; +@llvm_mips_addv_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_addv_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_addv_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_addv_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_addv_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_addv_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_addv_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.addv.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: 
llvm_mips_addv_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_addv_w_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_addv_w_ARG2) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: addv.w [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_addv_w_RES) +; CHECK-DAG: st.w [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_addv_w_test +; +@llvm_mips_addv_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_addv_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_addv_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_addv_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_addv_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_addv_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_addv_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.addv.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_addv_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_addv_d_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_addv_d_ARG2) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: addv.d [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_addv_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_addv_d_test +; + +define void @addv_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_addv_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_addv_b_ARG2 + %2 = add <16 x i8> %0, %1 + store <16 x i8> %2, <16 x i8>* @llvm_mips_addv_b_RES + ret void +} + +; CHECK: addv_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_addv_b_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_addv_b_ARG2) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: addv.b [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw 
[[R3:\$[0-9]+]], %got(llvm_mips_addv_b_RES) +; CHECK-DAG: st.b [[WD]], 0([[R3]]) +; CHECK: .size addv_b_test +; + +define void @addv_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_addv_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_addv_h_ARG2 + %2 = add <8 x i16> %0, %1 + store <8 x i16> %2, <8 x i16>* @llvm_mips_addv_h_RES + ret void +} + +; CHECK: addv_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_addv_h_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_addv_h_ARG2) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: addv.h [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_addv_h_RES) +; CHECK-DAG: st.h [[WD]], 0([[R3]]) +; CHECK: .size addv_h_test +; + +define void @addv_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_addv_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_addv_w_ARG2 + %2 = add <4 x i32> %0, %1 + store <4 x i32> %2, <4 x i32>* @llvm_mips_addv_w_RES + ret void +} + +; CHECK: addv_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_addv_w_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_addv_w_ARG2) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: addv.w [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_addv_w_RES) +; CHECK-DAG: st.w [[WD]], 0([[R3]]) +; CHECK: .size addv_w_test +; + +define void @addv_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_addv_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_addv_d_ARG2 + %2 = add <2 x i64> %0, %1 + store <2 x i64> %2, <2 x i64>* @llvm_mips_addv_d_RES + ret void +} + +; CHECK: addv_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_addv_d_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_addv_d_ARG2) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: addv.d [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw 
[[R3:\$[0-9]+]], %got(llvm_mips_addv_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R3]]) +; CHECK: .size addv_d_test +; +@llvm_mips_asub_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_asub_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_asub_s_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_asub_s_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_asub_s_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_asub_s_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.asub.s.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_asub_s_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.asub.s.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_asub_s_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_asub_s_b_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_asub_s_b_ARG2) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: asub_s.b [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_asub_s_b_RES) +; CHECK-DAG: st.b [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_asub_s_b_test +; +@llvm_mips_asub_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_asub_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_asub_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_asub_s_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_asub_s_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_asub_s_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.asub.s.h(<8 x i16> %0, <8 x i16> %1) + 
store <8 x i16> %2, <8 x i16>* @llvm_mips_asub_s_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.asub.s.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_asub_s_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_asub_s_h_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_asub_s_h_ARG2) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: asub_s.h [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_asub_s_h_RES) +; CHECK-DAG: st.h [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_asub_s_h_test +; +@llvm_mips_asub_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_asub_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_asub_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_asub_s_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_asub_s_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_asub_s_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.asub.s.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_asub_s_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.asub.s.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_asub_s_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_asub_s_w_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_asub_s_w_ARG2) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: asub_s.w [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_asub_s_w_RES) +; CHECK-DAG: st.w [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_asub_s_w_test +; +@llvm_mips_asub_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_asub_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_asub_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_asub_s_d_test() nounwind { +entry: + %0 = load <2 x i64>* 
@llvm_mips_asub_s_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_asub_s_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.asub.s.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_asub_s_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.asub.s.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_asub_s_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_asub_s_d_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_asub_s_d_ARG2) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: asub_s.d [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_asub_s_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_asub_s_d_test +; +@llvm_mips_asub_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_asub_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_asub_u_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_asub_u_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_asub_u_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_asub_u_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.asub.u.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_asub_u_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.asub.u.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_asub_u_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_asub_u_b_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_asub_u_b_ARG2) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: asub_u.b [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_asub_u_b_RES) +; CHECK-DAG: st.b 
[[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_asub_u_b_test +; +@llvm_mips_asub_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_asub_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_asub_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_asub_u_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_asub_u_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_asub_u_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.asub.u.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_asub_u_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.asub.u.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_asub_u_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_asub_u_h_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_asub_u_h_ARG2) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: asub_u.h [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_asub_u_h_RES) +; CHECK-DAG: st.h [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_asub_u_h_test +; +@llvm_mips_asub_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_asub_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_asub_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_asub_u_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_asub_u_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_asub_u_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.asub.u.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_asub_u_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.asub.u.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_asub_u_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_asub_u_w_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], 
%got(llvm_mips_asub_u_w_ARG2) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: asub_u.w [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_asub_u_w_RES) +; CHECK-DAG: st.w [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_asub_u_w_test +; +@llvm_mips_asub_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_asub_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_asub_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_asub_u_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_asub_u_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_asub_u_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.asub.u.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_asub_u_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.asub.u.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_asub_u_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_asub_u_d_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_asub_u_d_ARG2) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: asub_u.d [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_asub_u_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_asub_u_d_test +; +@llvm_mips_ave_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_ave_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_ave_s_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_ave_s_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_ave_s_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_ave_s_b_ARG2 + %2 = 
tail call <16 x i8> @llvm.mips.ave.s.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_ave_s_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.ave.s.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_ave_s_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ave_s_b_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ave_s_b_ARG2) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: ave_s.b [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_ave_s_b_RES) +; CHECK-DAG: st.b [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_ave_s_b_test +; +@llvm_mips_ave_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_ave_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_ave_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_ave_s_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_ave_s_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_ave_s_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.ave.s.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_ave_s_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.ave.s.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_ave_s_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ave_s_h_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ave_s_h_ARG2) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: ave_s.h [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_ave_s_h_RES) +; CHECK-DAG: st.h [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_ave_s_h_test +; +@llvm_mips_ave_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_ave_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 
+@llvm_mips_ave_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_ave_s_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_ave_s_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_ave_s_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.ave.s.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_ave_s_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.ave.s.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_ave_s_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ave_s_w_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ave_s_w_ARG2) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: ave_s.w [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_ave_s_w_RES) +; CHECK-DAG: st.w [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_ave_s_w_test +; +@llvm_mips_ave_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_ave_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_ave_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_ave_s_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_ave_s_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_ave_s_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.ave.s.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_ave_s_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.ave.s.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_ave_s_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ave_s_d_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ave_s_d_ARG2) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: ave_s.d [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_ave_s_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_ave_s_d_test +; +@llvm_mips_ave_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, 
i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_ave_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_ave_u_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_ave_u_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_ave_u_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_ave_u_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.ave.u.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_ave_u_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.ave.u.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_ave_u_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ave_u_b_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ave_u_b_ARG2) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: ave_u.b [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_ave_u_b_RES) +; CHECK-DAG: st.b [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_ave_u_b_test +; +@llvm_mips_ave_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_ave_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_ave_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_ave_u_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_ave_u_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_ave_u_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.ave.u.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_ave_u_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.ave.u.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_ave_u_h_test: +; CHECK-DAG: lw 
[[R1:\$[0-9]+]], %got(llvm_mips_ave_u_h_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ave_u_h_ARG2) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: ave_u.h [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_ave_u_h_RES) +; CHECK-DAG: st.h [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_ave_u_h_test +; +@llvm_mips_ave_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_ave_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_ave_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_ave_u_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_ave_u_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_ave_u_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.ave.u.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_ave_u_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.ave.u.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_ave_u_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ave_u_w_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ave_u_w_ARG2) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: ave_u.w [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_ave_u_w_RES) +; CHECK-DAG: st.w [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_ave_u_w_test +; +@llvm_mips_ave_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_ave_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_ave_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_ave_u_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_ave_u_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_ave_u_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.ave.u.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_ave_u_d_RES + ret void +} + +declare <2 x i64> 
@llvm.mips.ave.u.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_ave_u_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ave_u_d_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ave_u_d_ARG2) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: ave_u.d [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_ave_u_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_ave_u_d_test +; +@llvm_mips_aver_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_aver_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_aver_s_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_aver_s_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_aver_s_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_aver_s_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.aver.s.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_aver_s_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.aver.s.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_aver_s_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_aver_s_b_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_aver_s_b_ARG2) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: aver_s.b [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_aver_s_b_RES) +; CHECK-DAG: st.b [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_aver_s_b_test +; +@llvm_mips_aver_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_aver_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, 
i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_aver_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_aver_s_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_aver_s_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_aver_s_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.aver.s.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_aver_s_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.aver.s.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_aver_s_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_aver_s_h_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_aver_s_h_ARG2) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: aver_s.h [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_aver_s_h_RES) +; CHECK-DAG: st.h [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_aver_s_h_test +; +@llvm_mips_aver_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_aver_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_aver_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_aver_s_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_aver_s_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_aver_s_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.aver.s.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_aver_s_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.aver.s.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_aver_s_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_aver_s_w_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_aver_s_w_ARG2) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: aver_s.w [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_aver_s_w_RES) +; 
CHECK-DAG: st.w [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_aver_s_w_test +; +@llvm_mips_aver_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_aver_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_aver_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_aver_s_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_aver_s_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_aver_s_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.aver.s.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_aver_s_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.aver.s.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_aver_s_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_aver_s_d_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_aver_s_d_ARG2) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: aver_s.d [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_aver_s_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_aver_s_d_test +; +@llvm_mips_aver_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_aver_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_aver_u_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_aver_u_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_aver_u_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_aver_u_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.aver.u.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_aver_u_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.aver.u.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: 
llvm_mips_aver_u_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_aver_u_b_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_aver_u_b_ARG2) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: aver_u.b [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_aver_u_b_RES) +; CHECK-DAG: st.b [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_aver_u_b_test +; +@llvm_mips_aver_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_aver_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_aver_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_aver_u_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_aver_u_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_aver_u_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.aver.u.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_aver_u_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.aver.u.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_aver_u_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_aver_u_h_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_aver_u_h_ARG2) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: aver_u.h [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_aver_u_h_RES) +; CHECK-DAG: st.h [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_aver_u_h_test +; +@llvm_mips_aver_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_aver_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_aver_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_aver_u_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_aver_u_w_ARG1 + %1 = load <4 x 
i32>* @llvm_mips_aver_u_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.aver.u.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_aver_u_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.aver.u.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_aver_u_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_aver_u_w_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_aver_u_w_ARG2) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: aver_u.w [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_aver_u_w_RES) +; CHECK-DAG: st.w [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_aver_u_w_test +; +@llvm_mips_aver_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_aver_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_aver_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_aver_u_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_aver_u_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_aver_u_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.aver.u.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_aver_u_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.aver.u.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_aver_u_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_aver_u_d_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_aver_u_d_ARG2) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: aver_u.d [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_aver_u_d_RES) +; CHECK-DAG: st.d [[WD]], 0([[R3]]) +; CHECK: .size llvm_mips_aver_u_d_test +; diff --git a/test/CodeGen/Mips/msa/3r-b.ll b/test/CodeGen/Mips/msa/3r-b.ll new file mode 100644 index 0000000..a05d19b --- /dev/null +++ b/test/CodeGen/Mips/msa/3r-b.ll @@ -0,0 +1,494 @@ +; Test the MSA intrinsics that are encoded with the 3R 
instruction format. +; There are lots of these so this covers those beginning with 'b' + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_bclr_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_bclr_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_bclr_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_bclr_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_bclr_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_bclr_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.bclr.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_bclr_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.bclr.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_bclr_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: bclr.b +; CHECK: st.b +; CHECK: .size llvm_mips_bclr_b_test +; +@llvm_mips_bclr_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_bclr_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_bclr_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_bclr_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_bclr_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_bclr_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.bclr.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_bclr_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.bclr.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_bclr_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: bclr.h +; CHECK: st.h +; CHECK: .size 
llvm_mips_bclr_h_test +; +@llvm_mips_bclr_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_bclr_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_bclr_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_bclr_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_bclr_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_bclr_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.bclr.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_bclr_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.bclr.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_bclr_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: bclr.w +; CHECK: st.w +; CHECK: .size llvm_mips_bclr_w_test +; +@llvm_mips_bclr_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_bclr_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_bclr_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_bclr_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_bclr_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_bclr_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.bclr.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_bclr_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.bclr.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_bclr_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: bclr.d +; CHECK: st.d +; CHECK: .size llvm_mips_bclr_d_test + +@llvm_mips_binsl_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_binsl_b_ARG2 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_binsl_b_ARG3 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_binsl_b_RES = global <16 x i8> 
<i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_binsl_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_binsl_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_binsl_b_ARG2 + %2 = load <16 x i8>* @llvm_mips_binsl_b_ARG3 + %3 = tail call <16 x i8> @llvm.mips.binsl.b(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) + store <16 x i8> %3, <16 x i8>* @llvm_mips_binsl_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.binsl.b(<16 x i8>, <16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_binsl_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsl_b_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsl_b_ARG2)( +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_binsl_b_ARG3)( +; CHECK-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]]) +; CHECK-DAG: binsl.b [[R4]], [[R5]], [[R6]] +; CHECK-DAG: st.b [[R4]], 0( +; CHECK: .size llvm_mips_binsl_b_test + +@llvm_mips_binsl_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_binsl_h_ARG2 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_binsl_h_ARG3 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_binsl_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_binsl_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_binsl_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_binsl_h_ARG2 + %2 = load <8 x i16>* @llvm_mips_binsl_h_ARG3 + %3 = tail call <8 x i16> @llvm.mips.binsl.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) + store <8 x i16> %3, <8 x i16>* @llvm_mips_binsl_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.binsl.h(<8 x i16>, <8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_binsl_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], 
%got(llvm_mips_binsl_h_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsl_h_ARG2)( +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_binsl_h_ARG3)( +; CHECK-DAG: ld.h [[R4:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[R5:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: ld.h [[R6:\$w[0-9]+]], 0([[R3]]) +; CHECK-DAG: binsl.h [[R4]], [[R5]], [[R6]] +; CHECK-DAG: st.h [[R4]], 0( +; CHECK: .size llvm_mips_binsl_h_test + +@llvm_mips_binsl_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_binsl_w_ARG2 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_binsl_w_ARG3 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_binsl_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_binsl_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_binsl_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_binsl_w_ARG2 + %2 = load <4 x i32>* @llvm_mips_binsl_w_ARG3 + %3 = tail call <4 x i32> @llvm.mips.binsl.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) + store <4 x i32> %3, <4 x i32>* @llvm_mips_binsl_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.binsl.w(<4 x i32>, <4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_binsl_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsl_w_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsl_w_ARG2)( +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_binsl_w_ARG3)( +; CHECK-DAG: ld.w [[R4:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[R5:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: ld.w [[R6:\$w[0-9]+]], 0([[R3]]) +; CHECK-DAG: binsl.w [[R4]], [[R5]], [[R6]] +; CHECK-DAG: st.w [[R4]], 0( +; CHECK: .size llvm_mips_binsl_w_test + +@llvm_mips_binsl_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_binsl_d_ARG2 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_binsl_d_ARG3 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_binsl_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_binsl_d_test() nounwind { 
+entry: + %0 = load <2 x i64>* @llvm_mips_binsl_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_binsl_d_ARG2 + %2 = load <2 x i64>* @llvm_mips_binsl_d_ARG3 + %3 = tail call <2 x i64> @llvm.mips.binsl.d(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) + store <2 x i64> %3, <2 x i64>* @llvm_mips_binsl_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.binsl.d(<2 x i64>, <2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_binsl_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsl_d_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsl_d_ARG2)( +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_binsl_d_ARG3)( +; CHECK-DAG: ld.d [[R4:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[R5:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: ld.d [[R6:\$w[0-9]+]], 0([[R3]]) +; CHECK-DAG: binsl.d [[R4]], [[R5]], [[R6]] +; CHECK-DAG: st.d [[R4]], 0( +; CHECK: .size llvm_mips_binsl_d_test + +@llvm_mips_binsr_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_binsr_b_ARG2 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_binsr_b_ARG3 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_binsr_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_binsr_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_binsr_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_binsr_b_ARG2 + %2 = load <16 x i8>* @llvm_mips_binsr_b_ARG3 + %3 = tail call <16 x i8> @llvm.mips.binsr.b(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) + store <16 x i8> %3, <16 x i8>* @llvm_mips_binsr_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.binsr.b(<16 x i8>, <16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_binsr_b_test: +; CHECK-DAG: lw 
[[R1:\$[0-9]+]], %got(llvm_mips_binsr_b_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsr_b_ARG2)( +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_binsr_b_ARG3)( +; CHECK-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]]) +; CHECK-DAG: binsr.b [[R4]], [[R5]], [[R6]] +; CHECK-DAG: st.b [[R4]], 0( +; CHECK: .size llvm_mips_binsr_b_test + +@llvm_mips_binsr_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_binsr_h_ARG2 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_binsr_h_ARG3 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_binsr_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_binsr_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_binsr_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_binsr_h_ARG2 + %2 = load <8 x i16>* @llvm_mips_binsr_h_ARG3 + %3 = tail call <8 x i16> @llvm.mips.binsr.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) + store <8 x i16> %3, <8 x i16>* @llvm_mips_binsr_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.binsr.h(<8 x i16>, <8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_binsr_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsr_h_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsr_h_ARG2)( +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_binsr_h_ARG3)( +; CHECK-DAG: ld.h [[R4:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[R5:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: ld.h [[R6:\$w[0-9]+]], 0([[R3]]) +; CHECK-DAG: binsr.h [[R4]], [[R5]], [[R6]] +; CHECK-DAG: st.h [[R4]], 0( +; CHECK: .size llvm_mips_binsr_h_test + +@llvm_mips_binsr_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_binsr_w_ARG2 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_binsr_w_ARG3 = 
global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_binsr_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_binsr_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_binsr_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_binsr_w_ARG2 + %2 = load <4 x i32>* @llvm_mips_binsr_w_ARG3 + %3 = tail call <4 x i32> @llvm.mips.binsr.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) + store <4 x i32> %3, <4 x i32>* @llvm_mips_binsr_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.binsr.w(<4 x i32>, <4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_binsr_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsr_w_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsr_w_ARG2)( +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_binsr_w_ARG3)( +; CHECK-DAG: ld.w [[R4:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[R5:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: ld.w [[R6:\$w[0-9]+]], 0([[R3]]) +; CHECK-DAG: binsr.w [[R4]], [[R5]], [[R6]] +; CHECK-DAG: st.w [[R4]], 0( +; CHECK: .size llvm_mips_binsr_w_test + +@llvm_mips_binsr_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_binsr_d_ARG2 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_binsr_d_ARG3 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_binsr_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_binsr_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_binsr_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_binsr_d_ARG2 + %2 = load <2 x i64>* @llvm_mips_binsr_d_ARG3 + %3 = tail call <2 x i64> @llvm.mips.binsr.d(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) + store <2 x i64> %3, <2 x i64>* @llvm_mips_binsr_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.binsr.d(<2 x i64>, <2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_binsr_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsr_d_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsr_d_ARG2)( +; CHECK-DAG: lw [[R3:\$[0-9]+]], 
%got(llvm_mips_binsr_d_ARG3)( +; CHECK-DAG: ld.d [[R4:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[R5:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: ld.d [[R6:\$w[0-9]+]], 0([[R3]]) +; CHECK-DAG: binsr.d [[R4]], [[R5]], [[R6]] +; CHECK-DAG: st.d [[R4]], 0( +; CHECK: .size llvm_mips_binsr_d_test + +@llvm_mips_bneg_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_bneg_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_bneg_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_bneg_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_bneg_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_bneg_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.bneg.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_bneg_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.bneg.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_bneg_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: bneg.b +; CHECK: st.b +; CHECK: .size llvm_mips_bneg_b_test +; +@llvm_mips_bneg_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_bneg_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_bneg_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_bneg_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_bneg_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_bneg_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.bneg.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_bneg_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.bneg.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_bneg_h_test: +; 
CHECK: ld.h +; CHECK: ld.h +; CHECK: bneg.h +; CHECK: st.h +; CHECK: .size llvm_mips_bneg_h_test +; +@llvm_mips_bneg_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_bneg_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_bneg_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_bneg_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_bneg_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_bneg_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.bneg.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_bneg_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.bneg.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_bneg_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: bneg.w +; CHECK: st.w +; CHECK: .size llvm_mips_bneg_w_test +; +@llvm_mips_bneg_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_bneg_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_bneg_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_bneg_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_bneg_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_bneg_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.bneg.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_bneg_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.bneg.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_bneg_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: bneg.d +; CHECK: st.d +; CHECK: .size llvm_mips_bneg_d_test +; +@llvm_mips_bset_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_bset_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_bset_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, 
i8 0, i8 0>, align 16 + +define void @llvm_mips_bset_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_bset_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_bset_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.bset.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_bset_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.bset.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_bset_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: bset.b +; CHECK: st.b +; CHECK: .size llvm_mips_bset_b_test +; +@llvm_mips_bset_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_bset_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_bset_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_bset_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_bset_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_bset_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.bset.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_bset_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.bset.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_bset_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: bset.h +; CHECK: st.h +; CHECK: .size llvm_mips_bset_h_test +; +@llvm_mips_bset_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_bset_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_bset_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_bset_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_bset_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_bset_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.bset.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_bset_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.bset.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: 
llvm_mips_bset_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: bset.w +; CHECK: st.w +; CHECK: .size llvm_mips_bset_w_test +; +@llvm_mips_bset_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_bset_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_bset_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_bset_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_bset_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_bset_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.bset.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_bset_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.bset.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_bset_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: bset.d +; CHECK: st.d +; CHECK: .size llvm_mips_bset_d_test +; diff --git a/test/CodeGen/Mips/msa/3r-c.ll b/test/CodeGen/Mips/msa/3r-c.ll new file mode 100644 index 0000000..6ec92c2 --- /dev/null +++ b/test/CodeGen/Mips/msa/3r-c.ll @@ -0,0 +1,446 @@ +; Test the MSA intrinsics that are encoded with the 3R instruction format. 
+; There are lots of these so this covers those beginning with 'c' + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_ceq_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_ceq_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_ceq_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_ceq_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_ceq_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_ceq_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.ceq.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_ceq_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.ceq.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_ceq_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: ceq.b +; CHECK: st.b +; CHECK: .size llvm_mips_ceq_b_test +; +@llvm_mips_ceq_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_ceq_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_ceq_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_ceq_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_ceq_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_ceq_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.ceq.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_ceq_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.ceq.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_ceq_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: ceq.h +; CHECK: st.h +; CHECK: .size llvm_mips_ceq_h_test +; 
+@llvm_mips_ceq_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_ceq_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_ceq_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_ceq_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_ceq_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_ceq_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.ceq.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_ceq_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.ceq.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_ceq_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: ceq.w +; CHECK: st.w +; CHECK: .size llvm_mips_ceq_w_test +; +@llvm_mips_ceq_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_ceq_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_ceq_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_ceq_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_ceq_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_ceq_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.ceq.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_ceq_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.ceq.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_ceq_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: ceq.d +; CHECK: st.d +; CHECK: .size llvm_mips_ceq_d_test +; +@llvm_mips_cle_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_cle_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_cle_s_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_cle_s_b_test() nounwind { +entry: + %0 = load <16 x i8>* 
@llvm_mips_cle_s_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_cle_s_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.cle.s.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_cle_s_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.cle.s.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_cle_s_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: cle_s.b +; CHECK: st.b +; CHECK: .size llvm_mips_cle_s_b_test +; +@llvm_mips_cle_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_cle_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_cle_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_cle_s_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_cle_s_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_cle_s_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.cle.s.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_cle_s_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.cle.s.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_cle_s_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: cle_s.h +; CHECK: st.h +; CHECK: .size llvm_mips_cle_s_h_test +; +@llvm_mips_cle_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_cle_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_cle_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_cle_s_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_cle_s_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_cle_s_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.cle.s.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_cle_s_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.cle.s.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_cle_s_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: cle_s.w +; CHECK: st.w +; 
CHECK: .size llvm_mips_cle_s_w_test +; +@llvm_mips_cle_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_cle_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_cle_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_cle_s_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_cle_s_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_cle_s_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.cle.s.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_cle_s_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.cle.s.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_cle_s_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: cle_s.d +; CHECK: st.d +; CHECK: .size llvm_mips_cle_s_d_test +; +@llvm_mips_cle_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_cle_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_cle_u_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_cle_u_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_cle_u_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_cle_u_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.cle.u.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_cle_u_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.cle.u.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_cle_u_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: cle_u.b +; CHECK: st.b +; CHECK: .size llvm_mips_cle_u_b_test +; +@llvm_mips_cle_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_cle_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_cle_u_h_RES = global 
<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_cle_u_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_cle_u_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_cle_u_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.cle.u.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_cle_u_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.cle.u.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_cle_u_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: cle_u.h +; CHECK: st.h +; CHECK: .size llvm_mips_cle_u_h_test +; +@llvm_mips_cle_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_cle_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_cle_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_cle_u_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_cle_u_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_cle_u_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.cle.u.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_cle_u_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.cle.u.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_cle_u_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: cle_u.w +; CHECK: st.w +; CHECK: .size llvm_mips_cle_u_w_test +; +@llvm_mips_cle_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_cle_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_cle_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_cle_u_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_cle_u_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_cle_u_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.cle.u.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_cle_u_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.cle.u.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_cle_u_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: 
cle_u.d +; CHECK: st.d +; CHECK: .size llvm_mips_cle_u_d_test +; +@llvm_mips_clt_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_clt_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_clt_s_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_clt_s_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_clt_s_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_clt_s_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.clt.s.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_clt_s_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.clt.s.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_clt_s_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: clt_s.b +; CHECK: st.b +; CHECK: .size llvm_mips_clt_s_b_test +; +@llvm_mips_clt_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_clt_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_clt_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_clt_s_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_clt_s_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_clt_s_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.clt.s.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_clt_s_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.clt.s.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_clt_s_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: clt_s.h +; CHECK: st.h +; CHECK: .size llvm_mips_clt_s_h_test +; +@llvm_mips_clt_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 
+@llvm_mips_clt_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_clt_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_clt_s_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_clt_s_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_clt_s_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.clt.s.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_clt_s_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.clt.s.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_clt_s_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: clt_s.w +; CHECK: st.w +; CHECK: .size llvm_mips_clt_s_w_test +; +@llvm_mips_clt_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_clt_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_clt_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_clt_s_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_clt_s_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_clt_s_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.clt.s.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_clt_s_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.clt.s.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_clt_s_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: clt_s.d +; CHECK: st.d +; CHECK: .size llvm_mips_clt_s_d_test +; +@llvm_mips_clt_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_clt_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_clt_u_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_clt_u_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_clt_u_b_ARG1 + %1 = load <16 x i8>* 
@llvm_mips_clt_u_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.clt.u.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_clt_u_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.clt.u.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_clt_u_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: clt_u.b +; CHECK: st.b +; CHECK: .size llvm_mips_clt_u_b_test +; +@llvm_mips_clt_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_clt_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_clt_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_clt_u_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_clt_u_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_clt_u_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.clt.u.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_clt_u_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.clt.u.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_clt_u_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: clt_u.h +; CHECK: st.h +; CHECK: .size llvm_mips_clt_u_h_test +; +@llvm_mips_clt_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_clt_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_clt_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_clt_u_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_clt_u_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_clt_u_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.clt.u.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_clt_u_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.clt.u.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_clt_u_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: clt_u.w +; CHECK: st.w +; CHECK: .size llvm_mips_clt_u_w_test +; 
+@llvm_mips_clt_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_clt_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_clt_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_clt_u_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_clt_u_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_clt_u_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.clt.u.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_clt_u_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.clt.u.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_clt_u_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: clt_u.d +; CHECK: st.d +; CHECK: .size llvm_mips_clt_u_d_test +; diff --git a/test/CodeGen/Mips/msa/3r-d.ll b/test/CodeGen/Mips/msa/3r-d.ll new file mode 100644 index 0000000..0099554 --- /dev/null +++ b/test/CodeGen/Mips/msa/3r-d.ll @@ -0,0 +1,478 @@ +; Test the MSA intrinsics that are encoded with the 3R instruction format. +; There are lots of these so this covers those beginning with 'd' + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_div_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_div_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_div_s_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_div_s_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_div_s_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_div_s_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.div.s.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_div_s_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.div.s.b(<16 x i8>, 
<16 x i8>) nounwind + +; CHECK: llvm_mips_div_s_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: div_s.b +; CHECK: st.b +; CHECK: .size llvm_mips_div_s_b_test +; +@llvm_mips_div_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_div_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_div_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_div_s_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_div_s_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_div_s_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.div.s.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_div_s_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.div.s.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_div_s_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: div_s.h +; CHECK: st.h +; CHECK: .size llvm_mips_div_s_h_test +; +@llvm_mips_div_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_div_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_div_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_div_s_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_div_s_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_div_s_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.div.s.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_div_s_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.div.s.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_div_s_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: div_s.w +; CHECK: st.w +; CHECK: .size llvm_mips_div_s_w_test +; +@llvm_mips_div_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_div_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_div_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void 
@llvm_mips_div_s_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_div_s_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_div_s_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.div.s.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_div_s_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.div.s.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_div_s_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: div_s.d +; CHECK: st.d +; CHECK: .size llvm_mips_div_s_d_test +; + +define void @div_s_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_div_s_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_div_s_b_ARG2 + %2 = sdiv <16 x i8> %0, %1 + store <16 x i8> %2, <16 x i8>* @llvm_mips_div_s_b_RES + ret void +} + +; CHECK: div_s_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: div_s.b +; CHECK: st.b +; CHECK: .size div_s_b_test + +define void @div_s_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_div_s_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_div_s_h_ARG2 + %2 = sdiv <8 x i16> %0, %1 + store <8 x i16> %2, <8 x i16>* @llvm_mips_div_s_h_RES + ret void +} + +; CHECK: div_s_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: div_s.h +; CHECK: st.h +; CHECK: .size div_s_h_test + +define void @div_s_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_div_s_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_div_s_w_ARG2 + %2 = sdiv <4 x i32> %0, %1 + store <4 x i32> %2, <4 x i32>* @llvm_mips_div_s_w_RES + ret void +} + +; CHECK: div_s_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: div_s.w +; CHECK: st.w +; CHECK: .size div_s_w_test + +define void @div_s_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_div_s_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_div_s_d_ARG2 + %2 = sdiv <2 x i64> %0, %1 + store <2 x i64> %2, <2 x i64>* @llvm_mips_div_s_d_RES + ret void +} + +; CHECK: div_s_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: div_s.d +; CHECK: st.d +; CHECK: .size div_s_d_test +; +@llvm_mips_div_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, 
i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_div_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_div_u_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_div_u_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_div_u_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_div_u_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.div.u.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_div_u_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.div.u.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_div_u_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: div_u.b +; CHECK: st.b +; CHECK: .size llvm_mips_div_u_b_test +; +@llvm_mips_div_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_div_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_div_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_div_u_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_div_u_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_div_u_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.div.u.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_div_u_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.div.u.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_div_u_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: div_u.h +; CHECK: st.h +; CHECK: .size llvm_mips_div_u_h_test +; +@llvm_mips_div_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_div_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_div_u_w_RES = global <4 x i32> <i32 0, i32 0, 
i32 0, i32 0>, align 16 + +define void @llvm_mips_div_u_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_div_u_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_div_u_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.div.u.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_div_u_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.div.u.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_div_u_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: div_u.w +; CHECK: st.w +; CHECK: .size llvm_mips_div_u_w_test +; +@llvm_mips_div_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_div_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_div_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_div_u_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_div_u_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_div_u_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.div.u.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_div_u_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.div.u.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_div_u_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: div_u.d +; CHECK: st.d +; CHECK: .size llvm_mips_div_u_d_test +; + +define void @div_u_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_div_u_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_div_u_b_ARG2 + %2 = udiv <16 x i8> %0, %1 + store <16 x i8> %2, <16 x i8>* @llvm_mips_div_u_b_RES + ret void +} + +; CHECK: div_u_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: div_u.b +; CHECK: st.b +; CHECK: .size div_u_b_test + +define void @div_u_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_div_u_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_div_u_h_ARG2 + %2 = udiv <8 x i16> %0, %1 + store <8 x i16> %2, <8 x i16>* @llvm_mips_div_u_h_RES + ret void +} + +; CHECK: div_u_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: div_u.h +; CHECK: st.h +; CHECK: .size div_u_h_test + +define void 
@div_u_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_div_u_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_div_u_w_ARG2 + %2 = udiv <4 x i32> %0, %1 + store <4 x i32> %2, <4 x i32>* @llvm_mips_div_u_w_RES + ret void +} + +; CHECK: div_u_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: div_u.w +; CHECK: st.w +; CHECK: .size div_u_w_test + +define void @div_u_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_div_u_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_div_u_d_ARG2 + %2 = udiv <2 x i64> %0, %1 + store <2 x i64> %2, <2 x i64>* @llvm_mips_div_u_d_RES + ret void +} + +; CHECK: div_u_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: div_u.d +; CHECK: st.d +; CHECK: .size div_u_d_test +; +@llvm_mips_dotp_s_h_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, + i8 4, i8 5, i8 6, i8 7, + i8 8, i8 9, i8 10, i8 11, + i8 12, i8 13, i8 14, i8 15>, + align 16 +@llvm_mips_dotp_s_h_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, + i8 20, i8 21, i8 22, i8 23, + i8 24, i8 25, i8 26, i8 27, + i8 28, i8 29, i8 30, i8 31>, + align 16 +@llvm_mips_dotp_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, + i16 0, i16 0, i16 0, i16 0>, + align 16 + +define void @llvm_mips_dotp_s_h_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_dotp_s_h_ARG1 + %1 = load <16 x i8>* @llvm_mips_dotp_s_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.dotp.s.h(<16 x i8> %0, <16 x i8> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_dotp_s_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.dotp.s.h(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_dotp_s_h_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: dotp_s.h +; CHECK: st.h +; CHECK: .size llvm_mips_dotp_s_h_test +; +@llvm_mips_dotp_s_w_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, + i16 4, i16 5, i16 6, i16 7>, + align 16 +@llvm_mips_dotp_s_w_ARG2 = global <8 x i16> <i16 4, i16 5, i16 6, i16 7, + i16 8, i16 9, i16 10, i16 11>, + align 16 +@llvm_mips_dotp_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, + 
align 16 + +define void @llvm_mips_dotp_s_w_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_dotp_s_w_ARG1 + %1 = load <8 x i16>* @llvm_mips_dotp_s_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.dotp.s.w(<8 x i16> %0, <8 x i16> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_dotp_s_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.dotp.s.w(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_dotp_s_w_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: dotp_s.w +; CHECK: st.w +; CHECK: .size llvm_mips_dotp_s_w_test +; +@llvm_mips_dotp_s_d_ARG1 = global <4 x i32> <i32 0, i32 1, i32 0, i32 1>, + align 16 +@llvm_mips_dotp_s_d_ARG2 = global <4 x i32> <i32 2, i32 3, i32 2, i32 3>, + align 16 +@llvm_mips_dotp_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_dotp_s_d_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_dotp_s_d_ARG1 + %1 = load <4 x i32>* @llvm_mips_dotp_s_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.dotp.s.d(<4 x i32> %0, <4 x i32> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_dotp_s_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.dotp.s.d(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_dotp_s_d_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: dotp_s.d +; CHECK: st.d +; CHECK: .size llvm_mips_dotp_s_d_test +; +@llvm_mips_dotp_u_h_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, + i8 4, i8 5, i8 6, i8 7, + i8 8, i8 9, i8 10, i8 11, + i8 12, i8 13, i8 14, i8 15>, + align 16 +@llvm_mips_dotp_u_h_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, + i8 20, i8 21, i8 22, i8 23, + i8 24, i8 25, i8 26, i8 27, + i8 28, i8 29, i8 30, i8 31>, + align 16 +@llvm_mips_dotp_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, + i16 0, i16 0, i16 0, i16 0>, + align 16 + +define void @llvm_mips_dotp_u_h_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_dotp_u_h_ARG1 + %1 = load <16 x i8>* @llvm_mips_dotp_u_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.dotp.u.h(<16 x i8> %0, <16 x i8> %1) + store <8 x i16> %2, 
<8 x i16>* @llvm_mips_dotp_u_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.dotp.u.h(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_dotp_u_h_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: dotp_u.h +; CHECK: st.h +; CHECK: .size llvm_mips_dotp_u_h_test +; +@llvm_mips_dotp_u_w_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, + i16 4, i16 5, i16 6, i16 7>, + align 16 +@llvm_mips_dotp_u_w_ARG2 = global <8 x i16> <i16 4, i16 5, i16 6, i16 7, + i16 8, i16 9, i16 10, i16 11>, + align 16 +@llvm_mips_dotp_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, + align 16 + +define void @llvm_mips_dotp_u_w_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_dotp_u_w_ARG1 + %1 = load <8 x i16>* @llvm_mips_dotp_u_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.dotp.u.w(<8 x i16> %0, <8 x i16> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_dotp_u_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.dotp.u.w(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_dotp_u_w_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: dotp_u.w +; CHECK: st.w +; CHECK: .size llvm_mips_dotp_u_w_test +; +@llvm_mips_dotp_u_d_ARG1 = global <4 x i32> <i32 0, i32 1, i32 0, i32 1>, + align 16 +@llvm_mips_dotp_u_d_ARG2 = global <4 x i32> <i32 2, i32 3, i32 2, i32 3>, + align 16 +@llvm_mips_dotp_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_dotp_u_d_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_dotp_u_d_ARG1 + %1 = load <4 x i32>* @llvm_mips_dotp_u_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.dotp.u.d(<4 x i32> %0, <4 x i32> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_dotp_u_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.dotp.u.d(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_dotp_u_d_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: dotp_u.d +; CHECK: st.d +; CHECK: .size llvm_mips_dotp_u_d_test +; diff --git a/test/CodeGen/Mips/msa/3r-i.ll b/test/CodeGen/Mips/msa/3r-i.ll new file mode 100644 index 0000000..2ef3047 --- /dev/null +++ 
b/test/CodeGen/Mips/msa/3r-i.ll @@ -0,0 +1,358 @@ +; Test the MSA intrinsics that are encoded with the 3R instruction format. +; There are lots of these so this covers those beginning with 'i' + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_ilvev_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_ilvev_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_ilvev_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_ilvev_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_ilvev_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_ilvev_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.ilvev.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_ilvev_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.ilvev.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_ilvev_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: ilvev.b +; CHECK: st.b +; CHECK: .size llvm_mips_ilvev_b_test +; +@llvm_mips_ilvev_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_ilvev_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_ilvev_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_ilvev_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_ilvev_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_ilvev_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.ilvev.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_ilvev_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.ilvev.h(<8 x i16>, <8 x 
i16>) nounwind + +; CHECK: llvm_mips_ilvev_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: ilvev.h +; CHECK: st.h +; CHECK: .size llvm_mips_ilvev_h_test +; +@llvm_mips_ilvev_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_ilvev_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_ilvev_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_ilvev_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_ilvev_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_ilvev_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.ilvev.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_ilvev_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.ilvev.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_ilvev_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: ilvev.w +; CHECK: st.w +; CHECK: .size llvm_mips_ilvev_w_test +; +@llvm_mips_ilvev_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_ilvev_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_ilvev_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_ilvev_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_ilvev_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_ilvev_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.ilvev.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_ilvev_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.ilvev.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_ilvev_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: ilvev.d +; CHECK: st.d +; CHECK: .size llvm_mips_ilvev_d_test +; +@llvm_mips_ilvl_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_ilvl_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_ilvl_b_RES = global <16 x i8> <i8 
0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_ilvl_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_ilvl_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_ilvl_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.ilvl.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_ilvl_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.ilvl.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_ilvl_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: ilvl.b +; CHECK: st.b +; CHECK: .size llvm_mips_ilvl_b_test +; +@llvm_mips_ilvl_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_ilvl_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_ilvl_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_ilvl_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_ilvl_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_ilvl_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.ilvl.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_ilvl_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.ilvl.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_ilvl_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: ilvl.h +; CHECK: st.h +; CHECK: .size llvm_mips_ilvl_h_test +; +@llvm_mips_ilvl_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_ilvl_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_ilvl_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_ilvl_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_ilvl_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_ilvl_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.ilvl.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_ilvl_w_RES + ret void +} + +declare <4 x i32> 
@llvm.mips.ilvl.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_ilvl_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: ilvl.w +; CHECK: st.w +; CHECK: .size llvm_mips_ilvl_w_test +; +@llvm_mips_ilvl_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_ilvl_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_ilvl_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_ilvl_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_ilvl_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_ilvl_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.ilvl.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_ilvl_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.ilvl.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_ilvl_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: ilvl.d +; CHECK: st.d +; CHECK: .size llvm_mips_ilvl_d_test +; +@llvm_mips_ilvod_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_ilvod_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_ilvod_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_ilvod_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_ilvod_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_ilvod_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.ilvod.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_ilvod_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.ilvod.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_ilvod_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: ilvod.b +; CHECK: st.b +; CHECK: .size llvm_mips_ilvod_b_test +; +@llvm_mips_ilvod_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 
+@llvm_mips_ilvod_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_ilvod_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_ilvod_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_ilvod_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_ilvod_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.ilvod.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_ilvod_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.ilvod.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_ilvod_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: ilvod.h +; CHECK: st.h +; CHECK: .size llvm_mips_ilvod_h_test +; +@llvm_mips_ilvod_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_ilvod_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_ilvod_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_ilvod_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_ilvod_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_ilvod_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.ilvod.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_ilvod_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.ilvod.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_ilvod_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: ilvod.w +; CHECK: st.w +; CHECK: .size llvm_mips_ilvod_w_test +; +@llvm_mips_ilvod_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_ilvod_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_ilvod_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_ilvod_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_ilvod_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_ilvod_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.ilvod.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_ilvod_d_RES + ret void +} 
+ +declare <2 x i64> @llvm.mips.ilvod.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_ilvod_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: ilvod.d +; CHECK: st.d +; CHECK: .size llvm_mips_ilvod_d_test +; +@llvm_mips_ilvr_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_ilvr_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_ilvr_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_ilvr_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_ilvr_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_ilvr_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.ilvr.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_ilvr_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.ilvr.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_ilvr_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: ilvr.b +; CHECK: st.b +; CHECK: .size llvm_mips_ilvr_b_test +; +@llvm_mips_ilvr_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_ilvr_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_ilvr_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_ilvr_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_ilvr_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_ilvr_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.ilvr.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_ilvr_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.ilvr.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_ilvr_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: ilvr.h +; CHECK: st.h +; CHECK: .size 
llvm_mips_ilvr_h_test +; +@llvm_mips_ilvr_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_ilvr_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_ilvr_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_ilvr_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_ilvr_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_ilvr_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.ilvr.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_ilvr_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.ilvr.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_ilvr_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: ilvr.w +; CHECK: st.w +; CHECK: .size llvm_mips_ilvr_w_test +; +@llvm_mips_ilvr_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_ilvr_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_ilvr_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_ilvr_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_ilvr_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_ilvr_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.ilvr.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_ilvr_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.ilvr.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_ilvr_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: ilvr.d +; CHECK: st.d +; CHECK: .size llvm_mips_ilvr_d_test +; diff --git a/test/CodeGen/Mips/msa/3r-m.ll b/test/CodeGen/Mips/msa/3r-m.ll new file mode 100644 index 0000000..ddfd720 --- /dev/null +++ b/test/CodeGen/Mips/msa/3r-m.ll @@ -0,0 +1,862 @@ +; Test the MSA intrinsics that are encoded with the 3R instruction format. 
+; There are lots of these so this covers those beginning with 'm' + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_max_a_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_max_a_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_max_a_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_max_a_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_max_a_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_max_a_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.max.a.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_max_a_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.max.a.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_max_a_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: max_a.b +; CHECK: st.b +; CHECK: .size llvm_mips_max_a_b_test +; +@llvm_mips_max_a_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_max_a_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_max_a_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_max_a_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_max_a_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_max_a_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.max.a.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_max_a_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.max.a.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_max_a_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: max_a.h +; CHECK: st.h +; CHECK: 
.size llvm_mips_max_a_h_test +; +@llvm_mips_max_a_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_max_a_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_max_a_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_max_a_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_max_a_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_max_a_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.max.a.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_max_a_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.max.a.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_max_a_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: max_a.w +; CHECK: st.w +; CHECK: .size llvm_mips_max_a_w_test +; +@llvm_mips_max_a_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_max_a_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_max_a_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_max_a_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_max_a_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_max_a_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.max.a.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_max_a_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.max.a.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_max_a_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: max_a.d +; CHECK: st.d +; CHECK: .size llvm_mips_max_a_d_test +; +@llvm_mips_max_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_max_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_max_s_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void 
@llvm_mips_max_s_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_max_s_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_max_s_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.max.s.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_max_s_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.max.s.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_max_s_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: max_s.b +; CHECK: st.b +; CHECK: .size llvm_mips_max_s_b_test +; +@llvm_mips_max_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_max_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_max_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_max_s_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_max_s_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_max_s_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.max.s.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_max_s_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.max.s.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_max_s_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: max_s.h +; CHECK: st.h +; CHECK: .size llvm_mips_max_s_h_test +; +@llvm_mips_max_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_max_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_max_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_max_s_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_max_s_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_max_s_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.max.s.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_max_s_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.max.s.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_max_s_w_test: +; 
CHECK: ld.w +; CHECK: ld.w +; CHECK: max_s.w +; CHECK: st.w +; CHECK: .size llvm_mips_max_s_w_test +; +@llvm_mips_max_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_max_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_max_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_max_s_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_max_s_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_max_s_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.max.s.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_max_s_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.max.s.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_max_s_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: max_s.d +; CHECK: st.d +; CHECK: .size llvm_mips_max_s_d_test +; +@llvm_mips_max_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_max_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_max_u_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_max_u_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_max_u_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_max_u_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.max.u.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_max_u_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.max.u.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_max_u_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: max_u.b +; CHECK: st.b +; CHECK: .size llvm_mips_max_u_b_test +; +@llvm_mips_max_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_max_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 
13, i16 14, i16 15>, align 16 +@llvm_mips_max_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_max_u_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_max_u_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_max_u_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.max.u.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_max_u_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.max.u.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_max_u_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: max_u.h +; CHECK: st.h +; CHECK: .size llvm_mips_max_u_h_test +; +@llvm_mips_max_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_max_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_max_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_max_u_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_max_u_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_max_u_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.max.u.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_max_u_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.max.u.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_max_u_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: max_u.w +; CHECK: st.w +; CHECK: .size llvm_mips_max_u_w_test +; +@llvm_mips_max_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_max_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_max_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_max_u_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_max_u_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_max_u_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.max.u.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_max_u_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.max.u.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: 
llvm_mips_max_u_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: max_u.d +; CHECK: st.d +; CHECK: .size llvm_mips_max_u_d_test +; +@llvm_mips_min_a_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_min_a_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_min_a_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_min_a_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_min_a_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_min_a_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.min.a.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_min_a_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.min.a.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_min_a_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: min_a.b +; CHECK: st.b +; CHECK: .size llvm_mips_min_a_b_test +; +@llvm_mips_min_a_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_min_a_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_min_a_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_min_a_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_min_a_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_min_a_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.min.a.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_min_a_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.min.a.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_min_a_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: min_a.h +; CHECK: st.h +; CHECK: .size llvm_mips_min_a_h_test +; +@llvm_mips_min_a_w_ARG1 = global <4 x 
i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_min_a_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_min_a_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_min_a_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_min_a_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_min_a_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.min.a.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_min_a_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.min.a.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_min_a_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: min_a.w +; CHECK: st.w +; CHECK: .size llvm_mips_min_a_w_test +; +@llvm_mips_min_a_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_min_a_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_min_a_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_min_a_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_min_a_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_min_a_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.min.a.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_min_a_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.min.a.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_min_a_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: min_a.d +; CHECK: st.d +; CHECK: .size llvm_mips_min_a_d_test +; +@llvm_mips_min_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_min_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_min_s_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_min_s_b_test() nounwind { +entry: + %0 = load <16 x i8>* 
@llvm_mips_min_s_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_min_s_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.min.s.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_min_s_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.min.s.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_min_s_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: min_s.b +; CHECK: st.b +; CHECK: .size llvm_mips_min_s_b_test +; +@llvm_mips_min_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_min_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_min_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_min_s_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_min_s_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_min_s_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.min.s.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_min_s_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.min.s.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_min_s_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: min_s.h +; CHECK: st.h +; CHECK: .size llvm_mips_min_s_h_test +; +@llvm_mips_min_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_min_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_min_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_min_s_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_min_s_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_min_s_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.min.s.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_min_s_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.min.s.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_min_s_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: min_s.w +; CHECK: st.w +; 
CHECK: .size llvm_mips_min_s_w_test +; +@llvm_mips_min_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_min_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_min_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_min_s_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_min_s_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_min_s_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.min.s.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_min_s_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.min.s.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_min_s_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: min_s.d +; CHECK: st.d +; CHECK: .size llvm_mips_min_s_d_test +; +@llvm_mips_min_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_min_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_min_u_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_min_u_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_min_u_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_min_u_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.min.u.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_min_u_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.min.u.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_min_u_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: min_u.b +; CHECK: st.b +; CHECK: .size llvm_mips_min_u_b_test +; +@llvm_mips_min_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_min_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_min_u_h_RES = global 
<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_min_u_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_min_u_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_min_u_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.min.u.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_min_u_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.min.u.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_min_u_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: min_u.h +; CHECK: st.h +; CHECK: .size llvm_mips_min_u_h_test +; +@llvm_mips_min_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_min_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_min_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_min_u_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_min_u_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_min_u_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.min.u.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_min_u_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.min.u.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_min_u_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: min_u.w +; CHECK: st.w +; CHECK: .size llvm_mips_min_u_w_test +; +@llvm_mips_min_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_min_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_min_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_min_u_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_min_u_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_min_u_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.min.u.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_min_u_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.min.u.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_min_u_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: 
min_u.d +; CHECK: st.d +; CHECK: .size llvm_mips_min_u_d_test +; +@llvm_mips_mod_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_mod_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_mod_s_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_mod_s_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_mod_s_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_mod_s_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.mod.s.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_mod_s_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.mod.s.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_mod_s_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: mod_s.b +; CHECK: st.b +; CHECK: .size llvm_mips_mod_s_b_test +; +@llvm_mips_mod_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_mod_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_mod_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_mod_s_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_mod_s_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_mod_s_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.mod.s.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_mod_s_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.mod.s.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_mod_s_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: mod_s.h +; CHECK: st.h +; CHECK: .size llvm_mips_mod_s_h_test +; +@llvm_mips_mod_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 
+@llvm_mips_mod_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_mod_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_mod_s_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_mod_s_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_mod_s_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.mod.s.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_mod_s_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.mod.s.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_mod_s_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: mod_s.w +; CHECK: st.w +; CHECK: .size llvm_mips_mod_s_w_test +; +@llvm_mips_mod_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_mod_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_mod_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_mod_s_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_mod_s_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_mod_s_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.mod.s.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_mod_s_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.mod.s.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_mod_s_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: mod_s.d +; CHECK: st.d +; CHECK: .size llvm_mips_mod_s_d_test +; +@llvm_mips_mod_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_mod_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_mod_u_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_mod_u_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_mod_u_b_ARG1 + %1 = load <16 x i8>* 
@llvm_mips_mod_u_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.mod.u.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_mod_u_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.mod.u.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_mod_u_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: mod_u.b +; CHECK: st.b +; CHECK: .size llvm_mips_mod_u_b_test +; +@llvm_mips_mod_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_mod_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_mod_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_mod_u_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_mod_u_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_mod_u_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.mod.u.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_mod_u_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.mod.u.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_mod_u_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: mod_u.h +; CHECK: st.h +; CHECK: .size llvm_mips_mod_u_h_test +; +@llvm_mips_mod_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_mod_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_mod_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_mod_u_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_mod_u_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_mod_u_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.mod.u.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_mod_u_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.mod.u.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_mod_u_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: mod_u.w +; CHECK: st.w +; CHECK: .size llvm_mips_mod_u_w_test +; 
+@llvm_mips_mod_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_mod_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_mod_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_mod_u_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_mod_u_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_mod_u_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.mod.u.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_mod_u_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.mod.u.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_mod_u_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: mod_u.d +; CHECK: st.d +; CHECK: .size llvm_mips_mod_u_d_test +; +@llvm_mips_mulv_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_mulv_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_mulv_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_mulv_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_mulv_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_mulv_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.mulv.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_mulv_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.mulv.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_mulv_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: mulv.b +; CHECK: st.b +; CHECK: .size llvm_mips_mulv_b_test +; +@llvm_mips_mulv_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_mulv_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_mulv_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, 
i16 0, i16 0>, align 16 + +define void @llvm_mips_mulv_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_mulv_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_mulv_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.mulv.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_mulv_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.mulv.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_mulv_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: mulv.h +; CHECK: st.h +; CHECK: .size llvm_mips_mulv_h_test +; +@llvm_mips_mulv_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_mulv_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_mulv_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_mulv_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_mulv_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_mulv_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.mulv.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_mulv_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.mulv.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_mulv_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: mulv.w +; CHECK: st.w +; CHECK: .size llvm_mips_mulv_w_test +; +@llvm_mips_mulv_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_mulv_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_mulv_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_mulv_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_mulv_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_mulv_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.mulv.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_mulv_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.mulv.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_mulv_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: mulv.d +; CHECK: st.d +; CHECK: .size llvm_mips_mulv_d_test + +define void 
@mulv_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_mulv_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_mulv_b_ARG2 + %2 = mul <16 x i8> %0, %1 + store <16 x i8> %2, <16 x i8>* @llvm_mips_mulv_b_RES + ret void +} + +; CHECK: mulv_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: mulv.b +; CHECK: st.b +; CHECK: .size mulv_b_test + +define void @mulv_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_mulv_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_mulv_h_ARG2 + %2 = mul <8 x i16> %0, %1 + store <8 x i16> %2, <8 x i16>* @llvm_mips_mulv_h_RES + ret void +} + +; CHECK: mulv_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: mulv.h +; CHECK: st.h +; CHECK: .size mulv_h_test + +define void @mulv_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_mulv_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_mulv_w_ARG2 + %2 = mul <4 x i32> %0, %1 + store <4 x i32> %2, <4 x i32>* @llvm_mips_mulv_w_RES + ret void +} + +; CHECK: mulv_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: mulv.w +; CHECK: st.w +; CHECK: .size mulv_w_test + +define void @mulv_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_mulv_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_mulv_d_ARG2 + %2 = mul <2 x i64> %0, %1 + store <2 x i64> %2, <2 x i64>* @llvm_mips_mulv_d_RES + ret void +} + +; CHECK: mulv_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: mulv.d +; CHECK: st.d +; CHECK: .size mulv_d_test +; diff --git a/test/CodeGen/Mips/msa/3r-p.ll b/test/CodeGen/Mips/msa/3r-p.ll new file mode 100644 index 0000000..852023b --- /dev/null +++ b/test/CodeGen/Mips/msa/3r-p.ll @@ -0,0 +1,182 @@ +; Test the MSA intrinsics that are encoded with the 3R instruction format. 
+; There are lots of these so this covers those beginning with 'p' + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_pckev_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_pckev_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_pckev_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_pckev_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_pckev_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_pckev_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.pckev.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_pckev_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.pckev.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_pckev_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: pckev.b +; CHECK: st.b +; CHECK: .size llvm_mips_pckev_b_test +; +@llvm_mips_pckev_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_pckev_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_pckev_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_pckev_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_pckev_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_pckev_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.pckev.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_pckev_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.pckev.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_pckev_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: pckev.h +; CHECK: st.h +; CHECK: 
.size llvm_mips_pckev_h_test +; +@llvm_mips_pckev_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_pckev_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_pckev_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_pckev_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_pckev_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_pckev_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.pckev.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_pckev_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.pckev.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_pckev_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: pckev.w +; CHECK: st.w +; CHECK: .size llvm_mips_pckev_w_test +; +@llvm_mips_pckev_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_pckev_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_pckev_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_pckev_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_pckev_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_pckev_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.pckev.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_pckev_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.pckev.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_pckev_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: pckev.d +; CHECK: st.d +; CHECK: .size llvm_mips_pckev_d_test +; +@llvm_mips_pckod_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_pckod_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_pckod_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void 
@llvm_mips_pckod_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_pckod_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_pckod_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.pckod.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_pckod_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.pckod.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_pckod_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: pckod.b +; CHECK: st.b +; CHECK: .size llvm_mips_pckod_b_test +; +@llvm_mips_pckod_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_pckod_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_pckod_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_pckod_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_pckod_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_pckod_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.pckod.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_pckod_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.pckod.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_pckod_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: pckod.h +; CHECK: st.h +; CHECK: .size llvm_mips_pckod_h_test +; +@llvm_mips_pckod_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_pckod_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_pckod_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_pckod_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_pckod_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_pckod_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.pckod.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_pckod_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.pckod.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_pckod_w_test: +; 
CHECK: ld.w +; CHECK: ld.w +; CHECK: pckod.w +; CHECK: st.w +; CHECK: .size llvm_mips_pckod_w_test +; +@llvm_mips_pckod_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_pckod_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_pckod_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_pckod_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_pckod_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_pckod_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.pckod.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_pckod_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.pckod.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_pckod_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: pckod.d +; CHECK: st.d +; CHECK: .size llvm_mips_pckod_d_test +; diff --git a/test/CodeGen/Mips/msa/3r-s.ll b/test/CodeGen/Mips/msa/3r-s.ll new file mode 100644 index 0000000..30cf265 --- /dev/null +++ b/test/CodeGen/Mips/msa/3r-s.ll @@ -0,0 +1,1353 @@ +; Test the MSA intrinsics that are encoded with the 3R instruction format. 
+; There are lots of these so this covers those beginning with 's' + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_sld_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_sld_b_ARG2 = global i32 10, align 16 +@llvm_mips_sld_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_sld_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_sld_b_ARG1 + %1 = load i32* @llvm_mips_sld_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.sld.b(<16 x i8> %0, i32 %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_sld_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.sld.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_sld_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sld_b_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sld_b_ARG2) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R2]]) +; CHECK-DAG: sld.b [[WD:\$w[0-9]+]], [[WS]]{{\[}}[[RT]]{{\]}} +; CHECK-DAG: st.b [[WD]] +; CHECK: .size llvm_mips_sld_b_test +; +@llvm_mips_sld_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_sld_h_ARG2 = global i32 10, align 16 +@llvm_mips_sld_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_sld_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_sld_h_ARG1 + %1 = load i32* @llvm_mips_sld_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.sld.h(<8 x i16> %0, i32 %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_sld_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.sld.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_sld_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sld_h_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]],
%got(llvm_mips_sld_h_ARG2) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R2]]) +; CHECK-DAG: sld.h [[WD:\$w[0-9]+]], [[WS]]{{\[}}[[RT]]{{\]}} +; CHECK-DAG: st.h [[WD]] +; CHECK: .size llvm_mips_sld_h_test +; +@llvm_mips_sld_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_sld_w_ARG2 = global i32 10, align 16 +@llvm_mips_sld_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_sld_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_sld_w_ARG1 + %1 = load i32* @llvm_mips_sld_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.sld.w(<4 x i32> %0, i32 %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_sld_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.sld.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_sld_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sld_w_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sld_w_ARG2) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R2]]) +; CHECK-DAG: sld.w [[WD:\$w[0-9]+]], [[WS]]{{\[}}[[RT]]{{\]}} +; CHECK-DAG: st.w [[WD]] +; CHECK: .size llvm_mips_sld_w_test +; +@llvm_mips_sld_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_sld_d_ARG2 = global i32 10, align 16 +@llvm_mips_sld_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_sld_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_sld_d_ARG1 + %1 = load i32* @llvm_mips_sld_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.sld.d(<2 x i64> %0, i32 %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_sld_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.sld.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_sld_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sld_d_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sld_d_ARG2) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R2]]) +; CHECK-DAG: sld.d [[WD:\$w[0-9]+]],
[[WS]]{{\[}}[[RT]]{{\]}} +; CHECK-DAG: st.d [[WD]] +; CHECK: .size llvm_mips_sld_d_test +; +@llvm_mips_sll_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_sll_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_sll_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_sll_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_sll_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_sll_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.sll.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_sll_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.sll.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_sll_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sll_b_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sll_b_ARG2) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: sll.b [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.b [[WD]] +; CHECK: .size llvm_mips_sll_b_test +; +@llvm_mips_sll_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_sll_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_sll_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_sll_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_sll_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_sll_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.sll.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_sll_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.sll.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: 
llvm_mips_sll_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sll_h_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sll_h_ARG2) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: sll.h [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.h [[WD]] +; CHECK: .size llvm_mips_sll_h_test +; +@llvm_mips_sll_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_sll_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_sll_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_sll_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_sll_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_sll_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.sll.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_sll_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.sll.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_sll_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sll_w_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sll_w_ARG2) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: sll.w [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.w [[WD]] +; CHECK: .size llvm_mips_sll_w_test +; +@llvm_mips_sll_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_sll_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_sll_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_sll_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_sll_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_sll_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.sll.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_sll_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.sll.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_sll_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sll_d_ARG1) +; CHECK-DAG: lw 
[[R2:\$[0-9]+]], %got(llvm_mips_sll_d_ARG2) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: sll.d [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.d [[WD]] +; CHECK: .size llvm_mips_sll_d_test + +define void @sll_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_sll_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_sll_b_ARG2 + %2 = shl <16 x i8> %0, %1 + store <16 x i8> %2, <16 x i8>* @llvm_mips_sll_b_RES + ret void +} + +; CHECK: sll_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sll_b_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sll_b_ARG2) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: sll.b [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.b [[WD]] +; CHECK: .size sll_b_test + +define void @sll_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_sll_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_sll_h_ARG2 + %2 = shl <8 x i16> %0, %1 + store <8 x i16> %2, <8 x i16>* @llvm_mips_sll_h_RES + ret void +} + +; CHECK: sll_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sll_h_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sll_h_ARG2) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: sll.h [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.h [[WD]] +; CHECK: .size sll_h_test + +define void @sll_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_sll_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_sll_w_ARG2 + %2 = shl <4 x i32> %0, %1 + store <4 x i32> %2, <4 x i32>* @llvm_mips_sll_w_RES + ret void +} + +; CHECK: sll_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sll_w_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sll_w_ARG2) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: sll.w [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.w [[WD]] +; CHECK: .size sll_w_test + 
+define void @sll_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_sll_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_sll_d_ARG2 + %2 = shl <2 x i64> %0, %1 + store <2 x i64> %2, <2 x i64>* @llvm_mips_sll_d_RES + ret void +} + +; CHECK: sll_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sll_d_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sll_d_ARG2) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: sll.d [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.d [[WD]] +; CHECK: .size sll_d_test +; +@llvm_mips_sra_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_sra_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_sra_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_sra_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_sra_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_sra_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.sra.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_sra_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.sra.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_sra_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sra_b_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sra_b_ARG2) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: sra.b [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.b [[WD]] +; CHECK: .size llvm_mips_sra_b_test +; +@llvm_mips_sra_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_sra_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 
+@llvm_mips_sra_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_sra_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_sra_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_sra_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.sra.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_sra_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.sra.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_sra_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sra_h_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sra_h_ARG2) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: sra.h [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.h [[WD]] +; CHECK: .size llvm_mips_sra_h_test +; +@llvm_mips_sra_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_sra_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_sra_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_sra_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_sra_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_sra_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.sra.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_sra_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.sra.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_sra_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sra_w_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sra_w_ARG2) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: sra.w [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.w [[WD]] +; CHECK: .size llvm_mips_sra_w_test +; +@llvm_mips_sra_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_sra_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_sra_d_RES = global <2 x i64> <i64 0, i64 0>, align 
16 + +define void @llvm_mips_sra_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_sra_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_sra_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.sra.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_sra_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.sra.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_sra_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sra_d_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sra_d_ARG2) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: sra.d [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.d [[WD]] +; CHECK: .size llvm_mips_sra_d_test +; + +define void @sra_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_sra_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_sra_b_ARG2 + %2 = ashr <16 x i8> %0, %1 + store <16 x i8> %2, <16 x i8>* @llvm_mips_sra_b_RES + ret void +} + +; CHECK: sra_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sra_b_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sra_b_ARG2) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: sra.b [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.b [[WD]] +; CHECK: .size sra_b_test + +define void @sra_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_sra_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_sra_h_ARG2 + %2 = ashr <8 x i16> %0, %1 + store <8 x i16> %2, <8 x i16>* @llvm_mips_sra_h_RES + ret void +} + +; CHECK: sra_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sra_h_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sra_h_ARG2) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: sra.h [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.h [[WD]] +; CHECK: .size sra_h_test + +define void @sra_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_sra_w_ARG1 + %1 = load <4 
x i32>* @llvm_mips_sra_w_ARG2 + %2 = ashr <4 x i32> %0, %1 + store <4 x i32> %2, <4 x i32>* @llvm_mips_sra_w_RES + ret void +} + +; CHECK: sra_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sra_w_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sra_w_ARG2) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: sra.w [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.w [[WD]] +; CHECK: .size sra_w_test + +define void @sra_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_sra_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_sra_d_ARG2 + %2 = ashr <2 x i64> %0, %1 + store <2 x i64> %2, <2 x i64>* @llvm_mips_sra_d_RES + ret void +} + +; CHECK: sra_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sra_d_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sra_d_ARG2) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: sra.d [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.d [[WD]] +; CHECK: .size sra_d_test + +@llvm_mips_srar_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_srar_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_srar_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_srar_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_srar_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_srar_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.srar.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_srar_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.srar.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_srar_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srar_b_ARG1) +; 
CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srar_b_ARG2) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: srar.b [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.b [[WD]] +; CHECK: .size llvm_mips_srar_b_test +; +@llvm_mips_srar_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_srar_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_srar_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_srar_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_srar_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_srar_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.srar.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_srar_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.srar.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_srar_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srar_h_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srar_h_ARG2) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: srar.h [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.h [[WD]] +; CHECK: .size llvm_mips_srar_h_test +; +@llvm_mips_srar_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_srar_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_srar_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_srar_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_srar_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_srar_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.srar.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_srar_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.srar.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_srar_w_test: +; 
CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srar_w_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srar_w_ARG2) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: srar.w [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.w [[WD]] +; CHECK: .size llvm_mips_srar_w_test +; +@llvm_mips_srar_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_srar_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_srar_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_srar_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_srar_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_srar_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.srar.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_srar_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.srar.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_srar_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srar_d_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srar_d_ARG2) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: srar.d [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.d [[WD]] +; CHECK: .size llvm_mips_srar_d_test +; +@llvm_mips_srl_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_srl_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_srl_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_srl_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_srl_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_srl_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.srl.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* 
@llvm_mips_srl_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.srl.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_srl_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srl_b_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srl_b_ARG2) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: srl.b [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.b [[WD]] +; CHECK: .size llvm_mips_srl_b_test +; +@llvm_mips_srl_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_srl_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_srl_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_srl_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_srl_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_srl_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.srl.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_srl_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.srl.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_srl_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srl_h_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srl_h_ARG2) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: srl.h [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.h [[WD]] +; CHECK: .size llvm_mips_srl_h_test +; +@llvm_mips_srl_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_srl_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_srl_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_srl_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_srl_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_srl_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.srl.w(<4 x i32> %0, <4 x i32> %1) + 
store <4 x i32> %2, <4 x i32>* @llvm_mips_srl_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.srl.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_srl_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srl_w_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srl_w_ARG2) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: srl.w [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.w [[WD]] +; CHECK: .size llvm_mips_srl_w_test +; +@llvm_mips_srl_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_srl_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_srl_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_srl_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_srl_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_srl_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.srl.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_srl_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.srl.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_srl_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srl_d_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srl_d_ARG2) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: srl.d [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.d [[WD]] +; CHECK: .size llvm_mips_srl_d_test +; +@llvm_mips_srlr_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_srlr_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_srlr_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_srlr_b_test() nounwind { +entry: + %0 = load <16 x i8>* 
@llvm_mips_srlr_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_srlr_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.srlr.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_srlr_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.srlr.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_srlr_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srlr_b_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srlr_b_ARG2) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: srlr.b [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.b [[WD]] +; CHECK: .size llvm_mips_srlr_b_test +; +@llvm_mips_srlr_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_srlr_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_srlr_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_srlr_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_srlr_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_srlr_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.srlr.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_srlr_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.srlr.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_srlr_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srlr_h_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srlr_h_ARG2) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: srlr.h [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.h [[WD]] +; CHECK: .size llvm_mips_srlr_h_test +; +@llvm_mips_srlr_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_srlr_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_srlr_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void 
@llvm_mips_srlr_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_srlr_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_srlr_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.srlr.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_srlr_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.srlr.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_srlr_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srlr_w_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srlr_w_ARG2) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: srlr.w [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.w [[WD]] +; CHECK: .size llvm_mips_srlr_w_test +; +@llvm_mips_srlr_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_srlr_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_srlr_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_srlr_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_srlr_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_srlr_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.srlr.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_srlr_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.srlr.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_srlr_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srlr_d_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srlr_d_ARG2) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: srlr.d [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.d [[WD]] +; CHECK: .size llvm_mips_srlr_d_test +; + +define void @srl_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_srl_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_srl_b_ARG2 + %2 = lshr <16 x i8> %0, %1 + store <16 x i8> %2, <16 x i8>* @llvm_mips_srl_b_RES + ret void +} + +; CHECK: srl_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srl_b_ARG1) +; CHECK-DAG: lw 
[[R2:\$[0-9]+]], %got(llvm_mips_srl_b_ARG2) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: srl.b [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.b [[WD]] +; CHECK: .size srl_b_test + +define void @srl_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_srl_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_srl_h_ARG2 + %2 = lshr <8 x i16> %0, %1 + store <8 x i16> %2, <8 x i16>* @llvm_mips_srl_h_RES + ret void +} + +; CHECK: srl_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srl_h_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srl_h_ARG2) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: srl.h [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.h [[WD]] +; CHECK: .size srl_h_test + +define void @srl_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_srl_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_srl_w_ARG2 + %2 = lshr <4 x i32> %0, %1 + store <4 x i32> %2, <4 x i32>* @llvm_mips_srl_w_RES + ret void +} + +; CHECK: srl_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srl_w_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srl_w_ARG2) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: srl.w [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.w [[WD]] +; CHECK: .size srl_w_test + +define void @srl_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_srl_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_srl_d_ARG2 + %2 = lshr <2 x i64> %0, %1 + store <2 x i64> %2, <2 x i64>* @llvm_mips_srl_d_RES + ret void +} + +; CHECK: srl_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srl_d_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srl_d_ARG2) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: srl.d [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.d [[WD]] +; CHECK: .size srl_d_test + 
+@llvm_mips_subs_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_subs_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_subs_s_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_subs_s_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_subs_s_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_subs_s_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.subs.s.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_subs_s_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.subs.s.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_subs_s_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subs_s_b_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subs_s_b_ARG2) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: subs_s.b [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.b [[WD]] +; CHECK: .size llvm_mips_subs_s_b_test +; +@llvm_mips_subs_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_subs_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_subs_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_subs_s_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_subs_s_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_subs_s_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.subs.s.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_subs_s_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.subs.s.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_subs_s_h_test: +; 
CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subs_s_h_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subs_s_h_ARG2) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: subs_s.h [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.h [[WD]] +; CHECK: .size llvm_mips_subs_s_h_test +; +@llvm_mips_subs_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_subs_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_subs_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_subs_s_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_subs_s_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_subs_s_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.subs.s.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_subs_s_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.subs.s.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_subs_s_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subs_s_w_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subs_s_w_ARG2) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: subs_s.w [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.w [[WD]] +; CHECK: .size llvm_mips_subs_s_w_test +; +@llvm_mips_subs_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_subs_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_subs_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_subs_s_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_subs_s_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_subs_s_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.subs.s.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_subs_s_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.subs.s.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_subs_s_d_test: +; CHECK-DAG: lw 
[[R1:\$[0-9]+]], %got(llvm_mips_subs_s_d_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subs_s_d_ARG2) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: subs_s.d [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.d [[WD]] +; CHECK: .size llvm_mips_subs_s_d_test +; +@llvm_mips_subs_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_subs_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_subs_u_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_subs_u_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_subs_u_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_subs_u_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.subs.u.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_subs_u_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.subs.u.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_subs_u_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subs_u_b_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subs_u_b_ARG2) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: subs_u.b [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.b [[WD]] +; CHECK: .size llvm_mips_subs_u_b_test +; +@llvm_mips_subs_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_subs_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_subs_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_subs_u_h_test() nounwind { +entry: + %0 = load <8 x i16>* 
@llvm_mips_subs_u_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_subs_u_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.subs.u.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_subs_u_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.subs.u.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_subs_u_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subs_u_h_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subs_u_h_ARG2) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: subs_u.h [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.h [[WD]] +; CHECK: .size llvm_mips_subs_u_h_test +; +@llvm_mips_subs_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_subs_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_subs_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_subs_u_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_subs_u_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_subs_u_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.subs.u.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_subs_u_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.subs.u.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_subs_u_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subs_u_w_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subs_u_w_ARG2) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: subs_u.w [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.w [[WD]] +; CHECK: .size llvm_mips_subs_u_w_test +; +@llvm_mips_subs_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_subs_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_subs_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_subs_u_d_test() nounwind { +entry: + %0 = load <2 x i64>* 
@llvm_mips_subs_u_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_subs_u_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.subs.u.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_subs_u_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.subs.u.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_subs_u_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subs_u_d_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subs_u_d_ARG2) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: subs_u.d [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.d [[WD]] +; CHECK: .size llvm_mips_subs_u_d_test +; +@llvm_mips_subsus_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_subsus_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_subsus_u_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_subsus_u_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_subsus_u_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_subsus_u_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.subsus.u.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_subsus_u_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.subsus.u.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_subsus_u_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subsus_u_b_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subsus_u_b_ARG2) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: subsus_u.b [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.b [[WD]] +; CHECK: .size llvm_mips_subsus_u_b_test +; +@llvm_mips_subsus_u_h_ARG1 = global <8 x i16> <i16 0, i16 
1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_subsus_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_subsus_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_subsus_u_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_subsus_u_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_subsus_u_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.subsus.u.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_subsus_u_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.subsus.u.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_subsus_u_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subsus_u_h_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subsus_u_h_ARG2) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: subsus_u.h [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.h [[WD]] +; CHECK: .size llvm_mips_subsus_u_h_test +; +@llvm_mips_subsus_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_subsus_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_subsus_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_subsus_u_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_subsus_u_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_subsus_u_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.subsus.u.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_subsus_u_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.subsus.u.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_subsus_u_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subsus_u_w_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subsus_u_w_ARG2) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: 
subsus_u.w [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.w [[WD]] +; CHECK: .size llvm_mips_subsus_u_w_test +; +@llvm_mips_subsus_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_subsus_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_subsus_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_subsus_u_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_subsus_u_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_subsus_u_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.subsus.u.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_subsus_u_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.subsus.u.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_subsus_u_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subsus_u_d_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subsus_u_d_ARG2) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: subsus_u.d [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.d [[WD]] +; CHECK: .size llvm_mips_subsus_u_d_test +; +@llvm_mips_subsuu_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_subsuu_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_subsuu_s_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_subsuu_s_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_subsuu_s_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_subsuu_s_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.subsuu.s.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_subsuu_s_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.subsuu.s.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: 
llvm_mips_subsuu_s_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subsuu_s_b_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subsuu_s_b_ARG2) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: subsuu_s.b [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.b [[WD]] +; CHECK: .size llvm_mips_subsuu_s_b_test +; +@llvm_mips_subsuu_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_subsuu_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_subsuu_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_subsuu_s_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_subsuu_s_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_subsuu_s_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.subsuu.s.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_subsuu_s_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.subsuu.s.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_subsuu_s_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subsuu_s_h_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subsuu_s_h_ARG2) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: subsuu_s.h [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.h [[WD]] +; CHECK: .size llvm_mips_subsuu_s_h_test +; +@llvm_mips_subsuu_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_subsuu_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_subsuu_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_subsuu_s_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_subsuu_s_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_subsuu_s_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.subsuu.s.w(<4 x i32> %0, <4 x 
i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_subsuu_s_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.subsuu.s.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_subsuu_s_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subsuu_s_w_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subsuu_s_w_ARG2) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: subsuu_s.w [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.w [[WD]] +; CHECK: .size llvm_mips_subsuu_s_w_test +; +@llvm_mips_subsuu_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_subsuu_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_subsuu_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_subsuu_s_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_subsuu_s_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_subsuu_s_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.subsuu.s.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_subsuu_s_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.subsuu.s.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_subsuu_s_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subsuu_s_d_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subsuu_s_d_ARG2) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: subsuu_s.d [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.d [[WD]] +; CHECK: .size llvm_mips_subsuu_s_d_test +; +@llvm_mips_subv_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_subv_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_subv_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 
0, i8 0>, align 16 + +define void @llvm_mips_subv_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_subv_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_subv_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.subv.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_subv_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.subv.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_subv_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subv_b_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subv_b_ARG2) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: subv.b [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.b [[WD]] +; CHECK: .size llvm_mips_subv_b_test +; +@llvm_mips_subv_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_subv_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_subv_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_subv_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_subv_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_subv_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.subv.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_subv_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.subv.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_subv_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subv_h_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subv_h_ARG2) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: subv.h [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.h [[WD]] +; CHECK: .size llvm_mips_subv_h_test +; +@llvm_mips_subv_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_subv_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 
+@llvm_mips_subv_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_subv_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_subv_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_subv_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.subv.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_subv_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.subv.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_subv_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subv_w_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subv_w_ARG2) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: subv.w [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.w [[WD]] +; CHECK: .size llvm_mips_subv_w_test +; +@llvm_mips_subv_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_subv_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_subv_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_subv_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_subv_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_subv_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.subv.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_subv_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.subv.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_subv_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subv_d_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subv_d_ARG2) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: subv.d [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.d [[WD]] +; CHECK: .size llvm_mips_subv_d_test +; + +define void @subv_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_subv_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_subv_b_ARG2 + %2 = sub <16 x i8> %0, %1 + store <16 x i8> %2, <16 x i8>* @llvm_mips_subv_b_RES + ret void +} + 
+; CHECK: subv_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subv_b_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subv_b_ARG2) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: subv.b [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.b [[WD]] +; CHECK: .size subv_b_test + +define void @subv_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_subv_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_subv_h_ARG2 + %2 = sub <8 x i16> %0, %1 + store <8 x i16> %2, <8 x i16>* @llvm_mips_subv_h_RES + ret void +} + +; CHECK: subv_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subv_h_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subv_h_ARG2) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: subv.h [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.h [[WD]] +; CHECK: .size subv_h_test + +define void @subv_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_subv_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_subv_w_ARG2 + %2 = sub <4 x i32> %0, %1 + store <4 x i32> %2, <4 x i32>* @llvm_mips_subv_w_RES + ret void +} + +; CHECK: subv_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subv_w_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subv_w_ARG2) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: subv.w [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.w [[WD]] +; CHECK: .size subv_w_test + +define void @subv_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_subv_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_subv_d_ARG2 + %2 = sub <2 x i64> %0, %1 + store <2 x i64> %2, <2 x i64>* @llvm_mips_subv_d_RES + ret void +} + +; CHECK: subv_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subv_d_ARG1) +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subv_d_ARG2) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 
0([[R2]]) +; CHECK-DAG: subv.d [[WD:\$w[0-9]+]], [[WS]], [[WT]] +; CHECK-DAG: st.d [[WD]] +; CHECK: .size subv_d_test +; diff --git a/test/CodeGen/Mips/msa/3r-v.ll b/test/CodeGen/Mips/msa/3r-v.ll new file mode 100644 index 0000000..c9693f9 --- /dev/null +++ b/test/CodeGen/Mips/msa/3r-v.ll @@ -0,0 +1,105 @@ +; Test the MSA intrinsics that are encoded with the 3R instruction format. +; There are lots of these so this covers those beginning with 'v' + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_vshf_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_vshf_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_vshf_b_ARG3 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_vshf_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_vshf_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_vshf_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_vshf_b_ARG2 + %2 = load <16 x i8>* @llvm_mips_vshf_b_ARG3 + %3 = tail call <16 x i8> @llvm.mips.vshf.b(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) + store <16 x i8> %3, <16 x i8>* @llvm_mips_vshf_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.vshf.b(<16 x i8>, <16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_vshf_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: ld.b +; CHECK: vshf.b +; CHECK: st.b +; CHECK: .size llvm_mips_vshf_b_test +; +@llvm_mips_vshf_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_vshf_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, 
i16 13, i16 14, i16 15>, align 16 +@llvm_mips_vshf_h_ARG3 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_vshf_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_vshf_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_vshf_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_vshf_h_ARG2 + %2 = load <8 x i16>* @llvm_mips_vshf_h_ARG3 + %3 = tail call <8 x i16> @llvm.mips.vshf.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) + store <8 x i16> %3, <8 x i16>* @llvm_mips_vshf_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.vshf.h(<8 x i16>, <8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_vshf_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: ld.h +; CHECK: vshf.h +; CHECK: st.h +; CHECK: .size llvm_mips_vshf_h_test +; +@llvm_mips_vshf_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_vshf_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_vshf_w_ARG3 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_vshf_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_vshf_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_vshf_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_vshf_w_ARG2 + %2 = load <4 x i32>* @llvm_mips_vshf_w_ARG3 + %3 = tail call <4 x i32> @llvm.mips.vshf.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) + store <4 x i32> %3, <4 x i32>* @llvm_mips_vshf_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.vshf.w(<4 x i32>, <4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_vshf_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: ld.w +; CHECK: vshf.w +; CHECK: st.w +; CHECK: .size llvm_mips_vshf_w_test +; +@llvm_mips_vshf_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_vshf_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_vshf_d_ARG3 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_vshf_d_RES = global <2 
x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_vshf_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_vshf_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_vshf_d_ARG2 + %2 = load <2 x i64>* @llvm_mips_vshf_d_ARG3 + %3 = tail call <2 x i64> @llvm.mips.vshf.d(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) + store <2 x i64> %3, <2 x i64>* @llvm_mips_vshf_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.vshf.d(<2 x i64>, <2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_vshf_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: vshf.d +; CHECK: st.d +; CHECK: .size llvm_mips_vshf_d_test +; diff --git a/test/CodeGen/Mips/msa/3r_4r.ll b/test/CodeGen/Mips/msa/3r_4r.ll new file mode 100644 index 0000000..b7fd728 --- /dev/null +++ b/test/CodeGen/Mips/msa/3r_4r.ll @@ -0,0 +1,206 @@ +; Test the MSA intrinsics that are encoded with the 3R instruction format and +; use the result as a third operand. + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_maddv_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_maddv_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_maddv_b_ARG3 = global <16 x i8> <i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39, i8 40, i8 41, i8 42, i8 43, i8 44, i8 45, i8 46, i8 47>, align 16 +@llvm_mips_maddv_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_maddv_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_maddv_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_maddv_b_ARG2 + %2 = load <16 x i8>* @llvm_mips_maddv_b_ARG3 + %3 = tail call <16 x i8> @llvm.mips.maddv.b(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) + store <16 x i8> %3, <16 x 
i8>* @llvm_mips_maddv_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.maddv.b(<16 x i8>, <16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_maddv_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: ld.b +; CHECK: maddv.b +; CHECK: st.b +; CHECK: .size llvm_mips_maddv_b_test +; +@llvm_mips_maddv_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_maddv_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_maddv_h_ARG3 = global <8 x i16> <i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23>, align 16 +@llvm_mips_maddv_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_maddv_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_maddv_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_maddv_h_ARG2 + %2 = load <8 x i16>* @llvm_mips_maddv_h_ARG3 + %3 = tail call <8 x i16> @llvm.mips.maddv.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) + store <8 x i16> %3, <8 x i16>* @llvm_mips_maddv_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.maddv.h(<8 x i16>, <8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_maddv_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: ld.h +; CHECK: maddv.h +; CHECK: st.h +; CHECK: .size llvm_mips_maddv_h_test +; +@llvm_mips_maddv_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_maddv_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_maddv_w_ARG3 = global <4 x i32> <i32 8, i32 9, i32 10, i32 11>, align 16 +@llvm_mips_maddv_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_maddv_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_maddv_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_maddv_w_ARG2 + %2 = load <4 x i32>* @llvm_mips_maddv_w_ARG3 + %3 = tail call <4 x i32> @llvm.mips.maddv.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) + store <4 x i32> %3, <4 x i32>* 
@llvm_mips_maddv_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.maddv.w(<4 x i32>, <4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_maddv_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: ld.w +; CHECK: maddv.w +; CHECK: st.w +; CHECK: .size llvm_mips_maddv_w_test +; +@llvm_mips_maddv_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_maddv_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_maddv_d_ARG3 = global <2 x i64> <i64 4, i64 5>, align 16 +@llvm_mips_maddv_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_maddv_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_maddv_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_maddv_d_ARG2 + %2 = load <2 x i64>* @llvm_mips_maddv_d_ARG3 + %3 = tail call <2 x i64> @llvm.mips.maddv.d(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) + store <2 x i64> %3, <2 x i64>* @llvm_mips_maddv_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.maddv.d(<2 x i64>, <2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_maddv_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: ld.d +; CHECK: maddv.d +; CHECK: st.d +; CHECK: .size llvm_mips_maddv_d_test +; +@llvm_mips_msubv_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_msubv_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_msubv_b_ARG3 = global <16 x i8> <i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39, i8 40, i8 41, i8 42, i8 43, i8 44, i8 45, i8 46, i8 47>, align 16 +@llvm_mips_msubv_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_msubv_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_msubv_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_msubv_b_ARG2 + %2 = load <16 x i8>* @llvm_mips_msubv_b_ARG3 + %3 = 
tail call <16 x i8> @llvm.mips.msubv.b(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) + store <16 x i8> %3, <16 x i8>* @llvm_mips_msubv_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.msubv.b(<16 x i8>, <16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_msubv_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: ld.b +; CHECK: msubv.b +; CHECK: st.b +; CHECK: .size llvm_mips_msubv_b_test +; +@llvm_mips_msubv_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_msubv_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_msubv_h_ARG3 = global <8 x i16> <i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23>, align 16 +@llvm_mips_msubv_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_msubv_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_msubv_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_msubv_h_ARG2 + %2 = load <8 x i16>* @llvm_mips_msubv_h_ARG3 + %3 = tail call <8 x i16> @llvm.mips.msubv.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) + store <8 x i16> %3, <8 x i16>* @llvm_mips_msubv_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.msubv.h(<8 x i16>, <8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_msubv_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: ld.h +; CHECK: msubv.h +; CHECK: st.h +; CHECK: .size llvm_mips_msubv_h_test +; +@llvm_mips_msubv_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_msubv_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_msubv_w_ARG3 = global <4 x i32> <i32 8, i32 9, i32 10, i32 11>, align 16 +@llvm_mips_msubv_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_msubv_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_msubv_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_msubv_w_ARG2 + %2 = load <4 x i32>* @llvm_mips_msubv_w_ARG3 + %3 = tail call <4 x 
i32> @llvm.mips.msubv.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) + store <4 x i32> %3, <4 x i32>* @llvm_mips_msubv_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.msubv.w(<4 x i32>, <4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_msubv_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: ld.w +; CHECK: msubv.w +; CHECK: st.w +; CHECK: .size llvm_mips_msubv_w_test +; +@llvm_mips_msubv_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_msubv_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_msubv_d_ARG3 = global <2 x i64> <i64 4, i64 5>, align 16 +@llvm_mips_msubv_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_msubv_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_msubv_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_msubv_d_ARG2 + %2 = load <2 x i64>* @llvm_mips_msubv_d_ARG3 + %3 = tail call <2 x i64> @llvm.mips.msubv.d(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) + store <2 x i64> %3, <2 x i64>* @llvm_mips_msubv_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.msubv.d(<2 x i64>, <2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_msubv_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: ld.d +; CHECK: msubv.d +; CHECK: st.d +; CHECK: .size llvm_mips_msubv_d_test +; diff --git a/test/CodeGen/Mips/msa/3r_4r_widen.ll b/test/CodeGen/Mips/msa/3r_4r_widen.ll new file mode 100644 index 0000000..7063e45 --- /dev/null +++ b/test/CodeGen/Mips/msa/3r_4r_widen.ll @@ -0,0 +1,307 @@ +; Test the MSA intrinsics that are encoded with the 3R instruction format and +; use the result as a third operand and results in wider elements than the +; operands had. 
+ +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_dpadd_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_dpadd_s_h_ARG2 = global <16 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23>, align 16 +@llvm_mips_dpadd_s_h_ARG3 = global <16 x i8> <i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39>, align 16 +@llvm_mips_dpadd_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_dpadd_s_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_dpadd_s_h_ARG1 + %1 = load <16 x i8>* @llvm_mips_dpadd_s_h_ARG2 + %2 = load <16 x i8>* @llvm_mips_dpadd_s_h_ARG3 + %3 = tail call <8 x i16> @llvm.mips.dpadd.s.h(<8 x i16> %0, <16 x i8> %1, <16 x i8> %2) + store <8 x i16> %3, <8 x i16>* @llvm_mips_dpadd_s_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.dpadd.s.h(<8 x i16>, <16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_dpadd_s_h_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: ld.h +; CHECK: dpadd_s.h +; CHECK: st.h +; CHECK: .size llvm_mips_dpadd_s_h_test +; +@llvm_mips_dpadd_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_dpadd_s_w_ARG2 = global <8 x i16> <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>, align 16 +@llvm_mips_dpadd_s_w_ARG3 = global <8 x i16> <i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>, align 16 +@llvm_mips_dpadd_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_dpadd_s_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_dpadd_s_w_ARG1 + %1 = load <8 x i16>* @llvm_mips_dpadd_s_w_ARG2 + %2 = load <8 x i16>* @llvm_mips_dpadd_s_w_ARG3 + %3 = tail call <4 x i32> @llvm.mips.dpadd.s.w(<4 x i32> %0, <8 x i16> %1, 
<8 x i16> %2) + store <4 x i32> %3, <4 x i32>* @llvm_mips_dpadd_s_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.dpadd.s.w(<4 x i32>, <8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_dpadd_s_w_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: ld.w +; CHECK: dpadd_s.w +; CHECK: st.w +; CHECK: .size llvm_mips_dpadd_s_w_test +; +@llvm_mips_dpadd_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_dpadd_s_d_ARG2 = global <4 x i32> <i32 2, i32 3, i32 4, i32 5>, align 16 +@llvm_mips_dpadd_s_d_ARG3 = global <4 x i32> <i32 6, i32 7, i32 8, i32 9>, align 16 +@llvm_mips_dpadd_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_dpadd_s_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_dpadd_s_d_ARG1 + %1 = load <4 x i32>* @llvm_mips_dpadd_s_d_ARG2 + %2 = load <4 x i32>* @llvm_mips_dpadd_s_d_ARG3 + %3 = tail call <2 x i64> @llvm.mips.dpadd.s.d(<2 x i64> %0, <4 x i32> %1, <4 x i32> %2) + store <2 x i64> %3, <2 x i64>* @llvm_mips_dpadd_s_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.dpadd.s.d(<2 x i64>, <4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_dpadd_s_d_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: ld.d +; CHECK: dpadd_s.d +; CHECK: st.d +; CHECK: .size llvm_mips_dpadd_s_d_test +; +@llvm_mips_dpadd_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_dpadd_u_h_ARG2 = global <16 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23>, align 16 +@llvm_mips_dpadd_u_h_ARG3 = global <16 x i8> <i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39>, align 16 +@llvm_mips_dpadd_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_dpadd_u_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_dpadd_u_h_ARG1 + %1 = load <16 x i8>* @llvm_mips_dpadd_u_h_ARG2 + %2 = load <16 x 
i8>* @llvm_mips_dpadd_u_h_ARG3 + %3 = tail call <8 x i16> @llvm.mips.dpadd.u.h(<8 x i16> %0, <16 x i8> %1, <16 x i8> %2) + store <8 x i16> %3, <8 x i16>* @llvm_mips_dpadd_u_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.dpadd.u.h(<8 x i16>, <16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_dpadd_u_h_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: ld.h +; CHECK: dpadd_u.h +; CHECK: st.h +; CHECK: .size llvm_mips_dpadd_u_h_test +; +@llvm_mips_dpadd_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_dpadd_u_w_ARG2 = global <8 x i16> <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>, align 16 +@llvm_mips_dpadd_u_w_ARG3 = global <8 x i16> <i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>, align 16 +@llvm_mips_dpadd_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_dpadd_u_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_dpadd_u_w_ARG1 + %1 = load <8 x i16>* @llvm_mips_dpadd_u_w_ARG2 + %2 = load <8 x i16>* @llvm_mips_dpadd_u_w_ARG3 + %3 = tail call <4 x i32> @llvm.mips.dpadd.u.w(<4 x i32> %0, <8 x i16> %1, <8 x i16> %2) + store <4 x i32> %3, <4 x i32>* @llvm_mips_dpadd_u_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.dpadd.u.w(<4 x i32>, <8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_dpadd_u_w_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: ld.w +; CHECK: dpadd_u.w +; CHECK: st.w +; CHECK: .size llvm_mips_dpadd_u_w_test +; +@llvm_mips_dpadd_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_dpadd_u_d_ARG2 = global <4 x i32> <i32 2, i32 3, i32 4, i32 5>, align 16 +@llvm_mips_dpadd_u_d_ARG3 = global <4 x i32> <i32 6, i32 7, i32 8, i32 9>, align 16 +@llvm_mips_dpadd_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_dpadd_u_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_dpadd_u_d_ARG1 + %1 = load <4 x i32>* @llvm_mips_dpadd_u_d_ARG2 + %2 = load <4 x i32>* @llvm_mips_dpadd_u_d_ARG3 + %3 = tail call 
<2 x i64> @llvm.mips.dpadd.u.d(<2 x i64> %0, <4 x i32> %1, <4 x i32> %2) + store <2 x i64> %3, <2 x i64>* @llvm_mips_dpadd_u_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.dpadd.u.d(<2 x i64>, <4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_dpadd_u_d_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: ld.d +; CHECK: dpadd_u.d +; CHECK: st.d +; CHECK: .size llvm_mips_dpadd_u_d_test +; +@llvm_mips_dpsub_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_dpsub_s_h_ARG2 = global <16 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23>, align 16 +@llvm_mips_dpsub_s_h_ARG3 = global <16 x i8> <i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39>, align 16 +@llvm_mips_dpsub_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_dpsub_s_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_dpsub_s_h_ARG1 + %1 = load <16 x i8>* @llvm_mips_dpsub_s_h_ARG2 + %2 = load <16 x i8>* @llvm_mips_dpsub_s_h_ARG3 + %3 = tail call <8 x i16> @llvm.mips.dpsub.s.h(<8 x i16> %0, <16 x i8> %1, <16 x i8> %2) + store <8 x i16> %3, <8 x i16>* @llvm_mips_dpsub_s_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.dpsub.s.h(<8 x i16>, <16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_dpsub_s_h_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: ld.h +; CHECK: dpsub_s.h +; CHECK: st.h +; CHECK: .size llvm_mips_dpsub_s_h_test +; +@llvm_mips_dpsub_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_dpsub_s_w_ARG2 = global <8 x i16> <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>, align 16 +@llvm_mips_dpsub_s_w_ARG3 = global <8 x i16> <i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>, align 16 +@llvm_mips_dpsub_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void 
@llvm_mips_dpsub_s_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_dpsub_s_w_ARG1 + %1 = load <8 x i16>* @llvm_mips_dpsub_s_w_ARG2 + %2 = load <8 x i16>* @llvm_mips_dpsub_s_w_ARG3 + %3 = tail call <4 x i32> @llvm.mips.dpsub.s.w(<4 x i32> %0, <8 x i16> %1, <8 x i16> %2) + store <4 x i32> %3, <4 x i32>* @llvm_mips_dpsub_s_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.dpsub.s.w(<4 x i32>, <8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_dpsub_s_w_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: ld.w +; CHECK: dpsub_s.w +; CHECK: st.w +; CHECK: .size llvm_mips_dpsub_s_w_test +; +@llvm_mips_dpsub_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_dpsub_s_d_ARG2 = global <4 x i32> <i32 2, i32 3, i32 4, i32 5>, align 16 +@llvm_mips_dpsub_s_d_ARG3 = global <4 x i32> <i32 6, i32 7, i32 8, i32 9>, align 16 +@llvm_mips_dpsub_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_dpsub_s_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_dpsub_s_d_ARG1 + %1 = load <4 x i32>* @llvm_mips_dpsub_s_d_ARG2 + %2 = load <4 x i32>* @llvm_mips_dpsub_s_d_ARG3 + %3 = tail call <2 x i64> @llvm.mips.dpsub.s.d(<2 x i64> %0, <4 x i32> %1, <4 x i32> %2) + store <2 x i64> %3, <2 x i64>* @llvm_mips_dpsub_s_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.dpsub.s.d(<2 x i64>, <4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_dpsub_s_d_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: ld.d +; CHECK: dpsub_s.d +; CHECK: st.d +; CHECK: .size llvm_mips_dpsub_s_d_test +; +@llvm_mips_dpsub_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_dpsub_u_h_ARG2 = global <16 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23>, align 16 +@llvm_mips_dpsub_u_h_ARG3 = global <16 x i8> <i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39>, align 16 
+@llvm_mips_dpsub_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_dpsub_u_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_dpsub_u_h_ARG1 + %1 = load <16 x i8>* @llvm_mips_dpsub_u_h_ARG2 + %2 = load <16 x i8>* @llvm_mips_dpsub_u_h_ARG3 + %3 = tail call <8 x i16> @llvm.mips.dpsub.u.h(<8 x i16> %0, <16 x i8> %1, <16 x i8> %2) + store <8 x i16> %3, <8 x i16>* @llvm_mips_dpsub_u_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.dpsub.u.h(<8 x i16>, <16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_dpsub_u_h_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: ld.h +; CHECK: dpsub_u.h +; CHECK: st.h +; CHECK: .size llvm_mips_dpsub_u_h_test +; +@llvm_mips_dpsub_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_dpsub_u_w_ARG2 = global <8 x i16> <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>, align 16 +@llvm_mips_dpsub_u_w_ARG3 = global <8 x i16> <i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>, align 16 +@llvm_mips_dpsub_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_dpsub_u_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_dpsub_u_w_ARG1 + %1 = load <8 x i16>* @llvm_mips_dpsub_u_w_ARG2 + %2 = load <8 x i16>* @llvm_mips_dpsub_u_w_ARG3 + %3 = tail call <4 x i32> @llvm.mips.dpsub.u.w(<4 x i32> %0, <8 x i16> %1, <8 x i16> %2) + store <4 x i32> %3, <4 x i32>* @llvm_mips_dpsub_u_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.dpsub.u.w(<4 x i32>, <8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_dpsub_u_w_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: ld.w +; CHECK: dpsub_u.w +; CHECK: st.w +; CHECK: .size llvm_mips_dpsub_u_w_test +; +@llvm_mips_dpsub_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_dpsub_u_d_ARG2 = global <4 x i32> <i32 2, i32 3, i32 4, i32 5>, align 16 +@llvm_mips_dpsub_u_d_ARG3 = global <4 x i32> <i32 6, i32 7, i32 8, i32 9>, align 16 
+@llvm_mips_dpsub_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_dpsub_u_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_dpsub_u_d_ARG1 + %1 = load <4 x i32>* @llvm_mips_dpsub_u_d_ARG2 + %2 = load <4 x i32>* @llvm_mips_dpsub_u_d_ARG3 + %3 = tail call <2 x i64> @llvm.mips.dpsub.u.d(<2 x i64> %0, <4 x i32> %1, <4 x i32> %2) + store <2 x i64> %3, <2 x i64>* @llvm_mips_dpsub_u_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.dpsub.u.d(<2 x i64>, <4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_dpsub_u_d_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: ld.d +; CHECK: dpsub_u.d +; CHECK: st.d +; CHECK: .size llvm_mips_dpsub_u_d_test +; diff --git a/test/CodeGen/Mips/msa/3r_splat.ll b/test/CodeGen/Mips/msa/3r_splat.ll new file mode 100644 index 0000000..6b0cb26 --- /dev/null +++ b/test/CodeGen/Mips/msa/3r_splat.ll @@ -0,0 +1,94 @@ +; Test the MSA splat intrinsics that are encoded with the 3R instruction +; format. + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | \ +; RUN: FileCheck -check-prefix=MIPS32 %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | \ +; RUN: FileCheck -check-prefix=MIPS32 %s + +@llvm_mips_splat_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_splat_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_splat_b_test(i32 %a) nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_splat_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.splat.b(<16 x i8> %0, i32 %a) + store <16 x i8> %1, <16 x i8>* @llvm_mips_splat_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.splat.b(<16 x i8>, i32) nounwind + +; MIPS32: llvm_mips_splat_b_test: +; MIPS32-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_splat_b_ARG1)( +; MIPS32-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_splat_b_RES)( +; MIPS32-DAG: ld.b [[R3:\$w[0-9]+]], 0([[R1]]) 
+; MIPS32-DAG: splat.b [[R4:\$w[0-9]+]], [[R3]][$4] +; MIPS32-DAG: st.b [[R4]], 0([[R2]]) +; MIPS32: .size llvm_mips_splat_b_test + +@llvm_mips_splat_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_splat_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_splat_h_test(i32 %a) nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_splat_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.splat.h(<8 x i16> %0, i32 %a) + store <8 x i16> %1, <8 x i16>* @llvm_mips_splat_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.splat.h(<8 x i16>, i32) nounwind + +; MIPS32: llvm_mips_splat_h_test: +; MIPS32-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_splat_h_ARG1)( +; MIPS32-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_splat_h_RES)( +; MIPS32-DAG: ld.h [[R3:\$w[0-9]+]], 0([[R1]]) +; MIPS32-DAG: splat.h [[R4:\$w[0-9]+]], [[R3]][$4] +; MIPS32-DAG: st.h [[R4]], 0([[R2]]) +; MIPS32: .size llvm_mips_splat_h_test + +@llvm_mips_splat_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_splat_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_splat_w_test(i32 %a) nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_splat_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.splat.w(<4 x i32> %0, i32 %a) + store <4 x i32> %1, <4 x i32>* @llvm_mips_splat_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.splat.w(<4 x i32>, i32) nounwind + +; MIPS32: llvm_mips_splat_w_test: +; MIPS32-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_splat_w_ARG1)( +; MIPS32-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_splat_w_RES)( +; MIPS32-DAG: ld.w [[R3:\$w[0-9]+]], 0([[R1]]) +; MIPS32-DAG: splat.w [[R4:\$w[0-9]+]], [[R3]][$4] +; MIPS32-DAG: st.w [[R4]], 0([[R2]]) +; MIPS32: .size llvm_mips_splat_w_test + +@llvm_mips_splat_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_splat_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void 
@llvm_mips_splat_d_test(i32 %a) nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_splat_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.splat.d(<2 x i64> %0, i32 %a) + store <2 x i64> %1, <2 x i64>* @llvm_mips_splat_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.splat.d(<2 x i64>, i32) nounwind + +; MIPS32: llvm_mips_splat_d_test: +; FIXME: This test is currently disabled for MIPS32 because the indices are +; difficult to match. This is because 64-bit values cannot be stored in +; GPR32. +; MIPS64-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_splat_d_ARG1)( +; MIPS64-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_splat_d_RES)( +; MIPS64-DAG: ld.d [[R3:\$w[0-9]+]], 0([[R1]]) +; MIPS64-DAG: splat.d [[R4:\$w[0-9]+]], [[R3]][$4] +; MIPS64-DAG: st.d [[R4]], 0([[R2]]) +; MIPS32: .size llvm_mips_splat_d_test diff --git a/test/CodeGen/Mips/msa/3rf.ll b/test/CodeGen/Mips/msa/3rf.ll new file mode 100644 index 0000000..ae665af --- /dev/null +++ b/test/CodeGen/Mips/msa/3rf.ll @@ -0,0 +1,485 @@ +; Test the MSA intrinsics that are encoded with the 3RF instruction format. 
+ +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_fadd_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fadd_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fadd_w_RES = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16 + +define void @llvm_mips_fadd_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fadd_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fadd_w_ARG2 + %2 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %1) + store <4 x float> %2, <4 x float>* @llvm_mips_fadd_w_RES + ret void +} + +declare <4 x float> @llvm.mips.fadd.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fadd_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fadd.w +; CHECK: st.w +; CHECK: .size llvm_mips_fadd_w_test +; +@llvm_mips_fadd_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fadd_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fadd_d_RES = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16 + +define void @llvm_mips_fadd_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fadd_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fadd_d_ARG2 + %2 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %1) + store <2 x double> %2, <2 x double>* @llvm_mips_fadd_d_RES + ret void +} + +declare <2 x double> @llvm.mips.fadd.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fadd_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fadd.d +; CHECK: st.d +; CHECK: .size llvm_mips_fadd_d_test + +define void @fadd_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fadd_w_ARG1 + %1 = load <4 x 
float>* @llvm_mips_fadd_w_ARG2 + %2 = fadd <4 x float> %0, %1 + store <4 x float> %2, <4 x float>* @llvm_mips_fadd_w_RES + ret void +} + +; CHECK: fadd_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fadd.w +; CHECK: st.w +; CHECK: .size fadd_w_test + +define void @fadd_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fadd_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fadd_d_ARG2 + %2 = fadd <2 x double> %0, %1 + store <2 x double> %2, <2 x double>* @llvm_mips_fadd_d_RES + ret void +} + +; CHECK: fadd_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fadd.d +; CHECK: st.d +; CHECK: .size fadd_d_test +; +@llvm_mips_fdiv_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fdiv_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fdiv_w_RES = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16 + +define void @llvm_mips_fdiv_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fdiv_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fdiv_w_ARG2 + %2 = tail call <4 x float> @llvm.mips.fdiv.w(<4 x float> %0, <4 x float> %1) + store <4 x float> %2, <4 x float>* @llvm_mips_fdiv_w_RES + ret void +} + +declare <4 x float> @llvm.mips.fdiv.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fdiv_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fdiv.w +; CHECK: st.w +; CHECK: .size llvm_mips_fdiv_w_test +; +@llvm_mips_fdiv_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fdiv_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fdiv_d_RES = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16 + +define void @llvm_mips_fdiv_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fdiv_d_ARG1 + %1 = load <2 x double>* 
@llvm_mips_fdiv_d_ARG2 + %2 = tail call <2 x double> @llvm.mips.fdiv.d(<2 x double> %0, <2 x double> %1) + store <2 x double> %2, <2 x double>* @llvm_mips_fdiv_d_RES + ret void +} + +declare <2 x double> @llvm.mips.fdiv.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fdiv_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fdiv.d +; CHECK: st.d +; CHECK: .size llvm_mips_fdiv_d_test + +define void @fdiv_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fdiv_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fdiv_w_ARG2 + %2 = fdiv <4 x float> %0, %1 + store <4 x float> %2, <4 x float>* @llvm_mips_fdiv_w_RES + ret void +} + +; CHECK: fdiv_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fdiv.w +; CHECK: st.w +; CHECK: .size fdiv_w_test + +define void @fdiv_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fdiv_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fdiv_d_ARG2 + %2 = fdiv <2 x double> %0, %1 + store <2 x double> %2, <2 x double>* @llvm_mips_fdiv_d_RES + ret void +} + +; CHECK: fdiv_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fdiv.d +; CHECK: st.d +; CHECK: .size fdiv_d_test +; +@llvm_mips_fmin_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fmin_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fmin_w_RES = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16 + +define void @llvm_mips_fmin_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fmin_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fmin_w_ARG2 + %2 = tail call <4 x float> @llvm.mips.fmin.w(<4 x float> %0, <4 x float> %1) + store <4 x float> %2, <4 x float>* @llvm_mips_fmin_w_RES + ret void +} + +declare <4 x float> @llvm.mips.fmin.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fmin_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: 
fmin.w +; CHECK: st.w +; CHECK: .size llvm_mips_fmin_w_test +; +@llvm_mips_fmin_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fmin_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fmin_d_RES = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16 + +define void @llvm_mips_fmin_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fmin_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fmin_d_ARG2 + %2 = tail call <2 x double> @llvm.mips.fmin.d(<2 x double> %0, <2 x double> %1) + store <2 x double> %2, <2 x double>* @llvm_mips_fmin_d_RES + ret void +} + +declare <2 x double> @llvm.mips.fmin.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fmin_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fmin.d +; CHECK: st.d +; CHECK: .size llvm_mips_fmin_d_test +; +@llvm_mips_fmin_a_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fmin_a_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fmin_a_w_RES = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16 + +define void @llvm_mips_fmin_a_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fmin_a_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fmin_a_w_ARG2 + %2 = tail call <4 x float> @llvm.mips.fmin.a.w(<4 x float> %0, <4 x float> %1) + store <4 x float> %2, <4 x float>* @llvm_mips_fmin_a_w_RES + ret void +} + +declare <4 x float> @llvm.mips.fmin.a.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fmin_a_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fmin_a.w +; CHECK: st.w +; CHECK: .size llvm_mips_fmin_a_w_test +; +@llvm_mips_fmin_a_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fmin_a_d_ARG2 = global <2 x double> <double 
2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fmin_a_d_RES = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16 + +define void @llvm_mips_fmin_a_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fmin_a_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fmin_a_d_ARG2 + %2 = tail call <2 x double> @llvm.mips.fmin.a.d(<2 x double> %0, <2 x double> %1) + store <2 x double> %2, <2 x double>* @llvm_mips_fmin_a_d_RES + ret void +} + +declare <2 x double> @llvm.mips.fmin.a.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fmin_a_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fmin_a.d +; CHECK: st.d +; CHECK: .size llvm_mips_fmin_a_d_test +; +@llvm_mips_fmax_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fmax_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fmax_w_RES = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16 + +define void @llvm_mips_fmax_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fmax_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fmax_w_ARG2 + %2 = tail call <4 x float> @llvm.mips.fmax.w(<4 x float> %0, <4 x float> %1) + store <4 x float> %2, <4 x float>* @llvm_mips_fmax_w_RES + ret void +} + +declare <4 x float> @llvm.mips.fmax.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fmax_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fmax.w +; CHECK: st.w +; CHECK: .size llvm_mips_fmax_w_test +; +@llvm_mips_fmax_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fmax_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fmax_d_RES = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16 + +define void @llvm_mips_fmax_d_test() nounwind { +entry: + %0 = load <2 x 
double>* @llvm_mips_fmax_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fmax_d_ARG2 + %2 = tail call <2 x double> @llvm.mips.fmax.d(<2 x double> %0, <2 x double> %1) + store <2 x double> %2, <2 x double>* @llvm_mips_fmax_d_RES + ret void +} + +declare <2 x double> @llvm.mips.fmax.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fmax_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fmax.d +; CHECK: st.d +; CHECK: .size llvm_mips_fmax_d_test +; +@llvm_mips_fmax_a_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fmax_a_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fmax_a_w_RES = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16 + +define void @llvm_mips_fmax_a_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fmax_a_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fmax_a_w_ARG2 + %2 = tail call <4 x float> @llvm.mips.fmax.a.w(<4 x float> %0, <4 x float> %1) + store <4 x float> %2, <4 x float>* @llvm_mips_fmax_a_w_RES + ret void +} + +declare <4 x float> @llvm.mips.fmax.a.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fmax_a_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fmax_a.w +; CHECK: st.w +; CHECK: .size llvm_mips_fmax_a_w_test +; +@llvm_mips_fmax_a_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fmax_a_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fmax_a_d_RES = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16 + +define void @llvm_mips_fmax_a_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fmax_a_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fmax_a_d_ARG2 + %2 = tail call <2 x double> @llvm.mips.fmax.a.d(<2 x double> %0, <2 x double> %1) + store <2 x double> %2, <2 x double>* 
@llvm_mips_fmax_a_d_RES + ret void +} + +declare <2 x double> @llvm.mips.fmax.a.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fmax_a_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fmax_a.d +; CHECK: st.d +; CHECK: .size llvm_mips_fmax_a_d_test +; +@llvm_mips_fmul_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fmul_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fmul_w_RES = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16 + +define void @llvm_mips_fmul_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fmul_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fmul_w_ARG2 + %2 = tail call <4 x float> @llvm.mips.fmul.w(<4 x float> %0, <4 x float> %1) + store <4 x float> %2, <4 x float>* @llvm_mips_fmul_w_RES + ret void +} + +declare <4 x float> @llvm.mips.fmul.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fmul_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fmul.w +; CHECK: st.w +; CHECK: .size llvm_mips_fmul_w_test +; +@llvm_mips_fmul_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fmul_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fmul_d_RES = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16 + +define void @llvm_mips_fmul_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fmul_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fmul_d_ARG2 + %2 = tail call <2 x double> @llvm.mips.fmul.d(<2 x double> %0, <2 x double> %1) + store <2 x double> %2, <2 x double>* @llvm_mips_fmul_d_RES + ret void +} + +declare <2 x double> @llvm.mips.fmul.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fmul_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fmul.d +; CHECK: st.d +; CHECK: .size 
llvm_mips_fmul_d_test + +define void @fmul_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fmul_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fmul_w_ARG2 + %2 = fmul <4 x float> %0, %1 + store <4 x float> %2, <4 x float>* @llvm_mips_fmul_w_RES + ret void +} + +; CHECK: fmul_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fmul.w +; CHECK: st.w +; CHECK: .size fmul_w_test + +define void @fmul_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fmul_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fmul_d_ARG2 + %2 = fmul <2 x double> %0, %1 + store <2 x double> %2, <2 x double>* @llvm_mips_fmul_d_RES + ret void +} + +; CHECK: fmul_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fmul.d +; CHECK: st.d +; CHECK: .size fmul_d_test +; +@llvm_mips_fsub_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fsub_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fsub_w_RES = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16 + +define void @llvm_mips_fsub_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fsub_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fsub_w_ARG2 + %2 = tail call <4 x float> @llvm.mips.fsub.w(<4 x float> %0, <4 x float> %1) + store <4 x float> %2, <4 x float>* @llvm_mips_fsub_w_RES + ret void +} + +declare <4 x float> @llvm.mips.fsub.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fsub_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fsub.w +; CHECK: st.w +; CHECK: .size llvm_mips_fsub_w_test +; +@llvm_mips_fsub_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fsub_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fsub_d_RES = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16 + +define void 
@llvm_mips_fsub_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fsub_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fsub_d_ARG2 + %2 = tail call <2 x double> @llvm.mips.fsub.d(<2 x double> %0, <2 x double> %1) + store <2 x double> %2, <2 x double>* @llvm_mips_fsub_d_RES + ret void +} + +declare <2 x double> @llvm.mips.fsub.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fsub_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fsub.d +; CHECK: st.d +; CHECK: .size llvm_mips_fsub_d_test +; + +define void @fsub_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fsub_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fsub_w_ARG2 + %2 = fsub <4 x float> %0, %1 + store <4 x float> %2, <4 x float>* @llvm_mips_fsub_w_RES + ret void +} + +; CHECK: fsub_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fsub.w +; CHECK: st.w +; CHECK: .size fsub_w_test + +define void @fsub_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fsub_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fsub_d_ARG2 + %2 = fsub <2 x double> %0, %1 + store <2 x double> %2, <2 x double>* @llvm_mips_fsub_d_RES + ret void +} + +; CHECK: fsub_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fsub.d +; CHECK: st.d +; CHECK: .size fsub_d_test diff --git a/test/CodeGen/Mips/msa/3rf_4rf.ll b/test/CodeGen/Mips/msa/3rf_4rf.ll new file mode 100644 index 0000000..67ef7fd --- /dev/null +++ b/test/CodeGen/Mips/msa/3rf_4rf.ll @@ -0,0 +1,106 @@ +; Test the MSA intrinsics that are encoded with the 3RF instruction format and +; use the result as a third operand. 
+ +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_fmadd_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fmadd_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fmadd_w_ARG3 = global <4 x float> <float 8.000000e+00, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01>, align 16 +@llvm_mips_fmadd_w_RES = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16 + +define void @llvm_mips_fmadd_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fmadd_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fmadd_w_ARG2 + %2 = load <4 x float>* @llvm_mips_fmadd_w_ARG3 + %3 = tail call <4 x float> @llvm.mips.fmadd.w(<4 x float> %0, <4 x float> %1, <4 x float> %2) + store <4 x float> %3, <4 x float>* @llvm_mips_fmadd_w_RES + ret void +} + +declare <4 x float> @llvm.mips.fmadd.w(<4 x float>, <4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fmadd_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fmadd.w +; CHECK: st.w +; CHECK: .size llvm_mips_fmadd_w_test +; +@llvm_mips_fmadd_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fmadd_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fmadd_d_ARG3 = global <2 x double> <double 4.000000e+00, double 5.000000e+00>, align 16 +@llvm_mips_fmadd_d_RES = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16 + +define void @llvm_mips_fmadd_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fmadd_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fmadd_d_ARG2 + %2 = load <2 x double>* @llvm_mips_fmadd_d_ARG3 + %3 = tail call <2 x double> @llvm.mips.fmadd.d(<2 x double> %0, <2 x double> %1, <2 x 
double> %2) + store <2 x double> %3, <2 x double>* @llvm_mips_fmadd_d_RES + ret void +} + +declare <2 x double> @llvm.mips.fmadd.d(<2 x double>, <2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fmadd_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fmadd.d +; CHECK: st.d +; CHECK: .size llvm_mips_fmadd_d_test +; +@llvm_mips_fmsub_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fmsub_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fmsub_w_ARG3 = global <4 x float> <float 8.000000e+00, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01>, align 16 +@llvm_mips_fmsub_w_RES = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16 + +define void @llvm_mips_fmsub_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fmsub_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fmsub_w_ARG2 + %2 = load <4 x float>* @llvm_mips_fmsub_w_ARG3 + %3 = tail call <4 x float> @llvm.mips.fmsub.w(<4 x float> %0, <4 x float> %1, <4 x float> %2) + store <4 x float> %3, <4 x float>* @llvm_mips_fmsub_w_RES + ret void +} + +declare <4 x float> @llvm.mips.fmsub.w(<4 x float>, <4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fmsub_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fmsub.w +; CHECK: st.w +; CHECK: .size llvm_mips_fmsub_w_test +; +@llvm_mips_fmsub_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fmsub_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fmsub_d_ARG3 = global <2 x double> <double 4.000000e+00, double 5.000000e+00>, align 16 +@llvm_mips_fmsub_d_RES = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16 + +define void @llvm_mips_fmsub_d_test() nounwind { +entry: + %0 = load <2 x double>* 
@llvm_mips_fmsub_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fmsub_d_ARG2 + %2 = load <2 x double>* @llvm_mips_fmsub_d_ARG3 + %3 = tail call <2 x double> @llvm.mips.fmsub.d(<2 x double> %0, <2 x double> %1, <2 x double> %2) + store <2 x double> %3, <2 x double>* @llvm_mips_fmsub_d_RES + ret void +} + +declare <2 x double> @llvm.mips.fmsub.d(<2 x double>, <2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fmsub_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fmsub.d +; CHECK: st.d +; CHECK: .size llvm_mips_fmsub_d_test +; diff --git a/test/CodeGen/Mips/msa/3rf_4rf_q.ll b/test/CodeGen/Mips/msa/3rf_4rf_q.ll new file mode 100644 index 0000000..de28be0 --- /dev/null +++ b/test/CodeGen/Mips/msa/3rf_4rf_q.ll @@ -0,0 +1,206 @@ +; Test the MSA intrinsics that are encoded with the 3RF instruction format and +; use the result as a third operand and perform fixed-point operations. + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_madd_q_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_madd_q_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_madd_q_h_ARG3 = global <8 x i16> <i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23>, align 16 +@llvm_mips_madd_q_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_madd_q_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_madd_q_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_madd_q_h_ARG2 + %2 = load <8 x i16>* @llvm_mips_madd_q_h_ARG3 + %3 = tail call <8 x i16> @llvm.mips.madd.q.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) + store <8 x i16> %3, <8 x i16>* @llvm_mips_madd_q_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.madd.q.h(<8 x i16>, <8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_madd_q_h_test: +; CHECK: ld.h +; 
CHECK: ld.h +; CHECK: ld.h +; CHECK: madd_q.h +; CHECK: st.h +; CHECK: .size llvm_mips_madd_q_h_test +; +@llvm_mips_madd_q_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_madd_q_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_madd_q_w_ARG3 = global <4 x i32> <i32 8, i32 9, i32 10, i32 11>, align 16 +@llvm_mips_madd_q_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_madd_q_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_madd_q_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_madd_q_w_ARG2 + %2 = load <4 x i32>* @llvm_mips_madd_q_w_ARG3 + %3 = tail call <4 x i32> @llvm.mips.madd.q.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) + store <4 x i32> %3, <4 x i32>* @llvm_mips_madd_q_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.madd.q.w(<4 x i32>, <4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_madd_q_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: ld.w +; CHECK: madd_q.w +; CHECK: st.w +; CHECK: .size llvm_mips_madd_q_w_test +; +@llvm_mips_maddr_q_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_maddr_q_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_maddr_q_h_ARG3 = global <8 x i16> <i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23>, align 16 +@llvm_mips_maddr_q_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_maddr_q_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_maddr_q_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_maddr_q_h_ARG2 + %2 = load <8 x i16>* @llvm_mips_maddr_q_h_ARG3 + %3 = tail call <8 x i16> @llvm.mips.maddr.q.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) + store <8 x i16> %3, <8 x i16>* @llvm_mips_maddr_q_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.maddr.q.h(<8 x i16>, <8 x i16>, <8 x i16>) nounwind + +; CHECK: 
llvm_mips_maddr_q_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: ld.h +; CHECK: maddr_q.h +; CHECK: st.h +; CHECK: .size llvm_mips_maddr_q_h_test +; +@llvm_mips_maddr_q_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_maddr_q_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_maddr_q_w_ARG3 = global <4 x i32> <i32 8, i32 9, i32 10, i32 11>, align 16 +@llvm_mips_maddr_q_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_maddr_q_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_maddr_q_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_maddr_q_w_ARG2 + %2 = load <4 x i32>* @llvm_mips_maddr_q_w_ARG3 + %3 = tail call <4 x i32> @llvm.mips.maddr.q.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) + store <4 x i32> %3, <4 x i32>* @llvm_mips_maddr_q_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.maddr.q.w(<4 x i32>, <4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_maddr_q_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: ld.w +; CHECK: maddr_q.w +; CHECK: st.w +; CHECK: .size llvm_mips_maddr_q_w_test +; +@llvm_mips_msub_q_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_msub_q_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_msub_q_h_ARG3 = global <8 x i16> <i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23>, align 16 +@llvm_mips_msub_q_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_msub_q_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_msub_q_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_msub_q_h_ARG2 + %2 = load <8 x i16>* @llvm_mips_msub_q_h_ARG3 + %3 = tail call <8 x i16> @llvm.mips.msub.q.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) + store <8 x i16> %3, <8 x i16>* @llvm_mips_msub_q_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.msub.q.h(<8 x i16>, <8 x i16>, <8 
x i16>) nounwind + +; CHECK: llvm_mips_msub_q_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: ld.h +; CHECK: msub_q.h +; CHECK: st.h +; CHECK: .size llvm_mips_msub_q_h_test +; +@llvm_mips_msub_q_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_msub_q_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_msub_q_w_ARG3 = global <4 x i32> <i32 8, i32 9, i32 10, i32 11>, align 16 +@llvm_mips_msub_q_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_msub_q_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_msub_q_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_msub_q_w_ARG2 + %2 = load <4 x i32>* @llvm_mips_msub_q_w_ARG3 + %3 = tail call <4 x i32> @llvm.mips.msub.q.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) + store <4 x i32> %3, <4 x i32>* @llvm_mips_msub_q_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.msub.q.w(<4 x i32>, <4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_msub_q_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: ld.w +; CHECK: msub_q.w +; CHECK: st.w +; CHECK: .size llvm_mips_msub_q_w_test +; +@llvm_mips_msubr_q_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_msubr_q_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_msubr_q_h_ARG3 = global <8 x i16> <i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23>, align 16 +@llvm_mips_msubr_q_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_msubr_q_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_msubr_q_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_msubr_q_h_ARG2 + %2 = load <8 x i16>* @llvm_mips_msubr_q_h_ARG3 + %3 = tail call <8 x i16> @llvm.mips.msubr.q.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) + store <8 x i16> %3, <8 x i16>* @llvm_mips_msubr_q_h_RES + ret void +} + +declare <8 x i16> 
@llvm.mips.msubr.q.h(<8 x i16>, <8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_msubr_q_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: ld.h +; CHECK: msubr_q.h +; CHECK: st.h +; CHECK: .size llvm_mips_msubr_q_h_test +; +@llvm_mips_msubr_q_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_msubr_q_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_msubr_q_w_ARG3 = global <4 x i32> <i32 8, i32 9, i32 10, i32 11>, align 16 +@llvm_mips_msubr_q_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_msubr_q_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_msubr_q_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_msubr_q_w_ARG2 + %2 = load <4 x i32>* @llvm_mips_msubr_q_w_ARG3 + %3 = tail call <4 x i32> @llvm.mips.msubr.q.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) + store <4 x i32> %3, <4 x i32>* @llvm_mips_msubr_q_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.msubr.q.w(<4 x i32>, <4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_msubr_q_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: ld.w +; CHECK: msubr_q.w +; CHECK: st.w +; CHECK: .size llvm_mips_msubr_q_w_test +; diff --git a/test/CodeGen/Mips/msa/3rf_exdo.ll b/test/CodeGen/Mips/msa/3rf_exdo.ll new file mode 100644 index 0000000..8a7f268 --- /dev/null +++ b/test/CodeGen/Mips/msa/3rf_exdo.ll @@ -0,0 +1,50 @@ +; Test the MSA floating-point conversion intrinsics that are encoded with the +; 3RF instruction format. 
+ +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_fexdo_h_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fexdo_h_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fexdo_h_RES = global <8 x half> <half 0.000000e+00, half 0.000000e+00, half 0.000000e+00, half 0.000000e+00, half 0.000000e+00, half 0.000000e+00, half 0.000000e+00, half 0.000000e+00>, align 16 + +define void @llvm_mips_fexdo_h_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fexdo_h_ARG1 + %1 = load <4 x float>* @llvm_mips_fexdo_h_ARG2 + %2 = tail call <8 x half> @llvm.mips.fexdo.h(<4 x float> %0, <4 x float> %1) + store <8 x half> %2, <8 x half>* @llvm_mips_fexdo_h_RES + ret void +} + +declare <8 x half> @llvm.mips.fexdo.h(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fexdo_h_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fexdo.h +; CHECK: st.h +; CHECK: .size llvm_mips_fexdo_h_test +; +@llvm_mips_fexdo_w_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fexdo_w_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fexdo_w_RES = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16 + +define void @llvm_mips_fexdo_w_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fexdo_w_ARG1 + %1 = load <2 x double>* @llvm_mips_fexdo_w_ARG2 + %2 = tail call <4 x float> @llvm.mips.fexdo.w(<2 x double> %0, <2 x double> %1) + store <4 x float> %2, <4 x float>* @llvm_mips_fexdo_w_RES + ret void +} + +declare <4 x float> @llvm.mips.fexdo.w(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fexdo_w_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fexdo.w +; CHECK: st.w +; CHECK: .size 
llvm_mips_fexdo_w_test +; diff --git a/test/CodeGen/Mips/msa/3rf_float_int.ll b/test/CodeGen/Mips/msa/3rf_float_int.ll new file mode 100644 index 0000000..7b01e17 --- /dev/null +++ b/test/CodeGen/Mips/msa/3rf_float_int.ll @@ -0,0 +1,50 @@ +; Test the MSA intrinsics that are encoded with the 3RF instruction format and +; take an integer as an operand. + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_fexp2_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fexp2_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_fexp2_w_RES = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16 + +define void @llvm_mips_fexp2_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fexp2_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_fexp2_w_ARG2 + %2 = tail call <4 x float> @llvm.mips.fexp2.w(<4 x float> %0, <4 x i32> %1) + store <4 x float> %2, <4 x float>* @llvm_mips_fexp2_w_RES + ret void +} + +declare <4 x float> @llvm.mips.fexp2.w(<4 x float>, <4 x i32>) nounwind + +; CHECK: llvm_mips_fexp2_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fexp2.w +; CHECK: st.w +; CHECK: .size llvm_mips_fexp2_w_test +; +@llvm_mips_fexp2_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fexp2_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_fexp2_d_RES = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16 + +define void @llvm_mips_fexp2_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fexp2_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_fexp2_d_ARG2 + %2 = tail call <2 x double> @llvm.mips.fexp2.d(<2 x double> %0, <2 x i64> %1) + store <2 x double> %2, <2 x double>* @llvm_mips_fexp2_d_RES + ret void +} + +declare <2 x double> @llvm.mips.fexp2.d(<2 x 
double>, <2 x i64>) nounwind + +; CHECK: llvm_mips_fexp2_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fexp2.d +; CHECK: st.d +; CHECK: .size llvm_mips_fexp2_d_test +; diff --git a/test/CodeGen/Mips/msa/3rf_int_float.ll b/test/CodeGen/Mips/msa/3rf_int_float.ll new file mode 100644 index 0000000..5624771 --- /dev/null +++ b/test/CodeGen/Mips/msa/3rf_int_float.ll @@ -0,0 +1,974 @@ +; Test the MSA intrinsics that are encoded with the 3RF instruction format and +; produce an integer as a result. + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_fcaf_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fcaf_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fcaf_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_fcaf_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fcaf_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fcaf_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.fcaf.w(<4 x float> %0, <4 x float> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_fcaf_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.fcaf.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fcaf_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fcaf.w +; CHECK: st.w +; CHECK: .size llvm_mips_fcaf_w_test +; +@llvm_mips_fcaf_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fcaf_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fcaf_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_fcaf_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fcaf_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fcaf_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.fcaf.d(<2 x double> %0, <2 x double> %1) + 
store <2 x i64> %2, <2 x i64>* @llvm_mips_fcaf_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.fcaf.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fcaf_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fcaf.d +; CHECK: st.d +; CHECK: .size llvm_mips_fcaf_d_test +; +@llvm_mips_fceq_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fceq_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fceq_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_fceq_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fceq_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fceq_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.fceq.w(<4 x float> %0, <4 x float> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_fceq_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.fceq.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fceq_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fceq.w +; CHECK: st.w +; CHECK: .size llvm_mips_fceq_w_test +; +@llvm_mips_fceq_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fceq_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fceq_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_fceq_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fceq_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fceq_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.fceq.d(<2 x double> %0, <2 x double> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_fceq_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.fceq.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fceq_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fceq.d +; CHECK: st.d +; CHECK: .size llvm_mips_fceq_d_test +; +@llvm_mips_fcle_w_ARG1 = global <4 x float> <float 0.000000e+00, 
float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fcle_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fcle_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_fcle_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fcle_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fcle_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.fcle.w(<4 x float> %0, <4 x float> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_fcle_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.fcle.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fcle_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fcle.w +; CHECK: st.w +; CHECK: .size llvm_mips_fcle_w_test +; +@llvm_mips_fcle_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fcle_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fcle_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_fcle_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fcle_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fcle_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.fcle.d(<2 x double> %0, <2 x double> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_fcle_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.fcle.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fcle_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fcle.d +; CHECK: st.d +; CHECK: .size llvm_mips_fcle_d_test +; +@llvm_mips_fclt_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fclt_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fclt_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_fclt_w_test() nounwind { +entry: + 
%0 = load <4 x float>* @llvm_mips_fclt_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fclt_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.fclt.w(<4 x float> %0, <4 x float> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_fclt_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.fclt.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fclt_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fclt.w +; CHECK: st.w +; CHECK: .size llvm_mips_fclt_w_test +; +@llvm_mips_fclt_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fclt_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fclt_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_fclt_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fclt_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fclt_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.fclt.d(<2 x double> %0, <2 x double> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_fclt_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.fclt.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fclt_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fclt.d +; CHECK: st.d +; CHECK: .size llvm_mips_fclt_d_test +; +@llvm_mips_fcor_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fcor_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fcor_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_fcor_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fcor_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fcor_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.fcor.w(<4 x float> %0, <4 x float> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_fcor_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.fcor.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fcor_w_test: 
+; CHECK: ld.w +; CHECK: ld.w +; CHECK: fcor.w +; CHECK: st.w +; CHECK: .size llvm_mips_fcor_w_test +; +@llvm_mips_fcor_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fcor_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fcor_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_fcor_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fcor_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fcor_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.fcor.d(<2 x double> %0, <2 x double> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_fcor_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.fcor.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fcor_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fcor.d +; CHECK: st.d +; CHECK: .size llvm_mips_fcor_d_test +; +@llvm_mips_fcne_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fcne_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fcne_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_fcne_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fcne_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fcne_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.fcne.w(<4 x float> %0, <4 x float> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_fcne_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.fcne.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fcne_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fcne.w +; CHECK: st.w +; CHECK: .size llvm_mips_fcne_w_test +; +@llvm_mips_fcne_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fcne_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fcne_d_RES = global <2 x i64> <i64 
0, i64 0>, align 16 + +define void @llvm_mips_fcne_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fcne_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fcne_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.fcne.d(<2 x double> %0, <2 x double> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_fcne_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.fcne.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fcne_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fcne.d +; CHECK: st.d +; CHECK: .size llvm_mips_fcne_d_test +; +@llvm_mips_fcueq_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fcueq_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fcueq_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_fcueq_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fcueq_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fcueq_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.fcueq.w(<4 x float> %0, <4 x float> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_fcueq_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.fcueq.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fcueq_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fcueq.w +; CHECK: st.w +; CHECK: .size llvm_mips_fcueq_w_test +; +@llvm_mips_fcueq_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fcueq_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fcueq_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_fcueq_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fcueq_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fcueq_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.fcueq.d(<2 x double> %0, <2 x double> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_fcueq_d_RES + ret void +} + 
+declare <2 x i64> @llvm.mips.fcueq.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fcueq_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fcueq.d +; CHECK: st.d +; CHECK: .size llvm_mips_fcueq_d_test +; +@llvm_mips_fcult_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fcult_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fcult_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_fcult_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fcult_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fcult_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.fcult.w(<4 x float> %0, <4 x float> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_fcult_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.fcult.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fcult_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fcult.w +; CHECK: st.w +; CHECK: .size llvm_mips_fcult_w_test +; +@llvm_mips_fcult_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fcult_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fcult_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_fcult_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fcult_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fcult_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.fcult.d(<2 x double> %0, <2 x double> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_fcult_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.fcult.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fcult_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fcult.d +; CHECK: st.d +; CHECK: .size llvm_mips_fcult_d_test +; +@llvm_mips_fcule_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, 
float 3.000000e+00>, align 16 +@llvm_mips_fcule_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fcule_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_fcule_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fcule_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fcule_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.fcule.w(<4 x float> %0, <4 x float> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_fcule_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.fcule.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fcule_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fcule.w +; CHECK: st.w +; CHECK: .size llvm_mips_fcule_w_test +; +@llvm_mips_fcule_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fcule_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fcule_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_fcule_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fcule_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fcule_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.fcule.d(<2 x double> %0, <2 x double> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_fcule_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.fcule.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fcule_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fcule.d +; CHECK: st.d +; CHECK: .size llvm_mips_fcule_d_test +; +@llvm_mips_fcun_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fcun_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fcun_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_fcun_w_test() nounwind { +entry: + %0 = load <4 x 
float>* @llvm_mips_fcun_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fcun_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.fcun.w(<4 x float> %0, <4 x float> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_fcun_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.fcun.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fcun_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fcun.w +; CHECK: st.w +; CHECK: .size llvm_mips_fcun_w_test +; +@llvm_mips_fcun_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fcun_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fcun_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_fcun_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fcun_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fcun_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.fcun.d(<2 x double> %0, <2 x double> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_fcun_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.fcun.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fcun_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fcun.d +; CHECK: st.d +; CHECK: .size llvm_mips_fcun_d_test +; +@llvm_mips_fcune_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fcune_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fcune_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_fcune_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fcune_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fcune_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.fcune.w(<4 x float> %0, <4 x float> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_fcune_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.fcune.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fcune_w_test: +; 
CHECK: ld.w +; CHECK: ld.w +; CHECK: fcune.w +; CHECK: st.w +; CHECK: .size llvm_mips_fcune_w_test +; +@llvm_mips_fcune_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fcune_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fcune_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_fcune_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fcune_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fcune_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.fcune.d(<2 x double> %0, <2 x double> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_fcune_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.fcune.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fcune_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fcune.d +; CHECK: st.d +; CHECK: .size llvm_mips_fcune_d_test +; +@llvm_mips_fsaf_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fsaf_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fsaf_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_fsaf_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fsaf_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fsaf_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.fsaf.w(<4 x float> %0, <4 x float> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_fsaf_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.fsaf.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fsaf_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fsaf.w +; CHECK: st.w +; CHECK: .size llvm_mips_fsaf_w_test +; +@llvm_mips_fsaf_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fsaf_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fsaf_d_RES = global <2 x 
i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_fsaf_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fsaf_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fsaf_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.fsaf.d(<2 x double> %0, <2 x double> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_fsaf_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.fsaf.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fsaf_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fsaf.d +; CHECK: st.d +; CHECK: .size llvm_mips_fsaf_d_test +; +@llvm_mips_fseq_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fseq_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fseq_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_fseq_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fseq_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fseq_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.fseq.w(<4 x float> %0, <4 x float> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_fseq_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.fseq.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fseq_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fseq.w +; CHECK: st.w +; CHECK: .size llvm_mips_fseq_w_test +; +@llvm_mips_fseq_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fseq_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fseq_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_fseq_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fseq_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fseq_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.fseq.d(<2 x double> %0, <2 x double> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_fseq_d_RES + ret void +} + +declare <2 
x i64> @llvm.mips.fseq.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fseq_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fseq.d +; CHECK: st.d +; CHECK: .size llvm_mips_fseq_d_test +; +@llvm_mips_fsle_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fsle_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fsle_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_fsle_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fsle_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fsle_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.fsle.w(<4 x float> %0, <4 x float> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_fsle_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.fsle.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fsle_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fsle.w +; CHECK: st.w +; CHECK: .size llvm_mips_fsle_w_test +; +@llvm_mips_fsle_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fsle_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fsle_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_fsle_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fsle_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fsle_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.fsle.d(<2 x double> %0, <2 x double> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_fsle_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.fsle.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fsle_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fsle.d +; CHECK: st.d +; CHECK: .size llvm_mips_fsle_d_test +; +@llvm_mips_fslt_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 
+@llvm_mips_fslt_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fslt_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_fslt_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fslt_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fslt_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.fslt.w(<4 x float> %0, <4 x float> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_fslt_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.fslt.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fslt_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fslt.w +; CHECK: st.w +; CHECK: .size llvm_mips_fslt_w_test +; +@llvm_mips_fslt_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fslt_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fslt_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_fslt_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fslt_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fslt_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.fslt.d(<2 x double> %0, <2 x double> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_fslt_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.fslt.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fslt_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fslt.d +; CHECK: st.d +; CHECK: .size llvm_mips_fslt_d_test +; +@llvm_mips_fsor_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fsor_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fsor_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_fsor_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fsor_w_ARG1 + %1 = load <4 x float>* 
@llvm_mips_fsor_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.fsor.w(<4 x float> %0, <4 x float> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_fsor_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.fsor.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fsor_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fsor.w +; CHECK: st.w +; CHECK: .size llvm_mips_fsor_w_test +; +@llvm_mips_fsor_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fsor_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fsor_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_fsor_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fsor_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fsor_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.fsor.d(<2 x double> %0, <2 x double> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_fsor_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.fsor.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fsor_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fsor.d +; CHECK: st.d +; CHECK: .size llvm_mips_fsor_d_test +; +@llvm_mips_fsne_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fsne_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fsne_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_fsne_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fsne_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fsne_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.fsne.w(<4 x float> %0, <4 x float> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_fsne_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.fsne.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fsne_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fsne.w +; CHECK: st.w +; CHECK: 
.size llvm_mips_fsne_w_test +; +@llvm_mips_fsne_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fsne_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fsne_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_fsne_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fsne_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fsne_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.fsne.d(<2 x double> %0, <2 x double> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_fsne_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.fsne.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fsne_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fsne.d +; CHECK: st.d +; CHECK: .size llvm_mips_fsne_d_test +; +@llvm_mips_fsueq_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fsueq_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fsueq_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_fsueq_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fsueq_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fsueq_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.fsueq.w(<4 x float> %0, <4 x float> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_fsueq_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.fsueq.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fsueq_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fsueq.w +; CHECK: st.w +; CHECK: .size llvm_mips_fsueq_w_test +; +@llvm_mips_fsueq_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fsueq_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fsueq_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void 
@llvm_mips_fsueq_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fsueq_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fsueq_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.fsueq.d(<2 x double> %0, <2 x double> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_fsueq_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.fsueq.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fsueq_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fsueq.d +; CHECK: st.d +; CHECK: .size llvm_mips_fsueq_d_test +; +@llvm_mips_fsult_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fsult_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fsult_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_fsult_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fsult_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fsult_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.fsult.w(<4 x float> %0, <4 x float> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_fsult_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.fsult.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fsult_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fsult.w +; CHECK: st.w +; CHECK: .size llvm_mips_fsult_w_test +; +@llvm_mips_fsult_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fsult_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fsult_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_fsult_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fsult_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fsult_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.fsult.d(<2 x double> %0, <2 x double> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_fsult_d_RES + ret void +} + +declare <2 x i64> 
@llvm.mips.fsult.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fsult_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fsult.d +; CHECK: st.d +; CHECK: .size llvm_mips_fsult_d_test +; +@llvm_mips_fsule_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fsule_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fsule_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_fsule_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fsule_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fsule_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.fsule.w(<4 x float> %0, <4 x float> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_fsule_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.fsule.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fsule_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fsule.w +; CHECK: st.w +; CHECK: .size llvm_mips_fsule_w_test +; +@llvm_mips_fsule_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fsule_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fsule_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_fsule_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fsule_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fsule_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.fsule.d(<2 x double> %0, <2 x double> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_fsule_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.fsule.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fsule_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fsule.d +; CHECK: st.d +; CHECK: .size llvm_mips_fsule_d_test +; +@llvm_mips_fsun_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, 
align 16 +@llvm_mips_fsun_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fsun_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_fsun_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fsun_w_ARG1 + %1 = load <4 x float>* @llvm_mips_fsun_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.fsun.w(<4 x float> %0, <4 x float> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_fsun_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.fsun.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fsun_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fsun.w +; CHECK: st.w +; CHECK: .size llvm_mips_fsun_w_test +; +@llvm_mips_fsun_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fsun_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fsun_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_fsun_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fsun_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fsun_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.fsun.d(<2 x double> %0, <2 x double> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_fsun_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.fsun.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fsun_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fsun.d +; CHECK: st.d +; CHECK: .size llvm_mips_fsun_d_test +; +@llvm_mips_fsune_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16 +@llvm_mips_fsune_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16 +@llvm_mips_fsune_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_fsune_w_test() nounwind { +entry: + %0 = load <4 x float>* @llvm_mips_fsune_w_ARG1 + %1 = load 
<4 x float>* @llvm_mips_fsune_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.fsune.w(<4 x float> %0, <4 x float> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_fsune_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.fsune.w(<4 x float>, <4 x float>) nounwind + +; CHECK: llvm_mips_fsune_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: fsune.w +; CHECK: st.w +; CHECK: .size llvm_mips_fsune_w_test +; +@llvm_mips_fsune_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16 +@llvm_mips_fsune_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16 +@llvm_mips_fsune_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_fsune_d_test() nounwind { +entry: + %0 = load <2 x double>* @llvm_mips_fsune_d_ARG1 + %1 = load <2 x double>* @llvm_mips_fsune_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.fsune.d(<2 x double> %0, <2 x double> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_fsune_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.fsune.d(<2 x double>, <2 x double>) nounwind + +; CHECK: llvm_mips_fsune_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: fsune.d +; CHECK: st.d +; CHECK: .size llvm_mips_fsune_d_test +; diff --git a/test/CodeGen/Mips/msa/3rf_q.ll b/test/CodeGen/Mips/msa/3rf_q.ll new file mode 100644 index 0000000..f7000ee --- /dev/null +++ b/test/CodeGen/Mips/msa/3rf_q.ll @@ -0,0 +1,94 @@ +; Test the MSA fixed-point intrinsics that are encoded with the 3RF instruction +; format. 
+ +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_mul_q_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_mul_q_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_mul_q_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_mul_q_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_mul_q_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_mul_q_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.mul.q.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_mul_q_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.mul.q.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_mul_q_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: mul_q.h +; CHECK: st.h +; CHECK: .size llvm_mips_mul_q_h_test +; +@llvm_mips_mul_q_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_mul_q_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_mul_q_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_mul_q_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_mul_q_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_mul_q_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.mul.q.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_mul_q_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.mul.q.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_mul_q_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: mul_q.w +; CHECK: st.w +; CHECK: .size llvm_mips_mul_q_w_test +; +@llvm_mips_mulr_q_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_mulr_q_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_mulr_q_h_RES = global 
<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_mulr_q_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_mulr_q_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_mulr_q_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.mulr.q.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_mulr_q_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.mulr.q.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_mulr_q_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: mulr_q.h +; CHECK: st.h +; CHECK: .size llvm_mips_mulr_q_h_test +; +@llvm_mips_mulr_q_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_mulr_q_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_mulr_q_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_mulr_q_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_mulr_q_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_mulr_q_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.mulr.q.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_mulr_q_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.mulr.q.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_mulr_q_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: mulr_q.w +; CHECK: st.w +; CHECK: .size llvm_mips_mulr_q_w_test +; diff --git a/test/CodeGen/Mips/msa/arithmetic.ll b/test/CodeGen/Mips/msa/arithmetic.ll new file mode 100644 index 0000000..09ee502 --- /dev/null +++ b/test/CodeGen/Mips/msa/arithmetic.ll @@ -0,0 +1,726 @@ +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +define void @add_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: add_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = add <16 x i8> %1, %2 + ; CHECK-DAG: addv.b 
[[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size add_v16i8 +} + +define void @add_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: add_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = add <8 x i16> %1, %2 + ; CHECK-DAG: addv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size add_v8i16 +} + +define void @add_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: add_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = add <4 x i32> %1, %2 + ; CHECK-DAG: addv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size add_v4i32 +} + +define void @add_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: add_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = add <2 x i64> %1, %2 + ; CHECK-DAG: addv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size add_v2i64 +} + +define void @add_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: add_v16i8_i: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = add <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, + i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + ; CHECK-DAG: addvi.b [[R3:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %2, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size add_v16i8_i +} + +define void @add_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; 
CHECK: add_v8i16_i: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = add <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, + i16 1, i16 1, i16 1, i16 1> + ; CHECK-DAG: addvi.h [[R3:\$w[0-9]+]], [[R1]], 1 + store <8 x i16> %2, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size add_v8i16_i +} + +define void @add_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: add_v4i32_i: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = add <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1> + ; CHECK-DAG: addvi.w [[R3:\$w[0-9]+]], [[R1]], 1 + store <4 x i32> %2, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size add_v4i32_i +} + +define void @add_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: add_v2i64_i: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = add <2 x i64> %1, <i64 1, i64 1> + ; CHECK-DAG: addvi.d [[R3:\$w[0-9]+]], [[R1]], 1 + store <2 x i64> %2, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size add_v2i64_i +} + +define void @sub_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: sub_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = sub <16 x i8> %1, %2 + ; CHECK-DAG: subv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size sub_v16i8 +} + +define void @sub_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: sub_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = sub <8 x i16> %1, %2 + ; CHECK-DAG: subv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size sub_v8i16 +} + +define void 
@sub_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: sub_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = sub <4 x i32> %1, %2 + ; CHECK-DAG: subv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size sub_v4i32 +} + +define void @sub_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: sub_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = sub <2 x i64> %1, %2 + ; CHECK-DAG: subv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size sub_v2i64 +} + +define void @sub_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: sub_v16i8_i: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = sub <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, + i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + ; CHECK-DAG: subvi.b [[R3:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %2, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size sub_v16i8_i +} + +define void @sub_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: sub_v8i16_i: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = sub <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, + i16 1, i16 1, i16 1, i16 1> + ; CHECK-DAG: subvi.h [[R3:\$w[0-9]+]], [[R1]], 1 + store <8 x i16> %2, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size sub_v8i16_i +} + +define void @sub_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: sub_v4i32_i: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = sub <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1> + ; CHECK-DAG: subvi.w [[R3:\$w[0-9]+]], 
[[R1]], 1 + store <4 x i32> %2, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size sub_v4i32_i +} + +define void @sub_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: sub_v2i64_i: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = sub <2 x i64> %1, <i64 1, i64 1> + ; CHECK-DAG: subvi.d [[R3:\$w[0-9]+]], [[R1]], 1 + store <2 x i64> %2, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size sub_v2i64_i +} + +define void @mul_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: mul_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = mul <16 x i8> %1, %2 + ; CHECK-DAG: mulv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size mul_v16i8 +} + +define void @mul_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: mul_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = mul <8 x i16> %1, %2 + ; CHECK-DAG: mulv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size mul_v8i16 +} + +define void @mul_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: mul_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = mul <4 x i32> %1, %2 + ; CHECK-DAG: mulv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size mul_v4i32 +} + +define void @mul_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: mul_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + 
%2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = mul <2 x i64> %1, %2 + ; CHECK-DAG: mulv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size mul_v2i64 +} + +define void @maddv_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b, + <16 x i8>* %c) nounwind { + ; CHECK: maddv_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = load <16 x i8>* %c + ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7) + %4 = mul <16 x i8> %2, %3 + %5 = add <16 x i8> %4, %1 + ; CHECK-DAG: maddv.b [[R1]], [[R2]], [[R3]] + store <16 x i8> %5, <16 x i8>* %d + ; CHECK-DAG: st.b [[R1]], 0($4) + + ret void + ; CHECK: .size maddv_v16i8 +} + +define void @maddv_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b, + <8 x i16>* %c) nounwind { + ; CHECK: maddv_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = load <8 x i16>* %c + ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7) + %4 = mul <8 x i16> %2, %3 + %5 = add <8 x i16> %4, %1 + ; CHECK-DAG: maddv.h [[R1]], [[R2]], [[R3]] + store <8 x i16> %5, <8 x i16>* %d + ; CHECK-DAG: st.h [[R1]], 0($4) + + ret void + ; CHECK: .size maddv_v8i16 +} + +define void @maddv_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b, + <4 x i32>* %c) nounwind { + ; CHECK: maddv_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = load <4 x i32>* %c + ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7) + %4 = mul <4 x i32> %2, %3 + %5 = add <4 x i32> %4, %1 + ; CHECK-DAG: maddv.w [[R1]], [[R2]], [[R3]] + store <4 x i32> %5, <4 x i32>* %d + ; CHECK-DAG: st.w [[R1]], 0($4) + + ret void + ; CHECK: .size maddv_v4i32 +} + +define void @maddv_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b, 
+ <2 x i64>* %c) nounwind { + ; CHECK: maddv_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = load <2 x i64>* %c + ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7) + %4 = mul <2 x i64> %2, %3 + %5 = add <2 x i64> %4, %1 + ; CHECK-DAG: maddv.d [[R1]], [[R2]], [[R3]] + store <2 x i64> %5, <2 x i64>* %d + ; CHECK-DAG: st.d [[R1]], 0($4) + + ret void + ; CHECK: .size maddv_v2i64 +} + +define void @msubv_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b, + <16 x i8>* %c) nounwind { + ; CHECK: msubv_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = load <16 x i8>* %c + ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7) + %4 = mul <16 x i8> %2, %3 + %5 = sub <16 x i8> %1, %4 + ; CHECK-DAG: msubv.b [[R1]], [[R2]], [[R3]] + store <16 x i8> %5, <16 x i8>* %d + ; CHECK-DAG: st.b [[R1]], 0($4) + + ret void + ; CHECK: .size msubv_v16i8 +} + +define void @msubv_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b, + <8 x i16>* %c) nounwind { + ; CHECK: msubv_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = load <8 x i16>* %c + ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7) + %4 = mul <8 x i16> %2, %3 + %5 = sub <8 x i16> %1, %4 + ; CHECK-DAG: msubv.h [[R1]], [[R2]], [[R3]] + store <8 x i16> %5, <8 x i16>* %d + ; CHECK-DAG: st.h [[R1]], 0($4) + + ret void + ; CHECK: .size msubv_v8i16 +} + +define void @msubv_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b, + <4 x i32>* %c) nounwind { + ; CHECK: msubv_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = load <4 x i32>* %c + ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7) + %4 = mul <4 x i32> %2, %3 + %5 = sub <4 x i32> %1, %4 + ; CHECK-DAG: 
msubv.w [[R1]], [[R2]], [[R3]] + store <4 x i32> %5, <4 x i32>* %d + ; CHECK-DAG: st.w [[R1]], 0($4) + + ret void + ; CHECK: .size msubv_v4i32 +} + +define void @msubv_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b, + <2 x i64>* %c) nounwind { + ; CHECK: msubv_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = load <2 x i64>* %c + ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7) + %4 = mul <2 x i64> %2, %3 + %5 = sub <2 x i64> %1, %4 + ; CHECK-DAG: msubv.d [[R1]], [[R2]], [[R3]] + store <2 x i64> %5, <2 x i64>* %d + ; CHECK-DAG: st.d [[R1]], 0($4) + + ret void + ; CHECK: .size msubv_v2i64 +} + +define void @div_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: div_s_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = sdiv <16 x i8> %1, %2 + ; CHECK-DAG: div_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size div_s_v16i8 +} + +define void @div_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: div_s_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = sdiv <8 x i16> %1, %2 + ; CHECK-DAG: div_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size div_s_v8i16 +} + +define void @div_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: div_s_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = sdiv <4 x i32> %1, %2 + ; CHECK-DAG: div_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 
0($4) + + ret void + ; CHECK: .size div_s_v4i32 +} + +define void @div_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: div_s_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = sdiv <2 x i64> %1, %2 + ; CHECK-DAG: div_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size div_s_v2i64 +} + +define void @div_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: div_u_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = udiv <16 x i8> %1, %2 + ; CHECK-DAG: div_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size div_u_v16i8 +} + +define void @div_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: div_u_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = udiv <8 x i16> %1, %2 + ; CHECK-DAG: div_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size div_u_v8i16 +} + +define void @div_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: div_u_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = udiv <4 x i32> %1, %2 + ; CHECK-DAG: div_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size div_u_v4i32 +} + +define void @div_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: div_u_v2i64: + + %1 = load <2 x i64>* %a + ; 
CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = udiv <2 x i64> %1, %2 + ; CHECK-DAG: div_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size div_u_v2i64 +} + +define void @mod_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: mod_s_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = srem <16 x i8> %1, %2 + ; CHECK-DAG: mod_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size mod_s_v16i8 +} + +define void @mod_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: mod_s_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = srem <8 x i16> %1, %2 + ; CHECK-DAG: mod_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size mod_s_v8i16 +} + +define void @mod_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: mod_s_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = srem <4 x i32> %1, %2 + ; CHECK-DAG: mod_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size mod_s_v4i32 +} + +define void @mod_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: mod_s_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = srem <2 x i64> %1, %2 + ; CHECK-DAG: mod_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + 
store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size mod_s_v2i64 +} + +define void @mod_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: mod_u_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = urem <16 x i8> %1, %2 + ; CHECK-DAG: mod_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size mod_u_v16i8 +} + +define void @mod_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: mod_u_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = urem <8 x i16> %1, %2 + ; CHECK-DAG: mod_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size mod_u_v8i16 +} + +define void @mod_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: mod_u_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = urem <4 x i32> %1, %2 + ; CHECK-DAG: mod_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size mod_u_v4i32 +} + +define void @mod_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: mod_u_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = urem <2 x i64> %1, %2 + ; CHECK-DAG: mod_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size mod_u_v2i64 +} diff --git a/test/CodeGen/Mips/msa/arithmetic_float.ll 
b/test/CodeGen/Mips/msa/arithmetic_float.ll new file mode 100644 index 0000000..dc38721 --- /dev/null +++ b/test/CodeGen/Mips/msa/arithmetic_float.ll @@ -0,0 +1,456 @@ +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +define void @add_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind { + ; CHECK: add_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x float>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = fadd <4 x float> %1, %2 + ; CHECK-DAG: fadd.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x float> %3, <4 x float>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size add_v4f32 +} + +define void @add_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind { + ; CHECK: add_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x double>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = fadd <2 x double> %1, %2 + ; CHECK-DAG: fadd.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x double> %3, <2 x double>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size add_v2f64 +} + +define void @sub_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind { + ; CHECK: sub_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x float>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = fsub <4 x float> %1, %2 + ; CHECK-DAG: fsub.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x float> %3, <4 x float>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size sub_v4f32 +} + +define void @sub_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind { + ; CHECK: sub_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x double>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = fsub <2 x double> %1, %2 + ; CHECK-DAG: fsub.d 
[[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x double> %3, <2 x double>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size sub_v2f64 +} + +define void @mul_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind { + ; CHECK: mul_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x float>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = fmul <4 x float> %1, %2 + ; CHECK-DAG: fmul.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x float> %3, <4 x float>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size mul_v4f32 +} + +define void @mul_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind { + ; CHECK: mul_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x double>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = fmul <2 x double> %1, %2 + ; CHECK-DAG: fmul.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x double> %3, <2 x double>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size mul_v2f64 +} + +define void @fma_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b, + <4 x float>* %c) nounwind { + ; CHECK: fma_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x float>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = load <4 x float>* %c + ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7) + %4 = tail call <4 x float> @llvm.fma.v4f32 (<4 x float> %1, <4 x float> %2, + <4 x float> %3) + ; CHECK-DAG: fmadd.w [[R1]], [[R2]], [[R3]] + store <4 x float> %4, <4 x float>* %d + ; CHECK-DAG: st.w [[R1]], 0($4) + + ret void + ; CHECK: .size fma_v4f32 +} + +define void @fma_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b, + <2 x double>* %c) nounwind { + ; CHECK: fma_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x double>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = load <2 x double>* %c + ; 
CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7) + %4 = tail call <2 x double> @llvm.fma.v2f64 (<2 x double> %1, <2 x double> %2, + <2 x double> %3) + ; CHECK-DAG: fmadd.d [[R1]], [[R2]], [[R3]] + store <2 x double> %4, <2 x double>* %d + ; CHECK-DAG: st.d [[R1]], 0($4) + + ret void + ; CHECK: .size fma_v2f64 +} + +define void @fmsub_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b, + <4 x float>* %c) nounwind { + ; CHECK: fmsub_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x float>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = load <4 x float>* %c + ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7) + %4 = fmul <4 x float> %2, %3 + %5 = fsub <4 x float> %1, %4 + ; CHECK-DAG: fmsub.w [[R1]], [[R2]], [[R3]] + store <4 x float> %5, <4 x float>* %d + ; CHECK-DAG: st.w [[R1]], 0($4) + + ret void + ; CHECK: .size fmsub_v4f32 +} + +define void @fmsub_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b, + <2 x double>* %c) nounwind { + ; CHECK: fmsub_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x double>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = load <2 x double>* %c + ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7) + %4 = fmul <2 x double> %2, %3 + %5 = fsub <2 x double> %1, %4 + ; CHECK-DAG: fmsub.d [[R1]], [[R2]], [[R3]] + store <2 x double> %5, <2 x double>* %d + ; CHECK-DAG: st.d [[R1]], 0($4) + + ret void + ; CHECK: .size fmsub_v2f64 +} + +define void @fdiv_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind { + ; CHECK: fdiv_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x float>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = fdiv <4 x float> %1, %2 + ; CHECK-DAG: fdiv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x float> %3, <4 x float>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size fdiv_v4f32 +} + +define void @fdiv_v2f64(<2 x double>* %c, <2 x double>* %a, <2 
x double>* %b) nounwind { + ; CHECK: fdiv_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x double>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = fdiv <2 x double> %1, %2 + ; CHECK-DAG: fdiv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x double> %3, <2 x double>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size fdiv_v2f64 +} + +define void @fabs_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind { + ; CHECK: fabs_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = tail call <4 x float> @llvm.fabs.v4f32 (<4 x float> %1) + ; CHECK-DAG: fmax_a.w [[R3:\$w[0-9]+]], [[R1]], [[R1]] + store <4 x float> %2, <4 x float>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size fabs_v4f32 +} + +define void @fabs_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind { + ; CHECK: fabs_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = tail call <2 x double> @llvm.fabs.v2f64 (<2 x double> %1) + ; CHECK-DAG: fmax_a.d [[R3:\$w[0-9]+]], [[R1]], [[R1]] + store <2 x double> %2, <2 x double>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size fabs_v2f64 +} + +define void @fexp2_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind { + ; CHECK: fexp2_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = tail call <4 x float> @llvm.exp2.v4f32 (<4 x float> %1) + ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1 + ; CHECK-DAG: ffint_u.w [[R4:\$w[0-9]+]], [[R3]] + ; CHECK-DAG: fexp2.w [[R4:\$w[0-9]+]], [[R3]], [[R1]] + store <4 x float> %2, <4 x float>* %c + ; CHECK-DAG: st.w [[R4]], 0($4) + + ret void + ; CHECK: .size fexp2_v4f32 +} + +define void @fexp2_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind { + ; CHECK: fexp2_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %1) + ; CHECK-DAG: ldi.d 
[[R3:\$w[0-9]+]], 1 + ; CHECK-DAG: ffint_u.d [[R4:\$w[0-9]+]], [[R3]] + ; CHECK-DAG: fexp2.d [[R4:\$w[0-9]+]], [[R3]], [[R1]] + store <2 x double> %2, <2 x double>* %c + ; CHECK-DAG: st.d [[R4]], 0($4) + + ret void + ; CHECK: .size fexp2_v2f64 +} + +define void @fexp2_v4f32_2(<4 x float>* %c, <4 x float>* %a) nounwind { + ; CHECK: fexp2_v4f32_2: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = tail call <4 x float> @llvm.exp2.v4f32 (<4 x float> %1) + %3 = fmul <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>, %2 + ; CHECK-DAG: lui [[R3:\$[0-9]+]], 16384 + ; CHECK-DAG: fill.w [[R4:\$w[0-9]+]], [[R3]] + ; CHECK-DAG: fexp2.w [[R5:\$w[0-9]+]], [[R4]], [[R1]] + store <4 x float> %3, <4 x float>* %c + ; CHECK-DAG: st.w [[R5]], 0($4) + + ret void + ; CHECK: .size fexp2_v4f32_2 +} + +define void @fexp2_v2f64_2(<2 x double>* %c, <2 x double>* %a) nounwind { + ; CHECK: .8byte 4611686018427387904 + ; CHECK-NEXT: .8byte 4611686018427387904 + ; CHECK: fexp2_v2f64_2: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %1) + %3 = fmul <2 x double> <double 2.0, double 2.0>, %2 + ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo( + ; CHECK-DAG: fexp2.d [[R4:\$w[0-9]+]], [[R3]], [[R1]] + store <2 x double> %3, <2 x double>* %c + ; CHECK-DAG: st.d [[R4]], 0($4) + + ret void + ; CHECK: .size fexp2_v2f64_2 +} + +define void @fsqrt_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind { + ; CHECK: fsqrt_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = tail call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %1) + ; CHECK-DAG: fsqrt.w [[R3:\$w[0-9]+]], [[R1]] + store <4 x float> %2, <4 x float>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size fsqrt_v4f32 +} + +define void @fsqrt_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind { + ; CHECK: fsqrt_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) 
+ %2 = tail call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %1) + ; CHECK-DAG: fsqrt.d [[R3:\$w[0-9]+]], [[R1]] + store <2 x double> %2, <2 x double>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size fsqrt_v2f64 +} + +define void @ffint_u_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind { + ; CHECK: ffint_u_v4f32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = uitofp <4 x i32> %1 to <4 x float> + ; CHECK-DAG: ffint_u.w [[R3:\$w[0-9]+]], [[R1]] + store <4 x float> %2, <4 x float>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size ffint_u_v4f32 +} + +define void @ffint_u_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind { + ; CHECK: ffint_u_v2f64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = uitofp <2 x i64> %1 to <2 x double> + ; CHECK-DAG: ffint_u.d [[R3:\$w[0-9]+]], [[R1]] + store <2 x double> %2, <2 x double>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size ffint_u_v2f64 +} + +define void @ffint_s_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind { + ; CHECK: ffint_s_v4f32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = sitofp <4 x i32> %1 to <4 x float> + ; CHECK-DAG: ffint_s.w [[R3:\$w[0-9]+]], [[R1]] + store <4 x float> %2, <4 x float>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size ffint_s_v4f32 +} + +define void @ffint_s_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind { + ; CHECK: ffint_s_v2f64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = sitofp <2 x i64> %1 to <2 x double> + ; CHECK-DAG: ffint_s.d [[R3:\$w[0-9]+]], [[R1]] + store <2 x double> %2, <2 x double>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size ffint_s_v2f64 +} + +define void @ftrunc_u_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind { + ; CHECK: ftrunc_u_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = fptoui <4 x float> %1 to <4 x i32> + ; 
CHECK-DAG: ftrunc_u.w [[R3:\$w[0-9]+]], [[R1]] + store <4 x i32> %2, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size ftrunc_u_v4f32 +} + +define void @ftrunc_u_v2f64(<2 x i64>* %c, <2 x double>* %a) nounwind { + ; CHECK: ftrunc_u_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = fptoui <2 x double> %1 to <2 x i64> + ; CHECK-DAG: ftrunc_u.d [[R3:\$w[0-9]+]], [[R1]] + store <2 x i64> %2, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size ftrunc_u_v2f64 +} + +define void @ftrunc_s_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind { + ; CHECK: ftrunc_s_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = fptosi <4 x float> %1 to <4 x i32> + ; CHECK-DAG: ftrunc_s.w [[R3:\$w[0-9]+]], [[R1]] + store <4 x i32> %2, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size ftrunc_s_v4f32 +} + +define void @ftrunc_s_v2f64(<2 x i64>* %c, <2 x double>* %a) nounwind { + ; CHECK: ftrunc_s_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = fptosi <2 x double> %1 to <2 x i64> + ; CHECK-DAG: ftrunc_s.d [[R3:\$w[0-9]+]], [[R1]] + store <2 x i64> %2, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size ftrunc_s_v2f64 +} + +declare <4 x float> @llvm.fabs.v4f32(<4 x float> %Val) +declare <2 x double> @llvm.fabs.v2f64(<2 x double> %Val) +declare <4 x float> @llvm.exp2.v4f32(<4 x float> %val) +declare <2 x double> @llvm.exp2.v2f64(<2 x double> %val) +declare <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, + <4 x float> %c) +declare <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, + <2 x double> %c) +declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %Val) +declare <2 x double> @llvm.sqrt.v2f64(<2 x double> %Val) diff --git a/test/CodeGen/Mips/msa/basic_operations.ll b/test/CodeGen/Mips/msa/basic_operations.ll new file mode 100644 index 0000000..0169a07 --- 
/dev/null +++ b/test/CodeGen/Mips/msa/basic_operations.ll @@ -0,0 +1,481 @@ +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=MIPS32-AE -check-prefix=MIPS32-BE %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=MIPS32-AE -check-prefix=MIPS32-LE %s + +@v4i8 = global <4 x i8> <i8 0, i8 0, i8 0, i8 0> +@v16i8 = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0> +@v8i16 = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0> +@v4i32 = global <4 x i32> <i32 0, i32 0, i32 0, i32 0> +@v2i64 = global <2 x i64> <i64 0, i64 0> +@i64 = global i64 0 + +define void @const_v16i8() nounwind { + ; MIPS32-AE: const_v16i8: + + store volatile <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8>*@v16i8 + ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 0 + + store volatile <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8>*@v16i8 + ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 1 + + store volatile <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 31>, <16 x i8>*@v16i8 + ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], %lo( + + store volatile <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6>, <16 x i8>*@v16i8 + ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], %lo( + + store volatile <16 x i8> <i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0>, <16 x i8>*@v16i8 + ; MIPS32-BE: ldi.h [[R1:\$w[0-9]+]], 256 + ; MIPS32-LE: ldi.h [[R1:\$w[0-9]+]], 1 + + store volatile <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4>, <16 x i8>*@v16i8 + ; MIPS32-BE-DAG: lui [[R2:\$[0-9]+]], 258 + ; MIPS32-LE-DAG: lui [[R2:\$[0-9]+]], 1027 + ; MIPS32-BE-DAG: ori [[R2]], [[R2]], 772 + ; 
MIPS32-LE-DAG: ori [[R2]], [[R2]], 513 + ; MIPS32-AE-DAG: fill.w [[R1:\$w[0-9]+]], [[R2]] + + store volatile <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, <16 x i8>*@v16i8 + ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], %lo( + + ret void + ; MIPS32-AE: .size const_v16i8 +} + +define void @const_v8i16() nounwind { + ; MIPS32-AE: const_v8i16: + + store volatile <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16>*@v8i16 + ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 0 + + store volatile <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16>*@v8i16 + ; MIPS32-AE: ldi.h [[R1:\$w[0-9]+]], 1 + + store volatile <8 x i16> <i16 1, i16 1, i16 1, i16 2, i16 1, i16 1, i16 1, i16 31>, <8 x i16>*@v8i16 + ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], %lo( + + store volatile <8 x i16> <i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028>, <8 x i16>*@v8i16 + ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 4 + + store volatile <8 x i16> <i16 1, i16 2, i16 1, i16 2, i16 1, i16 2, i16 1, i16 2>, <8 x i16>*@v8i16 + ; MIPS32-BE-DAG: lui [[R2:\$[0-9]+]], 1 + ; MIPS32-LE-DAG: lui [[R2:\$[0-9]+]], 2 + ; MIPS32-BE-DAG: ori [[R2]], [[R2]], 2 + ; MIPS32-LE-DAG: ori [[R2]], [[R2]], 1 + ; MIPS32-AE-DAG: fill.w [[R1:\$w[0-9]+]], [[R2]] + + store volatile <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 1, i16 2, i16 3, i16 4>, <8 x i16>*@v8i16 + ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], %lo( + + ret void + ; MIPS32-AE: .size const_v8i16 +} + +define void @const_v4i32() nounwind { + ; MIPS32-AE: const_v4i32: + + store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>*@v4i32 + ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 0 + + store volatile <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32>*@v4i32 + ; MIPS32-AE: ldi.w [[R1:\$w[0-9]+]], 1 + + store volatile <4 x i32> <i32 1, i32 1, i32 1, i32 31>, <4 x i32>*@v4i32 + ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], %lo( + + store volatile <4 x i32> <i32 16843009, i32 16843009, 
i32 16843009, i32 16843009>, <4 x i32>*@v4i32 + ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 1 + + store volatile <4 x i32> <i32 65537, i32 65537, i32 65537, i32 65537>, <4 x i32>*@v4i32 + ; MIPS32-AE: ldi.h [[R1:\$w[0-9]+]], 1 + + store volatile <4 x i32> <i32 1, i32 2, i32 1, i32 2>, <4 x i32>*@v4i32 + ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], %lo( + + store volatile <4 x i32> <i32 3, i32 4, i32 5, i32 6>, <4 x i32>*@v4i32 + ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], %lo( + + ret void + ; MIPS32-AE: .size const_v4i32 +} + +define void @const_v2i64() nounwind { + ; MIPS32-AE: const_v2i64: + + store volatile <2 x i64> <i64 0, i64 0>, <2 x i64>*@v2i64 + ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 0 + + store volatile <2 x i64> <i64 72340172838076673, i64 72340172838076673>, <2 x i64>*@v2i64 + ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 1 + + store volatile <2 x i64> <i64 281479271743489, i64 281479271743489>, <2 x i64>*@v2i64 + ; MIPS32-AE: ldi.h [[R1:\$w[0-9]+]], 1 + + store volatile <2 x i64> <i64 4294967297, i64 4294967297>, <2 x i64>*@v2i64 + ; MIPS32-AE: ldi.w [[R1:\$w[0-9]+]], 1 + + store volatile <2 x i64> <i64 1, i64 1>, <2 x i64>*@v2i64 + ; MIPS32-AE: ldi.d [[R1:\$w[0-9]+]], 1 + + store volatile <2 x i64> <i64 1, i64 31>, <2 x i64>*@v2i64 + ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], %lo( + + store volatile <2 x i64> <i64 3, i64 4>, <2 x i64>*@v2i64 + ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], %lo( + + ret void + ; MIPS32-AE: .size const_v2i64 +} + +define void @nonconst_v16i8(i8 %a, i8 %b, i8 %c, i8 %d, i8 %e, i8 %f, i8 %g, i8 %h) nounwind { + ; MIPS32-AE: nonconst_v16i8: + + %1 = insertelement <16 x i8> undef, i8 %a, i32 0 + %2 = insertelement <16 x i8> %1, i8 %b, i32 1 + %3 = insertelement <16 x i8> %2, i8 %c, i32 2 + %4 = insertelement <16 x i8> %3, i8 %d, i32 3 + %5 = insertelement <16 x i8> %4, i8 %e, i32 4 + %6 = insertelement <16 x i8> %5, i8 %f, i32 5 + %7 = insertelement <16 x i8> %6, i8 %g, i32 6 + %8 = insertelement <16 x i8> %7, i8 %h, i32 7 + %9 = insertelement <16 x i8> %8, i8 %h, i32 8 + 
%10 = insertelement <16 x i8> %9, i8 %h, i32 9 + %11 = insertelement <16 x i8> %10, i8 %h, i32 10 + %12 = insertelement <16 x i8> %11, i8 %h, i32 11 + %13 = insertelement <16 x i8> %12, i8 %h, i32 12 + %14 = insertelement <16 x i8> %13, i8 %h, i32 13 + %15 = insertelement <16 x i8> %14, i8 %h, i32 14 + %16 = insertelement <16 x i8> %15, i8 %h, i32 15 + ; MIPS32-AE-DAG: insert.b [[R1:\$w[0-9]+]][0], $4 + ; MIPS32-AE-DAG: insert.b [[R1]][1], $5 + ; MIPS32-AE-DAG: insert.b [[R1]][2], $6 + ; MIPS32-AE-DAG: insert.b [[R1]][3], $7 + ; MIPS32-BE-DAG: lbu [[R2:\$[0-9]+]], 19($sp) + ; MIPS32-LE-DAG: lbu [[R2:\$[0-9]+]], 16($sp) + ; MIPS32-AE-DAG: insert.b [[R1]][4], [[R2]] + ; MIPS32-BE-DAG: lbu [[R3:\$[0-9]+]], 23($sp) + ; MIPS32-LE-DAG: lbu [[R3:\$[0-9]+]], 20($sp) + ; MIPS32-AE-DAG: insert.b [[R1]][5], [[R3]] + ; MIPS32-BE-DAG: lbu [[R4:\$[0-9]+]], 27($sp) + ; MIPS32-LE-DAG: lbu [[R4:\$[0-9]+]], 24($sp) + ; MIPS32-AE-DAG: insert.b [[R1]][6], [[R4]] + ; MIPS32-BE-DAG: lbu [[R5:\$[0-9]+]], 31($sp) + ; MIPS32-LE-DAG: lbu [[R5:\$[0-9]+]], 28($sp) + ; MIPS32-AE-DAG: insert.b [[R1]][7], [[R5]] + ; MIPS32-AE-DAG: insert.b [[R1]][8], [[R5]] + ; MIPS32-AE-DAG: insert.b [[R1]][9], [[R5]] + ; MIPS32-AE-DAG: insert.b [[R1]][10], [[R5]] + ; MIPS32-AE-DAG: insert.b [[R1]][11], [[R5]] + ; MIPS32-AE-DAG: insert.b [[R1]][12], [[R5]] + ; MIPS32-AE-DAG: insert.b [[R1]][13], [[R5]] + ; MIPS32-AE-DAG: insert.b [[R1]][14], [[R5]] + ; MIPS32-AE-DAG: insert.b [[R1]][15], [[R5]] + + store volatile <16 x i8> %16, <16 x i8>*@v16i8 + + ret void + ; MIPS32-AE: .size nonconst_v16i8 +} + +define void @nonconst_v8i16(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e, i16 %f, i16 %g, i16 %h) nounwind { + ; MIPS32-AE: nonconst_v8i16: + + %1 = insertelement <8 x i16> undef, i16 %a, i32 0 + %2 = insertelement <8 x i16> %1, i16 %b, i32 1 + %3 = insertelement <8 x i16> %2, i16 %c, i32 2 + %4 = insertelement <8 x i16> %3, i16 %d, i32 3 + %5 = insertelement <8 x i16> %4, i16 %e, i32 4 + %6 = insertelement <8 x i16> %5, 
i16 %f, i32 5 + %7 = insertelement <8 x i16> %6, i16 %g, i32 6 + %8 = insertelement <8 x i16> %7, i16 %h, i32 7 + ; MIPS32-AE-DAG: insert.h [[R1:\$w[0-9]+]][0], $4 + ; MIPS32-AE-DAG: insert.h [[R1]][1], $5 + ; MIPS32-AE-DAG: insert.h [[R1]][2], $6 + ; MIPS32-AE-DAG: insert.h [[R1]][3], $7 + ; MIPS32-BE-DAG: lhu [[R2:\$[0-9]+]], 18($sp) + ; MIPS32-LE-DAG: lhu [[R2:\$[0-9]+]], 16($sp) + ; MIPS32-AE-DAG: insert.h [[R1]][4], [[R2]] + ; MIPS32-BE-DAG: lhu [[R2:\$[0-9]+]], 22($sp) + ; MIPS32-LE-DAG: lhu [[R2:\$[0-9]+]], 20($sp) + ; MIPS32-AE-DAG: insert.h [[R1]][5], [[R2]] + ; MIPS32-BE-DAG: lhu [[R2:\$[0-9]+]], 26($sp) + ; MIPS32-LE-DAG: lhu [[R2:\$[0-9]+]], 24($sp) + ; MIPS32-AE-DAG: insert.h [[R1]][6], [[R2]] + ; MIPS32-BE-DAG: lhu [[R2:\$[0-9]+]], 30($sp) + ; MIPS32-LE-DAG: lhu [[R2:\$[0-9]+]], 28($sp) + ; MIPS32-AE-DAG: insert.h [[R1]][7], [[R2]] + + store volatile <8 x i16> %8, <8 x i16>*@v8i16 + + ret void + ; MIPS32-AE: .size nonconst_v8i16 +} + +define void @nonconst_v4i32(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { + ; MIPS32-AE: nonconst_v4i32: + + %1 = insertelement <4 x i32> undef, i32 %a, i32 0 + %2 = insertelement <4 x i32> %1, i32 %b, i32 1 + %3 = insertelement <4 x i32> %2, i32 %c, i32 2 + %4 = insertelement <4 x i32> %3, i32 %d, i32 3 + ; MIPS32-AE: insert.w [[R1:\$w[0-9]+]][0], $4 + ; MIPS32-AE: insert.w [[R1]][1], $5 + ; MIPS32-AE: insert.w [[R1]][2], $6 + ; MIPS32-AE: insert.w [[R1]][3], $7 + + store volatile <4 x i32> %4, <4 x i32>*@v4i32 + + ret void + ; MIPS32-AE: .size nonconst_v4i32 +} + +define void @nonconst_v2i64(i64 %a, i64 %b) nounwind { + ; MIPS32-AE: nonconst_v2i64: + + %1 = insertelement <2 x i64> undef, i64 %a, i32 0 + %2 = insertelement <2 x i64> %1, i64 %b, i32 1 + ; MIPS32-AE: insert.w [[R1:\$w[0-9]+]][0], $4 + ; MIPS32-AE: insert.w [[R1]][1], $5 + ; MIPS32-AE: insert.w [[R1]][2], $6 + ; MIPS32-AE: insert.w [[R1]][3], $7 + + store volatile <2 x i64> %2, <2 x i64>*@v2i64 + + ret void + ; MIPS32-AE: .size nonconst_v2i64 +} + +define i32 
@extract_sext_v16i8() nounwind { + ; MIPS32-AE: extract_sext_v16i8: + + %1 = load <16 x i8>* @v16i8 + ; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]], + + %2 = add <16 x i8> %1, %1 + ; MIPS32-AE-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]] + + %3 = extractelement <16 x i8> %2, i32 1 + %4 = sext i8 %3 to i32 + ; MIPS32-AE-DAG: copy_s.b [[R3:\$[0-9]+]], [[R1]][1] + ; MIPS32-AE-NOT: sll + ; MIPS32-AE-NOT: sra + + ret i32 %4 + ; MIPS32-AE: .size extract_sext_v16i8 +} + +define i32 @extract_sext_v8i16() nounwind { + ; MIPS32-AE: extract_sext_v8i16: + + %1 = load <8 x i16>* @v8i16 + ; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]], + + %2 = add <8 x i16> %1, %1 + ; MIPS32-AE-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]] + + %3 = extractelement <8 x i16> %2, i32 1 + %4 = sext i16 %3 to i32 + ; MIPS32-AE-DAG: copy_s.h [[R3:\$[0-9]+]], [[R1]][1] + ; MIPS32-AE-NOT: sll + ; MIPS32-AE-NOT: sra + + ret i32 %4 + ; MIPS32-AE: .size extract_sext_v8i16 +} + +define i32 @extract_sext_v4i32() nounwind { + ; MIPS32-AE: extract_sext_v4i32: + + %1 = load <4 x i32>* @v4i32 + ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]], + + %2 = add <4 x i32> %1, %1 + ; MIPS32-AE-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] + + %3 = extractelement <4 x i32> %2, i32 1 + ; MIPS32-AE-DAG: copy_s.w [[R3:\$[0-9]+]], [[R1]][1] + + ret i32 %3 + ; MIPS32-AE: .size extract_sext_v4i32 +} + +define i64 @extract_sext_v2i64() nounwind { + ; MIPS32-AE: extract_sext_v2i64: + + %1 = load <2 x i64>* @v2i64 + ; MIPS32-AE-DAG: ld.d [[R1:\$w[0-9]+]], + + %2 = add <2 x i64> %1, %1 + ; MIPS32-AE-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] + + %3 = extractelement <2 x i64> %2, i32 1 + ; MIPS32-AE-DAG: copy_s.w [[R3:\$[0-9]+]], [[R1]][2] + ; MIPS32-AE-DAG: copy_s.w [[R4:\$[0-9]+]], [[R1]][3] + ; MIPS32-AE-NOT: sll + ; MIPS32-AE-NOT: sra + + ret i64 %3 + ; MIPS32-AE: .size extract_sext_v2i64 +} + +define i32 @extract_zext_v16i8() nounwind { + ; MIPS32-AE: extract_zext_v16i8: + + %1 = load <16 x i8>* @v16i8 + ; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]], + + 
%2 = add <16 x i8> %1, %1 + ; MIPS32-AE-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]] + + %3 = extractelement <16 x i8> %2, i32 1 + %4 = zext i8 %3 to i32 + ; MIPS32-AE-DAG: copy_u.b [[R3:\$[0-9]+]], [[R1]][1] + ; MIPS32-AE-NOT: andi + + ret i32 %4 + ; MIPS32-AE: .size extract_zext_v16i8 +} + +define i32 @extract_zext_v8i16() nounwind { + ; MIPS32-AE: extract_zext_v8i16: + + %1 = load <8 x i16>* @v8i16 + ; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]], + + %2 = add <8 x i16> %1, %1 + ; MIPS32-AE-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]] + + %3 = extractelement <8 x i16> %2, i32 1 + %4 = zext i16 %3 to i32 + ; MIPS32-AE-DAG: copy_u.h [[R3:\$[0-9]+]], [[R1]][1] + ; MIPS32-AE-NOT: andi + + ret i32 %4 + ; MIPS32-AE: .size extract_zext_v8i16 +} + +define i32 @extract_zext_v4i32() nounwind { + ; MIPS32-AE: extract_zext_v4i32: + + %1 = load <4 x i32>* @v4i32 + ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]], + + %2 = add <4 x i32> %1, %1 + ; MIPS32-AE-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] + + %3 = extractelement <4 x i32> %2, i32 1 + ; MIPS32-AE-DAG: copy_{{[su]}}.w [[R3:\$[0-9]+]], [[R1]][1] + + ret i32 %3 + ; MIPS32-AE: .size extract_zext_v4i32 +} + +define i64 @extract_zext_v2i64() nounwind { + ; MIPS32-AE: extract_zext_v2i64: + + %1 = load <2 x i64>* @v2i64 + ; MIPS32-AE-DAG: ld.d [[R1:\$w[0-9]+]], + + %2 = add <2 x i64> %1, %1 + ; MIPS32-AE-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] + + %3 = extractelement <2 x i64> %2, i32 1 + ; MIPS32-AE-DAG: copy_{{[su]}}.w [[R3:\$[0-9]+]], [[R1]][2] + ; MIPS32-AE-DAG: copy_{{[su]}}.w [[R4:\$[0-9]+]], [[R1]][3] + ; MIPS32-AE-NOT: andi + + ret i64 %3 + ; MIPS32-AE: .size extract_zext_v2i64 +} + +define void @insert_v16i8(i32 %a) nounwind { + ; MIPS32-AE: insert_v16i8: + + %1 = load <16 x i8>* @v16i8 + ; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]], + + %a2 = trunc i32 %a to i8 + %a3 = sext i8 %a2 to i32 + %a4 = trunc i32 %a3 to i8 + ; MIPS32-AE-NOT: andi + ; MIPS32-AE-NOT: sra + + %2 = insertelement <16 x i8> %1, i8 %a4, i32 1 + ; MIPS32-AE-DAG: 
insert.b [[R1]][1], $4 + + store <16 x i8> %2, <16 x i8>* @v16i8 + ; MIPS32-AE-DAG: st.b [[R1]] + + ret void + ; MIPS32-AE: .size insert_v16i8 +} + +define void @insert_v8i16(i32 %a) nounwind { + ; MIPS32-AE: insert_v8i16: + + %1 = load <8 x i16>* @v8i16 + ; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]], + + %a2 = trunc i32 %a to i16 + %a3 = sext i16 %a2 to i32 + %a4 = trunc i32 %a3 to i16 + ; MIPS32-AE-NOT: andi + ; MIPS32-AE-NOT: sra + + %2 = insertelement <8 x i16> %1, i16 %a4, i32 1 + ; MIPS32-AE-DAG: insert.h [[R1]][1], $4 + + store <8 x i16> %2, <8 x i16>* @v8i16 + ; MIPS32-AE-DAG: st.h [[R1]] + + ret void + ; MIPS32-AE: .size insert_v8i16 +} + +define void @insert_v4i32(i32 %a) nounwind { + ; MIPS32-AE: insert_v4i32: + + %1 = load <4 x i32>* @v4i32 + ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]], + + ; MIPS32-AE-NOT: andi + ; MIPS32-AE-NOT: sra + + %2 = insertelement <4 x i32> %1, i32 %a, i32 1 + ; MIPS32-AE-DAG: insert.w [[R1]][1], $4 + + store <4 x i32> %2, <4 x i32>* @v4i32 + ; MIPS32-AE-DAG: st.w [[R1]] + + ret void + ; MIPS32-AE: .size insert_v4i32 +} + +define void @insert_v2i64(i64 %a) nounwind { + ; MIPS32-AE: insert_v2i64: + + %1 = load <2 x i64>* @v2i64 + ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]], + + ; MIPS32-AE-NOT: andi + ; MIPS32-AE-NOT: sra + + %2 = insertelement <2 x i64> %1, i64 %a, i32 1 + ; MIPS32-AE-DAG: insert.w [[R1]][2], $4 + ; MIPS32-AE-DAG: insert.w [[R1]][3], $5 + + store <2 x i64> %2, <2 x i64>* @v2i64 + ; MIPS32-AE-DAG: st.w [[R1]] + + ret void + ; MIPS32-AE: .size insert_v2i64 +} + +define void @truncstore() nounwind { + ; MIPS32-AE: truncstore: + + store volatile <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1>, <4 x i8>*@v4i8 + ; TODO: What code should be emitted? 
+ + ret void + ; MIPS32-AE: .size truncstore +} diff --git a/test/CodeGen/Mips/msa/basic_operations_float.ll b/test/CodeGen/Mips/msa/basic_operations_float.ll new file mode 100644 index 0000000..1f53810 --- /dev/null +++ b/test/CodeGen/Mips/msa/basic_operations_float.ll @@ -0,0 +1,207 @@ +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=MIPS32 %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=MIPS32 %s + +@v4f32 = global <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0> +@v2f64 = global <2 x double> <double 0.0, double 0.0> +@f32 = global float 0.0 +@f64 = global double 0.0 + +define void @const_v4f32() nounwind { + ; MIPS32: const_v4f32: + + store volatile <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, <4 x float>*@v4f32 + ; MIPS32: ldi.b [[R1:\$w[0-9]+]], 0 + + store volatile <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, <4 x float>*@v4f32 + ; MIPS32: lui [[R1:\$[0-9]+]], 16256 + ; MIPS32: fill.w [[R2:\$w[0-9]+]], [[R1]] + + store volatile <4 x float> <float 1.0, float 1.0, float 1.0, float 31.0>, <4 x float>*@v4f32 + ; MIPS32: ld.w [[R1:\$w[0-9]+]], %lo( + + store volatile <4 x float> <float 65537.0, float 65537.0, float 65537.0, float 65537.0>, <4 x float>*@v4f32 + ; MIPS32: lui [[R1:\$[0-9]+]], 18304 + ; MIPS32: ori [[R2:\$[0-9]+]], [[R1]], 128 + ; MIPS32: fill.w [[R3:\$w[0-9]+]], [[R2]] + + store volatile <4 x float> <float 1.0, float 2.0, float 1.0, float 2.0>, <4 x float>*@v4f32 + ; MIPS32: ld.w [[R1:\$w[0-9]+]], %lo( + + store volatile <4 x float> <float 3.0, float 4.0, float 5.0, float 6.0>, <4 x float>*@v4f32 + ; MIPS32: ld.w [[R1:\$w[0-9]+]], %lo( + + ret void + ; MIPS32: .size const_v4f32 +} + +define void @const_v2f64() nounwind { + ; MIPS32: const_v2f64: + + store volatile <2 x double> <double 0.0, double 0.0>, <2 x double>*@v2f64 + ; MIPS32: ldi.b [[R1:\$w[0-9]+]], 0 + + store volatile <2 x double> <double 72340172838076673.0, double 72340172838076673.0>, <2 x 
double>*@v2f64 + ; MIPS32: ld.d [[R1:\$w[0-9]+]], %lo( + + store volatile <2 x double> <double 281479271743489.0, double 281479271743489.0>, <2 x double>*@v2f64 + ; MIPS32: ld.d [[R1:\$w[0-9]+]], %lo( + + store volatile <2 x double> <double 4294967297.0, double 4294967297.0>, <2 x double>*@v2f64 + ; MIPS32: ld.d [[R1:\$w[0-9]+]], %lo( + + store volatile <2 x double> <double 1.0, double 1.0>, <2 x double>*@v2f64 + ; MIPS32: ld.d [[R1:\$w[0-9]+]], %lo( + + store volatile <2 x double> <double 1.0, double 31.0>, <2 x double>*@v2f64 + ; MIPS32: ld.d [[R1:\$w[0-9]+]], %lo( + + store volatile <2 x double> <double 3.0, double 4.0>, <2 x double>*@v2f64 + ; MIPS32: ld.d [[R1:\$w[0-9]+]], %lo( + + ret void + ; MIPS32: .size const_v2f64 +} + +define void @nonconst_v4f32() nounwind { + ; MIPS32: nonconst_v4f32: + + %1 = load float *@f32 + %2 = insertelement <4 x float> undef, float %1, i32 0 + %3 = insertelement <4 x float> %2, float %1, i32 1 + %4 = insertelement <4 x float> %3, float %1, i32 2 + %5 = insertelement <4 x float> %4, float %1, i32 3 + store volatile <4 x float> %5, <4 x float>*@v4f32 + ; MIPS32: lwc1 $f[[R1:[0-9]+]], 0( + ; MIPS32: splati.w [[R2:\$w[0-9]+]], $w[[R1]] + + ret void + ; MIPS32: .size nonconst_v4f32 +} + +define void @nonconst_v2f64() nounwind { + ; MIPS32: nonconst_v2f64: + + %1 = load double *@f64 + %2 = insertelement <2 x double> undef, double %1, i32 0 + %3 = insertelement <2 x double> %2, double %1, i32 1 + store volatile <2 x double> %3, <2 x double>*@v2f64 + ; MIPS32: ldc1 $f[[R1:[0-9]+]], 0( + ; MIPS32: splati.d [[R2:\$w[0-9]+]], $w[[R1]] + + ret void + ; MIPS32: .size nonconst_v2f64 +} + +define float @extract_v4f32() nounwind { + ; MIPS32: extract_v4f32: + + %1 = load <4 x float>* @v4f32 + ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]], + + %2 = fadd <4 x float> %1, %1 + ; MIPS32-DAG: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] + + %3 = extractelement <4 x float> %2, i32 1 + ; Element 1 can be obtained by splatting it across the vector and extracting + ; 
$w0:sub_lo + ; MIPS32-DAG: splati.w $w0, [[R1]][1] + + ret float %3 + ; MIPS32: .size extract_v4f32 +} + +define float @extract_v4f32_elt0() nounwind { + ; MIPS32: extract_v4f32_elt0: + + %1 = load <4 x float>* @v4f32 + ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]], + + %2 = fadd <4 x float> %1, %1 + ; MIPS32-DAG: fadd.w $w0, [[R1]], [[R1]] + + %3 = extractelement <4 x float> %2, i32 0 + ; Element 0 can be obtained by extracting $w0:sub_lo ($f0) + ; MIPS32-NOT: copy_u.w + ; MIPS32-NOT: mtc1 + + ret float %3 + ; MIPS32: .size extract_v4f32_elt0 +} + +define double @extract_v2f64() nounwind { + ; MIPS32: extract_v2f64: + + %1 = load <2 x double>* @v2f64 + ; MIPS32-DAG: ld.d [[R1:\$w[0-9]+]], + + %2 = fadd <2 x double> %1, %1 + ; MIPS32-DAG: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] + + %3 = extractelement <2 x double> %2, i32 1 + ; Element 1 can be obtained by splatting it across the vector and extracting + ; $w0:sub_64 + ; MIPS32-DAG: splati.d $w0, [[R1]][1] + ; MIPS32-NOT: copy_u.w + ; MIPS32-NOT: mtc1 + ; MIPS32-NOT: mthc1 + ; MIPS32-NOT: sll + ; MIPS32-NOT: sra + + ret double %3 + ; MIPS32: .size extract_v2f64 +} + +define double @extract_v2f64_elt0() nounwind { + ; MIPS32: extract_v2f64_elt0: + + %1 = load <2 x double>* @v2f64 + ; MIPS32-DAG: ld.d [[R1:\$w[0-9]+]], + + %2 = fadd <2 x double> %1, %1 + ; MIPS32-DAG: fadd.d $w0, [[R1]], [[R1]] + + %3 = extractelement <2 x double> %2, i32 0 + ; Element 0 can be obtained by extracting $w0:sub_64 ($f0) + ; MIPS32-NOT: copy_u.w + ; MIPS32-NOT: mtc1 + ; MIPS32-NOT: mthc1 + ; MIPS32-NOT: sll + ; MIPS32-NOT: sra + + ret double %3 + ; MIPS32: .size extract_v2f64_elt0 +} + +define void @insert_v4f32(float %a) nounwind { + ; MIPS32: insert_v4f32: + + %1 = load <4 x float>* @v4f32 + ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]], + + %2 = insertelement <4 x float> %1, float %a, i32 1 + ; float argument passed in $f12 + ; MIPS32-DAG: insve.w [[R1]][1], $w12[0] + + store <4 x float> %2, <4 x float>* @v4f32 + ; MIPS32-DAG: st.w [[R1]] + + ret void + ; 
MIPS32: .size insert_v4f32 +} + +define void @insert_v2f64(double %a) nounwind { + ; MIPS32: insert_v2f64: + + %1 = load <2 x double>* @v2f64 + ; MIPS32-DAG: ld.d [[R1:\$w[0-9]+]], + + %2 = insertelement <2 x double> %1, double %a, i32 1 + ; double argument passed in $f12 + ; MIPS32-DAG: insve.d [[R1]][1], $w12[0] + + store <2 x double> %2, <2 x double>* @v2f64 + ; MIPS32-DAG: st.d [[R1]] + + ret void + ; MIPS32: .size insert_v2f64 +} diff --git a/test/CodeGen/Mips/msa/bit.ll b/test/CodeGen/Mips/msa/bit.ll new file mode 100644 index 0000000..59ddbe1 --- /dev/null +++ b/test/CodeGen/Mips/msa/bit.ll @@ -0,0 +1,537 @@ +; Test the MSA intrinsics that are encoded with the BIT instruction format. + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_sat_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_sat_s_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_sat_s_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_sat_s_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.sat.s.b(<16 x i8> %0, i32 7) + store <16 x i8> %1, <16 x i8>* @llvm_mips_sat_s_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.sat.s.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_sat_s_b_test: +; CHECK: ld.b +; CHECK: sat_s.b +; CHECK: st.b +; CHECK: .size llvm_mips_sat_s_b_test +; +@llvm_mips_sat_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_sat_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_sat_s_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_sat_s_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.sat.s.h(<8 x i16> %0, i32 7) + store <8 x i16> %1, <8 x i16>* 
@llvm_mips_sat_s_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.sat.s.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_sat_s_h_test: +; CHECK: ld.h +; CHECK: sat_s.h +; CHECK: st.h +; CHECK: .size llvm_mips_sat_s_h_test +; +@llvm_mips_sat_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_sat_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_sat_s_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_sat_s_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.sat.s.w(<4 x i32> %0, i32 7) + store <4 x i32> %1, <4 x i32>* @llvm_mips_sat_s_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.sat.s.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_sat_s_w_test: +; CHECK: ld.w +; CHECK: sat_s.w +; CHECK: st.w +; CHECK: .size llvm_mips_sat_s_w_test +; +@llvm_mips_sat_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_sat_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_sat_s_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_sat_s_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.sat.s.d(<2 x i64> %0, i32 7) + store <2 x i64> %1, <2 x i64>* @llvm_mips_sat_s_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.sat.s.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_sat_s_d_test: +; CHECK: ld.d +; CHECK: sat_s.d +; CHECK: st.d +; CHECK: .size llvm_mips_sat_s_d_test +; +@llvm_mips_sat_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_sat_u_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_sat_u_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_sat_u_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.sat.u.b(<16 x i8> %0, i32 7) + store <16 x i8> %1, <16 x i8>* @llvm_mips_sat_u_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.sat.u.b(<16 x 
i8>, i32) nounwind + +; CHECK: llvm_mips_sat_u_b_test: +; CHECK: ld.b +; CHECK: sat_u.b +; CHECK: st.b +; CHECK: .size llvm_mips_sat_u_b_test +; +@llvm_mips_sat_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_sat_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_sat_u_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_sat_u_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.sat.u.h(<8 x i16> %0, i32 7) + store <8 x i16> %1, <8 x i16>* @llvm_mips_sat_u_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.sat.u.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_sat_u_h_test: +; CHECK: ld.h +; CHECK: sat_u.h +; CHECK: st.h +; CHECK: .size llvm_mips_sat_u_h_test +; +@llvm_mips_sat_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_sat_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_sat_u_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_sat_u_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.sat.u.w(<4 x i32> %0, i32 7) + store <4 x i32> %1, <4 x i32>* @llvm_mips_sat_u_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.sat.u.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_sat_u_w_test: +; CHECK: ld.w +; CHECK: sat_u.w +; CHECK: st.w +; CHECK: .size llvm_mips_sat_u_w_test +; +@llvm_mips_sat_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_sat_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_sat_u_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_sat_u_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.sat.u.d(<2 x i64> %0, i32 7) + store <2 x i64> %1, <2 x i64>* @llvm_mips_sat_u_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.sat.u.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_sat_u_d_test: +; CHECK: ld.d +; CHECK: sat_u.d +; CHECK: st.d +; CHECK: .size llvm_mips_sat_u_d_test +; +@llvm_mips_slli_b_ARG1 = 
global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_slli_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_slli_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_slli_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.slli.b(<16 x i8> %0, i32 7) + store <16 x i8> %1, <16 x i8>* @llvm_mips_slli_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.slli.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_slli_b_test: +; CHECK: ld.b +; CHECK: slli.b +; CHECK: st.b +; CHECK: .size llvm_mips_slli_b_test +; +@llvm_mips_slli_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_slli_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_slli_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_slli_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.slli.h(<8 x i16> %0, i32 7) + store <8 x i16> %1, <8 x i16>* @llvm_mips_slli_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.slli.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_slli_h_test: +; CHECK: ld.h +; CHECK: slli.h +; CHECK: st.h +; CHECK: .size llvm_mips_slli_h_test +; +@llvm_mips_slli_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_slli_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_slli_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_slli_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.slli.w(<4 x i32> %0, i32 7) + store <4 x i32> %1, <4 x i32>* @llvm_mips_slli_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.slli.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_slli_w_test: +; CHECK: ld.w +; CHECK: slli.w +; CHECK: st.w +; CHECK: .size llvm_mips_slli_w_test +; +@llvm_mips_slli_d_ARG1 = global <2 x i64> <i64 0, i64 
1>, align 16 +@llvm_mips_slli_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_slli_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_slli_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %0, i32 7) + store <2 x i64> %1, <2 x i64>* @llvm_mips_slli_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.slli.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_slli_d_test: +; CHECK: ld.d +; CHECK: slli.d +; CHECK: st.d +; CHECK: .size llvm_mips_slli_d_test +; +@llvm_mips_srai_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_srai_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_srai_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_srai_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.srai.b(<16 x i8> %0, i32 7) + store <16 x i8> %1, <16 x i8>* @llvm_mips_srai_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.srai.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_srai_b_test: +; CHECK: ld.b +; CHECK: srai.b +; CHECK: st.b +; CHECK: .size llvm_mips_srai_b_test +; +@llvm_mips_srai_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_srai_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_srai_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_srai_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.srai.h(<8 x i16> %0, i32 7) + store <8 x i16> %1, <8 x i16>* @llvm_mips_srai_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.srai.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_srai_h_test: +; CHECK: ld.h +; CHECK: srai.h +; CHECK: st.h +; CHECK: .size llvm_mips_srai_h_test +; +@llvm_mips_srai_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_srai_w_RES = global 
<4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_srai_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_srai_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.srai.w(<4 x i32> %0, i32 7) + store <4 x i32> %1, <4 x i32>* @llvm_mips_srai_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.srai.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_srai_w_test: +; CHECK: ld.w +; CHECK: srai.w +; CHECK: st.w +; CHECK: .size llvm_mips_srai_w_test +; +@llvm_mips_srai_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_srai_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_srai_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_srai_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.srai.d(<2 x i64> %0, i32 7) + store <2 x i64> %1, <2 x i64>* @llvm_mips_srai_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.srai.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_srai_d_test: +; CHECK: ld.d +; CHECK: srai.d +; CHECK: st.d +; CHECK: .size llvm_mips_srai_d_test +; +@llvm_mips_srari_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_srari_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_srari_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_srari_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.srari.b(<16 x i8> %0, i32 7) + store <16 x i8> %1, <16 x i8>* @llvm_mips_srari_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.srari.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_srari_b_test: +; CHECK: ld.b +; CHECK: srari.b +; CHECK: st.b +; CHECK: .size llvm_mips_srari_b_test +; +@llvm_mips_srari_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_srari_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 
16 + +define void @llvm_mips_srari_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_srari_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.srari.h(<8 x i16> %0, i32 7) + store <8 x i16> %1, <8 x i16>* @llvm_mips_srari_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.srari.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_srari_h_test: +; CHECK: ld.h +; CHECK: srari.h +; CHECK: st.h +; CHECK: .size llvm_mips_srari_h_test +; +@llvm_mips_srari_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_srari_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_srari_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_srari_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.srari.w(<4 x i32> %0, i32 7) + store <4 x i32> %1, <4 x i32>* @llvm_mips_srari_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.srari.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_srari_w_test: +; CHECK: ld.w +; CHECK: srari.w +; CHECK: st.w +; CHECK: .size llvm_mips_srari_w_test +; +@llvm_mips_srari_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_srari_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_srari_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_srari_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.srari.d(<2 x i64> %0, i32 7) + store <2 x i64> %1, <2 x i64>* @llvm_mips_srari_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.srari.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_srari_d_test: +; CHECK: ld.d +; CHECK: srari.d +; CHECK: st.d +; CHECK: .size llvm_mips_srari_d_test +; +@llvm_mips_srli_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_srli_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_srli_b_test() nounwind { +entry: + %0 = load <16 x i8>* 
@llvm_mips_srli_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.srli.b(<16 x i8> %0, i32 7) + store <16 x i8> %1, <16 x i8>* @llvm_mips_srli_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.srli.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_srli_b_test: +; CHECK: ld.b +; CHECK: srli.b +; CHECK: st.b +; CHECK: .size llvm_mips_srli_b_test +; +@llvm_mips_srli_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_srli_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_srli_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_srli_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.srli.h(<8 x i16> %0, i32 7) + store <8 x i16> %1, <8 x i16>* @llvm_mips_srli_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.srli.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_srli_h_test: +; CHECK: ld.h +; CHECK: srli.h +; CHECK: st.h +; CHECK: .size llvm_mips_srli_h_test +; +@llvm_mips_srli_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_srli_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_srli_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_srli_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.srli.w(<4 x i32> %0, i32 7) + store <4 x i32> %1, <4 x i32>* @llvm_mips_srli_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.srli.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_srli_w_test: +; CHECK: ld.w +; CHECK: srli.w +; CHECK: st.w +; CHECK: .size llvm_mips_srli_w_test +; +@llvm_mips_srli_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_srli_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_srli_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_srli_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 7) + store <2 x i64> %1, <2 x i64>* @llvm_mips_srli_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.srli.d(<2 x 
i64>, i32) nounwind + +; CHECK: llvm_mips_srli_d_test: +; CHECK: ld.d +; CHECK: srli.d +; CHECK: st.d +; CHECK: .size llvm_mips_srli_d_test +; +@llvm_mips_srlri_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_srlri_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_srlri_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_srlri_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.srlri.b(<16 x i8> %0, i32 7) + store <16 x i8> %1, <16 x i8>* @llvm_mips_srlri_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.srlri.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_srlri_b_test: +; CHECK: ld.b +; CHECK: srlri.b +; CHECK: st.b +; CHECK: .size llvm_mips_srlri_b_test +; +@llvm_mips_srlri_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_srlri_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_srlri_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_srlri_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.srlri.h(<8 x i16> %0, i32 7) + store <8 x i16> %1, <8 x i16>* @llvm_mips_srlri_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.srlri.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_srlri_h_test: +; CHECK: ld.h +; CHECK: srlri.h +; CHECK: st.h +; CHECK: .size llvm_mips_srlri_h_test +; +@llvm_mips_srlri_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_srlri_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_srlri_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_srlri_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.srlri.w(<4 x i32> %0, i32 7) + store <4 x i32> %1, <4 x i32>* @llvm_mips_srlri_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.srlri.w(<4 x 
i32>, i32) nounwind + +; CHECK: llvm_mips_srlri_w_test: +; CHECK: ld.w +; CHECK: srlri.w +; CHECK: st.w +; CHECK: .size llvm_mips_srlri_w_test +; +@llvm_mips_srlri_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_srlri_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_srlri_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_srlri_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.srlri.d(<2 x i64> %0, i32 7) + store <2 x i64> %1, <2 x i64>* @llvm_mips_srlri_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.srlri.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_srlri_d_test: +; CHECK: ld.d +; CHECK: srlri.d +; CHECK: st.d +; CHECK: .size llvm_mips_srlri_d_test +; diff --git a/test/CodeGen/Mips/msa/bitcast.ll b/test/CodeGen/Mips/msa/bitcast.ll new file mode 100644 index 0000000..8e880ec --- /dev/null +++ b/test/CodeGen/Mips/msa/bitcast.ll @@ -0,0 +1,1210 @@ +; Test the bitcast operation for big-endian and little-endian. + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=BIGENDIAN %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=LITENDIAN %s + +define void @v16i8_to_v16i8(<16 x i8>* %src, <16 x i8>* %dst) nounwind { +entry: + %0 = load volatile <16 x i8>* %src + %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0) + %2 = bitcast <16 x i8> %1 to <16 x i8> + %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2) + store <16 x i8> %3, <16 x i8>* %dst + ret void +} + +; LITENDIAN: v16i8_to_v16i8: +; LITENDIAN: ld.b [[R1:\$w[0-9]+]], +; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.b [[R3]], +; LITENDIAN: .size v16i8_to_v16i8 + +; BIGENDIAN: v16i8_to_v16i8: +; BIGENDIAN: ld.b [[R1:\$w[0-9]+]], +; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; BIGENDIAN: st.b [[R3]], +; BIGENDIAN: .size v16i8_to_v16i8 + 
+define void @v16i8_to_v8i16(<16 x i8>* %src, <8 x i16>* %dst) nounwind { +entry: + %0 = load volatile <16 x i8>* %src + %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0) + %2 = bitcast <16 x i8> %1 to <8 x i16> + %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2) + store <8 x i16> %3, <8 x i16>* %dst + ret void +} + +; LITENDIAN: v16i8_to_v8i16: +; LITENDIAN: ld.b [[R1:\$w[0-9]+]], +; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.h [[R3]], +; LITENDIAN: .size v16i8_to_v8i16 + +; BIGENDIAN: v16i8_to_v8i16: +; BIGENDIAN: ld.b [[R1:\$w[0-9]+]], +; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 177 +; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]] +; BIGENDIAN: st.h [[R4]], +; BIGENDIAN: .size v16i8_to_v8i16 + +; We can't prevent the (store (bitcast X), Y) DAG Combine here because there +; are no operations for v8f16 to put in the way. 
+define void @v16i8_to_v8f16(<16 x i8>* %src, <8 x half>* %dst) nounwind { +entry: + %0 = load volatile <16 x i8>* %src + %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0) + %2 = bitcast <16 x i8> %1 to <8 x half> + store <8 x half> %2, <8 x half>* %dst + ret void +} + +; LITENDIAN: v16i8_to_v8f16: +; LITENDIAN: ld.b [[R1:\$w[0-9]+]], +; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: st.b [[R2]], +; LITENDIAN: .size v16i8_to_v8f16 + +; BIGENDIAN: v16i8_to_v8f16: +; BIGENDIAN: ld.b [[R1:\$w[0-9]+]], +; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: st.b [[R2]], +; BIGENDIAN: .size v16i8_to_v8f16 + +define void @v16i8_to_v4i32(<16 x i8>* %src, <4 x i32>* %dst) nounwind { +entry: + %0 = load volatile <16 x i8>* %src + %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0) + %2 = bitcast <16 x i8> %1 to <4 x i32> + %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2) + store <4 x i32> %3, <4 x i32>* %dst + ret void +} + +; LITENDIAN: v16i8_to_v4i32: +; LITENDIAN: ld.b [[R1:\$w[0-9]+]], +; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.w [[R3]], +; LITENDIAN: .size v16i8_to_v4i32 + +; BIGENDIAN: v16i8_to_v4i32: +; BIGENDIAN: ld.b [[R1:\$w[0-9]+]], +; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27 +; BIGENDIAN: addv.w [[R4:\$w[0-9]+]], [[R3]], [[R3]] +; BIGENDIAN: st.w [[R4]], +; BIGENDIAN: .size v16i8_to_v4i32 + +define void @v16i8_to_v4f32(<16 x i8>* %src, <4 x float>* %dst) nounwind { +entry: + %0 = load volatile <16 x i8>* %src + %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0) + %2 = bitcast <16 x i8> %1 to <4 x float> + %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2) + store <4 x float> %3, <4 x float>* %dst + ret void +} + +; LITENDIAN: v16i8_to_v4f32: +; LITENDIAN: ld.b [[R1:\$w[0-9]+]], +; 
LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.w [[R3]], +; LITENDIAN: .size v16i8_to_v4f32 + +; BIGENDIAN: v16i8_to_v4f32: +; BIGENDIAN: ld.b [[R1:\$w[0-9]+]], +; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27 +; BIGENDIAN: fadd.w [[R4:\$w[0-9]+]], [[R3]], [[R3]] +; BIGENDIAN: st.w [[R4]], +; BIGENDIAN: .size v16i8_to_v4f32 + +define void @v16i8_to_v2i64(<16 x i8>* %src, <2 x i64>* %dst) nounwind { +entry: + %0 = load volatile <16 x i8>* %src + %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0) + %2 = bitcast <16 x i8> %1 to <2 x i64> + %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2) + store <2 x i64> %3, <2 x i64>* %dst + ret void +} + +; LITENDIAN: v16i8_to_v2i64: +; LITENDIAN: ld.b [[R1:\$w[0-9]+]], +; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.d [[R3]], +; LITENDIAN: .size v16i8_to_v2i64 + +; BIGENDIAN: v16i8_to_v2i64: +; BIGENDIAN: ld.b [[R1:\$w[0-9]+]], +; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27 +; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R3]], 177 +; BIGENDIAN: addv.d [[R4:\$w[0-9]+]], [[R3]], [[R3]] +; BIGENDIAN: st.d [[R4]], +; BIGENDIAN: .size v16i8_to_v2i64 + +define void @v16i8_to_v2f64(<16 x i8>* %src, <2 x double>* %dst) nounwind { +entry: + %0 = load volatile <16 x i8>* %src + %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0) + %2 = bitcast <16 x i8> %1 to <2 x double> + %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2) + store <2 x double> %3, <2 x double>* %dst + ret void +} + +; LITENDIAN: v16i8_to_v2f64: +; LITENDIAN: ld.b [[R1:\$w[0-9]+]], +; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.d [[R3]], +; LITENDIAN: .size 
v16i8_to_v2f64 + +; BIGENDIAN: v16i8_to_v2f64: +; BIGENDIAN: ld.b [[R1:\$w[0-9]+]], +; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27 +; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R3]], 177 +; BIGENDIAN: fadd.d [[R4:\$w[0-9]+]], [[R3]], [[R3]] +; BIGENDIAN: st.d [[R4]], +; BIGENDIAN: .size v16i8_to_v2f64 + +define void @v8i16_to_v16i8(<8 x i16>* %src, <16 x i8>* %dst) nounwind { +entry: + %0 = load volatile <8 x i16>* %src + %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0) + %2 = bitcast <8 x i16> %1 to <16 x i8> + %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2) + store <16 x i8> %3, <16 x i8>* %dst + ret void +} + +; LITENDIAN: v8i16_to_v16i8: +; LITENDIAN: ld.h [[R1:\$w[0-9]+]], +; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.b [[R3]], +; LITENDIAN: .size v8i16_to_v16i8 + +; BIGENDIAN: v8i16_to_v16i8: +; BIGENDIAN: ld.h [[R1:\$w[0-9]+]], +; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 177 +; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R2]], [[R2]] +; BIGENDIAN: st.b [[R4]], +; BIGENDIAN: .size v8i16_to_v16i8 + +define void @v8i16_to_v8i16(<8 x i16>* %src, <8 x i16>* %dst) nounwind { +entry: + %0 = load volatile <8 x i16>* %src + %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0) + %2 = bitcast <8 x i16> %1 to <8 x i16> + %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2) + store <8 x i16> %3, <8 x i16>* %dst + ret void +} + +; LITENDIAN: v8i16_to_v8i16: +; LITENDIAN: ld.h [[R1:\$w[0-9]+]], +; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.h [[R3]], +; LITENDIAN: .size v8i16_to_v8i16 + +; BIGENDIAN: v8i16_to_v8i16: +; BIGENDIAN: ld.h [[R1:\$w[0-9]+]], +; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: addv.h 
[[R3:\$w[0-9]+]], [[R2]], [[R2]] +; BIGENDIAN: st.h [[R3]], +; BIGENDIAN: .size v8i16_to_v8i16 + +; We can't prevent the (store (bitcast X), Y) DAG Combine here because there +; are no operations for v8f16 to put in the way. +define void @v8i16_to_v8f16(<8 x i16>* %src, <8 x half>* %dst) nounwind { +entry: + %0 = load volatile <8 x i16>* %src + %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0) + %2 = bitcast <8 x i16> %1 to <8 x half> + store <8 x half> %2, <8 x half>* %dst + ret void +} + +; LITENDIAN: v8i16_to_v8f16: +; LITENDIAN: ld.h [[R1:\$w[0-9]+]], +; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: st.h [[R2]], +; LITENDIAN: .size v8i16_to_v8f16 + +; BIGENDIAN: v8i16_to_v8f16: +; BIGENDIAN: ld.h [[R1:\$w[0-9]+]], +; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: st.h [[R2]], +; BIGENDIAN: .size v8i16_to_v8f16 + +define void @v8i16_to_v4i32(<8 x i16>* %src, <4 x i32>* %dst) nounwind { +entry: + %0 = load volatile <8 x i16>* %src + %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0) + %2 = bitcast <8 x i16> %1 to <4 x i32> + %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2) + store <4 x i32> %3, <4 x i32>* %dst + ret void +} + +; LITENDIAN: v8i16_to_v4i32: +; LITENDIAN: ld.h [[R1:\$w[0-9]+]], +; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.w [[R3]], +; LITENDIAN: .size v8i16_to_v4i32 + +; BIGENDIAN: v8i16_to_v4i32: +; BIGENDIAN: ld.h [[R1:\$w[0-9]+]], +; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 177 +; BIGENDIAN: addv.w [[R4:\$w[0-9]+]], [[R3]], [[R3]] +; BIGENDIAN: st.w [[R4]], +; BIGENDIAN: .size v8i16_to_v4i32 + +define void @v8i16_to_v4f32(<8 x i16>* %src, <4 x float>* %dst) nounwind { +entry: + %0 = load volatile <8 x i16>* %src + %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0) + %2 = bitcast <8 x i16> %1 to 
<4 x float> + %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2) + store <4 x float> %3, <4 x float>* %dst + ret void +} + +; LITENDIAN: v8i16_to_v4f32: +; LITENDIAN: ld.h [[R1:\$w[0-9]+]], +; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.w [[R3]], +; LITENDIAN: .size v8i16_to_v4f32 + +; BIGENDIAN: v8i16_to_v4f32: +; BIGENDIAN: ld.h [[R1:\$w[0-9]+]], +; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 177 +; BIGENDIAN: fadd.w [[R4:\$w[0-9]+]], [[R3]], [[R3]] +; BIGENDIAN: st.w [[R4]], +; BIGENDIAN: .size v8i16_to_v4f32 + +define void @v8i16_to_v2i64(<8 x i16>* %src, <2 x i64>* %dst) nounwind { +entry: + %0 = load volatile <8 x i16>* %src + %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0) + %2 = bitcast <8 x i16> %1 to <2 x i64> + %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2) + store <2 x i64> %3, <2 x i64>* %dst + ret void +} + +; LITENDIAN: v8i16_to_v2i64: +; LITENDIAN: ld.h [[R1:\$w[0-9]+]], +; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.d [[R3]], +; LITENDIAN: .size v8i16_to_v2i64 + +; BIGENDIAN: v8i16_to_v2i64: +; BIGENDIAN: ld.h [[R1:\$w[0-9]+]], +; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 27 +; BIGENDIAN: addv.d [[R4:\$w[0-9]+]], [[R3]], [[R3]] +; BIGENDIAN: st.d [[R4]], +; BIGENDIAN: .size v8i16_to_v2i64 + +define void @v8i16_to_v2f64(<8 x i16>* %src, <2 x double>* %dst) nounwind { +entry: + %0 = load volatile <8 x i16>* %src + %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0) + %2 = bitcast <8 x i16> %1 to <2 x double> + %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2) + store <2 x double> %3, <2 x double>* %dst + ret void +} + +; LITENDIAN: v8i16_to_v2f64: +; LITENDIAN: ld.h 
[[R1:\$w[0-9]+]], +; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.d [[R3]], +; LITENDIAN: .size v8i16_to_v2f64 + +; BIGENDIAN: v8i16_to_v2f64: +; BIGENDIAN: ld.h [[R1:\$w[0-9]+]], +; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 27 +; BIGENDIAN: fadd.d [[R4:\$w[0-9]+]], [[R3]], [[R3]] +; BIGENDIAN: st.d [[R4]], +; BIGENDIAN: .size v8i16_to_v2f64 + +;---- +; We can't prevent the (bitcast (load X)) DAG Combine here because there +; are no operations for v8f16 to put in the way. +define void @v8f16_to_v16i8(<8 x half>* %src, <16 x i8>* %dst) nounwind { +entry: + %0 = load volatile <8 x half>* %src + %1 = bitcast <8 x half> %0 to <16 x i8> + %2 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %1, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* %dst + ret void +} + +; LITENDIAN: v8f16_to_v16i8: +; LITENDIAN: ld.h [[R1:\$w[0-9]+]], +; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: st.b [[R3]], +; LITENDIAN: .size v8f16_to_v16i8 + +; BIGENDIAN: v8f16_to_v16i8: +; BIGENDIAN: ld.h [[R1:\$w[0-9]+]], +; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R1]], 177 +; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]] +; BIGENDIAN: st.b [[R4]], +; BIGENDIAN: .size v8f16_to_v16i8 + +; We can't prevent the (bitcast (load X)) DAG Combine here because there +; are no operations for v8f16 to put in the way. 
+define void @v8f16_to_v8i16(<8 x half>* %src, <8 x i16>* %dst) nounwind { +entry: + %0 = load volatile <8 x half>* %src + %1 = bitcast <8 x half> %0 to <8 x i16> + %2 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %1, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* %dst + ret void +} + +; LITENDIAN: v8f16_to_v8i16: +; LITENDIAN: ld.h [[R1:\$w[0-9]+]], +; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: st.h [[R2]], +; LITENDIAN: .size v8f16_to_v8i16 + +; BIGENDIAN: v8f16_to_v8i16: +; BIGENDIAN: ld.h [[R1:\$w[0-9]+]], +; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: st.h [[R2]], +; BIGENDIAN: .size v8f16_to_v8i16 + +; We can't prevent the (bitcast (load X)) DAG Combine here because there +; are no operations for v8f16 to put in the way. +; We can't prevent the (store (bitcast X), Y) DAG Combine here because there +; are no operations for v8f16 to put in the way. +define void @v8f16_to_v8f16(<8 x half>* %src, <8 x half>* %dst) nounwind { +entry: + %0 = load volatile <8 x half>* %src + %1 = bitcast <8 x half> %0 to <8 x half> + store <8 x half> %1, <8 x half>* %dst + ret void +} + +; LITENDIAN: v8f16_to_v8f16: +; LITENDIAN: ld.h [[R1:\$w[0-9]+]], +; LITENDIAN: st.h [[R1]], +; LITENDIAN: .size v8f16_to_v8f16 + +; BIGENDIAN: v8f16_to_v8f16: +; BIGENDIAN: ld.h [[R1:\$w[0-9]+]], +; BIGENDIAN: st.h [[R1]], +; BIGENDIAN: .size v8f16_to_v8f16 + +; We can't prevent the (bitcast (load X)) DAG Combine here because there +; are no operations for v8f16 to put in the way. 
+define void @v8f16_to_v4i32(<8 x half>* %src, <4 x i32>* %dst) nounwind { +entry: + %0 = load volatile <8 x half>* %src + %1 = bitcast <8 x half> %0 to <4 x i32> + %2 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %1, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* %dst + ret void +} + +; LITENDIAN: v8f16_to_v4i32: +; LITENDIAN: ld.h [[R1:\$w[0-9]+]], +; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: st.w [[R2]], +; LITENDIAN: .size v8f16_to_v4i32 + +; BIGENDIAN: v8f16_to_v4i32: +; BIGENDIAN: ld.h [[R1:\$w[0-9]+]], +; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 177 +; BIGENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; BIGENDIAN: st.w [[R3]], +; BIGENDIAN: .size v8f16_to_v4i32 + +; We can't prevent the (bitcast (load X)) DAG Combine here because there +; are no operations for v8f16 to put in the way. +define void @v8f16_to_v4f32(<8 x half>* %src, <4 x float>* %dst) nounwind { +entry: + %0 = load volatile <8 x half>* %src + %1 = bitcast <8 x half> %0 to <4 x float> + %2 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %1, <4 x float> %1) + store <4 x float> %2, <4 x float>* %dst + ret void +} + +; LITENDIAN: v8f16_to_v4f32: +; LITENDIAN: ld.h [[R1:\$w[0-9]+]], +; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: st.w [[R2]], +; LITENDIAN: .size v8f16_to_v4f32 + +; BIGENDIAN: v8f16_to_v4f32: +; BIGENDIAN: ld.h [[R1:\$w[0-9]+]], +; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 177 +; BIGENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; BIGENDIAN: st.w [[R3]], +; BIGENDIAN: .size v8f16_to_v4f32 + +; We can't prevent the (bitcast (load X)) DAG Combine here because there +; are no operations for v8f16 to put in the way. 
+define void @v8f16_to_v2i64(<8 x half>* %src, <2 x i64>* %dst) nounwind { +entry: + %0 = load volatile <8 x half>* %src + %1 = bitcast <8 x half> %0 to <2 x i64> + %2 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %1, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* %dst + ret void +} + +; LITENDIAN: v8f16_to_v2i64: +; LITENDIAN: ld.h [[R1:\$w[0-9]+]], +; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: st.d [[R2]], +; LITENDIAN: .size v8f16_to_v2i64 + +; BIGENDIAN: v8f16_to_v2i64: +; BIGENDIAN: ld.h [[R1:\$w[0-9]+]], +; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 27 +; BIGENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; BIGENDIAN: st.d [[R3]], +; BIGENDIAN: .size v8f16_to_v2i64 + +; We can't prevent the (bitcast (load X)) DAG Combine here because there +; are no operations for v8f16 to put in the way. +define void @v8f16_to_v2f64(<8 x half>* %src, <2 x double>* %dst) nounwind { +entry: + %0 = load volatile <8 x half>* %src + %1 = bitcast <8 x half> %0 to <2 x double> + %2 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %1, <2 x double> %1) + store <2 x double> %2, <2 x double>* %dst + ret void +} + +; LITENDIAN: v8f16_to_v2f64: +; LITENDIAN: ld.h [[R1:\$w[0-9]+]], +; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: st.d [[R2]], +; LITENDIAN: .size v8f16_to_v2f64 + +; BIGENDIAN: v8f16_to_v2f64: +; BIGENDIAN: ld.h [[R1:\$w[0-9]+]], +; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 27 +; BIGENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; BIGENDIAN: st.d [[R3]], +; BIGENDIAN: .size v8f16_to_v2f64 +;---- + +define void @v4i32_to_v16i8(<4 x i32>* %src, <16 x i8>* %dst) nounwind { +entry: + %0 = load volatile <4 x i32>* %src + %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0) + %2 = bitcast <4 x i32> %1 to <16 x i8> + %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2) + store <16 x i8> %3, <16 x i8>* %dst + ret void +} + +; LITENDIAN: v4i32_to_v16i8: +; LITENDIAN: ld.w [[R1:\$w[0-9]+]], +; 
LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.b [[R3]], +; LITENDIAN: .size v4i32_to_v16i8 + +; BIGENDIAN: v4i32_to_v16i8: +; BIGENDIAN: ld.w [[R1:\$w[0-9]+]], +; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27 +; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]] +; BIGENDIAN: st.b [[R4]], +; BIGENDIAN: .size v4i32_to_v16i8 + +define void @v4i32_to_v8i16(<4 x i32>* %src, <8 x i16>* %dst) nounwind { +entry: + %0 = load volatile <4 x i32>* %src + %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0) + %2 = bitcast <4 x i32> %1 to <8 x i16> + %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2) + store <8 x i16> %3, <8 x i16>* %dst + ret void +} + +; LITENDIAN: v4i32_to_v8i16: +; LITENDIAN: ld.w [[R1:\$w[0-9]+]], +; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.h [[R3]], +; LITENDIAN: .size v4i32_to_v8i16 + +; BIGENDIAN: v4i32_to_v8i16: +; BIGENDIAN: ld.w [[R1:\$w[0-9]+]], +; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 177 +; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]] +; BIGENDIAN: st.h [[R4]], +; BIGENDIAN: .size v4i32_to_v8i16 + +; We can't prevent the (store (bitcast X), Y) DAG Combine here because there +; are no operations for v8f16 to put in the way. 
+define void @v4i32_to_v8f16(<4 x i32>* %src, <8 x half>* %dst) nounwind { +entry: + %0 = load volatile <4 x i32>* %src + %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0) + %2 = bitcast <4 x i32> %1 to <8 x half> + store <8 x half> %2, <8 x half>* %dst + ret void +} + +; LITENDIAN: v4i32_to_v8f16: +; LITENDIAN: ld.w [[R1:\$w[0-9]+]], +; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: st.w [[R2]], +; LITENDIAN: .size v4i32_to_v8f16 + +; BIGENDIAN: v4i32_to_v8f16: +; BIGENDIAN: ld.w [[R1:\$w[0-9]+]], +; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: st.w [[R2]], +; BIGENDIAN: .size v4i32_to_v8f16 + +define void @v4i32_to_v4i32(<4 x i32>* %src, <4 x i32>* %dst) nounwind { +entry: + %0 = load volatile <4 x i32>* %src + %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0) + %2 = bitcast <4 x i32> %1 to <4 x i32> + %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2) + store <4 x i32> %3, <4 x i32>* %dst + ret void +} + +; LITENDIAN: v4i32_to_v4i32: +; LITENDIAN: ld.w [[R1:\$w[0-9]+]], +; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.w [[R3]], +; LITENDIAN: .size v4i32_to_v4i32 + +; BIGENDIAN: v4i32_to_v4i32: +; BIGENDIAN: ld.w [[R1:\$w[0-9]+]], +; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; BIGENDIAN: st.w [[R3]], +; BIGENDIAN: .size v4i32_to_v4i32 + +define void @v4i32_to_v4f32(<4 x i32>* %src, <4 x float>* %dst) nounwind { +entry: + %0 = load volatile <4 x i32>* %src + %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0) + %2 = bitcast <4 x i32> %1 to <4 x float> + %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2) + store <4 x float> %3, <4 x float>* %dst + ret void +} + +; LITENDIAN: v4i32_to_v4f32: +; LITENDIAN: ld.w [[R1:\$w[0-9]+]], +; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; 
LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.w [[R3]], +; LITENDIAN: .size v4i32_to_v4f32 + +; BIGENDIAN: v4i32_to_v4f32: +; BIGENDIAN: ld.w [[R1:\$w[0-9]+]], +; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; BIGENDIAN: st.w [[R3]], +; BIGENDIAN: .size v4i32_to_v4f32 + +define void @v4i32_to_v2i64(<4 x i32>* %src, <2 x i64>* %dst) nounwind { +entry: + %0 = load volatile <4 x i32>* %src + %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0) + %2 = bitcast <4 x i32> %1 to <2 x i64> + %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2) + store <2 x i64> %3, <2 x i64>* %dst + ret void +} + +; LITENDIAN: v4i32_to_v2i64: +; LITENDIAN: ld.w [[R1:\$w[0-9]+]], +; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.d [[R3]], +; LITENDIAN: .size v4i32_to_v2i64 + +; BIGENDIAN: v4i32_to_v2i64: +; BIGENDIAN: ld.w [[R1:\$w[0-9]+]], +; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177 +; BIGENDIAN: addv.d [[R4:\$w[0-9]+]], [[R3]], [[R3]] +; BIGENDIAN: st.d [[R4]], +; BIGENDIAN: .size v4i32_to_v2i64 + +define void @v4i32_to_v2f64(<4 x i32>* %src, <2 x double>* %dst) nounwind { +entry: + %0 = load volatile <4 x i32>* %src + %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0) + %2 = bitcast <4 x i32> %1 to <2 x double> + %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2) + store <2 x double> %3, <2 x double>* %dst + ret void +} + +; LITENDIAN: v4i32_to_v2f64: +; LITENDIAN: ld.w [[R1:\$w[0-9]+]], +; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.d [[R3]], +; LITENDIAN: .size v4i32_to_v2f64 + +; BIGENDIAN: v4i32_to_v2f64: +; BIGENDIAN: ld.w [[R1:\$w[0-9]+]], +; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: 
shf.w [[R3:\$w[0-9]+]], [[R2]], 177 +; BIGENDIAN: fadd.d [[R4:\$w[0-9]+]], [[R3]], [[R3]] +; BIGENDIAN: st.d [[R4]], +; BIGENDIAN: .size v4i32_to_v2f64 + +define void @v4f32_to_v16i8(<4 x float>* %src, <16 x i8>* %dst) nounwind { +entry: + %0 = load volatile <4 x float>* %src + %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0) + %2 = bitcast <4 x float> %1 to <16 x i8> + %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2) + store <16 x i8> %3, <16 x i8>* %dst + ret void +} + +; LITENDIAN: v4f32_to_v16i8: +; LITENDIAN: ld.w [[R1:\$w[0-9]+]], +; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.b [[R3]], +; LITENDIAN: .size v4f32_to_v16i8 + +; BIGENDIAN: v4f32_to_v16i8: +; BIGENDIAN: ld.w [[R1:\$w[0-9]+]], +; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27 +; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]] +; BIGENDIAN: st.b [[R4]], +; BIGENDIAN: .size v4f32_to_v16i8 + +define void @v4f32_to_v8i16(<4 x float>* %src, <8 x i16>* %dst) nounwind { +entry: + %0 = load volatile <4 x float>* %src + %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0) + %2 = bitcast <4 x float> %1 to <8 x i16> + %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2) + store <8 x i16> %3, <8 x i16>* %dst + ret void +} + +; LITENDIAN: v4f32_to_v8i16: +; LITENDIAN: ld.w [[R1:\$w[0-9]+]], +; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.h [[R3]], +; LITENDIAN: .size v4f32_to_v8i16 + +; BIGENDIAN: v4f32_to_v8i16: +; BIGENDIAN: ld.w [[R1:\$w[0-9]+]], +; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 177 +; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]] +; BIGENDIAN: st.h [[R4]], +; BIGENDIAN: .size v4f32_to_v8i16 + +; We can't prevent the (store (bitcast X), Y) 
DAG Combine here because there +; are no operations for v8f16 to put in the way. +define void @v4f32_to_v8f16(<4 x float>* %src, <8 x half>* %dst) nounwind { +entry: + %0 = load volatile <4 x float>* %src + %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0) + %2 = bitcast <4 x float> %1 to <8 x half> + store <8 x half> %2, <8 x half>* %dst + ret void +} + +; LITENDIAN: v4f32_to_v8f16: +; LITENDIAN: ld.w [[R1:\$w[0-9]+]], +; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: st.w [[R2]], +; LITENDIAN: .size v4f32_to_v8f16 + +; BIGENDIAN: v4f32_to_v8f16: +; BIGENDIAN: ld.w [[R1:\$w[0-9]+]], +; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: st.w [[R2]], +; BIGENDIAN: .size v4f32_to_v8f16 + +define void @v4f32_to_v4i32(<4 x float>* %src, <4 x i32>* %dst) nounwind { +entry: + %0 = load volatile <4 x float>* %src + %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0) + %2 = bitcast <4 x float> %1 to <4 x i32> + %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2) + store <4 x i32> %3, <4 x i32>* %dst + ret void +} + +; LITENDIAN: v4f32_to_v4i32: +; LITENDIAN: ld.w [[R1:\$w[0-9]+]], +; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.w [[R3]], +; LITENDIAN: .size v4f32_to_v4i32 + +; BIGENDIAN: v4f32_to_v4i32: +; BIGENDIAN: ld.w [[R1:\$w[0-9]+]], +; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; BIGENDIAN: st.w [[R3]], +; BIGENDIAN: .size v4f32_to_v4i32 + +define void @v4f32_to_v4f32(<4 x float>* %src, <4 x float>* %dst) nounwind { +entry: + %0 = load volatile <4 x float>* %src + %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0) + %2 = bitcast <4 x float> %1 to <4 x float> + %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2) + store <4 x float> %3, <4 x float>* %dst + ret void +} + +; 
LITENDIAN: v4f32_to_v4f32: +; LITENDIAN: ld.w [[R1:\$w[0-9]+]], +; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.w [[R3]], +; LITENDIAN: .size v4f32_to_v4f32 + +; BIGENDIAN: v4f32_to_v4f32: +; BIGENDIAN: ld.w [[R1:\$w[0-9]+]], +; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; BIGENDIAN: st.w [[R3]], +; BIGENDIAN: .size v4f32_to_v4f32 + +define void @v4f32_to_v2i64(<4 x float>* %src, <2 x i64>* %dst) nounwind { +entry: + %0 = load volatile <4 x float>* %src + %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0) + %2 = bitcast <4 x float> %1 to <2 x i64> + %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2) + store <2 x i64> %3, <2 x i64>* %dst + ret void +} + +; LITENDIAN: v4f32_to_v2i64: +; LITENDIAN: ld.w [[R1:\$w[0-9]+]], +; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.d [[R3]], +; LITENDIAN: .size v4f32_to_v2i64 + +; BIGENDIAN: v4f32_to_v2i64: +; BIGENDIAN: ld.w [[R1:\$w[0-9]+]], +; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177 +; BIGENDIAN: addv.d [[R4:\$w[0-9]+]], [[R3]], [[R3]] +; BIGENDIAN: st.d [[R4]], +; BIGENDIAN: .size v4f32_to_v2i64 + +define void @v4f32_to_v2f64(<4 x float>* %src, <2 x double>* %dst) nounwind { +entry: + %0 = load volatile <4 x float>* %src + %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0) + %2 = bitcast <4 x float> %1 to <2 x double> + %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2) + store <2 x double> %3, <2 x double>* %dst + ret void +} + +; LITENDIAN: v4f32_to_v2f64: +; LITENDIAN: ld.w [[R1:\$w[0-9]+]], +; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.d [[R3]], +; LITENDIAN: .size 
v4f32_to_v2f64 + +; BIGENDIAN: v4f32_to_v2f64: +; BIGENDIAN: ld.w [[R1:\$w[0-9]+]], +; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177 +; BIGENDIAN: fadd.d [[R4:\$w[0-9]+]], [[R3]], [[R3]] +; BIGENDIAN: st.d [[R4]], +; BIGENDIAN: .size v4f32_to_v2f64 + +define void @v2i64_to_v16i8(<2 x i64>* %src, <16 x i8>* %dst) nounwind { +entry: + %0 = load volatile <2 x i64>* %src + %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0) + %2 = bitcast <2 x i64> %1 to <16 x i8> + %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2) + store <16 x i8> %3, <16 x i8>* %dst + ret void +} + +; LITENDIAN: v2i64_to_v16i8: +; LITENDIAN: ld.d [[R1:\$w[0-9]+]], +; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.b [[R3]], +; LITENDIAN: .size v2i64_to_v16i8 + +; BIGENDIAN: v2i64_to_v16i8: +; BIGENDIAN: ld.d [[R1:\$w[0-9]+]], +; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27 +; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R3]], 177 +; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]] +; BIGENDIAN: st.b [[R4]], +; BIGENDIAN: .size v2i64_to_v16i8 + +define void @v2i64_to_v8i16(<2 x i64>* %src, <8 x i16>* %dst) nounwind { +entry: + %0 = load volatile <2 x i64>* %src + %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0) + %2 = bitcast <2 x i64> %1 to <8 x i16> + %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2) + store <8 x i16> %3, <8 x i16>* %dst + ret void +} + +; LITENDIAN: v2i64_to_v8i16: +; LITENDIAN: ld.d [[R1:\$w[0-9]+]], +; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.h [[R3]], +; LITENDIAN: .size v2i64_to_v8i16 + +; BIGENDIAN: v2i64_to_v8i16: +; BIGENDIAN: ld.d [[R1:\$w[0-9]+]], +; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: shf.h 
[[R3:\$w[0-9]+]], [[R2]], 27 +; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]] +; BIGENDIAN: st.h [[R4]], +; BIGENDIAN: .size v2i64_to_v8i16 + +; We can't prevent the (store (bitcast X), Y) DAG Combine here because there +; are no operations for v8f16 to put in the way. +define void @v2i64_to_v8f16(<2 x i64>* %src, <8 x half>* %dst) nounwind { +entry: + %0 = load volatile <2 x i64>* %src + %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0) + %2 = bitcast <2 x i64> %1 to <8 x half> + store <8 x half> %2, <8 x half>* %dst + ret void +} + +; LITENDIAN: v2i64_to_v8f16: +; LITENDIAN: ld.d [[R1:\$w[0-9]+]], +; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: st.d [[R2]], +; LITENDIAN: .size v2i64_to_v8f16 + +; BIGENDIAN: v2i64_to_v8f16: +; BIGENDIAN: ld.d [[R1:\$w[0-9]+]], +; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: st.d [[R2]], +; BIGENDIAN: .size v2i64_to_v8f16 + +define void @v2i64_to_v4i32(<2 x i64>* %src, <4 x i32>* %dst) nounwind { +entry: + %0 = load volatile <2 x i64>* %src + %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0) + %2 = bitcast <2 x i64> %1 to <4 x i32> + %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2) + store <4 x i32> %3, <4 x i32>* %dst + ret void +} + +; LITENDIAN: v2i64_to_v4i32: +; LITENDIAN: ld.d [[R1:\$w[0-9]+]], +; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.w [[R3]], +; LITENDIAN: .size v2i64_to_v4i32 + +; BIGENDIAN: v2i64_to_v4i32: +; BIGENDIAN: ld.d [[R1:\$w[0-9]+]], +; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177 +; BIGENDIAN: addv.w [[R4:\$w[0-9]+]], [[R3]], [[R3]] +; BIGENDIAN: st.w [[R4]], +; BIGENDIAN: .size v2i64_to_v4i32 + +define void @v2i64_to_v4f32(<2 x i64>* %src, <4 x float>* %dst) nounwind { +entry: + %0 = load volatile <2 x i64>* %src + %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> 
%0, <2 x i64> %0) + %2 = bitcast <2 x i64> %1 to <4 x float> + %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2) + store <4 x float> %3, <4 x float>* %dst + ret void +} + +; LITENDIAN: v2i64_to_v4f32: +; LITENDIAN: ld.d [[R1:\$w[0-9]+]], +; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.w [[R3]], +; LITENDIAN: .size v2i64_to_v4f32 + +; BIGENDIAN: v2i64_to_v4f32: +; BIGENDIAN: ld.d [[R1:\$w[0-9]+]], +; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177 +; BIGENDIAN: fadd.w [[R4:\$w[0-9]+]], [[R3]], [[R3]] +; BIGENDIAN: st.w [[R4]], +; BIGENDIAN: .size v2i64_to_v4f32 + +define void @v2i64_to_v2i64(<2 x i64>* %src, <2 x i64>* %dst) nounwind { +entry: + %0 = load volatile <2 x i64>* %src + %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0) + %2 = bitcast <2 x i64> %1 to <2 x i64> + %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2) + store <2 x i64> %3, <2 x i64>* %dst + ret void +} + +; LITENDIAN: v2i64_to_v2i64: +; LITENDIAN: ld.d [[R1:\$w[0-9]+]], +; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.d [[R3]], +; LITENDIAN: .size v2i64_to_v2i64 + +; BIGENDIAN: v2i64_to_v2i64: +; BIGENDIAN: ld.d [[R1:\$w[0-9]+]], +; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; BIGENDIAN: st.d [[R3]], +; BIGENDIAN: .size v2i64_to_v2i64 + +define void @v2i64_to_v2f64(<2 x i64>* %src, <2 x double>* %dst) nounwind { +entry: + %0 = load volatile <2 x i64>* %src + %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0) + %2 = bitcast <2 x i64> %1 to <2 x double> + %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2) + store <2 x double> %3, <2 x double>* %dst + ret void +} + +; LITENDIAN: v2i64_to_v2f64: +; LITENDIAN: ld.d 
[[R1:\$w[0-9]+]], +; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.d [[R3]], +; LITENDIAN: .size v2i64_to_v2f64 + +; BIGENDIAN: v2i64_to_v2f64: +; BIGENDIAN: ld.d [[R1:\$w[0-9]+]], +; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; BIGENDIAN: st.d [[R3]], +; BIGENDIAN: .size v2i64_to_v2f64 + +define void @v2f64_to_v16i8(<2 x double>* %src, <16 x i8>* %dst) nounwind { +entry: + %0 = load volatile <2 x double>* %src + %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0) + %2 = bitcast <2 x double> %1 to <16 x i8> + %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2) + store <16 x i8> %3, <16 x i8>* %dst + ret void +} + +; LITENDIAN: v2f64_to_v16i8: +; LITENDIAN: ld.d [[R1:\$w[0-9]+]], +; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.b [[R3]], +; LITENDIAN: .size v2f64_to_v16i8 + +; BIGENDIAN: v2f64_to_v16i8: +; BIGENDIAN: ld.d [[R1:\$w[0-9]+]], +; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27 +; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R3]], 177 +; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]] +; BIGENDIAN: st.b [[R4]], +; BIGENDIAN: .size v2f64_to_v16i8 + +define void @v2f64_to_v8i16(<2 x double>* %src, <8 x i16>* %dst) nounwind { +entry: + %0 = load volatile <2 x double>* %src + %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0) + %2 = bitcast <2 x double> %1 to <8 x i16> + %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2) + store <8 x i16> %3, <8 x i16>* %dst + ret void +} + +; LITENDIAN: v2f64_to_v8i16: +; LITENDIAN: ld.d [[R1:\$w[0-9]+]], +; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.h [[R3]], +; LITENDIAN: .size v2f64_to_v8i16 
+ +; BIGENDIAN: v2f64_to_v8i16: +; BIGENDIAN: ld.d [[R1:\$w[0-9]+]], +; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 27 +; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]] +; BIGENDIAN: st.h [[R4]], +; BIGENDIAN: .size v2f64_to_v8i16 + +; We can't prevent the (store (bitcast X), Y) DAG Combine here because there +; are no operations for v8f16 to put in the way. +define void @v2f64_to_v8f16(<2 x double>* %src, <8 x half>* %dst) nounwind { +entry: + %0 = load volatile <2 x double>* %src + %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0) + %2 = bitcast <2 x double> %1 to <8 x half> + store <8 x half> %2, <8 x half>* %dst + ret void +} + +; LITENDIAN: v2f64_to_v8f16: +; LITENDIAN: ld.d [[R1:\$w[0-9]+]], +; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: st.d [[R2]], +; LITENDIAN: .size v2f64_to_v8f16 + +; BIGENDIAN: v2f64_to_v8f16: +; BIGENDIAN: ld.d [[R1:\$w[0-9]+]], +; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: st.d [[R2]], +; BIGENDIAN: .size v2f64_to_v8f16 + +define void @v2f64_to_v4i32(<2 x double>* %src, <4 x i32>* %dst) nounwind { +entry: + %0 = load volatile <2 x double>* %src + %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0) + %2 = bitcast <2 x double> %1 to <4 x i32> + %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2) + store <4 x i32> %3, <4 x i32>* %dst + ret void +} + +; LITENDIAN: v2f64_to_v4i32: +; LITENDIAN: ld.d [[R1:\$w[0-9]+]], +; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.w [[R3]], +; LITENDIAN: .size v2f64_to_v4i32 + +; BIGENDIAN: v2f64_to_v4i32: +; BIGENDIAN: ld.d [[R1:\$w[0-9]+]], +; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177 +; BIGENDIAN: addv.w [[R4:\$w[0-9]+]], [[R3]], [[R3]] +; BIGENDIAN: st.w [[R4]], +; BIGENDIAN: .size 
v2f64_to_v4i32 + +define void @v2f64_to_v4f32(<2 x double>* %src, <4 x float>* %dst) nounwind { +entry: + %0 = load volatile <2 x double>* %src + %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0) + %2 = bitcast <2 x double> %1 to <4 x float> + %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2) + store <4 x float> %3, <4 x float>* %dst + ret void +} + +; LITENDIAN: v2f64_to_v4f32: +; LITENDIAN: ld.d [[R1:\$w[0-9]+]], +; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.w [[R3]], +; LITENDIAN: .size v2f64_to_v4f32 + +; BIGENDIAN: v2f64_to_v4f32: +; BIGENDIAN: ld.d [[R1:\$w[0-9]+]], +; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177 +; BIGENDIAN: fadd.w [[R4:\$w[0-9]+]], [[R3]], [[R3]] +; BIGENDIAN: st.w [[R4]], +; BIGENDIAN: .size v2f64_to_v4f32 + +define void @v2f64_to_v2i64(<2 x double>* %src, <2 x i64>* %dst) nounwind { +entry: + %0 = load volatile <2 x double>* %src + %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0) + %2 = bitcast <2 x double> %1 to <2 x i64> + %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2) + store <2 x i64> %3, <2 x i64>* %dst + ret void +} + +; LITENDIAN: v2f64_to_v2i64: +; LITENDIAN: ld.d [[R1:\$w[0-9]+]], +; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.d [[R3]], +; LITENDIAN: .size v2f64_to_v2i64 + +; BIGENDIAN: v2f64_to_v2i64: +; BIGENDIAN: ld.d [[R1:\$w[0-9]+]], +; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; BIGENDIAN: st.d [[R3]], +; BIGENDIAN: .size v2f64_to_v2i64 + +define void @v2f64_to_v2f64(<2 x double>* %src, <2 x double>* %dst) nounwind { +entry: + %0 = load volatile <2 x double>* %src + %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> 
%0) + %2 = bitcast <2 x double> %1 to <2 x double> + %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2) + store <2 x double> %3, <2 x double>* %dst + ret void +} + +; LITENDIAN: v2f64_to_v2f64: +; LITENDIAN: ld.d [[R1:\$w[0-9]+]], +; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; LITENDIAN: st.d [[R3]], +; LITENDIAN: .size v2f64_to_v2f64 + +; BIGENDIAN: v2f64_to_v2f64: +; BIGENDIAN: ld.d [[R1:\$w[0-9]+]], +; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]] +; BIGENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]] +; BIGENDIAN: st.d [[R3]], +; BIGENDIAN: .size v2f64_to_v2f64 + +declare <16 x i8> @llvm.mips.addv.b(<16 x i8>, <16 x i8>) nounwind +declare <8 x i16> @llvm.mips.addv.h(<8 x i16>, <8 x i16>) nounwind +declare <4 x i32> @llvm.mips.addv.w(<4 x i32>, <4 x i32>) nounwind +declare <2 x i64> @llvm.mips.addv.d(<2 x i64>, <2 x i64>) nounwind +declare <4 x float> @llvm.mips.fadd.w(<4 x float>, <4 x float>) nounwind +declare <2 x double> @llvm.mips.fadd.d(<2 x double>, <2 x double>) nounwind diff --git a/test/CodeGen/Mips/msa/bitwise.ll b/test/CodeGen/Mips/msa/bitwise.ll new file mode 100644 index 0000000..9a88c47 --- /dev/null +++ b/test/CodeGen/Mips/msa/bitwise.ll @@ -0,0 +1,1639 @@ +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +define void @and_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: and_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = and <16 x i8> %1, %2 + ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size and_v16i8 +} + +define void @and_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: and_v8i16: + + %1 = load <8 x i16>* %a + ; 
CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = and <8 x i16> %1, %2 + ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size and_v8i16 +} + +define void @and_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: and_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = and <4 x i32> %1, %2 + ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size and_v4i32 +} + +define void @and_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: and_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = and <2 x i64> %1, %2 + ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size and_v2i64 +} + +define void @and_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: and_v16i8_i: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = and <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + ; CHECK-DAG: andi.b [[R4:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %2, <16 x i8>* %c + ; CHECK-DAG: st.b [[R4]], 0($4) + + ret void + ; CHECK: .size and_v16i8_i +} + +define void @and_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: and_v8i16_i: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = and <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1 + ; CHECK-DAG: and.v [[R4:\$w[0-9]+]], [[R1]], [[R3]] + store <8 x i16> 
%2, <8 x i16>* %c + ; CHECK-DAG: st.h [[R4]], 0($4) + + ret void + ; CHECK: .size and_v8i16_i +} + +define void @and_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: and_v4i32_i: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = and <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1> + ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1 + ; CHECK-DAG: and.v [[R4:\$w[0-9]+]], [[R1]], [[R3]] + store <4 x i32> %2, <4 x i32>* %c + ; CHECK-DAG: st.w [[R4]], 0($4) + + ret void + ; CHECK: .size and_v4i32_i +} + +define void @and_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: and_v2i64_i: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = and <2 x i64> %1, <i64 1, i64 1> + ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1 + ; CHECK-DAG: and.v [[R4:\$w[0-9]+]], [[R1]], [[R3]] + store <2 x i64> %2, <2 x i64>* %c + ; CHECK-DAG: st.d [[R4]], 0($4) + + ret void + ; CHECK: .size and_v2i64_i +} + +define void @or_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: or_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = or <16 x i8> %1, %2 + ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size or_v16i8 +} + +define void @or_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: or_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = or <8 x i16> %1, %2 + ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size or_v8i16 +} + +define void @or_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: or_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load 
<4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = or <4 x i32> %1, %2 + ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size or_v4i32 +} + +define void @or_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: or_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = or <2 x i64> %1, %2 + ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size or_v2i64 +} + +define void @or_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: or_v16i8_i: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = or <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + ; CHECK-DAG: ori.b [[R4:\$w[0-9]+]], [[R1]], 3 + store <16 x i8> %2, <16 x i8>* %c + ; CHECK-DAG: st.b [[R4]], 0($4) + + ret void + ; CHECK: .size or_v16i8_i +} + +define void @or_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: or_v8i16_i: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = or <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> + ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 3 + ; CHECK-DAG: or.v [[R4:\$w[0-9]+]], [[R1]], [[R3]] + store <8 x i16> %2, <8 x i16>* %c + ; CHECK-DAG: st.h [[R4]], 0($4) + + ret void + ; CHECK: .size or_v8i16_i +} + +define void @or_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: or_v4i32_i: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = or <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3> + ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 3 + ; CHECK-DAG: or.v [[R4:\$w[0-9]+]], [[R1]], [[R3]] + store <4 x i32> %2, <4 x i32>* %c + ; CHECK-DAG: st.w [[R4]], 0($4) + + ret void + ; CHECK: .size 
or_v4i32_i +} + +define void @or_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: or_v2i64_i: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = or <2 x i64> %1, <i64 3, i64 3> + ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 3 + ; CHECK-DAG: or.v [[R4:\$w[0-9]+]], [[R1]], [[R3]] + store <2 x i64> %2, <2 x i64>* %c + ; CHECK-DAG: st.d [[R4]], 0($4) + + ret void + ; CHECK: .size or_v2i64_i +} + +define void @nor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: nor_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = or <16 x i8> %1, %2 + %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> + ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %4, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size nor_v16i8 +} + +define void @nor_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: nor_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = or <8 x i16> %1, %2 + %4 = xor <8 x i16> %3, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> + ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %4, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size nor_v8i16 +} + +define void @nor_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: nor_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = or <4 x i32> %1, %2 + %4 = xor <4 x i32> %3, <i32 -1, i32 -1, i32 -1, i32 -1> + ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void 
+ ; CHECK: .size nor_v4i32 +} + +define void @nor_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: nor_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = or <2 x i64> %1, %2 + %4 = xor <2 x i64> %3, <i64 -1, i64 -1> + ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size nor_v2i64 +} + +define void @nor_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: nor_v16i8_i: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = or <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + %3 = xor <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> + ; CHECK-DAG: ori.b [[R4:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R4]], 0($4) + + ret void + ; CHECK: .size nor_v16i8_i +} + +define void @nor_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: nor_v8i16_i: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = or <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + %3 = xor <8 x i16> %2, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> + ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1 + ; CHECK-DAG: nor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]] + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R4]], 0($4) + + ret void + ; CHECK: .size nor_v8i16_i +} + +define void @nor_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: nor_v4i32_i: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = or <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1> + %3 = xor <4 x i32> %2, <i32 -1, i32 -1, i32 -1, i32 -1> + ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1 + ; CHECK-DAG: nor.v 
[[R4:\$w[0-9]+]], [[R1]], [[R3]] + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R4]], 0($4) + + ret void + ; CHECK: .size nor_v4i32_i +} + +define void @nor_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: nor_v2i64_i: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = or <2 x i64> %1, <i64 1, i64 1> + %3 = xor <2 x i64> %2, <i64 -1, i64 -1> + ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1 + ; CHECK-DAG: nor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]] + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R4]], 0($4) + + ret void + ; CHECK: .size nor_v2i64_i +} + +define void @xor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: xor_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = xor <16 x i8> %1, %2 + ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size xor_v16i8 +} + +define void @xor_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: xor_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = xor <8 x i16> %1, %2 + ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size xor_v8i16 +} + +define void @xor_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: xor_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = xor <4 x i32> %1, %2 + ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size xor_v4i32 +} + +define void @xor_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) 
nounwind { + ; CHECK: xor_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = xor <2 x i64> %1, %2 + ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size xor_v2i64 +} + +define void @xor_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: xor_v16i8_i: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = xor <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + ; CHECK-DAG: xori.b [[R4:\$w[0-9]+]], [[R1]], 3 + store <16 x i8> %2, <16 x i8>* %c + ; CHECK-DAG: st.b [[R4]], 0($4) + + ret void + ; CHECK: .size xor_v16i8_i +} + +define void @xor_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: xor_v8i16_i: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = xor <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> + ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 3 + ; CHECK-DAG: xor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]] + store <8 x i16> %2, <8 x i16>* %c + ; CHECK-DAG: st.h [[R4]], 0($4) + + ret void + ; CHECK: .size xor_v8i16_i +} + +define void @xor_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: xor_v4i32_i: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = xor <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3> + ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 3 + ; CHECK-DAG: xor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]] + store <4 x i32> %2, <4 x i32>* %c + ; CHECK-DAG: st.w [[R4]], 0($4) + + ret void + ; CHECK: .size xor_v4i32_i +} + +define void @xor_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: xor_v2i64_i: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = xor <2 x i64> %1, <i64 3, i64 3> + ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 3 + ; CHECK-DAG: xor.v [[R4:\$w[0-9]+]], 
[[R1]], [[R3]] + store <2 x i64> %2, <2 x i64>* %c + ; CHECK-DAG: st.d [[R4]], 0($4) + + ret void + ; CHECK: .size xor_v2i64_i +} + +define void @sll_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: sll_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = shl <16 x i8> %1, %2 + ; CHECK-DAG: sll.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size sll_v16i8 +} + +define void @sll_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: sll_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = shl <8 x i16> %1, %2 + ; CHECK-DAG: sll.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size sll_v8i16 +} + +define void @sll_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: sll_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = shl <4 x i32> %1, %2 + ; CHECK-DAG: sll.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size sll_v4i32 +} + +define void @sll_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: sll_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = shl <2 x i64> %1, %2 + ; CHECK-DAG: sll.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size sll_v2i64 +} + +define void @sll_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: sll_v16i8_i: + + %1 = 
load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = shl <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + ; CHECK-DAG: slli.b [[R4:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %2, <16 x i8>* %c + ; CHECK-DAG: st.b [[R4]], 0($4) + + ret void + ; CHECK: .size sll_v16i8_i +} + +define void @sll_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: sll_v8i16_i: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = shl <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ; CHECK-DAG: slli.h [[R4:\$w[0-9]+]], [[R1]], 1 + store <8 x i16> %2, <8 x i16>* %c + ; CHECK-DAG: st.h [[R4]], 0($4) + + ret void + ; CHECK: .size sll_v8i16_i +} + +define void @sll_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: sll_v4i32_i: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = shl <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1> + ; CHECK-DAG: slli.w [[R4:\$w[0-9]+]], [[R1]], 1 + store <4 x i32> %2, <4 x i32>* %c + ; CHECK-DAG: st.w [[R4]], 0($4) + + ret void + ; CHECK: .size sll_v4i32_i +} + +define void @sll_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: sll_v2i64_i: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = shl <2 x i64> %1, <i64 1, i64 1> + ; CHECK-DAG: slli.d [[R4:\$w[0-9]+]], [[R1]], 1 + store <2 x i64> %2, <2 x i64>* %c + ; CHECK-DAG: st.d [[R4]], 0($4) + + ret void + ; CHECK: .size sll_v2i64_i +} + +define void @sra_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: sra_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = ashr <16 x i8> %1, %2 + ; CHECK-DAG: sra.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size sra_v16i8 +} + +define void @sra_v8i16(<8 x i16>* %c, 
<8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: sra_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = ashr <8 x i16> %1, %2 + ; CHECK-DAG: sra.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size sra_v8i16 +} + +define void @sra_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: sra_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = ashr <4 x i32> %1, %2 + ; CHECK-DAG: sra.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size sra_v4i32 +} + +define void @sra_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: sra_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = ashr <2 x i64> %1, %2 + ; CHECK-DAG: sra.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size sra_v2i64 +} + +define void @sra_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: sra_v16i8_i: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = ashr <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + ; CHECK-DAG: srai.b [[R4:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %2, <16 x i8>* %c + ; CHECK-DAG: st.b [[R4]], 0($4) + + ret void + ; CHECK: .size sra_v16i8_i +} + +define void @sra_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: sra_v8i16_i: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = ashr <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ; 
CHECK-DAG: srai.h [[R4:\$w[0-9]+]], [[R1]], 1 + store <8 x i16> %2, <8 x i16>* %c + ; CHECK-DAG: st.h [[R4]], 0($4) + + ret void + ; CHECK: .size sra_v8i16_i +} + +define void @sra_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: sra_v4i32_i: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = ashr <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1> + ; CHECK-DAG: srai.w [[R4:\$w[0-9]+]], [[R1]], 1 + store <4 x i32> %2, <4 x i32>* %c + ; CHECK-DAG: st.w [[R4]], 0($4) + + ret void + ; CHECK: .size sra_v4i32_i +} + +define void @sra_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: sra_v2i64_i: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = ashr <2 x i64> %1, <i64 1, i64 1> + ; CHECK-DAG: srai.d [[R4:\$w[0-9]+]], [[R1]], 1 + store <2 x i64> %2, <2 x i64>* %c + ; CHECK-DAG: st.d [[R4]], 0($4) + + ret void + ; CHECK: .size sra_v2i64_i +} + +define void @srl_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: srl_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = lshr <16 x i8> %1, %2 + ; CHECK-DAG: srl.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size srl_v16i8 +} + +define void @srl_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: srl_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = lshr <8 x i16> %1, %2 + ; CHECK-DAG: srl.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size srl_v8i16 +} + +define void @srl_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: srl_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* 
%b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = lshr <4 x i32> %1, %2 + ; CHECK-DAG: srl.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size srl_v4i32 +} + +define void @srl_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: srl_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = lshr <2 x i64> %1, %2 + ; CHECK-DAG: srl.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size srl_v2i64 +} + +define void @srl_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: srl_v16i8_i: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = lshr <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + ; CHECK-DAG: srli.b [[R4:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %2, <16 x i8>* %c + ; CHECK-DAG: st.b [[R4]], 0($4) + + ret void + ; CHECK: .size srl_v16i8_i +} + +define void @srl_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: srl_v8i16_i: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = lshr <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ; CHECK-DAG: srli.h [[R4:\$w[0-9]+]], [[R1]], 1 + store <8 x i16> %2, <8 x i16>* %c + ; CHECK-DAG: st.h [[R4]], 0($4) + + ret void + ; CHECK: .size srl_v8i16_i +} + +define void @srl_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: srl_v4i32_i: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = lshr <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1> + ; CHECK-DAG: srli.w [[R4:\$w[0-9]+]], [[R1]], 1 + store <4 x i32> %2, <4 x i32>* %c + ; CHECK-DAG: st.w [[R4]], 0($4) + + ret void + ; CHECK: .size srl_v4i32_i +} + +define void @srl_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) 
nounwind { + ; CHECK: srl_v2i64_i: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = lshr <2 x i64> %1, <i64 1, i64 1> + ; CHECK-DAG: srli.d [[R4:\$w[0-9]+]], [[R1]], 1 + store <2 x i64> %2, <2 x i64>* %c + ; CHECK-DAG: st.d [[R4]], 0($4) + + ret void + ; CHECK: .size srl_v2i64_i +} + +define void @ctpop_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: ctpop_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = tail call <16 x i8> @llvm.ctpop.v16i8 (<16 x i8> %1) + ; CHECK-DAG: pcnt.b [[R3:\$w[0-9]+]], [[R1]] + store <16 x i8> %2, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size ctpop_v16i8 +} + +define void @ctpop_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: ctpop_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = tail call <8 x i16> @llvm.ctpop.v8i16 (<8 x i16> %1) + ; CHECK-DAG: pcnt.h [[R3:\$w[0-9]+]], [[R1]] + store <8 x i16> %2, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size ctpop_v8i16 +} + +define void @ctpop_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: ctpop_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = tail call <4 x i32> @llvm.ctpop.v4i32 (<4 x i32> %1) + ; CHECK-DAG: pcnt.w [[R3:\$w[0-9]+]], [[R1]] + store <4 x i32> %2, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size ctpop_v4i32 +} + +define void @ctpop_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: ctpop_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = tail call <2 x i64> @llvm.ctpop.v2i64 (<2 x i64> %1) + ; CHECK-DAG: pcnt.d [[R3:\$w[0-9]+]], [[R1]] + store <2 x i64> %2, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size ctpop_v2i64 +} + +define void @ctlz_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: ctlz_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b 
[[R1:\$w[0-9]+]], 0($5) + %2 = tail call <16 x i8> @llvm.ctlz.v16i8 (<16 x i8> %1) + ; CHECK-DAG: nlzc.b [[R3:\$w[0-9]+]], [[R1]] + store <16 x i8> %2, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size ctlz_v16i8 +} + +define void @ctlz_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: ctlz_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = tail call <8 x i16> @llvm.ctlz.v8i16 (<8 x i16> %1) + ; CHECK-DAG: nlzc.h [[R3:\$w[0-9]+]], [[R1]] + store <8 x i16> %2, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size ctlz_v8i16 +} + +define void @ctlz_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: ctlz_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = tail call <4 x i32> @llvm.ctlz.v4i32 (<4 x i32> %1) + ; CHECK-DAG: nlzc.w [[R3:\$w[0-9]+]], [[R1]] + store <4 x i32> %2, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size ctlz_v4i32 +} + +define void @ctlz_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: ctlz_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = tail call <2 x i64> @llvm.ctlz.v2i64 (<2 x i64> %1) + ; CHECK-DAG: nlzc.d [[R3:\$w[0-9]+]], [[R1]] + store <2 x i64> %2, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size ctlz_v2i64 +} + +define void @bsel_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b, <16 x i8>* %m) nounwind { + ; CHECK: bsel_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = load <16 x i8>* %m + ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7) + %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1> + %5 = and <16 x i8> %1, %3 + %6 = and <16 x i8> %2, %4 + %7 = or <16 x i8> %5, %6 + ; bmnz is the same operation + ; CHECK-DAG: 
bmnz.v [[R1]], [[R2]], [[R3]] + store <16 x i8> %7, <16 x i8>* %c + ; CHECK-DAG: st.b [[R1]], 0($4) + + ret void + ; CHECK: .size bsel_v16i8 +} + +define void @bsel_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %m) nounwind { + ; CHECK: bsel_v16i8_i: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %m + ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($6) + %3 = xor <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1> + %4 = and <16 x i8> %1, %3 + %5 = and <16 x i8> <i8 6, i8 6, i8 6, i8 6, + i8 6, i8 6, i8 6, i8 6, + i8 6, i8 6, i8 6, i8 6, + i8 6, i8 6, i8 6, i8 6>, %2 + %6 = or <16 x i8> %4, %5 + ; CHECK-DAG: bseli.b [[R3]], [[R1]], 6 + store <16 x i8> %6, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size bsel_v16i8_i +} + +define void @bsel_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: bsel_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = and <8 x i16> %1, <i16 6, i16 6, i16 6, i16 6, + i16 6, i16 6, i16 6, i16 6> + %4 = and <8 x i16> %2, <i16 65529, i16 65529, i16 65529, i16 65529, + i16 65529, i16 65529, i16 65529, i16 65529> + %5 = or <8 x i16> %3, %4 + ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 6 + ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]] + store <8 x i16> %5, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size bsel_v8i16 +} + +define void @bsel_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: bsel_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = and <4 x i32> %1, <i32 6, i32 6, i32 6, i32 6> + %4 = and <4 x i32> %2, <i32 4294967289, i32 4294967289, i32 4294967289, i32 4294967289> + %5 = or <4 x i32> %3, %4 + ; CHECK-DAG: ldi.w 
[[R3:\$w[0-9]+]], 6 + ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]] + store <4 x i32> %5, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size bsel_v4i32 +} + +define void @bsel_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: bsel_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = and <2 x i64> %1, <i64 6, i64 6> + %4 = and <2 x i64> %2, <i64 18446744073709551609, i64 18446744073709551609> + %5 = or <2 x i64> %3, %4 + ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 6 + ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]] + store <2 x i64> %5, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size bsel_v2i64 +} + +define void @binsl_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: binsl_v16i8_i: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = and <16 x i8> %1, <i8 192, i8 192, i8 192, i8 192, + i8 192, i8 192, i8 192, i8 192, + i8 192, i8 192, i8 192, i8 192, + i8 192, i8 192, i8 192, i8 192> + %4 = and <16 x i8> %2, <i8 63, i8 63, i8 63, i8 63, + i8 63, i8 63, i8 63, i8 63, + i8 63, i8 63, i8 63, i8 63, + i8 63, i8 63, i8 63, i8 63> + %5 = or <16 x i8> %3, %4 + ; CHECK-DAG: binsli.b [[R2]], [[R1]], 2 + store <16 x i8> %5, <16 x i8>* %c + ; CHECK-DAG: st.b [[R2]], 0($4) + + ret void + ; CHECK: .size binsl_v16i8_i +} + +define void @binsl_v8i16_i(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: binsl_v8i16_i: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = and <8 x i16> %1, <i16 49152, i16 49152, i16 49152, i16 49152, + i16 49152, i16 49152, i16 49152, i16 49152> + %4 = and <8 x i16> %2, <i16 16383, i16 16383, i16 16383, i16 16383, + i16 16383, i16 16383, i16 16383, i16 
16383> + %5 = or <8 x i16> %3, %4 + ; CHECK-DAG: binsli.h [[R2]], [[R1]], 2 + store <8 x i16> %5, <8 x i16>* %c + ; CHECK-DAG: st.h [[R2]], 0($4) + + ret void + ; CHECK: .size binsl_v8i16_i +} + +define void @binsl_v4i32_i(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: binsl_v4i32_i: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = and <4 x i32> %1, <i32 3221225472, i32 3221225472, i32 3221225472, i32 3221225472> + %4 = and <4 x i32> %2, <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823> + %5 = or <4 x i32> %3, %4 + ; CHECK-DAG: binsli.w [[R2]], [[R1]], 2 + store <4 x i32> %5, <4 x i32>* %c + ; CHECK-DAG: st.w [[R2]], 0($4) + + ret void + ; CHECK: .size binsl_v4i32_i +} + +define void @binsl_v2i64_i(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: binsl_v2i64_i: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = and <2 x i64> %1, <i64 18446744073709551608, i64 18446744073709551608> + %4 = and <2 x i64> %2, <i64 7, i64 7> + %5 = or <2 x i64> %3, %4 + ; TODO: We use a particularly wide mask here to work around a legalization + ; issue. If the mask doesn't fit within a 10-bit immediate, it gets + ; legalized into a constant pool. We should add a test to cover the + ; other cases once they correctly select binsli.d. 
+ ; CHECK-DAG: binsli.d [[R2]], [[R1]], 61 + store <2 x i64> %5, <2 x i64>* %c + ; CHECK-DAG: st.d [[R2]], 0($4) + + ret void + ; CHECK: .size binsl_v2i64_i +} + +define void @binsr_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: binsr_v16i8_i: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = and <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, + i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + %4 = and <16 x i8> %2, <i8 252, i8 252, i8 252, i8 252, + i8 252, i8 252, i8 252, i8 252, + i8 252, i8 252, i8 252, i8 252, + i8 252, i8 252, i8 252, i8 252> + %5 = or <16 x i8> %3, %4 + ; CHECK-DAG: binsri.b [[R2]], [[R1]], 2 + store <16 x i8> %5, <16 x i8>* %c + ; CHECK-DAG: st.b [[R2]], 0($4) + + ret void + ; CHECK: .size binsr_v16i8_i +} + +define void @binsr_v8i16_i(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: binsr_v8i16_i: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = and <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, + i16 3, i16 3, i16 3, i16 3> + %4 = and <8 x i16> %2, <i16 65532, i16 65532, i16 65532, i16 65532, + i16 65532, i16 65532, i16 65532, i16 65532> + %5 = or <8 x i16> %3, %4 + ; CHECK-DAG: binsri.h [[R2]], [[R1]], 2 + store <8 x i16> %5, <8 x i16>* %c + ; CHECK-DAG: st.h [[R2]], 0($4) + + ret void + ; CHECK: .size binsr_v8i16_i +} + +define void @binsr_v4i32_i(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: binsr_v4i32_i: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = and <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3> + %4 = and <4 x i32> %2, <i32 4294967292, i32 4294967292, i32 4294967292, i32 4294967292> + %5 = or <4 x i32> %3, %4 + ; CHECK-DAG: binsri.w [[R2]], [[R1]], 2 + store 
<4 x i32> %5, <4 x i32>* %c + ; CHECK-DAG: st.w [[R2]], 0($4) + + ret void + ; CHECK: .size binsr_v4i32_i +} + +define void @binsr_v2i64_i(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: binsr_v2i64_i: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = and <2 x i64> %1, <i64 3, i64 3> + %4 = and <2 x i64> %2, <i64 18446744073709551612, i64 18446744073709551612> + %5 = or <2 x i64> %3, %4 + ; CHECK-DAG: binsri.d [[R2]], [[R1]], 2 + store <2 x i64> %5, <2 x i64>* %c + ; CHECK-DAG: st.d [[R2]], 0($4) + + ret void + ; CHECK: .size binsr_v2i64_i +} + +define void @bclr_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: bclr_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2 + %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> + %5 = and <16 x i8> %1, %4 + ; CHECK-DAG: bclr.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %5, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size bclr_v16i8 +} + +define void @bclr_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: bclr_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2 + %4 = xor <8 x i16> %3, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> + %5 = and <8 x i16> %1, %4 + ; CHECK-DAG: bclr.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %5, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size bclr_v8i16 +} + +define 
void @bclr_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: bclr_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2 + %4 = xor <4 x i32> %3, <i32 -1, i32 -1, i32 -1, i32 -1> + %5 = and <4 x i32> %1, %4 + ; CHECK-DAG: bclr.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %5, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size bclr_v4i32 +} + +define void @bclr_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: bclr_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = shl <2 x i64> <i64 1, i64 1>, %2 + %4 = xor <2 x i64> %3, <i64 -1, i64 -1> + %5 = and <2 x i64> %1, %4 + ; CHECK-DAG: bclr.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %5, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size bclr_v2i64 +} + +define void @bset_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: bset_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2 + %4 = or <16 x i8> %1, %3 + ; CHECK-DAG: bset.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %4, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size bset_v16i8 +} + +define void @bset_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: bset_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2 + %4 = or <8 x i16> %1, %3 + 
; CHECK-DAG: bset.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %4, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size bset_v8i16 +} + +define void @bset_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: bset_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2 + %4 = or <4 x i32> %1, %3 + ; CHECK-DAG: bset.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size bset_v4i32 +} + +define void @bset_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: bset_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = shl <2 x i64> <i64 1, i64 1>, %2 + %4 = or <2 x i64> %1, %3 + ; CHECK-DAG: bset.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size bset_v2i64 +} + +define void @bneg_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: bneg_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2 + %4 = xor <16 x i8> %1, %3 + ; CHECK-DAG: bneg.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %4, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size bneg_v16i8 +} + +define void @bneg_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: bneg_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = shl <8 x i16> <i16 1, i16 1, 
i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2 + %4 = xor <8 x i16> %1, %3 + ; CHECK-DAG: bneg.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %4, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size bneg_v8i16 +} + +define void @bneg_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: bneg_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2 + %4 = xor <4 x i32> %1, %3 + ; CHECK-DAG: bneg.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size bneg_v4i32 +} + +define void @bneg_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: bneg_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = shl <2 x i64> <i64 1, i64 1>, %2 + %4 = xor <2 x i64> %1, %3 + ; CHECK-DAG: bneg.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size bneg_v2i64 +} + +define void @bclri_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: bclri_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = xor <16 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>, + <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> + %3 = and <16 x i8> %1, %2 + ; bclri.b and andi.b are exactly equivalent. 
+ ; CHECK-DAG: andi.b [[R3:\$w[0-9]+]], [[R1]], 247 + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size bclri_v16i8 +} + +define void @bclri_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: bclri_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = xor <8 x i16> <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>, + <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> + %3 = and <8 x i16> %1, %2 + ; CHECK-DAG: bclri.h [[R3:\$w[0-9]+]], [[R1]], 3 + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size bclri_v8i16 +} + +define void @bclri_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: bclri_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = xor <4 x i32> <i32 8, i32 8, i32 8, i32 8>, + <i32 -1, i32 -1, i32 -1, i32 -1> + %3 = and <4 x i32> %1, %2 + ; CHECK-DAG: bclri.w [[R3:\$w[0-9]+]], [[R1]], 3 + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size bclri_v4i32 +} + +define void @bclri_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: bclri_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = xor <2 x i64> <i64 8, i64 8>, + <i64 -1, i64 -1> + %3 = and <2 x i64> %1, %2 + ; CHECK-DAG: bclri.d [[R3:\$w[0-9]+]], [[R1]], 3 + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size bclri_v2i64 +} + +define void @bseti_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: bseti_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = or <16 x i8> %1, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8> + ; CHECK-DAG: bseti.b [[R3:\$w[0-9]+]], [[R1]], 3 + store <16 x i8> %2, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size bseti_v16i8 +} + +define void 
@bseti_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: bseti_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = or <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + ; CHECK-DAG: bseti.h [[R3:\$w[0-9]+]], [[R1]], 3 + store <8 x i16> %2, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size bseti_v8i16 +} + +define void @bseti_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: bseti_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = or <4 x i32> %1, <i32 8, i32 8, i32 8, i32 8> + ; CHECK-DAG: bseti.w [[R3:\$w[0-9]+]], [[R1]], 3 + store <4 x i32> %2, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size bseti_v4i32 +} + +define void @bseti_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: bseti_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = or <2 x i64> %1, <i64 8, i64 8> + ; CHECK-DAG: bseti.d [[R3:\$w[0-9]+]], [[R1]], 3 + store <2 x i64> %2, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size bseti_v2i64 +} + +define void @bnegi_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: bnegi_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = xor <16 x i8> %1, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8> + ; CHECK-DAG: bnegi.b [[R3:\$w[0-9]+]], [[R1]], 3 + store <16 x i8> %2, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size bnegi_v16i8 +} + +define void @bnegi_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: bnegi_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = xor <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + ; CHECK-DAG: bnegi.h [[R3:\$w[0-9]+]], [[R1]], 3 + store <8 x i16> %2, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: 
.size bnegi_v8i16 +} + +define void @bnegi_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: bnegi_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = xor <4 x i32> %1, <i32 8, i32 8, i32 8, i32 8> + ; CHECK-DAG: bnegi.w [[R3:\$w[0-9]+]], [[R1]], 3 + store <4 x i32> %2, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size bnegi_v4i32 +} + +define void @bnegi_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: bnegi_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = xor <2 x i64> %1, <i64 8, i64 8> + ; CHECK-DAG: bnegi.d [[R3:\$w[0-9]+]], [[R1]], 3 + store <2 x i64> %2, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size bnegi_v2i64 +} + +declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %val) +declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %val) +declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) +declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) +declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %val) +declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %val) +declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val) +declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %val) diff --git a/test/CodeGen/Mips/msa/compare.ll b/test/CodeGen/Mips/msa/compare.ll new file mode 100644 index 0000000..6408d7b --- /dev/null +++ b/test/CodeGen/Mips/msa/compare.ll @@ -0,0 +1,2079 @@ +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +define void @ceq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: ceq_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = icmp eq <16 x i8> %1, %2 + %4 = sext <16 x i1> %3 to <16 x i8> + ; CHECK-DAG: ceq.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %4, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size 
ceq_v16i8 +} + +define void @ceq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: ceq_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = icmp eq <8 x i16> %1, %2 + %4 = sext <8 x i1> %3 to <8 x i16> + ; CHECK-DAG: ceq.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %4, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size ceq_v8i16 +} + +define void @ceq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: ceq_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = icmp eq <4 x i32> %1, %2 + %4 = sext <4 x i1> %3 to <4 x i32> + ; CHECK-DAG: ceq.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size ceq_v4i32 +} + +define void @ceq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: ceq_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = icmp eq <2 x i64> %1, %2 + %4 = sext <2 x i1> %3 to <2 x i64> + ; CHECK-DAG: ceq.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size ceq_v2i64 +} + +define void @cle_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: cle_s_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sle <16 x i8> %1, %2 + %4 = sext <16 x i1> %3 to <16 x i8> + ; CHECK-DAG: cle_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %4, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size cle_s_v16i8 +} + +define void @cle_s_v8i16(<8 x i16>* %c, <8 x 
i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: cle_s_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sle <8 x i16> %1, %2 + %4 = sext <8 x i1> %3 to <8 x i16> + ; CHECK-DAG: cle_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %4, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size cle_s_v8i16 +} + +define void @cle_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: cle_s_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sle <4 x i32> %1, %2 + %4 = sext <4 x i1> %3 to <4 x i32> + ; CHECK-DAG: cle_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size cle_s_v4i32 +} + +define void @cle_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: cle_s_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sle <2 x i64> %1, %2 + %4 = sext <2 x i1> %3 to <2 x i64> + ; CHECK-DAG: cle_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size cle_s_v2i64 +} + +define void @cle_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: cle_u_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ule <16 x i8> %1, %2 + %4 = sext <16 x i1> %3 to <16 x i8> + ; CHECK-DAG: cle_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %4, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size cle_u_v16i8 +} + +define void @cle_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind 
{ + ; CHECK: cle_u_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ule <8 x i16> %1, %2 + %4 = sext <8 x i1> %3 to <8 x i16> + ; CHECK-DAG: cle_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %4, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size cle_u_v8i16 +} + +define void @cle_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: cle_u_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ule <4 x i32> %1, %2 + %4 = sext <4 x i1> %3 to <4 x i32> + ; CHECK-DAG: cle_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size cle_u_v4i32 +} + +define void @cle_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: cle_u_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ule <2 x i64> %1, %2 + %4 = sext <2 x i1> %3 to <2 x i64> + ; CHECK-DAG: cle_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size cle_u_v2i64 +} + +define void @clt_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: clt_s_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = icmp slt <16 x i8> %1, %2 + %4 = sext <16 x i1> %3 to <16 x i8> + ; CHECK-DAG: clt_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %4, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size clt_s_v16i8 +} + +define void @clt_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: clt_s_v8i16: + + %1 = 
load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = icmp slt <8 x i16> %1, %2 + %4 = sext <8 x i1> %3 to <8 x i16> + ; CHECK-DAG: clt_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %4, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size clt_s_v8i16 +} + +define void @clt_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: clt_s_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = icmp slt <4 x i32> %1, %2 + %4 = sext <4 x i1> %3 to <4 x i32> + ; CHECK-DAG: clt_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size clt_s_v4i32 +} + +define void @clt_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: clt_s_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = icmp slt <2 x i64> %1, %2 + %4 = sext <2 x i1> %3 to <2 x i64> + ; CHECK-DAG: clt_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size clt_s_v2i64 +} + +define void @clt_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: clt_u_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ult <16 x i8> %1, %2 + %4 = sext <16 x i1> %3 to <16 x i8> + ; CHECK-DAG: clt_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %4, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size clt_u_v16i8 +} + +define void @clt_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: clt_u_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: 
ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ult <8 x i16> %1, %2 + %4 = sext <8 x i1> %3 to <8 x i16> + ; CHECK-DAG: clt_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %4, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size clt_u_v8i16 +} + +define void @clt_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: clt_u_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ult <4 x i32> %1, %2 + %4 = sext <4 x i1> %3 to <4 x i32> + ; CHECK-DAG: clt_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size clt_u_v4i32 +} + +define void @clt_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: clt_u_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ult <2 x i64> %1, %2 + %4 = sext <2 x i1> %3 to <2 x i64> + ; CHECK-DAG: clt_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size clt_u_v2i64 +} + +; There is no != comparison, but test it anyway since we've had legalizer +; issues in this area. 
+define void @cne_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: cne_v16i8: + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ne <16 x i8> %1, %2 + %4 = sext <16 x i1> %3 to <16 x i8> + ; CHECK-DAG: ceq.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + ; CHECK-DAG: xori.b [[R3]], [[R3]], 255 + store <16 x i8> %4, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size cne_v16i8 +} + +; There is no != comparison, but test it anyway since we've had legalizer +; issues in this area. +define void @cne_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: cne_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ne <8 x i16> %1, %2 + %4 = sext <8 x i1> %3 to <8 x i16> + ; CHECK-DAG: ceq.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + ; TODO: This should be an 'xori.b [[R3]], [[R3]], 255' but thats an optimisation issue + ; CHECK-DAG: ldi.b [[R4:\$w[0-9]+]], -1 + ; CHECK-DAG: xor.v [[R3]], [[R3]], [[R4]] + store <8 x i16> %4, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size cne_v8i16 +} + +; There is no != comparison, but test it anyway since we've had legalizer +; issues in this area. 
+define void @cne_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: cne_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ne <4 x i32> %1, %2 + %4 = sext <4 x i1> %3 to <4 x i32> + ; CHECK-DAG: ceq.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + ; TODO: This should be an 'xori.b [[R3]], [[R3]], 255' but thats an optimisation issue + ; CHECK-DAG: ldi.b [[R4:\$w[0-9]+]], -1 + ; CHECK-DAG: xor.v [[R3]], [[R3]], [[R4]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size cne_v4i32 +} + +; There is no != comparison, but test it anyway since we've had legalizer +; issues in this area. +define void @cne_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: cne_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ne <2 x i64> %1, %2 + %4 = sext <2 x i1> %3 to <2 x i64> + ; CHECK-DAG: ceq.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + ; TODO: This should be an 'xori.b [[R3]], [[R3]], 255' but thats an optimisation issue + ; CHECK-DAG: ldi.b [[R4:\$w[0-9]+]], -1 + ; CHECK-DAG: xor.v [[R3]], [[R3]], [[R4]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size cne_v2i64 +} + +define void @ceqi_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: ceqi_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = icmp eq <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + %3 = sext <16 x i1> %2 to <16 x i8> + ; CHECK-DAG: ceqi.b [[R3:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size ceqi_v16i8 +} + +define void @ceqi_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: ceqi_v8i16: + + %1 
= load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = icmp eq <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + %3 = sext <8 x i1> %2 to <8 x i16> + ; CHECK-DAG: ceqi.h [[R3:\$w[0-9]+]], [[R1]], 1 + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size ceqi_v8i16 +} + +define void @ceqi_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: ceqi_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = icmp eq <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1> + %3 = sext <4 x i1> %2 to <4 x i32> + ; CHECK-DAG: ceqi.w [[R3:\$w[0-9]+]], [[R1]], 1 + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size ceqi_v4i32 +} + +define void @ceqi_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: ceqi_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = icmp eq <2 x i64> %1, <i64 1, i64 1> + %3 = sext <2 x i1> %2 to <2 x i64> + ; CHECK-DAG: ceqi.d [[R3:\$w[0-9]+]], [[R1]], 1 + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size ceqi_v2i64 +} + +define void @clei_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: clei_s_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = icmp sle <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + %3 = sext <16 x i1> %2 to <16 x i8> + ; CHECK-DAG: clei_s.b [[R3:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size clei_s_v16i8 +} + +define void @clei_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: clei_s_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = icmp sle <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + %3 = sext <8 x i1> %2 to <8 x i16> + ; CHECK-DAG: clei_s.h 
[[R3:\$w[0-9]+]], [[R1]], 1 + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size clei_s_v8i16 +} + +define void @clei_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: clei_s_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = icmp sle <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1> + %3 = sext <4 x i1> %2 to <4 x i32> + ; CHECK-DAG: clei_s.w [[R3:\$w[0-9]+]], [[R1]], 1 + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size clei_s_v4i32 +} + +define void @clei_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: clei_s_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = icmp sle <2 x i64> %1, <i64 1, i64 1> + %3 = sext <2 x i1> %2 to <2 x i64> + ; CHECK-DAG: clei_s.d [[R3:\$w[0-9]+]], [[R1]], 1 + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size clei_s_v2i64 +} + +define void @clei_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: clei_u_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ule <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + %3 = sext <16 x i1> %2 to <16 x i8> + ; CHECK-DAG: clei_u.b [[R3:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size clei_u_v16i8 +} + +define void @clei_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: clei_u_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ule <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + %3 = sext <8 x i1> %2 to <8 x i16> + ; CHECK-DAG: clei_u.h [[R3:\$w[0-9]+]], [[R1]], 1 + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size clei_u_v8i16 +} + +define void @clei_u_v4i32(<4 x i32>* %c, <4 x 
i32>* %a) nounwind { + ; CHECK: clei_u_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ule <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1> + %3 = sext <4 x i1> %2 to <4 x i32> + ; CHECK-DAG: clei_u.w [[R3:\$w[0-9]+]], [[R1]], 1 + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size clei_u_v4i32 +} + +define void @clei_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: clei_u_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ule <2 x i64> %1, <i64 1, i64 1> + %3 = sext <2 x i1> %2 to <2 x i64> + ; CHECK-DAG: clei_u.d [[R3:\$w[0-9]+]], [[R1]], 1 + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size clei_u_v2i64 +} + +define void @clti_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: clti_s_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = icmp slt <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + %3 = sext <16 x i1> %2 to <16 x i8> + ; CHECK-DAG: clti_s.b [[R3:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size clti_s_v16i8 +} + +define void @clti_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: clti_s_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = icmp slt <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + %3 = sext <8 x i1> %2 to <8 x i16> + ; CHECK-DAG: clti_s.h [[R3:\$w[0-9]+]], [[R1]], 1 + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size clti_s_v8i16 +} + +define void @clti_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: clti_s_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = icmp slt <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1> + %3 = sext <4 
x i1> %2 to <4 x i32> + ; CHECK-DAG: clti_s.w [[R3:\$w[0-9]+]], [[R1]], 1 + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size clti_s_v4i32 +} + +define void @clti_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: clti_s_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = icmp slt <2 x i64> %1, <i64 1, i64 1> + %3 = sext <2 x i1> %2 to <2 x i64> + ; CHECK-DAG: clti_s.d [[R3:\$w[0-9]+]], [[R1]], 1 + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size clti_s_v2i64 +} + +define void @clti_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: clti_u_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ult <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + %3 = sext <16 x i1> %2 to <16 x i8> + ; CHECK-DAG: clti_u.b [[R3:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size clti_u_v16i8 +} + +define void @clti_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: clti_u_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ult <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + %3 = sext <8 x i1> %2 to <8 x i16> + ; CHECK-DAG: clti_u.h [[R3:\$w[0-9]+]], [[R1]], 1 + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size clti_u_v8i16 +} + +define void @clti_u_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: clti_u_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ult <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1> + %3 = sext <4 x i1> %2 to <4 x i32> + ; CHECK-DAG: clti_u.w [[R3:\$w[0-9]+]], [[R1]], 1 + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size clti_u_v4i32 +} + 
+define void @clti_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: clti_u_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ult <2 x i64> %1, <i64 1, i64 1> + %3 = sext <2 x i1> %2 to <2 x i64> + ; CHECK-DAG: clti_u.d [[R3:\$w[0-9]+]], [[R1]], 1 + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size clti_u_v2i64 +} + +define void @bsel_s_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b, + <16 x i8>* %c) nounwind { + ; CHECK: bsel_s_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = load <16 x i8>* %c + ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7) + %4 = icmp sgt <16 x i8> %1, %2 + ; CHECK-DAG: clt_s.b [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %5 = select <16 x i1> %4, <16 x i8> %1, <16 x i8> %3 + ; bmnz.v is the same operation + ; CHECK-DAG: bmnz.v [[R3]], [[R1]], [[R4]] + store <16 x i8> %5, <16 x i8>* %d + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size bsel_s_v16i8 +} + +define void @bsel_s_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b, + <8 x i16>* %c) nounwind { + ; CHECK: bsel_s_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = load <8 x i16>* %c + ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7) + %4 = icmp sgt <8 x i16> %1, %2 + ; CHECK-DAG: clt_s.h [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %5 = select <8 x i1> %4, <8 x i16> %1, <8 x i16> %3 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <8 x i16> %5, <8 x i16>* %d + ; CHECK-DAG: st.h [[R4]], 0($4) + + ret void + ; CHECK: .size bsel_s_v8i16 +} + +define void @bsel_s_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b, + <4 x i32>* %c) nounwind { + ; CHECK: bsel_s_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 
0($6) + %3 = load <4 x i32>* %c + ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7) + %4 = icmp sgt <4 x i32> %1, %2 + ; CHECK-DAG: clt_s.w [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %5 = select <4 x i1> %4, <4 x i32> %1, <4 x i32> %3 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <4 x i32> %5, <4 x i32>* %d + ; CHECK-DAG: st.w [[R4]], 0($4) + + ret void + ; CHECK: .size bsel_s_v4i32 +} + +define void @bsel_s_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b, + <2 x i64>* %c) nounwind { + ; CHECK: bsel_s_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = load <2 x i64>* %c + ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7) + %4 = icmp sgt <2 x i64> %1, %2 + ; CHECK-DAG: clt_s.d [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %5 = select <2 x i1> %4, <2 x i64> %1, <2 x i64> %3 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <2 x i64> %5, <2 x i64>* %d + ; CHECK-DAG: st.d [[R4]], 0($4) + + ret void + ; CHECK: .size bsel_s_v2i64 +} + +define void @bsel_u_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b, + <16 x i8>* %c) nounwind { + ; CHECK: bsel_u_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = load <16 x i8>* %c + ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7) + %4 = icmp ugt <16 x i8> %1, %2 + ; CHECK-DAG: clt_u.b [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %5 = select <16 x i1> %4, <16 x i8> %1, <16 x i8> %3 + ; bmnz.v is the same operation + ; CHECK-DAG: bmnz.v [[R3]], [[R1]], [[R4]] + store <16 x i8> %5, <16 x i8>* %d + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size bsel_u_v16i8 +} + +define void @bsel_u_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b, + <8 x i16>* %c) nounwind { + ; CHECK: bsel_u_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = load <8 x i16>* %c + ; 
CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7) + %4 = icmp ugt <8 x i16> %1, %2 + ; CHECK-DAG: clt_u.h [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %5 = select <8 x i1> %4, <8 x i16> %1, <8 x i16> %3 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <8 x i16> %5, <8 x i16>* %d + ; CHECK-DAG: st.h [[R4]], 0($4) + + ret void + ; CHECK: .size bsel_u_v8i16 +} + +define void @bsel_u_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b, + <4 x i32>* %c) nounwind { + ; CHECK: bsel_u_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = load <4 x i32>* %c + ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7) + %4 = icmp ugt <4 x i32> %1, %2 + ; CHECK-DAG: clt_u.w [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %5 = select <4 x i1> %4, <4 x i32> %1, <4 x i32> %3 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <4 x i32> %5, <4 x i32>* %d + ; CHECK-DAG: st.w [[R4]], 0($4) + + ret void + ; CHECK: .size bsel_u_v4i32 +} + +define void @bsel_u_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b, + <2 x i64>* %c) nounwind { + ; CHECK: bsel_u_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = load <2 x i64>* %c + ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7) + %4 = icmp ugt <2 x i64> %1, %2 + ; CHECK-DAG: clt_u.d [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %5 = select <2 x i1> %4, <2 x i64> %1, <2 x i64> %3 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <2 x i64> %5, <2 x i64>* %d + ; CHECK-DAG: st.d [[R4]], 0($4) + + ret void + ; CHECK: .size bsel_u_v2i64 +} + +define void @bseli_s_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b, + <16 x i8>* %c) nounwind { + ; CHECK: bseli_s_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sgt <16 x i8> %1, %2 + ; CHECK-DAG: clt_s.b [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %4 = 
select <16 x i1> %3, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + ; CHECK-DAG: bseli.b [[R4]], [[R1]], 1 + store <16 x i8> %4, <16 x i8>* %d + ; CHECK-DAG: st.b [[R4]], 0($4) + + ret void + ; CHECK: .size bseli_s_v16i8 +} + +define void @bseli_s_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b, + <8 x i16>* %c) nounwind { + ; CHECK: bseli_s_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sgt <8 x i16> %1, %2 + ; CHECK-DAG: clt_s.h [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <8 x i16> %4, <8 x i16>* %d + ; CHECK-DAG: st.h [[R4]], 0($4) + + ret void + ; CHECK: .size bseli_s_v8i16 +} + +define void @bseli_s_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b, + <4 x i32>* %c) nounwind { + ; CHECK: bseli_s_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sgt <4 x i32> %1, %2 + ; CHECK-DAG: clt_s.w [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <4 x i32> %4, <4 x i32>* %d + ; CHECK-DAG: st.w [[R4]], 0($4) + + ret void + ; CHECK: .size bseli_s_v4i32 +} + +define void @bseli_s_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b, + <2 x i64>* %c) nounwind { + ; CHECK: bseli_s_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sgt <2 x i64> %1, %2 + ; CHECK-DAG: clt_s.d [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %4 = select <2 x i1> 
%3, <2 x i64> %1, <2 x i64> <i64 1, i64 1> + ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <2 x i64> %4, <2 x i64>* %d + ; CHECK-DAG: st.d [[R4]], 0($4) + + ret void + ; CHECK: .size bseli_s_v2i64 +} + +define void @bseli_u_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b, + <16 x i8>* %c) nounwind { + ; CHECK: bseli_u_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ugt <16 x i8> %1, %2 + ; CHECK-DAG: clt_u.b [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + ; CHECK-DAG: bseli.b [[R4]], [[R1]], 1 + store <16 x i8> %4, <16 x i8>* %d + ; CHECK-DAG: st.b [[R4]], 0($4) + + ret void + ; CHECK: .size bseli_u_v16i8 +} + +define void @bseli_u_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b, + <8 x i16>* %c) nounwind { + ; CHECK: bseli_u_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ugt <8 x i16> %1, %2 + ; CHECK-DAG: clt_u.h [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <8 x i16> %4, <8 x i16>* %d + ; CHECK-DAG: st.h [[R4]], 0($4) + + ret void + ; CHECK: .size bseli_u_v8i16 +} + +define void @bseli_u_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b, + <4 x i32>* %c) nounwind { + ; CHECK: bseli_u_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ugt <4 x i32> %1, %2 + ; CHECK-DAG: clt_u.w [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> <i32 
1, i32 1, i32 1, i32 1> + ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <4 x i32> %4, <4 x i32>* %d + ; CHECK-DAG: st.w [[R4]], 0($4) + + ret void + ; CHECK: .size bseli_u_v4i32 +} + +define void @bseli_u_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b, + <2 x i64>* %c) nounwind { + ; CHECK: bseli_u_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ugt <2 x i64> %1, %2 + ; CHECK-DAG: clt_u.d [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> <i64 1, i64 1> + ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <2 x i64> %4, <2 x i64>* %d + ; CHECK-DAG: st.d [[R4]], 0($4) + + ret void + ; CHECK: .size bseli_u_v2i64 +} + +define void @max_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: max_s_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sgt <16 x i8> %1, %2 + %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2 + ; CHECK-DAG: max_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %4, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size max_s_v16i8 +} + +define void @max_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: max_s_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sgt <8 x i16> %1, %2 + %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 + ; CHECK-DAG: max_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %4, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size max_s_v8i16 +} + +define void @max_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: max_s_v4i32: + + %1 = load <4 x 
i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sgt <4 x i32> %1, %2 + %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2 + ; CHECK-DAG: max_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size max_s_v4i32 +} + +define void @max_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: max_s_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sgt <2 x i64> %1, %2 + %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2 + ; CHECK-DAG: max_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size max_s_v2i64 +} + +define void @max_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: max_u_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ugt <16 x i8> %1, %2 + %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2 + ; CHECK-DAG: max_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %4, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size max_u_v16i8 +} + +define void @max_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: max_u_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ugt <8 x i16> %1, %2 + %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 + ; CHECK-DAG: max_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %4, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size max_u_v8i16 +} + +define void @max_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; 
CHECK: max_u_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ugt <4 x i32> %1, %2 + %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2 + ; CHECK-DAG: max_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size max_u_v4i32 +} + +define void @max_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: max_u_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ugt <2 x i64> %1, %2 + %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2 + ; CHECK-DAG: max_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size max_u_v2i64 +} + +define void @max_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: max_s_eq_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sge <16 x i8> %1, %2 + %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2 + ; CHECK-DAG: max_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %4, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size max_s_eq_v16i8 +} + +define void @max_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: max_s_eq_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sge <8 x i16> %1, %2 + %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 + ; CHECK-DAG: max_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %4, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size max_s_eq_v8i16 +} + +define void @max_s_eq_v4i32(<4 x 
i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: max_s_eq_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sge <4 x i32> %1, %2 + %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2 + ; CHECK-DAG: max_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size max_s_eq_v4i32 +} + +define void @max_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: max_s_eq_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sge <2 x i64> %1, %2 + %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2 + ; CHECK-DAG: max_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size max_s_eq_v2i64 +} + +define void @max_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: max_u_eq_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = icmp uge <16 x i8> %1, %2 + %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2 + ; CHECK-DAG: max_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %4, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size max_u_eq_v16i8 +} + +define void @max_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: max_u_eq_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = icmp uge <8 x i16> %1, %2 + %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 + ; CHECK-DAG: max_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %4, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + 
; CHECK: .size max_u_eq_v8i16 +} + +define void @max_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: max_u_eq_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = icmp uge <4 x i32> %1, %2 + %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2 + ; CHECK-DAG: max_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size max_u_eq_v4i32 +} + +define void @max_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: max_u_eq_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = icmp uge <2 x i64> %1, %2 + %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2 + ; CHECK-DAG: max_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size max_u_eq_v2i64 +} + +define void @maxi_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: maxi_s_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = icmp sgt <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + ; CHECK-DAG: maxi_s.b [[R3:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_s_v16i8 +} + +define void @maxi_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: maxi_s_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = icmp sgt <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, 
i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ; CHECK-DAG: maxi_s.h [[R3:\$w[0-9]+]], [[R1]], 1 + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_s_v8i16 +} + +define void @maxi_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: maxi_s_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = icmp sgt <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1> + %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + ; CHECK-DAG: maxi_s.w [[R3:\$w[0-9]+]], [[R1]], 1 + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_s_v4i32 +} + +define void @maxi_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: maxi_s_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = icmp sgt <2 x i64> %1, <i64 1, i64 1> + %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1> + ; CHECK-DAG: maxi_s.d [[R3:\$w[0-9]+]], [[R1]], 1 + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_s_v2i64 +} + +define void @maxi_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: maxi_u_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ugt <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + ; CHECK-DAG: maxi_u.b [[R3:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_u_v16i8 +} + +define void @maxi_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: maxi_u_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ugt <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + %3 
= select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ; CHECK-DAG: maxi_u.h [[R3:\$w[0-9]+]], [[R1]], 1 + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_u_v8i16 +} + +define void @maxi_u_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: maxi_u_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ugt <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1> + %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + ; CHECK-DAG: maxi_u.w [[R3:\$w[0-9]+]], [[R1]], 1 + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_u_v4i32 +} + +define void @maxi_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: maxi_u_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ugt <2 x i64> %1, <i64 1, i64 1> + %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1> + ; CHECK-DAG: maxi_u.d [[R3:\$w[0-9]+]], [[R1]], 1 + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_u_v2i64 +} + +define void @maxi_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: maxi_s_eq_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = icmp sge <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + ; CHECK-DAG: maxi_s.b [[R3:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_s_eq_v16i8 +} + +define void @maxi_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: maxi_s_eq_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = icmp sge 
<8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ; CHECK-DAG: maxi_s.h [[R3:\$w[0-9]+]], [[R1]], 1 + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_s_eq_v8i16 +} + +define void @maxi_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: maxi_s_eq_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = icmp sge <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1> + %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + ; CHECK-DAG: maxi_s.w [[R3:\$w[0-9]+]], [[R1]], 1 + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_s_eq_v4i32 +} + +define void @maxi_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: maxi_s_eq_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = icmp sge <2 x i64> %1, <i64 1, i64 1> + %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1> + ; CHECK-DAG: maxi_s.d [[R3:\$w[0-9]+]], [[R1]], 1 + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_s_eq_v2i64 +} + +define void @maxi_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: maxi_u_eq_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = icmp uge <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + ; CHECK-DAG: maxi_u.b [[R3:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_u_eq_v16i8 +} + +define void @maxi_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: 
maxi_u_eq_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = icmp uge <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ; CHECK-DAG: maxi_u.h [[R3:\$w[0-9]+]], [[R1]], 1 + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_u_eq_v8i16 +} + +define void @maxi_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: maxi_u_eq_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = icmp uge <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1> + %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + ; CHECK-DAG: maxi_u.w [[R3:\$w[0-9]+]], [[R1]], 1 + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_u_eq_v4i32 +} + +define void @maxi_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: maxi_u_eq_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = icmp uge <2 x i64> %1, <i64 1, i64 1> + %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1> + ; CHECK-DAG: maxi_u.d [[R3:\$w[0-9]+]], [[R1]], 1 + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_u_eq_v2i64 +} + +define void @min_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: min_s_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sle <16 x i8> %1, %2 + %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2 + ; CHECK-DAG: min_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %4, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size min_s_v16i8 +} + +define void @min_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: 
min_s_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = icmp slt <8 x i16> %1, %2 + %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 + ; CHECK-DAG: min_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %4, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size min_s_v8i16 +} + +define void @min_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: min_s_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = icmp slt <4 x i32> %1, %2 + %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2 + ; CHECK-DAG: min_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size min_s_v4i32 +} + +define void @min_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: min_s_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = icmp slt <2 x i64> %1, %2 + %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2 + ; CHECK-DAG: min_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size min_s_v2i64 +} + +define void @min_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: min_u_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ult <16 x i8> %1, %2 + %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2 + ; CHECK-DAG: min_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %4, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size min_u_v16i8 +} + +define void @min_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 
x i16>* %b) nounwind { + ; CHECK: min_u_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ult <8 x i16> %1, %2 + %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 + ; CHECK-DAG: min_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %4, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size min_u_v8i16 +} + +define void @min_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: min_u_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ult <4 x i32> %1, %2 + %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2 + ; CHECK-DAG: min_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size min_u_v4i32 +} + +define void @min_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: min_u_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ult <2 x i64> %1, %2 + %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2 + ; CHECK-DAG: min_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size min_u_v2i64 +} + +define void @min_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: min_s_eq_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sle <16 x i8> %1, %2 + %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2 + ; CHECK-DAG: min_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %4, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size min_s_eq_v16i8 +} + +define void 
@min_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: min_s_eq_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sle <8 x i16> %1, %2 + %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 + ; CHECK-DAG: min_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %4, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size min_s_eq_v8i16 +} + +define void @min_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: min_s_eq_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sle <4 x i32> %1, %2 + %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2 + ; CHECK-DAG: min_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size min_s_eq_v4i32 +} + +define void @min_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: min_s_eq_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sle <2 x i64> %1, %2 + %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2 + ; CHECK-DAG: min_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size min_s_eq_v2i64 +} + +define void @min_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: min_u_eq_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ule <16 x i8> %1, %2 + %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2 + ; CHECK-DAG: min_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %4, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 
0($4) + + ret void + ; CHECK: .size min_u_eq_v16i8 +} + +define void @min_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: min_u_eq_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ule <8 x i16> %1, %2 + %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 + ; CHECK-DAG: min_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %4, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size min_u_eq_v8i16 +} + +define void @min_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: min_u_eq_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ule <4 x i32> %1, %2 + %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2 + ; CHECK-DAG: min_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size min_u_eq_v4i32 +} + +define void @min_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: min_u_eq_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ule <2 x i64> %1, %2 + %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2 + ; CHECK-DAG: min_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size min_u_eq_v2i64 +} + +define void @mini_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: mini_s_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = icmp slt <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, 
i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + ; CHECK-DAG: mini_s.b [[R3:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size mini_s_v16i8 +} + +define void @mini_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: mini_s_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = icmp slt <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ; CHECK-DAG: mini_s.h [[R3:\$w[0-9]+]], [[R1]], 1 + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size mini_s_v8i16 +} + +define void @mini_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: mini_s_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = icmp slt <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1> + %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + ; CHECK-DAG: mini_s.w [[R3:\$w[0-9]+]], [[R1]], 1 + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size mini_s_v4i32 +} + +define void @mini_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: mini_s_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = icmp slt <2 x i64> %1, <i64 1, i64 1> + %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1> + ; CHECK-DAG: mini_s.d [[R3:\$w[0-9]+]], [[R1]], 1 + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size mini_s_v2i64 +} + +define void @mini_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: mini_u_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ult <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + %3 = select <16 x i1> %2, <16 x i8> 
%1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + ; CHECK-DAG: mini_u.b [[R3:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size mini_u_v16i8 +} + +define void @mini_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: mini_u_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ult <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ; CHECK-DAG: mini_u.h [[R3:\$w[0-9]+]], [[R1]], 1 + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size mini_u_v8i16 +} + +define void @mini_u_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: mini_u_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ult <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1> + %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + ; CHECK-DAG: mini_u.w [[R3:\$w[0-9]+]], [[R1]], 1 + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size mini_u_v4i32 +} + +define void @mini_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: mini_u_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ult <2 x i64> %1, <i64 1, i64 1> + %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1> + ; CHECK-DAG: mini_u.d [[R3:\$w[0-9]+]], [[R1]], 1 + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size mini_u_v2i64 +} + +define void @mini_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: mini_s_eq_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = icmp sle <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, 
i8 1, i8 1, i8 1, i8 1, i8 1> + %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + ; CHECK-DAG: mini_s.b [[R3:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size mini_s_eq_v16i8 +} + +define void @mini_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: mini_s_eq_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = icmp sle <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ; CHECK-DAG: mini_s.h [[R3:\$w[0-9]+]], [[R1]], 1 + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size mini_s_eq_v8i16 +} + +define void @mini_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: mini_s_eq_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = icmp sle <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1> + %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + ; CHECK-DAG: mini_s.w [[R3:\$w[0-9]+]], [[R1]], 1 + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size mini_s_eq_v4i32 +} + +define void @mini_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: mini_s_eq_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = icmp sle <2 x i64> %1, <i64 1, i64 1> + %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1> + ; CHECK-DAG: mini_s.d [[R3:\$w[0-9]+]], [[R1]], 1 + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size mini_s_eq_v2i64 +} + +define void @mini_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: mini_u_eq_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) 
+ %2 = icmp ule <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + ; CHECK-DAG: mini_u.b [[R3:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size mini_u_eq_v16i8 +} + +define void @mini_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: mini_u_eq_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ule <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ; CHECK-DAG: mini_u.h [[R3:\$w[0-9]+]], [[R1]], 1 + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size mini_u_eq_v8i16 +} + +define void @mini_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: mini_u_eq_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ule <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1> + %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + ; CHECK-DAG: mini_u.w [[R3:\$w[0-9]+]], [[R1]], 1 + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size mini_u_eq_v4i32 +} + +define void @mini_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: mini_u_eq_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ule <2 x i64> %1, <i64 1, i64 1> + %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1> + ; CHECK-DAG: mini_u.d [[R3:\$w[0-9]+]], [[R1]], 1 + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size mini_u_eq_v2i64 +} diff --git a/test/CodeGen/Mips/msa/compare_float.ll 
b/test/CodeGen/Mips/msa/compare_float.ll new file mode 100644 index 0000000..2fc61f8 --- /dev/null +++ b/test/CodeGen/Mips/msa/compare_float.ll @@ -0,0 +1,663 @@ +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +declare <4 x float> @llvm.mips.fmax.w(<4 x float>, <4 x float>) nounwind +declare <2 x double> @llvm.mips.fmax.d(<2 x double>, <2 x double>) nounwind +declare <4 x float> @llvm.mips.fmin.w(<4 x float>, <4 x float>) nounwind +declare <2 x double> @llvm.mips.fmin.d(<2 x double>, <2 x double>) nounwind + +define void @false_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind { + ; CHECK: false_v4f32: + + %1 = load <4 x float>* %a + %2 = load <4 x float>* %b + %3 = fcmp false <4 x float> %1, %2 + %4 = sext <4 x i1> %3 to <4 x i32> + store <4 x i32> %4, <4 x i32>* %c + ret void + + ; (setcc $a, $b, SETFALSE) is always folded, so we won't get fcaf: + ; CHECK-DAG: ldi.b [[R1:\$w[0-9]+]], 0 + ; CHECK-DAG: st.w [[R1]], 0($4) + ; CHECK: .size false_v4f32 +} + +define void @false_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind { + ; CHECK: false_v2f64: + + %1 = load <2 x double>* %a + %2 = load <2 x double>* %b + %3 = fcmp false <2 x double> %1, %2 + %4 = sext <2 x i1> %3 to <2 x i64> + store <2 x i64> %4, <2 x i64>* %c + ret void + + ; FIXME: This code is correct, but poor. 
Ideally it would be similar to + ; the code in @false_v4f32 + ; CHECK-DAG: ldi.b [[R1:\$w[0-9]+]], 0 + ; CHECK-DAG: slli.d [[R3:\$w[0-9]+]], [[R1]], 63 + ; CHECK-DAG: srai.d [[R4:\$w[0-9]+]], [[R3]], 63 + ; CHECK-DAG: st.d [[R4]], 0($4) + ; CHECK: .size false_v2f64 +} + +define void @oeq_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind { + ; CHECK: oeq_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x float>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = fcmp oeq <4 x float> %1, %2 + %4 = sext <4 x i1> %3 to <4 x i32> + ; CHECK-DAG: fceq.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size oeq_v4f32 +} + +define void @oeq_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind { + ; CHECK: oeq_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x double>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = fcmp oeq <2 x double> %1, %2 + %4 = sext <2 x i1> %3 to <2 x i64> + ; CHECK-DAG: fceq.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size oeq_v2f64 +} + +define void @oge_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind { + ; CHECK: oge_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x float>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = fcmp oge <4 x float> %1, %2 + %4 = sext <4 x i1> %3 to <4 x i32> + ; CHECK-DAG: fcle.w [[R3:\$w[0-9]+]], [[R2]], [[R1]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size oge_v4f32 +} + +define void @oge_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind { + ; CHECK: oge_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x double>* %b + ; CHECK-DAG: ld.d 
[[R2:\$w[0-9]+]], 0($6) + %3 = fcmp oge <2 x double> %1, %2 + %4 = sext <2 x i1> %3 to <2 x i64> + ; CHECK-DAG: fcle.d [[R3:\$w[0-9]+]], [[R2]], [[R1]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size oge_v2f64 +} + +define void @ogt_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind { + ; CHECK: ogt_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x float>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = fcmp ogt <4 x float> %1, %2 + %4 = sext <4 x i1> %3 to <4 x i32> + ; CHECK-DAG: fclt.w [[R3:\$w[0-9]+]], [[R2]], [[R1]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size ogt_v4f32 +} + +define void @ogt_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind { + ; CHECK: ogt_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x double>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = fcmp ogt <2 x double> %1, %2 + %4 = sext <2 x i1> %3 to <2 x i64> + ; CHECK-DAG: fclt.d [[R3:\$w[0-9]+]], [[R2]], [[R1]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size ogt_v2f64 +} + +define void @ole_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind { + ; CHECK: ole_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x float>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = fcmp ole <4 x float> %1, %2 + %4 = sext <4 x i1> %3 to <4 x i32> + ; CHECK-DAG: fcle.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size ole_v4f32 +} + +define void @ole_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind { + ; CHECK: ole_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x double>* %b + ; CHECK-DAG: ld.d 
[[R2:\$w[0-9]+]], 0($6) + %3 = fcmp ole <2 x double> %1, %2 + %4 = sext <2 x i1> %3 to <2 x i64> + ; CHECK-DAG: fcle.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size ole_v2f64 +} + +define void @olt_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind { + ; CHECK: olt_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x float>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = fcmp olt <4 x float> %1, %2 + %4 = sext <4 x i1> %3 to <4 x i32> + ; CHECK-DAG: fclt.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size olt_v4f32 +} + +define void @olt_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind { + ; CHECK: olt_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x double>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = fcmp olt <2 x double> %1, %2 + %4 = sext <2 x i1> %3 to <2 x i64> + ; CHECK-DAG: fclt.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size olt_v2f64 +} + +define void @one_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind { + ; CHECK: one_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x float>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = fcmp one <4 x float> %1, %2 + %4 = sext <4 x i1> %3 to <4 x i32> + ; CHECK-DAG: fcne.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size one_v4f32 +} + +define void @one_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind { + ; CHECK: one_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x double>* %b + ; CHECK-DAG: ld.d 
[[R2:\$w[0-9]+]], 0($6) + %3 = fcmp one <2 x double> %1, %2 + %4 = sext <2 x i1> %3 to <2 x i64> + ; CHECK-DAG: fcne.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size one_v2f64 +} + +define void @ord_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind { + ; CHECK: ord_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x float>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = fcmp ord <4 x float> %1, %2 + %4 = sext <4 x i1> %3 to <4 x i32> + ; CHECK-DAG: fcor.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size ord_v4f32 +} + +define void @ord_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind { + ; CHECK: ord_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x double>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = fcmp ord <2 x double> %1, %2 + %4 = sext <2 x i1> %3 to <2 x i64> + ; CHECK-DAG: fcor.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size ord_v2f64 +} + +define void @ueq_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind { + ; CHECK: ueq_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x float>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = fcmp ueq <4 x float> %1, %2 + %4 = sext <4 x i1> %3 to <4 x i32> + ; CHECK-DAG: fcueq.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size ueq_v4f32 +} + +define void @ueq_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind { + ; CHECK: ueq_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x double>* %b + ; CHECK-DAG: ld.d 
[[R2:\$w[0-9]+]], 0($6) + %3 = fcmp ueq <2 x double> %1, %2 + %4 = sext <2 x i1> %3 to <2 x i64> + ; CHECK-DAG: fcueq.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size ueq_v2f64 +} + +define void @uge_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind { + ; CHECK: uge_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x float>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = fcmp uge <4 x float> %1, %2 + %4 = sext <4 x i1> %3 to <4 x i32> + ; CHECK-DAG: fcule.w [[R3:\$w[0-9]+]], [[R2]], [[R1]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size uge_v4f32 +} + +define void @uge_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind { + ; CHECK: uge_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x double>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = fcmp uge <2 x double> %1, %2 + %4 = sext <2 x i1> %3 to <2 x i64> + ; CHECK-DAG: fcule.d [[R3:\$w[0-9]+]], [[R2]], [[R1]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size uge_v2f64 +} + +define void @ugt_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind { + ; CHECK: ugt_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x float>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = fcmp ugt <4 x float> %1, %2 + %4 = sext <4 x i1> %3 to <4 x i32> + ; CHECK-DAG: fcult.w [[R3:\$w[0-9]+]], [[R2]], [[R1]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size ugt_v4f32 +} + +define void @ugt_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind { + ; CHECK: ugt_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x double>* %b + ; CHECK-DAG: ld.d 
[[R2:\$w[0-9]+]], 0($6) + %3 = fcmp ugt <2 x double> %1, %2 + %4 = sext <2 x i1> %3 to <2 x i64> + ; CHECK-DAG: fcult.d [[R3:\$w[0-9]+]], [[R2]], [[R1]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size ugt_v2f64 +} + +define void @ule_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind { + ; CHECK: ule_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x float>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = fcmp ule <4 x float> %1, %2 + %4 = sext <4 x i1> %3 to <4 x i32> + ; CHECK-DAG: fcule.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size ule_v4f32 +} + +define void @ule_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind { + ; CHECK: ule_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x double>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = fcmp ule <2 x double> %1, %2 + %4 = sext <2 x i1> %3 to <2 x i64> + ; CHECK-DAG: fcule.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size ule_v2f64 +} + +define void @ult_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind { + ; CHECK: ult_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x float>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = fcmp ult <4 x float> %1, %2 + %4 = sext <4 x i1> %3 to <4 x i32> + ; CHECK-DAG: fcult.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size ult_v4f32 +} + +define void @ult_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind { + ; CHECK: ult_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x double>* %b + ; CHECK-DAG: ld.d 
[[R2:\$w[0-9]+]], 0($6) + %3 = fcmp ult <2 x double> %1, %2 + %4 = sext <2 x i1> %3 to <2 x i64> + ; CHECK-DAG: fcult.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size ult_v2f64 +} + +define void @uno_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind { + ; CHECK: uno_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x float>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = fcmp uno <4 x float> %1, %2 + %4 = sext <4 x i1> %3 to <4 x i32> + ; CHECK-DAG: fcun.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size uno_v4f32 +} + +define void @uno_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind { + ; CHECK: uno_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x double>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = fcmp uno <2 x double> %1, %2 + %4 = sext <2 x i1> %3 to <2 x i64> + ; CHECK-DAG: fcun.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size uno_v2f64 +} + +define void @true_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind { + ; CHECK: true_v4f32: + + %1 = load <4 x float>* %a + %2 = load <4 x float>* %b + %3 = fcmp true <4 x float> %1, %2 + %4 = sext <4 x i1> %3 to <4 x i32> + store <4 x i32> %4, <4 x i32>* %c + ret void + + ; (setcc $a, $b, SETTRUE) is always folded, so we won't get fcaf: + ; CHECK-DAG: ldi.b [[R1:\$w[0-9]+]], -1 + ; CHECK-DAG: st.w [[R1]], 0($4) + ; CHECK: .size true_v4f32 +} + +define void @true_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind { + ; CHECK: true_v2f64: + + %1 = load <2 x double>* %a + %2 = load <2 x double>* %b + %3 = fcmp true <2 x double> %1, %2 + %4 = sext <2 x i1> %3 to <2 x i64> + store <2 x i64> %4, <2 x 
i64>* %c + ret void + + ; FIXME: This code is correct, but poor. Ideally it would be similar to + ; the code in @true_v4f32 + ; CHECK-DAG: ldi.d [[R1:\$w[0-9]+]], 1 + ; CHECK-DAG: slli.d [[R3:\$w[0-9]+]], [[R1]], 63 + ; CHECK-DAG: srai.d [[R4:\$w[0-9]+]], [[R3]], 63 + ; CHECK-DAG: st.d [[R4]], 0($4) + ; CHECK: .size true_v2f64 +} + +define void @bsel_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b, + <4 x float>* %c) nounwind { + ; CHECK: bsel_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x float>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = load <4 x float>* %c + ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7) + %4 = fcmp ogt <4 x float> %1, %2 + ; CHECK-DAG: fclt.w [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %5 = select <4 x i1> %4, <4 x float> %1, <4 x float> %3 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <4 x float> %5, <4 x float>* %d + ; CHECK-DAG: st.w [[R4]], 0($4) + + ret void + ; CHECK: .size bsel_v4f32 +} + +define void @bsel_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b, + <2 x double>* %c) nounwind { + ; CHECK: bsel_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x double>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = load <2 x double>* %c + ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7) + %4 = fcmp ogt <2 x double> %1, %2 + ; CHECK-DAG: fclt.d [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %5 = select <2 x i1> %4, <2 x double> %1, <2 x double> %3 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <2 x double> %5, <2 x double>* %d + ; CHECK-DAG: st.d [[R4]], 0($4) + + ret void + ; CHECK: .size bsel_v2f64 +} + +define void @bseli_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b, + <4 x float>* %c) nounwind { + ; CHECK: bseli_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x float>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = fcmp ogt <4 x float> %1, %2 + ; CHECK-DAG: fclt.w 
[[R4:\$w[0-9]+]], [[R2]], [[R1]] + %4 = select <4 x i1> %3, <4 x float> %1, <4 x float> zeroinitializer + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3:\$w[0-9]+]] + store <4 x float> %4, <4 x float>* %d + ; CHECK-DAG: st.w [[R4]], 0($4) + + ret void + ; CHECK: .size bseli_v4f32 +} + +define void @bseli_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b, + <2 x double>* %c) nounwind { + ; CHECK: bseli_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x double>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = fcmp ogt <2 x double> %1, %2 + ; CHECK-DAG: fclt.d [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %4 = select <2 x i1> %3, <2 x double> %1, <2 x double> zeroinitializer + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3:\$w[0-9]+]] + store <2 x double> %4, <2 x double>* %d + ; CHECK-DAG: st.d [[R4]], 0($4) + + ret void + ; CHECK: .size bseli_v2f64 +} + +define void @max_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind { + ; CHECK: max_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x float>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = tail call <4 x float> @llvm.mips.fmax.w(<4 x float> %1, <4 x float> %2) + ; CHECK-DAG: fmax.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x float> %3, <4 x float>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size max_v4f32 +} + +define void @max_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind { + ; CHECK: max_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x double>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = tail call <2 x double> @llvm.mips.fmax.d(<2 x double> %1, <2 x double> %2) + ; CHECK-DAG: fmax.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x double> %3, <2 x double>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size max_v2f64 +} + +define void @min_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* 
%b) nounwind { + ; CHECK: min_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x float>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = tail call <4 x float> @llvm.mips.fmin.w(<4 x float> %1, <4 x float> %2) + ; CHECK-DAG: fmin.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x float> %3, <4 x float>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size min_v4f32 +} + +define void @min_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind { + ; CHECK: min_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x double>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = tail call <2 x double> @llvm.mips.fmin.d(<2 x double> %1, <2 x double> %2) + ; CHECK-DAG: fmin.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x double> %3, <2 x double>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size min_v2f64 +} diff --git a/test/CodeGen/Mips/msa/elm_copy.ll b/test/CodeGen/Mips/msa/elm_copy.ll new file mode 100644 index 0000000..ed3e52c --- /dev/null +++ b/test/CodeGen/Mips/msa/elm_copy.ll @@ -0,0 +1,162 @@ +; Test the MSA intrinsics that are encoded with the ELM instruction format and +; are element extraction operations. 
+ +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_copy_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_copy_s_b_RES = global i32 0, align 16 + +define void @llvm_mips_copy_s_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_copy_s_b_ARG1 + %1 = tail call i32 @llvm.mips.copy.s.b(<16 x i8> %0, i32 1) + store i32 %1, i32* @llvm_mips_copy_s_b_RES + ret void +} + +declare i32 @llvm.mips.copy.s.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_copy_s_b_test: +; CHECK: ld.b +; CHECK: copy_s.b +; CHECK: sw +; CHECK: .size llvm_mips_copy_s_b_test +; +@llvm_mips_copy_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_copy_s_h_RES = global i32 0, align 16 + +define void @llvm_mips_copy_s_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_copy_s_h_ARG1 + %1 = tail call i32 @llvm.mips.copy.s.h(<8 x i16> %0, i32 1) + store i32 %1, i32* @llvm_mips_copy_s_h_RES + ret void +} + +declare i32 @llvm.mips.copy.s.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_copy_s_h_test: +; CHECK: ld.h +; CHECK: copy_s.h +; CHECK: sw +; CHECK: .size llvm_mips_copy_s_h_test +; +@llvm_mips_copy_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_copy_s_w_RES = global i32 0, align 16 + +define void @llvm_mips_copy_s_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_copy_s_w_ARG1 + %1 = tail call i32 @llvm.mips.copy.s.w(<4 x i32> %0, i32 1) + store i32 %1, i32* @llvm_mips_copy_s_w_RES + ret void +} + +declare i32 @llvm.mips.copy.s.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_copy_s_w_test: +; CHECK: ld.w +; CHECK: copy_s.w +; CHECK: sw +; CHECK: .size llvm_mips_copy_s_w_test +; +@llvm_mips_copy_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_copy_s_d_RES = global i64 0, 
align 16 + +define void @llvm_mips_copy_s_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_copy_s_d_ARG1 + %1 = tail call i64 @llvm.mips.copy.s.d(<2 x i64> %0, i32 1) + store i64 %1, i64* @llvm_mips_copy_s_d_RES + ret void +} + +declare i64 @llvm.mips.copy.s.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_copy_s_d_test: +; CHECK: ld.w +; CHECK: copy_s.w +; CHECK: copy_s.w +; CHECK: sw +; CHECK: sw +; CHECK: .size llvm_mips_copy_s_d_test +; +@llvm_mips_copy_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_copy_u_b_RES = global i32 0, align 16 + +define void @llvm_mips_copy_u_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_copy_u_b_ARG1 + %1 = tail call i32 @llvm.mips.copy.u.b(<16 x i8> %0, i32 1) + store i32 %1, i32* @llvm_mips_copy_u_b_RES + ret void +} + +declare i32 @llvm.mips.copy.u.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_copy_u_b_test: +; CHECK: ld.b +; CHECK: copy_u.b +; CHECK: sw +; CHECK: .size llvm_mips_copy_u_b_test +; +@llvm_mips_copy_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_copy_u_h_RES = global i32 0, align 16 + +define void @llvm_mips_copy_u_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_copy_u_h_ARG1 + %1 = tail call i32 @llvm.mips.copy.u.h(<8 x i16> %0, i32 1) + store i32 %1, i32* @llvm_mips_copy_u_h_RES + ret void +} + +declare i32 @llvm.mips.copy.u.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_copy_u_h_test: +; CHECK: ld.h +; CHECK: copy_u.h +; CHECK: sw +; CHECK: .size llvm_mips_copy_u_h_test +; +@llvm_mips_copy_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_copy_u_w_RES = global i32 0, align 16 + +define void @llvm_mips_copy_u_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_copy_u_w_ARG1 + %1 = tail call i32 @llvm.mips.copy.u.w(<4 x i32> %0, i32 1) + store i32 %1, i32* 
@llvm_mips_copy_u_w_RES + ret void +} + +declare i32 @llvm.mips.copy.u.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_copy_u_w_test: +; CHECK: ld.w +; CHECK: copy_u.w +; CHECK: sw +; CHECK: .size llvm_mips_copy_u_w_test +; +@llvm_mips_copy_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_copy_u_d_RES = global i64 0, align 16 + +define void @llvm_mips_copy_u_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_copy_u_d_ARG1 + %1 = tail call i64 @llvm.mips.copy.u.d(<2 x i64> %0, i32 1) + store i64 %1, i64* @llvm_mips_copy_u_d_RES + ret void +} + +declare i64 @llvm.mips.copy.u.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_copy_u_d_test: +; CHECK: ld.w +; CHECK: copy_s.w +; CHECK: copy_s.w +; CHECK: sw +; CHECK: sw +; CHECK: .size llvm_mips_copy_u_d_test +; diff --git a/test/CodeGen/Mips/msa/elm_cxcmsa.ll b/test/CodeGen/Mips/msa/elm_cxcmsa.ll new file mode 100644 index 0000000..8d6b0ee --- /dev/null +++ b/test/CodeGen/Mips/msa/elm_cxcmsa.ll @@ -0,0 +1,168 @@ +; Test the MSA ctcmsa and cfcmsa intrinsics (which are encoded with the ELM +; instruction format). 
+ +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +define i32 @msa_ir_cfcmsa_test() nounwind { +entry: + %0 = tail call i32 @llvm.mips.cfcmsa(i32 0) + ret i32 %0 +} + +; CHECK: msa_ir_cfcmsa_test: +; CHECK: cfcmsa $[[R1:[0-9]+]], $0 +; CHECK: .size msa_ir_cfcmsa_test +; +define i32 @msa_csr_cfcmsa_test() nounwind { +entry: + %0 = tail call i32 @llvm.mips.cfcmsa(i32 1) + ret i32 %0 +} + +; CHECK: msa_csr_cfcmsa_test: +; CHECK: cfcmsa $[[R1:[0-9]+]], $1 +; CHECK: .size msa_csr_cfcmsa_test +; +define i32 @msa_access_cfcmsa_test() nounwind { +entry: + %0 = tail call i32 @llvm.mips.cfcmsa(i32 2) + ret i32 %0 +} + +; CHECK: msa_access_cfcmsa_test: +; CHECK: cfcmsa $[[R1:[0-9]+]], $2 +; CHECK: .size msa_access_cfcmsa_test +; +define i32 @msa_save_cfcmsa_test() nounwind { +entry: + %0 = tail call i32 @llvm.mips.cfcmsa(i32 3) + ret i32 %0 +} + +; CHECK: msa_save_cfcmsa_test: +; CHECK: cfcmsa $[[R1:[0-9]+]], $3 +; CHECK: .size msa_save_cfcmsa_test +; +define i32 @msa_modify_cfcmsa_test() nounwind { +entry: + %0 = tail call i32 @llvm.mips.cfcmsa(i32 4) + ret i32 %0 +} + +; CHECK: msa_modify_cfcmsa_test: +; CHECK: cfcmsa $[[R1:[0-9]+]], $4 +; CHECK: .size msa_modify_cfcmsa_test +; +define i32 @msa_request_cfcmsa_test() nounwind { +entry: + %0 = tail call i32 @llvm.mips.cfcmsa(i32 5) + ret i32 %0 +} + +; CHECK: msa_request_cfcmsa_test: +; CHECK: cfcmsa $[[R1:[0-9]+]], $5 +; CHECK: .size msa_request_cfcmsa_test +; +define i32 @msa_map_cfcmsa_test() nounwind { +entry: + %0 = tail call i32 @llvm.mips.cfcmsa(i32 6) + ret i32 %0 +} + +; CHECK: msa_map_cfcmsa_test: +; CHECK: cfcmsa $[[R1:[0-9]+]], $6 +; CHECK: .size msa_map_cfcmsa_test +; +define i32 @msa_unmap_cfcmsa_test() nounwind { +entry: + %0 = tail call i32 @llvm.mips.cfcmsa(i32 7) + ret i32 %0 +} + +; CHECK: msa_unmap_cfcmsa_test: +; CHECK: cfcmsa $[[R1:[0-9]+]], $7 +; CHECK: .size msa_unmap_cfcmsa_test +; +define void @msa_ir_ctcmsa_test() 
nounwind { +entry: + tail call void @llvm.mips.ctcmsa(i32 0, i32 1) + ret void +} + +; CHECK: msa_ir_ctcmsa_test: +; CHECK: ctcmsa $0 +; CHECK: .size msa_ir_ctcmsa_test +; +define void @msa_csr_ctcmsa_test() nounwind { +entry: + tail call void @llvm.mips.ctcmsa(i32 1, i32 1) + ret void +} + +; CHECK: msa_csr_ctcmsa_test: +; CHECK: ctcmsa $1 +; CHECK: .size msa_csr_ctcmsa_test +; +define void @msa_access_ctcmsa_test() nounwind { +entry: + tail call void @llvm.mips.ctcmsa(i32 2, i32 1) + ret void +} + +; CHECK: msa_access_ctcmsa_test: +; CHECK: ctcmsa $2 +; CHECK: .size msa_access_ctcmsa_test +; +define void @msa_save_ctcmsa_test() nounwind { +entry: + tail call void @llvm.mips.ctcmsa(i32 3, i32 1) + ret void +} + +; CHECK: msa_save_ctcmsa_test: +; CHECK: ctcmsa $3 +; CHECK: .size msa_save_ctcmsa_test +; +define void @msa_modify_ctcmsa_test() nounwind { +entry: + tail call void @llvm.mips.ctcmsa(i32 4, i32 1) + ret void +} + +; CHECK: msa_modify_ctcmsa_test: +; CHECK: ctcmsa $4 +; CHECK: .size msa_modify_ctcmsa_test +; +define void @msa_request_ctcmsa_test() nounwind { +entry: + tail call void @llvm.mips.ctcmsa(i32 5, i32 1) + ret void +} + +; CHECK: msa_request_ctcmsa_test: +; CHECK: ctcmsa $5 +; CHECK: .size msa_request_ctcmsa_test +; +define void @msa_map_ctcmsa_test() nounwind { +entry: + tail call void @llvm.mips.ctcmsa(i32 6, i32 1) + ret void +} + +; CHECK: msa_map_ctcmsa_test: +; CHECK: ctcmsa $6 +; CHECK: .size msa_map_ctcmsa_test +; +define void @msa_unmap_ctcmsa_test() nounwind { +entry: + tail call void @llvm.mips.ctcmsa(i32 7, i32 1) + ret void +} + +; CHECK: msa_unmap_ctcmsa_test: +; CHECK: ctcmsa $7 +; CHECK: .size msa_unmap_ctcmsa_test +; +declare i32 @llvm.mips.cfcmsa(i32) nounwind +declare void @llvm.mips.ctcmsa(i32, i32) nounwind diff --git a/test/CodeGen/Mips/msa/elm_insv.ll b/test/CodeGen/Mips/msa/elm_insv.ll new file mode 100644 index 0000000..fa7ceaf --- /dev/null +++ b/test/CodeGen/Mips/msa/elm_insv.ll @@ -0,0 +1,192 @@ +; Test the MSA element 
insertion intrinsics that are encoded with the ELM +; instruction format. + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_insert_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_insert_b_ARG3 = global i32 27, align 16 +@llvm_mips_insert_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_insert_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_insert_b_ARG1 + %1 = load i32* @llvm_mips_insert_b_ARG3 + %2 = tail call <16 x i8> @llvm.mips.insert.b(<16 x i8> %0, i32 1, i32 %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_insert_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.insert.b(<16 x i8>, i32, i32) nounwind + +; CHECK: llvm_mips_insert_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], 0( +; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0( +; CHECK-DAG: insert.b [[R2]][1], [[R1]] +; CHECK-DAG: st.b [[R2]], 0( +; CHECK: .size llvm_mips_insert_b_test +; +@llvm_mips_insert_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_insert_h_ARG3 = global i32 27, align 16 +@llvm_mips_insert_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_insert_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_insert_h_ARG1 + %1 = load i32* @llvm_mips_insert_h_ARG3 + %2 = tail call <8 x i16> @llvm.mips.insert.h(<8 x i16> %0, i32 1, i32 %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_insert_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.insert.h(<8 x i16>, i32, i32) nounwind + +; CHECK: llvm_mips_insert_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], 0( +; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0( +; CHECK-DAG: insert.h [[R2]][1], [[R1]] +; CHECK-DAG: st.h [[R2]], 0( +; 
CHECK: .size llvm_mips_insert_h_test +; +@llvm_mips_insert_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_insert_w_ARG3 = global i32 27, align 16 +@llvm_mips_insert_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_insert_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_insert_w_ARG1 + %1 = load i32* @llvm_mips_insert_w_ARG3 + %2 = tail call <4 x i32> @llvm.mips.insert.w(<4 x i32> %0, i32 1, i32 %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_insert_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.insert.w(<4 x i32>, i32, i32) nounwind + +; CHECK: llvm_mips_insert_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], 0( +; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0( +; CHECK-DAG: insert.w [[R2]][1], [[R1]] +; CHECK-DAG: st.w [[R2]], 0( +; CHECK: .size llvm_mips_insert_w_test +; +@llvm_mips_insert_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_insert_d_ARG3 = global i64 27, align 16 +@llvm_mips_insert_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_insert_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_insert_d_ARG1 + %1 = load i64* @llvm_mips_insert_d_ARG3 + %2 = tail call <2 x i64> @llvm.mips.insert.d(<2 x i64> %0, i32 1, i64 %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_insert_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.insert.d(<2 x i64>, i32, i64) nounwind + +; CHECK: llvm_mips_insert_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], 0( +; CHECK-DAG: lw [[R2:\$[0-9]+]], 4( +; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], +; CHECK-DAG: insert.w [[R3]][2], [[R1]] +; CHECK-DAG: insert.w [[R3]][3], [[R2]] +; CHECK-DAG: st.w [[R3]], +; CHECK: .size llvm_mips_insert_d_test +; +@llvm_mips_insve_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_insve_b_ARG3 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 
27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_insve_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_insve_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_insve_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_insve_b_ARG3 + %2 = tail call <16 x i8> @llvm.mips.insve.b(<16 x i8> %0, i32 1, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_insve_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.insve.b(<16 x i8>, i32, <16 x i8>) nounwind + +; CHECK: llvm_mips_insve_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_insve_b_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_insve_b_ARG3)( +; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: insve.b [[R3]][1], [[R4]][0] +; CHECK-DAG: st.b [[R3]], +; CHECK: .size llvm_mips_insve_b_test +; +@llvm_mips_insve_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_insve_h_ARG3 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_insve_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_insve_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_insve_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_insve_h_ARG3 + %2 = tail call <8 x i16> @llvm.mips.insve.h(<8 x i16> %0, i32 1, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_insve_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.insve.h(<8 x i16>, i32, <8 x i16>) nounwind + +; CHECK: llvm_mips_insve_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_insve_h_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_insve_h_ARG3)( +; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[R4:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: insve.h [[R3]][1], [[R4]][0] +; CHECK-DAG: st.h [[R3]], +; CHECK: 
.size llvm_mips_insve_h_test +; +@llvm_mips_insve_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_insve_w_ARG3 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_insve_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_insve_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_insve_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_insve_w_ARG3 + %2 = tail call <4 x i32> @llvm.mips.insve.w(<4 x i32> %0, i32 1, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_insve_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.insve.w(<4 x i32>, i32, <4 x i32>) nounwind + +; CHECK: llvm_mips_insve_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_insve_w_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_insve_w_ARG3)( +; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[R4:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: insve.w [[R3]][1], [[R4]][0] +; CHECK-DAG: st.w [[R3]], +; CHECK: .size llvm_mips_insve_w_test +; +@llvm_mips_insve_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_insve_d_ARG3 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_insve_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_insve_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_insve_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_insve_d_ARG3 + %2 = tail call <2 x i64> @llvm.mips.insve.d(<2 x i64> %0, i32 1, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_insve_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.insve.d(<2 x i64>, i32, <2 x i64>) nounwind + +; CHECK: llvm_mips_insve_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_insve_d_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_insve_d_ARG3)( +; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[R4:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: insve.d [[R3]][1], [[R4]][0] +; CHECK-DAG: st.d [[R3]], +; CHECK: .size llvm_mips_insve_d_test +; diff --git 
a/test/CodeGen/Mips/msa/elm_move.ll b/test/CodeGen/Mips/msa/elm_move.ll new file mode 100644 index 0000000..98c06c7 --- /dev/null +++ b/test/CodeGen/Mips/msa/elm_move.ll @@ -0,0 +1,25 @@ +; Test the MSA move intrinsics (which are encoded with the ELM instruction +; format). + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_move_vb_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_move_vb_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_move_vb_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_move_vb_ARG1 + %1 = tail call <16 x i8> @llvm.mips.move.v(<16 x i8> %0) + store <16 x i8> %1, <16 x i8>* @llvm_mips_move_vb_RES + ret void +} + +declare <16 x i8> @llvm.mips.move.v(<16 x i8>) nounwind + +; CHECK: llvm_mips_move_vb_test: +; CHECK: ld.b +; CHECK: move.v +; CHECK: st.b +; CHECK: .size llvm_mips_move_vb_test +; diff --git a/test/CodeGen/Mips/msa/elm_shift_slide.ll b/test/CodeGen/Mips/msa/elm_shift_slide.ll new file mode 100644 index 0000000..39d670d --- /dev/null +++ b/test/CodeGen/Mips/msa/elm_shift_slide.ll @@ -0,0 +1,158 @@ +; Test the MSA intrinsics that are encoded with the ELM instruction format and +; are either shifts or slides. 
+ +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_sldi_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_sldi_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_sldi_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_sldi_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.sldi.b(<16 x i8> %0, i32 1) + store <16 x i8> %1, <16 x i8>* @llvm_mips_sldi_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.sldi.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_sldi_b_test: +; CHECK: ld.b +; CHECK: sldi.b +; CHECK: st.b +; CHECK: .size llvm_mips_sldi_b_test +; +@llvm_mips_sldi_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_sldi_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_sldi_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_sldi_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.sldi.h(<8 x i16> %0, i32 1) + store <8 x i16> %1, <8 x i16>* @llvm_mips_sldi_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.sldi.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_sldi_h_test: +; CHECK: ld.h +; CHECK: sldi.h +; CHECK: st.h +; CHECK: .size llvm_mips_sldi_h_test +; +@llvm_mips_sldi_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_sldi_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_sldi_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_sldi_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.sldi.w(<4 x i32> %0, i32 1) + store <4 x i32> %1, <4 x i32>* @llvm_mips_sldi_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.sldi.w(<4 x i32>, i32) nounwind + +; CHECK: 
llvm_mips_sldi_w_test: +; CHECK: ld.w +; CHECK: sldi.w +; CHECK: st.w +; CHECK: .size llvm_mips_sldi_w_test +; +@llvm_mips_sldi_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_sldi_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_sldi_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_sldi_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.sldi.d(<2 x i64> %0, i32 1) + store <2 x i64> %1, <2 x i64>* @llvm_mips_sldi_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.sldi.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_sldi_d_test: +; CHECK: ld.d +; CHECK: sldi.d +; CHECK: st.d +; CHECK: .size llvm_mips_sldi_d_test +; +@llvm_mips_splati_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_splati_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_splati_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_splati_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.splati.b(<16 x i8> %0, i32 1) + store <16 x i8> %1, <16 x i8>* @llvm_mips_splati_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.splati.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_splati_b_test: +; CHECK: ld.b +; CHECK: splati.b +; CHECK: st.b +; CHECK: .size llvm_mips_splati_b_test +; +@llvm_mips_splati_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_splati_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_splati_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_splati_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.splati.h(<8 x i16> %0, i32 1) + store <8 x i16> %1, <8 x i16>* @llvm_mips_splati_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.splati.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_splati_h_test: 
+; CHECK: ld.h +; CHECK: splati.h +; CHECK: st.h +; CHECK: .size llvm_mips_splati_h_test +; +@llvm_mips_splati_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_splati_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_splati_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_splati_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.splati.w(<4 x i32> %0, i32 1) + store <4 x i32> %1, <4 x i32>* @llvm_mips_splati_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.splati.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_splati_w_test: +; CHECK: ld.w +; CHECK: splati.w +; CHECK: st.w +; CHECK: .size llvm_mips_splati_w_test +; +@llvm_mips_splati_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_splati_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_splati_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_splati_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.splati.d(<2 x i64> %0, i32 1) + store <2 x i64> %1, <2 x i64>* @llvm_mips_splati_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.splati.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_splati_d_test: +; CHECK: ld.d +; CHECK: splati.d +; CHECK: st.d +; CHECK: .size llvm_mips_splati_d_test +; diff --git a/test/CodeGen/Mips/msa/endian.ll b/test/CodeGen/Mips/msa/endian.ll new file mode 100644 index 0000000..44d1925 --- /dev/null +++ b/test/CodeGen/Mips/msa/endian.ll @@ -0,0 +1,107 @@ +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=BIGENDIAN %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=LITENDIAN %s + +@v16i8 = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0> +@v8i16 = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0> +@v4i32 = global <4 x i32> <i32 0, i32 0, i32 0, i32 0> +@v2i64 = global <2 x i64> <i64 0, i64 0> + +define void @const_v16i8() nounwind { 
+ ; LITENDIAN: .byte 0 + ; LITENDIAN: .byte 1 + ; LITENDIAN: .byte 2 + ; LITENDIAN: .byte 3 + ; LITENDIAN: .byte 4 + ; LITENDIAN: .byte 5 + ; LITENDIAN: .byte 6 + ; LITENDIAN: .byte 7 + ; LITENDIAN: .byte 8 + ; LITENDIAN: .byte 9 + ; LITENDIAN: .byte 10 + ; LITENDIAN: .byte 11 + ; LITENDIAN: .byte 12 + ; LITENDIAN: .byte 13 + ; LITENDIAN: .byte 14 + ; LITENDIAN: .byte 15 + ; LITENDIAN: const_v16i8: + ; BIGENDIAN: .byte 0 + ; BIGENDIAN: .byte 1 + ; BIGENDIAN: .byte 2 + ; BIGENDIAN: .byte 3 + ; BIGENDIAN: .byte 4 + ; BIGENDIAN: .byte 5 + ; BIGENDIAN: .byte 6 + ; BIGENDIAN: .byte 7 + ; BIGENDIAN: .byte 8 + ; BIGENDIAN: .byte 9 + ; BIGENDIAN: .byte 10 + ; BIGENDIAN: .byte 11 + ; BIGENDIAN: .byte 12 + ; BIGENDIAN: .byte 13 + ; BIGENDIAN: .byte 14 + ; BIGENDIAN: .byte 15 + ; BIGENDIAN: const_v16i8: + + store volatile <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, <16 x i8>*@v16i8 + + ret void +} + +define void @const_v8i16() nounwind { + ; LITENDIAN: .2byte 0 + ; LITENDIAN: .2byte 1 + ; LITENDIAN: .2byte 2 + ; LITENDIAN: .2byte 3 + ; LITENDIAN: .2byte 4 + ; LITENDIAN: .2byte 5 + ; LITENDIAN: .2byte 6 + ; LITENDIAN: .2byte 7 + ; LITENDIAN: const_v8i16: + ; BIGENDIAN: .2byte 0 + ; BIGENDIAN: .2byte 1 + ; BIGENDIAN: .2byte 2 + ; BIGENDIAN: .2byte 3 + ; BIGENDIAN: .2byte 4 + ; BIGENDIAN: .2byte 5 + ; BIGENDIAN: .2byte 6 + ; BIGENDIAN: .2byte 7 + ; BIGENDIAN: const_v8i16: + + store volatile <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, <8 x i16>*@v8i16 + + ret void +} + +define void @const_v4i32() nounwind { + ; LITENDIAN: .4byte 0 + ; LITENDIAN: .4byte 1 + ; LITENDIAN: .4byte 2 + ; LITENDIAN: .4byte 3 + ; LITENDIAN: const_v4i32: + ; BIGENDIAN: .4byte 0 + ; BIGENDIAN: .4byte 1 + ; BIGENDIAN: .4byte 2 + ; BIGENDIAN: .4byte 3 + ; BIGENDIAN: const_v4i32: + + store volatile <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32>*@v4i32 + + ret void +} + +define void @const_v2i64() nounwind { + ; 
LITENDIAN: .4byte 1 + ; LITENDIAN: .4byte 0 + ; LITENDIAN: .4byte 2 + ; LITENDIAN: .4byte 0 + ; LITENDIAN: const_v2i64: + ; BIGENDIAN: .4byte 0 + ; BIGENDIAN: .4byte 1 + ; BIGENDIAN: .4byte 0 + ; BIGENDIAN: .4byte 2 + ; BIGENDIAN: const_v2i64: + + store volatile <2 x i64> <i64 1, i64 2>, <2 x i64>*@v2i64 + + ret void +} diff --git a/test/CodeGen/Mips/msa/frameindex.ll b/test/CodeGen/Mips/msa/frameindex.ll new file mode 100644 index 0000000..3088e1b --- /dev/null +++ b/test/CodeGen/Mips/msa/frameindex.ll @@ -0,0 +1,85 @@ +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=MIPS32-AE -check-prefix=MIPS32-BE %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=MIPS32-AE -check-prefix=MIPS32-LE %s + +define void @loadstore_v16i8_near() nounwind { + ; MIPS32-AE: loadstore_v16i8_near: + + %1 = alloca <16 x i8> + %2 = load volatile <16 x i8>* %1 + ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0($sp) + store volatile <16 x i8> %2, <16 x i8>* %1 + ; MIPS32-AE: st.b [[R1]], 0($sp) + + ret void + ; MIPS32-AE: .size loadstore_v16i8_near +} + +define void @loadstore_v16i8_just_under_simm10() nounwind { + ; MIPS32-AE: loadstore_v16i8_just_under_simm10: + + %1 = alloca <16 x i8> + %2 = alloca [496 x i8] ; Push the frame right up to 512 bytes + + %3 = load volatile <16 x i8>* %1 + ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 496($sp) + store volatile <16 x i8> %3, <16 x i8>* %1 + ; MIPS32-AE: st.b [[R1]], 496($sp) + + ret void + ; MIPS32-AE: .size loadstore_v16i8_just_under_simm10 +} + +define void @loadstore_v16i8_just_over_simm10() nounwind { + ; MIPS32-AE: loadstore_v16i8_just_over_simm10: + + %1 = alloca <16 x i8> + %2 = alloca [497 x i8] ; Push the frame just over 512 bytes + + %3 = load volatile <16 x i8>* %1 + ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 512 + ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]]) + store volatile <16 x i8> %3, <16 x i8>* %1 + ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 512 + ; MIPS32-AE: st.b [[R1]], 0([[BASE]]) + + ret 
void + ; MIPS32-AE: .size loadstore_v16i8_just_over_simm10 +} + +define void @loadstore_v16i8_just_under_simm16() nounwind { + ; MIPS32-AE: loadstore_v16i8_just_under_simm16: + + %1 = alloca <16 x i8> + %2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes + + %3 = load volatile <16 x i8>* %1 + ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]]) + store volatile <16 x i8> %3, <16 x i8>* %1 + ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: st.b [[R1]], 0([[BASE]]) + + ret void + ; MIPS32-AE: .size loadstore_v16i8_just_under_simm16 +} + +define void @loadstore_v16i8_just_over_simm16() nounwind { + ; MIPS32-AE: loadstore_v16i8_just_over_simm16: + + %1 = alloca <16 x i8> + %2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes + + %3 = load volatile <16 x i8>* %1 + ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]]) + store volatile <16 x i8> %3, <16 x i8>* %1 + ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: st.b [[R1]], 0([[BASE]]) + + ret void + ; MIPS32-AE: .size loadstore_v16i8_just_over_simm16 +} diff --git a/test/CodeGen/Mips/msa/i10.ll b/test/CodeGen/Mips/msa/i10.ll new file mode 100644 index 0000000..c5a9617 --- /dev/null +++ b/test/CodeGen/Mips/msa/i10.ll @@ -0,0 +1,89 @@ +; Test the MSA intrinsics that are encoded with the I10 instruction format. 
+ +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_bnz_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 + +define i32 @llvm_mips_bnz_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_bnz_b_ARG1 + %1 = tail call i32 @llvm.mips.bnz.b(<16 x i8> %0) + %2 = icmp eq i32 %1, 0 + br i1 %2, label %true, label %false +true: + ret i32 2 +false: + ret i32 3 +} + +declare i32 @llvm.mips.bnz.b(<16 x i8>) nounwind + +; CHECK: llvm_mips_bnz_b_test: +; CHECK-DAG: ld.b [[R0:\$w[0-9]+]] +; CHECK-DAG: bnz.b [[R0]] +; CHECK: .size llvm_mips_bnz_b_test + +@llvm_mips_bnz_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 + +define i32 @llvm_mips_bnz_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_bnz_h_ARG1 + %1 = tail call i32 @llvm.mips.bnz.h(<8 x i16> %0) + %2 = icmp eq i32 %1, 0 + br i1 %2, label %true, label %false +true: + ret i32 2 +false: + ret i32 3 +} + +declare i32 @llvm.mips.bnz.h(<8 x i16>) nounwind + +; CHECK: llvm_mips_bnz_h_test: +; CHECK-DAG: ld.h [[R0:\$w[0-9]+]] +; CHECK-DAG: bnz.h [[R0]] +; CHECK: .size llvm_mips_bnz_h_test + +@llvm_mips_bnz_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 + +define i32 @llvm_mips_bnz_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_bnz_w_ARG1 + %1 = tail call i32 @llvm.mips.bnz.w(<4 x i32> %0) + %2 = icmp eq i32 %1, 0 + br i1 %2, label %true, label %false +true: + ret i32 2 +false: + ret i32 3 +} + +declare i32 @llvm.mips.bnz.w(<4 x i32>) nounwind + +; CHECK: llvm_mips_bnz_w_test: +; CHECK-DAG: ld.w [[R0:\$w[0-9]+]] +; CHECK-DAG: bnz.w [[R0]] +; CHECK: .size llvm_mips_bnz_w_test + +@llvm_mips_bnz_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 + +define i32 @llvm_mips_bnz_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_bnz_d_ARG1 + %1 = tail 
call i32 @llvm.mips.bnz.d(<2 x i64> %0) + %2 = icmp eq i32 %1, 0 + br i1 %2, label %true, label %false +true: + ret i32 2 +false: + ret i32 3 +} + +declare i32 @llvm.mips.bnz.d(<2 x i64>) nounwind + +; CHECK: llvm_mips_bnz_d_test: +; CHECK-DAG: ld.d [[R0:\$w[0-9]+]] +; CHECK-DAG: bnz.d [[R0]] +; CHECK: .size llvm_mips_bnz_d_test + diff --git a/test/CodeGen/Mips/msa/i5-a.ll b/test/CodeGen/Mips/msa/i5-a.ll new file mode 100644 index 0000000..0b50720 --- /dev/null +++ b/test/CodeGen/Mips/msa/i5-a.ll @@ -0,0 +1,82 @@ +; Test the MSA intrinsics that are encoded with the I5 instruction format. +; There are lots of these so this covers those beginning with 'a' + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_addvi_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_addvi_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_addvi_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_addvi_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.addvi.b(<16 x i8> %0, i32 14) + store <16 x i8> %1, <16 x i8>* @llvm_mips_addvi_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.addvi.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_addvi_b_test: +; CHECK: ld.b +; CHECK: addvi.b +; CHECK: st.b +; CHECK: .size llvm_mips_addvi_b_test +; +@llvm_mips_addvi_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_addvi_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_addvi_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_addvi_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.addvi.h(<8 x i16> %0, i32 14) + store <8 x i16> %1, <8 x i16>* @llvm_mips_addvi_h_RES + ret void 
+} + +declare <8 x i16> @llvm.mips.addvi.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_addvi_h_test: +; CHECK: ld.h +; CHECK: addvi.h +; CHECK: st.h +; CHECK: .size llvm_mips_addvi_h_test +; +@llvm_mips_addvi_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_addvi_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_addvi_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_addvi_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.addvi.w(<4 x i32> %0, i32 14) + store <4 x i32> %1, <4 x i32>* @llvm_mips_addvi_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.addvi.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_addvi_w_test: +; CHECK: ld.w +; CHECK: addvi.w +; CHECK: st.w +; CHECK: .size llvm_mips_addvi_w_test +; +@llvm_mips_addvi_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_addvi_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_addvi_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_addvi_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.addvi.d(<2 x i64> %0, i32 14) + store <2 x i64> %1, <2 x i64>* @llvm_mips_addvi_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.addvi.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_addvi_d_test: +; CHECK: ld.d +; CHECK: addvi.d +; CHECK: st.d +; CHECK: .size llvm_mips_addvi_d_test +; diff --git a/test/CodeGen/Mips/msa/i5-b.ll b/test/CodeGen/Mips/msa/i5-b.ll new file mode 100644 index 0000000..da6be66 --- /dev/null +++ b/test/CodeGen/Mips/msa/i5-b.ll @@ -0,0 +1,439 @@ +; Test the MSA intrinsics that are encoded with the I5 instruction format. 
+; There are lots of these so this covers those beginning with 'b' + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_bclri_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_bclri_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_bclri_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_bclri_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.bclri.b(<16 x i8> %0, i32 7) + store <16 x i8> %1, <16 x i8>* @llvm_mips_bclri_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.bclri.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_bclri_b_test: +; CHECK: ld.b +; andi.b is equivalent to bclri.b +; CHECK: andi.b {{\$w[0-9]}}, {{\$w[0-9]}}, 127 +; CHECK: st.b +; CHECK: .size llvm_mips_bclri_b_test +; +@llvm_mips_bclri_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_bclri_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_bclri_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_bclri_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.bclri.h(<8 x i16> %0, i32 7) + store <8 x i16> %1, <8 x i16>* @llvm_mips_bclri_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.bclri.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_bclri_h_test: +; CHECK: ld.h +; CHECK: bclri.h +; CHECK: st.h +; CHECK: .size llvm_mips_bclri_h_test +; +@llvm_mips_bclri_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_bclri_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_bclri_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_bclri_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.bclri.w(<4 x i32> %0, i32 7) 
+ store <4 x i32> %1, <4 x i32>* @llvm_mips_bclri_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.bclri.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_bclri_w_test: +; CHECK: ld.w +; CHECK: bclri.w +; CHECK: st.w +; CHECK: .size llvm_mips_bclri_w_test +; +@llvm_mips_bclri_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_bclri_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_bclri_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_bclri_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.bclri.d(<2 x i64> %0, i32 7) + store <2 x i64> %1, <2 x i64>* @llvm_mips_bclri_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.bclri.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_bclri_d_test: +; CHECK: ld.d +; CHECK: bclri.d +; CHECK: st.d +; CHECK: .size llvm_mips_bclri_d_test +; +@llvm_mips_binsli_b_ARG1 = global <16 x i8> zeroinitializer, align 16 +@llvm_mips_binsli_b_ARG2 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_binsli_b_RES = global <16 x i8> zeroinitializer, align 16 + +define void @llvm_mips_binsli_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_binsli_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_binsli_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.binsli.b(<16 x i8> %0, <16 x i8> %1, i32 7) + store <16 x i8> %2, <16 x i8>* @llvm_mips_binsli_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.binsli.b(<16 x i8>, <16 x i8>, i32) nounwind + +; CHECK: llvm_mips_binsli_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsli_b_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsli_b_ARG2)( +; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: binsli.b [[R3]], [[R4]], 7 +; CHECK-DAG: lw [[R5:\$[0-9]+]], %got(llvm_mips_binsli_b_RES)( +; CHECK-DAG: st.b [[R3]], 0([[R5]]) +; CHECK: .size llvm_mips_binsli_b_test + +@llvm_mips_binsli_h_ARG1 = global 
<8 x i16> zeroinitializer, align 16 +@llvm_mips_binsli_h_ARG2 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_binsli_h_RES = global <8 x i16> zeroinitializer, align 16 + +define void @llvm_mips_binsli_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_binsli_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_binsli_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.binsli.h(<8 x i16> %0, <8 x i16> %1, i32 7) + store <8 x i16> %2, <8 x i16>* @llvm_mips_binsli_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.binsli.h(<8 x i16>, <8 x i16>, i32) nounwind + +; CHECK: llvm_mips_binsli_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsli_h_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsli_h_ARG2)( +; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[R4:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: binsli.h [[R3]], [[R4]], 7 +; CHECK-DAG: lw [[R5:\$[0-9]+]], %got(llvm_mips_binsli_h_RES)( +; CHECK-DAG: st.h [[R3]], 0([[R5]]) +; CHECK: .size llvm_mips_binsli_h_test + +@llvm_mips_binsli_w_ARG1 = global <4 x i32> zeroinitializer, align 16 +@llvm_mips_binsli_w_ARG2 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_binsli_w_RES = global <4 x i32> zeroinitializer, align 16 + +define void @llvm_mips_binsli_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_binsli_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_binsli_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.binsli.w(<4 x i32> %0, <4 x i32> %1, i32 7) + store <4 x i32> %2, <4 x i32>* @llvm_mips_binsli_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.binsli.w(<4 x i32>, <4 x i32>, i32) nounwind + +; CHECK: llvm_mips_binsli_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsli_w_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsli_w_ARG2)( +; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[R4:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: binsli.w [[R3]], [[R4]], 7 +; CHECK-DAG: lw [[R5:\$[0-9]+]], 
%got(llvm_mips_binsli_w_RES)( +; CHECK-DAG: st.w [[R3]], 0([[R5]]) +; CHECK: .size llvm_mips_binsli_w_test + +@llvm_mips_binsli_d_ARG1 = global <2 x i64> zeroinitializer, align 16 +@llvm_mips_binsli_d_ARG2 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_binsli_d_RES = global <2 x i64> zeroinitializer, align 16 + +define void @llvm_mips_binsli_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_binsli_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_binsli_d_ARG2 + ; TODO: We use a particularly wide mask here to work around a legalization + ; issue. If the mask doesn't fit within a 10-bit immediate, it gets + ; legalized into a constant pool. We should add a test to cover the + ; other cases once they correctly select binsli.d. + %2 = tail call <2 x i64> @llvm.mips.binsli.d(<2 x i64> %0, <2 x i64> %1, i32 61) + store <2 x i64> %2, <2 x i64>* @llvm_mips_binsli_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.binsli.d(<2 x i64>, <2 x i64>, i32) nounwind + +; CHECK: llvm_mips_binsli_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsli_d_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsli_d_ARG2)( +; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[R4:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: binsli.d [[R3]], [[R4]], 61 +; CHECK-DAG: lw [[R5:\$[0-9]+]], %got(llvm_mips_binsli_d_RES)( +; CHECK-DAG: st.d [[R3]], 0([[R5]]) +; CHECK: .size llvm_mips_binsli_d_test + +@llvm_mips_binsri_b_ARG1 = global <16 x i8> zeroinitializer, align 16 +@llvm_mips_binsri_b_ARG2 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_binsri_b_RES = global <16 x i8> zeroinitializer, align 16 + +define void @llvm_mips_binsri_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_binsri_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_binsri_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.binsri.b(<16 x i8> %0, <16 x i8> %1, i32 7) + store <16 x i8> %2, <16 x i8>* 
@llvm_mips_binsri_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.binsri.b(<16 x i8>, <16 x i8>, i32) nounwind + +; CHECK: llvm_mips_binsri_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsri_b_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsri_b_ARG2)( +; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: binsri.b [[R3]], [[R4]], 7 +; CHECK-DAG: lw [[R5:\$[0-9]+]], %got(llvm_mips_binsri_b_RES)( +; CHECK-DAG: st.b [[R3]], 0([[R5]]) +; CHECK: .size llvm_mips_binsri_b_test + +@llvm_mips_binsri_h_ARG1 = global <8 x i16> zeroinitializer, align 16 +@llvm_mips_binsri_h_ARG2 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_binsri_h_RES = global <8 x i16> zeroinitializer, align 16 + +define void @llvm_mips_binsri_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_binsri_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_binsri_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.binsri.h(<8 x i16> %0, <8 x i16> %1, i32 7) + store <8 x i16> %2, <8 x i16>* @llvm_mips_binsri_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.binsri.h(<8 x i16>, <8 x i16>, i32) nounwind + +; CHECK: llvm_mips_binsri_h_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsri_h_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsri_h_ARG2)( +; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[R4:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: binsri.h [[R3]], [[R4]], 7 +; CHECK-DAG: lw [[R5:\$[0-9]+]], %got(llvm_mips_binsri_h_RES)( +; CHECK-DAG: st.h [[R3]], 0([[R5]]) +; CHECK: .size llvm_mips_binsri_h_test + +@llvm_mips_binsri_w_ARG1 = global <4 x i32> zeroinitializer, align 16 +@llvm_mips_binsri_w_ARG2 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_binsri_w_RES = global <4 x i32> zeroinitializer, align 16 + +define void @llvm_mips_binsri_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_binsri_w_ARG1 + %1 = load <4 x 
i32>* @llvm_mips_binsri_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.binsri.w(<4 x i32> %0, <4 x i32> %1, i32 7) + store <4 x i32> %2, <4 x i32>* @llvm_mips_binsri_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.binsri.w(<4 x i32>, <4 x i32>, i32) nounwind + +; CHECK: llvm_mips_binsri_w_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsri_w_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsri_w_ARG2)( +; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[R4:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: binsri.w [[R3]], [[R4]], 7 +; CHECK-DAG: lw [[R5:\$[0-9]+]], %got(llvm_mips_binsri_w_RES)( +; CHECK-DAG: st.w [[R3]], 0([[R5]]) +; CHECK: .size llvm_mips_binsri_w_test + +@llvm_mips_binsri_d_ARG1 = global <2 x i64> zeroinitializer, align 16 +@llvm_mips_binsri_d_ARG2 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_binsri_d_RES = global <2 x i64> zeroinitializer, align 16 + +define void @llvm_mips_binsri_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_binsri_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_binsri_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.binsri.d(<2 x i64> %0, <2 x i64> %1, i32 7) + store <2 x i64> %2, <2 x i64>* @llvm_mips_binsri_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.binsri.d(<2 x i64>, <2 x i64>, i32) nounwind + +; CHECK: llvm_mips_binsri_d_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsri_d_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsri_d_ARG2)( +; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[R4:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: binsri.d [[R3]], [[R4]], 7 +; CHECK-DAG: lw [[R5:\$[0-9]+]], %got(llvm_mips_binsri_d_RES)( +; CHECK-DAG: st.d [[R3]], 0([[R5]]) +; CHECK: .size llvm_mips_binsri_d_test + +@llvm_mips_bnegi_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_bnegi_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, 
i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_bnegi_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_bnegi_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.bnegi.b(<16 x i8> %0, i32 7) + store <16 x i8> %1, <16 x i8>* @llvm_mips_bnegi_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.bnegi.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_bnegi_b_test: +; CHECK: ld.b +; CHECK: bnegi.b +; CHECK: st.b +; CHECK: .size llvm_mips_bnegi_b_test +; +@llvm_mips_bnegi_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_bnegi_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_bnegi_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_bnegi_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.bnegi.h(<8 x i16> %0, i32 7) + store <8 x i16> %1, <8 x i16>* @llvm_mips_bnegi_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.bnegi.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_bnegi_h_test: +; CHECK: ld.h +; CHECK: bnegi.h +; CHECK: st.h +; CHECK: .size llvm_mips_bnegi_h_test +; +@llvm_mips_bnegi_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_bnegi_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_bnegi_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_bnegi_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.bnegi.w(<4 x i32> %0, i32 7) + store <4 x i32> %1, <4 x i32>* @llvm_mips_bnegi_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.bnegi.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_bnegi_w_test: +; CHECK: ld.w +; CHECK: bnegi.w +; CHECK: st.w +; CHECK: .size llvm_mips_bnegi_w_test +; +@llvm_mips_bnegi_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_bnegi_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_bnegi_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_bnegi_d_ARG1 + %1 = 
tail call <2 x i64> @llvm.mips.bnegi.d(<2 x i64> %0, i32 7) + store <2 x i64> %1, <2 x i64>* @llvm_mips_bnegi_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.bnegi.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_bnegi_d_test: +; CHECK: ld.d +; CHECK: bnegi.d +; CHECK: st.d +; CHECK: .size llvm_mips_bnegi_d_test +; +@llvm_mips_bseti_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_bseti_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_bseti_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_bseti_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.bseti.b(<16 x i8> %0, i32 7) + store <16 x i8> %1, <16 x i8>* @llvm_mips_bseti_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.bseti.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_bseti_b_test: +; CHECK: ld.b +; CHECK: bseti.b +; CHECK: st.b +; CHECK: .size llvm_mips_bseti_b_test +; +@llvm_mips_bseti_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_bseti_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_bseti_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_bseti_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.bseti.h(<8 x i16> %0, i32 7) + store <8 x i16> %1, <8 x i16>* @llvm_mips_bseti_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.bseti.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_bseti_h_test: +; CHECK: ld.h +; CHECK: bseti.h +; CHECK: st.h +; CHECK: .size llvm_mips_bseti_h_test +; +@llvm_mips_bseti_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_bseti_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_bseti_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_bseti_w_ARG1 + %1 = 
tail call <4 x i32> @llvm.mips.bseti.w(<4 x i32> %0, i32 7) + store <4 x i32> %1, <4 x i32>* @llvm_mips_bseti_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.bseti.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_bseti_w_test: +; CHECK: ld.w +; CHECK: bseti.w +; CHECK: st.w +; CHECK: .size llvm_mips_bseti_w_test +; +@llvm_mips_bseti_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_bseti_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_bseti_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_bseti_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.bseti.d(<2 x i64> %0, i32 7) + store <2 x i64> %1, <2 x i64>* @llvm_mips_bseti_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.bseti.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_bseti_d_test: +; CHECK: ld.d +; CHECK: bseti.d +; CHECK: st.d +; CHECK: .size llvm_mips_bseti_d_test +; diff --git a/test/CodeGen/Mips/msa/i5-c.ll b/test/CodeGen/Mips/msa/i5-c.ll new file mode 100644 index 0000000..bf1578f --- /dev/null +++ b/test/CodeGen/Mips/msa/i5-c.ll @@ -0,0 +1,386 @@ +; Test the MSA intrinsics that are encoded with the I5 instruction format. 
+; There are lots of these so this covers those beginning with 'c' + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_ceqi_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_ceqi_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_ceqi_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_ceqi_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.ceqi.b(<16 x i8> %0, i32 14) + store <16 x i8> %1, <16 x i8>* @llvm_mips_ceqi_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.ceqi.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_ceqi_b_test: +; CHECK: ld.b +; CHECK: ceqi.b +; CHECK: st.b +; CHECK: .size llvm_mips_ceqi_b_test +; +@llvm_mips_ceqi_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_ceqi_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_ceqi_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_ceqi_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.ceqi.h(<8 x i16> %0, i32 14) + store <8 x i16> %1, <8 x i16>* @llvm_mips_ceqi_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.ceqi.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_ceqi_h_test: +; CHECK: ld.h +; CHECK: ceqi.h +; CHECK: st.h +; CHECK: .size llvm_mips_ceqi_h_test +; +@llvm_mips_ceqi_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_ceqi_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_ceqi_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_ceqi_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.ceqi.w(<4 x i32> %0, i32 14) + store <4 x i32> %1, <4 x i32>* @llvm_mips_ceqi_w_RES + ret void +} + +declare <4 x 
i32> @llvm.mips.ceqi.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_ceqi_w_test: +; CHECK: ld.w +; CHECK: ceqi.w +; CHECK: st.w +; CHECK: .size llvm_mips_ceqi_w_test +; +@llvm_mips_ceqi_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_ceqi_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_ceqi_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_ceqi_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.ceqi.d(<2 x i64> %0, i32 14) + store <2 x i64> %1, <2 x i64>* @llvm_mips_ceqi_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.ceqi.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_ceqi_d_test: +; CHECK: ld.d +; CHECK: ceqi.d +; CHECK: st.d +; CHECK: .size llvm_mips_ceqi_d_test +; +@llvm_mips_clei_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_clei_s_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_clei_s_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_clei_s_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.clei.s.b(<16 x i8> %0, i32 14) + store <16 x i8> %1, <16 x i8>* @llvm_mips_clei_s_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.clei.s.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_clei_s_b_test: +; CHECK: ld.b +; CHECK: clei_s.b +; CHECK: st.b +; CHECK: .size llvm_mips_clei_s_b_test +; +@llvm_mips_clei_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_clei_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_clei_s_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_clei_s_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.clei.s.h(<8 x i16> %0, i32 14) + store <8 x i16> %1, <8 x i16>* @llvm_mips_clei_s_h_RES + ret void +} + +declare <8 x i16> 
@llvm.mips.clei.s.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_clei_s_h_test: +; CHECK: ld.h +; CHECK: clei_s.h +; CHECK: st.h +; CHECK: .size llvm_mips_clei_s_h_test +; +@llvm_mips_clei_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_clei_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_clei_s_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_clei_s_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.clei.s.w(<4 x i32> %0, i32 14) + store <4 x i32> %1, <4 x i32>* @llvm_mips_clei_s_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.clei.s.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_clei_s_w_test: +; CHECK: ld.w +; CHECK: clei_s.w +; CHECK: st.w +; CHECK: .size llvm_mips_clei_s_w_test +; +@llvm_mips_clei_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_clei_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_clei_s_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_clei_s_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.clei.s.d(<2 x i64> %0, i32 14) + store <2 x i64> %1, <2 x i64>* @llvm_mips_clei_s_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.clei.s.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_clei_s_d_test: +; CHECK: ld.d +; CHECK: clei_s.d +; CHECK: st.d +; CHECK: .size llvm_mips_clei_s_d_test +; +@llvm_mips_clei_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_clei_u_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_clei_u_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_clei_u_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.clei.u.b(<16 x i8> %0, i32 14) + store <16 x i8> %1, <16 x i8>* @llvm_mips_clei_u_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.clei.u.b(<16 x i8>, i32) nounwind + +; 
CHECK: llvm_mips_clei_u_b_test: +; CHECK: ld.b +; CHECK: clei_u.b +; CHECK: st.b +; CHECK: .size llvm_mips_clei_u_b_test +; +@llvm_mips_clei_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_clei_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_clei_u_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_clei_u_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.clei.u.h(<8 x i16> %0, i32 14) + store <8 x i16> %1, <8 x i16>* @llvm_mips_clei_u_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.clei.u.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_clei_u_h_test: +; CHECK: ld.h +; CHECK: clei_u.h +; CHECK: st.h +; CHECK: .size llvm_mips_clei_u_h_test +; +@llvm_mips_clei_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_clei_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_clei_u_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_clei_u_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.clei.u.w(<4 x i32> %0, i32 14) + store <4 x i32> %1, <4 x i32>* @llvm_mips_clei_u_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.clei.u.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_clei_u_w_test: +; CHECK: ld.w +; CHECK: clei_u.w +; CHECK: st.w +; CHECK: .size llvm_mips_clei_u_w_test +; +@llvm_mips_clei_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_clei_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_clei_u_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_clei_u_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.clei.u.d(<2 x i64> %0, i32 14) + store <2 x i64> %1, <2 x i64>* @llvm_mips_clei_u_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.clei.u.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_clei_u_d_test: +; CHECK: ld.d +; CHECK: clei_u.d +; CHECK: st.d +; CHECK: .size llvm_mips_clei_u_d_test +; 
+@llvm_mips_clti_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_clti_s_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_clti_s_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_clti_s_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.clti.s.b(<16 x i8> %0, i32 14) + store <16 x i8> %1, <16 x i8>* @llvm_mips_clti_s_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.clti.s.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_clti_s_b_test: +; CHECK: ld.b +; CHECK: clti_s.b +; CHECK: st.b +; CHECK: .size llvm_mips_clti_s_b_test +; +@llvm_mips_clti_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_clti_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_clti_s_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_clti_s_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.clti.s.h(<8 x i16> %0, i32 14) + store <8 x i16> %1, <8 x i16>* @llvm_mips_clti_s_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.clti.s.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_clti_s_h_test: +; CHECK: ld.h +; CHECK: clti_s.h +; CHECK: st.h +; CHECK: .size llvm_mips_clti_s_h_test +; +@llvm_mips_clti_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_clti_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_clti_s_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_clti_s_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.clti.s.w(<4 x i32> %0, i32 14) + store <4 x i32> %1, <4 x i32>* @llvm_mips_clti_s_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.clti.s.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_clti_s_w_test: +; CHECK: ld.w +; CHECK: clti_s.w +; CHECK: st.w +; CHECK: 
.size llvm_mips_clti_s_w_test +; +@llvm_mips_clti_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_clti_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_clti_s_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_clti_s_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.clti.s.d(<2 x i64> %0, i32 14) + store <2 x i64> %1, <2 x i64>* @llvm_mips_clti_s_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.clti.s.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_clti_s_d_test: +; CHECK: ld.d +; CHECK: clti_s.d +; CHECK: st.d +; CHECK: .size llvm_mips_clti_s_d_test +; +@llvm_mips_clti_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_clti_u_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_clti_u_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_clti_u_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.clti.u.b(<16 x i8> %0, i32 14) + store <16 x i8> %1, <16 x i8>* @llvm_mips_clti_u_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.clti.u.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_clti_u_b_test: +; CHECK: ld.b +; CHECK: clti_u.b +; CHECK: st.b +; CHECK: .size llvm_mips_clti_u_b_test +; +@llvm_mips_clti_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_clti_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_clti_u_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_clti_u_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.clti.u.h(<8 x i16> %0, i32 14) + store <8 x i16> %1, <8 x i16>* @llvm_mips_clti_u_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.clti.u.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_clti_u_h_test: +; CHECK: ld.h +; CHECK: clti_u.h +; CHECK: st.h +; 
CHECK: .size llvm_mips_clti_u_h_test +; +@llvm_mips_clti_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_clti_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_clti_u_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_clti_u_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.clti.u.w(<4 x i32> %0, i32 14) + store <4 x i32> %1, <4 x i32>* @llvm_mips_clti_u_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.clti.u.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_clti_u_w_test: +; CHECK: ld.w +; CHECK: clti_u.w +; CHECK: st.w +; CHECK: .size llvm_mips_clti_u_w_test +; +@llvm_mips_clti_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_clti_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_clti_u_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_clti_u_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.clti.u.d(<2 x i64> %0, i32 14) + store <2 x i64> %1, <2 x i64>* @llvm_mips_clti_u_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.clti.u.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_clti_u_d_test: +; CHECK: ld.d +; CHECK: clti_u.d +; CHECK: st.d +; CHECK: .size llvm_mips_clti_u_d_test +; diff --git a/test/CodeGen/Mips/msa/i5-m.ll b/test/CodeGen/Mips/msa/i5-m.ll new file mode 100644 index 0000000..2766349 --- /dev/null +++ b/test/CodeGen/Mips/msa/i5-m.ll @@ -0,0 +1,310 @@ +; Test the MSA intrinsics that are encoded with the I5 instruction format. 
+; There are lots of these so this covers those beginning with 'm' + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_maxi_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_maxi_s_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_maxi_s_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_maxi_s_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.maxi.s.b(<16 x i8> %0, i32 14) + store <16 x i8> %1, <16 x i8>* @llvm_mips_maxi_s_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.maxi.s.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_maxi_s_b_test: +; CHECK: ld.b +; CHECK: maxi_s.b +; CHECK: st.b +; CHECK: .size llvm_mips_maxi_s_b_test +; +@llvm_mips_maxi_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_maxi_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_maxi_s_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_maxi_s_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.maxi.s.h(<8 x i16> %0, i32 14) + store <8 x i16> %1, <8 x i16>* @llvm_mips_maxi_s_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.maxi.s.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_maxi_s_h_test: +; CHECK: ld.h +; CHECK: maxi_s.h +; CHECK: st.h +; CHECK: .size llvm_mips_maxi_s_h_test +; +@llvm_mips_maxi_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_maxi_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_maxi_s_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_maxi_s_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.maxi.s.w(<4 x i32> %0, i32 14) + store <4 x i32> %1, <4 x i32>* 
@llvm_mips_maxi_s_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.maxi.s.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_maxi_s_w_test: +; CHECK: ld.w +; CHECK: maxi_s.w +; CHECK: st.w +; CHECK: .size llvm_mips_maxi_s_w_test +; +@llvm_mips_maxi_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_maxi_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_maxi_s_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_maxi_s_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.maxi.s.d(<2 x i64> %0, i32 14) + store <2 x i64> %1, <2 x i64>* @llvm_mips_maxi_s_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.maxi.s.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_maxi_s_d_test: +; CHECK: ld.d +; CHECK: maxi_s.d +; CHECK: st.d +; CHECK: .size llvm_mips_maxi_s_d_test +; +@llvm_mips_maxi_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_maxi_u_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_maxi_u_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_maxi_u_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.maxi.u.b(<16 x i8> %0, i32 14) + store <16 x i8> %1, <16 x i8>* @llvm_mips_maxi_u_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.maxi.u.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_maxi_u_b_test: +; CHECK: ld.b +; CHECK: maxi_u.b +; CHECK: st.b +; CHECK: .size llvm_mips_maxi_u_b_test +; +@llvm_mips_maxi_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_maxi_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_maxi_u_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_maxi_u_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.maxi.u.h(<8 x i16> %0, i32 14) + store <8 x i16> %1, <8 x 
i16>* @llvm_mips_maxi_u_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.maxi.u.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_maxi_u_h_test: +; CHECK: ld.h +; CHECK: maxi_u.h +; CHECK: st.h +; CHECK: .size llvm_mips_maxi_u_h_test +; +@llvm_mips_maxi_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_maxi_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_maxi_u_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_maxi_u_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.maxi.u.w(<4 x i32> %0, i32 14) + store <4 x i32> %1, <4 x i32>* @llvm_mips_maxi_u_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.maxi.u.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_maxi_u_w_test: +; CHECK: ld.w +; CHECK: maxi_u.w +; CHECK: st.w +; CHECK: .size llvm_mips_maxi_u_w_test +; +@llvm_mips_maxi_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_maxi_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_maxi_u_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_maxi_u_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.maxi.u.d(<2 x i64> %0, i32 14) + store <2 x i64> %1, <2 x i64>* @llvm_mips_maxi_u_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.maxi.u.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_maxi_u_d_test: +; CHECK: ld.d +; CHECK: maxi_u.d +; CHECK: st.d +; CHECK: .size llvm_mips_maxi_u_d_test +; +@llvm_mips_mini_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_mini_s_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_mini_s_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_mini_s_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.mini.s.b(<16 x i8> %0, i32 14) + store <16 x i8> %1, <16 x i8>* @llvm_mips_mini_s_b_RES + ret void +} + 
+declare <16 x i8> @llvm.mips.mini.s.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_mini_s_b_test: +; CHECK: ld.b +; CHECK: mini_s.b +; CHECK: st.b +; CHECK: .size llvm_mips_mini_s_b_test +; +@llvm_mips_mini_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_mini_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_mini_s_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_mini_s_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.mini.s.h(<8 x i16> %0, i32 14) + store <8 x i16> %1, <8 x i16>* @llvm_mips_mini_s_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.mini.s.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_mini_s_h_test: +; CHECK: ld.h +; CHECK: mini_s.h +; CHECK: st.h +; CHECK: .size llvm_mips_mini_s_h_test +; +@llvm_mips_mini_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_mini_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_mini_s_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_mini_s_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.mini.s.w(<4 x i32> %0, i32 14) + store <4 x i32> %1, <4 x i32>* @llvm_mips_mini_s_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.mini.s.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_mini_s_w_test: +; CHECK: ld.w +; CHECK: mini_s.w +; CHECK: st.w +; CHECK: .size llvm_mips_mini_s_w_test +; +@llvm_mips_mini_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_mini_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_mini_s_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_mini_s_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.mini.s.d(<2 x i64> %0, i32 14) + store <2 x i64> %1, <2 x i64>* @llvm_mips_mini_s_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.mini.s.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_mini_s_d_test: +; CHECK: ld.d +; CHECK: mini_s.d +; 
CHECK: st.d +; CHECK: .size llvm_mips_mini_s_d_test +; +@llvm_mips_mini_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_mini_u_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_mini_u_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_mini_u_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.mini.u.b(<16 x i8> %0, i32 14) + store <16 x i8> %1, <16 x i8>* @llvm_mips_mini_u_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.mini.u.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_mini_u_b_test: +; CHECK: ld.b +; CHECK: mini_u.b +; CHECK: st.b +; CHECK: .size llvm_mips_mini_u_b_test +; +@llvm_mips_mini_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_mini_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_mini_u_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_mini_u_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.mini.u.h(<8 x i16> %0, i32 14) + store <8 x i16> %1, <8 x i16>* @llvm_mips_mini_u_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.mini.u.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_mini_u_h_test: +; CHECK: ld.h +; CHECK: mini_u.h +; CHECK: st.h +; CHECK: .size llvm_mips_mini_u_h_test +; +@llvm_mips_mini_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_mini_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_mini_u_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_mini_u_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.mini.u.w(<4 x i32> %0, i32 14) + store <4 x i32> %1, <4 x i32>* @llvm_mips_mini_u_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.mini.u.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_mini_u_w_test: +; 
CHECK: ld.w +; CHECK: mini_u.w +; CHECK: st.w +; CHECK: .size llvm_mips_mini_u_w_test +; +@llvm_mips_mini_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_mini_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_mini_u_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_mini_u_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.mini.u.d(<2 x i64> %0, i32 14) + store <2 x i64> %1, <2 x i64>* @llvm_mips_mini_u_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.mini.u.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_mini_u_d_test: +; CHECK: ld.d +; CHECK: mini_u.d +; CHECK: st.d +; CHECK: .size llvm_mips_mini_u_d_test +; diff --git a/test/CodeGen/Mips/msa/i5-s.ll b/test/CodeGen/Mips/msa/i5-s.ll new file mode 100644 index 0000000..184172f --- /dev/null +++ b/test/CodeGen/Mips/msa/i5-s.ll @@ -0,0 +1,82 @@ +; Test the MSA intrinsics that are encoded with the I5 instruction format. +; There are lots of these so this covers those beginning with 's' + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_subvi_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_subvi_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_subvi_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_subvi_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.subvi.b(<16 x i8> %0, i32 14) + store <16 x i8> %1, <16 x i8>* @llvm_mips_subvi_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.subvi.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_subvi_b_test: +; CHECK: ld.b +; CHECK: subvi.b +; CHECK: st.b +; CHECK: .size llvm_mips_subvi_b_test +; +@llvm_mips_subvi_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 
+@llvm_mips_subvi_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_subvi_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_subvi_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.subvi.h(<8 x i16> %0, i32 14) + store <8 x i16> %1, <8 x i16>* @llvm_mips_subvi_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.subvi.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_subvi_h_test: +; CHECK: ld.h +; CHECK: subvi.h +; CHECK: st.h +; CHECK: .size llvm_mips_subvi_h_test +; +@llvm_mips_subvi_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_subvi_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_subvi_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_subvi_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.subvi.w(<4 x i32> %0, i32 14) + store <4 x i32> %1, <4 x i32>* @llvm_mips_subvi_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.subvi.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_subvi_w_test: +; CHECK: ld.w +; CHECK: subvi.w +; CHECK: st.w +; CHECK: .size llvm_mips_subvi_w_test +; +@llvm_mips_subvi_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_subvi_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_subvi_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_subvi_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.subvi.d(<2 x i64> %0, i32 14) + store <2 x i64> %1, <2 x i64>* @llvm_mips_subvi_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.subvi.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_subvi_d_test: +; CHECK: ld.d +; CHECK: subvi.d +; CHECK: st.d +; CHECK: .size llvm_mips_subvi_d_test +; diff --git a/test/CodeGen/Mips/msa/i5_ld_st.ll b/test/CodeGen/Mips/msa/i5_ld_st.ll new file mode 100644 index 0000000..7cc55f2 --- /dev/null +++ b/test/CodeGen/Mips/msa/i5_ld_st.ll @@ -0,0 +1,150 @@ +; Test the MSA intrinsics that are encoded with the I5 instruction format and 
+; are loads or stores. + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_ld_b_ARG = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_ld_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_ld_b_test() nounwind { +entry: + %0 = bitcast <16 x i8>* @llvm_mips_ld_b_ARG to i8* + %1 = tail call <16 x i8> @llvm.mips.ld.b(i8* %0, i32 16) + store <16 x i8> %1, <16 x i8>* @llvm_mips_ld_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.ld.b(i8*, i32) nounwind + +; CHECK: llvm_mips_ld_b_test: +; CHECK: ld.b [[R1:\$w[0-9]+]], 16( +; CHECK: st.b +; CHECK: .size llvm_mips_ld_b_test +; +@llvm_mips_ld_h_ARG = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_ld_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_ld_h_test() nounwind { +entry: + %0 = bitcast <8 x i16>* @llvm_mips_ld_h_ARG to i8* + %1 = tail call <8 x i16> @llvm.mips.ld.h(i8* %0, i32 16) + store <8 x i16> %1, <8 x i16>* @llvm_mips_ld_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.ld.h(i8*, i32) nounwind + +; CHECK: llvm_mips_ld_h_test: +; CHECK: ld.h [[R1:\$w[0-9]+]], 16( +; CHECK: st.h +; CHECK: .size llvm_mips_ld_h_test +; +@llvm_mips_ld_w_ARG = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_ld_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_ld_w_test() nounwind { +entry: + %0 = bitcast <4 x i32>* @llvm_mips_ld_w_ARG to i8* + %1 = tail call <4 x i32> @llvm.mips.ld.w(i8* %0, i32 16) + store <4 x i32> %1, <4 x i32>* @llvm_mips_ld_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.ld.w(i8*, i32) nounwind + +; CHECK: llvm_mips_ld_w_test: +; CHECK: ld.w 
[[R1:\$w[0-9]+]], 16( +; CHECK: st.w +; CHECK: .size llvm_mips_ld_w_test +; +@llvm_mips_ld_d_ARG = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_ld_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_ld_d_test() nounwind { +entry: + %0 = bitcast <2 x i64>* @llvm_mips_ld_d_ARG to i8* + %1 = tail call <2 x i64> @llvm.mips.ld.d(i8* %0, i32 16) + store <2 x i64> %1, <2 x i64>* @llvm_mips_ld_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.ld.d(i8*, i32) nounwind + +; CHECK: llvm_mips_ld_d_test: +; CHECK: ld.d [[R1:\$w[0-9]+]], 16( +; CHECK: st.d +; CHECK: .size llvm_mips_ld_d_test +; +@llvm_mips_st_b_ARG = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_st_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_st_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_st_b_ARG + %1 = bitcast <16 x i8>* @llvm_mips_st_b_RES to i8* + tail call void @llvm.mips.st.b(<16 x i8> %0, i8* %1, i32 16) + ret void +} + +declare void @llvm.mips.st.b(<16 x i8>, i8*, i32) nounwind + +; CHECK: llvm_mips_st_b_test: +; CHECK: ld.b +; CHECK: st.b [[R1:\$w[0-9]+]], 16( +; CHECK: .size llvm_mips_st_b_test +; +@llvm_mips_st_h_ARG = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_st_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_st_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_st_h_ARG + %1 = bitcast <8 x i16>* @llvm_mips_st_h_RES to i8* + tail call void @llvm.mips.st.h(<8 x i16> %0, i8* %1, i32 16) + ret void +} + +declare void @llvm.mips.st.h(<8 x i16>, i8*, i32) nounwind + +; CHECK: llvm_mips_st_h_test: +; CHECK: ld.h +; CHECK: st.h [[R1:\$w[0-9]+]], 16( +; CHECK: .size llvm_mips_st_h_test +; +@llvm_mips_st_w_ARG = global 
<4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_st_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_st_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_st_w_ARG + %1 = bitcast <4 x i32>* @llvm_mips_st_w_RES to i8* + tail call void @llvm.mips.st.w(<4 x i32> %0, i8* %1, i32 16) + ret void +} + +declare void @llvm.mips.st.w(<4 x i32>, i8*, i32) nounwind + +; CHECK: llvm_mips_st_w_test: +; CHECK: ld.w +; CHECK: st.w [[R1:\$w[0-9]+]], 16( +; CHECK: .size llvm_mips_st_w_test +; +@llvm_mips_st_d_ARG = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_st_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_st_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_st_d_ARG + %1 = bitcast <2 x i64>* @llvm_mips_st_d_RES to i8* + tail call void @llvm.mips.st.d(<2 x i64> %0, i8* %1, i32 16) + ret void +} + +declare void @llvm.mips.st.d(<2 x i64>, i8*, i32) nounwind + +; CHECK: llvm_mips_st_d_test: +; CHECK: ld.d +; CHECK: st.d [[R1:\$w[0-9]+]], 16( +; CHECK: .size llvm_mips_st_d_test +; diff --git a/test/CodeGen/Mips/msa/i8.ll b/test/CodeGen/Mips/msa/i8.ll new file mode 100644 index 0000000..d2931a7 --- /dev/null +++ b/test/CodeGen/Mips/msa/i8.ll @@ -0,0 +1,211 @@ +; Test the MSA intrinsics that are encoded with the I8 instruction format. 
+ +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_andi_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_andi_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_andi_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_andi_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.andi.b(<16 x i8> %0, i32 25) + store <16 x i8> %1, <16 x i8>* @llvm_mips_andi_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.andi.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_andi_b_test: +; CHECK: ld.b +; CHECK: andi.b +; CHECK: st.b +; CHECK: .size llvm_mips_andi_b_test + +@llvm_mips_bmnzi_b_ARG1 = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 +@llvm_mips_bmnzi_b_ARG2 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_bmnzi_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_bmnzi_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_bmnzi_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_bmnzi_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.bmnzi.b(<16 x i8> %0, <16 x i8> %1, i32 25) + store <16 x i8> %2, <16 x i8>* @llvm_mips_bmnzi_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.bmnzi.b(<16 x i8>, <16 x i8>, i32) nounwind + +; CHECK: llvm_mips_bmnzi_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmnzi_b_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmnzi_b_ARG2)( +; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: bmnzi.b [[R3]], [[R4]], 
25 +; CHECK-DAG: st.b [[R3]], 0( +; CHECK: .size llvm_mips_bmnzi_b_test + +@llvm_mips_bmzi_b_ARG1 = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 +@llvm_mips_bmzi_b_ARG2 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_bmzi_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_bmzi_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_bmzi_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_bmzi_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.bmzi.b(<16 x i8> %0, <16 x i8> %1, i32 25) + store <16 x i8> %2, <16 x i8>* @llvm_mips_bmzi_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.bmzi.b(<16 x i8>, <16 x i8>, i32) nounwind + +; CHECK: llvm_mips_bmzi_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmzi_b_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmzi_b_ARG2)( +; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R2]]) +; bmnzi.b is the same as bmzi.b with ws and wd_in swapped +; CHECK-DAG: bmnzi.b [[R4]], [[R3]], 25 +; CHECK-DAG: st.b [[R4]], 0( +; CHECK: .size llvm_mips_bmzi_b_test + +@llvm_mips_bseli_b_ARG1 = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 +@llvm_mips_bseli_b_ARG2 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_bseli_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_bseli_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_bseli_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_bseli_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.bseli.b(<16 x i8> 
%0, <16 x i8> %1, i32 25) + store <16 x i8> %2, <16 x i8>* @llvm_mips_bseli_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.bseli.b(<16 x i8>, <16 x i8>, i32) nounwind + +; CHECK: llvm_mips_bseli_b_test: +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bseli_b_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bseli_b_ARG2)( +; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: bseli.b [[R3]], [[R4]], 25 +; CHECK-DAG: st.b [[R3]], 0( +; CHECK: .size llvm_mips_bseli_b_test + +@llvm_mips_nori_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_nori_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_nori_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_nori_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.nori.b(<16 x i8> %0, i32 25) + store <16 x i8> %1, <16 x i8>* @llvm_mips_nori_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.nori.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_nori_b_test: +; CHECK: ld.b +; CHECK: nori.b +; CHECK: st.b +; CHECK: .size llvm_mips_nori_b_test +; +@llvm_mips_ori_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_ori_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_ori_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_ori_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.ori.b(<16 x i8> %0, i32 25) + store <16 x i8> %1, <16 x i8>* @llvm_mips_ori_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.ori.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_ori_b_test: +; CHECK: ld.b +; CHECK: ori.b +; CHECK: st.b +; CHECK: .size llvm_mips_ori_b_test +; 
+@llvm_mips_shf_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_shf_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_shf_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_shf_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.shf.b(<16 x i8> %0, i32 25) + store <16 x i8> %1, <16 x i8>* @llvm_mips_shf_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.shf.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_shf_b_test: +; CHECK: ld.b +; CHECK: shf.b +; CHECK: st.b +; CHECK: .size llvm_mips_shf_b_test +; +@llvm_mips_shf_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_shf_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_shf_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_shf_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.shf.h(<8 x i16> %0, i32 25) + store <8 x i16> %1, <8 x i16>* @llvm_mips_shf_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.shf.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_shf_h_test: +; CHECK: ld.h +; CHECK: shf.h +; CHECK: st.h +; CHECK: .size llvm_mips_shf_h_test +; +@llvm_mips_shf_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_shf_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_shf_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_shf_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.shf.w(<4 x i32> %0, i32 25) + store <4 x i32> %1, <4 x i32>* @llvm_mips_shf_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.shf.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_shf_w_test: +; CHECK: ld.w +; CHECK: shf.w +; CHECK: st.w +; CHECK: .size llvm_mips_shf_w_test +; +@llvm_mips_xori_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 
2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_xori_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_xori_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_xori_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.xori.b(<16 x i8> %0, i32 25) + store <16 x i8> %1, <16 x i8>* @llvm_mips_xori_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.xori.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_xori_b_test: +; CHECK: ld.b +; CHECK: xori.b +; CHECK: st.b +; CHECK: .size llvm_mips_xori_b_test +; diff --git a/test/CodeGen/Mips/msa/inline-asm.ll b/test/CodeGen/Mips/msa/inline-asm.ll new file mode 100644 index 0000000..4a34273 --- /dev/null +++ b/test/CodeGen/Mips/msa/inline-asm.ll @@ -0,0 +1,34 @@ +; A basic inline assembly test + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s + +@v4i32_r = global <4 x i32> zeroinitializer, align 16 + +define void @test1() nounwind { +entry: + ; CHECK-LABEL: test1: + %0 = call <4 x i32> asm "ldi.w ${0:w}, 1", "=f"() + ; CHECK: ldi.w $w{{[1-3]?[0-9]}}, 1 + store <4 x i32> %0, <4 x i32>* @v4i32_r + ret void +} + +define void @test2() nounwind { +entry: + ; CHECK-LABEL: test2: + %0 = load <4 x i32>* @v4i32_r + %1 = call <4 x i32> asm "addvi.w ${0:w}, ${1:w}, 1", "=f,f"(<4 x i32> %0) + ; CHECK: addvi.w $w{{[1-3]?[0-9]}}, $w{{[1-3]?[0-9]}}, 1 + store <4 x i32> %1, <4 x i32>* @v4i32_r + ret void +} + +define void @test3() nounwind { +entry: + ; CHECK-LABEL: test3: + %0 = load <4 x i32>* @v4i32_r + %1 = call <4 x i32> asm sideeffect "addvi.w ${0:w}, ${1:w}, 1", "=f,f,~{$w0}"(<4 x i32> %0) + ; CHECK: addvi.w $w{{([1-9]|[1-3][0-9])}}, $w{{([1-9]|[1-3][0-9])}}, 1 + store <4 x i32> %1, <4 x i32>* @v4i32_r + ret void +} diff --git a/test/CodeGen/Mips/msa/llvm-stress-s1704963983.ll b/test/CodeGen/Mips/msa/llvm-stress-s1704963983.ll new file mode 100644 index 
0000000..4beaaa9 --- /dev/null +++ b/test/CodeGen/Mips/msa/llvm-stress-s1704963983.ll @@ -0,0 +1,134 @@ +; RUN: llc -march=mips < %s +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s +; RUN: llc -march=mipsel < %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s + +; This test originally failed for MSA with a +; "Unexpected illegal type!" assertion. +; It should at least successfully build. + +define void @autogen_SD1704963983(i8*, i32*, i64*, i32, i64, i8) { +BB: + %A4 = alloca <4 x double> + %A3 = alloca <8 x i64> + %A2 = alloca <1 x double> + %A1 = alloca double + %A = alloca i32 + %L = load i8* %0 + store i8 77, i8* %0 + %E = extractelement <8 x i64> zeroinitializer, i32 2 + %Shuff = shufflevector <8 x i64> zeroinitializer, <8 x i64> zeroinitializer, <8 x i32> <i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15, i32 1, i32 3> + %I = insertelement <8 x i64> zeroinitializer, i64 %E, i32 7 + %Sl = select i1 false, i8* %0, i8* %0 + %Cmp = icmp eq i32 434069, 272505 + br label %CF + +CF: ; preds = %CF, %CF78, %BB + %L5 = load i8* %Sl + store i8 %L, i8* %Sl + %E6 = extractelement <8 x i32> zeroinitializer, i32 2 + %Shuff7 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff, <8 x i32> <i32 13, i32 15, i32 1, i32 3, i32 5, i32 7, i32 9, i32 undef> + %I8 = insertelement <8 x i64> zeroinitializer, i64 %4, i32 7 + %B = shl <1 x i16> zeroinitializer, zeroinitializer + %FC = sitofp <8 x i64> zeroinitializer to <8 x float> + %Sl9 = select i1 %Cmp, i8 77, i8 77 + %Cmp10 = icmp uge <8 x i64> %Shuff, zeroinitializer + %L11 = load i8* %0 + store i8 %Sl9, i8* %0 + %E12 = extractelement <1 x i16> zeroinitializer, i32 0 + %Shuff13 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff, <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 undef, i32 3, i32 5, i32 7> + %I14 = insertelement <4 x i32> zeroinitializer, i32 %3, i32 3 + %B15 = udiv <1 x i16> %B, zeroinitializer + %Tr = trunc <8 x i64> %Shuff to <8 x i32> + %Sl16 = select i1 %Cmp, i8 77, i8 %5 + %Cmp17 = icmp ult <8 x 
i1> %Cmp10, %Cmp10 + %L18 = load i8* %Sl + store i8 -1, i8* %Sl + %E19 = extractelement <8 x i32> zeroinitializer, i32 3 + %Shuff20 = shufflevector <8 x float> %FC, <8 x float> %FC, <8 x i32> <i32 6, i32 8, i32 undef, i32 12, i32 14, i32 0, i32 2, i32 undef> + %I21 = insertelement <8 x i64> %Shuff13, i64 %E, i32 0 + %B22 = urem <8 x i64> %Shuff7, %I21 + %FC23 = sitofp i32 50347 to float + %Sl24 = select i1 %Cmp, double 0.000000e+00, double 0.000000e+00 + %Cmp25 = icmp ugt i32 465489, 47533 + br i1 %Cmp25, label %CF, label %CF78 + +CF78: ; preds = %CF + %L26 = load i8* %Sl + store i32 50347, i32* %A + %E27 = extractelement <8 x i1> %Cmp10, i32 2 + br i1 %E27, label %CF, label %CF77 + +CF77: ; preds = %CF77, %CF81, %CF78 + %Shuff28 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff, <8 x i32> <i32 13, i32 15, i32 1, i32 3, i32 5, i32 7, i32 9, i32 undef> + %I29 = insertelement <1 x i16> zeroinitializer, i16 -1, i32 0 + %B30 = urem <8 x i32> %Tr, zeroinitializer + %Tr31 = trunc i32 0 to i16 + %Sl32 = select i1 %Cmp, <2 x i1> zeroinitializer, <2 x i1> zeroinitializer + %L33 = load i8* %Sl + store i8 %L26, i8* %Sl + %E34 = extractelement <4 x i32> zeroinitializer, i32 0 + %Shuff35 = shufflevector <1 x i16> zeroinitializer, <1 x i16> %B, <1 x i32> undef + %I36 = insertelement <8 x i64> %Shuff28, i64 %E, i32 7 + %B37 = srem <1 x i16> %I29, zeroinitializer + %FC38 = sitofp <8 x i32> %B30 to <8 x double> + %Sl39 = select i1 %Cmp, double 0.000000e+00, double %Sl24 + %L40 = load i8* %Sl + store i8 %Sl16, i8* %Sl + %E41 = extractelement <1 x i16> zeroinitializer, i32 0 + %Shuff42 = shufflevector <8 x i1> %Cmp17, <8 x i1> %Cmp10, <8 x i32> <i32 14, i32 undef, i32 2, i32 4, i32 undef, i32 8, i32 10, i32 12> + %I43 = insertelement <4 x i32> zeroinitializer, i32 272505, i32 0 + %B44 = urem <8 x i32> %B30, %Tr + %PC = bitcast i8* %0 to i64* + %Sl45 = select i1 %Cmp, <8 x i1> %Cmp10, <8 x i1> %Shuff42 + %Cmp46 = fcmp ugt float 0xB856238A00000000, 0x47DA795E40000000 + br i1 
%Cmp46, label %CF77, label %CF80 + +CF80: ; preds = %CF80, %CF77 + %L47 = load i64* %PC + store i8 77, i8* %Sl + %E48 = extractelement <8 x i64> zeroinitializer, i32 2 + %Shuff49 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff7, <8 x i32> <i32 5, i32 7, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 3> + %I50 = insertelement <8 x i64> zeroinitializer, i64 %L47, i32 7 + %B51 = fdiv float 0x46CC2D8000000000, %FC23 + %PC52 = bitcast <8 x i64>* %A3 to i64* + %Sl53 = select i1 %Cmp, <8 x i64> %Shuff, <8 x i64> %Shuff + %Cmp54 = fcmp ole float 0x47DA795E40000000, 0xB856238A00000000 + br i1 %Cmp54, label %CF80, label %CF81 + +CF81: ; preds = %CF80 + %L55 = load i8* %Sl + store i8 %Sl16, i8* %Sl + %E56 = extractelement <1 x i16> %B, i32 0 + %Shuff57 = shufflevector <1 x i16> zeroinitializer, <1 x i16> zeroinitializer, <1 x i32> <i32 1> + %I58 = insertelement <8 x i64> zeroinitializer, i64 %L47, i32 7 + %B59 = srem i32 %E19, %E19 + %Sl60 = select i1 %Cmp, i8 77, i8 77 + %Cmp61 = icmp ult <1 x i16> zeroinitializer, %B + %L62 = load i8* %Sl + store i64 %L47, i64* %PC52 + %E63 = extractelement <4 x i32> %I43, i32 2 + %Shuff64 = shufflevector <4 x i1> zeroinitializer, <4 x i1> zeroinitializer, <4 x i32> <i32 undef, i32 undef, i32 1, i32 3> + %I65 = insertelement <8 x i64> %B22, i64 %L47, i32 7 + %B66 = add <8 x i64> %I50, %I65 + %FC67 = uitofp i16 %E12 to float + %Sl68 = select i1 %Cmp, <8 x i32> %B30, <8 x i32> zeroinitializer + %Cmp69 = fcmp ord double 0.000000e+00, 0.000000e+00 + br i1 %Cmp69, label %CF77, label %CF79 + +CF79: ; preds = %CF81 + %L70 = load i32* %A + store i64 %4, i64* %PC + %E71 = extractelement <4 x i32> zeroinitializer, i32 0 + %Shuff72 = shufflevector <8 x i32> zeroinitializer, <8 x i32> %B44, <8 x i32> <i32 11, i32 undef, i32 15, i32 1, i32 3, i32 undef, i32 7, i32 9> + %I73 = insertelement <8 x i16> zeroinitializer, i16 %E12, i32 5 + %B74 = fsub double 0.000000e+00, 0.000000e+00 + %Sl75 = select i1 %Cmp46, i32 %E6, i32 %E19 + %Cmp76 
= icmp ugt <4 x i32> %I43, zeroinitializer + store i8 %L, i8* %Sl + store i64 %L47, i64* %PC + store i64 %L47, i64* %PC + store i8 %L5, i8* %Sl + store i8 %L5, i8* %0 + ret void +} diff --git a/test/CodeGen/Mips/msa/llvm-stress-s1935737938.ll b/test/CodeGen/Mips/msa/llvm-stress-s1935737938.ll new file mode 100644 index 0000000..f9cab03 --- /dev/null +++ b/test/CodeGen/Mips/msa/llvm-stress-s1935737938.ll @@ -0,0 +1,138 @@ +; RUN: llc -march=mips < %s +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s +; RUN: llc -march=mipsel < %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s + +; This test originally failed for MSA with a +; `Opc && "Cannot copy registers"' assertion. +; It should at least successfully build. + +define void @autogen_SD1935737938(i8*, i32*, i64*, i32, i64, i8) { +BB: + %A4 = alloca i64 + %A3 = alloca <4 x i32> + %A2 = alloca i64 + %A1 = alloca i32 + %A = alloca <2 x i64> + %L = load i8* %0 + store i8 -1, i8* %0 + %E = extractelement <2 x i32> zeroinitializer, i32 0 + %Shuff = shufflevector <2 x i32> zeroinitializer, <2 x i32> zeroinitializer, <2 x i32> <i32 1, i32 3> + %I = insertelement <1 x i64> <i64 -1>, i64 286689, i32 0 + %B = lshr i8 %L, -69 + %ZE = fpext float 0xBF2AA5FE80000000 to double + %Sl = select i1 true, <1 x i64> <i64 -1>, <1 x i64> <i64 -1> + %L5 = load i8* %0 + store i8 -69, i8* %0 + %E6 = extractelement <16 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, i32 14 + %Shuff7 = shufflevector <2 x i32> zeroinitializer, <2 x i32> zeroinitializer, <2 x i32> <i32 1, i32 3> + %I8 = insertelement <2 x i32> zeroinitializer, i32 135673, i32 1 + %B9 = udiv i8 %B, %B + %FC = uitofp i32 %3 to double + %Sl10 = select i1 true, <1 x i1> zeroinitializer, <1 x i1> zeroinitializer + %Cmp = icmp ne <1 x i64> %I, <i64 -1> + %L11 = load i8* %0 + store i8 %L11, i8* %0 + %E12 = extractelement <1 x i64> <i64 -1>, i32 0 + %Shuff13 = shufflevector <1 x i64> %Sl, <1 x i64> 
<i64 -1>, <1 x i32> <i32 1> + %I14 = insertelement <1 x i64> %I, i64 303290, i32 0 + %B15 = frem float 0.000000e+00, 0.000000e+00 + %Sl16 = select i1 true, <1 x i1> %Cmp, <1 x i1> zeroinitializer + %Cmp17 = fcmp one float 0xBD946F9840000000, %B15 + br label %CF74 + +CF74: ; preds = %CF74, %CF80, %CF76, %BB + %L18 = load i8* %0 + store i8 -69, i8* %0 + %E19 = extractelement <1 x i64> %Sl, i32 0 + %Shuff20 = shufflevector <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i32> <i32 12, i32 14, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10> + %I21 = insertelement <2 x i32> %Shuff, i32 135673, i32 0 + %B22 = urem i32 135673, %3 + %FC23 = sitofp i8 %L to float + %Sl24 = select i1 true, i8 %B, i8 %L18 + %L25 = load i8* %0 + store i8 %L, i8* %0 + %E26 = extractelement <2 x i32> %Shuff, i32 1 + %Shuff27 = shufflevector <2 x i32> zeroinitializer, <2 x i32> zeroinitializer, <2 x i32> <i32 2, i32 0> + %I28 = insertelement <16 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, i64 %E12, i32 8 + %B29 = frem double %ZE, 0x235104F0E94F406E + %Tr = trunc i64 286689 to i8 + %Sl30 = select i1 true, float 0x45B13EA500000000, float %B15 + %Cmp31 = icmp eq i32 %B22, %B22 + br i1 %Cmp31, label %CF74, label %CF80 + +CF80: ; preds = %CF74 + %L32 = load i8* %0 + store i8 -1, i8* %0 + %E33 = extractelement <2 x i32> zeroinitializer, i32 1 + %Shuff34 = shufflevector <1 x i64> %Shuff13, <1 x i64> <i64 -1>, <1 x i32> zeroinitializer + %I35 = insertelement <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, i8 -1, i32 0 + %FC36 = sitofp <1 x i1> %Cmp to <1 x float> + %Sl37 = select i1 true, <8 x i8> %Shuff20, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> + %Cmp38 = icmp sgt <2 x i32> %I21, %Shuff27 + %L39 = load i8* %0 + store i8 %Sl24, i8* %0 + %E40 = extractelement <8 x i64> zeroinitializer, 
i32 1 + %Shuff41 = shufflevector <2 x i1> zeroinitializer, <2 x i1> %Cmp38, <2 x i32> <i32 0, i32 2> + %I42 = insertelement <4 x i32> zeroinitializer, i32 414573, i32 2 + %B43 = srem i8 %L5, %L39 + %Sl44 = select i1 %Cmp17, i8 %L, i8 %L + %Cmp45 = fcmp une float 0x3AFCE1A0C0000000, 0.000000e+00 + br i1 %Cmp45, label %CF74, label %CF76 + +CF76: ; preds = %CF80 + %L46 = load i8* %0 + store i8 %L39, i8* %0 + %E47 = extractelement <2 x i32> %Shuff27, i32 0 + %Shuff48 = shufflevector <1 x i1> %Sl10, <1 x i1> %Sl10, <1 x i32> <i32 1> + %I49 = insertelement <1 x i64> <i64 -1>, i64 %E12, i32 0 + %FC50 = fptosi double 0x235104F0E94F406E to i32 + %Sl51 = select i1 %Cmp17, <16 x i64> %I28, <16 x i64> %I28 + %Cmp52 = icmp ne i8 %Tr, %Sl24 + br i1 %Cmp52, label %CF74, label %CF75 + +CF75: ; preds = %CF75, %CF76 + %L53 = load i8* %0 + store i8 %L18, i8* %0 + %E54 = extractelement <8 x i8> %Shuff20, i32 5 + %Shuff55 = shufflevector <2 x i32> %Shuff, <2 x i32> zeroinitializer, <2 x i32> <i32 0, i32 2> + %I56 = insertelement <4 x i32> %I42, i32 %B22, i32 2 + %B57 = sub i64 %E40, %E6 + %Sl58 = select i1 true, i64 303290, i64 %E40 + %Cmp59 = icmp slt i64 %E40, %E6 + br i1 %Cmp59, label %CF75, label %CF78 + +CF78: ; preds = %CF75 + %L60 = load i8* %0 + store i8 -69, i8* %0 + %E61 = extractelement <2 x i32> zeroinitializer, i32 0 + %Shuff62 = shufflevector <2 x i32> %Shuff7, <2 x i32> %I21, <2 x i32> <i32 1, i32 3> + %I63 = insertelement <1 x i1> %Sl16, i1 %Cmp45, i32 0 + %B64 = and i8 %Sl44, -69 + %ZE65 = zext <1 x i1> %Shuff48 to <1 x i64> + %Sl66 = select i1 true, <1 x i64> %I, <1 x i64> %I49 + %Cmp67 = icmp ugt i64 286689, %E40 + br label %CF + +CF: ; preds = %CF, %CF78 + %L68 = load i8* %0 + store i64 %B57, i64* %2 + %E69 = extractelement <2 x i1> %Shuff41, i32 1 + br i1 %E69, label %CF, label %CF77 + +CF77: ; preds = %CF77, %CF + %Shuff70 = shufflevector <1 x i64> %Shuff34, <1 x i64> <i64 -1>, <1 x i32> zeroinitializer + %I71 = insertelement <2 x i32> %Shuff, i32 %E26, i32 0 + 
%Se = sext i8 %L60 to i32 + %Sl72 = select i1 %Cmp45, <2 x i32> %Shuff62, <2 x i32> %I71 + %Cmp73 = fcmp ugt double 0x235104F0E94F406E, 0x235104F0E94F406E + br i1 %Cmp73, label %CF77, label %CF79 + +CF79: ; preds = %CF77 + store i8 %L18, i8* %0 + store i8 %E54, i8* %0 + store i8 %L39, i8* %0 + store i8 %L39, i8* %0 + store i8 %B, i8* %0 + ret void +} diff --git a/test/CodeGen/Mips/msa/llvm-stress-s2090927243-simplified.ll b/test/CodeGen/Mips/msa/llvm-stress-s2090927243-simplified.ll new file mode 100644 index 0000000..3811314 --- /dev/null +++ b/test/CodeGen/Mips/msa/llvm-stress-s2090927243-simplified.ll @@ -0,0 +1,31 @@ +; RUN: llc -march=mips < %s +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s +; RUN: llc -march=mipsel < %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s + +; This test originally failed for MSA with a "Cannot select ..." error. +; This was because undef's are ignored when checking if a vector constant is a +; splat, but are legalized to zero if left in the DAG which changes the constant +; into a non-splat. +; +; It should at least successfully build. 
+ +define void @autogen_SD2090927243() { +BB: + br label %CF77 + +CF77: ; preds = %CF77, %CF80 + %Shuff27 = shufflevector <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, + <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, + <16 x i32> <i32 27, i32 29, i32 31, i32 1, i32 3, i32 5, i32 undef, i32 9, i32 11, i32 13, i32 undef, i32 17, i32 19, i32 21, i32 23, i32 undef> + %ZE30 = zext <16 x i8> %Shuff27 to <16 x i32> + %Cmp32 = fcmp ueq float undef, 0x3CDA6E5E40000000 + br i1 %Cmp32, label %CF77, label %CF + +CF: ; preds = %CF, %CF81 + %E48 = extractelement <16 x i32> %ZE30, i32 14 + br i1 undef, label %CF, label %CF78 + +CF78: ; preds = %CF + ret void +} diff --git a/test/CodeGen/Mips/msa/llvm-stress-s2501752154-simplified.ll b/test/CodeGen/Mips/msa/llvm-stress-s2501752154-simplified.ll new file mode 100644 index 0000000..564ad74 --- /dev/null +++ b/test/CodeGen/Mips/msa/llvm-stress-s2501752154-simplified.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=mips < %s +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s +; RUN: llc -march=mipsel < %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s + +; This test originally failed for MSA with a "Cannot select ..." error. +; This happened because the legalizer treated undef's in the <4 x float> +; constant as equivalent to the defined elements when checking if it a constant +; splat, but then proceeded to legalize the undef's to zero, leaving it as a +; non-splat that cannot be selected. It should have eliminated the undef's by +; rewriting the splat constant. + +; It should at least successfully build. 
+ +define void @autogen_SD2501752154() { +BB: + %BC = bitcast <4 x i32> <i32 -1, i32 -1, i32 undef, i32 undef> to <4 x float> + br label %CF74 + +CF74: ; preds = %CF74, %CF + %E54 = extractelement <1 x i1> undef, i32 0 + br i1 %E54, label %CF74, label %CF79 + +CF79: ; preds = %CF75 + %I63 = insertelement <4 x float> %BC, float undef, i32 0 + ret void +} diff --git a/test/CodeGen/Mips/msa/llvm-stress-s2704903805.ll b/test/CodeGen/Mips/msa/llvm-stress-s2704903805.ll new file mode 100644 index 0000000..e14f405 --- /dev/null +++ b/test/CodeGen/Mips/msa/llvm-stress-s2704903805.ll @@ -0,0 +1,141 @@ +; RUN: llc -march=mips < %s +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s +; RUN: llc -march=mipsel < %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s + +; This test originally failed for MSA after dereferencing a null this pointer. +; It should at least successfully build. + +define void @autogen_SD2704903805(i8*, i32*, i64*, i32, i64, i8) { +BB: + %A4 = alloca i32 + %A3 = alloca i32 + %A2 = alloca i8 + %A1 = alloca i32 + %A = alloca i8 + %L = load i8* %0 + store i8 %5, i8* %0 + %E = extractelement <2 x i16> zeroinitializer, i32 0 + %Shuff = shufflevector <1 x i8> <i8 -1>, <1 x i8> <i8 -1>, <1 x i32> undef + %I = insertelement <1 x i8> <i8 -1>, i8 85, i32 0 + %B = lshr <2 x i16> zeroinitializer, zeroinitializer + %FC = sitofp <4 x i16> zeroinitializer to <4 x float> + %Sl = select i1 true, float 0.000000e+00, float 0x401E76A240000000 + %Cmp = icmp ule i16 -25210, %E + br label %CF83 + +CF83: ; preds = %BB + %L5 = load i8* %0 + store i8 85, i8* %0 + %E6 = extractelement <1 x i8> <i8 -1>, i32 0 + %Shuff7 = shufflevector <2 x i16> zeroinitializer, <2 x i16> zeroinitializer, <2 x i32> <i32 1, i32 3> + %I8 = insertelement <4 x i16> zeroinitializer, i16 %E, i32 3 + %B9 = ashr <2 x i16> %Shuff7, zeroinitializer + %FC10 = sitofp i32 -1 to float + %Sl11 = select i1 %Cmp, i32 -1, i32 -1 + %Cmp12 = icmp sgt i32 -1, -1 + br label %CF + +CF: ; preds = %CF, %CF81, %CF83 + %L13 = load 
i8* %0 + store i8 0, i8* %0 + %E14 = extractelement <2 x i64> zeroinitializer, i32 0 + %Shuff15 = shufflevector <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, <4 x i32> <i32 3, i32 5, i32 7, i32 undef> + %I16 = insertelement <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, i64 81222, i32 1 + %B17 = lshr <2 x i16> zeroinitializer, %B + %Tr = trunc i32 272597 to i1 + br i1 %Tr, label %CF, label %CF80 + +CF80: ; preds = %CF80, %CF + %Sl18 = select i1 %Cmp, <2 x i64> zeroinitializer, <2 x i64> zeroinitializer + %Cmp19 = icmp ne i1 %Cmp12, %Cmp + br i1 %Cmp19, label %CF80, label %CF81 + +CF81: ; preds = %CF80 + %L20 = load i8* %0 + store i8 85, i8* %0 + %E21 = extractelement <1 x i8> <i8 -1>, i32 0 + %Shuff22 = shufflevector <1 x i8> <i8 -1>, <1 x i8> %Shuff, <1 x i32> zeroinitializer + %I23 = insertelement <1 x i8> <i8 -1>, i8 %L5, i32 0 + %FC24 = fptoui <4 x float> %FC to <4 x i16> + %Sl25 = select i1 %Cmp, <2 x i32> zeroinitializer, <2 x i32> <i32 -1, i32 -1> + %Cmp26 = icmp ult <4 x i64> %I16, %Shuff15 + %L27 = load i8* %0 + store i8 %L, i8* %0 + %E28 = extractelement <1 x i8> <i8 -1>, i32 0 + %Shuff29 = shufflevector <8 x i16> zeroinitializer, <8 x i16> zeroinitializer, <8 x i32> <i32 11, i32 undef, i32 15, i32 1, i32 3, i32 5, i32 undef, i32 9> + %I30 = insertelement <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, i64 %E14, i32 1 + %B31 = mul i8 %E28, 85 + %PC = bitcast i32* %A3 to i32* + %Sl32 = select i1 %Cmp12, float %FC10, float 0x4712BFE680000000 + %L33 = load i32* %PC + store i32 %L33, i32* %PC + %E34 = extractelement <2 x i16> zeroinitializer, i32 1 + %Shuff35 = shufflevector <1 x i8> %Shuff, <1 x i8> <i8 -1>, <1 x i32> zeroinitializer + %I36 = insertelement <1 x i8> <i8 -1>, i8 %L13, i32 0 + %B37 = xor i8 %L27, %L + %Sl38 = select i1 %Cmp, i16 %E34, i16 %E + %Cmp39 = icmp eq i1 %Cmp19, %Cmp + br i1 %Cmp39, label %CF, label %CF77 + +CF77: ; preds = %CF77, %CF81 + %L40 = load i32* %PC + store i32 %3, i32* %PC + %E41 = 
extractelement <2 x i32> zeroinitializer, i32 0 + %Shuff42 = shufflevector <2 x i32> <i32 -1, i32 -1>, <2 x i32> zeroinitializer, <2 x i32> <i32 1, i32 3> + %I43 = insertelement <1 x i8> <i8 -1>, i8 0, i32 0 + %B44 = or i16 %E, -25210 + %Se = sext i32 %3 to i64 + %Sl45 = select i1 true, <1 x i8> %Shuff, <1 x i8> %I43 + %Cmp46 = icmp sge <1 x i8> %I36, %Shuff + %L47 = load i32* %PC + store i32 %L33, i32* %PC + %E48 = extractelement <2 x i16> zeroinitializer, i32 0 + %Shuff49 = shufflevector <1 x i8> <i8 -1>, <1 x i8> <i8 -1>, <1 x i32> <i32 1> + %I50 = insertelement <2 x i32> %Sl25, i32 47963, i32 1 + %B51 = srem <1 x i8> %I, %Shuff22 + %FC52 = sitofp i8 %5 to double + %Sl53 = select i1 %Cmp39, i8 %L27, i8 85 + %Cmp54 = icmp slt i16 %E34, %E34 + br i1 %Cmp54, label %CF77, label %CF78 + +CF78: ; preds = %CF78, %CF77 + %L55 = load i32* %PC + store i32 %L33, i32* %PC + %E56 = extractelement <8 x i16> %Shuff29, i32 4 + %Shuff57 = shufflevector <1 x i8> <i8 -1>, <1 x i8> <i8 -1>, <1 x i32> <i32 1> + %I58 = insertelement <1 x i8> %B51, i8 %Sl53, i32 0 + %ZE = fpext float %FC10 to double + %Sl59 = select i1 %Cmp12, <2 x i16> %B9, <2 x i16> zeroinitializer + %Cmp60 = fcmp ult double 0.000000e+00, 0.000000e+00 + br i1 %Cmp60, label %CF78, label %CF79 + +CF79: ; preds = %CF79, %CF78 + %L61 = load i32* %PC + store i32 %L33, i32* %A3 + %E62 = extractelement <4 x i64> %Shuff15, i32 1 + %Shuff63 = shufflevector <8 x i16> %Shuff29, <8 x i16> %Shuff29, <8 x i32> <i32 undef, i32 10, i32 12, i32 undef, i32 undef, i32 undef, i32 4, i32 6> + %I64 = insertelement <2 x i64> zeroinitializer, i64 %Se, i32 0 + %B65 = shl i8 %5, 85 + %ZE66 = zext <4 x i1> %Cmp26 to <4 x i32> + %Sl67 = select i1 %Tr, <1 x i8> %Shuff, <1 x i8> %I23 + %Cmp68 = fcmp olt float 0x4712BFE680000000, 0x4712BFE680000000 + br i1 %Cmp68, label %CF79, label %CF82 + +CF82: ; preds = %CF79 + %L69 = load i32* %PC + store i32 %L33, i32* %PC + %E70 = extractelement <8 x i16> zeroinitializer, i32 3 + %Shuff71 = shufflevector 
<4 x i64> %Shuff15, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, <4 x i32> <i32 6, i32 undef, i32 2, i32 4> + %I72 = insertelement <1 x i8> <i8 -1>, i8 %L, i32 0 + %B73 = srem i64 %E62, %Se + %ZE74 = zext <4 x i1> %Cmp26 to <4 x i32> + %Sl75 = select i1 %Cmp, i32 463279, i32 %L61 + %Cmp76 = icmp sgt <1 x i8> %Shuff49, %Shuff22 + store i8 %B31, i8* %0 + store i8 85, i8* %0 + store i32 %L33, i32* %PC + store i8 %B65, i8* %0 + store i8 %L5, i8* %0 + ret void +} diff --git a/test/CodeGen/Mips/msa/llvm-stress-s3861334421.ll b/test/CodeGen/Mips/msa/llvm-stress-s3861334421.ll new file mode 100644 index 0000000..1a03e55 --- /dev/null +++ b/test/CodeGen/Mips/msa/llvm-stress-s3861334421.ll @@ -0,0 +1,149 @@ +; RUN: llc -march=mips < %s +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s +; RUN: llc -march=mipsel < %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s + +; This test originally failed for MSA with a +; "Don't know how to expand this condition!" unreachable. +; It should at least successfully build. 
+ +define void @autogen_SD3861334421(i8*, i32*, i64*, i32, i64, i8) { +BB: + %A4 = alloca <2 x i32> + %A3 = alloca <2 x double> + %A2 = alloca i64 + %A1 = alloca i64 + %A = alloca double + %L = load i8* %0 + store i8 -101, i8* %0 + %E = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 0 + %Shuff = shufflevector <8 x i64> zeroinitializer, <8 x i64> zeroinitializer, <8 x i32> <i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 undef, i32 1> + %I = insertelement <8 x i64> zeroinitializer, i64 %4, i32 5 + %B = and i64 116376, 57247 + %FC = uitofp i8 7 to double + %Sl = select i1 false, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> + %L5 = load i8* %0 + store i8 %L, i8* %0 + %E6 = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 3 + %Shuff7 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 6, i32 0> + %I8 = insertelement <8 x i8> %Sl, i8 7, i32 4 + %B9 = or <8 x i64> zeroinitializer, zeroinitializer + %Sl10 = select i1 false, i64 116376, i64 380809 + %Cmp = icmp sgt i32 394647, 17081 + br label %CF + +CF: ; preds = %CF, %BB + %L11 = load i8* %0 + store i8 -87, i8* %0 + %E12 = extractelement <4 x i64> zeroinitializer, i32 0 + %Shuff13 = shufflevector <8 x i64> zeroinitializer, <8 x i64> zeroinitializer, <8 x i32> <i32 7, i32 9, i32 11, i32 13, i32 undef, i32 1, i32 3, i32 5> + %I14 = insertelement <4 x i64> zeroinitializer, i64 380809, i32 1 + %B15 = srem i64 %Sl10, 380809 + %FC16 = sitofp i64 57247 to float + %Sl17 = select i1 false, double 0x87A9374869A78EC6, double 0.000000e+00 + %Cmp18 = icmp uge i8 %L, %5 + br i1 %Cmp18, label %CF, label %CF80 + +CF80: ; preds = %CF80, %CF88, %CF + %L19 = load i8* %0 + store i8 -101, i8* %0 + %E20 = extractelement <4 x i64> zeroinitializer, i32 0 + %Shuff21 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %Shuff7, <4 x i32> <i32 7, i32 1, i32 3, i32 5> + %I22 = 
insertelement <4 x i64> zeroinitializer, i64 127438, i32 1 + %B23 = fdiv double %Sl17, 0.000000e+00 + %Sl24 = select i1 %Cmp18, i32 420510, i32 492085 + %Cmp25 = icmp ugt i1 %Cmp18, false + br i1 %Cmp25, label %CF80, label %CF83 + +CF83: ; preds = %CF83, %CF80 + %L26 = load i8* %0 + store i8 -87, i8* %0 + %E27 = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 0 + %Shuff28 = shufflevector <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 7, i32 1, i32 3, i32 5> + %I29 = insertelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 492085, i32 1 + %B30 = lshr <8 x i8> %I8, %I8 + %FC31 = sitofp <4 x i32> %Shuff28 to <4 x double> + %Sl32 = select i1 false, <8 x i8> %I8, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> + %Cmp33 = icmp eq i64 %B, 116376 + br i1 %Cmp33, label %CF83, label %CF88 + +CF88: ; preds = %CF83 + %L34 = load i8* %0 + store i8 -87, i8* %0 + %E35 = extractelement <8 x i64> %Shuff, i32 7 + %Shuff36 = shufflevector <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %Shuff28, <4 x i32> <i32 2, i32 undef, i32 undef, i32 0> + %I37 = insertelement <4 x i64> zeroinitializer, i64 380809, i32 0 + %B38 = xor <8 x i64> %B9, %B9 + %ZE = zext i32 0 to i64 + %Sl39 = select i1 %Cmp33, i8 %L11, i8 %L5 + %Cmp40 = icmp sgt i1 %Cmp, false + br i1 %Cmp40, label %CF80, label %CF81 + +CF81: ; preds = %CF81, %CF85, %CF87, %CF88 + %L41 = load i8* %0 + store i8 %L34, i8* %0 + %E42 = extractelement <8 x i64> %Shuff13, i32 6 + %Shuff43 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 7> + %I44 = insertelement <4 x i64> zeroinitializer, i64 116376, i32 3 + %B45 = fsub float %FC16, 0x3AC86DCC40000000 + %Tr = trunc <4 x i64> %I14 to <4 x i32> + %Sl46 = select i1 false, <8 x i64> %B38, <8 x i64> zeroinitializer + %Cmp47 = icmp sgt i1 %Cmp18, %Cmp18 + br i1 %Cmp47, label %CF81, label %CF85 + +CF85: ; preds = %CF81 + %L48 = 
load i8* %0 + store i8 -101, i8* %0 + %E49 = extractelement <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, i32 2 + %Shuff50 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 5, i32 7, i32 1, i32 3> + %I51 = insertelement <4 x i64> zeroinitializer, i64 %E20, i32 3 + %B52 = or i32 336955, %Sl24 + %FC53 = uitofp i8 %L48 to double + %Sl54 = select i1 %Cmp47, i32 %3, i32 %Sl24 + %Cmp55 = icmp ne <8 x i64> %Shuff13, zeroinitializer + %L56 = load i8* %0 + store i8 %L11, i8* %0 + %E57 = extractelement <4 x i64> %Shuff21, i32 1 + %Shuff58 = shufflevector <8 x i64> %Shuff, <8 x i64> zeroinitializer, <8 x i32> <i32 4, i32 6, i32 undef, i32 10, i32 12, i32 undef, i32 0, i32 2> + %I59 = insertelement <4 x i64> zeroinitializer, i64 %E42, i32 2 + %B60 = udiv <8 x i8> %Sl, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> + %Tr61 = trunc i8 49 to i1 + br i1 %Tr61, label %CF81, label %CF84 + +CF84: ; preds = %CF84, %CF85 + %Sl62 = select i1 false, i8 %L, i8 %L48 + %Cmp63 = icmp ne <8 x i64> %I, zeroinitializer + %L64 = load i8* %0 + store i8 %5, i8* %0 + %E65 = extractelement <8 x i1> %Cmp55, i32 0 + br i1 %E65, label %CF84, label %CF87 + +CF87: ; preds = %CF84 + %Shuff66 = shufflevector <4 x i64> %Shuff21, <4 x i64> %I14, <4 x i32> <i32 3, i32 undef, i32 7, i32 1> + %I67 = insertelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 %Sl54, i32 1 + %B68 = frem double %B23, %Sl17 + %ZE69 = zext <8 x i8> %Sl32 to <8 x i64> + %Sl70 = select i1 %Tr61, i64 %E20, i64 %E12 + %Cmp71 = icmp slt <8 x i64> %I, %Shuff + %L72 = load i8* %0 + store i8 %L72, i8* %0 + %E73 = extractelement <8 x i1> %Cmp55, i32 6 + br i1 %E73, label %CF81, label %CF82 + +CF82: ; preds = %CF82, %CF87 + %Shuff74 = shufflevector <4 x i32> %I67, <4 x i32> %I29, <4 x i32> <i32 1, i32 3, i32 undef, i32 7> + %I75 = insertelement <4 x i64> zeroinitializer, i64 380809, i32 3 + %B76 = fsub double 0.000000e+00, %FC53 + %Tr77 = trunc i32 %E to i8 + %Sl78 = select 
i1 %Cmp18, i64* %A2, i64* %2 + %Cmp79 = icmp eq i32 394647, 492085 + br i1 %Cmp79, label %CF82, label %CF86 + +CF86: ; preds = %CF82 + store i64 %Sl70, i64* %Sl78 + store i64 %E57, i64* %Sl78 + store i64 %Sl70, i64* %Sl78 + store i64 %B, i64* %Sl78 + store i64 %Sl10, i64* %Sl78 + ret void +} diff --git a/test/CodeGen/Mips/msa/llvm-stress-s3926023935.ll b/test/CodeGen/Mips/msa/llvm-stress-s3926023935.ll new file mode 100644 index 0000000..96547d9 --- /dev/null +++ b/test/CodeGen/Mips/msa/llvm-stress-s3926023935.ll @@ -0,0 +1,143 @@ +; RUN: llc -march=mips < %s +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s +; RUN: llc -march=mipsel < %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s + +; This test originally failed for MSA with a +; "Type for zero vector elements is not legal" assertion. +; It should at least successfully build. + +define void @autogen_SD3926023935(i8*, i32*, i64*, i32, i64, i8) { +BB: + %A4 = alloca i1 + %A3 = alloca float + %A2 = alloca double + %A1 = alloca float + %A = alloca double + %L = load i8* %0 + store i8 -123, i8* %0 + %E = extractelement <4 x i64> zeroinitializer, i32 1 + %Shuff = shufflevector <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %I = insertelement <2 x i1> zeroinitializer, i1 false, i32 0 + %BC = bitcast i64 181325 to double + %Sl = select i1 false, <2 x i32> zeroinitializer, <2 x i32> zeroinitializer + %Cmp = icmp ne <4 x i64> zeroinitializer, zeroinitializer + %L5 = load i8* %0 + store i8 %L, i8* %0 + %E6 = extractelement <4 x i64> zeroinitializer, i32 3 + %Shuff7 = shufflevector <2 x i16> zeroinitializer, <2 x i16> zeroinitializer, <2 x i32> <i32 2, i32 0> + %I8 = insertelement <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, i64 498254, i32 4 + %B = shl i32 0, 364464 + %Sl9 = select i1 false, i64 %E, i64 498254 + %Cmp10 = icmp sge i8 -123, %5 + br label %CF80 + +CF80: ; preds = %BB + %L11 = load i8* %0 + store i8 -123, i8* %0 + 
%E12 = extractelement <2 x i16> zeroinitializer, i32 1 + %Shuff13 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %I14 = insertelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 %B, i32 2 + %B15 = sdiv i64 334618, -1 + %PC = bitcast i1* %A4 to i64* + %Sl16 = select i1 %Cmp10, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1> + %Cmp17 = icmp ule <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %Sl16 + %L18 = load double* %A2 + store i64 498254, i64* %PC + %E19 = extractelement <4 x i64> zeroinitializer, i32 0 + %Shuff20 = shufflevector <2 x i1> zeroinitializer, <2 x i1> %I, <2 x i32> <i32 3, i32 1> + %I21 = insertelement <2 x i1> zeroinitializer, i1 false, i32 1 + %B22 = fadd double 0.000000e+00, %BC + %ZE = zext <2 x i1> %Shuff20 to <2 x i32> + %Sl23 = select i1 %Cmp10, <2 x i1> %Shuff20, <2 x i1> zeroinitializer + %Cmp24 = icmp ult <2 x i32> zeroinitializer, zeroinitializer + %L25 = load i8* %0 + store i8 %L25, i8* %0 + %E26 = extractelement <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1>, i32 3 + %Shuff27 = shufflevector <4 x i32> %Shuff, <4 x i32> %I14, <4 x i32> <i32 6, i32 0, i32 undef, i32 4> + %I28 = insertelement <4 x i32> zeroinitializer, i32 %3, i32 0 + %B29 = lshr i8 %E26, -43 + %Tr = trunc i8 %L5 to i1 + br label %CF79 + +CF79: ; preds = %CF80 + %Sl30 = select i1 false, i8 %B29, i8 -123 + %Cmp31 = icmp sge <2 x i1> %I, %I + %L32 = load i64* %PC + store i8 -123, i8* %0 + %E33 = extractelement <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, i32 2 + %Shuff34 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %Shuff13, <4 x i32> <i32 5, i32 7, i32 1, i32 3> + %I35 = insertelement <4 x i64> zeroinitializer, i64 498254, i32 3 + %B36 = sub <8 x i64> %I8, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1> + %PC37 = bitcast i8* %0 to i1* + %Sl38 = select i1 %Cmp10, i8 -43, i8 %L5 + %Cmp39 = icmp eq i64 498254, %B15 + br label %CF + +CF: ; preds = 
%CF, %CF79 + %L40 = load double* %A + store i1 %Cmp39, i1* %PC37 + %E41 = extractelement <4 x i64> zeroinitializer, i32 3 + %Shuff42 = shufflevector <2 x i32> zeroinitializer, <2 x i32> %ZE, <2 x i32> <i32 2, i32 undef> + %I43 = insertelement <4 x i32> %Shuff, i32 %3, i32 0 + %B44 = shl i64 %E41, -1 + %Se = sext <2 x i1> %I to <2 x i32> + %Sl45 = select i1 %Cmp10, i1 false, i1 false + br i1 %Sl45, label %CF, label %CF77 + +CF77: ; preds = %CF77, %CF + %Cmp46 = fcmp uno double 0.000000e+00, 0.000000e+00 + br i1 %Cmp46, label %CF77, label %CF78 + +CF78: ; preds = %CF78, %CF83, %CF82, %CF77 + %L47 = load i64* %PC + store i8 -123, i8* %0 + %E48 = extractelement <4 x i64> zeroinitializer, i32 3 + %Shuff49 = shufflevector <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 6, i32 undef> + %I50 = insertelement <2 x i1> zeroinitializer, i1 %Cmp10, i32 0 + %B51 = sdiv i64 %E19, 463132 + %Tr52 = trunc i64 %E48 to i32 + %Sl53 = select i1 %Tr, i1 %Cmp46, i1 %Cmp10 + br i1 %Sl53, label %CF78, label %CF83 + +CF83: ; preds = %CF78 + %Cmp54 = fcmp uge double %L40, %L40 + br i1 %Cmp54, label %CF78, label %CF82 + +CF82: ; preds = %CF83 + %L55 = load i64* %PC + store i64 %L32, i64* %PC + %E56 = extractelement <2 x i16> %Shuff7, i32 1 + %Shuff57 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 6, i32 0> + %I58 = insertelement <2 x i32> %Sl, i32 %Tr52, i32 0 + %B59 = or i32 %B, %3 + %FC = sitofp i64 498254 to double + %Sl60 = select i1 false, i64 %E6, i64 -1 + %Cmp61 = icmp sgt <4 x i32> %Shuff27, %I43 + %L62 = load i64* %PC + store i64 %Sl9, i64* %PC + %E63 = extractelement <2 x i32> %ZE, i32 0 + %Shuff64 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %Shuff13, <4 x i32> <i32 1, i32 3, i32 undef, i32 7> + %I65 = insertelement <4 x i32> %Shuff, i32 %3, i32 3 + %B66 = sub i64 %L47, 53612 + %Tr67 = trunc i64 %4 to i32 + %Sl68 = select i1 %Cmp39, i1 %Cmp39, i1 false + br i1 %Sl68, label 
%CF78, label %CF81 + +CF81: ; preds = %CF82 + %Cmp69 = icmp ne <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, %B36 + %L70 = load i8* %0 + store i64 %L55, i64* %PC + %E71 = extractelement <4 x i32> %Shuff49, i32 1 + %Shuff72 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %Shuff34, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %I73 = insertelement <4 x i64> %Shuff64, i64 %E, i32 2 + %B74 = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, %B36 + %Sl75 = select i1 %Sl68, i64 %B51, i64 %L55 + %Cmp76 = icmp sgt <8 x i64> %B74, %B36 + store i1 %Cmp39, i1* %PC37 + store i64 %E41, i64* %PC + store i64 %L32, i64* %PC + store i64 %Sl75, i64* %2 + store i64 %L32, i64* %PC + ret void +} diff --git a/test/CodeGen/Mips/msa/llvm-stress-s3997499501.ll b/test/CodeGen/Mips/msa/llvm-stress-s3997499501.ll new file mode 100644 index 0000000..bef75f3 --- /dev/null +++ b/test/CodeGen/Mips/msa/llvm-stress-s3997499501.ll @@ -0,0 +1,152 @@ +; RUN: llc -march=mips < %s +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s +; RUN: llc -march=mipsel < %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s + +; This test originally failed to select instructions for extract_vector_elt for +; v4f32 on MSA. +; It should at least successfully build. 
+ +define void @autogen_SD3997499501(i8*, i32*, i64*, i32, i64, i8) { +BB: + %A4 = alloca <1 x double> + %A3 = alloca double + %A2 = alloca float + %A1 = alloca double + %A = alloca double + %L = load i8* %0 + store i8 97, i8* %0 + %E = extractelement <16 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, i32 14 + %Shuff = shufflevector <2 x i1> zeroinitializer, <2 x i1> zeroinitializer, <2 x i32> <i32 1, i32 3> + %I = insertelement <4 x i64> zeroinitializer, i64 0, i32 3 + %Tr = trunc <1 x i64> zeroinitializer to <1 x i8> + %Sl = select i1 false, double* %A1, double* %A + %Cmp = icmp ne <2 x i64> zeroinitializer, zeroinitializer + %L5 = load double* %Sl + store float -4.374162e+06, float* %A2 + %E6 = extractelement <4 x i64> zeroinitializer, i32 3 + %Shuff7 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %I, <4 x i32> <i32 2, i32 4, i32 6, i32 undef> + %I8 = insertelement <2 x i1> %Shuff, i1 false, i32 0 + %B = ashr <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <i32 -1, i32 -1, i32 -1, i32 -1> + %PC = bitcast float* %A2 to float* + %Sl9 = select i1 false, i32 82299, i32 0 + %Cmp10 = icmp slt i8 97, %5 + br label %CF72 + +CF72: ; preds = %CF72, %CF80, %CF78, %BB + %L11 = load double* %Sl + store double 0.000000e+00, double* %Sl + %E12 = extractelement <2 x i1> zeroinitializer, i32 0 + br i1 %E12, label %CF72, label %CF80 + +CF80: ; preds = %CF72 + %Shuff13 = shufflevector <2 x i1> zeroinitializer, <2 x i1> zeroinitializer, <2 x i32> <i32 3, i32 1> + %I14 = insertelement <2 x i64> zeroinitializer, i64 %4, i32 1 + %B15 = fadd double %L5, 0.000000e+00 + %BC = bitcast i32 0 to float + %Sl16 = select i1 %E12, float 0xC7957ED940000000, float %BC + %Cmp17 = icmp eq i32 136082, 471909 + br i1 %Cmp17, label %CF72, label %CF77 + +CF77: ; preds = %CF77, %CF80 + %L18 = load double* %Sl + store double 0.000000e+00, double* %Sl + %E19 = extractelement <2 x i1> zeroinitializer, i32 0 + br i1 
%E19, label %CF77, label %CF78 + +CF78: ; preds = %CF77 + %Shuff20 = shufflevector <2 x i1> zeroinitializer, <2 x i1> zeroinitializer, <2 x i32> <i32 1, i32 3> + %I21 = insertelement <8 x i1> zeroinitializer, i1 %Cmp10, i32 7 + %B22 = sdiv <4 x i64> %Shuff7, zeroinitializer + %FC = uitofp i8 97 to double + %Sl23 = select i1 %Cmp10, <2 x i1> zeroinitializer, <2 x i1> zeroinitializer + %L24 = load double* %Sl + store float %Sl16, float* %PC + %E25 = extractelement <2 x i1> %Shuff, i32 1 + br i1 %E25, label %CF72, label %CF76 + +CF76: ; preds = %CF78 + %Shuff26 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %B22, <4 x i32> <i32 undef, i32 undef, i32 0, i32 undef> + %I27 = insertelement <4 x i64> zeroinitializer, i64 %E, i32 2 + %B28 = mul <4 x i64> %I27, zeroinitializer + %ZE = zext <8 x i1> zeroinitializer to <8 x i64> + %Sl29 = select i1 %Cmp17, float -4.374162e+06, float -4.374162e+06 + %L30 = load i8* %0 + store double %L5, double* %Sl + %E31 = extractelement <8 x i1> zeroinitializer, i32 5 + br label %CF + +CF: ; preds = %CF, %CF81, %CF76 + %Shuff32 = shufflevector <16 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <16 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <16 x i32> <i32 8, i32 undef, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 undef, i32 26, i32 28, i32 30, i32 undef, i32 2, i32 4, i32 6> + %I33 = insertelement <8 x i1> zeroinitializer, i1 false, i32 2 + %BC34 = bitcast <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1> to <4 x float> + %Sl35 = select i1 %E12, <2 x i1> %I8, <2 x i1> zeroinitializer + %Cmp36 = fcmp oge double 0xC2C3BAE2D5C18360, 0xC2C3BAE2D5C18360 + br i1 %Cmp36, label %CF, label %CF74 + +CF74: ; preds = %CF74, %CF + %L37 = load float* %PC + store double 0.000000e+00, double* %Sl + %E38 = extractelement <2 x i1> %Sl23, i32 1 + br i1 %E38, label %CF74, 
label %CF75 + +CF75: ; preds = %CF75, %CF82, %CF74 + %Shuff39 = shufflevector <2 x i1> %Shuff13, <2 x i1> zeroinitializer, <2 x i32> <i32 undef, i32 2> + %I40 = insertelement <4 x i64> zeroinitializer, i64 %4, i32 2 + %Sl41 = select i1 %Cmp10, i32 0, i32 %3 + %Cmp42 = icmp ne <1 x i64> zeroinitializer, zeroinitializer + %L43 = load double* %Sl + store i64 %4, i64* %2 + %E44 = extractelement <2 x i1> %Shuff20, i32 1 + br i1 %E44, label %CF75, label %CF82 + +CF82: ; preds = %CF75 + %Shuff45 = shufflevector <2 x i1> %Sl23, <2 x i1> %Sl23, <2 x i32> <i32 2, i32 0> + %I46 = insertelement <4 x i64> zeroinitializer, i64 0, i32 0 + %B47 = sub i64 %E, %E6 + %Sl48 = select i1 %Cmp10, double %L5, double %L43 + %Cmp49 = icmp uge i64 %4, %B47 + br i1 %Cmp49, label %CF75, label %CF81 + +CF81: ; preds = %CF82 + %L50 = load i8* %0 + store double %L43, double* %Sl + %E51 = extractelement <4 x i64> %Shuff7, i32 3 + %Shuff52 = shufflevector <4 x float> %BC34, <4 x float> %BC34, <4 x i32> <i32 2, i32 4, i32 6, i32 0> + %I53 = insertelement <2 x i1> %Cmp, i1 %E25, i32 0 + %B54 = fdiv double %L24, %L43 + %BC55 = bitcast <4 x i64> zeroinitializer to <4 x double> + %Sl56 = select i1 false, i8 %5, i8 97 + %L57 = load i8* %0 + store i8 %L50, i8* %0 + %E58 = extractelement <2 x i1> %Shuff20, i32 1 + br i1 %E58, label %CF, label %CF73 + +CF73: ; preds = %CF73, %CF81 + %Shuff59 = shufflevector <2 x i1> %Shuff13, <2 x i1> %Shuff45, <2 x i32> <i32 undef, i32 0> + %I60 = insertelement <4 x float> %Shuff52, float -4.374162e+06, i32 0 + %B61 = mul <4 x i64> %I46, zeroinitializer + %PC62 = bitcast double* %A3 to float* + %Sl63 = select i1 %Cmp10, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer + %Cmp64 = icmp ne <2 x i1> %Cmp, %Shuff + %L65 = load double* %A1 + store float -4.374162e+06, float* %PC62 + %E66 = extractelement <8 x i1> %I21, i32 3 + br i1 %E66, label %CF73, label %CF79 + +CF79: ; preds = %CF79, %CF73 + %Shuff67 = shufflevector <8 x i1> %I21, <8 x i1> %I21, <8 x i32> <i32 6, i32 8, 
i32 10, i32 12, i32 14, i32 0, i32 undef, i32 4> + %I68 = insertelement <1 x i1> %Cmp42, i1 %E25, i32 0 + %B69 = sdiv <16 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1> + %Sl70 = select i1 %Cmp49, <2 x i1> %Sl23, <2 x i1> %Shuff45 + %Cmp71 = icmp ne i1 false, false + br i1 %Cmp71, label %CF79, label %CF83 + +CF83: ; preds = %CF79 + store double 0.000000e+00, double* %Sl + store float %BC, float* %PC62 + store double %Sl48, double* %Sl + store double %FC, double* %Sl + store float %BC, float* %PC62 + ret void +} diff --git a/test/CodeGen/Mips/msa/llvm-stress-s449609655-simplified.ll b/test/CodeGen/Mips/msa/llvm-stress-s449609655-simplified.ll new file mode 100644 index 0000000..24e27cb --- /dev/null +++ b/test/CodeGen/Mips/msa/llvm-stress-s449609655-simplified.ll @@ -0,0 +1,33 @@ +; RUN: llc -march=mips < %s +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s +; RUN: llc -march=mipsel < %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s + +; This test is based on an llvm-stress generated test case with seed=449609655 + +; This test originally failed for MSA with a +; "Comparison requires equal bit widths" assertion. +; The legalizer legalized ; the <4 x i8>'s into <4 x i32>'s, then a call to +; isVSplat() returned the splat value for <i8 -1, i8 -1, ...> as a 32-bit APInt +; (255), but the zeroinitializer splat value as an 8-bit APInt (0). The +; assertion occured when trying to check the values were bitwise inverses of +; each-other. +; +; It should at least successfully build. 
+ +define void @autogen_SD449609655(i8) { +BB: + %Cmp = icmp ult i8 -3, %0 + br label %CF78 + +CF78: ; preds = %CF81, %CF78, %BB + %Sl31 = select i1 %Cmp, <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1>, <4 x i8> zeroinitializer + br i1 undef, label %CF78, label %CF81 + +CF81: ; preds = %CF78 + br i1 undef, label %CF78, label %CF80 + +CF80: ; preds = %CF81 + %I59 = insertelement <4 x i8> %Sl31, i8 undef, i32 1 + ret void +} diff --git a/test/CodeGen/Mips/msa/llvm-stress-s525530439.ll b/test/CodeGen/Mips/msa/llvm-stress-s525530439.ll new file mode 100644 index 0000000..697871d --- /dev/null +++ b/test/CodeGen/Mips/msa/llvm-stress-s525530439.ll @@ -0,0 +1,139 @@ +; RUN: llc -march=mips < %s +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s +; RUN: llc -march=mipsel < %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s + +; This test originally failed for MSA with a +; `Num < NumOperands && "Invalid child # of SDNode!"' assertion. +; It should at least successfully build. + +define void @autogen_SD525530439(i8*, i32*, i64*, i32, i64, i8) { +BB: + %A4 = alloca i32 + %A3 = alloca double + %A2 = alloca <1 x double> + %A1 = alloca <8 x double> + %A = alloca i64 + %L = load i8* %0 + store i64 33695, i64* %A + %E = extractelement <4 x i32> zeroinitializer, i32 3 + %Shuff = shufflevector <2 x i32> <i32 -1, i32 -1>, <2 x i32> <i32 -1, i32 -1>, <2 x i32> <i32 2, i32 0> + %I = insertelement <4 x i16> zeroinitializer, i16 -11642, i32 0 + %B = lshr <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> + %ZE = fpext float 0x3B64A2B880000000 to double + %Sl = select i1 true, i16 -1, i16 -11642 + %L5 = load i8* %0 + store i8 0, i8* %0 + %E6 = extractelement <4 x i32> zeroinitializer, i32 2 + %Shuff7 = shufflevector <8 x i1> zeroinitializer, <8 x i1> zeroinitializer, <8 x i32> <i32 undef, i32 7, i32 9, i32 11, i32 13, i32 15, i32 1, i32 undef> + %I8 = insertelement <4 x i32> zeroinitializer, i32 %3, i32 3 + %B9 
= sub i32 71140, 439732 + %BC = bitcast <2 x i32> <i32 -1, i32 -1> to <2 x float> + %Sl10 = select i1 true, i32* %1, i32* %1 + %Cmp = icmp sge <8 x i64> zeroinitializer, zeroinitializer + %L11 = load i32* %Sl10 + store <1 x double> zeroinitializer, <1 x double>* %A2 + %E12 = extractelement <4 x i16> zeroinitializer, i32 0 + %Shuff13 = shufflevector <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i32> undef + %I14 = insertelement <1 x i16> zeroinitializer, i16 %Sl, i32 0 + %B15 = or i16 -1, %E12 + %BC16 = bitcast <4 x i32> zeroinitializer to <4 x float> + %Sl17 = select i1 true, i64 %4, i64 %4 + %Cmp18 = fcmp ugt float 0xC5ABB1BF80000000, 0x3EEF3D6300000000 + br label %CF75 + +CF75: ; preds = %CF75, %BB + %L19 = load i32* %Sl10 + store i32 %L11, i32* %Sl10 + %E20 = extractelement <4 x i32> zeroinitializer, i32 1 + %Shuff21 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %I8, <4 x i32> <i32 undef, i32 2, i32 4, i32 6> + %I22 = insertelement <4 x float> %BC16, float 0x3EEF3D6300000000, i32 2 + %B23 = shl i32 71140, 439732 + %ZE24 = fpext <4 x float> %I22 to <4 x double> + %Sl25 = select i1 %Cmp18, i32 %L11, i32 %L11 + %Cmp26 = icmp ne i32 %E20, %L19 + br i1 %Cmp26, label %CF75, label %CF76 + +CF76: ; preds = %CF75 + %L27 = load i32* %Sl10 + store i32 439732, i32* %Sl10 + %E28 = extractelement <4 x i32> %Shuff21, i32 3 + %Shuff29 = shufflevector <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32> <i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 0> + %I30 = insertelement <8 x i1> %Shuff7, i1 %Cmp18, i32 4 + %Sl31 = select i1 %Cmp18, i32 %3, i32 %B23 + %Cmp32 = icmp ugt i32 0, %3 + br label %CF74 + +CF74: ; preds = %CF74, %CF80, %CF78, %CF76 + %L33 = load i64* %2 + store i32 71140, i32* %Sl10 + %E34 = extractelement <4 x i32> zeroinitializer, i32 1 + %Shuff35 = shufflevector <1 x i16> zeroinitializer, <1 x i16> zeroinitializer, <1 x i32> 
undef + %I36 = insertelement <4 x i16> zeroinitializer, i16 -11642, i32 0 + %B37 = mul <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, %Shuff29 + %Sl38 = select i1 %Cmp18, double 0.000000e+00, double 0x2BA9DB480DA732C6 + %Cmp39 = icmp sgt i16 -11642, %Sl + br i1 %Cmp39, label %CF74, label %CF80 + +CF80: ; preds = %CF74 + %L40 = load i8* %0 + store i32 0, i32* %Sl10 + %E41 = extractelement <8 x i64> zeroinitializer, i32 1 + %Shuff42 = shufflevector <1 x i16> %I14, <1 x i16> %I14, <1 x i32> undef + %I43 = insertelement <4 x i16> %I36, i16 -11642, i32 0 + %FC = fptoui float 0x455CA2B080000000 to i16 + %Sl44 = select i1 %Cmp18, i1 %Cmp18, i1 %Cmp39 + br i1 %Sl44, label %CF74, label %CF78 + +CF78: ; preds = %CF80 + %L45 = load i32* %Sl10 + store i8 %L5, i8* %0 + %E46 = extractelement <8 x i1> %Shuff7, i32 2 + br i1 %E46, label %CF74, label %CF77 + +CF77: ; preds = %CF77, %CF78 + %Shuff47 = shufflevector <4 x i16> %I43, <4 x i16> zeroinitializer, <4 x i32> <i32 5, i32 undef, i32 1, i32 3> + %I48 = insertelement <1 x i16> %Shuff42, i16 %Sl, i32 0 + %B49 = mul i8 0, %L40 + %FC50 = uitofp i32 %3 to double + %Sl51 = select i1 %Sl44, i32 %L27, i32 0 + %Cmp52 = icmp sge i8 %B49, 0 + br i1 %Cmp52, label %CF77, label %CF79 + +CF79: ; preds = %CF77 + %L53 = load i32* %Sl10 + store i8 %L40, i8* %0 + %E54 = extractelement <4 x i32> zeroinitializer, i32 1 + %Shuff55 = shufflevector <4 x i32> %Shuff21, <4 x i32> %I8, <4 x i32> <i32 4, i32 6, i32 undef, i32 2> + %I56 = insertelement <4 x i32> zeroinitializer, i32 %Sl51, i32 2 + %Tr = trunc <1 x i64> %Shuff13 to <1 x i16> + %Sl57 = select i1 %Cmp18, <2 x i32> <i32 -1, i32 -1>, <2 x i32> <i32 -1, i32 -1> + %Cmp58 = icmp uge <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %I56 + %L59 = load i8* %0 + store <1 x double> zeroinitializer, <1 x double>* %A2 + %E60 = extractelement <4 x i32> zeroinitializer, i32 0 + %Shuff61 = shufflevector <4 x i32> %I8, <4 x i32> %I8, <4 x i32> <i32 undef, i32 1, i32 undef, i32 undef> 
+ %I62 = insertelement <4 x i16> zeroinitializer, i16 %E12, i32 1 + %B63 = and <4 x i32> %Shuff61, <i32 -1, i32 -1, i32 -1, i32 -1> + %PC = bitcast double* %A3 to i32* + %Sl64 = select i1 %Cmp18, <4 x i32> %Shuff61, <4 x i32> %Shuff55 + %Cmp65 = icmp sgt i32 439732, %3 + br label %CF + +CF: ; preds = %CF79 + %L66 = load i32* %Sl10 + store i32 %E6, i32* %PC + %E67 = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 2 + %Shuff68 = shufflevector <4 x i32> %Sl64, <4 x i32> %I8, <4 x i32> <i32 5, i32 undef, i32 1, i32 undef> + %I69 = insertelement <4 x i16> %Shuff47, i16 %Sl, i32 3 + %B70 = sdiv <4 x i64> zeroinitializer, zeroinitializer + %FC71 = sitofp i32 %L66 to double + %Sl72 = select i1 %Cmp18, i64 %4, i64 %4 + %Cmp73 = icmp eq <4 x i64> zeroinitializer, %B70 + store i32 %B23, i32* %PC + store i32 %3, i32* %PC + store i32 %3, i32* %Sl10 + store i32 %L27, i32* %1 + store i32 0, i32* %PC + ret void +} diff --git a/test/CodeGen/Mips/msa/llvm-stress-s997348632.ll b/test/CodeGen/Mips/msa/llvm-stress-s997348632.ll new file mode 100644 index 0000000..dc4200a --- /dev/null +++ b/test/CodeGen/Mips/msa/llvm-stress-s997348632.ll @@ -0,0 +1,143 @@ +; RUN: llc -march=mips < %s +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s +; RUN: llc -march=mipsel < %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s + +; This test originally failed to select instructions for extract_vector_elt for +; v2f64 on MSA. +; It should at least successfully build. 
+ +define void @autogen_SD997348632(i8*, i32*, i64*, i32, i64, i8) { +BB: + %A4 = alloca <2 x i32> + %A3 = alloca <16 x i16> + %A2 = alloca <4 x i1> + %A1 = alloca <4 x i16> + %A = alloca <2 x i32> + %L = load i8* %0 + store i8 %L, i8* %0 + %E = extractelement <4 x i32> zeroinitializer, i32 0 + %Shuff = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 undef, i32 1, i32 3, i32 5> + %I = insertelement <2 x i1> zeroinitializer, i1 false, i32 1 + %FC = sitofp <4 x i32> zeroinitializer to <4 x double> + %Sl = select i1 false, <4 x i64> %Shuff, <4 x i64> %Shuff + %L5 = load i8* %0 + store i8 %5, i8* %0 + %E6 = extractelement <1 x i16> zeroinitializer, i32 0 + %Shuff7 = shufflevector <2 x i1> %I, <2 x i1> %I, <2 x i32> <i32 1, i32 undef> + %I8 = insertelement <1 x i16> zeroinitializer, i16 0, i32 0 + %B = xor i32 376034, %3 + %FC9 = fptoui float 0x406DB70180000000 to i64 + %Sl10 = select i1 false, <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> + %Cmp = icmp ult <4 x i64> zeroinitializer, zeroinitializer + %L11 = load i8* %0 + store i8 %L, i8* %0 + %E12 = extractelement <4 x i64> zeroinitializer, i32 2 + %Shuff13 = shufflevector <4 x i32> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> <i32 5, i32 7, i32 undef, i32 3> + %I14 = insertelement <8 x i32> zeroinitializer, i32 -1, i32 7 + %B15 = fdiv <4 x double> %FC, %FC + %Tr = trunc i32 376034 to i16 + %Sl16 = select i1 false, <8 x i32> %Sl10, <8 x i32> zeroinitializer + %Cmp17 = icmp uge i32 233658, %E + br label %CF + +CF: ; preds = %CF, %CF79, %CF84, %BB + %L18 = load i8* %0 + store i8 %L, i8* %0 + %E19 = extractelement <4 x i64> %Sl, i32 3 + %Shuff20 = shufflevector <2 x i1> %Shuff7, <2 x i1> %I, <2 x i32> <i32 2, i32 0> + %I21 = insertelement <4 x i64> zeroinitializer, i64 %FC9, i32 0 + %B22 = xor <8 x i32> %I14, %I14 + %Tr23 = trunc i16 0 to i8 + %Sl24 = select i1 false, <8 x 
i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32> zeroinitializer + %Cmp25 = icmp eq i1 false, false + br i1 %Cmp25, label %CF, label %CF79 + +CF79: ; preds = %CF + %L26 = load i8* %0 + store i8 %L26, i8* %0 + %E27 = extractelement <1 x i16> zeroinitializer, i32 0 + %Shuff28 = shufflevector <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> <i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11> + %I29 = insertelement <16 x i32> %Shuff28, i32 %B, i32 15 + %B30 = fdiv float 0.000000e+00, -6.749110e+06 + %Sl31 = select i1 false, i32 %3, i32 %3 + %Cmp32 = fcmp uno float 0.000000e+00, 0x406DB70180000000 + br i1 %Cmp32, label %CF, label %CF78 + +CF78: ; preds = %CF78, %CF79 + %L33 = load i8* %0 + store i8 %L, i8* %0 + %E34 = extractelement <16 x i32> %Shuff28, i32 1 + %Shuff35 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %I21, <4 x i32> <i32 undef, i32 6, i32 0, i32 2> + %I36 = insertelement <4 x double> %FC, double 0xA4A57F449CA36CC2, i32 2 + %Se = sext <4 x i1> %Cmp to <4 x i32> + %Sl37 = select i1 %Cmp17, i32 0, i32 0 + %Cmp38 = icmp ne i32 440284, 376034 + br i1 %Cmp38, label %CF78, label %CF80 + +CF80: ; preds = %CF80, %CF82, %CF78 + %L39 = load i8* %0 + store i8 %L, i8* %0 + %E40 = extractelement <2 x i1> %Shuff20, i32 1 + br i1 %E40, label %CF80, label %CF82 + +CF82: ; preds = %CF80 + %Shuff41 = shufflevector <2 x i1> zeroinitializer, <2 x i1> %Shuff20, <2 x i32> <i32 2, i32 0> + %I42 = insertelement <2 x i1> %Shuff41, i1 false, i32 0 + %B43 = sub i32 %E, 0 + %Sl44 = select i1 %Cmp32, <16 x i32> %Shuff28, <16 x i32> %Shuff28 + %Cmp45 = icmp sgt <4 x i64> zeroinitializer, %I21 + %L46 = load i8* %0 + store i8 %L11, i8* %0 
+ %E47 = extractelement <8 x i32> %Sl16, i32 4 + %Shuff48 = shufflevector <2 x i1> zeroinitializer, <2 x i1> %Shuff7, <2 x i32> <i32 undef, i32 1> + %I49 = insertelement <2 x i1> %Shuff48, i1 %Cmp17, i32 1 + %B50 = and <8 x i32> %I14, %Sl10 + %FC51 = fptoui float -6.749110e+06 to i1 + br i1 %FC51, label %CF80, label %CF81 + +CF81: ; preds = %CF81, %CF82 + %Sl52 = select i1 false, float -6.749110e+06, float 0x406DB70180000000 + %Cmp53 = icmp uge <2 x i32> <i32 -1, i32 -1>, <i32 -1, i32 -1> + %L54 = load i8* %0 + store i8 %L5, i8* %0 + %E55 = extractelement <8 x i32> zeroinitializer, i32 7 + %Shuff56 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 undef, i32 4, i32 6, i32 0> + %I57 = insertelement <2 x i1> %Shuff7, i1 false, i32 0 + %B58 = fmul <4 x double> %FC, %FC + %FC59 = fptoui <4 x double> %I36 to <4 x i16> + %Sl60 = select i1 %Cmp17, <2 x i1> %I, <2 x i1> %I57 + %Cmp61 = icmp ule <8 x i32> %B50, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> + %L62 = load i8* %0 + store i8 %L33, i8* %0 + %E63 = extractelement <4 x i64> %Shuff, i32 2 + %Shuff64 = shufflevector <4 x i64> %Shuff56, <4 x i64> %Shuff56, <4 x i32> <i32 5, i32 7, i32 1, i32 undef> + %I65 = insertelement <2 x i1> zeroinitializer, i1 false, i32 1 + %B66 = sdiv i32 %B, %E55 + %Tr67 = trunc i8 %L54 to i1 + br i1 %Tr67, label %CF81, label %CF83 + +CF83: ; preds = %CF83, %CF81 + %Sl68 = select i1 %Cmp17, i1 %Cmp25, i1 %Tr67 + br i1 %Sl68, label %CF83, label %CF84 + +CF84: ; preds = %CF83 + %Cmp69 = icmp uge i32 %E, %E34 + br i1 %Cmp69, label %CF, label %CF77 + +CF77: ; preds = %CF84 + %L70 = load i8* %0 + store i8 %L, i8* %0 + %E71 = extractelement <4 x i64> %Shuff, i32 0 + %Shuff72 = shufflevector <2 x i1> zeroinitializer, <2 x i1> %I, <2 x i32> <i32 3, i32 1> + %I73 = insertelement <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, i32 %B66, i32 1 + %FC74 = uitofp i1 %Cmp32 to double + %Sl75 = select i1 %FC51, i16 9704, i16 0 
+ %Cmp76 = icmp ugt <1 x i16> %I8, %I8 + store i8 %L39, i8* %0 + store i8 %5, i8* %0 + store i8 %Tr23, i8* %0 + store i8 %L, i8* %0 + store i8 %5, i8* %0 + ret void +} diff --git a/test/CodeGen/Mips/msa/llvm-stress-sz1-s742806235.ll b/test/CodeGen/Mips/msa/llvm-stress-sz1-s742806235.ll new file mode 100644 index 0000000..8c4fcba --- /dev/null +++ b/test/CodeGen/Mips/msa/llvm-stress-sz1-s742806235.ll @@ -0,0 +1,23 @@ +; RUN: llc -march=mips < %s +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s +; RUN: llc -march=mipsel < %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s + +; This test originally failed to select code for a truncstore of a +; build_vector. +; It should at least successfully build. + +define void @autogen_SD742806235(i8*, i32*, i64*, i32, i64, i8) { +BB: + %A4 = alloca double + %A3 = alloca double + %A2 = alloca <8 x i8> + %A1 = alloca <4 x float> + %A = alloca i1 + store i8 %5, i8* %0 + store i8 %5, i8* %0 + store i8 %5, i8* %0 + store <8 x i8> <i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1>, <8 x i8>* %A2 + store i8 %5, i8* %0 + ret void +} diff --git a/test/CodeGen/Mips/msa/shift-dagcombine.ll b/test/CodeGen/Mips/msa/shift-dagcombine.ll new file mode 100644 index 0000000..0d809fb --- /dev/null +++ b/test/CodeGen/Mips/msa/shift-dagcombine.ll @@ -0,0 +1,70 @@ +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s + +define void @ashr_v4i32(<4 x i32>* %c) nounwind { + ; CHECK-LABEL: ashr_v4i32: + + %1 = ashr <4 x i32> <i32 1, i32 2, i32 4, i32 8>, + <i32 0, i32 1, i32 2, i32 3> + ; CHECK-NOT: sra + ; CHECK-DAG: ldi.w [[R1:\$w[0-9]+]], 1 + ; CHECK-NOT: sra + store volatile <4 x i32> %1, <4 x i32>* %c + ; CHECK-DAG: st.w [[R1]], 0($4) + + %2 = ashr <4 x i32> <i32 -2, i32 -4, i32 -8, i32 -16>, + <i32 0, i32 1, i32 2, i32 3> + ; CHECK-NOT: sra + ; CHECK-DAG: ldi.w [[R1:\$w[0-9]+]], -2 + ; CHECK-NOT: sra + store volatile <4 x i32> %2, <4 x i32>* %c + ; CHECK-DAG: st.w [[R1]], 0($4) + + ret void + ; CHECK-LABEL: .size ashr_v4i32 +} + +define 
void @lshr_v4i32(<4 x i32>* %c) nounwind { + ; CHECK-LABEL: lshr_v4i32: + + %1 = lshr <4 x i32> <i32 1, i32 2, i32 4, i32 8>, + <i32 0, i32 1, i32 2, i32 3> + ; CHECK-NOT: srl + ; CHECK-DAG: ldi.w [[R1:\$w[0-9]+]], 1 + ; CHECK-NOT: srl + store volatile <4 x i32> %1, <4 x i32>* %c + ; CHECK-DAG: st.w [[R1]], 0($4) + + %2 = lshr <4 x i32> <i32 -2, i32 -4, i32 -8, i32 -16>, + <i32 0, i32 1, i32 2, i32 3> + ; CHECK-NOT: srl + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], %lo + ; CHECK-NOT: srl + store volatile <4 x i32> %2, <4 x i32>* %c + ; CHECK-DAG: st.w [[R1]], 0($4) + + ret void + ; CHECK-LABEL: .size lshr_v4i32 +} + +define void @shl_v4i32(<4 x i32>* %c) nounwind { + ; CHECK-LABEL: shl_v4i32: + + %1 = shl <4 x i32> <i32 8, i32 4, i32 2, i32 1>, + <i32 0, i32 1, i32 2, i32 3> + ; CHECK-NOT: sll + ; CHECK-DAG: ldi.w [[R1:\$w[0-9]+]], 8 + ; CHECK-NOT: sll + store volatile <4 x i32> %1, <4 x i32>* %c + ; CHECK-DAG: st.w [[R1]], 0($4) + + %2 = shl <4 x i32> <i32 -8, i32 -4, i32 -2, i32 -1>, + <i32 0, i32 1, i32 2, i32 3> + ; CHECK-NOT: sll + ; CHECK-DAG: ldi.w [[R1:\$w[0-9]+]], -8 + ; CHECK-NOT: sll + store volatile <4 x i32> %2, <4 x i32>* %c + ; CHECK-DAG: st.w [[R1]], 0($4) + + ret void + ; CHECK-LABEL: .size shl_v4i32 +} diff --git a/test/CodeGen/Mips/msa/shuffle.ll b/test/CodeGen/Mips/msa/shuffle.ll new file mode 100644 index 0000000..316c669 --- /dev/null +++ b/test/CodeGen/Mips/msa/shuffle.ll @@ -0,0 +1,803 @@ +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +define void @vshf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: vshf_v16i8_0: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo + ; CHECK-DAG: vshf.b [[R3]], [[R1]], 
[[R1]] + store <16 x i8> %2, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size vshf_v16i8_0 +} + +define void @vshf_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: vshf_v16i8_1: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1] + store <16 x i8> %2, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size vshf_v16i8_1 +} + +define void @vshf_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: vshf_v16i8_2: + + %1 = load <16 x i8>* %a + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 16> + ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo + ; CHECK-DAG: vshf.b [[R3]], [[R2]], [[R2]] + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size vshf_v16i8_2 +} + +define void @vshf_v16i8_3(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: vshf_v16i8_3: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 17, i32 24, i32 25, i32 18, i32 19, i32 20, i32 28, i32 19, i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3> + ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo + ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R2]] + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size vshf_v16i8_3 +} + +define void @vshf_v16i8_4(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: vshf_v16i8_4: + + 
%1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = shufflevector <16 x i8> %1, <16 x i8> %1, <16 x i32> <i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17> + ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1] + store <16 x i8> %2, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size vshf_v16i8_4 +} + +define void @vshf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: vshf_v8i16_0: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo + ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R1]] + store <8 x i16> %2, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size vshf_v8i16_0 +} + +define void @vshf_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: vshf_v8i16_1: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1] + store <8 x i16> %2, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size vshf_v8i16_1 +} + +define void @vshf_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: vshf_v8i16_2: + + %1 = load <8 x i16>* %a + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 8> + ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo + ; CHECK-DAG: vshf.h [[R3]], [[R2]], [[R2]] + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size vshf_v8i16_2 +} + +define void @vshf_v8i16_3(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) 
nounwind { + ; CHECK: vshf_v8i16_3: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3> + ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo + ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R2]] + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size vshf_v8i16_3 +} + +define void @vshf_v8i16_4(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: vshf_v8i16_4: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = shufflevector <8 x i16> %1, <8 x i16> %1, <8 x i32> <i32 1, i32 9, i32 1, i32 9, i32 1, i32 9, i32 1, i32 9> + ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1] + store <8 x i16> %2, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size vshf_v8i16_4 +} + +; Note: v4i32 only has one 4-element set so it's impossible to get a vshf.w +; instruction when using a single vector. 
+ +define void @vshf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: vshf_v4i32_0: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27 + store <4 x i32> %2, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size vshf_v4i32_0 +} + +define void @vshf_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: vshf_v4i32_1: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85 + store <4 x i32> %2, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size vshf_v4i32_1 +} + +define void @vshf_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: vshf_v4i32_2: + + %1 = load <4 x i32>* %a + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 5, i32 6, i32 4> + ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R2]], 36 + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size vshf_v4i32_2 +} + +define void @vshf_v4i32_3(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: vshf_v4i32_3: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 6, i32 4> + ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], %lo + ; CHECK-DAG: vshf.w [[R3]], [[R1]], [[R2]] + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size vshf_v4i32_3 +} + +define void @vshf_v4i32_4(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: vshf_v4i32_4: + + %1 
= load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = shufflevector <4 x i32> %1, <4 x i32> %1, <4 x i32> <i32 1, i32 5, i32 5, i32 1> + ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85 + store <4 x i32> %2, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size vshf_v4i32_4 +} + +define void @vshf_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: vshf_v2i64_0: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 0> + ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo + ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R1]] + store <2 x i64> %2, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size vshf_v2i64_0 +} + +define void @vshf_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: vshf_v2i64_1: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1> + ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1] + store <2 x i64> %2, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size vshf_v2i64_1 +} + +define void @vshf_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: vshf_v2i64_2: + + %1 = load <2 x i64>* %a + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 2> + ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo + ; CHECK-DAG: vshf.d [[R3]], [[R2]], [[R2]] + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size vshf_v2i64_2 +} + +define void @vshf_v2i64_3(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: vshf_v2i64_3: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x 
i32> <i32 1, i32 2> + ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo + ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R2]] + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size vshf_v2i64_3 +} + +define void @vshf_v2i64_4(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: vshf_v2i64_4: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = shufflevector <2 x i64> %1, <2 x i64> %1, <2 x i32> <i32 1, i32 3> + ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1] + store <2 x i64> %2, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size vshf_v2i64_4 +} + +define void @shf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: shf_v16i8_0: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 2, i32 0, i32 5, i32 7, i32 6, i32 4, i32 9, i32 11, i32 10, i32 8, i32 13, i32 15, i32 14, i32 12> + ; CHECK-DAG: shf.b [[R3:\$w[0-9]+]], [[R1]], 45 + store <16 x i8> %2, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size shf_v16i8_0 +} + +define void @shf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: shf_v8i16_0: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> + ; CHECK-DAG: shf.h [[R3:\$w[0-9]+]], [[R1]], 27 + store <8 x i16> %2, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size shf_v8i16_0 +} + +define void @shf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: shf_v4i32_0: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27 + store <4 x i32> %2, <4 x i32>* %c + ; 
CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size shf_v4i32_0 +} + +; shf.d does not exist + +define void @ilvev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: ilvev_v16i8_0: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <16 x i8> %1, <16 x i8> %2, + <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> + ; CHECK-DAG: ilvev.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size ilvev_v16i8_0 +} + +define void @ilvev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: ilvev_v8i16_0: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> + ; CHECK-DAG: ilvev.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size ilvev_v8i16_0 +} + +define void @ilvev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: ilvev_v4i32_0: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + ; CHECK-DAG: ilvev.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size ilvev_v4i32_0 +} + +define void @ilvev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: ilvev_v2i64_0: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: 
ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2> + ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size ilvev_v2i64_0 +} + +define void @ilvod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: ilvod_v16i8_0: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <16 x i8> %1, <16 x i8> %2, + <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> + ; CHECK-DAG: ilvod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size ilvod_v16i8_0 +} + +define void @ilvod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: ilvod_v8i16_0: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> + ; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size ilvod_v8i16_0 +} + +define void @ilvod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: ilvod_v4i32_0: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + ; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size ilvod_v4i32_0 +} + +define void @ilvod_v2i64_0(<2 x i64>* %c, <2 x 
i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: ilvod_v2i64_0: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3> + ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size ilvod_v2i64_0 +} + +define void @ilvl_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: ilvl_v16i8_0: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <16 x i8> %1, <16 x i8> %2, + <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> + ; CHECK-DAG: ilvl.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size ilvl_v16i8_0 +} + +define void @ilvl_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: ilvl_v8i16_0: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> + ; CHECK-DAG: ilvl.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size ilvl_v8i16_0 +} + +define void @ilvl_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: ilvl_v4i32_0: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> + ; CHECK-DAG: ilvl.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + 
store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size ilvl_v4i32_0 +} + +define void @ilvl_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: ilvl_v2i64_0: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2> + ; ilvl.d and ilvev.d are equivalent for v2i64 + ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size ilvl_v2i64_0 +} + +define void @ilvr_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: ilvr_v16i8_0: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <16 x i8> %1, <16 x i8> %2, + <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> + ; CHECK-DAG: ilvr.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size ilvr_v16i8_0 +} + +define void @ilvr_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: ilvr_v8i16_0: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> + ; CHECK-DAG: ilvr.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size ilvr_v8i16_0 +} + +define void @ilvr_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: ilvr_v4i32_0: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + 
%2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> + ; CHECK-DAG: ilvr.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size ilvr_v4i32_0 +} + +define void @ilvr_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: ilvr_v2i64_0: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3> + ; ilvr.d and ilvod.d are equivalent for v2i64 + ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size ilvr_v2i64_0 +} + +define void @pckev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: pckev_v16i8_0: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <16 x i8> %1, <16 x i8> %2, + <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> + ; CHECK-DAG: pckev.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size pckev_v16i8_0 +} + +define void @pckev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: pckev_v8i16_0: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + ; CHECK-DAG: pckev.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + 
; CHECK: .size pckev_v8i16_0 +} + +define void @pckev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: pckev_v4i32_0: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + ; CHECK-DAG: pckev.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size pckev_v4i32_0 +} + +define void @pckev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: pckev_v2i64_0: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2> + ; pckev.d and ilvev.d are equivalent for v2i64 + ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size pckev_v2i64_0 +} + +define void @pckod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: pckod_v16i8_0: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <16 x i8> %1, <16 x i8> %2, + <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> + ; CHECK-DAG: pckod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size pckod_v16i8_0 +} + +define void @pckod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: pckod_v8i16_0: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <8 x i16> %1, 
<8 x i16> %2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + ; CHECK-DAG: pckod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size pckod_v8i16_0 +} + +define void @pckod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: pckod_v4i32_0: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + ; CHECK-DAG: pckod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size pckod_v4i32_0 +} + +define void @pckod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: pckod_v2i64_0: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3> + ; pckod.d and ilvod.d are equivalent for v2i64 + ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size pckod_v2i64_0 +} + +define void @splati_v16i8_0(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: splati_v16i8_0: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = shufflevector <16 x i8> %1, <16 x i8> undef, + <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> + ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][4] + store <16 x i8> %2, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size splati_v16i8_0 +} + +define void @splati_v8i16_0(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: splati_v8i16_0: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h 
[[R1:\$w[0-9]+]], 0($5) + %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> + ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][4] + store <8 x i16> %2, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size splati_v8i16_0 +} + +define void @splati_v4i32_0(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: splati_v4i32_0: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + ; shf.w and splati.w are equivalent + ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 255 + store <4 x i32> %2, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size splati_v4i32_0 +} + +define void @splati_v2i64_0(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: splati_v2i64_0: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1> + ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1] + store <2 x i64> %2, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size splati_v2i64_0 +} diff --git a/test/CodeGen/Mips/msa/special.ll b/test/CodeGen/Mips/msa/special.ll new file mode 100644 index 0000000..60a4369 --- /dev/null +++ b/test/CodeGen/Mips/msa/special.ll @@ -0,0 +1,26 @@ +; Test the MSA intrinsics that are encoded with the SPECIAL instruction format. 
+ +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s + +define i32 @llvm_mips_lsa_test(i32 %a, i32 %b) nounwind { +entry: + %0 = tail call i32 @llvm.mips.lsa(i32 %a, i32 %b, i32 2) + ret i32 %0 +} + +declare i32 @llvm.mips.lsa(i32, i32, i32) nounwind + +; CHECK: llvm_mips_lsa_test: +; CHECK: lsa {{\$[0-9]+}}, {{\$[0-9]+}}, {{\$[0-9]+}}, 2 +; CHECK: .size llvm_mips_lsa_test + +define i32 @lsa_test(i32 %a, i32 %b) nounwind { +entry: + %0 = shl i32 %b, 2 + %1 = add i32 %a, %0 + ret i32 %1 +} + +; CHECK: lsa_test: +; CHECK: lsa {{\$[0-9]+}}, {{\$[0-9]+}}, {{\$[0-9]+}}, 2 +; CHECK: .size lsa_test diff --git a/test/CodeGen/Mips/msa/spill.ll b/test/CodeGen/Mips/msa/spill.ll new file mode 100644 index 0000000..66f896a --- /dev/null +++ b/test/CodeGen/Mips/msa/spill.ll @@ -0,0 +1,601 @@ +; Test that the correct instruction is chosen for spill and reload by trying +; to have 33 live MSA registers simultaneously + +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +define i32 @test_i8(<16 x i8>* %p0, <16 x i8>* %q1) nounwind { +entry: + %p1 = getelementptr <16 x i8>* %p0, i32 1 + %p2 = getelementptr <16 x i8>* %p0, i32 2 + %p3 = getelementptr <16 x i8>* %p0, i32 3 + %p4 = getelementptr <16 x i8>* %p0, i32 4 + %p5 = getelementptr <16 x i8>* %p0, i32 5 + %p6 = getelementptr <16 x i8>* %p0, i32 6 + %p7 = getelementptr <16 x i8>* %p0, i32 7 + %p8 = getelementptr <16 x i8>* %p0, i32 8 + %p9 = getelementptr <16 x i8>* %p0, i32 9 + %p10 = getelementptr <16 x i8>* %p0, i32 10 + %p11 = getelementptr <16 x i8>* %p0, i32 11 + %p12 = getelementptr <16 x i8>* %p0, i32 12 + %p13 = getelementptr <16 x i8>* %p0, i32 13 + %p14 = getelementptr <16 x i8>* %p0, i32 14 + %p15 = getelementptr <16 x i8>* %p0, i32 15 + %p16 = getelementptr <16 x i8>* %p0, i32 16 + %p17 = getelementptr <16 x i8>* %p0, i32 17 + %p18 = getelementptr <16 x i8>* %p0, i32 18 + %p19 = getelementptr <16 x i8>* %p0, i32 19 + %p20 = 
getelementptr <16 x i8>* %p0, i32 20 + %p21 = getelementptr <16 x i8>* %p0, i32 21 + %p22 = getelementptr <16 x i8>* %p0, i32 22 + %p23 = getelementptr <16 x i8>* %p0, i32 23 + %p24 = getelementptr <16 x i8>* %p0, i32 24 + %p25 = getelementptr <16 x i8>* %p0, i32 25 + %p26 = getelementptr <16 x i8>* %p0, i32 26 + %p27 = getelementptr <16 x i8>* %p0, i32 27 + %p28 = getelementptr <16 x i8>* %p0, i32 28 + %p29 = getelementptr <16 x i8>* %p0, i32 29 + %p30 = getelementptr <16 x i8>* %p0, i32 30 + %p31 = getelementptr <16 x i8>* %p0, i32 31 + %p32 = getelementptr <16 x i8>* %p0, i32 32 + %p33 = getelementptr <16 x i8>* %p0, i32 33 + %0 = load <16 x i8>* %p0, align 16 + %1 = load <16 x i8>* %p1, align 16 + %2 = load <16 x i8>* %p2, align 16 + %3 = load <16 x i8>* %p3, align 16 + %4 = load <16 x i8>* %p4, align 16 + %5 = load <16 x i8>* %p5, align 16 + %6 = load <16 x i8>* %p6, align 16 + %7 = load <16 x i8>* %p7, align 16 + %8 = load <16 x i8>* %p8, align 16 + %9 = load <16 x i8>* %p9, align 16 + %10 = load <16 x i8>* %p10, align 16 + %11 = load <16 x i8>* %p11, align 16 + %12 = load <16 x i8>* %p12, align 16 + %13 = load <16 x i8>* %p13, align 16 + %14 = load <16 x i8>* %p14, align 16 + %15 = load <16 x i8>* %p15, align 16 + %16 = load <16 x i8>* %p16, align 16 + %17 = load <16 x i8>* %p17, align 16 + %18 = load <16 x i8>* %p18, align 16 + %19 = load <16 x i8>* %p19, align 16 + %20 = load <16 x i8>* %p20, align 16 + %21 = load <16 x i8>* %p21, align 16 + %22 = load <16 x i8>* %p22, align 16 + %23 = load <16 x i8>* %p23, align 16 + %24 = load <16 x i8>* %p24, align 16 + %25 = load <16 x i8>* %p25, align 16 + %26 = load <16 x i8>* %p26, align 16 + %27 = load <16 x i8>* %p27, align 16 + %28 = load <16 x i8>* %p28, align 16 + %29 = load <16 x i8>* %p29, align 16 + %30 = load <16 x i8>* %p30, align 16 + %31 = load <16 x i8>* %p31, align 16 + %32 = load <16 x i8>* %p32, align 16 + %33 = load <16 x i8>* %p33, align 16 + %r1 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 
x i8> %1) + %r2 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r1, <16 x i8> %2) + %r3 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r2, <16 x i8> %3) + %r4 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r3, <16 x i8> %4) + %r5 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r4, <16 x i8> %5) + %r6 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r5, <16 x i8> %6) + %r7 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r6, <16 x i8> %7) + %r8 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r7, <16 x i8> %8) + %r9 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r8, <16 x i8> %9) + %r10 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r9, <16 x i8> %10) + %r11 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r10, <16 x i8> %11) + %r12 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r11, <16 x i8> %12) + %r13 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r12, <16 x i8> %13) + %r14 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r13, <16 x i8> %14) + %r15 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r14, <16 x i8> %15) + %r16 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r15, <16 x i8> %16) + %r17 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r16, <16 x i8> %17) + %r18 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r17, <16 x i8> %18) + %r19 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r18, <16 x i8> %19) + %r20 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r19, <16 x i8> %20) + %r21 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r20, <16 x i8> %21) + %r22 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r21, <16 x i8> %22) + %r23 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r22, <16 x i8> %23) + %r24 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r23, <16 x i8> %24) + %r25 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r24, <16 x i8> %25) + %r26 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r25, <16 x i8> %26) + %r27 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r26, <16 x i8> %27) + %r28 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r27, <16 x i8> %28) + %r29 = call <16 x i8> 
@llvm.mips.addv.b(<16 x i8> %r28, <16 x i8> %29) + %r30 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r29, <16 x i8> %30) + %r31 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r30, <16 x i8> %31) + %r32 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r31, <16 x i8> %32) + %r33 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r32, <16 x i8> %33) + %rx1 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r33, <16 x i8> %1) + %rx2 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx1, <16 x i8> %2) + %rx3 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx2, <16 x i8> %3) + %rx4 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx3, <16 x i8> %4) + %rx5 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx4, <16 x i8> %5) + %rx6 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx5, <16 x i8> %6) + %rx7 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx6, <16 x i8> %7) + %rx8 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx7, <16 x i8> %8) + %rx9 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx8, <16 x i8> %9) + %rx10 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx9, <16 x i8> %10) + %rx11 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx10, <16 x i8> %11) + %rx12 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx11, <16 x i8> %12) + %rx13 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx12, <16 x i8> %13) + %rx14 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx13, <16 x i8> %14) + %rx15 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx14, <16 x i8> %15) + %rx16 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx15, <16 x i8> %16) + %rx17 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx16, <16 x i8> %17) + %rx18 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx17, <16 x i8> %18) + %rx19 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx18, <16 x i8> %19) + %rx20 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx19, <16 x i8> %20) + %rx21 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx20, <16 x i8> %21) + %rx22 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx21, <16 x i8> %22) + %rx23 = call <16 x i8> 
@llvm.mips.addv.b(<16 x i8> %rx22, <16 x i8> %23) + %rx24 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx23, <16 x i8> %24) + %rx25 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx24, <16 x i8> %25) + %rx26 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx25, <16 x i8> %26) + %rx27 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx26, <16 x i8> %27) + %rx28 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx27, <16 x i8> %28) + %rx29 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx28, <16 x i8> %29) + %rx30 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx29, <16 x i8> %30) + %rx31 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx30, <16 x i8> %31) + %rx32 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx31, <16 x i8> %32) + %rx33 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx32, <16 x i8> %33) + %res = call i32 @llvm.mips.copy.s.b(<16 x i8> %rx33, i32 0) + ret i32 %res +} + +declare <16 x i8> @llvm.mips.addv.b(<16 x i8>, <16 x i8>) nounwind +declare i32 @llvm.mips.copy.s.b(<16 x i8>, i32) nounwind + +; CHECK: test_i8: +; CHECK: st.b {{.*}} Spill +; CHECK: st.b {{.*}} Spill +; CHECK: ld.b {{.*}} Reload +; CHECK: ld.b {{.*}} Reload +; CHECK: .size + +define i32 @test_i16(<8 x i16>* %p0, <8 x i16>* %q1) nounwind { +entry: + %p1 = getelementptr <8 x i16>* %p0, i32 1 + %p2 = getelementptr <8 x i16>* %p0, i32 2 + %p3 = getelementptr <8 x i16>* %p0, i32 3 + %p4 = getelementptr <8 x i16>* %p0, i32 4 + %p5 = getelementptr <8 x i16>* %p0, i32 5 + %p6 = getelementptr <8 x i16>* %p0, i32 6 + %p7 = getelementptr <8 x i16>* %p0, i32 7 + %p8 = getelementptr <8 x i16>* %p0, i32 8 + %p9 = getelementptr <8 x i16>* %p0, i32 9 + %p10 = getelementptr <8 x i16>* %p0, i32 10 + %p11 = getelementptr <8 x i16>* %p0, i32 11 + %p12 = getelementptr <8 x i16>* %p0, i32 12 + %p13 = getelementptr <8 x i16>* %p0, i32 13 + %p14 = getelementptr <8 x i16>* %p0, i32 14 + %p15 = getelementptr <8 x i16>* %p0, i32 15 + %p16 = getelementptr <8 x i16>* %p0, i32 16 + %p17 = getelementptr <8 x i16>* %p0, i32 17 + 
%p18 = getelementptr <8 x i16>* %p0, i32 18 + %p19 = getelementptr <8 x i16>* %p0, i32 19 + %p20 = getelementptr <8 x i16>* %p0, i32 20 + %p21 = getelementptr <8 x i16>* %p0, i32 21 + %p22 = getelementptr <8 x i16>* %p0, i32 22 + %p23 = getelementptr <8 x i16>* %p0, i32 23 + %p24 = getelementptr <8 x i16>* %p0, i32 24 + %p25 = getelementptr <8 x i16>* %p0, i32 25 + %p26 = getelementptr <8 x i16>* %p0, i32 26 + %p27 = getelementptr <8 x i16>* %p0, i32 27 + %p28 = getelementptr <8 x i16>* %p0, i32 28 + %p29 = getelementptr <8 x i16>* %p0, i32 29 + %p30 = getelementptr <8 x i16>* %p0, i32 30 + %p31 = getelementptr <8 x i16>* %p0, i32 31 + %p32 = getelementptr <8 x i16>* %p0, i32 32 + %p33 = getelementptr <8 x i16>* %p0, i32 33 + %0 = load <8 x i16>* %p0, align 16 + %1 = load <8 x i16>* %p1, align 16 + %2 = load <8 x i16>* %p2, align 16 + %3 = load <8 x i16>* %p3, align 16 + %4 = load <8 x i16>* %p4, align 16 + %5 = load <8 x i16>* %p5, align 16 + %6 = load <8 x i16>* %p6, align 16 + %7 = load <8 x i16>* %p7, align 16 + %8 = load <8 x i16>* %p8, align 16 + %9 = load <8 x i16>* %p9, align 16 + %10 = load <8 x i16>* %p10, align 16 + %11 = load <8 x i16>* %p11, align 16 + %12 = load <8 x i16>* %p12, align 16 + %13 = load <8 x i16>* %p13, align 16 + %14 = load <8 x i16>* %p14, align 16 + %15 = load <8 x i16>* %p15, align 16 + %16 = load <8 x i16>* %p16, align 16 + %17 = load <8 x i16>* %p17, align 16 + %18 = load <8 x i16>* %p18, align 16 + %19 = load <8 x i16>* %p19, align 16 + %20 = load <8 x i16>* %p20, align 16 + %21 = load <8 x i16>* %p21, align 16 + %22 = load <8 x i16>* %p22, align 16 + %23 = load <8 x i16>* %p23, align 16 + %24 = load <8 x i16>* %p24, align 16 + %25 = load <8 x i16>* %p25, align 16 + %26 = load <8 x i16>* %p26, align 16 + %27 = load <8 x i16>* %p27, align 16 + %28 = load <8 x i16>* %p28, align 16 + %29 = load <8 x i16>* %p29, align 16 + %30 = load <8 x i16>* %p30, align 16 + %31 = load <8 x i16>* %p31, align 16 + %32 = load <8 x i16>* %p32, align 
16 + %33 = load <8 x i16>* %p33, align 16 + %r1 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %1) + %r2 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r1, <8 x i16> %2) + %r3 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r2, <8 x i16> %3) + %r4 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r3, <8 x i16> %4) + %r5 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r4, <8 x i16> %5) + %r6 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r5, <8 x i16> %6) + %r7 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r6, <8 x i16> %7) + %r8 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r7, <8 x i16> %8) + %r9 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r8, <8 x i16> %9) + %r10 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r9, <8 x i16> %10) + %r11 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r10, <8 x i16> %11) + %r12 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r11, <8 x i16> %12) + %r13 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r12, <8 x i16> %13) + %r14 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r13, <8 x i16> %14) + %r15 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r14, <8 x i16> %15) + %r16 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r15, <8 x i16> %16) + %r17 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r16, <8 x i16> %17) + %r18 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r17, <8 x i16> %18) + %r19 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r18, <8 x i16> %19) + %r20 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r19, <8 x i16> %20) + %r21 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r20, <8 x i16> %21) + %r22 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r21, <8 x i16> %22) + %r23 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r22, <8 x i16> %23) + %r24 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r23, <8 x i16> %24) + %r25 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r24, <8 x i16> %25) + %r26 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r25, <8 x i16> %26) + %r27 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r26, <8 x i16> %27) + %r28 = call 
<8 x i16> @llvm.mips.addv.h(<8 x i16> %r27, <8 x i16> %28) + %r29 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r28, <8 x i16> %29) + %r30 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r29, <8 x i16> %30) + %r31 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r30, <8 x i16> %31) + %r32 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r31, <8 x i16> %32) + %r33 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r32, <8 x i16> %33) + %rx1 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r33, <8 x i16> %1) + %rx2 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx1, <8 x i16> %2) + %rx3 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx2, <8 x i16> %3) + %rx4 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx3, <8 x i16> %4) + %rx5 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx4, <8 x i16> %5) + %rx6 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx5, <8 x i16> %6) + %rx7 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx6, <8 x i16> %7) + %rx8 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx7, <8 x i16> %8) + %rx9 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx8, <8 x i16> %9) + %rx10 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx9, <8 x i16> %10) + %rx11 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx10, <8 x i16> %11) + %rx12 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx11, <8 x i16> %12) + %rx13 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx12, <8 x i16> %13) + %rx14 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx13, <8 x i16> %14) + %rx15 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx14, <8 x i16> %15) + %rx16 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx15, <8 x i16> %16) + %rx17 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx16, <8 x i16> %17) + %rx18 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx17, <8 x i16> %18) + %rx19 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx18, <8 x i16> %19) + %rx20 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx19, <8 x i16> %20) + %rx21 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx20, <8 x i16> %21) + %rx22 = call <8 x i16> 
@llvm.mips.addv.h(<8 x i16> %rx21, <8 x i16> %22) + %rx23 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx22, <8 x i16> %23) + %rx24 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx23, <8 x i16> %24) + %rx25 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx24, <8 x i16> %25) + %rx26 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx25, <8 x i16> %26) + %rx27 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx26, <8 x i16> %27) + %rx28 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx27, <8 x i16> %28) + %rx29 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx28, <8 x i16> %29) + %rx30 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx29, <8 x i16> %30) + %rx31 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx30, <8 x i16> %31) + %rx32 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx31, <8 x i16> %32) + %rx33 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx32, <8 x i16> %33) + %res = call i32 @llvm.mips.copy.s.h(<8 x i16> %rx33, i32 0) + ret i32 %res +} + +declare <8 x i16> @llvm.mips.addv.h(<8 x i16>, <8 x i16>) nounwind +declare i32 @llvm.mips.copy.s.h(<8 x i16>, i32) nounwind + +; CHECK: test_i16: +; CHECK: st.h {{.*}} Spill +; CHECK: st.h {{.*}} Spill +; CHECK: ld.h {{.*}} Reload +; CHECK: ld.h {{.*}} Reload +; CHECK: .size + +define i32 @test_i32(<4 x i32>* %p0, <4 x i32>* %q1) nounwind { +entry: + %p1 = getelementptr <4 x i32>* %p0, i32 1 + %p2 = getelementptr <4 x i32>* %p0, i32 2 + %p3 = getelementptr <4 x i32>* %p0, i32 3 + %p4 = getelementptr <4 x i32>* %p0, i32 4 + %p5 = getelementptr <4 x i32>* %p0, i32 5 + %p6 = getelementptr <4 x i32>* %p0, i32 6 + %p7 = getelementptr <4 x i32>* %p0, i32 7 + %p8 = getelementptr <4 x i32>* %p0, i32 8 + %p9 = getelementptr <4 x i32>* %p0, i32 9 + %p10 = getelementptr <4 x i32>* %p0, i32 10 + %p11 = getelementptr <4 x i32>* %p0, i32 11 + %p12 = getelementptr <4 x i32>* %p0, i32 12 + %p13 = getelementptr <4 x i32>* %p0, i32 13 + %p14 = getelementptr <4 x i32>* %p0, i32 14 + %p15 = getelementptr <4 x i32>* %p0, i32 15 + %p16 = 
getelementptr <4 x i32>* %p0, i32 16 + %p17 = getelementptr <4 x i32>* %p0, i32 17 + %p18 = getelementptr <4 x i32>* %p0, i32 18 + %p19 = getelementptr <4 x i32>* %p0, i32 19 + %p20 = getelementptr <4 x i32>* %p0, i32 20 + %p21 = getelementptr <4 x i32>* %p0, i32 21 + %p22 = getelementptr <4 x i32>* %p0, i32 22 + %p23 = getelementptr <4 x i32>* %p0, i32 23 + %p24 = getelementptr <4 x i32>* %p0, i32 24 + %p25 = getelementptr <4 x i32>* %p0, i32 25 + %p26 = getelementptr <4 x i32>* %p0, i32 26 + %p27 = getelementptr <4 x i32>* %p0, i32 27 + %p28 = getelementptr <4 x i32>* %p0, i32 28 + %p29 = getelementptr <4 x i32>* %p0, i32 29 + %p30 = getelementptr <4 x i32>* %p0, i32 30 + %p31 = getelementptr <4 x i32>* %p0, i32 31 + %p32 = getelementptr <4 x i32>* %p0, i32 32 + %p33 = getelementptr <4 x i32>* %p0, i32 33 + %0 = load <4 x i32>* %p0, align 16 + %1 = load <4 x i32>* %p1, align 16 + %2 = load <4 x i32>* %p2, align 16 + %3 = load <4 x i32>* %p3, align 16 + %4 = load <4 x i32>* %p4, align 16 + %5 = load <4 x i32>* %p5, align 16 + %6 = load <4 x i32>* %p6, align 16 + %7 = load <4 x i32>* %p7, align 16 + %8 = load <4 x i32>* %p8, align 16 + %9 = load <4 x i32>* %p9, align 16 + %10 = load <4 x i32>* %p10, align 16 + %11 = load <4 x i32>* %p11, align 16 + %12 = load <4 x i32>* %p12, align 16 + %13 = load <4 x i32>* %p13, align 16 + %14 = load <4 x i32>* %p14, align 16 + %15 = load <4 x i32>* %p15, align 16 + %16 = load <4 x i32>* %p16, align 16 + %17 = load <4 x i32>* %p17, align 16 + %18 = load <4 x i32>* %p18, align 16 + %19 = load <4 x i32>* %p19, align 16 + %20 = load <4 x i32>* %p20, align 16 + %21 = load <4 x i32>* %p21, align 16 + %22 = load <4 x i32>* %p22, align 16 + %23 = load <4 x i32>* %p23, align 16 + %24 = load <4 x i32>* %p24, align 16 + %25 = load <4 x i32>* %p25, align 16 + %26 = load <4 x i32>* %p26, align 16 + %27 = load <4 x i32>* %p27, align 16 + %28 = load <4 x i32>* %p28, align 16 + %29 = load <4 x i32>* %p29, align 16 + %30 = load <4 x i32>* %p30, 
align 16 + %31 = load <4 x i32>* %p31, align 16 + %32 = load <4 x i32>* %p32, align 16 + %33 = load <4 x i32>* %p33, align 16 + %r1 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %1) + %r2 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r1, <4 x i32> %2) + %r3 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r2, <4 x i32> %3) + %r4 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r3, <4 x i32> %4) + %r5 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r4, <4 x i32> %5) + %r6 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r5, <4 x i32> %6) + %r7 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r6, <4 x i32> %7) + %r8 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r7, <4 x i32> %8) + %r9 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r8, <4 x i32> %9) + %r10 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r9, <4 x i32> %10) + %r11 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r10, <4 x i32> %11) + %r12 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r11, <4 x i32> %12) + %r13 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r12, <4 x i32> %13) + %r14 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r13, <4 x i32> %14) + %r15 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r14, <4 x i32> %15) + %r16 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r15, <4 x i32> %16) + %r17 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r16, <4 x i32> %17) + %r18 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r17, <4 x i32> %18) + %r19 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r18, <4 x i32> %19) + %r20 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r19, <4 x i32> %20) + %r21 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r20, <4 x i32> %21) + %r22 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r21, <4 x i32> %22) + %r23 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r22, <4 x i32> %23) + %r24 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r23, <4 x i32> %24) + %r25 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r24, <4 x i32> %25) + %r26 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r25, <4 x i32> %26) + 
%r27 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r26, <4 x i32> %27) + %r28 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r27, <4 x i32> %28) + %r29 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r28, <4 x i32> %29) + %r30 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r29, <4 x i32> %30) + %r31 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r30, <4 x i32> %31) + %r32 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r31, <4 x i32> %32) + %r33 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r32, <4 x i32> %33) + %rx1 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r33, <4 x i32> %1) + %rx2 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx1, <4 x i32> %2) + %rx3 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx2, <4 x i32> %3) + %rx4 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx3, <4 x i32> %4) + %rx5 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx4, <4 x i32> %5) + %rx6 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx5, <4 x i32> %6) + %rx7 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx6, <4 x i32> %7) + %rx8 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx7, <4 x i32> %8) + %rx9 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx8, <4 x i32> %9) + %rx10 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx9, <4 x i32> %10) + %rx11 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx10, <4 x i32> %11) + %rx12 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx11, <4 x i32> %12) + %rx13 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx12, <4 x i32> %13) + %rx14 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx13, <4 x i32> %14) + %rx15 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx14, <4 x i32> %15) + %rx16 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx15, <4 x i32> %16) + %rx17 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx16, <4 x i32> %17) + %rx18 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx17, <4 x i32> %18) + %rx19 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx18, <4 x i32> %19) + %rx20 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx19, <4 x i32> %20) + %rx21 = call <4 
x i32> @llvm.mips.addv.w(<4 x i32> %rx20, <4 x i32> %21) + %rx22 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx21, <4 x i32> %22) + %rx23 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx22, <4 x i32> %23) + %rx24 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx23, <4 x i32> %24) + %rx25 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx24, <4 x i32> %25) + %rx26 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx25, <4 x i32> %26) + %rx27 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx26, <4 x i32> %27) + %rx28 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx27, <4 x i32> %28) + %rx29 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx28, <4 x i32> %29) + %rx30 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx29, <4 x i32> %30) + %rx31 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx30, <4 x i32> %31) + %rx32 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx31, <4 x i32> %32) + %rx33 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx32, <4 x i32> %33) + %res = call i32 @llvm.mips.copy.s.w(<4 x i32> %rx33, i32 0) + ret i32 %res +} + +declare <4 x i32> @llvm.mips.addv.w(<4 x i32>, <4 x i32>) nounwind +declare i32 @llvm.mips.copy.s.w(<4 x i32>, i32) nounwind + +; CHECK: test_i32: +; CHECK: st.w {{.*}} Spill +; CHECK: st.w {{.*}} Spill +; CHECK: ld.w {{.*}} Reload +; CHECK: ld.w {{.*}} Reload +; CHECK: .size + +define i32 @test_i64(<2 x i64>* %p0, <2 x i64>* %q1) nounwind { +entry: + %p1 = getelementptr <2 x i64>* %p0, i32 1 + %p2 = getelementptr <2 x i64>* %p0, i32 2 + %p3 = getelementptr <2 x i64>* %p0, i32 3 + %p4 = getelementptr <2 x i64>* %p0, i32 4 + %p5 = getelementptr <2 x i64>* %p0, i32 5 + %p6 = getelementptr <2 x i64>* %p0, i32 6 + %p7 = getelementptr <2 x i64>* %p0, i32 7 + %p8 = getelementptr <2 x i64>* %p0, i32 8 + %p9 = getelementptr <2 x i64>* %p0, i32 9 + %p10 = getelementptr <2 x i64>* %p0, i32 10 + %p11 = getelementptr <2 x i64>* %p0, i32 11 + %p12 = getelementptr <2 x i64>* %p0, i32 12 + %p13 = getelementptr <2 x i64>* %p0, i32 13 + %p14 = getelementptr <2 x 
i64>* %p0, i32 14 + %p15 = getelementptr <2 x i64>* %p0, i32 15 + %p16 = getelementptr <2 x i64>* %p0, i32 16 + %p17 = getelementptr <2 x i64>* %p0, i32 17 + %p18 = getelementptr <2 x i64>* %p0, i32 18 + %p19 = getelementptr <2 x i64>* %p0, i32 19 + %p20 = getelementptr <2 x i64>* %p0, i32 20 + %p21 = getelementptr <2 x i64>* %p0, i32 21 + %p22 = getelementptr <2 x i64>* %p0, i32 22 + %p23 = getelementptr <2 x i64>* %p0, i32 23 + %p24 = getelementptr <2 x i64>* %p0, i32 24 + %p25 = getelementptr <2 x i64>* %p0, i32 25 + %p26 = getelementptr <2 x i64>* %p0, i32 26 + %p27 = getelementptr <2 x i64>* %p0, i32 27 + %p28 = getelementptr <2 x i64>* %p0, i32 28 + %p29 = getelementptr <2 x i64>* %p0, i32 29 + %p30 = getelementptr <2 x i64>* %p0, i32 30 + %p31 = getelementptr <2 x i64>* %p0, i32 31 + %p32 = getelementptr <2 x i64>* %p0, i32 32 + %p33 = getelementptr <2 x i64>* %p0, i32 33 + %0 = load <2 x i64>* %p0, align 16 + %1 = load <2 x i64>* %p1, align 16 + %2 = load <2 x i64>* %p2, align 16 + %3 = load <2 x i64>* %p3, align 16 + %4 = load <2 x i64>* %p4, align 16 + %5 = load <2 x i64>* %p5, align 16 + %6 = load <2 x i64>* %p6, align 16 + %7 = load <2 x i64>* %p7, align 16 + %8 = load <2 x i64>* %p8, align 16 + %9 = load <2 x i64>* %p9, align 16 + %10 = load <2 x i64>* %p10, align 16 + %11 = load <2 x i64>* %p11, align 16 + %12 = load <2 x i64>* %p12, align 16 + %13 = load <2 x i64>* %p13, align 16 + %14 = load <2 x i64>* %p14, align 16 + %15 = load <2 x i64>* %p15, align 16 + %16 = load <2 x i64>* %p16, align 16 + %17 = load <2 x i64>* %p17, align 16 + %18 = load <2 x i64>* %p18, align 16 + %19 = load <2 x i64>* %p19, align 16 + %20 = load <2 x i64>* %p20, align 16 + %21 = load <2 x i64>* %p21, align 16 + %22 = load <2 x i64>* %p22, align 16 + %23 = load <2 x i64>* %p23, align 16 + %24 = load <2 x i64>* %p24, align 16 + %25 = load <2 x i64>* %p25, align 16 + %26 = load <2 x i64>* %p26, align 16 + %27 = load <2 x i64>* %p27, align 16 + %28 = load <2 x i64>* %p28, align 
16 + %29 = load <2 x i64>* %p29, align 16 + %30 = load <2 x i64>* %p30, align 16 + %31 = load <2 x i64>* %p31, align 16 + %32 = load <2 x i64>* %p32, align 16 + %33 = load <2 x i64>* %p33, align 16 + %r1 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %1) + %r2 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r1, <2 x i64> %2) + %r3 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r2, <2 x i64> %3) + %r4 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r3, <2 x i64> %4) + %r5 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r4, <2 x i64> %5) + %r6 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r5, <2 x i64> %6) + %r7 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r6, <2 x i64> %7) + %r8 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r7, <2 x i64> %8) + %r9 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r8, <2 x i64> %9) + %r10 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r9, <2 x i64> %10) + %r11 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r10, <2 x i64> %11) + %r12 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r11, <2 x i64> %12) + %r13 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r12, <2 x i64> %13) + %r14 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r13, <2 x i64> %14) + %r15 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r14, <2 x i64> %15) + %r16 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r15, <2 x i64> %16) + %r17 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r16, <2 x i64> %17) + %r18 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r17, <2 x i64> %18) + %r19 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r18, <2 x i64> %19) + %r20 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r19, <2 x i64> %20) + %r21 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r20, <2 x i64> %21) + %r22 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r21, <2 x i64> %22) + %r23 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r22, <2 x i64> %23) + %r24 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r23, <2 x i64> %24) + %r25 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r24, <2 x i64> %25) + 
%r26 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r25, <2 x i64> %26) + %r27 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r26, <2 x i64> %27) + %r28 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r27, <2 x i64> %28) + %r29 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r28, <2 x i64> %29) + %r30 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r29, <2 x i64> %30) + %r31 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r30, <2 x i64> %31) + %r32 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r31, <2 x i64> %32) + %r33 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r32, <2 x i64> %33) + %rx1 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r33, <2 x i64> %1) + %rx2 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx1, <2 x i64> %2) + %rx3 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx2, <2 x i64> %3) + %rx4 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx3, <2 x i64> %4) + %rx5 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx4, <2 x i64> %5) + %rx6 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx5, <2 x i64> %6) + %rx7 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx6, <2 x i64> %7) + %rx8 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx7, <2 x i64> %8) + %rx9 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx8, <2 x i64> %9) + %rx10 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx9, <2 x i64> %10) + %rx11 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx10, <2 x i64> %11) + %rx12 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx11, <2 x i64> %12) + %rx13 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx12, <2 x i64> %13) + %rx14 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx13, <2 x i64> %14) + %rx15 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx14, <2 x i64> %15) + %rx16 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx15, <2 x i64> %16) + %rx17 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx16, <2 x i64> %17) + %rx18 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx17, <2 x i64> %18) + %rx19 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx18, <2 x i64> %19) + %rx20 = call <2 x 
i64> @llvm.mips.addv.d(<2 x i64> %rx19, <2 x i64> %20) + %rx21 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx20, <2 x i64> %21) + %rx22 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx21, <2 x i64> %22) + %rx23 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx22, <2 x i64> %23) + %rx24 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx23, <2 x i64> %24) + %rx25 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx24, <2 x i64> %25) + %rx26 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx25, <2 x i64> %26) + %rx27 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx26, <2 x i64> %27) + %rx28 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx27, <2 x i64> %28) + %rx29 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx28, <2 x i64> %29) + %rx30 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx29, <2 x i64> %30) + %rx31 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx30, <2 x i64> %31) + %rx32 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx31, <2 x i64> %32) + %rx33 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx32, <2 x i64> %33) + %res1 = bitcast <2 x i64> %rx33 to <4 x i32> + %res = call i32 @llvm.mips.copy.s.w(<4 x i32> %res1, i32 0) + ret i32 %res +} + +declare <2 x i64> @llvm.mips.addv.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: test_i64: +; CHECK: st.d {{.*}} Spill +; CHECK: st.d {{.*}} Spill +; CHECK: ld.d {{.*}} Reload +; CHECK: ld.d {{.*}} Reload +; CHECK: .size diff --git a/test/CodeGen/Mips/msa/vec.ll b/test/CodeGen/Mips/msa/vec.ll new file mode 100644 index 0000000..5bddf5a --- /dev/null +++ b/test/CodeGen/Mips/msa/vec.ll @@ -0,0 +1,946 @@ +; Test the MSA intrinsics that are encoded with the VEC instruction format. 
+ +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ANYENDIAN %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ANYENDIAN %s + +@llvm_mips_and_v_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_and_v_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_and_v_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_and_v_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_and_v_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_and_v_b_ARG2 + %2 = bitcast <16 x i8> %0 to <16 x i8> + %3 = bitcast <16 x i8> %1 to <16 x i8> + %4 = tail call <16 x i8> @llvm.mips.and.v(<16 x i8> %2, <16 x i8> %3) + %5 = bitcast <16 x i8> %4 to <16 x i8> + store <16 x i8> %5, <16 x i8>* @llvm_mips_and_v_b_RES + ret void +} + +; ANYENDIAN: llvm_mips_and_v_b_test: +; ANYENDIAN: ld.b +; ANYENDIAN: ld.b +; ANYENDIAN: and.v +; ANYENDIAN: st.b +; ANYENDIAN: .size llvm_mips_and_v_b_test +; +@llvm_mips_and_v_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_and_v_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_and_v_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_and_v_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_and_v_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_and_v_h_ARG2 + %2 = bitcast <8 x i16> %0 to <16 x i8> + %3 = bitcast <8 x i16> %1 to <16 x i8> + %4 = tail call <16 x i8> @llvm.mips.and.v(<16 x i8> %2, <16 x i8> %3) + %5 = bitcast <16 x i8> %4 to <8 x i16> + store <8 x i16> %5, <8 x i16>* @llvm_mips_and_v_h_RES + ret void +} + +; 
ANYENDIAN: llvm_mips_and_v_h_test: +; ANYENDIAN: ld.b +; ANYENDIAN: ld.b +; ANYENDIAN: and.v +; ANYENDIAN: st.b +; ANYENDIAN: .size llvm_mips_and_v_h_test +; +@llvm_mips_and_v_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_and_v_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_and_v_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_and_v_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_and_v_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_and_v_w_ARG2 + %2 = bitcast <4 x i32> %0 to <16 x i8> + %3 = bitcast <4 x i32> %1 to <16 x i8> + %4 = tail call <16 x i8> @llvm.mips.and.v(<16 x i8> %2, <16 x i8> %3) + %5 = bitcast <16 x i8> %4 to <4 x i32> + store <4 x i32> %5, <4 x i32>* @llvm_mips_and_v_w_RES + ret void +} + +; ANYENDIAN: llvm_mips_and_v_w_test: +; ANYENDIAN: ld.b +; ANYENDIAN: ld.b +; ANYENDIAN: and.v +; ANYENDIAN: st.b +; ANYENDIAN: .size llvm_mips_and_v_w_test +; +@llvm_mips_and_v_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_and_v_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_and_v_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_and_v_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_and_v_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_and_v_d_ARG2 + %2 = bitcast <2 x i64> %0 to <16 x i8> + %3 = bitcast <2 x i64> %1 to <16 x i8> + %4 = tail call <16 x i8> @llvm.mips.and.v(<16 x i8> %2, <16 x i8> %3) + %5 = bitcast <16 x i8> %4 to <2 x i64> + store <2 x i64> %5, <2 x i64>* @llvm_mips_and_v_d_RES + ret void +} + +; ANYENDIAN: llvm_mips_and_v_d_test: +; ANYENDIAN: ld.b +; ANYENDIAN: ld.b +; ANYENDIAN: and.v +; ANYENDIAN: st.b +; ANYENDIAN: .size llvm_mips_and_v_d_test +; +define void @and_v_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_and_v_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_and_v_b_ARG2 + %2 = and <16 x i8> %0, %1 + store <16 x i8> %2, <16 x i8>* 
@llvm_mips_and_v_b_RES + ret void +} + +; CHECK: and_v_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: and.v +; CHECK: st.b +; CHECK: .size and_v_b_test +; +define void @and_v_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_and_v_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_and_v_h_ARG2 + %2 = and <8 x i16> %0, %1 + store <8 x i16> %2, <8 x i16>* @llvm_mips_and_v_h_RES + ret void +} + +; CHECK: and_v_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: and.v +; CHECK: st.h +; CHECK: .size and_v_h_test +; + +define void @and_v_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_and_v_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_and_v_w_ARG2 + %2 = and <4 x i32> %0, %1 + store <4 x i32> %2, <4 x i32>* @llvm_mips_and_v_w_RES + ret void +} + +; CHECK: and_v_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: and.v +; CHECK: st.w +; CHECK: .size and_v_w_test +; + +define void @and_v_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_and_v_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_and_v_d_ARG2 + %2 = and <2 x i64> %0, %1 + store <2 x i64> %2, <2 x i64>* @llvm_mips_and_v_d_RES + ret void +} + +; CHECK: and_v_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: and.v +; CHECK: st.d +; CHECK: .size and_v_d_test +; +@llvm_mips_bmnz_v_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_bmnz_v_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_bmnz_v_b_ARG3 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_bmnz_v_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_bmnz_v_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_bmnz_v_b_ARG1 + %1 = load <16 x 
i8>* @llvm_mips_bmnz_v_b_ARG2 + %2 = load <16 x i8>* @llvm_mips_bmnz_v_b_ARG3 + %3 = bitcast <16 x i8> %0 to <16 x i8> + %4 = bitcast <16 x i8> %1 to <16 x i8> + %5 = bitcast <16 x i8> %2 to <16 x i8> + %6 = tail call <16 x i8> @llvm.mips.bmnz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <16 x i8> + store <16 x i8> %7, <16 x i8>* @llvm_mips_bmnz_v_b_RES + ret void +} + +; ANYENDIAN: llvm_mips_bmnz_v_b_test: +; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmnz_v_b_ARG1)( +; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmnz_v_b_ARG2)( +; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmnz_v_b_ARG3)( +; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]]) +; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]]) +; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]]) +; ANYENDIAN-DAG: bmnz.v [[R4]], [[R5]], [[R6]] +; ANYENDIAN-DAG: st.b [[R4]], 0( +; ANYENDIAN: .size llvm_mips_bmnz_v_b_test + +@llvm_mips_bmnz_v_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_bmnz_v_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_bmnz_v_h_ARG3 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_bmnz_v_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_bmnz_v_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_bmnz_v_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_bmnz_v_h_ARG2 + %2 = load <8 x i16>* @llvm_mips_bmnz_v_h_ARG3 + %3 = bitcast <8 x i16> %0 to <16 x i8> + %4 = bitcast <8 x i16> %1 to <16 x i8> + %5 = bitcast <8 x i16> %2 to <16 x i8> + %6 = tail call <16 x i8> @llvm.mips.bmnz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <8 x i16> + store <8 x i16> %7, <8 x i16>* @llvm_mips_bmnz_v_h_RES + ret void +} + +; ANYENDIAN: llvm_mips_bmnz_v_h_test: +; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], 
%got(llvm_mips_bmnz_v_h_ARG1)( +; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmnz_v_h_ARG2)( +; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmnz_v_h_ARG3)( +; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]]) +; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]]) +; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]]) +; ANYENDIAN-DAG: bmnz.v [[R4]], [[R5]], [[R6]] +; ANYENDIAN-DAG: st.b [[R4]], 0( +; ANYENDIAN: .size llvm_mips_bmnz_v_h_test + +@llvm_mips_bmnz_v_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_bmnz_v_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_bmnz_v_w_ARG3 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_bmnz_v_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_bmnz_v_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_bmnz_v_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_bmnz_v_w_ARG2 + %2 = load <4 x i32>* @llvm_mips_bmnz_v_w_ARG3 + %3 = bitcast <4 x i32> %0 to <16 x i8> + %4 = bitcast <4 x i32> %1 to <16 x i8> + %5 = bitcast <4 x i32> %2 to <16 x i8> + %6 = tail call <16 x i8> @llvm.mips.bmnz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <4 x i32> + store <4 x i32> %7, <4 x i32>* @llvm_mips_bmnz_v_w_RES + ret void +} + +; ANYENDIAN: llvm_mips_bmnz_v_w_test: +; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmnz_v_w_ARG1)( +; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmnz_v_w_ARG2)( +; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmnz_v_w_ARG3)( +; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]]) +; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]]) +; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]]) +; ANYENDIAN-DAG: bmnz.v [[R4]], [[R5]], [[R6]] +; ANYENDIAN-DAG: st.b [[R4]], 0( +; ANYENDIAN: .size llvm_mips_bmnz_v_w_test + +@llvm_mips_bmnz_v_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_bmnz_v_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 
+@llvm_mips_bmnz_v_d_ARG3 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_bmnz_v_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_bmnz_v_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_bmnz_v_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_bmnz_v_d_ARG2 + %2 = load <2 x i64>* @llvm_mips_bmnz_v_d_ARG3 + %3 = bitcast <2 x i64> %0 to <16 x i8> + %4 = bitcast <2 x i64> %1 to <16 x i8> + %5 = bitcast <2 x i64> %2 to <16 x i8> + %6 = tail call <16 x i8> @llvm.mips.bmnz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <2 x i64> + store <2 x i64> %7, <2 x i64>* @llvm_mips_bmnz_v_d_RES + ret void +} + +; ANYENDIAN: llvm_mips_bmnz_v_d_test: +; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmnz_v_d_ARG1)( +; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmnz_v_d_ARG2)( +; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmnz_v_d_ARG3)( +; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]]) +; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]]) +; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]]) +; ANYENDIAN-DAG: bmnz.v [[R4]], [[R5]], [[R6]] +; ANYENDIAN-DAG: st.b [[R4]], 0( +; ANYENDIAN: .size llvm_mips_bmnz_v_d_test + +@llvm_mips_bmz_v_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_bmz_v_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_bmz_v_b_ARG3 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_bmz_v_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_bmz_v_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_bmz_v_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_bmz_v_b_ARG2 + %2 = load <16 x 
i8>* @llvm_mips_bmz_v_b_ARG3 + %3 = bitcast <16 x i8> %0 to <16 x i8> + %4 = bitcast <16 x i8> %1 to <16 x i8> + %5 = bitcast <16 x i8> %2 to <16 x i8> + %6 = tail call <16 x i8> @llvm.mips.bmz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <16 x i8> + store <16 x i8> %7, <16 x i8>* @llvm_mips_bmz_v_b_RES + ret void +} + +; ANYENDIAN: llvm_mips_bmz_v_b_test: +; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmz_v_b_ARG1)( +; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmz_v_b_ARG2)( +; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmz_v_b_ARG3)( +; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]]) +; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]]) +; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]]) +; bmnz.v is the same as bmz.v with ws and wd_in swapped +; ANYENDIAN-DAG: bmnz.v [[R5]], [[R4]], [[R6]] +; ANYENDIAN-DAG: st.b [[R5]], 0( +; ANYENDIAN: .size llvm_mips_bmz_v_b_test + +@llvm_mips_bmz_v_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_bmz_v_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_bmz_v_h_ARG3 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_bmz_v_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_bmz_v_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_bmz_v_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_bmz_v_h_ARG2 + %2 = load <8 x i16>* @llvm_mips_bmz_v_h_ARG3 + %3 = bitcast <8 x i16> %0 to <16 x i8> + %4 = bitcast <8 x i16> %1 to <16 x i8> + %5 = bitcast <8 x i16> %2 to <16 x i8> + %6 = tail call <16 x i8> @llvm.mips.bmz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <8 x i16> + store <8 x i16> %7, <8 x i16>* @llvm_mips_bmz_v_h_RES + ret void +} + +; ANYENDIAN: llvm_mips_bmz_v_h_test: +; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], 
%got(llvm_mips_bmz_v_h_ARG1)( +; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmz_v_h_ARG2)( +; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmz_v_h_ARG3)( +; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]]) +; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]]) +; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]]) +; bmnz.v is the same as bmz.v with ws and wd_in swapped +; ANYENDIAN-DAG: bmnz.v [[R5]], [[R4]], [[R6]] +; ANYENDIAN-DAG: st.b [[R5]], 0( +; ANYENDIAN: .size llvm_mips_bmz_v_h_test + +@llvm_mips_bmz_v_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_bmz_v_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_bmz_v_w_ARG3 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_bmz_v_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_bmz_v_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_bmz_v_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_bmz_v_w_ARG2 + %2 = load <4 x i32>* @llvm_mips_bmz_v_w_ARG3 + %3 = bitcast <4 x i32> %0 to <16 x i8> + %4 = bitcast <4 x i32> %1 to <16 x i8> + %5 = bitcast <4 x i32> %2 to <16 x i8> + %6 = tail call <16 x i8> @llvm.mips.bmz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <4 x i32> + store <4 x i32> %7, <4 x i32>* @llvm_mips_bmz_v_w_RES + ret void +} + +; ANYENDIAN: llvm_mips_bmz_v_w_test: +; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmz_v_w_ARG1)( +; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmz_v_w_ARG2)( +; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmz_v_w_ARG3)( +; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]]) +; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]]) +; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]]) +; bmnz.v is the same as bmz.v with ws and wd_in swapped +; ANYENDIAN-DAG: bmnz.v [[R5]], [[R4]], [[R6]] +; ANYENDIAN-DAG: st.b [[R5]], 0( +; ANYENDIAN: .size llvm_mips_bmz_v_w_test + +@llvm_mips_bmz_v_d_ARG1 = global <2 x i64> <i64 0, i64 1>, 
align 16 +@llvm_mips_bmz_v_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_bmz_v_d_ARG3 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_bmz_v_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_bmz_v_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_bmz_v_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_bmz_v_d_ARG2 + %2 = load <2 x i64>* @llvm_mips_bmz_v_d_ARG3 + %3 = bitcast <2 x i64> %0 to <16 x i8> + %4 = bitcast <2 x i64> %1 to <16 x i8> + %5 = bitcast <2 x i64> %2 to <16 x i8> + %6 = tail call <16 x i8> @llvm.mips.bmz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <2 x i64> + store <2 x i64> %7, <2 x i64>* @llvm_mips_bmz_v_d_RES + ret void +} + +; ANYENDIAN: llvm_mips_bmz_v_d_test: +; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmz_v_d_ARG1)( +; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmz_v_d_ARG2)( +; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmz_v_d_ARG3)( +; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]]) +; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]]) +; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]]) +; bmnz.v is the same as bmz.v with ws and wd_in swapped +; ANYENDIAN-DAG: bmnz.v [[R5]], [[R4]], [[R6]] +; ANYENDIAN-DAG: st.b [[R5]], 0( +; ANYENDIAN: .size llvm_mips_bmz_v_d_test + +@llvm_mips_bsel_v_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_bsel_v_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_bsel_v_b_ARG3 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_bsel_v_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_bsel_v_b_test() 
nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_bsel_v_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_bsel_v_b_ARG2 + %2 = load <16 x i8>* @llvm_mips_bsel_v_b_ARG3 + %3 = bitcast <16 x i8> %0 to <16 x i8> + %4 = bitcast <16 x i8> %1 to <16 x i8> + %5 = bitcast <16 x i8> %2 to <16 x i8> + %6 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <16 x i8> + store <16 x i8> %7, <16 x i8>* @llvm_mips_bsel_v_b_RES + ret void +} + +; ANYENDIAN: llvm_mips_bsel_v_b_test: +; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bsel_v_b_ARG1)( +; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bsel_v_b_ARG2)( +; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bsel_v_b_ARG3)( +; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]]) +; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]]) +; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]]) +; bmnz.v is the same as bsel.v with wt and wd_in swapped +; ANYENDIAN-DAG: bmnz.v [[R6]], [[R5]], [[R4]] +; ANYENDIAN-DAG: st.b [[R6]], 0( +; ANYENDIAN: .size llvm_mips_bsel_v_b_test + +@llvm_mips_bsel_v_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_bsel_v_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_bsel_v_h_ARG3 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_bsel_v_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_bsel_v_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_bsel_v_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_bsel_v_h_ARG2 + %2 = load <8 x i16>* @llvm_mips_bsel_v_h_ARG3 + %3 = bitcast <8 x i16> %0 to <16 x i8> + %4 = bitcast <8 x i16> %1 to <16 x i8> + %5 = bitcast <8 x i16> %2 to <16 x i8> + %6 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <8 x i16> + store <8 x i16> 
%7, <8 x i16>* @llvm_mips_bsel_v_h_RES + ret void +} + +; ANYENDIAN: llvm_mips_bsel_v_h_test: +; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bsel_v_h_ARG1)( +; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bsel_v_h_ARG2)( +; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bsel_v_h_ARG3)( +; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]]) +; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]]) +; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]]) +; bmnz.v is the same as bsel.v with wt and wd_in swapped +; ANYENDIAN-DAG: bmnz.v [[R6]], [[R5]], [[R4]] +; ANYENDIAN-DAG: st.b [[R6]], 0( +; ANYENDIAN: .size llvm_mips_bsel_v_h_test + +@llvm_mips_bsel_v_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_bsel_v_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_bsel_v_w_ARG3 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_bsel_v_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_bsel_v_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_bsel_v_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_bsel_v_w_ARG2 + %2 = load <4 x i32>* @llvm_mips_bsel_v_w_ARG3 + %3 = bitcast <4 x i32> %0 to <16 x i8> + %4 = bitcast <4 x i32> %1 to <16 x i8> + %5 = bitcast <4 x i32> %2 to <16 x i8> + %6 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <4 x i32> + store <4 x i32> %7, <4 x i32>* @llvm_mips_bsel_v_w_RES + ret void +} + +; ANYENDIAN: llvm_mips_bsel_v_w_test: +; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bsel_v_w_ARG1)( +; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bsel_v_w_ARG2)( +; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bsel_v_w_ARG3)( +; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]]) +; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]]) +; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]]) +; bmnz.v is the same as bsel.v with wt and wd_in swapped +; ANYENDIAN-DAG: bmnz.v [[R6]], 
[[R5]], [[R4]] +; ANYENDIAN-DAG: st.b [[R6]], 0( +; ANYENDIAN: .size llvm_mips_bsel_v_w_test + +@llvm_mips_bsel_v_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_bsel_v_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_bsel_v_d_ARG3 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_bsel_v_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_bsel_v_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_bsel_v_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_bsel_v_d_ARG2 + %2 = load <2 x i64>* @llvm_mips_bsel_v_d_ARG3 + %3 = bitcast <2 x i64> %0 to <16 x i8> + %4 = bitcast <2 x i64> %1 to <16 x i8> + %5 = bitcast <2 x i64> %2 to <16 x i8> + %6 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <2 x i64> + store <2 x i64> %7, <2 x i64>* @llvm_mips_bsel_v_d_RES + ret void +} + +; ANYENDIAN: llvm_mips_bsel_v_d_test: +; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bsel_v_d_ARG1)( +; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bsel_v_d_ARG2)( +; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bsel_v_d_ARG3)( +; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]]) +; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]]) +; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]]) +; bmnz.v is the same as bsel.v with wt and wd_in swapped +; ANYENDIAN-DAG: bmnz.v [[R6]], [[R5]], [[R4]] +; ANYENDIAN-DAG: st.b [[R6]], 0( +; ANYENDIAN: .size llvm_mips_bsel_v_d_test + +@llvm_mips_nor_v_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_nor_v_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_nor_v_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void 
@llvm_mips_nor_v_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_nor_v_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_nor_v_b_ARG2 + %2 = bitcast <16 x i8> %0 to <16 x i8> + %3 = bitcast <16 x i8> %1 to <16 x i8> + %4 = tail call <16 x i8> @llvm.mips.nor.v(<16 x i8> %2, <16 x i8> %3) + %5 = bitcast <16 x i8> %4 to <16 x i8> + store <16 x i8> %5, <16 x i8>* @llvm_mips_nor_v_b_RES + ret void +} + +; ANYENDIAN: llvm_mips_nor_v_b_test: +; ANYENDIAN: ld.b +; ANYENDIAN: ld.b +; ANYENDIAN: nor.v +; ANYENDIAN: st.b +; ANYENDIAN: .size llvm_mips_nor_v_b_test +; +@llvm_mips_nor_v_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_nor_v_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_nor_v_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_nor_v_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_nor_v_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_nor_v_h_ARG2 + %2 = bitcast <8 x i16> %0 to <16 x i8> + %3 = bitcast <8 x i16> %1 to <16 x i8> + %4 = tail call <16 x i8> @llvm.mips.nor.v(<16 x i8> %2, <16 x i8> %3) + %5 = bitcast <16 x i8> %4 to <8 x i16> + store <8 x i16> %5, <8 x i16>* @llvm_mips_nor_v_h_RES + ret void +} + +; ANYENDIAN: llvm_mips_nor_v_h_test: +; ANYENDIAN: ld.b +; ANYENDIAN: ld.b +; ANYENDIAN: nor.v +; ANYENDIAN: st.b +; ANYENDIAN: .size llvm_mips_nor_v_h_test +; +@llvm_mips_nor_v_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_nor_v_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_nor_v_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_nor_v_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_nor_v_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_nor_v_w_ARG2 + %2 = bitcast <4 x i32> %0 to <16 x i8> + %3 = bitcast <4 x i32> %1 to <16 x i8> + %4 = tail call <16 x i8> 
@llvm.mips.nor.v(<16 x i8> %2, <16 x i8> %3) + %5 = bitcast <16 x i8> %4 to <4 x i32> + store <4 x i32> %5, <4 x i32>* @llvm_mips_nor_v_w_RES + ret void +} + +; ANYENDIAN: llvm_mips_nor_v_w_test: +; ANYENDIAN: ld.b +; ANYENDIAN: ld.b +; ANYENDIAN: nor.v +; ANYENDIAN: st.b +; ANYENDIAN: .size llvm_mips_nor_v_w_test +; +@llvm_mips_nor_v_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_nor_v_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_nor_v_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_nor_v_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_nor_v_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_nor_v_d_ARG2 + %2 = bitcast <2 x i64> %0 to <16 x i8> + %3 = bitcast <2 x i64> %1 to <16 x i8> + %4 = tail call <16 x i8> @llvm.mips.nor.v(<16 x i8> %2, <16 x i8> %3) + %5 = bitcast <16 x i8> %4 to <2 x i64> + store <2 x i64> %5, <2 x i64>* @llvm_mips_nor_v_d_RES + ret void +} + +; ANYENDIAN: llvm_mips_nor_v_d_test: +; ANYENDIAN: ld.b +; ANYENDIAN: ld.b +; ANYENDIAN: nor.v +; ANYENDIAN: st.b +; ANYENDIAN: .size llvm_mips_nor_v_d_test +; +@llvm_mips_or_v_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_or_v_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_or_v_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_or_v_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_or_v_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_or_v_b_ARG2 + %2 = bitcast <16 x i8> %0 to <16 x i8> + %3 = bitcast <16 x i8> %1 to <16 x i8> + %4 = tail call <16 x i8> @llvm.mips.or.v(<16 x i8> %2, <16 x i8> %3) + %5 = bitcast <16 x i8> %4 to <16 x i8> + store <16 x i8> %5, <16 x i8>* @llvm_mips_or_v_b_RES + ret void +} + +; 
ANYENDIAN: llvm_mips_or_v_b_test: +; ANYENDIAN: ld.b +; ANYENDIAN: ld.b +; ANYENDIAN: or.v +; ANYENDIAN: st.b +; ANYENDIAN: .size llvm_mips_or_v_b_test +; +@llvm_mips_or_v_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_or_v_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_or_v_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_or_v_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_or_v_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_or_v_h_ARG2 + %2 = bitcast <8 x i16> %0 to <16 x i8> + %3 = bitcast <8 x i16> %1 to <16 x i8> + %4 = tail call <16 x i8> @llvm.mips.or.v(<16 x i8> %2, <16 x i8> %3) + %5 = bitcast <16 x i8> %4 to <8 x i16> + store <8 x i16> %5, <8 x i16>* @llvm_mips_or_v_h_RES + ret void +} + +; ANYENDIAN: llvm_mips_or_v_h_test: +; ANYENDIAN: ld.b +; ANYENDIAN: ld.b +; ANYENDIAN: or.v +; ANYENDIAN: st.b +; ANYENDIAN: .size llvm_mips_or_v_h_test +; +@llvm_mips_or_v_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_or_v_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_or_v_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_or_v_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_or_v_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_or_v_w_ARG2 + %2 = bitcast <4 x i32> %0 to <16 x i8> + %3 = bitcast <4 x i32> %1 to <16 x i8> + %4 = tail call <16 x i8> @llvm.mips.or.v(<16 x i8> %2, <16 x i8> %3) + %5 = bitcast <16 x i8> %4 to <4 x i32> + store <4 x i32> %5, <4 x i32>* @llvm_mips_or_v_w_RES + ret void +} + +; ANYENDIAN: llvm_mips_or_v_w_test: +; ANYENDIAN: ld.b +; ANYENDIAN: ld.b +; ANYENDIAN: or.v +; ANYENDIAN: st.b +; ANYENDIAN: .size llvm_mips_or_v_w_test +; +@llvm_mips_or_v_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_or_v_d_ARG2 = global <2 x i64> <i64 
2, i64 3>, align 16 +@llvm_mips_or_v_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_or_v_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_or_v_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_or_v_d_ARG2 + %2 = bitcast <2 x i64> %0 to <16 x i8> + %3 = bitcast <2 x i64> %1 to <16 x i8> + %4 = tail call <16 x i8> @llvm.mips.or.v(<16 x i8> %2, <16 x i8> %3) + %5 = bitcast <16 x i8> %4 to <2 x i64> + store <2 x i64> %5, <2 x i64>* @llvm_mips_or_v_d_RES + ret void +} + +; ANYENDIAN: llvm_mips_or_v_d_test: +; ANYENDIAN: ld.b +; ANYENDIAN: ld.b +; ANYENDIAN: or.v +; ANYENDIAN: st.b +; ANYENDIAN: .size llvm_mips_or_v_d_test +; +define void @or_v_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_or_v_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_or_v_b_ARG2 + %2 = or <16 x i8> %0, %1 + store <16 x i8> %2, <16 x i8>* @llvm_mips_or_v_b_RES + ret void +} + +; CHECK: or_v_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: or.v +; CHECK: st.b +; CHECK: .size or_v_b_test +; +define void @or_v_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_or_v_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_or_v_h_ARG2 + %2 = or <8 x i16> %0, %1 + store <8 x i16> %2, <8 x i16>* @llvm_mips_or_v_h_RES + ret void +} + +; CHECK: or_v_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: or.v +; CHECK: st.h +; CHECK: .size or_v_h_test +; + +define void @or_v_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_or_v_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_or_v_w_ARG2 + %2 = or <4 x i32> %0, %1 + store <4 x i32> %2, <4 x i32>* @llvm_mips_or_v_w_RES + ret void +} + +; CHECK: or_v_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: or.v +; CHECK: st.w +; CHECK: .size or_v_w_test +; + +define void @or_v_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_or_v_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_or_v_d_ARG2 + %2 = or <2 x i64> %0, %1 + store <2 x i64> %2, <2 x i64>* @llvm_mips_or_v_d_RES + ret void +} + +; CHECK: or_v_d_test: +; CHECK: ld.d +; 
CHECK: ld.d +; CHECK: or.v +; CHECK: st.d +; CHECK: .size or_v_d_test +; +@llvm_mips_xor_v_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_xor_v_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_xor_v_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_xor_v_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_xor_v_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_xor_v_b_ARG2 + %2 = bitcast <16 x i8> %0 to <16 x i8> + %3 = bitcast <16 x i8> %1 to <16 x i8> + %4 = tail call <16 x i8> @llvm.mips.xor.v(<16 x i8> %2, <16 x i8> %3) + %5 = bitcast <16 x i8> %4 to <16 x i8> + store <16 x i8> %5, <16 x i8>* @llvm_mips_xor_v_b_RES + ret void +} + +; ANYENDIAN: llvm_mips_xor_v_b_test: +; ANYENDIAN: ld.b +; ANYENDIAN: ld.b +; ANYENDIAN: xor.v +; ANYENDIAN: st.b +; ANYENDIAN: .size llvm_mips_xor_v_b_test +; +@llvm_mips_xor_v_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_xor_v_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_xor_v_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_xor_v_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_xor_v_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_xor_v_h_ARG2 + %2 = bitcast <8 x i16> %0 to <16 x i8> + %3 = bitcast <8 x i16> %1 to <16 x i8> + %4 = tail call <16 x i8> @llvm.mips.xor.v(<16 x i8> %2, <16 x i8> %3) + %5 = bitcast <16 x i8> %4 to <8 x i16> + store <8 x i16> %5, <8 x i16>* @llvm_mips_xor_v_h_RES + ret void +} + +; ANYENDIAN: llvm_mips_xor_v_h_test: +; ANYENDIAN: ld.b +; ANYENDIAN: ld.b +; ANYENDIAN: xor.v +; ANYENDIAN: 
st.b +; ANYENDIAN: .size llvm_mips_xor_v_h_test +; +@llvm_mips_xor_v_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_xor_v_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_xor_v_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_xor_v_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_xor_v_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_xor_v_w_ARG2 + %2 = bitcast <4 x i32> %0 to <16 x i8> + %3 = bitcast <4 x i32> %1 to <16 x i8> + %4 = tail call <16 x i8> @llvm.mips.xor.v(<16 x i8> %2, <16 x i8> %3) + %5 = bitcast <16 x i8> %4 to <4 x i32> + store <4 x i32> %5, <4 x i32>* @llvm_mips_xor_v_w_RES + ret void +} + +; ANYENDIAN: llvm_mips_xor_v_w_test: +; ANYENDIAN: ld.b +; ANYENDIAN: ld.b +; ANYENDIAN: xor.v +; ANYENDIAN: st.b +; ANYENDIAN: .size llvm_mips_xor_v_w_test +; +@llvm_mips_xor_v_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_xor_v_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_xor_v_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_xor_v_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_xor_v_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_xor_v_d_ARG2 + %2 = bitcast <2 x i64> %0 to <16 x i8> + %3 = bitcast <2 x i64> %1 to <16 x i8> + %4 = tail call <16 x i8> @llvm.mips.xor.v(<16 x i8> %2, <16 x i8> %3) + %5 = bitcast <16 x i8> %4 to <2 x i64> + store <2 x i64> %5, <2 x i64>* @llvm_mips_xor_v_d_RES + ret void +} + +; ANYENDIAN: llvm_mips_xor_v_d_test: +; ANYENDIAN: ld.b +; ANYENDIAN: ld.b +; ANYENDIAN: xor.v +; ANYENDIAN: st.b +; ANYENDIAN: .size llvm_mips_xor_v_d_test +; +define void @xor_v_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_xor_v_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_xor_v_b_ARG2 + %2 = xor <16 x i8> %0, %1 + store <16 x i8> %2, <16 x i8>* @llvm_mips_xor_v_b_RES + ret void +} + +; CHECK: xor_v_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: xor.v +; CHECK: st.b +; 
CHECK: .size xor_v_b_test +; +define void @xor_v_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_xor_v_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_xor_v_h_ARG2 + %2 = xor <8 x i16> %0, %1 + store <8 x i16> %2, <8 x i16>* @llvm_mips_xor_v_h_RES + ret void +} + +; CHECK: xor_v_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: xor.v +; CHECK: st.h +; CHECK: .size xor_v_h_test +; + +define void @xor_v_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_xor_v_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_xor_v_w_ARG2 + %2 = xor <4 x i32> %0, %1 + store <4 x i32> %2, <4 x i32>* @llvm_mips_xor_v_w_RES + ret void +} + +; CHECK: xor_v_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: xor.v +; CHECK: st.w +; CHECK: .size xor_v_w_test +; + +define void @xor_v_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_xor_v_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_xor_v_d_ARG2 + %2 = xor <2 x i64> %0, %1 + store <2 x i64> %2, <2 x i64>* @llvm_mips_xor_v_d_RES + ret void +} + +; CHECK: xor_v_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: xor.v +; CHECK: st.d +; CHECK: .size xor_v_d_test +; +declare <16 x i8> @llvm.mips.and.v(<16 x i8>, <16 x i8>) nounwind +declare <16 x i8> @llvm.mips.bmnz.v(<16 x i8>, <16 x i8>, <16 x i8>) nounwind +declare <16 x i8> @llvm.mips.bmz.v(<16 x i8>, <16 x i8>, <16 x i8>) nounwind +declare <16 x i8> @llvm.mips.bsel.v(<16 x i8>, <16 x i8>, <16 x i8>) nounwind +declare <16 x i8> @llvm.mips.nor.v(<16 x i8>, <16 x i8>) nounwind +declare <16 x i8> @llvm.mips.or.v(<16 x i8>, <16 x i8>) nounwind +declare <16 x i8> @llvm.mips.xor.v(<16 x i8>, <16 x i8>) nounwind diff --git a/test/CodeGen/Mips/msa/vecs10.ll b/test/CodeGen/Mips/msa/vecs10.ll new file mode 100644 index 0000000..e22e075 --- /dev/null +++ b/test/CodeGen/Mips/msa/vecs10.ll @@ -0,0 +1,47 @@ +; Test the MSA intrinsics that are encoded with the VECS10 instruction format. 
+ +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s + +@llvm_mips_bnz_v_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 + +define i32 @llvm_mips_bnz_v_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_bnz_v_ARG1 + %1 = tail call i32 @llvm.mips.bnz.v(<16 x i8> %0) + %2 = icmp eq i32 %1, 0 + br i1 %2, label %true, label %false +true: + ret i32 2 +false: + ret i32 3 +} + +declare i32 @llvm.mips.bnz.v(<16 x i8>) nounwind + +; CHECK: llvm_mips_bnz_v_test: +; CHECK-DAG: ld.b [[R0:\$w[0-9]+]] +; CHECK-DAG: bnz.v [[R0]] +; CHECK: .size llvm_mips_bnz_v_test + +@llvm_mips_bz_v_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 + +define i32 @llvm_mips_bz_v_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_bz_v_ARG1 + %1 = tail call i32 @llvm.mips.bz.v(<16 x i8> %0) + %2 = icmp eq i32 %1, 0 + br i1 %2, label %true, label %false +true: + ret i32 2 +false: + ret i32 3 +} + +declare i32 @llvm.mips.bz.v(<16 x i8>) nounwind + +; CHECK: llvm_mips_bz_v_test: +; CHECK-DAG: ld.b [[R0:\$w[0-9]+]] +; CHECK-DAG: bz.v [[R0]] +; CHECK: .size llvm_mips_bz_v_test +; diff --git a/test/CodeGen/Mips/nomips16.ll b/test/CodeGen/Mips/nomips16.ll new file mode 100644 index 0000000..bf7c667 --- /dev/null +++ b/test/CodeGen/Mips/nomips16.ll @@ -0,0 +1,38 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mips16-hard-float -soft-float -relocation-model=static < %s | FileCheck %s + +@x = global float 0.000000e+00, align 4 +@.str = private unnamed_addr constant [20 x i8] c"in main: mips16 %f\0A\00", align 1 + +; Function Attrs: nounwind +define void @foo() #0 { +entry: + %0 = load float* @x, align 4 + %conv = fpext float %0 to double + %add = fadd double %conv, 1.500000e+00 + %conv1 = fptrunc double %add to float + 
store float %conv1, float* @x, align 4 + ret void +} +; CHECK: .ent foo +; CHECK: jal __mips16_extendsfdf2 +; CHECK: .end foo + +; Function Attrs: nounwind +define void @nofoo() #1 { +entry: + %0 = load float* @x, align 4 + %conv = fpext float %0 to double + %add = fadd double %conv, 3.900000e+00 + %conv1 = fptrunc double %add to float + store float %conv1, float* @x, align 4 + ret void +} + +; CHECK: .ent nofoo +; CHECK: cvt.d.s $f{{.+}}, $f{{.+}} +; CHECK: .end nofoo + + +attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } +attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } + diff --git a/test/CodeGen/Mips/o32_cc.ll b/test/CodeGen/Mips/o32_cc.ll index 70b66ef..08e5aab 100644 --- a/test/CodeGen/Mips/o32_cc.ll +++ b/test/CodeGen/Mips/o32_cc.ll @@ -1,11 +1,12 @@ -; RUN: llc -march=mips < %s | FileCheck %s - -; FIXME: Disabled because it unpredictably fails on certain platforms. 
-; REQUIRES: disabled +; RUN: llc -march=mipsel < %s | FileCheck %s +; RUN: llc -march=mipsel -mattr=+fp64 < %s | FileCheck %s +; RUN: llc -march=mipsel < %s | FileCheck -check-prefix=FP32EL %s +; RUN: llc -march=mipsel -mattr=+fp64 < %s | FileCheck -check-prefix=FP64EL %s ; $f12, $f14 -; CHECK: ldc1 $f12, %lo -; CHECK: ldc1 $f14, %lo +; CHECK-LABEL: testlowercall0: +; CHECK-DAG: ldc1 $f12, %lo +; CHECK-DAG: ldc1 $f14, %lo define void @testlowercall0() nounwind { entry: tail call void @f0(double 5.000000e+00, double 6.000000e+00) nounwind @@ -15,8 +16,9 @@ entry: declare void @f0(double, double) ; $f12, $f14 -; CHECK: lwc1 $f12, %lo -; CHECK: lwc1 $f14, %lo +; CHECK-LABEL: testlowercall1: +; CHECK-DAG: lwc1 $f12, %lo +; CHECK-DAG: lwc1 $f14, %lo define void @testlowercall1() nounwind { entry: tail call void @f1(float 8.000000e+00, float 9.000000e+00) nounwind @@ -26,8 +28,9 @@ entry: declare void @f1(float, float) ; $f12, $f14 -; CHECK: lwc1 $f12, %lo -; CHECK: ldc1 $f14, %lo +; CHECK-LABEL: testlowercall2: +; CHECK-DAG: lwc1 $f12, %lo +; CHECK-DAG: ldc1 $f14, %lo define void @testlowercall2() nounwind { entry: tail call void @f2(float 8.000000e+00, double 6.000000e+00) nounwind @@ -37,8 +40,9 @@ entry: declare void @f2(float, double) ; $f12, $f14 -; CHECK: ldc1 $f12, %lo -; CHECK: lwc1 $f14, %lo +; CHECK-LABEL: testlowercall3: +; CHECK-DAG: ldc1 $f12, %lo +; CHECK-DAG: lwc1 $f14, %lo define void @testlowercall3() nounwind { entry: tail call void @f3(double 5.000000e+00, float 9.000000e+00) nounwind @@ -48,10 +52,11 @@ entry: declare void @f3(double, float) ; $4, $5, $6, $7 -; CHECK: addiu $4, $zero, 12 -; CHECK: addiu $5, $zero, 13 -; CHECK: addiu $6, $zero, 14 -; CHECK: addiu $7, $zero, 15 +; CHECK-LABEL: testlowercall4: +; CHECK-DAG: addiu $4, $zero, 12 +; CHECK-DAG: addiu $5, $zero, 13 +; CHECK-DAG: addiu $6, $zero, 14 +; CHECK-DAG: addiu $7, $zero, 15 define void @testlowercall4() nounwind { entry: tail call void @f4(i32 12, i32 13, i32 14, i32 15) nounwind @@ 
-61,10 +66,11 @@ entry: declare void @f4(i32, i32, i32, i32) ; $f12, $6, stack -; CHECK: sw -; CHECK: sw -; CHECK: ldc1 $f12, %lo -; CHECK: addiu $6, $zero, 23 +; CHECK-LABEL: testlowercall5: +; CHECK-DAG: ldc1 $f12, %lo +; CHECK-DAG: addiu $6, $zero, 23 +; CHECK-DAG: sw ${{[a-z0-9]+}}, 16($sp) +; CHECK-DAG: sw ${{[a-z0-9]+}}, 20($sp) define void @testlowercall5() nounwind { entry: tail call void @f5(double 1.500000e+01, i32 23, double 1.700000e+01) nounwind @@ -74,9 +80,10 @@ entry: declare void @f5(double, i32, double) ; $f12, $6, $7 -; CHECK: ldc1 $f12, %lo -; CHECK: addiu $6, $zero, 33 -; CHECK: addiu $7, $zero, 24 +; CHECK-LABEL: testlowercall6: +; CHECK-DAG: ldc1 $f12, %lo +; CHECK-DAG: addiu $6, $zero, 33 +; CHECK-DAG: addiu $7, $zero, 24 define void @testlowercall6() nounwind { entry: tail call void @f6(double 2.500000e+01, i32 33, i32 24) nounwind @@ -86,9 +93,10 @@ entry: declare void @f6(double, i32, i32) ; $f12, $5, $6 -; CHECK: lwc1 $f12, %lo -; CHECK: addiu $5, $zero, 43 -; CHECK: addiu $6, $zero, 34 +; CHECK-LABEL: testlowercall7: +; CHECK-DAG: lwc1 $f12, %lo +; CHECK-DAG: addiu $5, $zero, 43 +; CHECK-DAG: addiu $6, $zero, 34 define void @testlowercall7() nounwind { entry: tail call void @f7(float 1.800000e+01, i32 43, i32 34) nounwind @@ -98,11 +106,12 @@ entry: declare void @f7(float, i32, i32) ; $4, $5, $6, stack -; CHECK: sw -; CHECK: sw -; CHECK: addiu $4, $zero, 22 -; CHECK: addiu $5, $zero, 53 -; CHECK: addiu $6, $zero, 44 +; CHECK-LABEL: testlowercall8: +; CHECK-DAG: addiu $4, $zero, 22 +; CHECK-DAG: addiu $5, $zero, 53 +; CHECK-DAG: addiu $6, $zero, 44 +; CHECK-DAG: sw ${{[a-z0-9]+}}, 16($sp) +; CHECK-DAG: sw ${{[a-z0-9]+}}, 20($sp) define void @testlowercall8() nounwind { entry: tail call void @f8(i32 22, i32 53, i32 44, double 4.000000e+00) nounwind @@ -112,10 +121,11 @@ entry: declare void @f8(i32, i32, i32, double) ; $4, $5, $6, $7 -; CHECK: addiu $4, $zero, 32 -; CHECK: addiu $5, $zero, 63 -; CHECK: addiu $6, $zero, 54 -; CHECK: ori $7 
+; CHECK-LABEL: testlowercall9: +; CHECK-DAG: addiu $4, $zero, 32 +; CHECK-DAG: addiu $5, $zero, 63 +; CHECK-DAG: addiu $6, $zero, 54 +; CHECK-DAG: lui $7, 16688 define void @testlowercall9() nounwind { entry: tail call void @f9(i32 32, i32 63, i32 54, float 1.100000e+01) nounwind @@ -125,10 +135,15 @@ entry: declare void @f9(i32, i32, i32, float) ; $4, $5, ($6, $7) -; CHECK: addiu $4, $zero, 42 -; CHECK: addiu $5, $zero, 73 -; CHECK: addiu $6, $zero, 0 -; CHECK: ori $7 +; CHECK-LABEL: testlowercall10: +; CHECK-DAG: addiu $4, $zero, 42 +; CHECK-DAG: addiu $5, $zero, 73 +; FP32EL-LABEL: testlowercall10: +; FP32EL-DAG: mfc1 $6, $f{{[0-9]+}} +; FP32EL-DAG: mfc1 $7, $f{{[0-9]+}} +; FP64EL-LABEL: testlowercall10: +; FP64EL-DAG: mfc1 $6, $f{{[0-9]+}} +; FP64EL-DAG: mfhc1 $7, $f{{[0-9]+}} define void @testlowercall10() nounwind { entry: tail call void @f10(i32 42, i32 73, double 2.700000e+01) nounwind @@ -138,9 +153,14 @@ entry: declare void @f10(i32, i32, double) ; $4, ($6, $7) -; CHECK: addiu $4, $zero, 52 -; CHECK: addiu $6, $zero, 0 -; CHECK: ori $7 +; CHECK-LABEL: testlowercall11: +; CHECK-DAG: addiu $4, $zero, 52 +; FP32EL-LABEL: testlowercall11: +; FP32EL-DAG: mfc1 $6, $f{{[0-9]+}} +; FP32EL-DAG: mfc1 $7, $f{{[0-9]+}} +; FP64EL-LABEL: testlowercall11: +; FP64EL-DAG: mfc1 $6, $f{{[0-9]+}} +; FP64EL-DAG: mfhc1 $7, $f{{[0-9]+}} define void @testlowercall11() nounwind { entry: tail call void @f11(i32 52, double 1.600000e+01) nounwind @@ -150,10 +170,11 @@ entry: declare void @f11(i32, double) ; $f12, $f14, $6, $7 -; CHECK: lwc1 $f12, %lo -; CHECK: lwc1 $f14, %lo -; CHECK: ori $6 -; CHECK: ori $7 +; CHECK-LABEL: testlowercall12: +; CHECK-DAG: lwc1 $f12, %lo +; CHECK-DAG: lwc1 $f14, %lo +; CHECK-DAG: lui $6, 16672 +; CHECK-DAG: lui $7, 16808 define void @testlowercall12() nounwind { entry: tail call void @f12(float 2.800000e+01, float 1.900000e+01, float 1.000000e+01, float 2.100000e+01) nounwind @@ -163,10 +184,11 @@ entry: declare void @f12(float, float, float, float) 
; $f12, $5, $6, $7 -; CHECK: lwc1 $f12, %lo -; CHECK: addiu $5, $zero, 83 -; CHECK: ori $6 -; CHECK: addiu $7, $zero, 25 +; CHECK-LABEL: testlowercall13: +; CHECK-DAG: lwc1 $f12, %lo +; CHECK-DAG: addiu $5, $zero, 83 +; CHECK-DAG: lui $6, 16800 +; CHECK-DAG: addiu $7, $zero, 25 define void @testlowercall13() nounwind { entry: tail call void @f13(float 3.800000e+01, i32 83, float 2.000000e+01, i32 25) nounwind @@ -177,9 +199,10 @@ entry: declare void @f13(float, i32, float, i32) ; $f12, $f14, $7 -; CHECK: ldc1 $f12, %lo -; CHECK: lwc1 $f14, %lo -; CHECK: ori $7 +; CHECK-LABEL: testlowercall14: +; CHECK-DAG: ldc1 $f12, %lo +; CHECK-DAG: lwc1 $f14, %lo +; CHECK-DAG: lui $7, 16880 define void @testlowercall14() nounwind { entry: tail call void @f14(double 3.500000e+01, float 2.900000e+01, float 3.000000e+01) nounwind @@ -189,10 +212,15 @@ entry: declare void @f14(double, float, float) ; $f12, $f14, ($6, $7) -; CHECK: lwc1 $f12, %lo -; CHECK: lwc1 $f14, %lo -; CHECK: addiu $6, $zero, 0 -; CHECK: ori $7 +; CHECK-LABEL: testlowercall15: +; CHECK-DAG: lwc1 $f12, %lo +; CHECK-DAG: lwc1 $f14, %lo +; FP32EL-LABEL: testlowercall15: +; FP32EL-DAG: mfc1 $6, $f{{[0-9]+}} +; FP32EL-DAG: mfc1 $7, $f{{[0-9]+}} +; FP64EL-LABEL: testlowercall15: +; FP64EL-DAG: mfc1 $6, $f{{[0-9]+}} +; FP64EL-DAG: mfhc1 $7, $f{{[0-9]+}} define void @testlowercall15() nounwind { entry: tail call void @f15(float 4.800000e+01, float 3.900000e+01, double 3.700000e+01) nounwind @@ -202,10 +230,11 @@ entry: declare void @f15(float, float, double) ; $4, $5, $6, $7 -; CHECK: addiu $4, $zero, 62 -; CHECK: ori $5 -; CHECK: addiu $6, $zero, 64 -; CHECK: ori $7 +; CHECK-LABEL: testlowercall16: +; CHECK-DAG: addiu $4, $zero, 62 +; CHECK-DAG: lui $5, 16964 +; CHECK-DAG: addiu $6, $zero, 64 +; CHECK-DAG: lui $7, 16888 define void @testlowercall16() nounwind { entry: tail call void @f16(i32 62, float 4.900000e+01, i32 64, float 3.100000e+01) nounwind @@ -215,10 +244,11 @@ entry: declare void @f16(i32, float, i32, 
float) ; $4, $5, $6, $7 -; CHECK: addiu $4, $zero, 72 -; CHECK: ori $5 -; CHECK: addiu $6, $zero, 74 -; CHECK: addiu $7, $zero, 35 +; CHECK-LABEL: testlowercall17: +; CHECK-DAG: addiu $4, $zero, 72 +; CHECK-DAG: lui $5, 17004 +; CHECK-DAG: addiu $6, $zero, 74 +; CHECK-DAG: addiu $7, $zero, 35 define void @testlowercall17() nounwind { entry: tail call void @f17(i32 72, float 5.900000e+01, i32 74, i32 35) nounwind @@ -228,10 +258,11 @@ entry: declare void @f17(i32, float, i32, i32) ; $4, $5, $6, $7 -; CHECK: addiu $4, $zero, 82 -; CHECK: addiu $5, $zero, 93 -; CHECK: ori $6 -; CHECK: addiu $7, $zero, 45 +; CHECK-LABEL: testlowercall18: +; CHECK-DAG: addiu $4, $zero, 82 +; CHECK-DAG: addiu $5, $zero, 93 +; CHECK-DAG: lui $6, 16928 +; CHECK-DAG: addiu $7, $zero, 45 define void @testlowercall18() nounwind { entry: tail call void @f18(i32 82, i32 93, float 4.000000e+01, i32 45) nounwind @@ -242,11 +273,16 @@ declare void @f18(i32, i32, float, i32) ; $4, ($6, $7), stack -; CHECK: sw -; CHECK: sw -; CHECK: addiu $4, $zero, 92 -; CHECK: addiu $6, $zero, 0 -; CHECK: ori $7 +; CHECK-LABEL: testlowercall20: +; CHECK-DAG: addiu $4, $zero, 92 +; CHECK-DAG: sw ${{[a-z0-9]+}}, 16($sp) +; CHECK-DAG: sw ${{[a-z0-9]+}}, 20($sp) +; FP32EL-LABEL: testlowercall20: +; FP32EL-DAG: mfc1 $6, $f{{[0-9]+}} +; FP32EL-DAG: mfc1 $7, $f{{[0-9]+}} +; FP64EL-LABEL: testlowercall20: +; FP64EL-DAG: mfc1 $6, $f{{[0-9]+}} +; FP64EL-DAG: mfhc1 $7, $f{{[0-9]+}} define void @testlowercall20() nounwind { entry: tail call void @f20(i32 92, double 2.600000e+01, double 4.700000e+01) nounwind @@ -256,8 +292,9 @@ entry: declare void @f20(i32, double, double) ; $f12, $5 -; CHECK: lwc1 $f12, %lo -; CHECK: addiu $5, $zero, 103 +; CHECK-LABEL: testlowercall21: +; CHECK-DAG: lwc1 $f12, %lo +; CHECK-DAG: addiu $5, $zero, 103 define void @testlowercall21() nounwind { entry: tail call void @f21(float 5.800000e+01, i32 103) nounwind @@ -267,10 +304,15 @@ entry: declare void @f21(float, i32) ; $f12, $5, ($6, $7) -; 
CHECK: lwc1 $f12, %lo -; CHECK: addiu $5, $zero, 113 -; CHECK: addiu $6, $zero, 0 -; CHECK: ori $7 +; CHECK-LABEL: testlowercall22: +; CHECK-DAG: lwc1 $f12, %lo +; CHECK-DAG: addiu $5, $zero, 113 +; FP32EL-LABEL: testlowercall22: +; FP32EL-DAG: mfc1 $6, $f{{[0-9]+}} +; FP32EL-DAG: mfc1 $7, $f{{[0-9]+}} +; FP64EL-LABEL: testlowercall22: +; FP64EL-DAG: mfc1 $6, $f{{[0-9]+}} +; FP64EL-DAG: mfhc1 $7, $f{{[0-9]+}} define void @testlowercall22() nounwind { entry: tail call void @f22(float 6.800000e+01, i32 113, double 5.700000e+01) nounwind @@ -280,8 +322,9 @@ entry: declare void @f22(float, i32, double) ; $f12, f6 -; CHECK: ldc1 $f12, %lo -; CHECK: addiu $6, $zero, 123 +; CHECK-LABEL: testlowercall23: +; CHECK-DAG: ldc1 $f12, %lo +; CHECK-DAG: addiu $6, $zero, 123 define void @testlowercall23() nounwind { entry: tail call void @f23(double 4.500000e+01, i32 123) nounwind @@ -291,10 +334,11 @@ entry: declare void @f23(double, i32) ; $f12,$6, stack -; CHECK: sw -; CHECK: sw -; CHECK: ldc1 $f12, %lo -; CHECK: addiu $6, $zero, 133 +; CHECK-LABEL: testlowercall24: +; CHECK-DAG: ldc1 $f12, %lo +; CHECK-DAG: addiu $6, $zero, 133 +; CHECK-DAG: sw ${{[a-z0-9]+}}, 16($sp) +; CHECK-DAG: sw ${{[a-z0-9]+}}, 20($sp) define void @testlowercall24() nounwind { entry: tail call void @f24(double 5.500000e+01, i32 133, double 6.700000e+01) nounwind @@ -303,19 +347,19 @@ entry: declare void @f24(double, i32, double) -; CHECK: lwc1 $f12, %lo -; lwc1 $f12, %lo -; CHECK: lwc1 $f14, %lo -; CHECK: ori $6 -; CHECK: ori $7 -; CHECK: lwc1 $f12, %lo -; CHECK: addiu $5, $zero, 83 -; CHECK: ori $6 -; CHECK: addiu $7, $zero, 25 -; CHECK: addiu $4, $zero, 82 -; CHECK: addiu $5, $zero, 93 -; CHECK: ori $6 -; CHECK: addiu $7, $zero, 45 +; CHECK-LABEL: testlowercall25: +; CHECK-DAG: lwc1 $f12, %lo +; CHECK-DAG: lwc1 $f14, %lo +; CHECK-DAG: lui $6 +; CHECK-DAG: lui $7 +; CHECK-DAG: lwc1 $f12, %lo +; CHECK-DAG: addiu $5, $zero, 83 +; CHECK-DAG: lui $6 +; CHECK-DAG: addiu $7, $zero, 25 +; CHECK-DAG: addiu $4, 
$zero, 82 +; CHECK-DAG: addiu $5, $zero, 93 +; CHECK-DAG: lui $6 +; CHECK-DAG: addiu $7, $zero, 45 define void @testlowercall25() nounwind { entry: tail call void @f12(float 2.800000e+01, float 1.900000e+01, float 1.000000e+01, float 2.100000e+01) nounwind diff --git a/test/CodeGen/Mips/o32_cc_byval.ll b/test/CodeGen/Mips/o32_cc_byval.ll index 0a8f85f..5db47ac 100644 --- a/test/CodeGen/Mips/o32_cc_byval.ll +++ b/test/CodeGen/Mips/o32_cc_byval.ll @@ -10,22 +10,23 @@ define void @f1() nounwind { entry: -; CHECK: lw $[[R1:[0-9]+]], %got(f1.s1) -; CHECK: addiu $[[R0:[0-9]+]], $[[R1]], %lo(f1.s1) -; CHECK: lw $[[R7:[0-9]+]], 12($[[R0]]) -; CHECK: lw $[[R3:[0-9]+]], 16($[[R0]]) -; CHECK: lw $[[R4:[0-9]+]], 20($[[R0]]) -; CHECK: lw $[[R5:[0-9]+]], 24($[[R0]]) -; CHECK: lw $[[R6:[0-9]+]], 28($[[R0]]) -; CHECK: sw $[[R6]], 36($sp) -; CHECK: sw $[[R5]], 32($sp) -; CHECK: sw $[[R4]], 28($sp) -; CHECK: sw $[[R3]], 24($sp) -; CHECK: sw $[[R7]], 20($sp) -; CHECK: lw $[[R2:[0-9]+]], 8($[[R0]]) -; CHECK: sw $[[R2]], 16($sp) -; CHECK: lw $6, %lo(f1.s1)($[[R1]]) -; CHECK: lw $7, 4($[[R0]]) +; CHECK-LABEL: f1: +; CHECK-DAG: lw $[[R1:[0-9]+]], %got(f1.s1) +; CHECK-DAG: addiu $[[R0:[0-9]+]], $[[R1]], %lo(f1.s1) +; CHECK-DAG: lw $[[R7:[0-9]+]], 12($[[R0]]) +; CHECK-DAG: lw $[[R3:[0-9]+]], 16($[[R0]]) +; CHECK-DAG: lw $[[R4:[0-9]+]], 20($[[R0]]) +; CHECK-DAG: lw $[[R5:[0-9]+]], 24($[[R0]]) +; CHECK-DAG: lw $[[R6:[0-9]+]], 28($[[R0]]) +; CHECK-DAG: sw $[[R6]], 36($sp) +; CHECK-DAG: sw $[[R5]], 32($sp) +; CHECK-DAG: sw $[[R4]], 28($sp) +; CHECK-DAG: sw $[[R3]], 24($sp) +; CHECK-DAG: sw $[[R7]], 20($sp) +; CHECK-DAG: lw $[[R2:[0-9]+]], 8($[[R0]]) +; CHECK-DAG: sw $[[R2]], 16($sp) +; CHECK-DAG: lw $6, %lo(f1.s1)($[[R1]]) +; CHECK-DAG: lw $7, 4($[[R0]]) %agg.tmp10 = alloca %struct.S3, align 4 call void @callee1(float 2.000000e+01, %struct.S1* byval bitcast (%0* @f1.s1 to %struct.S1*)) nounwind call void @callee2(%struct.S2* byval @f1.s2) nounwind @@ -61,17 +62,17 @@ entry: ; CHECK: mfc1 $6, 
$f[[F0]] %i2 = getelementptr inbounds %struct.S1* %s1, i32 0, i32 5 - %tmp = load i32* %i2, align 4, !tbaa !0 + %tmp = load i32* %i2, align 4 %d = getelementptr inbounds %struct.S1* %s1, i32 0, i32 4 - %tmp1 = load double* %d, align 8, !tbaa !3 + %tmp1 = load double* %d, align 8 %ll = getelementptr inbounds %struct.S1* %s1, i32 0, i32 3 - %tmp2 = load i64* %ll, align 8, !tbaa !4 + %tmp2 = load i64* %ll, align 8 %i = getelementptr inbounds %struct.S1* %s1, i32 0, i32 2 - %tmp3 = load i32* %i, align 4, !tbaa !0 + %tmp3 = load i32* %i, align 4 %s = getelementptr inbounds %struct.S1* %s1, i32 0, i32 1 - %tmp4 = load i16* %s, align 2, !tbaa !5 + %tmp4 = load i16* %s, align 2 %c = getelementptr inbounds %struct.S1* %s1, i32 0, i32 0 - %tmp5 = load i8* %c, align 1, !tbaa !1 + %tmp5 = load i8* %c, align 1 tail call void @callee4(i32 %tmp, double %tmp1, i64 %tmp2, i32 %tmp3, i16 signext %tmp4, i8 signext %tmp5, float %f) nounwind ret void } @@ -90,9 +91,9 @@ entry: ; CHECK: sw $[[R0]], 24($sp) %arrayidx = getelementptr inbounds %struct.S2* %s2, i32 0, i32 0, i32 0 - %tmp = load i32* %arrayidx, align 4, !tbaa !0 + %tmp = load i32* %arrayidx, align 4 %arrayidx2 = getelementptr inbounds %struct.S2* %s2, i32 0, i32 0, i32 3 - %tmp3 = load i32* %arrayidx2, align 4, !tbaa !0 + %tmp3 = load i32* %arrayidx2, align 4 tail call void @callee4(i32 %tmp, double 2.000000e+00, i64 3, i32 %tmp3, i16 signext 4, i8 signext 5, float 6.000000e+00) nounwind ret void } @@ -110,11 +111,11 @@ entry: ; CHECK: sw $[[R1]], 24($sp) %i = getelementptr inbounds %struct.S1* %s1, i32 0, i32 2 - %tmp = load i32* %i, align 4, !tbaa !0 + %tmp = load i32* %i, align 4 %i2 = getelementptr inbounds %struct.S1* %s1, i32 0, i32 5 - %tmp1 = load i32* %i2, align 4, !tbaa !0 + %tmp1 = load i32* %i2, align 4 %c = getelementptr inbounds %struct.S3* %s3, i32 0, i32 0 - %tmp2 = load i8* %c, align 1, !tbaa !1 + %tmp2 = load i8* %c, align 1 tail call void @callee4(i32 %tmp, double 2.000000e+00, i64 3, i32 %tmp1, i16 
signext 4, i8 signext %tmp2, float 6.000000e+00) nounwind ret void } @@ -128,10 +129,3 @@ entry: } declare void @f6(%struct.S4* nocapture byval, i64) - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA", null} -!3 = metadata !{metadata !"double", metadata !1} -!4 = metadata !{metadata !"long long", metadata !1} -!5 = metadata !{metadata !"short", metadata !1} diff --git a/test/CodeGen/Mips/powif64_16.ll b/test/CodeGen/Mips/powif64_16.ll new file mode 100644 index 0000000..35a7ca9 --- /dev/null +++ b/test/CodeGen/Mips/powif64_16.ll @@ -0,0 +1,26 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mips16-hard-float -soft-float -relocation-model=static < %s | FileCheck %s + +declare float @llvm.powi.f32(float %Val, i32 %power) +declare double @llvm.powi.f64(double %Val, i32 %power) + +define float @foo_pow_f32(float %y, i32 %p) { + %1 = tail call float @llvm.powi.f32(float %y, i32 %p) +; CHECK-NOT: .ent __call_stub_fp_llvm.powi.f32 +; CHECK-NOT: {{.*}} jal llvm.powi.f32 + ret float %1 +} + +define double @foo_pow_f64(double %y, i32 %p) { + %1 = tail call double @llvm.powi.f64(double %y, i32 %p) +; CHECK-NOT: .ent __call_stub_fp_llvm.powi.f64 +; CHECK-NOT: {{.*}} jal llvm.powi.f64 + ret double %1 +} + +attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } +attributes #1 = { nounwind readonly } + +!0 = metadata !{metadata !"double", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} +!3 = metadata !{metadata !"int", metadata !1} diff --git a/test/CodeGen/Mips/ra-allocatable.ll b/test/CodeGen/Mips/ra-allocatable.ll index 7621788..afc5cb0 100644 --- 
a/test/CodeGen/Mips/ra-allocatable.ll +++ b/test/CodeGen/Mips/ra-allocatable.ll @@ -98,191 +98,186 @@ entry: ; CHECK: lw $ra, {{[0-9]+}}($sp) # 4-byte Folded Reload ; CHECK: jr $ra - %0 = load i32* @a0, align 4, !tbaa !0 - %1 = load i32** @b0, align 4, !tbaa !3 - store i32 %0, i32* %1, align 4, !tbaa !0 - %2 = load i32* @a1, align 4, !tbaa !0 - %3 = load i32** @b1, align 4, !tbaa !3 - store i32 %2, i32* %3, align 4, !tbaa !0 - %4 = load i32* @a2, align 4, !tbaa !0 - %5 = load i32** @b2, align 4, !tbaa !3 - store i32 %4, i32* %5, align 4, !tbaa !0 - %6 = load i32* @a3, align 4, !tbaa !0 - %7 = load i32** @b3, align 4, !tbaa !3 - store i32 %6, i32* %7, align 4, !tbaa !0 - %8 = load i32* @a4, align 4, !tbaa !0 - %9 = load i32** @b4, align 4, !tbaa !3 - store i32 %8, i32* %9, align 4, !tbaa !0 - %10 = load i32* @a5, align 4, !tbaa !0 - %11 = load i32** @b5, align 4, !tbaa !3 - store i32 %10, i32* %11, align 4, !tbaa !0 - %12 = load i32* @a6, align 4, !tbaa !0 - %13 = load i32** @b6, align 4, !tbaa !3 - store i32 %12, i32* %13, align 4, !tbaa !0 - %14 = load i32* @a7, align 4, !tbaa !0 - %15 = load i32** @b7, align 4, !tbaa !3 - store i32 %14, i32* %15, align 4, !tbaa !0 - %16 = load i32* @a8, align 4, !tbaa !0 - %17 = load i32** @b8, align 4, !tbaa !3 - store i32 %16, i32* %17, align 4, !tbaa !0 - %18 = load i32* @a9, align 4, !tbaa !0 - %19 = load i32** @b9, align 4, !tbaa !3 - store i32 %18, i32* %19, align 4, !tbaa !0 - %20 = load i32* @a10, align 4, !tbaa !0 - %21 = load i32** @b10, align 4, !tbaa !3 - store i32 %20, i32* %21, align 4, !tbaa !0 - %22 = load i32* @a11, align 4, !tbaa !0 - %23 = load i32** @b11, align 4, !tbaa !3 - store i32 %22, i32* %23, align 4, !tbaa !0 - %24 = load i32* @a12, align 4, !tbaa !0 - %25 = load i32** @b12, align 4, !tbaa !3 - store i32 %24, i32* %25, align 4, !tbaa !0 - %26 = load i32* @a13, align 4, !tbaa !0 - %27 = load i32** @b13, align 4, !tbaa !3 - store i32 %26, i32* %27, align 4, !tbaa !0 - %28 = load i32* @a14, align 4, !tbaa 
!0 - %29 = load i32** @b14, align 4, !tbaa !3 - store i32 %28, i32* %29, align 4, !tbaa !0 - %30 = load i32* @a15, align 4, !tbaa !0 - %31 = load i32** @b15, align 4, !tbaa !3 - store i32 %30, i32* %31, align 4, !tbaa !0 - %32 = load i32* @a16, align 4, !tbaa !0 - %33 = load i32** @b16, align 4, !tbaa !3 - store i32 %32, i32* %33, align 4, !tbaa !0 - %34 = load i32* @a17, align 4, !tbaa !0 - %35 = load i32** @b17, align 4, !tbaa !3 - store i32 %34, i32* %35, align 4, !tbaa !0 - %36 = load i32* @a18, align 4, !tbaa !0 - %37 = load i32** @b18, align 4, !tbaa !3 - store i32 %36, i32* %37, align 4, !tbaa !0 - %38 = load i32* @a19, align 4, !tbaa !0 - %39 = load i32** @b19, align 4, !tbaa !3 - store i32 %38, i32* %39, align 4, !tbaa !0 - %40 = load i32* @a20, align 4, !tbaa !0 - %41 = load i32** @b20, align 4, !tbaa !3 - store i32 %40, i32* %41, align 4, !tbaa !0 - %42 = load i32* @a21, align 4, !tbaa !0 - %43 = load i32** @b21, align 4, !tbaa !3 - store i32 %42, i32* %43, align 4, !tbaa !0 - %44 = load i32* @a22, align 4, !tbaa !0 - %45 = load i32** @b22, align 4, !tbaa !3 - store i32 %44, i32* %45, align 4, !tbaa !0 - %46 = load i32* @a23, align 4, !tbaa !0 - %47 = load i32** @b23, align 4, !tbaa !3 - store i32 %46, i32* %47, align 4, !tbaa !0 - %48 = load i32* @a24, align 4, !tbaa !0 - %49 = load i32** @b24, align 4, !tbaa !3 - store i32 %48, i32* %49, align 4, !tbaa !0 - %50 = load i32* @a25, align 4, !tbaa !0 - %51 = load i32** @b25, align 4, !tbaa !3 - store i32 %50, i32* %51, align 4, !tbaa !0 - %52 = load i32* @a26, align 4, !tbaa !0 - %53 = load i32** @b26, align 4, !tbaa !3 - store i32 %52, i32* %53, align 4, !tbaa !0 - %54 = load i32* @a27, align 4, !tbaa !0 - %55 = load i32** @b27, align 4, !tbaa !3 - store i32 %54, i32* %55, align 4, !tbaa !0 - %56 = load i32* @a28, align 4, !tbaa !0 - %57 = load i32** @b28, align 4, !tbaa !3 - store i32 %56, i32* %57, align 4, !tbaa !0 - %58 = load i32* @a29, align 4, !tbaa !0 - %59 = load i32** @b29, align 4, !tbaa !3 - 
store i32 %58, i32* %59, align 4, !tbaa !0 - %60 = load i32* @a0, align 4, !tbaa !0 - %61 = load i32** @c0, align 4, !tbaa !3 - store i32 %60, i32* %61, align 4, !tbaa !0 - %62 = load i32* @a1, align 4, !tbaa !0 - %63 = load i32** @c1, align 4, !tbaa !3 - store i32 %62, i32* %63, align 4, !tbaa !0 - %64 = load i32* @a2, align 4, !tbaa !0 - %65 = load i32** @c2, align 4, !tbaa !3 - store i32 %64, i32* %65, align 4, !tbaa !0 - %66 = load i32* @a3, align 4, !tbaa !0 - %67 = load i32** @c3, align 4, !tbaa !3 - store i32 %66, i32* %67, align 4, !tbaa !0 - %68 = load i32* @a4, align 4, !tbaa !0 - %69 = load i32** @c4, align 4, !tbaa !3 - store i32 %68, i32* %69, align 4, !tbaa !0 - %70 = load i32* @a5, align 4, !tbaa !0 - %71 = load i32** @c5, align 4, !tbaa !3 - store i32 %70, i32* %71, align 4, !tbaa !0 - %72 = load i32* @a6, align 4, !tbaa !0 - %73 = load i32** @c6, align 4, !tbaa !3 - store i32 %72, i32* %73, align 4, !tbaa !0 - %74 = load i32* @a7, align 4, !tbaa !0 - %75 = load i32** @c7, align 4, !tbaa !3 - store i32 %74, i32* %75, align 4, !tbaa !0 - %76 = load i32* @a8, align 4, !tbaa !0 - %77 = load i32** @c8, align 4, !tbaa !3 - store i32 %76, i32* %77, align 4, !tbaa !0 - %78 = load i32* @a9, align 4, !tbaa !0 - %79 = load i32** @c9, align 4, !tbaa !3 - store i32 %78, i32* %79, align 4, !tbaa !0 - %80 = load i32* @a10, align 4, !tbaa !0 - %81 = load i32** @c10, align 4, !tbaa !3 - store i32 %80, i32* %81, align 4, !tbaa !0 - %82 = load i32* @a11, align 4, !tbaa !0 - %83 = load i32** @c11, align 4, !tbaa !3 - store i32 %82, i32* %83, align 4, !tbaa !0 - %84 = load i32* @a12, align 4, !tbaa !0 - %85 = load i32** @c12, align 4, !tbaa !3 - store i32 %84, i32* %85, align 4, !tbaa !0 - %86 = load i32* @a13, align 4, !tbaa !0 - %87 = load i32** @c13, align 4, !tbaa !3 - store i32 %86, i32* %87, align 4, !tbaa !0 - %88 = load i32* @a14, align 4, !tbaa !0 - %89 = load i32** @c14, align 4, !tbaa !3 - store i32 %88, i32* %89, align 4, !tbaa !0 - %90 = load i32* @a15, 
align 4, !tbaa !0 - %91 = load i32** @c15, align 4, !tbaa !3 - store i32 %90, i32* %91, align 4, !tbaa !0 - %92 = load i32* @a16, align 4, !tbaa !0 - %93 = load i32** @c16, align 4, !tbaa !3 - store i32 %92, i32* %93, align 4, !tbaa !0 - %94 = load i32* @a17, align 4, !tbaa !0 - %95 = load i32** @c17, align 4, !tbaa !3 - store i32 %94, i32* %95, align 4, !tbaa !0 - %96 = load i32* @a18, align 4, !tbaa !0 - %97 = load i32** @c18, align 4, !tbaa !3 - store i32 %96, i32* %97, align 4, !tbaa !0 - %98 = load i32* @a19, align 4, !tbaa !0 - %99 = load i32** @c19, align 4, !tbaa !3 - store i32 %98, i32* %99, align 4, !tbaa !0 - %100 = load i32* @a20, align 4, !tbaa !0 - %101 = load i32** @c20, align 4, !tbaa !3 - store i32 %100, i32* %101, align 4, !tbaa !0 - %102 = load i32* @a21, align 4, !tbaa !0 - %103 = load i32** @c21, align 4, !tbaa !3 - store i32 %102, i32* %103, align 4, !tbaa !0 - %104 = load i32* @a22, align 4, !tbaa !0 - %105 = load i32** @c22, align 4, !tbaa !3 - store i32 %104, i32* %105, align 4, !tbaa !0 - %106 = load i32* @a23, align 4, !tbaa !0 - %107 = load i32** @c23, align 4, !tbaa !3 - store i32 %106, i32* %107, align 4, !tbaa !0 - %108 = load i32* @a24, align 4, !tbaa !0 - %109 = load i32** @c24, align 4, !tbaa !3 - store i32 %108, i32* %109, align 4, !tbaa !0 - %110 = load i32* @a25, align 4, !tbaa !0 - %111 = load i32** @c25, align 4, !tbaa !3 - store i32 %110, i32* %111, align 4, !tbaa !0 - %112 = load i32* @a26, align 4, !tbaa !0 - %113 = load i32** @c26, align 4, !tbaa !3 - store i32 %112, i32* %113, align 4, !tbaa !0 - %114 = load i32* @a27, align 4, !tbaa !0 - %115 = load i32** @c27, align 4, !tbaa !3 - store i32 %114, i32* %115, align 4, !tbaa !0 - %116 = load i32* @a28, align 4, !tbaa !0 - %117 = load i32** @c28, align 4, !tbaa !3 - store i32 %116, i32* %117, align 4, !tbaa !0 - %118 = load i32* @a29, align 4, !tbaa !0 - %119 = load i32** @c29, align 4, !tbaa !3 - store i32 %118, i32* %119, align 4, !tbaa !0 - %120 = load i32* @a0, align 4, 
!tbaa !0 + %0 = load i32* @a0, align 4 + %1 = load i32** @b0, align 4 + store i32 %0, i32* %1, align 4 + %2 = load i32* @a1, align 4 + %3 = load i32** @b1, align 4 + store i32 %2, i32* %3, align 4 + %4 = load i32* @a2, align 4 + %5 = load i32** @b2, align 4 + store i32 %4, i32* %5, align 4 + %6 = load i32* @a3, align 4 + %7 = load i32** @b3, align 4 + store i32 %6, i32* %7, align 4 + %8 = load i32* @a4, align 4 + %9 = load i32** @b4, align 4 + store i32 %8, i32* %9, align 4 + %10 = load i32* @a5, align 4 + %11 = load i32** @b5, align 4 + store i32 %10, i32* %11, align 4 + %12 = load i32* @a6, align 4 + %13 = load i32** @b6, align 4 + store i32 %12, i32* %13, align 4 + %14 = load i32* @a7, align 4 + %15 = load i32** @b7, align 4 + store i32 %14, i32* %15, align 4 + %16 = load i32* @a8, align 4 + %17 = load i32** @b8, align 4 + store i32 %16, i32* %17, align 4 + %18 = load i32* @a9, align 4 + %19 = load i32** @b9, align 4 + store i32 %18, i32* %19, align 4 + %20 = load i32* @a10, align 4 + %21 = load i32** @b10, align 4 + store i32 %20, i32* %21, align 4 + %22 = load i32* @a11, align 4 + %23 = load i32** @b11, align 4 + store i32 %22, i32* %23, align 4 + %24 = load i32* @a12, align 4 + %25 = load i32** @b12, align 4 + store i32 %24, i32* %25, align 4 + %26 = load i32* @a13, align 4 + %27 = load i32** @b13, align 4 + store i32 %26, i32* %27, align 4 + %28 = load i32* @a14, align 4 + %29 = load i32** @b14, align 4 + store i32 %28, i32* %29, align 4 + %30 = load i32* @a15, align 4 + %31 = load i32** @b15, align 4 + store i32 %30, i32* %31, align 4 + %32 = load i32* @a16, align 4 + %33 = load i32** @b16, align 4 + store i32 %32, i32* %33, align 4 + %34 = load i32* @a17, align 4 + %35 = load i32** @b17, align 4 + store i32 %34, i32* %35, align 4 + %36 = load i32* @a18, align 4 + %37 = load i32** @b18, align 4 + store i32 %36, i32* %37, align 4 + %38 = load i32* @a19, align 4 + %39 = load i32** @b19, align 4 + store i32 %38, i32* %39, align 4 + %40 = load i32* @a20, align 
4 + %41 = load i32** @b20, align 4 + store i32 %40, i32* %41, align 4 + %42 = load i32* @a21, align 4 + %43 = load i32** @b21, align 4 + store i32 %42, i32* %43, align 4 + %44 = load i32* @a22, align 4 + %45 = load i32** @b22, align 4 + store i32 %44, i32* %45, align 4 + %46 = load i32* @a23, align 4 + %47 = load i32** @b23, align 4 + store i32 %46, i32* %47, align 4 + %48 = load i32* @a24, align 4 + %49 = load i32** @b24, align 4 + store i32 %48, i32* %49, align 4 + %50 = load i32* @a25, align 4 + %51 = load i32** @b25, align 4 + store i32 %50, i32* %51, align 4 + %52 = load i32* @a26, align 4 + %53 = load i32** @b26, align 4 + store i32 %52, i32* %53, align 4 + %54 = load i32* @a27, align 4 + %55 = load i32** @b27, align 4 + store i32 %54, i32* %55, align 4 + %56 = load i32* @a28, align 4 + %57 = load i32** @b28, align 4 + store i32 %56, i32* %57, align 4 + %58 = load i32* @a29, align 4 + %59 = load i32** @b29, align 4 + store i32 %58, i32* %59, align 4 + %60 = load i32* @a0, align 4 + %61 = load i32** @c0, align 4 + store i32 %60, i32* %61, align 4 + %62 = load i32* @a1, align 4 + %63 = load i32** @c1, align 4 + store i32 %62, i32* %63, align 4 + %64 = load i32* @a2, align 4 + %65 = load i32** @c2, align 4 + store i32 %64, i32* %65, align 4 + %66 = load i32* @a3, align 4 + %67 = load i32** @c3, align 4 + store i32 %66, i32* %67, align 4 + %68 = load i32* @a4, align 4 + %69 = load i32** @c4, align 4 + store i32 %68, i32* %69, align 4 + %70 = load i32* @a5, align 4 + %71 = load i32** @c5, align 4 + store i32 %70, i32* %71, align 4 + %72 = load i32* @a6, align 4 + %73 = load i32** @c6, align 4 + store i32 %72, i32* %73, align 4 + %74 = load i32* @a7, align 4 + %75 = load i32** @c7, align 4 + store i32 %74, i32* %75, align 4 + %76 = load i32* @a8, align 4 + %77 = load i32** @c8, align 4 + store i32 %76, i32* %77, align 4 + %78 = load i32* @a9, align 4 + %79 = load i32** @c9, align 4 + store i32 %78, i32* %79, align 4 + %80 = load i32* @a10, align 4 + %81 = load 
i32** @c10, align 4 + store i32 %80, i32* %81, align 4 + %82 = load i32* @a11, align 4 + %83 = load i32** @c11, align 4 + store i32 %82, i32* %83, align 4 + %84 = load i32* @a12, align 4 + %85 = load i32** @c12, align 4 + store i32 %84, i32* %85, align 4 + %86 = load i32* @a13, align 4 + %87 = load i32** @c13, align 4 + store i32 %86, i32* %87, align 4 + %88 = load i32* @a14, align 4 + %89 = load i32** @c14, align 4 + store i32 %88, i32* %89, align 4 + %90 = load i32* @a15, align 4 + %91 = load i32** @c15, align 4 + store i32 %90, i32* %91, align 4 + %92 = load i32* @a16, align 4 + %93 = load i32** @c16, align 4 + store i32 %92, i32* %93, align 4 + %94 = load i32* @a17, align 4 + %95 = load i32** @c17, align 4 + store i32 %94, i32* %95, align 4 + %96 = load i32* @a18, align 4 + %97 = load i32** @c18, align 4 + store i32 %96, i32* %97, align 4 + %98 = load i32* @a19, align 4 + %99 = load i32** @c19, align 4 + store i32 %98, i32* %99, align 4 + %100 = load i32* @a20, align 4 + %101 = load i32** @c20, align 4 + store i32 %100, i32* %101, align 4 + %102 = load i32* @a21, align 4 + %103 = load i32** @c21, align 4 + store i32 %102, i32* %103, align 4 + %104 = load i32* @a22, align 4 + %105 = load i32** @c22, align 4 + store i32 %104, i32* %105, align 4 + %106 = load i32* @a23, align 4 + %107 = load i32** @c23, align 4 + store i32 %106, i32* %107, align 4 + %108 = load i32* @a24, align 4 + %109 = load i32** @c24, align 4 + store i32 %108, i32* %109, align 4 + %110 = load i32* @a25, align 4 + %111 = load i32** @c25, align 4 + store i32 %110, i32* %111, align 4 + %112 = load i32* @a26, align 4 + %113 = load i32** @c26, align 4 + store i32 %112, i32* %113, align 4 + %114 = load i32* @a27, align 4 + %115 = load i32** @c27, align 4 + store i32 %114, i32* %115, align 4 + %116 = load i32* @a28, align 4 + %117 = load i32** @c28, align 4 + store i32 %116, i32* %117, align 4 + %118 = load i32* @a29, align 4 + %119 = load i32** @c29, align 4 + store i32 %118, i32* %119, align 4 + 
%120 = load i32* @a0, align 4 ret i32 %120 } - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} -!3 = metadata !{metadata !"any pointer", metadata !1} diff --git a/test/CodeGen/Mips/rotate.ll b/test/CodeGen/Mips/rotate.ll index 4f3cfb7..813bbdf 100644 --- a/test/CodeGen/Mips/rotate.ll +++ b/test/CodeGen/Mips/rotate.ll @@ -1,6 +1,8 @@ ; RUN: llc -march=mips -mcpu=mips32r2 < %s | FileCheck %s +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32r2 -mattr=+mips16 -soft-float -mips16-hard-float < %s | FileCheck %s -check-prefix=mips16 ; CHECK: rotrv $2, $4 +; mips16: .ent rot0 define i32 @rot0(i32 %a, i32 %b) nounwind readnone { entry: %shl = shl i32 %a, %b @@ -11,6 +13,7 @@ entry: } ; CHECK: rotr $2, $4, 22 +; mips16: .ent rot1 define i32 @rot1(i32 %a) nounwind readnone { entry: %shl = shl i32 %a, 10 @@ -20,6 +23,7 @@ entry: } ; CHECK: rotrv $2, $4, $5 +; mips16: .ent rot2 define i32 @rot2(i32 %a, i32 %b) nounwind readnone { entry: %shr = lshr i32 %a, %b @@ -30,6 +34,7 @@ entry: } ; CHECK: rotr $2, $4, 10 +; mips16: .ent rot3 define i32 @rot3(i32 %a) nounwind readnone { entry: %shr = lshr i32 %a, 10 diff --git a/test/CodeGen/Mips/sel1c.ll b/test/CodeGen/Mips/sel1c.ll new file mode 100644 index 0000000..4c4784d --- /dev/null +++ b/test/CodeGen/Mips/sel1c.ll @@ -0,0 +1,21 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=cond-b-short + +@i = global i32 1, align 4 +@j = global i32 2, align 4 +@k = common global i32 0, align 4 + +; Function Attrs: nounwind optsize +define void @t() #0 { +entry: + %0 = load i32* @i, align 4 + %1 = load i32* @j, align 4 + %cmp = icmp eq i32 %0, %1 + %cond = select i1 %cmp, i32 1, i32 2 + store i32 %cond, i32* @k, align 4 + ret void +; cond-b-short: bteqz $BB0_{{[0-9]+}} # 16 bit inst +} + 
+attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } + + diff --git a/test/CodeGen/Mips/sel2c.ll b/test/CodeGen/Mips/sel2c.ll new file mode 100644 index 0000000..25dfaa9 --- /dev/null +++ b/test/CodeGen/Mips/sel2c.ll @@ -0,0 +1,21 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=cond-b-short + +@i = global i32 1, align 4 +@j = global i32 2, align 4 +@k = common global i32 0, align 4 + +; Function Attrs: nounwind optsize +define void @t() #0 { +entry: + %0 = load i32* @i, align 4 + %1 = load i32* @j, align 4 + %cmp = icmp ne i32 %0, %1 + %cond = select i1 %cmp, i32 1, i32 2 + store i32 %cond, i32* @k, align 4 +; cond-b-short: btnez $BB0_{{[0-9]+}} # 16 bit inst + ret void +} + +attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } + + diff --git a/test/CodeGen/Mips/simplebr.ll b/test/CodeGen/Mips/simplebr.ll new file mode 100644 index 0000000..a1d6367 --- /dev/null +++ b/test/CodeGen/Mips/simplebr.ll @@ -0,0 +1,37 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mips16-hard-float -soft-float -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC16 + +; ModuleID = 'simplebr.c' +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64" +target triple = "mips--linux-gnu" + +@i = common global i32 0, align 4 + +; Function Attrs: nounwind +define void @foo() #0 { +entry: + %0 = load i32* @i, align 4 + %tobool = icmp ne i32 %0, 0 + br i1 %tobool, label %if.then, label %if.else + +if.then: ; 
preds = %entry + call void bitcast (void (...)* @goo to void ()*)() + br label %if.end + +if.else: ; preds = %entry + call void bitcast (void (...)* @hoo to void ()*)() + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + +; CHECK-STATIC16: b $BB{{[0-9]+}}_{{[0-9]+}} # 16 bit inst + +declare void @goo(...) #1 + +declare void @hoo(...) #1 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" } + + diff --git a/test/CodeGen/Mips/stack-alignment.ll b/test/CodeGen/Mips/stack-alignment.ll new file mode 100644 index 0000000..b18f966 --- /dev/null +++ b/test/CodeGen/Mips/stack-alignment.ll @@ -0,0 +1,13 @@ +; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=32 +; RUN: llc -march=mipsel -mattr=+fp64 < %s | FileCheck %s -check-prefix=32 +; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s -check-prefix=64 + +; 32: addiu $sp, $sp, -8 +; 64: addiu $sp, $sp, -16 + +define i32 @foo1() #0 { +entry: + ret i32 14 +} + +attributes #0 = { "no-frame-pointer-elim"="true" } diff --git a/test/CodeGen/Mips/tailcall.ll b/test/CodeGen/Mips/tailcall.ll index bcd33fc..30f47ab 100644 --- a/test/CodeGen/Mips/tailcall.ll +++ b/test/CodeGen/Mips/tailcall.ll @@ -243,3 +243,16 @@ entry: ret i32 %call } +; Check that there is a chain edge between the load and store nodes. 
+; +; PIC32-LABEL: caller14: +; PIC32: lw ${{[0-9]+}}, 16($sp) +; PIC32: sw $4, 16($sp) + +define void @caller14(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) { +entry: + tail call void @callee14(i32 %e, i32 %b, i32 %c, i32 %d, i32 %a) + ret void +} + +declare void @callee14(i32, i32, i32, i32, i32) diff --git a/test/CodeGen/Mips/tnaked.ll b/test/CodeGen/Mips/tnaked.ll index edf1ecf..08f1ab5 100644 --- a/test/CodeGen/Mips/tnaked.ll +++ b/test/CodeGen/Mips/tnaked.ll @@ -25,5 +25,5 @@ entry: ; CHECK: .fmask 0x00000000,0 ; CHECK: addiu $sp, $sp, -8 -attributes #0 = { naked noinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { naked noinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/NVPTX/bug17709.ll b/test/CodeGen/NVPTX/bug17709.ll new file mode 100644 index 0000000..92f0fcb1 --- /dev/null +++ b/test/CodeGen/NVPTX/bug17709.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +; ModuleID = '__kernelgen_main_module' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" +target triple = "nvptx64-nvidia-cuda" + +define linker_private 
ptx_device { double, double } @__utils1_MOD_trace(%"struct.array2_complex(kind=8).43.5.57"* noalias %m) { +entry: + ;unreachable + %t0 = insertvalue {double, double} undef, double 1.0, 0 + %t1 = insertvalue {double, double} %t0, double 1.0, 1 + ret { double, double } %t1 +} + +%struct.descriptor_dimension.0.52 = type { i64, i64, i64 } +%"struct.array2_complex(kind=8).37.18.70" = type { i8*, i64, i64, [2 x %struct.descriptor_dimension.0.52] } +%"struct.array2_complex(kind=8).43.5.57" = type { i8*, i64, i64, [2 x %struct.descriptor_dimension.0.52] } +@replacementOfAlloca8 = private global %"struct.array2_complex(kind=8).37.18.70" zeroinitializer, align 4096 + +; CHECK: .visible .entry __kernelgen_main +define ptx_kernel void @__kernelgen_main(i32* nocapture %args, i32*) { +entry: + %1 = tail call ptx_device { double, double } bitcast ({ double, double } (%"struct.array2_complex(kind=8).43.5.57"*)* @__utils1_MOD_trace to { double, double } (%"struct.array2_complex(kind=8).37.18.70"*)*)(%"struct.array2_complex(kind=8).37.18.70"* noalias @replacementOfAlloca8) + ret void +} + diff --git a/test/CodeGen/NVPTX/callchain.ll b/test/CodeGen/NVPTX/callchain.ll new file mode 100644 index 0000000..60b118b --- /dev/null +++ b/test/CodeGen/NVPTX/callchain.ll @@ -0,0 +1,10 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +target triple = "nvptx" + +define void @foo(i8* %ptr) { + %fnptr = bitcast i8* %ptr to void ()* +; CHECK: prototype_0 : .callprototype ()_ () + tail call void %fnptr() + ret void +} diff --git a/test/CodeGen/NVPTX/constant-vectors.ll b/test/CodeGen/NVPTX/constant-vectors.ll new file mode 100644 index 0000000..208c2d9 --- /dev/null +++ b/test/CodeGen/NVPTX/constant-vectors.ll @@ -0,0 +1,6 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +target triple = "nvptx-nvidia-cuda" + +; CHECK: .visible .global .align 16 .b8 testArray[8] = {0, 1, 2, 3, 4, 5, 6, 7}; +@testArray = constant [2 x <4 x i8>] [<4 x i8> <i8 0, i8 1, i8 2, i8 3>, <4 x i8> 
<i8 4, i8 5, i8 6, i8 7>], align 16 diff --git a/test/CodeGen/NVPTX/implicit-def.ll b/test/CodeGen/NVPTX/implicit-def.ll new file mode 100644 index 0000000..06d3d56 --- /dev/null +++ b/test/CodeGen/NVPTX/implicit-def.ll @@ -0,0 +1,9 @@ +; RUN: llc < %s -O0 -march=nvptx -mcpu=sm_20 -asm-verbose=1 | FileCheck %s + +; CHECK: // implicit-def: %f[[F0:[0-9]+]] +; CHECK: add.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f[[F0]]; +define float @foo(float %a) { + %ret = fadd float %a, undef + ret float %ret +} + diff --git a/test/CodeGen/NVPTX/inline-asm.ll b/test/CodeGen/NVPTX/inline-asm.ll new file mode 100644 index 0000000..d76eb42 --- /dev/null +++ b/test/CodeGen/NVPTX/inline-asm.ll @@ -0,0 +1,9 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + + +define float @test(float %x) { +entry: +; CHECK: ex2.approx.ftz.f32 %f{{[0-9]+}}, %f{{[0-9]+}} + %0 = call float asm "ex2.approx.ftz.f32 $0, $1;", "=f,f"(float %x) + ret float %0 +} diff --git a/test/CodeGen/NVPTX/lit.local.cfg b/test/CodeGen/NVPTX/lit.local.cfg index 7180c84..85cf8c2 100644 --- a/test/CodeGen/NVPTX/lit.local.cfg +++ b/test/CodeGen/NVPTX/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - targets = set(config.root.targets_to_build.split()) if not 'NVPTX' in targets: config.unsupported = True diff --git a/test/CodeGen/NVPTX/pr17529.ll b/test/CodeGen/NVPTX/pr17529.ll new file mode 100644 index 0000000..a162142 --- /dev/null +++ b/test/CodeGen/NVPTX/pr17529.ll @@ -0,0 +1,38 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" +target triple = "nvptx64-nvidia-cuda" + +; Function Attrs: nounwind +; CHECK: .func kernelgen_memcpy +define ptx_device void @kernelgen_memcpy(i8* nocapture %dst) #0 { +entry: + br i1 undef, label %for.end, label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ %index.next, 
%vector.body ], [ 0, %entry ] + %scevgep9 = getelementptr i8* %dst, i64 %index + %scevgep910 = bitcast i8* %scevgep9 to <4 x i8>* + store <4 x i8> undef, <4 x i8>* %scevgep910, align 1 + %index.next = add i64 %index, 4 + %0 = icmp eq i64 undef, %index.next + br i1 %0, label %middle.block, label %vector.body + +middle.block: ; preds = %vector.body + br i1 undef, label %for.end, label %for.body.preheader1 + +for.body.preheader1: ; preds = %middle.block + %scevgep2 = getelementptr i8* %dst, i64 0 + br label %for.body + +for.body: ; preds = %for.body, %for.body.preheader1 + %lsr.iv3 = phi i8* [ %scevgep2, %for.body.preheader1 ], [ %scevgep4, %for.body ] + store i8 undef, i8* %lsr.iv3, align 1 + %scevgep4 = getelementptr i8* %lsr.iv3, i64 1 + br label %for.body + +for.end: ; preds = %middle.block, %entry + ret void +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/NVPTX/vec8.ll b/test/CodeGen/NVPTX/vec8.ll new file mode 100644 index 0000000..03f5cfc --- /dev/null +++ b/test/CodeGen/NVPTX/vec8.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +target triple = "nvptx-unknown-cuda" + +; CHECK: .visible .func foo +define void @foo(<8 x i8> %a, i8* %b) { + %t0 = extractelement <8 x i8> %a, i32 0 +; CHECK-DAG: ld.param.v4.u8 +; CHECK-DAG: ld.param.u32 + store i8 %t0, i8* %b + ret void +} + diff --git a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll index 097611a..b0c37b8 100644 --- a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll +++ b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=powerpc-apple-darwin -mcpu=g4 | FileCheck %s +; RUN: llc < %s -mtriple=powerpc-apple-darwin -mcpu=g4 -break-anti-dependencies=none | FileCheck %s ; ModuleID = 'hh.c' target 
datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32" target triple = "powerpc-apple-darwin9.6" diff --git a/test/CodeGen/PowerPC/2013-07-01-PHIElimBug.ll b/test/CodeGen/PowerPC/2013-07-01-PHIElimBug.ll index 635062b..9bf25c8 100644 --- a/test/CodeGen/PowerPC/2013-07-01-PHIElimBug.ll +++ b/test/CodeGen/PowerPC/2013-07-01-PHIElimBug.ll @@ -25,4 +25,4 @@ if.end1018: ; preds = %for.end957, %for.en ret void } -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/PowerPC/Frames-alloca.ll b/test/CodeGen/PowerPC/Frames-alloca.ll index 28dd08c..4588bc0 100644 --- a/test/CodeGen/PowerPC/Frames-alloca.ll +++ b/test/CodeGen/PowerPC/Frames-alloca.ll @@ -1,10 +1,10 @@ -; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=PPC32 -; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=PPC64 -; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=PPC32-NOFP -; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=PPC64-NOFP -; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=PPC32 -; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=PPC32-RS -; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=PPC32-RS-NOFP +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | FileCheck %s 
-check-prefix=CHECK-PPC32 +; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=CHECK-PPC64 +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=CHECK-PPC32-NOFP +; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=CHECK-PPC64-NOFP +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=CHECK-PPC32 +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=CHECK-PPC32-RS +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=CHECK-PPC32-RS-NOFP ; CHECK-PPC32: stw r31, -4(r1) ; CHECK-PPC32: lwz r1, 0(r1) diff --git a/test/CodeGen/PowerPC/addrfuncstr.ll b/test/CodeGen/PowerPC/addrfuncstr.ll index 60c02d4..6750b5c 100644 --- a/test/CodeGen/PowerPC/addrfuncstr.ll +++ b/test/CodeGen/PowerPC/addrfuncstr.ll @@ -23,5 +23,5 @@ declare i64 @fread(i8*, i64, i64, %struct._IO_FILE*) #1 ; CHECK: .section .data.rel.ro ; CHECK: .quad fread -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff 
--git a/test/CodeGen/PowerPC/asym-regclass-copy.ll b/test/CodeGen/PowerPC/asym-regclass-copy.ll index d04a6c9..b19125b 100644 --- a/test/CodeGen/PowerPC/asym-regclass-copy.ll +++ b/test/CodeGen/PowerPC/asym-regclass-copy.ll @@ -52,5 +52,5 @@ declare void @free(i8* nocapture) #0 declare i64 @strtol(i8*, i8** nocapture, i32 signext) #0 -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind } diff --git a/test/CodeGen/PowerPC/bdzlr.ll b/test/CodeGen/PowerPC/bdzlr.ll index 656a858..e487558 100644 --- a/test/CodeGen/PowerPC/bdzlr.ll +++ b/test/CodeGen/PowerPC/bdzlr.ll @@ -35,15 +35,15 @@ for.body: ; preds = %for.body.for.body_c %0 = phi %struct.lua_TValue.17.692* [ undef, %for.body.lr.ph ], [ %.pre, %for.body.for.body_crit_edge ] %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body.for.body_crit_edge ] %tt = getelementptr inbounds %struct.lua_TValue.17.692* %0, i64 %indvars.iv, i32 1 - %1 = load i32* %tt, align 4, !tbaa !0 - store i32 %1, i32* undef, align 4, !tbaa !0 + %1 = load i32* %tt, align 4 + store i32 %1, i32* undef, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n br i1 %exitcond, label %for.end, label %for.body.for.body_crit_edge for.body.for.body_crit_edge: ; preds = %for.body - %.pre = load %struct.lua_TValue.17.692** undef, align 8, !tbaa !3 + %.pre = load %struct.lua_TValue.17.692** undef, align 8 br label %for.body for.end: ; preds = %for.body, %if.end, %entry @@ -57,8 +57,3 @@ for.end: ; preds = %for.body, %if.end, } 
attributes #0 = { nounwind } - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} -!3 = metadata !{metadata !"any pointer", metadata !1} diff --git a/test/CodeGen/PowerPC/copysignl.ll b/test/CodeGen/PowerPC/copysignl.ll new file mode 100644 index 0000000..4b801b7 --- /dev/null +++ b/test/CodeGen/PowerPC/copysignl.ll @@ -0,0 +1,67 @@ +; RUN: llc -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define double @foo_d_ll(ppc_fp128 %a, ppc_fp128 %b) #0 { +entry: + %call = tail call ppc_fp128 @copysignl(ppc_fp128 %a, ppc_fp128 %b) #0 + %conv = fptrunc ppc_fp128 %call to double + ret double %conv + +; CHECK-LABEL: @foo_d_ll +; CHECK: fcpsgn 1, 3, 1 +; CHECK: blr +} + +declare ppc_fp128 @copysignl(ppc_fp128, ppc_fp128) #0 + +define double @foo_dl(double %a, ppc_fp128 %b) #0 { +entry: + %conv = fptrunc ppc_fp128 %b to double + %call = tail call double @copysign(double %a, double %conv) #0 + ret double %call + +; CHECK-LABEL: @foo_dl +; CHECK: fcpsgn 1, 2, 1 +; CHECK: blr +} + +declare double @copysign(double, double) #0 + +define ppc_fp128 @foo_ll(double %a, ppc_fp128 %b) #0 { +entry: + %conv = fpext double %a to ppc_fp128 + %call = tail call ppc_fp128 @copysignl(ppc_fp128 %conv, ppc_fp128 %b) #0 + ret ppc_fp128 %call + +; CHECK-LABEL: @foo_ll +; CHECK: bl copysignl +; CHECK: blr +} + +define ppc_fp128 @foo_ld(double %a, double %b) #0 { +entry: + %conv = fpext double %a to ppc_fp128 + %conv1 = fpext double %b to ppc_fp128 + %call = tail call ppc_fp128 @copysignl(ppc_fp128 %conv, ppc_fp128 %conv1) #0 + ret ppc_fp128 %call + +; CHECK-LABEL: @foo_ld +; CHECK: bl copysignl +; CHECK: blr +} + +define ppc_fp128 @foo_lf(double %a, float %b) #0 { +entry: + %conv = fpext double %a to 
ppc_fp128 + %conv1 = fpext float %b to ppc_fp128 + %call = tail call ppc_fp128 @copysignl(ppc_fp128 %conv, ppc_fp128 %conv1) #0 + ret ppc_fp128 %call + +; CHECK-LABEL: @foo_lf +; CHECK: bl copysignl +; CHECK: blr +} + +attributes #0 = { nounwind readnone } + diff --git a/test/CodeGen/PowerPC/cr-spills.ll b/test/CodeGen/PowerPC/cr-spills.ll index d6df7a2..be0dbad 100644 --- a/test/CodeGen/PowerPC/cr-spills.ll +++ b/test/CodeGen/PowerPC/cr-spills.ll @@ -53,11 +53,11 @@ for.cond286.preheader: ; preds = %for.body252 for.cond290.preheader: ; preds = %for.end520, %for.cond286.preheader %srcptr.31595 = phi i16* [ getelementptr inbounds ([768 x i16]* @SetupFastFullPelSearch.orig_pels, i64 0, i64 0), %for.cond286.preheader ], [ null, %for.end520 ] - %1 = load i32* undef, align 4, !tbaa !0 - %2 = load i32* @weight_luma, align 4, !tbaa !0 - %3 = load i32* @wp_luma_round, align 4, !tbaa !0 - %4 = load i32* @luma_log_weight_denom, align 4, !tbaa !0 - %5 = load i32* @offset_luma, align 4, !tbaa !0 + %1 = load i32* undef, align 4 + %2 = load i32* @weight_luma, align 4 + %3 = load i32* @wp_luma_round, align 4 + %4 = load i32* @luma_log_weight_denom, align 4 + %5 = load i32* @offset_luma, align 4 %incdec.ptr502.sum = add i64 undef, 16 br label %for.body293 @@ -68,7 +68,7 @@ for.body293: ; preds = %for.body293, %for.c %LineSadBlk1.01587 = phi i32 [ 0, %for.cond290.preheader ], [ %add402, %for.body293 ] %LineSadBlk3.01586 = phi i32 [ 0, %for.cond290.preheader ], [ %add514, %for.body293 ] %LineSadBlk2.01585 = phi i32 [ 0, %for.cond290.preheader ], [ %add458, %for.body293 ] - %6 = load i16* %refptr.11590, align 2, !tbaa !3 + %6 = load i16* %refptr.11590, align 2 %conv294 = zext i16 %6 to i32 %mul295 = mul nsw i32 %conv294, %2 %add296 = add nsw i32 %mul295, %3 @@ -78,16 +78,16 @@ for.body293: ; preds = %for.body293, %for.c %cond.i.i1514 = select i1 %cmp.i.i1513, i32 %add297, i32 0 %cmp.i4.i1515 = icmp slt i32 %cond.i.i1514, %1 %cond.i5.i1516 = select i1 %cmp.i4.i1515, i32 %cond.i.i1514, 
i32 %1 - %7 = load i16* %srcptr.41591, align 2, !tbaa !3 + %7 = load i16* %srcptr.41591, align 2 %conv300 = zext i16 %7 to i32 %sub301 = sub nsw i32 %cond.i5.i1516, %conv300 %idxprom302 = sext i32 %sub301 to i64 %arrayidx303 = getelementptr inbounds i32* %cond, i64 %idxprom302 - %8 = load i32* %arrayidx303, align 4, !tbaa !0 + %8 = load i32* %arrayidx303, align 4 %add304 = add nsw i32 %8, %LineSadBlk0.01588 - %9 = load i32* undef, align 4, !tbaa !0 + %9 = load i32* undef, align 4 %add318 = add nsw i32 %add304, %9 - %10 = load i16* undef, align 2, !tbaa !3 + %10 = load i16* undef, align 2 %conv321 = zext i16 %10 to i32 %mul322 = mul nsw i32 %conv321, %2 %add323 = add nsw i32 %mul322, %3 @@ -100,22 +100,22 @@ for.body293: ; preds = %for.body293, %for.c %sub329 = sub nsw i32 %cond.i5.i1508, 0 %idxprom330 = sext i32 %sub329 to i64 %arrayidx331 = getelementptr inbounds i32* %cond, i64 %idxprom330 - %11 = load i32* %arrayidx331, align 4, !tbaa !0 + %11 = load i32* %arrayidx331, align 4 %add332 = add nsw i32 %add318, %11 %cmp.i.i1501 = icmp sgt i32 undef, 0 %cond.i.i1502 = select i1 %cmp.i.i1501, i32 undef, i32 0 %cmp.i4.i1503 = icmp slt i32 %cond.i.i1502, %1 %cond.i5.i1504 = select i1 %cmp.i4.i1503, i32 %cond.i.i1502, i32 %1 %incdec.ptr341 = getelementptr inbounds i16* %srcptr.41591, i64 4 - %12 = load i16* null, align 2, !tbaa !3 + %12 = load i16* null, align 2 %conv342 = zext i16 %12 to i32 %sub343 = sub nsw i32 %cond.i5.i1504, %conv342 %idxprom344 = sext i32 %sub343 to i64 %arrayidx345 = getelementptr inbounds i32* %cond, i64 %idxprom344 - %13 = load i32* %arrayidx345, align 4, !tbaa !0 + %13 = load i32* %arrayidx345, align 4 %add346 = add nsw i32 %add332, %13 %incdec.ptr348 = getelementptr inbounds i16* %refptr.11590, i64 5 - %14 = load i16* null, align 2, !tbaa !3 + %14 = load i16* null, align 2 %conv349 = zext i16 %14 to i32 %mul350 = mul nsw i32 %conv349, %2 %add351 = add nsw i32 %mul350, %3 @@ -126,15 +126,15 @@ for.body293: ; preds = %for.body293, %for.c 
%cmp.i4.i1499 = icmp slt i32 %cond.i.i1498, %1 %cond.i5.i1500 = select i1 %cmp.i4.i1499, i32 %cond.i.i1498, i32 %1 %incdec.ptr355 = getelementptr inbounds i16* %srcptr.41591, i64 5 - %15 = load i16* %incdec.ptr341, align 2, !tbaa !3 + %15 = load i16* %incdec.ptr341, align 2 %conv356 = zext i16 %15 to i32 %sub357 = sub nsw i32 %cond.i5.i1500, %conv356 %idxprom358 = sext i32 %sub357 to i64 %arrayidx359 = getelementptr inbounds i32* %cond, i64 %idxprom358 - %16 = load i32* %arrayidx359, align 4, !tbaa !0 + %16 = load i32* %arrayidx359, align 4 %add360 = add nsw i32 %16, %LineSadBlk1.01587 %incdec.ptr362 = getelementptr inbounds i16* %refptr.11590, i64 6 - %17 = load i16* %incdec.ptr348, align 2, !tbaa !3 + %17 = load i16* %incdec.ptr348, align 2 %conv363 = zext i16 %17 to i32 %mul364 = mul nsw i32 %conv363, %2 %add365 = add nsw i32 %mul364, %3 @@ -145,15 +145,15 @@ for.body293: ; preds = %for.body293, %for.c %cmp.i4.i1495 = icmp slt i32 %cond.i.i1494, %1 %cond.i5.i1496 = select i1 %cmp.i4.i1495, i32 %cond.i.i1494, i32 %1 %incdec.ptr369 = getelementptr inbounds i16* %srcptr.41591, i64 6 - %18 = load i16* %incdec.ptr355, align 2, !tbaa !3 + %18 = load i16* %incdec.ptr355, align 2 %conv370 = zext i16 %18 to i32 %sub371 = sub nsw i32 %cond.i5.i1496, %conv370 %idxprom372 = sext i32 %sub371 to i64 %arrayidx373 = getelementptr inbounds i32* %cond, i64 %idxprom372 - %19 = load i32* %arrayidx373, align 4, !tbaa !0 + %19 = load i32* %arrayidx373, align 4 %add374 = add nsw i32 %add360, %19 %incdec.ptr376 = getelementptr inbounds i16* %refptr.11590, i64 7 - %20 = load i16* %incdec.ptr362, align 2, !tbaa !3 + %20 = load i16* %incdec.ptr362, align 2 %conv377 = zext i16 %20 to i32 %mul378 = mul nsw i32 %conv377, %2 %add379 = add nsw i32 %mul378, %3 @@ -164,14 +164,14 @@ for.body293: ; preds = %for.body293, %for.c %cmp.i4.i1491 = icmp slt i32 %cond.i.i1490, %1 %cond.i5.i1492 = select i1 %cmp.i4.i1491, i32 %cond.i.i1490, i32 %1 %incdec.ptr383 = getelementptr inbounds i16* 
%srcptr.41591, i64 7 - %21 = load i16* %incdec.ptr369, align 2, !tbaa !3 + %21 = load i16* %incdec.ptr369, align 2 %conv384 = zext i16 %21 to i32 %sub385 = sub nsw i32 %cond.i5.i1492, %conv384 %idxprom386 = sext i32 %sub385 to i64 %arrayidx387 = getelementptr inbounds i32* %cond, i64 %idxprom386 - %22 = load i32* %arrayidx387, align 4, !tbaa !0 + %22 = load i32* %arrayidx387, align 4 %add388 = add nsw i32 %add374, %22 - %23 = load i16* %incdec.ptr376, align 2, !tbaa !3 + %23 = load i16* %incdec.ptr376, align 2 %conv391 = zext i16 %23 to i32 %mul392 = mul nsw i32 %conv391, %2 %add395 = add nsw i32 0, %5 @@ -180,25 +180,25 @@ for.body293: ; preds = %for.body293, %for.c %cmp.i4.i1487 = icmp slt i32 %cond.i.i1486, %1 %cond.i5.i1488 = select i1 %cmp.i4.i1487, i32 %cond.i.i1486, i32 %1 %incdec.ptr397 = getelementptr inbounds i16* %srcptr.41591, i64 8 - %24 = load i16* %incdec.ptr383, align 2, !tbaa !3 + %24 = load i16* %incdec.ptr383, align 2 %conv398 = zext i16 %24 to i32 %sub399 = sub nsw i32 %cond.i5.i1488, %conv398 %idxprom400 = sext i32 %sub399 to i64 %arrayidx401 = getelementptr inbounds i32* %cond, i64 %idxprom400 - %25 = load i32* %arrayidx401, align 4, !tbaa !0 + %25 = load i32* %arrayidx401, align 4 %add402 = add nsw i32 %add388, %25 %incdec.ptr404 = getelementptr inbounds i16* %refptr.11590, i64 9 %cmp.i4.i1483 = icmp slt i32 undef, %1 %cond.i5.i1484 = select i1 %cmp.i4.i1483, i32 undef, i32 %1 - %26 = load i16* %incdec.ptr397, align 2, !tbaa !3 + %26 = load i16* %incdec.ptr397, align 2 %conv412 = zext i16 %26 to i32 %sub413 = sub nsw i32 %cond.i5.i1484, %conv412 %idxprom414 = sext i32 %sub413 to i64 %arrayidx415 = getelementptr inbounds i32* %cond, i64 %idxprom414 - %27 = load i32* %arrayidx415, align 4, !tbaa !0 + %27 = load i32* %arrayidx415, align 4 %add416 = add nsw i32 %27, %LineSadBlk2.01585 %incdec.ptr418 = getelementptr inbounds i16* %refptr.11590, i64 10 - %28 = load i16* %incdec.ptr404, align 2, !tbaa !3 + %28 = load i16* %incdec.ptr404, align 2 
%conv419 = zext i16 %28 to i32 %mul420 = mul nsw i32 %conv419, %2 %add421 = add nsw i32 %mul420, %3 @@ -212,10 +212,10 @@ for.body293: ; preds = %for.body293, %for.c %sub427 = sub nsw i32 %cond.i5.i1480, 0 %idxprom428 = sext i32 %sub427 to i64 %arrayidx429 = getelementptr inbounds i32* %cond, i64 %idxprom428 - %29 = load i32* %arrayidx429, align 4, !tbaa !0 + %29 = load i32* %arrayidx429, align 4 %add430 = add nsw i32 %add416, %29 %incdec.ptr432 = getelementptr inbounds i16* %refptr.11590, i64 11 - %30 = load i16* %incdec.ptr418, align 2, !tbaa !3 + %30 = load i16* %incdec.ptr418, align 2 %conv433 = zext i16 %30 to i32 %mul434 = mul nsw i32 %conv433, %2 %add435 = add nsw i32 %mul434, %3 @@ -225,15 +225,15 @@ for.body293: ; preds = %for.body293, %for.c %cond.i.i1474 = select i1 %cmp.i.i1473, i32 %add437, i32 0 %cmp.i4.i1475 = icmp slt i32 %cond.i.i1474, %1 %cond.i5.i1476 = select i1 %cmp.i4.i1475, i32 %cond.i.i1474, i32 %1 - %31 = load i16* %incdec.ptr425, align 2, !tbaa !3 + %31 = load i16* %incdec.ptr425, align 2 %conv440 = zext i16 %31 to i32 %sub441 = sub nsw i32 %cond.i5.i1476, %conv440 %idxprom442 = sext i32 %sub441 to i64 %arrayidx443 = getelementptr inbounds i32* %cond, i64 %idxprom442 - %32 = load i32* %arrayidx443, align 4, !tbaa !0 + %32 = load i32* %arrayidx443, align 4 %add444 = add nsw i32 %add430, %32 %incdec.ptr446 = getelementptr inbounds i16* %refptr.11590, i64 12 - %33 = load i16* %incdec.ptr432, align 2, !tbaa !3 + %33 = load i16* %incdec.ptr432, align 2 %conv447 = zext i16 %33 to i32 %mul448 = mul nsw i32 %conv447, %2 %add449 = add nsw i32 %mul448, %3 @@ -244,15 +244,15 @@ for.body293: ; preds = %for.body293, %for.c %cmp.i4.i1471 = icmp slt i32 %cond.i.i1470, %1 %cond.i5.i1472 = select i1 %cmp.i4.i1471, i32 %cond.i.i1470, i32 %1 %incdec.ptr453 = getelementptr inbounds i16* %srcptr.41591, i64 12 - %34 = load i16* undef, align 2, !tbaa !3 + %34 = load i16* undef, align 2 %conv454 = zext i16 %34 to i32 %sub455 = sub nsw i32 %cond.i5.i1472, %conv454 
%idxprom456 = sext i32 %sub455 to i64 %arrayidx457 = getelementptr inbounds i32* %cond, i64 %idxprom456 - %35 = load i32* %arrayidx457, align 4, !tbaa !0 + %35 = load i32* %arrayidx457, align 4 %add458 = add nsw i32 %add444, %35 %incdec.ptr460 = getelementptr inbounds i16* %refptr.11590, i64 13 - %36 = load i16* %incdec.ptr446, align 2, !tbaa !3 + %36 = load i16* %incdec.ptr446, align 2 %conv461 = zext i16 %36 to i32 %mul462 = mul nsw i32 %conv461, %2 %add463 = add nsw i32 %mul462, %3 @@ -263,12 +263,12 @@ for.body293: ; preds = %for.body293, %for.c %cmp.i4.i1467 = icmp slt i32 %cond.i.i1466, %1 %cond.i5.i1468 = select i1 %cmp.i4.i1467, i32 %cond.i.i1466, i32 %1 %incdec.ptr467 = getelementptr inbounds i16* %srcptr.41591, i64 13 - %37 = load i16* %incdec.ptr453, align 2, !tbaa !3 + %37 = load i16* %incdec.ptr453, align 2 %conv468 = zext i16 %37 to i32 %sub469 = sub nsw i32 %cond.i5.i1468, %conv468 %idxprom470 = sext i32 %sub469 to i64 %arrayidx471 = getelementptr inbounds i32* %cond, i64 %idxprom470 - %38 = load i32* %arrayidx471, align 4, !tbaa !0 + %38 = load i32* %arrayidx471, align 4 %add472 = add nsw i32 %38, %LineSadBlk3.01586 %incdec.ptr474 = getelementptr inbounds i16* %refptr.11590, i64 14 %add477 = add nsw i32 0, %3 @@ -279,15 +279,15 @@ for.body293: ; preds = %for.body293, %for.c %cmp.i4.i1463 = icmp slt i32 %cond.i.i1462, %1 %cond.i5.i1464 = select i1 %cmp.i4.i1463, i32 %cond.i.i1462, i32 %1 %incdec.ptr481 = getelementptr inbounds i16* %srcptr.41591, i64 14 - %39 = load i16* %incdec.ptr467, align 2, !tbaa !3 + %39 = load i16* %incdec.ptr467, align 2 %conv482 = zext i16 %39 to i32 %sub483 = sub nsw i32 %cond.i5.i1464, %conv482 %idxprom484 = sext i32 %sub483 to i64 %arrayidx485 = getelementptr inbounds i32* %cond, i64 %idxprom484 - %40 = load i32* %arrayidx485, align 4, !tbaa !0 + %40 = load i32* %arrayidx485, align 4 %add486 = add nsw i32 %add472, %40 %incdec.ptr488 = getelementptr inbounds i16* %refptr.11590, i64 15 - %41 = load i16* %incdec.ptr474, 
align 2, !tbaa !3 + %41 = load i16* %incdec.ptr474, align 2 %conv489 = zext i16 %41 to i32 %mul490 = mul nsw i32 %conv489, %2 %add491 = add nsw i32 %mul490, %3 @@ -298,14 +298,14 @@ for.body293: ; preds = %for.body293, %for.c %cmp.i4.i1459 = icmp slt i32 %cond.i.i1458, %1 %cond.i5.i1460 = select i1 %cmp.i4.i1459, i32 %cond.i.i1458, i32 %1 %incdec.ptr495 = getelementptr inbounds i16* %srcptr.41591, i64 15 - %42 = load i16* %incdec.ptr481, align 2, !tbaa !3 + %42 = load i16* %incdec.ptr481, align 2 %conv496 = zext i16 %42 to i32 %sub497 = sub nsw i32 %cond.i5.i1460, %conv496 %idxprom498 = sext i32 %sub497 to i64 %arrayidx499 = getelementptr inbounds i32* %cond, i64 %idxprom498 - %43 = load i32* %arrayidx499, align 4, !tbaa !0 + %43 = load i32* %arrayidx499, align 4 %add500 = add nsw i32 %add486, %43 - %44 = load i16* %incdec.ptr488, align 2, !tbaa !3 + %44 = load i16* %incdec.ptr488, align 2 %conv503 = zext i16 %44 to i32 %mul504 = mul nsw i32 %conv503, %2 %add505 = add nsw i32 %mul504, %3 @@ -315,22 +315,22 @@ for.body293: ; preds = %for.body293, %for.c %cond.i.i1454 = select i1 %cmp.i.i1453, i32 %add507, i32 0 %cmp.i4.i1455 = icmp slt i32 %cond.i.i1454, %1 %cond.i5.i1456 = select i1 %cmp.i4.i1455, i32 %cond.i.i1454, i32 %1 - %45 = load i16* %incdec.ptr495, align 2, !tbaa !3 + %45 = load i16* %incdec.ptr495, align 2 %conv510 = zext i16 %45 to i32 %sub511 = sub nsw i32 %cond.i5.i1456, %conv510 %idxprom512 = sext i32 %sub511 to i64 %arrayidx513 = getelementptr inbounds i32* %cond, i64 %idxprom512 - %46 = load i32* %arrayidx513, align 4, !tbaa !0 + %46 = load i32* %arrayidx513, align 4 %add514 = add nsw i32 %add500, %46 %add.ptr517 = getelementptr inbounds i16* %refptr.11590, i64 %incdec.ptr502.sum %exitcond1692 = icmp eq i32 undef, 4 br i1 %exitcond1692, label %for.end520, label %for.body293 for.end520: ; preds = %for.body293 - store i32 %add346, i32* undef, align 4, !tbaa !0 - store i32 %add402, i32* undef, align 4, !tbaa !0 - store i32 %add458, i32* undef, align 4, 
!tbaa !0 - store i32 %add514, i32* null, align 4, !tbaa !0 + store i32 %add346, i32* undef, align 4 + store i32 %add402, i32* undef, align 4 + store i32 %add458, i32* undef, align 4 + store i32 %add514, i32* null, align 4 br i1 undef, label %for.end543, label %for.cond290.preheader for.end543: ; preds = %for.end520 @@ -400,10 +400,5 @@ for.end999: ; preds = %for.inc997 ret void } -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind } - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} -!3 = metadata !{metadata !"short", metadata !1} diff --git a/test/CodeGen/PowerPC/ctr-cleanup.ll b/test/CodeGen/PowerPC/ctr-cleanup.ll index 04e4ffb..1a669eb 100644 --- a/test/CodeGen/PowerPC/ctr-cleanup.ll +++ b/test/CodeGen/PowerPC/ctr-cleanup.ll @@ -22,4 +22,4 @@ for.end: ; preds = %for.body, %entry ret void } -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/PowerPC/ctrloop-cpsgn.ll b/test/CodeGen/PowerPC/ctrloop-cpsgn.ll new file mode 100644 index 0000000..2f04409 --- /dev/null +++ b/test/CodeGen/PowerPC/ctrloop-cpsgn.ll @@ 
-0,0 +1,28 @@ +; RUN: llc < %s -mcpu=ppc | FileCheck %s + +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32" +target triple = "powerpc-unknown-linux-gnu" + +define ppc_fp128 @foo(ppc_fp128* nocapture %n, ppc_fp128 %d) nounwind readonly { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %x.05 = phi ppc_fp128 [ %d, %entry ], [ %conv, %for.body ] + %arrayidx = getelementptr inbounds ppc_fp128* %n, i32 %i.06 + %0 = load ppc_fp128* %arrayidx, align 8 + %conv = tail call ppc_fp128 @copysignl(ppc_fp128 %x.05, ppc_fp128 %d) nounwind readonly + %inc = add nsw i32 %i.06, 1 + %exitcond = icmp eq i32 %inc, 2048 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret ppc_fp128 %conv +} + +declare ppc_fp128 @copysignl(ppc_fp128, ppc_fp128) #0 + +; CHECK: @foo +; CHECK-NOT: mtctr + diff --git a/test/CodeGen/PowerPC/ctrloop-le.ll b/test/CodeGen/PowerPC/ctrloop-le.ll index 21a6fab..7b8185e 100644 --- a/test/CodeGen/PowerPC/ctrloop-le.ll +++ b/test/CodeGen/PowerPC/ctrloop-le.ll @@ -32,8 +32,7 @@ for.end: ; preds = %for.body, %entry ; CHECK: test_pos2_ir_sle -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos2_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -62,8 +61,7 @@ for.end: ; preds = %for.body, %entry ; CHECK: test_pos4_ir_sle -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos4_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -92,8 +90,7 @@ for.end: ; preds = %for.body, %entry ; CHECK: test_pos8_ir_sle -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos8_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -122,8 +119,7 @@ for.end: ; preds = %for.body, %entry ; CHECK: test_pos16_ir_sle -; FIXME: Support this loop! 
-; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos16_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -443,4 +439,3 @@ for.body: ; preds = %for.body.lr.ph, %fo for.end: ; preds = %for.body, %entry ret void } - diff --git a/test/CodeGen/PowerPC/ctrloop-lt.ll b/test/CodeGen/PowerPC/ctrloop-lt.ll index 448716d..eaab61a 100644 --- a/test/CodeGen/PowerPC/ctrloop-lt.ll +++ b/test/CodeGen/PowerPC/ctrloop-lt.ll @@ -33,7 +33,7 @@ for.end: ; preds = %for.body, %entry ; CHECK: test_pos2_ir_slt ; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos2_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -63,7 +63,7 @@ for.end: ; preds = %for.body, %entry ; CHECK: test_pos4_ir_slt ; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos4_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -92,8 +92,7 @@ for.end: ; preds = %for.body, %entry ; CHECK: test_pos8_ir_slt -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos8_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -122,8 +121,7 @@ for.end: ; preds = %for.body, %entry ; CHECK: test_pos16_ir_slt -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos16_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -326,8 +324,7 @@ for.end: ; preds = %for.body, %entry ; CHECK: test_pos2_rr_slt -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos2_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -356,8 +353,7 @@ for.end: ; preds = %for.body, %entry ; CHECK: test_pos4_rr_slt -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos4_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -386,8 +382,7 @@ for.end: ; preds = %for.body, %entry ; CHECK: test_pos8_rr_slt -; FIXME: Support this loop! 
-; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos8_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -416,8 +411,7 @@ for.end: ; preds = %for.body, %entry ; CHECK: test_pos16_rr_slt -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos16_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -442,4 +436,3 @@ for.body: ; preds = %for.body.lr.ph, %fo for.end: ; preds = %for.body, %entry ret void } - diff --git a/test/CodeGen/PowerPC/dbg.ll b/test/CodeGen/PowerPC/dbg.ll index 30fe19e..cb93dec 100644 --- a/test/CodeGen/PowerPC/dbg.ll +++ b/test/CodeGen/PowerPC/dbg.ll @@ -15,13 +15,14 @@ entry: declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!22} !0 = metadata !{i32 720913, metadata !21, i32 12, metadata !"clang version 3.1", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !"", metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 720942, metadata !21, null, metadata !"main", metadata !"main", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !13, i32 0} ; [ DW_TAG_subprogram ] !6 = metadata !{i32 720937, metadata !21} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 720917, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 720917, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9, metadata !9, metadata !10} !9 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !10 = metadata !{i32 720911, null, null, metadata !"", i32 0, i64 64, i64 64, 
i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] @@ -36,3 +37,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !19 = metadata !{i32 2, i32 3, metadata !20, null} !20 = metadata !{i32 720907, metadata !21, metadata !5, i32 1, i32 34, i32 0} ; [ DW_TAG_lexical_block ] !21 = metadata !{metadata !"dbg.c", metadata !"/src"} +!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/PowerPC/dyn-alloca-aligned.ll b/test/CodeGen/PowerPC/dyn-alloca-aligned.ll index a18ada7..a5d45b8 100644 --- a/test/CodeGen/PowerPC/dyn-alloca-aligned.ll +++ b/test/CodeGen/PowerPC/dyn-alloca-aligned.ll @@ -12,12 +12,12 @@ entry: %vla = alloca i32, i64 %0, align 128 %vla1 = alloca i32, i64 %0, align 128 %a2 = getelementptr inbounds %struct.s* %a, i64 0, i32 0 - %1 = load i32* %a2, align 4, !tbaa !0 - store i32 %1, i32* %vla1, align 128, !tbaa !0 + %1 = load i32* %a2, align 4 + store i32 %1, i32* %vla1, align 128 %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1 - %2 = load i32* %b, align 4, !tbaa !0 + %2 = load i32* %b, align 4 %arrayidx3 = getelementptr inbounds i32* %vla1, i64 1 - store i32 %2, i32* %arrayidx3, align 4, !tbaa !0 + store i32 %2, i32* %arrayidx3, align 4 call void @bar(i32* %vla1, i32* %vla) #0 ret void @@ -33,7 +33,3 @@ entry: } attributes #0 = { nounwind } - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/PowerPC/fast-isel-GEP-coalesce.ll b/test/CodeGen/PowerPC/fast-isel-GEP-coalesce.ll new file mode 100644 index 0000000..7bdda04 --- /dev/null +++ b/test/CodeGen/PowerPC/fast-isel-GEP-coalesce.ll @@ -0,0 +1,48 @@ +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64 + +%struct.A = type { i32, [2 x [2 x i32]], i8, [3 x [3 x [3 x i32]]] } +%struct.B = type { i32, [2 x [2 x [2 x %struct.A]]] 
} + +@arr = common global [2 x [2 x [2 x [2 x [2 x i32]]]]] zeroinitializer, align 4 +@A = common global [3 x [3 x %struct.A]] zeroinitializer, align 4 +@B = common global [2 x [2 x [2 x %struct.B]]] zeroinitializer, align 4 + +define i32* @t1() nounwind { +entry: +; ELF64: t1 + %addr = alloca i32*, align 4 + store i32* getelementptr inbounds ([2 x [2 x [2 x [2 x [2 x i32]]]]]* @arr, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1), i32** %addr, align 4 +; ELF64: addi {{[0-9]+}}, {{[0-9]+}}, 124 + %0 = load i32** %addr, align 4 + ret i32* %0 +} + +define i32* @t2() nounwind { +entry: +; ELF64: t2 + %addr = alloca i32*, align 4 + store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 2, i32 2, i32 3, i32 1, i32 2, i32 2), i32** %addr, align 4 +; ELF64: addi {{[0-9]+}}, {{[0-9]+}}, 1148 + %0 = load i32** %addr, align 4 + ret i32* %0 +} + +define i32* @t3() nounwind { +entry: +; ELF64: t3 + %addr = alloca i32*, align 4 + store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 0, i32 1, i32 1, i32 0, i32 1), i32** %addr, align 4 +; ELF64: addi {{[0-9]+}}, {{[0-9]+}}, 140 + %0 = load i32** %addr, align 4 + ret i32* %0 +} + +define i32* @t4() nounwind { +entry: +; ELF64: t4 + %addr = alloca i32*, align 4 + store i32* getelementptr inbounds ([2 x [2 x [2 x %struct.B]]]* @B, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 3, i32 1, i32 2, i32 1), i32** %addr, align 4 +; ELF64: addi {{[0-9]+}}, {{[0-9]+}}, 1284 + %0 = load i32** %addr, align 4 + ret i32* %0 +} diff --git a/test/CodeGen/PowerPC/fast-isel-binary.ll b/test/CodeGen/PowerPC/fast-isel-binary.ll new file mode 100644 index 0000000..43a6cd0 --- /dev/null +++ b/test/CodeGen/PowerPC/fast-isel-binary.ll @@ -0,0 +1,137 @@ +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64 + +; Test add with non-legal types + +define void @add_i8(i8 %a, i8 %b) nounwind ssp { +entry: +; ELF64: add_i8 + 
%a.addr = alloca i8, align 4 + %0 = add i8 %a, %b +; ELF64: add + store i8 %0, i8* %a.addr, align 4 + ret void +} + +define void @add_i8_imm(i8 %a) nounwind ssp { +entry: +; ELF64: add_i8_imm + %a.addr = alloca i8, align 4 + %0 = add i8 %a, 22; +; ELF64: addi + store i8 %0, i8* %a.addr, align 4 + ret void +} + +define void @add_i16(i16 %a, i16 %b) nounwind ssp { +entry: +; ELF64: add_i16 + %a.addr = alloca i16, align 4 + %0 = add i16 %a, %b +; ELF64: add + store i16 %0, i16* %a.addr, align 4 + ret void +} + +define void @add_i16_imm(i16 %a, i16 %b) nounwind ssp { +entry: +; ELF64: add_i16_imm + %a.addr = alloca i16, align 4 + %0 = add i16 %a, 243; +; ELF64: addi + store i16 %0, i16* %a.addr, align 4 + ret void +} + +; Test or with non-legal types + +define void @or_i8(i8 %a, i8 %b) nounwind ssp { +entry: +; ELF64: or_i8 + %a.addr = alloca i8, align 4 + %0 = or i8 %a, %b +; ELF64: or + store i8 %0, i8* %a.addr, align 4 + ret void +} + +define void @or_i8_imm(i8 %a) nounwind ssp { +entry: +; ELF64: or_i8_imm + %a.addr = alloca i8, align 4 + %0 = or i8 %a, -13; +; ELF64: ori + store i8 %0, i8* %a.addr, align 4 + ret void +} + +define void @or_i16(i16 %a, i16 %b) nounwind ssp { +entry: +; ELF64: or_i16 + %a.addr = alloca i16, align 4 + %0 = or i16 %a, %b +; ELF64: or + store i16 %0, i16* %a.addr, align 4 + ret void +} + +define void @or_i16_imm(i16 %a) nounwind ssp { +entry: +; ELF64: or_i16_imm + %a.addr = alloca i16, align 4 + %0 = or i16 %a, 273; +; ELF64: ori + store i16 %0, i16* %a.addr, align 4 + ret void +} + +; Test sub with non-legal types + +define void @sub_i8(i8 %a, i8 %b) nounwind ssp { +entry: +; ELF64: sub_i8 + %a.addr = alloca i8, align 4 + %0 = sub i8 %a, %b +; ELF64: subf + store i8 %0, i8* %a.addr, align 4 + ret void +} + +define void @sub_i8_imm(i8 %a) nounwind ssp { +entry: +; ELF64: sub_i8_imm + %a.addr = alloca i8, align 4 + %0 = sub i8 %a, 22; +; ELF64: addi + store i8 %0, i8* %a.addr, align 4 + ret void +} + +define void @sub_i16(i16 %a, i16 
%b) nounwind ssp { +entry: +; ELF64: sub_i16 + %a.addr = alloca i16, align 4 + %0 = sub i16 %a, %b +; ELF64: subf + store i16 %0, i16* %a.addr, align 4 + ret void +} + +define void @sub_i16_imm(i16 %a) nounwind ssp { +entry: +; ELF64: sub_i16_imm + %a.addr = alloca i16, align 4 + %0 = sub i16 %a, 247; +; ELF64: addi + store i16 %0, i16* %a.addr, align 4 + ret void +} + +define void @sub_i16_badimm(i16 %a) nounwind ssp { +entry: +; ELF64: sub_i16_imm + %a.addr = alloca i16, align 4 + %0 = sub i16 %a, -32768; +; ELF64: subf + store i16 %0, i16* %a.addr, align 4 + ret void +} diff --git a/test/CodeGen/PowerPC/fast-isel-br-const.ll b/test/CodeGen/PowerPC/fast-isel-br-const.ll new file mode 100644 index 0000000..2cfb8a2 --- /dev/null +++ b/test/CodeGen/PowerPC/fast-isel-br-const.ll @@ -0,0 +1,43 @@ +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64 + +define i32 @t1(i32 %a, i32 %b) nounwind uwtable ssp { +entry: +; ELF64: t1 + %x = add i32 %a, %b + br i1 1, label %if.then, label %if.else +; ELF64-NOT: b {{\.?}}LBB0_1 + +if.then: ; preds = %entry + call void @foo1() + br label %if.end7 + +if.else: ; preds = %entry + br i1 0, label %if.then2, label %if.else3 +; ELF64: b {{\.?}}LBB0_4 + +if.then2: ; preds = %if.else + call void @foo2() + br label %if.end6 + +if.else3: ; preds = %if.else + %y = sub i32 %a, %b + br i1 1, label %if.then5, label %if.end +; ELF64-NOT: b {{\.?}}LBB0_5 + +if.then5: ; preds = %if.else3 + call void @foo1() + br label %if.end + +if.end: ; preds = %if.then5, %if.else3 + br label %if.end6 + +if.end6: ; preds = %if.end, %if.then2 + br label %if.end7 + +if.end7: ; preds = %if.end6, %if.then + ret i32 0 +} + +declare void @foo1() + +declare void @foo2() diff --git a/test/CodeGen/PowerPC/fast-isel-call.ll b/test/CodeGen/PowerPC/fast-isel-call.ll new file mode 100644 index 0000000..33a8ba9 --- /dev/null +++ b/test/CodeGen/PowerPC/fast-isel-call.ll @@ -0,0 +1,132 @@ 
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64 + +define i32 @t1(i8 signext %a) nounwind { + %1 = sext i8 %a to i32 + ret i32 %1 +} + +define i32 @t2(i8 zeroext %a) nounwind { + %1 = zext i8 %a to i32 + ret i32 %1 +} + +define i32 @t3(i16 signext %a) nounwind { + %1 = sext i16 %a to i32 + ret i32 %1 +} + +define i32 @t4(i16 zeroext %a) nounwind { + %1 = zext i16 %a to i32 + ret i32 %1 +} + +define void @foo(i8 %a, i16 %b) nounwind { +; ELF64: foo + %1 = call i32 @t1(i8 signext %a) +; ELF64: extsb + %2 = call i32 @t2(i8 zeroext %a) +; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56 + %3 = call i32 @t3(i16 signext %b) +; ELF64: extsh + %4 = call i32 @t4(i16 zeroext %b) +; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48 + +;; A few test to check materialization + %5 = call i32 @t2(i8 zeroext 255) +; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56 + %6 = call i32 @t4(i16 zeroext 65535) +; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48 + ret void +} + +define void @foo2() nounwind { + %1 = call signext i16 @t5() + %2 = call zeroext i16 @t6() + %3 = call signext i8 @t7() + %4 = call zeroext i8 @t8() + ret void +} + +declare signext i16 @t5(); +declare zeroext i16 @t6(); +declare signext i8 @t7(); +declare zeroext i8 @t8(); + +define i32 @t10(i32 %argc, i8** nocapture %argv) { +entry: +; ELF64: t10 + %call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70) +; ELF64: li 3, 0 +; ELF64: li 4, 248 +; ELF64: li 5, 187 +; ELF64: li 6, 28 +; ELF64: li 7, 40 +; ELF64: li 8, 186 +; ELF64: rldicl 3, 3, 0, 56 +; ELF64: rldicl 4, 4, 0, 56 +; ELF64: rldicl 5, 5, 0, 56 +; ELF64: rldicl 6, 6, 0, 56 +; ELF64: rldicl 7, 7, 0, 56 +; ELF64: rldicl 8, 8, 0, 56 + ret i32 0 +} + +declare i32 @bar(i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext) + +define i32 @bar0(i32 %i) nounwind { + ret i32 0 +} + +; Function pointers are not yet 
implemented. +;define void @foo3() uwtable { +; %fptr = alloca i32 (i32)*, align 8 +; store i32 (i32)* @bar0, i32 (i32)** %fptr, align 8 +; %1 = load i32 (i32)** %fptr, align 8 +; %call = call i32 %1(i32 0) +; ret void +;} + +; Intrinsic calls not yet implemented, and udiv isn't one for PPC anyway. +;define i32 @LibCall(i32 %a, i32 %b) { +;entry: +; %tmp1 = udiv i32 %a, %b ; <i32> [#uses=1] +; ret i32 %tmp1 +;} + +declare void @float_foo(float %f) ssp + +define void @float_const() ssp { +entry: +; ELF64: float_const + call void @float_foo(float 0x401C666660000000) +; ELF64: addis [[REG:[0-9]+]], 2, .LCPI[[SUF:[0-9_]+]]@toc@ha +; ELF64: lfs 1, .LCPI[[SUF]]@toc@l([[REG]]) + ret void +} + +define void @float_reg(float %dummy, float %f) ssp { +entry: +; ELF64: float_reg + call void @float_foo(float %f) +; ELF64: fmr 1, 2 + ret void +} + +declare void @double_foo(double %d) ssp + +define void @double_const() ssp { +entry: +; ELF64: double_const + call void @double_foo(double 0x1397723CCABD0000401C666660000000) +; ELF64: addis [[REG2:[0-9]+]], 2, .LCPI[[SUF2:[0-9_]+]]@toc@ha +; ELF64: lfd 1, .LCPI[[SUF2]]@toc@l([[REG2]]) + ret void +} + +define void @double_reg(double %dummy, double %d) ssp { +entry: +; ELF64: double_reg + call void @double_foo(double %d) +; ELF64: fmr 1, 2 + ret void +} diff --git a/test/CodeGen/PowerPC/fast-isel-cmp-imm.ll b/test/CodeGen/PowerPC/fast-isel-cmp-imm.ll new file mode 100644 index 0000000..33f7a79 --- /dev/null +++ b/test/CodeGen/PowerPC/fast-isel-cmp-imm.ll @@ -0,0 +1,289 @@ +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64 + +define void @t1a(float %a) uwtable ssp { +entry: +; ELF64: t1a + %cmp = fcmp oeq float %a, 0.000000e+00 +; ELF64: addis +; ELF64: lfs +; ELF64: fcmpu + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +declare 
void @foo() + +define void @t1b(float %a) uwtable ssp { +entry: +; ELF64: t1b + %cmp = fcmp oeq float %a, -0.000000e+00 +; ELF64: addis +; ELF64: lfs +; ELF64: fcmpu + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t2a(double %a) uwtable ssp { +entry: +; ELF64: t2a + %cmp = fcmp oeq double %a, 0.000000e+00 +; ELF64: addis +; ELF64: lfd +; ELF64: fcmpu + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t2b(double %a) uwtable ssp { +entry: +; ELF64: t2b + %cmp = fcmp oeq double %a, -0.000000e+00 +; ELF64: addis +; ELF64: lfd +; ELF64: fcmpu + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t4(i8 signext %a) uwtable ssp { +entry: +; ELF64: t4 + %cmp = icmp eq i8 %a, -1 +; ELF64: extsb +; ELF64: cmpwi + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t5(i8 zeroext %a) uwtable ssp { +entry: +; ELF64: t5 + %cmp = icmp eq i8 %a, 1 +; ELF64: extsb +; ELF64: cmpwi + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t6(i16 signext %a) uwtable ssp { +entry: +; ELF64: t6 + %cmp = icmp eq i16 %a, -1 +; ELF64: extsh +; ELF64: cmpwi + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t7(i16 zeroext %a) uwtable ssp { +entry: +; ELF64: t7 + %cmp = icmp eq i16 %a, 1 +; ELF64: extsh +; ELF64: cmpwi + br i1 %cmp, label %if.then, 
label %if.end + +if.then: ; preds = %entry + call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t8(i32 %a) uwtable ssp { +entry: +; ELF64: t8 + %cmp = icmp eq i32 %a, -1 +; ELF64: cmpwi + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t9(i32 %a) uwtable ssp { +entry: +; ELF64: t9 + %cmp = icmp eq i32 %a, 1 +; ELF64: cmpwi + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t10(i32 %a) uwtable ssp { +entry: +; ELF64: t10 + %cmp = icmp eq i32 %a, 384 +; ELF64: cmpwi + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t11(i32 %a) uwtable ssp { +entry: +; ELF64: t11 + %cmp = icmp eq i32 %a, 4096 +; ELF64: cmpwi + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t12(i8 %a) uwtable ssp { +entry: +; ELF64: t12 + %cmp = icmp ugt i8 %a, -113 +; ELF64: rlwinm +; ELF64: cmplwi + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t13() nounwind ssp { +entry: +; ELF64: t13 + %cmp = icmp slt i32 -123, -2147483648 +; ELF64: li +; ELF64: lis +; ELF64: cmpw + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + ret void + +if.end: ; preds = %entry + ret void +} + +define void @t14(i64 %a) uwtable ssp { +entry: +; ELF64: t14 + %cmp = icmp eq i64 %a, -1 +; ELF64: cmpdi + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + call void @foo() + br label %if.end + 
+if.end: ; preds = %if.then, %entry + ret void +} + +define void @t15(i64 %a) uwtable ssp { +entry: +; ELF64: t15 + %cmp = icmp eq i64 %a, 1 +; ELF64: cmpdi + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t16(i64 %a) uwtable ssp { +entry: +; ELF64: t16 + %cmp = icmp eq i64 %a, 384 +; ELF64: cmpdi + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +define void @t17(i64 %a) uwtable ssp { +entry: +; ELF64: t17 + %cmp = icmp eq i64 %a, 32768 +; Extra operand so we don't match on cmpdi. +; ELF64: cmpd {{[0-9]+}} + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + call void @foo() + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + diff --git a/test/CodeGen/PowerPC/fast-isel-conversion.ll b/test/CodeGen/PowerPC/fast-isel-conversion.ll new file mode 100644 index 0000000..a31c312 --- /dev/null +++ b/test/CodeGen/PowerPC/fast-isel-conversion.ll @@ -0,0 +1,305 @@ +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64 + +; Test sitofp + +define void @sitofp_single_i64(i64 %a, float %b) nounwind ssp { +entry: +; ELF64: sitofp_single_i64 + %b.addr = alloca float, align 4 + %conv = sitofp i64 %a to float +; ELF64: std +; ELF64: lfd +; ELF64: fcfids + store float %conv, float* %b.addr, align 4 + ret void +} + +define void @sitofp_single_i32(i32 %a, float %b) nounwind ssp { +entry: +; ELF64: sitofp_single_i32 + %b.addr = alloca float, align 4 + %conv = sitofp i32 %a to float +; ELF64: std +; ELF64: lfiwax +; ELF64: fcfids + store float %conv, float* %b.addr, align 4 + ret void +} + +define void @sitofp_single_i16(i16 %a, float %b) nounwind ssp { +entry: +; ELF64: sitofp_single_i16 + %b.addr = alloca float, 
align 4 + %conv = sitofp i16 %a to float +; ELF64: extsh +; ELF64: std +; ELF64: lfd +; ELF64: fcfids + store float %conv, float* %b.addr, align 4 + ret void +} + +define void @sitofp_single_i8(i8 %a) nounwind ssp { +entry: +; ELF64: sitofp_single_i8 + %b.addr = alloca float, align 4 + %conv = sitofp i8 %a to float +; ELF64: extsb +; ELF64: std +; ELF64: lfd +; ELF64: fcfids + store float %conv, float* %b.addr, align 4 + ret void +} + +define void @sitofp_double_i32(i32 %a, double %b) nounwind ssp { +entry: +; ELF64: sitofp_double_i32 + %b.addr = alloca double, align 8 + %conv = sitofp i32 %a to double +; ELF64: std +; ELF64: lfiwax +; ELF64: fcfid + store double %conv, double* %b.addr, align 8 + ret void +} + +define void @sitofp_double_i64(i64 %a, double %b) nounwind ssp { +entry: +; ELF64: sitofp_double_i64 + %b.addr = alloca double, align 8 + %conv = sitofp i64 %a to double +; ELF64: std +; ELF64: lfd +; ELF64: fcfid + store double %conv, double* %b.addr, align 8 + ret void +} + +define void @sitofp_double_i16(i16 %a, double %b) nounwind ssp { +entry: +; ELF64: sitofp_double_i16 + %b.addr = alloca double, align 8 + %conv = sitofp i16 %a to double +; ELF64: extsh +; ELF64: std +; ELF64: lfd +; ELF64: fcfid + store double %conv, double* %b.addr, align 8 + ret void +} + +define void @sitofp_double_i8(i8 %a, double %b) nounwind ssp { +entry: +; ELF64: sitofp_double_i8 + %b.addr = alloca double, align 8 + %conv = sitofp i8 %a to double +; ELF64: extsb +; ELF64: std +; ELF64: lfd +; ELF64: fcfid + store double %conv, double* %b.addr, align 8 + ret void +} + +; Test uitofp + +define void @uitofp_single_i64(i64 %a, float %b) nounwind ssp { +entry: +; ELF64: uitofp_single_i64 + %b.addr = alloca float, align 4 + %conv = uitofp i64 %a to float +; ELF64: std +; ELF64: lfd +; ELF64: fcfidus + store float %conv, float* %b.addr, align 4 + ret void +} + +define void @uitofp_single_i32(i32 %a, float %b) nounwind ssp { +entry: +; ELF64: uitofp_single_i32 + %b.addr = alloca 
float, align 4 + %conv = uitofp i32 %a to float +; ELF64: std +; ELF64: lfiwzx +; ELF64: fcfidus + store float %conv, float* %b.addr, align 4 + ret void +} + +define void @uitofp_single_i16(i16 %a, float %b) nounwind ssp { +entry: +; ELF64: uitofp_single_i16 + %b.addr = alloca float, align 4 + %conv = uitofp i16 %a to float +; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48 +; ELF64: std +; ELF64: lfd +; ELF64: fcfidus + store float %conv, float* %b.addr, align 4 + ret void +} + +define void @uitofp_single_i8(i8 %a) nounwind ssp { +entry: +; ELF64: uitofp_single_i8 + %b.addr = alloca float, align 4 + %conv = uitofp i8 %a to float +; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56 +; ELF64: std +; ELF64: lfd +; ELF64: fcfidus + store float %conv, float* %b.addr, align 4 + ret void +} + +define void @uitofp_double_i64(i64 %a, double %b) nounwind ssp { +entry: +; ELF64: uitofp_double_i64 + %b.addr = alloca double, align 8 + %conv = uitofp i64 %a to double +; ELF64: std +; ELF64: lfd +; ELF64: fcfidu + store double %conv, double* %b.addr, align 8 + ret void +} + +define void @uitofp_double_i32(i32 %a, double %b) nounwind ssp { +entry: +; ELF64: uitofp_double_i32 + %b.addr = alloca double, align 8 + %conv = uitofp i32 %a to double +; ELF64: std +; ELF64: lfiwzx +; ELF64: fcfidu + store double %conv, double* %b.addr, align 8 + ret void +} + +define void @uitofp_double_i16(i16 %a, double %b) nounwind ssp { +entry: +; ELF64: uitofp_double_i16 + %b.addr = alloca double, align 8 + %conv = uitofp i16 %a to double +; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48 +; ELF64: std +; ELF64: lfd +; ELF64: fcfidu + store double %conv, double* %b.addr, align 8 + ret void +} + +define void @uitofp_double_i8(i8 %a, double %b) nounwind ssp { +entry: +; ELF64: uitofp_double_i8 + %b.addr = alloca double, align 8 + %conv = uitofp i8 %a to double +; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56 +; ELF64: std +; ELF64: lfd +; ELF64: fcfidu + store double %conv, double* %b.addr, align 8 + ret void +} + +; 
Test fptosi + +define void @fptosi_float_i32(float %a) nounwind ssp { +entry: +; ELF64: fptosi_float_i32 + %b.addr = alloca i32, align 4 + %conv = fptosi float %a to i32 +; ELF64: fctiwz +; ELF64: stfd +; ELF64: lwa + store i32 %conv, i32* %b.addr, align 4 + ret void +} + +define void @fptosi_float_i64(float %a) nounwind ssp { +entry: +; ELF64: fptosi_float_i64 + %b.addr = alloca i64, align 4 + %conv = fptosi float %a to i64 +; ELF64: fctidz +; ELF64: stfd +; ELF64: ld + store i64 %conv, i64* %b.addr, align 4 + ret void +} + +define void @fptosi_double_i32(double %a) nounwind ssp { +entry: +; ELF64: fptosi_double_i32 + %b.addr = alloca i32, align 8 + %conv = fptosi double %a to i32 +; ELF64: fctiwz +; ELF64: stfd +; ELF64: lwa + store i32 %conv, i32* %b.addr, align 8 + ret void +} + +define void @fptosi_double_i64(double %a) nounwind ssp { +entry: +; ELF64: fptosi_double_i64 + %b.addr = alloca i64, align 8 + %conv = fptosi double %a to i64 +; ELF64: fctidz +; ELF64: stfd +; ELF64: ld + store i64 %conv, i64* %b.addr, align 8 + ret void +} + +; Test fptoui + +define void @fptoui_float_i32(float %a) nounwind ssp { +entry: +; ELF64: fptoui_float_i32 + %b.addr = alloca i32, align 4 + %conv = fptoui float %a to i32 +; ELF64: fctiwuz +; ELF64: stfd +; ELF64: lwz + store i32 %conv, i32* %b.addr, align 4 + ret void +} + +define void @fptoui_float_i64(float %a) nounwind ssp { +entry: +; ELF64: fptoui_float_i64 + %b.addr = alloca i64, align 4 + %conv = fptoui float %a to i64 +; ELF64: fctiduz +; ELF64: stfd +; ELF64: ld + store i64 %conv, i64* %b.addr, align 4 + ret void +} + +define void @fptoui_double_i32(double %a) nounwind ssp { +entry: +; ELF64: fptoui_double_i32 + %b.addr = alloca i32, align 8 + %conv = fptoui double %a to i32 +; ELF64: fctiwuz +; ELF64: stfd +; ELF64: lwz + store i32 %conv, i32* %b.addr, align 8 + ret void +} + +define void @fptoui_double_i64(double %a) nounwind ssp { +entry: +; ELF64: fptoui_double_i64 + %b.addr = alloca i64, align 8 + %conv = fptoui 
double %a to i64 +; ELF64: fctiduz +; ELF64: stfd +; ELF64: ld + store i64 %conv, i64* %b.addr, align 8 + ret void +} diff --git a/test/CodeGen/PowerPC/fast-isel-crash.ll b/test/CodeGen/PowerPC/fast-isel-crash.ll new file mode 100644 index 0000000..1813fc9 --- /dev/null +++ b/test/CodeGen/PowerPC/fast-isel-crash.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 + +; Ensure this doesn't crash. + +%union.anon = type { <16 x i32> } + +@__md0 = external global [137 x i8] + +define internal void @stretch(<4 x i8> addrspace(1)* %src, <4 x i8> addrspace(1)* %dst, i32 %width, i32 %height, i32 %iLS, i32 %oLS, <2 x float> %c, <4 x float> %param) nounwind { +entry: + ret void +} + +define internal i32 @_Z13get_global_idj(i32 %dim) nounwind ssp { +entry: + ret i32 undef +} + +define void @wrap(i8 addrspace(1)* addrspace(1)* %arglist, i32 addrspace(1)* %gtid) nounwind ssp { +entry: + call void @stretch(<4 x i8> addrspace(1)* undef, <4 x i8> addrspace(1)* undef, i32 undef, i32 undef, i32 undef, i32 undef, <2 x float> undef, <4 x float> undef) + ret void +} diff --git a/test/CodeGen/PowerPC/fast-isel-ext.ll b/test/CodeGen/PowerPC/fast-isel-ext.ll new file mode 100644 index 0000000..753305a --- /dev/null +++ b/test/CodeGen/PowerPC/fast-isel-ext.ll @@ -0,0 +1,75 @@ +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64 + +; zext + +define i32 @zext_8_32(i8 %a) nounwind ssp { +; ELF64: zext_8_32 + %r = zext i8 %a to i32 +; ELF64: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31 + ret i32 %r +} + +define i32 @zext_16_32(i16 %a) nounwind ssp { +; ELF64: zext_16_32 + %r = zext i16 %a to i32 +; ELF64: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31 + ret i32 %r +} + +define i64 @zext_8_64(i8 %a) nounwind ssp { +; ELF64: zext_8_64 + %r = zext i8 %a to i64 +; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56 + ret i64 %r +} + +define i64 
@zext_16_64(i16 %a) nounwind ssp { +; ELF64: zext_16_64 + %r = zext i16 %a to i64 +; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48 + ret i64 %r +} + +define i64 @zext_32_64(i32 %a) nounwind ssp { +; ELF64: zext_32_64 + %r = zext i32 %a to i64 +; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32 + ret i64 %r +} + +; sext + +define i32 @sext_8_32(i8 %a) nounwind ssp { +; ELF64: sext_8_32 + %r = sext i8 %a to i32 +; ELF64: extsb + ret i32 %r +} + +define i32 @sext_16_32(i16 %a) nounwind ssp { +; ELF64: sext_16_32 + %r = sext i16 %a to i32 +; ELF64: extsh + ret i32 %r +} + +define i64 @sext_8_64(i8 %a) nounwind ssp { +; ELF64: sext_8_64 + %r = sext i8 %a to i64 +; ELF64: extsb + ret i64 %r +} + +define i64 @sext_16_64(i16 %a) nounwind ssp { +; ELF64: sext_16_64 + %r = sext i16 %a to i64 +; ELF64: extsh + ret i64 %r +} + +define i64 @sext_32_64(i32 %a) nounwind ssp { +; ELF64: sext_32_64 + %r = sext i32 %a to i64 +; ELF64: extsw + ret i64 %r +} diff --git a/test/CodeGen/PowerPC/fast-isel-fold.ll b/test/CodeGen/PowerPC/fast-isel-fold.ll new file mode 100644 index 0000000..4de345f --- /dev/null +++ b/test/CodeGen/PowerPC/fast-isel-fold.ll @@ -0,0 +1,129 @@ +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64 + +@a = global i8 1, align 1 +@b = global i16 2, align 2 +@c = global i32 4, align 4 + +define void @t1() nounwind uwtable ssp { +; ELF64: t1 + %1 = load i8* @a, align 1 + call void @foo1(i8 zeroext %1) +; ELF64: lbz +; ELF64-NOT: rldicl +; ELF64-NOT: rlwinm + ret void +} + +define void @t2() nounwind uwtable ssp { +; ELF64: t2 + %1 = load i16* @b, align 2 + call void @foo2(i16 zeroext %1) +; ELF64: lhz +; ELF64-NOT: rldicl +; ELF64-NOT: rlwinm + ret void +} + +define void @t2a() nounwind uwtable ssp { +; ELF64: t2a + %1 = load i32* @c, align 4 + call void @foo3(i32 zeroext %1) +; ELF64: lwz +; ELF64-NOT: rldicl +; ELF64-NOT: rlwinm + ret void +} + +declare void @foo1(i8 zeroext) 
+declare void @foo2(i16 zeroext) +declare void @foo3(i32 zeroext) + +define i32 @t3() nounwind uwtable ssp { +; ELF64: t3 + %1 = load i8* @a, align 1 + %2 = zext i8 %1 to i32 +; ELF64: lbz +; ELF64-NOT: rlwinm + ret i32 %2 +} + +define i32 @t4() nounwind uwtable ssp { +; ELF64: t4 + %1 = load i16* @b, align 2 + %2 = zext i16 %1 to i32 +; ELF64: lhz +; ELF64-NOT: rlwinm + ret i32 %2 +} + +define i32 @t5() nounwind uwtable ssp { +; ELF64: t5 + %1 = load i16* @b, align 2 + %2 = sext i16 %1 to i32 +; ELF64: lha +; ELF64-NOT: rlwinm + ret i32 %2 +} + +define i32 @t6() nounwind uwtable ssp { +; ELF64: t6 + %1 = load i8* @a, align 2 + %2 = sext i8 %1 to i32 +; ELF64: lbz +; ELF64-NOT: rlwinm + ret i32 %2 +} + +define i64 @t7() nounwind uwtable ssp { +; ELF64: t7 + %1 = load i8* @a, align 1 + %2 = zext i8 %1 to i64 +; ELF64: lbz +; ELF64-NOT: rldicl + ret i64 %2 +} + +define i64 @t8() nounwind uwtable ssp { +; ELF64: t8 + %1 = load i16* @b, align 2 + %2 = zext i16 %1 to i64 +; ELF64: lhz +; ELF64-NOT: rldicl + ret i64 %2 +} + +define i64 @t9() nounwind uwtable ssp { +; ELF64: t9 + %1 = load i16* @b, align 2 + %2 = sext i16 %1 to i64 +; ELF64: lha +; ELF64-NOT: extsh + ret i64 %2 +} + +define i64 @t10() nounwind uwtable ssp { +; ELF64: t10 + %1 = load i8* @a, align 2 + %2 = sext i8 %1 to i64 +; ELF64: lbz +; ELF64: extsb + ret i64 %2 +} + +define i64 @t11() nounwind uwtable ssp { +; ELF64: t11 + %1 = load i32* @c, align 4 + %2 = zext i32 %1 to i64 +; ELF64: lwz +; ELF64-NOT: rldicl + ret i64 %2 +} + +define i64 @t12() nounwind uwtable ssp { +; ELF64: t12 + %1 = load i32* @c, align 4 + %2 = sext i32 %1 to i64 +; ELF64: lwa +; ELF64-NOT: extsw + ret i64 %2 +} diff --git a/test/CodeGen/PowerPC/fast-isel-indirectbr.ll b/test/CodeGen/PowerPC/fast-isel-indirectbr.ll new file mode 100644 index 0000000..88ccf91 --- /dev/null +++ b/test/CodeGen/PowerPC/fast-isel-indirectbr.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort 
-mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64 + +define void @t1(i8* %x) { +entry: +; ELF64: t1 + br label %L0 + +L0: + br label %L1 + +L1: + indirectbr i8* %x, [ label %L0, label %L1 ] +; ELF64: mtctr 3 +; ELF64: bctr +} diff --git a/test/CodeGen/PowerPC/fast-isel-load-store.ll b/test/CodeGen/PowerPC/fast-isel-load-store.ll new file mode 100644 index 0000000..026b15f --- /dev/null +++ b/test/CodeGen/PowerPC/fast-isel-load-store.ll @@ -0,0 +1,202 @@ +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64 + +; This test verifies that load/store instructions are properly generated, +; and that they pass MI verification. + +@a = global i8 1, align 1 +@b = global i16 2, align 2 +@c = global i32 4, align 4 +@d = global i64 8, align 8 +@e = global float 1.25, align 4 +@f = global double 3.5, align 8 + +%struct.s = type<{ i8, i32 }> +%struct.t = type<{ i8, i64 }> + +@g = global %struct.s <{ i8 1, i32 2 }>, align 1 +@h = global %struct.t <{ i8 1, i64 2 }>, align 1 + +@i = common global [8192 x i64] zeroinitializer, align 8 + +; load + +define i8 @t1() nounwind uwtable ssp { +; ELF64: t1 + %1 = load i8* @a, align 1 +; ELF64: lbz + %2 = add nsw i8 %1, 1 +; ELF64: addi + ret i8 %2 +} + +define i16 @t2() nounwind uwtable ssp { +; ELF64: t2 + %1 = load i16* @b, align 2 +; ELF64: lhz + %2 = add nsw i16 %1, 1 +; ELF64: addi + ret i16 %2 +} + +define i32 @t3() nounwind uwtable ssp { +; ELF64: t3 + %1 = load i32* @c, align 4 +; ELF64: lwz + %2 = add nsw i32 %1, 1 +; ELF64: addi + ret i32 %2 +} + +define i64 @t4() nounwind uwtable ssp { +; ELF64: t4 + %1 = load i64* @d, align 4 +; ELF64: ld + %2 = add nsw i64 %1, 1 +; ELF64: addi + ret i64 %2 +} + +define float @t5() nounwind uwtable ssp { +; ELF64: t5 + %1 = load float* @e, align 4 +; ELF64: lfs + %2 = fadd float %1, 1.0 +; ELF64: fadds + ret float %2 +} + +define double @t6() nounwind uwtable ssp { +; 
ELF64: t6 + %1 = load double* @f, align 8 +; ELF64: lfd + %2 = fadd double %1, 1.0 +; ELF64: fadd + ret double %2 +} + +; store + +define void @t7(i8 %v) nounwind uwtable ssp { +; ELF64: t7 + %1 = add nsw i8 %v, 1 + store i8 %1, i8* @a, align 1 +; ELF64: addis +; ELF64: addi +; ELF64: addi +; ELF64: stb + ret void +} + +define void @t8(i16 %v) nounwind uwtable ssp { +; ELF64: t8 + %1 = add nsw i16 %v, 1 + store i16 %1, i16* @b, align 2 +; ELF64: addis +; ELF64: addi +; ELF64: addi +; ELF64: sth + ret void +} + +define void @t9(i32 %v) nounwind uwtable ssp { +; ELF64: t9 + %1 = add nsw i32 %v, 1 + store i32 %1, i32* @c, align 4 +; ELF64: addis +; ELF64: addi +; ELF64: addi +; ELF64: stw + ret void +} + +define void @t10(i64 %v) nounwind uwtable ssp { +; ELF64: t10 + %1 = add nsw i64 %v, 1 + store i64 %1, i64* @d, align 4 +; ELF64: addis +; ELF64: addi +; ELF64: addi +; ELF64: std + ret void +} + +define void @t11(float %v) nounwind uwtable ssp { +; ELF64: t11 + %1 = fadd float %v, 1.0 + store float %1, float* @e, align 4 +; ELF64: fadds +; ELF64: stfs + ret void +} + +define void @t12(double %v) nounwind uwtable ssp { +; ELF64: t12 + %1 = fadd double %v, 1.0 + store double %1, double* @f, align 8 +; ELF64: fadd +; ELF64: stfd + ret void +} + +;; lwa requires an offset divisible by 4, so we need lwax here. +define i64 @t13() nounwind uwtable ssp { +; ELF64: t13 + %1 = load i32* getelementptr inbounds (%struct.s* @g, i32 0, i32 1), align 1 + %2 = sext i32 %1 to i64 +; ELF64: li +; ELF64: lwax + %3 = add nsw i64 %2, 1 +; ELF64: addi + ret i64 %3 +} + +;; ld requires an offset divisible by 4, so we need ldx here. +define i64 @t14() nounwind uwtable ssp { +; ELF64: t14 + %1 = load i64* getelementptr inbounds (%struct.t* @h, i32 0, i32 1), align 1 +; ELF64: li +; ELF64: ldx + %2 = add nsw i64 %1, 1 +; ELF64: addi + ret i64 %2 +} + +;; std requires an offset divisible by 4, so we need stdx here. 
+define void @t15(i64 %v) nounwind uwtable ssp { +; ELF64: t15 + %1 = add nsw i64 %v, 1 + store i64 %1, i64* getelementptr inbounds (%struct.t* @h, i32 0, i32 1), align 1 +; ELF64: addis +; ELF64: addi +; ELF64: addi +; ELF64: li +; ELF64: stdx + ret void +} + +;; ld requires an offset that fits in 16 bits, so we need ldx here. +define i64 @t16() nounwind uwtable ssp { +; ELF64: t16 + %1 = load i64* getelementptr inbounds ([8192 x i64]* @i, i32 0, i64 5000), align 8 +; ELF64: lis +; ELF64: ori +; ELF64: ldx + %2 = add nsw i64 %1, 1 +; ELF64: addi + ret i64 %2 +} + +;; std requires an offset that fits in 16 bits, so we need stdx here. +define void @t17(i64 %v) nounwind uwtable ssp { +; ELF64: t17 + %1 = add nsw i64 %v, 1 + store i64 %1, i64* getelementptr inbounds ([8192 x i64]* @i, i32 0, i64 5000), align 8 +; ELF64: addis +; ELF64: ld +; ELF64: addi +; ELF64: lis +; ELF64: ori +; ELF64: stdx + ret void +} + diff --git a/test/CodeGen/PowerPC/fast-isel-redefinition.ll b/test/CodeGen/PowerPC/fast-isel-redefinition.ll new file mode 100644 index 0000000..72422bd --- /dev/null +++ b/test/CodeGen/PowerPC/fast-isel-redefinition.ll @@ -0,0 +1,10 @@ +; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort -optimize-regalloc -regalloc=basic -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s +; This isn't exactly a useful set of command-line options, but check that it +; doesn't crash. (It crashed formerly on ARM, and proved useful in +; discovering a bug on PowerPC as well.) 
+ +define i32 @f(i32* %x) nounwind ssp { + %y = getelementptr inbounds i32* %x, i32 5000 + %tmp103 = load i32* %y, align 4 + ret i32 %tmp103 +} diff --git a/test/CodeGen/PowerPC/fast-isel-ret.ll b/test/CodeGen/PowerPC/fast-isel-ret.ll new file mode 100644 index 0000000..fa19f8b --- /dev/null +++ b/test/CodeGen/PowerPC/fast-isel-ret.ll @@ -0,0 +1,142 @@ +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64 + +define signext i8 @ret2(i8 signext %a) nounwind uwtable ssp { +entry: +; ELF64: ret2 +; ELF64: extsb +; ELF64: blr + ret i8 %a +} + +define zeroext i8 @ret3(i8 signext %a) nounwind uwtable ssp { +entry: +; ELF64: ret3 +; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56 +; ELF64: blr + ret i8 %a +} + +define signext i16 @ret4(i16 signext %a) nounwind uwtable ssp { +entry: +; ELF64: ret4 +; ELF64: extsh +; ELF64: blr + ret i16 %a +} + +define zeroext i16 @ret5(i16 signext %a) nounwind uwtable ssp { +entry: +; ELF64: ret5 +; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48 +; ELF64: blr + ret i16 %a +} + +define i16 @ret6(i16 %a) nounwind uwtable ssp { +entry: +; ELF64: ret6 +; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48 +; ELF64: blr + ret i16 %a +} + +define signext i32 @ret7(i32 signext %a) nounwind uwtable ssp { +entry: +; ELF64: ret7 +; ELF64: extsw +; ELF64: blr + ret i32 %a +} + +define zeroext i32 @ret8(i32 signext %a) nounwind uwtable ssp { +entry: +; ELF64: ret8 +; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32 +; ELF64: blr + ret i32 %a +} + +define i32 @ret9(i32 %a) nounwind uwtable ssp { +entry: +; ELF64: ret9 +; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32 +; ELF64: blr + ret i32 %a +} + +define i64 @ret10(i64 %a) nounwind uwtable ssp { +entry: +; ELF64: ret10 +; ELF64-NOT: exts +; ELF64-NOT: rldicl +; ELF64: blr + ret i64 %a +} + +define float @ret11(float %a) nounwind uwtable ssp { +entry: +; ELF64: ret11 +; ELF64: blr + ret float %a +} + +define double @ret12(double %a) 
nounwind uwtable ssp { +entry: +; ELF64: ret12 +; ELF64: blr + ret double %a +} + +define i8 @ret13() nounwind uwtable ssp { +entry: +; ELF64: ret13 +; ELF64: li +; ELF64: blr + ret i8 15; +} + +define i16 @ret14() nounwind uwtable ssp { +entry: +; ELF64: ret14 +; ELF64: li +; ELF64: blr + ret i16 -225; +} + +define i32 @ret15() nounwind uwtable ssp { +entry: +; ELF64: ret15 +; ELF64: lis +; ELF64: ori +; ELF64: blr + ret i32 278135; +} + +define i64 @ret16() nounwind uwtable ssp { +entry: +; ELF64: ret16 +; ELF64: li +; ELF64: sldi +; ELF64: oris +; ELF64: ori +; ELF64: blr + ret i64 27813515225; +} + +define float @ret17() nounwind uwtable ssp { +entry: +; ELF64: ret17 +; ELF64: addis +; ELF64: lfs +; ELF64: blr + ret float 2.5; +} + +define double @ret18() nounwind uwtable ssp { +entry: +; ELF64: ret18 +; ELF64: addis +; ELF64: lfd +; ELF64: blr + ret double 2.5e-33; +} diff --git a/test/CodeGen/PowerPC/fast-isel-shifter.ll b/test/CodeGen/PowerPC/fast-isel-shifter.ll new file mode 100644 index 0000000..198bfbe --- /dev/null +++ b/test/CodeGen/PowerPC/fast-isel-shifter.ll @@ -0,0 +1,50 @@ +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64 + +define i32 @shl() nounwind ssp { +entry: +; ELF64: shl +; ELF64: slw + %shl = shl i32 -1, 2 + ret i32 %shl +} + +define i32 @shl_reg(i32 %src1, i32 %src2) nounwind ssp { +entry: +; ELF64: shl_reg +; ELF64: slw + %shl = shl i32 %src1, %src2 + ret i32 %shl +} + +define i32 @lshr() nounwind ssp { +entry: +; ELF64: lshr +; ELF64: srw + %lshr = lshr i32 -1, 2 + ret i32 %lshr +} + +define i32 @lshr_reg(i32 %src1, i32 %src2) nounwind ssp { +entry: +; ELF64: lshr_reg +; ELF64: srw + %lshr = lshr i32 %src1, %src2 + ret i32 %lshr +} + +define i32 @ashr() nounwind ssp { +entry: +; ELF64: ashr +; ELF64: srawi + %ashr = ashr i32 -1, 2 + ret i32 %ashr +} + +define i32 @ashr_reg(i32 %src1, i32 %src2) nounwind ssp { +entry: +; ELF64: ashr_reg +; 
ELF64: sraw + %ashr = ashr i32 %src1, %src2 + ret i32 %ashr +} + diff --git a/test/CodeGen/PowerPC/fastisel-gep-promote-before-add.ll b/test/CodeGen/PowerPC/fastisel-gep-promote-before-add.ll new file mode 100644 index 0000000..4bcacf0 --- /dev/null +++ b/test/CodeGen/PowerPC/fastisel-gep-promote-before-add.ll @@ -0,0 +1,17 @@ +; fastisel should not fold add with non-pointer bitwidth +; sext(a) + sext(b) != sext(a + b) +; RUN: llc -mtriple=powerpc64-unknown-freebsd10.0 %s -O0 -o - | FileCheck %s + +define zeroext i8 @gep_promotion(i8* %ptr) nounwind uwtable ssp { +entry: + %ptr.addr = alloca i8*, align 8 + %add = add i8 64, 64 ; 0x40 + 0x40 + %0 = load i8** %ptr.addr, align 8 + + ; CHECK-LABEL: gep_promotion: + ; CHECK: lbz {{[0-9]+}}, 0({{.*}}) + %arrayidx = getelementptr inbounds i8* %0, i8 %add + + %1 = load i8* %arrayidx, align 1 + ret i8 %1 +} diff --git a/test/CodeGen/PowerPC/fcpsgn.ll b/test/CodeGen/PowerPC/fcpsgn.ll new file mode 100644 index 0000000..f469981 --- /dev/null +++ b/test/CodeGen/PowerPC/fcpsgn.ll @@ -0,0 +1,52 @@ +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define double @foo_dd(double %a, double %b) #0 { +entry: + %call = tail call double @copysign(double %a, double %b) #0 + ret double %call + +; CHECK-LABEL: @foo_dd +; CHECK: fcpsgn 1, 2, 1 +; CHECK: blr +} + +declare double @copysign(double, double) #0 + +define float @foo_ss(float %a, float %b) #0 { +entry: + %call = tail call float @copysignf(float %a, float %b) #0 + ret float %call + +; CHECK-LABEL: @foo_ss +; CHECK: fcpsgn 1, 2, 1 +; CHECK: blr +} + +declare float @copysignf(float, float) #0 + +define float @foo_sd(float %a, double %b) #0 { +entry: + %conv = fptrunc double %b to float + %call = tail call float @copysignf(float %a, float %conv) #0 + ret float %call + +; 
CHECK-LABEL: @foo_sd +; CHECK: fcpsgn 1, 2, 1 +; CHECK: blr +} + +define double @foo_ds(double %a, float %b) #0 { +entry: + %conv = fpext float %b to double + %call = tail call double @copysign(double %a, double %conv) #0 + ret double %call + +; CHECK-LABEL: @foo_ds +; CHECK: fcpsgn 1, 2, 1 +; CHECK: blr +} + +attributes #0 = { nounwind readnone } + diff --git a/test/CodeGen/PowerPC/frameaddr.ll b/test/CodeGen/PowerPC/frameaddr.ll index eabd4a6..4480273 100644 --- a/test/CodeGen/PowerPC/frameaddr.ll +++ b/test/CodeGen/PowerPC/frameaddr.ll @@ -40,8 +40,8 @@ declare void @use(i8*) declare i8* @llvm.frameaddress(i32) #2 -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { noreturn nounwind } attributes #2 = { nounwind readnone } -attributes #3 = { nounwind naked "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind naked "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/PowerPC/glob-comp-aa-crash.ll b/test/CodeGen/PowerPC/glob-comp-aa-crash.ll new file mode 100644 index 0000000..f97d0ff --- /dev/null +++ b/test/CodeGen/PowerPC/glob-comp-aa-crash.ll @@ -0,0 +1,139 @@ +; RUN: llc -mtriple=powerpc64-bgq-linux -mcpu=a2 < %s +target datalayout = 
"E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-bgq-linux" + +%"class.std::__1::__assoc_sub_state" = type { %"class.std::__1::__shared_count", %"class.std::__exception_ptr::exception_ptr", %"class.std::__1::mutex", %"class.std::__1::condition_variable", i32 } +%"class.std::__1::__shared_count" = type { i32 (...)**, i64 } +%"class.std::__exception_ptr::exception_ptr" = type { i8* } +%"class.std::__1::mutex" = type { %union.pthread_mutex_t } +%union.pthread_mutex_t = type { %"struct.<anonymous union>::__pthread_mutex_s" } +%"struct.<anonymous union>::__pthread_mutex_s" = type { i32, i32, i32, i32, i32, i32, %struct.__pthread_internal_list } +%struct.__pthread_internal_list = type { %struct.__pthread_internal_list*, %struct.__pthread_internal_list* } +%"class.std::__1::condition_variable" = type { %union.pthread_cond_t } +%union.pthread_cond_t = type { %struct.anon } +%struct.anon = type { i32, i32, i64, i64, i64, i8*, i32, i32 } +%"class.std::__1::unique_lock" = type { %"class.std::__1::mutex"*, i8 } + +declare i32 @__gxx_personality_v0(...) 
+ +; Function Attrs: optsize +define void @_ZNSt3__117__assoc_sub_state4copyEv(%"class.std::__1::__assoc_sub_state"* %this) #0 align 2 { +entry: + %__lk = alloca %"class.std::__1::unique_lock", align 8 + %ref.tmp = alloca %"class.std::__exception_ptr::exception_ptr", align 8 + %tmp = alloca { i64, i64 }, align 8 + %agg.tmp = alloca %"class.std::__exception_ptr::exception_ptr", align 8 + %__mut_ = getelementptr inbounds %"class.std::__1::__assoc_sub_state"* %this, i64 0, i32 2 + %__m_.i.i = getelementptr inbounds %"class.std::__1::unique_lock"* %__lk, i64 0, i32 0 + store %"class.std::__1::mutex"* %__mut_, %"class.std::__1::mutex"** %__m_.i.i, align 8, !tbaa !5 + %__owns_.i.i = getelementptr inbounds %"class.std::__1::unique_lock"* %__lk, i64 0, i32 1 + store i8 1, i8* %__owns_.i.i, align 8, !tbaa !6 + call void @_ZNSt3__15mutex4lockEv(%"class.std::__1::mutex"* %__mut_) #4 + invoke void @_ZNSt3__117__assoc_sub_state10__sub_waitERNS_11unique_lockINS_5mutexEEE(%"class.std::__1::__assoc_sub_state"* %this, %"class.std::__1::unique_lock"* %__lk) #4 + to label %invoke.cont unwind label %lpad + +invoke.cont: ; preds = %entry + %__exception_ = getelementptr inbounds %"class.std::__1::__assoc_sub_state"* %this, i64 0, i32 1 + %0 = bitcast { i64, i64 }* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 16, i32 8, i1 false) + call void @_ZNSt15__exception_ptr13exception_ptrC1EMS0_FvvE(%"class.std::__exception_ptr::exception_ptr"* %ref.tmp, { i64, i64 }* byval %tmp) #5 + %call = call zeroext i1 @_ZNSt15__exception_ptrneERKNS_13exception_ptrES2_(%"class.std::__exception_ptr::exception_ptr"* %__exception_, %"class.std::__exception_ptr::exception_ptr"* %ref.tmp) #5 + call void @_ZNSt15__exception_ptr13exception_ptrD1Ev(%"class.std::__exception_ptr::exception_ptr"* %ref.tmp) #5 + br i1 %call, label %if.then, label %if.end + +if.then: ; preds = %invoke.cont + call void @_ZNSt15__exception_ptr13exception_ptrC1ERKS0_(%"class.std::__exception_ptr::exception_ptr"* 
%agg.tmp, %"class.std::__exception_ptr::exception_ptr"* %__exception_) #5 + invoke void @_ZSt17rethrow_exceptionNSt15__exception_ptr13exception_ptrE(%"class.std::__exception_ptr::exception_ptr"* %agg.tmp) #6 + to label %invoke.cont4 unwind label %lpad3 + +invoke.cont4: ; preds = %if.then + unreachable + +lpad: ; preds = %entry + %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + cleanup + %2 = extractvalue { i8*, i32 } %1, 0 + %3 = extractvalue { i8*, i32 } %1, 1 + br label %ehcleanup + +lpad3: ; preds = %if.then + %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + cleanup + %5 = extractvalue { i8*, i32 } %4, 0 + %6 = extractvalue { i8*, i32 } %4, 1 + call void @_ZNSt15__exception_ptr13exception_ptrD1Ev(%"class.std::__exception_ptr::exception_ptr"* %agg.tmp) #5 + br label %ehcleanup + +if.end: ; preds = %invoke.cont + %7 = load i8* %__owns_.i.i, align 8, !tbaa !6, !range !4 + %tobool.i.i = icmp eq i8 %7, 0 + br i1 %tobool.i.i, label %_ZNSt3__111unique_lockINS_5mutexEED1Ev.exit, label %if.then.i.i + +if.then.i.i: ; preds = %if.end + %8 = load %"class.std::__1::mutex"** %__m_.i.i, align 8, !tbaa !5 + call void @_ZNSt3__15mutex6unlockEv(%"class.std::__1::mutex"* %8) #5 + br label %_ZNSt3__111unique_lockINS_5mutexEED1Ev.exit + +_ZNSt3__111unique_lockINS_5mutexEED1Ev.exit: ; preds = %if.then.i.i, %if.end + ret void + +ehcleanup: ; preds = %lpad3, %lpad + %exn.slot.0 = phi i8* [ %5, %lpad3 ], [ %2, %lpad ] + %ehselector.slot.0 = phi i32 [ %6, %lpad3 ], [ %3, %lpad ] + %9 = load i8* %__owns_.i.i, align 8, !tbaa !6, !range !4 + %tobool.i.i9 = icmp eq i8 %9, 0 + br i1 %tobool.i.i9, label %_ZNSt3__111unique_lockINS_5mutexEED1Ev.exit12, label %if.then.i.i11 + +if.then.i.i11: ; preds = %ehcleanup + %10 = load %"class.std::__1::mutex"** %__m_.i.i, align 8, !tbaa !5 + call void @_ZNSt3__15mutex6unlockEv(%"class.std::__1::mutex"* %10) #5 + br label 
%_ZNSt3__111unique_lockINS_5mutexEED1Ev.exit12 + +_ZNSt3__111unique_lockINS_5mutexEED1Ev.exit12: ; preds = %if.then.i.i11, %ehcleanup + %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn.slot.0, 0 + %lpad.val5 = insertvalue { i8*, i32 } %lpad.val, i32 %ehselector.slot.0, 1 + resume { i8*, i32 } %lpad.val5 +} + +; Function Attrs: optsize +declare void @_ZNSt3__117__assoc_sub_state10__sub_waitERNS_11unique_lockINS_5mutexEEE(%"class.std::__1::__assoc_sub_state"*, %"class.std::__1::unique_lock"*) #0 align 2 + +; Function Attrs: nounwind optsize +declare zeroext i1 @_ZNSt15__exception_ptrneERKNS_13exception_ptrES2_(%"class.std::__exception_ptr::exception_ptr"*, %"class.std::__exception_ptr::exception_ptr"*) #1 + +; Function Attrs: nounwind optsize +declare void @_ZNSt15__exception_ptr13exception_ptrC1EMS0_FvvE(%"class.std::__exception_ptr::exception_ptr"*, { i64, i64 }* byval) #1 + +; Function Attrs: nounwind optsize +declare void @_ZNSt15__exception_ptr13exception_ptrD1Ev(%"class.std::__exception_ptr::exception_ptr"*) #1 + +; Function Attrs: noreturn optsize +declare void @_ZSt17rethrow_exceptionNSt15__exception_ptr13exception_ptrE(%"class.std::__exception_ptr::exception_ptr"*) #2 + +; Function Attrs: nounwind optsize +declare void @_ZNSt15__exception_ptr13exception_ptrC1ERKS0_(%"class.std::__exception_ptr::exception_ptr"*, %"class.std::__exception_ptr::exception_ptr"*) #1 + +; Function Attrs: nounwind optsize +declare void @_ZNSt3__15mutex6unlockEv(%"class.std::__1::mutex"*) #1 + +; Function Attrs: optsize +declare void @_ZNSt3__15mutex4lockEv(%"class.std::__1::mutex"*) #0 + +; Function Attrs: nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #3 + +attributes #0 = { optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind optsize 
"less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { noreturn optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind } +attributes #4 = { optsize } +attributes #5 = { nounwind optsize } +attributes #6 = { noreturn optsize } + +!0 = metadata !{metadata !"any pointer", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} +!3 = metadata !{metadata !"bool", metadata !1} +!4 = metadata !{i8 0, i8 2} +!5 = metadata !{metadata !0, metadata !0, i64 0} +!6 = metadata !{metadata !3, metadata !3, i64 0} diff --git a/test/CodeGen/PowerPC/hello-reloc.s b/test/CodeGen/PowerPC/hello-reloc.s new file mode 100644 index 0000000..9bbfb38 --- /dev/null +++ b/test/CodeGen/PowerPC/hello-reloc.s @@ -0,0 +1,84 @@ +; This tests for the basic implementation of PPCMachObjectWriter.cpp, +; which is responsible for writing mach-o relocation entries for (PIC) +; PowerPC objects. +; NOTE: Darwin PPC asm syntax is not yet supported by PPCAsmParser, +; so this test case uses ELF PPC asm syntax to produce a mach-o object. +; Once PPCAsmParser supports darwin asm syntax, this test case should +; be updated accordingly. 
+ +; RUN: llvm-mc -filetype=obj -relocation-model=pic -mcpu=g4 -triple=powerpc-apple-darwin8 %s -o - | llvm-readobj -relocations | FileCheck -check-prefix=DARWIN-G4-DUMP %s + +; .machine ppc7400 + .section __TEXT,__textcoal_nt,coalesced,pure_instructions + .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 + .section __TEXT,__text,regular,pure_instructions + .globl _main + .align 4 +_main: ; @main +; BB#0: ; %entry + mflr 0 + stw 31, -4(1) + stw 0, 8(1) + stwu 1, -80(1) + bl L0$pb +L0$pb: + mr 31, 1 + li 5, 0 + mflr 2 + stw 3, 68(31) + stw 5, 72(31) + stw 4, 64(31) + addis 2, 2, (L_.str-L0$pb)@ha + la 3, (L_.str-L0$pb)@l(2) + bl L_puts$stub + li 3, 0 + addi 1, 1, 80 + lwz 0, 8(1) + lwz 31, -4(1) + mtlr 0 + blr + + .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 + .align 4 +L_puts$stub: + .indirect_symbol _puts + mflr 0 + bcl 20, 31, L_puts$stub$tmp +L_puts$stub$tmp: + mflr 11 + addis 11, 11, (L_puts$lazy_ptr-L_puts$stub$tmp)@ha + mtlr 0 + lwzu 12, (L_puts$lazy_ptr-L_puts$stub$tmp)@l(11) + mtctr 12 + bctr + .section __DATA,__la_symbol_ptr,lazy_symbol_pointers +L_puts$lazy_ptr: + .indirect_symbol _puts + .long dyld_stub_binding_helper + +.subsections_via_symbols + .section __TEXT,__cstring,cstring_literals +L_.str: ; @.str + .asciz "Hello, world!" 
+ +; DARWIN-G4-DUMP:Format: Mach-O 32-bit ppc +; DARWIN-G4-DUMP:Arch: powerpc +; DARWIN-G4-DUMP:AddressSize: 32bit +; DARWIN-G4-DUMP:Relocations [ +; DARWIN-G4-DUMP: Section __text { +; DARWIN-G4-DUMP: 0x34 1 2 0 PPC_RELOC_BR24 0 - +; DARWIN-G4-DUMP: 0x30 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 _main +; DARWIN-G4-DUMP: 0x0 0 2 n/a PPC_RELOC_PAIR 1 _main +; DARWIN-G4-DUMP: 0x2C 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 _main +; DARWIN-G4-DUMP: 0x60 0 2 n/a PPC_RELOC_PAIR 1 _main +; DARWIN-G4-DUMP: } +; DARWIN-G4-DUMP: Section __picsymbolstub1 { +; DARWIN-G4-DUMP: 0x14 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 _main +; DARWIN-G4-DUMP: 0x0 0 2 n/a PPC_RELOC_PAIR 1 _main +; DARWIN-G4-DUMP: 0xC 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 _main +; DARWIN-G4-DUMP: 0x18 0 2 n/a PPC_RELOC_PAIR 1 _main +; DARWIN-G4-DUMP: } +; DARWIN-G4-DUMP: Section __la_symbol_ptr { +; DARWIN-G4-DUMP: 0x0 0 2 1 PPC_RELOC_VANILLA 0 dyld_stub_binding_helper +; DARWIN-G4-DUMP: } +; DARWIN-G4-DUMP:] diff --git a/test/CodeGen/PowerPC/i64_fp_round.ll b/test/CodeGen/PowerPC/i64_fp_round.ll index d2a3239..5770d78 100644 --- a/test/CodeGen/PowerPC/i64_fp_round.ll +++ b/test/CodeGen/PowerPC/i64_fp_round.ll @@ -22,6 +22,6 @@ entry: ; Also check that with -enable-unsafe-fp-math we do not get that extra ; code sequence. Simply verify that there is no "isel" present. 
-; RUN: llc -mcpu=pwr7 -mattr=-fpcvt -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=UNSAFE +; RUN: llc -mcpu=pwr7 -mattr=-fpcvt -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK-UNSAFE ; CHECK-UNSAFE-NOT: isel diff --git a/test/CodeGen/PowerPC/inlineasm-i64-reg.ll b/test/CodeGen/PowerPC/inlineasm-i64-reg.ll index fa9aa45..5e31cd5 100644 --- a/test/CodeGen/PowerPC/inlineasm-i64-reg.ll +++ b/test/CodeGen/PowerPC/inlineasm-i64-reg.ll @@ -59,6 +59,49 @@ entry: ret i32 %conv } +declare void @mtrace() + +define signext i32 @main(i32 signext %argc, i8** %argv) { +entry: + %argc.addr = alloca i32, align 4 + store i32 %argc, i32* %argc.addr, align 4 + %0 = call { i64, i64 } asm sideeffect "sc", "={r0},={r3},{r0},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{cr0},~{memory}"(i64 1076) + %asmresult1.i = extractvalue { i64, i64 } %0, 1 + %conv.i = trunc i64 %asmresult1.i to i32 + %cmp = icmp eq i32 %conv.i, 0 + br i1 %cmp, label %if.then, label %if.end + +; CHECK-LABEL: @main + +; CHECK-DAG: mr [[REG:[0-9]+]], 3 +; CHECK-DAG: li 0, 1076 +; CHECK: stw [[REG]], + +; CHECK: #APP +; CHECK: sc +; CHECK: #NO_APP + +; CHECK: cmpwi {{[0-9]+}}, [[REG]], 1 + +; CHECK: blr + +if.then: ; preds = %entry + call void @mtrace() + %.pre = load i32* %argc.addr, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + %1 = phi i32 [ %.pre, %if.then ], [ %argc, %entry ] + %cmp1 = icmp slt i32 %1, 2 + br i1 %cmp1, label %usage, label %if.end40 + +usage: + ret i32 8 + +if.end40: + ret i32 0 +} + attributes #0 = { alwaysinline inlinehint nounwind } attributes #1 = { nounwind } diff --git a/test/CodeGen/PowerPC/isel-rc-nox0.ll b/test/CodeGen/PowerPC/isel-rc-nox0.ll index 7d425cc..ac99aa4 100644 --- a/test/CodeGen/PowerPC/isel-rc-nox0.ll +++ b/test/CodeGen/PowerPC/isel-rc-nox0.ll @@ -22,7 +22,7 @@ crc32_gentab.exit: ; preds = %for.cond1.preheader for.cond1.preheader.i2961.i: ; preds = %for.inc44.i2977.i, %crc32_gentab.exit call void 
@llvm.memset.p0i8.i64(i8* bitcast ([1 x [9 x i32]]* @g_62 to i8*), i8 -1, i64 36, i32 4, i1 false) #1 - %0 = load i32* %retval.0.i.i.i, align 4, !tbaa !0 + %0 = load i32* %retval.0.i.i.i, align 4 %tobool.i2967.i = icmp eq i32 %0, 0 br label %for.body21.i2968.i @@ -42,9 +42,5 @@ func_80.exit2978.i: ; preds = %for.inc44.i2977.i ; Function Attrs: nounwind declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #1 -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "ssp-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "ssp-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind } - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/PowerPC/lit.local.cfg b/test/CodeGen/PowerPC/lit.local.cfg index aaa31d9..2e46300 100644 --- a/test/CodeGen/PowerPC/lit.local.cfg +++ b/test/CodeGen/PowerPC/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp', '.test'] - targets = set(config.root.targets_to_build.split()) if not 'PowerPC' in targets: config.unsupported = True diff --git a/test/CodeGen/PowerPC/mcm-2.ll b/test/CodeGen/PowerPC/mcm-2.ll index d4f40f7..fee98d8 100644 --- a/test/CodeGen/PowerPC/mcm-2.ll +++ b/test/CodeGen/PowerPC/mcm-2.ll @@ -31,7 +31,9 @@ entry: ; LARGE: ld [[REG2:[0-9]+]], [[VAR]]@toc@l([[REG1]]) ; LARGE: lwz {{[0-9]+}}, 0([[REG2]]) ; LARGE: stw {{[0-9]+}}, 0([[REG2]]) -; LARGE: .type [[VAR]],@object -; LARGE: .local [[VAR]] -; LARGE: .comm [[VAR]],4,4 +; LARGE: [[VAR]]: +; LARGE: .tc [[VAR2:[a-z0-9A-Z_.]+]][TC],[[VAR2]] +; LARGE: .type 
[[VAR2]],@object +; LARGE: .local [[VAR2]] +; LARGE: .comm [[VAR2]],4,4 diff --git a/test/CodeGen/PowerPC/mcm-3.ll b/test/CodeGen/PowerPC/mcm-3.ll index ce151fb..b6d681d 100644 --- a/test/CodeGen/PowerPC/mcm-3.ll +++ b/test/CodeGen/PowerPC/mcm-3.ll @@ -33,9 +33,11 @@ entry: ; LARGE: ld [[REG2:[0-9]+]], [[VAR]]@toc@l([[REG1]]) ; LARGE: lwz {{[0-9]+}}, 0([[REG2]]) ; LARGE: stw {{[0-9]+}}, 0([[REG2]]) -; LARGE: .type [[VAR]],@object -; LARGE: .data -; LARGE: .globl [[VAR]] ; LARGE: [[VAR]]: +; LARGE: .tc [[VAR2:[a-z0-9A-Z_.]+]][TC],[[VAR2]] +; LARGE: .type [[VAR2]],@object +; LARGE: .data +; LARGE: .globl [[VAR2]] +; LARGE: [[VAR2]]: ; LARGE: .long 5 diff --git a/test/CodeGen/PowerPC/mcm-4.ll b/test/CodeGen/PowerPC/mcm-4.ll index 7d7b132..73dd902 100644 --- a/test/CodeGen/PowerPC/mcm-4.ll +++ b/test/CodeGen/PowerPC/mcm-4.ll @@ -22,6 +22,6 @@ entry: ; LARGE: [[VAR:[a-z0-9A-Z_.]+]]: ; LARGE: .quad 4562098671269285104 ; LARGE-LABEL: test_double_const: -; LARGE: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha -; LARGE: ld [[REG2:[0-9]+]], [[VAR]]@toc@l([[REG1]]) +; LARGE: addis [[REG1:[0-9]+]], 2, [[VAR2:[a-z0-9A-Z_.]+]]@toc@ha +; LARGE: ld [[REG2:[0-9]+]], [[VAR2]]@toc@l([[REG1]]) ; LARGE: lfd {{[0-9]+}}, 0([[REG2]]) diff --git a/test/CodeGen/PowerPC/mcm-9.ll b/test/CodeGen/PowerPC/mcm-9.ll index e587f61..7906b6a 100644 --- a/test/CodeGen/PowerPC/mcm-9.ll +++ b/test/CodeGen/PowerPC/mcm-9.ll @@ -7,8 +7,7 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" -@ei = external global i32 -@a = alias i32* @ei +@a = external global i32 define signext i32 @test_external() nounwind { entry: diff --git a/test/CodeGen/PowerPC/negctr.ll b/test/CodeGen/PowerPC/negctr.ll index ef33bb7..2e64993 100644 --- a/test/CodeGen/PowerPC/negctr.ll +++ b/test/CodeGen/PowerPC/negctr.ll @@ -83,4 +83,4 @@ for.end: ; preds = %for.body, %entry ret void } -attributes #0 = { nounwind 
"less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/PowerPC/optcmp.ll b/test/CodeGen/PowerPC/optcmp.ll index 523f329..35aabfa 100644 --- a/test/CodeGen/PowerPC/optcmp.ll +++ b/test/CodeGen/PowerPC/optcmp.ll @@ -5,7 +5,7 @@ target triple = "powerpc64-unknown-linux-gnu" define signext i32 @foo(i32 signext %a, i32 signext %b, i32* nocapture %c) #0 { entry: %sub = sub nsw i32 %a, %b - store i32 %sub, i32* %c, align 4, !tbaa !0 + store i32 %sub, i32* %c, align 4 %cmp = icmp sgt i32 %a, %b %cond = select i1 %cmp, i32 %a, i32 %b ret i32 %cond @@ -17,7 +17,7 @@ entry: define signext i32 @foo2(i32 signext %a, i32 signext %b, i32* nocapture %c) #0 { entry: %shl = shl i32 %a, %b - store i32 %shl, i32* %c, align 4, !tbaa !0 + store i32 %shl, i32* %c, align 4 %cmp = icmp sgt i32 %shl, 0 %conv = zext i1 %cmp to i32 ret i32 %conv @@ -29,7 +29,7 @@ entry: define i64 @fool(i64 %a, i64 %b, i64* nocapture %c) #0 { entry: %sub = sub nsw i64 %a, %b - store i64 %sub, i64* %c, align 8, !tbaa !3 + store i64 %sub, i64* %c, align 8 %cmp = icmp sgt i64 %a, %b %cond = select i1 %cmp, i64 %a, i64 %b ret i64 %cond @@ -43,7 +43,7 @@ entry: define i64 @foolb(i64 %a, i64 %b, i64* nocapture %c) #0 { entry: %sub = sub nsw i64 %a, %b - store i64 %sub, i64* %c, align 8, !tbaa !3 + store i64 %sub, i64* %c, align 8 %cmp = icmp sle i64 %a, %b %cond = select i1 %cmp, i64 %a, i64 %b ret i64 %cond @@ -57,7 +57,7 @@ entry: define i64 @foolc(i64 %a, i64 %b, i64* nocapture %c) #0 { entry: %sub = sub nsw i64 %b, %a - store i64 %sub, i64* %c, align 8, !tbaa !3 + store i64 %sub, i64* %c, align 8 %cmp = icmp sgt i64 
%a, %b %cond = select i1 %cmp, i64 %a, i64 %b ret i64 %cond @@ -71,7 +71,7 @@ entry: define i64 @foold(i64 %a, i64 %b, i64* nocapture %c) #0 { entry: %sub = sub nsw i64 %b, %a - store i64 %sub, i64* %c, align 8, !tbaa !3 + store i64 %sub, i64* %c, align 8 %cmp = icmp eq i64 %a, %b %cond = select i1 %cmp, i64 %a, i64 %b ret i64 %cond @@ -85,7 +85,7 @@ entry: define i64 @foold2(i64 %a, i64 %b, i64* nocapture %c) #0 { entry: %sub = sub nsw i64 %a, %b - store i64 %sub, i64* %c, align 8, !tbaa !3 + store i64 %sub, i64* %c, align 8 %cmp = icmp eq i64 %a, %b %cond = select i1 %cmp, i64 %a, i64 %b ret i64 %cond @@ -99,7 +99,7 @@ entry: define i64 @foo2l(i64 %a, i64 %b, i64* nocapture %c) #0 { entry: %shl = shl i64 %a, %b - store i64 %shl, i64* %c, align 8, !tbaa !3 + store i64 %shl, i64* %c, align 8 %cmp = icmp sgt i64 %shl, 0 %conv1 = zext i1 %cmp to i64 ret i64 %conv1 @@ -112,7 +112,7 @@ entry: define double @food(double %a, double %b, double* nocapture %c) #0 { entry: %sub = fsub double %a, %b - store double %sub, double* %c, align 8, !tbaa !3 + store double %sub, double* %c, align 8 %cmp = fcmp ogt double %a, %b %cond = select i1 %cmp, double %a, double %b ret double %cond @@ -125,7 +125,7 @@ entry: define float @foof(float %a, float %b, float* nocapture %c) #0 { entry: %sub = fsub float %a, %b - store float %sub, float* %c, align 4, !tbaa !3 + store float %sub, float* %c, align 4 %cmp = fcmp ogt float %a, %b %cond = select i1 %cmp, float %a, float %b ret float %cond @@ -135,9 +135,18 @@ entry: ; CHECK: stfs 0, 0(5) } -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} -!3 = metadata !{metadata !"long", metadata !1} -!4 = metadata !{metadata !"any pointer", metadata !1} +declare i64 @llvm.ctpop.i64(i64); + +define signext i64 @fooct(i64 signext %a, i64 signext %b, i64* nocapture %c) #0 { +entry: + %sub = sub nsw i64 %a, %b + %subc = call i64 @llvm.ctpop.i64(i64 %sub) + 
store i64 %subc, i64* %c, align 4 + %cmp = icmp sgt i64 %subc, 0 + %cond = select i1 %cmp, i64 %a, i64 %b + ret i64 %cond + +; CHECK: @fooct +; CHECK-NOT: popcntd. +} diff --git a/test/CodeGen/PowerPC/pr15031.ll b/test/CodeGen/PowerPC/pr15031.ll index 5ccf941..e58ad80 100644 --- a/test/CodeGen/PowerPC/pr15031.ll +++ b/test/CodeGen/PowerPC/pr15031.ll @@ -317,54 +317,42 @@ if.then: ; preds = %entry if.end: ; preds = %entry, %if.then %Reg.addr.0 = phi i32 [ %call3, %if.then ], [ %Reg, %entry ] %RegNo.i.i = getelementptr inbounds %"class.llvm::MachineOperand"* %this, i64 0, i32 2, i32 0 - %1 = load i32* %RegNo.i.i, align 4, !tbaa !0 + %1 = load i32* %RegNo.i.i, align 4 %cmp.i = icmp eq i32 %1, %Reg.addr.0 br i1 %cmp.i, label %_ZN4llvm14MachineOperand6setRegEj.exit, label %if.end.i if.end.i: ; preds = %if.end %ParentMI.i.i = getelementptr inbounds %"class.llvm::MachineOperand"* %this, i64 0, i32 3 - %2 = load %"class.llvm::MachineInstr"** %ParentMI.i.i, align 8, !tbaa !3 + %2 = load %"class.llvm::MachineInstr"** %ParentMI.i.i, align 8 %tobool.i = icmp eq %"class.llvm::MachineInstr"* %2, null br i1 %tobool.i, label %if.end13.i, label %if.then3.i if.then3.i: ; preds = %if.end.i %Parent.i.i = getelementptr inbounds %"class.llvm::MachineInstr"* %2, i64 0, i32 2 - %3 = load %"class.llvm::MachineBasicBlock"** %Parent.i.i, align 8, !tbaa !3 + %3 = load %"class.llvm::MachineBasicBlock"** %Parent.i.i, align 8 %tobool5.i = icmp eq %"class.llvm::MachineBasicBlock"* %3, null br i1 %tobool5.i, label %if.end13.i, label %if.then6.i if.then6.i: ; preds = %if.then3.i %xParent.i.i = getelementptr inbounds %"class.llvm::MachineBasicBlock"* %3, i64 0, i32 4 - %4 = load %"class.llvm::MachineFunction"** %xParent.i.i, align 8, !tbaa !3 + %4 = load %"class.llvm::MachineFunction"** %xParent.i.i, align 8 %tobool8.i = icmp eq %"class.llvm::MachineFunction"* %4, null br i1 %tobool8.i, label %if.end13.i, label %if.then9.i if.then9.i: ; preds = %if.then6.i %RegInfo.i.i = getelementptr inbounds 
%"class.llvm::MachineFunction"* %4, i64 0, i32 5 - %5 = load %"class.llvm::MachineRegisterInfo"** %RegInfo.i.i, align 8, !tbaa !3 + %5 = load %"class.llvm::MachineRegisterInfo"** %RegInfo.i.i, align 8 tail call void @_ZN4llvm19MachineRegisterInfo27removeRegOperandFromUseListEPNS_14MachineOperandE(%"class.llvm::MachineRegisterInfo"* %5, %"class.llvm::MachineOperand"* %this) - store i32 %Reg.addr.0, i32* %RegNo.i.i, align 4, !tbaa !0 + store i32 %Reg.addr.0, i32* %RegNo.i.i, align 4 tail call void @_ZN4llvm19MachineRegisterInfo22addRegOperandToUseListEPNS_14MachineOperandE(%"class.llvm::MachineRegisterInfo"* %5, %"class.llvm::MachineOperand"* %this) br label %_ZN4llvm14MachineOperand6setRegEj.exit if.end13.i: ; preds = %if.then6.i, %if.then3.i, %if.end.i - store i32 %Reg.addr.0, i32* %RegNo.i.i, align 4, !tbaa !0 + store i32 %Reg.addr.0, i32* %RegNo.i.i, align 4 br label %_ZN4llvm14MachineOperand6setRegEj.exit _ZN4llvm14MachineOperand6setRegEj.exit: ; preds = %if.end, %if.then9.i, %if.end13.i ret void } -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} -!3 = metadata !{metadata !"any pointer", metadata !1} -!4 = metadata !{metadata !"vtable pointer", metadata !2} -!5 = metadata !{metadata !"long", metadata !1} -!6 = metadata !{i64 0, i64 8, metadata !3, i64 8, i64 8, metadata !5} -!7 = metadata !{metadata !"short", metadata !1} -!8 = metadata !{i64 0, i64 1, metadata !1, i64 1, i64 4, metadata !0, i64 2, i64 1, metadata !1, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 4, i64 4, metadata !0, i64 4, i64 4, metadata !0, i64 8, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !5, i64 16, i64 8, metadata !3, i64 16, i64 
8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 24, i64 8, metadata !3, i64 16, i64 4, metadata !0, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 24, i64 4, metadata !0} -!9 = metadata !{metadata !"bool", metadata !1} -!10 = metadata !{i8 0, i8 2} - ; CHECK-NOT: lbzu 3, 1(3) diff --git a/test/CodeGen/PowerPC/pr17168.ll b/test/CodeGen/PowerPC/pr17168.ll new file mode 100644 index 0000000..2848221 --- /dev/null +++ b/test/CodeGen/PowerPC/pr17168.ll @@ -0,0 +1,521 @@ +; RUN: llc -mcpu=pwr7 -O0 < %s + +; This test formerly failed due to a DBG_VALUE being placed prior to a PHI +; when fast-isel is partially successful before punting to DAG-isel. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@grid_points = external global [3 x i32], align 4 + +; Function Attrs: nounwind +define fastcc void @compute_rhs() #0 { +entry: + br i1 undef, label %for.cond871.preheader.for.inc960_crit_edge, label %for.end1042, !dbg !439 + +for.cond871.preheader.for.inc960_crit_edge: ; preds = %for.cond871.preheader.for.inc960_crit_edge, %entry + br i1 false, label %for.cond871.preheader.for.inc960_crit_edge, label %for.cond964.preheader, !dbg !439 + +for.cond964.preheader: ; preds = %for.cond871.preheader.for.inc960_crit_edge + br i1 undef, label %for.cond968.preheader, label %for.end1042, !dbg !441 + +for.cond968.preheader: ; preds = %for.cond968.preheader, %for.cond964.preheader + br i1 false, label %for.cond968.preheader, label %for.end1042, !dbg !441 + +for.end1042: ; preds = %for.cond968.preheader, %for.cond964.preheader, %entry + %0 = phi i32 [ undef, %for.cond964.preheader ], [ undef, %for.cond968.preheader ], [ undef, %entry ] + %1 = load i32* getelementptr inbounds ([3 x i32]* @grid_points, i64 0, i64 0), align 4, !dbg !443, !tbaa !444 + tail call void @llvm.dbg.value(metadata !447, 
i64 0, metadata !119), !dbg !448 + %sub10454270 = add nsw i32 %0, -1, !dbg !448 + %cmp10464271 = icmp sgt i32 %sub10454270, 1, !dbg !448 + %sub11134263 = add nsw i32 %1, -1, !dbg !450 + %cmp11144264 = icmp sgt i32 %sub11134263, 1, !dbg !450 + br i1 %cmp11144264, label %for.cond1116.preheader, label %for.cond1816.preheader.for.inc1898_crit_edge, !dbg !450 + +for.cond1116.preheader: ; preds = %for.inc1658, %for.end1042 + br i1 %cmp10464271, label %for.body1123, label %for.inc1658, !dbg !452 + +for.body1123: ; preds = %for.body1123, %for.cond1116.preheader + br label %for.body1123, !dbg !455 + +for.inc1658: ; preds = %for.cond1116.preheader + br i1 undef, label %for.cond1116.preheader, label %for.cond1816.preheader.for.inc1898_crit_edge, !dbg !450 + +for.cond1816.preheader.for.inc1898_crit_edge: ; preds = %for.cond1816.preheader.for.inc1898_crit_edge, %for.inc1658, %for.end1042 + br label %for.cond1816.preheader.for.inc1898_crit_edge, !dbg !458 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.value(metadata, i64, metadata) #1 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!438, !464} + +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 190311)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !298, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c] [DW_LANG_C99] +!1 = metadata !{metadata !"bt.c", metadata !"/home/hfinkel/src/NPB2.3-omp-C/BT"} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4, metadata !82, metadata !102, metadata !114, metadata !132, metadata !145, metadata !154, metadata !155, metadata !162, metadata !183, metadata !200, metadata 
!201, metadata !207, metadata !208, metadata !215, metadata !221, metadata !230, metadata !238, metadata !246, metadata !255, metadata !260, metadata !261, metadata !268, metadata !274, metadata !279, metadata !280, metadata !287, metadata !293} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 74, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !12, i32 74} ; [ DW_TAG_subprogram ] [line 74] [def] [main] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{metadata !8, metadata !8, metadata !9} +!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ] +!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char] +!11 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_unsigned_char] +!12 = metadata !{metadata !13, metadata !14, metadata !15, metadata !16, metadata !17, metadata !18, metadata !19, metadata !21, metadata !22, metadata !23, metadata !25, metadata !26} +!13 = metadata !{i32 786689, metadata !4, metadata !"argc", metadata !5, i32 16777290, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argc] [line 74] +!14 = metadata !{i32 
786689, metadata !4, metadata !"argv", metadata !5, i32 33554506, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argv] [line 74] +!15 = metadata !{i32 786688, metadata !4, metadata !"niter", metadata !5, i32 76, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [niter] [line 76] +!16 = metadata !{i32 786688, metadata !4, metadata !"step", metadata !5, i32 76, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [step] [line 76] +!17 = metadata !{i32 786688, metadata !4, metadata !"n3", metadata !5, i32 76, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [n3] [line 76] +!18 = metadata !{i32 786688, metadata !4, metadata !"nthreads", metadata !5, i32 77, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [nthreads] [line 77] +!19 = metadata !{i32 786688, metadata !4, metadata !"navg", metadata !5, i32 78, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [navg] [line 78] +!20 = metadata !{i32 786468, null, null, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float] +!21 = metadata !{i32 786688, metadata !4, metadata !"mflops", metadata !5, i32 78, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [mflops] [line 78] +!22 = metadata !{i32 786688, metadata !4, metadata !"tmax", metadata !5, i32 80, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [tmax] [line 80] +!23 = metadata !{i32 786688, metadata !4, metadata !"verified", metadata !5, i32 81, metadata !24, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [verified] [line 81] +!24 = metadata !{i32 786454, metadata !1, null, metadata !"boolean", i32 12, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ] [boolean] [line 12, size 0, align 0, offset 0] [from int] +!25 = metadata !{i32 786688, metadata !4, metadata !"class", metadata !5, i32 82, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [class] [line 82] +!26 = metadata !{i32 786688, metadata !4, metadata !"fp", metadata 
!5, i32 83, metadata !27, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [fp] [line 83] +!27 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !28} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from FILE] +!28 = metadata !{i32 786454, metadata !1, null, metadata !"FILE", i32 49, i64 0, i64 0, i64 0, i32 0, metadata !29} ; [ DW_TAG_typedef ] [FILE] [line 49, size 0, align 0, offset 0] [from _IO_FILE] +!29 = metadata !{i32 786451, metadata !30, null, metadata !"_IO_FILE", i32 271, i64 1728, i64 64, i32 0, i32 0, null, metadata !31, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [_IO_FILE] [line 271, size 1728, align 64, offset 0] [def] [from ] +!30 = metadata !{metadata !"/usr/include/libio.h", metadata !"/home/hfinkel/src/NPB2.3-omp-C/BT"} +!31 = metadata !{metadata !32, metadata !33, metadata !34, metadata !35, metadata !36, metadata !37, metadata !38, metadata !39, metadata !40, metadata !41, metadata !42, metadata !43, metadata !44, metadata !52, metadata !53, metadata !54, metadata !55, metadata !58, metadata !60, metadata !62, metadata !66, metadata !68, metadata !70, metadata !71, metadata !72, metadata !73, metadata !74, metadata !77, metadata !78} +!32 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_flags", i32 272, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_member ] [_flags] [line 272, size 32, align 32, offset 0] [from int] +!33 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_read_ptr", i32 277, i64 64, i64 64, i64 64, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_read_ptr] [line 277, size 64, align 64, offset 64] [from ] +!34 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_read_end", i32 278, i64 64, i64 64, i64 128, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_read_end] [line 278, size 64, align 64, offset 128] [from ] +!35 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_read_base", i32 279, i64 64, 
i64 64, i64 192, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_read_base] [line 279, size 64, align 64, offset 192] [from ] +!36 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_write_base", i32 280, i64 64, i64 64, i64 256, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_write_base] [line 280, size 64, align 64, offset 256] [from ] +!37 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_write_ptr", i32 281, i64 64, i64 64, i64 320, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_write_ptr] [line 281, size 64, align 64, offset 320] [from ] +!38 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_write_end", i32 282, i64 64, i64 64, i64 384, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_write_end] [line 282, size 64, align 64, offset 384] [from ] +!39 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_buf_base", i32 283, i64 64, i64 64, i64 448, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_buf_base] [line 283, size 64, align 64, offset 448] [from ] +!40 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_buf_end", i32 284, i64 64, i64 64, i64 512, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_buf_end] [line 284, size 64, align 64, offset 512] [from ] +!41 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_save_base", i32 286, i64 64, i64 64, i64 576, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_save_base] [line 286, size 64, align 64, offset 576] [from ] +!42 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_backup_base", i32 287, i64 64, i64 64, i64 640, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_backup_base] [line 287, size 64, align 64, offset 640] [from ] +!43 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_save_end", i32 288, i64 64, i64 64, i64 704, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_save_end] [line 288, size 64, align 64, offset 704] [from ] +!44 = metadata !{i32 786445, metadata !30, metadata !29, metadata 
!"_markers", i32 290, i64 64, i64 64, i64 768, i32 0, metadata !45} ; [ DW_TAG_member ] [_markers] [line 290, size 64, align 64, offset 768] [from ] +!45 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !46} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _IO_marker] +!46 = metadata !{i32 786451, metadata !30, null, metadata !"_IO_marker", i32 186, i64 192, i64 64, i32 0, i32 0, null, metadata !47, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [_IO_marker] [line 186, size 192, align 64, offset 0] [def] [from ] +!47 = metadata !{metadata !48, metadata !49, metadata !51} +!48 = metadata !{i32 786445, metadata !30, metadata !46, metadata !"_next", i32 187, i64 64, i64 64, i64 0, i32 0, metadata !45} ; [ DW_TAG_member ] [_next] [line 187, size 64, align 64, offset 0] [from ] +!49 = metadata !{i32 786445, metadata !30, metadata !46, metadata !"_sbuf", i32 188, i64 64, i64 64, i64 64, i32 0, metadata !50} ; [ DW_TAG_member ] [_sbuf] [line 188, size 64, align 64, offset 64] [from ] +!50 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !29} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _IO_FILE] +!51 = metadata !{i32 786445, metadata !30, metadata !46, metadata !"_pos", i32 192, i64 32, i64 32, i64 128, i32 0, metadata !8} ; [ DW_TAG_member ] [_pos] [line 192, size 32, align 32, offset 128] [from int] +!52 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_chain", i32 292, i64 64, i64 64, i64 832, i32 0, metadata !50} ; [ DW_TAG_member ] [_chain] [line 292, size 64, align 64, offset 832] [from ] +!53 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_fileno", i32 294, i64 32, i64 32, i64 896, i32 0, metadata !8} ; [ DW_TAG_member ] [_fileno] [line 294, size 32, align 32, offset 896] [from int] +!54 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_flags2", i32 298, i64 32, i64 32, i64 
928, i32 0, metadata !8} ; [ DW_TAG_member ] [_flags2] [line 298, size 32, align 32, offset 928] [from int] +!55 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_old_offset", i32 300, i64 64, i64 64, i64 960, i32 0, metadata !56} ; [ DW_TAG_member ] [_old_offset] [line 300, size 64, align 64, offset 960] [from __off_t] +!56 = metadata !{i32 786454, metadata !30, null, metadata !"__off_t", i32 141, i64 0, i64 0, i64 0, i32 0, metadata !57} ; [ DW_TAG_typedef ] [__off_t] [line 141, size 0, align 0, offset 0] [from long int] +!57 = metadata !{i32 786468, null, null, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [long int] [line 0, size 64, align 64, offset 0, enc DW_ATE_signed] +!58 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_cur_column", i32 304, i64 16, i64 16, i64 1024, i32 0, metadata !59} ; [ DW_TAG_member ] [_cur_column] [line 304, size 16, align 16, offset 1024] [from unsigned short] +!59 = metadata !{i32 786468, null, null, metadata !"unsigned short", i32 0, i64 16, i64 16, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned short] [line 0, size 16, align 16, offset 0, enc DW_ATE_unsigned] +!60 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_vtable_offset", i32 305, i64 8, i64 8, i64 1040, i32 0, metadata !61} ; [ DW_TAG_member ] [_vtable_offset] [line 305, size 8, align 8, offset 1040] [from signed char] +!61 = metadata !{i32 786468, null, null, metadata !"signed char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [signed char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char] +!62 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_shortbuf", i32 306, i64 8, i64 8, i64 1048, i32 0, metadata !63} ; [ DW_TAG_member ] [_shortbuf] [line 306, size 8, align 8, offset 1048] [from ] +!63 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 8, i64 8, i32 0, i32 0, metadata !11, metadata !64, i32 0, null, null, null} ; 
[ DW_TAG_array_type ] [line 0, size 8, align 8, offset 0] [from char] +!64 = metadata !{metadata !65} +!65 = metadata !{i32 786465, i64 0, i64 1} ; [ DW_TAG_subrange_type ] [0, 0] +!66 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_lock", i32 310, i64 64, i64 64, i64 1088, i32 0, metadata !67} ; [ DW_TAG_member ] [_lock] [line 310, size 64, align 64, offset 1088] [from ] +!67 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ] +!68 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_offset", i32 319, i64 64, i64 64, i64 1152, i32 0, metadata !69} ; [ DW_TAG_member ] [_offset] [line 319, size 64, align 64, offset 1152] [from __off64_t] +!69 = metadata !{i32 786454, metadata !30, null, metadata !"__off64_t", i32 142, i64 0, i64 0, i64 0, i32 0, metadata !57} ; [ DW_TAG_typedef ] [__off64_t] [line 142, size 0, align 0, offset 0] [from long int] +!70 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"__pad1", i32 328, i64 64, i64 64, i64 1216, i32 0, metadata !67} ; [ DW_TAG_member ] [__pad1] [line 328, size 64, align 64, offset 1216] [from ] +!71 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"__pad2", i32 329, i64 64, i64 64, i64 1280, i32 0, metadata !67} ; [ DW_TAG_member ] [__pad2] [line 329, size 64, align 64, offset 1280] [from ] +!72 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"__pad3", i32 330, i64 64, i64 64, i64 1344, i32 0, metadata !67} ; [ DW_TAG_member ] [__pad3] [line 330, size 64, align 64, offset 1344] [from ] +!73 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"__pad4", i32 331, i64 64, i64 64, i64 1408, i32 0, metadata !67} ; [ DW_TAG_member ] [__pad4] [line 331, size 64, align 64, offset 1408] [from ] +!74 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"__pad5", i32 332, i64 64, i64 64, i64 1472, i32 0, metadata !75} ; [ 
DW_TAG_member ] [__pad5] [line 332, size 64, align 64, offset 1472] [from size_t] +!75 = metadata !{i32 786454, metadata !30, null, metadata !"size_t", i32 42, i64 0, i64 0, i64 0, i32 0, metadata !76} ; [ DW_TAG_typedef ] [size_t] [line 42, size 0, align 0, offset 0] [from long unsigned int] +!76 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned] +!77 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_mode", i32 334, i64 32, i64 32, i64 1536, i32 0, metadata !8} ; [ DW_TAG_member ] [_mode] [line 334, size 32, align 32, offset 1536] [from int] +!78 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_unused2", i32 336, i64 160, i64 8, i64 1568, i32 0, metadata !79} ; [ DW_TAG_member ] [_unused2] [line 336, size 160, align 8, offset 1568] [from ] +!79 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 160, i64 8, i32 0, i32 0, metadata !11, metadata !80, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 160, align 8, offset 0] [from char] +!80 = metadata !{metadata !81} +!81 = metadata !{i32 786465, i64 0, i64 20} ; [ DW_TAG_subrange_type ] [0, 19] +!82 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"verify", metadata !"verify", metadata !"", i32 2388, metadata !83, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !86, i32 2388} ; [ DW_TAG_subprogram ] [line 2388] [local] [def] [verify] +!83 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !84, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!84 = metadata !{null, metadata !8, metadata !10, metadata !85} +!85 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 
64, offset 0] [from boolean] +!86 = metadata !{metadata !87, metadata !88, metadata !89, metadata !90, metadata !94, metadata !95, metadata !96, metadata !97, metadata !98, metadata !99, metadata !100, metadata !101} +!87 = metadata !{i32 786689, metadata !82, metadata !"no_time_steps", metadata !5, i32 16779604, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [no_time_steps] [line 2388] +!88 = metadata !{i32 786689, metadata !82, metadata !"class", metadata !5, i32 33556820, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [class] [line 2388] +!89 = metadata !{i32 786689, metadata !82, metadata !"verified", metadata !5, i32 50334036, metadata !85, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [verified] [line 2388] +!90 = metadata !{i32 786688, metadata !82, metadata !"xcrref", metadata !5, i32 2397, metadata !91, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [xcrref] [line 2397] +!91 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 320, i64 64, i32 0, i32 0, metadata !20, metadata !92, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 320, align 64, offset 0] [from double] +!92 = metadata !{metadata !93} +!93 = metadata !{i32 786465, i64 0, i64 5} ; [ DW_TAG_subrange_type ] [0, 4] +!94 = metadata !{i32 786688, metadata !82, metadata !"xceref", metadata !5, i32 2397, metadata !91, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [xceref] [line 2397] +!95 = metadata !{i32 786688, metadata !82, metadata !"xcrdif", metadata !5, i32 2397, metadata !91, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [xcrdif] [line 2397] +!96 = metadata !{i32 786688, metadata !82, metadata !"xcedif", metadata !5, i32 2397, metadata !91, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [xcedif] [line 2397] +!97 = metadata !{i32 786688, metadata !82, metadata !"epsilon", metadata !5, i32 2398, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [epsilon] [line 2398] +!98 = metadata !{i32 786688, metadata !82, metadata !"xce", metadata !5, i32 2398, metadata !91, i32 0, i32 0} ; [ 
DW_TAG_auto_variable ] [xce] [line 2398] +!99 = metadata !{i32 786688, metadata !82, metadata !"xcr", metadata !5, i32 2398, metadata !91, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [xcr] [line 2398] +!100 = metadata !{i32 786688, metadata !82, metadata !"dtref", metadata !5, i32 2398, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [dtref] [line 2398] +!101 = metadata !{i32 786688, metadata !82, metadata !"m", metadata !5, i32 2399, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 2399] +!102 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"rhs_norm", metadata !"rhs_norm", metadata !"", i32 266, metadata !103, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !106, i32 266} ; [ DW_TAG_subprogram ] [line 266] [local] [def] [rhs_norm] +!103 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !104, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!104 = metadata !{null, metadata !105} +!105 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from double] +!106 = metadata !{metadata !107, metadata !108, metadata !109, metadata !110, metadata !111, metadata !112, metadata !113} +!107 = metadata !{i32 786689, metadata !102, metadata !"rms", metadata !5, i32 16777482, metadata !105, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [rms] [line 266] +!108 = metadata !{i32 786688, metadata !102, metadata !"i", metadata !5, i32 271, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 271] +!109 = metadata !{i32 786688, metadata !102, metadata !"j", metadata !5, i32 271, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 271] +!110 = metadata !{i32 786688, metadata !102, metadata !"k", metadata !5, i32 271, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 271] +!111 = 
metadata !{i32 786688, metadata !102, metadata !"d", metadata !5, i32 271, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [d] [line 271] +!112 = metadata !{i32 786688, metadata !102, metadata !"m", metadata !5, i32 271, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 271] +!113 = metadata !{i32 786688, metadata !102, metadata !"add", metadata !5, i32 272, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [add] [line 272] +!114 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"compute_rhs", metadata !"compute_rhs", metadata !"", i32 1767, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @compute_rhs, null, null, metadata !117, i32 1767} ; [ DW_TAG_subprogram ] [line 1767] [local] [def] [compute_rhs] +!115 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !116, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!116 = metadata !{null} +!117 = metadata !{metadata !118, metadata !119, metadata !120, metadata !121, metadata !122, metadata !123, metadata !124, metadata !125, metadata !126, metadata !127, metadata !128, metadata !129, metadata !130, metadata !131} +!118 = metadata !{i32 786688, metadata !114, metadata !"i", metadata !5, i32 1769, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 1769] +!119 = metadata !{i32 786688, metadata !114, metadata !"j", metadata !5, i32 1769, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 1769] +!120 = metadata !{i32 786688, metadata !114, metadata !"k", metadata !5, i32 1769, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 1769] +!121 = metadata !{i32 786688, metadata !114, metadata !"m", metadata !5, i32 1769, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 1769] +!122 = metadata !{i32 786688, metadata !114, metadata !"rho_inv", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ 
DW_TAG_auto_variable ] [rho_inv] [line 1770] +!123 = metadata !{i32 786688, metadata !114, metadata !"uijk", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [uijk] [line 1770] +!124 = metadata !{i32 786688, metadata !114, metadata !"up1", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [up1] [line 1770] +!125 = metadata !{i32 786688, metadata !114, metadata !"um1", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [um1] [line 1770] +!126 = metadata !{i32 786688, metadata !114, metadata !"vijk", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [vijk] [line 1770] +!127 = metadata !{i32 786688, metadata !114, metadata !"vp1", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [vp1] [line 1770] +!128 = metadata !{i32 786688, metadata !114, metadata !"vm1", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [vm1] [line 1770] +!129 = metadata !{i32 786688, metadata !114, metadata !"wijk", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [wijk] [line 1770] +!130 = metadata !{i32 786688, metadata !114, metadata !"wp1", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [wp1] [line 1770] +!131 = metadata !{i32 786688, metadata !114, metadata !"wm1", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [wm1] [line 1770] +!132 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"error_norm", metadata !"error_norm", metadata !"", i32 225, metadata !103, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !133, i32 225} ; [ DW_TAG_subprogram ] [line 225] [local] [def] [error_norm] +!133 = metadata !{metadata !134, metadata !135, metadata !136, metadata !137, metadata !138, metadata !139, metadata !140, metadata !141, metadata !142, metadata !143, metadata !144} +!134 = metadata !{i32 786689, 
metadata !132, metadata !"rms", metadata !5, i32 16777441, metadata !105, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [rms] [line 225] +!135 = metadata !{i32 786688, metadata !132, metadata !"i", metadata !5, i32 232, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 232] +!136 = metadata !{i32 786688, metadata !132, metadata !"j", metadata !5, i32 232, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 232] +!137 = metadata !{i32 786688, metadata !132, metadata !"k", metadata !5, i32 232, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 232] +!138 = metadata !{i32 786688, metadata !132, metadata !"m", metadata !5, i32 232, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 232] +!139 = metadata !{i32 786688, metadata !132, metadata !"d", metadata !5, i32 232, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [d] [line 232] +!140 = metadata !{i32 786688, metadata !132, metadata !"xi", metadata !5, i32 233, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [xi] [line 233] +!141 = metadata !{i32 786688, metadata !132, metadata !"eta", metadata !5, i32 233, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [eta] [line 233] +!142 = metadata !{i32 786688, metadata !132, metadata !"zeta", metadata !5, i32 233, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [zeta] [line 233] +!143 = metadata !{i32 786688, metadata !132, metadata !"u_exact", metadata !5, i32 233, metadata !91, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [u_exact] [line 233] +!144 = metadata !{i32 786688, metadata !132, metadata !"add", metadata !5, i32 233, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [add] [line 233] +!145 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"exact_solution", metadata !"exact_solution", metadata !"", i32 643, metadata !146, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !148, i32 644} ; [ DW_TAG_subprogram ] [line 643] [local] [def] [scope 644] 
[exact_solution] +!146 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !147, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!147 = metadata !{null, metadata !20, metadata !20, metadata !20, metadata !105} +!148 = metadata !{metadata !149, metadata !150, metadata !151, metadata !152, metadata !153} +!149 = metadata !{i32 786689, metadata !145, metadata !"xi", metadata !5, i32 16777859, metadata !20, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [xi] [line 643] +!150 = metadata !{i32 786689, metadata !145, metadata !"eta", metadata !5, i32 33555075, metadata !20, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [eta] [line 643] +!151 = metadata !{i32 786689, metadata !145, metadata !"zeta", metadata !5, i32 50332291, metadata !20, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [zeta] [line 643] +!152 = metadata !{i32 786689, metadata !145, metadata !"dtemp", metadata !5, i32 67109508, metadata !105, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [dtemp] [line 644] +!153 = metadata !{i32 786688, metadata !145, metadata !"m", metadata !5, i32 653, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 653] +!154 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"set_constants", metadata !"set_constants", metadata !"", i32 2191, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !2, i32 2191} ; [ DW_TAG_subprogram ] [line 2191] [local] [def] [set_constants] +!155 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"lhsinit", metadata !"lhsinit", metadata !"", i32 855, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !156, i32 855} ; [ DW_TAG_subprogram ] [line 855] [local] [def] [lhsinit] +!156 = metadata !{metadata !157, metadata !158, metadata !159, metadata !160, metadata !161} +!157 = metadata !{i32 786688, metadata !155, metadata !"i", metadata !5, i32 857, metadata 
!8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 857] +!158 = metadata !{i32 786688, metadata !155, metadata !"j", metadata !5, i32 857, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 857] +!159 = metadata !{i32 786688, metadata !155, metadata !"k", metadata !5, i32 857, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 857] +!160 = metadata !{i32 786688, metadata !155, metadata !"m", metadata !5, i32 857, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 857] +!161 = metadata !{i32 786688, metadata !155, metadata !"n", metadata !5, i32 857, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [n] [line 857] +!162 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"initialize", metadata !"initialize", metadata !"", i32 669, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !163, i32 669} ; [ DW_TAG_subprogram ] [line 669] [local] [def] [initialize] +!163 = metadata !{metadata !164, metadata !165, metadata !166, metadata !167, metadata !168, metadata !169, metadata !170, metadata !171, metadata !172, metadata !173, metadata !174, metadata !179, metadata !180, metadata !181, metadata !182} +!164 = metadata !{i32 786688, metadata !162, metadata !"i", metadata !5, i32 679, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 679] +!165 = metadata !{i32 786688, metadata !162, metadata !"j", metadata !5, i32 679, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 679] +!166 = metadata !{i32 786688, metadata !162, metadata !"k", metadata !5, i32 679, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 679] +!167 = metadata !{i32 786688, metadata !162, metadata !"m", metadata !5, i32 679, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 679] +!168 = metadata !{i32 786688, metadata !162, metadata !"ix", metadata !5, i32 679, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [ix] [line 679] +!169 = metadata !{i32 
786688, metadata !162, metadata !"iy", metadata !5, i32 679, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [iy] [line 679] +!170 = metadata !{i32 786688, metadata !162, metadata !"iz", metadata !5, i32 679, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [iz] [line 679] +!171 = metadata !{i32 786688, metadata !162, metadata !"xi", metadata !5, i32 680, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [xi] [line 680] +!172 = metadata !{i32 786688, metadata !162, metadata !"eta", metadata !5, i32 680, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [eta] [line 680] +!173 = metadata !{i32 786688, metadata !162, metadata !"zeta", metadata !5, i32 680, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [zeta] [line 680] +!174 = metadata !{i32 786688, metadata !162, metadata !"Pface", metadata !5, i32 680, metadata !175, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [Pface] [line 680] +!175 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 1920, i64 64, i32 0, i32 0, metadata !20, metadata !176, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 1920, align 64, offset 0] [from double] +!176 = metadata !{metadata !177, metadata !178, metadata !93} +!177 = metadata !{i32 786465, i64 0, i64 2} ; [ DW_TAG_subrange_type ] [0, 1] +!178 = metadata !{i32 786465, i64 0, i64 3} ; [ DW_TAG_subrange_type ] [0, 2] +!179 = metadata !{i32 786688, metadata !162, metadata !"Pxi", metadata !5, i32 680, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [Pxi] [line 680] +!180 = metadata !{i32 786688, metadata !162, metadata !"Peta", metadata !5, i32 680, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [Peta] [line 680] +!181 = metadata !{i32 786688, metadata !162, metadata !"Pzeta", metadata !5, i32 680, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [Pzeta] [line 680] +!182 = metadata !{i32 786688, metadata !162, metadata !"temp", metadata !5, i32 680, metadata !91, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [temp] [line 
680] +!183 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"exact_rhs", metadata !"exact_rhs", metadata !"", i32 301, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !184, i32 301} ; [ DW_TAG_subprogram ] [line 301] [local] [def] [exact_rhs] +!184 = metadata !{metadata !185, metadata !186, metadata !187, metadata !188, metadata !189, metadata !190, metadata !191, metadata !192, metadata !193, metadata !194, metadata !195, metadata !196, metadata !197, metadata !198, metadata !199} +!185 = metadata !{i32 786688, metadata !183, metadata !"dtemp", metadata !5, i32 310, metadata !91, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [dtemp] [line 310] +!186 = metadata !{i32 786688, metadata !183, metadata !"xi", metadata !5, i32 310, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [xi] [line 310] +!187 = metadata !{i32 786688, metadata !183, metadata !"eta", metadata !5, i32 310, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [eta] [line 310] +!188 = metadata !{i32 786688, metadata !183, metadata !"zeta", metadata !5, i32 310, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [zeta] [line 310] +!189 = metadata !{i32 786688, metadata !183, metadata !"dtpp", metadata !5, i32 310, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [dtpp] [line 310] +!190 = metadata !{i32 786688, metadata !183, metadata !"m", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 311] +!191 = metadata !{i32 786688, metadata !183, metadata !"i", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 311] +!192 = metadata !{i32 786688, metadata !183, metadata !"j", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 311] +!193 = metadata !{i32 786688, metadata !183, metadata !"k", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 311] +!194 = metadata !{i32 786688, metadata !183, 
metadata !"ip1", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [ip1] [line 311] +!195 = metadata !{i32 786688, metadata !183, metadata !"im1", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [im1] [line 311] +!196 = metadata !{i32 786688, metadata !183, metadata !"jp1", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [jp1] [line 311] +!197 = metadata !{i32 786688, metadata !183, metadata !"jm1", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [jm1] [line 311] +!198 = metadata !{i32 786688, metadata !183, metadata !"km1", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [km1] [line 311] +!199 = metadata !{i32 786688, metadata !183, metadata !"kp1", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [kp1] [line 311] +!200 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"adi", metadata !"adi", metadata !"", i32 210, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !2, i32 210} ; [ DW_TAG_subprogram ] [line 210] [local] [def] [adi] +!201 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"add", metadata !"add", metadata !"", i32 187, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !202, i32 187} ; [ DW_TAG_subprogram ] [line 187] [local] [def] [add] +!202 = metadata !{metadata !203, metadata !204, metadata !205, metadata !206} +!203 = metadata !{i32 786688, metadata !201, metadata !"i", metadata !5, i32 193, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 193] +!204 = metadata !{i32 786688, metadata !201, metadata !"j", metadata !5, i32 193, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 193] +!205 = metadata !{i32 786688, metadata !201, metadata !"k", metadata !5, i32 193, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 193] +!206 = 
metadata !{i32 786688, metadata !201, metadata !"m", metadata !5, i32 193, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 193] +!207 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"z_solve", metadata !"z_solve", metadata !"", i32 3457, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !2, i32 3457} ; [ DW_TAG_subprogram ] [line 3457] [local] [def] [z_solve] +!208 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"z_backsubstitute", metadata !"z_backsubstitute", metadata !"", i32 3480, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !209, i32 3480} ; [ DW_TAG_subprogram ] [line 3480] [local] [def] [z_backsubstitute] +!209 = metadata !{metadata !210, metadata !211, metadata !212, metadata !213, metadata !214} +!210 = metadata !{i32 786688, metadata !208, metadata !"i", metadata !5, i32 3492, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 3492] +!211 = metadata !{i32 786688, metadata !208, metadata !"j", metadata !5, i32 3492, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 3492] +!212 = metadata !{i32 786688, metadata !208, metadata !"k", metadata !5, i32 3492, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 3492] +!213 = metadata !{i32 786688, metadata !208, metadata !"m", metadata !5, i32 3492, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 3492] +!214 = metadata !{i32 786688, metadata !208, metadata !"n", metadata !5, i32 3492, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [n] [line 3492] +!215 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"z_solve_cell", metadata !"z_solve_cell", metadata !"", i32 3512, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !216, i32 3512} ; [ DW_TAG_subprogram ] [line 3512] [local] [def] [z_solve_cell] +!216 = metadata !{metadata !217, metadata !218, metadata 
!219, metadata !220} +!217 = metadata !{i32 786688, metadata !215, metadata !"i", metadata !5, i32 3527, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 3527] +!218 = metadata !{i32 786688, metadata !215, metadata !"j", metadata !5, i32 3527, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 3527] +!219 = metadata !{i32 786688, metadata !215, metadata !"k", metadata !5, i32 3527, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 3527] +!220 = metadata !{i32 786688, metadata !215, metadata !"ksize", metadata !5, i32 3527, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [ksize] [line 3527] +!221 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"binvrhs", metadata !"binvrhs", metadata !"", i32 3154, metadata !222, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !225, i32 3154} ; [ DW_TAG_subprogram ] [line 3154] [local] [def] [binvrhs] +!222 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !223, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!223 = metadata !{null, metadata !224, metadata !105} +!224 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !91} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ] +!225 = metadata !{metadata !226, metadata !227, metadata !228, metadata !229} +!226 = metadata !{i32 786689, metadata !221, metadata !"lhs", metadata !5, i32 16780370, metadata !224, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [lhs] [line 3154] +!227 = metadata !{i32 786689, metadata !221, metadata !"r", metadata !5, i32 33557586, metadata !105, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [r] [line 3154] +!228 = metadata !{i32 786688, metadata !221, metadata !"pivot", metadata !5, i32 3159, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [pivot] [line 3159] +!229 = metadata !{i32 786688, metadata !221, 
metadata !"coeff", metadata !5, i32 3159, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [coeff] [line 3159] +!230 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"matmul_sub", metadata !"matmul_sub", metadata !"", i32 2841, metadata !231, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !233, i32 2842} ; [ DW_TAG_subprogram ] [line 2841] [local] [def] [scope 2842] [matmul_sub] +!231 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !232, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!232 = metadata !{null, metadata !224, metadata !224, metadata !224} +!233 = metadata !{metadata !234, metadata !235, metadata !236, metadata !237} +!234 = metadata !{i32 786689, metadata !230, metadata !"ablock", metadata !5, i32 16780057, metadata !224, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [ablock] [line 2841] +!235 = metadata !{i32 786689, metadata !230, metadata !"bblock", metadata !5, i32 33557273, metadata !224, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [bblock] [line 2841] +!236 = metadata !{i32 786689, metadata !230, metadata !"cblock", metadata !5, i32 50334490, metadata !224, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [cblock] [line 2842] +!237 = metadata !{i32 786688, metadata !230, metadata !"j", metadata !5, i32 2851, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 2851] +!238 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"matvec_sub", metadata !"matvec_sub", metadata !"", i32 2814, metadata !239, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !241, i32 2814} ; [ DW_TAG_subprogram ] [line 2814] [local] [def] [matvec_sub] +!239 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !240, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!240 = metadata 
!{null, metadata !224, metadata !105, metadata !105} +!241 = metadata !{metadata !242, metadata !243, metadata !244, metadata !245} +!242 = metadata !{i32 786689, metadata !238, metadata !"ablock", metadata !5, i32 16780030, metadata !224, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [ablock] [line 2814] +!243 = metadata !{i32 786689, metadata !238, metadata !"avec", metadata !5, i32 33557246, metadata !105, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [avec] [line 2814] +!244 = metadata !{i32 786689, metadata !238, metadata !"bvec", metadata !5, i32 50334462, metadata !105, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [bvec] [line 2814] +!245 = metadata !{i32 786688, metadata !238, metadata !"i", metadata !5, i32 2823, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 2823] +!246 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"binvcrhs", metadata !"binvcrhs", metadata !"", i32 2885, metadata !247, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !249, i32 2885} ; [ DW_TAG_subprogram ] [line 2885] [local] [def] [binvcrhs] +!247 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !248, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!248 = metadata !{null, metadata !224, metadata !224, metadata !105} +!249 = metadata !{metadata !250, metadata !251, metadata !252, metadata !253, metadata !254} +!250 = metadata !{i32 786689, metadata !246, metadata !"lhs", metadata !5, i32 16780101, metadata !224, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [lhs] [line 2885] +!251 = metadata !{i32 786689, metadata !246, metadata !"c", metadata !5, i32 33557317, metadata !224, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [c] [line 2885] +!252 = metadata !{i32 786689, metadata !246, metadata !"r", metadata !5, i32 50334533, metadata !105, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [r] [line 2885] +!253 = metadata !{i32 786688, metadata !246, metadata 
!"pivot", metadata !5, i32 2890, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [pivot] [line 2890] +!254 = metadata !{i32 786688, metadata !246, metadata !"coeff", metadata !5, i32 2890, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [coeff] [line 2890] +!255 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"lhsz", metadata !"lhsz", metadata !"", i32 1475, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !256, i32 1475} ; [ DW_TAG_subprogram ] [line 1475] [local] [def] [lhsz] +!256 = metadata !{metadata !257, metadata !258, metadata !259} +!257 = metadata !{i32 786688, metadata !255, metadata !"i", metadata !5, i32 1484, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 1484] +!258 = metadata !{i32 786688, metadata !255, metadata !"j", metadata !5, i32 1484, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 1484] +!259 = metadata !{i32 786688, metadata !255, metadata !"k", metadata !5, i32 1484, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 1484] +!260 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"y_solve", metadata !"y_solve", metadata !"", i32 3299, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !2, i32 3299} ; [ DW_TAG_subprogram ] [line 3299] [local] [def] [y_solve] +!261 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"y_backsubstitute", metadata !"y_backsubstitute", metadata !"", i32 3323, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !262, i32 3323} ; [ DW_TAG_subprogram ] [line 3323] [local] [def] [y_backsubstitute] +!262 = metadata !{metadata !263, metadata !264, metadata !265, metadata !266, metadata !267} +!263 = metadata !{i32 786688, metadata !261, metadata !"i", metadata !5, i32 3335, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 3335] +!264 = metadata !{i32 786688, metadata !261, 
metadata !"j", metadata !5, i32 3335, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 3335] +!265 = metadata !{i32 786688, metadata !261, metadata !"k", metadata !5, i32 3335, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 3335] +!266 = metadata !{i32 786688, metadata !261, metadata !"m", metadata !5, i32 3335, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 3335] +!267 = metadata !{i32 786688, metadata !261, metadata !"n", metadata !5, i32 3335, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [n] [line 3335] +!268 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"y_solve_cell", metadata !"y_solve_cell", metadata !"", i32 3355, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !269, i32 3355} ; [ DW_TAG_subprogram ] [line 3355] [local] [def] [y_solve_cell] +!269 = metadata !{metadata !270, metadata !271, metadata !272, metadata !273} +!270 = metadata !{i32 786688, metadata !268, metadata !"i", metadata !5, i32 3370, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 3370] +!271 = metadata !{i32 786688, metadata !268, metadata !"j", metadata !5, i32 3370, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 3370] +!272 = metadata !{i32 786688, metadata !268, metadata !"k", metadata !5, i32 3370, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 3370] +!273 = metadata !{i32 786688, metadata !268, metadata !"jsize", metadata !5, i32 3370, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [jsize] [line 3370] +!274 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"lhsy", metadata !"lhsy", metadata !"", i32 1181, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !275, i32 1181} ; [ DW_TAG_subprogram ] [line 1181] [local] [def] [lhsy] +!275 = metadata !{metadata !276, metadata !277, metadata !278} +!276 = metadata !{i32 786688, metadata !274, metadata !"i", 
metadata !5, i32 1190, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 1190] +!277 = metadata !{i32 786688, metadata !274, metadata !"j", metadata !5, i32 1190, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 1190] +!278 = metadata !{i32 786688, metadata !274, metadata !"k", metadata !5, i32 1190, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 1190] +!279 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"x_solve", metadata !"x_solve", metadata !"", i32 2658, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !2, i32 2658} ; [ DW_TAG_subprogram ] [line 2658] [local] [def] [x_solve] +!280 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"x_backsubstitute", metadata !"x_backsubstitute", metadata !"", i32 2684, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !281, i32 2684} ; [ DW_TAG_subprogram ] [line 2684] [local] [def] [x_backsubstitute] +!281 = metadata !{metadata !282, metadata !283, metadata !284, metadata !285, metadata !286} +!282 = metadata !{i32 786688, metadata !280, metadata !"i", metadata !5, i32 2696, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 2696] +!283 = metadata !{i32 786688, metadata !280, metadata !"j", metadata !5, i32 2696, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 2696] +!284 = metadata !{i32 786688, metadata !280, metadata !"k", metadata !5, i32 2696, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 2696] +!285 = metadata !{i32 786688, metadata !280, metadata !"m", metadata !5, i32 2696, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 2696] +!286 = metadata !{i32 786688, metadata !280, metadata !"n", metadata !5, i32 2696, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [n] [line 2696] +!287 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"x_solve_cell", metadata !"x_solve_cell", 
metadata !"", i32 2716, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !288, i32 2716} ; [ DW_TAG_subprogram ] [line 2716] [local] [def] [x_solve_cell] +!288 = metadata !{metadata !289, metadata !290, metadata !291, metadata !292} +!289 = metadata !{i32 786688, metadata !287, metadata !"i", metadata !5, i32 2728, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 2728] +!290 = metadata !{i32 786688, metadata !287, metadata !"j", metadata !5, i32 2728, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 2728] +!291 = metadata !{i32 786688, metadata !287, metadata !"k", metadata !5, i32 2728, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 2728] +!292 = metadata !{i32 786688, metadata !287, metadata !"isize", metadata !5, i32 2728, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [isize] [line 2728] +!293 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"lhsx", metadata !"lhsx", metadata !"", i32 898, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !294, i32 898} ; [ DW_TAG_subprogram ] [line 898] [local] [def] [lhsx] +!294 = metadata !{metadata !295, metadata !296, metadata !297} +!295 = metadata !{i32 786688, metadata !293, metadata !"i", metadata !5, i32 907, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 907] +!296 = metadata !{i32 786688, metadata !293, metadata !"j", metadata !5, i32 907, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 907] +!297 = metadata !{i32 786688, metadata !293, metadata !"k", metadata !5, i32 907, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 907] +!298 = metadata !{metadata !299, metadata !304, metadata !305, metadata !309, metadata !310, metadata !311, metadata !312, metadata !313, metadata !314, metadata !315, metadata !316, metadata !317, metadata !318, metadata !319, metadata !320, metadata !321, metadata !322, metadata !323, 
metadata !324, metadata !325, metadata !326, metadata !327, metadata !328, metadata !329, metadata !330, metadata !331, metadata !332, metadata !333, metadata !334, metadata !335, metadata !336, metadata !337, metadata !338, metadata !339, metadata !340, metadata !341, metadata !342, metadata !343, metadata !347, metadata !350, metadata !351, metadata !352, metadata !353, metadata !354, metadata !355, metadata !356, metadata !360, metadata !361, metadata !362, metadata !363, metadata !364, metadata !365, metadata !366, metadata !367, metadata !368, metadata !369, metadata !370, metadata !371, metadata !372, metadata !373, metadata !374, metadata !375, metadata !376, metadata !377, metadata !378, metadata !379, metadata !380, metadata !381, metadata !382, metadata !383, metadata !384, metadata !385, metadata !386, metadata !387, metadata !388, metadata !389, metadata !390, metadata !391, metadata !392, metadata !393, metadata !394, metadata !395, metadata !396, metadata !397, metadata !398, metadata !399, metadata !400, metadata !401, metadata !402, metadata !403, metadata !404, metadata !405, metadata !406, metadata !407, metadata !408, metadata !409, metadata !410, metadata !411, metadata !412, metadata !413, metadata !414, metadata !415, metadata !416, metadata !417, metadata !418, metadata !419, metadata !422, metadata !426, metadata !427, metadata !430, metadata !431, metadata !434, metadata !435, metadata !436, metadata !437} +!299 = metadata !{i32 786484, i32 0, null, metadata !"grid_points", metadata !"grid_points", metadata !"", metadata !300, i32 28, metadata !302, i32 1, i32 1, [3 x i32]* @grid_points, null} ; [ DW_TAG_variable ] [grid_points] [line 28] [local] [def] +!300 = metadata !{i32 786473, metadata !301} ; [ DW_TAG_file_type ] [/home/hfinkel/src/NPB2.3-omp-C/BT/./header.h] +!301 = metadata !{metadata !"./header.h", metadata !"/home/hfinkel/src/NPB2.3-omp-C/BT"} +!302 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 96, i64 32, i32 0, 
i32 0, metadata !8, metadata !303, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 96, align 32, offset 0] [from int] +!303 = metadata !{metadata !178} +!304 = metadata !{i32 786484, i32 0, null, metadata !"dt", metadata !"dt", metadata !"", metadata !300, i32 35, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dt] [line 35] [local] [def] +!305 = metadata !{i32 786484, i32 0, null, metadata !"rhs", metadata !"rhs", metadata !"", metadata !300, i32 68, metadata !306, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [rhs] [line 68] [local] [def] +!306 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 1385839040, i64 64, i32 0, i32 0, metadata !20, metadata !307, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 1385839040, align 64, offset 0] [from double] +!307 = metadata !{metadata !308, metadata !308, metadata !308, metadata !93} +!308 = metadata !{i32 786465, i64 0, i64 163} ; [ DW_TAG_subrange_type ] [0, 162] +!309 = metadata !{i32 786484, i32 0, null, metadata !"zzcon5", metadata !"zzcon5", metadata !"", metadata !300, i32 42, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [zzcon5] [line 42] [local] [def] +!310 = metadata !{i32 786484, i32 0, null, metadata !"zzcon4", metadata !"zzcon4", metadata !"", metadata !300, i32 42, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [zzcon4] [line 42] [local] [def] +!311 = metadata !{i32 786484, i32 0, null, metadata !"zzcon3", metadata !"zzcon3", metadata !"", metadata !300, i32 42, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [zzcon3] [line 42] [local] [def] +!312 = metadata !{i32 786484, i32 0, null, metadata !"dz5tz1", metadata !"dz5tz1", metadata !"", metadata !300, i32 43, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dz5tz1] [line 43] [local] [def] +!313 = metadata !{i32 786484, i32 0, null, metadata !"dz4tz1", metadata !"dz4tz1", metadata !"", metadata !300, i32 43, metadata !20, i32 1, i32 1, null, 
null} ; [ DW_TAG_variable ] [dz4tz1] [line 43] [local] [def] +!314 = metadata !{i32 786484, i32 0, null, metadata !"dz3tz1", metadata !"dz3tz1", metadata !"", metadata !300, i32 43, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dz3tz1] [line 43] [local] [def] +!315 = metadata !{i32 786484, i32 0, null, metadata !"zzcon2", metadata !"zzcon2", metadata !"", metadata !300, i32 42, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [zzcon2] [line 42] [local] [def] +!316 = metadata !{i32 786484, i32 0, null, metadata !"dz2tz1", metadata !"dz2tz1", metadata !"", metadata !300, i32 43, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dz2tz1] [line 43] [local] [def] +!317 = metadata !{i32 786484, i32 0, null, metadata !"tz2", metadata !"tz2", metadata !"", metadata !300, i32 31, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [tz2] [line 31] [local] [def] +!318 = metadata !{i32 786484, i32 0, null, metadata !"dz1tz1", metadata !"dz1tz1", metadata !"", metadata !300, i32 43, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dz1tz1] [line 43] [local] [def] +!319 = metadata !{i32 786484, i32 0, null, metadata !"yycon5", metadata !"yycon5", metadata !"", metadata !300, i32 40, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [yycon5] [line 40] [local] [def] +!320 = metadata !{i32 786484, i32 0, null, metadata !"yycon4", metadata !"yycon4", metadata !"", metadata !300, i32 40, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [yycon4] [line 40] [local] [def] +!321 = metadata !{i32 786484, i32 0, null, metadata !"yycon3", metadata !"yycon3", metadata !"", metadata !300, i32 40, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [yycon3] [line 40] [local] [def] +!322 = metadata !{i32 786484, i32 0, null, metadata !"dy5ty1", metadata !"dy5ty1", metadata !"", metadata !300, i32 41, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy5ty1] [line 41] [local] [def] 
+!323 = metadata !{i32 786484, i32 0, null, metadata !"dy4ty1", metadata !"dy4ty1", metadata !"", metadata !300, i32 41, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy4ty1] [line 41] [local] [def] +!324 = metadata !{i32 786484, i32 0, null, metadata !"dy3ty1", metadata !"dy3ty1", metadata !"", metadata !300, i32 41, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy3ty1] [line 41] [local] [def] +!325 = metadata !{i32 786484, i32 0, null, metadata !"yycon2", metadata !"yycon2", metadata !"", metadata !300, i32 40, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [yycon2] [line 40] [local] [def] +!326 = metadata !{i32 786484, i32 0, null, metadata !"dy2ty1", metadata !"dy2ty1", metadata !"", metadata !300, i32 41, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy2ty1] [line 41] [local] [def] +!327 = metadata !{i32 786484, i32 0, null, metadata !"ty2", metadata !"ty2", metadata !"", metadata !300, i32 31, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [ty2] [line 31] [local] [def] +!328 = metadata !{i32 786484, i32 0, null, metadata !"dy1ty1", metadata !"dy1ty1", metadata !"", metadata !300, i32 41, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy1ty1] [line 41] [local] [def] +!329 = metadata !{i32 786484, i32 0, null, metadata !"dssp", metadata !"dssp", metadata !"", metadata !300, i32 35, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dssp] [line 35] [local] [def] +!330 = metadata !{i32 786484, i32 0, null, metadata !"c1", metadata !"c1", metadata !"", metadata !300, i32 45, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c1] [line 45] [local] [def] +!331 = metadata !{i32 786484, i32 0, null, metadata !"xxcon5", metadata !"xxcon5", metadata !"", metadata !300, i32 38, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [xxcon5] [line 38] [local] [def] +!332 = metadata !{i32 786484, i32 0, null, metadata !"xxcon4", metadata 
!"xxcon4", metadata !"", metadata !300, i32 38, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [xxcon4] [line 38] [local] [def] +!333 = metadata !{i32 786484, i32 0, null, metadata !"xxcon3", metadata !"xxcon3", metadata !"", metadata !300, i32 38, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [xxcon3] [line 38] [local] [def] +!334 = metadata !{i32 786484, i32 0, null, metadata !"dx5tx1", metadata !"dx5tx1", metadata !"", metadata !300, i32 39, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx5tx1] [line 39] [local] [def] +!335 = metadata !{i32 786484, i32 0, null, metadata !"dx4tx1", metadata !"dx4tx1", metadata !"", metadata !300, i32 39, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx4tx1] [line 39] [local] [def] +!336 = metadata !{i32 786484, i32 0, null, metadata !"dx3tx1", metadata !"dx3tx1", metadata !"", metadata !300, i32 39, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx3tx1] [line 39] [local] [def] +!337 = metadata !{i32 786484, i32 0, null, metadata !"c2", metadata !"c2", metadata !"", metadata !300, i32 45, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c2] [line 45] [local] [def] +!338 = metadata !{i32 786484, i32 0, null, metadata !"con43", metadata !"con43", metadata !"", metadata !300, i32 48, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [con43] [line 48] [local] [def] +!339 = metadata !{i32 786484, i32 0, null, metadata !"xxcon2", metadata !"xxcon2", metadata !"", metadata !300, i32 38, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [xxcon2] [line 38] [local] [def] +!340 = metadata !{i32 786484, i32 0, null, metadata !"dx2tx1", metadata !"dx2tx1", metadata !"", metadata !300, i32 39, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx2tx1] [line 39] [local] [def] +!341 = metadata !{i32 786484, i32 0, null, metadata !"tx2", metadata !"tx2", metadata !"", metadata !300, i32 31, metadata !20, i32 1, i32 1, 
null, null} ; [ DW_TAG_variable ] [tx2] [line 31] [local] [def] +!342 = metadata !{i32 786484, i32 0, null, metadata !"dx1tx1", metadata !"dx1tx1", metadata !"", metadata !300, i32 39, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx1tx1] [line 39] [local] [def] +!343 = metadata !{i32 786484, i32 0, null, metadata !"forcing", metadata !"forcing", metadata !"", metadata !300, i32 66, metadata !344, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [forcing] [line 66] [local] [def] +!344 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 1663006848, i64 64, i32 0, i32 0, metadata !20, metadata !345, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 1663006848, align 64, offset 0] [from double] +!345 = metadata !{metadata !308, metadata !308, metadata !308, metadata !346} +!346 = metadata !{i32 786465, i64 0, i64 6} ; [ DW_TAG_subrange_type ] [0, 5] +!347 = metadata !{i32 786484, i32 0, null, metadata !"qs", metadata !"qs", metadata !"", metadata !300, i32 63, metadata !348, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [qs] [line 63] [local] [def] +!348 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 277167808, i64 64, i32 0, i32 0, metadata !20, metadata !349, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 277167808, align 64, offset 0] [from double] +!349 = metadata !{metadata !308, metadata !308, metadata !308} +!350 = metadata !{i32 786484, i32 0, null, metadata !"square", metadata !"square", metadata !"", metadata !300, i32 65, metadata !348, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [square] [line 65] [local] [def] +!351 = metadata !{i32 786484, i32 0, null, metadata !"ws", metadata !"ws", metadata !"", metadata !300, i32 62, metadata !348, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [ws] [line 62] [local] [def] +!352 = metadata !{i32 786484, i32 0, null, metadata !"vs", metadata !"vs", metadata !"", metadata !300, i32 61, metadata !348, i32 1, i32 1, null, null} ; [ 
DW_TAG_variable ] [vs] [line 61] [local] [def] +!353 = metadata !{i32 786484, i32 0, null, metadata !"us", metadata !"us", metadata !"", metadata !300, i32 60, metadata !348, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [us] [line 60] [local] [def] +!354 = metadata !{i32 786484, i32 0, null, metadata !"rho_i", metadata !"rho_i", metadata !"", metadata !300, i32 64, metadata !348, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [rho_i] [line 64] [local] [def] +!355 = metadata !{i32 786484, i32 0, null, metadata !"u", metadata !"u", metadata !"", metadata !300, i32 67, metadata !306, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [u] [line 67] [local] [def] +!356 = metadata !{i32 786484, i32 0, null, metadata !"ce", metadata !"ce", metadata !"", metadata !300, i32 36, metadata !357, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [ce] [line 36] [local] [def] +!357 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 4160, i64 64, i32 0, i32 0, metadata !20, metadata !358, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 4160, align 64, offset 0] [from double] +!358 = metadata !{metadata !93, metadata !359} +!359 = metadata !{i32 786465, i64 0, i64 13} ; [ DW_TAG_subrange_type ] [0, 12] +!360 = metadata !{i32 786484, i32 0, null, metadata !"dnzm1", metadata !"dnzm1", metadata !"", metadata !300, i32 44, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dnzm1] [line 44] [local] [def] +!361 = metadata !{i32 786484, i32 0, null, metadata !"dnym1", metadata !"dnym1", metadata !"", metadata !300, i32 44, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dnym1] [line 44] [local] [def] +!362 = metadata !{i32 786484, i32 0, null, metadata !"dnxm1", metadata !"dnxm1", metadata !"", metadata !300, i32 44, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dnxm1] [line 44] [local] [def] +!363 = metadata !{i32 786484, i32 0, null, metadata !"zzcon1", metadata !"zzcon1", metadata !"", metadata !300, i32 42, metadata 
!20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [zzcon1] [line 42] [local] [def] +!364 = metadata !{i32 786484, i32 0, null, metadata !"yycon1", metadata !"yycon1", metadata !"", metadata !300, i32 40, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [yycon1] [line 40] [local] [def] +!365 = metadata !{i32 786484, i32 0, null, metadata !"xxcon1", metadata !"xxcon1", metadata !"", metadata !300, i32 38, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [xxcon1] [line 38] [local] [def] +!366 = metadata !{i32 786484, i32 0, null, metadata !"con16", metadata !"con16", metadata !"", metadata !300, i32 48, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [con16] [line 48] [local] [def] +!367 = metadata !{i32 786484, i32 0, null, metadata !"c2iv", metadata !"c2iv", metadata !"", metadata !300, i32 48, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c2iv] [line 48] [local] [def] +!368 = metadata !{i32 786484, i32 0, null, metadata !"c3c4tz3", metadata !"c3c4tz3", metadata !"", metadata !300, i32 48, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c3c4tz3] [line 48] [local] [def] +!369 = metadata !{i32 786484, i32 0, null, metadata !"c3c4ty3", metadata !"c3c4ty3", metadata !"", metadata !300, i32 48, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c3c4ty3] [line 48] [local] [def] +!370 = metadata !{i32 786484, i32 0, null, metadata !"c3c4tx3", metadata !"c3c4tx3", metadata !"", metadata !300, i32 48, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c3c4tx3] [line 48] [local] [def] +!371 = metadata !{i32 786484, i32 0, null, metadata !"comz6", metadata !"comz6", metadata !"", metadata !300, i32 47, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [comz6] [line 47] [local] [def] +!372 = metadata !{i32 786484, i32 0, null, metadata !"comz5", metadata !"comz5", metadata !"", metadata !300, i32 47, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [comz5] 
[line 47] [local] [def] +!373 = metadata !{i32 786484, i32 0, null, metadata !"comz4", metadata !"comz4", metadata !"", metadata !300, i32 47, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [comz4] [line 47] [local] [def] +!374 = metadata !{i32 786484, i32 0, null, metadata !"comz1", metadata !"comz1", metadata !"", metadata !300, i32 47, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [comz1] [line 47] [local] [def] +!375 = metadata !{i32 786484, i32 0, null, metadata !"dtdssp", metadata !"dtdssp", metadata !"", metadata !300, i32 45, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dtdssp] [line 45] [local] [def] +!376 = metadata !{i32 786484, i32 0, null, metadata !"c2dttz1", metadata !"c2dttz1", metadata !"", metadata !300, i32 47, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c2dttz1] [line 47] [local] [def] +!377 = metadata !{i32 786484, i32 0, null, metadata !"c2dtty1", metadata !"c2dtty1", metadata !"", metadata !300, i32 47, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c2dtty1] [line 47] [local] [def] +!378 = metadata !{i32 786484, i32 0, null, metadata !"c2dttx1", metadata !"c2dttx1", metadata !"", metadata !300, i32 47, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c2dttx1] [line 47] [local] [def] +!379 = metadata !{i32 786484, i32 0, null, metadata !"dttz2", metadata !"dttz2", metadata !"", metadata !300, i32 46, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dttz2] [line 46] [local] [def] +!380 = metadata !{i32 786484, i32 0, null, metadata !"dttz1", metadata !"dttz1", metadata !"", metadata !300, i32 46, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dttz1] [line 46] [local] [def] +!381 = metadata !{i32 786484, i32 0, null, metadata !"dtty2", metadata !"dtty2", metadata !"", metadata !300, i32 46, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dtty2] [line 46] [local] [def] +!382 = metadata !{i32 786484, i32 0, 
null, metadata !"dtty1", metadata !"dtty1", metadata !"", metadata !300, i32 46, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dtty1] [line 46] [local] [def] +!383 = metadata !{i32 786484, i32 0, null, metadata !"dttx2", metadata !"dttx2", metadata !"", metadata !300, i32 46, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dttx2] [line 46] [local] [def] +!384 = metadata !{i32 786484, i32 0, null, metadata !"dttx1", metadata !"dttx1", metadata !"", metadata !300, i32 46, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dttx1] [line 46] [local] [def] +!385 = metadata !{i32 786484, i32 0, null, metadata !"c5dssp", metadata !"c5dssp", metadata !"", metadata !300, i32 45, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c5dssp] [line 45] [local] [def] +!386 = metadata !{i32 786484, i32 0, null, metadata !"c4dssp", metadata !"c4dssp", metadata !"", metadata !300, i32 45, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c4dssp] [line 45] [local] [def] +!387 = metadata !{i32 786484, i32 0, null, metadata !"dzmax", metadata !"dzmax", metadata !"", metadata !300, i32 37, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dzmax] [line 37] [local] [def] +!388 = metadata !{i32 786484, i32 0, null, metadata !"dymax", metadata !"dymax", metadata !"", metadata !300, i32 37, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dymax] [line 37] [local] [def] +!389 = metadata !{i32 786484, i32 0, null, metadata !"dxmax", metadata !"dxmax", metadata !"", metadata !300, i32 37, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dxmax] [line 37] [local] [def] +!390 = metadata !{i32 786484, i32 0, null, metadata !"dz5", metadata !"dz5", metadata !"", metadata !300, i32 34, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dz5] [line 34] [local] [def] +!391 = metadata !{i32 786484, i32 0, null, metadata !"dz4", metadata !"dz4", metadata !"", metadata !300, i32 34, 
metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dz4] [line 34] [local] [def] +!392 = metadata !{i32 786484, i32 0, null, metadata !"dz3", metadata !"dz3", metadata !"", metadata !300, i32 34, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dz3] [line 34] [local] [def] +!393 = metadata !{i32 786484, i32 0, null, metadata !"dz2", metadata !"dz2", metadata !"", metadata !300, i32 34, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dz2] [line 34] [local] [def] +!394 = metadata !{i32 786484, i32 0, null, metadata !"dz1", metadata !"dz1", metadata !"", metadata !300, i32 34, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dz1] [line 34] [local] [def] +!395 = metadata !{i32 786484, i32 0, null, metadata !"dy5", metadata !"dy5", metadata !"", metadata !300, i32 33, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy5] [line 33] [local] [def] +!396 = metadata !{i32 786484, i32 0, null, metadata !"dy4", metadata !"dy4", metadata !"", metadata !300, i32 33, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy4] [line 33] [local] [def] +!397 = metadata !{i32 786484, i32 0, null, metadata !"dy3", metadata !"dy3", metadata !"", metadata !300, i32 33, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy3] [line 33] [local] [def] +!398 = metadata !{i32 786484, i32 0, null, metadata !"dy2", metadata !"dy2", metadata !"", metadata !300, i32 33, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy2] [line 33] [local] [def] +!399 = metadata !{i32 786484, i32 0, null, metadata !"dy1", metadata !"dy1", metadata !"", metadata !300, i32 33, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy1] [line 33] [local] [def] +!400 = metadata !{i32 786484, i32 0, null, metadata !"dx5", metadata !"dx5", metadata !"", metadata !300, i32 32, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx5] [line 32] [local] [def] +!401 = metadata !{i32 786484, i32 0, null, 
metadata !"dx4", metadata !"dx4", metadata !"", metadata !300, i32 32, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx4] [line 32] [local] [def] +!402 = metadata !{i32 786484, i32 0, null, metadata !"dx3", metadata !"dx3", metadata !"", metadata !300, i32 32, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx3] [line 32] [local] [def] +!403 = metadata !{i32 786484, i32 0, null, metadata !"dx2", metadata !"dx2", metadata !"", metadata !300, i32 32, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx2] [line 32] [local] [def] +!404 = metadata !{i32 786484, i32 0, null, metadata !"dx1", metadata !"dx1", metadata !"", metadata !300, i32 32, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx1] [line 32] [local] [def] +!405 = metadata !{i32 786484, i32 0, null, metadata !"tz3", metadata !"tz3", metadata !"", metadata !300, i32 31, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [tz3] [line 31] [local] [def] +!406 = metadata !{i32 786484, i32 0, null, metadata !"tz1", metadata !"tz1", metadata !"", metadata !300, i32 31, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [tz1] [line 31] [local] [def] +!407 = metadata !{i32 786484, i32 0, null, metadata !"ty3", metadata !"ty3", metadata !"", metadata !300, i32 31, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [ty3] [line 31] [local] [def] +!408 = metadata !{i32 786484, i32 0, null, metadata !"ty1", metadata !"ty1", metadata !"", metadata !300, i32 31, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [ty1] [line 31] [local] [def] +!409 = metadata !{i32 786484, i32 0, null, metadata !"tx3", metadata !"tx3", metadata !"", metadata !300, i32 31, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [tx3] [line 31] [local] [def] +!410 = metadata !{i32 786484, i32 0, null, metadata !"tx1", metadata !"tx1", metadata !"", metadata !300, i32 31, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] 
[tx1] [line 31] [local] [def] +!411 = metadata !{i32 786484, i32 0, null, metadata !"conz1", metadata !"conz1", metadata !"", metadata !300, i32 45, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [conz1] [line 45] [local] [def] +!412 = metadata !{i32 786484, i32 0, null, metadata !"c1345", metadata !"c1345", metadata !"", metadata !300, i32 44, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c1345] [line 44] [local] [def] +!413 = metadata !{i32 786484, i32 0, null, metadata !"c3c4", metadata !"c3c4", metadata !"", metadata !300, i32 44, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c3c4] [line 44] [local] [def] +!414 = metadata !{i32 786484, i32 0, null, metadata !"c1c5", metadata !"c1c5", metadata !"", metadata !300, i32 44, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c1c5] [line 44] [local] [def] +!415 = metadata !{i32 786484, i32 0, null, metadata !"c1c2", metadata !"c1c2", metadata !"", metadata !300, i32 44, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c1c2] [line 44] [local] [def] +!416 = metadata !{i32 786484, i32 0, null, metadata !"c5", metadata !"c5", metadata !"", metadata !300, i32 45, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c5] [line 45] [local] [def] +!417 = metadata !{i32 786484, i32 0, null, metadata !"c4", metadata !"c4", metadata !"", metadata !300, i32 45, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c4] [line 45] [local] [def] +!418 = metadata !{i32 786484, i32 0, null, metadata !"c3", metadata !"c3", metadata !"", metadata !300, i32 45, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c3] [line 45] [local] [def] +!419 = metadata !{i32 786484, i32 0, null, metadata !"lhs", metadata !"lhs", metadata !"", metadata !300, i32 69, metadata !420, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [lhs] [line 69] [local] [def] +!420 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 20787585600, i64 64, i32 
0, i32 0, metadata !20, metadata !421, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 20787585600, align 64, offset 0] [from double] +!421 = metadata !{metadata !308, metadata !308, metadata !308, metadata !178, metadata !93, metadata !93} +!422 = metadata !{i32 786484, i32 0, null, metadata !"q", metadata !"q", metadata !"", metadata !300, i32 73, metadata !423, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [q] [line 73] [local] [def] +!423 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 10368, i64 64, i32 0, i32 0, metadata !20, metadata !424, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 10368, align 64, offset 0] [from double] +!424 = metadata !{metadata !425} +!425 = metadata !{i32 786465, i64 0, i64 162} ; [ DW_TAG_subrange_type ] [0, 161] +!426 = metadata !{i32 786484, i32 0, null, metadata !"cuf", metadata !"cuf", metadata !"", metadata !300, i32 72, metadata !423, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [cuf] [line 72] [local] [def] +!427 = metadata !{i32 786484, i32 0, null, metadata !"buf", metadata !"buf", metadata !"", metadata !300, i32 75, metadata !428, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [buf] [line 75] [local] [def] +!428 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 51840, i64 64, i32 0, i32 0, metadata !20, metadata !429, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 51840, align 64, offset 0] [from double] +!429 = metadata !{metadata !425, metadata !93} +!430 = metadata !{i32 786484, i32 0, null, metadata !"ue", metadata !"ue", metadata !"", metadata !300, i32 74, metadata !428, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [ue] [line 74] [local] [def] +!431 = metadata !{i32 786484, i32 0, null, metadata !"njac", metadata !"njac", metadata !"", metadata !300, i32 86, metadata !432, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [njac] [line 86] [local] [def] +!432 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 6886684800, i64 
64, i32 0, i32 0, metadata !20, metadata !433, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 6886684800, align 64, offset 0] [from double] +!433 = metadata !{metadata !308, metadata !308, metadata !425, metadata !93, metadata !93} +!434 = metadata !{i32 786484, i32 0, null, metadata !"fjac", metadata !"fjac", metadata !"", metadata !300, i32 84, metadata !432, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [fjac] [line 84] [local] [def] +!435 = metadata !{i32 786484, i32 0, null, metadata !"tmp3", metadata !"tmp3", metadata !"", metadata !300, i32 88, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [tmp3] [line 88] [local] [def] +!436 = metadata !{i32 786484, i32 0, null, metadata !"tmp2", metadata !"tmp2", metadata !"", metadata !300, i32 88, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [tmp2] [line 88] [local] [def] +!437 = metadata !{i32 786484, i32 0, null, metadata !"tmp1", metadata !"tmp1", metadata !"", metadata !300, i32 88, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [tmp1] [line 88] [local] [def] +!438 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!439 = metadata !{i32 1898, i32 0, metadata !440, null} +!440 = metadata !{i32 786443, metadata !1, metadata !114, i32 1898, i32 0, i32 107} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c] +!441 = metadata !{i32 1913, i32 0, metadata !442, null} +!442 = metadata !{i32 786443, metadata !1, metadata !114, i32 1913, i32 0, i32 115} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c] +!443 = metadata !{i32 1923, i32 0, metadata !114, null} +!444 = metadata !{metadata !"int", metadata !445} +!445 = metadata !{metadata !"omnipotent char", metadata !446} +!446 = metadata !{metadata !"Simple C/C++ TBAA"} +!447 = metadata !{i32 1} +!448 = metadata !{i32 1925, i32 0, metadata !449, null} +!449 = metadata !{i32 786443, metadata !1, metadata !114, i32 1925, i32 0, i32 121} ; [ DW_TAG_lexical_block ] 
[/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c] +!450 = metadata !{i32 1939, i32 0, metadata !451, null} +!451 = metadata !{i32 786443, metadata !1, metadata !114, i32 1939, i32 0, i32 127} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c] +!452 = metadata !{i32 1940, i32 0, metadata !453, null} +!453 = metadata !{i32 786443, metadata !1, metadata !454, i32 1940, i32 0, i32 129} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c] +!454 = metadata !{i32 786443, metadata !1, metadata !451, i32 1939, i32 0, i32 128} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c] +!455 = metadata !{i32 1941, i32 0, metadata !456, null} +!456 = metadata !{i32 786443, metadata !1, metadata !457, i32 1941, i32 0, i32 131} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c] +!457 = metadata !{i32 786443, metadata !1, metadata !453, i32 1940, i32 0, i32 130} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c] +!458 = metadata !{i32 2020, i32 0, metadata !459, null} +!459 = metadata !{i32 786443, metadata !1, metadata !460, i32 2020, i32 0, i32 149} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c] +!460 = metadata !{i32 786443, metadata !1, metadata !461, i32 2019, i32 0, i32 148} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c] +!461 = metadata !{i32 786443, metadata !1, metadata !462, i32 2019, i32 0, i32 147} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c] +!462 = metadata !{i32 786443, metadata !1, metadata !463, i32 2018, i32 0, i32 146} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c] +!463 = metadata !{i32 786443, metadata !1, metadata !114, i32 2018, i32 0, i32 145} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c] +!464 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/PowerPC/pr17354.ll b/test/CodeGen/PowerPC/pr17354.ll new file mode 100644 index 0000000..dca81b1 --- 
/dev/null +++ b/test/CodeGen/PowerPC/pr17354.ll @@ -0,0 +1,39 @@ +; RUN: llc -mcpu=pwr7 -relocation-model=pic <%s | FileCheck %s + +; Test that PR17354 is fixed. We must generate a nop following even +; local calls when generating code for shared libraries, to permit +; TOC fixup. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.CS = type { i32 } + +@_ZL3glb = internal global [1 x %struct.CS] zeroinitializer, align 4 +@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }] + +define internal void @__cxx_global_var_init() section ".text.startup" { +entry: + call void @_Z4funcv(%struct.CS* sret getelementptr inbounds ([1 x %struct.CS]* @_ZL3glb, i64 0, i64 0)) + ret void +} + +; CHECK-LABEL: __cxx_global_var_init: +; CHECK: bl _Z4funcv +; CHECK-NEXT: nop + +; Function Attrs: nounwind +define void @_Z4funcv(%struct.CS* noalias sret %agg.result) #0 { +entry: + %a_ = getelementptr inbounds %struct.CS* %agg.result, i32 0, i32 0 + store i32 0, i32* %a_, align 4 + ret void +} + +define internal void @_GLOBAL__I_a() section ".text.startup" { +entry: + call void @__cxx_global_var_init() + ret void +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/PowerPC/recipest.ll b/test/CodeGen/PowerPC/recipest.ll index 38d7682..891e801 100644 --- a/test/CodeGen/PowerPC/recipest.ll +++ b/test/CodeGen/PowerPC/recipest.ll @@ -169,6 +169,7 @@ entry: ret double %r ; CHECK: @foo3 +; CHECK: fcmpu ; CHECK-DAG: frsqrte ; CHECK-DAG: fnmsub ; CHECK: fmul @@ -195,6 +196,7 @@ entry: ret float %r ; CHECK: @goo3 +; CHECK: fcmpu ; CHECK-DAG: frsqrtes ; CHECK-DAG: 
fnmsubs ; CHECK: fmuls @@ -217,7 +219,8 @@ entry: ; CHECK: @hoo3 ; CHECK: vrsqrtefp -; CHECK: vrefp +; CHECK-DAG: vrefp +; CHECK-DAG: vcmpeqfp ; CHECK-SAFE: @hoo3 ; CHECK-SAFE-NOT: vrsqrtefp diff --git a/test/CodeGen/PowerPC/reg-names.ll b/test/CodeGen/PowerPC/reg-names.ll new file mode 100644 index 0000000..f8fa7e4 --- /dev/null +++ b/test/CodeGen/PowerPC/reg-names.ll @@ -0,0 +1,17 @@ +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -ppc-asm-full-reg-names < %s | FileCheck -check-prefix=CHECK-FN %s + +define i64 @test1(i64 %a, i64 %b) { +; CHECK-LABEL: @test1 +; CHECK-FN-LABEL: @test1 + +entry: + ret i64 %b + +; CHECK: mr 3, 4 +; CHECK-FN: mr r3, r4 + +; CHECK: blr +; CHECK-FN: blr +} + diff --git a/test/CodeGen/PowerPC/reloc-align.ll b/test/CodeGen/PowerPC/reloc-align.ll index bd5c4d6..13d6ada 100644 --- a/test/CodeGen/PowerPC/reloc-align.ll +++ b/test/CodeGen/PowerPC/reloc-align.ll @@ -31,4 +31,4 @@ entry: ret i32 %bf.cast } -attributes #0 = { nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/PowerPC/rlwimi-and.ll b/test/CodeGen/PowerPC/rlwimi-and.ll index e20a13f..7963249 100644 --- a/test/CodeGen/PowerPC/rlwimi-and.ll +++ b/test/CodeGen/PowerPC/rlwimi-and.ll @@ -28,12 +28,11 @@ codeRepl17: ; preds = %codeRepl4 store i16 %rvml38.sroa.0.0.insert.insert, i16* undef, align 2 unreachable +; FIXME: the SLWI could be folded into the RLWIMI to give a rotate of 8. 
; CHECK: @test -; CHECK-DAG: slwi [[R1:[0-9]+]], -; CHECK-DAG: rlwinm [[R2:[0-9]+]], -; CHECK-DAG: srawi [[R3:[0-9]+]], [[R1]] -; CHECK-DAG: rlwinm [[R4:[0-9]+]], [[R3]], 0, 23, 23 -; CHECK: rlwimi [[R4]], [[R2]], 0, +; CHECK-DAG: slwi [[R1:[0-9]+]], {{[0-9]+}}, 31 +; CHECK-DAG: rlwinm [[R2:[0-9]+]], {{[0-9]+}}, 0, 31, 31 +; CHECK: rlwimi [[R2]], [[R1]], 9, 23, 23 codeRepl29: ; preds = %codeRepl1 unreachable diff --git a/test/CodeGen/PowerPC/rounding-ops.ll b/test/CodeGen/PowerPC/rounding-ops.ll index 2c02900..bf0a641 100644 --- a/test/CodeGen/PowerPC/rounding-ops.ll +++ b/test/CodeGen/PowerPC/rounding-ops.ll @@ -1,5 +1,4 @@ ; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s -; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math | FileCheck -check-prefix=CHECK-FM %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -9,9 +8,6 @@ define float @test1(float %x) nounwind { ; CHECK-LABEL: test1: ; CHECK: frim 1, 1 - -; CHECK-FM-LABEL: test1: -; CHECK-FM: frim 1, 1 } declare float @floorf(float) nounwind readnone @@ -22,38 +18,29 @@ define double @test2(double %x) nounwind { ; CHECK-LABEL: test2: ; CHECK: frim 1, 1 - -; CHECK-FM-LABEL: test2: -; CHECK-FM: frim 1, 1 } declare double @floor(double) nounwind readnone define float @test3(float %x) nounwind { - %call = tail call float @nearbyintf(float %x) nounwind readnone + %call = tail call float @roundf(float %x) nounwind readnone ret float %call ; CHECK-LABEL: test3: -; CHECK-NOT: frin - -; CHECK-FM-LABEL: test3: -; CHECK-FM: frin 1, 1 +; CHECK: frin 1, 1 } -declare float @nearbyintf(float) nounwind readnone +declare float @roundf(float) nounwind readnone define double @test4(double %x) nounwind { - %call = tail call double @nearbyint(double %x) nounwind readnone + %call = tail call double @round(double %x) nounwind readnone ret 
double %call ; CHECK-LABEL: test4: -; CHECK-NOT: frin - -; CHECK-FM-LABEL: test4: -; CHECK-FM: frin 1, 1 +; CHECK: frin 1, 1 } -declare double @nearbyint(double) nounwind readnone +declare double @round(double) nounwind readnone define float @test5(float %x) nounwind { %call = tail call float @ceilf(float %x) nounwind readnone @@ -61,9 +48,6 @@ define float @test5(float %x) nounwind { ; CHECK-LABEL: test5: ; CHECK: frip 1, 1 - -; CHECK-FM-LABEL: test5: -; CHECK-FM: frip 1, 1 } declare float @ceilf(float) nounwind readnone @@ -74,9 +58,6 @@ define double @test6(double %x) nounwind { ; CHECK-LABEL: test6: ; CHECK: frip 1, 1 - -; CHECK-FM-LABEL: test6: -; CHECK-FM: frip 1, 1 } declare double @ceil(double) nounwind readnone @@ -87,9 +68,6 @@ define float @test9(float %x) nounwind { ; CHECK-LABEL: test9: ; CHECK: friz 1, 1 - -; CHECK-FM-LABEL: test9: -; CHECK-FM: friz 1, 1 } declare float @truncf(float) nounwind readnone @@ -100,48 +78,7 @@ define double @test10(double %x) nounwind { ; CHECK-LABEL: test10: ; CHECK: friz 1, 1 - -; CHECK-FM-LABEL: test10: -; CHECK-FM: friz 1, 1 } declare double @trunc(double) nounwind readnone -define void @test11(float %x, float* %y) nounwind { - %call = tail call float @rintf(float %x) nounwind readnone - store float %call, float* %y - ret void - -; CHECK-LABEL: test11: -; CHECK-NOT: frin - -; CHECK-FM-LABEL: test11: -; CHECK-FM: frin [[R2:[0-9]+]], [[R1:[0-9]+]] -; CHECK-FM: fcmpu [[CR:[0-9]+]], [[R2]], [[R1]] -; CHECK-FM: beq [[CR]], .LBB[[BB:[0-9]+]]_2 -; CHECK-FM: mtfsb1 6 -; CHECK-FM: .LBB[[BB]]_2: -; CHECK-FM: blr -} - -declare float @rintf(float) nounwind readnone - -define void @test12(double %x, double* %y) nounwind { - %call = tail call double @rint(double %x) nounwind readnone - store double %call, double* %y - ret void - -; CHECK-LABEL: test12: -; CHECK-NOT: frin - -; CHECK-FM-LABEL: test12: -; CHECK-FM: frin [[R2:[0-9]+]], [[R1:[0-9]+]] -; CHECK-FM: fcmpu [[CR:[0-9]+]], [[R2]], [[R1]] -; CHECK-FM: beq [[CR]], 
.LBB[[BB:[0-9]+]]_2 -; CHECK-FM: mtfsb1 6 -; CHECK-FM: .LBB[[BB]]_2: -; CHECK-FM: blr -} - -declare double @rint(double) nounwind readnone - diff --git a/test/CodeGen/PowerPC/sjlj.ll b/test/CodeGen/PowerPC/sjlj.ll index 571f3b2..414640b 100644 --- a/test/CodeGen/PowerPC/sjlj.ll +++ b/test/CodeGen/PowerPC/sjlj.ll @@ -64,15 +64,16 @@ return: ; preds = %if.end, %if.then ; CHECK: std ; Make sure that we're not saving VRSAVE on non-Darwin: ; CHECK-NOT: mfspr -; CHECK: stfd -; CHECK: stvx -; CHECK: addis [[REG:[0-9]+]], 2, env_sigill@toc@ha -; CHECK: std 31, env_sigill@toc@l([[REG]]) -; CHECK: addi [[REG]], [[REG]], env_sigill@toc@l -; CHECK: std [[REG]], [[OFF:[0-9]+]](31) # 8-byte Folded Spill -; CHECK: std 1, 16([[REG]]) -; CHECK: std 2, 24([[REG]]) +; CHECK-DAG: stfd +; CHECK-DAG: stvx + +; CHECK-DAG: addis [[REG:[0-9]+]], 2, env_sigill@toc@ha +; CHECK-DAG: std 31, env_sigill@toc@l([[REG]]) +; CHECK-DAG: addi [[REGA:[0-9]+]], [[REG]], env_sigill@toc@l +; CHECK-DAG: std [[REGA]], [[OFF:[0-9]+]](31) # 8-byte Folded Spill +; CHECK-DAG: std 1, 16([[REGA]]) +; CHECK-DAG: std 2, 24([[REGA]]) ; CHECK: bcl 20, 31, .LBB1_1 ; CHECK: li 3, 1 ; CHECK: #EH_SjLj_Setup .LBB1_1 @@ -134,11 +135,11 @@ return: ; preds = %if.end, %if.then ; CHECK: addis [[REG:[0-9]+]], 2, env_sigill@toc@ha ; CHECK: std 31, env_sigill@toc@l([[REG]]) -; CHECK: addi [[REG]], [[REG]], env_sigill@toc@l -; CHECK: std [[REG]], [[OFF:[0-9]+]](31) # 8-byte Folded Spill -; CHECK: std 1, 16([[REG]]) -; CHECK: std 2, 24([[REG]]) -; CHECK: std 30, 32([[REG]]) +; CHECK: addi [[REGB:[0-9]+]], [[REG]], env_sigill@toc@l +; CHECK-DAG: std [[REGB]], [[OFF:[0-9]+]](31) # 8-byte Folded Spill +; CHECK-DAG: std 1, 16([[REGB]]) +; CHECK-DAG: std 2, 24([[REGB]]) +; CHECK-DAG: std 30, 32([[REGB]]) ; CHECK: bcl 20, 31, ; CHECK: blr @@ -152,7 +153,7 @@ declare i8* @llvm.stacksave() #3 declare i32 @llvm.eh.sjlj.setjmp(i8*) #3 -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" 
"no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { noreturn nounwind } attributes #2 = { nounwind readnone } attributes #3 = { nounwind } diff --git a/test/CodeGen/PowerPC/stack-realign.ll b/test/CodeGen/PowerPC/stack-realign.ll index f7b6d19..1c7a36a 100644 --- a/test/CodeGen/PowerPC/stack-realign.ll +++ b/test/CodeGen/PowerPC/stack-realign.ll @@ -11,13 +11,13 @@ define void @goo(%struct.s* byval nocapture readonly %a) { entry: %x = alloca [2 x i32], align 32 %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0 - %0 = load i32* %a1, align 4, !tbaa !0 + %0 = load i32* %a1, align 4 %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0 - store i32 %0, i32* %arrayidx, align 32, !tbaa !0 + store i32 %0, i32* %arrayidx, align 32 %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1 - %1 = load i32* %b, align 4, !tbaa !0 + %1 = load i32* %b, align 4 %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1 - store i32 %1, i32* %arrayidx2, align 4, !tbaa !0 + store i32 %1, i32* %arrayidx2, align 4 call void @bar(i32* %arrayidx) ret void } @@ -74,13 +74,13 @@ define void @hoo(%struct.s* byval nocapture readonly %a) { entry: %x = alloca [200000 x i32], align 32 %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0 - %0 = load i32* %a1, align 4, !tbaa !0 + %0 = load i32* %a1, align 4 %arrayidx = getelementptr inbounds [200000 x i32]* %x, i64 0, i64 0 - store i32 %0, i32* %arrayidx, align 32, !tbaa !0 + store i32 %0, i32* %arrayidx, align 32 %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1 - %1 = load i32* %b, align 4, !tbaa !0 + %1 = load i32* %b, align 4 %arrayidx2 = getelementptr inbounds [200000 x i32]* %x, i64 
0, i64 1 - store i32 %1, i32* %arrayidx2, align 4, !tbaa !0 + store i32 %1, i32* %arrayidx2, align 4 call void @bar(i32* %arrayidx) ret void } @@ -105,13 +105,13 @@ define void @loo(%struct.s* byval nocapture readonly %a) { entry: %x = alloca [2 x i32], align 32 %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0 - %0 = load i32* %a1, align 4, !tbaa !0 + %0 = load i32* %a1, align 4 %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0 - store i32 %0, i32* %arrayidx, align 32, !tbaa !0 + store i32 %0, i32* %arrayidx, align 32 %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1 - %1 = load i32* %b, align 4, !tbaa !0 + %1 = load i32* %b, align 4 %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1 - store i32 %1, i32* %arrayidx2, align 4, !tbaa !0 + store i32 %1, i32* %arrayidx2, align 4 call void @bar(i32* %arrayidx) call void asm sideeffect "", "~{f30}"() nounwind ret void @@ -145,7 +145,3 @@ entry: ; CHECK-FP: stfd 30, -16(30) ; CHECK-FP: blr - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/PowerPC/subsumes-pred-regs.ll b/test/CodeGen/PowerPC/subsumes-pred-regs.ll new file mode 100644 index 0000000..97ac788 --- /dev/null +++ b/test/CodeGen/PowerPC/subsumes-pred-regs.ll @@ -0,0 +1,65 @@ +; RUN: llc < %s -mcpu=ppc64 | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +; Function Attrs: nounwind +define zeroext i1 @test1() unnamed_addr #0 align 2 { + +; CHECK-LABEL: @test1 + +entry: + br i1 undef, label %lor.end, label %lor.rhs + +lor.rhs: ; preds = %entry + unreachable + +lor.end: ; preds = %entry + br i1 undef, label %land.rhs, label %if.then + +if.then: ; preds = %lor.end + br i1 undef, label %return, label %if.end.i24 + +if.end.i24: ; preds = %if.then + %0 = load 
i32* undef, align 4 + %lnot.i.i16.i23 = icmp eq i32 %0, 0 + br i1 %lnot.i.i16.i23, label %if.end7.i37, label %test.exit27.i34 + +test.exit27.i34: ; preds = %if.end.i24 + br i1 undef, label %return, label %if.end7.i37 + +if.end7.i37: ; preds = %test.exit27.i34, %if.end.i24 + %tobool.i.i36 = icmp eq i8 undef, 0 + br i1 %tobool.i.i36, label %return, label %if.then9.i39 + +if.then9.i39: ; preds = %if.end7.i37 + br i1 %lnot.i.i16.i23, label %return, label %lor.rhs.i.i49 + +; CHECK: .LBB0_7: +; CHECK: beq 1, .LBB0_10 +; CHECK: beq 0, .LBB0_10 +; CHECK: .LBB0_9: + +lor.rhs.i.i49: ; preds = %if.then9.i39 + %cmp.i.i.i.i48 = icmp ne i64 undef, 0 + br label %return + +land.rhs: ; preds = %lor.end + br i1 undef, label %return, label %if.end.i + +if.end.i: ; preds = %land.rhs + br i1 undef, label %return, label %if.then9.i + +if.then9.i: ; preds = %if.end.i + br i1 undef, label %return, label %lor.rhs.i.i + +lor.rhs.i.i: ; preds = %if.then9.i + %cmp.i.i.i.i = icmp ne i64 undef, 0 + br label %return + +return: ; preds = %lor.rhs.i.i, %if.then9.i, %if.end.i, %land.rhs, %lor.rhs.i.i49, %if.then9.i39, %if.end7.i37, %test.exit27.i34, %if.then + %retval.0 = phi i1 [ false, %if.then ], [ false, %test.exit27.i34 ], [ true, %if.end7.i37 ], [ true, %if.then9.i39 ], [ %cmp.i.i.i.i48, %lor.rhs.i.i49 ], [ false, %land.rhs ], [ true, %if.end.i ], [ true, %if.then9.i ], [ %cmp.i.i.i.i, %lor.rhs.i.i ] + ret i1 %retval.0 +} + +attributes #0 = { nounwind } + diff --git a/test/CodeGen/PowerPC/tls-gd-obj.ll b/test/CodeGen/PowerPC/tls-gd-obj.ll deleted file mode 100644 index 26cb6f2..0000000 --- a/test/CodeGen/PowerPC/tls-gd-obj.ll +++ /dev/null @@ -1,31 +0,0 @@ -; RUN: llc -mcpu=pwr7 -O0 -filetype=obj -relocation-model=pic %s -o - | \ -; RUN: llvm-readobj -r | FileCheck %s - -; Test correct relocation generation for thread-local storage using -; the general dynamic model and integrated assembly. 
- -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" -target triple = "powerpc64-unknown-linux-gnu" - -@a = thread_local global i32 0, align 4 - -define signext i32 @main() nounwind { -entry: - %retval = alloca i32, align 4 - store i32 0, i32* %retval - %0 = load i32* @a, align 4 - ret i32 %0 -} - -; Verify generation of R_PPC64_GOT_TLSGD16_HA, R_PPC64_GOT_TLSGD16_LO, -; and R_PPC64_TLSGD for accessing external variable a, and R_PPC64_REL24 -; for the call to __tls_get_addr. -; -; CHECK: Relocations [ -; CHECK: Section (2) .rela.text { -; CHECK: 0x{{[0-9,A-F]+}} R_PPC64_GOT_TLSGD16_HA a -; CHECK: 0x{{[0-9,A-F]+}} R_PPC64_GOT_TLSGD16_LO a -; CHECK: 0x{{[0-9,A-F]+}} R_PPC64_TLSGD a -; CHECK: 0x{{[0-9,A-F]+}} R_PPC64_REL24 __tls_get_addr -; CHECK: } -; CHECK: ] diff --git a/test/CodeGen/PowerPC/tls-ie-obj.ll b/test/CodeGen/PowerPC/tls-ie-obj.ll deleted file mode 100644 index f24a94b..0000000 --- a/test/CodeGen/PowerPC/tls-ie-obj.ll +++ /dev/null @@ -1,29 +0,0 @@ -; RUN: llc -mcpu=pwr7 -O0 -filetype=obj %s -o - | \ -; RUN: llvm-readobj -r | FileCheck %s - -; Test correct relocation generation for thread-local storage -; using the initial-exec model and integrated assembly. - -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" -target triple = "powerpc64-unknown-linux-gnu" - -@a = external thread_local global i32 - -define signext i32 @main() nounwind { -entry: - %retval = alloca i32, align 4 - store i32 0, i32* %retval - %0 = load i32* @a, align 4 - ret i32 %0 -} - -; Verify generation of R_PPC64_GOT_TPREL16_DS and R_PPC64_TLS for -; accessing external variable a. 
-; -; CHECK: Relocations [ -; CHECK: Section (2) .rela.text { -; CHECK: 0x{{[0-9,A-F]+}} R_PPC64_GOT_TPREL16_HA a -; CHECK: 0x{{[0-9,A-F]+}} R_PPC64_GOT_TPREL16_LO_DS a -; CHECK: 0x{{[0-9,A-F]+}} R_PPC64_TLS a -; CHECK: } -; CHECK: ] diff --git a/test/CodeGen/PowerPC/tls-ld-obj.ll b/test/CodeGen/PowerPC/tls-ld-obj.ll deleted file mode 100644 index 4a7d7b3..0000000 --- a/test/CodeGen/PowerPC/tls-ld-obj.ll +++ /dev/null @@ -1,34 +0,0 @@ -; RUN: llc -mcpu=pwr7 -O0 -filetype=obj -relocation-model=pic %s -o - | \ -; RUN: llvm-readobj -r | FileCheck %s - -; Test correct relocation generation for thread-local storage using -; the local dynamic model. - -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" -target triple = "powerpc64-unknown-linux-gnu" - -@a = hidden thread_local global i32 0, align 4 - -define signext i32 @main() nounwind { -entry: - %retval = alloca i32, align 4 - store i32 0, i32* %retval - %0 = load i32* @a, align 4 - ret i32 %0 -} - -; Verify generation of R_PPC64_GOT_TLSLD16_HA, R_PPC64_GOT_TLSLD16_LO, -; R_PPC64_TLSLD, R_PPC64_DTPREL16_HA, and R_PPC64_DTPREL16_LO for -; accessing external variable a, and R_PPC64_REL24 for the call to -; __tls_get_addr. 
-; -; CHECK: Relocations [ -; CHECK: Section (2) .rela.text { -; CHECK: 0x{{[0-9,A-F]+}} R_PPC64_GOT_TLSLD16_HA a -; CHECK: 0x{{[0-9,A-F]+}} R_PPC64_GOT_TLSLD16_LO a -; CHECK: 0x{{[0-9,A-F]+}} R_PPC64_TLSLD a -; CHECK: 0x{{[0-9,A-F]+}} R_PPC64_REL24 __tls_get_addr -; CHECK: 0x{{[0-9,A-F]+}} R_PPC64_DTPREL16_HA a -; CHECK: 0x{{[0-9,A-F]+}} R_PPC64_DTPREL16_LO a -; CHECK: } -; CHECK: ] diff --git a/test/CodeGen/PowerPC/unal-altivec2.ll b/test/CodeGen/PowerPC/unal-altivec2.ll new file mode 100644 index 0000000..7464675 --- /dev/null +++ b/test/CodeGen/PowerPC/unal-altivec2.ll @@ -0,0 +1,166 @@ +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +; Function Attrs: nounwind +define void @foo(float* noalias nocapture %x, float* noalias nocapture readonly %y) #0 { +entry: + br label %vector.body + +vector.body: ; preds = %vector.body, %entry +; CHECK-LABEL: @foo +; CHECK: lvsl +; CHECK: blr + %index = phi i64 [ 0, %entry ], [ %index.next.15, %vector.body ] + %0 = getelementptr inbounds float* %y, i64 %index + %1 = bitcast float* %0 to <4 x float>* + %wide.load = load <4 x float>* %1, align 4 + %2 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load) + %3 = getelementptr inbounds float* %x, i64 %index + %4 = bitcast float* %3 to <4 x float>* + store <4 x float> %2, <4 x float>* %4, align 4 + %index.next = add i64 %index, 4 + %5 = getelementptr inbounds float* %y, i64 %index.next + %6 = bitcast float* %5 to <4 x float>* + %wide.load.1 = load <4 x float>* %6, align 4 + %7 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.1) + %8 = getelementptr inbounds float* %x, i64 %index.next + %9 = bitcast float* %8 to <4 x float>* + store <4 x float> %7, <4 x float>* %9, align 4 + %index.next.1 = add i64 %index.next, 4 + %10 = getelementptr inbounds float* %y, i64 
%index.next.1 + %11 = bitcast float* %10 to <4 x float>* + %wide.load.2 = load <4 x float>* %11, align 4 + %12 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.2) + %13 = getelementptr inbounds float* %x, i64 %index.next.1 + %14 = bitcast float* %13 to <4 x float>* + store <4 x float> %12, <4 x float>* %14, align 4 + %index.next.2 = add i64 %index.next.1, 4 + %15 = getelementptr inbounds float* %y, i64 %index.next.2 + %16 = bitcast float* %15 to <4 x float>* + %wide.load.3 = load <4 x float>* %16, align 4 + %17 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.3) + %18 = getelementptr inbounds float* %x, i64 %index.next.2 + %19 = bitcast float* %18 to <4 x float>* + store <4 x float> %17, <4 x float>* %19, align 4 + %index.next.3 = add i64 %index.next.2, 4 + %20 = getelementptr inbounds float* %y, i64 %index.next.3 + %21 = bitcast float* %20 to <4 x float>* + %wide.load.4 = load <4 x float>* %21, align 4 + %22 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.4) + %23 = getelementptr inbounds float* %x, i64 %index.next.3 + %24 = bitcast float* %23 to <4 x float>* + store <4 x float> %22, <4 x float>* %24, align 4 + %index.next.4 = add i64 %index.next.3, 4 + %25 = getelementptr inbounds float* %y, i64 %index.next.4 + %26 = bitcast float* %25 to <4 x float>* + %wide.load.5 = load <4 x float>* %26, align 4 + %27 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.5) + %28 = getelementptr inbounds float* %x, i64 %index.next.4 + %29 = bitcast float* %28 to <4 x float>* + store <4 x float> %27, <4 x float>* %29, align 4 + %index.next.5 = add i64 %index.next.4, 4 + %30 = getelementptr inbounds float* %y, i64 %index.next.5 + %31 = bitcast float* %30 to <4 x float>* + %wide.load.6 = load <4 x float>* %31, align 4 + %32 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.6) + %33 = getelementptr inbounds float* %x, i64 %index.next.5 + %34 = bitcast float* %33 to <4 x float>* + store <4 x float> %32, <4 x float>* %34, align 4 + 
%index.next.6 = add i64 %index.next.5, 4 + %35 = getelementptr inbounds float* %y, i64 %index.next.6 + %36 = bitcast float* %35 to <4 x float>* + %wide.load.7 = load <4 x float>* %36, align 4 + %37 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.7) + %38 = getelementptr inbounds float* %x, i64 %index.next.6 + %39 = bitcast float* %38 to <4 x float>* + store <4 x float> %37, <4 x float>* %39, align 4 + %index.next.7 = add i64 %index.next.6, 4 + %40 = getelementptr inbounds float* %y, i64 %index.next.7 + %41 = bitcast float* %40 to <4 x float>* + %wide.load.8 = load <4 x float>* %41, align 4 + %42 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.8) + %43 = getelementptr inbounds float* %x, i64 %index.next.7 + %44 = bitcast float* %43 to <4 x float>* + store <4 x float> %42, <4 x float>* %44, align 4 + %index.next.8 = add i64 %index.next.7, 4 + %45 = getelementptr inbounds float* %y, i64 %index.next.8 + %46 = bitcast float* %45 to <4 x float>* + %wide.load.9 = load <4 x float>* %46, align 4 + %47 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.9) + %48 = getelementptr inbounds float* %x, i64 %index.next.8 + %49 = bitcast float* %48 to <4 x float>* + store <4 x float> %47, <4 x float>* %49, align 4 + %index.next.9 = add i64 %index.next.8, 4 + %50 = getelementptr inbounds float* %y, i64 %index.next.9 + %51 = bitcast float* %50 to <4 x float>* + %wide.load.10 = load <4 x float>* %51, align 4 + %52 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.10) + %53 = getelementptr inbounds float* %x, i64 %index.next.9 + %54 = bitcast float* %53 to <4 x float>* + store <4 x float> %52, <4 x float>* %54, align 4 + %index.next.10 = add i64 %index.next.9, 4 + %55 = getelementptr inbounds float* %y, i64 %index.next.10 + %56 = bitcast float* %55 to <4 x float>* + %wide.load.11 = load <4 x float>* %56, align 4 + %57 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.11) + %58 = getelementptr inbounds float* %x, i64 %index.next.10 + %59 = 
bitcast float* %58 to <4 x float>* + store <4 x float> %57, <4 x float>* %59, align 4 + %index.next.11 = add i64 %index.next.10, 4 + %60 = getelementptr inbounds float* %y, i64 %index.next.11 + %61 = bitcast float* %60 to <4 x float>* + %wide.load.12 = load <4 x float>* %61, align 4 + %62 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.12) + %63 = getelementptr inbounds float* %x, i64 %index.next.11 + %64 = bitcast float* %63 to <4 x float>* + store <4 x float> %62, <4 x float>* %64, align 4 + %index.next.12 = add i64 %index.next.11, 4 + %65 = getelementptr inbounds float* %y, i64 %index.next.12 + %66 = bitcast float* %65 to <4 x float>* + %wide.load.13 = load <4 x float>* %66, align 4 + %67 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.13) + %68 = getelementptr inbounds float* %x, i64 %index.next.12 + %69 = bitcast float* %68 to <4 x float>* + store <4 x float> %67, <4 x float>* %69, align 4 + %index.next.13 = add i64 %index.next.12, 4 + %70 = getelementptr inbounds float* %y, i64 %index.next.13 + %71 = bitcast float* %70 to <4 x float>* + %wide.load.14 = load <4 x float>* %71, align 4 + %72 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.14) + %73 = getelementptr inbounds float* %x, i64 %index.next.13 + %74 = bitcast float* %73 to <4 x float>* + store <4 x float> %72, <4 x float>* %74, align 4 + %index.next.14 = add i64 %index.next.13, 4 + %75 = getelementptr inbounds float* %y, i64 %index.next.14 + %76 = bitcast float* %75 to <4 x float>* + %wide.load.15 = load <4 x float>* %76, align 4 + %77 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.15) + %78 = getelementptr inbounds float* %x, i64 %index.next.14 + %79 = bitcast float* %78 to <4 x float>* + store <4 x float> %77, <4 x float>* %79, align 4 + %index.next.15 = add i64 %index.next.14, 4 + %80 = icmp eq i64 %index.next.15, 2048 + br i1 %80, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void +} + +; Function Attrs: nounwind readonly 
+declare <4 x float> @llvm_cos_v4f32(<4 x float>) #1 + +define <2 x double> @bar(double* %x) { +entry: + %p = bitcast double* %x to <2 x double>* + %r = load <2 x double>* %p, align 8 + +; CHECK-LABEL: @bar +; CHECK-NOT: lvsl +; CHECK: blr + + ret <2 x double> %r +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } diff --git a/test/CodeGen/PowerPC/unal4-std.ll b/test/CodeGen/PowerPC/unal4-std.ll index 169bd78..9f29e31 100644 --- a/test/CodeGen/PowerPC/unal4-std.ll +++ b/test/CodeGen/PowerPC/unal4-std.ll @@ -24,4 +24,4 @@ if.end210: ; preds = %entry ; CHECK: stdx {{[0-9]+}}, 0, } -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/PowerPC/unwind-dw2-g.ll b/test/CodeGen/PowerPC/unwind-dw2-g.ll index 2baac76..260d036 100644 --- a/test/CodeGen/PowerPC/unwind-dw2-g.ll +++ b/test/CodeGen/PowerPC/unwind-dw2-g.ll @@ -19,7 +19,7 @@ declare void @llvm.eh.unwind.init() #0 attributes #0 = { nounwind } !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!8} +!llvm.module.flags = !{!8, !11} !0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/unwind-dw2.c] [DW_LANG_C99] !1 = metadata !{metadata !"/tmp/unwind-dw2.c", metadata !"/tmp"} @@ -27,8 +27,9 @@ attributes #0 = { nounwind } !3 = metadata !{metadata !4} !4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, 
i1 false, void ()* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo] !5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/unwind-dw2.c] -!6 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !7 = metadata !{null} !8 = metadata !{i32 2, metadata !"Dwarf Version", i32 3} !9 = metadata !{i32 2, i32 0, metadata !4, null} !10 = metadata !{i32 3, i32 0, metadata !4, null} +!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/PowerPC/vec-abi-align.ll b/test/CodeGen/PowerPC/vec-abi-align.ll new file mode 100644 index 0000000..3239cf6 --- /dev/null +++ b/test/CodeGen/PowerPC/vec-abi-align.ll @@ -0,0 +1,60 @@ +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.s2 = type { i64, <4 x float> } + +@ve = external global <4 x float> +@n = external global i64 + +; Function Attrs: nounwind +define void @test1(i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, i64 %d9, <4 x float> inreg %vs.coerce) #0 { +entry: + store <4 x float> %vs.coerce, <4 x float>* @ve, align 16 + ret void + +; CHECK-LABEL: @test1 +; CHECK: stvx 2, +; CHECK: blr +} + +; Function Attrs: nounwind +define void @test2(i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, %struct.s2* byval nocapture readonly %vs) #0 { +entry: + %m = getelementptr inbounds %struct.s2* %vs, i64 0, i32 0 + %0 = load i64* %m, align 8 + store i64 %0, i64* 
@n, align 8 + %v = getelementptr inbounds %struct.s2* %vs, i64 0, i32 1 + %1 = load <4 x float>* %v, align 16 + store <4 x float> %1, <4 x float>* @ve, align 16 + ret void + +; CHECK-LABEL: @test2 +; CHECK: ld {{[0-9]+}}, 112(1) +; CHECK: li [[REG16:[0-9]+]], 16 +; CHECK: addi [[REGB:[0-9]+]], 1, 112 +; CHECK: lvx 2, [[REGB]], [[REG16]] +; CHECK: blr +} + +; Function Attrs: nounwind +define void @test3(i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, i64 %d9, %struct.s2* byval nocapture readonly %vs) #0 { +entry: + %m = getelementptr inbounds %struct.s2* %vs, i64 0, i32 0 + %0 = load i64* %m, align 8 + store i64 %0, i64* @n, align 8 + %v = getelementptr inbounds %struct.s2* %vs, i64 0, i32 1 + %1 = load <4 x float>* %v, align 16 + store <4 x float> %1, <4 x float>* @ve, align 16 + ret void + +; CHECK-LABEL: @test3 +; CHECK: ld {{[0-9]+}}, 128(1) +; CHECK: li [[REG16:[0-9]+]], 16 +; CHECK: addi [[REGB:[0-9]+]], 1, 128 +; CHECK: lvx 2, [[REGB]], [[REG16]] +; CHECK: blr +} + +attributes #0 = { nounwind } + diff --git a/test/CodeGen/PowerPC/vec_extload.ll b/test/CodeGen/PowerPC/vec_extload.ll index 6373a26..8d16e15 100644 --- a/test/CodeGen/PowerPC/vec_extload.ll +++ b/test/CodeGen/PowerPC/vec_extload.ll @@ -5,7 +5,7 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" -; Altivec does not provides an sext intruction, so it expands +; Altivec does not provides an sext instruction, so it expands ; a set of vector stores (stvx), bytes load/sign expand/store ; (lbz/stb), and a final vector load (lvx) to load the result ; extended vector. 
diff --git a/test/CodeGen/PowerPC/zero-not-run.ll b/test/CodeGen/PowerPC/zero-not-run.ll index 04c4277..9df0d6e 100644 --- a/test/CodeGen/PowerPC/zero-not-run.ll +++ b/test/CodeGen/PowerPC/zero-not-run.ll @@ -24,4 +24,4 @@ for.end731: ; preds = %entry ; Function Attrs: nounwind declare i64 @safe_mod_func_uint64_t_u_u(i64, i64) #0 -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/R600/128bit-kernel-args.ll b/test/CodeGen/R600/128bit-kernel-args.ll index 5c14270..3c4fcf7 100644 --- a/test/CodeGen/R600/128bit-kernel-args.ll +++ b/test/CodeGen/R600/128bit-kernel-args.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK -; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK ; R600-CHECK: @v4i32_kernel_arg ; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR:[0-9]]].X, KC0[3].Y diff --git a/test/CodeGen/R600/32-bit-local-address-space.ll b/test/CodeGen/R600/32-bit-local-address-space.ll new file mode 100644 index 0000000..7a12687 --- /dev/null +++ b/test/CodeGen/R600/32-bit-local-address-space.ll @@ -0,0 +1,88 @@ +; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s + +; On Southern Islands GPUs the local address space(3) uses 32-bit pointers and +; the global address space(1) uses 64-bit pointers. These tests check to make sure +; the correct pointer size is used for the local address space. 
+ +; The e{{32|64}} suffix on the instructions refers to the encoding size and not +; the size of the operands. The operand size is denoted in the instruction name. +; Instructions with B32, U32, and I32 in their name take 32-bit operands, while +; instructions with B64, U64, and I64 take 64-bit operands. + +; CHECK-LABEL: @local_address_load +; CHECK: V_MOV_B32_e{{32|64}} [[PTR:v[0-9]]] +; CHECK: DS_READ_B32 [[PTR]] +define void @local_address_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) { +entry: + %0 = load i32 addrspace(3)* %in + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; CHECK-LABEL: @local_address_gep +; CHECK: S_ADD_I32 [[SPTR:s[0-9]]] +; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]] +; CHECK: DS_READ_B32 [[VPTR]] +define void @local_address_gep(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %offset) { +entry: + %0 = getelementptr i32 addrspace(3)* %in, i32 %offset + %1 = load i32 addrspace(3)* %0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK-LABEL: @local_address_gep_const_offset +; CHECK: S_ADD_I32 [[SPTR:s[0-9]]] +; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]] +; CHECK: DS_READ_B32 [[VPTR]] +define void @local_address_gep_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) { +entry: + %0 = getelementptr i32 addrspace(3)* %in, i32 1 + %1 = load i32 addrspace(3)* %0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK-LABEL: @null_32bit_lds_ptr: +; CHECK: V_CMP_NE_I32 +; CHECK-NOT: V_CMP_NE_I32 +; CHECK: V_CNDMASK_B32 +define void @null_32bit_lds_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %lds) nounwind { + %cmp = icmp ne i32 addrspace(3)* %lds, null + %x = select i1 %cmp, i32 123, i32 456 + store i32 %x, i32 addrspace(1)* %out + ret void +} + +; CHECK-LABEL: @mul_32bit_ptr: +; CHECK: V_MUL_LO_I32 +; CHECK-NEXT: V_ADD_I32_e32 +; CHECK-NEXT: DS_READ_B32 +define void @mul_32bit_ptr(float addrspace(1)* %out, [3 x float] addrspace(3)* %lds, i32 %tid) { + %ptr = getelementptr [3 x float] 
addrspace(3)* %lds, i32 %tid, i32 0 + %val = load float addrspace(3)* %ptr + store float %val, float addrspace(1)* %out + ret void +} + +@g_lds = addrspace(3) global float zeroinitializer, align 4 + +; CHECK-LABEL: @infer_ptr_alignment_global_offset: +; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 0 +; CHECK: DS_READ_B32 v{{[0-9]+}}, 0, [[REG]] +define void @infer_ptr_alignment_global_offset(float addrspace(1)* %out, i32 %tid) { + %val = load float addrspace(3)* @g_lds + store float %val, float addrspace(1)* %out + ret void +} + + +@ptr = addrspace(3) global i32 addrspace(3)* null +@dst = addrspace(3) global [16384 x i32] zeroinitializer + +; SI-LABEL: @global_ptr: +; SI-CHECK: DS_WRITE_B32 +define void @global_ptr() nounwind { + store i32 addrspace(3)* getelementptr ([16384 x i32] addrspace(3)* @dst, i32 0, i32 16), i32 addrspace(3)* addrspace(3)* @ptr + ret void +} diff --git a/test/CodeGen/R600/64bit-kernel-args.ll b/test/CodeGen/R600/64bit-kernel-args.ll index 34a0a87..0d6bfb1 100644 --- a/test/CodeGen/R600/64bit-kernel-args.ll +++ b/test/CodeGen/R600/64bit-kernel-args.ll @@ -1,8 +1,8 @@ -; RUN: llc < %s -march=r600 -mcpu=tahiti | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK ; SI-CHECK: @f64_kernel_arg -; SI-CHECK-DAG: S_LOAD_DWORDX2 SGPR{{[0-9]}}_SGPR{{[0-9]}}, SGPR0_SGPR1, 9 -; SI-CHECK-DAG: S_LOAD_DWORDX2 SGPR{{[0-9]}}_SGPR{{[0-9]}}, SGPR0_SGPR1, 11 +; SI-CHECK-DAG: S_LOAD_DWORDX2 s[{{[0-9]:[0-9]}}], s[0:1], 9 +; SI-CHECK-DAG: S_LOAD_DWORDX2 s[{{[0-9]:[0-9]}}], s[0:1], 11 ; SI-CHECK: BUFFER_STORE_DWORDX2 define void @f64_kernel_arg(double addrspace(1)* %out, double %in) { entry: diff --git a/test/CodeGen/R600/add.ll b/test/CodeGen/R600/add.ll index 16f7f97..3d5506b 100644 --- a/test/CodeGen/R600/add.ll +++ b/test/CodeGen/R600/add.ll @@ -1,39 +1,55 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -; RUN: llc < %s -march=r600 -mcpu=verde | 
FileCheck --check-prefix=SI-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s -;EG-CHECK: @test2 +;EG-CHECK-LABEL: @test1: +;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +;SI-CHECK-LABEL: @test1: +;SI-CHECK: V_ADD_I32_e32 [[REG:v[0-9]+]], {{v[0-9]+, v[0-9]+}} +;SI-CHECK-NOT: [[REG]] +;SI-CHECK: BUFFER_STORE_DWORD [[REG]], +define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1 + %a = load i32 addrspace(1)* %in + %b = load i32 addrspace(1)* %b_ptr + %result = add i32 %a, %b + store i32 %result, i32 addrspace(1)* %out + ret void +} + +;EG-CHECK-LABEL: @test2: ;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI-CHECK: @test2 -;SI-CHECK: V_ADD_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_ADD_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} +;SI-CHECK-LABEL: @test2: +;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 - %a = load <2 x i32> addrspace(1) * %in - %b = load <2 x i32> addrspace(1) * %b_ptr + %a = load <2 x i32> addrspace(1)* %in + %b = load <2 x i32> addrspace(1)* %b_ptr %result = add <2 x i32> %a, %b store <2 x i32> %result, <2 x i32> addrspace(1)* %out ret void } -;EG-CHECK: @test4 +;EG-CHECK-LABEL: @test4: ;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI-CHECK: @test4 -;SI-CHECK: 
V_ADD_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_ADD_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_ADD_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_ADD_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} +;SI-CHECK-LABEL: @test4: +;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 - %a = load <4 x i32> addrspace(1) * %in - %b = load <4 x i32> addrspace(1) * %b_ptr + %a = load <4 x i32> addrspace(1)* %in + %b = load <4 x i32> addrspace(1)* %b_ptr %result = add <4 x i32> %a, %b store <4 x i32> %result, <4 x i32> addrspace(1)* %out ret void diff --git a/test/CodeGen/R600/add_i64.ll b/test/CodeGen/R600/add_i64.ll new file mode 100644 index 0000000..303a1cb --- /dev/null +++ b/test/CodeGen/R600/add_i64.ll @@ -0,0 +1,59 @@ +; XFAIL: * +; This will fail until i64 add is enabled + +; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI %s + + +declare i32 @llvm.SI.tid() readnone + +; SI-LABEL: @test_i64_vreg: +define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) { + %tid = call i32 @llvm.SI.tid() readnone + %a_ptr = getelementptr i64 addrspace(1)* %inA, i32 %tid + %b_ptr = getelementptr i64 addrspace(1)* %inB, i32 %tid + %a = load i64 addrspace(1)* %a_ptr + %b = load i64 addrspace(1)* %b_ptr + %result = add i64 %a, %b + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; Check that the SGPR add operand is correctly moved to a VGPR. 
+; SI-LABEL: @sgpr_operand: +define void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 addrspace(1)* noalias %in_bar, i64 %a) { + %foo = load i64 addrspace(1)* %in, align 8 + %result = add i64 %foo, %a + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; Swap the arguments. Check that the SGPR -> VGPR copy works with the +; SGPR as other operand. +; +; SI-LABEL: @sgpr_operand_reversed: +define void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %a) { + %foo = load i64 addrspace(1)* %in, align 8 + %result = add i64 %a, %foo + store i64 %result, i64 addrspace(1)* %out + ret void +} + + +; SI-LABEL: @test_v2i64_sreg: +define void @test_v2i64_sreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %a, <2 x i64> %b) { + %result = add <2 x i64> %a, %b + store <2 x i64> %result, <2 x i64> addrspace(1)* %out + ret void +} + +; SI-LABEL: @test_v2i64_vreg: +define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) { + %tid = call i32 @llvm.SI.tid() readnone + %a_ptr = getelementptr <2 x i64> addrspace(1)* %inA, i32 %tid + %b_ptr = getelementptr <2 x i64> addrspace(1)* %inB, i32 %tid + %a = load <2 x i64> addrspace(1)* %a_ptr + %b = load <2 x i64> addrspace(1)* %b_ptr + %result = add <2 x i64> %a, %b + store <2 x i64> %result, <2 x i64> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/address-space.ll b/test/CodeGen/R600/address-space.ll new file mode 100644 index 0000000..1fc616a --- /dev/null +++ b/test/CodeGen/R600/address-space.ll @@ -0,0 +1,31 @@ +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck %s + +; Test that codegenprepare understands address space sizes + +%struct.foo = type { [3 x float], [3 x float] } + +; CHECK-LABEL: @do_as_ptr_calcs: +; CHECK: S_ADD_I32 {{s[0-9]+}}, +; CHECK: S_ADD_I32 [[SREG1:s[0-9]+]], +; CHECK: V_MOV_B32_e32 [[VREG1:v[0-9]+]], [[SREG1]] +; CHECK: DS_READ_B32 
[[VREG1]], +define void @do_as_ptr_calcs(%struct.foo addrspace(3)* nocapture %ptr) nounwind { +entry: + %x = getelementptr inbounds %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0 + %y = getelementptr inbounds %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 2 + br label %bb32 + +bb32: + %a = load float addrspace(3)* %x, align 4 + %b = load float addrspace(3)* %y, align 4 + %cmp = fcmp one float %a, %b + br i1 %cmp, label %bb34, label %bb33 + +bb33: + unreachable + +bb34: + unreachable +} + + diff --git a/test/CodeGen/R600/and.ll b/test/CodeGen/R600/and.ll index 44c21bd..ee9bc83 100644 --- a/test/CodeGen/R600/and.ll +++ b/test/CodeGen/R600/and.ll @@ -1,13 +1,13 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s ;EG-CHECK: @test2 ;EG-CHECK: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: AND_INT {{\*? 
*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;SI-CHECK: @test2 -;SI-CHECK: V_AND_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_AND_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} +;SI-CHECK: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 @@ -19,16 +19,16 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { } ;EG-CHECK: @test4 -;EG-CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: AND_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: AND_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: AND_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;SI-CHECK: @test4 -;SI-CHECK: V_AND_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_AND_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_AND_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_AND_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} +;SI-CHECK: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 diff --git a/test/CodeGen/R600/array-ptr-calc-i64.ll b/test/CodeGen/R600/array-ptr-calc-i64.ll new file mode 100644 index 0000000..652bbfe --- 
/dev/null +++ b/test/CodeGen/R600/array-ptr-calc-i64.ll @@ -0,0 +1,18 @@ +; XFAIL: * +; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI %s + +declare i32 @llvm.SI.tid() readnone + + +; SI-LABEL: @test_array_ptr_calc( +define void @test_array_ptr_calc(i32 addrspace(1)* noalias %out, [16 x i32] addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) { + %tid = call i32 @llvm.SI.tid() readnone + %a_ptr = getelementptr [16 x i32] addrspace(1)* %inA, i32 1, i32 %tid + %b_ptr = getelementptr i32 addrspace(1)* %inB, i32 %tid + %a = load i32 addrspace(1)* %a_ptr + %b = load i32 addrspace(1)* %b_ptr + %result = add i32 %a, %b + store i32 %result, i32 addrspace(1)* %out + ret void +} + diff --git a/test/CodeGen/R600/atomic_load_add.ll b/test/CodeGen/R600/atomic_load_add.ll new file mode 100644 index 0000000..0bc48a3 --- /dev/null +++ b/test/CodeGen/R600/atomic_load_add.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK + +; R600-CHECK-LABEL: @atomic_add_local +; R600-CHECK: LDS_ADD * +; SI-CHECK-LABEL: @atomic_add_local +; SI-CHECK: DS_ADD_U32_RTN 0 +define void @atomic_add_local(i32 addrspace(3)* %local) { +entry: + %0 = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst + ret void +} + +; R600-CHECK-LABEL: @atomic_add_ret_local +; R600-CHECK: LDS_ADD_RET * +; SI-CHECK-LABEL: @atomic_add_ret_local +; SI-CHECK: DS_ADD_U32_RTN 0 +define void @atomic_add_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) { +entry: + %0 = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst + store i32 %0, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/atomic_load_sub.ll b/test/CodeGen/R600/atomic_load_sub.ll new file mode 100644 index 0000000..e4a6829 --- /dev/null +++ b/test/CodeGen/R600/atomic_load_sub.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | 
FileCheck %s --check-prefix=R600-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK + +; R600-CHECK-LABEL: @atomic_sub_local +; R600-CHECK: LDS_SUB * +; SI-CHECK-LABEL: @atomic_sub_local +; SI-CHECK: DS_SUB_U32_RTN 0 +define void @atomic_sub_local(i32 addrspace(3)* %local) { +entry: + %0 = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst + ret void +} + +; R600-CHECK-LABEL: @atomic_sub_ret_local +; R600-CHECK: LDS_SUB_RET * +; SI-CHECK-LABEL: @atomic_sub_ret_local +; SI-CHECK: DS_SUB_U32_RTN 0 +define void @atomic_sub_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) { +entry: + %0 = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst + store i32 %0, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/bfi_int.ll b/test/CodeGen/R600/bfi_int.ll index cdccdfa..bbfe856 100644 --- a/test/CodeGen/R600/bfi_int.ll +++ b/test/CodeGen/R600/bfi_int.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s -; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s ; BFI_INT Definition pattern from ISA docs ; (y & x) | (z & ~x) @@ -38,8 +38,8 @@ entry: ; R600-CHECK: @bfi_sha256_ma ; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], KC0[2].Z, KC0[2].W ; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W -; SI-CHECK: V_XOR_B32_e64 [[DST:VGPR[0-9]+]], {{[SV]GPR[0-9]+, VGPR[0-9]+}} -; SI-CHECK: V_BFI_B32 {{VGPR[0-9]+}}, [[DST]], {{[SV]GPR[0-9]+, [SV]GPR[0-9]+}} +; SI-CHECK: V_XOR_B32_e64 [[DST:v[0-9]+]], {{[sv][0-9]+, v[0-9]+}} +; SI-CHECK: V_BFI_B32 {{v[0-9]+}}, [[DST]], {{[sv][0-9]+, [sv][0-9]+}} define void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) { entry: diff --git a/test/CodeGen/R600/big_alu.ll b/test/CodeGen/R600/big_alu.ll new file mode 100644 index 
0000000..6b68376 --- /dev/null +++ b/test/CodeGen/R600/big_alu.ll @@ -0,0 +1,1174 @@ +;RUN: llc < %s -march=r600 -mcpu=cedar +;REQUIRES: asserts + +;This test ensures that R600 backend can handle ifcvt properly +;and do not generate ALU clauses with more than 128 instructions. + +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7, <4 x float> inreg %reg8, <4 x float> inreg %reg9) #0 { +main_body: + %0 = extractelement <4 x float> %reg0, i32 0 + %1 = extractelement <4 x float> %reg0, i32 1 + %2 = extractelement <4 x float> %reg0, i32 2 + %3 = extractelement <4 x float> %reg0, i32 3 + %4 = extractelement <4 x float> %reg1, i32 0 + %5 = extractelement <4 x float> %reg9, i32 0 + %6 = extractelement <4 x float> %reg8, i32 0 + %7 = fcmp ugt float %6, 0.000000e+00 + %8 = select i1 %7, float %4, float %5 + %9 = extractelement <4 x float> %reg1, i32 1 + %10 = extractelement <4 x float> %reg9, i32 1 + %11 = extractelement <4 x float> %reg8, i32 0 + %12 = fcmp ugt float %11, 0.000000e+00 + %13 = select i1 %12, float %9, float %10 + %14 = extractelement <4 x float> %reg1, i32 2 + %15 = extractelement <4 x float> %reg9, i32 2 + %16 = extractelement <4 x float> %reg8, i32 0 + %17 = fcmp ugt float %16, 0.000000e+00 + %18 = select i1 %17, float %14, float %15 + %19 = extractelement <4 x float> %reg1, i32 3 + %20 = extractelement <4 x float> %reg9, i32 3 + %21 = extractelement <4 x float> %reg8, i32 0 + %22 = extractelement <4 x float> %reg2, i32 0 + %23 = extractelement <4 x float> %reg2, i32 1 + %24 = extractelement <4 x float> %reg2, i32 2 + %25 = extractelement <4 x float> %reg2, i32 3 + %26 = extractelement <4 x float> %reg3, i32 0 + %27 = extractelement <4 x float> %reg3, i32 1 + %28 = extractelement <4 x float> %reg3, i32 2 + %29 = extractelement <4 x float> %reg3, i32 3 + %30 = extractelement <4 x float> %reg4, 
i32 0 + %31 = extractelement <4 x float> %reg4, i32 1 + %32 = extractelement <4 x float> %reg4, i32 2 + %33 = extractelement <4 x float> %reg4, i32 3 + %34 = extractelement <4 x float> %reg5, i32 0 + %35 = extractelement <4 x float> %reg5, i32 1 + %36 = extractelement <4 x float> %reg5, i32 2 + %37 = extractelement <4 x float> %reg5, i32 3 + %38 = extractelement <4 x float> %reg6, i32 0 + %39 = extractelement <4 x float> %reg6, i32 1 + %40 = extractelement <4 x float> %reg6, i32 2 + %41 = extractelement <4 x float> %reg6, i32 3 + %42 = extractelement <4 x float> %reg7, i32 0 + %43 = extractelement <4 x float> %reg7, i32 1 + %44 = extractelement <4 x float> %reg7, i32 2 + %45 = extractelement <4 x float> %reg7, i32 3 + %46 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11) + %47 = extractelement <4 x float> %46, i32 0 + %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11) + %49 = extractelement <4 x float> %48, i32 1 + %50 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11) + %51 = extractelement <4 x float> %50, i32 2 + %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12) + %53 = extractelement <4 x float> %52, i32 0 + %54 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14) + %55 = extractelement <4 x float> %54, i32 0 + %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14) + %57 = extractelement <4 x float> %56, i32 1 + %58 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14) + %59 = extractelement <4 x float> %58, i32 2 + %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14) + %61 = extractelement <4 x float> %60, i32 3 + %62 = load <4 x float> 
addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16) + %63 = extractelement <4 x float> %62, i32 0 + %64 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16) + %65 = extractelement <4 x float> %64, i32 1 + %66 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16) + %67 = extractelement <4 x float> %66, i32 2 + %68 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) + %69 = extractelement <4 x float> %68, i32 0 + %70 = fcmp oge float %69, 3.500000e+00 + %71 = sext i1 %70 to i32 + %72 = bitcast i32 %71 to float + %73 = bitcast float %72 to i32 + %74 = icmp ne i32 %73, 0 + %. = select i1 %74, float 0.000000e+00, float 0.000000e+00 + %75 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) + %76 = extractelement <4 x float> %75, i32 0 + %77 = fcmp oge float %76, 2.000000e+00 + %78 = sext i1 %77 to i32 + %79 = bitcast i32 %78 to float + %80 = bitcast float %79 to i32 + %81 = icmp ne i32 %80, 0 + br i1 %81, label %IF137, label %ENDIF136 + +IF137: ; preds = %main_body + %82 = insertelement <4 x float> undef, float %30, i32 0 + %83 = insertelement <4 x float> %82, float %31, i32 1 + %84 = insertelement <4 x float> %83, float %32, i32 2 + %85 = insertelement <4 x float> %84, float 0.000000e+00, i32 3 + %86 = insertelement <4 x float> undef, float %30, i32 0 + %87 = insertelement <4 x float> %86, float %31, i32 1 + %88 = insertelement <4 x float> %87, float %32, i32 2 + %89 = insertelement <4 x float> %88, float 0.000000e+00, i32 3 + %90 = call float @llvm.AMDGPU.dp4(<4 x float> %85, <4 x float> %89) + %91 = call float @llvm.AMDGPU.rsq(float %90) + %92 = fmul float %30, %91 + %93 = fmul float %31, %91 + %94 = fmul float %32, %91 + %95 = insertelement <4 x float> undef, float %92, i32 0 + %96 = insertelement <4 x float> %95, float %93, i32 1 + 
%97 = insertelement <4 x float> %96, float %94, i32 2 + %98 = insertelement <4 x float> %97, float 0.000000e+00, i32 3 + %99 = insertelement <4 x float> undef, float %37, i32 0 + %100 = insertelement <4 x float> %99, float %38, i32 1 + %101 = insertelement <4 x float> %100, float %39, i32 2 + %102 = insertelement <4 x float> %101, float 0.000000e+00, i32 3 + %103 = call float @llvm.AMDGPU.dp4(<4 x float> %98, <4 x float> %102) + %104 = insertelement <4 x float> undef, float %92, i32 0 + %105 = insertelement <4 x float> %104, float %93, i32 1 + %106 = insertelement <4 x float> %105, float %94, i32 2 + %107 = insertelement <4 x float> %106, float 0.000000e+00, i32 3 + %108 = insertelement <4 x float> undef, float %40, i32 0 + %109 = insertelement <4 x float> %108, float %41, i32 1 + %110 = insertelement <4 x float> %109, float %42, i32 2 + %111 = insertelement <4 x float> %110, float 0.000000e+00, i32 3 + %112 = call float @llvm.AMDGPU.dp4(<4 x float> %107, <4 x float> %111) + %113 = fsub float -0.000000e+00, %92 + %114 = fsub float -0.000000e+00, %93 + %115 = fsub float -0.000000e+00, %94 + %116 = insertelement <4 x float> undef, float %34, i32 0 + %117 = insertelement <4 x float> %116, float %35, i32 1 + %118 = insertelement <4 x float> %117, float %36, i32 2 + %119 = insertelement <4 x float> %118, float 0.000000e+00, i32 3 + %120 = insertelement <4 x float> undef, float %113, i32 0 + %121 = insertelement <4 x float> %120, float %114, i32 1 + %122 = insertelement <4 x float> %121, float %115, i32 2 + %123 = insertelement <4 x float> %122, float 0.000000e+00, i32 3 + %124 = call float @llvm.AMDGPU.dp4(<4 x float> %119, <4 x float> %123) + %125 = fdiv float 1.000000e+00, %124 + %126 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) + %127 = extractelement <4 x float> %126, i32 0 + %128 = fmul float %127, %125 + %129 = fmul float %103, %128 + %130 = fmul float %112, %128 + %131 = bitcast float %. 
to i32 + %132 = sitofp i32 %131 to float + %133 = fdiv float 1.000000e+00, %132 + %134 = bitcast float %. to i32 + %135 = add i32 %134, -1 + %136 = bitcast i32 %135 to float + %137 = bitcast float %136 to i32 + br label %LOOP + +ENDIF136: ; preds = %main_body, %ENDIF154 + %temp68.1 = phi float [ %600, %ENDIF154 ], [ 0.000000e+00, %main_body ] + %temp69.0 = phi float [ %602, %ENDIF154 ], [ 0.000000e+00, %main_body ] + %temp70.0 = phi float [ %604, %ENDIF154 ], [ 1.000000e+00, %main_body ] + %138 = fmul float %26, 0x3F847AE140000000 + %139 = fmul float %27, 0x3F847AE140000000 + %140 = fmul float %28, 0x3F847AE140000000 + %141 = insertelement <4 x float> undef, float %138, i32 0 + %142 = insertelement <4 x float> %141, float %139, i32 1 + %143 = insertelement <4 x float> %142, float %140, i32 2 + %144 = insertelement <4 x float> %143, float 0.000000e+00, i32 3 + %145 = extractelement <4 x float> %144, i32 0 + %146 = extractelement <4 x float> %144, i32 1 + %147 = extractelement <4 x float> %144, i32 2 + %148 = extractelement <4 x float> %144, i32 3 + %149 = insertelement <4 x float> undef, float %145, i32 0 + %150 = insertelement <4 x float> %149, float %146, i32 1 + %151 = insertelement <4 x float> %150, float %147, i32 2 + %152 = insertelement <4 x float> %151, float %148, i32 3 + %153 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %152, i32 16, i32 0, i32 3) + %154 = extractelement <4 x float> %153, i32 0 + %155 = extractelement <4 x float> %153, i32 1 + %156 = extractelement <4 x float> %153, i32 2 + %157 = extractelement <4 x float> %153, i32 3 + %158 = fmul float %26, 0x3F45A07B40000000 + %159 = fmul float %27, 0x3F45A07B40000000 + %160 = fmul float %28, 0x3F45A07B40000000 + %161 = insertelement <4 x float> undef, float %158, i32 0 + %162 = insertelement <4 x float> %161, float %159, i32 1 + %163 = insertelement <4 x float> %162, float %160, i32 2 + %164 = insertelement <4 x float> %163, float 0.000000e+00, i32 3 + %165 = extractelement <4 x float> %164, i32 0 
+ %166 = extractelement <4 x float> %164, i32 1 + %167 = extractelement <4 x float> %164, i32 2 + %168 = extractelement <4 x float> %164, i32 3 + %169 = insertelement <4 x float> undef, float %165, i32 0 + %170 = insertelement <4 x float> %169, float %166, i32 1 + %171 = insertelement <4 x float> %170, float %167, i32 2 + %172 = insertelement <4 x float> %171, float %168, i32 3 + %173 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %172, i32 16, i32 0, i32 3) + %174 = extractelement <4 x float> %173, i32 0 + %175 = extractelement <4 x float> %173, i32 1 + %176 = extractelement <4 x float> %173, i32 2 + %177 = extractelement <4 x float> %173, i32 3 + %178 = fmul float %176, 3.000000e+03 + %179 = fadd float %178, %28 + %180 = fdiv float 1.000000e+00, %33 + %181 = fmul float %32, %180 + %182 = call float @fabs(float %181) + %183 = fmul float %174, 0x3FD99999A0000000 + %184 = fadd float %183, 0x3FAEB851E0000000 + %185 = fmul float %175, 0x3FE3333340000000 + %186 = fadd float %185, %184 + %187 = fmul float %176, 2.000000e+00 + %188 = fadd float %187, %186 + %189 = fmul float %177, 4.000000e+00 + %190 = fadd float %189, %188 + %191 = fmul float %154, 0x3FB99999A0000000 + %192 = fadd float %191, %190 + %193 = fmul float %155, 0x3FD99999A0000000 + %194 = fadd float %193, %192 + %195 = fmul float %156, 0x3FE99999A0000000 + %196 = fadd float %195, %194 + %197 = fmul float %157, 0x4000CCCCC0000000 + %198 = fadd float %197, %196 + %199 = fmul float 0xBE5EFB4CC0000000, %182 + %200 = fmul float %199, %182 + %201 = call float @llvm.AMDIL.exp.(float %200) + %202 = call float @llvm.AMDGPU.lrp(float %201, float %198, float 0x3FA99999A0000000) + %203 = fadd float %202, 0x3FF4CCCCC0000000 + %204 = fmul float %203, 0x3FE1C71C80000000 + %205 = call float @llvm.AMDIL.clamp.(float %204, float 0.000000e+00, float 1.000000e+00) + %206 = fadd float %202, 0x3FF4CCCCC0000000 + %207 = fmul float %206, 0x3FE1C71C80000000 + %208 = call float @llvm.AMDIL.clamp.(float %207, float 0.000000e+00, 
float 1.000000e+00) + %209 = fadd float %202, 2.000000e+00 + %210 = fmul float %209, 0x3FD611A7A0000000 + %211 = call float @llvm.AMDIL.clamp.(float %210, float 0.000000e+00, float 1.000000e+00) + %212 = fmul float 2.000000e+00, %205 + %213 = fsub float -0.000000e+00, %212 + %214 = fadd float 3.000000e+00, %213 + %215 = fmul float %205, %214 + %216 = fmul float %205, %215 + %217 = fmul float 2.000000e+00, %208 + %218 = fsub float -0.000000e+00, %217 + %219 = fadd float 3.000000e+00, %218 + %220 = fmul float %208, %219 + %221 = fmul float %208, %220 + %222 = fmul float 2.000000e+00, %211 + %223 = fsub float -0.000000e+00, %222 + %224 = fadd float 3.000000e+00, %223 + %225 = fmul float %211, %224 + %226 = fmul float %211, %225 + %227 = fmul float %26, 0x3F368B5CC0000000 + %228 = fmul float %27, 0x3F368B5CC0000000 + %229 = insertelement <4 x float> undef, float %227, i32 0 + %230 = insertelement <4 x float> %229, float %228, i32 1 + %231 = insertelement <4 x float> %230, float 0.000000e+00, i32 2 + %232 = insertelement <4 x float> %231, float 0.000000e+00, i32 3 + %233 = extractelement <4 x float> %232, i32 0 + %234 = extractelement <4 x float> %232, i32 1 + %235 = insertelement <4 x float> undef, float %233, i32 0 + %236 = insertelement <4 x float> %235, float %234, i32 1 + %237 = insertelement <4 x float> %236, float undef, i32 2 + %238 = insertelement <4 x float> %237, float undef, i32 3 + %239 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %238, i32 17, i32 1, i32 2) + %240 = extractelement <4 x float> %239, i32 0 + %241 = insertelement <4 x float> undef, float %240, i32 0 + %242 = insertelement <4 x float> %241, float %228, i32 1 + %243 = insertelement <4 x float> %242, float 0.000000e+00, i32 2 + %244 = insertelement <4 x float> %243, float 0.000000e+00, i32 3 + %245 = extractelement <4 x float> %244, i32 0 + %246 = insertelement <4 x float> undef, float %245, i32 0 + %247 = insertelement <4 x float> %246, float undef, i32 1 + %248 = insertelement <4 x float> 
%247, float undef, i32 2 + %249 = insertelement <4 x float> %248, float undef, i32 3 + %250 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %249, i32 18, i32 2, i32 1) + %251 = extractelement <4 x float> %250, i32 0 + %252 = extractelement <4 x float> %250, i32 1 + %253 = extractelement <4 x float> %250, i32 2 + %254 = extractelement <4 x float> %250, i32 3 + %255 = fmul float %251, %216 + %256 = fmul float %252, %221 + %257 = fmul float %253, %226 + %258 = fmul float %254, 0.000000e+00 + %259 = fadd float %202, 0x3FF4CCCCC0000000 + %260 = fmul float %259, 0x3FE1C71C80000000 + %261 = call float @llvm.AMDIL.clamp.(float %260, float 0.000000e+00, float 1.000000e+00) + %262 = fadd float %202, 0x3FF4CCCCC0000000 + %263 = fmul float %262, 0x3FE1C71C80000000 + %264 = call float @llvm.AMDIL.clamp.(float %263, float 0.000000e+00, float 1.000000e+00) + %265 = fadd float %202, 2.000000e+00 + %266 = fmul float %265, 0x3FD611A7A0000000 + %267 = call float @llvm.AMDIL.clamp.(float %266, float 0.000000e+00, float 1.000000e+00) + %268 = fmul float 2.000000e+00, %261 + %269 = fsub float -0.000000e+00, %268 + %270 = fadd float 3.000000e+00, %269 + %271 = fmul float %261, %270 + %272 = fmul float %261, %271 + %273 = fmul float 2.000000e+00, %264 + %274 = fsub float -0.000000e+00, %273 + %275 = fadd float 3.000000e+00, %274 + %276 = fmul float %264, %275 + %277 = fmul float %264, %276 + %278 = fmul float 2.000000e+00, %267 + %279 = fsub float -0.000000e+00, %278 + %280 = fadd float 3.000000e+00, %279 + %281 = fmul float %267, %280 + %282 = fmul float %267, %281 + %283 = fmul float %26, 0x3F22DFD6A0000000 + %284 = fmul float %27, 0x3F22DFD6A0000000 + %285 = insertelement <4 x float> undef, float %283, i32 0 + %286 = insertelement <4 x float> %285, float %284, i32 1 + %287 = insertelement <4 x float> %286, float 0.000000e+00, i32 2 + %288 = insertelement <4 x float> %287, float 0.000000e+00, i32 3 + %289 = extractelement <4 x float> %288, i32 0 + %290 = extractelement <4 x float> %288, 
i32 1 + %291 = insertelement <4 x float> undef, float %289, i32 0 + %292 = insertelement <4 x float> %291, float %290, i32 1 + %293 = insertelement <4 x float> %292, float undef, i32 2 + %294 = insertelement <4 x float> %293, float undef, i32 3 + %295 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %294, i32 19, i32 3, i32 2) + %296 = extractelement <4 x float> %295, i32 0 + %297 = extractelement <4 x float> %295, i32 1 + %298 = extractelement <4 x float> %295, i32 2 + %299 = extractelement <4 x float> %295, i32 3 + %300 = fmul float %296, %272 + %301 = fmul float %297, %277 + %302 = fmul float %298, %282 + %303 = fmul float %299, 0.000000e+00 + %304 = fmul float %temp68.1, %37 + %305 = fmul float %temp68.1, %38 + %306 = fmul float %temp68.1, %39 + %307 = fmul float %temp69.0, %40 + %308 = fadd float %307, %304 + %309 = fmul float %temp69.0, %41 + %310 = fadd float %309, %305 + %311 = fmul float %temp69.0, %42 + %312 = fadd float %311, %306 + %313 = fmul float %temp70.0, %34 + %314 = fadd float %313, %308 + %315 = fmul float %temp70.0, %35 + %316 = fadd float %315, %310 + %317 = fmul float %temp70.0, %36 + %318 = fadd float %317, %312 + %319 = insertelement <4 x float> undef, float %314, i32 0 + %320 = insertelement <4 x float> %319, float %316, i32 1 + %321 = insertelement <4 x float> %320, float %318, i32 2 + %322 = insertelement <4 x float> %321, float 0.000000e+00, i32 3 + %323 = insertelement <4 x float> undef, float %314, i32 0 + %324 = insertelement <4 x float> %323, float %316, i32 1 + %325 = insertelement <4 x float> %324, float %318, i32 2 + %326 = insertelement <4 x float> %325, float 0.000000e+00, i32 3 + %327 = call float @llvm.AMDGPU.dp4(<4 x float> %322, <4 x float> %326) + %328 = call float @llvm.AMDGPU.rsq(float %327) + %329 = fmul float %314, %328 + %330 = fmul float %316, %328 + %331 = fmul float %318, %328 + %332 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) + %333 = extractelement <4 x 
float> %332, i32 0 + %334 = fsub float -0.000000e+00, %333 + %335 = fadd float 1.000000e+00, %334 + %336 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7) + %337 = extractelement <4 x float> %336, i32 0 + %338 = fsub float -0.000000e+00, %337 + %339 = fadd float 1.000000e+00, %338 + %340 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) + %341 = extractelement <4 x float> %340, i32 0 + %342 = fsub float -0.000000e+00, %341 + %343 = fadd float 1.000000e+00, %342 + %344 = fsub float -0.000000e+00, %335 + %345 = fadd float %202, %344 + %346 = fsub float -0.000000e+00, %339 + %347 = fadd float %202, %346 + %348 = fadd float %347, 0xBFE3333340000000 + %349 = fsub float -0.000000e+00, %202 + %350 = fsub float -0.000000e+00, %343 + %351 = fadd float %349, %350 + %352 = insertelement <4 x float> undef, float %43, i32 0 + %353 = insertelement <4 x float> %352, float %44, i32 1 + %354 = insertelement <4 x float> %353, float %45, i32 2 + %355 = insertelement <4 x float> %354, float 0.000000e+00, i32 3 + %356 = insertelement <4 x float> undef, float %43, i32 0 + %357 = insertelement <4 x float> %356, float %44, i32 1 + %358 = insertelement <4 x float> %357, float %45, i32 2 + %359 = insertelement <4 x float> %358, float 0.000000e+00, i32 3 + %360 = call float @llvm.AMDGPU.dp4(<4 x float> %355, <4 x float> %359) + %361 = call float @llvm.AMDGPU.rsq(float %360) + %362 = fmul float %45, %361 + %363 = call float @fabs(float %362) + %364 = fmul float %176, 0x3FECCCCCC0000000 + %365 = fadd float %364, %363 + %366 = fadd float %365, 0xBFEFAE1480000000 + %367 = fmul float %366, 0xC023FFFFC0000000 + %368 = call float @llvm.AMDIL.clamp.(float %367, float 0.000000e+00, float 1.000000e+00) + %369 = fsub float -0.000000e+00, %335 + %370 = fadd float %202, %369 + %371 = fadd float %370, 0x3FBEB851E0000000 + %372 = fsub float -0.000000e+00, %339 + %373 = fadd float %202, %372 + %374 = 
fadd float %373, 0xBFE0A3D700000000 + %375 = fsub float -0.000000e+00, %202 + %376 = fsub float -0.000000e+00, %343 + %377 = fadd float %375, %376 + %378 = insertelement <4 x float> undef, float %43, i32 0 + %379 = insertelement <4 x float> %378, float %44, i32 1 + %380 = insertelement <4 x float> %379, float %45, i32 2 + %381 = insertelement <4 x float> %380, float 0.000000e+00, i32 3 + %382 = insertelement <4 x float> undef, float %43, i32 0 + %383 = insertelement <4 x float> %382, float %44, i32 1 + %384 = insertelement <4 x float> %383, float %45, i32 2 + %385 = insertelement <4 x float> %384, float 0.000000e+00, i32 3 + %386 = call float @llvm.AMDGPU.dp4(<4 x float> %381, <4 x float> %385) + %387 = call float @llvm.AMDGPU.rsq(float %386) + %388 = fmul float %45, %387 + %389 = call float @fabs(float %388) + %390 = fmul float %176, 0x3FF51EB860000000 + %391 = fadd float %390, %389 + %392 = fadd float %391, 0xBFEFAE1480000000 + %393 = fmul float %392, 0xC0490001A0000000 + %394 = call float @llvm.AMDIL.clamp.(float %393, float 0.000000e+00, float 1.000000e+00) + %395 = fmul float 2.000000e+00, %368 + %396 = fsub float -0.000000e+00, %395 + %397 = fadd float 3.000000e+00, %396 + %398 = fmul float %368, %397 + %399 = fmul float %368, %398 + %400 = call float @llvm.AMDGPU.lrp(float %399, float %255, float %345) + %401 = call float @llvm.AMDGPU.lrp(float %399, float %256, float %348) + %402 = call float @llvm.AMDGPU.lrp(float %399, float %257, float %351) + %403 = call float @llvm.AMDGPU.lrp(float %399, float %258, float 0.000000e+00) + %404 = fmul float 2.000000e+00, %394 + %405 = fsub float -0.000000e+00, %404 + %406 = fadd float 3.000000e+00, %405 + %407 = fmul float %394, %406 + %408 = fmul float %394, %407 + %409 = call float @llvm.AMDGPU.lrp(float %408, float %255, float %371) + %410 = call float @llvm.AMDGPU.lrp(float %408, float %256, float %374) + %411 = call float @llvm.AMDGPU.lrp(float %408, float %257, float %377) + %412 = call float @llvm.AMDGPU.lrp(float 
%408, float %258, float 0x3FD3333340000000) + %413 = fcmp oge float 2.200000e+03, %179 + %414 = sext i1 %413 to i32 + %415 = bitcast i32 %414 to float + %416 = bitcast float %415 to i32 + %417 = icmp ne i32 %416, 0 + br i1 %417, label %IF161, label %ENDIF160 + +LOOP: ; preds = %ENDIF139, %IF137 + %temp88.0 = phi float [ 0.000000e+00, %IF137 ], [ %446, %ENDIF139 ] + %temp92.0 = phi float [ 1.000000e+00, %IF137 ], [ %.temp92.0, %ENDIF139 ] + %temp96.0 = phi float [ 0.000000e+00, %IF137 ], [ %477, %ENDIF139 ] + %418 = bitcast float %temp96.0 to i32 + %419 = icmp sge i32 %418, %137 + %420 = sext i1 %419 to i32 + %421 = bitcast i32 %420 to float + %422 = bitcast float %421 to i32 + %423 = icmp ne i32 %422, 0 + br i1 %423, label %IF140, label %ENDIF139 + +IF140: ; preds = %LOOP + %424 = fmul float %133, 5.000000e-01 + %425 = fmul float %129, %temp92.0 + %426 = fadd float %425, %22 + %427 = fmul float %130, %temp92.0 + %428 = fadd float %427, %23 + %429 = insertelement <4 x float> undef, float %426, i32 0 + %430 = insertelement <4 x float> %429, float %428, i32 1 + %431 = insertelement <4 x float> %430, float 0.000000e+00, i32 2 + %432 = insertelement <4 x float> %431, float 0.000000e+00, i32 3 + %433 = extractelement <4 x float> %432, i32 0 + %434 = extractelement <4 x float> %432, i32 1 + %435 = insertelement <4 x float> undef, float %433, i32 0 + %436 = insertelement <4 x float> %435, float %434, i32 1 + %437 = insertelement <4 x float> %436, float undef, i32 2 + %438 = insertelement <4 x float> %437, float undef, i32 3 + %439 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %438, i32 20, i32 4, i32 2) + %440 = extractelement <4 x float> %439, i32 3 + %441 = fcmp oge float %temp92.0, %440 + %442 = sext i1 %441 to i32 + %443 = bitcast i32 %442 to float + %444 = bitcast float %443 to i32 + %445 = icmp ne i32 %444, 0 + br i1 %445, label %IF146, label %ENDIF145 + +ENDIF139: ; preds = %LOOP + %446 = fadd float %temp88.0, %133 + %447 = fmul float %129, %446 + %448 = fadd 
float %447, %22 + %449 = fmul float %130, %446 + %450 = fadd float %449, %23 + %451 = insertelement <4 x float> undef, float %448, i32 0 + %452 = insertelement <4 x float> %451, float %450, i32 1 + %453 = insertelement <4 x float> %452, float 0.000000e+00, i32 2 + %454 = insertelement <4 x float> %453, float 0.000000e+00, i32 3 + %455 = extractelement <4 x float> %454, i32 0 + %456 = extractelement <4 x float> %454, i32 1 + %457 = insertelement <4 x float> undef, float %455, i32 0 + %458 = insertelement <4 x float> %457, float %456, i32 1 + %459 = insertelement <4 x float> %458, float undef, i32 2 + %460 = insertelement <4 x float> %459, float undef, i32 3 + %461 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %460, i32 20, i32 4, i32 2) + %462 = extractelement <4 x float> %461, i32 3 + %463 = fcmp olt float 0x3FEFDF3B60000000, %temp92.0 + %464 = sext i1 %463 to i32 + %465 = bitcast i32 %464 to float + %466 = fcmp oge float %446, %462 + %467 = sext i1 %466 to i32 + %468 = bitcast i32 %467 to float + %469 = bitcast float %465 to i32 + %470 = bitcast float %468 to i32 + %471 = and i32 %469, %470 + %472 = bitcast i32 %471 to float + %473 = bitcast float %472 to i32 + %474 = icmp ne i32 %473, 0 + %.temp92.0 = select i1 %474, float %446, float %temp92.0 + %475 = bitcast float %temp96.0 to i32 + %476 = add i32 %475, 1 + %477 = bitcast i32 %476 to float + br label %LOOP + +IF146: ; preds = %IF140 + %478 = fmul float 2.000000e+00, %424 + %479 = fsub float -0.000000e+00, %478 + %480 = fadd float %temp92.0, %479 + br label %ENDIF145 + +ENDIF145: ; preds = %IF140, %IF146 + %temp88.1 = phi float [ %480, %IF146 ], [ %temp92.0, %IF140 ] + %481 = fadd float %temp88.1, %424 + %482 = fmul float %424, 5.000000e-01 + %483 = fmul float %129, %481 + %484 = fadd float %483, %22 + %485 = fmul float %130, %481 + %486 = fadd float %485, %23 + %487 = insertelement <4 x float> undef, float %484, i32 0 + %488 = insertelement <4 x float> %487, float %486, i32 1 + %489 = insertelement <4 x 
float> %488, float 0.000000e+00, i32 2 + %490 = insertelement <4 x float> %489, float %440, i32 3 + %491 = extractelement <4 x float> %490, i32 0 + %492 = extractelement <4 x float> %490, i32 1 + %493 = insertelement <4 x float> undef, float %491, i32 0 + %494 = insertelement <4 x float> %493, float %492, i32 1 + %495 = insertelement <4 x float> %494, float undef, i32 2 + %496 = insertelement <4 x float> %495, float undef, i32 3 + %497 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %496, i32 20, i32 4, i32 2) + %498 = extractelement <4 x float> %497, i32 3 + %499 = fcmp oge float %481, %498 + %500 = sext i1 %499 to i32 + %501 = bitcast i32 %500 to float + %502 = bitcast float %501 to i32 + %503 = icmp ne i32 %502, 0 + br i1 %503, label %IF149, label %ENDIF148 + +IF149: ; preds = %ENDIF145 + %504 = fmul float 2.000000e+00, %482 + %505 = fsub float -0.000000e+00, %504 + %506 = fadd float %481, %505 + br label %ENDIF148 + +ENDIF148: ; preds = %ENDIF145, %IF149 + %temp88.2 = phi float [ %506, %IF149 ], [ %481, %ENDIF145 ] + %temp92.2 = phi float [ %481, %IF149 ], [ %temp92.0, %ENDIF145 ] + %507 = fadd float %temp88.2, %482 + %508 = fmul float %482, 5.000000e-01 + %509 = fmul float %129, %507 + %510 = fadd float %509, %22 + %511 = fmul float %130, %507 + %512 = fadd float %511, %23 + %513 = insertelement <4 x float> undef, float %510, i32 0 + %514 = insertelement <4 x float> %513, float %512, i32 1 + %515 = insertelement <4 x float> %514, float 0.000000e+00, i32 2 + %516 = insertelement <4 x float> %515, float %498, i32 3 + %517 = extractelement <4 x float> %516, i32 0 + %518 = extractelement <4 x float> %516, i32 1 + %519 = insertelement <4 x float> undef, float %517, i32 0 + %520 = insertelement <4 x float> %519, float %518, i32 1 + %521 = insertelement <4 x float> %520, float undef, i32 2 + %522 = insertelement <4 x float> %521, float undef, i32 3 + %523 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %522, i32 20, i32 4, i32 2) + %524 = extractelement <4 x float> 
%523, i32 3 + %525 = fcmp oge float %507, %524 + %526 = sext i1 %525 to i32 + %527 = bitcast i32 %526 to float + %528 = bitcast float %527 to i32 + %529 = icmp ne i32 %528, 0 + br i1 %529, label %IF152, label %ENDIF151 + +IF152: ; preds = %ENDIF148 + %530 = fmul float 2.000000e+00, %508 + %531 = fsub float -0.000000e+00, %530 + %532 = fadd float %507, %531 + br label %ENDIF151 + +ENDIF151: ; preds = %ENDIF148, %IF152 + %temp88.3 = phi float [ %532, %IF152 ], [ %507, %ENDIF148 ] + %temp92.3 = phi float [ %507, %IF152 ], [ %temp92.2, %ENDIF148 ] + %533 = fadd float %temp88.3, %508 + %534 = fmul float %508, 5.000000e-01 + %535 = fmul float %129, %533 + %536 = fadd float %535, %22 + %537 = fmul float %130, %533 + %538 = fadd float %537, %23 + %539 = insertelement <4 x float> undef, float %536, i32 0 + %540 = insertelement <4 x float> %539, float %538, i32 1 + %541 = insertelement <4 x float> %540, float 0.000000e+00, i32 2 + %542 = insertelement <4 x float> %541, float %524, i32 3 + %543 = extractelement <4 x float> %542, i32 0 + %544 = extractelement <4 x float> %542, i32 1 + %545 = insertelement <4 x float> undef, float %543, i32 0 + %546 = insertelement <4 x float> %545, float %544, i32 1 + %547 = insertelement <4 x float> %546, float undef, i32 2 + %548 = insertelement <4 x float> %547, float undef, i32 3 + %549 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %548, i32 20, i32 4, i32 2) + %550 = extractelement <4 x float> %549, i32 3 + %551 = fcmp oge float %533, %550 + %552 = sext i1 %551 to i32 + %553 = bitcast i32 %552 to float + %554 = bitcast float %553 to i32 + %555 = icmp ne i32 %554, 0 + br i1 %555, label %IF155, label %ENDIF154 + +IF155: ; preds = %ENDIF151 + %556 = fmul float 2.000000e+00, %534 + %557 = fsub float -0.000000e+00, %556 + %558 = fadd float %533, %557 + br label %ENDIF154 + +ENDIF154: ; preds = %ENDIF151, %IF155 + %temp88.4 = phi float [ %558, %IF155 ], [ %533, %ENDIF151 ] + %temp92.4 = phi float [ %533, %IF155 ], [ %temp92.3, %ENDIF151 ] + 
%559 = fadd float %temp88.4, %534 + %560 = fmul float %129, %559 + %561 = fadd float %560, %22 + %562 = fmul float %130, %559 + %563 = fadd float %562, %23 + %564 = insertelement <4 x float> undef, float %561, i32 0 + %565 = insertelement <4 x float> %564, float %563, i32 1 + %566 = insertelement <4 x float> %565, float 0.000000e+00, i32 2 + %567 = insertelement <4 x float> %566, float %550, i32 3 + %568 = extractelement <4 x float> %567, i32 0 + %569 = extractelement <4 x float> %567, i32 1 + %570 = insertelement <4 x float> undef, float %568, i32 0 + %571 = insertelement <4 x float> %570, float %569, i32 1 + %572 = insertelement <4 x float> %571, float undef, i32 2 + %573 = insertelement <4 x float> %572, float undef, i32 3 + %574 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %573, i32 20, i32 4, i32 2) + %575 = extractelement <4 x float> %574, i32 3 + %576 = fcmp oge float %559, %575 + %577 = sext i1 %576 to i32 + %578 = bitcast i32 %577 to float + %579 = bitcast float %578 to i32 + %580 = icmp ne i32 %579, 0 + %.temp92.4 = select i1 %580, float %559, float %temp92.4 + %581 = fmul float %129, %.temp92.4 + %582 = fadd float %581, %22 + %583 = fmul float %130, %.temp92.4 + %584 = fadd float %583, %23 + %585 = insertelement <4 x float> undef, float %582, i32 0 + %586 = insertelement <4 x float> %585, float %584, i32 1 + %587 = insertelement <4 x float> %586, float 0.000000e+00, i32 2 + %588 = insertelement <4 x float> %587, float %575, i32 3 + %589 = extractelement <4 x float> %588, i32 0 + %590 = extractelement <4 x float> %588, i32 1 + %591 = insertelement <4 x float> undef, float %589, i32 0 + %592 = insertelement <4 x float> %591, float %590, i32 1 + %593 = insertelement <4 x float> %592, float undef, i32 2 + %594 = insertelement <4 x float> %593, float undef, i32 3 + %595 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %594, i32 20, i32 4, i32 2) + %596 = extractelement <4 x float> %595, i32 0 + %597 = extractelement <4 x float> %595, i32 1 + %598 = 
extractelement <4 x float> %595, i32 2 + %599 = fmul float %596, 2.000000e+00 + %600 = fadd float %599, -1.000000e+00 + %601 = fmul float %597, 2.000000e+00 + %602 = fadd float %601, -1.000000e+00 + %603 = fmul float %598, 2.000000e+00 + %604 = fadd float %603, -1.000000e+00 + br label %ENDIF136 + +IF161: ; preds = %ENDIF136 + %605 = fmul float %202, 0x3FB99999A0000000 + %606 = fcmp uge float 0x3FE4CCCCC0000000, %605 + %607 = select i1 %606, float 0x3FE4CCCCC0000000, float %605 + %608 = fcmp uge float %607, 5.000000e-01 + %609 = select i1 %608, float 5.000000e-01, float %607 + %610 = call float @llvm.AMDGPU.lrp(float %609, float %400, float %300) + %611 = call float @llvm.AMDGPU.lrp(float %609, float %401, float %301) + %612 = call float @llvm.AMDGPU.lrp(float %609, float %402, float %302) + %613 = call float @llvm.AMDGPU.lrp(float %609, float %403, float %303) + %614 = insertelement <4 x float> undef, float %329, i32 0 + %615 = insertelement <4 x float> %614, float %330, i32 1 + %616 = insertelement <4 x float> %615, float %331, i32 2 + %617 = insertelement <4 x float> %616, float 0.000000e+00, i32 3 + %618 = insertelement <4 x float> undef, float %63, i32 0 + %619 = insertelement <4 x float> %618, float %65, i32 1 + %620 = insertelement <4 x float> %619, float %67, i32 2 + %621 = insertelement <4 x float> %620, float 0.000000e+00, i32 3 + %622 = call float @llvm.AMDGPU.dp4(<4 x float> %617, <4 x float> %621) + %623 = fcmp uge float 0x3FE6666660000000, %622 + %624 = select i1 %623, float 0x3FE6666660000000, float %622 + %625 = fmul float %8, %624 + %626 = fmul float %13, %624 + %627 = fmul float %18, %624 + %628 = insertelement <4 x float> undef, float %34, i32 0 + %629 = insertelement <4 x float> %628, float %35, i32 1 + %630 = insertelement <4 x float> %629, float %36, i32 2 + %631 = insertelement <4 x float> %630, float 0.000000e+00, i32 3 + %632 = insertelement <4 x float> undef, float %63, i32 0 + %633 = insertelement <4 x float> %632, float %65, i32 1 + %634 
= insertelement <4 x float> %633, float %67, i32 2 + %635 = insertelement <4 x float> %634, float 0.000000e+00, i32 3 + %636 = call float @llvm.AMDGPU.dp4(<4 x float> %631, <4 x float> %635) + %637 = fcmp uge float 0x3FECCCCCC0000000, %636 + %638 = select i1 %637, float 0x3FECCCCCC0000000, float %636 + %639 = fmul float %625, %638 + %640 = fmul float %626, %638 + %641 = fmul float %627, %638 + br label %ENDIF160 + +ENDIF160: ; preds = %ENDIF136, %IF161 + %temp84.0 = phi float [ %610, %IF161 ], [ %255, %ENDIF136 ] + %temp85.0 = phi float [ %611, %IF161 ], [ %256, %ENDIF136 ] + %temp86.0 = phi float [ %612, %IF161 ], [ %257, %ENDIF136 ] + %temp87.0 = phi float [ %613, %IF161 ], [ %258, %ENDIF136 ] + %temp92.6 = phi float [ %639, %IF161 ], [ %415, %ENDIF136 ] + %temp93.0 = phi float [ %640, %IF161 ], [ 0.000000e+00, %ENDIF136 ] + %temp94.0 = phi float [ %641, %IF161 ], [ 0.000000e+00, %ENDIF136 ] + %642 = fcmp olt float 2.200000e+03, %179 + %643 = sext i1 %642 to i32 + %644 = bitcast i32 %643 to float + %645 = fcmp olt float %179, 2.300000e+03 + %646 = sext i1 %645 to i32 + %647 = bitcast i32 %646 to float + %648 = bitcast float %644 to i32 + %649 = bitcast float %647 to i32 + %650 = and i32 %648, %649 + %651 = bitcast i32 %650 to float + %652 = bitcast float %651 to i32 + %653 = icmp ne i32 %652, 0 + br i1 %653, label %IF164, label %ENDIF163 + +IF164: ; preds = %ENDIF160 + %654 = fmul float %202, 5.000000e-01 + %655 = fcmp uge float 0x3FE4CCCCC0000000, %654 + %656 = select i1 %655, float 0x3FE4CCCCC0000000, float %654 + %657 = fcmp uge float %656, 0x3FD6666660000000 + %658 = select i1 %657, float 0x3FD6666660000000, float %656 + %659 = call float @llvm.AMDGPU.lrp(float %658, float %400, float %300) + %660 = call float @llvm.AMDGPU.lrp(float %658, float %401, float %301) + %661 = call float @llvm.AMDGPU.lrp(float %658, float %402, float %302) + %662 = call float @llvm.AMDGPU.lrp(float %658, float %403, float %303) + %663 = insertelement <4 x float> undef, float %329, 
i32 0 + %664 = insertelement <4 x float> %663, float %330, i32 1 + %665 = insertelement <4 x float> %664, float %331, i32 2 + %666 = insertelement <4 x float> %665, float 0.000000e+00, i32 3 + %667 = insertelement <4 x float> undef, float %63, i32 0 + %668 = insertelement <4 x float> %667, float %65, i32 1 + %669 = insertelement <4 x float> %668, float %67, i32 2 + %670 = insertelement <4 x float> %669, float 0.000000e+00, i32 3 + %671 = call float @llvm.AMDGPU.dp4(<4 x float> %666, <4 x float> %670) + %672 = fcmp uge float 0x3FE6666660000000, %671 + %673 = select i1 %672, float 0x3FE6666660000000, float %671 + %674 = fmul float %8, %673 + %675 = fmul float %13, %673 + %676 = fmul float %18, %673 + %677 = insertelement <4 x float> undef, float %34, i32 0 + %678 = insertelement <4 x float> %677, float %35, i32 1 + %679 = insertelement <4 x float> %678, float %36, i32 2 + %680 = insertelement <4 x float> %679, float 0.000000e+00, i32 3 + %681 = insertelement <4 x float> undef, float %63, i32 0 + %682 = insertelement <4 x float> %681, float %65, i32 1 + %683 = insertelement <4 x float> %682, float %67, i32 2 + %684 = insertelement <4 x float> %683, float 0.000000e+00, i32 3 + %685 = call float @llvm.AMDGPU.dp4(<4 x float> %680, <4 x float> %684) + %686 = fcmp uge float 0x3FECCCCCC0000000, %685 + %687 = select i1 %686, float 0x3FECCCCCC0000000, float %685 + %688 = fmul float %674, %687 + %689 = fmul float %675, %687 + %690 = fmul float %676, %687 + br label %ENDIF163 + +ENDIF163: ; preds = %ENDIF160, %IF164 + %temp84.1 = phi float [ %659, %IF164 ], [ %temp84.0, %ENDIF160 ] + %temp85.1 = phi float [ %660, %IF164 ], [ %temp85.0, %ENDIF160 ] + %temp86.1 = phi float [ %661, %IF164 ], [ %temp86.0, %ENDIF160 ] + %temp87.1 = phi float [ %662, %IF164 ], [ %temp87.0, %ENDIF160 ] + %temp92.7 = phi float [ %688, %IF164 ], [ %temp92.6, %ENDIF160 ] + %temp93.1 = phi float [ %689, %IF164 ], [ %temp93.0, %ENDIF160 ] + %temp94.1 = phi float [ %690, %IF164 ], [ %temp94.0, %ENDIF160 ] + 
%691 = fcmp oge float %179, 2.300000e+03 + %692 = sext i1 %691 to i32 + %693 = bitcast i32 %692 to float + %694 = fcmp olt float %179, 2.480000e+03 + %695 = sext i1 %694 to i32 + %696 = bitcast i32 %695 to float + %697 = bitcast float %693 to i32 + %698 = bitcast float %696 to i32 + %699 = and i32 %697, %698 + %700 = bitcast i32 %699 to float + %701 = bitcast float %700 to i32 + %702 = icmp ne i32 %701, 0 + br i1 %702, label %IF167, label %ENDIF166 + +IF167: ; preds = %ENDIF163 + %703 = fmul float %202, 5.000000e-01 + %704 = fcmp uge float 0x3FE4CCCCC0000000, %703 + %705 = select i1 %704, float 0x3FE4CCCCC0000000, float %703 + %706 = fcmp uge float %705, 0x3FD3333340000000 + %707 = select i1 %706, float 0x3FD3333340000000, float %705 + %708 = call float @llvm.AMDGPU.lrp(float %707, float %409, float %300) + %709 = call float @llvm.AMDGPU.lrp(float %707, float %410, float %301) + %710 = call float @llvm.AMDGPU.lrp(float %707, float %411, float %302) + %711 = call float @llvm.AMDGPU.lrp(float %707, float %412, float %303) + %712 = insertelement <4 x float> undef, float %329, i32 0 + %713 = insertelement <4 x float> %712, float %330, i32 1 + %714 = insertelement <4 x float> %713, float %331, i32 2 + %715 = insertelement <4 x float> %714, float 0.000000e+00, i32 3 + %716 = insertelement <4 x float> undef, float %63, i32 0 + %717 = insertelement <4 x float> %716, float %65, i32 1 + %718 = insertelement <4 x float> %717, float %67, i32 2 + %719 = insertelement <4 x float> %718, float 0.000000e+00, i32 3 + %720 = call float @llvm.AMDGPU.dp4(<4 x float> %715, <4 x float> %719) + %721 = fcmp uge float 0x3FEB333340000000, %720 + %722 = select i1 %721, float 0x3FEB333340000000, float %720 + %723 = fmul float %8, %722 + %724 = fmul float %13, %722 + %725 = fmul float %18, %722 + %726 = insertelement <4 x float> undef, float %34, i32 0 + %727 = insertelement <4 x float> %726, float %35, i32 1 + %728 = insertelement <4 x float> %727, float %36, i32 2 + %729 = insertelement <4 x 
float> %728, float 0.000000e+00, i32 3 + %730 = insertelement <4 x float> undef, float %63, i32 0 + %731 = insertelement <4 x float> %730, float %65, i32 1 + %732 = insertelement <4 x float> %731, float %67, i32 2 + %733 = insertelement <4 x float> %732, float 0.000000e+00, i32 3 + %734 = call float @llvm.AMDGPU.dp4(<4 x float> %729, <4 x float> %733) + %735 = fcmp uge float 0x3FECCCCCC0000000, %734 + %736 = select i1 %735, float 0x3FECCCCCC0000000, float %734 + %737 = fmul float %723, %736 + %738 = fmul float %724, %736 + %739 = fmul float %725, %736 + br label %ENDIF166 + +ENDIF166: ; preds = %ENDIF163, %IF167 + %temp84.2 = phi float [ %708, %IF167 ], [ %temp84.1, %ENDIF163 ] + %temp85.2 = phi float [ %709, %IF167 ], [ %temp85.1, %ENDIF163 ] + %temp86.2 = phi float [ %710, %IF167 ], [ %temp86.1, %ENDIF163 ] + %temp87.2 = phi float [ %711, %IF167 ], [ %temp87.1, %ENDIF163 ] + %temp92.8 = phi float [ %737, %IF167 ], [ %temp92.7, %ENDIF163 ] + %temp93.2 = phi float [ %738, %IF167 ], [ %temp93.1, %ENDIF163 ] + %temp94.2 = phi float [ %739, %IF167 ], [ %temp94.1, %ENDIF163 ] + %740 = fcmp oge float %179, 2.480000e+03 + %741 = sext i1 %740 to i32 + %742 = bitcast i32 %741 to float + %743 = fcmp olt float %179, 2.530000e+03 + %744 = sext i1 %743 to i32 + %745 = bitcast i32 %744 to float + %746 = bitcast float %742 to i32 + %747 = bitcast float %745 to i32 + %748 = and i32 %746, %747 + %749 = bitcast i32 %748 to float + %750 = bitcast float %749 to i32 + %751 = icmp ne i32 %750, 0 + br i1 %751, label %IF170, label %ENDIF169 + +IF170: ; preds = %ENDIF166 + %752 = fmul float %202, 5.000000e-01 + %753 = fcmp uge float 0x3FE4CCCCC0000000, %752 + %754 = select i1 %753, float 0x3FE4CCCCC0000000, float %752 + %755 = fcmp uge float %754, 0x3FC99999A0000000 + %756 = select i1 %755, float 0x3FC99999A0000000, float %754 + %757 = call float @llvm.AMDGPU.lrp(float %756, float %409, float %300) + %758 = call float @llvm.AMDGPU.lrp(float %756, float %410, float %301) + %759 = call 
float @llvm.AMDGPU.lrp(float %756, float %411, float %302) + %760 = call float @llvm.AMDGPU.lrp(float %756, float %412, float %303) + %761 = insertelement <4 x float> undef, float %329, i32 0 + %762 = insertelement <4 x float> %761, float %330, i32 1 + %763 = insertelement <4 x float> %762, float %331, i32 2 + %764 = insertelement <4 x float> %763, float 0.000000e+00, i32 3 + %765 = insertelement <4 x float> undef, float %63, i32 0 + %766 = insertelement <4 x float> %765, float %65, i32 1 + %767 = insertelement <4 x float> %766, float %67, i32 2 + %768 = insertelement <4 x float> %767, float 0.000000e+00, i32 3 + %769 = call float @llvm.AMDGPU.dp4(<4 x float> %764, <4 x float> %768) + %770 = fcmp uge float 0x3FEB333340000000, %769 + %771 = select i1 %770, float 0x3FEB333340000000, float %769 + %772 = fmul float %8, %771 + %773 = fmul float %13, %771 + %774 = fmul float %18, %771 + %775 = insertelement <4 x float> undef, float %34, i32 0 + %776 = insertelement <4 x float> %775, float %35, i32 1 + %777 = insertelement <4 x float> %776, float %36, i32 2 + %778 = insertelement <4 x float> %777, float 0.000000e+00, i32 3 + %779 = insertelement <4 x float> undef, float %63, i32 0 + %780 = insertelement <4 x float> %779, float %65, i32 1 + %781 = insertelement <4 x float> %780, float %67, i32 2 + %782 = insertelement <4 x float> %781, float 0.000000e+00, i32 3 + %783 = call float @llvm.AMDGPU.dp4(<4 x float> %778, <4 x float> %782) + %784 = fcmp uge float 0x3FECCCCCC0000000, %783 + %785 = select i1 %784, float 0x3FECCCCCC0000000, float %783 + %786 = fmul float %772, %785 + %787 = fmul float %773, %785 + %788 = fmul float %774, %785 + br label %ENDIF169 + +ENDIF169: ; preds = %ENDIF166, %IF170 + %temp84.3 = phi float [ %757, %IF170 ], [ %temp84.2, %ENDIF166 ] + %temp85.3 = phi float [ %758, %IF170 ], [ %temp85.2, %ENDIF166 ] + %temp86.3 = phi float [ %759, %IF170 ], [ %temp86.2, %ENDIF166 ] + %temp87.3 = phi float [ %760, %IF170 ], [ %temp87.2, %ENDIF166 ] + %temp92.9 = 
phi float [ %786, %IF170 ], [ %temp92.8, %ENDIF166 ] + %temp93.3 = phi float [ %787, %IF170 ], [ %temp93.2, %ENDIF166 ] + %temp94.3 = phi float [ %788, %IF170 ], [ %temp94.2, %ENDIF166 ] + %789 = fcmp oge float %179, 2.530000e+03 + %790 = sext i1 %789 to i32 + %791 = bitcast i32 %790 to float + %792 = fcmp olt float %179, 2.670000e+03 + %793 = sext i1 %792 to i32 + %794 = bitcast i32 %793 to float + %795 = bitcast float %791 to i32 + %796 = bitcast float %794 to i32 + %797 = and i32 %795, %796 + %798 = bitcast i32 %797 to float + %799 = bitcast float %798 to i32 + %800 = icmp ne i32 %799, 0 + br i1 %800, label %IF173, label %ENDIF172 + +IF173: ; preds = %ENDIF169 + %801 = fmul float %202, 5.000000e-01 + %802 = fcmp uge float 0x3FE4CCCCC0000000, %801 + %803 = select i1 %802, float 0x3FE4CCCCC0000000, float %801 + %804 = fcmp uge float %803, 0x3FB99999A0000000 + %805 = select i1 %804, float 0x3FB99999A0000000, float %803 + %806 = call float @llvm.AMDGPU.lrp(float %805, float %400, float %300) + %807 = call float @llvm.AMDGPU.lrp(float %805, float %401, float %301) + %808 = call float @llvm.AMDGPU.lrp(float %805, float %402, float %302) + %809 = call float @llvm.AMDGPU.lrp(float %805, float %403, float %303) + %810 = insertelement <4 x float> undef, float %329, i32 0 + %811 = insertelement <4 x float> %810, float %330, i32 1 + %812 = insertelement <4 x float> %811, float %331, i32 2 + %813 = insertelement <4 x float> %812, float 0.000000e+00, i32 3 + %814 = insertelement <4 x float> undef, float %63, i32 0 + %815 = insertelement <4 x float> %814, float %65, i32 1 + %816 = insertelement <4 x float> %815, float %67, i32 2 + %817 = insertelement <4 x float> %816, float 0.000000e+00, i32 3 + %818 = call float @llvm.AMDGPU.dp4(<4 x float> %813, <4 x float> %817) + %819 = fcmp uge float 0x3FEB333340000000, %818 + %820 = select i1 %819, float 0x3FEB333340000000, float %818 + %821 = fmul float %8, %820 + %822 = fmul float %13, %820 + %823 = fmul float %18, %820 + %824 = 
insertelement <4 x float> undef, float %34, i32 0 + %825 = insertelement <4 x float> %824, float %35, i32 1 + %826 = insertelement <4 x float> %825, float %36, i32 2 + %827 = insertelement <4 x float> %826, float 0.000000e+00, i32 3 + %828 = insertelement <4 x float> undef, float %63, i32 0 + %829 = insertelement <4 x float> %828, float %65, i32 1 + %830 = insertelement <4 x float> %829, float %67, i32 2 + %831 = insertelement <4 x float> %830, float 0.000000e+00, i32 3 + %832 = call float @llvm.AMDGPU.dp4(<4 x float> %827, <4 x float> %831) + %833 = fcmp uge float 0x3FECCCCCC0000000, %832 + %834 = select i1 %833, float 0x3FECCCCCC0000000, float %832 + %835 = fmul float %821, %834 + %836 = fmul float %822, %834 + %837 = fmul float %823, %834 + br label %ENDIF172 + +ENDIF172: ; preds = %ENDIF169, %IF173 + %temp84.4 = phi float [ %806, %IF173 ], [ %temp84.3, %ENDIF169 ] + %temp85.4 = phi float [ %807, %IF173 ], [ %temp85.3, %ENDIF169 ] + %temp86.4 = phi float [ %808, %IF173 ], [ %temp86.3, %ENDIF169 ] + %temp87.4 = phi float [ %809, %IF173 ], [ %temp87.3, %ENDIF169 ] + %temp92.10 = phi float [ %835, %IF173 ], [ %temp92.9, %ENDIF169 ] + %temp93.4 = phi float [ %836, %IF173 ], [ %temp93.3, %ENDIF169 ] + %temp94.4 = phi float [ %837, %IF173 ], [ %temp94.3, %ENDIF169 ] + %838 = fcmp oge float %179, 2.670000e+03 + %839 = sext i1 %838 to i32 + %840 = bitcast i32 %839 to float + %841 = bitcast float %840 to i32 + %842 = icmp ne i32 %841, 0 + br i1 %842, label %IF176, label %ENDIF175 + +IF176: ; preds = %ENDIF172 + %843 = fmul float %202, 0x3FB99999A0000000 + %844 = fcmp uge float 0.000000e+00, %843 + %845 = select i1 %844, float 0.000000e+00, float %843 + %846 = fcmp uge float %845, 0x3FD99999A0000000 + %847 = select i1 %846, float 0x3FD99999A0000000, float %845 + %848 = call float @llvm.AMDGPU.lrp(float %847, float %400, float %300) + %849 = call float @llvm.AMDGPU.lrp(float %847, float %401, float %301) + %850 = call float @llvm.AMDGPU.lrp(float %847, float %402, float 
%302) + %851 = call float @llvm.AMDGPU.lrp(float %847, float %403, float %303) + %852 = insertelement <4 x float> undef, float %329, i32 0 + %853 = insertelement <4 x float> %852, float %330, i32 1 + %854 = insertelement <4 x float> %853, float %331, i32 2 + %855 = insertelement <4 x float> %854, float 0.000000e+00, i32 3 + %856 = insertelement <4 x float> undef, float %63, i32 0 + %857 = insertelement <4 x float> %856, float %65, i32 1 + %858 = insertelement <4 x float> %857, float %67, i32 2 + %859 = insertelement <4 x float> %858, float 0.000000e+00, i32 3 + %860 = call float @llvm.AMDGPU.dp4(<4 x float> %855, <4 x float> %859) + %861 = fcmp uge float 0x3FEB333340000000, %860 + %862 = select i1 %861, float 0x3FEB333340000000, float %860 + %863 = fmul float %8, %862 + %864 = fmul float %13, %862 + %865 = fmul float %18, %862 + %866 = insertelement <4 x float> undef, float %34, i32 0 + %867 = insertelement <4 x float> %866, float %35, i32 1 + %868 = insertelement <4 x float> %867, float %36, i32 2 + %869 = insertelement <4 x float> %868, float 0.000000e+00, i32 3 + %870 = insertelement <4 x float> undef, float %63, i32 0 + %871 = insertelement <4 x float> %870, float %65, i32 1 + %872 = insertelement <4 x float> %871, float %67, i32 2 + %873 = insertelement <4 x float> %872, float 0.000000e+00, i32 3 + %874 = call float @llvm.AMDGPU.dp4(<4 x float> %869, <4 x float> %873) + %875 = fcmp uge float 0x3FECCCCCC0000000, %874 + %876 = select i1 %875, float 0x3FECCCCCC0000000, float %874 + %877 = fmul float %863, %876 + %878 = fmul float %864, %876 + %879 = fmul float %865, %876 + br label %ENDIF175 + +ENDIF175: ; preds = %ENDIF172, %IF176 + %temp84.5 = phi float [ %848, %IF176 ], [ %temp84.4, %ENDIF172 ] + %temp85.5 = phi float [ %849, %IF176 ], [ %temp85.4, %ENDIF172 ] + %temp86.5 = phi float [ %850, %IF176 ], [ %temp86.4, %ENDIF172 ] + %temp87.5 = phi float [ %851, %IF176 ], [ %temp87.4, %ENDIF172 ] + %temp92.11 = phi float [ %877, %IF176 ], [ %temp92.10, %ENDIF172 ] 
+ %temp93.5 = phi float [ %878, %IF176 ], [ %temp93.4, %ENDIF172 ] + %temp94.5 = phi float [ %879, %IF176 ], [ %temp94.4, %ENDIF172 ] + %880 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) + %881 = extractelement <4 x float> %880, i32 0 + %882 = fcmp olt float %881, %179 + %883 = sext i1 %882 to i32 + %884 = bitcast i32 %883 to float + %885 = bitcast float %884 to i32 + %886 = icmp ne i32 %885, 0 + br i1 %886, label %IF179, label %ENDIF178 + +IF179: ; preds = %ENDIF175 + %887 = fadd float %202, 1.000000e+00 + %888 = fadd float %202, 1.000000e+00 + %889 = fadd float %202, 1.000000e+00 + %890 = insertelement <4 x float> undef, float %43, i32 0 + %891 = insertelement <4 x float> %890, float %44, i32 1 + %892 = insertelement <4 x float> %891, float %45, i32 2 + %893 = insertelement <4 x float> %892, float 0.000000e+00, i32 3 + %894 = insertelement <4 x float> undef, float %43, i32 0 + %895 = insertelement <4 x float> %894, float %44, i32 1 + %896 = insertelement <4 x float> %895, float %45, i32 2 + %897 = insertelement <4 x float> %896, float 0.000000e+00, i32 3 + %898 = call float @llvm.AMDGPU.dp4(<4 x float> %893, <4 x float> %897) + %899 = call float @llvm.AMDGPU.rsq(float %898) + %900 = fmul float %45, %899 + %901 = call float @fabs(float %900) + %902 = fmul float %176, 0x3FECCCCCC0000000 + %903 = fadd float %902, %901 + %904 = fadd float %903, 0xBFEFAE1480000000 + %905 = fmul float %904, 0xC043FFFE20000000 + %906 = call float @llvm.AMDIL.clamp.(float %905, float 0.000000e+00, float 1.000000e+00) + %907 = fmul float 2.000000e+00, %906 + %908 = fsub float -0.000000e+00, %907 + %909 = fadd float 3.000000e+00, %908 + %910 = fmul float %906, %909 + %911 = fmul float %906, %910 + %912 = call float @llvm.AMDGPU.lrp(float %911, float %temp84.5, float %887) + %913 = call float @llvm.AMDGPU.lrp(float %911, float %temp85.5, float %888) + %914 = call float @llvm.AMDGPU.lrp(float %911, float %temp86.5, float %889) + %915 
= call float @llvm.AMDGPU.lrp(float %911, float %temp87.5, float 0.000000e+00) + %916 = fmul float %202, 5.000000e-01 + %917 = fcmp uge float 0x3FE4CCCCC0000000, %916 + %918 = select i1 %917, float 0x3FE4CCCCC0000000, float %916 + %919 = fcmp uge float %918, 0x3FE3333340000000 + %920 = select i1 %919, float 0x3FE3333340000000, float %918 + %921 = call float @llvm.AMDGPU.lrp(float %920, float %912, float %temp84.5) + %922 = call float @llvm.AMDGPU.lrp(float %920, float %913, float %temp85.5) + %923 = call float @llvm.AMDGPU.lrp(float %920, float %914, float %temp86.5) + %924 = call float @llvm.AMDGPU.lrp(float %920, float %915, float %temp87.5) + %925 = insertelement <4 x float> undef, float %329, i32 0 + %926 = insertelement <4 x float> %925, float %330, i32 1 + %927 = insertelement <4 x float> %926, float %331, i32 2 + %928 = insertelement <4 x float> %927, float 0.000000e+00, i32 3 + %929 = insertelement <4 x float> undef, float %63, i32 0 + %930 = insertelement <4 x float> %929, float %65, i32 1 + %931 = insertelement <4 x float> %930, float %67, i32 2 + %932 = insertelement <4 x float> %931, float 0.000000e+00, i32 3 + %933 = call float @llvm.AMDGPU.dp4(<4 x float> %928, <4 x float> %932) + %934 = fcmp uge float 0x3FE99999A0000000, %933 + %935 = select i1 %934, float 0x3FE99999A0000000, float %933 + %936 = fmul float %8, %935 + %937 = fmul float %13, %935 + %938 = fmul float %18, %935 + %939 = insertelement <4 x float> undef, float %34, i32 0 + %940 = insertelement <4 x float> %939, float %35, i32 1 + %941 = insertelement <4 x float> %940, float %36, i32 2 + %942 = insertelement <4 x float> %941, float 0.000000e+00, i32 3 + %943 = insertelement <4 x float> undef, float %63, i32 0 + %944 = insertelement <4 x float> %943, float %65, i32 1 + %945 = insertelement <4 x float> %944, float %67, i32 2 + %946 = insertelement <4 x float> %945, float 0.000000e+00, i32 3 + %947 = call float @llvm.AMDGPU.dp4(<4 x float> %942, <4 x float> %946) + %948 = fcmp uge float 
0x3FECCCCCC0000000, %947 + %949 = select i1 %948, float 0x3FECCCCCC0000000, float %947 + %950 = fmul float %936, %949 + %951 = fmul float %937, %949 + %952 = fmul float %938, %949 + br label %ENDIF178 + +ENDIF178: ; preds = %ENDIF175, %IF179 + %temp84.6 = phi float [ %921, %IF179 ], [ %temp84.5, %ENDIF175 ] + %temp85.6 = phi float [ %922, %IF179 ], [ %temp85.5, %ENDIF175 ] + %temp86.6 = phi float [ %923, %IF179 ], [ %temp86.5, %ENDIF175 ] + %temp87.6 = phi float [ %924, %IF179 ], [ %temp87.5, %ENDIF175 ] + %temp92.12 = phi float [ %950, %IF179 ], [ %temp92.11, %ENDIF175 ] + %temp93.6 = phi float [ %951, %IF179 ], [ %temp93.5, %ENDIF175 ] + %temp94.6 = phi float [ %952, %IF179 ], [ %temp94.5, %ENDIF175 ] + %953 = fmul float %55, %temp92.12 + %954 = fmul float %57, %temp93.6 + %955 = fmul float %59, %temp94.6 + %956 = fmul float %61, 0.000000e+00 + %957 = fmul float %temp84.6, %953 + %958 = fmul float %temp85.6, %954 + %959 = fmul float %temp86.6, %955 + %960 = fmul float %temp87.6, %956 + %961 = fmul float %2, -2.000000e+00 + %962 = fadd float %961, 1.000000e+00 + %963 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 23) + %964 = extractelement <4 x float> %963, i32 2 + %965 = fsub float -0.000000e+00, %964 + %966 = fadd float %962, %965 + %967 = fdiv float 1.000000e+00, %966 + %968 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 24) + %969 = extractelement <4 x float> %968, i32 2 + %970 = fmul float %969, %967 + %971 = fsub float -0.000000e+00, %53 + %972 = fmul float %971, %53 + %973 = fmul float %972, %970 + %974 = fmul float %973, %970 + %975 = fmul float %974, 0x3FF7154760000000 + %976 = call float @llvm.AMDIL.exp.(float %975) + %977 = fcmp oeq float %53, 1.000000e+00 + %978 = sext i1 %977 to i32 + %979 = bitcast i32 %978 to float + %980 = bitcast float %979 to i32 + %981 = icmp ne i32 %980, 0 + %.184 = select i1 %981, float 1.000000e+00, float %976 + %982 = 
call float @llvm.AMDGPU.lrp(float %.184, float %957, float %47) + %983 = call float @llvm.AMDGPU.lrp(float %.184, float %958, float %49) + %984 = call float @llvm.AMDGPU.lrp(float %.184, float %959, float %51) + %985 = insertelement <4 x float> undef, float %982, i32 0 + %986 = insertelement <4 x float> %985, float %983, i32 1 + %987 = insertelement <4 x float> %986, float %984, i32 2 + %988 = insertelement <4 x float> %987, float %960, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %988, i32 0, i32 0) + ret void +} + +; Function Attrs: readnone +declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 + +; Function Attrs: readnone +declare float @llvm.AMDGPU.rsq(float) #1 + +; Function Attrs: readnone +declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) #1 + +; Function Attrs: readonly +declare float @fabs(float) #2 + +; Function Attrs: readnone +declare float @llvm.AMDIL.exp.(float) #1 + +; Function Attrs: readnone +declare float @llvm.AMDGPU.lrp(float, float, float) #1 + +; Function Attrs: readnone +declare float @llvm.AMDIL.clamp.(float, float, float) #1 + +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="0" } +attributes #1 = { readnone } +attributes #2 = { readonly } diff --git a/test/CodeGen/R600/bitcast.ll b/test/CodeGen/R600/bitcast.ll new file mode 100644 index 0000000..bccc416 --- /dev/null +++ b/test/CodeGen/R600/bitcast.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s + +; This test just checks that the compiler doesn't crash. 
+; CHECK-LABEL: @v32i8_to_v8i32 +; CHECK: S_ENDPGM + +define void @v32i8_to_v8i32(<32 x i8> addrspace(2)* inreg) #0 { +entry: + %1 = load <32 x i8> addrspace(2)* %0 + %2 = bitcast <32 x i8> %1 to <8 x i32> + %3 = extractelement <8 x i32> %2, i32 1 + %4 = icmp ne i32 %3, 0 + %5 = select i1 %4, float 0.0, float 1.0 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %5, float %5, float %5, float %5) + ret void +} + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="0" } + diff --git a/test/CodeGen/R600/build_vector.ll b/test/CodeGen/R600/build_vector.ll index 9b738a2..8179de1 100644 --- a/test/CodeGen/R600/build_vector.ll +++ b/test/CodeGen/R600/build_vector.ll @@ -1,14 +1,14 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK -; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK ; R600-CHECK: @build_vector2 ; R600-CHECK: MOV ; R600-CHECK: MOV ; R600-CHECK-NOT: MOV ; SI-CHECK: @build_vector2 -; SI-CHECK-DAG: V_MOV_B32_e32 [[X:VGPR[0-9]]], 5 -; SI-CHECK-DAG: V_MOV_B32_e32 [[Y:VGPR[0-9]]], 6 -; SI-CHECK: BUFFER_STORE_DWORDX2 [[X]]_[[Y]] +; SI-CHECK-DAG: V_MOV_B32_e32 v[[X:[0-9]]], 5 +; SI-CHECK-DAG: V_MOV_B32_e32 v[[Y:[0-9]]], 6 +; SI-CHECK: BUFFER_STORE_DWORDX2 v{{\[}}[[X]]:[[Y]]{{\]}} define void @build_vector2 (<2 x i32> addrspace(1)* %out) { entry: store <2 x i32> <i32 5, i32 6>, <2 x i32> addrspace(1)* %out @@ -22,11 +22,11 @@ entry: ; R600-CHECK: MOV ; R600-CHECK-NOT: MOV ; SI-CHECK: @build_vector4 -; SI-CHECK-DAG: V_MOV_B32_e32 [[X:VGPR[0-9]]], 5 -; SI-CHECK-DAG: V_MOV_B32_e32 [[Y:VGPR[0-9]]], 6 -; SI-CHECK-DAG: V_MOV_B32_e32 [[Z:VGPR[0-9]]], 7 -; SI-CHECK-DAG: V_MOV_B32_e32 [[W:VGPR[0-9]]], 8 -; SI-CHECK: BUFFER_STORE_DWORDX4 [[X]]_[[Y]]_[[Z]]_[[W]] +; SI-CHECK-DAG: V_MOV_B32_e32 v[[X:[0-9]]], 5 +; SI-CHECK-DAG: 
V_MOV_B32_e32 v[[Y:[0-9]]], 6 +; SI-CHECK-DAG: V_MOV_B32_e32 v[[Z:[0-9]]], 7 +; SI-CHECK-DAG: V_MOV_B32_e32 v[[W:[0-9]]], 8 +; SI-CHECK: BUFFER_STORE_DWORDX4 v{{\[}}[[X]]:[[W]]{{\]}} define void @build_vector4 (<4 x i32> addrspace(1)* %out) { entry: store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> addrspace(1)* %out diff --git a/test/CodeGen/R600/combine_vloads.ll b/test/CodeGen/R600/combine_vloads.ll new file mode 100644 index 0000000..f8ec712 --- /dev/null +++ b/test/CodeGen/R600/combine_vloads.ll @@ -0,0 +1,42 @@ +; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s + +; +; kernel void combine_vloads(global char8* src, global char8* result) { +; for (int i = 0; i < 1024; ++i) +; result[i] = src[0] + src[1] + src[2] + src[3]; +; } +; + + +; 128-bit loads instead of many 8-bit +; EG-LABEL: @combine_vloads: +; EG: VTX_READ_128 +; EG: VTX_READ_128 +define void @combine_vloads(<8 x i8> addrspace(1)* nocapture %src, <8 x i8> addrspace(1)* nocapture %result) nounwind { +entry: + br label %for.body + +for.exit: ; preds = %for.body + ret void + +for.body: ; preds = %for.body, %entry + %i.01 = phi i32 [ 0, %entry ], [ %tmp19, %for.body ] + %arrayidx_v4 = bitcast <8 x i8> addrspace(1)* %src to <32 x i8> addrspace(1)* + %0 = bitcast <32 x i8> addrspace(1)* %arrayidx_v4 to <8 x i32> addrspace(1)* + %vecload2 = load <8 x i32> addrspace(1)* %0, align 32 + %1 = bitcast <8 x i32> %vecload2 to <32 x i8> + %tmp5 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %tmp8 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %tmp9 = add nsw <8 x i8> %tmp5, %tmp8 + %tmp12 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> + %tmp13 = add nsw <8 x i8> %tmp9, %tmp12 + %tmp16 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 24, i32 25, i32 26, i32 
27, i32 28, i32 29, i32 30, i32 31> + %tmp17 = add nsw <8 x i8> %tmp13, %tmp16 + %scevgep = getelementptr <8 x i8> addrspace(1)* %result, i32 %i.01 + %2 = bitcast <8 x i8> %tmp17 to <2 x i32> + %3 = bitcast <8 x i8> addrspace(1)* %scevgep to <2 x i32> addrspace(1)* + store <2 x i32> %2, <2 x i32> addrspace(1)* %3, align 8 + %tmp19 = add nsw i32 %i.01, 1 + %exitcond = icmp eq i32 %tmp19, 1024 + br i1 %exitcond, label %for.exit, label %for.body +} diff --git a/test/CodeGen/R600/complex-folding.ll b/test/CodeGen/R600/complex-folding.ll new file mode 100644 index 0000000..99f0d99 --- /dev/null +++ b/test/CodeGen/R600/complex-folding.ll @@ -0,0 +1,19 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @main +; CHECK-NOT: MOV +define void @main(<4 x float> inreg %reg0) #0 { +entry: + %0 = extractelement <4 x float> %reg0, i32 0 + %1 = call float @fabs(float %0) + %2 = fptoui float %1 to i32 + %3 = bitcast i32 %2 to float + %4 = insertelement <4 x float> undef, float %3, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %4, i32 0, i32 0) + ret void +} + +declare float @fabs(float ) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="0" }
\ No newline at end of file diff --git a/test/CodeGen/R600/elf.ll b/test/CodeGen/R600/elf.ll index f460f13..9385150 100644 --- a/test/CodeGen/R600/elf.ll +++ b/test/CodeGen/R600/elf.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=r600 -mcpu=SI -filetype=obj | llvm-readobj -s - | FileCheck --check-prefix=ELF-CHECK %s -; RUN: llc < %s -march=r600 -mcpu=SI -o - | FileCheck --check-prefix=CONFIG-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs -filetype=obj | llvm-readobj -s - | FileCheck --check-prefix=ELF-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG-CHECK %s ; ELF-CHECK: Format: ELF32 ; ELF-CHECK: Name: .AMDGPU.config diff --git a/test/CodeGen/R600/extload.ll b/test/CodeGen/R600/extload.ll new file mode 100644 index 0000000..aa660b3 --- /dev/null +++ b/test/CodeGen/R600/extload.ll @@ -0,0 +1,51 @@ +; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s + +; EG-LABEL: @anyext_load_i8: +; EG: AND_INT +; EG-NEXT: 255 +define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspace(1)* nocapture noalias %src) nounwind { + %cast = bitcast i8 addrspace(1)* %src to i32 addrspace(1)* + %load = load i32 addrspace(1)* %cast, align 1 + %x = bitcast i32 %load to <4 x i8> + %castOut = bitcast i8 addrspace(1)* %out to <4 x i8> addrspace(1)* + store <4 x i8> %x, <4 x i8> addrspace(1)* %castOut, align 1 + ret void +} + +; EG-LABEL: @anyext_load_i16: +; EG: AND_INT +; EG: LSHL +; EG: 65535 +define void @anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrspace(1)* nocapture noalias %src) nounwind { + %cast = bitcast i16 addrspace(1)* %src to i32 addrspace(1)* + %load = load i32 addrspace(1)* %cast, align 1 + %x = bitcast i32 %load to <2 x i16> + %castOut = bitcast i16 addrspace(1)* %out to <2 x i16> addrspace(1)* + store <2 x i16> %x, <2 x i16> addrspace(1)* %castOut, align 1 + ret void +} + +; EG-LABEL: @anyext_load_lds_i8: +; EG: AND_INT +; EG-NEXT: 255 
+define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addrspace(3)* nocapture noalias %src) nounwind { + %cast = bitcast i8 addrspace(3)* %src to i32 addrspace(3)* + %load = load i32 addrspace(3)* %cast, align 1 + %x = bitcast i32 %load to <4 x i8> + %castOut = bitcast i8 addrspace(3)* %out to <4 x i8> addrspace(3)* + store <4 x i8> %x, <4 x i8> addrspace(3)* %castOut, align 1 + ret void +} + +; EG-LABEL: @anyext_load_lds_i16: +; EG: AND_INT +; EG: LSHL +; EG: 65535 +define void @anyext_load_lds_i16(i16 addrspace(3)* nocapture noalias %out, i16 addrspace(3)* nocapture noalias %src) nounwind { + %cast = bitcast i16 addrspace(3)* %src to i32 addrspace(3)* + %load = load i32 addrspace(3)* %cast, align 1 + %x = bitcast i32 %load to <2 x i16> + %castOut = bitcast i16 addrspace(3)* %out to <2 x i16> addrspace(3)* + store <2 x i16> %x, <2 x i16> addrspace(3)* %castOut, align 1 + ret void +} diff --git a/test/CodeGen/R600/fabs.ll b/test/CodeGen/R600/fabs.ll index 78ffd57..a5f5df9 100644 --- a/test/CodeGen/R600/fabs.ll +++ b/test/CodeGen/R600/fabs.ll @@ -1,15 +1,15 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK -; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK ; DAGCombiner will transform: ; (fabs (f32 bitcast (i32 a))) => (f32 bitcast (and (i32 a), 0x7FFFFFFF)) ; unless isFabsFree returns true -; R600-CHECK: @fabs_free +; R600-CHECK-LABEL: @fabs_free ; R600-CHECK-NOT: AND ; R600-CHECK: |PV.{{[XYZW]}}| -; SI-CHECK: @fabs_free -; SI-CHECK: V_ADD_F32_e64 VGPR{{[0-9]}}, SGPR{{[0-9]}}, 0, 1, 0, 0, 0 +; SI-CHECK-LABEL: @fabs_free +; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0 define void @fabs_free(float addrspace(1)* %out, i32 %in) { entry: @@ -19,4 +19,36 @@ entry: ret void } +; R600-CHECK-LABEL: @fabs_v2 +; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}| +; R600-CHECK: 
|{{(PV|T[0-9])\.[XYZW]}}| +; SI-CHECK-LABEL: @fabs_v2 +; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0 +; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0 +define void @fabs_v2(<2 x float> addrspace(1)* %out, <2 x float> %in) { +entry: + %0 = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in) + store <2 x float> %0, <2 x float> addrspace(1)* %out + ret void +} + +; R600-CHECK-LABEL: @fabs_v4 +; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}| +; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}| +; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}| +; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}| +; SI-CHECK-LABEL: @fabs_v4 +; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0 +; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0 +; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0 +; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0 +define void @fabs_v4(<4 x float> addrspace(1)* %out, <4 x float> %in) { +entry: + %0 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in) + store <4 x float> %0, <4 x float> addrspace(1)* %out + ret void +} + declare float @fabs(float ) readnone +declare <2 x float> @llvm.fabs.v2f32(<2 x float> ) readnone +declare <4 x float> @llvm.fabs.v4f32(<4 x float> ) readnone diff --git a/test/CodeGen/R600/fadd.ll b/test/CodeGen/R600/fadd.ll index 97dbe44..f467bb7 100644 --- a/test/CodeGen/R600/fadd.ll +++ b/test/CodeGen/R600/fadd.ll @@ -1,23 +1,23 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK -; CHECK: @fadd_f32 -; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - -define void @fadd_f32() { - %r0 = call float @llvm.R600.load.input(i32 0) - %r1 = call float @llvm.R600.load.input(i32 1) - %r2 = fadd float %r0, %r1 - call void @llvm.AMDGPU.store.output(float %r2, i32 0) +; R600-CHECK: @fadd_f32 +; 
R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W +; SI-CHECK: @fadd_f32 +; SI-CHECK: V_ADD_F32 +define void @fadd_f32(float addrspace(1)* %out, float %a, float %b) { +entry: + %0 = fadd float %a, %b + store float %0, float addrspace(1)* %out ret void } -declare float @llvm.R600.load.input(i32) readnone - -declare void @llvm.AMDGPU.store.output(float, i32) - -; CHECK: @fadd_v2f32 -; CHECK-DAG: ADD * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z -; CHECK-DAG: ADD * T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y +; R600-CHECK: @fadd_v2f32 +; R600-CHECK-DAG: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z +; R600-CHECK-DAG: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y +; SI-CHECK: @fadd_v2f32 +; SI-CHECK: V_ADD_F32 +; SI-CHECK: V_ADD_F32 define void @fadd_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) { entry: %0 = fadd <2 x float> %a, %b @@ -25,12 +25,16 @@ entry: ret void } -; CHECK: @fadd_v4f32 -; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - +; R600-CHECK: @fadd_v4f32 +; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; SI-CHECK: @fadd_v4f32 +; SI-CHECK: V_ADD_F32 +; SI-CHECK: V_ADD_F32 +; SI-CHECK: V_ADD_F32 +; SI-CHECK: V_ADD_F32 define void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 %a = load <4 x float> addrspace(1) * %in diff --git a/test/CodeGen/R600/fadd64.ll b/test/CodeGen/R600/fadd64.ll index 130302f..48cd3cf 100644 --- a/test/CodeGen/R600/fadd64.ll +++ b/test/CodeGen/R600/fadd64.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=r600 -mcpu=tahiti | FileCheck %s +; RUN: 
llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s ; CHECK: @fadd_f64 -; CHECK: V_ADD_F64 {{VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+}} +; CHECK: V_ADD_F64 {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}} define void @fadd_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { diff --git a/test/CodeGen/R600/fcmp-cnd.ll b/test/CodeGen/R600/fcmp-cnd.ll index 7373a21..1d4e323 100644 --- a/test/CodeGen/R600/fcmp-cnd.ll +++ b/test/CodeGen/R600/fcmp-cnd.ll @@ -2,7 +2,7 @@ ;Not checking arguments 2 and 3 to CNDE, because they may change between ;registers and literal.x depending on what the optimizer does. -;CHECK: CNDE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) { entry: diff --git a/test/CodeGen/R600/fcmp.ll b/test/CodeGen/R600/fcmp.ll index dc3a779..c76a758 100644 --- a/test/CodeGen/R600/fcmp.ll +++ b/test/CodeGen/R600/fcmp.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ; CHECK: @fcmp_sext -; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @fcmp_sext(i32 addrspace(1)* %out, float addrspace(1)* %in) { entry: diff --git a/test/CodeGen/R600/fcmp64.ll b/test/CodeGen/R600/fcmp64.ll index 8f2513b..bcc7a8c 100644 --- a/test/CodeGen/R600/fcmp64.ll +++ b/test/CodeGen/R600/fcmp64.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=r600 -mcpu=tahiti | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s ; CHECK: @flt_f64 -; CHECK: V_CMP_LT_F64_e64 {{SGPR[0-9]+_SGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+}} +; CHECK: V_CMP_LT_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}} define void @flt_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { @@ 
-14,7 +14,7 @@ define void @flt_f64(double addrspace(1)* %out, double addrspace(1)* %in1, } ; CHECK: @fle_f64 -; CHECK: V_CMP_LE_F64_e64 {{SGPR[0-9]+_SGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+}} +; CHECK: V_CMP_LE_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}} define void @fle_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { @@ -27,7 +27,7 @@ define void @fle_f64(double addrspace(1)* %out, double addrspace(1)* %in1, } ; CHECK: @fgt_f64 -; CHECK: V_CMP_GT_F64_e64 {{SGPR[0-9]+_SGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+}} +; CHECK: V_CMP_GT_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}} define void @fgt_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { @@ -40,7 +40,7 @@ define void @fgt_f64(double addrspace(1)* %out, double addrspace(1)* %in1, } ; CHECK: @fge_f64 -; CHECK: V_CMP_GE_F64_e64 {{SGPR[0-9]+_SGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+}} +; CHECK: V_CMP_GE_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}} define void @fge_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { @@ -53,7 +53,7 @@ define void @fge_f64(double addrspace(1)* %out, double addrspace(1)* %in1, } ; CHECK: @fne_f64 -; CHECK: V_CMP_NEQ_F64_e64 {{SGPR[0-9]+_SGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+}} +; CHECK: V_CMP_NEQ_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}} define void @fne_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { @@ -66,7 +66,7 @@ define void @fne_f64(double addrspace(1)* %out, double addrspace(1)* %in1, } ; CHECK: @feq_f64 -; CHECK: V_CMP_EQ_F64_e64 {{SGPR[0-9]+_SGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+}} +; CHECK: V_CMP_EQ_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}} define void @feq_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { 
diff --git a/test/CodeGen/R600/fconst64.ll b/test/CodeGen/R600/fconst64.ll index 2402a9c..5c5ee7e 100644 --- a/test/CodeGen/R600/fconst64.ll +++ b/test/CodeGen/R600/fconst64.ll @@ -1,8 +1,8 @@ -; RUN: llc < %s -march=r600 -mcpu=tahiti | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s ; CHECK: @fconst_f64 -; CHECK: V_MOV_B32_e32 {{VGPR[0-9]+}}, 0.000000e+00 -; CHECK-NEXT: V_MOV_B32_e32 {{VGPR[0-9]+}}, 2.312500e+00 +; CHECK: V_MOV_B32_e32 {{v[0-9]+}}, 0.000000e+00 +; CHECK-NEXT: V_MOV_B32_e32 {{v[0-9]+}}, 2.312500e+00 define void @fconst_f64(double addrspace(1)* %out, double addrspace(1)* %in) { %r1 = load double addrspace(1)* %in diff --git a/test/CodeGen/R600/fdiv.ll b/test/CodeGen/R600/fdiv.ll index 6798eac..3d21524 100644 --- a/test/CodeGen/R600/fdiv.ll +++ b/test/CodeGen/R600/fdiv.ll @@ -1,14 +1,20 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK ; These tests check that fdiv is expanded correctly and also test that the ; scheduler is scheduling the RECIP_IEEE and MUL_IEEE instructions in separate ; instruction groups. 
-; CHECK: @fdiv_v2f32 -; CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z -; CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y -; CHECK-DAG: MUL_IEEE T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS -; CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS +; R600-CHECK: @fdiv_v2f32 +; R600-CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z +; R600-CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y +; R600-CHECK-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS +; R600-CHECK-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS +; SI-CHECK: @fdiv_v2f32 +; SI-CHECK-DAG: V_RCP_F32 +; SI-CHECK-DAG: V_MUL_F32 +; SI-CHECK-DAG: V_RCP_F32 +; SI-CHECK-DAG: V_MUL_F32 define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) { entry: %0 = fdiv <2 x float> %a, %b @@ -16,16 +22,24 @@ entry: ret void } -; CHECK: @fdiv_v4f32 -; CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK-DAG: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS -; CHECK-DAG: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS -; CHECK-DAG: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS -; CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS - +; R600-CHECK: @fdiv_v4f32 +; R600-CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600-CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600-CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600-CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600-CHECK-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS +; R600-CHECK-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS +; R600-CHECK-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS +; R600-CHECK-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS +; 
SI-CHECK: @fdiv_v4f32 +; SI-CHECK-DAG: V_RCP_F32 +; SI-CHECK-DAG: V_MUL_F32 +; SI-CHECK-DAG: V_RCP_F32 +; SI-CHECK-DAG: V_MUL_F32 +; SI-CHECK-DAG: V_RCP_F32 +; SI-CHECK-DAG: V_MUL_F32 +; SI-CHECK-DAG: V_RCP_F32 +; SI-CHECK-DAG: V_MUL_F32 define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 %a = load <4 x float> addrspace(1) * %in diff --git a/test/CodeGen/R600/fdiv64.ll b/test/CodeGen/R600/fdiv64.ll index 76c5ca3..79b5c8b 100644 --- a/test/CodeGen/R600/fdiv64.ll +++ b/test/CodeGen/R600/fdiv64.ll @@ -1,8 +1,8 @@ -; RUN: llc < %s -march=r600 -mcpu=tahiti | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s ; CHECK: @fdiv_f64 -; CHECK: V_RCP_F64_e32 {{VGPR[0-9]+_VGPR[0-9]+}} -; CHECK: V_MUL_F64 {{VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+}} +; CHECK: V_RCP_F64_e32 {{v\[[0-9]+:[0-9]+\]}} +; CHECK: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { diff --git a/test/CodeGen/R600/floor.ll b/test/CodeGen/R600/floor.ll index 877d69a..67e86c4 100644 --- a/test/CodeGen/R600/floor.ll +++ b/test/CodeGen/R600/floor.ll @@ -2,15 +2,15 @@ ;CHECK: FLOOR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 %r1 = call float @floor(float %r0) - call void @llvm.AMDGPU.store.output(float %r1, i32 0) + %vec = insertelement <4 x float> undef, float %r1, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone - -declare void @llvm.AMDGPU.store.output(float, i32) - declare float @floor(float) readonly +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + 
+attributes #0 = { "ShaderType"="0" }
\ No newline at end of file diff --git a/test/CodeGen/R600/fma.ll b/test/CodeGen/R600/fma.ll new file mode 100644 index 0000000..51e9d29 --- /dev/null +++ b/test/CodeGen/R600/fma.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s + +; CHECK: @fma_f32 +; CHECK: V_FMA_F32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}} + +define void @fma_f32(float addrspace(1)* %out, float addrspace(1)* %in1, + float addrspace(1)* %in2, float addrspace(1)* %in3) { + %r0 = load float addrspace(1)* %in1 + %r1 = load float addrspace(1)* %in2 + %r2 = load float addrspace(1)* %in3 + %r3 = tail call float @llvm.fma.f32(float %r0, float %r1, float %r2) + store float %r3, float addrspace(1)* %out + ret void +} + +declare float @llvm.fma.f32(float, float, float) + +; CHECK: @fma_f64 +; CHECK: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} + +define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1, + double addrspace(1)* %in2, double addrspace(1)* %in3) { + %r0 = load double addrspace(1)* %in1 + %r1 = load double addrspace(1)* %in2 + %r2 = load double addrspace(1)* %in3 + %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %r2) + store double %r3, double addrspace(1)* %out + ret void +} + +declare double @llvm.fma.f64(double, double, double) diff --git a/test/CodeGen/R600/fmad.ll b/test/CodeGen/R600/fmad.ll index 75e65d8..935e351 100644 --- a/test/CodeGen/R600/fmad.ll +++ b/test/CodeGen/R600/fmad.ll @@ -2,18 +2,18 @@ ;CHECK: MULADD_IEEE * {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) - %r1 = call float @llvm.R600.load.input(i32 1) - %r2 = call float @llvm.R600.load.input(i32 2) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 + %r1 = extractelement <4 x float> %reg0, i32 1 + %r2 = extractelement <4 x float> %reg0, i32 2 %r3 = fmul float 
%r0, %r1 - %r4 = fadd float %r3, %r2 - call void @llvm.AMDGPU.store.output(float %r4, i32 0) + %r4 = fadd float %r3, %r2 + %vec = insertelement <4 x float> undef, float %r4, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone - -declare void @llvm.AMDGPU.store.output(float, i32) - declare float @fabs(float ) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="0" }
\ No newline at end of file diff --git a/test/CodeGen/R600/fmax.ll b/test/CodeGen/R600/fmax.ll index 8b704e5..d7127f4 100644 --- a/test/CodeGen/R600/fmax.ll +++ b/test/CodeGen/R600/fmax.ll @@ -2,15 +2,16 @@ ;CHECK: MAX * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) - %r1 = call float @llvm.R600.load.input(i32 1) - %r2 = fcmp uge float %r0, %r1 +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 + %r1 = extractelement <4 x float> %reg0, i32 1 + %r2 = fcmp oge float %r0, %r1 %r3 = select i1 %r2, float %r0, float %r1 - call void @llvm.AMDGPU.store.output(float %r3, i32 0) + %vec = insertelement <4 x float> undef, float %r3, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -declare void @llvm.AMDGPU.store.output(float, i32) +attributes #0 = { "ShaderType"="0" }
\ No newline at end of file diff --git a/test/CodeGen/R600/fmin.ll b/test/CodeGen/R600/fmin.ll index 5e34b7c..defa8c0 100644 --- a/test/CodeGen/R600/fmin.ll +++ b/test/CodeGen/R600/fmin.ll @@ -2,15 +2,16 @@ ;CHECK: MIN * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) - %r1 = call float @llvm.R600.load.input(i32 1) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 + %r1 = extractelement <4 x float> %reg0, i32 1 %r2 = fcmp uge float %r0, %r1 %r3 = select i1 %r2, float %r1, float %r0 - call void @llvm.AMDGPU.store.output(float %r3, i32 0) + %vec = insertelement <4 x float> undef, float %r3, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -declare void @llvm.AMDGPU.store.output(float, i32) +attributes #0 = { "ShaderType"="0" }
\ No newline at end of file diff --git a/test/CodeGen/R600/fmul.ll b/test/CodeGen/R600/fmul.ll index 6ef3a11..2a7825f 100644 --- a/test/CodeGen/R600/fmul.ll +++ b/test/CodeGen/R600/fmul.ll @@ -1,23 +1,27 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK -; CHECK: @fmul_f32 -; CHECK: MUL_IEEE * {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - -define void @fmul_f32() { - %r0 = call float @llvm.R600.load.input(i32 0) - %r1 = call float @llvm.R600.load.input(i32 1) - %r2 = fmul float %r0, %r1 - call void @llvm.AMDGPU.store.output(float %r2, i32 0) - ret void +; R600-CHECK: @fmul_f32 +; R600-CHECK: MUL_IEEE {{\** *}}{{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W +; SI-CHECK: @fmul_f32 +; SI-CHECK: V_MUL_F32 +define void @fmul_f32(float addrspace(1)* %out, float %a, float %b) { +entry: + %0 = fmul float %a, %b + store float %0, float addrspace(1)* %out + ret void } declare float @llvm.R600.load.input(i32) readnone declare void @llvm.AMDGPU.store.output(float, i32) -; CHECK: @fmul_v2f32 -; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW]}} -; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW]}} +; R600-CHECK: @fmul_v2f32 +; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}} +; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}} +; SI-CHECK: @fmul_v2f32 +; SI-CHECK: V_MUL_F32 +; SI-CHECK: V_MUL_F32 define void @fmul_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) { entry: %0 = fmul <2 x float> %a, %b @@ -25,12 +29,16 @@ entry: ret void } -; CHECK: @fmul_v4f32 -; CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - +; R600-CHECK: @fmul_v4f32 +; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; 
R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; SI-CHECK: @fmul_v4f32 +; SI-CHECK: V_MUL_F32 +; SI-CHECK: V_MUL_F32 +; SI-CHECK: V_MUL_F32 +; SI-CHECK: V_MUL_F32 define void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 %a = load <4 x float> addrspace(1) * %in diff --git a/test/CodeGen/R600/fmul.v4f32.ll b/test/CodeGen/R600/fmul.v4f32.ll deleted file mode 100644 index 74a58f7..0000000 --- a/test/CodeGen/R600/fmul.v4f32.ll +++ /dev/null @@ -1,15 +0,0 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s - -;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - -define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { - %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 - %a = load <4 x float> addrspace(1) * %in - %b = load <4 x float> addrspace(1) * %b_ptr - %result = fmul <4 x float> %a, %b - store <4 x float> %result, <4 x float> addrspace(1)* %out - ret void -} diff --git a/test/CodeGen/R600/fmul64.ll b/test/CodeGen/R600/fmul64.ll index 8a57d4a..7c7bf04 100644 --- a/test/CodeGen/R600/fmul64.ll +++ b/test/CodeGen/R600/fmul64.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=r600 -mcpu=tahiti | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s ; CHECK: @fmul_f64 -; CHECK: V_MUL_F64 {{VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+}} +; CHECK: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} define void @fmul_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { diff --git 
a/test/CodeGen/R600/fmuladd.ll b/test/CodeGen/R600/fmuladd.ll new file mode 100644 index 0000000..48944f6 --- /dev/null +++ b/test/CodeGen/R600/fmuladd.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s + +; CHECK: @fmuladd_f32 +; CHECK: V_MAD_F32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}} + +define void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1, + float addrspace(1)* %in2, float addrspace(1)* %in3) { + %r0 = load float addrspace(1)* %in1 + %r1 = load float addrspace(1)* %in2 + %r2 = load float addrspace(1)* %in3 + %r3 = tail call float @llvm.fmuladd.f32(float %r0, float %r1, float %r2) + store float %r3, float addrspace(1)* %out + ret void +} + +declare float @llvm.fmuladd.f32(float, float, float) + +; CHECK: @fmuladd_f64 +; CHECK: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} + +define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1, + double addrspace(1)* %in2, double addrspace(1)* %in3) { + %r0 = load double addrspace(1)* %in1 + %r1 = load double addrspace(1)* %in2 + %r2 = load double addrspace(1)* %in3 + %r3 = tail call double @llvm.fmuladd.f64(double %r0, double %r1, double %r2) + store double %r3, double addrspace(1)* %out + ret void +} + +declare double @llvm.fmuladd.f64(double, double, double) diff --git a/test/CodeGen/R600/fneg.ll b/test/CodeGen/R600/fneg.ll index 799db0c..9446aa8 100644 --- a/test/CodeGen/R600/fneg.ll +++ b/test/CodeGen/R600/fneg.ll @@ -1,8 +1,23 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK -; CHECK: @fneg_v2 -; CHECK: -PV -; CHECK: -PV +; R600-CHECK-LABEL: @fneg +; R600-CHECK: -PV +; SI-CHECK-LABEL: @fneg +; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1 +define void @fneg(float addrspace(1)* 
%out, float %in) { +entry: + %0 = fsub float -0.000000e+00, %in + store float %0, float addrspace(1)* %out + ret void +} + +; R600-CHECK-LABEL: @fneg_v2 +; R600-CHECK: -PV +; R600-CHECK: -PV +; SI-CHECK-LABEL: @fneg_v2 +; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1 +; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1 define void @fneg_v2(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) { entry: %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %in @@ -10,11 +25,16 @@ entry: ret void } -; CHECK: @fneg_v4 -; CHECK: -PV -; CHECK: -PV -; CHECK: -PV -; CHECK: -PV +; R600-CHECK-LABEL: @fneg_v4 +; R600-CHECK: -PV +; R600-CHECK: -T +; R600-CHECK: -PV +; R600-CHECK: -PV +; SI-CHECK-LABEL: @fneg_v4 +; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1 +; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1 +; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1 +; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1 define void @fneg_v4(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) { entry: %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %in @@ -26,9 +46,12 @@ entry: ; (fneg (f32 bitcast (i32 a))) => (f32 bitcast (xor (i32 a), 0x80000000)) ; unless the target returns true for isNegFree() -; CHECK-NOT: XOR -; CHECK: -KC0[2].Z - +; R600-CHECK-LABEL: @fneg_free +; R600-CHECK-NOT: XOR +; R600-CHECK: -KC0[2].Z +; SI-CHECK-LABEL: @fneg_free +; XXX: We could use V_ADD_F32_e64 with the negate bit here instead. 
+; SI-CHECK: V_SUB_F32_e64 v{{[0-9]}}, 0.000000e+00, s{{[0-9]}}, 0, 0, 0, 0 define void @fneg_free(float addrspace(1)* %out, i32 %in) { entry: %0 = bitcast i32 %in to float diff --git a/test/CodeGen/R600/fp64_to_sint.ll b/test/CodeGen/R600/fp64_to_sint.ll new file mode 100644 index 0000000..185e21c --- /dev/null +++ b/test/CodeGen/R600/fp64_to_sint.ll @@ -0,0 +1,9 @@ +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK + +; CHECK: @fp64_to_sint +; CHECK: V_CVT_I32_F64_e32 +define void @fp64_to_sint(i32 addrspace(1)* %out, double %in) { + %result = fptosi double %in to i32 + store i32 %result, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fp_to_sint.ll b/test/CodeGen/R600/fp_to_sint.ll index 6471270..8302b4f 100644 --- a/test/CodeGen/R600/fp_to_sint.ll +++ b/test/CodeGen/R600/fp_to_sint.ll @@ -1,9 +1,9 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK -; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK ; R600-CHECK: @fp_to_sint_v2i32 -; R600-CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -; R600-CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; R600-CHECK: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; R600-CHECK: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} ; SI-CHECK: @fp_to_sint_v2i32 ; SI-CHECK: V_CVT_I32_F32_e32 ; SI-CHECK: V_CVT_I32_F32_e32 @@ -14,10 +14,10 @@ define void @fp_to_sint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) { } ; R600-CHECK: @fp_to_sint_v4i32 -; R600-CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -; R600-CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -; R600-CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -; R600-CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; R600-CHECK: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; R600-CHECK: 
FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW]}} +; R600-CHECK: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; R600-CHECK: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} ; SI-CHECK: @fp_to_sint_v4i32 ; SI-CHECK: V_CVT_I32_F32_e32 ; SI-CHECK: V_CVT_I32_F32_e32 diff --git a/test/CodeGen/R600/fp_to_uint.ll b/test/CodeGen/R600/fp_to_uint.ll index 2a365f9..77db43b 100644 --- a/test/CodeGen/R600/fp_to_uint.ll +++ b/test/CodeGen/R600/fp_to_uint.ll @@ -1,8 +1,12 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK -; CHECK: @fp_to_uint_v2i32 -; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; R600-CHECK: @fp_to_uint_v2i32 +; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; SI-CHECK: @fp_to_uint_v2i32 +; SI-CHECK: V_CVT_U32_F32_e32 +; SI-CHECK: V_CVT_U32_F32_e32 define void @fp_to_uint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) { %result = fptoui <2 x float> %in to <2 x i32> @@ -10,11 +14,16 @@ define void @fp_to_uint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) { ret void } -; CHECK: @fp_to_uint_v4i32 -; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; R600-CHECK: @fp_to_uint_v4i32 +; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; SI-CHECK: @fp_to_uint_v4i32 +; SI-CHECK: V_CVT_U32_F32_e32 +; SI-CHECK: 
V_CVT_U32_F32_e32 +; SI-CHECK: V_CVT_U32_F32_e32 +; SI-CHECK: V_CVT_U32_F32_e32 define void @fp_to_uint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { %value = load <4 x float> addrspace(1) * %in diff --git a/test/CodeGen/R600/fpext.ll b/test/CodeGen/R600/fpext.ll new file mode 100644 index 0000000..143ee79 --- /dev/null +++ b/test/CodeGen/R600/fpext.ll @@ -0,0 +1,9 @@ +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK + +; CHECK: @fpext +; CHECK: V_CVT_F64_F32_e32 +define void @fpext(double addrspace(1)* %out, float %in) { + %result = fpext float %in to double + store double %result, double addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fptrunc.ll b/test/CodeGen/R600/fptrunc.ll new file mode 100644 index 0000000..20a8c00 --- /dev/null +++ b/test/CodeGen/R600/fptrunc.ll @@ -0,0 +1,9 @@ +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK + +; CHECK: @fptrunc +; CHECK: V_CVT_F32_F64_e32 +define void @fptrunc(float addrspace(1)* %out, double %in) { + %result = fptrunc double %in to float + store float %result, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fsqrt.ll b/test/CodeGen/R600/fsqrt.ll index 2613805..ae50b17 100644 --- a/test/CodeGen/R600/fsqrt.ll +++ b/test/CodeGen/R600/fsqrt.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=r600 -mcpu=tahiti | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s ; CHECK: @fsqrt_f32 -; CHECK: V_SQRT_F32_e32 {{VGPR[0-9]+, VGPR[0-9]+}} +; CHECK: V_SQRT_F32_e32 {{v[0-9]+, v[0-9]+}} define void @fsqrt_f32(float addrspace(1)* %out, float addrspace(1)* %in) { %r0 = load float addrspace(1)* %in @@ -11,7 +11,7 @@ define void @fsqrt_f32(float addrspace(1)* %out, float addrspace(1)* %in) { } ; CHECK: @fsqrt_f64 -; CHECK: V_SQRT_F64_e32 {{VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+}} +; CHECK: V_SQRT_F64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} 
define void @fsqrt_f64(double addrspace(1)* %out, double addrspace(1)* %in) { %r0 = load double addrspace(1)* %in diff --git a/test/CodeGen/R600/fsub.ll b/test/CodeGen/R600/fsub.ll index 0fc5860..4f74efb 100644 --- a/test/CodeGen/R600/fsub.ll +++ b/test/CodeGen/R600/fsub.ll @@ -1,23 +1,27 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK -; CHECK: @fsub_f32 -; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} - -define void @fsub_f32() { - %r0 = call float @llvm.R600.load.input(i32 0) - %r1 = call float @llvm.R600.load.input(i32 1) - %r2 = fsub float %r0, %r1 - call void @llvm.AMDGPU.store.output(float %r2, i32 0) - ret void +; R600-CHECK: @fsub_f32 +; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, -KC0[2].W +; SI-CHECK: @fsub_f32 +; SI-CHECK: V_SUB_F32 +define void @fsub_f32(float addrspace(1)* %out, float %a, float %b) { +entry: + %0 = fsub float %a, %b + store float %0, float addrspace(1)* %out + ret void } declare float @llvm.R600.load.input(i32) readnone declare void @llvm.AMDGPU.store.output(float, i32) -; CHECK: @fsub_v2f32 -; CHECK-DAG: ADD * T{{[0-9]+\.[XYZW]}}, KC0[3].X, -KC0[3].Z -; CHECK-DAG: ADD * T{{[0-9]+\.[XYZW]}}, KC0[2].W, -KC0[3].Y +; R600-CHECK: @fsub_v2f32 +; R600-CHECK-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, -KC0[3].Z +; R600-CHECK-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, -KC0[3].Y +; SI-CHECK: @fsub_v2f32 +; SI-CHECK: V_SUB_F32 +; SI-CHECK: V_SUB_F32 define void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) { entry: %0 = fsub <2 x float> %a, %b @@ -25,11 +29,16 @@ entry: ret void } -; CHECK: @fsub_v4f32 -; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} -; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} -; CHECK: ADD * T{{[0-9]+\.[XYZW], 
T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} -; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} +; R600-CHECK: @fsub_v4f32 +; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} +; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} +; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} +; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} +; SI-CHECK: @fsub_v4f32 +; SI-CHECK: V_SUB_F32 +; SI-CHECK: V_SUB_F32 +; SI-CHECK: V_SUB_F32 +; SI-CHECK: V_SUB_F32 define void @fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 %a = load <4 x float> addrspace(1) * %in diff --git a/test/CodeGen/R600/fsub64.ll b/test/CodeGen/R600/fsub64.ll index fa59dcc..1445a20 100644 --- a/test/CodeGen/R600/fsub64.ll +++ b/test/CodeGen/R600/fsub64.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=r600 -mcpu=tahiti | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s ; CHECK: @fsub_f64 -; CHECK: V_ADD_F64 {{VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+}}, 0, 0, 0, 0, 2 +; CHECK: V_ADD_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}, 0, 0, 0, 0, 2 define void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { diff --git a/test/CodeGen/R600/gep-address-space.ll b/test/CodeGen/R600/gep-address-space.ll new file mode 100644 index 0000000..4ea21dd --- /dev/null +++ b/test/CodeGen/R600/gep-address-space.ll @@ -0,0 +1,40 @@ +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck %s + +define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind { +; CHECK-LABEL @use_gep_address_space: +; CHECK: S_ADD_I32 + %p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16 + store i32 99, i32 addrspace(3)* %p + ret void +} + +define void @gep_as_vector_v4(<4 x [1024 x 
i32] addrspace(3)*> %array) nounwind { +; CHECK-LABEL: @gep_as_vector_v4: +; CHECK: S_ADD_I32 +; CHECK: S_ADD_I32 +; CHECK: S_ADD_I32 +; CHECK: S_ADD_I32 + %p = getelementptr <4 x [1024 x i32] addrspace(3)*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16> + %p0 = extractelement <4 x i32 addrspace(3)*> %p, i32 0 + %p1 = extractelement <4 x i32 addrspace(3)*> %p, i32 1 + %p2 = extractelement <4 x i32 addrspace(3)*> %p, i32 2 + %p3 = extractelement <4 x i32 addrspace(3)*> %p, i32 3 + store i32 99, i32 addrspace(3)* %p0 + store i32 99, i32 addrspace(3)* %p1 + store i32 99, i32 addrspace(3)* %p2 + store i32 99, i32 addrspace(3)* %p3 + ret void +} + +define void @gep_as_vector_v2(<2 x [1024 x i32] addrspace(3)*> %array) nounwind { +; CHECK-LABEL: @gep_as_vector_v2: +; CHECK: S_ADD_I32 +; CHECK: S_ADD_I32 + %p = getelementptr <2 x [1024 x i32] addrspace(3)*> %array, <2 x i16> zeroinitializer, <2 x i16> <i16 16, i16 16> + %p0 = extractelement <2 x i32 addrspace(3)*> %p, i32 0 + %p1 = extractelement <2 x i32 addrspace(3)*> %p, i32 1 + store i32 99, i32 addrspace(3)* %p0 + store i32 99, i32 addrspace(3)* %p1 + ret void +} + diff --git a/test/CodeGen/R600/icmp-select-sete-reverse-args.ll b/test/CodeGen/R600/icmp-select-sete-reverse-args.ll index e3005fe..71705a6 100644 --- a/test/CodeGen/R600/icmp-select-sete-reverse-args.ll +++ b/test/CodeGen/R600/icmp-select-sete-reverse-args.ll @@ -3,7 +3,7 @@ ;Test that a select with reversed True/False values is correctly lowered ;to a SETNE_INT. There should only be one SETNE_INT instruction. 
-;CHECK: SETNE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: SETNE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;CHECK-NOT: SETNE_INT define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { diff --git a/test/CodeGen/R600/imm.ll b/test/CodeGen/R600/imm.ll index 979efb0..b047315 100644 --- a/test/CodeGen/R600/imm.ll +++ b/test/CodeGen/R600/imm.ll @@ -1,10 +1,10 @@ -; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s ; Use a 64-bit value with lo bits that can be represented as an inline constant ; CHECK: @i64_imm_inline_lo -; CHECK: S_MOV_B32 [[LO:SGPR[0-9]+]], 5 -; CHECK: V_MOV_B32_e32 [[LO_VGPR:VGPR[0-9]+]], [[LO]] -; CHECK: BUFFER_STORE_DWORDX2 [[LO_VGPR]]_ +; CHECK: S_MOV_B32 [[LO:s[0-9]+]], 5 +; CHECK: V_MOV_B32_e32 v[[LO_VGPR:[0-9]+]], [[LO]] +; CHECK: BUFFER_STORE_DWORDX2 v{{\[}}[[LO_VGPR]]: define void @i64_imm_inline_lo(i64 addrspace(1) *%out) { entry: store i64 1311768464867721221, i64 addrspace(1) *%out ; 0x1234567800000005 @@ -13,9 +13,9 @@ entry: ; Use a 64-bit value with hi bits that can be represented as an inline constant ; CHECK: @i64_imm_inline_hi -; CHECK: S_MOV_B32 [[HI:SGPR[0-9]+]], 5 -; CHECK: V_MOV_B32_e32 [[HI_VGPR:VGPR[0-9]+]], [[HI]] -; CHECK: BUFFER_STORE_DWORDX2 {{VGPR[0-9]+}}_[[HI_VGPR]] +; CHECK: S_MOV_B32 [[HI:s[0-9]+]], 5 +; CHECK: V_MOV_B32_e32 v[[HI_VGPR:[0-9]+]], [[HI]] +; CHECK: BUFFER_STORE_DWORDX2 v{{\[[0-9]+:}}[[HI_VGPR]] define void @i64_imm_inline_hi(i64 addrspace(1) *%out) { entry: store i64 21780256376, i64 addrspace(1) *%out ; 0x0000000512345678 diff --git a/test/CodeGen/R600/indirect-addressing-si.ll b/test/CodeGen/R600/indirect-addressing-si.ll index ba5de22..169d69b 100644 --- a/test/CodeGen/R600/indirect-addressing-si.ll +++ b/test/CodeGen/R600/indirect-addressing-si.ll @@ -1,10 +1,10 @@ -; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=SI 
-verify-machineinstrs | FileCheck %s ; Tests for indirect addressing on SI, which is implemented using dynamic ; indexing of vectors. ; CHECK: extract_w_offset -; CHECK: S_MOV_B32 M0 +; CHECK: S_MOV_B32 m0 ; CHECK-NEXT: V_MOVRELS_B32_e32 define void @extract_w_offset(float addrspace(1)* %out, i32 %in) { entry: @@ -15,7 +15,7 @@ entry: } ; CHECK: extract_wo_offset -; CHECK: S_MOV_B32 M0 +; CHECK: S_MOV_B32 m0 ; CHECK-NEXT: V_MOVRELS_B32_e32 define void @extract_wo_offset(float addrspace(1)* %out, i32 %in) { entry: @@ -25,7 +25,7 @@ entry: } ; CHECK: insert_w_offset -; CHECK: S_MOV_B32 M0 +; CHECK: S_MOV_B32 m0 ; CHECK-NEXT: V_MOVRELD_B32_e32 define void @insert_w_offset(float addrspace(1)* %out, i32 %in) { entry: @@ -37,7 +37,7 @@ entry: } ; CHECK: insert_wo_offset -; CHECK: S_MOV_B32 M0 +; CHECK: S_MOV_B32 m0 ; CHECK-NEXT: V_MOVRELD_B32_e32 define void @insert_wo_offset(float addrspace(1)* %out, i32 %in) { entry: diff --git a/test/CodeGen/R600/insert_vector_elt.ll b/test/CodeGen/R600/insert_vector_elt.ll new file mode 100644 index 0000000..05aecce --- /dev/null +++ b/test/CodeGen/R600/insert_vector_elt.ll @@ -0,0 +1,16 @@ +; XFAIL: * +; RUN: llc < %s -march=r600 -mcpu=redwood -o %t + +define void @var_insert(<4 x i32> addrspace(1)* %out, <4 x i32> %x, i32 %val, i32 %idx) nounwind { +entry: + %tmp3 = insertelement <4 x i32> %x, i32 %val, i32 %idx ; <<4 x i32>> [#uses=1] + store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out + ret void +} + +define void @var_extract(i32 addrspace(1)* %out, <4 x i32> %x, i32 %idx) nounwind { +entry: + %tmp3 = extractelement <4 x i32> %x, i32 %idx ; <<i32>> [#uses=1] + store i32 %tmp3, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/jump-address.ll b/test/CodeGen/R600/jump-address.ll index 26c298b..ae9c8bb 100644 --- a/test/CodeGen/R600/jump-address.ll +++ b/test/CodeGen/R600/jump-address.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -; CHECK: JUMP @5 +; CHECK: JUMP @3 ; CHECK: EXPORT ; 
CHECK-NOT: EXPORT diff --git a/test/CodeGen/R600/kcache-fold.ll b/test/CodeGen/R600/kcache-fold.ll index 3d70e4b..0baa3cd 100644 --- a/test/CodeGen/R600/kcache-fold.ll +++ b/test/CodeGen/R600/kcache-fold.ll @@ -1,7 +1,7 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ; CHECK: @main1 -; CHECK: MOV T{{[0-9]+\.[XYZW], KC0}} +; CHECK: MOV * T{{[0-9]+\.[XYZW], KC0}} define void @main1() { main_body: %0 = load <4 x float> addrspace(8)* null @@ -10,7 +10,7 @@ main_body: %3 = extractelement <4 x float> %2, i32 0 %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) %5 = extractelement <4 x float> %4, i32 0 - %6 = fcmp ult float %1, 0.000000e+00 + %6 = fcmp ogt float %1, 0.000000e+00 %7 = select i1 %6, float %3, float %5 %8 = load <4 x float> addrspace(8)* null %9 = extractelement <4 x float> %8, i32 1 @@ -18,7 +18,7 @@ main_body: %11 = extractelement <4 x float> %10, i32 1 %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) %13 = extractelement <4 x float> %12, i32 1 - %14 = fcmp ult float %9, 0.000000e+00 + %14 = fcmp ogt float %9, 0.000000e+00 %15 = select i1 %14, float %11, float %13 %16 = load <4 x float> addrspace(8)* null %17 = extractelement <4 x float> %16, i32 2 @@ -26,7 +26,7 @@ main_body: %19 = extractelement <4 x float> %18, i32 2 %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) %21 = extractelement <4 x float> %20, i32 2 - %22 = fcmp ult float %17, 0.000000e+00 + %22 = fcmp ogt float %17, 0.000000e+00 %23 = select i1 %22, float %19, float %21 %24 = load <4 x float> addrspace(8)* null %25 = extractelement <4 x float> %24, i32 3 @@ -34,7 +34,7 @@ main_body: %27 = extractelement <4 x float> %26, i32 3 %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) %29 = extractelement <4 x float> %28, i32 3 - %30 = fcmp ult float %25, 0.000000e+00 
+ %30 = fcmp ogt float %25, 0.000000e+00 %31 = select i1 %30, float %27, float %29 %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00) %33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00) @@ -58,7 +58,7 @@ main_body: %3 = extractelement <4 x float> %2, i32 0 %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) %5 = extractelement <4 x float> %4, i32 1 - %6 = fcmp ult float %1, 0.000000e+00 + %6 = fcmp ogt float %1, 0.000000e+00 %7 = select i1 %6, float %3, float %5 %8 = load <4 x float> addrspace(8)* null %9 = extractelement <4 x float> %8, i32 1 @@ -66,7 +66,7 @@ main_body: %11 = extractelement <4 x float> %10, i32 0 %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) %13 = extractelement <4 x float> %12, i32 1 - %14 = fcmp ult float %9, 0.000000e+00 + %14 = fcmp ogt float %9, 0.000000e+00 %15 = select i1 %14, float %11, float %13 %16 = load <4 x float> addrspace(8)* null %17 = extractelement <4 x float> %16, i32 2 @@ -74,7 +74,7 @@ main_body: %19 = extractelement <4 x float> %18, i32 3 %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) %21 = extractelement <4 x float> %20, i32 2 - %22 = fcmp ult float %17, 0.000000e+00 + %22 = fcmp ogt float %17, 0.000000e+00 %23 = select i1 %22, float %19, float %21 %24 = load <4 x float> addrspace(8)* null %25 = extractelement <4 x float> %24, i32 3 @@ -82,7 +82,7 @@ main_body: %27 = extractelement <4 x float> %26, i32 3 %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) %29 = extractelement <4 x float> %28, i32 2 - %30 = fcmp ult float %25, 0.000000e+00 + %30 = fcmp ogt float %25, 0.000000e+00 %31 = select i1 %30, float %27, float %29 %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00) %33 = call float 
@llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00) diff --git a/test/CodeGen/R600/kernel-args.ll b/test/CodeGen/R600/kernel-args.ll new file mode 100644 index 0000000..247e316 --- /dev/null +++ b/test/CodeGen/R600/kernel-args.ll @@ -0,0 +1,455 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK +; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK + +; EG-CHECK-LABEL: @i8_arg +; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; SI-CHECK-LABEL: @i8_arg +; SI-CHECK: BUFFER_LOAD_UBYTE + +define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind { +entry: + %0 = zext i8 %in to i32 + store i32 %0, i32 addrspace(1)* %out, align 4 + ret void +} + +; EG-CHECK-LABEL: @i8_zext_arg +; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; SI-CHECK-LABEL: @i8_zext_arg +; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11 + +define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind { +entry: + %0 = zext i8 %in to i32 + store i32 %0, i32 addrspace(1)* %out, align 4 + ret void +} + +; EG-CHECK-LABEL: @i8_sext_arg +; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; SI-CHECK-LABEL: @i8_sext_arg +; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11 + +define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind { +entry: + %0 = sext i8 %in to i32 + store i32 %0, i32 addrspace(1)* %out, align 4 + ret void +} + +; EG-CHECK-LABEL: @i16_arg +; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; SI-CHECK-LABEL: @i16_arg +; SI-CHECK: BUFFER_LOAD_USHORT + +define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind { +entry: + %0 = zext i16 %in to i32 + store i32 %0, i32 addrspace(1)* %out, align 4 + ret void +} + +; EG-CHECK-LABEL: @i16_zext_arg +; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; SI-CHECK-LABEL: @i16_zext_arg 
+; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11 + +define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind { +entry: + %0 = zext i16 %in to i32 + store i32 %0, i32 addrspace(1)* %out, align 4 + ret void +} + +; EG-CHECK-LABEL: @i16_sext_arg +; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; SI-CHECK-LABEL: @i16_sext_arg +; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11 + +define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind { +entry: + %0 = sext i16 %in to i32 + store i32 %0, i32 addrspace(1)* %out, align 4 + ret void +} + +; EG-CHECK-LABEL: @i32_arg +; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z +; SI-CHECK-LABEL: @i32_arg +; S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11 +define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind { +entry: + store i32 %in, i32 addrspace(1)* %out, align 4 + ret void +} + +; EG-CHECK-LABEL: @f32_arg +; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z +; SI-CHECK-LABEL: @f32_arg +; S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11 +define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind { +entry: + store float %in, float addrspace(1)* %out, align 4 + ret void +} + +; EG-CHECK-LABEL: @v2i8_arg +; EG-CHECK: VTX_READ_8 +; EG-CHECK: VTX_READ_8 +; SI-CHECK-LABEL: @v2i8_arg +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +define void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) { +entry: + store <2 x i8> %in, <2 x i8> addrspace(1)* %out + ret void +} + +; EG-CHECK-LABEL: @v2i16_arg +; EG-CHECK: VTX_READ_16 +; EG-CHECK: VTX_READ_16 +; SI-CHECK-LABEL: @v2i16_arg +; SI-CHECK-DAG: BUFFER_LOAD_USHORT +; SI-CHECK-DAG: BUFFER_LOAD_USHORT +define void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) { +entry: + store <2 x i16> %in, <2 x i16> addrspace(1)* %out + ret void +} + +; EG-CHECK-LABEL: @v2i32_arg +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W +; SI-CHECK-LABEL: @v2i32_arg +; SI-CHECK: S_LOAD_DWORDX2 
s{{\[[0-9]:[0-9]\]}}, s[0:1], 11 +define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind { +entry: + store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4 + ret void +} + +; EG-CHECK-LABEL: @v2f32_arg +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W +; SI-CHECK-LABEL: @v2f32_arg +; SI-CHECK: S_LOAD_DWORDX2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 11 +define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind { +entry: + store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4 + ret void +} + +; EG-CHECK-LABEL: @v3i8_arg +; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40 +; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41 +; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42 +; SI-CHECK-LABEL: @v3i8_arg +define void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind { +entry: + store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4 + ret void +} + +; EG-CHECK-LABEL: @v3i16_arg +; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44 +; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46 +; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48 +; SI-CHECK-LABEL: @v3i16_arg +define void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind { +entry: + store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4 + ret void +} +; EG-CHECK-LABEL: @v3i32_arg +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W +; SI-CHECK-LABEL: @v3i32_arg +; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 13 +define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind { +entry: + store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4 + ret void +} + +; EG-CHECK-LABEL: @v3f32_arg +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W +; SI-CHECK-LABEL: @v3f32_arg +; SI-CHECK: S_LOAD_DWORDX4 
s{{\[[0-9]:[0-9]+\]}}, s[0:1], 13 +define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind { +entry: + store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4 + ret void +} + +; EG-CHECK-LABEL: @v4i8_arg +; EG-CHECK: VTX_READ_8 +; EG-CHECK: VTX_READ_8 +; EG-CHECK: VTX_READ_8 +; EG-CHECK: VTX_READ_8 +; SI-CHECK-LABEL: @v4i8_arg +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +define void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) { +entry: + store <4 x i8> %in, <4 x i8> addrspace(1)* %out + ret void +} + +; EG-CHECK-LABEL: @v4i16_arg +; EG-CHECK: VTX_READ_16 +; EG-CHECK: VTX_READ_16 +; EG-CHECK: VTX_READ_16 +; EG-CHECK: VTX_READ_16 +; SI-CHECK-LABEL: @v4i16_arg +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +define void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) { +entry: + store <4 x i16> %in, <4 x i16> addrspace(1)* %out + ret void +} + +; EG-CHECK-LABEL: @v4i32_arg +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X +; SI-CHECK-LABEL: @v4i32_arg +; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 13 +define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind { +entry: + store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4 + ret void +} + +; EG-CHECK-LABEL: @v4f32_arg +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X +; SI-CHECK-LABEL: @v4f32_arg +; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 13 +define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind { +entry: + store <4 x float> %in, <4 x float> 
addrspace(1)* %out, align 4 + ret void +} + +; EG-CHECK-LABEL: @v8i8_arg +; EG-CHECK: VTX_READ_8 +; EG-CHECK: VTX_READ_8 +; EG-CHECK: VTX_READ_8 +; EG-CHECK: VTX_READ_8 +; EG-CHECK: VTX_READ_8 +; EG-CHECK: VTX_READ_8 +; EG-CHECK: VTX_READ_8 +; EG-CHECK: VTX_READ_8 +; SI-CHECK-LABEL: @v8i8_arg +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +define void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) { +entry: + store <8 x i8> %in, <8 x i8> addrspace(1)* %out + ret void +} + +; EG-CHECK-LABEL: @v8i16_arg +; EG-CHECK: VTX_READ_16 +; EG-CHECK: VTX_READ_16 +; EG-CHECK: VTX_READ_16 +; EG-CHECK: VTX_READ_16 +; EG-CHECK: VTX_READ_16 +; EG-CHECK: VTX_READ_16 +; EG-CHECK: VTX_READ_16 +; EG-CHECK: VTX_READ_16 +; SI-CHECK-LABEL: @v8i16_arg +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +define void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) { +entry: + store <8 x i16> %in, <8 x i16> addrspace(1)* %out + ret void +} + +; EG-CHECK-LABEL: @v8i32_arg +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X +; SI-CHECK-LABEL: @v8i32_arg +; SI-CHECK: S_LOAD_DWORDX8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 17 +define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind { +entry: + store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4 + ret void +} + +; EG-CHECK-LABEL: 
@v8f32_arg +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X +; SI-CHECK-LABEL: @v8f32_arg +; SI-CHECK: S_LOAD_DWORDX8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 17 +define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind { +entry: + store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4 + ret void +} + +; EG-CHECK-LABEL: @v16i8_arg +; EG-CHECK: VTX_READ_8 +; EG-CHECK: VTX_READ_8 +; EG-CHECK: VTX_READ_8 +; EG-CHECK: VTX_READ_8 +; EG-CHECK: VTX_READ_8 +; EG-CHECK: VTX_READ_8 +; EG-CHECK: VTX_READ_8 +; EG-CHECK: VTX_READ_8 +; EG-CHECK: VTX_READ_8 +; EG-CHECK: VTX_READ_8 +; EG-CHECK: VTX_READ_8 +; EG-CHECK: VTX_READ_8 +; EG-CHECK: VTX_READ_8 +; EG-CHECK: VTX_READ_8 +; EG-CHECK: VTX_READ_8 +; EG-CHECK: VTX_READ_8 +; SI-CHECK-LABEL: @v16i8_arg +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +define void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) { +entry: + store <16 x i8> %in, <16 x i8> addrspace(1)* %out + ret void +} + +; EG-CHECK-LABEL: @v16i16_arg +; EG-CHECK: VTX_READ_16 +; EG-CHECK: VTX_READ_16 +; EG-CHECK: VTX_READ_16 +; EG-CHECK: VTX_READ_16 +; EG-CHECK: VTX_READ_16 +; EG-CHECK: VTX_READ_16 +; EG-CHECK: VTX_READ_16 +; EG-CHECK: VTX_READ_16 +; EG-CHECK: VTX_READ_16 +; 
EG-CHECK: VTX_READ_16 +; EG-CHECK: VTX_READ_16 +; EG-CHECK: VTX_READ_16 +; EG-CHECK: VTX_READ_16 +; EG-CHECK: VTX_READ_16 +; EG-CHECK: VTX_READ_16 +; EG-CHECK: VTX_READ_16 +; SI-CHECK-LABEL: @v16i16_arg +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +define void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) { +entry: + store <16 x i16> %in, <16 x i16> addrspace(1)* %out + ret void +} + +; EG-CHECK-LABEL: @v16i32_arg +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X +; SI-CHECK-LABEL: @v16i32_arg +; SI-CHECK: S_LOAD_DWORDX16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 25 +define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind { +entry: + store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4 + ret void +} + +; EG-CHECK-LABEL: @v16f32_arg +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y +; 
EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W +; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X +; SI-CHECK-LABEL: @v16f32_arg +; SI-CHECK: S_LOAD_DWORDX16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 25 +define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind { +entry: + store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/lds-output-queue.ll b/test/CodeGen/R600/lds-output-queue.ll new file mode 100644 index 0000000..63a4332 --- /dev/null +++ b/test/CodeGen/R600/lds-output-queue.ll @@ -0,0 +1,99 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood -verify-machineinstrs | FileCheck %s +; +; This test checks that the lds input queue will is empty at the end of +; the ALU clause. 
+ +; CHECK-LABEL: @lds_input_queue +; CHECK: LDS_READ_RET * OQAP +; CHECK-NOT: ALU clause +; CHECK: MOV * T{{[0-9]\.[XYZW]}}, OQAP + +@local_mem = internal addrspace(3) unnamed_addr global [2 x i32] [i32 1, i32 2], align 4 + +define void @lds_input_queue(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %index) { +entry: + %0 = getelementptr inbounds [2 x i32] addrspace(3)* @local_mem, i32 0, i32 %index + %1 = load i32 addrspace(3)* %0 + call void @llvm.AMDGPU.barrier.local() + + ; This will start a new clause for the vertex fetch + %2 = load i32 addrspace(1)* %in + %3 = add i32 %1, %2 + store i32 %3, i32 addrspace(1)* %out + ret void +} + +declare void @llvm.AMDGPU.barrier.local() + +; The machine scheduler does not do proper alias analysis and assumes that +; loads from global values (Note that a global value is different that a +; value from global memory. A global value is a value that is declared +; outside of a function, it can reside in any address space) alias with +; all other loads. +; +; This is a problem for scheduling the reads from the local data share (lds). +; These reads are implemented using two instructions. The first copies the +; data from lds into the lds output queue, and the second moves the data from +; the input queue into main memory. These two instructions don't have to be +; scheduled one after the other, but they do need to be scheduled in the same +; clause. 
The aliasing problem mentioned above causes problems when there is a +; load from global memory which immediately follows a load from a global value that +; has been declared in the local memory space: +; +; %0 = getelementptr inbounds [2 x i32] addrspace(3)* @local_mem, i32 0, i32 %index +; %1 = load i32 addrspace(3)* %0 +; %2 = load i32 addrspace(1)* %in +; +; The instruction selection phase will generate ISA that looks like this: +; %OQAP = LDS_READ_RET +; %vreg0 = MOV %OQAP +; %vreg1 = VTX_READ_32 +; %vreg2 = ADD_INT %vreg1, %vreg0 +; +; The bottom scheduler will schedule the two ALU instructions first: +; +; UNSCHEDULED: +; %OQAP = LDS_READ_RET +; %vreg1 = VTX_READ_32 +; +; SCHEDULED: +; +; vreg0 = MOV %OQAP +; vreg2 = ADD_INT %vreg1, %vreg2 +; +; The lack of proper aliasing results in the local memory read (LDS_READ_RET) +; to consider the global memory read (VTX_READ_32) has a chain dependency, so +; the global memory read will always be scheduled first. This will give us a +; final program which looks like this: +; +; Alu clause: +; %OQAP = LDS_READ_RET +; VTX clause: +; %vreg1 = VTX_READ_32 +; Alu clause: +; vreg0 = MOV %OQAP +; vreg2 = ADD_INT %vreg1, %vreg2 +; +; This is an illegal program because the OQAP def and use know occur in +; different ALU clauses. +; +; This test checks this scenario and makes sure it doesn't result in an +; illegal program. For now, we have fixed this issue by merging the +; LDS_READ_RET and MOV together during instruction selection and then +; expanding them after scheduling. Once the scheduler has better alias +; analysis, we should be able to keep these instructions sparate before +; scheduling. 
+; +; CHECK-LABEL: @local_global_alias +; CHECK: LDS_READ_RET +; CHECK-NOT: ALU clause +; CHECK MOV * T{{[0-9]\.[XYZW]}}, OQAP +define void @local_global_alias(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { +entry: + %0 = getelementptr inbounds [2 x i32] addrspace(3)* @local_mem, i32 0, i32 0 + %1 = load i32 addrspace(3)* %0 + %2 = load i32 addrspace(1)* %in + %3 = add i32 %2, %1 + store i32 %3, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/lds-size.ll b/test/CodeGen/R600/lds-size.ll new file mode 100644 index 0000000..2185180 --- /dev/null +++ b/test/CodeGen/R600/lds-size.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; This test makes sure we do not double count global values when they are +; used in different basic blocks. + +; CHECK-LABEL: @test +; CHECK: .long 166120 +; CHECK-NEXT: .long 1 +@lds = internal addrspace(3) unnamed_addr global i32 zeroinitializer, align 4 + +define void @test(i32 addrspace(1)* %out, i32 %cond) { +entry: + %0 = icmp eq i32 %cond, 0 + br i1 %0, label %if, label %else + +if: + store i32 1, i32 addrspace(3)* @lds + br label %endif + +else: + store i32 2, i32 addrspace(3)* @lds + br label %endif + +endif: + ret void +} diff --git a/test/CodeGen/R600/lit.local.cfg b/test/CodeGen/R600/lit.local.cfg index 36ee493..2d8930a 100644 --- a/test/CodeGen/R600/lit.local.cfg +++ b/test/CodeGen/R600/lit.local.cfg @@ -1,13 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - -def getRoot(config): - if not config.parent: - return config - return getRoot(config.parent) - -root = getRoot(config) - -targets = set(root.targets_to_build.split()) +targets = set(config.root.targets_to_build.split()) if not 'R600' in targets: config.unsupported = True - diff --git a/test/CodeGen/R600/literals.ll b/test/CodeGen/R600/literals.ll index 77b168e..47191e0 100644 --- a/test/CodeGen/R600/literals.ll +++ b/test/CodeGen/R600/literals.ll @@ -7,7 +7,8 @@ ; ADD_INT literal.x KC0[2].Z, 5 ; CHECK: @i32_literal -; 
CHECK: ADD_INT * T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x +; CHECK: ADD_INT {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x +; CHECK-NEXT: LSHR ; CHECK-NEXT: 5 define void @i32_literal(i32 addrspace(1)* %out, i32 %in) { entry: @@ -23,7 +24,8 @@ entry: ; ADD literal.x KC0[2].Z, 5.0 ; CHECK: @float_literal -; CHECK: ADD * T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x +; CHECK: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.0 define void @float_literal(float addrspace(1)* %out, float %in) { entry: @@ -31,3 +33,32 @@ entry: store float %0, float addrspace(1)* %out ret void } + +; Make sure inline literals are folded into REG_SEQUENCE instructions. +; CHECK: @inline_literal_reg_sequence +; CHECK: MOV {{\** *}}T[[GPR:[0-9]]].X, 0.0 +; CHECK-NEXT: MOV {{\** *}}T[[GPR]].Y, 0.0 +; CHECK-NEXT: MOV {{\** *}}T[[GPR]].Z, 0.0 +; CHECK-NEXT: MOV {{\** *}}T[[GPR]].W, 0.0 + +define void @inline_literal_reg_sequence(<4 x i32> addrspace(1)* %out) { +entry: + store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> addrspace(1)* %out + ret void +} + +; CHECK: @inline_literal_dot4 +; CHECK: DOT4 T[[GPR:[0-9]]].X, 1.0 +; CHECK-NEXT: DOT4 T[[GPR]].Y (MASKED), 1.0 +; CHECK-NEXT: DOT4 T[[GPR]].Z (MASKED), 1.0 +; CHECK-NEXT: DOT4 * T[[GPR]].W (MASKED), 1.0 +define void @inline_literal_dot4(float addrspace(1)* %out) { +entry: + %0 = call float @llvm.AMDGPU.dp4(<4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>) + store float %0, float addrspace(1)* %out + ret void +} + +declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 + +attributes #1 = { readnone } diff --git a/test/CodeGen/R600/llvm.AMDGPU.imax.ll b/test/CodeGen/R600/llvm.AMDGPU.imax.ll index 3e854c8..1336f4e 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.imax.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.imax.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s +;RUN: llc < %s -march=r600 -mcpu=verde 
-verify-machineinstrs | FileCheck %s ;CHECK: V_MAX_I32_e32 diff --git a/test/CodeGen/R600/llvm.AMDGPU.imin.ll b/test/CodeGen/R600/llvm.AMDGPU.imin.ll index e227bf8..3435ea4 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.imin.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.imin.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s ;CHECK: V_MIN_I32_e32 diff --git a/test/CodeGen/R600/llvm.AMDGPU.mul.ll b/test/CodeGen/R600/llvm.AMDGPU.mul.ll index cc0732b..83b56a5 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.mul.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.mul.ll @@ -2,16 +2,16 @@ ;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) - %r1 = call float @llvm.R600.load.input(i32 1) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 + %r1 = extractelement <4 x float> %reg0, i32 1 %r2 = call float @llvm.AMDGPU.mul( float %r0, float %r1) - call void @llvm.AMDGPU.store.output(float %r2, i32 0) + %vec = insertelement <4 x float> undef, float %r2, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone - -declare void @llvm.AMDGPU.store.output(float, i32) - declare float @llvm.AMDGPU.mul(float ,float ) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="0" }
\ No newline at end of file diff --git a/test/CodeGen/R600/llvm.AMDGPU.trunc.ll b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll index 7627783..e6bb2c4 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.trunc.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll @@ -1,8 +1,8 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s -; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s ; R600-CHECK: @amdgpu_trunc -; R600-CHECK: TRUNC * T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; R600-CHECK: TRUNC T{{[0-9]+\.[XYZW]}}, KC0[2].Z ; SI-CHECK: @amdgpu_trunc ; SI-CHECK: V_TRUNC_F32 diff --git a/test/CodeGen/R600/llvm.AMDGPU.umax.ll b/test/CodeGen/R600/llvm.AMDGPU.umax.ll index 7699c04..4cfa133 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.umax.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.umax.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s ;CHECK: V_MAX_U32_e32 diff --git a/test/CodeGen/R600/llvm.AMDGPU.umin.ll b/test/CodeGen/R600/llvm.AMDGPU.umin.ll index a911ad9..14af051 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.umin.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.umin.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s ;CHECK: V_MIN_U32_e32 diff --git a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll index e45722c..0438ecc 100644 --- a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll +++ b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s ;CHECK: S_MOV_B32 ;CHECK-NEXT: V_INTERP_MOV_F32 diff --git a/test/CodeGen/R600/llvm.SI.imageload.ll b/test/CodeGen/R600/llvm.SI.imageload.ll 
index 0adcdfc..59e00f0 100644 --- a/test/CodeGen/R600/llvm.SI.imageload.ll +++ b/test/CodeGen/R600/llvm.SI.imageload.ll @@ -1,15 +1,15 @@ -;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s -;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 15, 0, 0, -1 -;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+_VGPR[0-9]+}}, 3, 0, 0, 0 -;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+}}, 2, 0, 0, 0 -;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+}}, 1, 0, 0, 0 -;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+}}, 4, 0, 0, 0 -;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+}}, 8, 0, 0, 0 -;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+_VGPR[0-9]+}}, 5, 0, 0, 0 -;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+_VGPR[0-9]+}}, 12, 0, 0, -1 -;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7, 0, 0, 0 -;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+}}, 8, 0, 0, -1 +;CHECK-DAG: IMAGE_LOAD {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, -1 +;CHECK-DAG: IMAGE_LOAD_MIP {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, 0 +;CHECK-DAG: IMAGE_LOAD_MIP {{v[0-9]+}}, 2, 0, 0, 0 +;CHECK-DAG: IMAGE_LOAD_MIP {{v[0-9]+}}, 1, 0, 0, 0 +;CHECK-DAG: IMAGE_LOAD_MIP {{v[0-9]+}}, 4, 0, 0, 0 +;CHECK-DAG: IMAGE_LOAD_MIP {{v[0-9]+}}, 8, 0, 0, 0 +;CHECK-DAG: IMAGE_LOAD_MIP {{v\[[0-9]+:[0-9]+\]}}, 5, 0, 0, 0 +;CHECK-DAG: IMAGE_LOAD_MIP {{v\[[0-9]+:[0-9]+\]}}, 12, 0, 0, -1 +;CHECK-DAG: IMAGE_LOAD_MIP {{v\[[0-9]+:[0-9]+\]}}, 7, 0, 0, 0 +;CHECK-DAG: IMAGE_LOAD_MIP {{v[0-9]+}}, 8, 0, 0, -1 define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0 @@ -23,25 +23,25 @@ define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { %v15 = insertelement <4 x i32> undef, i32 %a4, i32 2 %v16 = insertelement <4 x i32> undef, i32 %a4, i32 3 %res1 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v1, - <8 x i32> undef, i32 1) + <32 x i8> undef, i32 1) %res2 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v2, - <8 x i32> undef, i32 2) + <32 x i8> undef, i32 2) 
%res3 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v3, - <8 x i32> undef, i32 3) + <32 x i8> undef, i32 3) %res4 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v4, - <8 x i32> undef, i32 4) + <32 x i8> undef, i32 4) %res5 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v5, - <8 x i32> undef, i32 5) + <32 x i8> undef, i32 5) %res6 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v6, - <8 x i32> undef, i32 6) + <32 x i8> undef, i32 6) %res10 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v10, - <8 x i32> undef, i32 10) + <32 x i8> undef, i32 10) %res11 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v11, - <8 x i32> undef, i32 11) + <32 x i8> undef, i32 11) %res15 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v15, - <8 x i32> undef, i32 15) + <32 x i8> undef, i32 15) %res16 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v16, - <8 x i32> undef, i32 16) + <32 x i8> undef, i32 16) %e1 = extractelement <4 x i32> %res1, i32 0 %e2 = extractelement <4 x i32> %res2, i32 1 %e3 = extractelement <4 x i32> %res3, i32 2 @@ -82,6 +82,50 @@ define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { ret void } -declare <4 x i32> @llvm.SI.imageload.(<4 x i32>, <8 x i32>, i32) readnone +; Test that ccordinates are stored in vgprs and not sgprs +; CHECK: vgpr_coords +; CHECK: IMAGE_LOAD_MIP {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}} +define void @vgpr_coords(float addrspace(2)* addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { +main_body: + %20 = getelementptr float addrspace(2)* addrspace(2)* %0, i32 0 + %21 = load float addrspace(2)* addrspace(2)* %20, !tbaa !2 + %22 = getelementptr float addrspace(2)* %21, i32 0 + %23 = load float addrspace(2)* %22, !tbaa !2, !invariant.load !1 + %24 = getelementptr float addrspace(2)* %21, i32 1 + %25 = load float addrspace(2)* 
%24, !tbaa !2, !invariant.load !1 + %26 = getelementptr float addrspace(2)* %21, i32 4 + %27 = load float addrspace(2)* %26, !tbaa !2, !invariant.load !1 + %28 = getelementptr <32 x i8> addrspace(2)* %2, i32 0 + %29 = load <32 x i8> addrspace(2)* %28, !tbaa !2 + %30 = bitcast float %27 to i32 + %31 = bitcast float %23 to i32 + %32 = bitcast float %25 to i32 + %33 = insertelement <4 x i32> undef, i32 %31, i32 0 + %34 = insertelement <4 x i32> %33, i32 %32, i32 1 + %35 = insertelement <4 x i32> %34, i32 %30, i32 2 + %36 = insertelement <4 x i32> %35, i32 undef, i32 3 + %37 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %36, <32 x i8> %29, i32 2) + %38 = extractelement <4 x i32> %37, i32 0 + %39 = extractelement <4 x i32> %37, i32 1 + %40 = extractelement <4 x i32> %37, i32 2 + %41 = extractelement <4 x i32> %37, i32 3 + %42 = bitcast i32 %38 to float + %43 = bitcast i32 %39 to float + %44 = bitcast i32 %40 to float + %45 = bitcast i32 %41 to float + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %42, float %43, float %44, float %45) + ret void +} + +declare <4 x i32> @llvm.SI.imageload.(<4 x i32>, <32 x i8>, i32) readnone +; Function Attrs: nounwind readnone +declare <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32>, <32 x i8>, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="0" } +attributes #1 = { nounwind readnone } + +!0 = metadata !{metadata !"const", null} +!1 = metadata !{} +!2 = metadata !{metadata !0, metadata !0, i64 0, i32 1} diff --git a/test/CodeGen/R600/llvm.SI.resinfo.ll b/test/CodeGen/R600/llvm.SI.resinfo.ll index eb31514..af3afc1 100644 --- a/test/CodeGen/R600/llvm.SI.resinfo.ll +++ b/test/CodeGen/R600/llvm.SI.resinfo.ll @@ -1,40 +1,40 @@ -;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s -;CHECK: IMAGE_GET_RESINFO {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 15, 0, 0, -1 
-;CHECK: IMAGE_GET_RESINFO {{VGPR[0-9]+_VGPR[0-9]+}}, 3, 0, 0, 0 -;CHECK: IMAGE_GET_RESINFO {{VGPR[0-9]+}}, 2, 0, 0, 0 -;CHECK: IMAGE_GET_RESINFO {{VGPR[0-9]+}}, 1, 0, 0, 0 -;CHECK: IMAGE_GET_RESINFO {{VGPR[0-9]+}}, 4, 0, 0, 0 -;CHECK: IMAGE_GET_RESINFO {{VGPR[0-9]+}}, 8, 0, 0, 0 -;CHECK: IMAGE_GET_RESINFO {{VGPR[0-9]+_VGPR[0-9]+}}, 5, 0, 0, 0 -;CHECK: IMAGE_GET_RESINFO {{VGPR[0-9]+_VGPR[0-9]+}}, 9, 0, 0, 0 -;CHECK: IMAGE_GET_RESINFO {{VGPR[0-9]+_VGPR[0-9]+}}, 6, 0, 0, 0 -;CHECK: IMAGE_GET_RESINFO {{VGPR[0-9]+_VGPR[0-9]+}}, 10, 0, 0, -1 -;CHECK: IMAGE_GET_RESINFO {{VGPR[0-9]+_VGPR[0-9]+}}, 12, 0, 0, -1 -;CHECK: IMAGE_GET_RESINFO {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7, 0, 0, 0 -;CHECK: IMAGE_GET_RESINFO {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 11, 0, 0, 0 -;CHECK: IMAGE_GET_RESINFO {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 13, 0, 0, 0 -;CHECK: IMAGE_GET_RESINFO {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 14, 0, 0, 0 -;CHECK: IMAGE_GET_RESINFO {{VGPR[0-9]+}}, 8, 0, 0, -1 +;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, -1 +;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, 0 +;CHECK: IMAGE_GET_RESINFO {{v[0-9]+}}, 2, 0, 0, 0 +;CHECK: IMAGE_GET_RESINFO {{v[0-9]+}}, 1, 0, 0, 0 +;CHECK: IMAGE_GET_RESINFO {{v[0-9]+}}, 4, 0, 0, 0 +;CHECK: IMAGE_GET_RESINFO {{v[0-9]+}}, 8, 0, 0, 0 +;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 5, 0, 0, 0 +;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 9, 0, 0, 0 +;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 6, 0, 0, 0 +;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 10, 0, 0, -1 +;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 12, 0, 0, -1 +;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 7, 0, 0, 0 +;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 11, 0, 0, 0 +;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 13, 0, 0, 0 +;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 14, 0, 0, 0 +;CHECK: IMAGE_GET_RESINFO {{v[0-9]+}}, 8, 0, 0, -1 define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, 
i32 %a8, i32 %a9, i32 %a10, i32 %a11, i32 %a12, i32 %a13, i32 %a14, i32 %a15, i32 %a16) { - %res1 = call <4 x i32> @llvm.SI.resinfo(i32 %a1, <8 x i32> undef, i32 1) - %res2 = call <4 x i32> @llvm.SI.resinfo(i32 %a2, <8 x i32> undef, i32 2) - %res3 = call <4 x i32> @llvm.SI.resinfo(i32 %a3, <8 x i32> undef, i32 3) - %res4 = call <4 x i32> @llvm.SI.resinfo(i32 %a4, <8 x i32> undef, i32 4) - %res5 = call <4 x i32> @llvm.SI.resinfo(i32 %a5, <8 x i32> undef, i32 5) - %res6 = call <4 x i32> @llvm.SI.resinfo(i32 %a6, <8 x i32> undef, i32 6) - %res7 = call <4 x i32> @llvm.SI.resinfo(i32 %a7, <8 x i32> undef, i32 7) - %res8 = call <4 x i32> @llvm.SI.resinfo(i32 %a8, <8 x i32> undef, i32 8) - %res9 = call <4 x i32> @llvm.SI.resinfo(i32 %a9, <8 x i32> undef, i32 9) - %res10 = call <4 x i32> @llvm.SI.resinfo(i32 %a10, <8 x i32> undef, i32 10) - %res11 = call <4 x i32> @llvm.SI.resinfo(i32 %a11, <8 x i32> undef, i32 11) - %res12 = call <4 x i32> @llvm.SI.resinfo(i32 %a12, <8 x i32> undef, i32 12) - %res13 = call <4 x i32> @llvm.SI.resinfo(i32 %a13, <8 x i32> undef, i32 13) - %res14 = call <4 x i32> @llvm.SI.resinfo(i32 %a14, <8 x i32> undef, i32 14) - %res15 = call <4 x i32> @llvm.SI.resinfo(i32 %a15, <8 x i32> undef, i32 15) - %res16 = call <4 x i32> @llvm.SI.resinfo(i32 %a16, <8 x i32> undef, i32 16) + %res1 = call <4 x i32> @llvm.SI.resinfo(i32 %a1, <32 x i8> undef, i32 1) + %res2 = call <4 x i32> @llvm.SI.resinfo(i32 %a2, <32 x i8> undef, i32 2) + %res3 = call <4 x i32> @llvm.SI.resinfo(i32 %a3, <32 x i8> undef, i32 3) + %res4 = call <4 x i32> @llvm.SI.resinfo(i32 %a4, <32 x i8> undef, i32 4) + %res5 = call <4 x i32> @llvm.SI.resinfo(i32 %a5, <32 x i8> undef, i32 5) + %res6 = call <4 x i32> @llvm.SI.resinfo(i32 %a6, <32 x i8> undef, i32 6) + %res7 = call <4 x i32> @llvm.SI.resinfo(i32 %a7, <32 x i8> undef, i32 7) + %res8 = call <4 x i32> @llvm.SI.resinfo(i32 %a8, <32 x i8> undef, i32 8) + %res9 = call <4 x i32> @llvm.SI.resinfo(i32 %a9, <32 x i8> undef, i32 9) + %res10 = 
call <4 x i32> @llvm.SI.resinfo(i32 %a10, <32 x i8> undef, i32 10) + %res11 = call <4 x i32> @llvm.SI.resinfo(i32 %a11, <32 x i8> undef, i32 11) + %res12 = call <4 x i32> @llvm.SI.resinfo(i32 %a12, <32 x i8> undef, i32 12) + %res13 = call <4 x i32> @llvm.SI.resinfo(i32 %a13, <32 x i8> undef, i32 13) + %res14 = call <4 x i32> @llvm.SI.resinfo(i32 %a14, <32 x i8> undef, i32 14) + %res15 = call <4 x i32> @llvm.SI.resinfo(i32 %a15, <32 x i8> undef, i32 15) + %res16 = call <4 x i32> @llvm.SI.resinfo(i32 %a16, <32 x i8> undef, i32 16) %e1 = extractelement <4 x i32> %res1, i32 0 %e2 = extractelement <4 x i32> %res2, i32 1 %e3 = extractelement <4 x i32> %res3, i32 2 @@ -105,6 +105,6 @@ define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, ret void } -declare <4 x i32> @llvm.SI.resinfo(i32, <8 x i32>, i32) readnone +declare <4 x i32> @llvm.SI.resinfo(i32, <32 x i8>, i32) readnone declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/R600/llvm.SI.sample-masked.ll b/test/CodeGen/R600/llvm.SI.sample-masked.ll new file mode 100644 index 0000000..e5e4ec4 --- /dev/null +++ b/test/CodeGen/R600/llvm.SI.sample-masked.ll @@ -0,0 +1,93 @@ +;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s + +; CHECK-LABEL: @v1 +; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 13 +define void @v1(i32 %a1) { +entry: + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 + %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0) + %2 = extractelement <4 x float> %1, i32 0 + %3 = extractelement <4 x float> %1, i32 2 + %4 = extractelement <4 x float> %1, i32 3 + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4) + ret void +} + +; CHECK-LABEL: @v2 +; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 11 +define void @v2(i32 %a1) { +entry: + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 + %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x 
i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0) + %2 = extractelement <4 x float> %1, i32 0 + %3 = extractelement <4 x float> %1, i32 1 + %4 = extractelement <4 x float> %1, i32 3 + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4) + ret void +} + +; CHECK-LABEL: @v3 +; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 14 +define void @v3(i32 %a1) { +entry: + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 + %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0) + %2 = extractelement <4 x float> %1, i32 1 + %3 = extractelement <4 x float> %1, i32 2 + %4 = extractelement <4 x float> %1, i32 3 + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4) + ret void +} + +; CHECK-LABEL: @v4 +; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 7 +define void @v4(i32 %a1) { +entry: + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 + %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0) + %2 = extractelement <4 x float> %1, i32 0 + %3 = extractelement <4 x float> %1, i32 1 + %4 = extractelement <4 x float> %1, i32 2 + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4) + ret void +} + +; CHECK-LABEL: @v5 +; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 10 +define void @v5(i32 %a1) { +entry: + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 + %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0) + %2 = extractelement <4 x float> %1, i32 1 + %3 = extractelement <4 x float> %1, i32 3 + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3) + ret void +} + +; CHECK-LABEL: @v6 +; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 6 +define void @v6(i32 %a1) { +entry: + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 + %1 = call <4 x float> 
@llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0) + %2 = extractelement <4 x float> %1, i32 1 + %3 = extractelement <4 x float> %1, i32 2 + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3) + ret void +} + +; CHECK-LABEL: @v7 +; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 9 +define void @v7(i32 %a1) { +entry: + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 + %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0) + %2 = extractelement <4 x float> %1, i32 0 + %3 = extractelement <4 x float> %1, i32 3 + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3) + ret void +} + +declare <4 x float> @llvm.SI.sample.v1i32(<1 x i32>, <32 x i8>, <16 x i8>, i32) readnone + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/R600/llvm.SI.sample.ll b/test/CodeGen/R600/llvm.SI.sample.ll index 7655996..d41737c 100644 --- a/test/CodeGen/R600/llvm.SI.sample.ll +++ b/test/CodeGen/R600/llvm.SI.sample.ll @@ -1,21 +1,21 @@ -;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s -;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 15 -;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 3 -;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+}}, 2 -;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+}}, 1 -;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+}}, 4 -;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+}}, 8 -;CHECK-DAG: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 5 -;CHECK-DAG: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 9 -;CHECK-DAG: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 6 -;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 10 -;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 12 -;CHECK-DAG: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7 -;CHECK-DAG: IMAGE_SAMPLE_C 
{{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 11 -;CHECK-DAG: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 13 -;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 14 -;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+}}, 8 +;CHECK-DAG: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 15 +;CHECK-DAG: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 3 +;CHECK-DAG: IMAGE_SAMPLE {{v[0-9]+}}, 2 +;CHECK-DAG: IMAGE_SAMPLE {{v[0-9]+}}, 1 +;CHECK-DAG: IMAGE_SAMPLE {{v[0-9]+}}, 4 +;CHECK-DAG: IMAGE_SAMPLE {{v[0-9]+}}, 8 +;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 5 +;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 9 +;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 6 +;CHECK-DAG: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 10 +;CHECK-DAG: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 12 +;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 7 +;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 11 +;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 13 +;CHECK-DAG: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 14 +;CHECK-DAG: IMAGE_SAMPLE {{v[0-9]+}}, 8 define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0 @@ -35,37 +35,37 @@ define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { %v15 = insertelement <4 x i32> undef, i32 %a4, i32 2 %v16 = insertelement <4 x i32> undef, i32 %a4, i32 3 %res1 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v1, - <8 x i32> undef, <4 x i32> undef, i32 1) + <32 x i8> undef, <16 x i8> undef, i32 1) %res2 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v2, - <8 x i32> undef, <4 x i32> undef, i32 2) + <32 x i8> undef, <16 x i8> undef, i32 2) %res3 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v3, - <8 x i32> undef, <4 x i32> undef, i32 3) + <32 x i8> undef, <16 x i8> undef, i32 3) %res4 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v4, - <8 x i32> undef, <4 x i32> undef, i32 4) + <32 x i8> undef, <16 x i8> undef, i32 4) %res5 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v5, - <8 x i32> undef, <4 x i32> undef, i32 5) + <32 x i8> undef, 
<16 x i8> undef, i32 5) %res6 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v6, - <8 x i32> undef, <4 x i32> undef, i32 6) + <32 x i8> undef, <16 x i8> undef, i32 6) %res7 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v7, - <8 x i32> undef, <4 x i32> undef, i32 7) + <32 x i8> undef, <16 x i8> undef, i32 7) %res8 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v8, - <8 x i32> undef, <4 x i32> undef, i32 8) + <32 x i8> undef, <16 x i8> undef, i32 8) %res9 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v9, - <8 x i32> undef, <4 x i32> undef, i32 9) + <32 x i8> undef, <16 x i8> undef, i32 9) %res10 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v10, - <8 x i32> undef, <4 x i32> undef, i32 10) + <32 x i8> undef, <16 x i8> undef, i32 10) %res11 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v11, - <8 x i32> undef, <4 x i32> undef, i32 11) + <32 x i8> undef, <16 x i8> undef, i32 11) %res12 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v12, - <8 x i32> undef, <4 x i32> undef, i32 12) + <32 x i8> undef, <16 x i8> undef, i32 12) %res13 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v13, - <8 x i32> undef, <4 x i32> undef, i32 13) + <32 x i8> undef, <16 x i8> undef, i32 13) %res14 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v14, - <8 x i32> undef, <4 x i32> undef, i32 14) + <32 x i8> undef, <16 x i8> undef, i32 14) %res15 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v15, - <8 x i32> undef, <4 x i32> undef, i32 15) + <32 x i8> undef, <16 x i8> undef, i32 15) %res16 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v16, - <8 x i32> undef, <4 x i32> undef, i32 16) + <32 x i8> undef, <16 x i8> undef, i32 16) %e1 = extractelement <4 x float> %res1, i32 0 %e2 = extractelement <4 x float> %res2, i32 1 %e3 = extractelement <4 x float> %res3, i32 2 @@ -135,6 +135,23 @@ define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { ret void } -declare <4 x float> @llvm.SI.sample.(<4 x i32>, <8 x i32>, <4 x i32>, i32) readnone +; CHECK: @v1 +; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 15 
+define void @v1(i32 %a1) { +entry: + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 + %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0) + %2 = extractelement <4 x float> %1, i32 0 + %3 = extractelement <4 x float> %1, i32 1 + %4 = extractelement <4 x float> %1, i32 2 + %5 = extractelement <4 x float> %1, i32 3 + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %5) + ret void +} + + +declare <4 x float> @llvm.SI.sample.v1i32(<1 x i32>, <32 x i8>, <16 x i8>, i32) readnone + +declare <4 x float> @llvm.SI.sample.(<4 x i32>, <32 x i8>, <16 x i8>, i32) readnone declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/R600/llvm.SI.sampled.ll b/test/CodeGen/R600/llvm.SI.sampled.ll index 3b05551..21ac725 100644 --- a/test/CodeGen/R600/llvm.SI.sampled.ll +++ b/test/CodeGen/R600/llvm.SI.sampled.ll @@ -1,21 +1,21 @@ -;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s -;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 15 -;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+_VGPR[0-9]+}}, 3 -;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+}}, 2 -;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+}}, 1 -;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+}}, 4 -;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+}}, 8 -;CHECK-DAG: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+}}, 5 -;CHECK-DAG: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+}}, 9 -;CHECK-DAG: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+}}, 6 -;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+_VGPR[0-9]+}}, 10 -;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+_VGPR[0-9]+}}, 12 -;CHECK-DAG: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7 -;CHECK-DAG: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 11 -;CHECK-DAG: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 13 -;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 14 
-;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+}}, 8 +;CHECK-DAG: IMAGE_SAMPLE_D {{v\[[0-9]+:[0-9]+\]}}, 15 +;CHECK-DAG: IMAGE_SAMPLE_D {{v\[[0-9]+:[0-9]+\]}}, 3 +;CHECK-DAG: IMAGE_SAMPLE_D {{v[0-9]+}}, 2 +;CHECK-DAG: IMAGE_SAMPLE_D {{v[0-9]+}}, 1 +;CHECK-DAG: IMAGE_SAMPLE_D {{v[0-9]+}}, 4 +;CHECK-DAG: IMAGE_SAMPLE_D {{v[0-9]+}}, 8 +;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 5 +;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 9 +;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 6 +;CHECK-DAG: IMAGE_SAMPLE_D {{v\[[0-9]+:[0-9]+\]}}, 10 +;CHECK-DAG: IMAGE_SAMPLE_D {{v\[[0-9]+:[0-9]+\]}}, 12 +;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 7 +;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 11 +;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 13 +;CHECK-DAG: IMAGE_SAMPLE_D {{v\[[0-9]+:[0-9]+\]}}, 14 +;CHECK-DAG: IMAGE_SAMPLE_D {{v[0-9]+}}, 8 define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0 @@ -35,37 +35,37 @@ define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { %v15 = insertelement <4 x i32> undef, i32 %a4, i32 2 %v16 = insertelement <4 x i32> undef, i32 %a4, i32 3 %res1 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v1, - <8 x i32> undef, <4 x i32> undef, i32 1) + <32 x i8> undef, <16 x i8> undef, i32 1) %res2 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v2, - <8 x i32> undef, <4 x i32> undef, i32 2) + <32 x i8> undef, <16 x i8> undef, i32 2) %res3 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v3, - <8 x i32> undef, <4 x i32> undef, i32 3) + <32 x i8> undef, <16 x i8> undef, i32 3) %res4 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v4, - <8 x i32> undef, <4 x i32> undef, i32 4) + <32 x i8> undef, <16 x i8> undef, i32 4) %res5 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v5, - <8 x i32> undef, <4 x i32> undef, i32 5) + <32 x i8> undef, <16 x i8> undef, i32 5) %res6 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v6, - <8 x i32> undef, <4 x i32> undef, i32 6) + <32 x i8> 
undef, <16 x i8> undef, i32 6) %res7 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v7, - <8 x i32> undef, <4 x i32> undef, i32 7) + <32 x i8> undef, <16 x i8> undef, i32 7) %res8 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v8, - <8 x i32> undef, <4 x i32> undef, i32 8) + <32 x i8> undef, <16 x i8> undef, i32 8) %res9 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v9, - <8 x i32> undef, <4 x i32> undef, i32 9) + <32 x i8> undef, <16 x i8> undef, i32 9) %res10 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v10, - <8 x i32> undef, <4 x i32> undef, i32 10) + <32 x i8> undef, <16 x i8> undef, i32 10) %res11 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v11, - <8 x i32> undef, <4 x i32> undef, i32 11) + <32 x i8> undef, <16 x i8> undef, i32 11) %res12 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v12, - <8 x i32> undef, <4 x i32> undef, i32 12) + <32 x i8> undef, <16 x i8> undef, i32 12) %res13 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v13, - <8 x i32> undef, <4 x i32> undef, i32 13) + <32 x i8> undef, <16 x i8> undef, i32 13) %res14 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v14, - <8 x i32> undef, <4 x i32> undef, i32 14) + <32 x i8> undef, <16 x i8> undef, i32 14) %res15 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v15, - <8 x i32> undef, <4 x i32> undef, i32 15) + <32 x i8> undef, <16 x i8> undef, i32 15) %res16 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v16, - <8 x i32> undef, <4 x i32> undef, i32 16) + <32 x i8> undef, <16 x i8> undef, i32 16) %e1 = extractelement <4 x float> %res1, i32 0 %e2 = extractelement <4 x float> %res2, i32 1 %e3 = extractelement <4 x float> %res3, i32 2 @@ -135,6 +135,6 @@ define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { ret void } -declare <4 x float> @llvm.SI.sampled.(<4 x i32>, <8 x i32>, <4 x i32>, i32) readnone +declare <4 x float> @llvm.SI.sampled.(<4 x i32>, <32 x i8>, <16 x i8>, i32) readnone declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git 
a/test/CodeGen/R600/llvm.SI.tbuffer.store.ll b/test/CodeGen/R600/llvm.SI.tbuffer.store.ll new file mode 100644 index 0000000..fa7c3ca --- /dev/null +++ b/test/CodeGen/R600/llvm.SI.tbuffer.store.ll @@ -0,0 +1,44 @@ +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s + +;CHECK_LABEL: @test1 +;CHECK: TBUFFER_STORE_FORMAT_XYZW {{v\[[0-9]+:[0-9]+\]}}, 32, -1, 0, -1, 0, 14, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0 +define void @test1(i32 %a1, i32 %vaddr) { + %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0 + call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata, + i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 1, i32 0, i32 1, + i32 1, i32 0) + ret void +} + +;CHECK_LABEL: @test2 +;CHECK: TBUFFER_STORE_FORMAT_XYZ {{v\[[0-9]+:[0-9]+\]}}, 24, -1, 0, -1, 0, 13, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0 +define void @test2(i32 %a1, i32 %vaddr) { + %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0 + call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata, + i32 3, i32 %vaddr, i32 0, i32 24, i32 13, i32 4, i32 1, i32 0, i32 1, + i32 1, i32 0) + ret void +} + +;CHECK_LABEL: @test3 +;CHECK: TBUFFER_STORE_FORMAT_XY {{v\[[0-9]+:[0-9]+\]}}, 16, -1, 0, -1, 0, 11, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0 +define void @test3(i32 %a1, i32 %vaddr) { + %vdata = insertelement <2 x i32> undef, i32 %a1, i32 0 + call void @llvm.SI.tbuffer.store.v2i32(<16 x i8> undef, <2 x i32> %vdata, + i32 2, i32 %vaddr, i32 0, i32 16, i32 11, i32 4, i32 1, i32 0, i32 1, + i32 1, i32 0) + ret void +} + +;CHECK_LABEL: @test4 +;CHECK: TBUFFER_STORE_FORMAT_X {{v[0-9]+}}, 8, -1, 0, -1, 0, 4, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0 +define void @test4(i32 %vdata, i32 %vaddr) { + call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %vdata, + i32 1, i32 %vaddr, i32 0, i32 8, i32 4, i32 4, i32 1, i32 0, i32 1, + i32 1, i32 0) + ret void +} + +declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) +declare void @llvm.SI.tbuffer.store.v2i32(<16 x i8>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) +declare void @llvm.SI.tbuffer.store.v4i32(<16 x i8>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) diff --git a/test/CodeGen/R600/llvm.SI.tid.ll b/test/CodeGen/R600/llvm.SI.tid.ll index 238d9f2..fe17304 100644 --- a/test/CodeGen/R600/llvm.SI.tid.ll +++ b/test/CodeGen/R600/llvm.SI.tid.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s ;CHECK: V_MBCNT_LO_U32_B32_e64 ;CHECK: V_MBCNT_HI_U32_B32_e32 diff --git a/test/CodeGen/R600/llvm.cos.ll b/test/CodeGen/R600/llvm.cos.ll index 8fb4559..aaf2305 100644 --- a/test/CodeGen/R600/llvm.cos.ll +++ b/test/CodeGen/R600/llvm.cos.ll @@ -5,15 +5,15 @@ ;CHECK: ADD * ;CHECK: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 %r1 = call float @llvm.cos.f32(float %r0) - call void @llvm.AMDGPU.store.output(float %r1, i32 0) + %vec = insertelement <4 x float> undef, float %r1, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } declare float @llvm.cos.f32(float) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -declare float @llvm.R600.load.input(i32) readnone - -declare void @llvm.AMDGPU.store.output(float, i32) +attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/llvm.floor.ll b/test/CodeGen/R600/llvm.floor.ll new file mode 100644 index 0000000..f7071cd --- /dev/null +++ b/test/CodeGen/R600/llvm.floor.ll @@ -0,0 +1,54 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK + +; R600-CHECK: @f32 +; 
R600-CHECK: FLOOR +; SI-CHECK: @f32 +; SI-CHECK: V_FLOOR_F32_e32 +define void @f32(float addrspace(1)* %out, float %in) { +entry: + %0 = call float @llvm.floor.f32(float %in) + store float %0, float addrspace(1)* %out + ret void +} + +; R600-CHECK: @v2f32 +; R600-CHECK: FLOOR +; R600-CHECK: FLOOR +; SI-CHECK: @v2f32 +; SI-CHECK: V_FLOOR_F32_e32 +; SI-CHECK: V_FLOOR_F32_e32 +define void @v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) { +entry: + %0 = call <2 x float> @llvm.floor.v2f32(<2 x float> %in) + store <2 x float> %0, <2 x float> addrspace(1)* %out + ret void +} + +; R600-CHECK: @v4f32 +; R600-CHECK: FLOOR +; R600-CHECK: FLOOR +; R600-CHECK: FLOOR +; R600-CHECK: FLOOR +; SI-CHECK: @v4f32 +; SI-CHECK: V_FLOOR_F32_e32 +; SI-CHECK: V_FLOOR_F32_e32 +; SI-CHECK: V_FLOOR_F32_e32 +; SI-CHECK: V_FLOOR_F32_e32 +define void @v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) { +entry: + %0 = call <4 x float> @llvm.floor.v4f32(<4 x float> %in) + store <4 x float> %0, <4 x float> addrspace(1)* %out + ret void +} + +; Function Attrs: nounwind readonly +declare float @llvm.floor.f32(float) #0 + +; Function Attrs: nounwind readonly +declare <2 x float> @llvm.floor.v2f32(<2 x float>) #0 + +; Function Attrs: nounwind readonly +declare <4 x float> @llvm.floor.v4f32(<4 x float>) #0 + +attributes #0 = { nounwind readonly } diff --git a/test/CodeGen/R600/llvm.pow.ll b/test/CodeGen/R600/llvm.pow.ll index 0f51cf4..b587d2b 100644 --- a/test/CodeGen/R600/llvm.pow.ll +++ b/test/CodeGen/R600/llvm.pow.ll @@ -4,16 +4,16 @@ ;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}} ;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) - %r1 = call float @llvm.R600.load.input(i32 1) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 + %r1 = extractelement <4 x float> %reg0, i32 1 %r2 = call float @llvm.pow.f32( float %r0, float %r1) - call void 
@llvm.AMDGPU.store.output(float %r2, i32 0) + %vec = insertelement <4 x float> undef, float %r2, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone - -declare void @llvm.AMDGPU.store.output(float, i32) - declare float @llvm.pow.f32(float ,float ) readonly +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/llvm.rint.ll b/test/CodeGen/R600/llvm.rint.ll new file mode 100644 index 0000000..c174b33 --- /dev/null +++ b/test/CodeGen/R600/llvm.rint.ll @@ -0,0 +1,54 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK + +; R600-CHECK: @f32 +; R600-CHECK: RNDNE +; SI-CHECK: @f32 +; SI-CHECK: V_RNDNE_F32_e32 +define void @f32(float addrspace(1)* %out, float %in) { +entry: + %0 = call float @llvm.rint.f32(float %in) + store float %0, float addrspace(1)* %out + ret void +} + +; R600-CHECK: @v2f32 +; R600-CHECK: RNDNE +; R600-CHECK: RNDNE +; SI-CHECK: @v2f32 +; SI-CHECK: V_RNDNE_F32_e32 +; SI-CHECK: V_RNDNE_F32_e32 +define void @v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) { +entry: + %0 = call <2 x float> @llvm.rint.v2f32(<2 x float> %in) + store <2 x float> %0, <2 x float> addrspace(1)* %out + ret void +} + +; R600-CHECK: @v4f32 +; R600-CHECK: RNDNE +; R600-CHECK: RNDNE +; R600-CHECK: RNDNE +; R600-CHECK: RNDNE +; SI-CHECK: @v4f32 +; SI-CHECK: V_RNDNE_F32_e32 +; SI-CHECK: V_RNDNE_F32_e32 +; SI-CHECK: V_RNDNE_F32_e32 +; SI-CHECK: V_RNDNE_F32_e32 +define void @v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) { +entry: + %0 = call <4 x float> @llvm.rint.v4f32(<4 x float> %in) + store <4 x float> %0, <4 x float> addrspace(1)* %out + ret void +} + +; Function Attrs: nounwind readonly +declare float @llvm.rint.f32(float) #0 + +; Function Attrs: nounwind readonly +declare 
<2 x float> @llvm.rint.v2f32(<2 x float>) #0 + +; Function Attrs: nounwind readonly +declare <4 x float> @llvm.rint.v4f32(<4 x float>) #0 + +attributes #0 = { nounwind readonly } diff --git a/test/CodeGen/R600/llvm.round.ll b/test/CodeGen/R600/llvm.round.ll new file mode 100644 index 0000000..e06d45d --- /dev/null +++ b/test/CodeGen/R600/llvm.round.ll @@ -0,0 +1,41 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600 --check-prefix=FUNC + +; FUNC-LABEL: @f32 +; R600: FRACT +; R600-DAG: ADD +; R600-DAG: CEIL +; R600-DAG: FLOOR +; R600: CNDGE +define void @f32(float addrspace(1)* %out, float %in) { +entry: + %0 = call float @llvm.round.f32(float %in) + store float %0, float addrspace(1)* %out + ret void +} + +; The vector tests are really difficult to verify, since it can be hard to +; predict how the scheduler will order the instructions. We already have +; a test for the scalar case, so the vector tests just check that the +; compiler doesn't crash. + +; FUNC-LABEL: v2f32 +; R600: CF_END +define void @v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) { +entry: + %0 = call <2 x float> @llvm.round.v2f32(<2 x float> %in) + store <2 x float> %0, <2 x float> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: v4f32 +; R600: CF_END +define void @v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) { +entry: + %0 = call <4 x float> @llvm.round.v4f32(<4 x float> %in) + store <4 x float> %0, <4 x float> addrspace(1)* %out + ret void +} + +declare float @llvm.round.f32(float) +declare <2 x float> @llvm.round.v2f32(<2 x float>) +declare <4 x float> @llvm.round.v4f32(<4 x float>) diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll index e94c2ba..9eb9983 100644 --- a/test/CodeGen/R600/llvm.sin.ll +++ b/test/CodeGen/R600/llvm.sin.ll @@ -5,15 +5,15 @@ ;CHECK: ADD * ;CHECK: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) +define void @test(<4 x float> inreg %reg0) 
#0 { + %r0 = extractelement <4 x float> %reg0, i32 0 %r1 = call float @llvm.sin.f32( float %r0) - call void @llvm.AMDGPU.store.output(float %r1, i32 0) + %vec = insertelement <4 x float> undef, float %r1, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } declare float @llvm.sin.f32(float) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -declare float @llvm.R600.load.input(i32) readnone - -declare void @llvm.AMDGPU.store.output(float, i32) +attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/llvm.sqrt.ll b/test/CodeGen/R600/llvm.sqrt.ll new file mode 100644 index 0000000..0d0d186 --- /dev/null +++ b/test/CodeGen/R600/llvm.sqrt.ll @@ -0,0 +1,54 @@ +; RUN: llc < %s -march=r600 --mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK +; RUN: llc < %s -march=r600 --mcpu=SI | FileCheck %s --check-prefix=SI-CHECK + +; R600-CHECK-LABEL: @sqrt_f32 +; R600-CHECK: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[2].Z +; R600-CHECK: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[2].Z, PS +; SI-CHECK-LABEL: @sqrt_f32 +; SI-CHECK: V_SQRT_F32_e32 +define void @sqrt_f32(float addrspace(1)* %out, float %in) { +entry: + %0 = call float @llvm.sqrt.f32(float %in) + store float %0, float addrspace(1)* %out + ret void +} + +; R600-CHECK-LABEL: @sqrt_v2f32 +; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[2].W +; R600-CHECK-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[2].W, PS +; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].X +; R600-CHECK-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].X, PS +; SI-CHECK-LABEL: @sqrt_v2f32 +; SI-CHECK: V_SQRT_F32_e32 +; SI-CHECK: V_SQRT_F32_e32 +define void @sqrt_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) { +entry: + %0 = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in) + store <2 x float> %0, <2 x float> addrspace(1)* %out + ret void +} + +; R600-CHECK-LABEL: @sqrt_v4f32 +; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].Y +; R600-CHECK-DAG: 
MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].Y, PS +; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].Z +; R600-CHECK-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].Z, PS +; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].W +; R600-CHECK-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].W, PS +; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[4].X +; R600-CHECK-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[4].X, PS +; SI-CHECK-LABEL: @sqrt_v4f32 +; SI-CHECK: V_SQRT_F32_e32 +; SI-CHECK: V_SQRT_F32_e32 +; SI-CHECK: V_SQRT_F32_e32 +; SI-CHECK: V_SQRT_F32_e32 +define void @sqrt_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) { +entry: + %0 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %in) + store <4 x float> %0, <4 x float> addrspace(1)* %out + ret void +} + +declare float @llvm.sqrt.f32(float %in) +declare <2 x float> @llvm.sqrt.v2f32(<2 x float> %in) +declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %in) diff --git a/test/CodeGen/R600/load-input-fold.ll b/test/CodeGen/R600/load-input-fold.ll index aff2a6e..ca86d0e 100644 --- a/test/CodeGen/R600/load-input-fold.ll +++ b/test/CodeGen/R600/load-input-fold.ll @@ -1,20 +1,20 @@ ;RUN: llc < %s -march=r600 -mcpu=cayman ;REQUIRES: asserts -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) - %4 = call float @llvm.R600.load.input(i32 8) - %5 = call float @llvm.R600.load.input(i32 9) - %6 = call float @llvm.R600.load.input(i32 10) - %7 = call float @llvm.R600.load.input(i32 11) - %8 = call float @llvm.R600.load.input(i32 12) - %9 = call float @llvm.R600.load.input(i32 13) - %10 = call float @llvm.R600.load.input(i32 14) - %11 = call float @llvm.R600.load.input(i32 15) + %0 = extractelement <4 x 
float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 + %4 = extractelement <4 x float> %reg2, i32 0 + %5 = extractelement <4 x float> %reg2, i32 1 + %6 = extractelement <4 x float> %reg2, i32 2 + %7 = extractelement <4 x float> %reg2, i32 3 + %8 = extractelement <4 x float> %reg3, i32 0 + %9 = extractelement <4 x float> %reg3, i32 1 + %10 = extractelement <4 x float> %reg3, i32 2 + %11 = extractelement <4 x float> %reg3, i32 3 %12 = load <4 x float> addrspace(8)* null %13 = extractelement <4 x float> %12, i32 0 %14 = fmul float %0, %13 @@ -96,9 +96,6 @@ main_body: } ; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - -; Function Attrs: readnone declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 ; Function Attrs: readonly diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll index f478ef5..e4492d7 100644 --- a/test/CodeGen/R600/load.ll +++ b/test/CodeGen/R600/load.ll @@ -1,17 +1,17 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600-CHECK %s -; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s ;===------------------------------------------------------------------------===; ; GLOBAL ADDRESS SPACE ;===------------------------------------------------------------------------===; ; Load an i8 value from the global address space. 
-; R600-CHECK: @load_i8 +; R600-CHECK-LABEL: @load_i8 ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} -; SI-CHECK: @load_i8 -; SI-CHECK: BUFFER_LOAD_UBYTE VGPR{{[0-9]+}}, +; SI-CHECK-LABEL: @load_i8 +; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { %1 = load i8 addrspace(1)* %in %2 = zext i8 %1 to i32 @@ -19,13 +19,13 @@ define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { ret void } -; R600-CHECK: @load_i8_sext +; R600-CHECK-LABEL: @load_i8_sext ; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]] ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]] ; R600-CHECK: 24 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]] ; R600-CHECK: 24 -; SI-CHECK: @load_i8_sext +; SI-CHECK-LABEL: @load_i8_sext ; SI-CHECK: BUFFER_LOAD_SBYTE define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { entry: @@ -35,10 +35,98 @@ entry: ret void } +; R600-CHECK-LABEL: @load_v2i8 +; R600-CHECK: VTX_READ_8 +; R600-CHECK: VTX_READ_8 +; SI-CHECK-LABEL: @load_v2i8 +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) { +entry: + %0 = load <2 x i8> addrspace(1)* %in + %1 = zext <2 x i8> %0 to <2 x i32> + store <2 x i32> %1, <2 x i32> addrspace(1)* %out + ret void +} + +; R600-CHECK-LABEL: @load_v2i8_sext +; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] +; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] +; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]] +; R600-CHECK-DAG: 24 +; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]] +; R600-CHECK-DAG: 24 +; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]] +; R600-CHECK-DAG: 24 +; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]] +; R600-CHECK-DAG: 24 +; SI-CHECK-LABEL: @load_v2i8_sext +; SI-CHECK: 
BUFFER_LOAD_SBYTE +; SI-CHECK: BUFFER_LOAD_SBYTE +define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) { +entry: + %0 = load <2 x i8> addrspace(1)* %in + %1 = sext <2 x i8> %0 to <2 x i32> + store <2 x i32> %1, <2 x i32> addrspace(1)* %out + ret void +} + +; R600-CHECK-LABEL: @load_v4i8 +; R600-CHECK: VTX_READ_8 +; R600-CHECK: VTX_READ_8 +; R600-CHECK: VTX_READ_8 +; R600-CHECK: VTX_READ_8 +; SI-CHECK-LABEL: @load_v4i8 +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) { +entry: + %0 = load <4 x i8> addrspace(1)* %in + %1 = zext <4 x i8> %0 to <4 x i32> + store <4 x i32> %1, <4 x i32> addrspace(1)* %out + ret void +} + +; R600-CHECK-LABEL: @load_v4i8_sext +; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] +; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] +; R600-CHECK-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]] +; R600-CHECK-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]] +; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]] +; R600-CHECK-DAG: 24 +; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]] +; R600-CHECK-DAG: 24 +; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]] +; R600-CHECK-DAG: 24 +; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]] +; R600-CHECK-DAG: 24 +; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]] +; R600-CHECK-DAG: 24 +; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]] +; R600-CHECK-DAG: 24 +; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]] +; R600-CHECK-DAG: 24 +; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]] +; R600-CHECK-DAG: 24 +; SI-CHECK-LABEL: @load_v4i8_sext +; SI-CHECK: BUFFER_LOAD_SBYTE +; SI-CHECK: BUFFER_LOAD_SBYTE +; 
SI-CHECK: BUFFER_LOAD_SBYTE +; SI-CHECK: BUFFER_LOAD_SBYTE +define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) { +entry: + %0 = load <4 x i8> addrspace(1)* %in + %1 = sext <4 x i8> %0 to <4 x i32> + store <4 x i32> %1, <4 x i32> addrspace(1)* %out + ret void +} + ; Load an i16 value from the global address space. -; R600-CHECK: @load_i16 +; R600-CHECK-LABEL: @load_i16 ; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} -; SI-CHECK: @load_i16 +; SI-CHECK-LABEL: @load_i16 ; SI-CHECK: BUFFER_LOAD_USHORT define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) { entry: @@ -48,13 +136,13 @@ entry: ret void } -; R600-CHECK: @load_i16_sext +; R600-CHECK-LABEL: @load_i16_sext ; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]] ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]] ; R600-CHECK: 16 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]] ; R600-CHECK: 16 -; SI-CHECK: @load_i16_sext +; SI-CHECK-LABEL: @load_i16_sext ; SI-CHECK: BUFFER_LOAD_SSHORT define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) { entry: @@ -64,12 +152,100 @@ entry: ret void } +; R600-CHECK-LABEL: @load_v2i16 +; R600-CHECK: VTX_READ_16 +; R600-CHECK: VTX_READ_16 +; SI-CHECK-LABEL: @load_v2i16 +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { +entry: + %0 = load <2 x i16> addrspace(1)* %in + %1 = zext <2 x i16> %0 to <2 x i32> + store <2 x i32> %1, <2 x i32> addrspace(1)* %out + ret void +} + +; R600-CHECK-LABEL: @load_v2i16_sext +; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] +; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] +; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]] +; R600-CHECK-DAG: 16 +; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]] +; R600-CHECK-DAG: 16 +; R600-CHECK-DAG: LSHL {{[* 
]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]] +; R600-CHECK-DAG: 16 +; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]] +; R600-CHECK-DAG: 16 +; SI-CHECK-LABEL: @load_v2i16_sext +; SI-CHECK: BUFFER_LOAD_SSHORT +; SI-CHECK: BUFFER_LOAD_SSHORT +define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { +entry: + %0 = load <2 x i16> addrspace(1)* %in + %1 = sext <2 x i16> %0 to <2 x i32> + store <2 x i32> %1, <2 x i32> addrspace(1)* %out + ret void +} + +; R600-CHECK-LABEL: @load_v4i16 +; R600-CHECK: VTX_READ_16 +; R600-CHECK: VTX_READ_16 +; R600-CHECK: VTX_READ_16 +; R600-CHECK: VTX_READ_16 +; SI-CHECK-LABEL: @load_v4i16 +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) { +entry: + %0 = load <4 x i16> addrspace(1)* %in + %1 = zext <4 x i16> %0 to <4 x i32> + store <4 x i32> %1, <4 x i32> addrspace(1)* %out + ret void +} + +; R600-CHECK-LABEL: @load_v4i16_sext +; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] +; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] +; R600-CHECK-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]] +; R600-CHECK-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]] +; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]] +; R600-CHECK-DAG: 16 +; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]] +; R600-CHECK-DAG: 16 +; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]] +; R600-CHECK-DAG: 16 +; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]] +; R600-CHECK-DAG: 16 +; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]] +; R600-CHECK-DAG: 16 +; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]] +; R600-CHECK-DAG: 16 +; R600-CHECK-DAG: LSHL {{[* 
]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]] +; R600-CHECK-DAG: 16 +; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]] +; R600-CHECK-DAG: 16 +; SI-CHECK-LABEL: @load_v4i16_sext +; SI-CHECK: BUFFER_LOAD_SSHORT +; SI-CHECK: BUFFER_LOAD_SSHORT +; SI-CHECK: BUFFER_LOAD_SSHORT +; SI-CHECK: BUFFER_LOAD_SSHORT +define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) { +entry: + %0 = load <4 x i16> addrspace(1)* %in + %1 = sext <4 x i16> %0 to <4 x i32> + store <4 x i32> %1, <4 x i32> addrspace(1)* %out + ret void +} + ; load an i32 value from the global address space. -; R600-CHECK: @load_i32 +; R600-CHECK-LABEL: @load_i32 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 -; SI-CHECK: @load_i32 -; SI-CHECK: BUFFER_LOAD_DWORD VGPR{{[0-9]+}} +; SI-CHECK-LABEL: @load_i32 +; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}} define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { entry: %0 = load i32 addrspace(1)* %in @@ -78,11 +254,11 @@ entry: } ; load a f32 value from the global address space. 
-; R600-CHECK: @load_f32 +; R600-CHECK-LABEL: @load_f32 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 -; SI-CHECK: @load_f32 -; SI-CHECK: BUFFER_LOAD_DWORD VGPR{{[0-9]+}} +; SI-CHECK-LABEL: @load_f32 +; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}} define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) { entry: %0 = load float addrspace(1)* %in @@ -91,10 +267,10 @@ entry: } ; load a v2f32 value from the global address space -; R600-CHECK: @load_v2f32 +; R600-CHECK-LABEL: @load_v2f32 ; R600-CHECK: VTX_READ_64 -; SI-CHECK: @load_v2f32 +; SI-CHECK-LABEL: @load_v2f32 ; SI-CHECK: BUFFER_LOAD_DWORDX2 define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) { entry: @@ -103,11 +279,11 @@ entry: ret void } -; R600-CHECK: @load_i64 -; R600-CHECK: RAT -; R600-CHECK: RAT +; R600-CHECK-LABEL: @load_i64 +; R600-CHECK: MEM_RAT +; R600-CHECK: MEM_RAT -; SI-CHECK: @load_i64 +; SI-CHECK-LABEL: @load_i64 ; SI-CHECK: BUFFER_LOAD_DWORDX2 define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { entry: @@ -116,15 +292,13 @@ entry: ret void } -; R600-CHECK: @load_i64_sext -; R600-CHECK: RAT -; R600-CHECK: RAT +; R600-CHECK-LABEL: @load_i64_sext +; R600-CHECK: MEM_RAT +; R600-CHECK: MEM_RAT ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.x ; R600-CHECK: 31 -; SI-CHECK: @load_i64_sext -; SI-CHECK: BUFFER_LOAD_DWORDX2 [[VAL:VGPR[0-9]_VGPR[0-9]]] -; SI-CHECK: V_LSHL_B64 [[LSHL:VGPR[0-9]_VGPR[0-9]]], [[VAL]], 32 -; SI-CHECK: V_ASHR_I64 VGPR{{[0-9]}}_VGPR{{[0-9]}}, [[LSHL]], 32 +; SI-CHECK-LABEL: @load_i64_sext +; SI-CHECK: BUFFER_LOAD_DWORDX2 [[VAL:v\[[0-9]:[0-9]\]]] define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) { entry: @@ -134,9 +308,9 @@ entry: ret void } -; R600-CHECK: @load_i64_zext -; R600-CHECK: RAT -; R600-CHECK: RAT +; R600-CHECK-LABEL: @load_i64_zext +; R600-CHECK: MEM_RAT +; R600-CHECK: MEM_RAT define void @load_i64_zext(i64 addrspace(1)* %out, i32 
addrspace(1)* %in) { entry: %0 = load i32 addrspace(1)* %in @@ -150,14 +324,14 @@ entry: ;===------------------------------------------------------------------------===; ; Load a sign-extended i8 value -; R600-CHECK: @load_const_i8_sext +; R600-CHECK-LABEL: @load_const_i8_sext ; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]] ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]] ; R600-CHECK: 24 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]] ; R600-CHECK: 24 -; SI-CHECK: @load_const_i8_sext -; SI-CHECK: BUFFER_LOAD_SBYTE VGPR{{[0-9]+}}, +; SI-CHECK-LABEL: @load_const_i8_sext +; SI-CHECK: BUFFER_LOAD_SBYTE v{{[0-9]+}}, define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) { entry: %0 = load i8 addrspace(2)* %in @@ -167,10 +341,10 @@ entry: } ; Load an aligned i8 value -; R600-CHECK: @load_const_i8_aligned +; R600-CHECK-LABEL: @load_const_i8_aligned ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} -; SI-CHECK: @load_const_i8_aligned -; SI-CHECK: BUFFER_LOAD_UBYTE VGPR{{[0-9]+}}, +; SI-CHECK-LABEL: @load_const_i8_aligned +; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) { entry: %0 = load i8 addrspace(2)* %in @@ -180,10 +354,10 @@ entry: } ; Load an un-aligned i8 value -; R600-CHECK: @load_const_i8_unaligned +; R600-CHECK-LABEL: @load_const_i8_unaligned ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} -; SI-CHECK: @load_const_i8_unaligned -; SI-CHECK: BUFFER_LOAD_UBYTE VGPR{{[0-9]+}}, +; SI-CHECK-LABEL: @load_const_i8_unaligned +; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) { entry: %0 = getelementptr i8 addrspace(2)* %in, i32 1 @@ -194,13 +368,13 @@ entry: } ; Load a sign-extended i16 value -; R600-CHECK: @load_const_i16_sext +; R600-CHECK-LABEL: @load_const_i16_sext ; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]] ; R600-CHECK: LSHL 
{{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]] ; R600-CHECK: 16 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]] ; R600-CHECK: 16 -; SI-CHECK: @load_const_i16_sext +; SI-CHECK-LABEL: @load_const_i16_sext ; SI-CHECK: BUFFER_LOAD_SSHORT define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) { entry: @@ -211,9 +385,9 @@ entry: } ; Load an aligned i16 value -; R600-CHECK: @load_const_i16_aligned +; R600-CHECK-LABEL: @load_const_i16_aligned ; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} -; SI-CHECK: @load_const_i16_aligned +; SI-CHECK-LABEL: @load_const_i16_aligned ; SI-CHECK: BUFFER_LOAD_USHORT define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) { entry: @@ -224,9 +398,9 @@ entry: } ; Load an un-aligned i16 value -; R600-CHECK: @load_const_i16_unaligned +; R600-CHECK-LABEL: @load_const_i16_unaligned ; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} -; SI-CHECK: @load_const_i16_unaligned +; SI-CHECK-LABEL: @load_const_i16_unaligned ; SI-CHECK: BUFFER_LOAD_USHORT define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) { entry: @@ -238,11 +412,11 @@ entry: } ; Load an i32 value from the constant address space. -; R600-CHECK: @load_const_addrspace_i32 +; R600-CHECK-LABEL: @load_const_addrspace_i32 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 -; SI-CHECK: @load_const_addrspace_i32 -; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]+}} +; SI-CHECK-LABEL: @load_const_addrspace_i32 +; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}} define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { entry: %0 = load i32 addrspace(2)* %in @@ -251,14 +425,259 @@ entry: } ; Load a f32 value from the constant address space. 
-; R600-CHECK: @load_const_addrspace_f32 +; R600-CHECK-LABEL: @load_const_addrspace_f32 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 -; SI-CHECK: @load_const_addrspace_f32 -; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]+}} +; SI-CHECK-LABEL: @load_const_addrspace_f32 +; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}} define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) { %1 = load float addrspace(2)* %in store float %1, float addrspace(1)* %out ret void } +;===------------------------------------------------------------------------===; +; LOCAL ADDRESS SPACE +;===------------------------------------------------------------------------===; + +; Load an i8 value from the local address space. +; R600-CHECK-LABEL: @load_i8_local +; R600-CHECK: LDS_UBYTE_READ_RET +; SI-CHECK-LABEL: @load_i8_local +; SI-CHECK-NOT: S_WQM_B64 +; SI-CHECK: DS_READ_U8 +define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) { + %1 = load i8 addrspace(3)* %in + %2 = zext i8 %1 to i32 + store i32 %2, i32 addrspace(1)* %out + ret void +} + +; R600-CHECK-LABEL: @load_i8_sext_local +; R600-CHECK: LDS_UBYTE_READ_RET +; R600-CHECK: ASHR +; SI-CHECK-LABEL: @load_i8_sext_local +; SI-CHECK-NOT: S_WQM_B64 +; SI-CHECK: DS_READ_I8 +define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) { +entry: + %0 = load i8 addrspace(3)* %in + %1 = sext i8 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; R600-CHECK-LABEL: @load_v2i8_local +; R600-CHECK: LDS_UBYTE_READ_RET +; R600-CHECK: LDS_UBYTE_READ_RET +; SI-CHECK-LABEL: @load_v2i8_local +; SI-CHECK-NOT: S_WQM_B64 +; SI-CHECK: DS_READ_U8 +; SI-CHECK: DS_READ_U8 +define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) { +entry: + %0 = load <2 x i8> addrspace(3)* %in + %1 = zext <2 x i8> %0 to <2 x i32> + store <2 x i32> %1, <2 x i32> addrspace(1)* %out + ret void +} + +; R600-CHECK-LABEL: @load_v2i8_sext_local +; R600-CHECK-DAG: LDS_UBYTE_READ_RET +; 
R600-CHECK-DAG: LDS_UBYTE_READ_RET +; R600-CHECK-DAG: ASHR +; R600-CHECK-DAG: ASHR +; SI-CHECK-LABEL: @load_v2i8_sext_local +; SI-CHECK-NOT: S_WQM_B64 +; SI-CHECK: DS_READ_I8 +; SI-CHECK: DS_READ_I8 +define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) { +entry: + %0 = load <2 x i8> addrspace(3)* %in + %1 = sext <2 x i8> %0 to <2 x i32> + store <2 x i32> %1, <2 x i32> addrspace(1)* %out + ret void +} + +; R600-CHECK-LABEL: @load_v4i8_local +; R600-CHECK: LDS_UBYTE_READ_RET +; R600-CHECK: LDS_UBYTE_READ_RET +; R600-CHECK: LDS_UBYTE_READ_RET +; R600-CHECK: LDS_UBYTE_READ_RET +; SI-CHECK-LABEL: @load_v4i8_local +; SI-CHECK-NOT: S_WQM_B64 +; SI-CHECK: DS_READ_U8 +; SI-CHECK: DS_READ_U8 +; SI-CHECK: DS_READ_U8 +; SI-CHECK: DS_READ_U8 +define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) { +entry: + %0 = load <4 x i8> addrspace(3)* %in + %1 = zext <4 x i8> %0 to <4 x i32> + store <4 x i32> %1, <4 x i32> addrspace(1)* %out + ret void +} + +; R600-CHECK-LABEL: @load_v4i8_sext_local +; R600-CHECK-DAG: LDS_UBYTE_READ_RET +; R600-CHECK-DAG: LDS_UBYTE_READ_RET +; R600-CHECK-DAG: LDS_UBYTE_READ_RET +; R600-CHECK-DAG: LDS_UBYTE_READ_RET +; R600-CHECK-DAG: ASHR +; R600-CHECK-DAG: ASHR +; R600-CHECK-DAG: ASHR +; R600-CHECK-DAG: ASHR +; SI-CHECK-LABEL: @load_v4i8_sext_local +; SI-CHECK-NOT: S_WQM_B64 +; SI-CHECK: DS_READ_I8 +; SI-CHECK: DS_READ_I8 +; SI-CHECK: DS_READ_I8 +; SI-CHECK: DS_READ_I8 +define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) { +entry: + %0 = load <4 x i8> addrspace(3)* %in + %1 = sext <4 x i8> %0 to <4 x i32> + store <4 x i32> %1, <4 x i32> addrspace(1)* %out + ret void +} + +; Load an i16 value from the local address space. 
+; R600-CHECK-LABEL: @load_i16_local +; R600-CHECK: LDS_USHORT_READ_RET +; SI-CHECK-LABEL: @load_i16_local +; SI-CHECK-NOT: S_WQM_B64 +; SI-CHECK: DS_READ_U16 +define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) { +entry: + %0 = load i16 addrspace(3)* %in + %1 = zext i16 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; R600-CHECK-LABEL: @load_i16_sext_local +; R600-CHECK: LDS_USHORT_READ_RET +; R600-CHECK: ASHR +; SI-CHECK-LABEL: @load_i16_sext_local +; SI-CHECK-NOT: S_WQM_B64 +; SI-CHECK: DS_READ_I16 +define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) { +entry: + %0 = load i16 addrspace(3)* %in + %1 = sext i16 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; R600-CHECK-LABEL: @load_v2i16_local +; R600-CHECK: LDS_USHORT_READ_RET +; R600-CHECK: LDS_USHORT_READ_RET +; SI-CHECK-LABEL: @load_v2i16_local +; SI-CHECK-NOT: S_WQM_B64 +; SI-CHECK: DS_READ_U16 +; SI-CHECK: DS_READ_U16 +define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) { +entry: + %0 = load <2 x i16> addrspace(3)* %in + %1 = zext <2 x i16> %0 to <2 x i32> + store <2 x i32> %1, <2 x i32> addrspace(1)* %out + ret void +} + +; R600-CHECK-LABEL: @load_v2i16_sext_local +; R600-CHECK-DAG: LDS_USHORT_READ_RET +; R600-CHECK-DAG: LDS_USHORT_READ_RET +; R600-CHECK-DAG: ASHR +; R600-CHECK-DAG: ASHR +; SI-CHECK-LABEL: @load_v2i16_sext_local +; SI-CHECK-NOT: S_WQM_B64 +; SI-CHECK: DS_READ_I16 +; SI-CHECK: DS_READ_I16 +define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) { +entry: + %0 = load <2 x i16> addrspace(3)* %in + %1 = sext <2 x i16> %0 to <2 x i32> + store <2 x i32> %1, <2 x i32> addrspace(1)* %out + ret void +} + +; R600-CHECK-LABEL: @load_v4i16_local +; R600-CHECK: LDS_USHORT_READ_RET +; R600-CHECK: LDS_USHORT_READ_RET +; R600-CHECK: LDS_USHORT_READ_RET +; R600-CHECK: LDS_USHORT_READ_RET +; SI-CHECK-LABEL: @load_v4i16_local +; SI-CHECK-NOT: 
S_WQM_B64 +; SI-CHECK: DS_READ_U16 +; SI-CHECK: DS_READ_U16 +; SI-CHECK: DS_READ_U16 +; SI-CHECK: DS_READ_U16 +define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) { +entry: + %0 = load <4 x i16> addrspace(3)* %in + %1 = zext <4 x i16> %0 to <4 x i32> + store <4 x i32> %1, <4 x i32> addrspace(1)* %out + ret void +} + +; R600-CHECK-LABEL: @load_v4i16_sext_local +; R600-CHECK-DAG: LDS_USHORT_READ_RET +; R600-CHECK-DAG: LDS_USHORT_READ_RET +; R600-CHECK-DAG: LDS_USHORT_READ_RET +; R600-CHECK-DAG: LDS_USHORT_READ_RET +; R600-CHECK-DAG: ASHR +; R600-CHECK-DAG: ASHR +; R600-CHECK-DAG: ASHR +; R600-CHECK-DAG: ASHR +; SI-CHECK-LABEL: @load_v4i16_sext_local +; SI-CHECK-NOT: S_WQM_B64 +; SI-CHECK: DS_READ_I16 +; SI-CHECK: DS_READ_I16 +; SI-CHECK: DS_READ_I16 +; SI-CHECK: DS_READ_I16 +define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) { +entry: + %0 = load <4 x i16> addrspace(3)* %in + %1 = sext <4 x i16> %0 to <4 x i32> + store <4 x i32> %1, <4 x i32> addrspace(1)* %out + ret void +} + +; load an i32 value from the local address space. +; R600-CHECK-LABEL: @load_i32_local +; R600-CHECK: LDS_READ_RET +; SI-CHECK-LABEL: @load_i32_local +; SI-CHECK-NOT: S_WQM_B64 +; SI-CHECK: DS_READ_B32 +define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) { +entry: + %0 = load i32 addrspace(3)* %in + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; load a f32 value from the local address space.
+; R600-CHECK-LABEL: @load_f32_local +; R600-CHECK: LDS_READ_RET +; SI-CHECK-LABEL: @load_f32_local +; SI-CHECK: DS_READ_B32 +define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) { +entry: + %0 = load float addrspace(3)* %in + store float %0, float addrspace(1)* %out + ret void +} + +; load a v2f32 value from the local address space +; R600-CHECK-LABEL: @load_v2f32_local +; R600-CHECK: LDS_READ_RET +; R600-CHECK: LDS_READ_RET +; SI-CHECK-LABEL: @load_v2f32_local +; SI-CHECK: DS_READ_B32 +; SI-CHECK: DS_READ_B32 +define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) { +entry: + %0 = load <2 x float> addrspace(3)* %in + store <2 x float> %0, <2 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/load.vec.ll b/test/CodeGen/R600/load.vec.ll index 8cba0b6..81a6310 100644 --- a/test/CodeGen/R600/load.vec.ll +++ b/test/CodeGen/R600/load.vec.ll @@ -1,11 +1,11 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s ; load a v2i32 value from the global address space. 
; EG-CHECK: @load_v2i32 ; EG-CHECK: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0 ; SI-CHECK: @load_v2i32 -; SI-CHECK: BUFFER_LOAD_DWORDX2 VGPR{{[0-9]+}} +; SI-CHECK: BUFFER_LOAD_DWORDX2 v[{{[0-9]+:[0-9]+}}] define void @load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %a = load <2 x i32> addrspace(1) * %in store <2 x i32> %a, <2 x i32> addrspace(1)* %out @@ -16,7 +16,7 @@ define void @load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %i ; EG-CHECK: @load_v4i32 ; EG-CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0 ; SI-CHECK: @load_v4i32 -; SI-CHECK: BUFFER_LOAD_DWORDX4 VGPR{{[0-9]+}} +; SI-CHECK: BUFFER_LOAD_DWORDX4 v[{{[0-9]+:[0-9]+}}] define void @load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %a = load <4 x i32> addrspace(1) * %in store <4 x i32> %a, <4 x i32> addrspace(1)* %out diff --git a/test/CodeGen/R600/load64.ll b/test/CodeGen/R600/load64.ll index 3b4a8f8..e351e41 100644 --- a/test/CodeGen/R600/load64.ll +++ b/test/CodeGen/R600/load64.ll @@ -1,8 +1,8 @@ -; RUN: llc < %s -march=r600 -mcpu=tahiti | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s ; load a f64 value from the global address space. ; CHECK: @load_f64 -; CHECK: BUFFER_LOAD_DWORDX2 VGPR{{[0-9]+}} +; CHECK: BUFFER_LOAD_DWORDX2 v[{{[0-9]+:[0-9]+}}] define void @load_f64(double addrspace(1)* %out, double addrspace(1)* %in) { entry: %0 = load double addrspace(1)* %in @@ -12,7 +12,7 @@ entry: ; Load a f64 value from the constant address space. 
; CHECK: @load_const_addrspace_f64 -; CHECK: S_LOAD_DWORDX2 SGPR{{[0-9]+}} +; CHECK: S_LOAD_DWORDX2 s[{{[0-9]+:[0-9]+}}] define void @load_const_addrspace_f64(double addrspace(1)* %out, double addrspace(2)* %in) { %1 = load double addrspace(2)* %in store double %1, double addrspace(1)* %out ret void } diff --git a/test/CodeGen/R600/local-memory-two-objects.ll b/test/CodeGen/R600/local-memory-two-objects.ll index 6d3610e..e2d8406 100644 --- a/test/CodeGen/R600/local-memory-two-objects.ll +++ b/test/CodeGen/R600/local-memory-two-objects.ll @@ -1,27 +1,34 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s - -; TODO: Add RUN and CHECK lines for SI once this test works there +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s @local_memory_two_objects.local_mem0 = internal addrspace(3) unnamed_addr global [4 x i32] zeroinitializer, align 4 @local_memory_two_objects.local_mem1 = internal addrspace(3) unnamed_addr global [4 x i32] zeroinitializer, align 4 -; CHECK: @local_memory_two_objects +; EG-CHECK: @local_memory_two_objects ; Check that the LDS size emitted correctly -; CHECK: .long 166120 -; CHECK-NEXT: .long 8 - -; Make sure the lds writes are using different addresses. -; CHECK: LDS_WRITE {{[*]*}} {{PV|T}}[[ADDRW:[0-9]*\.[XYZW]]] -; CHECK-NOT: LDS_WRITE {{[*]*}} T[[ADDRW]] +; EG-CHECK: .long 166120 +; EG-CHECK-NEXT: .long 8 +; SI-CHECK: .long 47180 +; SI-CHECK-NEXT: .long 32768 + +; We would like to check that the lds writes are using different +; addresses, but due to variations in the scheduler, we can't do +; this consistently on evergreen GPUs.
+; EG-CHECK: LDS_WRITE +; EG-CHECK: LDS_WRITE +; SI-CHECK: DS_WRITE_B32 0, {{v[0-9]*}}, v[[ADDRW:[0-9]*]] +; SI-CHECK-NOT: DS_WRITE_B32 0, {{v[0-9]*}}, v[[ADDRW]] ; GROUP_BARRIER must be the last instruction in a clause -; CHECK: GROUP_BARRIER -; CHECK-NEXT: ALU clause +; EG-CHECK: GROUP_BARRIER +; EG-CHECK-NEXT: ALU clause ; Make sure the lds reads are using different addresses. -; CHECK: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]] -; CHECK-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]] +; EG-CHECK: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]] +; EG-CHECK-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]] +; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, 0, [[ADDRR:v[0-9]+]] +; SI-CHECK-NOT: DS_READ_B32 {{v[0-9]+}}, 0, [[ADDRR]] define void @local_memory_two_objects(i32 addrspace(1)* %out) { entry: diff --git a/test/CodeGen/R600/local-memory.ll b/test/CodeGen/R600/local-memory.ll index 5458fb9..2168a3d 100644 --- a/test/CodeGen/R600/local-memory.ll +++ b/test/CodeGen/R600/local-memory.ll @@ -1,19 +1,24 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=bonaire -verify-machineinstrs | FileCheck --check-prefix=CI-CHECK %s -@local_memory.local_mem = internal addrspace(3) unnamed_addr global [16 x i32] zeroinitializer, align 4 +@local_memory.local_mem = internal addrspace(3) unnamed_addr global [128 x i32] zeroinitializer, align 4 -; EG-CHECK: @local_memory -; SI-CHECK: @local_memory +; EG-CHECK-LABEL: @local_memory +; SI-CHECK-LABEL: @local_memory +; CI-CHECK-LABEL: @local_memory ; Check that the LDS size emitted correctly ; EG-CHECK: .long 166120 -; EG-CHECK-NEXT: .long 16 +; EG-CHECK-NEXT: .long 128 ; SI-CHECK: .long 47180 -; SI-CHECK-NEXT: .long 32768 +; SI-CHECK-NEXT: .long 65536 +; CI-CHECK: .long 47180 +; 
CI-CHECK-NEXT: .long 32768 ; EG-CHECK: LDS_WRITE -; SI-CHECK: DS_WRITE_B32 +; SI-CHECK-NOT: S_WQM_B64 +; SI-CHECK: DS_WRITE_B32 0 ; GROUP_BARRIER must be the last instruction in a clause ; EG-CHECK: GROUP_BARRIER @@ -21,18 +26,18 @@ ; SI-CHECK: S_BARRIER ; EG-CHECK: LDS_READ_RET -; SI-CHECK: DS_READ_B32 +; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, 0 define void @local_memory(i32 addrspace(1)* %out) { entry: %y.i = call i32 @llvm.r600.read.tidig.x() #0 - %arrayidx = getelementptr inbounds [16 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %y.i + %arrayidx = getelementptr inbounds [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %y.i store i32 %y.i, i32 addrspace(3)* %arrayidx, align 4 %add = add nsw i32 %y.i, 1 %cmp = icmp eq i32 %add, 16 %.add = select i1 %cmp, i32 0, i32 %add call void @llvm.AMDGPU.barrier.local() - %arrayidx1 = getelementptr inbounds [16 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %.add + %arrayidx1 = getelementptr inbounds [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %.add %0 = load i32 addrspace(3)* %arrayidx1, align 4 %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %out, i32 %y.i store i32 %0, i32 addrspace(1)* %arrayidx2, align 4 diff --git a/test/CodeGen/R600/lshl.ll b/test/CodeGen/R600/lshl.ll index 806e681..2162839 100644 --- a/test/CodeGen/R600/lshl.ll +++ b/test/CodeGen/R600/lshl.ll @@ -1,6 +1,6 @@ -;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s -;CHECK: V_LSHL_B32_e64 VGPR{{[0-9]}}, SGPR{{[0-9]}}, 1 +;CHECK: S_LSHL_B32 s{{[0-9]}}, s{{[0-9]}}, 1 define void @test(i32 %p) { %i = mul i32 %p, 2 diff --git a/test/CodeGen/R600/lshr.ll b/test/CodeGen/R600/lshr.ll index cfbcc34..886d1c4 100644 --- a/test/CodeGen/R600/lshr.ll +++ b/test/CodeGen/R600/lshr.ll @@ -1,6 +1,6 @@ -;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s -;CHECK: 
V_LSHR_B32_e64 {{VGPR[0-9]}}, SGPR{{[0-9]}}, 1 +;CHECK: S_LSHR_B32 s{{[0-9]}}, s{{[0-9]}}, 1 define void @test(i32 %p) { %i = udiv i32 %p, 2 diff --git a/test/CodeGen/R600/mad_int24.ll b/test/CodeGen/R600/mad_int24.ll index ce42ae7..df063ec 100644 --- a/test/CodeGen/R600/mad_int24.ll +++ b/test/CodeGen/R600/mad_int24.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK -; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK ; EG-CHECK: @i32_mad24 ; Signed 24-bit multiply is not supported on pre-Cayman GPUs. diff --git a/test/CodeGen/R600/mad_uint24.ll b/test/CodeGen/R600/mad_uint24.ll index 00aa64a..66a070e 100644 --- a/test/CodeGen/R600/mad_uint24.ll +++ b/test/CodeGen/R600/mad_uint24.ll @@ -1,10 +1,10 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG-CHECK -; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK -; EG-CHECK: @u32_mad24 +; EG-CHECK-LABEL: @u32_mad24 ; EG-CHECK: MULADD_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W, KC0[3].X -; SI-CHECK: @u32_mad24 +; SI-CHECK-LABEL: @u32_mad24 ; SI-CHECK: V_MAD_U32_U24 define void @u32_mad24(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) { @@ -19,7 +19,7 @@ entry: ret void } -; EG-CHECK: @i16_mad24 +; EG-CHECK-LABEL: @i16_mad24 ; EG-CHECK-DAG: VTX_READ_16 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40 ; EG-CHECK-DAG: VTX_READ_16 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44 ; EG-CHECK-DAG: VTX_READ_16 [[C:T[0-9]\.X]], T{{[0-9]}}.X, 48 @@ -30,10 +30,10 @@ entry: ; EG-CHECK: 16 ; EG-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]], literal.x ; EG-CHECK: 16 
-; SI-CHECK: @i16_mad24 -; SI-CHECK: V_MAD_U32_U24 [[MAD:VGPR[0-9]]], {{[SV]GPR[0-9], [SV]GPR[0-9]}} -; SI-CHECK: V_LSHLREV_B32_e32 [[LSHL:VGPR[0-9]]], 16, [[MAD]] -; SI-CHECK: V_ASHRREV_I32_e32 VGPR{{[0-9]}}, 16, [[LSHL]] +; SI-CHECK-LABEL: @i16_mad24 +; SI-CHECK: V_MAD_U32_U24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}} +; SI-CHECK: V_LSHLREV_B32_e32 [[LSHL:v[0-9]]], 16, [[MAD]] +; SI-CHECK: V_ASHRREV_I32_e32 v{{[0-9]}}, 16, [[LSHL]] define void @i16_mad24(i32 addrspace(1)* %out, i16 %a, i16 %b, i16 %c) { entry: @@ -44,7 +44,7 @@ entry: ret void } -; EG-CHECK: @i8_mad24 +; EG-CHECK-LABEL: @i8_mad24 ; EG-CHECK-DAG: VTX_READ_8 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40 ; EG-CHECK-DAG: VTX_READ_8 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44 ; EG-CHECK-DAG: VTX_READ_8 [[C:T[0-9]\.X]], T{{[0-9]}}.X, 48 @@ -55,10 +55,10 @@ entry: ; EG-CHECK: 24 ; EG-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]], literal.x ; EG-CHECK: 24 -; SI-CHECK: @i8_mad24 -; SI-CHECK: V_MAD_U32_U24 [[MUL:VGPR[0-9]]], {{[SV]GPR[0-9], [SV]GPR[0-9]}} -; SI-CHECK: V_LSHLREV_B32_e32 [[LSHL:VGPR[0-9]]], 24, [[MUL]] -; SI-CHECK: V_ASHRREV_I32_e32 VGPR{{[0-9]}}, 24, [[LSHL]] +; SI-CHECK-LABEL: @i8_mad24 +; SI-CHECK: V_MAD_U32_U24 [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}} +; SI-CHECK: V_LSHLREV_B32_e32 [[LSHL:v[0-9]]], 24, [[MUL]] +; SI-CHECK: V_ASHRREV_I32_e32 v{{[0-9]}}, 24, [[LSHL]] define void @i8_mad24(i32 addrspace(1)* %out, i8 %a, i8 %b, i8 %c) { entry: diff --git a/test/CodeGen/R600/max-literals.ll b/test/CodeGen/R600/max-literals.ll index c31b7c0..65a6d2b 100644 --- a/test/CodeGen/R600/max-literals.ll +++ b/test/CodeGen/R600/max-literals.ll @@ -3,13 +3,13 @@ ; CHECK: @main ; CHECK: ADD * -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) - %4 
= call float @llvm.R600.load.input(i32 8) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 + %4 = extractelement <4 x float> %reg2, i32 0 %5 = fadd float %0, 2.0 %6 = fadd float %1, 3.0 %7 = fadd float %2, 4.0 @@ -32,13 +32,13 @@ main_body: ; CHECK: @main ; CHECK-NOT: ADD * -define void @main2() #0 { +define void @main2(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) - %4 = call float @llvm.R600.load.input(i32 8) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 + %4 = extractelement <4 x float> %reg2, i32 0 %5 = fadd float %0, 2.0 %6 = fadd float %1, 3.0 %7 = fadd float %2, 4.0 @@ -59,7 +59,6 @@ main_body: } ; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) diff --git a/test/CodeGen/R600/mul.ll b/test/CodeGen/R600/mul.ll index 18a17b6..8c27e28 100644 --- a/test/CodeGen/R600/mul.ll +++ b/test/CodeGen/R600/mul.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s ; mul24 and mad24 are affected @@ -8,8 +8,8 @@ ;EG-CHECK: MULLO_INT {{\*? 
*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;SI-CHECK: @test2 -;SI-CHECK: V_MUL_LO_I32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_MUL_LO_I32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} +;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 @@ -27,10 +27,10 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { ;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;SI-CHECK: @test4 -;SI-CHECK: V_MUL_LO_I32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_MUL_LO_I32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_MUL_LO_I32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_MUL_LO_I32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} +;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 diff --git a/test/CodeGen/R600/mul_int24.ll b/test/CodeGen/R600/mul_int24.ll index 16ae760..66a1a9e 100644 --- a/test/CodeGen/R600/mul_int24.ll +++ b/test/CodeGen/R600/mul_int24.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK -; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK ; EG-CHECK: @i32_mul24 ; Signed 24-bit multiply is not supported on pre-Cayman GPUs. 
diff --git a/test/CodeGen/R600/mul_uint24.ll b/test/CodeGen/R600/mul_uint24.ll index b1a7f94..6e6d549 100644 --- a/test/CodeGen/R600/mul_uint24.ll +++ b/test/CodeGen/R600/mul_uint24.ll @@ -1,10 +1,10 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG-CHECK -; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK -; EG-CHECK: @u32_mul24 +; EG-CHECK-LABEL: @u32_mul24 ; EG-CHECK: MUL_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W -; SI-CHECK: @u32_mul24 +; SI-CHECK-LABEL: @u32_mul24 ; SI-CHECK: V_MUL_U32_U24 define void @u32_mul24(i32 addrspace(1)* %out, i32 %a, i32 %b) { @@ -18,7 +18,7 @@ entry: ret void } -; EG-CHECK: @i16_mul24 +; EG-CHECK-LABEL: @i16_mul24 ; EG-CHECK-DAG: VTX_READ_16 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40 ; EG-CHECK-DAG: VTX_READ_16 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44 ; The order of A and B does not matter. @@ -28,10 +28,10 @@ entry: ; EG-CHECK: 16 ; EG-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]], literal.x ; EG-CHECK: 16 -; SI-CHECK: @i16_mul24 -; SI-CHECK: V_MUL_U32_U24_e{{(32|64)}} [[MUL:VGPR[0-9]]], {{[SV]GPR[0-9], [SV]GPR[0-9]}} -; SI-CHECK: V_LSHLREV_B32_e32 [[LSHL:VGPR[0-9]]], 16, [[MUL]] -; SI-CHECK: V_ASHRREV_I32_e32 VGPR{{[0-9]}}, 16, [[LSHL]] +; SI-CHECK-LABEL: @i16_mul24 +; SI-CHECK: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}} +; SI-CHECK: V_LSHLREV_B32_e32 [[LSHL:v[0-9]]], 16, [[MUL]] +; SI-CHECK: V_ASHRREV_I32_e32 v{{[0-9]}}, 16, [[LSHL]] define void @i16_mul24(i32 addrspace(1)* %out, i16 %a, i16 %b) { entry: @@ -41,7 +41,7 @@ entry: ret void } -; EG-CHECK: @i8_mul24 +; EG-CHECK-LABEL: @i8_mul24 ; EG-CHECK-DAG: VTX_READ_8 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40 ; EG-CHECK-DAG: VTX_READ_8 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44 ; The order of A and B does not matter. 
@@ -51,10 +51,10 @@ entry: ; EG-CHECK: 24 ; EG-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]], literal.x ; EG-CHECK: 24 -; SI-CHECK: @i8_mul24 -; SI-CHECK: V_MUL_U32_U24_e{{(32|64)}} [[MUL:VGPR[0-9]]], {{[SV]GPR[0-9], [SV]GPR[0-9]}} -; SI-CHECK: V_LSHLREV_B32_e32 [[LSHL:VGPR[0-9]]], 24, [[MUL]] -; SI-CHECK: V_ASHRREV_I32_e32 VGPR{{[0-9]}}, 24, [[LSHL]] +; SI-CHECK-LABEL: @i8_mul24 +; SI-CHECK: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}} +; SI-CHECK: V_LSHLREV_B32_e32 [[LSHL:v[0-9]]], 24, [[MUL]] +; SI-CHECK: V_ASHRREV_I32_e32 v{{[0-9]}}, 24, [[LSHL]] define void @i8_mul24(i32 addrspace(1)* %out, i8 %a, i8 %b) { entry: diff --git a/test/CodeGen/R600/mulhu.ll b/test/CodeGen/R600/mulhu.ll index eb379d1..d5fc014 100644 --- a/test/CodeGen/R600/mulhu.ll +++ b/test/CodeGen/R600/mulhu.ll @@ -1,8 +1,8 @@ -;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s -;CHECK: V_MOV_B32_e32 VGPR{{[0-9]+}}, -1431655765 -;CHECK: V_MUL_HI_U32 VGPR0, {{[SV]GPR[0-9]+}}, {{VGPR[0-9]+}} -;CHECK-NEXT: V_LSHRREV_B32_e32 VGPR0, 1, VGPR0 +;CHECK: V_MOV_B32_e32 v{{[0-9]+}}, -1431655765 +;CHECK: V_MUL_HI_U32 v0, {{[sv][0-9]+}}, {{v[0-9]+}} +;CHECK-NEXT: V_LSHRREV_B32_e32 v0, 1, v0 define void @test(i32 %p) { %i = udiv i32 %p, 3 diff --git a/test/CodeGen/R600/or.ll b/test/CodeGen/R600/or.ll index 4a4e892..35d23b3 100644 --- a/test/CodeGen/R600/or.ll +++ b/test/CodeGen/R600/or.ll @@ -1,13 +1,13 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s -; EG-CHECK: @or_v2i32 +; EG-CHECK-LABEL: @or_v2i32 ; EG-CHECK: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; EG-CHECK: OR_INT {{\*? 
*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI-CHECK: @or_v2i32 -;SI-CHECK: V_OR_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_OR_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} +;SI-CHECK-LABEL: @or_v2i32 +;SI-CHECK: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @or_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 @@ -18,17 +18,17 @@ define void @or_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) ret void } -; EG-CHECK: @or_v4i32 +; EG-CHECK-LABEL: @or_v4i32 ; EG-CHECK: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; EG-CHECK: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; EG-CHECK: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; EG-CHECK: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI-CHECK: @or_v4i32 -;SI-CHECK: V_OR_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_OR_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_OR_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_OR_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} +;SI-CHECK-LABEL: @or_v4i32 +;SI-CHECK: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 @@ -38,3 +38,16 @@ define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) store <4 x i32> %result, <4 x i32> addrspace(1)* %out ret void } + +; EG-CHECK-LABEL: @or_i64 +; EG-CHECK-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y +; EG-CHECK-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z +; SI-CHECK-LABEL: @or_i64 +; 
SI-CHECK: V_OR_B32_e32 v{{[0-9]}} +; SI-CHECK: V_OR_B32_e32 v{{[0-9]}} +define void @or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) { +entry: + %0 = or i64 %a, %b + store i64 %0, i64 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/predicate-dp4.ll b/test/CodeGen/R600/predicate-dp4.ll new file mode 100644 index 0000000..e48d6a7 --- /dev/null +++ b/test/CodeGen/R600/predicate-dp4.ll @@ -0,0 +1,27 @@ +;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s + +; CHECK-LABEL: @main +; CHECK: PRED_SETE_INT * Pred, +; CHECK: DOT4 T{{[0-9]+}}.X, T0.X, T0.X, Pred_sel_one +define void @main(<4 x float> inreg) #0 { +main_body: + %1 = extractelement <4 x float> %0, i32 0 + %2 = bitcast float %1 to i32 + %3 = icmp eq i32 %2, 0 + br i1 %3, label %IF, label %ENDIF + +IF: ; preds = %main_body + %4 = call float @llvm.AMDGPU.dp4(<4 x float> %0, <4 x float> %0) + br label %ENDIF + +ENDIF: ; preds = %IF, %main_body + %5 = phi float [%4, %IF], [0.000000e+00, %main_body] + %6 = insertelement <4 x float> undef, float %5, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %6, i32 0, i32 0) + ret void +} + +declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) +attributes #1 = { readnone } +attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/predicates.ll b/test/CodeGen/R600/predicates.ll index 0d3eeef..902508f 100644 --- a/test/CodeGen/R600/predicates.ll +++ b/test/CodeGen/R600/predicates.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mattr=disable-irstructurizer -mcpu=redwood | FileCheck %s ; These tests make sure the compiler is optimizing branches using predicates ; when it is legal to do so.
diff --git a/test/CodeGen/R600/indirect-addressing.ll b/test/CodeGen/R600/private-memory.ll index bd72cd9..48a013c 100644 --- a/test/CodeGen/R600/indirect-addressing.ll +++ b/test/CodeGen/R600/private-memory.ll @@ -1,16 +1,24 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK ; This test checks that uses and defs of the AR register happen in the same ; instruction clause. -; CHECK: @mova_same_clause -; CHECK: MOVA_INT -; CHECK-NOT: ALU clause -; CHECK: 0 + AR.x -; CHECK: MOVA_INT -; CHECK-NOT: ALU clause -; CHECK: 0 + AR.x +; R600-CHECK-LABEL: @mova_same_clause +; R600-CHECK: MOVA_INT +; R600-CHECK-NOT: ALU clause +; R600-CHECK: 0 + AR.x +; R600-CHECK: MOVA_INT +; R600-CHECK-NOT: ALU clause +; R600-CHECK: 0 + AR.x +; SI-CHECK-LABEL: @mova_same_clause +; SI-CHECK: V_READFIRSTLANE +; SI-CHECK: V_MOVRELD +; SI-CHECK: S_CBRANCH +; SI-CHECK: V_READFIRSTLANE +; SI-CHECK: V_MOVRELD +; SI-CHECK: S_CBRANCH define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) { entry: %stack = alloca [5 x i32], align 4 @@ -38,9 +46,10 @@ entry: ; XXX: This generated code has unnecessary MOVs, we should be able to optimize ; this. -; CHECK: @multiple_structs -; CHECK-NOT: MOVA_INT - +; R600-CHECK-LABEL: @multiple_structs +; R600-CHECK-NOT: MOVA_INT +; SI-CHECK-LABEL: @multiple_structs +; SI-CHECK-NOT: V_MOVREL %struct.point = type { i32, i32 } define void @multiple_structs(i32 addrspace(1)* %out) { @@ -63,3 +72,44 @@ entry: store i32 %0, i32 addrspace(1)* %out ret void } + +; Test direct access of a private array inside a loop. The private array +; loads and stores should be lowered to copies, so there shouldn't be any +; MOVA instructions. 
+ +; R600-CHECK-LABEL: @direct_loop +; R600-CHECK-NOT: MOVA_INT +; SI-CHECK-LABEL: @direct_loop +; SI-CHECK-NOT: V_MOVREL + +define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { +entry: + %prv_array_const = alloca [2 x i32] + %prv_array = alloca [2 x i32] + %a = load i32 addrspace(1)* %in + %b_src_ptr = getelementptr i32 addrspace(1)* %in, i32 1 + %b = load i32 addrspace(1)* %b_src_ptr + %a_dst_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 0 + store i32 %a, i32* %a_dst_ptr + %b_dst_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 1 + store i32 %b, i32* %b_dst_ptr + br label %for.body + +for.body: + %inc = phi i32 [0, %entry], [%count, %for.body] + %x_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 0 + %x = load i32* %x_ptr + %y_ptr = getelementptr [2 x i32]* %prv_array, i32 0, i32 0 + %y = load i32* %y_ptr + %xy = add i32 %x, %y + store i32 %xy, i32* %y_ptr + %count = add i32 %inc, 1 + %done = icmp eq i32 %count, 4095 + br i1 %done, label %for.end, label %for.body + +for.end: + %value_ptr = getelementptr [2 x i32]* %prv_array, i32 0, i32 0 + %value = load i32* %value_ptr + store i32 %value, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/pv-packing.ll b/test/CodeGen/R600/pv-packing.ll index 03fc204..e5615b9 100644 --- a/test/CodeGen/R600/pv-packing.ll +++ b/test/CodeGen/R600/pv-packing.ll @@ -3,17 +3,17 @@ ;CHECK: DOT4 T{{[0-9]\.X}} ;CHECK: MULADD_IEEE * T{{[0-9]\.W}} -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 8) - %4 = call float @llvm.R600.load.input(i32 9) - %5 = call float @llvm.R600.load.input(i32 10) - %6 = call float @llvm.R600.load.input(i32 12) - %7 = call float 
@llvm.R600.load.input(i32 13) - %8 = call float @llvm.R600.load.input(i32 14) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg2, i32 0 + %4 = extractelement <4 x float> %reg2, i32 1 + %5 = extractelement <4 x float> %reg2, i32 2 + %6 = extractelement <4 x float> %reg3, i32 0 + %7 = extractelement <4 x float> %reg3, i32 1 + %8 = extractelement <4 x float> %reg3, i32 2 %9 = load <4 x float> addrspace(8)* null %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) %11 = call float @llvm.AMDGPU.dp4(<4 x float> %9, <4 x float> %9) @@ -36,9 +36,6 @@ main_body: } ; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - -; Function Attrs: readnone declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 @@ -46,5 +43,3 @@ declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { "ShaderType"="1" } attributes #1 = { readnone } -attributes #2 = { readonly } -attributes #3 = { nounwind readonly } diff --git a/test/CodeGen/R600/pv.ll b/test/CodeGen/R600/pv.ll index 6e0b744..5a930b2 100644 --- a/test/CodeGen/R600/pv.ll +++ b/test/CodeGen/R600/pv.ll @@ -1,38 +1,38 @@ ; RUN: llc < %s -march=r600 | FileCheck %s ;CHECK: DOT4 * T{{[0-9]\.W}} (MASKED) -;CHECK: CNDGE T{{[0-9].[XYZW]}}, PV.X +;CHECK: MAX T{{[0-9].[XYZW]}}, 0.0, PV.X -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) - %4 = call float @llvm.R600.load.input(i32 8) - %5 = call float @llvm.R600.load.input(i32 9) - 
%6 = call float @llvm.R600.load.input(i32 10) - %7 = call float @llvm.R600.load.input(i32 11) - %8 = call float @llvm.R600.load.input(i32 12) - %9 = call float @llvm.R600.load.input(i32 13) - %10 = call float @llvm.R600.load.input(i32 14) - %11 = call float @llvm.R600.load.input(i32 15) - %12 = call float @llvm.R600.load.input(i32 16) - %13 = call float @llvm.R600.load.input(i32 17) - %14 = call float @llvm.R600.load.input(i32 18) - %15 = call float @llvm.R600.load.input(i32 19) - %16 = call float @llvm.R600.load.input(i32 20) - %17 = call float @llvm.R600.load.input(i32 21) - %18 = call float @llvm.R600.load.input(i32 22) - %19 = call float @llvm.R600.load.input(i32 23) - %20 = call float @llvm.R600.load.input(i32 24) - %21 = call float @llvm.R600.load.input(i32 25) - %22 = call float @llvm.R600.load.input(i32 26) - %23 = call float @llvm.R600.load.input(i32 27) - %24 = call float @llvm.R600.load.input(i32 28) - %25 = call float @llvm.R600.load.input(i32 29) - %26 = call float @llvm.R600.load.input(i32 30) - %27 = call float @llvm.R600.load.input(i32 31) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 + %4 = extractelement <4 x float> %reg2, i32 0 + %5 = extractelement <4 x float> %reg2, i32 1 + %6 = extractelement <4 x float> %reg2, i32 2 + %7 = extractelement <4 x float> %reg2, i32 3 + %8 = extractelement <4 x float> %reg3, i32 0 + %9 = extractelement <4 x float> %reg3, i32 1 + %10 = extractelement <4 x float> %reg3, i32 2 + %11 = extractelement <4 x float> %reg3, i32 3 + %12 = extractelement <4 x float> %reg4, i32 0 + %13 = extractelement <4 x float> %reg4, i32 1 + %14 = extractelement <4 x float> %reg4, i32 2 + %15 = extractelement <4 x float> %reg4, i32 3 + %16 = extractelement <4 x float> %reg5, i32 0 + %17 = extractelement <4 x float> %reg5, i32 1 + %18 = extractelement <4 x float> %reg5, i32 2 + %19 = extractelement <4 
x float> %reg5, i32 3 + %20 = extractelement <4 x float> %reg6, i32 0 + %21 = extractelement <4 x float> %reg6, i32 1 + %22 = extractelement <4 x float> %reg6, i32 2 + %23 = extractelement <4 x float> %reg6, i32 3 + %24 = extractelement <4 x float> %reg7, i32 0 + %25 = extractelement <4 x float> %reg7, i32 1 + %26 = extractelement <4 x float> %reg7, i32 2 + %27 = extractelement <4 x float> %reg7, i32 3 %28 = load <4 x float> addrspace(8)* null %29 = extractelement <4 x float> %28, i32 0 %30 = fmul float %0, %29 @@ -219,9 +219,6 @@ main_body: } ; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - -; Function Attrs: readnone declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 ; Function Attrs: readonly diff --git a/test/CodeGen/R600/r600-encoding.ll b/test/CodeGen/R600/r600-encoding.ll index 6ef3c31..b760c88 100644 --- a/test/CodeGen/R600/r600-encoding.ll +++ b/test/CodeGen/R600/r600-encoding.ll @@ -10,15 +10,16 @@ ; R600-CHECK: @test ; R600-CHECK: MUL_IEEE {{[ *TXYZWPVxyzw.,0-9]+}} ; encoding: [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x02,0x[0-9a-f]+,0x[0-9a-f]+}}] -define void @test() { +define void @test(<4 x float> inreg %reg0) #0 { entry: - %0 = call float @llvm.R600.load.input(i32 0) - %1 = call float @llvm.R600.load.input(i32 1) - %2 = fmul float %0, %1 - call void @llvm.AMDGPU.store.output(float %2, i32 0) + %r0 = extractelement <4 x float> %reg0, i32 0 + %r1 = extractelement <4 x float> %reg0, i32 1 + %r2 = fmul float %r0, %r1 + %vec = insertelement <4 x float> undef, float %r2, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -declare void @llvm.AMDGPU.store.output(float, i32) +attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/r600-export-fix.ll b/test/CodeGen/R600/r600-export-fix.ll new file mode 100644 index 0000000..73bc063 --- /dev/null +++ 
b/test/CodeGen/R600/r600-export-fix.ll @@ -0,0 +1,142 @@ +; RUN: llc < %s -march=r600 -mcpu=cedar | FileCheck %s + +;CHECK: EXPORT T{{[0-9]}}.XYZW +;CHECK: EXPORT T{{[0-9]}}.0000 +;CHECK: EXPORT T{{[0-9]}}.0000 +;CHECK: EXPORT T{{[0-9]}}.0XZW +;CHECK: EXPORT T{{[0-9]}}.XYZW +;CHECK: EXPORT T{{[0-9]}}.YX00 +;CHECK: EXPORT T{{[0-9]}}.0000 +;CHECK: EXPORT T{{[0-9]}}.0000 + + +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 { +main_body: + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 + %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) + %5 = extractelement <4 x float> %4, i32 0 + %6 = fmul float %5, %0 + %7 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) + %8 = extractelement <4 x float> %7, i32 1 + %9 = fmul float %8, %0 + %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) + %11 = extractelement <4 x float> %10, i32 2 + %12 = fmul float %11, %0 + %13 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) + %14 = extractelement <4 x float> %13, i32 3 + %15 = fmul float %14, %0 + %16 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) + %17 = extractelement <4 x float> %16, i32 0 + %18 = fmul float %17, %1 + %19 = fadd float %18, %6 + %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) + %21 = extractelement <4 x float> %20, i32 1 + %22 = fmul float %21, %1 + %23 = fadd float %22, %9 + %24 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) + %25 = extractelement <4 x float> %24, i32 2 + %26 = fmul float %25, %1 + %27 = fadd float %26, %12 + 
%28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) + %29 = extractelement <4 x float> %28, i32 3 + %30 = fmul float %29, %1 + %31 = fadd float %30, %15 + %32 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) + %33 = extractelement <4 x float> %32, i32 0 + %34 = fmul float %33, %2 + %35 = fadd float %34, %19 + %36 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) + %37 = extractelement <4 x float> %36, i32 1 + %38 = fmul float %37, %2 + %39 = fadd float %38, %23 + %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) + %41 = extractelement <4 x float> %40, i32 2 + %42 = fmul float %41, %2 + %43 = fadd float %42, %27 + %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) + %45 = extractelement <4 x float> %44, i32 3 + %46 = fmul float %45, %2 + %47 = fadd float %46, %31 + %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7) + %49 = extractelement <4 x float> %48, i32 0 + %50 = fmul float %49, %3 + %51 = fadd float %50, %35 + %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7) + %53 = extractelement <4 x float> %52, i32 1 + %54 = fmul float %53, %3 + %55 = fadd float %54, %39 + %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7) + %57 = extractelement <4 x float> %56, i32 2 + %58 = fmul float %57, %3 + %59 = fadd float %58, %43 + %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7) + %61 = extractelement <4 x float> %60, i32 3 + %62 = fmul float %61, %3 + %63 = fadd float %62, %47 + %64 = load <4 x float> addrspace(8)* null + %65 = extractelement <4 x float> %64, i32 0 + %66 = load <4 x 
float> addrspace(8)* null + %67 = extractelement <4 x float> %66, i32 1 + %68 = load <4 x float> addrspace(8)* null + %69 = extractelement <4 x float> %68, i32 2 + %70 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %71 = extractelement <4 x float> %70, i32 0 + %72 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %73 = extractelement <4 x float> %72, i32 1 + %74 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %75 = extractelement <4 x float> %74, i32 2 + %76 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) + %77 = extractelement <4 x float> %76, i32 0 + %78 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) + %79 = extractelement <4 x float> %78, i32 1 + %80 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) + %81 = extractelement <4 x float> %80, i32 2 + %82 = insertelement <4 x float> undef, float %51, i32 0 + %83 = insertelement <4 x float> %82, float %55, i32 1 + %84 = insertelement <4 x float> %83, float %59, i32 2 + %85 = insertelement <4 x float> %84, float %63, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %85, i32 60, i32 1) + %86 = insertelement <4 x float> undef, float 0.000000e+00, i32 0 + %87 = insertelement <4 x float> %86, float 0.000000e+00, i32 1 + %88 = insertelement <4 x float> %87, float 0.000000e+00, i32 2 + %89 = insertelement <4 x float> %88, float 0.000000e+00, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %89, i32 0, i32 2) + %90 = insertelement <4 x float> undef, float 0.000000e+00, i32 0 + %91 = insertelement <4 x float> %90, float 0.000000e+00, i32 1 + %92 = insertelement <4 x float> %91, float 0.000000e+00, i32 2 + %93 = insertelement <4 x float> %92, float 0.000000e+00, i32 3 + call void 
@llvm.R600.store.swizzle(<4 x float> %93, i32 1, i32 2) + %94 = insertelement <4 x float> undef, float 0.000000e+00, i32 0 + %95 = insertelement <4 x float> %94, float %65, i32 1 + %96 = insertelement <4 x float> %95, float %67, i32 2 + %97 = insertelement <4 x float> %96, float %69, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %97, i32 2, i32 2) + %98 = insertelement <4 x float> undef, float %77, i32 0 + %99 = insertelement <4 x float> %98, float %79, i32 1 + %100 = insertelement <4 x float> %99, float %81, i32 2 + %101 = insertelement <4 x float> %100, float %71, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %101, i32 3, i32 2) + %102 = insertelement <4 x float> undef, float %73, i32 0 + %103 = insertelement <4 x float> %102, float %75, i32 1 + %104 = insertelement <4 x float> %103, float 0.000000e+00, i32 2 + %105 = insertelement <4 x float> %104, float 0.000000e+00, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %105, i32 4, i32 2) + %106 = insertelement <4 x float> undef, float 0.000000e+00, i32 0 + %107 = insertelement <4 x float> %106, float 0.000000e+00, i32 1 + %108 = insertelement <4 x float> %107, float 0.000000e+00, i32 2 + %109 = insertelement <4 x float> %108, float 0.000000e+00, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %109, i32 5, i32 2) + %110 = insertelement <4 x float> undef, float 0.000000e+00, i32 0 + %111 = insertelement <4 x float> %110, float 0.000000e+00, i32 1 + %112 = insertelement <4 x float> %111, float 0.000000e+00, i32 2 + %113 = insertelement <4 x float> %112, float 0.000000e+00, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %113, i32 6, i32 2) + ret void +} + +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="1" } diff --git a/test/CodeGen/R600/r600cfg.ll b/test/CodeGen/R600/r600cfg.ll index 895ad5e..6dee3ef 100644 --- a/test/CodeGen/R600/r600cfg.ll +++ b/test/CodeGen/R600/r600cfg.ll @@ -1,12 +1,12 @@ ;RUN: llc < %s -march=r600 
-mcpu=redwood ;REQUIRES: asserts -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 %4 = bitcast float %0 to i32 %5 = icmp eq i32 %4, 0 %6 = sext i1 %5 to i32 @@ -113,12 +113,8 @@ ENDIF48: ; preds = %LOOP47 br label %LOOP47 } -; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - declare void @llvm.R600.store.stream.output(<4 x float>, i32, i32, i32) declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { "ShaderType"="1" } -attributes #1 = { readnone } diff --git a/test/CodeGen/R600/reciprocal.ll b/test/CodeGen/R600/reciprocal.ll index 2783929..b4ac47a 100644 --- a/test/CodeGen/R600/reciprocal.ll +++ b/test/CodeGen/R600/reciprocal.ll @@ -2,15 +2,14 @@ ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 %r1 = fdiv float 1.0, %r0 - call void @llvm.AMDGPU.store.output(float %r1, i32 0) + %vec = insertelement <4 x float> undef, float %r1, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -declare void @llvm.AMDGPU.store.output(float, i32) - -declare float @llvm.AMDGPU.rcp(float ) readnone +attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/rotr.ll b/test/CodeGen/R600/rotr.ll index 5c4c4e9..edf7aee 100644 --- a/test/CodeGen/R600/rotr.ll +++ 
b/test/CodeGen/R600/rotr.ll @@ -1,10 +1,10 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood -o - | FileCheck --check-prefix=R600-CHECK %s -; RUN: llc < %s -march=r600 -mcpu=SI -o - | FileCheck --check-prefix=SI-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s -; R600-CHECK: @rotr +; R600-CHECK-LABEL: @rotr: ; R600-CHECK: BIT_ALIGN_INT -; SI-CHECK: @rotr +; SI-CHECK-LABEL: @rotr: ; SI-CHECK: V_ALIGNBIT_B32 define void @rotr(i32 addrspace(1)* %in, i32 %x, i32 %y) { entry: @@ -16,14 +16,16 @@ entry: ret void } -; R600-CHECK: @rotl +; R600-CHECK-LABEL: @rotl: ; R600-CHECK: SUB_INT {{\** T[0-9]+\.[XYZW]}}, literal.x ; R600-CHECK-NEXT: 32 -; R600-CHECK: BIT_ALIGN_INT {{\** T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].Z, PV.{{[XYZW]}} +; R600-CHECK: BIT_ALIGN_INT {{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].Z, PV.{{[XYZW]}} -; SI-CHECK: @rotl -; SI-CHECK: V_SUB_I32_e64 [[DST:VGPR[0-9]+]], 32, {{[SV]GPR[0-9]+}} -; SI-CHECK: V_ALIGNBIT_B32 {{VGPR[0-9]+, [SV]GPR[0-9]+, VGPR[0-9]+}}, [[DST]] + +; SI-CHECK-LABEL: @rotl: +; SI-CHECK: S_SUB_I32 [[SDST:s[0-9]+]], 32, {{[s][0-9]+}} +; SI-CHECK: V_MOV_B32_e32 [[VDST:v[0-9]+]], [[SDST]] +; SI-CHECK: V_ALIGNBIT_B32 {{v[0-9]+, [s][0-9]+, v[0-9]+}}, [[VDST]] define void @rotl(i32 addrspace(1)* %in, i32 %x, i32 %y) { entry: %0 = shl i32 %x, %y diff --git a/test/CodeGen/R600/rv7x0_count3.ll b/test/CodeGen/R600/rv7x0_count3.ll index 474d6ba..c3fd923 100644 --- a/test/CodeGen/R600/rv7x0_count3.ll +++ b/test/CodeGen/R600/rv7x0_count3.ll @@ -1,12 +1,12 @@ ; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=rv710 | FileCheck %s -; CHECK: TEX 9 @4 ; encoding: [0x04,0x00,0x00,0x00,0x00,0x04,0x88,0x80] +; CHECK: TEX 9 @6 ; encoding: [0x06,0x00,0x00,0x00,0x00,0x04,0x88,0x80] -define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { - %1 = call float @llvm.R600.load.input(i32 4) - %2 = call float 
@llvm.R600.load.input(i32 5) - %3 = call float @llvm.R600.load.input(i32 6) - %4 = call float @llvm.R600.load.input(i32 7) +define void @test(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 { + %1 = extractelement <4 x float> %reg1, i32 0 + %2 = extractelement <4 x float> %reg1, i32 1 + %3 = extractelement <4 x float> %reg1, i32 2 + %4 = extractelement <4 x float> %reg1, i32 3 %5 = insertelement <4 x float> undef, float %1, i32 0 %6 = insertelement <4 x float> %5, float %2, i32 1 %7 = insertelement <4 x float> %6, float %3, i32 2 @@ -36,9 +36,6 @@ define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone -; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - - declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -attributes #1 = { readnone } + +attributes #0 = { "ShaderType"="1" } diff --git a/test/CodeGen/R600/schedule-fs-loop-nested-if.ll b/test/CodeGen/R600/schedule-fs-loop-nested-if.ll index ba9620c..11e8f51 100644 --- a/test/CodeGen/R600/schedule-fs-loop-nested-if.ll +++ b/test/CodeGen/R600/schedule-fs-loop-nested-if.ll @@ -1,12 +1,12 @@ -;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched +;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs ;REQUIRES: asserts -define void @main() { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #1 { main_body: - %0 = call float @llvm.R600.interp.input(i32 0, i32 0) - %1 = call float @llvm.R600.interp.input(i32 1, i32 0) - %2 = call float @llvm.R600.interp.input(i32 2, i32 0) - %3 = call float @llvm.R600.interp.input(i32 3, i32 0) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 %4 = fcmp ult float %1, 0.000000e+00 %5 = select i1 %4, float 1.000000e+00, float 0.000000e+00 %6 = fsub 
float -0.000000e+00, %5 @@ -74,10 +74,9 @@ ELSE17: ; preds = %ELSE br label %ENDIF } -declare float @llvm.R600.interp.input(i32, i32) #0 - declare float @llvm.AMDIL.clamp.(float, float, float) #0 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { readnone } +attributes #1 = { "ShaderType"="1" } diff --git a/test/CodeGen/R600/schedule-fs-loop-nested.ll b/test/CodeGen/R600/schedule-fs-loop-nested.ll index 5e875c4..b917ec6 100644 --- a/test/CodeGen/R600/schedule-fs-loop-nested.ll +++ b/test/CodeGen/R600/schedule-fs-loop-nested.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched +;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs ;REQUIRES: asserts define void @main() { diff --git a/test/CodeGen/R600/schedule-fs-loop.ll b/test/CodeGen/R600/schedule-fs-loop.ll index d142cac..d6c194b 100644 --- a/test/CodeGen/R600/schedule-fs-loop.ll +++ b/test/CodeGen/R600/schedule-fs-loop.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched +;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs ;REQUIRES: asserts define void @main() { diff --git a/test/CodeGen/R600/schedule-if-2.ll b/test/CodeGen/R600/schedule-if-2.ll index 6afd677..38aad18 100644 --- a/test/CodeGen/R600/schedule-if-2.ll +++ b/test/CodeGen/R600/schedule-if-2.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched +;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs ;REQUIRES: asserts define void @main() { diff --git a/test/CodeGen/R600/schedule-if.ll b/test/CodeGen/R600/schedule-if.ll index 347d92f..f960c93 100644 --- a/test/CodeGen/R600/schedule-if.ll +++ b/test/CodeGen/R600/schedule-if.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched +;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs 
;REQUIRES: asserts define void @main() { diff --git a/test/CodeGen/R600/schedule-vs-if-nested-loop.ll b/test/CodeGen/R600/schedule-vs-if-nested-loop.ll index 44b7c2f..33b20d3 100644 --- a/test/CodeGen/R600/schedule-vs-if-nested-loop.ll +++ b/test/CodeGen/R600/schedule-vs-if-nested-loop.ll @@ -1,12 +1,12 @@ ;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched ;REQUIRES: asserts -define void @main() { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 %4 = fcmp ult float %0, 0.000000e+00 %5 = select i1 %4, float 1.000000e+00, float 0.000000e+00 %6 = fsub float -0.000000e+00, %5 @@ -127,8 +127,6 @@ ENDIF19: ; preds = %ENDIF16 br label %LOOP } -declare float @llvm.R600.load.input(i32) #0 - declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -attributes #0 = { readnone } +attributes #0 = { "ShaderType"="1" } diff --git a/test/CodeGen/R600/select.ll b/test/CodeGen/R600/select.ll new file mode 100644 index 0000000..f940142 --- /dev/null +++ b/test/CodeGen/R600/select.ll @@ -0,0 +1,46 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; Normally icmp + select is optimized to select_cc, when this happens the +; DAGLegalizer never sees the select and doesn't have a chance to leaglize it. +; +; In order to avoid the select_cc optimization, this test case calculates the +; condition for the select in a separate basic block. 
+ +; CHECK-LABEL: @select +; CHECK-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.X +; CHECK-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.X +; CHECK-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY +; CHECK-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY +; CHECK-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XYZW +; CHECK-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XYZW +define void @select (i32 addrspace(1)* %i32out, float addrspace(1)* %f32out, + <2 x i32> addrspace(1)* %v2i32out, <2 x float> addrspace(1)* %v2f32out, + <4 x i32> addrspace(1)* %v4i32out, <4 x float> addrspace(1)* %v4f32out, + i32 %cond) { +entry: + br label %for +body: + %inc = add i32 %i, 1 + %br_cmp.i = icmp eq i1 %br_cmp, 0 + br label %for +for: + %i = phi i32 [ %inc, %body], [ 0, %entry ] + %br_cmp = phi i1 [ %br_cmp.i, %body ], [ 0, %entry ] + %0 = icmp eq i32 %cond, %i + %1 = select i1 %br_cmp, i32 2, i32 3 + %2 = select i1 %br_cmp, float 2.0 , float 5.0 + %3 = select i1 %br_cmp, <2 x i32> <i32 2, i32 3>, <2 x i32> <i32 4, i32 5> + %4 = select i1 %br_cmp, <2 x float> <float 2.0, float 3.0>, <2 x float> <float 4.0, float 5.0> + %5 = select i1 %br_cmp, <4 x i32> <i32 2 , i32 3, i32 4, i32 5>, <4 x i32> <i32 6, i32 7, i32 8, i32 9> + %6 = select i1 %br_cmp, <4 x float> <float 2.0, float 3.0, float 4.0, float 5.0>, <4 x float> <float 6.0, float 7.0, float 8.0, float 9.0> + br i1 %0, label %body, label %done + +done: + store i32 %1, i32 addrspace(1)* %i32out + store float %2, float addrspace(1)* %f32out + store <2 x i32> %3, <2 x i32> addrspace(1)* %v2i32out + store <2 x float> %4, <2 x float> addrspace(1)* %v2f32out + store <4 x i32> %5, <4 x i32> addrspace(1)* %v4i32out + store <4 x float> %6, <4 x float> addrspace(1)* %v4f32out + ret void +} diff --git a/test/CodeGen/R600/selectcc-cnd.ll b/test/CodeGen/R600/selectcc-cnd.ll index d7287b4..0bfca69 100644 --- a/test/CodeGen/R600/selectcc-cnd.ll +++ b/test/CodeGen/R600/selectcc-cnd.ll @@ -1,8 +1,8 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | 
FileCheck %s ;CHECK-NOT: SETE -;CHECK: CNDE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1.0, literal.x, -;CHECK-NEXT: {{[-0-9]+\(2.0}} +;CHECK: CNDE {{\*?}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1.0, literal.x, +;CHECK: 1073741824 define void @test(float addrspace(1)* %out, float addrspace(1)* %in) { %1 = load float addrspace(1)* %in %2 = fcmp oeq float %1, 0.0 diff --git a/test/CodeGen/R600/selectcc-cnde-int.ll b/test/CodeGen/R600/selectcc-cnde-int.ll index 768dc7d..d568888 100644 --- a/test/CodeGen/R600/selectcc-cnde-int.ll +++ b/test/CodeGen/R600/selectcc-cnde-int.ll @@ -1,7 +1,7 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ;CHECK-NOT: SETE_INT -;CHECK: CNDE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, literal.x, +;CHECK: CNDE_INT {{\*?}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, literal.x, ;CHECK-NEXT: 2 define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { %1 = load i32 addrspace(1)* %in diff --git a/test/CodeGen/R600/selectcc-opt.ll b/test/CodeGen/R600/selectcc-opt.ll index 7e2d559..834c030 100644 --- a/test/CodeGen/R600/selectcc-opt.ll +++ b/test/CodeGen/R600/selectcc-opt.ll @@ -6,7 +6,7 @@ define void @test_a(i32 addrspace(1)* %out, float %in) { entry: - %0 = fcmp ult float %in, 0.000000e+00 + %0 = fcmp olt float %in, 0.000000e+00 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 %2 = fsub float -0.000000e+00, %1 %3 = fptosi float %2 to i32 @@ -34,7 +34,7 @@ ENDIF: ; CHECK-NEXT: ALU clause starting define void @test_b(i32 addrspace(1)* %out, float %in) { entry: - %0 = fcmp ult float %in, 0.0 + %0 = fcmp olt float %in, 0.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 %2 = fsub float -0.000000e+00, %1 %3 = fptosi float %2 to i32 diff --git a/test/CodeGen/R600/set-dx10.ll b/test/CodeGen/R600/set-dx10.ll index 291a7bd..5c7d499 100644 --- a/test/CodeGen/R600/set-dx10.ll +++ b/test/CodeGen/R600/set-dx10.ll @@ -5,7 +5,8 @@ ; SET*DX10 instructions. 
; CHECK: @fcmp_une_select_fptosi -; CHECK: SETNE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -18,7 +19,8 @@ entry: } ; CHECK: @fcmp_une_select_i32 -; CHECK: SETNE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -28,12 +30,13 @@ entry: ret void } -; CHECK: @fcmp_ueq_select_fptosi -; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK: @fcmp_oeq_select_fptosi +; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) -define void @fcmp_ueq_select_fptosi(i32 addrspace(1)* %out, float %in) { +define void @fcmp_oeq_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: - %0 = fcmp ueq float %in, 5.0 + %0 = fcmp oeq float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 %2 = fsub float -0.000000e+00, %1 %3 = fptosi float %2 to i32 @@ -41,23 +44,25 @@ entry: ret void } -; CHECK: @fcmp_ueq_select_i32 -; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK: @fcmp_oeq_select_i32 +; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) -define void @fcmp_ueq_select_i32(i32 addrspace(1)* %out, float %in) { +define void @fcmp_oeq_select_i32(i32 addrspace(1)* %out, float %in) { entry: - %0 = fcmp ueq float %in, 5.0 + %0 = fcmp oeq float %in, 5.0 %1 = select i1 %0, i32 -1, i32 0 store i32 %1, i32 addrspace(1)* %out ret void } -; CHECK: @fcmp_ugt_select_fptosi -; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK: @fcmp_ogt_select_fptosi +; 
CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) -define void @fcmp_ugt_select_fptosi(i32 addrspace(1)* %out, float %in) { +define void @fcmp_ogt_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: - %0 = fcmp ugt float %in, 5.0 + %0 = fcmp ogt float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 %2 = fsub float -0.000000e+00, %1 %3 = fptosi float %2 to i32 @@ -65,23 +70,25 @@ entry: ret void } -; CHECK: @fcmp_ugt_select_i32 -; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK: @fcmp_ogt_select_i32 +; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) -define void @fcmp_ugt_select_i32(i32 addrspace(1)* %out, float %in) { +define void @fcmp_ogt_select_i32(i32 addrspace(1)* %out, float %in) { entry: - %0 = fcmp ugt float %in, 5.0 + %0 = fcmp ogt float %in, 5.0 %1 = select i1 %0, i32 -1, i32 0 store i32 %1, i32 addrspace(1)* %out ret void } -; CHECK: @fcmp_uge_select_fptosi -; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK: @fcmp_oge_select_fptosi +; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) -define void @fcmp_uge_select_fptosi(i32 addrspace(1)* %out, float %in) { +define void @fcmp_oge_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: - %0 = fcmp uge float %in, 5.0 + %0 = fcmp oge float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 %2 = fsub float -0.000000e+00, %1 %3 = fptosi float %2 to i32 @@ -89,23 +96,25 @@ entry: ret void } -; CHECK: @fcmp_uge_select_i32 -; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK: @fcmp_oge_select_i32 +; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) -define void @fcmp_uge_select_i32(i32 
addrspace(1)* %out, float %in) { +define void @fcmp_oge_select_i32(i32 addrspace(1)* %out, float %in) { entry: - %0 = fcmp uge float %in, 5.0 + %0 = fcmp oge float %in, 5.0 %1 = select i1 %0, i32 -1, i32 0 store i32 %1, i32 addrspace(1)* %out ret void } -; CHECK: @fcmp_ule_select_fptosi -; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, +; CHECK: @fcmp_ole_select_fptosi +; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) -define void @fcmp_ule_select_fptosi(i32 addrspace(1)* %out, float %in) { +define void @fcmp_ole_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: - %0 = fcmp ule float %in, 5.0 + %0 = fcmp ole float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 %2 = fsub float -0.000000e+00, %1 %3 = fptosi float %2 to i32 @@ -113,23 +122,25 @@ entry: ret void } -; CHECK: @fcmp_ule_select_i32 -; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, +; CHECK: @fcmp_ole_select_i32 +; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) -define void @fcmp_ule_select_i32(i32 addrspace(1)* %out, float %in) { +define void @fcmp_ole_select_i32(i32 addrspace(1)* %out, float %in) { entry: - %0 = fcmp ule float %in, 5.0 + %0 = fcmp ole float %in, 5.0 %1 = select i1 %0, i32 -1, i32 0 store i32 %1, i32 addrspace(1)* %out ret void } -; CHECK: @fcmp_ult_select_fptosi -; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, +; CHECK: @fcmp_olt_select_fptosi +; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) -define void @fcmp_ult_select_fptosi(i32 addrspace(1)* %out, float %in) { +define void @fcmp_olt_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: - %0 = fcmp ult float %in, 5.0 + %0 = fcmp olt float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 %2 = fsub float 
-0.000000e+00, %1 %3 = fptosi float %2 to i32 @@ -137,12 +148,13 @@ entry: ret void } -; CHECK: @fcmp_ult_select_i32 -; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, +; CHECK: @fcmp_olt_select_i32 +; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) -define void @fcmp_ult_select_i32(i32 addrspace(1)* %out, float %in) { +define void @fcmp_olt_select_i32(i32 addrspace(1)* %out, float %in) { entry: - %0 = fcmp ult float %in, 5.0 + %0 = fcmp olt float %in, 5.0 %1 = select i1 %0, i32 -1, i32 0 store i32 %1, i32 addrspace(1)* %out ret void diff --git a/test/CodeGen/R600/setcc.ll b/test/CodeGen/R600/setcc.ll index 992de70..8d34c4a 100644 --- a/test/CodeGen/R600/setcc.ll +++ b/test/CodeGen/R600/setcc.ll @@ -1,8 +1,9 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI --check-prefix=FUNC %s -; CHECK: @setcc_v2i32 -; EG-CHECK-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[3].X, KC0[3].Z -; EG-CHECK-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[2].W, KC0[3].Y +; FUNC-LABEL: @setcc_v2i32 +; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[3].X, KC0[3].Z +; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[2].W, KC0[3].Y define void @setcc_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) { %result = icmp eq <2 x i32> %a, %b @@ -11,11 +12,11 @@ define void @setcc_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> % ret void } -; CHECK: @setcc_v4i32 -; EG-CHECK-DAG: SETE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; EG-CHECK-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; EG-CHECK-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; EG-CHECK-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; FUNC-LABEL: 
@setcc_v4i32 +; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @setcc_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 @@ -26,3 +27,307 @@ define void @setcc_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* % store <4 x i32> %sext, <4 x i32> addrspace(1)* %out ret void } + +;;;==========================================================================;;; +;; Float comparisons +;;;==========================================================================;;; + +; FUNC-LABEL: @f32_oeq +; R600: SETE_DX10 +; SI: V_CMP_EQ_F32 +define void @f32_oeq(i32 addrspace(1)* %out, float %a, float %b) { +entry: + %0 = fcmp oeq float %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @f32_ogt +; R600: SETGT_DX10 +; SI: V_CMP_GT_F32 +define void @f32_ogt(i32 addrspace(1)* %out, float %a, float %b) { +entry: + %0 = fcmp ogt float %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @f32_oge +; R600: SETGE_DX10 +; SI: V_CMP_GE_F32 +define void @f32_oge(i32 addrspace(1)* %out, float %a, float %b) { +entry: + %0 = fcmp oge float %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @f32_olt +; R600: SETGT_DX10 +; SI: V_CMP_LT_F32 +define void @f32_olt(i32 addrspace(1)* %out, float %a, float %b) { +entry: + %0 = fcmp olt float %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @f32_ole +; R600: SETGE_DX10 +; SI: V_CMP_LE_F32 +define void @f32_ole(i32 addrspace(1)* %out, float %a, float %b) { +entry: + %0 = fcmp ole float %a, %b + %1 = sext 
i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @f32_one +; R600-DAG: SETE_DX10 +; R600-DAG: SETE_DX10 +; R600-DAG: AND_INT +; R600-DAG: SETNE_DX10 +; R600-DAG: AND_INT +; R600-DAG: SETNE_INT +; SI: V_CMP_O_F32 +; SI: V_CMP_NEQ_F32 +; SI: S_AND_B64 +define void @f32_one(i32 addrspace(1)* %out, float %a, float %b) { +entry: + %0 = fcmp one float %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @f32_ord +; R600-DAG: SETE_DX10 +; R600-DAG: SETE_DX10 +; R600-DAG: AND_INT +; R600-DAG: SETNE_INT +; SI: V_CMP_O_F32 +define void @f32_ord(i32 addrspace(1)* %out, float %a, float %b) { +entry: + %0 = fcmp ord float %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @f32_ueq +; R600-DAG: SETNE_DX10 +; R600-DAG: SETNE_DX10 +; R600-DAG: OR_INT +; R600-DAG: SETE_DX10 +; R600-DAG: OR_INT +; R600-DAG: SETNE_INT +; SI: V_CMP_U_F32 +; SI: V_CMP_EQ_F32 +; SI: S_OR_B64 +define void @f32_ueq(i32 addrspace(1)* %out, float %a, float %b) { +entry: + %0 = fcmp ueq float %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @f32_ugt +; R600: SETGE +; R600: SETE_DX10 +; SI: V_CMP_U_F32 +; SI: V_CMP_GT_F32 +; SI: S_OR_B64 +define void @f32_ugt(i32 addrspace(1)* %out, float %a, float %b) { +entry: + %0 = fcmp ugt float %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @f32_uge +; R600: SETGT +; R600: SETE_DX10 +; SI: V_CMP_U_F32 +; SI: V_CMP_GE_F32 +; SI: S_OR_B64 +define void @f32_uge(i32 addrspace(1)* %out, float %a, float %b) { +entry: + %0 = fcmp uge float %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @f32_ult +; R600: SETGE +; R600: SETE_DX10 +; SI: V_CMP_U_F32 +; SI: V_CMP_LT_F32 +; SI: S_OR_B64 +define void @f32_ult(i32 addrspace(1)* %out, float %a, float %b) { +entry: + %0 = fcmp ult float %a, %b 
+ %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @f32_ule +; R600: SETGT +; R600: SETE_DX10 +; SI: V_CMP_U_F32 +; SI: V_CMP_LE_F32 +; SI: S_OR_B64 +define void @f32_ule(i32 addrspace(1)* %out, float %a, float %b) { +entry: + %0 = fcmp ule float %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @f32_une +; R600: SETNE_DX10 +; SI: V_CMP_NEQ_F32 +define void @f32_une(i32 addrspace(1)* %out, float %a, float %b) { +entry: + %0 = fcmp une float %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @f32_uno +; R600: SETNE_DX10 +; R600: SETNE_DX10 +; R600: OR_INT +; R600: SETNE_INT +; SI: V_CMP_U_F32 +define void @f32_uno(i32 addrspace(1)* %out, float %a, float %b) { +entry: + %0 = fcmp uno float %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +;;;==========================================================================;;; +;; 32-bit integer comparisons +;;;==========================================================================;;; + +; FUNC-LABEL: @i32_eq +; R600: SETE_INT +; SI: V_CMP_EQ_I32 +define void @i32_eq(i32 addrspace(1)* %out, i32 %a, i32 %b) { +entry: + %0 = icmp eq i32 %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @i32_ne +; R600: SETNE_INT +; SI: V_CMP_NE_I32 +define void @i32_ne(i32 addrspace(1)* %out, i32 %a, i32 %b) { +entry: + %0 = icmp ne i32 %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @i32_ugt +; R600: SETGT_UINT +; SI: V_CMP_GT_U32 +define void @i32_ugt(i32 addrspace(1)* %out, i32 %a, i32 %b) { +entry: + %0 = icmp ugt i32 %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @i32_uge +; R600: SETGE_UINT +; SI: V_CMP_GE_U32 +define void @i32_uge(i32 addrspace(1)* %out, i32 %a, i32 %b) { +entry: + %0 = icmp uge 
i32 %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @i32_ult +; R600: SETGT_UINT +; SI: V_CMP_LT_U32 +define void @i32_ult(i32 addrspace(1)* %out, i32 %a, i32 %b) { +entry: + %0 = icmp ult i32 %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @i32_ule +; R600: SETGE_UINT +; SI: V_CMP_LE_U32 +define void @i32_ule(i32 addrspace(1)* %out, i32 %a, i32 %b) { +entry: + %0 = icmp ule i32 %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @i32_sgt +; R600: SETGT_INT +; SI: V_CMP_GT_I32 +define void @i32_sgt(i32 addrspace(1)* %out, i32 %a, i32 %b) { +entry: + %0 = icmp sgt i32 %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @i32_sge +; R600: SETGE_INT +; SI: V_CMP_GE_I32 +define void @i32_sge(i32 addrspace(1)* %out, i32 %a, i32 %b) { +entry: + %0 = icmp sge i32 %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @i32_slt +; R600: SETGT_INT +; SI: V_CMP_LT_I32 +define void @i32_slt(i32 addrspace(1)* %out, i32 %a, i32 %b) { +entry: + %0 = icmp slt i32 %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @i32_sle +; R600: SETGE_INT +; SI: V_CMP_LE_I32 +define void @i32_sle(i32 addrspace(1)* %out, i32 %a, i32 %b) { +entry: + %0 = icmp sle i32 %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/setcc64.ll b/test/CodeGen/R600/setcc64.ll new file mode 100644 index 0000000..9202fc0 --- /dev/null +++ b/test/CodeGen/R600/setcc64.ll @@ -0,0 +1,263 @@ +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI --check-prefix=FUNC %s + +; XXX: Merge this into setcc, once R600 supports 64-bit operations + +;;;==========================================================================;;; +;; Double comparisons 
+;;;==========================================================================;;; + +; FUNC-LABEL: @f64_oeq +; SI: V_CMP_EQ_F64 +define void @f64_oeq(i32 addrspace(1)* %out, double %a, double %b) { +entry: + %0 = fcmp oeq double %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @f64_ogt +; SI: V_CMP_GT_F64 +define void @f64_ogt(i32 addrspace(1)* %out, double %a, double %b) { +entry: + %0 = fcmp ogt double %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @f64_oge +; SI: V_CMP_GE_F64 +define void @f64_oge(i32 addrspace(1)* %out, double %a, double %b) { +entry: + %0 = fcmp oge double %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @f64_olt +; SI: V_CMP_LT_F64 +define void @f64_olt(i32 addrspace(1)* %out, double %a, double %b) { +entry: + %0 = fcmp olt double %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @f64_ole +; SI: V_CMP_LE_F64 +define void @f64_ole(i32 addrspace(1)* %out, double %a, double %b) { +entry: + %0 = fcmp ole double %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @f64_one +; SI: V_CMP_O_F64 +; SI: V_CMP_NEQ_F64 +; SI: S_AND_B64 +define void @f64_one(i32 addrspace(1)* %out, double %a, double %b) { +entry: + %0 = fcmp one double %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @f64_ord +; SI: V_CMP_O_F64 +define void @f64_ord(i32 addrspace(1)* %out, double %a, double %b) { +entry: + %0 = fcmp ord double %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @f64_ueq +; SI: V_CMP_U_F64 +; SI: V_CMP_EQ_F64 +; SI: S_OR_B64 +define void @f64_ueq(i32 addrspace(1)* %out, double %a, double %b) { +entry: + %0 = fcmp ueq double %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + 
ret void +} + +; FUNC-LABEL: @f64_ugt +; SI: V_CMP_U_F64 +; SI: V_CMP_GT_F64 +; SI: S_OR_B64 +define void @f64_ugt(i32 addrspace(1)* %out, double %a, double %b) { +entry: + %0 = fcmp ugt double %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @f64_uge +; SI: V_CMP_U_F64 +; SI: V_CMP_GE_F64 +; SI: S_OR_B64 +define void @f64_uge(i32 addrspace(1)* %out, double %a, double %b) { +entry: + %0 = fcmp uge double %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @f64_ult +; SI: V_CMP_U_F64 +; SI: V_CMP_LT_F64 +; SI: S_OR_B64 +define void @f64_ult(i32 addrspace(1)* %out, double %a, double %b) { +entry: + %0 = fcmp ult double %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @f64_ule +; SI: V_CMP_U_F64 +; SI: V_CMP_LE_F64 +; SI: S_OR_B64 +define void @f64_ule(i32 addrspace(1)* %out, double %a, double %b) { +entry: + %0 = fcmp ule double %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @f64_une +; SI: V_CMP_NEQ_F64 +define void @f64_une(i32 addrspace(1)* %out, double %a, double %b) { +entry: + %0 = fcmp une double %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @f64_uno +; SI: V_CMP_U_F64 +define void @f64_uno(i32 addrspace(1)* %out, double %a, double %b) { +entry: + %0 = fcmp uno double %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +;;;==========================================================================;;; +;; 64-bit integer comparisons +;;;==========================================================================;;; + +; FUNC-LABEL: @i64_eq +; SI: V_CMP_EQ_I64 +define void @i64_eq(i32 addrspace(1)* %out, i64 %a, i64 %b) { +entry: + %0 = icmp eq i64 %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @i64_ne +; SI: 
V_CMP_NE_I64 +define void @i64_ne(i32 addrspace(1)* %out, i64 %a, i64 %b) { +entry: + %0 = icmp ne i64 %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @i64_ugt +; SI: V_CMP_GT_U64 +define void @i64_ugt(i32 addrspace(1)* %out, i64 %a, i64 %b) { +entry: + %0 = icmp ugt i64 %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @i64_uge +; SI: V_CMP_GE_U64 +define void @i64_uge(i32 addrspace(1)* %out, i64 %a, i64 %b) { +entry: + %0 = icmp uge i64 %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @i64_ult +; SI: V_CMP_LT_U64 +define void @i64_ult(i32 addrspace(1)* %out, i64 %a, i64 %b) { +entry: + %0 = icmp ult i64 %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @i64_ule +; SI: V_CMP_LE_U64 +define void @i64_ule(i32 addrspace(1)* %out, i64 %a, i64 %b) { +entry: + %0 = icmp ule i64 %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @i64_sgt +; SI: V_CMP_GT_I64 +define void @i64_sgt(i32 addrspace(1)* %out, i64 %a, i64 %b) { +entry: + %0 = icmp sgt i64 %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @i64_sge +; SI: V_CMP_GE_I64 +define void @i64_sge(i32 addrspace(1)* %out, i64 %a, i64 %b) { +entry: + %0 = icmp sge i64 %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @i64_slt +; SI: V_CMP_LT_I64 +define void @i64_slt(i32 addrspace(1)* %out, i64 %a, i64 %b) { +entry: + %0 = icmp slt i64 %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @i64_sle +; SI: V_CMP_LE_I64 +define void @i64_sle(i32 addrspace(1)* %out, i64 %a, i64 %b) { +entry: + %0 = icmp sle i64 %a, %b + %1 = sext i1 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} diff --git 
a/test/CodeGen/R600/seto.ll b/test/CodeGen/R600/seto.ll index 19716f8..8633a4b 100644 --- a/test/CodeGen/R600/seto.ll +++ b/test/CodeGen/R600/seto.ll @@ -1,6 +1,6 @@ -;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s -;CHECK: V_CMP_O_F32_e64 SGPR0_SGPR1, {{[SV]GPR[0-9]+, [SV]GPR[0-9]+}}, 0, 0, 0, 0 +;CHECK: V_CMP_O_F32_e64 s[0:1], {{[sv][0-9]+, [sv][0-9]+}}, 0, 0, 0, 0 define void @main(float %p) { main_body: diff --git a/test/CodeGen/R600/setuo.ll b/test/CodeGen/R600/setuo.ll index 929dbb1..c77a37e 100644 --- a/test/CodeGen/R600/setuo.ll +++ b/test/CodeGen/R600/setuo.ll @@ -1,6 +1,6 @@ -;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s -;CHECK: V_CMP_U_F32_e64 SGPR0_SGPR1, {{[SV]GPR[0-9]+, [SV]GPR[0-9]+}}, 0, 0, 0, 0 +;CHECK: V_CMP_U_F32_e64 s[0:1], {{[sv][0-9]+, [sv][0-9]+}}, 0, 0, 0, 0 define void @main(float %p) { main_body: diff --git a/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll b/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll new file mode 100644 index 0000000..d74161b --- /dev/null +++ b/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll @@ -0,0 +1,18 @@ +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s + +; Copy VGPR -> SGPR used twice as an instruction operand, which is then +; used in an REG_SEQUENCE that also needs to be handled. 
+ +; SI-LABEL: @test_dup_operands: +; SI: V_ADD_I32_e32 +define void @test_dup_operands(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) { + %a = load <2 x i32> addrspace(1)* %in + %lo = extractelement <2 x i32> %a, i32 0 + %hi = extractelement <2 x i32> %a, i32 1 + %add = add i32 %lo, %lo + %vec0 = insertelement <2 x i32> undef, i32 %add, i32 0 + %vec1 = insertelement <2 x i32> %vec0, i32 %hi, i32 1 + store <2 x i32> %vec1, <2 x i32> addrspace(1)* %out, align 8 + ret void +} + diff --git a/test/CodeGen/R600/sgpr-copy.ll b/test/CodeGen/R600/sgpr-copy.ll index b0d4549..5472c1b 100644 --- a/test/CodeGen/R600/sgpr-copy.ll +++ b/test/CodeGen/R600/sgpr-copy.ll @@ -1,15 +1,15 @@ -; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s ; This test checks that no VGPR to SGPR copies are created by the register ; allocator. -; CHECK: @main -; CHECK: S_BUFFER_LOAD_DWORD [[DST:SGPR[0-9]]], {{[SGPR_[0-9]+}}, 0 -; CHECK: V_MOV_B32_e32 VGPR{{[0-9]}}, [[DST]] +; CHECK-LABEL: @phi1 +; CHECK: S_BUFFER_LOAD_DWORD [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0 +; CHECK: V_MOV_B32_e32 v{{[0-9]}}, [[DST]] -define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { +define void @phi1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0 - %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0 + %21 = load <16 x i8> addrspace(2)* %20, !tbaa !1 %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 0) %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16) %24 = 
call float @llvm.SI.load.const(<16 x i8> %21, i32 32) @@ -28,13 +28,133 @@ ENDIF: ; preds = %main_body, %ELSE ret void } +; Make sure this program doesn't crash +; CHECK-LABEL: @phi2 +define void @phi2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { +main_body: + %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0 + %21 = load <16 x i8> addrspace(2)* %20, !tbaa !1 + %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16) + %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32) + %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36) + %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 40) + %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48) + %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52) + %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 56) + %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64) + %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68) + %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72) + %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 76) + %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80) + %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84) + %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88) + %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 92) + %37 = getelementptr <32 x i8> addrspace(2)* %2, i32 0 + %38 = load <32 x i8> addrspace(2)* %37, !tbaa !1 + %39 = getelementptr <16 x i8> addrspace(2)* %1, i32 0 + %40 = load <16 x i8> addrspace(2)* %39, !tbaa !1 + %41 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5) + %42 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5) + %43 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5) + %44 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 
x i32> %5) + %45 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5) + %46 = bitcast float %41 to i32 + %47 = bitcast float %42 to i32 + %48 = insertelement <2 x i32> undef, i32 %46, i32 0 + %49 = insertelement <2 x i32> %48, i32 %47, i32 1 + %50 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %49, <32 x i8> %38, <16 x i8> %40, i32 2) + %51 = extractelement <4 x float> %50, i32 2 + %52 = call float @fabs(float %51) + %53 = fmul float %43, %43 + %54 = fmul float %44, %44 + %55 = fadd float %54, %53 + %56 = fmul float %45, %45 + %57 = fadd float %55, %56 + %58 = call float @llvm.AMDGPU.rsq(float %57) + %59 = fmul float %43, %58 + %60 = fmul float %44, %58 + %61 = fmul float %45, %58 + %62 = fmul float %59, %23 + %63 = fmul float %60, %24 + %64 = fadd float %63, %62 + %65 = fmul float %61, %25 + %66 = fadd float %64, %65 + %67 = fsub float -0.000000e+00, %26 + %68 = fmul float %66, %52 + %69 = fadd float %68, %67 + %70 = fmul float %27, %69 + %71 = fmul float %28, %69 + %72 = call float @fabs(float %70) + %73 = fcmp olt float 0x3EE4F8B580000000, %72 + %74 = sext i1 %73 to i32 + %75 = bitcast i32 %74 to float + %76 = bitcast float %75 to i32 + %77 = icmp ne i32 %76, 0 + br i1 %77, label %IF, label %ENDIF + +IF: ; preds = %main_body + %78 = fsub float -0.000000e+00, %70 + %79 = call float @llvm.AMDIL.exp.(float %78) + %80 = fsub float -0.000000e+00, %79 + %81 = fadd float 1.000000e+00, %80 + %82 = fdiv float 1.000000e+00, %70 + %83 = fmul float %81, %82 + %84 = fmul float %32, %83 + br label %ENDIF + +ENDIF: ; preds = %main_body, %IF + %temp4.0 = phi float [ %84, %IF ], [ %32, %main_body ] + %85 = call float @fabs(float %71) + %86 = fcmp olt float 0x3EE4F8B580000000, %85 + %87 = sext i1 %86 to i32 + %88 = bitcast i32 %87 to float + %89 = bitcast float %88 to i32 + %90 = icmp ne i32 %89, 0 + br i1 %90, label %IF25, label %ENDIF24 + +IF25: ; preds = %ENDIF + %91 = fsub float -0.000000e+00, %71 + %92 = call float @llvm.AMDIL.exp.(float %91) + %93 = fsub 
float -0.000000e+00, %92 + %94 = fadd float 1.000000e+00, %93 + %95 = fdiv float 1.000000e+00, %71 + %96 = fmul float %94, %95 + %97 = fmul float %36, %96 + br label %ENDIF24 + +ENDIF24: ; preds = %ENDIF, %IF25 + %temp8.0 = phi float [ %97, %IF25 ], [ %36, %ENDIF ] + %98 = fmul float %29, %temp4.0 + %99 = fmul float %30, %temp4.0 + %100 = fmul float %31, %temp4.0 + %101 = fmul float %33, %temp8.0 + %102 = fadd float %101, %98 + %103 = fmul float %34, %temp8.0 + %104 = fadd float %103, %99 + %105 = fmul float %35, %temp8.0 + %106 = fadd float %105, %100 + %107 = call float @llvm.pow.f32(float %52, float %22) + %108 = fsub float -0.000000e+00, %102 + %109 = fmul float %108, %107 + %110 = fsub float -0.000000e+00, %104 + %111 = fmul float %110, %107 + %112 = fsub float -0.000000e+00, %106 + %113 = fmul float %112, %107 + %114 = call i32 @llvm.SI.packf16(float %109, float %111) + %115 = bitcast i32 %114 to float + %116 = call i32 @llvm.SI.packf16(float %113, float 1.000000e+00) + %117 = bitcast i32 %116 to float + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %115, float %117, float %115, float %117) + ret void +} + ; We just want ot make sure the program doesn't crash -; CHECK: @loop +; CHECK-LABEL: @loop define void @loop(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0 - %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0 + %21 = load <16 x i8> addrspace(2)* %20, !tbaa !1 %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 0) %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 4) %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 8) @@ -79,6 +199,129 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float attributes #0 = { "ShaderType"="0" } 
attributes #1 = { nounwind readnone } attributes #2 = { readonly } +attributes #3 = { readnone } +attributes #4 = { nounwind readonly } + +!0 = metadata !{metadata !"const", null} +!1 = metadata !{metadata !0, metadata !0, i64 0, i32 1} + +; Function Attrs: nounwind readnone +declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 + +; Function Attrs: nounwind readnone +declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 + +; Function Attrs: readnone +declare float @llvm.AMDGPU.rsq(float) #3 + +; Function Attrs: readnone +declare float @llvm.AMDIL.exp.(float) #3 + +; Function Attrs: nounwind readonly +declare float @llvm.pow.f32(float, float) #4 + +; Function Attrs: nounwind readnone +declare i32 @llvm.SI.packf16(float, float) #1 + +; This checks for a bug in the FixSGPRCopies pass where VReg96 +; registers were being identified as an SGPR regclass which was causing +; an assertion failure. + +; CHECK-LABEL: @sample_v3 +; CHECK: IMAGE_SAMPLE +; CHECK: IMAGE_SAMPLE +; CHECK: EXP +; CHECK: S_ENDPGM +define void @sample_v3([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { -!0 = metadata !{metadata !"const", null, i32 1} +entry: + %21 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0 + %22 = load <16 x i8> addrspace(2)* %21, !tbaa !2 + %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 16) + %24 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0 + %25 = load <32 x i8> addrspace(2)* %24, !tbaa !2 + %26 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0 + %27 = load <16 x i8> addrspace(2)* %26, !tbaa !2 + %28 = fcmp oeq float %23, 0.0 + br i1 %28, label %if, label %else + +if: + %val.if = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> <i32 0, i32 0>, <32 x i8> 
%25, <16 x i8> %27, i32 2) + %val.if.0 = extractelement <4 x float> %val.if, i32 0 + %val.if.1 = extractelement <4 x float> %val.if, i32 1 + %val.if.2 = extractelement <4 x float> %val.if, i32 2 + br label %endif + +else: + %val.else = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> <i32 1, i32 0>, <32 x i8> %25, <16 x i8> %27, i32 2) + %val.else.0 = extractelement <4 x float> %val.else, i32 0 + %val.else.1 = extractelement <4 x float> %val.else, i32 1 + %val.else.2 = extractelement <4 x float> %val.else, i32 2 + br label %endif + +endif: + %val.0 = phi float [%val.if.0, %if], [%val.else.0, %else] + %val.1 = phi float [%val.if.1, %if], [%val.else.1, %else] + %val.2 = phi float [%val.if.2, %if], [%val.else.2, %else] + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %val.0, float %val.1, float %val.2, float 0.0) + ret void +} + +!2 = metadata !{metadata !"const", null, i32 1} + +; CHECK-LABEL: @copy1 +; CHECK: BUFFER_LOAD_DWORD +; CHECK: V_ADD +; CHECK: S_ENDPGM +define void @copy1(float addrspace(1)* %out, float addrspace(1)* %in0) { +entry: + %0 = load float addrspace(1)* %in0 + %1 = fcmp oeq float %0, 0.0 + br i1 %1, label %if0, label %endif + +if0: + %2 = bitcast float %0 to i32 + %3 = fcmp olt float %0, 0.0 + br i1 %3, label %if1, label %endif + +if1: + %4 = add i32 %2, 1 + br label %endif + +endif: + %5 = phi i32 [ 0, %entry ], [ %2, %if0 ], [ %4, %if1 ] + %6 = bitcast i32 %5 to float + store float %6, float addrspace(1)* %out + ret void +} + +; This test is just checking that we don't crash / assertion fail. 
+; CHECK-LABEL: @copy2 +; CHECK: S_ENDPGM + +define void @copy2([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { +entry: + br label %LOOP68 + +LOOP68: + %temp4.7 = phi float [ 0.000000e+00, %entry ], [ %v, %ENDIF69 ] + %t = phi i32 [ 20, %entry ], [ %x, %ENDIF69 ] + %g = icmp eq i32 0, %t + %l = bitcast float %temp4.7 to i32 + br i1 %g, label %IF70, label %ENDIF69 + +IF70: + %q = icmp ne i32 %l, 13 + %temp.8 = select i1 %q, float 1.000000e+00, float 0.000000e+00 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %temp.8, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00) + ret void + +ENDIF69: + %u = add i32 %l, %t + %v = bitcast i32 %u to float + %x = add i32 %t, -1 + br label %LOOP68 +} + +attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/shared-op-cycle.ll b/test/CodeGen/R600/shared-op-cycle.ll new file mode 100644 index 0000000..0484fc9 --- /dev/null +++ b/test/CodeGen/R600/shared-op-cycle.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @main +; CHECK: MULADD_IEEE * +; CHECK-NOT: MULADD_IEEE * + +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 { + %w0 = extractelement <4 x float> %reg0, i32 3 + %w1 = extractelement <4 x float> %reg1, i32 3 + %w2 = extractelement <4 x float> %reg2, i32 3 + %sq0 = fmul float %w0, %w0 + %r0 = fadd float %sq0, 2.0 + %sq1 = fmul float %w1, %w1 + %r1 = fadd float %sq1, 2.0 + %sq2 = fmul float %w2, %w2 + %r2 = fadd float %sq2, 2.0 + %v0 = insertelement <4 x float> undef, float %r0, i32 0 + %v1 = insertelement <4 x float> %v0, float %r1, i32 1 + %v2 = insertelement <4 x float> %v1, float %r2, i32 2 + %res = call float @llvm.AMDGPU.dp4(<4 x float> %v2, <4 x float> %v2) + 
%vecres = insertelement <4 x float> undef, float %res, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vecres, i32 0, i32 2) + ret void +} + +; Function Attrs: readnone +declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 + +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="1" } +attributes #1 = { readnone }
\ No newline at end of file diff --git a/test/CodeGen/R600/shl.ll b/test/CodeGen/R600/shl.ll index d99e325..4a6aab4 100644 --- a/test/CodeGen/R600/shl.ll +++ b/test/CodeGen/R600/shl.ll @@ -1,13 +1,13 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s ;EG-CHECK: @shl_v2i32 ;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;SI-CHECK: @shl_v2i32 -;SI-CHECK: V_LSHL_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_LSHL_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} +;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 @@ -25,10 +25,10 @@ define void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in ;EG-CHECK: LSHL {{\*? 
*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;SI-CHECK: @shl_v4i32 -;SI-CHECK: V_LSHL_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_LSHL_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_LSHL_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_LSHL_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} +;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 diff --git a/test/CodeGen/R600/short-args.ll b/test/CodeGen/R600/short-args.ll deleted file mode 100644 index 20d0ae4..0000000 --- a/test/CodeGen/R600/short-args.ll +++ /dev/null @@ -1,69 +0,0 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK -; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG-CHECK -; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK - -; EG-CHECK: @i8_arg -; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z -; SI-CHECK: BUFFER_LOAD_UBYTE - -define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind { -entry: - %0 = zext i8 %in to i32 - store i32 %0, i32 addrspace(1)* %out, align 4 - ret void -} - -; EG-CHECK: @i8_zext_arg -; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z -; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]}}, SGPR0_SGPR1, 11 - -define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind { -entry: - %0 = zext i8 %in to i32 - store i32 %0, i32 addrspace(1)* %out, align 4 - ret void -} - -; EG-CHECK: @i8_sext_arg -; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z -; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]}}, SGPR0_SGPR1, 11 - -define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind { -entry: - %0 
= sext i8 %in to i32 - store i32 %0, i32 addrspace(1)* %out, align 4 - ret void -} - -; EG-CHECK: @i16_arg -; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z -; SI-CHECK: BUFFER_LOAD_USHORT - -define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind { -entry: - %0 = zext i16 %in to i32 - store i32 %0, i32 addrspace(1)* %out, align 4 - ret void -} - -; EG-CHECK: @i16_zext_arg -; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z -; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]}}, SGPR0_SGPR1, 11 - -define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind { -entry: - %0 = zext i16 %in to i32 - store i32 %0, i32 addrspace(1)* %out, align 4 - ret void -} - -; EG-CHECK: @i16_sext_arg -; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z -; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]}}, SGPR0_SGPR1, 11 - -define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind { -entry: - %0 = sext i16 %in to i32 - store i32 %0, i32 addrspace(1)* %out, align 4 - ret void -} diff --git a/test/CodeGen/R600/si-annotate-cf-assertion.ll b/test/CodeGen/R600/si-annotate-cf-assertion.ll new file mode 100644 index 0000000..9886fe9 --- /dev/null +++ b/test/CodeGen/R600/si-annotate-cf-assertion.ll @@ -0,0 +1,23 @@ +; XFAIL: * +; RUN: llc -march=r600 -mcpu=SI -asm-verbose=false < %s | FileCheck %s + + +define void @test(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind { +; CHECK-LABEL: @test: + +entry: + switch i32 %x, label %sw.default [ + i32 0, label %sw.bb + i32 60, label %sw.bb + ] + +sw.bb: + unreachable + +sw.default: + unreachable + +sw.epilog: + ret void +} + diff --git a/test/CodeGen/R600/si-lod-bias.ll b/test/CodeGen/R600/si-lod-bias.ll new file mode 100644 index 0000000..8d7a79c --- /dev/null +++ b/test/CodeGen/R600/si-lod-bias.ll @@ -0,0 +1,51 @@ +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s + +; This shader has the potential to generated illegal VGPR to SGPR copies if +; the wrong 
register class is used for the REG_SEQUENCE instructions. + +; CHECK: @main +; CHECK: IMAGE_SAMPLE_B v{{\[[0-9]:[0-9]\]}}, 15, 0, 0, 0, 0, 0, 0, 0, v{{\[[0-9]:[0-9]\]}} + +define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { +main_body: + %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0 + %21 = load <16 x i8> addrspace(2)* %20, !tbaa !1 + %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16) + %23 = getelementptr <32 x i8> addrspace(2)* %2, i32 0 + %24 = load <32 x i8> addrspace(2)* %23, !tbaa !1 + %25 = getelementptr <16 x i8> addrspace(2)* %1, i32 0 + %26 = load <16 x i8> addrspace(2)* %25, !tbaa !1 + %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5) + %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5) + %29 = bitcast float %22 to i32 + %30 = bitcast float %27 to i32 + %31 = bitcast float %28 to i32 + %32 = insertelement <4 x i32> undef, i32 %29, i32 0 + %33 = insertelement <4 x i32> %32, i32 %30, i32 1 + %34 = insertelement <4 x i32> %33, i32 %31, i32 2 + %35 = insertelement <4 x i32> %34, i32 undef, i32 3 + %36 = call <4 x float> @llvm.SI.sampleb.v4i32(<4 x i32> %35, <32 x i8> %24, <16 x i8> %26, i32 2) + %37 = extractelement <4 x float> %36, i32 0 + %38 = extractelement <4 x float> %36, i32 1 + %39 = extractelement <4 x float> %36, i32 2 + %40 = extractelement <4 x float> %36, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %37, float %38, float %39, float %40) + ret void +} + +; Function Attrs: nounwind readnone +declare float @llvm.SI.load.const(<16 x i8>, i32) #1 + +; Function Attrs: nounwind readnone +declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 + +; Function Attrs: nounwind readnone +declare <4 x float> @llvm.SI.sampleb.v4i32(<4 x i32>, <32 x i8>, 
<16 x i8>, i32) #1 + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="0" } +attributes #1 = { nounwind readnone } + +!0 = metadata !{metadata !"const", null} +!1 = metadata !{metadata !0, metadata !0, i64 0, i32 1} diff --git a/test/CodeGen/R600/si-sgpr-spill.ll b/test/CodeGen/R600/si-sgpr-spill.ll new file mode 100644 index 0000000..05c5e31 --- /dev/null +++ b/test/CodeGen/R600/si-sgpr-spill.ll @@ -0,0 +1,692 @@ +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck %s + +; XXX: Enable when spilling is supported +; XFAIL: * + +; These tests check that the compiler won't crash when it needs to spill +; SGPRs. + +; CHECK-LABEL: @main +; Writing to M0 from an SMRD instruction will hang the GPU. +; CHECK-NOT: S_BUFFER_LOAD_DWORD m0 +; CHECK: S_ENDPGM +@ddxy_lds = external addrspace(3) global [64 x i32] + +define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { +main_body: + %21 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0 + %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0 + %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 96) + %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 100) + %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 104) + %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 112) + %27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 116) + %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 120) + %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 128) + %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 132) + %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 140) + %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 144) + %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 
160) + %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 176) + %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 180) + %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 184) + %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 192) + %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 196) + %39 = call float @llvm.SI.load.const(<16 x i8> %22, i32 200) + %40 = call float @llvm.SI.load.const(<16 x i8> %22, i32 208) + %41 = call float @llvm.SI.load.const(<16 x i8> %22, i32 212) + %42 = call float @llvm.SI.load.const(<16 x i8> %22, i32 216) + %43 = call float @llvm.SI.load.const(<16 x i8> %22, i32 224) + %44 = call float @llvm.SI.load.const(<16 x i8> %22, i32 240) + %45 = call float @llvm.SI.load.const(<16 x i8> %22, i32 244) + %46 = call float @llvm.SI.load.const(<16 x i8> %22, i32 248) + %47 = call float @llvm.SI.load.const(<16 x i8> %22, i32 256) + %48 = call float @llvm.SI.load.const(<16 x i8> %22, i32 272) + %49 = call float @llvm.SI.load.const(<16 x i8> %22, i32 276) + %50 = call float @llvm.SI.load.const(<16 x i8> %22, i32 280) + %51 = call float @llvm.SI.load.const(<16 x i8> %22, i32 288) + %52 = call float @llvm.SI.load.const(<16 x i8> %22, i32 292) + %53 = call float @llvm.SI.load.const(<16 x i8> %22, i32 296) + %54 = call float @llvm.SI.load.const(<16 x i8> %22, i32 304) + %55 = call float @llvm.SI.load.const(<16 x i8> %22, i32 308) + %56 = call float @llvm.SI.load.const(<16 x i8> %22, i32 312) + %57 = call float @llvm.SI.load.const(<16 x i8> %22, i32 368) + %58 = call float @llvm.SI.load.const(<16 x i8> %22, i32 372) + %59 = call float @llvm.SI.load.const(<16 x i8> %22, i32 376) + %60 = call float @llvm.SI.load.const(<16 x i8> %22, i32 384) + %61 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0 + %62 = load <32 x i8> addrspace(2)* %61, !tbaa !0 + %63 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0 + %64 = load <16 x i8> addrspace(2)* %63, !tbaa !0 + %65 = getelementptr [16 x <32 x i8>] 
addrspace(2)* %2, i64 0, i32 1 + %66 = load <32 x i8> addrspace(2)* %65, !tbaa !0 + %67 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 1 + %68 = load <16 x i8> addrspace(2)* %67, !tbaa !0 + %69 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 2 + %70 = load <32 x i8> addrspace(2)* %69, !tbaa !0 + %71 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 2 + %72 = load <16 x i8> addrspace(2)* %71, !tbaa !0 + %73 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 3 + %74 = load <32 x i8> addrspace(2)* %73, !tbaa !0 + %75 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 3 + %76 = load <16 x i8> addrspace(2)* %75, !tbaa !0 + %77 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 4 + %78 = load <32 x i8> addrspace(2)* %77, !tbaa !0 + %79 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 4 + %80 = load <16 x i8> addrspace(2)* %79, !tbaa !0 + %81 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 5 + %82 = load <32 x i8> addrspace(2)* %81, !tbaa !0 + %83 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 5 + %84 = load <16 x i8> addrspace(2)* %83, !tbaa !0 + %85 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 6 + %86 = load <32 x i8> addrspace(2)* %85, !tbaa !0 + %87 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 6 + %88 = load <16 x i8> addrspace(2)* %87, !tbaa !0 + %89 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 7 + %90 = load <32 x i8> addrspace(2)* %89, !tbaa !0 + %91 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 7 + %92 = load <16 x i8> addrspace(2)* %91, !tbaa !0 + %93 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %4, <2 x i32> %6) + %94 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %4, <2 x i32> %6) + %95 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %4, <2 x i32> %6) + %96 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %4, <2 x i32> %6) + %97 = call float @llvm.SI.fs.interp(i32 2, i32 
1, i32 %4, <2 x i32> %6) + %98 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %4, <2 x i32> %6) + %99 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %4, <2 x i32> %6) + %100 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %4, <2 x i32> %6) + %101 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %4, <2 x i32> %6) + %102 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %4, <2 x i32> %6) + %103 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %4, <2 x i32> %6) + %104 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %4, <2 x i32> %6) + %105 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %4, <2 x i32> %6) + %106 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %4, <2 x i32> %6) + %107 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %4, <2 x i32> %6) + %108 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %4, <2 x i32> %6) + %109 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %4, <2 x i32> %6) + %110 = call i32 @llvm.SI.tid() + %111 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %110 + %112 = bitcast float %93 to i32 + store i32 %112, i32 addrspace(3)* %111 + %113 = bitcast float %94 to i32 + store i32 %113, i32 addrspace(3)* %111 + %114 = call i32 @llvm.SI.tid() + %115 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %114 + %116 = and i32 %114, -4 + %117 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %116 + %118 = add i32 %116, 1 + %119 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %118 + %120 = bitcast float %93 to i32 + store i32 %120, i32 addrspace(3)* %115 + %121 = load i32 addrspace(3)* %117 + %122 = bitcast i32 %121 to float + %123 = load i32 addrspace(3)* %119 + %124 = bitcast i32 %123 to float + %125 = fsub float %124, %122 + %126 = bitcast float %94 to i32 + store i32 %126, i32 addrspace(3)* %115 + %127 = load i32 addrspace(3)* %117 + %128 = bitcast i32 %127 to float + %129 = load i32 addrspace(3)* %119 + %130 = bitcast i32 %129 to float + %131 = fsub float %130, %128 + %132 = 
insertelement <4 x float> undef, float %125, i32 0 + %133 = insertelement <4 x float> %132, float %131, i32 1 + %134 = insertelement <4 x float> %133, float %131, i32 2 + %135 = insertelement <4 x float> %134, float %131, i32 3 + %136 = extractelement <4 x float> %135, i32 0 + %137 = extractelement <4 x float> %135, i32 1 + %138 = fmul float %60, %93 + %139 = fmul float %60, %94 + %140 = fmul float %60, %94 + %141 = fmul float %60, %94 + %142 = call i32 @llvm.SI.tid() + %143 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %142 + %144 = bitcast float %138 to i32 + store i32 %144, i32 addrspace(3)* %143 + %145 = bitcast float %139 to i32 + store i32 %145, i32 addrspace(3)* %143 + %146 = bitcast float %140 to i32 + store i32 %146, i32 addrspace(3)* %143 + %147 = bitcast float %141 to i32 + store i32 %147, i32 addrspace(3)* %143 + %148 = call i32 @llvm.SI.tid() + %149 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %148 + %150 = and i32 %148, -4 + %151 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %150 + %152 = add i32 %150, 2 + %153 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %152 + %154 = bitcast float %138 to i32 + store i32 %154, i32 addrspace(3)* %149 + %155 = load i32 addrspace(3)* %151 + %156 = bitcast i32 %155 to float + %157 = load i32 addrspace(3)* %153 + %158 = bitcast i32 %157 to float + %159 = fsub float %158, %156 + %160 = bitcast float %139 to i32 + store i32 %160, i32 addrspace(3)* %149 + %161 = load i32 addrspace(3)* %151 + %162 = bitcast i32 %161 to float + %163 = load i32 addrspace(3)* %153 + %164 = bitcast i32 %163 to float + %165 = fsub float %164, %162 + %166 = bitcast float %140 to i32 + store i32 %166, i32 addrspace(3)* %149 + %167 = load i32 addrspace(3)* %151 + %168 = bitcast i32 %167 to float + %169 = load i32 addrspace(3)* %153 + %170 = bitcast i32 %169 to float + %171 = fsub float %170, %168 + %172 = bitcast float %141 to i32 + store i32 %172, i32 addrspace(3)* %149 + %173 = 
load i32 addrspace(3)* %151 + %174 = bitcast i32 %173 to float + %175 = load i32 addrspace(3)* %153 + %176 = bitcast i32 %175 to float + %177 = fsub float %176, %174 + %178 = insertelement <4 x float> undef, float %159, i32 0 + %179 = insertelement <4 x float> %178, float %165, i32 1 + %180 = insertelement <4 x float> %179, float %171, i32 2 + %181 = insertelement <4 x float> %180, float %177, i32 3 + %182 = extractelement <4 x float> %181, i32 0 + %183 = extractelement <4 x float> %181, i32 1 + %184 = fdiv float 1.000000e+00, %97 + %185 = fmul float %33, %184 + %186 = fcmp uge float 1.000000e+00, %185 + %187 = select i1 %186, float %185, float 1.000000e+00 + %188 = fmul float %187, %30 + %189 = call float @ceil(float %188) + %190 = fcmp uge float 3.000000e+00, %189 + %191 = select i1 %190, float 3.000000e+00, float %189 + %192 = fdiv float 1.000000e+00, %191 + %193 = fdiv float 1.000000e+00, %30 + %194 = fmul float %191, %193 + %195 = fmul float %31, %194 + %196 = fmul float %95, %95 + %197 = fmul float %96, %96 + %198 = fadd float %197, %196 + %199 = fmul float %97, %97 + %200 = fadd float %198, %199 + %201 = call float @llvm.AMDGPU.rsq(float %200) + %202 = fmul float %95, %201 + %203 = fmul float %96, %201 + %204 = fmul float %202, %29 + %205 = fmul float %203, %29 + %206 = fmul float %204, -1.000000e+00 + %207 = fmul float %205, 1.000000e+00 + %208 = fmul float %206, %32 + %209 = fmul float %207, %32 + %210 = fsub float -0.000000e+00, %208 + %211 = fadd float %93, %210 + %212 = fsub float -0.000000e+00, %209 + %213 = fadd float %94, %212 + %214 = fmul float %206, %192 + %215 = fmul float %207, %192 + %216 = fmul float -1.000000e+00, %192 + %217 = bitcast float %136 to i32 + %218 = bitcast float %182 to i32 + %219 = bitcast float %137 to i32 + %220 = bitcast float %183 to i32 + %221 = insertelement <8 x i32> undef, i32 %217, i32 0 + %222 = insertelement <8 x i32> %221, i32 %218, i32 1 + %223 = insertelement <8 x i32> %222, i32 %219, i32 2 + %224 = insertelement 
<8 x i32> %223, i32 %220, i32 3 + br label %LOOP + +LOOP: ; preds = %ENDIF, %main_body + %temp24.0 = phi float [ 1.000000e+00, %main_body ], [ %258, %ENDIF ] + %temp28.0 = phi float [ %211, %main_body ], [ %253, %ENDIF ] + %temp29.0 = phi float [ %213, %main_body ], [ %255, %ENDIF ] + %temp30.0 = phi float [ 1.000000e+00, %main_body ], [ %257, %ENDIF ] + %225 = fcmp oge float %temp24.0, %191 + %226 = sext i1 %225 to i32 + %227 = bitcast i32 %226 to float + %228 = bitcast float %227 to i32 + %229 = icmp ne i32 %228, 0 + br i1 %229, label %IF, label %ENDIF + +IF: ; preds = %LOOP + %230 = bitcast float %136 to i32 + %231 = bitcast float %182 to i32 + %232 = bitcast float %137 to i32 + %233 = bitcast float %183 to i32 + %234 = insertelement <8 x i32> undef, i32 %230, i32 0 + %235 = insertelement <8 x i32> %234, i32 %231, i32 1 + %236 = insertelement <8 x i32> %235, i32 %232, i32 2 + %237 = insertelement <8 x i32> %236, i32 %233, i32 3 + br label %LOOP65 + +ENDIF: ; preds = %LOOP + %238 = bitcast float %temp28.0 to i32 + %239 = bitcast float %temp29.0 to i32 + %240 = insertelement <8 x i32> %224, i32 %238, i32 4 + %241 = insertelement <8 x i32> %240, i32 %239, i32 5 + %242 = insertelement <8 x i32> %241, i32 undef, i32 6 + %243 = insertelement <8 x i32> %242, i32 undef, i32 7 + %244 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %243, <32 x i8> %62, <16 x i8> %64, i32 2) + %245 = extractelement <4 x float> %244, i32 3 + %246 = fcmp oge float %temp30.0, %245 + %247 = sext i1 %246 to i32 + %248 = bitcast i32 %247 to float + %249 = bitcast float %248 to i32 + %250 = and i32 %249, 1065353216 + %251 = bitcast i32 %250 to float + %252 = fmul float %214, %251 + %253 = fadd float %252, %temp28.0 + %254 = fmul float %215, %251 + %255 = fadd float %254, %temp29.0 + %256 = fmul float %216, %251 + %257 = fadd float %256, %temp30.0 + %258 = fadd float %temp24.0, 1.000000e+00 + br label %LOOP + +LOOP65: ; preds = %ENDIF66, %IF + %temp24.1 = phi float [ 0.000000e+00, %IF ], [ 
%610, %ENDIF66 ] + %temp28.1 = phi float [ %temp28.0, %IF ], [ %605, %ENDIF66 ] + %temp29.1 = phi float [ %temp29.0, %IF ], [ %607, %ENDIF66 ] + %temp30.1 = phi float [ %temp30.0, %IF ], [ %609, %ENDIF66 ] + %temp32.0 = phi float [ 1.000000e+00, %IF ], [ %611, %ENDIF66 ] + %259 = fcmp oge float %temp24.1, %195 + %260 = sext i1 %259 to i32 + %261 = bitcast i32 %260 to float + %262 = bitcast float %261 to i32 + %263 = icmp ne i32 %262, 0 + br i1 %263, label %IF67, label %ENDIF66 + +IF67: ; preds = %LOOP65 + %264 = bitcast float %136 to i32 + %265 = bitcast float %182 to i32 + %266 = bitcast float %137 to i32 + %267 = bitcast float %183 to i32 + %268 = bitcast float %temp28.1 to i32 + %269 = bitcast float %temp29.1 to i32 + %270 = insertelement <8 x i32> undef, i32 %264, i32 0 + %271 = insertelement <8 x i32> %270, i32 %265, i32 1 + %272 = insertelement <8 x i32> %271, i32 %266, i32 2 + %273 = insertelement <8 x i32> %272, i32 %267, i32 3 + %274 = insertelement <8 x i32> %273, i32 %268, i32 4 + %275 = insertelement <8 x i32> %274, i32 %269, i32 5 + %276 = insertelement <8 x i32> %275, i32 undef, i32 6 + %277 = insertelement <8 x i32> %276, i32 undef, i32 7 + %278 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %277, <32 x i8> %66, <16 x i8> %68, i32 2) + %279 = extractelement <4 x float> %278, i32 0 + %280 = extractelement <4 x float> %278, i32 1 + %281 = extractelement <4 x float> %278, i32 2 + %282 = extractelement <4 x float> %278, i32 3 + %283 = fmul float %282, %47 + %284 = bitcast float %136 to i32 + %285 = bitcast float %182 to i32 + %286 = bitcast float %137 to i32 + %287 = bitcast float %183 to i32 + %288 = bitcast float %temp28.1 to i32 + %289 = bitcast float %temp29.1 to i32 + %290 = insertelement <8 x i32> undef, i32 %284, i32 0 + %291 = insertelement <8 x i32> %290, i32 %285, i32 1 + %292 = insertelement <8 x i32> %291, i32 %286, i32 2 + %293 = insertelement <8 x i32> %292, i32 %287, i32 3 + %294 = insertelement <8 x i32> %293, i32 %288, i32 4 + %295 
= insertelement <8 x i32> %294, i32 %289, i32 5 + %296 = insertelement <8 x i32> %295, i32 undef, i32 6 + %297 = insertelement <8 x i32> %296, i32 undef, i32 7 + %298 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %297, <32 x i8> %82, <16 x i8> %84, i32 2) + %299 = extractelement <4 x float> %298, i32 0 + %300 = extractelement <4 x float> %298, i32 1 + %301 = extractelement <4 x float> %298, i32 2 + %302 = bitcast float %136 to i32 + %303 = bitcast float %182 to i32 + %304 = bitcast float %137 to i32 + %305 = bitcast float %183 to i32 + %306 = bitcast float %temp28.1 to i32 + %307 = bitcast float %temp29.1 to i32 + %308 = insertelement <8 x i32> undef, i32 %302, i32 0 + %309 = insertelement <8 x i32> %308, i32 %303, i32 1 + %310 = insertelement <8 x i32> %309, i32 %304, i32 2 + %311 = insertelement <8 x i32> %310, i32 %305, i32 3 + %312 = insertelement <8 x i32> %311, i32 %306, i32 4 + %313 = insertelement <8 x i32> %312, i32 %307, i32 5 + %314 = insertelement <8 x i32> %313, i32 undef, i32 6 + %315 = insertelement <8 x i32> %314, i32 undef, i32 7 + %316 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %315, <32 x i8> %78, <16 x i8> %80, i32 2) + %317 = extractelement <4 x float> %316, i32 0 + %318 = extractelement <4 x float> %316, i32 1 + %319 = extractelement <4 x float> %316, i32 2 + %320 = fmul float %317, %23 + %321 = fmul float %318, %24 + %322 = fmul float %319, %25 + %323 = fmul float %299, %26 + %324 = fadd float %323, %320 + %325 = fmul float %300, %27 + %326 = fadd float %325, %321 + %327 = fmul float %301, %28 + %328 = fadd float %327, %322 + %329 = fadd float %279, %324 + %330 = fadd float %280, %326 + %331 = fadd float %281, %328 + %332 = bitcast float %136 to i32 + %333 = bitcast float %182 to i32 + %334 = bitcast float %137 to i32 + %335 = bitcast float %183 to i32 + %336 = bitcast float %temp28.1 to i32 + %337 = bitcast float %temp29.1 to i32 + %338 = insertelement <8 x i32> undef, i32 %332, i32 0 + %339 = insertelement <8 x i32> %338, i32 
%333, i32 1 + %340 = insertelement <8 x i32> %339, i32 %334, i32 2 + %341 = insertelement <8 x i32> %340, i32 %335, i32 3 + %342 = insertelement <8 x i32> %341, i32 %336, i32 4 + %343 = insertelement <8 x i32> %342, i32 %337, i32 5 + %344 = insertelement <8 x i32> %343, i32 undef, i32 6 + %345 = insertelement <8 x i32> %344, i32 undef, i32 7 + %346 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %345, <32 x i8> %62, <16 x i8> %64, i32 2) + %347 = extractelement <4 x float> %346, i32 0 + %348 = extractelement <4 x float> %346, i32 1 + %349 = extractelement <4 x float> %346, i32 2 + %350 = fadd float %347, -5.000000e-01 + %351 = fadd float %348, -5.000000e-01 + %352 = fadd float %349, -5.000000e-01 + %353 = fmul float %350, %350 + %354 = fmul float %351, %351 + %355 = fadd float %354, %353 + %356 = fmul float %352, %352 + %357 = fadd float %355, %356 + %358 = call float @llvm.AMDGPU.rsq(float %357) + %359 = fmul float %350, %358 + %360 = fmul float %351, %358 + %361 = fmul float %352, %358 + %362 = bitcast float %136 to i32 + %363 = bitcast float %182 to i32 + %364 = bitcast float %137 to i32 + %365 = bitcast float %183 to i32 + %366 = bitcast float %temp28.1 to i32 + %367 = bitcast float %temp29.1 to i32 + %368 = insertelement <8 x i32> undef, i32 %362, i32 0 + %369 = insertelement <8 x i32> %368, i32 %363, i32 1 + %370 = insertelement <8 x i32> %369, i32 %364, i32 2 + %371 = insertelement <8 x i32> %370, i32 %365, i32 3 + %372 = insertelement <8 x i32> %371, i32 %366, i32 4 + %373 = insertelement <8 x i32> %372, i32 %367, i32 5 + %374 = insertelement <8 x i32> %373, i32 undef, i32 6 + %375 = insertelement <8 x i32> %374, i32 undef, i32 7 + %376 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %375, <32 x i8> %70, <16 x i8> %72, i32 2) + %377 = extractelement <4 x float> %376, i32 0 + %378 = extractelement <4 x float> %376, i32 1 + %379 = extractelement <4 x float> %376, i32 2 + %380 = extractelement <4 x float> %376, i32 3 + %381 = fsub float -0.000000e+00, 
%95 + %382 = fsub float -0.000000e+00, %96 + %383 = fsub float -0.000000e+00, %97 + %384 = fmul float %359, %381 + %385 = fmul float %360, %382 + %386 = fadd float %385, %384 + %387 = fmul float %361, %383 + %388 = fadd float %386, %387 + %389 = fmul float %388, %359 + %390 = fmul float %388, %360 + %391 = fmul float %388, %361 + %392 = fmul float 2.000000e+00, %389 + %393 = fmul float 2.000000e+00, %390 + %394 = fmul float 2.000000e+00, %391 + %395 = fsub float -0.000000e+00, %392 + %396 = fadd float %381, %395 + %397 = fsub float -0.000000e+00, %393 + %398 = fadd float %382, %397 + %399 = fsub float -0.000000e+00, %394 + %400 = fadd float %383, %399 + %401 = fmul float %396, %98 + %402 = fmul float %396, %99 + %403 = fmul float %396, %100 + %404 = fmul float %398, %101 + %405 = fadd float %404, %401 + %406 = fmul float %398, %102 + %407 = fadd float %406, %402 + %408 = fmul float %398, %103 + %409 = fadd float %408, %403 + %410 = fmul float %400, %104 + %411 = fadd float %410, %405 + %412 = fmul float %400, %105 + %413 = fadd float %412, %407 + %414 = fmul float %400, %106 + %415 = fadd float %414, %409 + %416 = bitcast float %136 to i32 + %417 = bitcast float %182 to i32 + %418 = bitcast float %137 to i32 + %419 = bitcast float %183 to i32 + %420 = bitcast float %temp28.1 to i32 + %421 = bitcast float %temp29.1 to i32 + %422 = insertelement <8 x i32> undef, i32 %416, i32 0 + %423 = insertelement <8 x i32> %422, i32 %417, i32 1 + %424 = insertelement <8 x i32> %423, i32 %418, i32 2 + %425 = insertelement <8 x i32> %424, i32 %419, i32 3 + %426 = insertelement <8 x i32> %425, i32 %420, i32 4 + %427 = insertelement <8 x i32> %426, i32 %421, i32 5 + %428 = insertelement <8 x i32> %427, i32 undef, i32 6 + %429 = insertelement <8 x i32> %428, i32 undef, i32 7 + %430 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %429, <32 x i8> %86, <16 x i8> %88, i32 2) + %431 = extractelement <4 x float> %430, i32 0 + %432 = extractelement <4 x float> %430, i32 1 + %433 = 
extractelement <4 x float> %430, i32 2 + %434 = fmul float %48, %411 + %435 = fmul float %49, %411 + %436 = fmul float %50, %411 + %437 = fmul float %51, %413 + %438 = fadd float %437, %434 + %439 = fmul float %52, %413 + %440 = fadd float %439, %435 + %441 = fmul float %53, %413 + %442 = fadd float %441, %436 + %443 = fmul float %54, %415 + %444 = fadd float %443, %438 + %445 = fmul float %55, %415 + %446 = fadd float %445, %440 + %447 = fmul float %56, %415 + %448 = fadd float %447, %442 + %449 = insertelement <4 x float> undef, float %444, i32 0 + %450 = insertelement <4 x float> %449, float %446, i32 1 + %451 = insertelement <4 x float> %450, float %448, i32 2 + %452 = insertelement <4 x float> %451, float %195, i32 3 + %453 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %452) + %454 = extractelement <4 x float> %453, i32 0 + %455 = extractelement <4 x float> %453, i32 1 + %456 = extractelement <4 x float> %453, i32 2 + %457 = extractelement <4 x float> %453, i32 3 + %458 = call float @fabs(float %456) + %459 = fdiv float 1.000000e+00, %458 + %460 = fmul float %454, %459 + %461 = fadd float %460, 1.500000e+00 + %462 = fmul float %455, %459 + %463 = fadd float %462, 1.500000e+00 + %464 = bitcast float %463 to i32 + %465 = bitcast float %461 to i32 + %466 = bitcast float %457 to i32 + %467 = insertelement <4 x i32> undef, i32 %464, i32 0 + %468 = insertelement <4 x i32> %467, i32 %465, i32 1 + %469 = insertelement <4 x i32> %468, i32 %466, i32 2 + %470 = insertelement <4 x i32> %469, i32 undef, i32 3 + %471 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %470, <32 x i8> %90, <16 x i8> %92, i32 4) + %472 = extractelement <4 x float> %471, i32 0 + %473 = extractelement <4 x float> %471, i32 1 + %474 = extractelement <4 x float> %471, i32 2 + %475 = fmul float %431, %472 + %476 = fadd float %475, %329 + %477 = fmul float %432, %473 + %478 = fadd float %477, %330 + %479 = fmul float %433, %474 + %480 = fadd float %479, %331 + %481 = fmul float %107, %107 + %482 
= fmul float %108, %108 + %483 = fadd float %482, %481 + %484 = fmul float %109, %109 + %485 = fadd float %483, %484 + %486 = call float @llvm.AMDGPU.rsq(float %485) + %487 = fmul float %107, %486 + %488 = fmul float %108, %486 + %489 = fmul float %109, %486 + %490 = fmul float %377, %40 + %491 = fmul float %378, %41 + %492 = fmul float %379, %42 + %493 = fmul float %359, %487 + %494 = fmul float %360, %488 + %495 = fadd float %494, %493 + %496 = fmul float %361, %489 + %497 = fadd float %495, %496 + %498 = fmul float %497, %359 + %499 = fmul float %497, %360 + %500 = fmul float %497, %361 + %501 = fmul float 2.000000e+00, %498 + %502 = fmul float 2.000000e+00, %499 + %503 = fmul float 2.000000e+00, %500 + %504 = fsub float -0.000000e+00, %501 + %505 = fadd float %487, %504 + %506 = fsub float -0.000000e+00, %502 + %507 = fadd float %488, %506 + %508 = fsub float -0.000000e+00, %503 + %509 = fadd float %489, %508 + %510 = fmul float %95, %95 + %511 = fmul float %96, %96 + %512 = fadd float %511, %510 + %513 = fmul float %97, %97 + %514 = fadd float %512, %513 + %515 = call float @llvm.AMDGPU.rsq(float %514) + %516 = fmul float %95, %515 + %517 = fmul float %96, %515 + %518 = fmul float %97, %515 + %519 = fmul float %505, %516 + %520 = fmul float %507, %517 + %521 = fadd float %520, %519 + %522 = fmul float %509, %518 + %523 = fadd float %521, %522 + %524 = fsub float -0.000000e+00, %523 + %525 = fcmp uge float %524, 0.000000e+00 + %526 = select i1 %525, float %524, float 0.000000e+00 + %527 = fmul float %43, %380 + %528 = fadd float %527, 1.000000e+00 + %529 = call float @llvm.pow.f32(float %526, float %528) + %530 = fmul float %476, %37 + %531 = fmul float %478, %38 + %532 = fmul float %480, %39 + %533 = fmul float %359, %487 + %534 = fmul float %360, %488 + %535 = fadd float %534, %533 + %536 = fmul float %361, %489 + %537 = fadd float %535, %536 + %538 = fcmp uge float %537, 0.000000e+00 + %539 = select i1 %538, float %537, float 0.000000e+00 + %540 = fmul float 
%530, %539 + %541 = fmul float %531, %539 + %542 = fmul float %532, %539 + %543 = fmul float %490, %529 + %544 = fadd float %543, %540 + %545 = fmul float %491, %529 + %546 = fadd float %545, %541 + %547 = fmul float %492, %529 + %548 = fadd float %547, %542 + %549 = fmul float %476, %34 + %550 = fmul float %478, %35 + %551 = fmul float %480, %36 + %552 = fmul float %544, %57 + %553 = fadd float %552, %549 + %554 = fmul float %546, %58 + %555 = fadd float %554, %550 + %556 = fmul float %548, %59 + %557 = fadd float %556, %551 + %558 = bitcast float %136 to i32 + %559 = bitcast float %182 to i32 + %560 = bitcast float %137 to i32 + %561 = bitcast float %183 to i32 + %562 = bitcast float %temp28.1 to i32 + %563 = bitcast float %temp29.1 to i32 + %564 = insertelement <8 x i32> undef, i32 %558, i32 0 + %565 = insertelement <8 x i32> %564, i32 %559, i32 1 + %566 = insertelement <8 x i32> %565, i32 %560, i32 2 + %567 = insertelement <8 x i32> %566, i32 %561, i32 3 + %568 = insertelement <8 x i32> %567, i32 %562, i32 4 + %569 = insertelement <8 x i32> %568, i32 %563, i32 5 + %570 = insertelement <8 x i32> %569, i32 undef, i32 6 + %571 = insertelement <8 x i32> %570, i32 undef, i32 7 + %572 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %571, <32 x i8> %74, <16 x i8> %76, i32 2) + %573 = extractelement <4 x float> %572, i32 0 + %574 = extractelement <4 x float> %572, i32 1 + %575 = extractelement <4 x float> %572, i32 2 + %576 = fmul float %573, %44 + %577 = fadd float %576, %553 + %578 = fmul float %574, %45 + %579 = fadd float %578, %555 + %580 = fmul float %575, %46 + %581 = fadd float %580, %557 + %582 = call i32 @llvm.SI.packf16(float %577, float %579) + %583 = bitcast i32 %582 to float + %584 = call i32 @llvm.SI.packf16(float %581, float %283) + %585 = bitcast i32 %584 to float + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %583, float %585, float %583, float %585) + ret void + +ENDIF66: ; preds = %LOOP65 + %586 = bitcast float %temp28.1 
to i32 + %587 = bitcast float %temp29.1 to i32 + %588 = insertelement <8 x i32> %237, i32 %586, i32 4 + %589 = insertelement <8 x i32> %588, i32 %587, i32 5 + %590 = insertelement <8 x i32> %589, i32 undef, i32 6 + %591 = insertelement <8 x i32> %590, i32 undef, i32 7 + %592 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %591, <32 x i8> %62, <16 x i8> %64, i32 2) + %593 = extractelement <4 x float> %592, i32 3 + %594 = fcmp oge float %temp30.1, %593 + %595 = sext i1 %594 to i32 + %596 = bitcast i32 %595 to float + %597 = bitcast float %596 to i32 + %598 = and i32 %597, 1065353216 + %599 = bitcast i32 %598 to float + %600 = fmul float 5.000000e-01, %temp32.0 + %601 = fsub float -0.000000e+00, %600 + %602 = fmul float %599, %temp32.0 + %603 = fadd float %602, %601 + %604 = fmul float %214, %603 + %605 = fadd float %604, %temp28.1 + %606 = fmul float %215, %603 + %607 = fadd float %606, %temp29.1 + %608 = fmul float %216, %603 + %609 = fadd float %608, %temp30.1 + %610 = fadd float %temp24.1, 1.000000e+00 + %611 = fmul float %temp32.0, 5.000000e-01 + br label %LOOP65 +} + +; Function Attrs: nounwind readnone +declare float @llvm.SI.load.const(<16 x i8>, i32) #1 + +; Function Attrs: nounwind readnone +declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 + +; Function Attrs: readnone +declare i32 @llvm.SI.tid() #2 + +; Function Attrs: readonly +declare float @ceil(float) #3 + +; Function Attrs: readnone +declare float @llvm.AMDGPU.rsq(float) #2 + +; Function Attrs: nounwind readnone +declare <4 x float> @llvm.SI.sampled.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32) #1 + +; Function Attrs: readnone +declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 + +; Function Attrs: readnone +declare float @fabs(float) #2 + +; Function Attrs: nounwind readnone +declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 + +; Function Attrs: nounwind readonly +declare float @llvm.pow.f32(float, float) #4 + +; Function Attrs: nounwind readnone 
+declare i32 @llvm.SI.packf16(float, float) #1 + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="0" } +attributes #1 = { nounwind readnone } +attributes #2 = { readnone } +attributes #3 = { readonly } +attributes #4 = { nounwind readonly } + +!0 = metadata !{metadata !"const", null, i32 1} diff --git a/test/CodeGen/R600/si-vector-hang.ll b/test/CodeGen/R600/si-vector-hang.ll new file mode 100644 index 0000000..093234f --- /dev/null +++ b/test/CodeGen/R600/si-vector-hang.ll @@ -0,0 +1,107 @@ +; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s + +; CHECK: @test_8_min_char +; CHECK: BUFFER_STORE_BYTE +; CHECK: BUFFER_STORE_BYTE +; CHECK: BUFFER_STORE_BYTE +; CHECK: BUFFER_STORE_BYTE +; CHECK: BUFFER_STORE_BYTE +; CHECK: BUFFER_STORE_BYTE +; CHECK: BUFFER_STORE_BYTE +; CHECK: BUFFER_STORE_BYTE +; ModuleID = 'radeon' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64" +target triple = "r600--" + +; Function Attrs: nounwind +define void @test_8_min_char(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture readonly %in0, i8 addrspace(1)* nocapture readonly %in1) #0 { +entry: + %0 = load i8 addrspace(1)* %in0, align 1 + %1 = insertelement <8 x i8> undef, i8 %0, i32 0 + %arrayidx2.i.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 1 + %2 = load i8 addrspace(1)* %arrayidx2.i.i, align 1 + %3 = insertelement <8 x i8> %1, i8 %2, i32 1 + %arrayidx6.i.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 2 + %4 = load i8 addrspace(1)* %arrayidx6.i.i, align 1 + %5 = insertelement <8 x i8> %3, i8 %4, i32 2 + %arrayidx10.i.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 3 + %6 = load i8 addrspace(1)* %arrayidx10.i.i, align 1 + %7 = insertelement <8 x i8> %5, i8 %6, i32 3 + 
%arrayidx.i.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 4 + %8 = load i8 addrspace(1)* %arrayidx.i.i, align 1 + %9 = insertelement <8 x i8> undef, i8 %8, i32 0 + %arrayidx2.i9.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 5 + %10 = load i8 addrspace(1)* %arrayidx2.i9.i, align 1 + %11 = insertelement <8 x i8> %9, i8 %10, i32 1 + %arrayidx6.i11.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 6 + %12 = load i8 addrspace(1)* %arrayidx6.i11.i, align 1 + %13 = insertelement <8 x i8> %11, i8 %12, i32 2 + %arrayidx10.i13.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 7 + %14 = load i8 addrspace(1)* %arrayidx10.i13.i, align 1 + %15 = insertelement <8 x i8> %13, i8 %14, i32 3 + %vecinit5.i = shufflevector <8 x i8> %7, <8 x i8> %15, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> + %16 = load i8 addrspace(1)* %in1, align 1 + %17 = insertelement <8 x i8> undef, i8 %16, i32 0 + %arrayidx2.i.i4 = getelementptr inbounds i8 addrspace(1)* %in1, i64 1 + %18 = load i8 addrspace(1)* %arrayidx2.i.i4, align 1 + %19 = insertelement <8 x i8> %17, i8 %18, i32 1 + %arrayidx6.i.i5 = getelementptr inbounds i8 addrspace(1)* %in1, i64 2 + %20 = load i8 addrspace(1)* %arrayidx6.i.i5, align 1 + %21 = insertelement <8 x i8> %19, i8 %20, i32 2 + %arrayidx10.i.i6 = getelementptr inbounds i8 addrspace(1)* %in1, i64 3 + %22 = load i8 addrspace(1)* %arrayidx10.i.i6, align 1 + %23 = insertelement <8 x i8> %21, i8 %22, i32 3 + %arrayidx.i.i7 = getelementptr inbounds i8 addrspace(1)* %in1, i64 4 + %24 = load i8 addrspace(1)* %arrayidx.i.i7, align 1 + %25 = insertelement <8 x i8> undef, i8 %24, i32 0 + %arrayidx2.i9.i8 = getelementptr inbounds i8 addrspace(1)* %in1, i64 5 + %26 = load i8 addrspace(1)* %arrayidx2.i9.i8, align 1 + %27 = insertelement <8 x i8> %25, i8 %26, i32 1 + %arrayidx6.i11.i9 = getelementptr inbounds i8 addrspace(1)* %in1, i64 6 + %28 = load i8 addrspace(1)* %arrayidx6.i11.i9, align 1 + %29 = insertelement <8 x i8> %27, i8 %28, i32 2 + 
%arrayidx10.i13.i10 = getelementptr inbounds i8 addrspace(1)* %in1, i64 7 + %30 = load i8 addrspace(1)* %arrayidx10.i13.i10, align 1 + %31 = insertelement <8 x i8> %29, i8 %30, i32 3 + %vecinit5.i11 = shufflevector <8 x i8> %23, <8 x i8> %31, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> + %cmp.i = icmp slt <8 x i8> %vecinit5.i, %vecinit5.i11 + %cond.i = select <8 x i1> %cmp.i, <8 x i8> %vecinit5.i, <8 x i8> %vecinit5.i11 + %32 = extractelement <8 x i8> %cond.i, i32 0 + store i8 %32, i8 addrspace(1)* %out, align 1 + %33 = extractelement <8 x i8> %cond.i, i32 1 + %arrayidx2.i.i.i = getelementptr inbounds i8 addrspace(1)* %out, i64 1 + store i8 %33, i8 addrspace(1)* %arrayidx2.i.i.i, align 1 + %34 = extractelement <8 x i8> %cond.i, i32 2 + %arrayidx.i.i.i = getelementptr inbounds i8 addrspace(1)* %out, i64 2 + store i8 %34, i8 addrspace(1)* %arrayidx.i.i.i, align 1 + %35 = extractelement <8 x i8> %cond.i, i32 3 + %arrayidx2.i6.i.i = getelementptr inbounds i8 addrspace(1)* %out, i64 3 + store i8 %35, i8 addrspace(1)* %arrayidx2.i6.i.i, align 1 + %arrayidx.i.i3 = getelementptr inbounds i8 addrspace(1)* %out, i64 4 + %36 = extractelement <8 x i8> %cond.i, i32 4 + store i8 %36, i8 addrspace(1)* %arrayidx.i.i3, align 1 + %37 = extractelement <8 x i8> %cond.i, i32 5 + %arrayidx2.i.i6.i = getelementptr inbounds i8 addrspace(1)* %out, i64 5 + store i8 %37, i8 addrspace(1)* %arrayidx2.i.i6.i, align 1 + %38 = extractelement <8 x i8> %cond.i, i32 6 + %arrayidx.i.i7.i = getelementptr inbounds i8 addrspace(1)* %out, i64 6 + store i8 %38, i8 addrspace(1)* %arrayidx.i.i7.i, align 1 + %39 = extractelement <8 x i8> %cond.i, i32 7 + %arrayidx2.i6.i8.i = getelementptr inbounds i8 addrspace(1)* %out, i64 7 + store i8 %39, i8 addrspace(1)* %arrayidx2.i6.i8.i, align 1 + ret void +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" 
"stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!opencl.kernels = !{!0, !1, !2, !3, !4, !5, !6, !7, !8} + +!0 = metadata !{null} +!1 = metadata !{null} +!2 = metadata !{null} +!3 = metadata !{void (i8 addrspace(1)*, i8 addrspace(1)*, i8 addrspace(1)*)* @test_8_min_char} +!4 = metadata !{null} +!5 = metadata !{null} +!6 = metadata !{null} +!7 = metadata !{null} +!8 = metadata !{null} diff --git a/test/CodeGen/R600/sign_extend.ll b/test/CodeGen/R600/sign_extend.ll index e4ef534..1212cee 100644 --- a/test/CodeGen/R600/sign_extend.ll +++ b/test/CodeGen/R600/sign_extend.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s ; CHECK: V_ASHR define void @test(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) { diff --git a/test/CodeGen/R600/sint_to_fp.ll b/test/CodeGen/R600/sint_to_fp.ll index 4e88494..9241799 100644 --- a/test/CodeGen/R600/sint_to_fp.ll +++ b/test/CodeGen/R600/sint_to_fp.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK -; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK ; R600-CHECK: @sint_to_fp_v2i32 ; R600-CHECK-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W diff --git a/test/CodeGen/R600/sint_to_fp64.ll b/test/CodeGen/R600/sint_to_fp64.ll new file mode 100644 index 0000000..5abc9d1 --- /dev/null +++ b/test/CodeGen/R600/sint_to_fp64.ll @@ -0,0 +1,9 @@ +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK + +; CHECK: @sint_to_fp64 +; CHECK: V_CVT_F64_I32_e32 +define void @sint_to_fp64(double addrspace(1)* %out, i32 %in) { + %result = sitofp i32 %in to double + store double %result, double addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/sra.ll b/test/CodeGen/R600/sra.ll index 5220a96..fe9df10 
100644 --- a/test/CodeGen/R600/sra.ll +++ b/test/CodeGen/R600/sra.ll @@ -1,13 +1,13 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s -;EG-CHECK: @ashr_v2i32 +;EG-CHECK-LABEL: @ashr_v2i32 ;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI-CHECK: @ashr_v2i32 -;SI-CHECK: V_ASHR_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_ASHR_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} +;SI-CHECK-LABEL: @ashr_v2i32 +;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 @@ -18,17 +18,17 @@ define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %i ret void } -;EG-CHECK: @ashr_v4i32 +;EG-CHECK-LABEL: @ashr_v4i32 ;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: ASHR {{\*? 
*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI-CHECK: @ashr_v4i32 -;SI-CHECK: V_ASHR_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_ASHR_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_ASHR_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_ASHR_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} +;SI-CHECK-LABEL: @ashr_v4i32 +;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 @@ -39,11 +39,11 @@ define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i ret void } -;EG-CHECK: @ashr_i64 +;EG-CHECK-LABEL: @ashr_i64 ;EG-CHECK: ASHR -;SI-CHECK: @ashr_i64 -;SI-CHECK: V_ASHR_I64 +;SI-CHECK-LABEL: @ashr_i64 +;SI-CHECK: S_ASHR_I64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8 define void @ashr_i64(i64 addrspace(1)* %out, i32 %in) { entry: %0 = sext i32 %in to i64 diff --git a/test/CodeGen/R600/srl.ll b/test/CodeGen/R600/srl.ll index d1dcd7f..7637355 100644 --- a/test/CodeGen/R600/srl.ll +++ b/test/CodeGen/R600/srl.ll @@ -1,13 +1,13 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s ;EG-CHECK: @lshr_v2i32 ;EG-CHECK: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: LSHR {{\*? 
*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;SI-CHECK: @lshr_v2i32 -;SI-CHECK: V_LSHR_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_LSHR_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} +;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 @@ -26,10 +26,10 @@ define void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %i ;EG-CHECK: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;SI-CHECK: @lshr_v4i32 -;SI-CHECK: V_LSHR_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_LSHR_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_LSHR_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_LSHR_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} +;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 diff --git a/test/CodeGen/R600/store-vector-ptrs.ll b/test/CodeGen/R600/store-vector-ptrs.ll new file mode 100644 index 0000000..01210ce --- /dev/null +++ b/test/CodeGen/R600/store-vector-ptrs.ll @@ -0,0 +1,8 @@ +; XFAIL: * +; RUN: llc -march=r600 -mcpu=SI < %s + +define void @store_vector_ptrs(<4 x i32*>* %out, <4 x [1024 x i32]*> %array) nounwind { + %p = getelementptr <4 x [1024 x i32]*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16> + store <4 x i32*> %p, <4 x i32*>* %out + ret void +}
\ No newline at end of file diff --git a/test/CodeGen/R600/store.ll b/test/CodeGen/R600/store.ll index 1bda5e6..5e51d56 100644 --- a/test/CodeGen/R600/store.ll +++ b/test/CodeGen/R600/store.ll @@ -1,13 +1,118 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=CM-CHECK %s -; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s + +;===------------------------------------------------------------------------===; +; Global Address Space +;===------------------------------------------------------------------------===; + +; i8 store +; EG-CHECK-LABEL: @store_i8 +; EG-CHECK: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X +; EG-CHECK: VTX_READ_8 [[VAL:T[0-9]\.X]], [[VAL]] +; IG 0: Get the byte index and truncate the value +; EG-CHECK: AND_INT T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x +; EG-CHECK-NEXT: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], [[VAL]], literal.y +; EG-CHECK-NEXT: 3(4.203895e-45), 255(3.573311e-43) +; IG 1: Truncate the calculated the shift amount for the mask +; EG-CHECK: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x +; EG-CHECK-NEXT: 3 +; IG 2: Shift the value and the mask +; EG-CHECK: LSHL T[[RW_GPR]].X, T{{[0-9]}}.[[TRUNC_CHAN]], PV.[[SHIFT_CHAN]] +; EG-CHECK: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]] +; EG-CHECK-NEXT: 255 +; IG 3: Initialize the Y and Z channels to zero +; XXX: An optimal scheduler should merge this into one of the prevous IGs. 
+; EG-CHECK: MOV T[[RW_GPR]].Y, 0.0 +; EG-CHECK: MOV * T[[RW_GPR]].Z, 0.0 + +; SI-CHECK-LABEL: @store_i8 +; SI-CHECK: BUFFER_STORE_BYTE + +define void @store_i8(i8 addrspace(1)* %out, i8 %in) { +entry: + store i8 %in, i8 addrspace(1)* %out + ret void +} + +; i16 store +; EG-CHECK-LABEL: @store_i16 +; EG-CHECK: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X +; EG-CHECK: VTX_READ_16 [[VAL:T[0-9]\.X]], [[VAL]] +; IG 0: Get the byte index and truncate the value +; EG-CHECK: AND_INT T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x +; EG-CHECK: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], [[VAL]], literal.y +; EG-CHECK-NEXT: 3(4.203895e-45), 65535(9.183409e-41) +; IG 1: Truncate the calculated the shift amount for the mask +; EG-CHECK: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x +; EG-CHECK: 3 +; IG 2: Shift the value and the mask +; EG-CHECK: LSHL T[[RW_GPR]].X, T{{[0-9]}}.[[TRUNC_CHAN]], PV.[[SHIFT_CHAN]] +; EG-CHECK: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]] +; EG-CHECK-NEXT: 65535 +; IG 3: Initialize the Y and Z channels to zero +; XXX: An optimal scheduler should merge this into one of the prevous IGs. 
+; EG-CHECK: MOV T[[RW_GPR]].Y, 0.0 +; EG-CHECK: MOV * T[[RW_GPR]].Z, 0.0 + +; SI-CHECK-LABEL: @store_i16 +; SI-CHECK: BUFFER_STORE_SHORT +define void @store_i16(i16 addrspace(1)* %out, i16 %in) { +entry: + store i16 %in, i16 addrspace(1)* %out + ret void +} + +; EG-CHECK-LABEL: @store_v2i8 +; EG-CHECK: MEM_RAT MSKOR +; EG-CHECK-NOT: MEM_RAT MSKOR +; SI-CHECK-LABEL: @store_v2i8 +; SI-CHECK: BUFFER_STORE_BYTE +; SI-CHECK: BUFFER_STORE_BYTE +define void @store_v2i8(<2 x i8> addrspace(1)* %out, <2 x i32> %in) { +entry: + %0 = trunc <2 x i32> %in to <2 x i8> + store <2 x i8> %0, <2 x i8> addrspace(1)* %out + ret void +} + + +; EG-CHECK-LABEL: @store_v2i16 +; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW +; CM-CHECK-LABEL: @store_v2i16 +; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD +; SI-CHECK-LABEL: @store_v2i16 +; SI-CHECK: BUFFER_STORE_SHORT +; SI-CHECK: BUFFER_STORE_SHORT +define void @store_v2i16(<2 x i16> addrspace(1)* %out, <2 x i32> %in) { +entry: + %0 = trunc <2 x i32> %in to <2 x i16> + store <2 x i16> %0, <2 x i16> addrspace(1)* %out + ret void +} + +; EG-CHECK-LABEL: @store_v4i8 +; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW +; CM-CHECK-LABEL: @store_v4i8 +; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD +; SI-CHECK-LABEL: @store_v4i8 +; SI-CHECK: BUFFER_STORE_BYTE +; SI-CHECK: BUFFER_STORE_BYTE +; SI-CHECK: BUFFER_STORE_BYTE +; SI-CHECK: BUFFER_STORE_BYTE +define void @store_v4i8(<4 x i8> addrspace(1)* %out, <4 x i32> %in) { +entry: + %0 = trunc <4 x i32> %in to <4 x i8> + store <4 x i8> %0, <4 x i8> addrspace(1)* %out + ret void +} ; floating-point store -; EG-CHECK: @store_f32 -; EG-CHECK: RAT_WRITE_CACHELESS_32_eg T{{[0-9]+\.X, T[0-9]+\.X}}, 1 -; CM-CHECK: @store_f32 -; CM-CHECK: EXPORT_RAT_INST_STORE_DWORD T{{[0-9]+\.X, T[0-9]+\.X}} -; SI-CHECK: @store_f32 +; EG-CHECK-LABEL: @store_f32 +; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.X, T[0-9]+\.X}}, 1 +; CM-CHECK-LABEL: @store_f32 +; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+\.X, T[0-9]+\.X}} +; SI-CHECK-LABEL: 
@store_f32 ; SI-CHECK: BUFFER_STORE_DWORD define void @store_f32(float addrspace(1)* %out, float %in) { @@ -15,22 +120,141 @@ define void @store_f32(float addrspace(1)* %out, float %in) { ret void } +; EG-CHECK-LABEL: @store_v4i16 +; EG-CHECK: MEM_RAT MSKOR +; EG-CHECK: MEM_RAT MSKOR +; EG-CHECK: MEM_RAT MSKOR +; EG-CHECK: MEM_RAT MSKOR +; EG-CHECK-NOT: MEM_RAT MSKOR +; SI-CHECK-LABEL: @store_v4i16 +; SI-CHECK: BUFFER_STORE_SHORT +; SI-CHECK: BUFFER_STORE_SHORT +; SI-CHECK: BUFFER_STORE_SHORT +; SI-CHECK: BUFFER_STORE_SHORT +; SI-CHECK-NOT: BUFFER_STORE_BYTE +define void @store_v4i16(<4 x i16> addrspace(1)* %out, <4 x i32> %in) { +entry: + %0 = trunc <4 x i32> %in to <4 x i16> + store <4 x i16> %0, <4 x i16> addrspace(1)* %out + ret void +} + ; vec2 floating-point stores -; EG-CHECK: @store_v2f32 -; EG-CHECK: RAT_WRITE_CACHELESS_64_eg -; CM-CHECK: @store_v2f32 -; CM-CHECK: EXPORT_RAT_INST_STORE_DWORD -; SI-CHECK: @store_v2f32 +; EG-CHECK-LABEL: @store_v2f32 +; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW +; CM-CHECK-LABEL: @store_v2f32 +; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD +; SI-CHECK-LABEL: @store_v2f32 ; SI-CHECK: BUFFER_STORE_DWORDX2 define void @store_v2f32(<2 x float> addrspace(1)* %out, float %a, float %b) { entry: %0 = insertelement <2 x float> <float 0.0, float 0.0>, float %a, i32 0 - %1 = insertelement <2 x float> %0, float %b, i32 0 + %1 = insertelement <2 x float> %0, float %b, i32 1 store <2 x float> %1, <2 x float> addrspace(1)* %out ret void } +; EG-CHECK-LABEL: @store_v4i32 +; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW +; EG-CHECK-NOT: MEM_RAT_CACHELESS STORE_RAW +; CM-CHECK-LABEL: @store_v4i32 +; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD +; CM-CHECK-NOT: MEM_RAT_CACHELESS STORE_DWORD +; SI-CHECK-LABEL: @store_v4i32 +; SI-CHECK: BUFFER_STORE_DWORDX4 +define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %in) { +entry: + store <4 x i32> %in, <4 x i32> addrspace(1)* %out + ret void +} + 
+;===------------------------------------------------------------------------===; +; Local Address Space +;===------------------------------------------------------------------------===; + +; EG-CHECK-LABEL: @store_local_i8 +; EG-CHECK: LDS_BYTE_WRITE +; SI-CHECK-LABEL: @store_local_i8 +; SI-CHECK: DS_WRITE_B8 +define void @store_local_i8(i8 addrspace(3)* %out, i8 %in) { + store i8 %in, i8 addrspace(3)* %out + ret void +} + +; EG-CHECK-LABEL: @store_local_i16 +; EG-CHECK: LDS_SHORT_WRITE +; SI-CHECK-LABEL: @store_local_i16 +; SI-CHECK: DS_WRITE_B16 +define void @store_local_i16(i16 addrspace(3)* %out, i16 %in) { + store i16 %in, i16 addrspace(3)* %out + ret void +} + +; EG-CHECK-LABEL: @store_local_v2i16 +; EG-CHECK: LDS_WRITE +; CM-CHECK-LABEL: @store_local_v2i16 +; CM-CHECK: LDS_WRITE +; SI-CHECK-LABEL: @store_local_v2i16 +; SI-CHECK: DS_WRITE_B16 +; SI-CHECK: DS_WRITE_B16 +define void @store_local_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> %in) { +entry: + store <2 x i16> %in, <2 x i16> addrspace(3)* %out + ret void +} + +; EG-CHECK-LABEL: @store_local_v4i8 +; EG-CHECK: LDS_WRITE +; CM-CHECK-LABEL: @store_local_v4i8 +; CM-CHECK: LDS_WRITE +; SI-CHECK-LABEL: @store_local_v4i8 +; SI-CHECK: DS_WRITE_B8 +; SI-CHECK: DS_WRITE_B8 +; SI-CHECK: DS_WRITE_B8 +; SI-CHECK: DS_WRITE_B8 +define void @store_local_v4i8(<4 x i8> addrspace(3)* %out, <4 x i8> %in) { +entry: + store <4 x i8> %in, <4 x i8> addrspace(3)* %out + ret void +} + +; EG-CHECK-LABEL: @store_local_v2i32 +; EG-CHECK: LDS_WRITE +; EG-CHECK: LDS_WRITE +; CM-CHECK-LABEL: @store_local_v2i32 +; CM-CHECK: LDS_WRITE +; CM-CHECK: LDS_WRITE +; SI-CHECK-LABEL: @store_local_v2i32 +; SI-CHECK: DS_WRITE_B32 +; SI-CHECK: DS_WRITE_B32 +define void @store_local_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> %in) { +entry: + store <2 x i32> %in, <2 x i32> addrspace(3)* %out + ret void +} + +; EG-CHECK-LABEL: @store_local_v4i32 +; EG-CHECK: LDS_WRITE +; EG-CHECK: LDS_WRITE +; EG-CHECK: LDS_WRITE +; EG-CHECK: LDS_WRITE +; 
CM-CHECK-LABEL: @store_local_v4i32 +; CM-CHECK: LDS_WRITE +; CM-CHECK: LDS_WRITE +; CM-CHECK: LDS_WRITE +; CM-CHECK: LDS_WRITE +; SI-CHECK-LABEL: @store_local_v4i32 +; SI-CHECK: DS_WRITE_B32 +; SI-CHECK: DS_WRITE_B32 +; SI-CHECK: DS_WRITE_B32 +; SI-CHECK: DS_WRITE_B32 +define void @store_local_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %in) { +entry: + store <4 x i32> %in, <4 x i32> addrspace(3)* %out + ret void +} + ; The stores in this function are combined by the optimizer to create a ; 64-bit store with 32-bit alignment. This is legal for SI and the legalizer ; should not try to split the 64-bit store back into 2 32-bit stores. @@ -38,25 +262,21 @@ entry: ; Evergreen / Northern Islands don't support 64-bit stores yet, so there should ; be two 32-bit stores. -; EG-CHECK: @vecload2 -; EG-CHECK: RAT_WRITE_CACHELESS_64_eg -; CM-CHECK: @vecload2 -; CM-CHECK: EXPORT_RAT_INST_STORE_DWORD -; SI-CHECK: @vecload2 +; EG-CHECK-LABEL: @vecload2 +; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW +; CM-CHECK-LABEL: @vecload2 +; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD +; SI-CHECK-LABEL: @vecload2 ; SI-CHECK: BUFFER_STORE_DWORDX2 define void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 { entry: - %0 = load i32 addrspace(2)* %mem, align 4, !tbaa !5 + %0 = load i32 addrspace(2)* %mem, align 4 %arrayidx1.i = getelementptr inbounds i32 addrspace(2)* %mem, i64 1 - %1 = load i32 addrspace(2)* %arrayidx1.i, align 4, !tbaa !5 - store i32 %0, i32 addrspace(1)* %out, align 4, !tbaa !5 + %1 = load i32 addrspace(2)* %arrayidx1.i, align 4 + store i32 %0, i32 addrspace(1)* %out, align 4 %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %out, i64 1 - store i32 %1, i32 addrspace(1)* %arrayidx1, align 4, !tbaa !5 + store i32 %1, i32 addrspace(1)* %arrayidx1, align 4 ret void } -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" 
"unsafe-fp-math"="false" "use-soft-float"="false" } - -!5 = metadata !{metadata !"int", metadata !6} -!6 = metadata !{metadata !"omnipotent char", metadata !7} -!7 = metadata !{metadata !"Simple C/C++ TBAA"} +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/R600/store.r600.ll b/test/CodeGen/R600/store.r600.ll index 5ffb7f1..00589a0 100644 --- a/test/CodeGen/R600/store.r600.ll +++ b/test/CodeGen/R600/store.r600.ll @@ -4,7 +4,7 @@ ; v4i32 store ; EG-CHECK: @store_v4i32 -; EG-CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1 +; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1 define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %1 = load <4 x i32> addrspace(1) * %in @@ -14,7 +14,7 @@ define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* % ; v4f32 store ; EG-CHECK: @store_v4f32 -; EG-CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1 +; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1 define void @store_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { %1 = load <4 x float> addrspace(1) * %in store <4 x float> %1, <4 x float> addrspace(1)* %out diff --git a/test/CodeGen/R600/structurize.ll b/test/CodeGen/R600/structurize.ll new file mode 100644 index 0000000..c2acd93 --- /dev/null +++ b/test/CodeGen/R600/structurize.ll @@ -0,0 +1,83 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood -mattr=disable-irstructurizer | FileCheck %s +; Test case for a crash in the AMDILCFGStructurizer from a CFG like this: +; +; entry +; / \ +; diamond_head branch_from +; / \ | +; diamond_false diamond_true +; \ / +; done +; +; When the diamond_true branch had more than 100 instructions. 
+; +; + +; CHECK-LABEL: @branch_into_diamond +; === entry block: +; CHECK: ALU_PUSH_BEFORE +; === Branch instruction (IF): +; CHECK: JUMP + ; === branch_from block + ; CHECK: ALU + ; === Duplicated diamond_true block (There can be more than one ALU clause): + ; === XXX: We should be able to optimize this so the basic block is not + ; === duplicated. See comments in + ; === AMDGPUCFGStructurizer::improveSimpleJumpintoIf() + ; CHECK: ALU +; === Branch instruction (ELSE): +; CHECK: ELSE + ; === diamond_head block: + ; CHECK: ALU_PUSH_BEFORE + ; === Branch instruction (IF): + ; CHECK: JUMP + ; === diamond_true block (There can be more than one ALU clause): + ; ALU + ; === Branch instruction (ELSE): + ; CHECK: ELSE + ; === diamond_false block plus implicit ENDIF + ; CHECK: ALU_POP_AFTER +; === Branch instruction (ENDIF): +; CHECK: POP +; === done block: +; CHECK: ALU +; CHECK: MEM_RAT_CACHELESS +; CHECK: CF_END + + +define void @branch_into_diamond(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) { +entry: +%0 = icmp ne i32 %a, 0 + br i1 %0, label %diamond_head, label %branch_from + +diamond_head: + %1 = icmp ne i32 %a, 1 + br i1 %1, label %diamond_true, label %diamond_false + +branch_from: + %2 = add i32 %a, 1 + br label %diamond_true + +diamond_false: + %3 = add i32 %a, 2 + br label %done + +diamond_true: + %4 = phi i32 [%2, %branch_from], [%a, %diamond_head] + ; This block needs to be > 100 ISA instructions to hit the bug, + ; so we'll use udiv instructions. 
+ %div0 = udiv i32 %a, %b + %div1 = udiv i32 %div0, %4 + %div2 = udiv i32 %div1, 11 + %div3 = udiv i32 %div2, %a + %div4 = udiv i32 %div3, %b + %div5 = udiv i32 %div4, %c + %div6 = udiv i32 %div5, %div0 + %div7 = udiv i32 %div6, %div1 + br label %done + +done: + %5 = phi i32 [%3, %diamond_false], [%div7, %diamond_true] + store i32 %5, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/structurize1.ll b/test/CodeGen/R600/structurize1.ll new file mode 100644 index 0000000..8c10301 --- /dev/null +++ b/test/CodeGen/R600/structurize1.ll @@ -0,0 +1,62 @@ +; RUN: llc < %s -march=r600 -mattr=disable-ifcvt -mcpu=redwood | FileCheck %s + +; This tests for abug where the AMDILCFGStructurizer was crashing on loops +; like this: +; +; for (i = 0; i < x; i++) { +; if (cond0) { +; if (cond1) { +; +; } else { +; +; } +; if (cond2) { +; +; } +; } +; } + +; CHECK-LABEL: @if_inside_loop +; CHECK: LOOP_START_DX10 +; CHECK: END_LOOP +define void @if_inside_loop(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) { +entry: + br label %for.body + +for.body: + %0 = phi i32 [0, %entry], [%inc, %for.inc] + %val = phi i32 [0, %entry], [%val.for.inc, %for.inc] + %inc = add i32 %0, 1 + %1 = icmp ult i32 10, %a + br i1 %1, label %for.inc, label %if.then + +if.then: + %2 = icmp ne i32 0, %b + br i1 %2, label %if.then.true, label %if.then.false + +if.then.true: + %3 = add i32 %a, %val + br label %if + +if.then.false: + %4 = mul i32 %a, %val + br label %if + +if: + %val.if = phi i32 [%3, %if.then.true], [%4, %if.then.false] + %5 = icmp ne i32 0, %c + br i1 %5, label %if.true, label %for.inc + +if.true: + %6 = add i32 %a, %val.if + br label %for.inc + +for.inc: + %val.for.inc = phi i32 [%val, %for.body], [%val.if, %if], [%6, %if.true] + %7 = icmp ne i32 0, %d + br i1 %7, label %for.body, label %exit + +exit: + store i32 %val.for.inc, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/sub.ll b/test/CodeGen/R600/sub.ll index 3bd4cb8..5fdd2b8 100644 --- 
a/test/CodeGen/R600/sub.ll +++ b/test/CodeGen/R600/sub.ll @@ -1,13 +1,13 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s ;EG-CHECK: @test2 -;EG-CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;SI-CHECK: @test2 -;SI-CHECK: V_SUB_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_SUB_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} +;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 @@ -19,16 +19,16 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { } ;EG-CHECK: @test4 -;EG-CHECK: SUB_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;SI-CHECK: @test4 -;SI-CHECK: V_SUB_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_SUB_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_SUB_I32_e32 
VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_SUB_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} +;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 diff --git a/test/CodeGen/R600/swizzle-export.ll b/test/CodeGen/R600/swizzle-export.ll index b2175af..16c3f19 100644 --- a/test/CodeGen/R600/swizzle-export.ll +++ b/test/CodeGen/R600/swizzle-export.ll @@ -6,12 +6,12 @@ ;EG-CHECK: EXPORT T{{[0-9]+}}.XXWX ;EG-CHECK: EXPORT T{{[0-9]+}}.XXXW -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 %4 = load <4 x float> addrspace(8)* null %5 = extractelement <4 x float> %4, i32 1 %6 = load <4 x float> addrspace(8)* null @@ -93,14 +93,15 @@ main_body: } ; EG-CHECK: @main2 -; EG-CHECK: T{{[0-9]+}}.ZXY0 +; EG-CHECK: T{{[0-9]+}}.XY__ +; EG-CHECK: T{{[0-9]+}}.YXZ0 -define void @main2() #0 { +define void @main2(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = fadd float %0, 2.5 + %3 = fmul float %1, 3.5 %4 = load <4 x float> addrspace(8)* 
getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) %5 = extractelement <4 x float> %4, i32 0 %6 = call float @llvm.cos.f32(float %5) @@ -108,27 +109,21 @@ main_body: %8 = extractelement <4 x float> %7, i32 0 %9 = load <4 x float> addrspace(8)* null %10 = extractelement <4 x float> %9, i32 1 - %11 = insertelement <4 x float> undef, float %0, i32 0 - %12 = insertelement <4 x float> %11, float %1, i32 1 - %13 = insertelement <4 x float> %12, float %2, i32 2 - %14 = insertelement <4 x float> %13, float %3, i32 3 - call void @llvm.R600.store.swizzle(<4 x float> %14, i32 60, i32 1) - %15 = insertelement <4 x float> undef, float %6, i32 0 - %16 = insertelement <4 x float> %15, float %8, i32 1 - %17 = insertelement <4 x float> %16, float %10, i32 2 - %18 = insertelement <4 x float> %17, float 0.000000e+00, i32 3 - call void @llvm.R600.store.swizzle(<4 x float> %18, i32 0, i32 2) + %11 = insertelement <4 x float> undef, float %2, i32 0 + %12 = insertelement <4 x float> %11, float %3, i32 1 + call void @llvm.R600.store.swizzle(<4 x float> %12, i32 60, i32 1) + %13 = insertelement <4 x float> undef, float %6, i32 0 + %14 = insertelement <4 x float> %13, float %8, i32 1 + %15 = insertelement <4 x float> %14, float %10, i32 2 + %16 = insertelement <4 x float> %15, float 0.000000e+00, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %16, i32 0, i32 2) ret void } -; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - ; Function Attrs: nounwind readonly -declare float @llvm.cos.f32(float) #2 +declare float @llvm.cos.f32(float) #1 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { "ShaderType"="1" } -attributes #1 = { readnone } -attributes #2 = { nounwind readonly } +attributes #1 = { nounwind readonly } diff --git a/test/CodeGen/R600/tex-clause-antidep.ll b/test/CodeGen/R600/tex-clause-antidep.ll index 5979609..cbb9c50 100644 --- a/test/CodeGen/R600/tex-clause-antidep.ll +++ 
b/test/CodeGen/R600/tex-clause-antidep.ll @@ -3,11 +3,11 @@ ;CHECK: TEX ;CHECK-NEXT: ALU -define void @test() { - %1 = call float @llvm.R600.load.input(i32 0) - %2 = call float @llvm.R600.load.input(i32 1) - %3 = call float @llvm.R600.load.input(i32 2) - %4 = call float @llvm.R600.load.input(i32 3) +define void @test(<4 x float> inreg %reg0) #0 { + %1 = extractelement <4 x float> %reg0, i32 0 + %2 = extractelement <4 x float> %reg0, i32 1 + %3 = extractelement <4 x float> %reg0, i32 2 + %4 = extractelement <4 x float> %reg0, i32 3 %5 = insertelement <4 x float> undef, float %1, i32 0 %6 = insertelement <4 x float> %5, float %2, i32 1 %7 = insertelement <4 x float> %6, float %3, i32 2 @@ -19,6 +19,7 @@ define void @test() { ret void } -declare float @llvm.R600.load.input(i32) readnone declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="1" }
\ No newline at end of file diff --git a/test/CodeGen/R600/texture-input-merge.ll b/test/CodeGen/R600/texture-input-merge.ll index 5d0ecef..789538a 100644 --- a/test/CodeGen/R600/texture-input-merge.ll +++ b/test/CodeGen/R600/texture-input-merge.ll @@ -2,11 +2,11 @@ ;CHECK-NOT: MOV -define void @test() { - %1 = call float @llvm.R600.load.input(i32 0) - %2 = call float @llvm.R600.load.input(i32 1) - %3 = call float @llvm.R600.load.input(i32 2) - %4 = call float @llvm.R600.load.input(i32 3) +define void @test(<4 x float> inreg %reg0) #0 { + %1 = extractelement <4 x float> %reg0, i32 0 + %2 = extractelement <4 x float> %reg0, i32 1 + %3 = extractelement <4 x float> %reg0, i32 2 + %4 = extractelement <4 x float> %reg0, i32 3 %5 = fmul float %1, 3.0 %6 = fmul float %2, 3.0 %7 = fmul float %3, 3.0 @@ -25,6 +25,7 @@ define void @test() { ret void } -declare float @llvm.R600.load.input(i32) readnone declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="1" }
\ No newline at end of file diff --git a/test/CodeGen/R600/trunc-vector-store-assertion-failure.ll b/test/CodeGen/R600/trunc-vector-store-assertion-failure.ll new file mode 100644 index 0000000..ec959c2 --- /dev/null +++ b/test/CodeGen/R600/trunc-vector-store-assertion-failure.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; This tests for a bug in the SelectionDAG where custom lowered truncated +; vector stores at the end of a basic block were not being added to the +; LegalizedNodes list, which triggered an assertion failure. + +; CHECK-LABEL: @test +; CHECK: MEM_RAT_CACHELESS STORE_RAW +define void @test(<4 x i8> addrspace(1)* %out, i32 %cond, <4 x i8> %in) { +entry: + %0 = icmp eq i32 %cond, 0 + br i1 %0, label %if, label %done + +if: + store <4 x i8> %in, <4 x i8> addrspace(1)* %out + br label %done + +done: + ret void +} diff --git a/test/CodeGen/R600/trunc.ll b/test/CodeGen/R600/trunc.ll new file mode 100644 index 0000000..0bd320a --- /dev/null +++ b/test/CodeGen/R600/trunc.ll @@ -0,0 +1,30 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s + +define void @trunc_i64_to_i32_store(i32 addrspace(1)* %out, i64 %in) { +; SI-LABEL: @trunc_i64_to_i32_store +; SI: S_LOAD_DWORD s0, s[0:1], 11 +; SI: V_MOV_B32_e32 v0, s0 +; SI: BUFFER_STORE_DWORD v0 + +; EG-LABEL: @trunc_i64_to_i32_store +; EG: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; EG: LSHR +; EG-NEXT: 2( + + %result = trunc i64 %in to i32 store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: @trunc_shl_i64: +; SI: S_LOAD_DWORDX2 +; SI: S_LOAD_DWORDX2 [[SREG:s\[[0-9]+:[0-9]+\]]] +; SI: S_LSHL_B64 s{{\[}}[[LO_SREG:[0-9]+]]:{{[0-9]+\]}}, [[SREG]], 2 +; SI: MOV_B32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]] +; SI: BUFFER_STORE_DWORD v[[LO_VREG]], +define void @trunc_shl_i64(i32 addrspace(1)* %out, i64 %a) { + %b = shl i64 %a, 2 + %result = 
trunc i64 %b to i32 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/udiv.ll b/test/CodeGen/R600/udiv.ll index 08fe2ef..5371321 100644 --- a/test/CodeGen/R600/udiv.ll +++ b/test/CodeGen/R600/udiv.ll @@ -1,13 +1,26 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s + +;EG-CHECK-LABEL: @test +;EG-CHECK-NOT: SETGE_INT +;EG-CHECK: CF_END + +define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1 + %a = load i32 addrspace(1) * %in + %b = load i32 addrspace(1) * %b_ptr + %result = udiv i32 %a, %b + store i32 %result, i32 addrspace(1)* %out + ret void +} ;The code generated by udiv is long and complex and may frequently change. ;The goal of this test is to make sure the ISel doesn't fail when it gets ;a v4i32 udiv -;EG-CHECK: @test2 +;EG-CHECK-LABEL: @test2 ;EG-CHECK: CF_END -;SI-CHECK: @test2 +;SI-CHECK-LABEL: @test2 ;SI-CHECK: S_ENDPGM define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { @@ -19,9 +32,9 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { ret void } -;EG-CHECK: @test4 +;EG-CHECK-LABEL: @test4 ;EG-CHECK: CF_END -;SI-CHECK: @test4 +;SI-CHECK-LABEL: @test4 ;SI-CHECK: S_ENDPGM define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { diff --git a/test/CodeGen/R600/uint_to_fp.ll b/test/CodeGen/R600/uint_to_fp.ll index faac77a..a5ac355 100644 --- a/test/CodeGen/R600/uint_to_fp.ll +++ b/test/CodeGen/R600/uint_to_fp.ll @@ -1,10 +1,10 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK -; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck 
%s --check-prefix=SI-CHECK -; R600-CHECK: @uint_to_fp_v2i32 +; R600-CHECK-LABEL: @uint_to_fp_v2i32 ; R600-CHECK-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W ; R600-CHECK-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X -; SI-CHECK: @uint_to_fp_v2i32 +; SI-CHECK-LABEL: @uint_to_fp_v2i32 ; SI-CHECK: V_CVT_F32_U32_e32 ; SI-CHECK: V_CVT_F32_U32_e32 define void @uint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) { @@ -13,12 +13,12 @@ define void @uint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) { ret void } -; R600-CHECK: @uint_to_fp_v4i32 +; R600-CHECK-LABEL: @uint_to_fp_v4i32 ; R600-CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600-CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600-CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600-CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; SI-CHECK: @uint_to_fp_v4i32 +; SI-CHECK-LABEL: @uint_to_fp_v4i32 ; SI-CHECK: V_CVT_F32_U32_e32 ; SI-CHECK: V_CVT_F32_U32_e32 ; SI-CHECK: V_CVT_F32_U32_e32 @@ -29,3 +29,18 @@ define void @uint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspac store <4 x float> %result, <4 x float> addrspace(1)* %out ret void } + +; R600-CHECK-LABEL: @uint_to_fp_i64_f32 +; R600-CHECK: UINT_TO_FLT +; R600-CHECK: UINT_TO_FLT +; R600-CHECK: MULADD_IEEE +; SI-CHECK-LABEL: @uint_to_fp_i64_f32 +; SI-CHECK: V_CVT_F32_U32_e32 +; SI-CHECK: V_CVT_F32_U32_e32 +; SI-CHECK: V_MAD_F32 +define void @uint_to_fp_i64_f32(float addrspace(1)* %out, i64 %in) { +entry: + %0 = uitofp i64 %in to float + store float %0, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/unaligned-load-store.ll b/test/CodeGen/R600/unaligned-load-store.ll new file mode 100644 index 0000000..2824ff8 --- /dev/null +++ b/test/CodeGen/R600/unaligned-load-store.ll @@ -0,0 +1,17 @@ +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s + +; SI-LABEL: @unaligned_load_store_i32: +; DS_READ_U32 {{v[0-9]+}}, 0, [[REG]] +define void 
@unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind { + %v = load i32 addrspace(3)* %p, align 1 + store i32 %v, i32 addrspace(3)* %r, align 1 + ret void +} + +; SI-LABEL: @unaligned_load_store_v4i32: +; DS_READ_U32 {{v[0-9]+}}, 0, [[REG]] +define void @unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind { + %v = load <4 x i32> addrspace(3)* %p, align 1 + store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1 + ret void +} diff --git a/test/CodeGen/R600/unsupported-cc.ll b/test/CodeGen/R600/unsupported-cc.ll index cf29833..f986a02 100644 --- a/test/CodeGen/R600/unsupported-cc.ll +++ b/test/CodeGen/R600/unsupported-cc.ll @@ -2,8 +2,9 @@ ; These tests are for condition codes that are not supported by the hardware -; CHECK: @slt -; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK-LABEL: @slt +; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK-NEXT: LSHR ; CHECK-NEXT: 5(7.006492e-45) define void @slt(i32 addrspace(1)* %out, i32 %in) { entry: @@ -13,8 +14,9 @@ entry: ret void } -; CHECK: @ult_i32 -; CHECK: SETGT_UINT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK-LABEL: @ult_i32 +; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK-NEXT: LSHR ; CHECK-NEXT: 5(7.006492e-45) define void @ult_i32(i32 addrspace(1)* %out, i32 %in) { entry: @@ -24,9 +26,11 @@ entry: ret void } -; CHECK: @ult_float -; CHECK: SETGT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK-LABEL: @ult_float +; CHECK: SETGE * T{{[0-9]}}.[[CHAN:[XYZW]]], KC0[2].Z, literal.x ; CHECK-NEXT: 1084227584(5.000000e+00) +; CHECK-NEXT: SETE T{{[0-9]\.[XYZW]}}, PV.[[CHAN]], 0.0 +; CHECK-NEXT: LSHR * define void @ult_float(float addrspace(1)* %out, float %in) { entry: %0 = fcmp ult float %in, 5.0 @@ -35,9 +39,22 @@ entry: ret void } -; CHECK: @olt -; CHECK: SETGT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z -;CHECK-NEXT: 1084227584(5.000000e+00) +; CHECK-LABEL: 
@ult_float_native +; CHECK: SETGE T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x +; CHECK-NEXT: LSHR * +; CHECK-NEXT: 1084227584(5.000000e+00) +define void @ult_float_native(float addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ult float %in, 5.0 + %1 = select i1 %0, float 0.0, float 1.0 + store float %1, float addrspace(1)* %out + ret void +} + +; CHECK-LABEL: @olt +; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK-NEXT: LSHR * +; CHECK-NEXT: 1084227584(5.000000e+00) define void @olt(float addrspace(1)* %out, float %in) { entry: %0 = fcmp olt float %in, 5.0 @@ -46,8 +63,9 @@ entry: ret void } -; CHECK: @sle -; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK-LABEL: @sle +; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK-NEXT: LSHR ; CHECK-NEXT: 6(8.407791e-45) define void @sle(i32 addrspace(1)* %out, i32 %in) { entry: @@ -57,8 +75,9 @@ entry: ret void } -; CHECK: @ule_i32 -; CHECK: SETGT_UINT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK-LABEL: @ule_i32 +; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK-NEXT: LSHR ; CHECK-NEXT: 6(8.407791e-45) define void @ule_i32(i32 addrspace(1)* %out, i32 %in) { entry: @@ -68,9 +87,11 @@ entry: ret void } -; CHECK: @ule_float -; CHECK: SETGE * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK-LABEL: @ule_float +; CHECK: SETGT * T{{[0-9]}}.[[CHAN:[XYZW]]], KC0[2].Z, literal.x ; CHECK-NEXT: 1084227584(5.000000e+00) +; CHECK-NEXT: SETE T{{[0-9]\.[XYZW]}}, PV.[[CHAN]], 0.0 +; CHECK-NEXT: LSHR * define void @ule_float(float addrspace(1)* %out, float %in) { entry: %0 = fcmp ule float %in, 5.0 @@ -79,8 +100,21 @@ entry: ret void } -; CHECK: @ole -; CHECK: SETGE * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK-LABEL: @ule_float_native +; CHECK: SETGT T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x +; CHECK-NEXT: LSHR * +; CHECK-NEXT: 1084227584(5.000000e+00) +define void @ule_float_native(float addrspace(1)* %out, float %in) { +entry: + %0 = fcmp 
ule float %in, 5.0 + %1 = select i1 %0, float 0.0, float 1.0 + store float %1, float addrspace(1)* %out + ret void +} + +; CHECK-LABEL: @ole +; CHECK: SETGE T{{[0-9]\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK-NEXT: LSHR * ; CHECK-NEXT:1084227584(5.000000e+00) define void @ole(float addrspace(1)* %out, float %in) { entry: diff --git a/test/CodeGen/R600/urecip.ll b/test/CodeGen/R600/urecip.ll index dad02dd..e808e3d 100644 --- a/test/CodeGen/R600/urecip.ll +++ b/test/CodeGen/R600/urecip.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s ;CHECK: V_RCP_IFLAG_F32_e32 diff --git a/test/CodeGen/R600/urem.ll b/test/CodeGen/R600/urem.ll index cf3474c..8045145 100644 --- a/test/CodeGen/R600/urem.ll +++ b/test/CodeGen/R600/urem.ll @@ -1,5 +1,5 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s ;The code generated by urem is long and complex and may frequently change. 
;The goal of this test is to make sure the ISel doesn't fail when it gets diff --git a/test/CodeGen/R600/vertex-fetch-encoding.ll b/test/CodeGen/R600/vertex-fetch-encoding.ll index d892229..7ea7a5c 100644 --- a/test/CodeGen/R600/vertex-fetch-encoding.ll +++ b/test/CodeGen/R600/vertex-fetch-encoding.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=barts | FileCheck --check-prefix=NI-CHECK %s -; RUN: not llc < %s -march=r600 -show-mc-encoding -mcpu=cayman | FileCheck --check-prefix=CM-CHECK %s +; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=cayman | FileCheck --check-prefix=CM-CHECK %s ; NI-CHECK: @vtx_fetch32 ; NI-CHECK: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0 ; encoding: [0x40,0x01,0x0[[GPR]],0x10,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x08,0x00 diff --git a/test/CodeGen/R600/vselect.ll b/test/CodeGen/R600/vselect.ll index 72a9084..dca7b06 100644 --- a/test/CodeGen/R600/vselect.ll +++ b/test/CodeGen/R600/vselect.ll @@ -1,9 +1,9 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI-CHECK %s +;RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s ;EG-CHECK: @test_select_v2i32 -;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;SI-CHECK: @test_select_v2i32 ;SI-CHECK: V_CNDMASK_B32_e64 @@ -20,8 +20,8 @@ entry: } ;EG-CHECK: @test_select_v2f32 -;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: CNDE_INT {{\*? 
*}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;SI-CHECK: @test_select_v2f32 ;SI-CHECK: V_CNDMASK_B32_e64 @@ -31,17 +31,17 @@ define void @test_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrs entry: %0 = load <2 x float> addrspace(1)* %in0 %1 = load <2 x float> addrspace(1)* %in1 - %cmp = fcmp one <2 x float> %0, %1 + %cmp = fcmp une <2 x float> %0, %1 %result = select <2 x i1> %cmp, <2 x float> %0, <2 x float> %1 store <2 x float> %result, <2 x float> addrspace(1)* %out ret void } ;EG-CHECK: @test_select_v4i32 -;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;SI-CHECK: @test_select_v4i32 ;SI-CHECK: V_CNDMASK_B32_e64 @@ -60,16 +60,16 @@ entry: } ;EG-CHECK: @test_select_v4f32 -;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: CNDE_INT {{\*? 
*}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in0, <4 x float> addrspace(1)* %in1) { entry: %0 = load <4 x float> addrspace(1)* %in0 %1 = load <4 x float> addrspace(1)* %in1 - %cmp = fcmp one <4 x float> %0, %1 + %cmp = fcmp une <4 x float> %0, %1 %result = select <4 x i1> %cmp, <4 x float> %0, <4 x float> %1 store <4 x float> %result, <4 x float> addrspace(1)* %out ret void diff --git a/test/CodeGen/R600/vselect64.ll b/test/CodeGen/R600/vselect64.ll new file mode 100644 index 0000000..604695b --- /dev/null +++ b/test/CodeGen/R600/vselect64.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; XXX: Merge this test into vselect.ll once SI supports 64-bit select. + +; CHECK-LABEL: @test_select_v4i64 +; Make sure the vectors aren't being stored on the stack. We know they are +; being stored on the stack if the shaders uses at leat 10 registers. 
+; CHECK-NOT: {{\**}} MOV T{{[0-9][0-9]}}.X +define void @test_select_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> %c) { +entry: + %cmp = icmp ne <4 x i32> %c, <i32 0, i32 0, i32 0, i32 0> + %result = select <4 x i1> %cmp, <4 x i64> <i64 0, i64 1, i64 2, i64 3>, <4 x i64> <i64 4, i64 5, i64 6, i64 7> + store <4 x i64> %result, <4 x i64> addrspace(1)* %out + ret void +} + diff --git a/test/CodeGen/R600/wait.ll b/test/CodeGen/R600/wait.ll new file mode 100644 index 0000000..2cf88fe --- /dev/null +++ b/test/CodeGen/R600/wait.ll @@ -0,0 +1,37 @@ +; RUN: llc < %s -march=r600 -mcpu=SI --verify-machineinstrs | FileCheck %s + +;CHECK-LABEL: @main +;CHECK: S_WAITCNT lgkmcnt(0) +;CHECK: S_WAITCNT vmcnt(0) +;CHECK: S_WAITCNT expcnt(0) lgkmcnt(0) + +define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 { +main_body: + %10 = getelementptr <16 x i8> addrspace(2)* %3, i32 0 + %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0 + %12 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %11, i32 0, i32 %6) + %13 = extractelement <4 x float> %12, i32 0 + %14 = extractelement <4 x float> %12, i32 1 + %15 = extractelement <4 x float> %12, i32 2 + %16 = extractelement <4 x float> %12, i32 3 + %17 = getelementptr <16 x i8> addrspace(2)* %3, i32 1 + %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 + %19 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %6) + %20 = extractelement <4 x float> %19, i32 0 + %21 = extractelement <4 x float> %19, i32 1 + %22 = extractelement <4 x float> %19, i32 2 + %23 = extractelement <4 x float> %19, i32 3 + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %20, float %21, float %22, float %23) + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %13, float %14, float %15, float %16) + ret void +} + +; Function Attrs: nounwind readnone +declare <4 x float> 
@llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="1" } +attributes #1 = { nounwind readnone } + +!0 = metadata !{metadata !"const", null, i32 1} diff --git a/test/CodeGen/R600/work-item-intrinsics.ll b/test/CodeGen/R600/work-item-intrinsics.ll index 7998983..9618d7f 100644 --- a/test/CodeGen/R600/work-item-intrinsics.ll +++ b/test/CodeGen/R600/work-item-intrinsics.ll @@ -1,12 +1,12 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s -; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s ; R600-CHECK: @ngroups_x -; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]] -; R600-CHECK: MOV * [[VAL]], KC0[0].X +; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; R600-CHECK: MOV [[VAL]], KC0[0].X ; SI-CHECK: @ngroups_x -; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 0 -; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] +; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0 +; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]] ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]] define void @ngroups_x (i32 addrspace(1)* %out) { entry: @@ -16,11 +16,11 @@ entry: } ; R600-CHECK: @ngroups_y -; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]] -; R600-CHECK: MOV * [[VAL]], KC0[0].Y +; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; R600-CHECK: MOV [[VAL]], KC0[0].Y ; SI-CHECK: @ngroups_y -; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 1 -; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] +; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 1 +; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]] ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]] define void @ngroups_y (i32 addrspace(1)* %out) { entry: @@ -30,11 +30,11 @@ entry: } ; R600-CHECK: @ngroups_z -; R600-CHECK: 
RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]] -; R600-CHECK: MOV * [[VAL]], KC0[0].Z +; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; R600-CHECK: MOV [[VAL]], KC0[0].Z ; SI-CHECK: @ngroups_z -; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 2 -; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] +; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 2 +; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]] ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]] define void @ngroups_z (i32 addrspace(1)* %out) { entry: @@ -44,11 +44,11 @@ entry: } ; R600-CHECK: @global_size_x -; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]] -; R600-CHECK: MOV * [[VAL]], KC0[0].W +; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; R600-CHECK: MOV [[VAL]], KC0[0].W ; SI-CHECK: @global_size_x -; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 3 -; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] +; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 3 +; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]] ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]] define void @global_size_x (i32 addrspace(1)* %out) { entry: @@ -58,11 +58,11 @@ entry: } ; R600-CHECK: @global_size_y -; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]] -; R600-CHECK: MOV * [[VAL]], KC0[1].X +; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; R600-CHECK: MOV [[VAL]], KC0[1].X ; SI-CHECK: @global_size_y -; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 4 -; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] +; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 4 +; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]] ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]] define void @global_size_y (i32 addrspace(1)* %out) { entry: @@ -72,11 +72,11 @@ entry: } ; R600-CHECK: @global_size_z -; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]] -; R600-CHECK: MOV * [[VAL]], KC0[1].Y +; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; R600-CHECK: MOV [[VAL]], KC0[1].Y ; 
SI-CHECK: @global_size_z -; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 5 -; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] +; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 5 +; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]] ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]] define void @global_size_z (i32 addrspace(1)* %out) { entry: @@ -86,11 +86,11 @@ entry: } ; R600-CHECK: @local_size_x -; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]] -; R600-CHECK: MOV * [[VAL]], KC0[1].Z +; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; R600-CHECK: MOV [[VAL]], KC0[1].Z ; SI-CHECK: @local_size_x -; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 6 -; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] +; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 6 +; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]] ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]] define void @local_size_x (i32 addrspace(1)* %out) { entry: @@ -100,11 +100,11 @@ entry: } ; R600-CHECK: @local_size_y -; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]] -; R600-CHECK: MOV * [[VAL]], KC0[1].W +; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; R600-CHECK: MOV [[VAL]], KC0[1].W ; SI-CHECK: @local_size_y -; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 7 -; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] +; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 7 +; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]] ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]] define void @local_size_y (i32 addrspace(1)* %out) { entry: @@ -114,11 +114,11 @@ entry: } ; R600-CHECK: @local_size_z -; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]] -; R600-CHECK: MOV * [[VAL]], KC0[2].X +; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; R600-CHECK: MOV [[VAL]], KC0[2].X ; SI-CHECK: @local_size_z -; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 8 -; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] +; SI-CHECK: S_LOAD_DWORD 
[[VAL:s[0-9]+]], s[0:1], 8 +; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]] ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]] define void @local_size_z (i32 addrspace(1)* %out) { entry: @@ -127,12 +127,12 @@ entry: ret void } -; The tgid values are stored in SGPRs offset by the number of user SGPRs. -; Currently we always use exactly 2 user SGPRs for the pointer to the +; The tgid values are stored in ss offset by the number of user ss. +; Currently we always use exactly 2 user ss for the pointer to the ; kernel arguments, but this may change in the future. ; SI-CHECK: @tgid_x -; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], SGPR2 +; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s2 ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]] define void @tgid_x (i32 addrspace(1)* %out) { entry: @@ -142,7 +142,7 @@ entry: } ; SI-CHECK: @tgid_y -; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], SGPR3 +; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s3 ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]] define void @tgid_y (i32 addrspace(1)* %out) { entry: @@ -152,7 +152,7 @@ entry: } ; SI-CHECK: @tgid_z -; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], SGPR4 +; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s4 ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]] define void @tgid_z (i32 addrspace(1)* %out) { entry: @@ -162,7 +162,7 @@ entry: } ; SI-CHECK: @tidig_x -; SI-CHECK: BUFFER_STORE_DWORD VGPR0 +; SI-CHECK: BUFFER_STORE_DWORD v0 define void @tidig_x (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tidig.x() #0 @@ -171,7 +171,7 @@ entry: } ; SI-CHECK: @tidig_y -; SI-CHECK: BUFFER_STORE_DWORD VGPR1 +; SI-CHECK: BUFFER_STORE_DWORD v1 define void @tidig_y (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tidig.y() #0 @@ -180,7 +180,7 @@ entry: } ; SI-CHECK: @tidig_z -; SI-CHECK: BUFFER_STORE_DWORD VGPR2 +; SI-CHECK: BUFFER_STORE_DWORD v2 define void @tidig_z (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tidig.z() #0 diff --git a/test/CodeGen/R600/wrong-transalu-pos-fix.ll 
b/test/CodeGen/R600/wrong-transalu-pos-fix.ll new file mode 100644 index 0000000..b1cbe3f --- /dev/null +++ b/test/CodeGen/R600/wrong-transalu-pos-fix.ll @@ -0,0 +1,86 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; We want all MULLO_INT inst to be last in their instruction group +;CHECK: @fill3d +;CHECK-NOT: MULLO_INT T[0-9]+ + +; ModuleID = 'radeon' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64" +target triple = "r600--" + +; Function Attrs: nounwind +define void @fill3d(i32 addrspace(1)* nocapture %out) #0 { +entry: + %x.i = tail call i32 @llvm.r600.read.global.size.x() #1 + %y.i18 = tail call i32 @llvm.r600.read.global.size.y() #1 + %mul = mul i32 %y.i18, %x.i + %z.i17 = tail call i32 @llvm.r600.read.global.size.z() #1 + %mul3 = mul i32 %mul, %z.i17 + %x.i.i = tail call i32 @llvm.r600.read.tgid.x() #1 + %x.i12.i = tail call i32 @llvm.r600.read.local.size.x() #1 + %mul26.i = mul i32 %x.i12.i, %x.i.i + %x.i4.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %add.i16 = add i32 %x.i4.i, %mul26.i + %mul7 = mul i32 %add.i16, %y.i18 + %y.i.i = tail call i32 @llvm.r600.read.tgid.y() #1 + %y.i14.i = tail call i32 @llvm.r600.read.local.size.y() #1 + %mul30.i = mul i32 %y.i14.i, %y.i.i + %y.i6.i = tail call i32 @llvm.r600.read.tidig.y() #1 + %add.i14 = add i32 %mul30.i, %mul7 + %mul819 = add i32 %add.i14, %y.i6.i + %add = mul i32 %mul819, %z.i17 + %z.i.i = tail call i32 @llvm.r600.read.tgid.z() #1 + %z.i16.i = tail call i32 @llvm.r600.read.local.size.z() #1 + %mul33.i = mul i32 %z.i16.i, %z.i.i + %z.i8.i = tail call i32 @llvm.r600.read.tidig.z() #1 + %add.i = add i32 %z.i8.i, %mul33.i + %add13 = add i32 %add.i, %add + %arrayidx = getelementptr inbounds i32 addrspace(1)* %out, i32 %add13 + store i32 %mul3, i32 addrspace(1)* %arrayidx, align 4 + ret void +} + +; 
Function Attrs: nounwind readnone +declare i32 @llvm.r600.read.tgid.x() #1 + +; Function Attrs: nounwind readnone +declare i32 @llvm.r600.read.tgid.y() #1 + +; Function Attrs: nounwind readnone +declare i32 @llvm.r600.read.tgid.z() #1 + +; Function Attrs: nounwind readnone +declare i32 @llvm.r600.read.local.size.x() #1 + +; Function Attrs: nounwind readnone +declare i32 @llvm.r600.read.local.size.y() #1 + +; Function Attrs: nounwind readnone +declare i32 @llvm.r600.read.local.size.z() #1 + +; Function Attrs: nounwind readnone +declare i32 @llvm.r600.read.tidig.x() #1 + +; Function Attrs: nounwind readnone +declare i32 @llvm.r600.read.tidig.y() #1 + +; Function Attrs: nounwind readnone +declare i32 @llvm.r600.read.tidig.z() #1 + +; Function Attrs: nounwind readnone +declare i32 @llvm.r600.read.global.size.x() #1 + +; Function Attrs: nounwind readnone +declare i32 @llvm.r600.read.global.size.y() #1 + +; Function Attrs: nounwind readnone +declare i32 @llvm.r600.read.global.size.z() #1 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!opencl.kernels = !{!0, !1, !2} + +!0 = metadata !{null} +!1 = metadata !{null} +!2 = metadata !{void (i32 addrspace(1)*)* @fill3d} diff --git a/test/CodeGen/R600/xor.ll b/test/CodeGen/R600/xor.ll index f52729d..c12b0c1 100644 --- a/test/CodeGen/R600/xor.ll +++ b/test/CodeGen/R600/xor.ll @@ -1,13 +1,13 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s ;EG-CHECK: @xor_v2i32 -;EG-CHECK: XOR_INT {{\*? 
*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: XOR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;SI-CHECK: @xor_v2i32 -;SI-CHECK: V_XOR_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_XOR_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} +;SI-CHECK: V_XOR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_XOR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @xor_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1) { @@ -19,16 +19,16 @@ define void @xor_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in } ;EG-CHECK: @xor_v4i32 -;EG-CHECK: XOR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: XOR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: XOR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: XOR_INT {{\*? 
*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;SI-CHECK: @xor_v4i32 -;SI-CHECK: V_XOR_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_XOR_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_XOR_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} -;SI-CHECK: V_XOR_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} +;SI-CHECK: V_XOR_B32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_XOR_B32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_XOR_B32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}} +;SI-CHECK: V_XOR_B32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}} define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1) { %a = load <4 x i32> addrspace(1) * %in0 @@ -37,3 +37,20 @@ define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in store <4 x i32> %result, <4 x i32> addrspace(1)* %out ret void } + +;EG-CHECK: @xor_i1 +;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}} + +;SI-CHECK: @xor_i1 +;SI-CHECK: S_XOR_B64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] + +define void @xor_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) { + %a = load float addrspace(1) * %in0 + %b = load float addrspace(1) * %in1 + %acmp = fcmp oge float %a, 0.000000e+00 + %bcmp = fcmp oge float %b, 0.000000e+00 + %xor = xor i1 %acmp, %bcmp + %result = select i1 %xor, float %a, float %b + store float %result, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/zero_extend.ll b/test/CodeGen/R600/zero_extend.ll index 413b849..481b3b3 100644 --- a/test/CodeGen/R600/zero_extend.ll +++ 
b/test/CodeGen/R600/zero_extend.ll @@ -1,13 +1,13 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK -; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK ; R600-CHECK: @test -; R600-CHECK: RAT_WRITE_CACHELESS_32_eg -; R600-CHECK: RAT_WRITE_CACHELESS_32_eg +; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW +; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW ; SI-CHECK: @test -; SI-CHECK: V_MOV_B32_e32 [[ZERO:VGPR[0-9]]], 0 -; SI-CHECK: BUFFER_STORE_DWORDX2 VGPR0_[[ZERO]] +; SI-CHECK: V_MOV_B32_e32 v[[ZERO:[0-9]]], 0 +; SI-CHECK: BUFFER_STORE_DWORDX2 v[0:[[ZERO]]{{\]}} define void @test(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) { entry: %0 = mul i32 %a, %b diff --git a/test/CodeGen/SI/sanity.ll b/test/CodeGen/SI/sanity.ll deleted file mode 100644 index 62cdcf5..0000000 --- a/test/CodeGen/SI/sanity.ll +++ /dev/null @@ -1,37 +0,0 @@ -;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s - -; CHECK: S_ENDPGM - -define void @main() { -main_body: - call void @llvm.AMDGPU.shader.type(i32 1) - %0 = load <4 x i32> addrspace(2)* addrspace(8)* inttoptr (i32 6 to <4 x i32> addrspace(2)* addrspace(8)*) - %1 = getelementptr <4 x i32> addrspace(2)* %0, i32 0 - %2 = load <4 x i32> addrspace(2)* %1 - %3 = call i32 @llvm.SI.vs.load.buffer.index() - %4 = call <4 x float> @llvm.SI.vs.load.input(<4 x i32> %2, i32 0, i32 %3) - %5 = extractelement <4 x float> %4, i32 0 - %6 = extractelement <4 x float> %4, i32 1 - %7 = extractelement <4 x float> %4, i32 2 - %8 = extractelement <4 x float> %4, i32 3 - %9 = load <4 x i32> addrspace(2)* addrspace(8)* inttoptr (i32 6 to <4 x i32> addrspace(2)* addrspace(8)*) - %10 = getelementptr <4 x i32> addrspace(2)* %9, i32 1 - %11 = load <4 x i32> addrspace(2)* %10 - %12 = call i32 @llvm.SI.vs.load.buffer.index() - %13 = call <4 x float> @llvm.SI.vs.load.input(<4 x i32> %11, i32 0, i32 %12) - %14 = extractelement 
<4 x float> %13, i32 0 - %15 = extractelement <4 x float> %13, i32 1 - %16 = extractelement <4 x float> %13, i32 2 - %17 = extractelement <4 x float> %13, i32 3 - call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %14, float %15, float %16, float %17) - call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %5, float %6, float %7, float %8) - ret void -} - -declare void @llvm.AMDGPU.shader.type(i32) - -declare i32 @llvm.SI.vs.load.buffer.index() readnone - -declare <4 x float> @llvm.SI.vs.load.input(<4 x i32>, i32, i32) - -declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/SPARC/2011-01-11-CC.ll b/test/CodeGen/SPARC/2011-01-11-CC.ll index edbcb49..50f3a65 100644 --- a/test/CodeGen/SPARC/2011-01-11-CC.ll +++ b/test/CodeGen/SPARC/2011-01-11-CC.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=sparc <%s | FileCheck %s -check-prefix=V8 ; RUN: llc -march=sparc -mattr=v9 <%s | FileCheck %s -check-prefix=V9 +; RUN: llc -mtriple=sparc64-unknown-linux <%s | FileCheck %s -check-prefix=SPARC64 define i32 @test_addx(i64 %a, i64 %b, i64 %c) nounwind readnone noinline { @@ -65,9 +66,11 @@ define i32 @test_select_int_fcc(float %f, i32 %a, i32 %b) nounwind readnone noin entry: ;V8-LABEL: test_select_int_fcc: ;V8: fcmps +;V8-NEXT: nop ;V8: {{fbe|fbne}} ;V9-LABEL: test_select_int_fcc: ;V9: fcmps +;V9-NEXT-NOT: nop ;V9-NOT: {{fbe|fbne}} ;V9: mov{{e|ne}} %fcc0 %0 = fcmp une float %f, 0.000000e+00 @@ -94,12 +97,95 @@ define double @test_select_dfp_fcc(double %f, double %f1, double %f2) nounwind r entry: ;V8-LABEL: test_select_dfp_fcc: ;V8: fcmpd +;V8-NEXT: nop ;V8: {{fbne|fbe}} ;V9-LABEL: test_select_dfp_fcc: ;V9: fcmpd +;V9-NEXT-NOT: nop ;V9-NOT: {{fbne|fbe}} ;V9: fmovd{{e|ne}} %fcc0 %0 = fcmp une double %f, 0.000000e+00 %1 = select i1 %0, double %f1, double %f2 ret double %1 } + +define i32 @test_float_cc(double %a, double %b, i32 %c, i32 %d) { +entry: +; V8-LABEL: test_float_cc +; V8: fcmpd +; V8: 
{{fbl|fbuge}} .LBB +; V8: fcmpd +; V8: {{fbule|fbg}} .LBB + +; V9-LABEL: test_float_cc +; V9: fcmpd +; V9: {{fbl|fbuge}} .LBB +; V9: fcmpd +; V9: {{fbule|fbg}} .LBB + + %0 = fcmp uge double %a, 0.000000e+00 + br i1 %0, label %loop, label %loop.2 + +loop: + %1 = icmp eq i32 %c, 10 + br i1 %1, label %loop, label %exit.0 + +loop.2: + %2 = fcmp ogt double %b, 0.000000e+00 + br i1 %2, label %exit.1, label %loop + +exit.0: + ret i32 0 + +exit.1: + ret i32 1 +} + +; V8-LABEL: test_adde_sube +; V8: addcc +; V8: addxcc +; V8: addxcc +; V8: addxcc +; V8: subcc +; V8: subxcc +; V8: subxcc +; V8: subxcc + + +; V9-LABEL: test_adde_sube +; V9: addcc +; V9: addxcc +; V9: addxcc +; V9: addxcc +; V9: subcc +; V9: subxcc +; V9: subxcc +; V9: subxcc + +; SPARC64-LABEL: test_adde_sube +; SPARC64: addcc +; SPARC64: addxcc +; SPARC64: addxcc +; SPARC64: addxcc +; SPARC64: subcc +; SPARC64: subxcc +; SPARC64: subxcc +; SPARC64: subxcc + + +define void @test_adde_sube(i8* %a, i8* %b, i8* %sum, i8* %diff) { +entry: + %0 = bitcast i8* %a to i128* + %1 = bitcast i8* %b to i128* + %2 = load i128* %0 + %3 = load i128* %1 + %4 = add i128 %2, %3 + %5 = bitcast i8* %sum to i128* + store i128 %4, i128* %5 + tail call void asm sideeffect "", "=*m,*m"(i128 *%0, i128* %5) nounwind + %6 = load i128* %0 + %7 = sub i128 %2, %6 + %8 = bitcast i8* %diff to i128* + store i128 %7, i128* %8 + ret void +} diff --git a/test/CodeGen/SPARC/2011-01-11-Call.ll b/test/CodeGen/SPARC/2011-01-11-Call.ll index 7350e92..a0f478e 100644 --- a/test/CodeGen/SPARC/2011-01-11-Call.ll +++ b/test/CodeGen/SPARC/2011-01-11-Call.ll @@ -1,4 +1,24 @@ ; RUN: llc -march=sparc -O0 <%s +; RUN: llc -march=sparc <%s | FileCheck %s --check-prefix=V8 +; RUN: llc -march=sparcv9 <%s | FileCheck %s --check-prefix=V9 + +; V8-LABEL: test +; V8: save %sp +; V8: call foo +; V8-NEXT: nop +; V8: call bar +; V8-NEXT: nop +; V8: jmp %i7+8 +; V8-NEXT: restore + +; V9-LABEL: test +; V9: save %sp +; V9: call foo +; V9-NEXT: nop +; V9: call bar +; 
V9-NEXT: nop +; V9: jmp %i7+8 +; V9-NEXT: restore define void @test() nounwind { entry: @@ -11,3 +31,23 @@ declare i32 @foo(...) declare void @bar(...) + +; V8-LABEL: test_tail_call_with_return +; V8: save %sp +; V8: call foo +; V8-NEXT: nop +; V8: jmp %i7+8 +; V8-NEXT: restore %g0, %o0, %o0 + +; V9-LABEL: test_tail_call_with_return +; V9: save %sp +; V9: call foo +; V9-NEXT: nop +; V9: jmp %i7+8 +; V9-NEXT: restore %g0, %o0, %o0 + +define i32 @test_tail_call_with_return() nounwind { +entry: + %0 = tail call i32 (...)* @foo() nounwind + ret i32 %0 +} diff --git a/test/CodeGen/SPARC/2013-05-17-CallFrame.ll b/test/CodeGen/SPARC/2013-05-17-CallFrame.ll index 9e9e821..81f586f 100644 --- a/test/CodeGen/SPARC/2013-05-17-CallFrame.ll +++ b/test/CodeGen/SPARC/2013-05-17-CallFrame.ll @@ -1,10 +1,20 @@ -; RUN: llc -march=sparc < %s | FileCheck %s +; RUN: llc -march=sparc < %s | FileCheck %s --check-prefix=V8 +; RUN: llc -march=sparcv9 < %s | FileCheck %s --check-prefix=SPARC64 + +; V8-LABEL: variable_alloca_with_adj_call_stack +; V8: save %sp, -96, %sp +; V8: add {{.+}}, 96, %o0 +; V8: add %sp, -16, %sp +; V8: call foo +; V8: add %sp, 16, %sp + +; SPARC64-LABEL: variable_alloca_with_adj_call_stack +; SPARC64: save %sp, -128, %sp +; SPARC64: add {{.+}}, 2175, %o0 +; SPARC64: add %sp, -80, %sp +; SPARC64: call foo +; SPARC64: add %sp, 80, %sp -; CHECK: variable_alloca_with_adj_call_stack -; CHECK: save %sp, -96, %sp -; CHECK: add %sp, -16, %sp -; CHECK: call foo -; CHECK: add %sp, 16, %sp define void @variable_alloca_with_adj_call_stack(i32 %num) { entry: %0 = alloca i8, i32 %num, align 8 diff --git a/test/CodeGen/SPARC/64abi.ll b/test/CodeGen/SPARC/64abi.ll index 5a7eb40..8b752a1 100644 --- a/test/CodeGen/SPARC/64abi.ll +++ b/test/CodeGen/SPARC/64abi.ll @@ -376,3 +376,38 @@ define signext i32 @ret_nosext(i32 signext %a0) { define signext i32 @ret_nozext(i32 signext %a0) { ret i32 %a0 } + +; CHECK-LABEL: test_register_directive +; CHECK: .register %g2, #scratch +; CHECK: 
.register %g3, #scratch +; CHECK: add %i0, 2, %g2 +; CHECK: add %i0, 3, %g3 +define i32 @test_register_directive(i32 %i0) { +entry: + %0 = add nsw i32 %i0, 2 + %1 = add nsw i32 %i0, 3 + tail call void asm sideeffect "", "r,r,~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6},~{o7},~{g1},~{g4},~{g5},~{g6},~{g7}"(i32 %0, i32 %1) + %2 = add nsw i32 %0, %1 + ret i32 %2 +} + +; CHECK-LABEL: test_large_stack + +; CHECK: sethi 16, %g1 +; CHECK: xor %g1, -176, %g1 +; CHECK: save %sp, %g1, %sp + +; CHECK: sethi 14, %g1 +; CHECK: xor %g1, -1, %g1 +; CHECK: add %g1, %fp, %g1 +; CHECK: call use_buf + +define i32 @test_large_stack() { +entry: + %buffer1 = alloca [16384 x i8], align 8 + %buffer1.sub = getelementptr inbounds [16384 x i8]* %buffer1, i32 0, i32 0 + %0 = call i32 @use_buf(i32 16384, i8* %buffer1.sub) + ret i32 %0 +} + +declare i32 @use_buf(i32, i8*) diff --git a/test/CodeGen/SPARC/64bit.ll b/test/CodeGen/SPARC/64bit.ll index f778f9d..f5ed047 100644 --- a/test/CodeGen/SPARC/64bit.ll +++ b/test/CodeGen/SPARC/64bit.ll @@ -285,3 +285,26 @@ entry: store i64 0, i64* %0, align 8 ret i64 0 } + +; CHECK-LABEL: bit_ops +; CHECK: popc + +; OPT-LABEL: bit_ops +; OPT: popc + +define i64 @bit_ops(i64 %arg) { +entry: + %0 = tail call i64 @llvm.ctpop.i64(i64 %arg) + %1 = tail call i64 @llvm.ctlz.i64(i64 %arg, i1 true) + %2 = tail call i64 @llvm.cttz.i64(i64 %arg, i1 true) + %3 = tail call i64 @llvm.bswap.i64(i64 %arg) + %4 = add i64 %0, %1 + %5 = add i64 %2, %3 + %6 = add i64 %4, %5 + ret i64 %6 +} + +declare i64 @llvm.ctpop.i64(i64) nounwind readnone +declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone +declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone +declare i64 @llvm.bswap.i64(i64) nounwind readnone diff --git a/test/CodeGen/SPARC/64cond.ll b/test/CodeGen/SPARC/64cond.ll index bdc5e70..7451b04 100644 --- a/test/CodeGen/SPARC/64cond.ll +++ b/test/CodeGen/SPARC/64cond.ll @@ -109,3 
+109,17 @@ entry: %rv = select i1 %tobool, i64 123, i64 0 ret i64 %rv } + +; CHECK-LABEL: setcc_resultty +; CHECK: cmp +; CHECK: movne %xcc, 1, [[R:%[gilo][0-7]]] +; CHECK: or [[R]], %i1, %i0 + +define i1 @setcc_resultty(i64 %a, i1 %b) { + %a0 = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %a, i64 32) + %a1 = extractvalue { i64, i1 } %a0, 1 + %a4 = or i1 %a1, %b + ret i1 %a4 +} + +declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) diff --git a/test/CodeGen/SPARC/constpool.ll b/test/CodeGen/SPARC/constpool.ll index d8b7b15..b861676 100644 --- a/test/CodeGen/SPARC/constpool.ll +++ b/test/CodeGen/SPARC/constpool.ll @@ -39,8 +39,10 @@ entry: ; v8pic32: sethi %hi(.LCPI0_0), %[[R1:[gilo][0-7]]] ; v8pic32: add %[[R1]], %lo(.LCPI0_0), %[[Goffs:[gilo][0-7]]] ; v8pic32: ld [%[[GOT:[gilo][0-7]]]+%[[Goffs]]], %[[Gaddr:[gilo][0-7]]] -; v8pic32: jmp %o7+8 ; v8pic32: ld [%[[Gaddr]]], %f0 +; v8pic32: jmp %i7+8 +; v8pic32: restore + ; v9pic32: floatCP @@ -48,6 +50,8 @@ entry: ; v9pic32: sethi %hi(.LCPI0_0), %[[R1:[gilo][0-7]]] ; v9pic32: add %[[R1]], %lo(.LCPI0_0), %[[Goffs:[gilo][0-7]]] ; v9pic32: ldx [%[[GOT:[gilo][0-7]]]+%[[Goffs]]], %[[Gaddr:[gilo][0-7]]] -; v9pic32: jmp %o7+8 ; v9pic32: ld [%[[Gaddr]]], %f1 +; v9pic32: jmp %i7+8 +; v9pic32: restore + diff --git a/test/CodeGen/SPARC/exception.ll b/test/CodeGen/SPARC/exception.ll new file mode 100644 index 0000000..cb5b6e5 --- /dev/null +++ b/test/CodeGen/SPARC/exception.ll @@ -0,0 +1,112 @@ +; RUN: llc < %s -march=sparc | FileCheck %s + + +%struct.__fundamental_type_info_pseudo = type { %struct.__type_info_pseudo } +%struct.__type_info_pseudo = type { i8*, i8* } + +@_ZTIi = external constant %struct.__fundamental_type_info_pseudo +@_ZTIf = external constant %struct.__fundamental_type_info_pseudo +@.cst = linker_private unnamed_addr constant [12 x i8] c"catched int\00", align 64 +@.cst1 = linker_private unnamed_addr constant [14 x i8] c"catched float\00", align 64 + +; CHECK-LABEL: main: +; CHECK: 
.cfi_startproc +; CHECK: .cfi_def_cfa_register 30 +; CHECK: .cfi_window_save +; CHECK: .cfi_register 15, 31 + +; CHECK: call __cxa_throw +; CHECK: call __cxa_throw + +; CHECK: call __cxa_begin_catch +; CHECK: call __cxa_end_catch + +; CHECK: call __cxa_begin_catch +; CHECK: call __cxa_end_catch + +; CHECK: .cfi_endproc + +define i32 @main(i32 %argc, i8** nocapture readnone %argv) unnamed_addr #0 { +entry: + %0 = icmp eq i32 %argc, 2 + %1 = tail call i8* @__cxa_allocate_exception(i32 4) #1 + br i1 %0, label %"3", label %"4" + +"3": ; preds = %entry + %2 = bitcast i8* %1 to i32* + store i32 0, i32* %2, align 4 + invoke void @__cxa_throw(i8* %1, i8* bitcast (%struct.__fundamental_type_info_pseudo* @_ZTIi to i8*), void (i8*)* null) #2 + to label %3 unwind label %"8" + +; <label>:3 ; preds = %"3" + unreachable + +"4": ; preds = %entry + %4 = bitcast i8* %1 to float* + store float 1.000000e+00, float* %4, align 4 + + + invoke void @__cxa_throw(i8* %1, i8* bitcast (%struct.__fundamental_type_info_pseudo* @_ZTIf to i8*), void (i8*)* null) #2 + to label %5 unwind label %"8" + +; <label>:5 ; preds = %"4" + unreachable + +"5": ; preds = %"13", %"11" + %6 = phi i32 [ 2, %"13" ], [ 0, %"11" ] + ret i32 %6 + +"8": ; preds = %"4", %"3" + %exc = landingpad { i8*, i32 } personality i32 (i32, i64, i8*, i8*)* @__gxx_personality_v0 + catch %struct.__fundamental_type_info_pseudo* @_ZTIi + catch %struct.__fundamental_type_info_pseudo* @_ZTIf + %exc_ptr12 = extractvalue { i8*, i32 } %exc, 0 + %filter13 = extractvalue { i8*, i32 } %exc, 1 + %typeid = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%struct.__fundamental_type_info_pseudo* @_ZTIi to i8*)) + %7 = icmp eq i32 %filter13, %typeid + br i1 %7, label %"11", label %8 + +; <label>:8 ; preds = %"8" + %typeid8 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%struct.__fundamental_type_info_pseudo* @_ZTIf to i8*)) + %9 = icmp eq i32 %filter13, %typeid8 + br i1 %9, label %"13", label %"9" + +"9": ; preds = %8 + resume { i8*, i32 } %exc + 
+"11": ; preds = %"8" + %10 = tail call i8* @__cxa_begin_catch(i8* %exc_ptr12) #1 + %11 = tail call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @.cst, i32 0, i32 0)) + tail call void @__cxa_end_catch() #1 + br label %"5" + +"13": ; preds = %8 + %12 = tail call i8* @__cxa_begin_catch(i8* %exc_ptr12) #1 + %13 = tail call i32 @puts(i8* getelementptr inbounds ([14 x i8]* @.cst1, i32 0, i32 0)) + tail call void @__cxa_end_catch() #1 + br label %"5" +} + +; Function Attrs: nounwind +declare i8* @__cxa_allocate_exception(i32) #1 + +; Function Attrs: noreturn +declare void @__cxa_throw(i8*, i8*, void (i8*)*) #2 + +declare void @__cxa_end_catch() + +; Function Attrs: nounwind readnone +declare i32 @llvm.eh.typeid.for(i8*) #3 + +; Function Attrs: nounwind +declare i8* @__cxa_begin_catch(i8*) #1 + +; Function Attrs: nounwind +declare i32 @puts(i8* nocapture readonly) #1 + +declare i32 @__gxx_personality_v0(i32, i64, i8*, i8*) + +attributes #0 = { "no-frame-pointer-elim-non-leaf"="false" } +attributes #1 = { nounwind } +attributes #2 = { noreturn } +attributes #3 = { nounwind readnone } diff --git a/test/CodeGen/SPARC/float.ll b/test/CodeGen/SPARC/float.ll index 8dfd371..6636704 100644 --- a/test/CodeGen/SPARC/float.ll +++ b/test/CodeGen/SPARC/float.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=sparc < %s | FileCheck %s -check-prefix=V8 ; RUN: llc -march=sparc -O0 < %s | FileCheck %s -check-prefix=V8-UNOPT ; RUN: llc -march=sparc -mattr=v9 < %s | FileCheck %s -check-prefix=V9 - +; RUN: llc -mtriple=sparc64-unknown-linux < %s | FileCheck %s -check-prefix=SPARC64 ; V8-LABEL: test_neg: ; V8: call get_double @@ -16,6 +16,9 @@ ; V9-LABEL: test_neg: ; V9: fnegd %f0, %f0 +; SPARC64-LABEL: test_neg: +; SPARC64: fnegd %f0, %f0 + define double @test_neg() { entry: %0 = tail call double @get_double() @@ -35,6 +38,10 @@ entry: ; V9-LABEL: test_abs: ; V9: fabsd %f0, %f0 + +; SPARC64-LABEL: test_abs: +; SPARC64: fabsd %f0, %f0 + define double @test_abs() { entry: %0 = tail call double 
@get_double() @@ -45,3 +52,198 @@ entry: declare double @get_double() declare double @llvm.fabs.f64(double) nounwind readonly +; V8-LABEL: test_v9_floatreg: +; V8: fsubd {{.+}}, {{.+}}, {{.+}} +; V8: faddd {{.+}}, {{.+}}, [[R:%f(((1|2)?(0|2|4|6|8))|30)]] +; V8: std [[R]], [%{{.+}}] +; V8: ldd [%{{.+}}], %f0 + +; V9-LABEL: test_v9_floatreg: +; V9: fsubd {{.+}}, {{.+}}, {{.+}} +; V9: faddd {{.+}}, {{.+}}, [[R:%f((3(2|4|6|8))|((4|5)(0|2|4|6|8))|(60|62))]] +; V9: fmovd [[R]], %f0 + +; SPARC64-LABEL: test_v9_floatreg: +; SPARC64: fsubd {{.+}}, {{.+}}, {{.+}} +; SPARC64: faddd {{.+}}, {{.+}}, [[R:%f((3(2|4|6|8))|((4|5)(0|2|4|6|8))|(60|62))]] +; SPARC64: fmovd [[R]], %f0 + +define double @test_v9_floatreg() { +entry: + %0 = tail call double @get_double() + %1 = tail call double @get_double() + %2 = fsub double %0, %1 + tail call void asm sideeffect "", "~{f0},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"() + %3 = fadd double %2, %2 + ret double %3 +} + +; V8-LABEL: test_xtos_stox +; V8: call __floatdisf +; V8: call __fixsfdi + +; V9-LABEL: test_xtos_stox +; V9: call __floatdisf +; V9: call __fixsfdi + +; SPARC64-LABEL: test_xtos_stox +; SPARC64: fxtos +; SPARC64: fstox + +define void @test_xtos_stox(i64 %a, i64* %ptr0, float* %ptr1) { +entry: + %0 = sitofp i64 %a to float + store float %0, float* %ptr1, align 8 + %1 = fptosi float %0 to i64 + store i64 %1, i64* %ptr0, align 8 + ret void +} + +; V8-LABEL: test_itos_stoi +; V8: fitos +; V8: fstoi + +; V9-LABEL: test_itos_stoi +; V9: fitos +; V9: fstoi + +; SPARC64-LABEL: test_itos_stoi +; SPARC64: fitos +; SPARC64: fstoi + +define void @test_itos_stoi(i32 %a, i32* %ptr0, float* %ptr1) { +entry: + %0 = sitofp i32 %a to float + store float %0, float* %ptr1, align 8 + %1 = fptosi float %0 to i32 + store i32 %1, i32* %ptr0, align 8 + ret void +} + + +; V8-LABEL: 
test_xtod_dtox +; V8: call __floatdidf +; V8: call __fixdfdi + +; V9-LABEL: test_xtod_dtox +; V9: call __floatdidf +; V9: call __fixdfdi + +; SPARC64-LABEL: test_xtod_dtox +; SPARC64: fxtod +; SPARC64: fdtox + +define void @test_xtod_dtox(i64 %a, i64* %ptr0, double* %ptr1) { +entry: + %0 = sitofp i64 %a to double + store double %0, double* %ptr1, align 8 + %1 = fptosi double %0 to i64 + store i64 %1, i64* %ptr0, align 8 + ret void +} + +; V8-LABEL: test_itod_dtoi +; V8: fitod +; V8: fdtoi + +; V9-LABEL: test_itod_dtoi +; V9: fitod +; V9: fdtoi + +; SPARC64-LABEL: test_itod_dtoi +; SPARC64: fitod +; SPARC64: fdtoi + +define void @test_itod_dtoi(i32 %a, i32* %ptr0, double* %ptr1) { +entry: + %0 = sitofp i32 %a to double + store double %0, double* %ptr1, align 8 + %1 = fptosi double %0 to i32 + store i32 %1, i32* %ptr0, align 8 + ret void +} + +; V8-LABEL: test_uxtos_stoux +; V8: call __floatundisf +; V8: call __fixunssfdi + +; V9-LABEL: test_uxtos_stoux +; V9: call __floatundisf +; V9: call __fixunssfdi + +; SPARC64-LABEL: test_uxtos_stoux +; SPARC64-NOT: call __floatundisf +; SPARC64-NOT: call __fixunssfdi + +define void @test_uxtos_stoux(i64 %a, i64* %ptr0, float* %ptr1) { +entry: + %0 = uitofp i64 %a to float + store float %0, float* %ptr1, align 8 + %1 = fptoui float %0 to i64 + store i64 %1, i64* %ptr0, align 8 + ret void +} + +; V8-LABEL: test_utos_stou +; V8: fdtos +; V8: fstoi + +; V9-LABEL: test_utos_stou +; V9: fdtos +; V9: fstoi + +; SPARC64-LABEL: test_utos_stou +; SPARC64: fdtos +; SPARC64: fstoi + +define void @test_utos_stou(i32 %a, i32* %ptr0, float* %ptr1) { +entry: + %0 = uitofp i32 %a to float + store float %0, float* %ptr1, align 8 + %1 = fptoui float %0 to i32 + store i32 %1, i32* %ptr0, align 8 + ret void +} + + +; V8-LABEL: test_uxtod_dtoux +; V8: call __floatundidf +; V8: call __fixunsdfdi + +; V9-LABEL: test_uxtod_dtoux +; V9: call __floatundidf +; V9: call __fixunsdfdi + +; SPARC64-LABEL: test_uxtod_dtoux +; SPARC64-NOT: call __floatundidf 
+; SPARC64-NOT: call __floatunsdfdi + +define void @test_uxtod_dtoux(i64 %a, i64* %ptr0, double* %ptr1) { +entry: + %0 = uitofp i64 %a to double + store double %0, double* %ptr1, align 8 + %1 = fptoui double %0 to i64 + store i64 %1, i64* %ptr0, align 8 + ret void +} + +; V8-LABEL: test_utod_dtou +; V8-NOT: fitod +; V8: fdtoi + +; V9-LABEL: test_utod_dtou +; V9-NOT: fitod +; V9: fdtoi + +; SPARC64-LABEL: test_utod_dtou +; SPARC64-NOT: fitod +; SPARC64: fdtoi + +define void @test_utod_dtou(i32 %a, double %b, i32* %ptr0, double* %ptr1) { +entry: + %0 = uitofp i32 %a to double + store double %0, double* %ptr1, align 8 + %1 = fptoui double %b to i32 + store i32 %1, i32* %ptr0, align 8 + ret void +} diff --git a/test/CodeGen/SPARC/fp128.ll b/test/CodeGen/SPARC/fp128.ll new file mode 100644 index 0000000..c761361 --- /dev/null +++ b/test/CodeGen/SPARC/fp128.ll @@ -0,0 +1,234 @@ +; RUN: llc < %s -march=sparc -mattr=hard-quad-float | FileCheck %s --check-prefix=HARD +; RUN: llc < %s -march=sparc -mattr=-hard-quad-float | FileCheck %s --check-prefix=SOFT + + +; HARD-LABEL: f128_ops +; HARD: ldd +; HARD: ldd +; HARD: ldd +; HARD: ldd +; HARD: faddq [[R0:.+]], [[R1:.+]], [[R2:.+]] +; HARD: fsubq [[R2]], [[R3:.+]], [[R4:.+]] +; HARD: fmulq [[R4]], [[R5:.+]], [[R6:.+]] +; HARD: fdivq [[R6]], [[R2]] +; HARD: std +; HARD: std + +; SOFT-LABEL: f128_ops +; SOFT: ldd +; SOFT: ldd +; SOFT: ldd +; SOFT: ldd +; SOFT: call _Q_add +; SOFT: call _Q_sub +; SOFT: call _Q_mul +; SOFT: call _Q_div +; SOFT: std +; SOFT: std + +define void @f128_ops(fp128* noalias sret %scalar.result, fp128* byval %a, fp128* byval %b, fp128* byval %c, fp128* byval %d) { +entry: + %0 = load fp128* %a, align 8 + %1 = load fp128* %b, align 8 + %2 = load fp128* %c, align 8 + %3 = load fp128* %d, align 8 + %4 = fadd fp128 %0, %1 + %5 = fsub fp128 %4, %2 + %6 = fmul fp128 %5, %3 + %7 = fdiv fp128 %6, %4 + store fp128 %7, fp128* %scalar.result, align 8 + ret void +} + +; HARD-LABEL: f128_spill +; HARD: std %f{{.+}}, 
[%[[S0:.+]]] +; HARD: std %f{{.+}}, [%[[S1:.+]]] +; HARD-DAG: ldd [%[[S0]]], %f{{.+}} +; HARD-DAG: ldd [%[[S1]]], %f{{.+}} +; HARD: jmp + +; SOFT-LABEL: f128_spill +; SOFT: std %f{{.+}}, [%[[S0:.+]]] +; SOFT: std %f{{.+}}, [%[[S1:.+]]] +; SOFT-DAG: ldd [%[[S0]]], %f{{.+}} +; SOFT-DAG: ldd [%[[S1]]], %f{{.+}} +; SOFT: jmp + +define void @f128_spill(fp128* noalias sret %scalar.result, fp128* byval %a) { +entry: + %0 = load fp128* %a, align 8 + call void asm sideeffect "", "~{f0},~{f1},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"() + store fp128 %0, fp128* %scalar.result, align 8 + ret void +} + +; HARD-LABEL: f128_compare +; HARD: fcmpq +; HARD-NEXT: nop + +; SOFT-LABEL: f128_compare +; SOFT: _Q_cmp + +define i32 @f128_compare(fp128* byval %f0, fp128* byval %f1, i32 %a, i32 %b) { +entry: + %0 = load fp128* %f0, align 8 + %1 = load fp128* %f1, align 8 + %cond = fcmp ult fp128 %0, %1 + %ret = select i1 %cond, i32 %a, i32 %b + ret i32 %ret +} + +; HARD-LABEL: f128_compare2 +; HARD: fcmpq +; HARD: fb{{ule|g}} + +; SOFT-LABEL: f128_compare2 +; SOFT: _Q_cmp +; SOFT: cmp + +define i32 @f128_compare2() { +entry: + %0 = fcmp ogt fp128 undef, 0xL00000000000000000000000000000000 + br i1 %0, label %"5", label %"7" + +"5": ; preds = %entry + ret i32 0 + +"7": ; preds = %entry + ret i32 1 +} + + +; HARD-LABEL: f128_abs +; HARD: fabss + +; SOFT-LABEL: f128_abs +; SOFT: fabss + +define void @f128_abs(fp128* noalias sret %scalar.result, fp128* byval %a) { +entry: + %0 = load fp128* %a, align 8 + %1 = tail call fp128 @llvm.fabs.f128(fp128 %0) + store fp128 %1, fp128* %scalar.result, align 8 + ret void +} + +declare fp128 @llvm.fabs.f128(fp128) nounwind readonly + +; HARD-LABEL: int_to_f128 +; HARD: fitoq + +; SOFT-LABEL: int_to_f128 +; SOFT: _Q_itoq + +define void @int_to_f128(fp128* noalias sret %scalar.result, i32 %i) { +entry: 
+ %0 = sitofp i32 %i to fp128 + store fp128 %0, fp128* %scalar.result, align 8 + ret void +} + +; HARD-LABEL: fp128_unaligned +; HARD: ldub +; HARD: faddq +; HARD: stb +; HARD: jmp + +; SOFT-LABEL: fp128_unaligned +; SOFT: ldub +; SOFT: call _Q_add +; SOFT: stb +; SOFT: jmp + +define void @fp128_unaligned(fp128* %a, fp128* %b, fp128* %c) { +entry: + %0 = load fp128* %a, align 1 + %1 = load fp128* %b, align 1 + %2 = fadd fp128 %0, %1 + store fp128 %2, fp128* %c, align 1 + ret void +} + +; HARD-LABEL: uint_to_f128 +; HARD: fdtoq + +; SOFT-LABEL: uint_to_f128 +; SOFT: _Q_utoq + +define void @uint_to_f128(fp128* noalias sret %scalar.result, i32 %i) { +entry: + %0 = uitofp i32 %i to fp128 + store fp128 %0, fp128* %scalar.result, align 8 + ret void +} + +; HARD-LABEL: f128_to_i32 +; HARD: fqtoi +; HARD: fqtoi + +; SOFT-LABEL: f128_to_i32 +; SOFT: call _Q_qtou +; SOFT: call _Q_qtoi + + +define i32 @f128_to_i32(fp128* %a, fp128* %b) { +entry: + %0 = load fp128* %a, align 8 + %1 = load fp128* %b, align 8 + %2 = fptoui fp128 %0 to i32 + %3 = fptosi fp128 %1 to i32 + %4 = add i32 %2, %3 + ret i32 %4 +} + +; HARD-LABEL: test_itoq_qtoi +; HARD: call _Q_lltoq +; HARD: call _Q_qtoll +; HARD: fitoq +; HARD: fqtoi + +; SOFT-LABEL: test_itoq_qtoi +; SOFT: call _Q_lltoq +; SOFT: call _Q_qtoll +; SOFT: call _Q_itoq +; SOFT: call _Q_qtoi + +define void @test_itoq_qtoi(i64 %a, i32 %b, i64* %ptr0, fp128* %ptr1) { +entry: + %0 = sitofp i64 %a to fp128 + store fp128 %0, fp128* %ptr1, align 8 + %1 = fptosi fp128 %0 to i64 + store i64 %1, i64* %ptr0, align 8 + %2 = sitofp i32 %b to fp128 + store fp128 %2, fp128* %ptr1, align 8 + %3 = fptosi fp128 %2 to i32 + %4 = bitcast i64* %ptr0 to i32* + store i32 %3, i32* %4, align 8 + ret void +} + +; HARD-LABEL: test_utoq_qtou +; HARD-DAG: call _Q_ulltoq +; HARD-DAG: call _Q_qtoull +; HARD-DAG: fdtoq +; HARD-DAG: fqtoi + +; SOFT-LABEL: test_utoq_qtou +; SOFT-DAG: call _Q_ulltoq +; SOFT-DAG: call _Q_qtoull +; SOFT-DAG: call _Q_utoq +; SOFT-DAG: call 
_Q_qtou + +define void @test_utoq_qtou(i64 %a, i32 %b, i64* %ptr0, fp128* %ptr1) { +entry: + %0 = uitofp i64 %a to fp128 + store fp128 %0, fp128* %ptr1, align 8 + %1 = fptoui fp128 %0 to i64 + store i64 %1, i64* %ptr0, align 8 + %2 = uitofp i32 %b to fp128 + store fp128 %2, fp128* %ptr1, align 8 + %3 = fptoui fp128 %2 to i32 + %4 = bitcast i64* %ptr0 to i32* + store i32 %3, i32* %4, align 8 + ret void +} diff --git a/test/CodeGen/SPARC/globals.ll b/test/CodeGen/SPARC/globals.ll index 0e0dfc8..7e3effe 100644 --- a/test/CodeGen/SPARC/globals.ll +++ b/test/CodeGen/SPARC/globals.ll @@ -41,8 +41,9 @@ define zeroext i8 @loadG() { ; v8pic32: sethi %hi(G), %[[R1:[gilo][0-7]]] ; v8pic32: add %[[R1]], %lo(G), %[[Goffs:[gilo][0-7]]] ; v8pic32: ld [%[[GOT:[gilo][0-7]]]+%[[Goffs]]], %[[Gaddr:[gilo][0-7]]] -; v8pic32: jmp %o7+8 -; v8pic32: ldub [%[[Gaddr]]], %o0 +; v8pic32: ldub [%[[Gaddr]]], %i0 +; v8pic32: jmp %i7+8 +; v8pic32: restore ; v9pic32: loadG @@ -50,6 +51,7 @@ define zeroext i8 @loadG() { ; v9pic32: sethi %hi(G), %[[R1:[gilo][0-7]]] ; v9pic32: add %[[R1]], %lo(G), %[[Goffs:[gilo][0-7]]] ; v9pic32: ldx [%[[GOT:[gilo][0-7]]]+%[[Goffs]]], %[[Gaddr:[gilo][0-7]]] -; v9pic32: jmp %o7+8 -; v9pic32: ldub [%[[Gaddr]]], %o0 +; v9pic32: ldub [%[[Gaddr]]], %i0 +; v9pic32: jmp %i7+8 +; v9pic32: restore diff --git a/test/CodeGen/SPARC/lit.local.cfg b/test/CodeGen/SPARC/lit.local.cfg index 6f30a87..4d344fa 100644 --- a/test/CodeGen/SPARC/lit.local.cfg +++ b/test/CodeGen/SPARC/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp', '.test'] - targets = set(config.root.targets_to_build.split()) if not 'Sparc' in targets: config.unsupported = True diff --git a/test/CodeGen/SPARC/rem.ll b/test/CodeGen/SPARC/rem.ll new file mode 100644 index 0000000..abef1fc --- /dev/null +++ b/test/CodeGen/SPARC/rem.ll @@ -0,0 +1,39 @@ +; RUN: llc < %s -march=sparcv9 | FileCheck %s + +; CHECK-LABEL: test1: +; CHECK: sdivx %o0, %o1, %o2 +; CHECK-NEXT: mulx %o2, %o1, %o1 +; CHECK-NEXT: jmp 
%o7+8 +; CHECK-NEXT: sub %o0, %o1, %o0 + +define i64 @test1(i64 %X, i64 %Y) { + %tmp1 = srem i64 %X, %Y + ret i64 %tmp1 +} + +; CHECK-LABEL: test2: +; CHECK: udivx %o0, %o1, %o2 +; CHECK-NEXT: mulx %o2, %o1, %o1 +; CHECK-NEXT: jmp %o7+8 +; CHECK-NEXT: sub %o0, %o1, %o0 + +define i64 @test2(i64 %X, i64 %Y) { + %tmp1 = urem i64 %X, %Y + ret i64 %tmp1 +} + +; PR18150 +; CHECK-LABEL: test3 +; CHECK: sethi 2545, [[R0:%[gilo][0-7]]] +; CHECK: or [[R0]], 379, [[R1:%[gilo][0-7]]] +; CHECK: mulx %o0, [[R1]], [[R2:%[gilo][0-7]]] +; CHECK: udivx [[R2]], 1021, [[R3:%[gilo][0-7]]] +; CHECK: mulx [[R3]], 1021, [[R4:%[gilo][0-7]]] +; CHECK: sub [[R2]], [[R4]], %o0 + +define i64 @test3(i64 %b) { +entry: + %mul = mul i64 %b, 2606459 + %rem = urem i64 %mul, 1021 + ret i64 %rem +} diff --git a/test/CodeGen/SPARC/setjmp.ll b/test/CodeGen/SPARC/setjmp.ll new file mode 100644 index 0000000..39984fb --- /dev/null +++ b/test/CodeGen/SPARC/setjmp.ll @@ -0,0 +1,72 @@ +;RUN: llc -march=sparc < %s | FileCheck %s +;RUN: llc -march=sparcv9 < %s | FileCheck %s --check-prefix=V9 + + +%0 = type { [32 x i32] } +%struct.jmpbuf_env = type { i32, i32, [1 x %struct.__jmp_buf_tag], i32 } +%struct.__jmp_buf_tag = type { [3 x i32], i32, %0 } + +@jenv = common unnamed_addr global %struct.jmpbuf_env* null +@.cst = linker_private unnamed_addr constant [30 x i8] c"in bar with jmp_buf's id: %d\0A\00", align 64 + +; CHECK-LABEL: foo +; CHECK-DAG: st {{.+}}, [%i0] +; CHECK-DAG: st {{.+}}, [%i0+4] +; CHECK: call _setjmp +; CHECK: ld [%fp+{{.+}}], %[[R:[gilo][0-7]]] +; CHECK: st %o0, [%[[R]]+{{.+}}] + +; V9-LABEL: foo +; V9-DAG: st {{.+}}, [%i0] +; V9-DAG: st {{.+}}, [%i0+4] +; V9: call _setjmp +; V9: ldx [%fp+{{.+}}], %[[R:[gilo][0-7]]] +; V9: st %o0, [%[[R]]+{{.+}}] + +; Function Attrs: nounwind +define i32 @foo(%struct.jmpbuf_env* byval %inbuf) #0 { +entry: + %0 = getelementptr inbounds %struct.jmpbuf_env* %inbuf, i32 0, i32 0 + store i32 0, i32* %0, align 4, !tbaa !4 + %1 = getelementptr inbounds 
%struct.jmpbuf_env* %inbuf, i32 0, i32 1 + store i32 1, i32* %1, align 4, !tbaa !4 + %2 = getelementptr inbounds %struct.jmpbuf_env* %inbuf, i32 0, i32 2, i32 0 + %3 = call i32 @_setjmp(%struct.__jmp_buf_tag* %2) #2 + %4 = getelementptr inbounds %struct.jmpbuf_env* %inbuf, i32 0, i32 3 + store i32 %3, i32* %4, align 4, !tbaa !4 + store %struct.jmpbuf_env* %inbuf, %struct.jmpbuf_env** @jenv, align 4, !tbaa !3 + %5 = load i32* %1, align 4, !tbaa !4 + %6 = icmp eq i32 %5, 1 + %7 = icmp eq i32 %3, 0 + %or.cond = and i1 %6, %7 + br i1 %or.cond, label %"4.i", label %bar.exit + +"4.i": ; preds = %entry + call void @longjmp(%struct.__jmp_buf_tag* %2, i32 0) #1 + unreachable + +bar.exit: ; preds = %entry + %8 = load i32* %0, align 4, !tbaa !4 + %9 = call i32 (i8*, ...)* @printf(i8* noalias getelementptr inbounds ([30 x i8]* @.cst, i32 0, i32 0), i32 %8) #0 + ret i32 0 +} + +; Function Attrs: nounwind returns_twice +declare i32 @_setjmp(%struct.__jmp_buf_tag*) #2 + +; Function Attrs: noreturn nounwind +declare void @longjmp(%struct.__jmp_buf_tag*, i32) #1 + +; Function Attrs: nounwind +declare i32 @printf(i8* nocapture, ...) 
#0 + + +attributes #0 = { nounwind } +attributes #1 = { noreturn nounwind } +attributes #2 = { nounwind returns_twice } + +!0 = metadata !{metadata !"alias set 6: struct.jmpbuf_env*", metadata !1} +!1 = metadata !{metadata !1} +!2 = metadata !{metadata !"alias set 3: int", metadata !1} +!3 = metadata !{metadata !0, metadata !0, i64 0} +!4 = metadata !{metadata !2, metadata !2, i64 0} diff --git a/test/CodeGen/SPARC/tls.ll b/test/CodeGen/SPARC/tls.ll new file mode 100644 index 0000000..660ddff --- /dev/null +++ b/test/CodeGen/SPARC/tls.ll @@ -0,0 +1,73 @@ +; RUN: llc <%s -march=sparc -relocation-model=static | FileCheck %s --check-prefix=v8abs +; RUN: llc <%s -march=sparcv9 -relocation-model=static | FileCheck %s --check-prefix=v9abs +; RUN: llc <%s -march=sparc -relocation-model=pic | FileCheck %s --check-prefix=pic +; RUN: llc <%s -march=sparcv9 -relocation-model=pic | FileCheck %s --check-prefix=pic + + +@local_symbol = internal thread_local global i32 0 +@extern_symbol = external thread_local global i32 + +; v8abs-LABEL: test_tls_local +; v8abs: sethi %tle_hix22(local_symbol), [[R0:%[goli][0-7]]] +; v8abs: xor [[R0]], %tle_lox10(local_symbol), [[R1:%[goli][0-7]]] +; v8abs: ld [%g7+[[R1]]] + +; v9abs-LABEL: test_tls_local +; v9abs: sethi %tle_hix22(local_symbol), [[R0:%[goli][0-7]]] +; v9abs: xor [[R0]], %tle_lox10(local_symbol), [[R1:%[goli][0-7]]] +; v9abs: ld [%g7+[[R1]]] + +; pic-LABEL: test_tls_local +; pic: or {{%[goli][0-7]}}, %lo(_GLOBAL_OFFSET_TABLE_+{{.+}}), [[PC:%[goli][0-7]]] +; pic: add [[PC]], %o7, [[GOTBASE:%[goli][0-7]]] +; pic-DAG: sethi %tldm_hi22(local_symbol), [[R0:%[goli][0-7]]] +; pic-DAG: add [[R0]], %tldm_lo10(local_symbol), [[R1:%[goli][0-7]]] +; pic-DAG: add [[GOTBASE]], [[R1]], %o0, %tldm_add(local_symbol) +; pic-DAG: call __tls_get_addr, %tldm_call(local_symbol) +; pic-DAG: sethi %tldo_hix22(local_symbol), [[R2:%[goli][0-7]]] +; pic-DAG: xor [[R2]], %tldo_lox10(local_symbol), [[R3:%[goli][0-7]]] +; pic: add %o0, [[R3]], {{.+}}, 
%tldo_add(local_symbol) + +define i32 @test_tls_local() { +entry: + %0 = load i32* @local_symbol, align 4 + %1 = add i32 %0, 1 + store i32 %1, i32* @local_symbol, align 4 + ret i32 %1 +} + + +; v8abs-LABEL: test_tls_extern +; v8abs: or {{%[goli][0-7]}}, %lo(_GLOBAL_OFFSET_TABLE_+{{.+}}), [[PC:%[goli][0-7]]] +; v8abs: add [[PC]], %o7, %[[GOTBASE:[goli][0-7]]] +; v8abs: sethi %tie_hi22(extern_symbol), [[R1:%[goli][0-7]]] +; v8abs: add [[R1]], %tie_lo10(extern_symbol), %[[R2:[goli][0-7]]] +; v8abs: ld [%[[GOTBASE]]+%[[R2]]], [[R3:%[goli][0-7]]], %tie_ld(extern_symbol) +; v8abs: add %g7, [[R3]], %[[R4:[goli][0-7]]], %tie_add(extern_symbol) +; v8abs: ld [%[[R4]]] + +; v9abs-LABEL: test_tls_extern +; v9abs: or {{%[goli][0-7]}}, %lo(_GLOBAL_OFFSET_TABLE_+{{.+}}), [[PC:%[goli][0-7]]] +; v9abs: add [[PC]], %o7, %[[GOTBASE:[goli][0-7]]] +; v9abs: sethi %tie_hi22(extern_symbol), [[R1:%[goli][0-7]]] +; v9abs: add [[R1]], %tie_lo10(extern_symbol), %[[R2:[goli][0-7]]] +; v9abs: ldx [%[[GOTBASE]]+%[[R2]]], [[R3:%[goli][0-7]]], %tie_ldx(extern_symbol) +; v9abs: add %g7, [[R3]], %[[R4:[goli][0-7]]], %tie_add(extern_symbol) +; v9abs: ld [%[[R4]]] + +; pic-LABEL: test_tls_extern +; pic: or {{%[goli][0-7]}}, %lo(_GLOBAL_OFFSET_TABLE_+{{.+}}), [[PC:%[goli][0-7]]] +; pic: add [[PC]], %o7, [[GOTBASE:%[goli][0-7]]] +; pic: sethi %tgd_hi22(extern_symbol), [[R0:%[goli][0-7]]] +; pic: add [[R0]], %tgd_lo10(extern_symbol), [[R1:%[goli][0-7]]] +; pic: add [[GOTBASE]], [[R1]], %o0, %tgd_add(extern_symbol) +; pic: call __tls_get_addr, %tgd_call(extern_symbol) +; pic-NEXT: nop + +define i32 @test_tls_extern() { +entry: + %0 = load i32* @extern_symbol, align 4 + %1 = add i32 %0, 1 + store i32 %1, i32* @extern_symbol, align 4 + ret i32 %1 +} diff --git a/test/CodeGen/SystemZ/Large/branch-range-09.py b/test/CodeGen/SystemZ/Large/branch-range-09.py new file mode 100644 index 0000000..b3fd813 --- /dev/null +++ b/test/CodeGen/SystemZ/Large/branch-range-09.py @@ -0,0 +1,107 @@ +# Test 32-bit COMPARE 
LOGICAL AND BRANCH in cases where the sheer number of +# instructions causes some branches to be out of range. +# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s + +# Construct: +# +# before0: +# conditional branch to after0 +# ... +# beforeN: +# conditional branch to after0 +# main: +# 0xffcc bytes, from MVIY instructions +# conditional branch to main +# after0: +# ... +# conditional branch to main +# afterN: +# +# Each conditional branch sequence occupies 12 bytes if it uses a short +# branch and 14 if it uses a long one. The ones before "main:" have to +# take the branch length into account, which is 6 for short branches, +# so the final (0x34 - 6) / 12 == 3 blocks can use short branches. +# The ones after "main:" do not, so the first 0x34 / 12 == 4 blocks +# can use short branches. +# +# CHECK: lb [[REG:%r[0-5]]], 0(%r3) +# CHECK: clr %r4, [[REG]] +# CHECK: jgl [[LABEL:\.L[^ ]*]] +# CHECK: lb [[REG:%r[0-5]]], 1(%r3) +# CHECK: clr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 2(%r3) +# CHECK: clr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 3(%r3) +# CHECK: clr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 4(%r3) +# CHECK: clr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 5(%r3) +# CHECK: clrjl %r4, [[REG]], [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 6(%r3) +# CHECK: clrjl %r4, [[REG]], [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 7(%r3) +# CHECK: clrjl %r4, [[REG]], [[LABEL]] +# ...main goes here... 
+# CHECK: lb [[REG:%r[0-5]]], 25(%r3) +# CHECK: clrjl %r4, [[REG]], [[LABEL:\.L[^ ]*]] +# CHECK: lb [[REG:%r[0-5]]], 26(%r3) +# CHECK: clrjl %r4, [[REG]], [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 27(%r3) +# CHECK: clrjl %r4, [[REG]], [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 28(%r3) +# CHECK: clrjl %r4, [[REG]], [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 29(%r3) +# CHECK: clr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 30(%r3) +# CHECK: clr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 31(%r3) +# CHECK: clr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# CHECK: lb [[REG:%r[0-5]]], 32(%r3) +# CHECK: clr %r4, [[REG]] +# CHECK: jgl [[LABEL]] + +branch_blocks = 8 +main_size = 0xffcc + +print 'define void @f1(i8 *%base, i8 *%stop, i32 %limit) {' +print 'entry:' +print ' br label %before0' +print '' + +for i in xrange(branch_blocks): + next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main' + print 'before%d:' % i + print ' %%bstop%d = getelementptr i8 *%%stop, i64 %d' % (i, i) + print ' %%bcur%d = load volatile i8 *%%bstop%d' % (i, i) + print ' %%bext%d = sext i8 %%bcur%d to i32' % (i, i) + print ' %%btest%d = icmp ult i32 %%limit, %%bext%d' % (i, i) + print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next) + print '' + +print '%s:' % next +a, b = 1, 1 +for i in xrange(0, main_size, 6): + a, b = b, a + b + offset = 4096 + b % 500000 + value = a % 256 + print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset) + print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i) + +for i in xrange(branch_blocks): + print ' %%astop%d = getelementptr i8 *%%stop, i64 %d' % (i, i + 25) + print ' %%acur%d = load volatile i8 *%%astop%d' % (i, i) + print ' %%aext%d = sext i8 %%acur%d to i32' % (i, i) + print ' %%atest%d = icmp ult i32 %%limit, %%aext%d' % (i, i) + print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i) + print '' + print 'after%d:' % i + +print ' ret void' +print '}' diff --git 
a/test/CodeGen/SystemZ/Large/branch-range-10.py b/test/CodeGen/SystemZ/Large/branch-range-10.py new file mode 100644 index 0000000..3aeea3e --- /dev/null +++ b/test/CodeGen/SystemZ/Large/branch-range-10.py @@ -0,0 +1,111 @@ +# Test 64-bit COMPARE LOGICAL AND BRANCH in cases where the sheer number of +# instructions causes some branches to be out of range. +# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s + +# Construct: +# +# before0: +# conditional branch to after0 +# ... +# beforeN: +# conditional branch to after0 +# main: +# 0xffcc bytes, from MVIY instructions +# conditional branch to main +# after0: +# ... +# conditional branch to main +# afterN: +# +# Each conditional branch sequence occupies 12 bytes if it uses a short +# branch and 16 if it uses a long one. The ones before "main:" have to +# take the branch length into account, which is 6 for short branches, +# so the final (0x34 - 6) / 12 == 3 blocks can use short branches. +# The ones after "main:" do not, so the first 0x34 / 12 == 4 blocks +# can use short branches. The conservative algorithm we use makes +# one of the forward branches unnecessarily long, as noted in the +# check output below. +# +# CHECK: lgb [[REG:%r[0-5]]], 0(%r3) +# CHECK: clgr %r4, [[REG]] +# CHECK: jgl [[LABEL:\.L[^ ]*]] +# CHECK: lgb [[REG:%r[0-5]]], 1(%r3) +# CHECK: clgr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 2(%r3) +# CHECK: clgr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 3(%r3) +# CHECK: clgr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 4(%r3) +# CHECK: clgr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# ...as mentioned above, the next one could be a CLGRJL instead... +# CHECK: lgb [[REG:%r[0-5]]], 5(%r3) +# CHECK: clgr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 6(%r3) +# CHECK: clgrjl %r4, [[REG]], [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 7(%r3) +# CHECK: clgrjl %r4, [[REG]], [[LABEL]] +# ...main goes here... 
+# CHECK: lgb [[REG:%r[0-5]]], 25(%r3) +# CHECK: clgrjl %r4, [[REG]], [[LABEL:\.L[^ ]*]] +# CHECK: lgb [[REG:%r[0-5]]], 26(%r3) +# CHECK: clgrjl %r4, [[REG]], [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 27(%r3) +# CHECK: clgrjl %r4, [[REG]], [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 28(%r3) +# CHECK: clgrjl %r4, [[REG]], [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 29(%r3) +# CHECK: clgr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 30(%r3) +# CHECK: clgr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 31(%r3) +# CHECK: clgr %r4, [[REG]] +# CHECK: jgl [[LABEL]] +# CHECK: lgb [[REG:%r[0-5]]], 32(%r3) +# CHECK: clgr %r4, [[REG]] +# CHECK: jgl [[LABEL]] + +branch_blocks = 8 +main_size = 0xffcc + +print 'define void @f1(i8 *%base, i8 *%stop, i64 %limit) {' +print 'entry:' +print ' br label %before0' +print '' + +for i in xrange(branch_blocks): + next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main' + print 'before%d:' % i + print ' %%bstop%d = getelementptr i8 *%%stop, i64 %d' % (i, i) + print ' %%bcur%d = load volatile i8 *%%bstop%d' % (i, i) + print ' %%bext%d = sext i8 %%bcur%d to i64' % (i, i) + print ' %%btest%d = icmp ult i64 %%limit, %%bext%d' % (i, i) + print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next) + print '' + +print '%s:' % next +a, b = 1, 1 +for i in xrange(0, main_size, 6): + a, b = b, a + b + offset = 4096 + b % 500000 + value = a % 256 + print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset) + print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i) + +for i in xrange(branch_blocks): + print ' %%astop%d = getelementptr i8 *%%stop, i64 %d' % (i, i + 25) + print ' %%acur%d = load volatile i8 *%%astop%d' % (i, i) + print ' %%aext%d = sext i8 %%acur%d to i64' % (i, i) + print ' %%atest%d = icmp ult i64 %%limit, %%aext%d' % (i, i) + print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i) + print '' + print 'after%d:' % i + +print ' ret void' +print '}' diff --git 
a/test/CodeGen/SystemZ/Large/branch-range-11.py b/test/CodeGen/SystemZ/Large/branch-range-11.py new file mode 100644 index 0000000..034902c --- /dev/null +++ b/test/CodeGen/SystemZ/Large/branch-range-11.py @@ -0,0 +1,127 @@ +# Test 32-bit COMPARE LOGICAL IMMEDIATE AND BRANCH in cases where the sheer +# number of instructions causes some branches to be out of range. +# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s + +# Construct: +# +# before0: +# conditional branch to after0 +# ... +# beforeN: +# conditional branch to after0 +# main: +# 0xffc6 bytes, from MVIY instructions +# conditional branch to main +# after0: +# ... +# conditional branch to main +# afterN: +# +# Each conditional branch sequence occupies 14 bytes if it uses a short +# branch and 20 if it uses a long one. The ones before "main:" have to +# take the branch length into account, which is 6 for short branches, +# so the final (0x3a - 6) / 14 == 3 blocks can use short branches. +# The ones after "main:" do not, so the first 0x3a / 14 == 4 blocks +# can use short branches. The conservative algorithm we use makes +# one of the forward branches unnecessarily long, as noted in the +# check output below. +# +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clfi [[REG]], 50 +# CHECK: jgl [[LABEL:\.L[^ ]*]] +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clfi [[REG]], 51 +# CHECK: jgl [[LABEL]] +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clfi [[REG]], 52 +# CHECK: jgl [[LABEL]] +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clfi [[REG]], 53 +# CHECK: jgl [[LABEL]] +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clfi [[REG]], 54 +# CHECK: jgl [[LABEL]] +# ...as mentioned above, the next one could be a CLIJL instead... 
+# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clfi [[REG]], 55 +# CHECK: jgl [[LABEL]] +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clijl [[REG]], 56, [[LABEL]] +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clijl [[REG]], 57, [[LABEL]] +# ...main goes here... +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clijl [[REG]], 100, [[LABEL:\.L[^ ]*]] +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clijl [[REG]], 101, [[LABEL]] +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clijl [[REG]], 102, [[LABEL]] +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clijl [[REG]], 103, [[LABEL]] +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clfi [[REG]], 104 +# CHECK: jgl [[LABEL]] +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clfi [[REG]], 105 +# CHECK: jgl [[LABEL]] +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clfi [[REG]], 106 +# CHECK: jgl [[LABEL]] +# CHECK: l [[REG:%r[0-5]]], 0(%r3) +# CHECK: s [[REG]], 0(%r4) +# CHECK: clfi [[REG]], 107 +# CHECK: jgl [[LABEL]] + +branch_blocks = 8 +main_size = 0xffc6 + +print 'define void @f1(i8 *%base, i32 *%stopa, i32 *%stopb) {' +print 'entry:' +print ' br label %before0' +print '' + +for i in xrange(branch_blocks): + next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main' + print 'before%d:' % i + print ' %%bcur%da = load volatile i32 *%%stopa' % i + print ' %%bcur%db = load volatile i32 *%%stopb' % i + print ' %%bsub%d = sub i32 %%bcur%da, %%bcur%db' % (i, i, i) + print ' %%btest%d = icmp ult i32 %%bsub%d, %d' % (i, i, i + 50) + print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next) + print '' + +print '%s:' % next +a, b = 1, 1 +for i in xrange(0, main_size, 6): + a, b = b, a + b + offset = 4096 + b % 500000 + value = a % 256 + print ' %%ptr%d = 
getelementptr i8 *%%base, i64 %d' % (i, offset) + print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i) + +for i in xrange(branch_blocks): + print ' %%acur%da = load volatile i32 *%%stopa' % i + print ' %%acur%db = load volatile i32 *%%stopb' % i + print ' %%asub%d = sub i32 %%acur%da, %%acur%db' % (i, i, i) + print ' %%atest%d = icmp ult i32 %%asub%d, %d' % (i, i, i + 100) + print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i) + print '' + print 'after%d:' % i + +print ' ret void' +print '}' diff --git a/test/CodeGen/SystemZ/Large/branch-range-12.py b/test/CodeGen/SystemZ/Large/branch-range-12.py new file mode 100644 index 0000000..007d477 --- /dev/null +++ b/test/CodeGen/SystemZ/Large/branch-range-12.py @@ -0,0 +1,127 @@ +# Test 64-bit COMPARE LOGICAL IMMEDIATE AND BRANCH in cases where the sheer +# number of instructions causes some branches to be out of range. +# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s + +# Construct: +# +# before0: +# conditional branch to after0 +# ... +# beforeN: +# conditional branch to after0 +# main: +# 0xffb4 bytes, from MVIY instructions +# conditional branch to main +# after0: +# ... +# conditional branch to main +# afterN: +# +# Each conditional branch sequence occupies 18 bytes if it uses a short +# branch and 24 if it uses a long one. The ones before "main:" have to +# take the branch length into account, which is 6 for short branches, +# so the final (0x4c - 6) / 18 == 3 blocks can use short branches. +# The ones after "main:" do not, so the first 0x4c / 18 == 4 blocks +# can use short branches. The conservative algorithm we use makes +# one of the forward branches unnecessarily long, as noted in the +# check output below. 
+# +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgfi [[REG]], 50 +# CHECK: jgl [[LABEL:\.L[^ ]*]] +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgfi [[REG]], 51 +# CHECK: jgl [[LABEL]] +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgfi [[REG]], 52 +# CHECK: jgl [[LABEL]] +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgfi [[REG]], 53 +# CHECK: jgl [[LABEL]] +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgfi [[REG]], 54 +# CHECK: jgl [[LABEL]] +# ...as mentioned above, the next one could be a CLGIJL instead... +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgfi [[REG]], 55 +# CHECK: jgl [[LABEL]] +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgijl [[REG]], 56, [[LABEL]] +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgijl [[REG]], 57, [[LABEL]] +# ...main goes here... 
+# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgijl [[REG]], 100, [[LABEL:\.L[^ ]*]] +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgijl [[REG]], 101, [[LABEL]] +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgijl [[REG]], 102, [[LABEL]] +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgijl [[REG]], 103, [[LABEL]] +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgfi [[REG]], 104 +# CHECK: jgl [[LABEL]] +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgfi [[REG]], 105 +# CHECK: jgl [[LABEL]] +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgfi [[REG]], 106 +# CHECK: jgl [[LABEL]] +# CHECK: lg [[REG:%r[0-5]]], 0(%r3) +# CHECK: sg [[REG]], 0(%r4) +# CHECK: clgfi [[REG]], 107 +# CHECK: jgl [[LABEL]] + +branch_blocks = 8 +main_size = 0xffb4 + +print 'define void @f1(i8 *%base, i64 *%stopa, i64 *%stopb) {' +print 'entry:' +print ' br label %before0' +print '' + +for i in xrange(branch_blocks): + next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main' + print 'before%d:' % i + print ' %%bcur%da = load volatile i64 *%%stopa' % i + print ' %%bcur%db = load volatile i64 *%%stopb' % i + print ' %%bsub%d = sub i64 %%bcur%da, %%bcur%db' % (i, i, i) + print ' %%btest%d = icmp ult i64 %%bsub%d, %d' % (i, i, i + 50) + print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next) + print '' + +print '%s:' % next +a, b = 1, 1 +for i in xrange(0, main_size, 6): + a, b = b, a + b + offset = 4096 + b % 500000 + value = a % 256 + print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset) + print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i) + +for i in xrange(branch_blocks): + print ' %%acur%da = load volatile i64 *%%stopa' % i + print ' %%acur%db = load volatile i64 *%%stopb' % i + print ' %%asub%d = sub i64 %%acur%da, %%acur%db' % (i, i, i) + print ' 
%%atest%d = icmp ult i64 %%asub%d, %d' % (i, i, i + 100) + print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i) + print '' + print 'after%d:' % i + +print ' ret void' +print '}' diff --git a/test/CodeGen/SystemZ/alias-01.ll b/test/CodeGen/SystemZ/alias-01.ll new file mode 100644 index 0000000..8839aad --- /dev/null +++ b/test/CodeGen/SystemZ/alias-01.ll @@ -0,0 +1,19 @@ +; Test 32-bit ANDs in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check that there are no spills. +define void @f1(<16 x i32> *%src1, <16 x float> *%dest) { +; CHECK-LABEL: f1: +; CHECK-NOT: %r15 +; CHECK: br %r14 + %val = load <16 x i32> *%src1, !tbaa !1 + %add = add <16 x i32> %val, %val + %res = bitcast <16 x i32> %add to <16 x float> + store <16 x float> %res, <16 x float> *%dest, !tbaa !2 + ret void +} + +!0 = metadata !{ metadata !"root" } +!1 = metadata !{ metadata !"set1", metadata !0 } +!2 = metadata !{ metadata !"set2", metadata !0 } diff --git a/test/CodeGen/SystemZ/alloca-02.ll b/test/CodeGen/SystemZ/alloca-02.ll index b6ed7f7..b5787b1 100644 --- a/test/CodeGen/SystemZ/alloca-02.ll +++ b/test/CodeGen/SystemZ/alloca-02.ll @@ -21,18 +21,21 @@ define i64 @f1(i64 %length, i64 %index) { ; ; CHECK-C-LABEL: f1: ; CHECK-C: lgr %r15, [[ADDR:%r[1-5]]] -; CHECK-C: la [[TMP:%r[1-5]]], 160(%r3,[[ADDR]]) -; CHECK-C: mvi 0([[TMP]]), 2 +; CHECK-C-DAG: la %r2, 160([[ADDR]]) +; CHECK-C-DAG: lhi [[TMP:%r[0-5]]], 2 +; CHECK-C: stc [[TMP]], 0({{%r3,%r2|%r2,%r3}}) ; ; CHECK-D-LABEL: f1: ; CHECK-D: lgr %r15, [[ADDR:%r[1-5]]] -; CHECK-D: la [[TMP:%r[1-5]]], 160(%r3,[[ADDR]]) -; CHECK-D: mvi 4095([[TMP]]), 3 +; CHECK-D-DAG: la %r2, 160([[ADDR]]) +; CHECK-D-DAG: lhi [[TMP:%r[0-5]]], 3 +; CHECK-D: stc [[TMP]], 4095({{%r3,%r2|%r2,%r3}}) ; ; CHECK-E-LABEL: f1: ; CHECK-E: lgr %r15, [[ADDR:%r[1-5]]] -; CHECK-E: la [[TMP:%r[1-5]]], 160(%r3,[[ADDR]]) -; CHECK-E: mviy 4096([[TMP]]), 4 +; CHECK-E-DAG: la %r2, 160([[ADDR]]) +; CHECK-E-DAG: lhi 
[[TMP:%r[0-5]]], 4 +; CHECK-E: stcy [[TMP]], 4096({{%r3,%r2|%r2,%r3}}) %a = alloca i8, i64 %length store volatile i8 0, i8 *%a %b = getelementptr i8 *%a, i64 4095 diff --git a/test/CodeGen/SystemZ/and-08.ll b/test/CodeGen/SystemZ/and-08.ll new file mode 100644 index 0000000..7ded115 --- /dev/null +++ b/test/CodeGen/SystemZ/and-08.ll @@ -0,0 +1,378 @@ +; Test memory-to-memory ANDs. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +@g1src = global i8 1 +@g1dst = global i8 1 +@g2src = global i16 2 +@g2dst = global i16 2 + +; Test the simple i8 case. +define void @f1(i8 *%ptr1) { +; CHECK-LABEL: f1: +; CHECK: nc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i8 *%ptr1, i64 1 + %val = load i8 *%ptr1 + %old = load i8 *%ptr2 + %and = and i8 %val, %old + store i8 %and, i8 *%ptr2 + ret void +} + +; ...and again in reverse. +define void @f2(i8 *%ptr1) { +; CHECK-LABEL: f2: +; CHECK: nc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i8 *%ptr1, i64 1 + %val = load i8 *%ptr1 + %old = load i8 *%ptr2 + %and = and i8 %old, %val + store i8 %and, i8 *%ptr2 + ret void +} + +; Test i8 cases where one value is zero-extended to 32 bits and the other +; sign-extended. +define void @f3(i8 *%ptr1) { +; CHECK-LABEL: f3: +; CHECK: nc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i8 *%ptr1, i64 1 + %val = load i8 *%ptr1 + %extval = zext i8 %val to i32 + %old = load i8 *%ptr2 + %extold = sext i8 %old to i32 + %and = and i32 %extval, %extold + %trunc = trunc i32 %and to i8 + store i8 %trunc, i8 *%ptr2 + ret void +} + +; ...and again with the extension types reversed. 
+define void @f4(i8 *%ptr1) { +; CHECK-LABEL: f4: +; CHECK: nc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i8 *%ptr1, i64 1 + %val = load i8 *%ptr1 + %extval = sext i8 %val to i32 + %old = load i8 *%ptr2 + %extold = zext i8 %old to i32 + %and = and i32 %extval, %extold + %trunc = trunc i32 %and to i8 + store i8 %trunc, i8 *%ptr2 + ret void +} + +; ...and again with two sign extensions. +define void @f5(i8 *%ptr1) { +; CHECK-LABEL: f5: +; CHECK: nc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i8 *%ptr1, i64 1 + %val = load i8 *%ptr1 + %extval = sext i8 %val to i32 + %old = load i8 *%ptr2 + %extold = sext i8 %old to i32 + %and = and i32 %extval, %extold + %trunc = trunc i32 %and to i8 + store i8 %trunc, i8 *%ptr2 + ret void +} + +; ...and again with two zero extensions. +define void @f6(i8 *%ptr1) { +; CHECK-LABEL: f6: +; CHECK: nc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i8 *%ptr1, i64 1 + %val = load i8 *%ptr1 + %extval = zext i8 %val to i32 + %old = load i8 *%ptr2 + %extold = zext i8 %old to i32 + %and = and i32 %extval, %extold + %trunc = trunc i32 %and to i8 + store i8 %trunc, i8 *%ptr2 + ret void +} + +; Test i8 cases where the value is extended to 64 bits (just one case +; this time). +define void @f7(i8 *%ptr1) { +; CHECK-LABEL: f7: +; CHECK: nc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i8 *%ptr1, i64 1 + %val = load i8 *%ptr1 + %extval = sext i8 %val to i64 + %old = load i8 *%ptr2 + %extold = zext i8 %old to i64 + %and = and i64 %extval, %extold + %trunc = trunc i64 %and to i8 + store i8 %trunc, i8 *%ptr2 + ret void +} + +; Test the simple i16 case. +define void @f8(i16 *%ptr1) { +; CHECK-LABEL: f8: +; CHECK: nc 2(2,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i16 *%ptr1, i64 1 + %val = load i16 *%ptr1 + %old = load i16 *%ptr2 + %and = and i16 %val, %old + store i16 %and, i16 *%ptr2 + ret void +} + +; Test i16 cases where the value is extended to 32 bits. 
+define void @f9(i16 *%ptr1) { +; CHECK-LABEL: f9: +; CHECK: nc 2(2,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i16 *%ptr1, i64 1 + %val = load i16 *%ptr1 + %extval = zext i16 %val to i32 + %old = load i16 *%ptr2 + %extold = sext i16 %old to i32 + %and = and i32 %extval, %extold + %trunc = trunc i32 %and to i16 + store i16 %trunc, i16 *%ptr2 + ret void +} + +; Test i16 cases where the value is extended to 64 bits. +define void @f10(i16 *%ptr1) { +; CHECK-LABEL: f10: +; CHECK: nc 2(2,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i16 *%ptr1, i64 1 + %val = load i16 *%ptr1 + %extval = sext i16 %val to i64 + %old = load i16 *%ptr2 + %extold = zext i16 %old to i64 + %and = and i64 %extval, %extold + %trunc = trunc i64 %and to i16 + store i16 %trunc, i16 *%ptr2 + ret void +} + +; Test the simple i32 case. +define void @f11(i32 *%ptr1) { +; CHECK-LABEL: f11: +; CHECK: nc 4(4,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i32 *%ptr1, i64 1 + %val = load i32 *%ptr1 + %old = load i32 *%ptr2 + %and = and i32 %old, %val + store i32 %and, i32 *%ptr2 + ret void +} + +; Test i32 cases where the value is extended to 64 bits. +define void @f12(i32 *%ptr1) { +; CHECK-LABEL: f12: +; CHECK: nc 4(4,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i32 *%ptr1, i64 1 + %val = load i32 *%ptr1 + %extval = sext i32 %val to i64 + %old = load i32 *%ptr2 + %extold = zext i32 %old to i64 + %and = and i64 %extval, %extold + %trunc = trunc i64 %and to i32 + store i32 %trunc, i32 *%ptr2 + ret void +} + +; Test the i64 case. +define void @f13(i64 *%ptr1) { +; CHECK-LABEL: f13: +; CHECK: nc 8(8,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i64 *%ptr1, i64 1 + %val = load i64 *%ptr1 + %old = load i64 *%ptr2 + %and = and i64 %old, %val + store i64 %and, i64 *%ptr2 + ret void +} + +; Make sure that we don't use NC if the first load is volatile. 
+define void @f14(i64 *%ptr1) { +; CHECK-LABEL: f14: +; CHECK-NOT: nc +; CHECK: br %r14 + %ptr2 = getelementptr i64 *%ptr1, i64 1 + %val = load volatile i64 *%ptr1 + %old = load i64 *%ptr2 + %and = and i64 %old, %val + store i64 %and, i64 *%ptr2 + ret void +} + +; ...likewise the second. +define void @f15(i64 *%ptr1) { +; CHECK-LABEL: f15: +; CHECK-NOT: nc +; CHECK: br %r14 + %ptr2 = getelementptr i64 *%ptr1, i64 1 + %val = load i64 *%ptr1 + %old = load volatile i64 *%ptr2 + %and = and i64 %old, %val + store i64 %and, i64 *%ptr2 + ret void +} + +; ...likewise the store. +define void @f16(i64 *%ptr1) { +; CHECK-LABEL: f16: +; CHECK-NOT: nc +; CHECK: br %r14 + %ptr2 = getelementptr i64 *%ptr1, i64 1 + %val = load i64 *%ptr1 + %old = load i64 *%ptr2 + %and = and i64 %old, %val + store volatile i64 %and, i64 *%ptr2 + ret void +} + +; Test that NC is not used for aligned loads and stores if there is +; no way of telling whether they alias. We don't want to use NC in +; cases where the addresses could be equal. +define void @f17(i64 *%ptr1, i64 *%ptr2) { +; CHECK-LABEL: f17: +; CHECK-NOT: nc +; CHECK: br %r14 + %val = load i64 *%ptr1 + %old = load i64 *%ptr2 + %and = and i64 %old, %val + store i64 %and, i64 *%ptr2 + ret void +} + +; ...but if one of the loads isn't aligned, we can't be sure. +define void @f18(i64 *%ptr1, i64 *%ptr2) { +; CHECK-LABEL: f18: +; CHECK-NOT: nc +; CHECK: br %r14 + %val = load i64 *%ptr1, align 2 + %old = load i64 *%ptr2 + %and = and i64 %old, %val + store i64 %and, i64 *%ptr2 + ret void +} + +; Repeat the previous test with the operands in the opposite order. +define void @f19(i64 *%ptr1, i64 *%ptr2) { +; CHECK-LABEL: f19: +; CHECK-NOT: nc +; CHECK: br %r14 + %val = load i64 *%ptr1, align 2 + %old = load i64 *%ptr2 + %and = and i64 %val, %old + store i64 %and, i64 *%ptr2 + ret void +} + +; ...and again with the other operand being unaligned. 
+define void @f20(i64 *%ptr1, i64 *%ptr2) { +; CHECK-LABEL: f20: +; CHECK-NOT: nc +; CHECK: br %r14 + %val = load i64 *%ptr1 + %old = load i64 *%ptr2, align 2 + %and = and i64 %val, %old + store i64 %and, i64 *%ptr2, align 2 + ret void +} + +; Test a case where there is definite overlap. +define void @f21(i64 %base) { +; CHECK-LABEL: f21: +; CHECK-NOT: nc +; CHECK: br %r14 + %add = add i64 %base, 1 + %ptr1 = inttoptr i64 %base to i64 * + %ptr2 = inttoptr i64 %add to i64 * + %val = load i64 *%ptr1 + %old = load i64 *%ptr2, align 1 + %and = and i64 %old, %val + store i64 %and, i64 *%ptr2, align 1 + ret void +} + +; Test that we can use NC for global addresses for i8. +define void @f22(i8 *%ptr) { +; CHECK-LABEL: f22: +; CHECK-DAG: larl [[SRC:%r[0-5]]], g1src +; CHECK-DAG: larl [[DST:%r[0-5]]], g1dst +; CHECK: nc 0(1,[[DST]]), 0([[SRC]]) +; CHECK: br %r14 + %val = load i8 *@g1src + %old = load i8 *@g1dst + %and = and i8 %val, %old + store i8 %and, i8 *@g1dst + ret void +} + +; Test that we use NC even where LHRL and STHRL are available. +define void @f23(i16 *%ptr) { +; CHECK-LABEL: f23: +; CHECK-DAG: larl [[SRC:%r[0-5]]], g2src +; CHECK-DAG: larl [[DST:%r[0-5]]], g2dst +; CHECK: nc 0(2,[[DST]]), 0([[SRC]]) +; CHECK: br %r14 + %val = load i16 *@g2src + %old = load i16 *@g2dst + %and = and i16 %val, %old + store i16 %and, i16 *@g2dst + ret void +} + +; Test a case where offset disambiguation is enough. +define void @f24(i64 *%ptr1) { +; CHECK-LABEL: f24: +; CHECK: nc 8(8,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i64 *%ptr1, i64 1 + %val = load i64 *%ptr1, align 1 + %old = load i64 *%ptr2, align 1 + %and = and i64 %old, %val + store i64 %and, i64 *%ptr2, align 1 + ret void +} + +; Test a case where TBAA tells us there is no alias. 
+define void @f25(i64 *%ptr1, i64 *%ptr2) { +; CHECK-LABEL: f25: +; CHECK: nc 0(8,%r3), 0(%r2) +; CHECK: br %r14 + %val = load i64 *%ptr1, align 2, !tbaa !3 + %old = load i64 *%ptr2, align 2, !tbaa !4 + %and = and i64 %old, %val + store i64 %and, i64 *%ptr2, align 2, !tbaa !4 + ret void +} + +; Test a case where TBAA information is present but doesn't help. +define void @f26(i64 *%ptr1, i64 *%ptr2) { +; CHECK-LABEL: f26: +; CHECK-NOT: nc +; CHECK: br %r14 + %val = load i64 *%ptr1, align 2, !tbaa !3 + %old = load i64 *%ptr2, align 2, !tbaa !3 + %and = and i64 %old, %val + store i64 %and, i64 *%ptr2, align 2, !tbaa !3 + ret void +} + +!0 = metadata !{ metadata !"root" } +!1 = metadata !{ metadata !"set1", metadata !0 } +!2 = metadata !{ metadata !"set2", metadata !0 } +!3 = metadata !{ metadata !1, metadata !1, i64 0} +!4 = metadata !{ metadata !2, metadata !2, i64 0} diff --git a/test/CodeGen/SystemZ/args-06.ll b/test/CodeGen/SystemZ/args-06.ll index a89fe9b..644fcec9 100644 --- a/test/CodeGen/SystemZ/args-06.ll +++ b/test/CodeGen/SystemZ/args-06.ll @@ -27,8 +27,8 @@ define i16 @f2(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e, i16 %f, i16 %g) { ; CHECK: ar %r2, %r4 ; CHECK: ar %r2, %r5 ; CHECK: ar %r2, %r6 -; CHECK: lh {{%r[0-5]}}, 166(%r15) -; CHECK: lh {{%r[0-5]}}, 174(%r15) +; CHECK: ah %r2, 166(%r15) +; CHECK: ah %r2, 174(%r15) ; CHECK: br %r14 %addb = add i16 %a, %b %addc = add i16 %addb, %c diff --git a/test/CodeGen/SystemZ/asm-17.ll b/test/CodeGen/SystemZ/asm-17.ll index 33234fc..7bc9da3 100644 --- a/test/CodeGen/SystemZ/asm-17.ll +++ b/test/CodeGen/SystemZ/asm-17.ll @@ -80,3 +80,26 @@ define float @f7(float %in) { call void asm sideeffect "blah", "~{f0},~{cc}"() ret float %in } + +; Test that both registers in a GR128 pair get hoisted. 
+define void @f8(i32 %count) { +; CHECK-LABEL: f8 +; CHECK-DAG: lhi %r0, 0 +; CHECK-DAG: lhi %r1, 1 +; CHECK: %loop +; CHECK-NOT: %r +; CHECK: blah %r0, %r1 +; CHECK: br %r14 +entry: + br label %loop + +loop: + %this = phi i32 [ %count, %entry ], [ %next, %loop ] + call void asm sideeffect "blah $0, $1", "{r0},{r1}" (i32 0, i32 1) + %next = sub i32 %this, 1 + %cmp = icmp ne i32 %next, 0 + br i1 %cmp, label %loop, label %exit + +exit: + ret void +} diff --git a/test/CodeGen/SystemZ/asm-18.ll b/test/CodeGen/SystemZ/asm-18.ll new file mode 100644 index 0000000..d60654b --- /dev/null +++ b/test/CodeGen/SystemZ/asm-18.ll @@ -0,0 +1,745 @@ +; Test high-word operations, using "h" constraints to force a high +; register and "r" constraints to force a low register. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Test loads and stores involving mixtures of high and low registers. +define void @f1(i32 *%ptr1, i32 *%ptr2) { +; CHECK-LABEL: f1: +; CHECK-DAG: lfh [[REG1:%r[0-5]]], 0(%r2) +; CHECK-DAG: l [[REG2:%r[0-5]]], 0(%r3) +; CHECK-DAG: lfh [[REG3:%r[0-5]]], 4096(%r2) +; CHECK-DAG: ly [[REG4:%r[0-5]]], 524284(%r3) +; CHECK: blah [[REG1]], [[REG2]], [[REG3]], [[REG4]] +; CHECK-DAG: stfh [[REG1]], 0(%r2) +; CHECK-DAG: st [[REG2]], 0(%r3) +; CHECK-DAG: stfh [[REG3]], 4096(%r2) +; CHECK-DAG: sty [[REG4]], 524284(%r3) +; CHECK: br %r14 + %ptr3 = getelementptr i32 *%ptr1, i64 1024 + %ptr4 = getelementptr i32 *%ptr2, i64 131071 + %old1 = load i32 *%ptr1 + %old2 = load i32 *%ptr2 + %old3 = load i32 *%ptr3 + %old4 = load i32 *%ptr4 + %res = call { i32, i32, i32, i32 } asm "blah $0, $1, $2, $3", + "=h,=r,=h,=r,0,1,2,3"(i32 %old1, i32 %old2, i32 %old3, i32 %old4) + %new1 = extractvalue { i32, i32, i32, i32 } %res, 0 + %new2 = extractvalue { i32, i32, i32, i32 } %res, 1 + %new3 = extractvalue { i32, i32, i32, i32 } %res, 2 + %new4 = extractvalue { i32, i32, i32, i32 } %res, 3 + store i32 %new1, i32 *%ptr1 + store i32 %new2, i32 *%ptr2 + store i32 %new3, i32 
*%ptr3 + store i32 %new4, i32 *%ptr4 + ret void +} + +; Test moves involving mixtures of high and low registers. +define i32 @f2(i32 %old) { +; CHECK-LABEL: f2: +; CHECK-DAG: risbhg [[REG1:%r[0-5]]], %r2, 0, 159, 32 +; CHECK-DAG: lr %r3, %r2 +; CHECK: stepa [[REG1]], %r2, %r3 +; CHECK: risbhg {{%r[0-5]}}, [[REG1]], 0, 159, 0 +; CHECK: stepb [[REG2:%r[0-5]]] +; CHECK: risblg %r2, [[REG2]], 0, 159, 32 +; CHECK: br %r14 + %tmp = call i32 asm "stepa $1, $2, $3", + "=h,0,{r2},{r3}"(i32 %old, i32 %old, i32 %old) + %new = call i32 asm "stepb $1, $2", "=&h,0,h"(i32 %tmp, i32 %tmp) + ret i32 %new +} + +; Test sign-extending 8-bit loads into mixtures of high and low registers. +define void @f3(i8 *%ptr1, i8 *%ptr2) { +; CHECK-LABEL: f3: +; CHECK-DAG: lbh [[REG1:%r[0-5]]], 0(%r2) +; CHECK-DAG: lb [[REG2:%r[0-5]]], 0(%r3) +; CHECK-DAG: lbh [[REG3:%r[0-5]]], 4096(%r2) +; CHECK-DAG: lb [[REG4:%r[0-5]]], 524287(%r3) +; CHECK: blah [[REG1]], [[REG2]] +; CHECK: br %r14 + %ptr3 = getelementptr i8 *%ptr1, i64 4096 + %ptr4 = getelementptr i8 *%ptr2, i64 524287 + %val1 = load i8 *%ptr1 + %val2 = load i8 *%ptr2 + %val3 = load i8 *%ptr3 + %val4 = load i8 *%ptr4 + %ext1 = sext i8 %val1 to i32 + %ext2 = sext i8 %val2 to i32 + %ext3 = sext i8 %val3 to i32 + %ext4 = sext i8 %val4 to i32 + call void asm sideeffect "blah $0, $1, $2, $3", + "h,r,h,r"(i32 %ext1, i32 %ext2, i32 %ext3, i32 %ext4) + ret void +} + +; Test sign-extending 16-bit loads into mixtures of high and low registers. 
+define void @f4(i16 *%ptr1, i16 *%ptr2) { +; CHECK-LABEL: f4: +; CHECK-DAG: lhh [[REG1:%r[0-5]]], 0(%r2) +; CHECK-DAG: lh [[REG2:%r[0-5]]], 0(%r3) +; CHECK-DAG: lhh [[REG3:%r[0-5]]], 4096(%r2) +; CHECK-DAG: lhy [[REG4:%r[0-5]]], 524286(%r3) +; CHECK: blah [[REG1]], [[REG2]] +; CHECK: br %r14 + %ptr3 = getelementptr i16 *%ptr1, i64 2048 + %ptr4 = getelementptr i16 *%ptr2, i64 262143 + %val1 = load i16 *%ptr1 + %val2 = load i16 *%ptr2 + %val3 = load i16 *%ptr3 + %val4 = load i16 *%ptr4 + %ext1 = sext i16 %val1 to i32 + %ext2 = sext i16 %val2 to i32 + %ext3 = sext i16 %val3 to i32 + %ext4 = sext i16 %val4 to i32 + call void asm sideeffect "blah $0, $1, $2, $3", + "h,r,h,r"(i32 %ext1, i32 %ext2, i32 %ext3, i32 %ext4) + ret void +} + +; Test zero-extending 8-bit loads into mixtures of high and low registers. +define void @f5(i8 *%ptr1, i8 *%ptr2) { +; CHECK-LABEL: f5: +; CHECK-DAG: llch [[REG1:%r[0-5]]], 0(%r2) +; CHECK-DAG: llc [[REG2:%r[0-5]]], 0(%r3) +; CHECK-DAG: llch [[REG3:%r[0-5]]], 4096(%r2) +; CHECK-DAG: llc [[REG4:%r[0-5]]], 524287(%r3) +; CHECK: blah [[REG1]], [[REG2]] +; CHECK: br %r14 + %ptr3 = getelementptr i8 *%ptr1, i64 4096 + %ptr4 = getelementptr i8 *%ptr2, i64 524287 + %val1 = load i8 *%ptr1 + %val2 = load i8 *%ptr2 + %val3 = load i8 *%ptr3 + %val4 = load i8 *%ptr4 + %ext1 = zext i8 %val1 to i32 + %ext2 = zext i8 %val2 to i32 + %ext3 = zext i8 %val3 to i32 + %ext4 = zext i8 %val4 to i32 + call void asm sideeffect "blah $0, $1, $2, $3", + "h,r,h,r"(i32 %ext1, i32 %ext2, i32 %ext3, i32 %ext4) + ret void +} + +; Test zero-extending 16-bit loads into mixtures of high and low registers. 
+define void @f6(i16 *%ptr1, i16 *%ptr2) { +; CHECK-LABEL: f6: +; CHECK-DAG: llhh [[REG1:%r[0-5]]], 0(%r2) +; CHECK-DAG: llh [[REG2:%r[0-5]]], 0(%r3) +; CHECK-DAG: llhh [[REG3:%r[0-5]]], 4096(%r2) +; CHECK-DAG: llh [[REG4:%r[0-5]]], 524286(%r3) +; CHECK: blah [[REG1]], [[REG2]] +; CHECK: br %r14 + %ptr3 = getelementptr i16 *%ptr1, i64 2048 + %ptr4 = getelementptr i16 *%ptr2, i64 262143 + %val1 = load i16 *%ptr1 + %val2 = load i16 *%ptr2 + %val3 = load i16 *%ptr3 + %val4 = load i16 *%ptr4 + %ext1 = zext i16 %val1 to i32 + %ext2 = zext i16 %val2 to i32 + %ext3 = zext i16 %val3 to i32 + %ext4 = zext i16 %val4 to i32 + call void asm sideeffect "blah $0, $1, $2, $3", + "h,r,h,r"(i32 %ext1, i32 %ext2, i32 %ext3, i32 %ext4) + ret void +} + +; Test truncating stores of high and low registers into 8-bit memory. +define void @f7(i8 *%ptr1, i8 *%ptr2) { +; CHECK-LABEL: f7: +; CHECK: blah [[REG1:%r[0-5]]], [[REG2:%r[0-5]]] +; CHECK-DAG: stch [[REG1]], 0(%r2) +; CHECK-DAG: stc [[REG2]], 0(%r3) +; CHECK-DAG: stch [[REG1]], 4096(%r2) +; CHECK-DAG: stcy [[REG2]], 524287(%r3) +; CHECK: br %r14 + %res = call { i32, i32 } asm "blah $0, $1", "=h,=r"() + %res1 = extractvalue { i32, i32 } %res, 0 + %res2 = extractvalue { i32, i32 } %res, 1 + %trunc1 = trunc i32 %res1 to i8 + %trunc2 = trunc i32 %res2 to i8 + %ptr3 = getelementptr i8 *%ptr1, i64 4096 + %ptr4 = getelementptr i8 *%ptr2, i64 524287 + store i8 %trunc1, i8 *%ptr1 + store i8 %trunc2, i8 *%ptr2 + store i8 %trunc1, i8 *%ptr3 + store i8 %trunc2, i8 *%ptr4 + ret void +} + +; Test truncating stores of high and low registers into 16-bit memory. 
+define void @f8(i16 *%ptr1, i16 *%ptr2) { +; CHECK-LABEL: f8: +; CHECK: blah [[REG1:%r[0-5]]], [[REG2:%r[0-5]]] +; CHECK-DAG: sthh [[REG1]], 0(%r2) +; CHECK-DAG: sth [[REG2]], 0(%r3) +; CHECK-DAG: sthh [[REG1]], 4096(%r2) +; CHECK-DAG: sthy [[REG2]], 524286(%r3) +; CHECK: br %r14 + %res = call { i32, i32 } asm "blah $0, $1", "=h,=r"() + %res1 = extractvalue { i32, i32 } %res, 0 + %res2 = extractvalue { i32, i32 } %res, 1 + %trunc1 = trunc i32 %res1 to i16 + %trunc2 = trunc i32 %res2 to i16 + %ptr3 = getelementptr i16 *%ptr1, i64 2048 + %ptr4 = getelementptr i16 *%ptr2, i64 262143 + store i16 %trunc1, i16 *%ptr1 + store i16 %trunc2, i16 *%ptr2 + store i16 %trunc1, i16 *%ptr3 + store i16 %trunc2, i16 *%ptr4 + ret void +} + +; Test zero extensions from 8 bits between mixtures of high and low registers. +define i32 @f9(i8 %val1, i8 %val2) { +; CHECK-LABEL: f9: +; CHECK-DAG: risbhg [[REG1:%r[0-5]]], %r2, 24, 159, 32 +; CHECK-DAG: llcr [[REG2:%r[0-5]]], %r3 +; CHECK: stepa [[REG1]], [[REG2]] +; CHECK: risbhg [[REG3:%r[0-5]]], [[REG1]], 24, 159, 0 +; CHECK: stepb [[REG3]] +; CHECK: risblg %r2, [[REG3]], 24, 159, 32 +; CHECK: br %r14 + %ext1 = zext i8 %val1 to i32 + %ext2 = zext i8 %val2 to i32 + %val3 = call i8 asm sideeffect "stepa $0, $1", "=h,0,r"(i32 %ext1, i32 %ext2) + %ext3 = zext i8 %val3 to i32 + %val4 = call i8 asm sideeffect "stepb $0", "=h,0"(i32 %ext3) + %ext4 = zext i8 %val4 to i32 + ret i32 %ext4 +} + +; Test zero extensions from 16 bits between mixtures of high and low registers. 
+define i32 @f10(i16 %val1, i16 %val2) { +; CHECK-LABEL: f10: +; CHECK-DAG: risbhg [[REG1:%r[0-5]]], %r2, 16, 159, 32 +; CHECK-DAG: llhr [[REG2:%r[0-5]]], %r3 +; CHECK: stepa [[REG1]], [[REG2]] +; CHECK: risbhg [[REG3:%r[0-5]]], [[REG1]], 16, 159, 0 +; CHECK: stepb [[REG3]] +; CHECK: risblg %r2, [[REG3]], 16, 159, 32 +; CHECK: br %r14 + %ext1 = zext i16 %val1 to i32 + %ext2 = zext i16 %val2 to i32 + %val3 = call i16 asm sideeffect "stepa $0, $1", "=h,0,r"(i32 %ext1, i32 %ext2) + %ext3 = zext i16 %val3 to i32 + %val4 = call i16 asm sideeffect "stepb $0", "=h,0"(i32 %ext3) + %ext4 = zext i16 %val4 to i32 + ret i32 %ext4 +} + +; Test loads of 16-bit constants into mixtures of high and low registers. +define void @f11() { +; CHECK-LABEL: f11: +; CHECK-DAG: iihf [[REG1:%r[0-5]]], 4294934529 +; CHECK-DAG: lhi [[REG2:%r[0-5]]], -32768 +; CHECK-DAG: llihl [[REG3:%r[0-5]]], 32766 +; CHECK-DAG: lhi [[REG4:%r[0-5]]], 32767 +; CHECK: blah [[REG1]], [[REG2]], [[REG3]], [[REG4]] +; CHECK: br %r14 + call void asm sideeffect "blah $0, $1, $2, $3", + "h,r,h,r"(i32 -32767, i32 -32768, + i32 32766, i32 32767) + ret void +} + +; Test loads of unsigned constants into mixtures of high and low registers. +; For stepc, we expect the h and r operands to be paired by the register +; allocator. It doesn't really matter which comes first: LLILL/IIHF would +; be just as good. 
+define void @f12() { +; CHECK-LABEL: f12: +; CHECK-DAG: llihl [[REG1:%r[0-5]]], 32768 +; CHECK-DAG: llihl [[REG2:%r[0-5]]], 65535 +; CHECK-DAG: llihh [[REG3:%r[0-5]]], 1 +; CHECK-DAG: llihh [[REG4:%r[0-5]]], 65535 +; CHECK: stepa [[REG1]], [[REG2]], [[REG3]], [[REG4]] +; CHECK-DAG: llill [[REG1:%r[0-5]]], 32769 +; CHECK-DAG: llill [[REG2:%r[0-5]]], 65534 +; CHECK-DAG: llilh [[REG3:%r[0-5]]], 2 +; CHECK-DAG: llilh [[REG4:%r[0-5]]], 65534 +; CHECK: stepb [[REG1]], [[REG2]], [[REG3]], [[REG4]] +; CHECK-DAG: llihl [[REG1:%r[0-5]]], 32770 +; CHECK-DAG: iilf [[REG1]], 65533 +; CHECK-DAG: llihh [[REG2:%r[0-5]]], 4 +; CHECK-DAG: iilf [[REG2]], 524288 +; CHECK: stepc [[REG1]], [[REG1]], [[REG2]], [[REG2]] +; CHECK-DAG: iihf [[REG1:%r[0-5]]], 3294967296 +; CHECK-DAG: iilf [[REG2:%r[0-5]]], 4294567296 +; CHECK-DAG: iihf [[REG3:%r[0-5]]], 1000000000 +; CHECK-DAG: iilf [[REG4:%r[0-5]]], 400000 +; CHECK: stepd [[REG1]], [[REG2]], [[REG3]], [[REG4]] +; CHECK: br %r14 + call void asm sideeffect "stepa $0, $1, $2, $3", + "h,h,h,h"(i32 32768, i32 65535, + i32 65536, i32 -65536) + call void asm sideeffect "stepb $0, $1, $2, $3", + "r,r,r,r"(i32 32769, i32 65534, + i32 131072, i32 -131072) + call void asm sideeffect "stepc $0, $1, $2, $3", + "h,r,h,r"(i32 32770, i32 65533, + i32 262144, i32 524288) + call void asm sideeffect "stepd $0, $1, $2, $3", + "h,r,h,r"(i32 -1000000000, i32 -400000, + i32 1000000000, i32 400000) + ret void +} + +; Test selects involving high registers. +define void @f13(i32 %x, i32 %y) { +; CHECK-LABEL: f13: +; CHECK: llihl [[REG:%r[0-5]]], 0 +; CHECK: cije %r2, 0 +; CHECK: iihf [[REG]], 2102030405 +; CHECK: blah [[REG]] +; CHECK: br %r14 + %cmp = icmp eq i32 %x, 0 + %val = select i1 %cmp, i32 0, i32 2102030405 + call void asm sideeffect "blah $0", "h"(i32 %val) + ret void +} + +; Test selects involving low registers. 
+define void @f14(i32 %x, i32 %y) { +; CHECK-LABEL: f14: +; CHECK: lhi [[REG:%r[0-5]]], 0 +; CHECK: cije %r2, 0 +; CHECK: iilf [[REG]], 2102030405 +; CHECK: blah [[REG]] +; CHECK: br %r14 + %cmp = icmp eq i32 %x, 0 + %val = select i1 %cmp, i32 0, i32 2102030405 + call void asm sideeffect "blah $0", "r"(i32 %val) + ret void +} + +; Test immediate insertion involving high registers. +define void @f15() { +; CHECK-LABEL: f15: +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: iihh [[REG]], 4660 +; CHECK: stepb [[REG]] +; CHECK: iihl [[REG]], 34661 +; CHECK: stepc [[REG]] +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=h"() + %and1 = and i32 %res1, 65535 + %or1 = or i32 %and1, 305397760 + %res2 = call i32 asm "stepb $0, $1", "=h,h"(i32 %or1) + %and2 = and i32 %res2, -65536 + %or2 = or i32 %and2, 34661 + call void asm sideeffect "stepc $0", "h"(i32 %or2) + ret void +} + +; Test immediate insertion involving low registers. +define void @f16() { +; CHECK-LABEL: f16: +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: iilh [[REG]], 4660 +; CHECK: stepb [[REG]] +; CHECK: iill [[REG]], 34661 +; CHECK: stepc [[REG]] +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=r"() + %and1 = and i32 %res1, 65535 + %or1 = or i32 %and1, 305397760 + %res2 = call i32 asm "stepb $0, $1", "=r,r"(i32 %or1) + %and2 = and i32 %res2, -65536 + %or2 = or i32 %and2, 34661 + call void asm sideeffect "stepc $0", "r"(i32 %or2) + ret void +} + +; Test immediate OR involving high registers. 
+define void @f17() { +; CHECK-LABEL: f17: +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: oihh [[REG]], 4660 +; CHECK: stepb [[REG]] +; CHECK: oihl [[REG]], 34661 +; CHECK: stepc [[REG]] +; CHECK: oihf [[REG]], 12345678 +; CHECK: stepd [[REG]] +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=h"() + %or1 = or i32 %res1, 305397760 + %res2 = call i32 asm "stepb $0, $1", "=h,h"(i32 %or1) + %or2 = or i32 %res2, 34661 + %res3 = call i32 asm "stepc $0, $1", "=h,h"(i32 %or2) + %or3 = or i32 %res3, 12345678 + call void asm sideeffect "stepd $0", "h"(i32 %or3) + ret void +} + +; Test immediate OR involving low registers. +define void @f18() { +; CHECK-LABEL: f18: +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: oilh [[REG]], 4660 +; CHECK: stepb [[REG]] +; CHECK: oill [[REG]], 34661 +; CHECK: stepc [[REG]] +; CHECK: oilf [[REG]], 12345678 +; CHECK: stepd [[REG]] +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=r"() + %or1 = or i32 %res1, 305397760 + %res2 = call i32 asm "stepb $0, $1", "=r,r"(i32 %or1) + %or2 = or i32 %res2, 34661 + %res3 = call i32 asm "stepc $0, $1", "=r,r"(i32 %or2) + %or3 = or i32 %res3, 12345678 + call void asm sideeffect "stepd $0", "r"(i32 %or3) + ret void +} + +; Test immediate XOR involving high registers. +define void @f19() { +; CHECK-LABEL: f19: +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: xihf [[REG]], 305397760 +; CHECK: stepb [[REG]] +; CHECK: xihf [[REG]], 34661 +; CHECK: stepc [[REG]] +; CHECK: xihf [[REG]], 12345678 +; CHECK: stepd [[REG]] +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=h"() + %xor1 = xor i32 %res1, 305397760 + %res2 = call i32 asm "stepb $0, $1", "=h,h"(i32 %xor1) + %xor2 = xor i32 %res2, 34661 + %res3 = call i32 asm "stepc $0, $1", "=h,h"(i32 %xor2) + %xor3 = xor i32 %res3, 12345678 + call void asm sideeffect "stepd $0", "h"(i32 %xor3) + ret void +} + +; Test immediate XOR involving low registers. 
+define void @f20() { +; CHECK-LABEL: f20: +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: xilf [[REG]], 305397760 +; CHECK: stepb [[REG]] +; CHECK: xilf [[REG]], 34661 +; CHECK: stepc [[REG]] +; CHECK: xilf [[REG]], 12345678 +; CHECK: stepd [[REG]] +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=r"() + %xor1 = xor i32 %res1, 305397760 + %res2 = call i32 asm "stepb $0, $1", "=r,r"(i32 %xor1) + %xor2 = xor i32 %res2, 34661 + %res3 = call i32 asm "stepc $0, $1", "=r,r"(i32 %xor2) + %xor3 = xor i32 %res3, 12345678 + call void asm sideeffect "stepd $0", "r"(i32 %xor3) + ret void +} + +; Test two-operand immediate AND involving high registers. +define void @f21() { +; CHECK-LABEL: f21: +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: nihh [[REG]], 4096 +; CHECK: stepb [[REG]] +; CHECK: nihl [[REG]], 57536 +; CHECK: stepc [[REG]] +; CHECK: nihf [[REG]], 12345678 +; CHECK: stepd [[REG]] +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=h"() + %and1 = and i32 %res1, 268500991 + %res2 = call i32 asm "stepb $0, $1", "=h,h"(i32 %and1) + %and2 = and i32 %res2, -8000 + %res3 = call i32 asm "stepc $0, $1", "=h,h"(i32 %and2) + %and3 = and i32 %res3, 12345678 + call void asm sideeffect "stepd $0", "h"(i32 %and3) + ret void +} + +; Test two-operand immediate AND involving low registers. +define void @f22() { +; CHECK-LABEL: f22: +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: nilh [[REG]], 4096 +; CHECK: stepb [[REG]] +; CHECK: nill [[REG]], 57536 +; CHECK: stepc [[REG]] +; CHECK: nilf [[REG]], 12345678 +; CHECK: stepd [[REG]] +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=r"() + %and1 = and i32 %res1, 268500991 + %res2 = call i32 asm "stepb $0, $1", "=r,r"(i32 %and1) + %and2 = and i32 %res2, -8000 + %res3 = call i32 asm "stepc $0, $1", "=r,r"(i32 %and2) + %and3 = and i32 %res3, 12345678 + call void asm sideeffect "stepd $0", "r"(i32 %and3) + ret void +} + +; Test three-operand immediate AND involving mixtures of low and high registers. 
+define i32 @f23(i32 %old) { +; CHECK-LABEL: f23: +; CHECK-DAG: risblg [[REG1:%r[0-5]]], %r2, 28, 158, 0 +; CHECK-DAG: risbhg [[REG2:%r[0-5]]], %r2, 24, 158, 32 +; CHECK: stepa %r2, [[REG1]], [[REG2]] +; CHECK-DAG: risbhg [[REG3:%r[0-5]]], [[REG2]], 25, 159, 0 +; CHECK-DAG: risblg %r2, [[REG2]], 24, 152, 32 +; CHECK: stepb [[REG2]], [[REG3]], %r2 +; CHECK: br %r14 + %and1 = and i32 %old, 14 + %and2 = and i32 %old, 254 + %res1 = call i32 asm "stepa $1, $2, $3", + "=h,r,r,0"(i32 %old, i32 %and1, i32 %and2) + %and3 = and i32 %res1, 127 + %and4 = and i32 %res1, 128 + %res2 = call i32 asm "stepb $1, $2, $3", + "=r,h,h,0"(i32 %res1, i32 %and3, i32 %and4) + ret i32 %res2 +} + +; Test RISB[LH]G insertions involving mixtures of high and low registers. +define i32 @f24(i32 %old) { +; CHECK-LABEL: f24: +; CHECK-DAG: risblg [[REG1:%r[0-5]]], %r2, 28, 158, 1 +; CHECK-DAG: risbhg [[REG2:%r[0-5]]], %r2, 24, 158, 29 +; CHECK: stepa %r2, [[REG1]], [[REG2]] +; CHECK-DAG: risbhg [[REG3:%r[0-5]]], [[REG2]], 25, 159, 62 +; CHECK-DAG: risblg %r2, [[REG2]], 24, 152, 37 +; CHECK: stepb [[REG2]], [[REG3]], %r2 +; CHECK: br %r14 + %shift1 = shl i32 %old, 1 + %and1 = and i32 %shift1, 14 + %shift2 = lshr i32 %old, 3 + %and2 = and i32 %shift2, 254 + %res1 = call i32 asm "stepa $1, $2, $3", + "=h,r,r,0"(i32 %old, i32 %and1, i32 %and2) + %shift3 = lshr i32 %res1, 2 + %and3 = and i32 %shift3, 127 + %shift4 = shl i32 %res1, 5 + %and4 = and i32 %shift4, 128 + %res2 = call i32 asm "stepb $1, $2, $3", + "=r,h,h,0"(i32 %res1, i32 %and3, i32 %and4) + ret i32 %res2 +} + +; Test TMxx involving mixtures of high and low registers. 
+define i32 @f25(i32 %old) { +; CHECK-LABEL: f25: +; CHECK-DAG: tmll %r2, 1 +; CHECK-DAG: tmlh %r2, 1 +; CHECK: stepa [[REG1:%r[0-5]]], +; CHECK-DAG: tmhl [[REG1]], 1 +; CHECK-DAG: tmhh [[REG1]], 1 +; CHECK: stepb %r2, +; CHECK: br %r14 + %and1 = and i32 %old, 1 + %and2 = and i32 %old, 65536 + %cmp1 = icmp eq i32 %and1, 0 + %cmp2 = icmp eq i32 %and2, 0 + %sel1 = select i1 %cmp1, i32 100, i32 200 + %sel2 = select i1 %cmp2, i32 100, i32 200 + %res1 = call i32 asm "stepa $0, $1, $2", + "=h,r,r"(i32 %sel1, i32 %sel2) + %and3 = and i32 %res1, 1 + %and4 = and i32 %res1, 65536 + %cmp3 = icmp eq i32 %and3, 0 + %cmp4 = icmp eq i32 %and4, 0 + %sel3 = select i1 %cmp3, i32 100, i32 200 + %sel4 = select i1 %cmp4, i32 100, i32 200 + %res2 = call i32 asm "stepb $0, $1, $2", + "=r,h,h"(i32 %sel3, i32 %sel4) + ret i32 %res2 +} + +; Test two-operand halfword immediate addition involving high registers. +define void @f26() { +; CHECK-LABEL: f26: +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: aih [[REG]], -32768 +; CHECK: stepb [[REG]] +; CHECK: aih [[REG]], 1 +; CHECK: stepc [[REG]] +; CHECK: aih [[REG]], 32767 +; CHECK: stepd [[REG]] +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=h"() + %add1 = add i32 %res1, -32768 + %res2 = call i32 asm "stepb $0, $1", "=h,h"(i32 %add1) + %add2 = add i32 %res2, 1 + %res3 = call i32 asm "stepc $0, $1", "=h,h"(i32 %add2) + %add3 = add i32 %res3, 32767 + call void asm sideeffect "stepd $0", "h"(i32 %add3) + ret void +} + +; Test two-operand halfword immediate addition involving low registers. 
+define void @f27() { +; CHECK-LABEL: f27: +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: ahi [[REG]], -32768 +; CHECK: stepb [[REG]] +; CHECK: ahi [[REG]], 1 +; CHECK: stepc [[REG]] +; CHECK: ahi [[REG]], 32767 +; CHECK: stepd [[REG]] +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=r"() + %add1 = add i32 %res1, -32768 + %res2 = call i32 asm "stepb $0, $1", "=r,r"(i32 %add1) + %add2 = add i32 %res2, 1 + %res3 = call i32 asm "stepc $0, $1", "=r,r"(i32 %add2) + %add3 = add i32 %res3, 32767 + call void asm sideeffect "stepd $0", "r"(i32 %add3) + ret void +} + +; Test three-operand halfword immediate addition involving mixtures of low +; and high registers. RISBHG/AIH would be OK too, instead of AHIK/RISBHG. +define i32 @f28(i32 %old) { +; CHECK-LABEL: f28: +; CHECK: ahik [[REG1:%r[0-5]]], %r2, 14 +; CHECK: stepa %r2, [[REG1]] +; CHECK: ahik [[TMP:%r[0-5]]], [[REG1]], 254 +; CHECK: risbhg [[REG2:%r[0-5]]], [[TMP]], 0, 159, 32 +; CHECK: stepb [[REG1]], [[REG2]] +; CHECK: risbhg [[REG3:%r[0-5]]], [[REG2]], 0, 159, 0 +; CHECK: aih [[REG3]], 127 +; CHECK: stepc [[REG2]], [[REG3]] +; CHECK: risblg %r2, [[REG3]], 0, 159, 32 +; CHECK: ahi %r2, 128 +; CHECK: stepd [[REG3]], %r2 +; CHECK: br %r14 + %add1 = add i32 %old, 14 + %res1 = call i32 asm "stepa $1, $2", + "=r,r,0"(i32 %old, i32 %add1) + %add2 = add i32 %res1, 254 + %res2 = call i32 asm "stepb $1, $2", + "=h,r,0"(i32 %res1, i32 %add2) + %add3 = add i32 %res2, 127 + %res3 = call i32 asm "stepc $1, $2", + "=h,h,0"(i32 %res2, i32 %add3) + %add4 = add i32 %res3, 128 + %res4 = call i32 asm "stepd $1, $2", + "=r,h,0"(i32 %res3, i32 %add4) + ret i32 %res4 +} + +; Test large immediate addition involving high registers. 
+define void @f29() { +; CHECK-LABEL: f29: +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: aih [[REG]], -32769 +; CHECK: stepb [[REG]] +; CHECK: aih [[REG]], 32768 +; CHECK: stepc [[REG]] +; CHECK: aih [[REG]], 1000000000 +; CHECK: stepd [[REG]] +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=h"() + %add1 = add i32 %res1, -32769 + %res2 = call i32 asm "stepb $0, $1", "=h,h"(i32 %add1) + %add2 = add i32 %res2, 32768 + %res3 = call i32 asm "stepc $0, $1", "=h,h"(i32 %add2) + %add3 = add i32 %res3, 1000000000 + call void asm sideeffect "stepd $0", "h"(i32 %add3) + ret void +} + +; Test large immediate addition involving low registers. +define void @f30() { +; CHECK-LABEL: f30: +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -32769 +; CHECK: stepb [[REG]] +; CHECK: afi [[REG]], 32768 +; CHECK: stepc [[REG]] +; CHECK: afi [[REG]], 1000000000 +; CHECK: stepd [[REG]] +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=r"() + %add1 = add i32 %res1, -32769 + %res2 = call i32 asm "stepb $0, $1", "=r,r"(i32 %add1) + %add2 = add i32 %res2, 32768 + %res3 = call i32 asm "stepc $0, $1", "=r,r"(i32 %add2) + %add3 = add i32 %res3, 1000000000 + call void asm sideeffect "stepd $0", "r"(i32 %add3) + ret void +} + +; Test large immediate comparison involving high registers. +define i32 @f31() { +; CHECK-LABEL: f31: +; CHECK: stepa [[REG1:%r[0-5]]] +; CHECK: cih [[REG1]], 1000000000 +; CHECK: stepb [[REG2:%r[0-5]]] +; CHECK: clih [[REG2]], 1000000000 +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=h"() + %cmp1 = icmp sle i32 %res1, 1000000000 + %sel1 = select i1 %cmp1, i32 0, i32 1 + %res2 = call i32 asm "stepb $0, $1", "=h,r"(i32 %sel1) + %cmp2 = icmp ule i32 %res2, 1000000000 + %sel2 = select i1 %cmp2, i32 0, i32 1 + ret i32 %sel2 +} + +; Test large immediate comparison involving low registers. 
+define i32 @f32() { +; CHECK-LABEL: f32: +; CHECK: stepa [[REG1:%r[0-5]]] +; CHECK: cfi [[REG1]], 1000000000 +; CHECK: stepb [[REG2:%r[0-5]]] +; CHECK: clfi [[REG2]], 1000000000 +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=r"() + %cmp1 = icmp sle i32 %res1, 1000000000 + %sel1 = select i1 %cmp1, i32 0, i32 1 + %res2 = call i32 asm "stepb $0, $1", "=r,r"(i32 %sel1) + %cmp2 = icmp ule i32 %res2, 1000000000 + %sel2 = select i1 %cmp2, i32 0, i32 1 + ret i32 %sel2 +} + +; Test memory comparison involving high registers. +define void @f33(i32 *%ptr1, i32 *%ptr2) { +; CHECK-LABEL: f33: +; CHECK: stepa [[REG1:%r[0-5]]] +; CHECK: chf [[REG1]], 0(%r2) +; CHECK: stepb [[REG2:%r[0-5]]] +; CHECK: clhf [[REG2]], 0(%r3) +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=h"() + %load1 = load i32 *%ptr1 + %cmp1 = icmp sle i32 %res1, %load1 + %sel1 = select i1 %cmp1, i32 0, i32 1 + %res2 = call i32 asm "stepb $0, $1", "=h,r"(i32 %sel1) + %load2 = load i32 *%ptr2 + %cmp2 = icmp ule i32 %res2, %load2 + %sel2 = select i1 %cmp2, i32 0, i32 1 + store i32 %sel2, i32 *%ptr1 + ret void +} + +; Test memory comparison involving low registers. 
+define void @f34(i32 *%ptr1, i32 *%ptr2) { +; CHECK-LABEL: f34: +; CHECK: stepa [[REG1:%r[0-5]]] +; CHECK: c [[REG1]], 0(%r2) +; CHECK: stepb [[REG2:%r[0-5]]] +; CHECK: cl [[REG2]], 0(%r3) +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=r"() + %load1 = load i32 *%ptr1 + %cmp1 = icmp sle i32 %res1, %load1 + %sel1 = select i1 %cmp1, i32 0, i32 1 + %res2 = call i32 asm "stepb $0, $1", "=r,r"(i32 %sel1) + %load2 = load i32 *%ptr2 + %cmp2 = icmp ule i32 %res2, %load2 + %sel2 = select i1 %cmp2, i32 0, i32 1 + store i32 %sel2, i32 *%ptr1 + ret void +} diff --git a/test/CodeGen/SystemZ/atomicrmw-minmax-01.ll b/test/CodeGen/SystemZ/atomicrmw-minmax-01.ll index a15fe57..2b750c4 100644 --- a/test/CodeGen/SystemZ/atomicrmw-minmax-01.ll +++ b/test/CodeGen/SystemZ/atomicrmw-minmax-01.ll @@ -91,8 +91,7 @@ define i8 @f3(i8 *%src, i8 %b) { ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) ; CHECK: [[LOOP:\.[^:]*]]: ; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) -; CHECK: clr [[ROT]], %r3 -; CHECK: jle [[KEEP:\..*]] +; CHECK: clrjle [[ROT]], %r3, [[KEEP:\..*]] ; CHECK: risbg [[ROT]], %r3, 32, 39, 0 ; CHECK: [[KEEP]]: ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) @@ -112,7 +111,7 @@ define i8 @f3(i8 *%src, i8 %b) { ; CHECK-SHIFT2-LABEL: f3: ; CHECK-SHIFT2: sll %r3, 24 ; CHECK-SHIFT2: rll -; CHECK-SHIFT2: clr {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: clrjle {{%r[0-9]+}}, %r3, ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: br %r14 @@ -128,8 +127,7 @@ define i8 @f4(i8 *%src, i8 %b) { ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) ; CHECK: [[LOOP:\.[^:]*]]: ; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) -; CHECK: clr [[ROT]], %r3 -; CHECK: jhe [[KEEP:\..*]] +; CHECK: clrjhe [[ROT]], %r3, [[KEEP:\..*]] ; CHECK: risbg [[ROT]], %r3, 32, 39, 0 ; CHECK: [[KEEP]]: ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) @@ -149,7 +147,7 @@ define i8 @f4(i8 *%src, i8 %b) { ; CHECK-SHIFT2-LABEL: f4: ; CHECK-SHIFT2: sll %r3, 24 ; CHECK-SHIFT2: rll -; CHECK-SHIFT2: clr {{%r[0-9]+}}, %r3 +; 
CHECK-SHIFT2: clrjhe {{%r[0-9]+}}, %r3, ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: br %r14 @@ -196,7 +194,7 @@ define i8 @f6(i8 *%src) { define i8 @f7(i8 *%src) { ; CHECK-LABEL: f7: ; CHECK: llilh [[SRC2:%r[0-9]+]], 256 -; CHECK: clr [[ROT:%r[0-9]+]], [[SRC2]] +; CHECK: clrjle [[ROT:%r[0-9]+]], [[SRC2]], ; CHECK: risbg [[ROT]], [[SRC2]], 32, 39, 0 ; CHECK: br %r14 ; @@ -213,7 +211,7 @@ define i8 @f7(i8 *%src) { define i8 @f8(i8 *%src) { ; CHECK-LABEL: f8: ; CHECK: llilh [[SRC2:%r[0-9]+]], 65024 -; CHECK: clr [[ROT:%r[0-9]+]], [[SRC2]] +; CHECK: clrjhe [[ROT:%r[0-9]+]], [[SRC2]], ; CHECK: risbg [[ROT]], [[SRC2]], 32, 39, 0 ; CHECK: br %r14 ; diff --git a/test/CodeGen/SystemZ/atomicrmw-minmax-02.ll b/test/CodeGen/SystemZ/atomicrmw-minmax-02.ll index c0ae883..98ffedf 100644 --- a/test/CodeGen/SystemZ/atomicrmw-minmax-02.ll +++ b/test/CodeGen/SystemZ/atomicrmw-minmax-02.ll @@ -91,8 +91,7 @@ define i16 @f3(i16 *%src, i16 %b) { ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) ; CHECK: [[LOOP:\.[^:]*]]: ; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) -; CHECK: clr [[ROT]], %r3 -; CHECK: jle [[KEEP:\..*]] +; CHECK: clrjle [[ROT]], %r3, [[KEEP:\..*]] ; CHECK: risbg [[ROT]], %r3, 32, 47, 0 ; CHECK: [[KEEP]]: ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) @@ -112,7 +111,7 @@ define i16 @f3(i16 *%src, i16 %b) { ; CHECK-SHIFT2-LABEL: f3: ; CHECK-SHIFT2: sll %r3, 16 ; CHECK-SHIFT2: rll -; CHECK-SHIFT2: clr {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: clrjle {{%r[0-9]+}}, %r3, ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: br %r14 @@ -128,8 +127,7 @@ define i16 @f4(i16 *%src, i16 %b) { ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2) ; CHECK: [[LOOP:\.[^:]*]]: ; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]]) -; CHECK: clr [[ROT]], %r3 -; CHECK: jhe [[KEEP:\..*]] +; CHECK: clrjhe [[ROT]], %r3, [[KEEP:\..*]] ; CHECK: risbg [[ROT]], %r3, 32, 47, 0 ; CHECK: [[KEEP]]: ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}}) @@ -149,7 +147,7 @@ define i16 @f4(i16 *%src, i16 %b) { ; 
CHECK-SHIFT2-LABEL: f4: ; CHECK-SHIFT2: sll %r3, 16 ; CHECK-SHIFT2: rll -; CHECK-SHIFT2: clr {{%r[0-9]+}}, %r3 +; CHECK-SHIFT2: clrjhe {{%r[0-9]+}}, %r3, ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: rll ; CHECK-SHIFT2: br %r14 @@ -196,7 +194,7 @@ define i16 @f6(i16 *%src) { define i16 @f7(i16 *%src) { ; CHECK-LABEL: f7: ; CHECK: llilh [[SRC2:%r[0-9]+]], 1 -; CHECK: clr [[ROT:%r[0-9]+]], [[SRC2]] +; CHECK: clrjle [[ROT:%r[0-9]+]], [[SRC2]], ; CHECK: risbg [[ROT]], [[SRC2]], 32, 47, 0 ; CHECK: br %r14 ; @@ -213,7 +211,7 @@ define i16 @f7(i16 *%src) { define i16 @f8(i16 *%src) { ; CHECK-LABEL: f8: ; CHECK: llilh [[SRC2:%r[0-9]+]], 65534 -; CHECK: clr [[ROT:%r[0-9]+]], [[SRC2]] +; CHECK: clrjhe [[ROT:%r[0-9]+]], [[SRC2]], ; CHECK: risbg [[ROT]], [[SRC2]], 32, 47, 0 ; CHECK: br %r14 ; diff --git a/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll b/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll index 3a9485a..f2152c6 100644 --- a/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll +++ b/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll @@ -1,6 +1,7 @@ -; Test 32-bit atomic minimum and maximum. +; Test 32-bit atomic minimum and maximum. Here we match the z10 versions, +; which can't use LOCR. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; Check signed minium. 
define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { @@ -37,9 +38,8 @@ define i32 @f3(i32 %dummy, i32 *%src, i32 %b) { ; CHECK-LABEL: f3: ; CHECK: l %r2, 0(%r3) ; CHECK: [[LOOP:\.[^:]*]]: -; CHECK: clr %r2, %r4 ; CHECK: lr [[NEW:%r[0-9]+]], %r2 -; CHECK: jle [[KEEP:\..*]] +; CHECK: clrjle %r2, %r4, [[KEEP:\..*]] ; CHECK: lr [[NEW]], %r4 ; CHECK: cs %r2, [[NEW]], 0(%r3) ; CHECK: jl [[LOOP]] @@ -53,9 +53,8 @@ define i32 @f4(i32 %dummy, i32 *%src, i32 %b) { ; CHECK-LABEL: f4: ; CHECK: l %r2, 0(%r3) ; CHECK: [[LOOP:\.[^:]*]]: -; CHECK: clr %r2, %r4 ; CHECK: lr [[NEW:%r[0-9]+]], %r2 -; CHECK: jhe [[KEEP:\..*]] +; CHECK: clrjhe %r2, %r4, [[KEEP:\..*]] ; CHECK: lr [[NEW]], %r4 ; CHECK: cs %r2, [[NEW]], 0(%r3) ; CHECK: jl [[LOOP]] diff --git a/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll b/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll index ebed147..037eb1a 100644 --- a/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll +++ b/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll @@ -1,6 +1,7 @@ -; Test 64-bit atomic minimum and maximum. +; Test 64-bit atomic minimum and maximum. Here we match the z10 versions, +; which can't use LOCGR. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; Check signed minium. 
define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { @@ -37,9 +38,8 @@ define i64 @f3(i64 %dummy, i64 *%src, i64 %b) { ; CHECK-LABEL: f3: ; CHECK: lg %r2, 0(%r3) ; CHECK: [[LOOP:\.[^:]*]]: -; CHECK: clgr %r2, %r4 ; CHECK: lgr [[NEW:%r[0-9]+]], %r2 -; CHECK: jle [[KEEP:\..*]] +; CHECK: clgrjle %r2, %r4, [[KEEP:\..*]] ; CHECK: lgr [[NEW]], %r4 ; CHECK: csg %r2, [[NEW]], 0(%r3) ; CHECK: jl [[LOOP]] @@ -53,9 +53,8 @@ define i64 @f4(i64 %dummy, i64 *%src, i64 %b) { ; CHECK-LABEL: f4: ; CHECK: lg %r2, 0(%r3) ; CHECK: [[LOOP:\.[^:]*]]: -; CHECK: clgr %r2, %r4 ; CHECK: lgr [[NEW:%r[0-9]+]], %r2 -; CHECK: jhe [[KEEP:\..*]] +; CHECK: clgrjhe %r2, %r4, [[KEEP:\..*]] ; CHECK: lgr [[NEW]], %r4 ; CHECK: csg %r2, [[NEW]], 0(%r3) ; CHECK: jl [[LOOP]] diff --git a/test/CodeGen/SystemZ/branch-05.ll b/test/CodeGen/SystemZ/branch-05.ll index d657c9b..b2157b5 100644 --- a/test/CodeGen/SystemZ/branch-05.ll +++ b/test/CodeGen/SystemZ/branch-05.ll @@ -5,8 +5,7 @@ define i32 @f1(i32 %x, i32 %y, i32 %op) { ; CHECK-LABEL: f1: ; CHECK: ahi %r4, -1 -; CHECK: clfi %r4, 5 -; CHECK-NEXT: jh +; CHECK: clijh %r4, 5, ; CHECK: llgfr [[OP64:%r[0-5]]], %r4 ; CHECK: sllg [[INDEX:%r[1-5]]], [[OP64]], 3 ; CHECK: larl [[BASE:%r[1-5]]] diff --git a/test/CodeGen/SystemZ/branch-06.ll b/test/CodeGen/SystemZ/branch-06.ll index 13e5a84..2fa23b7 100644 --- a/test/CodeGen/SystemZ/branch-06.ll +++ b/test/CodeGen/SystemZ/branch-06.ll @@ -3,6 +3,7 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s declare i32 @foo() +@g1 = global i16 0 define void @f1(i32 %target) { ; CHECK-LABEL: f1: @@ -87,3 +88,103 @@ loop: exit: ret void } + +; Check that CRJ is used for checking equality with a zero-extending +; character load. 
+define void @f7(i8 *%targetptr) { +; CHECK-LABEL: f7: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: llc [[REG:%r[0-5]]], +; CHECK: crje %r2, [[REG]], .L[[LABEL]] + br label %loop +loop: + %val = call i32 @foo() + %byte = load i8 *%targetptr + %target = zext i8 %byte to i32 + %cond = icmp eq i32 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +; ...and zero-extending i16 loads. +define void @f8(i16 *%targetptr) { +; CHECK-LABEL: f8: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: llh [[REG:%r[0-5]]], +; CHECK: crje %r2, [[REG]], .L[[LABEL]] + br label %loop +loop: + %val = call i32 @foo() + %half = load i16 *%targetptr + %target = zext i16 %half to i32 + %cond = icmp eq i32 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +; ...unless the address is a global. +define void @f9(i16 *%targetptr) { +; CHECK-LABEL: f9: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: clhrl %r2, g1 +; CHECK: je .L[[LABEL]] + br label %loop +loop: + %val = call i32 @foo() + %half = load i16 *@g1 + %target = zext i16 %half to i32 + %cond = icmp eq i32 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +; Check that CRJ is used for checking order between two zero-extending +; byte loads, even if the original comparison was unsigned. +define void @f10(i8 *%targetptr1) { +; CHECK-LABEL: f10: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK-DAG: llc [[REG1:%r[0-5]]], 0( +; CHECK-DAG: llc [[REG2:%r[0-5]]], 1( +; CHECK: crjl [[REG1]], [[REG2]], .L[[LABEL]] + br label %loop +loop: + %val = call i32 @foo() + %targetptr2 = getelementptr i8 *%targetptr1, i64 1 + %byte1 = load i8 *%targetptr1 + %byte2 = load i8 *%targetptr2 + %ext1 = zext i8 %byte1 to i32 + %ext2 = zext i8 %byte2 to i32 + %cond = icmp ult i32 %ext1, %ext2 + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +; ...likewise halfword loads. 
+define void @f11(i16 *%targetptr1) { +; CHECK-LABEL: f11: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK-DAG: llh [[REG1:%r[0-5]]], 0( +; CHECK-DAG: llh [[REG2:%r[0-5]]], 2( +; CHECK: crjl [[REG1]], [[REG2]], .L[[LABEL]] + br label %loop +loop: + %val = call i32 @foo() + %targetptr2 = getelementptr i16 *%targetptr1, i64 1 + %half1 = load i16 *%targetptr1 + %half2 = load i16 *%targetptr2 + %ext1 = zext i16 %half1 to i32 + %ext2 = zext i16 %half2 to i32 + %cond = icmp ult i32 %ext1, %ext2 + br i1 %cond, label %loop, label %exit +exit: + ret void +} diff --git a/test/CodeGen/SystemZ/branch-07.ll b/test/CodeGen/SystemZ/branch-07.ll index b715a05..bac6071 100644 --- a/test/CodeGen/SystemZ/branch-07.ll +++ b/test/CodeGen/SystemZ/branch-07.ll @@ -97,10 +97,9 @@ exit: ; Test a vector of 0/-1 results for i32 EQ. define i64 @f7(i64 %a, i64 %b) { ; CHECK-LABEL: f7: -; CHECK: lhi [[REG:%r[0-5]]], -1 -; CHECK: crje {{%r[0-5]}} -; CHECK: lhi [[REG]], 0 -; CHECK-NOT: sra +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -268435456 +; CHECK: sra [[REG]], 31 ; CHECK: br %r14 %avec = bitcast i64 %a to <2 x i32> %bvec = bitcast i64 %b to <2 x i32> @@ -113,10 +112,9 @@ define i64 @f7(i64 %a, i64 %b) { ; Test a vector of 0/-1 results for i32 NE. define i64 @f8(i64 %a, i64 %b) { ; CHECK-LABEL: f8: -; CHECK: lhi [[REG:%r[0-5]]], -1 -; CHECK: crjlh {{%r[0-5]}} -; CHECK: lhi [[REG]], 0 -; CHECK-NOT: sra +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1879048192 +; CHECK: sra [[REG]], 31 ; CHECK: br %r14 %avec = bitcast i64 %a to <2 x i32> %bvec = bitcast i64 %b to <2 x i32> @@ -129,10 +127,10 @@ define i64 @f8(i64 %a, i64 %b) { ; Test a vector of 0/-1 results for i64 EQ. 
define void @f9(i64 %a, i64 %b, <2 x i64> *%dest) { ; CHECK-LABEL: f9: -; CHECK: lghi [[REG:%r[0-5]]], -1 -; CHECK: crje {{%r[0-5]}} -; CHECK: lghi [[REG]], 0 -; CHECK-NOT: sra +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -268435456 +; CHECK: sllg [[REG2:%r[0-5]]], [[REG]], 32 +; CHECK: srag {{%r[0-5]}}, [[REG2]], 63 ; CHECK: br %r14 %avec = bitcast i64 %a to <2 x i32> %bvec = bitcast i64 %b to <2 x i32> @@ -145,10 +143,10 @@ define void @f9(i64 %a, i64 %b, <2 x i64> *%dest) { ; Test a vector of 0/-1 results for i64 NE. define void @f10(i64 %a, i64 %b, <2 x i64> *%dest) { ; CHECK-LABEL: f10: -; CHECK: lghi [[REG:%r[0-5]]], -1 -; CHECK: crjlh {{%r[0-5]}} -; CHECK: lghi [[REG]], 0 -; CHECK-NOT: sra +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1879048192 +; CHECK: sllg [[REG2:%r[0-5]]], [[REG]], 32 +; CHECK: srag {{%r[0-5]}}, [[REG2]], 63 ; CHECK: br %r14 %avec = bitcast i64 %a to <2 x i32> %bvec = bitcast i64 %b to <2 x i32> diff --git a/test/CodeGen/SystemZ/branch-08.ll b/test/CodeGen/SystemZ/branch-08.ll index c4dc467..6741d29 100644 --- a/test/CodeGen/SystemZ/branch-08.ll +++ b/test/CodeGen/SystemZ/branch-08.ll @@ -6,14 +6,15 @@ declare void @foo() noreturn ; Check a case where a separate branch is needed and where the original ; order should be reversed. -define i32 @f1(i32 %a, i32 %b) { +define i32 @f1(i32 %a, i32 *%bptr) { ; CHECK-LABEL: f1: -; CHECK: clr %r2, %r3 +; CHECK: cl %r2, 0(%r3) ; CHECK: jl .L[[LABEL:.*]] ; CHECK: br %r14 ; CHECK: .L[[LABEL]]: ; CHECK: brasl %r14, foo@PLT entry: + %b = load i32 *%bptr %cmp = icmp ult i32 %a, %b br i1 %cmp, label %callit, label %return diff --git a/test/CodeGen/SystemZ/branch-09.ll b/test/CodeGen/SystemZ/branch-09.ll new file mode 100644 index 0000000..5591f5b --- /dev/null +++ b/test/CodeGen/SystemZ/branch-09.ll @@ -0,0 +1,62 @@ +; Test all condition-code masks that are relevant for CLRJ. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @foo() +@g1 = global i16 0 + +define void @f1(i32 %target) { +; CHECK-LABEL: f1: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: clrjle %r2, {{%r[0-9]+}}, .L[[LABEL]] + br label %loop +loop: + %val = call i32 @foo() + %cond = icmp ule i32 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f2(i32 %target) { +; CHECK-LABEL: f2: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: clrjl %r2, {{%r[0-9]+}}, .L[[LABEL]] + br label %loop +loop: + %val = call i32 @foo() + %cond = icmp ult i32 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f3(i32 %target) { +; CHECK-LABEL: f3: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: clrjh %r2, {{%r[0-9]+}}, .L[[LABEL]] + br label %loop +loop: + %val = call i32 @foo() + %cond = icmp ugt i32 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f4(i32 %target) { +; CHECK-LABEL: f4: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: clrjhe %r2, {{%r[0-9]+}}, .L[[LABEL]] + br label %loop +loop: + %val = call i32 @foo() + %cond = icmp uge i32 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} diff --git a/test/CodeGen/SystemZ/branch-10.ll b/test/CodeGen/SystemZ/branch-10.ll new file mode 100644 index 0000000..ec6e759 --- /dev/null +++ b/test/CodeGen/SystemZ/branch-10.ll @@ -0,0 +1,62 @@ +; Test all condition-code masks that are relevant for CLGRJ. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() +@g1 = global i16 0 + +define void @f1(i64 %target) { +; CHECK-LABEL: f1: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: clgrjle %r2, {{%r[0-9]+}}, .L[[LABEL]] + br label %loop +loop: + %val = call i64 @foo() + %cond = icmp ule i64 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f2(i64 %target) { +; CHECK-LABEL: f2: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: clgrjl %r2, {{%r[0-9]+}}, .L[[LABEL]] + br label %loop +loop: + %val = call i64 @foo() + %cond = icmp ult i64 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f3(i64 %target) { +; CHECK-LABEL: f3: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: clgrjh %r2, {{%r[0-9]+}}, .L[[LABEL]] + br label %loop +loop: + %val = call i64 @foo() + %cond = icmp ugt i64 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} + +define void @f4(i64 %target) { +; CHECK-LABEL: f4: +; CHECK: .cfi_def_cfa_offset +; CHECK: .L[[LABEL:.*]]: +; CHECK: clgrjhe %r2, {{%r[0-9]+}}, .L[[LABEL]] + br label %loop +loop: + %val = call i64 @foo() + %cond = icmp uge i64 %val, %target + br i1 %cond, label %loop, label %exit +exit: + ret void +} diff --git a/test/CodeGen/SystemZ/call-03.ll b/test/CodeGen/SystemZ/call-03.ll new file mode 100644 index 0000000..1f314ea --- /dev/null +++ b/test/CodeGen/SystemZ/call-03.ll @@ -0,0 +1,125 @@ +; Test sibling calls. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare void @ok(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2, + float %f4, double %f6) +declare void @uses_r6(i8 %r2, i16 %r3, i32 %r4, i64 %r5, i64 %r6) +declare void @uses_indirect(fp128 %r2) +declare void @uses_stack(float %f0, float %f2, float %f4, float %f6, + float %stack) +declare i32 @returns_i32() +declare i64 @returns_i64() + +; Check the maximum number of arguments that we can pass and still use +; a sibling call. +define void @f1() { +; CHECK-LABEL: f1: +; CHECK-DAG: lzer %f0 +; CHECK-DAG: lzdr %f2 +; CHECK-DAG: lhi %r2, 1 +; CHECK-DAG: lhi %r3, 2 +; CHECK-DAG: lhi %r4, 3 +; CHECK-DAG: lghi %r5, 4 +; CHECK-DAG: {{ler %f4, %f0|lzer %f4}} +; CHECK-DAG: {{ldr %f6, %f2|lzdr %f6}} +; CHECK: jg ok@PLT + tail call void @ok(i8 1, i16 2, i32 3, i64 4, float 0.0, double 0.0, + float 0.0, double 0.0) + ret void +} + +; Check a call that uses %r6 to pass an argument. At the moment we don't +; use sibling calls in that case. +define void @f2() { +; CHECK-LABEL: f2: +; CHECK: brasl %r14, uses_r6@PLT +; CHECK: br %r14 + tail call void @uses_r6(i8 1, i16 2, i32 3, i64 4, i64 5) + ret void +} + +; Check a call that passes indirect arguments. We can't use sibling +; calls in that case. +define void @f3() { +; CHECK-LABEL: f3: +; CHECK: brasl %r14, uses_indirect@PLT +; CHECK: br %r14 + tail call void @uses_indirect(fp128 0xL00000000000000000000000000000000) + ret void +} + +; Check a call that uses direct stack arguments, which again prevents +; sibling calls +define void @f4() { +; CHECK-LABEL: f4: +; CHECK: brasl %r14, uses_stack@PLT +; CHECK: br %r14 + tail call void @uses_stack(float 0.0, float 0.0, float 0.0, float 0.0, + float 0.0) + ret void +} + +; Check an indirect call. In this case the only acceptable choice for +; the target register is %r1. 
+define void @f5(void(i32, i32, i32, i32) *%foo) { +; CHECK-LABEL: f5: +; CHECK: lgr %r1, %r2 +; CHECK-DAG: lhi %r2, 1 +; CHECK-DAG: lhi %r3, 2 +; CHECK-DAG: lhi %r4, 3 +; CHECK-DAG: lhi %r5, 4 +; CHECK: br %r1 + tail call void %foo(i32 1, i32 2, i32 3, i32 4) + ret void +} + +; Check an indirect call that will be forced into a call-saved GPR +; (which should be %r13, the highest GPR not used for anything else). +define void @f6(void(i32) *%foo) { +; CHECK-LABEL: f6: +; CHECK: stmg %r13, %r15, 104(%r15) +; CHECK: lgr %r13, %r2 +; CHECK: brasl %r14, returns_i32 +; CHECK: lgr %r1, %r13 +; CHECK: lmg %r13, %r15, 264(%r15) +; CHECK: br %r1 + %arg = call i32 @returns_i32() + tail call void %foo(i32 %arg) + ret void +} + +; Test a function that returns a value. +define i64 @f7() { +; CHECK-LABEL: f7: +; CHECK: jg returns_i64@PLT + %res = tail call i64 @returns_i64() + ret i64 %res +} + +; Test a function that returns a value truncated from i64 to i32. +define i32 @f8() { +; CHECK-LABEL: f8: +; CHECK: jg returns_i64@PLT + %res = tail call i64 @returns_i64() + %trunc = trunc i64 %res to i32 + ret i32 %trunc +} + +; Test a function that returns a value truncated from i64 to i7. +define i7 @f9() { +; CHECK-LABEL: f9: +; CHECK: jg returns_i64@PLT + %res = tail call i64 @returns_i64() + %trunc = trunc i64 %res to i7 + ret i7 %trunc +} + +; Test a function that returns a value truncated from i32 to i8. +define i8 @f10() { +; CHECK-LABEL: f10: +; CHECK: jg returns_i32@PLT + %res = tail call i32 @returns_i32() + %trunc = trunc i32 %res to i8 + ret i8 %trunc +} diff --git a/test/CodeGen/SystemZ/cond-store-01.ll b/test/CodeGen/SystemZ/cond-store-01.ll index 80e6d91..d55ea21 100644 --- a/test/CodeGen/SystemZ/cond-store-01.ll +++ b/test/CodeGen/SystemZ/cond-store-01.ll @@ -1,6 +1,7 @@ -; Test 8-bit conditional stores that are presented as selects. +; Test 8-bit conditional stores that are presented as selects. The volatile +; tests require z10, which use a branch instead of a LOCR. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s declare void @foo(i8 *) @@ -13,7 +14,7 @@ define void @f1(i8 *%ptr, i8 %alt, i32 %limit) { ; CHECK: stc %r3, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i8 *%ptr %res = select i1 %cond, i8 %orig, i8 %alt store i8 %res, i8 *%ptr @@ -29,7 +30,7 @@ define void @f2(i8 *%ptr, i8 %alt, i32 %limit) { ; CHECK: stc %r3, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i8 *%ptr %res = select i1 %cond, i8 %alt, i8 %orig store i8 %res, i8 *%ptr @@ -46,7 +47,7 @@ define void @f3(i8 *%ptr, i32 %alt, i32 %limit) { ; CHECK: stc %r3, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i8 *%ptr %ext = sext i8 %orig to i32 %res = select i1 %cond, i32 %ext, i32 %alt @@ -64,7 +65,7 @@ define void @f4(i8 *%ptr, i32 %alt, i32 %limit) { ; CHECK: stc %r3, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i8 *%ptr %ext = sext i8 %orig to i32 %res = select i1 %cond, i32 %alt, i32 %ext @@ -83,7 +84,7 @@ define void @f5(i8 *%ptr, i32 %alt, i32 %limit) { ; CHECK: stc %r3, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i8 *%ptr %ext = zext i8 %orig to i32 %res = select i1 %cond, i32 %ext, i32 %alt @@ -101,7 +102,7 @@ define void @f6(i8 *%ptr, i32 %alt, i32 %limit) { ; CHECK: stc %r3, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i8 *%ptr %ext = zext i8 %orig to i32 %res = select i1 %cond, i32 %alt, i32 %ext @@ -120,7 +121,7 @@ define void @f7(i8 *%ptr, i64 %alt, i32 %limit) { ; CHECK: stc %r3, 0(%r2) ; CHECK: 
[[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i8 *%ptr %ext = sext i8 %orig to i64 %res = select i1 %cond, i64 %ext, i64 %alt @@ -138,7 +139,7 @@ define void @f8(i8 *%ptr, i64 %alt, i32 %limit) { ; CHECK: stc %r3, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i8 *%ptr %ext = sext i8 %orig to i64 %res = select i1 %cond, i64 %alt, i64 %ext @@ -157,7 +158,7 @@ define void @f9(i8 *%ptr, i64 %alt, i32 %limit) { ; CHECK: stc %r3, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i8 *%ptr %ext = zext i8 %orig to i64 %res = select i1 %cond, i64 %ext, i64 %alt @@ -175,7 +176,7 @@ define void @f10(i8 *%ptr, i64 %alt, i32 %limit) { ; CHECK: stc %r3, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i8 *%ptr %ext = zext i8 %orig to i64 %res = select i1 %cond, i64 %alt, i64 %ext @@ -194,7 +195,7 @@ define void @f11(i8 *%base, i8 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr i8 *%base, i64 4095 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i8 *%ptr %res = select i1 %cond, i8 %orig, i8 %alt store i8 %res, i8 *%ptr @@ -211,7 +212,7 @@ define void @f12(i8 *%base, i8 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr i8 *%base, i64 4096 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i8 *%ptr %res = select i1 %cond, i8 %orig, i8 %alt store i8 %res, i8 *%ptr @@ -228,7 +229,7 @@ define void @f13(i8 *%base, i8 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr i8 *%base, i64 524287 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i8 *%ptr %res = select i1 %cond, i8 %orig, i8 %alt store i8 %res, i8 *%ptr @@ 
-247,7 +248,7 @@ define void @f14(i8 *%base, i8 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr i8 *%base, i64 524288 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i8 *%ptr %res = select i1 %cond, i8 %orig, i8 %alt store i8 %res, i8 *%ptr @@ -264,7 +265,7 @@ define void @f15(i8 *%base, i8 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr i8 *%base, i64 -524288 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i8 *%ptr %res = select i1 %cond, i8 %orig, i8 %alt store i8 %res, i8 *%ptr @@ -283,7 +284,7 @@ define void @f16(i8 *%base, i8 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr i8 *%base, i64 -524289 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i8 *%ptr %res = select i1 %cond, i8 %orig, i8 %alt store i8 %res, i8 *%ptr @@ -302,7 +303,7 @@ define void @f17(i64 %base, i64 %index, i8 %alt, i32 %limit) { %add1 = add i64 %base, %index %add2 = add i64 %add1, 4096 %ptr = inttoptr i64 %add2 to i8 * - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i8 *%ptr %res = select i1 %cond, i8 %orig, i8 %alt store i8 %res, i8 *%ptr @@ -317,7 +318,7 @@ define void @f18(i8 *%ptr, i8 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: stc {{%r[0-5]}}, 0(%r2) ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load volatile i8 *%ptr %res = select i1 %cond, i8 %orig, i8 %alt store i8 %res, i8 *%ptr @@ -332,7 +333,7 @@ define void @f19(i8 *%ptr, i8 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: stc %r3, 0(%r2) ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i8 *%ptr %res = select i1 %cond, i8 %orig, i8 %alt store volatile i8 %res, i8 *%ptr @@ -352,7 +353,7 @@ define void @f20(i8 *%ptr, i8 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: stc {{%r[0-9]+}}, ; CHECK: br %r14 - 
%cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load atomic i8 *%ptr unordered, align 1 %res = select i1 %cond, i8 %orig, i8 %alt store i8 %res, i8 *%ptr @@ -368,7 +369,7 @@ define void @f21(i8 *%ptr, i8 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: cs {{%r[0-9]+}}, ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i8 *%ptr %res = select i1 %cond, i8 %orig, i8 %alt store atomic i8 %res, i8 *%ptr unordered, align 1 @@ -388,7 +389,7 @@ define void @f22(i8 %alt, i32 %limit) { ; CHECK: br %r14 %ptr = alloca i8 call void @foo(i8 *%ptr) - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i8 *%ptr %res = select i1 %cond, i8 %orig, i8 %alt store i8 %res, i8 *%ptr diff --git a/test/CodeGen/SystemZ/cond-store-02.ll b/test/CodeGen/SystemZ/cond-store-02.ll index e01a853..91bc486 100644 --- a/test/CodeGen/SystemZ/cond-store-02.ll +++ b/test/CodeGen/SystemZ/cond-store-02.ll @@ -1,6 +1,7 @@ -; Test 16-bit conditional stores that are presented as selects. +; Test 16-bit conditional stores that are presented as selects. The volatile +; tests require z10, which use a branch instead of a LOCR. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s declare void @foo(i16 *) @@ -13,7 +14,7 @@ define void @f1(i16 *%ptr, i16 %alt, i32 %limit) { ; CHECK: sth %r3, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i16 *%ptr %res = select i1 %cond, i16 %orig, i16 %alt store i16 %res, i16 *%ptr @@ -29,7 +30,7 @@ define void @f2(i16 *%ptr, i16 %alt, i32 %limit) { ; CHECK: sth %r3, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i16 *%ptr %res = select i1 %cond, i16 %alt, i16 %orig store i16 %res, i16 *%ptr @@ -46,7 +47,7 @@ define void @f3(i16 *%ptr, i32 %alt, i32 %limit) { ; CHECK: sth %r3, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i16 *%ptr %ext = sext i16 %orig to i32 %res = select i1 %cond, i32 %ext, i32 %alt @@ -64,7 +65,7 @@ define void @f4(i16 *%ptr, i32 %alt, i32 %limit) { ; CHECK: sth %r3, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i16 *%ptr %ext = sext i16 %orig to i32 %res = select i1 %cond, i32 %alt, i32 %ext @@ -83,7 +84,7 @@ define void @f5(i16 *%ptr, i32 %alt, i32 %limit) { ; CHECK: sth %r3, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i16 *%ptr %ext = zext i16 %orig to i32 %res = select i1 %cond, i32 %ext, i32 %alt @@ -101,7 +102,7 @@ define void @f6(i16 *%ptr, i32 %alt, i32 %limit) { ; CHECK: sth %r3, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i16 *%ptr %ext = zext i16 %orig to i32 %res = select i1 %cond, i32 %alt, i32 %ext @@ -120,7 +121,7 @@ define void @f7(i16 *%ptr, i64 %alt, i32 %limit) { ; CHECK: 
sth %r3, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i16 *%ptr %ext = sext i16 %orig to i64 %res = select i1 %cond, i64 %ext, i64 %alt @@ -138,7 +139,7 @@ define void @f8(i16 *%ptr, i64 %alt, i32 %limit) { ; CHECK: sth %r3, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i16 *%ptr %ext = sext i16 %orig to i64 %res = select i1 %cond, i64 %alt, i64 %ext @@ -157,7 +158,7 @@ define void @f9(i16 *%ptr, i64 %alt, i32 %limit) { ; CHECK: sth %r3, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i16 *%ptr %ext = zext i16 %orig to i64 %res = select i1 %cond, i64 %ext, i64 %alt @@ -175,7 +176,7 @@ define void @f10(i16 *%ptr, i64 %alt, i32 %limit) { ; CHECK: sth %r3, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i16 *%ptr %ext = zext i16 %orig to i64 %res = select i1 %cond, i64 %alt, i64 %ext @@ -194,7 +195,7 @@ define void @f11(i16 *%base, i16 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr i16 *%base, i64 2047 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i16 *%ptr %res = select i1 %cond, i16 %orig, i16 %alt store i16 %res, i16 *%ptr @@ -211,7 +212,7 @@ define void @f12(i16 *%base, i16 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr i16 *%base, i64 2048 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i16 *%ptr %res = select i1 %cond, i16 %orig, i16 %alt store i16 %res, i16 *%ptr @@ -228,7 +229,7 @@ define void @f13(i16 *%base, i16 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr i16 *%base, i64 262143 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i16 *%ptr %res = select i1 
%cond, i16 %orig, i16 %alt store i16 %res, i16 *%ptr @@ -247,7 +248,7 @@ define void @f14(i16 *%base, i16 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr i16 *%base, i64 262144 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i16 *%ptr %res = select i1 %cond, i16 %orig, i16 %alt store i16 %res, i16 *%ptr @@ -264,7 +265,7 @@ define void @f15(i16 *%base, i16 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr i16 *%base, i64 -262144 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i16 *%ptr %res = select i1 %cond, i16 %orig, i16 %alt store i16 %res, i16 *%ptr @@ -283,7 +284,7 @@ define void @f16(i16 *%base, i16 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr i16 *%base, i64 -262145 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i16 *%ptr %res = select i1 %cond, i16 %orig, i16 %alt store i16 %res, i16 *%ptr @@ -302,7 +303,7 @@ define void @f17(i64 %base, i64 %index, i16 %alt, i32 %limit) { %add1 = add i64 %base, %index %add2 = add i64 %add1, 4096 %ptr = inttoptr i64 %add2 to i16 * - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i16 *%ptr %res = select i1 %cond, i16 %orig, i16 %alt store i16 %res, i16 *%ptr @@ -317,7 +318,7 @@ define void @f18(i16 *%ptr, i16 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: sth {{%r[0-5]}}, 0(%r2) ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load volatile i16 *%ptr %res = select i1 %cond, i16 %orig, i16 %alt store i16 %res, i16 *%ptr @@ -332,7 +333,7 @@ define void @f19(i16 *%ptr, i16 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: sth %r3, 0(%r2) ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i16 *%ptr %res = select i1 %cond, i16 %orig, i16 %alt store volatile i16 %res, i16 *%ptr @@ -352,7 +353,7 @@ define void 
@f20(i16 *%ptr, i16 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: sth {{%r[0-9]+}}, ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load atomic i16 *%ptr unordered, align 2 %res = select i1 %cond, i16 %orig, i16 %alt store i16 %res, i16 *%ptr @@ -368,7 +369,7 @@ define void @f21(i16 *%ptr, i16 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: cs {{%r[0-9]+}}, ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i16 *%ptr %res = select i1 %cond, i16 %orig, i16 %alt store atomic i16 %res, i16 *%ptr unordered, align 2 @@ -388,7 +389,7 @@ define void @f22(i16 %alt, i32 %limit) { ; CHECK: br %r14 %ptr = alloca i16 call void @foo(i16 *%ptr) - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i16 *%ptr %res = select i1 %cond, i16 %orig, i16 %alt store i16 %res, i16 *%ptr diff --git a/test/CodeGen/SystemZ/cond-store-03.ll b/test/CodeGen/SystemZ/cond-store-03.ll index e122bc2..d4fd48d 100644 --- a/test/CodeGen/SystemZ/cond-store-03.ll +++ b/test/CodeGen/SystemZ/cond-store-03.ll @@ -13,7 +13,7 @@ define void @f1(i32 *%ptr, i32 %alt, i32 %limit) { ; CHECK: st %r3, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i32 *%ptr %res = select i1 %cond, i32 %orig, i32 %alt store i32 %res, i32 *%ptr @@ -29,7 +29,7 @@ define void @f2(i32 *%ptr, i32 %alt, i32 %limit) { ; CHECK: st %r3, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i32 *%ptr %res = select i1 %cond, i32 %alt, i32 %orig store i32 %res, i32 *%ptr @@ -46,7 +46,7 @@ define void @f3(i32 *%ptr, i64 %alt, i32 %limit) { ; CHECK: st %r3, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i32 *%ptr %ext = sext i32 %orig to i64 %res = select i1 %cond, i64 %ext, i64 %alt @@ -64,7 
+64,7 @@ define void @f4(i32 *%ptr, i64 %alt, i32 %limit) { ; CHECK: st %r3, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i32 *%ptr %ext = sext i32 %orig to i64 %res = select i1 %cond, i64 %alt, i64 %ext @@ -83,7 +83,7 @@ define void @f5(i32 *%ptr, i64 %alt, i32 %limit) { ; CHECK: st %r3, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i32 *%ptr %ext = zext i32 %orig to i64 %res = select i1 %cond, i64 %ext, i64 %alt @@ -101,7 +101,7 @@ define void @f6(i32 *%ptr, i64 %alt, i32 %limit) { ; CHECK: st %r3, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i32 *%ptr %ext = zext i32 %orig to i64 %res = select i1 %cond, i64 %alt, i64 %ext @@ -120,7 +120,7 @@ define void @f7(i32 *%base, i32 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 1023 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i32 *%ptr %res = select i1 %cond, i32 %orig, i32 %alt store i32 %res, i32 *%ptr @@ -137,7 +137,7 @@ define void @f8(i32 *%base, i32 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 1024 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i32 *%ptr %res = select i1 %cond, i32 %orig, i32 %alt store i32 %res, i32 *%ptr @@ -154,7 +154,7 @@ define void @f9(i32 *%base, i32 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 131071 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i32 *%ptr %res = select i1 %cond, i32 %orig, i32 %alt store i32 %res, i32 *%ptr @@ -173,7 +173,7 @@ define void @f10(i32 *%base, i32 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 131072 - %cond = icmp ult i32 
%limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i32 *%ptr %res = select i1 %cond, i32 %orig, i32 %alt store i32 %res, i32 *%ptr @@ -190,7 +190,7 @@ define void @f11(i32 *%base, i32 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 -131072 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i32 *%ptr %res = select i1 %cond, i32 %orig, i32 %alt store i32 %res, i32 *%ptr @@ -209,7 +209,7 @@ define void @f12(i32 *%base, i32 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr i32 *%base, i64 -131073 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i32 *%ptr %res = select i1 %cond, i32 %orig, i32 %alt store i32 %res, i32 *%ptr @@ -228,7 +228,7 @@ define void @f13(i64 %base, i64 %index, i32 %alt, i32 %limit) { %add1 = add i64 %base, %index %add2 = add i64 %add1, 4096 %ptr = inttoptr i64 %add2 to i32 * - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i32 *%ptr %res = select i1 %cond, i32 %orig, i32 %alt store i32 %res, i32 *%ptr @@ -243,7 +243,7 @@ define void @f14(i32 *%ptr, i32 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: st {{%r[0-5]}}, 0(%r2) ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load volatile i32 *%ptr %res = select i1 %cond, i32 %orig, i32 %alt store i32 %res, i32 *%ptr @@ -258,7 +258,7 @@ define void @f15(i32 *%ptr, i32 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: st %r3, 0(%r2) ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i32 *%ptr %res = select i1 %cond, i32 %orig, i32 %alt store volatile i32 %res, i32 *%ptr @@ -277,7 +277,7 @@ define void @f16(i32 *%ptr, i32 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: st {{%r[0-5]}}, 0(%r2) ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load atomic i32 *%ptr unordered, align 4 %res = 
select i1 %cond, i32 %orig, i32 %alt store i32 %res, i32 *%ptr @@ -293,7 +293,7 @@ define void @f17(i32 *%ptr, i32 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: cs {{%r[0-5]}}, %r3, 0(%r2) ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i32 *%ptr %res = select i1 %cond, i32 %orig, i32 %alt store atomic i32 %res, i32 *%ptr unordered, align 4 @@ -313,7 +313,7 @@ define void @f18(i32 %alt, i32 %limit) { ; CHECK: br %r14 %ptr = alloca i32 call void @foo(i32 *%ptr) - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i32 *%ptr %res = select i1 %cond, i32 %orig, i32 %alt store i32 %res, i32 *%ptr diff --git a/test/CodeGen/SystemZ/cond-store-04.ll b/test/CodeGen/SystemZ/cond-store-04.ll index 4ed23a3..fc565c4 100644 --- a/test/CodeGen/SystemZ/cond-store-04.ll +++ b/test/CodeGen/SystemZ/cond-store-04.ll @@ -13,7 +13,7 @@ define void @f1(i64 *%ptr, i64 %alt, i32 %limit) { ; CHECK: stg %r3, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i64 *%ptr %res = select i1 %cond, i64 %orig, i64 %alt store i64 %res, i64 *%ptr @@ -29,7 +29,7 @@ define void @f2(i64 *%ptr, i64 %alt, i32 %limit) { ; CHECK: stg %r3, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i64 *%ptr %res = select i1 %cond, i64 %alt, i64 %orig store i64 %res, i64 *%ptr @@ -46,7 +46,7 @@ define void @f3(i64 *%base, i64 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr i64 *%base, i64 65535 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i64 *%ptr %res = select i1 %cond, i64 %orig, i64 %alt store i64 %res, i64 *%ptr @@ -65,7 +65,7 @@ define void @f4(i64 *%base, i64 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr i64 *%base, i64 65536 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 
%limit, 420 %orig = load i64 *%ptr %res = select i1 %cond, i64 %orig, i64 %alt store i64 %res, i64 *%ptr @@ -82,7 +82,7 @@ define void @f5(i64 *%base, i64 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr i64 *%base, i64 -65536 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i64 *%ptr %res = select i1 %cond, i64 %orig, i64 %alt store i64 %res, i64 *%ptr @@ -101,7 +101,7 @@ define void @f6(i64 *%base, i64 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr i64 *%base, i64 -65537 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i64 *%ptr %res = select i1 %cond, i64 %orig, i64 %alt store i64 %res, i64 *%ptr @@ -120,7 +120,7 @@ define void @f7(i64 %base, i64 %index, i64 %alt, i32 %limit) { %add1 = add i64 %base, %index %add2 = add i64 %add1, 524287 %ptr = inttoptr i64 %add2 to i64 * - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i64 *%ptr %res = select i1 %cond, i64 %orig, i64 %alt store i64 %res, i64 *%ptr @@ -135,7 +135,7 @@ define void @f8(i64 *%ptr, i64 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: stg {{%r[0-5]}}, 0(%r2) ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load volatile i64 *%ptr %res = select i1 %cond, i64 %orig, i64 %alt store i64 %res, i64 *%ptr @@ -150,7 +150,7 @@ define void @f9(i64 *%ptr, i64 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: stg %r3, 0(%r2) ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i64 *%ptr %res = select i1 %cond, i64 %orig, i64 %alt store volatile i64 %res, i64 *%ptr @@ -169,7 +169,7 @@ define void @f10(i64 *%ptr, i64 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: stg {{%r[0-5]}}, 0(%r2) ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load atomic i64 *%ptr unordered, align 8 %res = select i1 %cond, i64 %orig, i64 %alt store 
i64 %res, i64 *%ptr @@ -185,7 +185,7 @@ define void @f11(i64 *%ptr, i64 %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: csg {{%r[0-5]}}, %r3, 0(%r2) ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i64 *%ptr %res = select i1 %cond, i64 %orig, i64 %alt store atomic i64 %res, i64 *%ptr unordered, align 8 @@ -205,7 +205,7 @@ define void @f12(i64 %alt, i32 %limit) { ; CHECK: br %r14 %ptr = alloca i64 call void @foo(i64 *%ptr) - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load i64 *%ptr %res = select i1 %cond, i64 %orig, i64 %alt store i64 %res, i64 *%ptr diff --git a/test/CodeGen/SystemZ/cond-store-05.ll b/test/CodeGen/SystemZ/cond-store-05.ll index e41c8fe..f8056f7 100644 --- a/test/CodeGen/SystemZ/cond-store-05.ll +++ b/test/CodeGen/SystemZ/cond-store-05.ll @@ -13,7 +13,7 @@ define void @f1(float *%ptr, float %alt, i32 %limit) { ; CHECK: ste %f0, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load float *%ptr %res = select i1 %cond, float %orig, float %alt store float %res, float *%ptr @@ -29,7 +29,7 @@ define void @f2(float *%ptr, float %alt, i32 %limit) { ; CHECK: ste %f0, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load float *%ptr %res = select i1 %cond, float %alt, float %orig store float %res, float *%ptr @@ -46,7 +46,7 @@ define void @f3(float *%base, float %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr float *%base, i64 1023 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load float *%ptr %res = select i1 %cond, float %orig, float %alt store float %res, float *%ptr @@ -63,7 +63,7 @@ define void @f4(float *%base, float %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr float *%base, i64 1024 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult 
i32 %limit, 420 %orig = load float *%ptr %res = select i1 %cond, float %orig, float %alt store float %res, float *%ptr @@ -80,7 +80,7 @@ define void @f5(float *%base, float %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr float *%base, i64 131071 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load float *%ptr %res = select i1 %cond, float %orig, float %alt store float %res, float *%ptr @@ -99,7 +99,7 @@ define void @f6(float *%base, float %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr float *%base, i64 131072 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load float *%ptr %res = select i1 %cond, float %orig, float %alt store float %res, float *%ptr @@ -116,7 +116,7 @@ define void @f7(float *%base, float %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr float *%base, i64 -131072 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load float *%ptr %res = select i1 %cond, float %orig, float %alt store float %res, float *%ptr @@ -135,7 +135,7 @@ define void @f8(float *%base, float %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr float *%base, i64 -131073 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load float *%ptr %res = select i1 %cond, float %orig, float %alt store float %res, float *%ptr @@ -154,7 +154,7 @@ define void @f9(i64 %base, i64 %index, float %alt, i32 %limit) { %add1 = add i64 %base, %index %add2 = add i64 %add1, 4096 %ptr = inttoptr i64 %add2 to float * - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load float *%ptr %res = select i1 %cond, float %orig, float %alt store float %res, float *%ptr @@ -169,7 +169,7 @@ define void @f10(float *%ptr, float %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: ste {{%f[0-5]}}, 0(%r2) ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 
%limit, 420 %orig = load volatile float *%ptr %res = select i1 %cond, float %orig, float %alt store float %res, float *%ptr @@ -184,7 +184,7 @@ define void @f11(float *%ptr, float %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: ste %f0, 0(%r2) ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load float *%ptr %res = select i1 %cond, float %orig, float %alt store volatile float %res, float *%ptr @@ -204,7 +204,7 @@ define void @f12(float %alt, i32 %limit) { ; CHECK: br %r14 %ptr = alloca float call void @foo(float *%ptr) - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load float *%ptr %res = select i1 %cond, float %orig, float %alt store float %res, float *%ptr diff --git a/test/CodeGen/SystemZ/cond-store-06.ll b/test/CodeGen/SystemZ/cond-store-06.ll index 759a3e0..6668195 100644 --- a/test/CodeGen/SystemZ/cond-store-06.ll +++ b/test/CodeGen/SystemZ/cond-store-06.ll @@ -13,7 +13,7 @@ define void @f1(double *%ptr, double %alt, i32 %limit) { ; CHECK: std %f0, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load double *%ptr %res = select i1 %cond, double %orig, double %alt store double %res, double *%ptr @@ -29,7 +29,7 @@ define void @f2(double *%ptr, double %alt, i32 %limit) { ; CHECK: std %f0, 0(%r2) ; CHECK: [[LABEL]]: ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load double *%ptr %res = select i1 %cond, double %alt, double %orig store double %res, double *%ptr @@ -46,7 +46,7 @@ define void @f3(double *%base, double %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr double *%base, i64 511 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load double *%ptr %res = select i1 %cond, double %orig, double %alt store double %res, double *%ptr @@ -63,7 +63,7 @@ define void @f4(double *%base, double %alt, i32 %limit) { ; CHECK: 
[[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr double *%base, i64 512 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load double *%ptr %res = select i1 %cond, double %orig, double %alt store double %res, double *%ptr @@ -80,7 +80,7 @@ define void @f5(double *%base, double %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr double *%base, i64 65535 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load double *%ptr %res = select i1 %cond, double %orig, double %alt store double %res, double *%ptr @@ -99,7 +99,7 @@ define void @f6(double *%base, double %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr double *%base, i64 65536 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load double *%ptr %res = select i1 %cond, double %orig, double %alt store double %res, double *%ptr @@ -116,7 +116,7 @@ define void @f7(double *%base, double %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr double *%base, i64 -65536 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load double *%ptr %res = select i1 %cond, double %orig, double %alt store double %res, double *%ptr @@ -135,7 +135,7 @@ define void @f8(double *%base, double %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: br %r14 %ptr = getelementptr double *%base, i64 -65537 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load double *%ptr %res = select i1 %cond, double %orig, double %alt store double %res, double *%ptr @@ -154,7 +154,7 @@ define void @f9(i64 %base, i64 %index, double %alt, i32 %limit) { %add1 = add i64 %base, %index %add2 = add i64 %add1, 524287 %ptr = inttoptr i64 %add2 to double * - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load double *%ptr %res = select i1 %cond, double %orig, double %alt store double %res, double *%ptr @@ -169,7 +169,7 @@ define void 
@f10(double *%ptr, double %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: std {{%f[0-5]}}, 0(%r2) ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load volatile double *%ptr %res = select i1 %cond, double %orig, double %alt store double %res, double *%ptr @@ -184,7 +184,7 @@ define void @f11(double *%ptr, double %alt, i32 %limit) { ; CHECK: [[LABEL]]: ; CHECK: std %f0, 0(%r2) ; CHECK: br %r14 - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load double *%ptr %res = select i1 %cond, double %orig, double %alt store volatile double %res, double *%ptr @@ -204,7 +204,7 @@ define void @f12(double %alt, i32 %limit) { ; CHECK: br %r14 %ptr = alloca double call void @foo(double *%ptr) - %cond = icmp ult i32 %limit, 42 + %cond = icmp ult i32 %limit, 420 %orig = load double *%ptr %res = select i1 %cond, double %orig, double %alt store double %res, double *%ptr diff --git a/test/CodeGen/SystemZ/fp-cmp-01.ll b/test/CodeGen/SystemZ/fp-cmp-01.ll index 6a9598e..d7c0cce 100644 --- a/test/CodeGen/SystemZ/fp-cmp-01.ll +++ b/test/CodeGen/SystemZ/fp-cmp-01.ll @@ -1,6 +1,7 @@ -; Test 32-bit floating-point comparison. +; Test 32-bit floating-point comparison. The tests assume a z10 implementation +; of select, using conditional branches rather than LOCGR. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s declare float @foo() @@ -159,3 +160,160 @@ define i64 @f8(i64 %a, i64 %b, float %f) { %res = select i1 %cond, i64 %a, i64 %b ret i64 %res } + +; Check the comparison can be reversed if that allows CEB to be used, +; first with oeq. +define i64 @f9(i64 %a, i64 %b, float %f2, float *%ptr) { +; CHECK-LABEL: f9: +; CHECK: ceb %f0, 0(%r4) +; CHECK-NEXT: je {{\.L.*}} +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f1 = load float *%ptr + %cond = fcmp oeq float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then one. 
+define i64 @f10(i64 %a, i64 %b, float %f2, float *%ptr) { +; CHECK-LABEL: f10: +; CHECK: ceb %f0, 0(%r4) +; CHECK-NEXT: jlh {{\.L.*}} +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f1 = load float *%ptr + %cond = fcmp one float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then olt. +define i64 @f11(i64 %a, i64 %b, float %f2, float *%ptr) { +; CHECK-LABEL: f11: +; CHECK: ceb %f0, 0(%r4) +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f1 = load float *%ptr + %cond = fcmp olt float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ole. +define i64 @f12(i64 %a, i64 %b, float %f2, float *%ptr) { +; CHECK-LABEL: f12: +; CHECK: ceb %f0, 0(%r4) +; CHECK-NEXT: jhe {{\.L.*}} +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f1 = load float *%ptr + %cond = fcmp ole float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then oge. +define i64 @f13(i64 %a, i64 %b, float %f2, float *%ptr) { +; CHECK-LABEL: f13: +; CHECK: ceb %f0, 0(%r4) +; CHECK-NEXT: jle {{\.L.*}} +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f1 = load float *%ptr + %cond = fcmp oge float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ogt. +define i64 @f14(i64 %a, i64 %b, float %f2, float *%ptr) { +; CHECK-LABEL: f14: +; CHECK: ceb %f0, 0(%r4) +; CHECK-NEXT: jl {{\.L.*}} +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f1 = load float *%ptr + %cond = fcmp ogt float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ueq. +define i64 @f15(i64 %a, i64 %b, float %f2, float *%ptr) { +; CHECK-LABEL: f15: +; CHECK: ceb %f0, 0(%r4) +; CHECK-NEXT: jnlh {{\.L.*}} +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f1 = load float *%ptr + %cond = fcmp ueq float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then une. 
+define i64 @f16(i64 %a, i64 %b, float %f2, float *%ptr) { +; CHECK-LABEL: f16: +; CHECK: ceb %f0, 0(%r4) +; CHECK-NEXT: jne {{\.L.*}} +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f1 = load float *%ptr + %cond = fcmp une float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ult. +define i64 @f17(i64 %a, i64 %b, float %f2, float *%ptr) { +; CHECK-LABEL: f17: +; CHECK: ceb %f0, 0(%r4) +; CHECK-NEXT: jnle {{\.L.*}} +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f1 = load float *%ptr + %cond = fcmp ult float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ule. +define i64 @f18(i64 %a, i64 %b, float %f2, float *%ptr) { +; CHECK-LABEL: f18: +; CHECK: ceb %f0, 0(%r4) +; CHECK-NEXT: jnl {{\.L.*}} +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f1 = load float *%ptr + %cond = fcmp ule float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then uge. +define i64 @f19(i64 %a, i64 %b, float %f2, float *%ptr) { +; CHECK-LABEL: f19: +; CHECK: ceb %f0, 0(%r4) +; CHECK-NEXT: jnh {{\.L.*}} +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f1 = load float *%ptr + %cond = fcmp uge float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; ...then ugt. +define i64 @f20(i64 %a, i64 %b, float %f2, float *%ptr) { +; CHECK-LABEL: f20: +; CHECK: ceb %f0, 0(%r4) +; CHECK-NEXT: jnhe {{\.L.*}} +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f1 = load float *%ptr + %cond = fcmp ugt float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/fp-cmp-02.ll b/test/CodeGen/SystemZ/fp-cmp-02.ll index 309d12e..c61f04e 100644 --- a/test/CodeGen/SystemZ/fp-cmp-02.ll +++ b/test/CodeGen/SystemZ/fp-cmp-02.ll @@ -1,6 +1,7 @@ -; Test 64-bit floating-point comparison. +; Test 64-bit floating-point comparison. The tests assume a z10 implementation +; of select, using conditional branches rather than LOCGR. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s declare double @foo() @@ -159,3 +160,16 @@ define i64 @f8(i64 %a, i64 %b, double %f) { %res = select i1 %cond, i64 %a, i64 %b ret i64 %res } + +; Check the comparison can be reversed if that allows CDB to be used, +define i64 @f9(i64 %a, i64 %b, double %f2, double *%ptr) { +; CHECK-LABEL: f9: +; CHECK: cdb %f0, 0(%r4) +; CHECK-NEXT: jl {{\.L.*}} +; CHECK: lgr %r2, %r3 +; CHECK: br %r14 + %f1 = load double *%ptr + %cond = fcmp ogt double %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/fp-cmp-03.ll b/test/CodeGen/SystemZ/fp-cmp-03.ll index 0f71f4e..e777d00 100644 --- a/test/CodeGen/SystemZ/fp-cmp-03.ll +++ b/test/CodeGen/SystemZ/fp-cmp-03.ll @@ -1,6 +1,7 @@ -; Test 128-bit floating-point comparison. +; Test 128-bit floating-point comparison. The tests assume a z10 implementation +; of select, using conditional branches rather than LOCGR. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; There is no memory form of 128-bit comparison. define i64 @f1(i64 %a, i64 %b, fp128 *%ptr, float %f2) { diff --git a/test/CodeGen/SystemZ/fp-move-02.ll b/test/CodeGen/SystemZ/fp-move-02.ll index b4f0428..505ee8d 100644 --- a/test/CodeGen/SystemZ/fp-move-02.ll +++ b/test/CodeGen/SystemZ/fp-move-02.ll @@ -1,6 +1,7 @@ -; Test moves between FPRs and GPRs. +; Test moves between FPRs and GPRs. The 32-bit cases test the z10 +; implementation, which has no high-word support. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s declare i64 @foo() declare double @bar() @@ -63,11 +64,11 @@ define double @f5(i64 %a) { ; Test 128-bit moves from GPRs to FPRs. i128 isn't a legitimate type, ; so this goes through memory. -; FIXME: it would be better to use one MVC here. 
define void @f6(fp128 *%a, i128 *%b) { ; CHECK-LABEL: f6: ; CHECK: lg -; CHECK: mvc +; CHECK: lg +; CHECK: stg ; CHECK: stg ; CHECK: br %r14 %val = load i128 *%b diff --git a/test/CodeGen/SystemZ/fp-move-09.ll b/test/CodeGen/SystemZ/fp-move-09.ll new file mode 100644 index 0000000..52b2ee2 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-move-09.ll @@ -0,0 +1,62 @@ +; Test moves between FPRs and GPRs for z196 and above. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Check that moves from i32s to floats can use high registers. +define float @f1(i16 *%ptr) { +; CHECK-LABEL: f1: +; CHECK: llhh [[REG:%r[0-5]]], 0(%r2) +; CHECK: oihh [[REG]], 16256 +; CHECK: ldgr %f0, [[REG]] +; CHECK: br %r14 + %base = load i16 *%ptr + %ext = zext i16 %base to i32 + %full = or i32 %ext, 1065353216 + %res = bitcast i32 %full to float + ret float %res +} + +; Check that moves from floats to i32s can use high registers. +; This "store the low byte" technique is used by llvmpipe, for example. +define void @f2(float %val, i8 *%ptr) { +; CHECK-LABEL: f2: +; CHECK: lgdr [[REG:%r[0-5]]], %f0 +; CHECK: stch [[REG]], 0(%r2) +; CHECK: br %r14 + %res = bitcast float %val to i32 + %trunc = trunc i32 %res to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Like f2, but with a conditional store. +define void @f3(float %val, i8 *%ptr, i32 %which) { +; CHECK-LABEL: f3: +; CHECK: cijlh %r3, 0, +; CHECK: lgdr [[REG:%r[0-5]]], %f0 +; CHECK: stch [[REG]], 0(%r2) +; CHECK: br %r14 + %int = bitcast float %val to i32 + %trunc = trunc i32 %int to i8 + %old = load i8 *%ptr + %cmp = icmp eq i32 %which, 0 + %res = select i1 %cmp, i8 %trunc, i8 %old + store i8 %res, i8 *%ptr + ret void +} + +; ...and again with 16-bit memory. 
+define void @f4(float %val, i16 *%ptr, i32 %which) { +; CHECK-LABEL: f4: +; CHECK: cijlh %r3, 0, +; CHECK: lgdr [[REG:%r[0-5]]], %f0 +; CHECK: sthh [[REG]], 0(%r2) +; CHECK: br %r14 + %int = bitcast float %val to i32 + %trunc = trunc i32 %int to i16 + %old = load i16 *%ptr + %cmp = icmp eq i32 %which, 0 + %res = select i1 %cmp, i16 %trunc, i16 %old + store i16 %res, i16 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/fp-round-01.ll b/test/CodeGen/SystemZ/fp-round-01.ll index f2530dc..565db5a 100644 --- a/test/CodeGen/SystemZ/fp-round-01.ll +++ b/test/CodeGen/SystemZ/fp-round-01.ll @@ -1,9 +1,8 @@ -; Test rint()-like rounding, with non-integer values triggering an -; inexact condition. +; Test rounding functions for z10. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s -; Test f32. +; Test rint for f32. declare float @llvm.rint.f32(float %f) define float @f1(float %f) { ; CHECK-LABEL: f1: @@ -13,7 +12,7 @@ define float @f1(float %f) { ret float %res } -; Test f64. +; Test rint for f64. declare double @llvm.rint.f64(double %f) define double @f2(double %f) { ; CHECK-LABEL: f2: @@ -23,7 +22,7 @@ define double @f2(double %f) { ret double %res } -; Test f128. +; Test rint for f128. declare fp128 @llvm.rint.f128(fp128 %f) define void @f3(fp128 *%ptr) { ; CHECK-LABEL: f3: @@ -34,3 +33,118 @@ define void @f3(fp128 *%ptr) { store fp128 %res, fp128 *%ptr ret void } + +; Test nearbyint for f32. +declare float @llvm.nearbyint.f32(float %f) +define float @f4(float %f) { +; CHECK-LABEL: f4: +; CHECK: brasl %r14, nearbyintf@PLT +; CHECK: br %r14 + %res = call float @llvm.nearbyint.f32(float %f) + ret float %res +} + +; Test nearbyint for f64. 
+declare double @llvm.nearbyint.f64(double %f) +define double @f5(double %f) { +; CHECK-LABEL: f5: +; CHECK: brasl %r14, nearbyint@PLT +; CHECK: br %r14 + %res = call double @llvm.nearbyint.f64(double %f) + ret double %res +} + +; Test nearbyint for f128: omitted for now because we cannot handle +; indirect arguments. + +; Test floor for f32. +declare float @llvm.floor.f32(float %f) +define float @f7(float %f) { +; CHECK-LABEL: f7: +; CHECK: brasl %r14, floorf@PLT +; CHECK: br %r14 + %res = call float @llvm.floor.f32(float %f) + ret float %res +} + +; Test floor for f64. +declare double @llvm.floor.f64(double %f) +define double @f8(double %f) { +; CHECK-LABEL: f8: +; CHECK: brasl %r14, floor@PLT +; CHECK: br %r14 + %res = call double @llvm.floor.f64(double %f) + ret double %res +} + +; Test floor for f128: omitted for now because we cannot handle +; indirect arguments. + +; Test ceil for f32. +declare float @llvm.ceil.f32(float %f) +define float @f10(float %f) { +; CHECK-LABEL: f10: +; CHECK: brasl %r14, ceilf@PLT +; CHECK: br %r14 + %res = call float @llvm.ceil.f32(float %f) + ret float %res +} + +; Test ceil for f64. +declare double @llvm.ceil.f64(double %f) +define double @f11(double %f) { +; CHECK-LABEL: f11: +; CHECK: brasl %r14, ceil@PLT +; CHECK: br %r14 + %res = call double @llvm.ceil.f64(double %f) + ret double %res +} + +; Test ceil for f128: omitted for now because we cannot handle +; indirect arguments. + +; Test trunc for f32. +declare float @llvm.trunc.f32(float %f) +define float @f13(float %f) { +; CHECK-LABEL: f13: +; CHECK: brasl %r14, truncf@PLT +; CHECK: br %r14 + %res = call float @llvm.trunc.f32(float %f) + ret float %res +} + +; Test trunc for f64. 
+declare double @llvm.trunc.f64(double %f) +define double @f14(double %f) { +; CHECK-LABEL: f14: +; CHECK: brasl %r14, trunc@PLT +; CHECK: br %r14 + %res = call double @llvm.trunc.f64(double %f) + ret double %res +} + +; Test trunc for f128: omitted for now because we cannot handle +; indirect arguments. + +; Test round for f32. +declare float @llvm.round.f32(float %f) +define float @f16(float %f) { +; CHECK-LABEL: f16: +; CHECK: brasl %r14, roundf@PLT +; CHECK: br %r14 + %res = call float @llvm.round.f32(float %f) + ret float %res +} + +; Test round for f64. +declare double @llvm.round.f64(double %f) +define double @f17(double %f) { +; CHECK-LABEL: f17: +; CHECK: brasl %r14, round@PLT +; CHECK: br %r14 + %res = call double @llvm.round.f64(double %f) + ret double %res +} + +; Test round for f128: omitted for now because we cannot handle +; indirect arguments. diff --git a/test/CodeGen/SystemZ/fp-round-02.ll b/test/CodeGen/SystemZ/fp-round-02.ll new file mode 100644 index 0000000..d79c9c4 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-round-02.ll @@ -0,0 +1,195 @@ +; Test rounding functions for z196 and above. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Test rint for f32. +declare float @llvm.rint.f32(float %f) +define float @f1(float %f) { +; CHECK-LABEL: f1: +; CHECK: fiebr %f0, 0, %f0 +; CHECK: br %r14 + %res = call float @llvm.rint.f32(float %f) + ret float %res +} + +; Test rint for f64. +declare double @llvm.rint.f64(double %f) +define double @f2(double %f) { +; CHECK-LABEL: f2: +; CHECK: fidbr %f0, 0, %f0 +; CHECK: br %r14 + %res = call double @llvm.rint.f64(double %f) + ret double %res +} + +; Test rint for f128. +declare fp128 @llvm.rint.f128(fp128 %f) +define void @f3(fp128 *%ptr) { +; CHECK-LABEL: f3: +; CHECK: fixbr %f0, 0, %f0 +; CHECK: br %r14 + %src = load fp128 *%ptr + %res = call fp128 @llvm.rint.f128(fp128 %src) + store fp128 %res, fp128 *%ptr + ret void +} + +; Test nearbyint for f32. 
+declare float @llvm.nearbyint.f32(float %f) +define float @f4(float %f) { +; CHECK-LABEL: f4: +; CHECK: fiebra %f0, 0, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.nearbyint.f32(float %f) + ret float %res +} + +; Test nearbyint for f64. +declare double @llvm.nearbyint.f64(double %f) +define double @f5(double %f) { +; CHECK-LABEL: f5: +; CHECK: fidbra %f0, 0, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.nearbyint.f64(double %f) + ret double %res +} + +; Test nearbyint for f128. +declare fp128 @llvm.nearbyint.f128(fp128 %f) +define void @f6(fp128 *%ptr) { +; CHECK-LABEL: f6: +; CHECK: fixbra %f0, 0, %f0, 4 +; CHECK: br %r14 + %src = load fp128 *%ptr + %res = call fp128 @llvm.nearbyint.f128(fp128 %src) + store fp128 %res, fp128 *%ptr + ret void +} + +; Test floor for f32. +declare float @llvm.floor.f32(float %f) +define float @f7(float %f) { +; CHECK-LABEL: f7: +; CHECK: fiebra %f0, 7, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.floor.f32(float %f) + ret float %res +} + +; Test floor for f64. +declare double @llvm.floor.f64(double %f) +define double @f8(double %f) { +; CHECK-LABEL: f8: +; CHECK: fidbra %f0, 7, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.floor.f64(double %f) + ret double %res +} + +; Test floor for f128. +declare fp128 @llvm.floor.f128(fp128 %f) +define void @f9(fp128 *%ptr) { +; CHECK-LABEL: f9: +; CHECK: fixbra %f0, 7, %f0, 4 +; CHECK: br %r14 + %src = load fp128 *%ptr + %res = call fp128 @llvm.floor.f128(fp128 %src) + store fp128 %res, fp128 *%ptr + ret void +} + +; Test ceil for f32. +declare float @llvm.ceil.f32(float %f) +define float @f10(float %f) { +; CHECK-LABEL: f10: +; CHECK: fiebra %f0, 6, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.ceil.f32(float %f) + ret float %res +} + +; Test ceil for f64. 
+declare double @llvm.ceil.f64(double %f) +define double @f11(double %f) { +; CHECK-LABEL: f11: +; CHECK: fidbra %f0, 6, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.ceil.f64(double %f) + ret double %res +} + +; Test ceil for f128. +declare fp128 @llvm.ceil.f128(fp128 %f) +define void @f12(fp128 *%ptr) { +; CHECK-LABEL: f12: +; CHECK: fixbra %f0, 6, %f0, 4 +; CHECK: br %r14 + %src = load fp128 *%ptr + %res = call fp128 @llvm.ceil.f128(fp128 %src) + store fp128 %res, fp128 *%ptr + ret void +} + +; Test trunc for f32. +declare float @llvm.trunc.f32(float %f) +define float @f13(float %f) { +; CHECK-LABEL: f13: +; CHECK: fiebra %f0, 5, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.trunc.f32(float %f) + ret float %res +} + +; Test trunc for f64. +declare double @llvm.trunc.f64(double %f) +define double @f14(double %f) { +; CHECK-LABEL: f14: +; CHECK: fidbra %f0, 5, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.trunc.f64(double %f) + ret double %res +} + +; Test trunc for f128. +declare fp128 @llvm.trunc.f128(fp128 %f) +define void @f15(fp128 *%ptr) { +; CHECK-LABEL: f15: +; CHECK: fixbra %f0, 5, %f0, 4 +; CHECK: br %r14 + %src = load fp128 *%ptr + %res = call fp128 @llvm.trunc.f128(fp128 %src) + store fp128 %res, fp128 *%ptr + ret void +} + +; Test round for f32. +declare float @llvm.round.f32(float %f) +define float @f16(float %f) { +; CHECK-LABEL: f16: +; CHECK: fiebra %f0, 1, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.round.f32(float %f) + ret float %res +} + +; Test round for f64. +declare double @llvm.round.f64(double %f) +define double @f17(double %f) { +; CHECK-LABEL: f17: +; CHECK: fidbra %f0, 1, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.round.f64(double %f) + ret double %res +} + +; Test round for f128. 
+declare fp128 @llvm.round.f128(fp128 %f) +define void @f18(fp128 *%ptr) { +; CHECK-LABEL: f18: +; CHECK: fixbra %f0, 1, %f0, 4 +; CHECK: br %r14 + %src = load fp128 *%ptr + %res = call fp128 @llvm.round.f128(fp128 %src) + store fp128 %res, fp128 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/fp-sqrt-01.ll b/test/CodeGen/SystemZ/fp-sqrt-01.ll index b6568d6..7465af4 100644 --- a/test/CodeGen/SystemZ/fp-sqrt-01.ll +++ b/test/CodeGen/SystemZ/fp-sqrt-01.ll @@ -2,7 +2,8 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -declare float @llvm.sqrt.f32(float %f) +declare float @llvm.sqrt.f32(float) +declare float @sqrtf(float) ; Check register square root. define float @f1(float %val) { @@ -152,3 +153,17 @@ define void @f7(float *%ptr) { ret void } + +; Check that a call to the normal sqrtf function is lowered. +define float @f8(float %dummy, float %val) { +; CHECK-LABEL: f8: +; CHECK: sqebr %f0, %f2 +; CHECK: cebr %f0, %f0 +; CHECK: jo [[LABEL:\.L.*]] +; CHECK: br %r14 +; CHECK: [[LABEL]]: +; CHECK: ler %f0, %f2 +; CHECK: jg sqrtf@PLT + %res = tail call float @sqrtf(float %val) + ret float %res +} diff --git a/test/CodeGen/SystemZ/fp-sqrt-02.ll b/test/CodeGen/SystemZ/fp-sqrt-02.ll index b07a2c6..66ffd19 100644 --- a/test/CodeGen/SystemZ/fp-sqrt-02.ll +++ b/test/CodeGen/SystemZ/fp-sqrt-02.ll @@ -3,6 +3,7 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s declare double @llvm.sqrt.f64(double %f) +declare double @sqrt(double) ; Check register square root. define double @f1(double %val) { @@ -152,3 +153,17 @@ define void @f7(double *%ptr) { ret void } + +; Check that a call to the normal sqrt function is lowered. 
+define double @f8(double %dummy, double %val) { +; CHECK-LABEL: f8: +; CHECK: sqdbr %f0, %f2 +; CHECK: cdbr %f0, %f0 +; CHECK: jo [[LABEL:\.L.*]] +; CHECK: br %r14 +; CHECK: [[LABEL]]: +; CHECK: ldr %f0, %f2 +; CHECK: jg sqrt@PLT + %res = tail call double @sqrt(double %val) + ret double %res +} diff --git a/test/CodeGen/SystemZ/frame-13.ll b/test/CodeGen/SystemZ/frame-13.ll index 1d38354..393850f 100644 --- a/test/CodeGen/SystemZ/frame-13.ll +++ b/test/CodeGen/SystemZ/frame-13.ll @@ -1,8 +1,11 @@ ; Test the handling of base + 12-bit displacement addresses for large frames, -; in cases where no 20-bit form exists. +; in cases where no 20-bit form exists. The tests here assume z10 register +; pressure, without the high words being available. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s -; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \ +; RUN: FileCheck -check-prefix=CHECK-NOFP %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-fp-elim | \ +; RUN: FileCheck -check-prefix=CHECK-FP %s ; This file tests what happens when a displacement is converted from ; being relative to the start of a frame object to being relative to @@ -182,17 +185,16 @@ define void @f8() { } ; Check a case where the original displacement is out of range. The backend -; should force an LAY from the outset. We don't yet do any kind of anchor -; optimization, so there should be no offset on the MVHI itself. +; should force STY to be used instead. 
define void @f9() { ; CHECK-NOFP-LABEL: f9: -; CHECK-NOFP: lay %r1, 12296(%r15) -; CHECK-NOFP: mvhi 0(%r1), 42 +; CHECK-NOFP: lhi [[TMP:%r[0-5]]], 42 +; CHECK-NOFP: sty [[TMP]], 12296(%r15) ; CHECK-NOFP: br %r14 ; ; CHECK-FP-LABEL: f9: -; CHECK-FP: lay %r1, 12296(%r11) -; CHECK-FP: mvhi 0(%r1), 42 +; CHECK-FP: lhi [[TMP:%r[0-5]]], 42 +; CHECK-FP: sty [[TMP]], 12296(%r11) ; CHECK-FP: br %r14 %region1 = alloca [2006 x i32], align 8 %region2 = alloca [2006 x i32], align 8 diff --git a/test/CodeGen/SystemZ/frame-14.ll b/test/CodeGen/SystemZ/frame-14.ll index 22a45ee..3b48179 100644 --- a/test/CodeGen/SystemZ/frame-14.ll +++ b/test/CodeGen/SystemZ/frame-14.ll @@ -1,9 +1,13 @@ ; Test the handling of base + displacement addresses for large frames, ; in cases where both 12-bit and 20-bit displacements are allowed. +; The tests here assume z10 register pressure, without the high words +; being available. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \ +; RUN: FileCheck -check-prefix=CHECK-NOFP %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-fp-elim | \ +; RUN: FileCheck -check-prefix=CHECK-FP %s ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s -; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s - ; This file tests what happens when a displacement is converted from ; being relative to the start of a frame object to being relative to ; the frame itself. In some cases the test is only possible if two diff --git a/test/CodeGen/SystemZ/frame-15.ll b/test/CodeGen/SystemZ/frame-15.ll index d8b291d..b3c95e7 100644 --- a/test/CodeGen/SystemZ/frame-15.ll +++ b/test/CodeGen/SystemZ/frame-15.ll @@ -1,8 +1,11 @@ ; Test the handling of base + index + 12-bit displacement addresses for -; large frames, in cases where no 20-bit form exists. +; large frames, in cases where no 20-bit form exists. The tests here +; assume z10 register pressure, without the high words being available. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s -; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \ +; RUN: FileCheck -check-prefix=CHECK-NOFP %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-fp-elim | \ +; RUN: FileCheck -check-prefix=CHECK-FP %s declare void @foo(float *%ptr1, float *%ptr2) diff --git a/test/CodeGen/SystemZ/frame-16.ll b/test/CodeGen/SystemZ/frame-16.ll index 9f43b49..f7e2dfa 100644 --- a/test/CodeGen/SystemZ/frame-16.ll +++ b/test/CodeGen/SystemZ/frame-16.ll @@ -1,8 +1,12 @@ ; Test the handling of base + index + displacement addresses for large frames, ; in cases where both 12-bit and 20-bit displacements are allowed. +; The tests here assume z10 register pressure, without the high words +; being available. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s -; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \ +; RUN: FileCheck -check-prefix=CHECK-NOFP %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-fp-elim | \ +; RUN: FileCheck -check-prefix=CHECK-FP %s ; This file tests what happens when a displacement is converted from ; being relative to the start of a frame object to being relative to diff --git a/test/CodeGen/SystemZ/frame-18.ll b/test/CodeGen/SystemZ/frame-18.ll index 57d6f7d..21dfc12 100644 --- a/test/CodeGen/SystemZ/frame-18.ll +++ b/test/CodeGen/SystemZ/frame-18.ll @@ -1,6 +1,7 @@ -; Test spilling of GPRs. +; Test spilling of GPRs. The tests here assume z10 register pressure, +; without the high words being available. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; We need to allocate a 4-byte spill slot, rounded to 8 bytes. 
The frame ; size should be exactly 160 + 8 = 168. diff --git a/test/CodeGen/SystemZ/insert-06.ll b/test/CodeGen/SystemZ/insert-06.ll index 8366b2c..edcd0c5 100644 --- a/test/CodeGen/SystemZ/insert-06.ll +++ b/test/CodeGen/SystemZ/insert-06.ll @@ -165,3 +165,16 @@ define i64 @f13(i64 %a, i32 %b) { %or = or i64 %shift, %low ret i64 %or } + +; We previously wrongly removed the upper AND as dead. +define i64 @f14(i64 %a, i64 %b) { +; CHECK-LABEL: f14: +; CHECK: risbg {{%r[0-5]}}, %r2, 6, 134, 0 +; CHECK: br %r14 + %and1 = and i64 %a, 144115188075855872 + %and2 = and i64 %b, 15 + %or = or i64 %and1, %and2 + %res = icmp eq i64 %or, 0 + %ext = sext i1 %res to i64 + ret i64 %ext +} diff --git a/test/CodeGen/SystemZ/int-abs-01.ll b/test/CodeGen/SystemZ/int-abs-01.ll new file mode 100644 index 0000000..40fb611 --- /dev/null +++ b/test/CodeGen/SystemZ/int-abs-01.ll @@ -0,0 +1,83 @@ +; Test integer absolute. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test i32->i32 absolute using slt. +define i32 @f1(i32 %val) { +; CHECK-LABEL: f1: +; CHECK: lpr %r2, %r2 +; CHECK: br %r14 + %cmp = icmp slt i32 %val, 0 + %neg = sub i32 0, %val + %res = select i1 %cmp, i32 %neg, i32 %val + ret i32 %res +} + +; Test i32->i32 absolute using sle. +define i32 @f2(i32 %val) { +; CHECK-LABEL: f2: +; CHECK: lpr %r2, %r2 +; CHECK: br %r14 + %cmp = icmp sle i32 %val, 0 + %neg = sub i32 0, %val + %res = select i1 %cmp, i32 %neg, i32 %val + ret i32 %res +} + +; Test i32->i32 absolute using sgt. +define i32 @f3(i32 %val) { +; CHECK-LABEL: f3: +; CHECK: lpr %r2, %r2 +; CHECK: br %r14 + %cmp = icmp sgt i32 %val, 0 + %neg = sub i32 0, %val + %res = select i1 %cmp, i32 %val, i32 %neg + ret i32 %res +} + +; Test i32->i32 absolute using sge. +define i32 @f4(i32 %val) { +; CHECK-LABEL: f4: +; CHECK: lpr %r2, %r2 +; CHECK: br %r14 + %cmp = icmp sge i32 %val, 0 + %neg = sub i32 0, %val + %res = select i1 %cmp, i32 %val, i32 %neg + ret i32 %res +} + +; Test i32->i64 absolute. 
+define i64 @f5(i32 %val) { +; CHECK-LABEL: f5: +; CHECK: lpgfr %r2, %r2 +; CHECK: br %r14 + %ext = sext i32 %val to i64 + %cmp = icmp slt i64 %ext, 0 + %neg = sub i64 0, %ext + %res = select i1 %cmp, i64 %neg, i64 %ext + ret i64 %res +} + +; Test i32->i64 absolute that uses an "in-register" form of sign extension. +define i64 @f6(i64 %val) { +; CHECK-LABEL: f6: +; CHECK: lpgfr %r2, %r2 +; CHECK: br %r14 + %trunc = trunc i64 %val to i32 + %ext = sext i32 %trunc to i64 + %cmp = icmp slt i64 %ext, 0 + %neg = sub i64 0, %ext + %res = select i1 %cmp, i64 %neg, i64 %ext + ret i64 %res +} + +; Test i64 absolute. +define i64 @f7(i64 %val) { +; CHECK-LABEL: f7: +; CHECK: lpgr %r2, %r2 +; CHECK: br %r14 + %cmp = icmp slt i64 %val, 0 + %neg = sub i64 0, %val + %res = select i1 %cmp, i64 %neg, i64 %val + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/int-add-09.ll b/test/CodeGen/SystemZ/int-add-09.ll index 717fed0..fd151a7 100644 --- a/test/CodeGen/SystemZ/int-add-09.ll +++ b/test/CodeGen/SystemZ/int-add-09.ll @@ -7,7 +7,7 @@ define void @f1(i128 *%aptr) { ; CHECK-LABEL: f1: ; CHECK: algfi {{%r[0-5]}}, 1 -; CHECK: alcgr +; CHECK: alcg ; CHECK: br %r14 %a = load i128 *%aptr %xor = xor i128 %a, 128 @@ -20,7 +20,7 @@ define void @f1(i128 *%aptr) { define void @f2(i128 *%aptr) { ; CHECK-LABEL: f2: ; CHECK: algfi {{%r[0-5]}}, 4294967295 -; CHECK: alcgr +; CHECK: alcg ; CHECK: br %r14 %a = load i128 *%aptr %xor = xor i128 %a, 128 @@ -33,7 +33,7 @@ define void @f2(i128 *%aptr) { define void @f3(i128 *%aptr) { ; CHECK-LABEL: f3: ; CHECK: algr -; CHECK: alcgr +; CHECK: alcg ; CHECK: br %r14 %a = load i128 *%aptr %xor = xor i128 %a, 128 @@ -46,7 +46,7 @@ define void @f3(i128 *%aptr) { define void @f4(i128 *%aptr) { ; CHECK-LABEL: f4: ; CHECK: algr -; CHECK: alcgr +; CHECK: alcg ; CHECK: br %r14 %a = load i128 *%aptr %xor = xor i128 %a, 128 diff --git a/test/CodeGen/SystemZ/int-add-10.ll b/test/CodeGen/SystemZ/int-add-10.ll index 66a275b..01d0a66 100644 --- 
a/test/CodeGen/SystemZ/int-add-10.ll +++ b/test/CodeGen/SystemZ/int-add-10.ll @@ -7,7 +7,7 @@ define void @f1(i128 *%aptr, i32 %b) { ; CHECK-LABEL: f1: ; CHECK: algfr {{%r[0-5]}}, %r3 -; CHECK: alcgr +; CHECK: alcg ; CHECK: br %r14 %a = load i128 *%aptr %xor = xor i128 %a, 127 @@ -21,7 +21,7 @@ define void @f1(i128 *%aptr, i32 %b) { define void @f2(i128 *%aptr, i64 %b) { ; CHECK-LABEL: f2: ; CHECK: algfr {{%r[0-5]}}, %r3 -; CHECK: alcgr +; CHECK: alcg ; CHECK: br %r14 %a = load i128 *%aptr %xor = xor i128 %a, 127 @@ -37,7 +37,7 @@ define void @f2(i128 *%aptr, i64 %b) { define void @f3(i128 *%aptr, i64 %b) { ; CHECK-LABEL: f3: ; CHECK: algfr {{%r[0-5]}}, %r3 -; CHECK: alcgr +; CHECK: alcg ; CHECK: br %r14 %a = load i128 *%aptr %xor = xor i128 %a, 127 @@ -52,7 +52,7 @@ define void @f3(i128 *%aptr, i64 %b) { define void @f4(i128 *%aptr, i32 *%bsrc) { ; CHECK-LABEL: f4: ; CHECK: algf {{%r[0-5]}}, 0(%r3) -; CHECK: alcgr +; CHECK: alcg ; CHECK: br %r14 %a = load i128 *%aptr %xor = xor i128 %a, 127 @@ -67,7 +67,7 @@ define void @f4(i128 *%aptr, i32 *%bsrc) { define void @f5(i128 *%aptr, i32 *%bsrc) { ; CHECK-LABEL: f5: ; CHECK: algf {{%r[0-5]}}, 524284(%r3) -; CHECK: alcgr +; CHECK: alcg ; CHECK: br %r14 %a = load i128 *%aptr %xor = xor i128 %a, 127 @@ -85,7 +85,7 @@ define void @f6(i128 *%aptr, i32 *%bsrc) { ; CHECK-LABEL: f6: ; CHECK: agfi %r3, 524288 ; CHECK: algf {{%r[0-5]}}, 0(%r3) -; CHECK: alcgr +; CHECK: alcg ; CHECK: br %r14 %a = load i128 *%aptr %xor = xor i128 %a, 127 @@ -101,7 +101,7 @@ define void @f6(i128 *%aptr, i32 *%bsrc) { define void @f7(i128 *%aptr, i32 *%bsrc) { ; CHECK-LABEL: f7: ; CHECK: algf {{%r[0-5]}}, -4(%r3) -; CHECK: alcgr +; CHECK: alcg ; CHECK: br %r14 %a = load i128 *%aptr %xor = xor i128 %a, 127 @@ -117,7 +117,7 @@ define void @f7(i128 *%aptr, i32 *%bsrc) { define void @f8(i128 *%aptr, i32 *%bsrc) { ; CHECK-LABEL: f8: ; CHECK: algf {{%r[0-5]}}, -524288(%r3) -; CHECK: alcgr +; CHECK: alcg ; CHECK: br %r14 %a = load i128 *%aptr %xor = xor 
i128 %a, 127 @@ -135,7 +135,7 @@ define void @f9(i128 *%aptr, i32 *%bsrc) { ; CHECK-LABEL: f9: ; CHECK: agfi %r3, -524292 ; CHECK: algf {{%r[0-5]}}, 0(%r3) -; CHECK: alcgr +; CHECK: alcg ; CHECK: br %r14 %a = load i128 *%aptr %xor = xor i128 %a, 127 diff --git a/test/CodeGen/SystemZ/int-add-11.ll b/test/CodeGen/SystemZ/int-add-11.ll index 6c617ba..679c206 100644 --- a/test/CodeGen/SystemZ/int-add-11.ll +++ b/test/CodeGen/SystemZ/int-add-11.ll @@ -1,6 +1,7 @@ -; Test 32-bit additions of constants to memory. +; Test 32-bit additions of constants to memory. The tests here +; assume z10 register pressure, without the high words being available. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; Check additions of 1. define void @f1(i32 *%ptr) { @@ -126,3 +127,169 @@ define void @f10(i64 %base, i64 %index) { store i32 %add, i32 *%ptr ret void } + +; Check that adding 127 to a spilled value can use ASI. +define void @f11(i32 *%ptr, i32 %sel) { +; CHECK-LABEL: f11: +; CHECK: asi {{[0-9]+}}(%r15), 127 +; CHECK: br %r14 +entry: + %val0 = load volatile i32 *%ptr + %val1 = load volatile i32 *%ptr + %val2 = load volatile i32 *%ptr + %val3 = load volatile i32 *%ptr + %val4 = load volatile i32 *%ptr + %val5 = load volatile i32 *%ptr + %val6 = load volatile i32 *%ptr + %val7 = load volatile i32 *%ptr + %val8 = load volatile i32 *%ptr + %val9 = load volatile i32 *%ptr + %val10 = load volatile i32 *%ptr + %val11 = load volatile i32 *%ptr + %val12 = load volatile i32 *%ptr + %val13 = load volatile i32 *%ptr + %val14 = load volatile i32 *%ptr + %val15 = load volatile i32 *%ptr + + %test = icmp ne i32 %sel, 0 + br i1 %test, label %add, label %store + +add: + %add0 = add i32 %val0, 127 + %add1 = add i32 %val1, 127 + %add2 = add i32 %val2, 127 + %add3 = add i32 %val3, 127 + %add4 = add i32 %val4, 127 + %add5 = add i32 %val5, 127 + %add6 = add i32 %val6, 127 + %add7 = add i32 %val7, 127 + %add8 = add i32 %val8, 
127 + %add9 = add i32 %val9, 127 + %add10 = add i32 %val10, 127 + %add11 = add i32 %val11, 127 + %add12 = add i32 %val12, 127 + %add13 = add i32 %val13, 127 + %add14 = add i32 %val14, 127 + %add15 = add i32 %val15, 127 + br label %store + +store: + %new0 = phi i32 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i32 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i32 [ %val2, %entry ], [ %add2, %add ] + %new3 = phi i32 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i32 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i32 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i32 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i32 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i32 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i32 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i32 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i32 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i32 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i32 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i32 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i32 [ %val15, %entry ], [ %add15, %add ] + + store volatile i32 %new0, i32 *%ptr + store volatile i32 %new1, i32 *%ptr + store volatile i32 %new2, i32 *%ptr + store volatile i32 %new3, i32 *%ptr + store volatile i32 %new4, i32 *%ptr + store volatile i32 %new5, i32 *%ptr + store volatile i32 %new6, i32 *%ptr + store volatile i32 %new7, i32 *%ptr + store volatile i32 %new8, i32 *%ptr + store volatile i32 %new9, i32 *%ptr + store volatile i32 %new10, i32 *%ptr + store volatile i32 %new11, i32 *%ptr + store volatile i32 %new12, i32 *%ptr + store volatile i32 %new13, i32 *%ptr + store volatile i32 %new14, i32 *%ptr + store volatile i32 %new15, i32 *%ptr + + ret void +} + +; Check that adding -128 to a spilled value can use ASI. 
+define void @f12(i32 *%ptr, i32 %sel) { +; CHECK-LABEL: f12: +; CHECK: asi {{[0-9]+}}(%r15), -128 +; CHECK: br %r14 +entry: + %val0 = load volatile i32 *%ptr + %val1 = load volatile i32 *%ptr + %val2 = load volatile i32 *%ptr + %val3 = load volatile i32 *%ptr + %val4 = load volatile i32 *%ptr + %val5 = load volatile i32 *%ptr + %val6 = load volatile i32 *%ptr + %val7 = load volatile i32 *%ptr + %val8 = load volatile i32 *%ptr + %val9 = load volatile i32 *%ptr + %val10 = load volatile i32 *%ptr + %val11 = load volatile i32 *%ptr + %val12 = load volatile i32 *%ptr + %val13 = load volatile i32 *%ptr + %val14 = load volatile i32 *%ptr + %val15 = load volatile i32 *%ptr + + %test = icmp ne i32 %sel, 0 + br i1 %test, label %add, label %store + +add: + %add0 = add i32 %val0, -128 + %add1 = add i32 %val1, -128 + %add2 = add i32 %val2, -128 + %add3 = add i32 %val3, -128 + %add4 = add i32 %val4, -128 + %add5 = add i32 %val5, -128 + %add6 = add i32 %val6, -128 + %add7 = add i32 %val7, -128 + %add8 = add i32 %val8, -128 + %add9 = add i32 %val9, -128 + %add10 = add i32 %val10, -128 + %add11 = add i32 %val11, -128 + %add12 = add i32 %val12, -128 + %add13 = add i32 %val13, -128 + %add14 = add i32 %val14, -128 + %add15 = add i32 %val15, -128 + br label %store + +store: + %new0 = phi i32 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i32 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i32 [ %val2, %entry ], [ %add2, %add ] + %new3 = phi i32 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i32 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i32 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i32 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i32 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i32 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i32 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i32 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i32 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i32 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i32 [ 
%val13, %entry ], [ %add13, %add ] + %new14 = phi i32 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i32 [ %val15, %entry ], [ %add15, %add ] + + store volatile i32 %new0, i32 *%ptr + store volatile i32 %new1, i32 *%ptr + store volatile i32 %new2, i32 *%ptr + store volatile i32 %new3, i32 *%ptr + store volatile i32 %new4, i32 *%ptr + store volatile i32 %new5, i32 *%ptr + store volatile i32 %new6, i32 *%ptr + store volatile i32 %new7, i32 *%ptr + store volatile i32 %new8, i32 *%ptr + store volatile i32 %new9, i32 *%ptr + store volatile i32 %new10, i32 *%ptr + store volatile i32 %new11, i32 *%ptr + store volatile i32 %new12, i32 *%ptr + store volatile i32 %new13, i32 *%ptr + store volatile i32 %new14, i32 *%ptr + store volatile i32 %new15, i32 *%ptr + + ret void +} diff --git a/test/CodeGen/SystemZ/int-add-12.ll b/test/CodeGen/SystemZ/int-add-12.ll index ef4dc39..741cce1 100644 --- a/test/CodeGen/SystemZ/int-add-12.ll +++ b/test/CodeGen/SystemZ/int-add-12.ll @@ -126,3 +126,169 @@ define void @f10(i64 %base, i64 %index) { store i64 %add, i64 *%ptr ret void } + +; Check that adding 127 to a spilled value can use AGSI. 
+define void @f11(i64 *%ptr, i32 %sel) { +; CHECK-LABEL: f11: +; CHECK: agsi {{[0-9]+}}(%r15), 127 +; CHECK: br %r14 +entry: + %val0 = load volatile i64 *%ptr + %val1 = load volatile i64 *%ptr + %val2 = load volatile i64 *%ptr + %val3 = load volatile i64 *%ptr + %val4 = load volatile i64 *%ptr + %val5 = load volatile i64 *%ptr + %val6 = load volatile i64 *%ptr + %val7 = load volatile i64 *%ptr + %val8 = load volatile i64 *%ptr + %val9 = load volatile i64 *%ptr + %val10 = load volatile i64 *%ptr + %val11 = load volatile i64 *%ptr + %val12 = load volatile i64 *%ptr + %val13 = load volatile i64 *%ptr + %val14 = load volatile i64 *%ptr + %val15 = load volatile i64 *%ptr + + %test = icmp ne i32 %sel, 0 + br i1 %test, label %add, label %store + +add: + %add0 = add i64 %val0, 127 + %add1 = add i64 %val1, 127 + %add2 = add i64 %val2, 127 + %add3 = add i64 %val3, 127 + %add4 = add i64 %val4, 127 + %add5 = add i64 %val5, 127 + %add6 = add i64 %val6, 127 + %add7 = add i64 %val7, 127 + %add8 = add i64 %val8, 127 + %add9 = add i64 %val9, 127 + %add10 = add i64 %val10, 127 + %add11 = add i64 %val11, 127 + %add12 = add i64 %val12, 127 + %add13 = add i64 %val13, 127 + %add14 = add i64 %val14, 127 + %add15 = add i64 %val15, 127 + br label %store + +store: + %new0 = phi i64 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i64 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i64 [ %val2, %entry ], [ %add2, %add ] + %new3 = phi i64 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i64 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i64 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i64 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i64 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i64 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i64 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i64 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i64 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i64 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i64 [ %val13, %entry ], [ 
%add13, %add ] + %new14 = phi i64 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i64 [ %val15, %entry ], [ %add15, %add ] + + store volatile i64 %new0, i64 *%ptr + store volatile i64 %new1, i64 *%ptr + store volatile i64 %new2, i64 *%ptr + store volatile i64 %new3, i64 *%ptr + store volatile i64 %new4, i64 *%ptr + store volatile i64 %new5, i64 *%ptr + store volatile i64 %new6, i64 *%ptr + store volatile i64 %new7, i64 *%ptr + store volatile i64 %new8, i64 *%ptr + store volatile i64 %new9, i64 *%ptr + store volatile i64 %new10, i64 *%ptr + store volatile i64 %new11, i64 *%ptr + store volatile i64 %new12, i64 *%ptr + store volatile i64 %new13, i64 *%ptr + store volatile i64 %new14, i64 *%ptr + store volatile i64 %new15, i64 *%ptr + + ret void +} + +; Check that adding -128 to a spilled value can use AGSI. +define void @f12(i64 *%ptr, i32 %sel) { +; CHECK-LABEL: f12: +; CHECK: agsi {{[0-9]+}}(%r15), -128 +; CHECK: br %r14 +entry: + %val0 = load volatile i64 *%ptr + %val1 = load volatile i64 *%ptr + %val2 = load volatile i64 *%ptr + %val3 = load volatile i64 *%ptr + %val4 = load volatile i64 *%ptr + %val5 = load volatile i64 *%ptr + %val6 = load volatile i64 *%ptr + %val7 = load volatile i64 *%ptr + %val8 = load volatile i64 *%ptr + %val9 = load volatile i64 *%ptr + %val10 = load volatile i64 *%ptr + %val11 = load volatile i64 *%ptr + %val12 = load volatile i64 *%ptr + %val13 = load volatile i64 *%ptr + %val14 = load volatile i64 *%ptr + %val15 = load volatile i64 *%ptr + + %test = icmp ne i32 %sel, 0 + br i1 %test, label %add, label %store + +add: + %add0 = add i64 %val0, -128 + %add1 = add i64 %val1, -128 + %add2 = add i64 %val2, -128 + %add3 = add i64 %val3, -128 + %add4 = add i64 %val4, -128 + %add5 = add i64 %val5, -128 + %add6 = add i64 %val6, -128 + %add7 = add i64 %val7, -128 + %add8 = add i64 %val8, -128 + %add9 = add i64 %val9, -128 + %add10 = add i64 %val10, -128 + %add11 = add i64 %val11, -128 + %add12 = add i64 %val12, -128 + %add13 = add i64 %val13, 
-128 + %add14 = add i64 %val14, -128 + %add15 = add i64 %val15, -128 + br label %store + +store: + %new0 = phi i64 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i64 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i64 [ %val2, %entry ], [ %add2, %add ] + %new3 = phi i64 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i64 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i64 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i64 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i64 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i64 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i64 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i64 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i64 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i64 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i64 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i64 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i64 [ %val15, %entry ], [ %add15, %add ] + + store volatile i64 %new0, i64 *%ptr + store volatile i64 %new1, i64 *%ptr + store volatile i64 %new2, i64 *%ptr + store volatile i64 %new3, i64 *%ptr + store volatile i64 %new4, i64 *%ptr + store volatile i64 %new5, i64 *%ptr + store volatile i64 %new6, i64 *%ptr + store volatile i64 %new7, i64 *%ptr + store volatile i64 %new8, i64 *%ptr + store volatile i64 %new9, i64 *%ptr + store volatile i64 %new10, i64 *%ptr + store volatile i64 %new11, i64 *%ptr + store volatile i64 %new12, i64 *%ptr + store volatile i64 %new13, i64 *%ptr + store volatile i64 %new14, i64 *%ptr + store volatile i64 %new15, i64 *%ptr + + ret void +} diff --git a/test/CodeGen/SystemZ/int-cmp-01.ll b/test/CodeGen/SystemZ/int-cmp-01.ll index dbfe0df..6653b6f 100644 --- a/test/CodeGen/SystemZ/int-cmp-01.ll +++ b/test/CodeGen/SystemZ/int-cmp-01.ll @@ -149,3 +149,17 @@ define void @f10(i32 %lhs, i64 %base, i64 %index, i32 *%dst) { store i32 %res, i32 *%dst ret void } + +; Check the comparison can be reversed if that allows CH to be used. 
+define double @f11(double %a, double %b, i32 %rhs, i16 *%src) { +; CHECK-LABEL: f11: +; CHECK: ch %r2, 0(%r3) +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %half = load i16 *%src + %lhs = sext i16 %half to i32 + %cond = icmp slt i32 %lhs, %rhs + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-02.ll b/test/CodeGen/SystemZ/int-cmp-02.ll index 26e1391..4a8a1a9 100644 --- a/test/CodeGen/SystemZ/int-cmp-02.ll +++ b/test/CodeGen/SystemZ/int-cmp-02.ll @@ -181,3 +181,16 @@ while.body: while.end: ret void } + +; Check the comparison can be reversed if that allows C to be used. +define double @f13(double %a, double %b, i32 %i2, i32 *%ptr) { +; CHECK-LABEL: f13: +; CHECK: c %r2, 0(%r3) +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %i1 = load i32 *%ptr + %cond = icmp slt i32 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-03.ll b/test/CodeGen/SystemZ/int-cmp-03.ll index 2d679cf..aa654e0 100644 --- a/test/CodeGen/SystemZ/int-cmp-03.ll +++ b/test/CodeGen/SystemZ/int-cmp-03.ll @@ -5,8 +5,7 @@ ; Check register comparison. define double @f1(double %a, double %b, i32 %i1, i32 %i2) { ; CHECK-LABEL: f1: -; CHECK: clr %r2, %r3 -; CHECK-NEXT: jl +; CHECK: clrjl %r2, %r3 ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp ult i32 %i1, %i2 @@ -160,3 +159,16 @@ define double @f11(double %a, double %b, i32 %i1, i64 %base, i64 %index) { %res = select i1 %cond, double %a, double %b ret double %res } + +; Check the comparison can be reversed if that allows CL to be used. 
+define double @f12(double %a, double %b, i32 %i2, i32 *%ptr) { +; CHECK-LABEL: f12: +; CHECK: cl %r2, 0(%r3) +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %i1 = load i32 *%ptr + %cond = icmp ult i32 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-04.ll b/test/CodeGen/SystemZ/int-cmp-04.ll index 54c4b5b..a6606f3 100644 --- a/test/CodeGen/SystemZ/int-cmp-04.ll +++ b/test/CodeGen/SystemZ/int-cmp-04.ll @@ -105,3 +105,17 @@ define void @f7(i64 %lhs, i64 %base, i64 %index, i64 *%dst) { store i64 %res, i64 *%dst ret void } + +; Check the comparison can be reversed if that allows CGH to be used. +define double @f8(double %a, double %b, i64 %rhs, i16 *%src) { +; CHECK-LABEL: f8: +; CHECK: cgh %r2, 0(%r3) +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %half = load i16 *%src + %lhs = sext i16 %half to i64 + %cond = icmp slt i64 %lhs, %rhs + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-05.ll b/test/CodeGen/SystemZ/int-cmp-05.ll index 36d12a5..f15b76b 100644 --- a/test/CodeGen/SystemZ/int-cmp-05.ll +++ b/test/CodeGen/SystemZ/int-cmp-05.ll @@ -54,7 +54,7 @@ define double @f4(double %a, double %b, i64 %i1, i32 %unext) { ret double %res } -; Check signed comparisonn with memory. +; Check signed comparison with memory. define double @f5(double %a, double %b, i64 %i1, i32 *%ptr) { ; CHECK-LABEL: f5: ; CHECK: cgf %r2, 0(%r3) @@ -290,3 +290,17 @@ define i64 @f15(i32 *%ptr0) { ret i64 %sel9 } + +; Check the comparison can be reversed if that allows CGF to be used. 
+define double @f16(double %a, double %b, i64 %i2, i32 *%ptr) { +; CHECK-LABEL: f16: +; CHECK: cgf %r2, 0(%r3) +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %unext = load i32 *%ptr + %i1 = sext i32 %unext to i64 + %cond = icmp slt i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-06.ll b/test/CodeGen/SystemZ/int-cmp-06.ll index cdd6114..8ab62e8 100644 --- a/test/CodeGen/SystemZ/int-cmp-06.ll +++ b/test/CodeGen/SystemZ/int-cmp-06.ll @@ -104,7 +104,7 @@ define double @f8(double %a, double %b, i64 %i1, i64 %unext) { ret double %res } -; Check unsigned comparisonn with memory. +; Check unsigned comparison with memory. define double @f9(double %a, double %b, i64 %i1, i32 *%ptr) { ; CHECK-LABEL: f9: ; CHECK: clgf %r2, 0(%r3) @@ -340,3 +340,17 @@ define i64 @f19(i32 *%ptr0) { ret i64 %sel9 } + +; Check the comparison can be reversed if that allows CLGF to be used. +define double @f20(double %a, double %b, i64 %i2, i32 *%ptr) { +; CHECK-LABEL: f20: +; CHECK: clgf %r2, 0(%r3) +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %unext = load i32 *%ptr + %i1 = zext i32 %unext to i64 + %cond = icmp ult i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-07.ll b/test/CodeGen/SystemZ/int-cmp-07.ll index 3308cb0..530d178 100644 --- a/test/CodeGen/SystemZ/int-cmp-07.ll +++ b/test/CodeGen/SystemZ/int-cmp-07.ll @@ -115,3 +115,16 @@ define double @f8(double %a, double %b, i64 %i1, i64 %base, i64 %index) { %res = select i1 %cond, double %a, double %b ret double %res } + +; Check the comparison can be reversed if that allows CG to be used. 
+define double @f9(double %a, double %b, i64 %i2, i64 *%ptr) { +; CHECK-LABEL: f9: +; CHECK: cg %r2, 0(%r3) +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %i1 = load i64 *%ptr + %cond = icmp slt i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-08.ll b/test/CodeGen/SystemZ/int-cmp-08.ll index e68a0fe..ebf158a 100644 --- a/test/CodeGen/SystemZ/int-cmp-08.ll +++ b/test/CodeGen/SystemZ/int-cmp-08.ll @@ -5,8 +5,7 @@ ; Check CLGR. define double @f1(double %a, double %b, i64 %i1, i64 %i2) { ; CHECK-LABEL: f1: -; CHECK: clgr %r2, %r3 -; CHECK-NEXT: jl +; CHECK: clgrjl %r2, %r3 ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp ult i64 %i1, %i2 @@ -116,3 +115,16 @@ define double @f8(double %a, double %b, i64 %i1, i64 %base, i64 %index) { %res = select i1 %cond, double %a, double %b ret double %res } + +; Check the comparison can be reversed if that allows CLG to be used. +define double @f9(double %a, double %b, i64 %i2, i64 *%ptr) { +; CHECK-LABEL: f9: +; CHECK: clg %r2, 0(%r3) +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %i1 = load i64 *%ptr + %cond = icmp ult i64 %i1, %i2 + %res = select i1 %cond, double %a, double %b + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-10.ll b/test/CodeGen/SystemZ/int-cmp-10.ll index e30e014..4d4c4bb 100644 --- a/test/CodeGen/SystemZ/int-cmp-10.ll +++ b/test/CodeGen/SystemZ/int-cmp-10.ll @@ -2,12 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -; Check a value near the low end of the range. We use CFI for comparisons -; with zero, or things that are equivalent to them. +; Check a value near the low end of the range. We use signed forms for +; comparisons with zero, or things that are equivalent to them. 
define double @f1(double %a, double %b, i32 %i1) { ; CHECK-LABEL: f1: -; CHECK: clfi %r2, 1 -; CHECK-NEXT: jh +; CHECK: clijh %r2, 1 ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp ugt i32 %i1, 1 @@ -15,9 +14,32 @@ define double @f1(double %a, double %b, i32 %i1) { ret double %res } -; Check a value near the high end of the range. +; Check the top of the CLIJ range. define double @f2(double %a, double %b, i32 %i1) { ; CHECK-LABEL: f2: +; CHECK: clijl %r2, 255 +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ult i32 %i1, 255 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, which needs a separate comparison. +define double @f3(double %a, double %b, i32 %i1) { +; CHECK-LABEL: f3: +; CHECK: clfi %r2, 256 +; CHECK: jl +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ult i32 %i1, 256 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check a value near the high end of the range. +define double @f4(double %a, double %b, i32 %i1) { +; CHECK-LABEL: f4: ; CHECK: clfi %r2, 4294967280 ; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 diff --git a/test/CodeGen/SystemZ/int-cmp-12.ll b/test/CodeGen/SystemZ/int-cmp-12.ll index f57f6ec..077b224 100644 --- a/test/CodeGen/SystemZ/int-cmp-12.ll +++ b/test/CodeGen/SystemZ/int-cmp-12.ll @@ -2,12 +2,11 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -; Check a value near the low end of the range. We use CGFI for comparisons -; with zero, or things that are equivalent to them. +; Check a value near the low end of the range. We use signed forms for +; comparisons with zero, or things that are equivalent to them. define double @f1(double %a, double %b, i64 %i1) { ; CHECK-LABEL: f1: -; CHECK: clgfi %r2, 1 -; CHECK-NEXT: jh +; CHECK: clgijh %r2, 1 ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp ugt i64 %i1, 1 @@ -15,9 +14,32 @@ define double @f1(double %a, double %b, i64 %i1) { ret double %res } -; Check the high end of the CLGFI range. 
+; Check the top of the CLGIJ range. define double @f2(double %a, double %b, i64 %i1) { ; CHECK-LABEL: f2: +; CHECK: clgijl %r2, 255 +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ult i64 %i1, 255 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the next value up, which needs a separate comparison. +define double @f3(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f3: +; CHECK: clgfi %r2, 256 +; CHECK: jl +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %cond = icmp ult i64 %i1, 256 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the high end of the CLGFI range. +define double @f4(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f4: ; CHECK: clgfi %r2, 4294967295 ; CHECK-NEXT: jl ; CHECK: ldr %f0, %f2 @@ -28,10 +50,9 @@ define double @f2(double %a, double %b, i64 %i1) { } ; Check the next value up, which must use a register comparison. -define double @f3(double %a, double %b, i64 %i1) { -; CHECK-LABEL: f3: -; CHECK: clgr %r2, -; CHECK-NEXT: jl +define double @f5(double %a, double %b, i64 %i1) { +; CHECK-LABEL: f5: +; CHECK: clgrjl %r2, ; CHECK: ldr %f0, %f2 ; CHECK: br %r14 %cond = icmp ult i64 %i1, 4294967296 diff --git a/test/CodeGen/SystemZ/int-cmp-20.ll b/test/CodeGen/SystemZ/int-cmp-20.ll index 7ecde77..98c41cd 100644 --- a/test/CodeGen/SystemZ/int-cmp-20.ll +++ b/test/CodeGen/SystemZ/int-cmp-20.ll @@ -63,7 +63,7 @@ define double @f4(double %a, double %b, i8 *%ptr) { ; extension. The condition is always true. define double @f5(double %a, double %b, i8 *%ptr) { ; CHECK-LABEL: f5: -; CHECK-NOT: cli +; CHECK-NOT: cli {{.*}} ; CHECK: br %r14 %val = load i8 *%ptr %ext = zext i8 %val to i32 @@ -79,7 +79,7 @@ define double @f5(double %a, double %b, i8 *%ptr) { ; and simply ignore CLI for this range. First check the low end of the range. 
define double @f6(double %a, double %b, i8 *%ptr) { ; CHECK-LABEL: f6: -; CHECK-NOT: cli +; CHECK-NOT: cli {{.*}} ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i32 @@ -91,7 +91,7 @@ define double @f6(double %a, double %b, i8 *%ptr) { ; ...and then the high end. define double @f7(double %a, double %b, i8 *%ptr) { ; CHECK-LABEL: f7: -; CHECK-NOT: cli +; CHECK-NOT: cli {{.*}} ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i32 @@ -118,7 +118,7 @@ define double @f8(double %a, double %b, i8 *%ptr) { ; extension. This cannot use CLI. define double @f9(double %a, double %b, i8 *%ptr) { ; CHECK-LABEL: f9: -; CHECK-NOT: cli +; CHECK-NOT: cli {{.*}} ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i32 @@ -145,7 +145,7 @@ define double @f10(double %a, double %b, i8 *%ptr) { ; extension. This cannot use CLI. define double @f11(double %a, double %b, i8 *%ptr) { ; CHECK-LABEL: f11: -; CHECK-NOT: cli +; CHECK-NOT: cli {{.*}} ; CHECK: br %r14 %val = load i8 *%ptr %ext = sext i8 %val to i32 @@ -158,7 +158,7 @@ define double @f11(double %a, double %b, i8 *%ptr) { ; extension. The condition is always true. define double @f12(double %a, double %b, i8 *%ptr) { ; CHECK-LABEL: f12: -; CHECK-NOT: cli +; CHECK-NOT: cli {{.*}} ; CHECK: br %r14 %val = load i8 *%ptr %ext = zext i8 %val to i32 diff --git a/test/CodeGen/SystemZ/int-cmp-36.ll b/test/CodeGen/SystemZ/int-cmp-36.ll index 831b05f..fa2d4bf 100644 --- a/test/CodeGen/SystemZ/int-cmp-36.ll +++ b/test/CodeGen/SystemZ/int-cmp-36.ll @@ -100,3 +100,22 @@ exit: %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ] ret i32 %res } + +; Check the comparison can be reversed if that allows CHRL to be used. 
+define i32 @f6(i32 %src2) { +; CHECK-LABEL: f6: +; CHECK: chrl %r2, g +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: br %r14 +entry: + %val = load i16 *@g + %src1 = sext i16 %val to i32 + %cond = icmp slt i32 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i32 %src2, %src2 + br label %exit +exit: + %res = phi i32 [ %src2, %entry ], [ %mul, %mulb ] + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-37.ll b/test/CodeGen/SystemZ/int-cmp-37.ll index 97d210e..8095ed1 100644 --- a/test/CodeGen/SystemZ/int-cmp-37.ll +++ b/test/CodeGen/SystemZ/int-cmp-37.ll @@ -86,8 +86,7 @@ define i32 @f5(i32 %src1) { ; CHECK-LABEL: f5: ; CHECK: lgrl [[REG:%r[0-5]]], h@GOT ; CHECK: llh [[VAL:%r[0-5]]], 0([[REG]]) -; CHECK: clr %r2, [[VAL]] -; CHECK-NEXT: jl +; CHECK: clrjl %r2, [[VAL]], ; CHECK: br %r14 entry: %val = load i16 *@h, align 1 @@ -101,3 +100,22 @@ exit: %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ] ret i32 %res } + +; Check the comparison can be reversed if that allows CLHRL to be used. +define i32 @f6(i32 %src2) { +; CHECK-LABEL: f6: +; CHECK: clhrl %r2, g +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: br %r14 +entry: + %val = load i16 *@g + %src1 = zext i16 %val to i32 + %cond = icmp ult i32 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i32 %src2, %src2 + br label %exit +exit: + %res = phi i32 [ %src2, %entry ], [ %mul, %mulb ] + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-38.ll b/test/CodeGen/SystemZ/int-cmp-38.ll index d5a852c..9017583 100644 --- a/test/CodeGen/SystemZ/int-cmp-38.ll +++ b/test/CodeGen/SystemZ/int-cmp-38.ll @@ -115,3 +115,21 @@ exit: %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ] ret i32 %res } + +; Check the comparison can be reversed if that allows CRL to be used. 
+define i32 @f7(i32 %src2) { +; CHECK-LABEL: f7: +; CHECK: crl %r2, g +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: br %r14 +entry: + %src1 = load i32 *@g + %cond = icmp slt i32 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i32 %src2, %src2 + br label %exit +exit: + %res = phi i32 [ %src2, %entry ], [ %mul, %mulb ] + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-39.ll b/test/CodeGen/SystemZ/int-cmp-39.ll index d442058..fc9547d 100644 --- a/test/CodeGen/SystemZ/int-cmp-39.ll +++ b/test/CodeGen/SystemZ/int-cmp-39.ll @@ -100,3 +100,22 @@ exit: %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] ret i64 %res } + +; Check the comparison can be reversed if that allows CGHRL to be used. +define i64 @f6(i64 %src2) { +; CHECK-LABEL: f6: +; CHECK: cghrl %r2, g +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: br %r14 +entry: + %val = load i16 *@g + %src1 = sext i16 %val to i64 + %cond = icmp slt i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src2, %src2 + br label %exit +exit: + %res = phi i64 [ %src2, %entry ], [ %mul, %mulb ] + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-40.ll b/test/CodeGen/SystemZ/int-cmp-40.ll index 6dab2db..9c532f1 100644 --- a/test/CodeGen/SystemZ/int-cmp-40.ll +++ b/test/CodeGen/SystemZ/int-cmp-40.ll @@ -86,8 +86,7 @@ define i64 @f5(i64 %src1) { ; CHECK-LABEL: f5: ; CHECK: lgrl [[REG:%r[0-5]]], h@GOT ; CHECK: llgh [[VAL:%r[0-5]]], 0([[REG]]) -; CHECK: clgr %r2, [[VAL]] -; CHECK-NEXT: jl +; CHECK: clgrjl %r2, [[VAL]], ; CHECK: br %r14 entry: %val = load i16 *@h, align 1 @@ -101,3 +100,22 @@ exit: %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] ret i64 %res } + +; Check the comparison can be reversed if that allows CLGHRL to be used. 
+define i64 @f6(i64 %src2) { +; CHECK-LABEL: f6: +; CHECK: clghrl %r2, g +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: br %r14 +entry: + %val = load i16 *@g + %src1 = zext i16 %val to i64 + %cond = icmp ult i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src2, %src2 + br label %exit +exit: + %res = phi i64 [ %src2, %entry ], [ %mul, %mulb ] + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-41.ll b/test/CodeGen/SystemZ/int-cmp-41.ll index 099681d..77f6e7d 100644 --- a/test/CodeGen/SystemZ/int-cmp-41.ll +++ b/test/CodeGen/SystemZ/int-cmp-41.ll @@ -100,3 +100,22 @@ exit: %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] ret i64 %res } + +; Check the comparison can be reversed if that allows CGFRL to be used. +define i64 @f6(i64 %src2) { +; CHECK-LABEL: f6: +; CHECK: cgfrl %r2, g +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: br %r14 +entry: + %val = load i32 *@g + %src1 = sext i32 %val to i64 + %cond = icmp slt i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src2, %src2 + br label %exit +exit: + %res = phi i64 [ %src2, %entry ], [ %mul, %mulb ] + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-42.ll b/test/CodeGen/SystemZ/int-cmp-42.ll index 26a268d..94ef008 100644 --- a/test/CodeGen/SystemZ/int-cmp-42.ll +++ b/test/CodeGen/SystemZ/int-cmp-42.ll @@ -100,3 +100,22 @@ exit: %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] ret i64 %res } + +; Check the comparison can be reversed if that allows CLGFRL to be used. 
+define i64 @f6(i64 %src2) { +; CHECK-LABEL: f6: +; CHECK: clgfrl %r2, g +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: br %r14 +entry: + %val = load i32 *@g + %src1 = zext i32 %val to i64 + %cond = icmp ult i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src2, %src2 + br label %exit +exit: + %res = phi i64 [ %src2, %entry ], [ %mul, %mulb ] + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-43.ll b/test/CodeGen/SystemZ/int-cmp-43.ll index e5e1390..1a62588 100644 --- a/test/CodeGen/SystemZ/int-cmp-43.ll +++ b/test/CodeGen/SystemZ/int-cmp-43.ll @@ -96,3 +96,21 @@ exit: %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] ret i64 %res } + +; Check the comparison can be reversed if that allows CGRL to be used. +define i64 @f6(i64 %src2) { +; CHECK-LABEL: f6: +; CHECK: cgrl %r2, g +; CHECK-NEXT: jh {{\.L.*}} +; CHECK: br %r14 +entry: + %src1 = load i64 *@g + %cond = icmp slt i64 %src1, %src2 + br i1 %cond, label %exit, label %mulb +mulb: + %mul = mul i64 %src2, %src2 + br label %exit +exit: + %res = phi i64 [ %src2, %entry ], [ %mul, %mulb ] + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-44.ll b/test/CodeGen/SystemZ/int-cmp-44.ll index b94f482..ae0133f 100644 --- a/test/CodeGen/SystemZ/int-cmp-44.ll +++ b/test/CodeGen/SystemZ/int-cmp-44.ll @@ -203,11 +203,11 @@ exit: ; comparisons with zero if the immediate covers the whole register. define i32 @f11(i32 %a, i32 %b, i32 *%dest) { ; CHECK-LABEL: f11: -; CHECK: nilf %r2, 100 +; CHECK: nilf %r2, 100000001 ; CHECK-NEXT: jl .L{{.*}} ; CHECK: br %r14 entry: - %res = and i32 %a, 100 + %res = and i32 %a, 100000001 %cmp = icmp ne i32 %res, 0 br i1 %cmp, label %exit, label %store diff --git a/test/CodeGen/SystemZ/int-cmp-46.ll b/test/CodeGen/SystemZ/int-cmp-46.ll new file mode 100644 index 0000000..f311942 --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-46.ll @@ -0,0 +1,491 @@ +; Test the use of TEST UNDER MASK for 32-bit operations. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +@g = global i32 0 + +; Check the lowest useful TMLL value. +define void @f1(i32 %a) { +; CHECK-LABEL: f1: +; CHECK: tmll %r2, 1 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 1 + %cmp = icmp eq i32 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check the high end of the TMLL range. +define void @f2(i32 %a) { +; CHECK-LABEL: f2: +; CHECK: tmll %r2, 65535 +; CHECK: jne {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 65535 + %cmp = icmp ne i32 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check the lowest useful TMLH value, which is the next value up. +define void @f3(i32 %a) { +; CHECK-LABEL: f3: +; CHECK: tmlh %r2, 1 +; CHECK: jne {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 65536 + %cmp = icmp ne i32 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check the next value up again, which cannot use TM. +define void @f4(i32 %a) { +; CHECK-LABEL: f4: +; CHECK-NOT: {{tm[lh].}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 4294901759 + %cmp = icmp eq i32 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check the high end of the TMLH range. +define void @f5(i32 %a) { +; CHECK-LABEL: f5: +; CHECK: tmlh %r2, 65535 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 4294901760 + %cmp = icmp eq i32 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can use TMLL for LT comparisons that are equivalent to +; an equality comparison with zero. 
+define void @f6(i32 %a) { +; CHECK-LABEL: f6: +; CHECK: tmll %r2, 240 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 240 + %cmp = icmp slt i32 %and, 16 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; ...same again with LE. +define void @f7(i32 %a) { +; CHECK-LABEL: f7: +; CHECK: tmll %r2, 240 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 240 + %cmp = icmp sle i32 %and, 15 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can use TMLL for GE comparisons that are equivalent to +; an inequality comparison with zero. +define void @f8(i32 %a) { +; CHECK-LABEL: f8: +; CHECK: tmll %r2, 240 +; CHECK: jne {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 240 + %cmp = icmp uge i32 %and, 16 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; ...same again with GT. +define void @f9(i32 %a) { +; CHECK-LABEL: f9: +; CHECK: tmll %r2, 240 +; CHECK: jne {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 240 + %cmp = icmp ugt i32 %and, 15 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can use TMLL for LT comparisons that effectively +; test whether the top bit is clear. +define void @f10(i32 %a) { +; CHECK-LABEL: f10: +; CHECK: tmll %r2, 35 +; CHECK: jle {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 35 + %cmp = icmp ult i32 %and, 8 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; ...same again with LE. 
+define void @f11(i32 %a) { +; CHECK-LABEL: f11: +; CHECK: tmll %r2, 35 +; CHECK: jle {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 35 + %cmp = icmp ule i32 %and, 31 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can use TMLL for GE comparisons that effectively test +; whether the top bit is set. +define void @f12(i32 %a) { +; CHECK-LABEL: f12: +; CHECK: tmll %r2, 140 +; CHECK: jnle {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 140 + %cmp = icmp uge i32 %and, 128 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; ...same again for GT. +define void @f13(i32 %a) { +; CHECK-LABEL: f13: +; CHECK: tmll %r2, 140 +; CHECK: jnle {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 140 + %cmp = icmp ugt i32 %and, 126 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can use TMLL for equality comparisons with the mask. +define void @f14(i32 %a) { +; CHECK-LABEL: f14: +; CHECK: tmll %r2, 101 +; CHECK: jo {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 101 + %cmp = icmp eq i32 %and, 101 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can use TMLL for inequality comparisons with the mask. +define void @f15(i32 %a) { +; CHECK-LABEL: f15: +; CHECK: tmll %r2, 65519 +; CHECK: jno {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 65519 + %cmp = icmp ne i32 %and, 65519 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can use TMLL for LT comparisons that are equivalent +; to inequality comparisons with the mask. 
+define void @f16(i32 %a) { +; CHECK-LABEL: f16: +; CHECK: tmll %r2, 130 +; CHECK: jno {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 130 + %cmp = icmp ult i32 %and, 129 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; ...same again with LE. +define void @f17(i32 %a) { +; CHECK-LABEL: f17: +; CHECK: tmll %r2, 130 +; CHECK: jno {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 130 + %cmp = icmp ule i32 %and, 128 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can use TMLL for GE comparisons that are equivalent +; to equality comparisons with the mask. +define void @f18(i32 %a) { +; CHECK-LABEL: f18: +; CHECK: tmll %r2, 194 +; CHECK: jo {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 194 + %cmp = icmp uge i32 %and, 193 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; ...same again for GT. +define void @f19(i32 %a) { +; CHECK-LABEL: f19: +; CHECK: tmll %r2, 194 +; CHECK: jo {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 194 + %cmp = icmp ugt i32 %and, 192 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can use TMLL for equality comparisons for the low bit +; when the mask has two bits. +define void @f20(i32 %a) { +; CHECK-LABEL: f20: +; CHECK: tmll %r2, 20 +; CHECK: jl {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 20 + %cmp = icmp eq i32 %and, 4 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can use TMLL for inequality comparisons for the low bit +; when the mask has two bits. 
+define void @f21(i32 %a) { +; CHECK-LABEL: f21: +; CHECK: tmll %r2, 20 +; CHECK: jnl {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 20 + %cmp = icmp ne i32 %and, 4 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can use TMLL for equality comparisons for the high bit +; when the mask has two bits. +define void @f22(i32 %a) { +; CHECK-LABEL: f22: +; CHECK: tmll %r2, 20 +; CHECK: jh {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 20 + %cmp = icmp eq i32 %and, 16 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can use TMLL for inequality comparisons for the high bit +; when the mask has two bits. +define void @f23(i32 %a) { +; CHECK-LABEL: f23: +; CHECK: tmll %r2, 20 +; CHECK: jnh {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i32 %a, 20 + %cmp = icmp ne i32 %and, 16 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can fold an SHL into a TMxx mask. +define void @f24(i32 %a) { +; CHECK-LABEL: f24: +; CHECK: tmll %r2, 255 +; CHECK: jne {{\.L.*}} +; CHECK: br %r14 +entry: + %shl = shl i32 %a, 12 + %and = and i32 %shl, 1044480 + %cmp = icmp ne i32 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can fold an SHR into a TMxx mask. 
+define void @f25(i32 %a) { +; CHECK-LABEL: f25: +; CHECK: tmlh %r2, 512 +; CHECK: jne {{\.L.*}} +; CHECK: br %r14 +entry: + %shr = lshr i32 %a, 25 + %and = and i32 %shr, 1 + %cmp = icmp ne i32 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} diff --git a/test/CodeGen/SystemZ/int-cmp-47.ll b/test/CodeGen/SystemZ/int-cmp-47.ll new file mode 100644 index 0000000..9ebcbfe --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-47.ll @@ -0,0 +1,234 @@ +; Test the use of TEST UNDER MASK for 64-bit operations. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +@g = global i32 0 + +; Check the lowest useful TMLL value. +define void @f1(i64 %a) { +; CHECK-LABEL: f1: +; CHECK: tmll %r2, 1 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i64 %a, 1 + %cmp = icmp eq i64 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check the high end of the TMLL range. +define void @f2(i64 %a) { +; CHECK-LABEL: f2: +; CHECK: tmll %r2, 65535 +; CHECK: jne {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i64 %a, 65535 + %cmp = icmp ne i64 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check the lowest useful TMLH value, which is the next value up. +define void @f3(i64 %a) { +; CHECK-LABEL: f3: +; CHECK: tmlh %r2, 1 +; CHECK: jne {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i64 %a, 65536 + %cmp = icmp ne i64 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check the next value up again, which cannot use TM. 
+define void @f4(i64 %a) { +; CHECK-LABEL: f4: +; CHECK-NOT: {{tm[lh].}} +; CHECK: br %r14 +entry: + %and = and i64 %a, 4294901759 + %cmp = icmp eq i64 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check the high end of the TMLH range. +define void @f5(i64 %a) { +; CHECK-LABEL: f5: +; CHECK: tmlh %r2, 65535 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i64 %a, 4294901760 + %cmp = icmp eq i64 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check the lowest useful TMHL value. +define void @f6(i64 %a) { +; CHECK-LABEL: f6: +; CHECK: tmhl %r2, 1 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i64 %a, 4294967296 + %cmp = icmp eq i64 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check the next value up again, which cannot use TM. +define void @f7(i64 %a) { +; CHECK-LABEL: f7: +; CHECK-NOT: {{tm[lh].}} +; CHECK: br %r14 +entry: + %and = and i64 %a, 4294967297 + %cmp = icmp ne i64 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check the high end of the TMHL range. +define void @f8(i64 %a) { +; CHECK-LABEL: f8: +; CHECK: tmhl %r2, 65535 +; CHECK: jne {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i64 %a, 281470681743360 + %cmp = icmp ne i64 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check the lowest useful TMHH value. 
+define void @f9(i64 %a) { +; CHECK-LABEL: f9: +; CHECK: tmhh %r2, 1 +; CHECK: jne {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i64 %a, 281474976710656 + %cmp = icmp ne i64 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check the high end of the TMHH range. +define void @f10(i64 %a) { +; CHECK-LABEL: f10: +; CHECK: tmhh %r2, 65535 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 +entry: + %and = and i64 %a, 18446462598732840960 + %cmp = icmp eq i64 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can fold an SHL into a TMxx mask. +define void @f11(i64 %a) { +; CHECK-LABEL: f11: +; CHECK: tmhl %r2, 32768 +; CHECK: jne {{\.L.*}} +; CHECK: br %r14 +entry: + %shl = shl i64 %a, 1 + %and = and i64 %shl, 281474976710656 + %cmp = icmp ne i64 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we can fold an SHR into a TMxx mask. +define void @f12(i64 %a) { +; CHECK-LABEL: f12: +; CHECK: tmhh %r2, 256 +; CHECK: jne {{\.L.*}} +; CHECK: br %r14 +entry: + %shr = lshr i64 %a, 56 + %and = and i64 %shr, 1 + %cmp = icmp ne i64 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} diff --git a/test/CodeGen/SystemZ/int-cmp-48.ll b/test/CodeGen/SystemZ/int-cmp-48.ll new file mode 100644 index 0000000..d7c6370 --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-48.ll @@ -0,0 +1,245 @@ +; Test the use of TM and TMY. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +@g = global i32 0 + +; Check a simple branching use of TM. 
+define void @f1(i8 *%src) { +; CHECK-LABEL: f1: +; CHECK: tm 0(%r2), 1 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 +entry: + %byte = load i8 *%src + %and = and i8 %byte, 1 + %cmp = icmp eq i8 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + + +; Check that we do not fold across an aliasing store. +define void @f2(i8 *%src) { +; CHECK-LABEL: f2: +; CHECK: llc [[REG:%r[0-5]]], 0(%r2) +; CHECK: mvi 0(%r2), 0 +; CHECK: tmll [[REG]], 1 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 +entry: + %byte = load i8 *%src + store i8 0, i8 *%src + %and = and i8 %byte, 1 + %cmp = icmp eq i8 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check a simple select-based use of TM. +define double @f3(i8 *%src, double %a, double %b) { +; CHECK-LABEL: f3: +; CHECK: tm 0(%r2), 1 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 + %byte = load i8 *%src + %and = and i8 %byte, 1 + %cmp = icmp eq i8 %and, 0 + %res = select i1 %cmp, double %b, double %a + ret double %res +} + +; Check that we do not fold across an aliasing store. +define double @f4(i8 *%src, double %a, double %b) { +; CHECK-LABEL: f4: +; CHECK: tm 0(%r2), 1 +; CHECK: je {{\.L.*}} +; CHECK: mvi 0(%r2), 0 +; CHECK: br %r14 + %byte = load i8 *%src + %and = and i8 %byte, 1 + %cmp = icmp eq i8 %and, 0 + %res = select i1 %cmp, double %b, double %a + store i8 0, i8 *%src + ret double %res +} + +; Check an inequality check. +define double @f5(i8 *%src, double %a, double %b) { +; CHECK-LABEL: f5: +; CHECK: tm 0(%r2), 1 +; CHECK: jne {{\.L.*}} +; CHECK: br %r14 + %byte = load i8 *%src + %and = and i8 %byte, 1 + %cmp = icmp ne i8 %and, 0 + %res = select i1 %cmp, double %b, double %a + ret double %res +} + +; Check that we can also use TM for equality comparisons with the mask. 
+define double @f6(i8 *%src, double %a, double %b) { +; CHECK-LABEL: f6: +; CHECK: tm 0(%r2), 254 +; CHECK: jo {{\.L.*}} +; CHECK: br %r14 + %byte = load i8 *%src + %and = and i8 %byte, 254 + %cmp = icmp eq i8 %and, 254 + %res = select i1 %cmp, double %b, double %a + ret double %res +} + +; Check inequality comparisons with the mask. +define double @f7(i8 *%src, double %a, double %b) { +; CHECK-LABEL: f7: +; CHECK: tm 0(%r2), 254 +; CHECK: jno {{\.L.*}} +; CHECK: br %r14 + %byte = load i8 *%src + %and = and i8 %byte, 254 + %cmp = icmp ne i8 %and, 254 + %res = select i1 %cmp, double %b, double %a + ret double %res +} + +; Check that we do not use the memory TM instruction when CC is being tested +; for 2. +define double @f8(i8 *%src, double %a, double %b) { +; CHECK-LABEL: f8: +; CHECK: llc [[REG:%r[0-5]]], 0(%r2) +; CHECK: tmll [[REG]], 3 +; CHECK: jh {{\.L.*}} +; CHECK: br %r14 + %byte = load i8 *%src + %and = and i8 %byte, 3 + %cmp = icmp eq i8 %and, 2 + %res = select i1 %cmp, double %b, double %a + ret double %res +} + +; ...likewise 1. +define double @f9(i8 *%src, double %a, double %b) { +; CHECK-LABEL: f9: +; CHECK: llc [[REG:%r[0-5]]], 0(%r2) +; CHECK: tmll [[REG]], 3 +; CHECK: jl {{\.L.*}} +; CHECK: br %r14 + %byte = load i8 *%src + %and = and i8 %byte, 3 + %cmp = icmp eq i8 %and, 1 + %res = select i1 %cmp, double %b, double %a + ret double %res +} + +; Check the high end of the TM range. +define double @f10(i8 *%src, double %a, double %b) { +; CHECK-LABEL: f10: +; CHECK: tm 4095(%r2), 1 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 4095 + %byte = load i8 *%ptr + %and = and i8 %byte, 1 + %cmp = icmp eq i8 %and, 0 + %res = select i1 %cmp, double %b, double %a + ret double %res +} + +; Check the low end of the positive TMY range. 
+define double @f11(i8 *%src, double %a, double %b) { +; CHECK-LABEL: f11: +; CHECK: tmy 4096(%r2), 1 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 4096 + %byte = load i8 *%ptr + %and = and i8 %byte, 1 + %cmp = icmp eq i8 %and, 0 + %res = select i1 %cmp, double %b, double %a + ret double %res +} + +; Check the high end of the TMY range. +define double @f12(i8 *%src, double %a, double %b) { +; CHECK-LABEL: f12: +; CHECK: tmy 524287(%r2), 1 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 524287 + %byte = load i8 *%ptr + %and = and i8 %byte, 1 + %cmp = icmp eq i8 %and, 0 + %res = select i1 %cmp, double %b, double %a + ret double %res +} + +; Check the next byte up, which needs separate address logic. +define double @f13(i8 *%src, double %a, double %b) { +; CHECK-LABEL: f13: +; CHECK: agfi %r2, 524288 +; CHECK: tm 0(%r2), 1 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 524288 + %byte = load i8 *%ptr + %and = and i8 %byte, 1 + %cmp = icmp eq i8 %and, 0 + %res = select i1 %cmp, double %b, double %a + ret double %res +} + +; Check the low end of the TMY range. +define double @f14(i8 *%src, double %a, double %b) { +; CHECK-LABEL: f14: +; CHECK: tmy -524288(%r2), 1 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -524288 + %byte = load i8 *%ptr + %and = and i8 %byte, 1 + %cmp = icmp eq i8 %and, 0 + %res = select i1 %cmp, double %b, double %a + ret double %res +} + +; Check the next byte down, which needs separate address logic. 
+define double @f15(i8 *%src, double %a, double %b) { +; CHECK-LABEL: f15: +; CHECK: agfi %r2, -524289 +; CHECK: tm 0(%r2), 1 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 -524289 + %byte = load i8 *%ptr + %and = and i8 %byte, 1 + %cmp = icmp eq i8 %and, 0 + %res = select i1 %cmp, double %b, double %a + ret double %res +} + +; Check that TM(Y) does not allow an index +define double @f16(i8 *%src, i64 %index, double %a, double %b) { +; CHECK-LABEL: f16: +; CHECK: tm 0({{%r[1-5]}}), 1 +; CHECK: je {{\.L.*}} +; CHECK: br %r14 + %ptr = getelementptr i8 *%src, i64 %index + %byte = load i8 *%ptr + %and = and i8 %byte, 1 + %cmp = icmp eq i8 %and, 0 + %res = select i1 %cmp, double %b, double %a + ret double %res +} diff --git a/test/CodeGen/SystemZ/int-cmp-49.ll b/test/CodeGen/SystemZ/int-cmp-49.ll new file mode 100644 index 0000000..83f18a2 --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-49.ll @@ -0,0 +1,49 @@ +; That that we don't try to use z196 instructions on z10 for TMHH and TMHL. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -O0 | FileCheck %s + +@g = global i32 0 + +; Check the lowest useful TMHL value. +define void @f1(i64 %a) { +; CHECK-LABEL: f1: +; CHECK-NOT: risblg +; CHECK-NOT: risbhg +; CHECK: tmhl {{%r[0-5]}}, 1 +; CHECK-NOT: risblg +; CHECK-NOT: risbhg +; CHECK: br %r14 +entry: + %and = and i64 %a, 4294967296 + %cmp = icmp eq i64 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check the lowest useful TMHH value. 
+define void @f2(i64 %a) { +; CHECK-LABEL: f2: +; CHECK-NOT: risblg +; CHECK-NOT: risbhg +; CHECK: tmhh {{%r[0-5]}}, 1 +; CHECK-NOT: risblg +; CHECK-NOT: risbhg +; CHECK: br %r14 +entry: + %and = and i64 %a, 281474976710656 + %cmp = icmp ne i64 %and, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} diff --git a/test/CodeGen/SystemZ/int-const-03.ll b/test/CodeGen/SystemZ/int-const-03.ll index 78db963..af1cef2 100644 --- a/test/CodeGen/SystemZ/int-const-03.ll +++ b/test/CodeGen/SystemZ/int-const-03.ll @@ -139,11 +139,11 @@ define void @f14(i8 *%src) { ret void } -; Check that MVI does not allow an index +; Check that MVI does not allow an index. We prefer STC in that case. define void @f15(i64 %src, i64 %index) { ; CHECK-LABEL: f15: -; CHECK: agr %r2, %r3 -; CHECK: mvi 4095(%r2), 42 +; CHECK: lhi [[TMP:%r[0-5]]], 42 +; CHECK: stc [[TMP]], 4095({{%r2,%r3|%r3,%r2}} ; CHECK: br %r14 %add1 = add i64 %src, %index %add2 = add i64 %add1, 4095 @@ -152,11 +152,11 @@ define void @f15(i64 %src, i64 %index) { ret void } -; Check that MVIY does not allow an index +; Check that MVIY does not allow an index. We prefer STCY in that case. define void @f16(i64 %src, i64 %index) { ; CHECK-LABEL: f16: -; CHECK: agr %r2, %r3 -; CHECK: mviy 4096(%r2), 42 +; CHECK: lhi [[TMP:%r[0-5]]], 42 +; CHECK: stcy [[TMP]], 4096({{%r2,%r3|%r3,%r2}} ; CHECK: br %r14 %add1 = add i64 %src, %index %add2 = add i64 %add1, 4096 diff --git a/test/CodeGen/SystemZ/int-const-04.ll b/test/CodeGen/SystemZ/int-const-04.ll index c109faa..aced50b 100644 --- a/test/CodeGen/SystemZ/int-const-04.ll +++ b/test/CodeGen/SystemZ/int-const-04.ll @@ -75,34 +75,34 @@ define void @f8(i16 *%a) { ret void } -; Check the next halfword up, which needs separate address logic. -; Other sequences besides this one would be OK. +; Check the next halfword up, which is out of range. We prefer STHY +; in that case. 
define void @f9(i16 *%a) { ; CHECK-LABEL: f9: -; CHECK: aghi %r2, 4096 -; CHECK: mvhhi 0(%r2), 42 +; CHECK: lhi [[TMP:%r[0-5]]], 42 +; CHECK: sthy [[TMP]], 4096(%r2) ; CHECK: br %r14 %ptr = getelementptr i16 *%a, i64 2048 store i16 42, i16 *%ptr ret void } -; Check negative displacements, which also need separate address logic. +; Check negative displacements, for which we again prefer STHY. define void @f10(i16 *%a) { ; CHECK-LABEL: f10: -; CHECK: aghi %r2, -2 -; CHECK: mvhhi 0(%r2), 42 +; CHECK: lhi [[TMP:%r[0-5]]], 42 +; CHECK: sthy [[TMP]], -2(%r2) ; CHECK: br %r14 %ptr = getelementptr i16 *%a, i64 -1 store i16 42, i16 *%ptr ret void } -; Check that MVHHI does not allow an index +; Check that MVHHI does not allow an index. define void @f11(i64 %src, i64 %index) { ; CHECK-LABEL: f11: -; CHECK: agr %r2, %r3 -; CHECK: mvhhi 0(%r2), 42 +; CHECK: lhi [[TMP:%r[0-5]]], 42 +; CHECK: sth [[TMP]], 0({{%r2,%r3|%r3,%r2}}) ; CHECK: br %r14 %add = add i64 %src, %index %ptr = inttoptr i64 %add to i16 * diff --git a/test/CodeGen/SystemZ/int-const-05.ll b/test/CodeGen/SystemZ/int-const-05.ll index d0c8569..98d6851 100644 --- a/test/CodeGen/SystemZ/int-const-05.ll +++ b/test/CodeGen/SystemZ/int-const-05.ll @@ -66,34 +66,33 @@ define void @f7(i32 *%a) { ret void } -; Check the next word up, which needs separate address logic. -; Other sequences besides this one would be OK. +; Check the next word up, which is out of range. We prefer STY in that case. define void @f8(i32 *%a) { ; CHECK-LABEL: f8: -; CHECK: aghi %r2, 4096 -; CHECK: mvhi 0(%r2), 42 +; CHECK: lhi [[TMP:%r[0-5]]], 42 +; CHECK: sty [[TMP]], 4096(%r2) ; CHECK: br %r14 %ptr = getelementptr i32 *%a, i64 1024 store i32 42, i32 *%ptr ret void } -; Check negative displacements, which also need separate address logic. +; Check negative displacements, for which we again prefer STY. 
define void @f9(i32 *%a) { ; CHECK-LABEL: f9: -; CHECK: aghi %r2, -4 -; CHECK: mvhi 0(%r2), 42 +; CHECK: lhi [[TMP:%r[0-5]]], 42 +; CHECK: sty [[TMP]], -4(%r2) ; CHECK: br %r14 %ptr = getelementptr i32 *%a, i64 -1 store i32 42, i32 *%ptr ret void } -; Check that MVHI does not allow an index +; Check that MVHI does not allow an index. define void @f10(i64 %src, i64 %index) { ; CHECK-LABEL: f10: -; CHECK: agr %r2, %r3 -; CHECK: mvhi 0(%r2), 42 +; CHECK: lhi [[TMP:%r[0-5]]], 42 +; CHECK: st [[TMP]], 0({{%r2,%r3|%r3,%r2}}) ; CHECK: br %r14 %add = add i64 %src, %index %ptr = inttoptr i64 %add to i32 * diff --git a/test/CodeGen/SystemZ/int-const-06.ll b/test/CodeGen/SystemZ/int-const-06.ll index 12a555c..cf07c66 100644 --- a/test/CodeGen/SystemZ/int-const-06.ll +++ b/test/CodeGen/SystemZ/int-const-06.ll @@ -66,34 +66,34 @@ define void @f7(i64 *%a) { ret void } -; Check the next doubleword up, which needs separate address logic. -; Other sequences besides this one would be OK. +; Check the next doubleword up, which is out of range. We prefer STG +; in that case. define void @f8(i64 *%a) { ; CHECK-LABEL: f8: -; CHECK: aghi %r2, 4096 -; CHECK: mvghi 0(%r2), 42 +; CHECK: lghi [[TMP:%r[0-5]]], 42 +; CHECK: stg [[TMP]], 4096(%r2) ; CHECK: br %r14 %ptr = getelementptr i64 *%a, i64 512 store i64 42, i64 *%ptr ret void } -; Check negative displacements, which also need separate address logic. +; Check negative displacements, for which we again prefer STG. define void @f9(i64 *%a) { ; CHECK-LABEL: f9: -; CHECK: aghi %r2, -8 -; CHECK: mvghi 0(%r2), 42 +; CHECK: lghi [[TMP:%r[0-5]]], 42 +; CHECK: stg [[TMP]], -8(%r2) ; CHECK: br %r14 %ptr = getelementptr i64 *%a, i64 -1 store i64 42, i64 *%ptr ret void } -; Check that MVGHI does not allow an index +; Check that MVGHI does not allow an index. 
define void @f10(i64 %src, i64 %index) { ; CHECK-LABEL: f10: -; CHECK: agr %r2, %r3 -; CHECK: mvghi 0(%r2), 42 +; CHECK: lghi [[TMP:%r[0-5]]], 42 +; CHECK: stg [[TMP]], 0({{%r2,%r3|%r3,%r2}}) ; CHECK: br %r14 %add = add i64 %src, %index %ptr = inttoptr i64 %add to i64 * diff --git a/test/CodeGen/SystemZ/int-conv-02.ll b/test/CodeGen/SystemZ/int-conv-02.ll index 18cfd4a..dd7760d 100644 --- a/test/CodeGen/SystemZ/int-conv-02.ll +++ b/test/CodeGen/SystemZ/int-conv-02.ll @@ -1,6 +1,7 @@ -; Test zero extensions from a byte to an i32. +; Test zero extensions from a byte to an i32. The tests here +; assume z10 register pressure, without the high words being available. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; Test register extension, starting with an i32. define i32 @f1(i32 %a) { diff --git a/test/CodeGen/SystemZ/int-conv-06.ll b/test/CodeGen/SystemZ/int-conv-06.ll index 9c95bad..33860d1 100644 --- a/test/CodeGen/SystemZ/int-conv-06.ll +++ b/test/CodeGen/SystemZ/int-conv-06.ll @@ -1,6 +1,7 @@ -; Test zero extensions from a halfword to an i32. +; Test zero extensions from a halfword to an i32. The tests here +; assume z10 register pressure, without the high words being available. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; Test register extension, starting with an i32. define i32 @f1(i32 %a) { diff --git a/test/CodeGen/SystemZ/int-conv-09.ll b/test/CodeGen/SystemZ/int-conv-09.ll index db4c333..b9c5089 100644 --- a/test/CodeGen/SystemZ/int-conv-09.ll +++ b/test/CodeGen/SystemZ/int-conv-09.ll @@ -102,80 +102,3 @@ define i64 @f9(i64 %src, i64 %index) { %ext = sext i32 %word to i64 ret i64 %ext } - -; Test a case where we spill the source of at least one LGFR. We want -; to use LGF if possible. 
-define void @f10(i64 *%ptr1, i32 *%ptr2) { -; CHECK-LABEL: f10: -; CHECK: lgf {{%r[0-9]+}}, 16{{[04]}}(%r15) -; CHECK: br %r14 - %val0 = load volatile i32 *%ptr2 - %val1 = load volatile i32 *%ptr2 - %val2 = load volatile i32 *%ptr2 - %val3 = load volatile i32 *%ptr2 - %val4 = load volatile i32 *%ptr2 - %val5 = load volatile i32 *%ptr2 - %val6 = load volatile i32 *%ptr2 - %val7 = load volatile i32 *%ptr2 - %val8 = load volatile i32 *%ptr2 - %val9 = load volatile i32 *%ptr2 - %val10 = load volatile i32 *%ptr2 - %val11 = load volatile i32 *%ptr2 - %val12 = load volatile i32 *%ptr2 - %val13 = load volatile i32 *%ptr2 - %val14 = load volatile i32 *%ptr2 - %val15 = load volatile i32 *%ptr2 - - %ext0 = sext i32 %val0 to i64 - %ext1 = sext i32 %val1 to i64 - %ext2 = sext i32 %val2 to i64 - %ext3 = sext i32 %val3 to i64 - %ext4 = sext i32 %val4 to i64 - %ext5 = sext i32 %val5 to i64 - %ext6 = sext i32 %val6 to i64 - %ext7 = sext i32 %val7 to i64 - %ext8 = sext i32 %val8 to i64 - %ext9 = sext i32 %val9 to i64 - %ext10 = sext i32 %val10 to i64 - %ext11 = sext i32 %val11 to i64 - %ext12 = sext i32 %val12 to i64 - %ext13 = sext i32 %val13 to i64 - %ext14 = sext i32 %val14 to i64 - %ext15 = sext i32 %val15 to i64 - - store volatile i32 %val0, i32 *%ptr2 - store volatile i32 %val1, i32 *%ptr2 - store volatile i32 %val2, i32 *%ptr2 - store volatile i32 %val3, i32 *%ptr2 - store volatile i32 %val4, i32 *%ptr2 - store volatile i32 %val5, i32 *%ptr2 - store volatile i32 %val6, i32 *%ptr2 - store volatile i32 %val7, i32 *%ptr2 - store volatile i32 %val8, i32 *%ptr2 - store volatile i32 %val9, i32 *%ptr2 - store volatile i32 %val10, i32 *%ptr2 - store volatile i32 %val11, i32 *%ptr2 - store volatile i32 %val12, i32 *%ptr2 - store volatile i32 %val13, i32 *%ptr2 - store volatile i32 %val14, i32 *%ptr2 - store volatile i32 %val15, i32 *%ptr2 - - store volatile i64 %ext0, i64 *%ptr1 - store volatile i64 %ext1, i64 *%ptr1 - store volatile i64 %ext2, i64 *%ptr1 - store volatile i64 %ext3, 
i64 *%ptr1 - store volatile i64 %ext4, i64 *%ptr1 - store volatile i64 %ext5, i64 *%ptr1 - store volatile i64 %ext6, i64 *%ptr1 - store volatile i64 %ext7, i64 *%ptr1 - store volatile i64 %ext8, i64 *%ptr1 - store volatile i64 %ext9, i64 *%ptr1 - store volatile i64 %ext10, i64 *%ptr1 - store volatile i64 %ext11, i64 *%ptr1 - store volatile i64 %ext12, i64 *%ptr1 - store volatile i64 %ext13, i64 *%ptr1 - store volatile i64 %ext14, i64 *%ptr1 - store volatile i64 %ext15, i64 *%ptr1 - - ret void -} diff --git a/test/CodeGen/SystemZ/int-conv-10.ll b/test/CodeGen/SystemZ/int-conv-10.ll index f2f71d9..781c74c 100644 --- a/test/CodeGen/SystemZ/int-conv-10.ll +++ b/test/CodeGen/SystemZ/int-conv-10.ll @@ -111,80 +111,3 @@ define i64 @f10(i64 %src, i64 %index) { %ext = zext i32 %word to i64 ret i64 %ext } - -; Test a case where we spill the source of at least one LLGFR. We want -; to use LLGF if possible. -define void @f11(i64 *%ptr1, i32 *%ptr2) { -; CHECK-LABEL: f11: -; CHECK: llgf {{%r[0-9]+}}, 16{{[04]}}(%r15) -; CHECK: br %r14 - %val0 = load volatile i32 *%ptr2 - %val1 = load volatile i32 *%ptr2 - %val2 = load volatile i32 *%ptr2 - %val3 = load volatile i32 *%ptr2 - %val4 = load volatile i32 *%ptr2 - %val5 = load volatile i32 *%ptr2 - %val6 = load volatile i32 *%ptr2 - %val7 = load volatile i32 *%ptr2 - %val8 = load volatile i32 *%ptr2 - %val9 = load volatile i32 *%ptr2 - %val10 = load volatile i32 *%ptr2 - %val11 = load volatile i32 *%ptr2 - %val12 = load volatile i32 *%ptr2 - %val13 = load volatile i32 *%ptr2 - %val14 = load volatile i32 *%ptr2 - %val15 = load volatile i32 *%ptr2 - - %ext0 = zext i32 %val0 to i64 - %ext1 = zext i32 %val1 to i64 - %ext2 = zext i32 %val2 to i64 - %ext3 = zext i32 %val3 to i64 - %ext4 = zext i32 %val4 to i64 - %ext5 = zext i32 %val5 to i64 - %ext6 = zext i32 %val6 to i64 - %ext7 = zext i32 %val7 to i64 - %ext8 = zext i32 %val8 to i64 - %ext9 = zext i32 %val9 to i64 - %ext10 = zext i32 %val10 to i64 - %ext11 = zext i32 %val11 to i64 - 
%ext12 = zext i32 %val12 to i64 - %ext13 = zext i32 %val13 to i64 - %ext14 = zext i32 %val14 to i64 - %ext15 = zext i32 %val15 to i64 - - store volatile i32 %val0, i32 *%ptr2 - store volatile i32 %val1, i32 *%ptr2 - store volatile i32 %val2, i32 *%ptr2 - store volatile i32 %val3, i32 *%ptr2 - store volatile i32 %val4, i32 *%ptr2 - store volatile i32 %val5, i32 *%ptr2 - store volatile i32 %val6, i32 *%ptr2 - store volatile i32 %val7, i32 *%ptr2 - store volatile i32 %val8, i32 *%ptr2 - store volatile i32 %val9, i32 *%ptr2 - store volatile i32 %val10, i32 *%ptr2 - store volatile i32 %val11, i32 *%ptr2 - store volatile i32 %val12, i32 *%ptr2 - store volatile i32 %val13, i32 *%ptr2 - store volatile i32 %val14, i32 *%ptr2 - store volatile i32 %val15, i32 *%ptr2 - - store volatile i64 %ext0, i64 *%ptr1 - store volatile i64 %ext1, i64 *%ptr1 - store volatile i64 %ext2, i64 *%ptr1 - store volatile i64 %ext3, i64 *%ptr1 - store volatile i64 %ext4, i64 *%ptr1 - store volatile i64 %ext5, i64 *%ptr1 - store volatile i64 %ext6, i64 *%ptr1 - store volatile i64 %ext7, i64 *%ptr1 - store volatile i64 %ext8, i64 *%ptr1 - store volatile i64 %ext9, i64 *%ptr1 - store volatile i64 %ext10, i64 *%ptr1 - store volatile i64 %ext11, i64 *%ptr1 - store volatile i64 %ext12, i64 *%ptr1 - store volatile i64 %ext13, i64 *%ptr1 - store volatile i64 %ext14, i64 *%ptr1 - store volatile i64 %ext15, i64 *%ptr1 - - ret void -} diff --git a/test/CodeGen/SystemZ/int-conv-11.ll b/test/CodeGen/SystemZ/int-conv-11.ll new file mode 100644 index 0000000..3076962 --- /dev/null +++ b/test/CodeGen/SystemZ/int-conv-11.ll @@ -0,0 +1,350 @@ +; Test spills of zero extensions when high GR32s are available. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Test a case where we spill the source of at least one LLCRMux. We want +; to use LLC(H) if possible. 
+define void @f1(i32 *%ptr) { +; CHECK-LABEL: f1: +; CHECK: llc{{h?}} {{%r[0-9]+}}, 16{{[37]}}(%r15) +; CHECK: br %r14 + %val0 = load volatile i32 *%ptr + %val1 = load volatile i32 *%ptr + %val2 = load volatile i32 *%ptr + %val3 = load volatile i32 *%ptr + %val4 = load volatile i32 *%ptr + %val5 = load volatile i32 *%ptr + %val6 = load volatile i32 *%ptr + %val7 = load volatile i32 *%ptr + %val8 = load volatile i32 *%ptr + %val9 = load volatile i32 *%ptr + %val10 = load volatile i32 *%ptr + %val11 = load volatile i32 *%ptr + %val12 = load volatile i32 *%ptr + %val13 = load volatile i32 *%ptr + %val14 = load volatile i32 *%ptr + %val15 = load volatile i32 *%ptr + %val16 = load volatile i32 *%ptr + %val17 = load volatile i32 *%ptr + %val18 = load volatile i32 *%ptr + %val19 = load volatile i32 *%ptr + %val20 = load volatile i32 *%ptr + %val21 = load volatile i32 *%ptr + %val22 = load volatile i32 *%ptr + %val23 = load volatile i32 *%ptr + %val24 = load volatile i32 *%ptr + %val25 = load volatile i32 *%ptr + %val26 = load volatile i32 *%ptr + %val27 = load volatile i32 *%ptr + %val28 = load volatile i32 *%ptr + %val29 = load volatile i32 *%ptr + %val30 = load volatile i32 *%ptr + %val31 = load volatile i32 *%ptr + + %trunc0 = trunc i32 %val0 to i8 + %trunc1 = trunc i32 %val1 to i8 + %trunc2 = trunc i32 %val2 to i8 + %trunc3 = trunc i32 %val3 to i8 + %trunc4 = trunc i32 %val4 to i8 + %trunc5 = trunc i32 %val5 to i8 + %trunc6 = trunc i32 %val6 to i8 + %trunc7 = trunc i32 %val7 to i8 + %trunc8 = trunc i32 %val8 to i8 + %trunc9 = trunc i32 %val9 to i8 + %trunc10 = trunc i32 %val10 to i8 + %trunc11 = trunc i32 %val11 to i8 + %trunc12 = trunc i32 %val12 to i8 + %trunc13 = trunc i32 %val13 to i8 + %trunc14 = trunc i32 %val14 to i8 + %trunc15 = trunc i32 %val15 to i8 + %trunc16 = trunc i32 %val16 to i8 + %trunc17 = trunc i32 %val17 to i8 + %trunc18 = trunc i32 %val18 to i8 + %trunc19 = trunc i32 %val19 to i8 + %trunc20 = trunc i32 %val20 to i8 + %trunc21 = trunc i32 %val21 to 
i8 + %trunc22 = trunc i32 %val22 to i8 + %trunc23 = trunc i32 %val23 to i8 + %trunc24 = trunc i32 %val24 to i8 + %trunc25 = trunc i32 %val25 to i8 + %trunc26 = trunc i32 %val26 to i8 + %trunc27 = trunc i32 %val27 to i8 + %trunc28 = trunc i32 %val28 to i8 + %trunc29 = trunc i32 %val29 to i8 + %trunc30 = trunc i32 %val30 to i8 + %trunc31 = trunc i32 %val31 to i8 + + %ext0 = zext i8 %trunc0 to i32 + %ext1 = zext i8 %trunc1 to i32 + %ext2 = zext i8 %trunc2 to i32 + %ext3 = zext i8 %trunc3 to i32 + %ext4 = zext i8 %trunc4 to i32 + %ext5 = zext i8 %trunc5 to i32 + %ext6 = zext i8 %trunc6 to i32 + %ext7 = zext i8 %trunc7 to i32 + %ext8 = zext i8 %trunc8 to i32 + %ext9 = zext i8 %trunc9 to i32 + %ext10 = zext i8 %trunc10 to i32 + %ext11 = zext i8 %trunc11 to i32 + %ext12 = zext i8 %trunc12 to i32 + %ext13 = zext i8 %trunc13 to i32 + %ext14 = zext i8 %trunc14 to i32 + %ext15 = zext i8 %trunc15 to i32 + %ext16 = zext i8 %trunc16 to i32 + %ext17 = zext i8 %trunc17 to i32 + %ext18 = zext i8 %trunc18 to i32 + %ext19 = zext i8 %trunc19 to i32 + %ext20 = zext i8 %trunc20 to i32 + %ext21 = zext i8 %trunc21 to i32 + %ext22 = zext i8 %trunc22 to i32 + %ext23 = zext i8 %trunc23 to i32 + %ext24 = zext i8 %trunc24 to i32 + %ext25 = zext i8 %trunc25 to i32 + %ext26 = zext i8 %trunc26 to i32 + %ext27 = zext i8 %trunc27 to i32 + %ext28 = zext i8 %trunc28 to i32 + %ext29 = zext i8 %trunc29 to i32 + %ext30 = zext i8 %trunc30 to i32 + %ext31 = zext i8 %trunc31 to i32 + + store volatile i32 %val0, i32 *%ptr + store volatile i32 %val1, i32 *%ptr + store volatile i32 %val2, i32 *%ptr + store volatile i32 %val3, i32 *%ptr + store volatile i32 %val4, i32 *%ptr + store volatile i32 %val5, i32 *%ptr + store volatile i32 %val6, i32 *%ptr + store volatile i32 %val7, i32 *%ptr + store volatile i32 %val8, i32 *%ptr + store volatile i32 %val9, i32 *%ptr + store volatile i32 %val10, i32 *%ptr + store volatile i32 %val11, i32 *%ptr + store volatile i32 %val12, i32 *%ptr + store volatile i32 %val13, i32 
*%ptr + store volatile i32 %val14, i32 *%ptr + store volatile i32 %val15, i32 *%ptr + store volatile i32 %val16, i32 *%ptr + store volatile i32 %val17, i32 *%ptr + store volatile i32 %val18, i32 *%ptr + store volatile i32 %val19, i32 *%ptr + store volatile i32 %val20, i32 *%ptr + store volatile i32 %val21, i32 *%ptr + store volatile i32 %val22, i32 *%ptr + store volatile i32 %val23, i32 *%ptr + store volatile i32 %val24, i32 *%ptr + store volatile i32 %val25, i32 *%ptr + store volatile i32 %val26, i32 *%ptr + store volatile i32 %val27, i32 *%ptr + store volatile i32 %val28, i32 *%ptr + store volatile i32 %val29, i32 *%ptr + store volatile i32 %val30, i32 *%ptr + store volatile i32 %val31, i32 *%ptr + + store volatile i32 %ext0, i32 *%ptr + store volatile i32 %ext1, i32 *%ptr + store volatile i32 %ext2, i32 *%ptr + store volatile i32 %ext3, i32 *%ptr + store volatile i32 %ext4, i32 *%ptr + store volatile i32 %ext5, i32 *%ptr + store volatile i32 %ext6, i32 *%ptr + store volatile i32 %ext7, i32 *%ptr + store volatile i32 %ext8, i32 *%ptr + store volatile i32 %ext9, i32 *%ptr + store volatile i32 %ext10, i32 *%ptr + store volatile i32 %ext11, i32 *%ptr + store volatile i32 %ext12, i32 *%ptr + store volatile i32 %ext13, i32 *%ptr + store volatile i32 %ext14, i32 *%ptr + store volatile i32 %ext15, i32 *%ptr + store volatile i32 %ext16, i32 *%ptr + store volatile i32 %ext17, i32 *%ptr + store volatile i32 %ext18, i32 *%ptr + store volatile i32 %ext19, i32 *%ptr + store volatile i32 %ext20, i32 *%ptr + store volatile i32 %ext21, i32 *%ptr + store volatile i32 %ext22, i32 *%ptr + store volatile i32 %ext23, i32 *%ptr + store volatile i32 %ext24, i32 *%ptr + store volatile i32 %ext25, i32 *%ptr + store volatile i32 %ext26, i32 *%ptr + store volatile i32 %ext27, i32 *%ptr + store volatile i32 %ext28, i32 *%ptr + store volatile i32 %ext29, i32 *%ptr + store volatile i32 %ext30, i32 *%ptr + store volatile i32 %ext31, i32 *%ptr + + ret void +} + +; Same again with i16, which 
should use LLH(H). +define void @f2(i32 *%ptr) { +; CHECK-LABEL: f2: +; CHECK: llh{{h?}} {{%r[0-9]+}}, 16{{[26]}}(%r15) +; CHECK: br %r14 + %val0 = load volatile i32 *%ptr + %val1 = load volatile i32 *%ptr + %val2 = load volatile i32 *%ptr + %val3 = load volatile i32 *%ptr + %val4 = load volatile i32 *%ptr + %val5 = load volatile i32 *%ptr + %val6 = load volatile i32 *%ptr + %val7 = load volatile i32 *%ptr + %val8 = load volatile i32 *%ptr + %val9 = load volatile i32 *%ptr + %val10 = load volatile i32 *%ptr + %val11 = load volatile i32 *%ptr + %val12 = load volatile i32 *%ptr + %val13 = load volatile i32 *%ptr + %val14 = load volatile i32 *%ptr + %val15 = load volatile i32 *%ptr + %val16 = load volatile i32 *%ptr + %val17 = load volatile i32 *%ptr + %val18 = load volatile i32 *%ptr + %val19 = load volatile i32 *%ptr + %val20 = load volatile i32 *%ptr + %val21 = load volatile i32 *%ptr + %val22 = load volatile i32 *%ptr + %val23 = load volatile i32 *%ptr + %val24 = load volatile i32 *%ptr + %val25 = load volatile i32 *%ptr + %val26 = load volatile i32 *%ptr + %val27 = load volatile i32 *%ptr + %val28 = load volatile i32 *%ptr + %val29 = load volatile i32 *%ptr + %val30 = load volatile i32 *%ptr + %val31 = load volatile i32 *%ptr + + %trunc0 = trunc i32 %val0 to i16 + %trunc1 = trunc i32 %val1 to i16 + %trunc2 = trunc i32 %val2 to i16 + %trunc3 = trunc i32 %val3 to i16 + %trunc4 = trunc i32 %val4 to i16 + %trunc5 = trunc i32 %val5 to i16 + %trunc6 = trunc i32 %val6 to i16 + %trunc7 = trunc i32 %val7 to i16 + %trunc8 = trunc i32 %val8 to i16 + %trunc9 = trunc i32 %val9 to i16 + %trunc10 = trunc i32 %val10 to i16 + %trunc11 = trunc i32 %val11 to i16 + %trunc12 = trunc i32 %val12 to i16 + %trunc13 = trunc i32 %val13 to i16 + %trunc14 = trunc i32 %val14 to i16 + %trunc15 = trunc i32 %val15 to i16 + %trunc16 = trunc i32 %val16 to i16 + %trunc17 = trunc i32 %val17 to i16 + %trunc18 = trunc i32 %val18 to i16 + %trunc19 = trunc i32 %val19 to i16 + %trunc20 = trunc i32 %val20 
to i16 + %trunc21 = trunc i32 %val21 to i16 + %trunc22 = trunc i32 %val22 to i16 + %trunc23 = trunc i32 %val23 to i16 + %trunc24 = trunc i32 %val24 to i16 + %trunc25 = trunc i32 %val25 to i16 + %trunc26 = trunc i32 %val26 to i16 + %trunc27 = trunc i32 %val27 to i16 + %trunc28 = trunc i32 %val28 to i16 + %trunc29 = trunc i32 %val29 to i16 + %trunc30 = trunc i32 %val30 to i16 + %trunc31 = trunc i32 %val31 to i16 + + %ext0 = zext i16 %trunc0 to i32 + %ext1 = zext i16 %trunc1 to i32 + %ext2 = zext i16 %trunc2 to i32 + %ext3 = zext i16 %trunc3 to i32 + %ext4 = zext i16 %trunc4 to i32 + %ext5 = zext i16 %trunc5 to i32 + %ext6 = zext i16 %trunc6 to i32 + %ext7 = zext i16 %trunc7 to i32 + %ext8 = zext i16 %trunc8 to i32 + %ext9 = zext i16 %trunc9 to i32 + %ext10 = zext i16 %trunc10 to i32 + %ext11 = zext i16 %trunc11 to i32 + %ext12 = zext i16 %trunc12 to i32 + %ext13 = zext i16 %trunc13 to i32 + %ext14 = zext i16 %trunc14 to i32 + %ext15 = zext i16 %trunc15 to i32 + %ext16 = zext i16 %trunc16 to i32 + %ext17 = zext i16 %trunc17 to i32 + %ext18 = zext i16 %trunc18 to i32 + %ext19 = zext i16 %trunc19 to i32 + %ext20 = zext i16 %trunc20 to i32 + %ext21 = zext i16 %trunc21 to i32 + %ext22 = zext i16 %trunc22 to i32 + %ext23 = zext i16 %trunc23 to i32 + %ext24 = zext i16 %trunc24 to i32 + %ext25 = zext i16 %trunc25 to i32 + %ext26 = zext i16 %trunc26 to i32 + %ext27 = zext i16 %trunc27 to i32 + %ext28 = zext i16 %trunc28 to i32 + %ext29 = zext i16 %trunc29 to i32 + %ext30 = zext i16 %trunc30 to i32 + %ext31 = zext i16 %trunc31 to i32 + + store volatile i32 %val0, i32 *%ptr + store volatile i32 %val1, i32 *%ptr + store volatile i32 %val2, i32 *%ptr + store volatile i32 %val3, i32 *%ptr + store volatile i32 %val4, i32 *%ptr + store volatile i32 %val5, i32 *%ptr + store volatile i32 %val6, i32 *%ptr + store volatile i32 %val7, i32 *%ptr + store volatile i32 %val8, i32 *%ptr + store volatile i32 %val9, i32 *%ptr + store volatile i32 %val10, i32 *%ptr + store volatile i32 %val11, 
i32 *%ptr + store volatile i32 %val12, i32 *%ptr + store volatile i32 %val13, i32 *%ptr + store volatile i32 %val14, i32 *%ptr + store volatile i32 %val15, i32 *%ptr + store volatile i32 %val16, i32 *%ptr + store volatile i32 %val17, i32 *%ptr + store volatile i32 %val18, i32 *%ptr + store volatile i32 %val19, i32 *%ptr + store volatile i32 %val20, i32 *%ptr + store volatile i32 %val21, i32 *%ptr + store volatile i32 %val22, i32 *%ptr + store volatile i32 %val23, i32 *%ptr + store volatile i32 %val24, i32 *%ptr + store volatile i32 %val25, i32 *%ptr + store volatile i32 %val26, i32 *%ptr + store volatile i32 %val27, i32 *%ptr + store volatile i32 %val28, i32 *%ptr + store volatile i32 %val29, i32 *%ptr + store volatile i32 %val30, i32 *%ptr + store volatile i32 %val31, i32 *%ptr + + store volatile i32 %ext0, i32 *%ptr + store volatile i32 %ext1, i32 *%ptr + store volatile i32 %ext2, i32 *%ptr + store volatile i32 %ext3, i32 *%ptr + store volatile i32 %ext4, i32 *%ptr + store volatile i32 %ext5, i32 *%ptr + store volatile i32 %ext6, i32 *%ptr + store volatile i32 %ext7, i32 *%ptr + store volatile i32 %ext8, i32 *%ptr + store volatile i32 %ext9, i32 *%ptr + store volatile i32 %ext10, i32 *%ptr + store volatile i32 %ext11, i32 *%ptr + store volatile i32 %ext12, i32 *%ptr + store volatile i32 %ext13, i32 *%ptr + store volatile i32 %ext14, i32 *%ptr + store volatile i32 %ext15, i32 *%ptr + store volatile i32 %ext16, i32 *%ptr + store volatile i32 %ext17, i32 *%ptr + store volatile i32 %ext18, i32 *%ptr + store volatile i32 %ext19, i32 *%ptr + store volatile i32 %ext20, i32 *%ptr + store volatile i32 %ext21, i32 *%ptr + store volatile i32 %ext22, i32 *%ptr + store volatile i32 %ext23, i32 *%ptr + store volatile i32 %ext24, i32 *%ptr + store volatile i32 %ext25, i32 *%ptr + store volatile i32 %ext26, i32 *%ptr + store volatile i32 %ext27, i32 *%ptr + store volatile i32 %ext28, i32 *%ptr + store volatile i32 %ext29, i32 *%ptr + store volatile i32 %ext30, i32 *%ptr + store 
volatile i32 %ext31, i32 *%ptr + + ret void +} diff --git a/test/CodeGen/SystemZ/int-div-06.ll b/test/CodeGen/SystemZ/int-div-06.ll new file mode 100644 index 0000000..8576b1b --- /dev/null +++ b/test/CodeGen/SystemZ/int-div-06.ll @@ -0,0 +1,56 @@ +; Test that divisions by constants are implemented as multiplications. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check signed 32-bit division. +define i32 @f1(i32 %a) { +; CHECK-LABEL: f1: +; CHECK: lgfr [[REG:%r[0-5]]], %r2 +; CHECK: msgfi [[REG]], 502748801 +; CHECK-DAG: srlg [[RES1:%r[0-5]]], [[REG]], 63 +; CHECK-DAG: srag %r2, [[REG]], 46 +; CHECK: ar %r2, [[RES1]] +; CHECK: br %r14 + %b = sdiv i32 %a, 139968 + ret i32 %b +} + +; Check unsigned 32-bit division. +define i32 @f2(i32 %a) { +; CHECK-LABEL: f2: +; CHECK: llgfr [[REG:%r[0-5]]], %r2 +; CHECK: msgfi [[REG]], 502748801 +; CHECK: srlg %r2, [[REG]], 46 +; CHECK: br %r14 + %b = udiv i32 %a, 139968 + ret i32 %b +} + +; Check signed 64-bit division. +define i64 @f3(i64 %dummy, i64 %a) { +; CHECK-LABEL: f3: +; CHECK-DAG: llihf [[CONST:%r[0-5]]], 1005497601 +; CHECK-DAG: oilf [[CONST]], 4251762321 +; CHECK-DAG: srag [[REG:%r[0-5]]], %r3, 63 +; CHECK-DAG: ngr [[REG]], [[CONST]] +; CHECK-DAG: mlgr %r2, [[CONST]] +; CHECK: sgr %r2, [[REG]] +; CHECK: srlg [[RES1:%r[0-5]]], %r2, 63 +; CHECK: srag %r2, %r2, 15 +; CHECK: agr %r2, [[RES1]] +; CHECK: br %r14 + %b = sdiv i64 %a, 139968 + ret i64 %b +} + +; Check unsigned 64-bit division. 
+define i64 @f4(i64 %dummy, i64 %a) { +; CHECK-LABEL: f4: +; CHECK: llihf [[CONST:%r[0-5]]], 1005497601 +; CHECK: oilf [[CONST]], 4251762321 +; CHECK: mlgr %r2, [[CONST]] +; CHECK: srlg %r2, %r2, 15 +; CHECK: br %r14 + %b = udiv i64 %a, 139968 + ret i64 %b +} diff --git a/test/CodeGen/SystemZ/int-move-08.ll b/test/CodeGen/SystemZ/int-move-08.ll index f16dd8e..56fcbc6 100644 --- a/test/CodeGen/SystemZ/int-move-08.ll +++ b/test/CodeGen/SystemZ/int-move-08.ll @@ -10,6 +10,8 @@ @gsrc32u = global i32 1, align 2, section "foo" @gdst16u = global i16 2, align 1, section "foo" @gdst32u = global i32 2, align 2, section "foo" +@garray8 = global [2 x i8] [i8 100, i8 101] +@garray16 = global [2 x i16] [i16 102, i16 103] ; Check sign-extending loads from i16. define i32 @f1() { @@ -97,3 +99,36 @@ define void @f8() { store i32 %val, i32 *@gdst32u, align 2 ret void } + +; Test a case where we want to use one LARL for accesses to two different +; parts of a variable. +define void @f9() { +; CHECK-LABEL: f9: +; CHECK: larl [[REG:%r[0-5]]], garray8 +; CHECK: llc [[VAL:%r[0-5]]], 0([[REG]]) +; CHECK: srl [[VAL]], 1 +; CHECK: stc [[VAL]], 1([[REG]]) +; CHECK: br %r14 + %ptr1 = getelementptr [2 x i8] *@garray8, i64 0, i64 0 + %ptr2 = getelementptr [2 x i8] *@garray8, i64 0, i64 1 + %val = load i8 *%ptr1 + %shr = lshr i8 %val, 1 + store i8 %shr, i8 *%ptr2 + ret void +} + +; Test a case where we want to use separate relative-long addresses for +; two different parts of a variable. 
+define void @f10() { +; CHECK-LABEL: f10: +; CHECK: llhrl [[VAL:%r[0-5]]], garray16 +; CHECK: srl [[VAL]], 1 +; CHECK: sthrl [[VAL]], garray16+2 +; CHECK: br %r14 + %ptr1 = getelementptr [2 x i16] *@garray16, i64 0, i64 0 + %ptr2 = getelementptr [2 x i16] *@garray16, i64 0, i64 1 + %val = load i16 *%ptr1 + %shr = lshr i16 %val, 1 + store i16 %shr, i16 *%ptr2 + ret void +} diff --git a/test/CodeGen/SystemZ/int-mul-08.ll b/test/CodeGen/SystemZ/int-mul-08.ll index a245760..90b26a4 100644 --- a/test/CodeGen/SystemZ/int-mul-08.ll +++ b/test/CodeGen/SystemZ/int-mul-08.ll @@ -22,9 +22,13 @@ define i64 @f1(i64 %dummy, i64 %a, i64 %b) { ; This needs a rather convoluted sequence. define i64 @f2(i64 %dummy, i64 %a, i64 %b) { ; CHECK-LABEL: f2: -; CHECK: mlgr -; CHECK: agr -; CHECK: agr +; CHECK-DAG: srag [[RES1:%r[0-5]]], %r3, 63 +; CHECK-DAG: srag [[RES2:%r[0-5]]], %r4, 63 +; CHECK-DAG: ngr [[RES1]], %r4 +; CHECK-DAG: ngr [[RES2]], %r3 +; CHECK-DAG: agr [[RES2]], [[RES1]] +; CHECK-DAG: mlgr %r2, %r4 +; CHECK: sgr %r2, [[RES2]] ; CHECK: br %r14 %ax = sext i64 %a to i128 %bx = sext i64 %b to i128 diff --git a/test/CodeGen/SystemZ/int-neg-02.ll b/test/CodeGen/SystemZ/int-neg-02.ll new file mode 100644 index 0000000..e26194c --- /dev/null +++ b/test/CodeGen/SystemZ/int-neg-02.ll @@ -0,0 +1,91 @@ +; Test negative integer absolute. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test i32->i32 negative absolute using slt. +define i32 @f1(i32 %val) { +; CHECK-LABEL: f1: +; CHECK: lnr %r2, %r2 +; CHECK: br %r14 + %cmp = icmp slt i32 %val, 0 + %neg = sub i32 0, %val + %abs = select i1 %cmp, i32 %neg, i32 %val + %res = sub i32 0, %abs + ret i32 %res +} + +; Test i32->i32 negative absolute using sle. 
+define i32 @f2(i32 %val) { +; CHECK-LABEL: f2: +; CHECK: lnr %r2, %r2 +; CHECK: br %r14 + %cmp = icmp sle i32 %val, 0 + %neg = sub i32 0, %val + %abs = select i1 %cmp, i32 %neg, i32 %val + %res = sub i32 0, %abs + ret i32 %res +} + +; Test i32->i32 negative absolute using sgt. +define i32 @f3(i32 %val) { +; CHECK-LABEL: f3: +; CHECK: lnr %r2, %r2 +; CHECK: br %r14 + %cmp = icmp sgt i32 %val, 0 + %neg = sub i32 0, %val + %abs = select i1 %cmp, i32 %val, i32 %neg + %res = sub i32 0, %abs + ret i32 %res +} + +; Test i32->i32 negative absolute using sge. +define i32 @f4(i32 %val) { +; CHECK-LABEL: f4: +; CHECK: lnr %r2, %r2 +; CHECK: br %r14 + %cmp = icmp sge i32 %val, 0 + %neg = sub i32 0, %val + %abs = select i1 %cmp, i32 %val, i32 %neg + %res = sub i32 0, %abs + ret i32 %res +} + +; Test i32->i64 negative absolute. +define i64 @f5(i32 %val) { +; CHECK-LABEL: f5: +; CHECK: lngfr %r2, %r2 +; CHECK: br %r14 + %ext = sext i32 %val to i64 + %cmp = icmp slt i64 %ext, 0 + %neg = sub i64 0, %ext + %abs = select i1 %cmp, i64 %neg, i64 %ext + %res = sub i64 0, %abs + ret i64 %res +} + +; Test i32->i64 negative absolute that uses an "in-register" form of +; sign extension. +define i64 @f6(i64 %val) { +; CHECK-LABEL: f6: +; CHECK: lngfr %r2, %r2 +; CHECK: br %r14 + %trunc = trunc i64 %val to i32 + %ext = sext i32 %trunc to i64 + %cmp = icmp slt i64 %ext, 0 + %neg = sub i64 0, %ext + %abs = select i1 %cmp, i64 %neg, i64 %ext + %res = sub i64 0, %abs + ret i64 %res +} + +; Test i64 negative absolute. 
+define i64 @f7(i64 %val) { +; CHECK-LABEL: f7: +; CHECK: lngr %r2, %r2 +; CHECK: br %r14 + %cmp = icmp slt i64 %val, 0 + %neg = sub i64 0, %val + %abs = select i1 %cmp, i64 %neg, i64 %val + %res = sub i64 0, %abs + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/lit.local.cfg b/test/CodeGen/SystemZ/lit.local.cfg index 79528d1..b12af09 100644 --- a/test/CodeGen/SystemZ/lit.local.cfg +++ b/test/CodeGen/SystemZ/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - targets = set(config.root.targets_to_build.split()) if not 'SystemZ' in targets: config.unsupported = True diff --git a/test/CodeGen/SystemZ/memchr-01.ll b/test/CodeGen/SystemZ/memchr-01.ll new file mode 100644 index 0000000..c51690b --- /dev/null +++ b/test/CodeGen/SystemZ/memchr-01.ll @@ -0,0 +1,21 @@ +; Test memchr using SRST, with a weird but usable prototype. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i8 *@memchr(i8 *%src, i16 %char, i32 %len) + +; Test a simple forwarded call. +define i8 *@f1(i8 *%src, i16 %char, i32 %len) { +; CHECK-LABEL: f1: +; CHECK-DAG: lgr [[REG:%r[1-5]]], %r2 +; CHECK-DAG: algfr %r2, %r4 +; CHECK-DAG: llcr %r0, %r3 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: srst %r2, [[REG]] +; CHECK-NEXT: jo [[LABEL]] +; CHECK: jl {{\.L.*}} +; CHECK: lghi %r2, 0 +; CHECK: br %r14 + %res = call i8 *@memchr(i8 *%src, i16 %char, i32 %len) + ret i8 *%res +} diff --git a/test/CodeGen/SystemZ/memchr-02.ll b/test/CodeGen/SystemZ/memchr-02.ll new file mode 100644 index 0000000..982b396 --- /dev/null +++ b/test/CodeGen/SystemZ/memchr-02.ll @@ -0,0 +1,57 @@ +; Test memchr using SRST, with the correct prototype. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i8 *@memchr(i8 *%src, i32 %char, i64 %len) + +; Test a simple forwarded call. 
+define i8 *@f1(i64 %len, i8 *%src, i32 %char) { +; CHECK-LABEL: f1: +; CHECK-DAG: agr %r2, %r3 +; CHECK-DAG: llcr %r0, %r4 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: srst %r2, %r3 +; CHECK-NEXT: jo [[LABEL]] +; CHECK: jl {{\.L.*}} +; CHECK: lghi %r2, 0 +; CHECK: br %r14 + %res = call i8 *@memchr(i8 *%src, i32 %char, i64 %len) + ret i8 *%res +} + +; Test a doubled call with no use of %r0 in between. There should be a +; single load of %r0. +define i8 *@f2(i8 *%src, i8 *%charptr, i64 %len) { +; CHECK-LABEL: f2: +; CHECK: llc %r0, 0(%r3) +; CHECK-NOT: %r0 +; CHECK: srst [[RES1:%r[1-5]]], %r2 +; CHECK-NOT: %r0 +; CHECK: srst %r2, [[RES1]] +; CHECK: br %r14 + %char = load volatile i8 *%charptr + %charext = zext i8 %char to i32 + %res1 = call i8 *@memchr(i8 *%src, i32 %charext, i64 %len) + %res2 = call i8 *@memchr(i8 *%res1, i32 %charext, i64 %len) + ret i8 *%res2 +} + +; Test a doubled call with a use of %r0 in between. %r0 must be loaded +; for each loop. +define i8 *@f3(i8 *%src, i8 *%charptr, i64 %len) { +; CHECK-LABEL: f3: +; CHECK: llc [[CHAR:%r[1-5]]], 0(%r3) +; CHECK: lr %r0, [[CHAR]] +; CHECK: srst [[RES1:%r[1-5]]], %r2 +; CHECK: lhi %r0, 0 +; CHECK: blah %r0 +; CHECK: lr %r0, [[CHAR]] +; CHECK: srst %r2, [[RES1]] +; CHECK: br %r14 + %char = load volatile i8 *%charptr + %charext = zext i8 %char to i32 + %res1 = call i8 *@memchr(i8 *%src, i32 %charext, i64 %len) + call void asm sideeffect "blah $0", "{r0}" (i32 0) + %res2 = call i8 *@memchr(i8 *%res1, i32 %charext, i64 %len) + ret i8 *%res2 +} diff --git a/test/CodeGen/SystemZ/memcmp-01.ll b/test/CodeGen/SystemZ/memcmp-01.ll new file mode 100644 index 0000000..a014419 --- /dev/null +++ b/test/CodeGen/SystemZ/memcmp-01.ll @@ -0,0 +1,221 @@ +; Test memcmp using CLC, with i32 results. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare signext i32 @memcmp(i8 *%src1, i8 *%src2, i64 %size) + +; Zero-length comparisons should be optimized away. 
+define i32 @f1(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f1: +; CHECK: lhi %r2, 0 +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 0) + ret i32 %res +} + +; Check a case where the result is used as an integer. +define i32 @f2(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f2: +; CHECK: clc 0(2,%r2), 0(%r3) +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: rll %r2, [[REG]], 31 +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 2) + ret i32 %res +} + +; Check a case where the result is tested for equality. +define void @f3(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f3: +; CHECK: clc 0(3,%r2), 0(%r3) +; CHECK-NEXT: je {{\..*}} +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 3) + %cmp = icmp eq i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret void +} + +; Check a case where the result is tested for inequality. +define void @f4(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f4: +; CHECK: clc 0(4,%r2), 0(%r3) +; CHECK-NEXT: jlh {{\..*}} +; CHECK: br %r14 +entry: + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 4) + %cmp = icmp ne i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret void +} + +; Check a case where the result is tested via slt. +define void @f5(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f5: +; CHECK: clc 0(5,%r2), 0(%r3) +; CHECK-NEXT: jl {{\..*}} +; CHECK: br %r14 +entry: + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 5) + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret void +} + +; Check a case where the result is tested for sgt. 
+define void @f6(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f6: +; CHECK: clc 0(6,%r2), 0(%r3) +; CHECK-NEXT: jh {{\..*}} +; CHECK: br %r14 +entry: + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 6) + %cmp = icmp sgt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret void +} + +; Check the upper end of the CLC range. Here the result is used both as +; an integer and for branching. +define i32 @f7(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f7: +; CHECK: clc 0(256,%r2), 0(%r3) +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: rll %r2, [[REG]], 31 +; CHECK: jl {{.L*}} +; CHECK: br %r14 +entry: + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 256) + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; 257 bytes needs two CLCs. +define i32 @f8(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f8: +; CHECK: clc 0(256,%r2), 0(%r3) +; CHECK: jlh [[LABEL:\..*]] +; CHECK: clc 256(1,%r2), 256(%r3) +; CHECK: [[LABEL]]: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 257) + ret i32 %res +} + +; Test a comparison of 258 bytes in which the CC result can be used directly. +define void @f9(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f9: +; CHECK: clc 0(256,%r2), 0(%r3) +; CHECK: jlh [[LABEL:\..*]] +; CHECK: clc 256(1,%r2), 256(%r3) +; CHECK: [[LABEL]]: +; CHECK-NEXT: jl .L +; CHECK: br %r14 +entry: + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 257) + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret void +} + +; Test the largest size that can use two CLCs. 
+define i32 @f10(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f10: +; CHECK: clc 0(256,%r2), 0(%r3) +; CHECK: jlh [[LABEL:\..*]] +; CHECK: clc 256(256,%r2), 256(%r3) +; CHECK: [[LABEL]]: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 512) + ret i32 %res +} + +; Test the smallest size that needs 3 CLCs. +define i32 @f11(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f11: +; CHECK: clc 0(256,%r2), 0(%r3) +; CHECK: jlh [[LABEL:\..*]] +; CHECK: clc 256(256,%r2), 256(%r3) +; CHECK: jlh [[LABEL]] +; CHECK: clc 512(1,%r2), 512(%r3) +; CHECK: [[LABEL]]: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 513) + ret i32 %res +} + +; Test the largest size than can use 3 CLCs. +define i32 @f12(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f12: +; CHECK: clc 0(256,%r2), 0(%r3) +; CHECK: jlh [[LABEL:\..*]] +; CHECK: clc 256(256,%r2), 256(%r3) +; CHECK: jlh [[LABEL]] +; CHECK: clc 512(256,%r2), 512(%r3) +; CHECK: [[LABEL]]: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 768) + ret i32 %res +} + +; The next size up uses a loop instead. We leave the more complicated +; loop tests to memcpy-01.ll, which shares the same form. +define i32 @f13(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f13: +; CHECK: lghi [[COUNT:%r[0-5]]], 3 +; CHECK: [[LOOP:.L[^:]*]]: +; CHECK: clc 0(256,%r2), 0(%r3) +; CHECK: jlh [[LABEL:\..*]] +; CHECK-DAG: la %r2, 256(%r2) +; CHECK-DAG: la %r3, 256(%r3) +; CHECK: brctg [[COUNT]], [[LOOP]] +; CHECK: clc 0(1,%r2), 0(%r3) +; CHECK: [[LABEL]]: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 769) + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/memcmp-02.ll b/test/CodeGen/SystemZ/memcmp-02.ll new file mode 100644 index 0000000..74b090d --- /dev/null +++ b/test/CodeGen/SystemZ/memcmp-02.ll @@ -0,0 +1,139 @@ +; Test memcmp using CLC, with i64 results. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @memcmp(i8 *%src1, i8 *%src2, i64 %size) + +; Zero-length comparisons should be optimized away. +define i64 @f1(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f1: +; CHECK: lghi %r2, 0 +; CHECK: br %r14 + %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 0) + ret i64 %res +} + +; Check a case where the result is used as an integer. +define i64 @f2(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f2: +; CHECK: clc 0(2,%r2), 0(%r3) +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: rll [[REG]], [[REG]], 31 +; CHECK: lgfr %r2, [[REG]] +; CHECK: br %r14 + %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 2) + ret i64 %res +} + +; Check a case where the result is tested for equality. +define void @f3(i8 *%src1, i8 *%src2, i64 *%dest) { +; CHECK-LABEL: f3: +; CHECK: clc 0(3,%r2), 0(%r3) +; CHECK-NEXT: je {{\..*}} +; CHECK: br %r14 + %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 3) + %cmp = icmp eq i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 0, i64 *%dest + br label %exit + +exit: + ret void +} + +; Check a case where the result is tested for inequality. +define void @f4(i8 *%src1, i8 *%src2, i64 *%dest) { +; CHECK-LABEL: f4: +; CHECK: clc 0(4,%r2), 0(%r3) +; CHECK-NEXT: jlh {{\..*}} +; CHECK: br %r14 +entry: + %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 4) + %cmp = icmp ne i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 0, i64 *%dest + br label %exit + +exit: + ret void +} + +; Check a case where the result is tested via slt. +define void @f5(i8 *%src1, i8 *%src2, i64 *%dest) { +; CHECK-LABEL: f5: +; CHECK: clc 0(5,%r2), 0(%r3) +; CHECK-NEXT: jl {{\..*}} +; CHECK: br %r14 +entry: + %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 5) + %cmp = icmp slt i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 0, i64 *%dest + br label %exit + +exit: + ret void +} + +; Check a case where the result is tested for sgt. 
+define void @f6(i8 *%src1, i8 *%src2, i64 *%dest) { +; CHECK-LABEL: f6: +; CHECK: clc 0(6,%r2), 0(%r3) +; CHECK-NEXT: jh {{\..*}} +; CHECK: br %r14 +entry: + %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 6) + %cmp = icmp sgt i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 0, i64 *%dest + br label %exit + +exit: + ret void +} + +; Check the upper end of the CLC range. Here the result is used both as +; an integer and for branching. +define i64 @f7(i8 *%src1, i8 *%src2, i64 *%dest) { +; CHECK-LABEL: f7: +; CHECK: clc 0(256,%r2), 0(%r3) +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: rll [[REG]], [[REG]], 31 +; CHECK: lgfr %r2, [[REG]] +; CHECK: jl {{.L*}} +; CHECK: br %r14 +entry: + %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 256) + %cmp = icmp slt i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 0, i64 *%dest + br label %exit + +exit: + ret i64 %res +} + +; 257 bytes needs two CLCs. +define i64 @f8(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f8: +; CHECK: clc 0(256,%r2), 0(%r3) +; CHECK: jlh [[LABEL:\..*]] +; CHECK: clc 256(1,%r2), 256(%r3) +; CHECK: [[LABEL]]: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: br %r14 + %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 257) + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/memcpy-01.ll b/test/CodeGen/SystemZ/memcpy-01.ll index 7cb58b3..b53ec54 100644 --- a/test/CodeGen/SystemZ/memcpy-01.ll +++ b/test/CodeGen/SystemZ/memcpy-01.ll @@ -4,7 +4,9 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8 *nocapture, i8 *nocapture, i32, i32, i1) nounwind declare void @llvm.memcpy.p0i8.p0i8.i64(i8 *nocapture, i8 *nocapture, i64, i32, i1) nounwind +declare void @foo(i8 *, i8 *) +; Test a no-op move, i32 version. define void @f1(i8 *%dest, i8 *%src) { ; CHECK-LABEL: f1: ; CHECK-NOT: %r2 @@ -15,6 +17,7 @@ define void @f1(i8 *%dest, i8 *%src) { ret void } +; Test a no-op move, i64 version. 
define void @f2(i8 *%dest, i8 *%src) { ; CHECK-LABEL: f2: ; CHECK-NOT: %r2 @@ -25,6 +28,7 @@ define void @f2(i8 *%dest, i8 *%src) { ret void } +; Test a 1-byte move, i32 version. define void @f3(i8 *%dest, i8 *%src) { ; CHECK-LABEL: f3: ; CHECK: mvc 0(1,%r2), 0(%r3) @@ -34,6 +38,7 @@ define void @f3(i8 *%dest, i8 *%src) { ret void } +; Test a 1-byte move, i64 version. define void @f4(i8 *%dest, i8 *%src) { ; CHECK-LABEL: f4: ; CHECK: mvc 0(1,%r2), 0(%r3) @@ -43,6 +48,7 @@ define void @f4(i8 *%dest, i8 *%src) { ret void } +; Test the upper range of a single MVC, i32 version. define void @f5(i8 *%dest, i8 *%src) { ; CHECK-LABEL: f5: ; CHECK: mvc 0(256,%r2), 0(%r3) @@ -52,6 +58,7 @@ define void @f5(i8 *%dest, i8 *%src) { ret void } +; Test the upper range of a single MVC, i64 version. define void @f6(i8 *%dest, i8 *%src) { ; CHECK-LABEL: f6: ; CHECK: mvc 0(256,%r2), 0(%r3) @@ -61,22 +68,168 @@ define void @f6(i8 *%dest, i8 *%src) { ret void } -; 257 bytes is too big for a single MVC. For now expect none, so that -; the test fails and gets updated when large copies are implemented. +; Test the first case that needs two MVCs. define void @f7(i8 *%dest, i8 *%src) { ; CHECK-LABEL: f7: -; CHECK-NOT: mvc +; CHECK: mvc 0(256,%r2), 0(%r3) +; CHECK: mvc 256(1,%r2), 256(%r3) ; CHECK: br %r14 call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%dest, i8 *%src, i32 257, i32 1, i1 false) ret void } +; Test the last-but-one case that needs two MVCs. define void @f8(i8 *%dest, i8 *%src) { ; CHECK-LABEL: f8: -; CHECK-NOT: mvc +; CHECK: mvc 0(256,%r2), 0(%r3) +; CHECK: mvc 256(255,%r2), 256(%r3) +; CHECK: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 511, i32 1, + i1 false) + ret void +} + +; Test the last case that needs two MVCs. 
+define void @f9(i8 *%dest, i8 *%src) { +; CHECK-LABEL: f9: +; CHECK: mvc 0(256,%r2), 0(%r3) +; CHECK: mvc 256(256,%r2), 256(%r3) +; CHECK: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 512, i32 1, + i1 false) + ret void +} + +; Test an arbitrary value that uses straight-line code. +define void @f10(i8 *%dest, i8 *%src) { +; CHECK-LABEL: f10: +; CHECK: mvc 0(256,%r2), 0(%r3) +; CHECK: mvc 256(256,%r2), 256(%r3) +; CHECK: mvc 512(256,%r2), 512(%r3) +; CHECK: mvc 768(256,%r2), 768(%r3) +; CHECK: mvc 1024(255,%r2), 1024(%r3) +; CHECK: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1279, i32 1, + i1 false) + ret void +} + +; ...and again in cases where not all parts are in range of MVC. +define void @f11(i8 *%srcbase, i8 *%destbase) { +; CHECK-LABEL: f11: +; CHECK: mvc 4000(256,%r2), 3500(%r3) +; CHECK: lay [[NEWDEST:%r[1-5]]], 4256(%r2) +; CHECK: mvc 0(256,[[NEWDEST]]), 3756(%r3) +; CHECK: mvc 256(256,[[NEWDEST]]), 4012(%r3) +; CHECK: lay [[NEWSRC:%r[1-5]]], 4268(%r3) +; CHECK: mvc 512(256,[[NEWDEST]]), 0([[NEWSRC]]) +; CHECK: mvc 768(255,[[NEWDEST]]), 256([[NEWSRC]]) +; CHECK: br %r14 + %dest = getelementptr i8 *%srcbase, i64 4000 + %src = getelementptr i8* %destbase, i64 3500 + call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1279, i32 1, + i1 false) + ret void +} + +; ...and again with a destination frame base that goes out of range. 
+define void @f12() { +; CHECK-LABEL: f12: +; CHECK: brasl %r14, foo@PLT +; CHECK: mvc 4076(256,%r15), 2100(%r15) +; CHECK: lay [[NEWDEST:%r[1-5]]], 4332(%r15) +; CHECK: mvc 0(256,[[NEWDEST]]), 2356(%r15) +; CHECK: mvc 256(256,[[NEWDEST]]), 2612(%r15) +; CHECK: mvc 512(256,[[NEWDEST]]), 2868(%r15) +; CHECK: mvc 768(255,[[NEWDEST]]), 3124(%r15) +; CHECK: brasl %r14, foo@PLT +; CHECK: br %r14 + %arr = alloca [6000 x i8] + %dest = getelementptr [6000 x i8] *%arr, i64 0, i64 3900 + %src = getelementptr [6000 x i8] *%arr, i64 0, i64 1924 + call void @foo(i8 *%dest, i8 *%src) + call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1279, i32 1, + i1 false) + call void @foo(i8 *%dest, i8 *%src) + ret void +} + +; ...and again with a source frame base that goes out of range. +define void @f13() { +; CHECK-LABEL: f13: +; CHECK: brasl %r14, foo@PLT +; CHECK: mvc 200(256,%r15), 3826(%r15) +; CHECK: mvc 456(256,%r15), 4082(%r15) +; CHECK: lay [[NEWSRC:%r[1-5]]], 4338(%r15) +; CHECK: mvc 712(256,%r15), 0([[NEWSRC]]) +; CHECK: mvc 968(256,%r15), 256([[NEWSRC]]) +; CHECK: mvc 1224(255,%r15), 512([[NEWSRC]]) +; CHECK: brasl %r14, foo@PLT +; CHECK: br %r14 + %arr = alloca [6000 x i8] + %dest = getelementptr [6000 x i8] *%arr, i64 0, i64 24 + %src = getelementptr [6000 x i8] *%arr, i64 0, i64 3650 + call void @foo(i8 *%dest, i8 *%src) + call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1279, i32 1, + i1 false) + call void @foo(i8 *%dest, i8 *%src) + ret void +} + +; Test the last case that is done using straight-line code. 
+define void @f14(i8 *%dest, i8 *%src) { +; CHECK-LABEL: f14: +; CHECK: mvc 0(256,%r2), 0(%r3) +; CHECK: mvc 256(256,%r2), 256(%r3) +; CHECK: mvc 512(256,%r2), 512(%r3) +; CHECK: mvc 768(256,%r2), 768(%r3) +; CHECK: mvc 1024(256,%r2), 1024(%r3) +; CHECK: mvc 1280(256,%r2), 1280(%r3) +; CHECK: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1536, i32 1, + i1 false) + ret void +} + +; Test the first case that is done using a loop. +define void @f15(i8 *%dest, i8 *%src) { +; CHECK-LABEL: f15: +; CHECK: lghi [[COUNT:%r[0-5]]], 6 +; CHECK: [[LABEL:\.L[^:]*]]: +; CHECK: pfd 2, 768(%r2) +; CHECK: mvc 0(256,%r2), 0(%r3) +; CHECK: la %r2, 256(%r2) +; CHECK: la %r3, 256(%r3) +; CHECK: brctg [[COUNT]], [[LABEL]] +; CHECK: mvc 0(1,%r2), 0(%r3) +; CHECK: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1537, i32 1, + i1 false) + ret void +} + +; ...and again with frame bases, where the base must be loaded into a +; register before the loop. +define void @f16() { +; CHECK-LABEL: f16: +; CHECK: brasl %r14, foo@PLT +; CHECK-DAG: lghi [[COUNT:%r[0-5]]], 6 +; CHECK-DAG: la [[BASE:%r[0-5]]], 160(%r15) +; CHECK: [[LABEL:\.L[^:]*]]: +; CHECK: pfd 2, 2368([[BASE]]) +; CHECK: mvc 1600(256,[[BASE]]), 0([[BASE]]) +; CHECK: la [[BASE]], 256([[BASE]]) +; CHECK: brctg [[COUNT]], [[LABEL]] +; CHECK: mvc 1600(1,[[BASE]]), 0([[BASE]]) +; CHECK: brasl %r14, foo@PLT ; CHECK: br %r14 - call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 257, i32 1, + %arr = alloca [3200 x i8] + %dest = getelementptr [3200 x i8] *%arr, i64 0, i64 1600 + %src = getelementptr [3200 x i8] *%arr, i64 0, i64 0 + call void @foo(i8 *%dest, i8 *%src) + call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1537, i32 1, i1 false) + call void @foo(i8 *%dest, i8 *%src) ret void } diff --git a/test/CodeGen/SystemZ/memcpy-02.ll b/test/CodeGen/SystemZ/memcpy-02.ll index 83b2cd8..2b01091 100644 --- a/test/CodeGen/SystemZ/memcpy-02.ll +++ 
b/test/CodeGen/SystemZ/memcpy-02.ll @@ -2,11 +2,14 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -@g1 = global i8 1 -@g2 = global i16 2 +@g1src = global i8 1 +@g1dst = global i8 1 +@g2src = global i16 2 +@g2dst = global i16 2 @g3 = global i32 3 @g4 = global i64 4 -@g5 = external global fp128, align 16 +@g5src = external global fp128, align 16 +@g5dst = external global fp128, align 16 ; Test the simple i8 case. define void @f1(i8 *%ptr1) { @@ -237,18 +240,19 @@ define void @f19(i64 *%ptr1) { ret void } -; Test that MVC is used for aligned loads and stores, even if there is -; no way of telling whether they alias. +; Test that MVC is not used for aligned loads and stores if there is +; no way of telling whether they alias. We don't want to use MVC in +; cases where the addresses could be equal. define void @f20(i64 *%ptr1, i64 *%ptr2) { ; CHECK-LABEL: f20: -; CHECK: mvc 0(8,%r3), 0(%r2) +; CHECK-NOT: mvc ; CHECK: br %r14 %val = load i64 *%ptr1 store i64 %val, i64 *%ptr2 ret void } -; ...but if the loads aren't aligned, we can't be sure. +; ...and again for unaligned loads and stores. define void @f21(i64 *%ptr1, i64 *%ptr2) { ; CHECK-LABEL: f21: ; CHECK-NOT: mvc @@ -274,50 +278,29 @@ define void @f22(i64 %base) { ; Test that we can use MVC for global addresses for i8. define void @f23(i8 *%ptr) { ; CHECK-LABEL: f23: -; CHECK: larl [[REG:%r[0-5]]], g1 -; CHECK: mvc 0(1,%r2), 0([[REG]]) +; CHECK-DAG: larl [[SRC:%r[0-5]]], g1src +; CHECK-DAG: larl [[DST:%r[0-5]]], g1dst +; CHECK: mvc 0(1,[[DST]]), 0([[SRC]]) ; CHECK: br %r14 - %val = load i8 *@g1 - store i8 %val, i8 *%ptr + %val = load i8 *@g1src + store i8 %val, i8 *@g1dst ret void } -; ...and again with the global on the store. -define void @f24(i8 *%ptr) { +; Test that we use LHRL and STHRL for i16. 
+define void @f24(i16 *%ptr) { ; CHECK-LABEL: f24: -; CHECK: larl [[REG:%r[0-5]]], g1 -; CHECK: mvc 0(1,[[REG]]), 0(%r2) -; CHECK: br %r14 - %val = load i8 *%ptr - store i8 %val, i8 *@g1 - ret void -} - -; Test that we use LHRL for i16. -define void @f25(i16 *%ptr) { -; CHECK-LABEL: f25: -; CHECK: lhrl [[REG:%r[0-5]]], g2 -; CHECK: sth [[REG]], 0(%r2) +; CHECK: lhrl [[REG:%r[0-5]]], g2src +; CHECK: sthrl [[REG]], g2dst ; CHECK: br %r14 - %val = load i16 *@g2 - store i16 %val, i16 *%ptr - ret void -} - -; ...likewise STHRL. -define void @f26(i16 *%ptr) { -; CHECK-LABEL: f26: -; CHECK: lh [[REG:%r[0-5]]], 0(%r2) -; CHECK: sthrl [[REG]], g2 -; CHECK: br %r14 - %val = load i16 *%ptr - store i16 %val, i16 *@g2 + %val = load i16 *@g2src + store i16 %val, i16 *@g2dst ret void } ; Test that we use LRL for i32. -define void @f27(i32 *%ptr) { -; CHECK-LABEL: f27: +define void @f25(i32 *%ptr) { +; CHECK-LABEL: f25: ; CHECK: lrl [[REG:%r[0-5]]], g3 ; CHECK: st [[REG]], 0(%r2) ; CHECK: br %r14 @@ -327,8 +310,8 @@ define void @f27(i32 *%ptr) { } ; ...likewise STRL. -define void @f28(i32 *%ptr) { -; CHECK-LABEL: f28: +define void @f26(i32 *%ptr) { +; CHECK-LABEL: f26: ; CHECK: l [[REG:%r[0-5]]], 0(%r2) ; CHECK: strl [[REG]], g3 ; CHECK: br %r14 @@ -338,8 +321,8 @@ define void @f28(i32 *%ptr) { } ; Test that we use LGRL for i64. -define void @f29(i64 *%ptr) { -; CHECK-LABEL: f29: +define void @f27(i64 *%ptr) { +; CHECK-LABEL: f27: ; CHECK: lgrl [[REG:%r[0-5]]], g4 ; CHECK: stg [[REG]], 0(%r2) ; CHECK: br %r14 @@ -349,8 +332,8 @@ define void @f29(i64 *%ptr) { } ; ...likewise STGRL. -define void @f30(i64 *%ptr) { -; CHECK-LABEL: f30: +define void @f28(i64 *%ptr) { +; CHECK-LABEL: f28: ; CHECK: lg [[REG:%r[0-5]]], 0(%r2) ; CHECK: stgrl [[REG]], g4 ; CHECK: br %r14 @@ -360,30 +343,20 @@ define void @f30(i64 *%ptr) { } ; Test that we can use MVC for global addresses for fp128. 
-define void @f31(fp128 *%ptr) { -; CHECK-LABEL: f31: -; CHECK: larl [[REG:%r[0-5]]], g5 -; CHECK: mvc 0(16,%r2), 0([[REG]]) -; CHECK: br %r14 - %val = load fp128 *@g5, align 16 - store fp128 %val, fp128 *%ptr, align 16 - ret void -} - -; ...and again with the global on the store. -define void @f32(fp128 *%ptr) { -; CHECK-LABEL: f32: -; CHECK: larl [[REG:%r[0-5]]], g5 -; CHECK: mvc 0(16,[[REG]]), 0(%r2) +define void @f29(fp128 *%ptr) { +; CHECK-LABEL: f29: +; CHECK-DAG: larl [[SRC:%r[0-5]]], g5src +; CHECK-DAG: larl [[DST:%r[0-5]]], g5dst +; CHECK: mvc 0(16,[[DST]]), 0([[SRC]]) ; CHECK: br %r14 - %val = load fp128 *%ptr, align 16 - store fp128 %val, fp128 *@g5, align 16 + %val = load fp128 *@g5src, align 16 + store fp128 %val, fp128 *@g5dst, align 16 ret void } ; Test a case where offset disambiguation is enough. -define void @f33(i64 *%ptr1) { -; CHECK-LABEL: f33: +define void @f30(i64 *%ptr1) { +; CHECK-LABEL: f30: ; CHECK: mvc 8(8,%r2), 0(%r2) ; CHECK: br %r14 %ptr2 = getelementptr i64 *%ptr1, i64 1 @@ -393,8 +366,8 @@ define void @f33(i64 *%ptr1) { } ; Test f21 in cases where TBAA tells us there is no alias. -define void @f34(i64 *%ptr1, i64 *%ptr2) { -; CHECK-LABEL: f34: +define void @f31(i64 *%ptr1, i64 *%ptr2) { +; CHECK-LABEL: f31: ; CHECK: mvc 0(8,%r3), 0(%r2) ; CHECK: br %r14 %val = load i64 *%ptr1, align 2, !tbaa !1 @@ -403,8 +376,8 @@ define void @f34(i64 *%ptr1, i64 *%ptr2) { } ; Test f21 in cases where TBAA is present but doesn't help. 
-define void @f35(i64 *%ptr1, i64 *%ptr2) { -; CHECK-LABEL: f35: +define void @f32(i64 *%ptr1, i64 *%ptr2) { +; CHECK-LABEL: f32: ; CHECK-NOT: mvc ; CHECK: br %r14 %val = load i64 *%ptr1, align 2, !tbaa !1 @@ -413,5 +386,7 @@ define void @f35(i64 *%ptr1, i64 *%ptr2) { } !0 = metadata !{ metadata !"root" } -!1 = metadata !{ metadata !"set1", metadata !0 } -!2 = metadata !{ metadata !"set2", metadata !0 } +!1 = metadata !{ metadata !3, metadata !3, i64 0 } +!2 = metadata !{ metadata !4, metadata !4, i64 0 } +!3 = metadata !{ metadata !"set1", metadata !0 } +!4 = metadata !{ metadata !"set2", metadata !0 } diff --git a/test/CodeGen/SystemZ/memset-01.ll b/test/CodeGen/SystemZ/memset-01.ll index b272a5b..f17901c 100644 --- a/test/CodeGen/SystemZ/memset-01.ll +++ b/test/CodeGen/SystemZ/memset-01.ll @@ -103,22 +103,58 @@ define void @f10(i8 *%dest, i8 %val) { ret void } -; 258 bytes, i32 version. 258 bytes is too big for a single MVC. -; For now expect none, so that the test fails and gets updated when -; large copies are implemented. +; 258 bytes, i32 version. We need two MVCs. define void @f11(i8 *%dest, i8 %val) { ; CHECK-LABEL: f11: -; CHECK-NOT: mvc +; CHECK: stc %r3, 0(%r2) +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: mvc 257(1,%r2), 256(%r2) ; CHECK: br %r14 call void @llvm.memset.p0i8.i32(i8 *%dest, i8 %val, i32 258, i32 1, i1 false) ret void } -; 258 bytes, i64 version, with the same comments as above. +; 258 bytes, i64 version. define void @f12(i8 *%dest, i8 %val) { ; CHECK-LABEL: f12: -; CHECK-NOT: mvc +; CHECK: stc %r3, 0(%r2) +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: mvc 257(1,%r2), 256(%r2) ; CHECK: br %r14 call void @llvm.memset.p0i8.i64(i8 *%dest, i8 %val, i64 258, i32 1, i1 false) ret void } + +; Test the largest case for which straight-line code is used. 
+define void @f13(i8 *%dest, i8 %val) { +; CHECK-LABEL: f13: +; CHECK: stc %r3, 0(%r2) +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: mvc 257(256,%r2), 256(%r2) +; CHECK: mvc 513(256,%r2), 512(%r2) +; CHECK: mvc 769(256,%r2), 768(%r2) +; CHECK: mvc 1025(256,%r2), 1024(%r2) +; CHECK: mvc 1281(256,%r2), 1280(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 %val, i64 1537, i32 1, + i1 false) + ret void +} + +; Test the next size up, which uses a loop. We leave the other corner +; cases to memcpy-01.ll. +define void @f14(i8 *%dest, i8 %val) { +; CHECK-LABEL: f14: +; CHECK: stc %r3, 0(%r2) +; CHECK: lghi [[COUNT:%r[0-5]]], 6 +; CHECK: [[LABEL:\.L[^:]*]]: +; CHECK: pfd 2, 769(%r2) +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: la %r2, 256(%r2) +; CHECK: brctg [[COUNT]], [[LABEL]] +; CHECK: mvc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 %val, i64 1538, i32 1, + i1 false) + ret void +} diff --git a/test/CodeGen/SystemZ/memset-02.ll b/test/CodeGen/SystemZ/memset-02.ll index b74d907..b4724c0 100644 --- a/test/CodeGen/SystemZ/memset-02.ll +++ b/test/CodeGen/SystemZ/memset-02.ll @@ -139,21 +139,23 @@ define void @f14(i8 *%dest) { ret void } -; 258 bytes, i32 version. 258 bytes is too big for a single MVC. -; For now expect none, so that the test fails and gets updated when -; large copies are implemented. +; 258 bytes, i32 version. We need two MVCs. define void @f15(i8 *%dest) { ; CHECK-LABEL: f15: -; CHECK-NOT: mvc +; CHECK: mvi 0(%r2), 128 +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: mvc 257(1,%r2), 256(%r2) ; CHECK: br %r14 call void @llvm.memset.p0i8.i32(i8 *%dest, i8 128, i32 258, i32 1, i1 false) ret void } -; 258 bytes, i64 version, with the same comments as above. +; 258 bytes, i64 version. 
define void @f16(i8 *%dest) { ; CHECK-LABEL: f16: -; CHECK-NOT: mvc +; CHECK: mvi 0(%r2), 128 +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: mvc 257(1,%r2), 256(%r2) ; CHECK: br %r14 call void @llvm.memset.p0i8.i64(i8 *%dest, i8 128, i64 258, i32 1, i1 false) ret void diff --git a/test/CodeGen/SystemZ/memset-03.ll b/test/CodeGen/SystemZ/memset-03.ll index 1d48f1a..a95f89f 100644 --- a/test/CodeGen/SystemZ/memset-03.ll +++ b/test/CodeGen/SystemZ/memset-03.ll @@ -140,8 +140,7 @@ define void @f14(i8 *%dest) { ; 7 bytes, i32 version. define void @f15(i8 *%dest) { ; CHECK-LABEL: f15: -; CHECK: mvi 0(%r2), 0 -; CHECK: mvc 1(6,%r2), 0(%r2) +; CHECK: xc 0(7,%r2), 0(%r2) ; CHECK: br %r14 call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 7, i32 1, i1 false) ret void @@ -150,8 +149,7 @@ define void @f15(i8 *%dest) { ; 7 bytes, i64 version. define void @f16(i8 *%dest) { ; CHECK-LABEL: f16: -; CHECK: mvi 0(%r2), 0 -; CHECK: mvc 1(6,%r2), 0(%r2) +; CHECK: xc 0(7,%r2), 0(%r2) ; CHECK: br %r14 call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 7, i32 1, i1 false) ret void @@ -218,8 +216,7 @@ define void @f22(i8 *%dest) { ; 11 bytes, i32 version. define void @f23(i8 *%dest) { ; CHECK-LABEL: f23: -; CHECK: mvi 0(%r2), 0 -; CHECK: mvc 1(10,%r2), 0(%r2) +; CHECK: xc 0(11,%r2), 0(%r2) ; CHECK: br %r14 call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 11, i32 1, i1 false) ret void @@ -228,8 +225,7 @@ define void @f23(i8 *%dest) { ; 11 bytes, i64 version. define void @f24(i8 *%dest) { ; CHECK-LABEL: f24: -; CHECK: mvi 0(%r2), 0 -; CHECK: mvc 1(10,%r2), 0(%r2) +; CHECK: xc 0(11,%r2), 0(%r2) ; CHECK: br %r14 call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 11, i32 1, i1 false) ret void @@ -258,8 +254,7 @@ define void @f26(i8 *%dest) { ; 13 bytes, i32 version. 
define void @f27(i8 *%dest) { ; CHECK-LABEL: f27: -; CHECK: mvi 0(%r2), 0 -; CHECK: mvc 1(12,%r2), 0(%r2) +; CHECK: xc 0(13,%r2), 0(%r2) ; CHECK: br %r14 call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 13, i32 1, i1 false) ret void @@ -268,8 +263,7 @@ define void @f27(i8 *%dest) { ; 13 bytes, i64 version. define void @f28(i8 *%dest) { ; CHECK-LABEL: f28: -; CHECK: mvi 0(%r2), 0 -; CHECK: mvc 1(12,%r2), 0(%r2) +; CHECK: xc 0(13,%r2), 0(%r2) ; CHECK: br %r14 call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 13, i32 1, i1 false) ret void @@ -278,8 +272,7 @@ define void @f28(i8 *%dest) { ; 14 bytes, i32 version. define void @f29(i8 *%dest) { ; CHECK-LABEL: f29: -; CHECK: mvi 0(%r2), 0 -; CHECK: mvc 1(13,%r2), 0(%r2) +; CHECK: xc 0(14,%r2), 0(%r2) ; CHECK: br %r14 call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 14, i32 1, i1 false) ret void @@ -288,8 +281,7 @@ define void @f29(i8 *%dest) { ; 14 bytes, i64 version. define void @f30(i8 *%dest) { ; CHECK-LABEL: f30: -; CHECK: mvi 0(%r2), 0 -; CHECK: mvc 1(13,%r2), 0(%r2) +; CHECK: xc 0(14,%r2), 0(%r2) ; CHECK: br %r14 call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 14, i32 1, i1 false) ret void @@ -298,8 +290,7 @@ define void @f30(i8 *%dest) { ; 15 bytes, i32 version. define void @f31(i8 *%dest) { ; CHECK-LABEL: f31: -; CHECK: mvi 0(%r2), 0 -; CHECK: mvc 1(14,%r2), 0(%r2) +; CHECK: xc 0(15,%r2), 0(%r2) ; CHECK: br %r14 call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 15, i32 1, i1 false) ret void @@ -308,8 +299,7 @@ define void @f31(i8 *%dest) { ; 15 bytes, i64 version. define void @f32(i8 *%dest) { ; CHECK-LABEL: f32: -; CHECK: mvi 0(%r2), 0 -; CHECK: mvc 1(14,%r2), 0(%r2) +; CHECK: xc 0(15,%r2), 0(%r2) ; CHECK: br %r14 call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 15, i32 1, i1 false) ret void @@ -338,8 +328,7 @@ define void @f34(i8 *%dest) { ; 17 bytes, i32 version. 
define void @f35(i8 *%dest) { ; CHECK-LABEL: f35: -; CHECK: mvi 0(%r2), 0 -; CHECK: mvc 1(16,%r2), 0(%r2) +; CHECK: xc 0(17,%r2), 0(%r2) ; CHECK: br %r14 call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 17, i32 1, i1 false) ret void @@ -348,49 +337,46 @@ define void @f35(i8 *%dest) { ; 17 bytes, i64 version. define void @f36(i8 *%dest) { ; CHECK-LABEL: f36: -; CHECK: mvi 0(%r2), 0 -; CHECK: mvc 1(16,%r2), 0(%r2) +; CHECK: xc 0(17,%r2), 0(%r2) ; CHECK: br %r14 call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 17, i32 1, i1 false) ret void } -; 257 bytes, i32 version. +; 256 bytes, i32 version. define void @f37(i8 *%dest) { ; CHECK-LABEL: f37: -; CHECK: mvi 0(%r2), 0 -; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: xc 0(256,%r2), 0(%r2) ; CHECK: br %r14 - call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 257, i32 1, i1 false) + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 256, i32 1, i1 false) ret void } -; 257 bytes, i64 version. +; 256 bytes, i64 version. define void @f38(i8 *%dest) { ; CHECK-LABEL: f38: -; CHECK: mvi 0(%r2), 0 -; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: xc 0(256,%r2), 0(%r2) ; CHECK: br %r14 - call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 257, i32 1, i1 false) + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 256, i32 1, i1 false) ret void } -; 258 bytes, i32 version. 258 bytes is too big for a single MVC. -; For now expect none, so that the test fails and gets updated when -; large copies are implemented. +; 257 bytes, i32 version. We need two MVCs. define void @f39(i8 *%dest) { ; CHECK-LABEL: f39: -; CHECK-NOT: mvc +; CHECK: xc 0(256,%r2), 0(%r2) +; CHECK: xc 256(1,%r2), 256(%r2) ; CHECK: br %r14 - call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 258, i32 1, i1 false) + call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 257, i32 1, i1 false) ret void } -; 258 bytes, i64 version, with the same comments as above. +; 257 bytes, i64 version. 
define void @f40(i8 *%dest) { ; CHECK-LABEL: f40: -; CHECK-NOT: mvc +; CHECK: xc 0(256,%r2), 0(%r2) +; CHECK: xc 256(1,%r2), 256(%r2) ; CHECK: br %r14 - call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 258, i32 1, i1 false) + call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 257, i32 1, i1 false) ret void } diff --git a/test/CodeGen/SystemZ/memset-04.ll b/test/CodeGen/SystemZ/memset-04.ll index 9288692..7906e8d 100644 --- a/test/CodeGen/SystemZ/memset-04.ll +++ b/test/CodeGen/SystemZ/memset-04.ll @@ -375,21 +375,23 @@ define void @f38(i8 *%dest) { ret void } -; 258 bytes, i32 version. 258 bytes is too big for a single MVC. -; For now expect none, so that the test fails and gets updated when -; large copies are implemented. +; 258 bytes, i32 version. We need two MVCs. define void @f39(i8 *%dest) { ; CHECK-LABEL: f39: -; CHECK-NOT: mvc +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: mvc 257(1,%r2), 256(%r2) ; CHECK: br %r14 call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 258, i32 1, i1 false) ret void } -; 258 bytes, i64 version, with the same comments as above. +; 258 bytes, i64 version. define void @f40(i8 *%dest) { ; CHECK-LABEL: f40: -; CHECK-NOT: mvc +; CHECK: mvi 0(%r2), 255 +; CHECK: mvc 1(256,%r2), 0(%r2) +; CHECK: mvc 257(1,%r2), 256(%r2) ; CHECK: br %r14 call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 258, i32 1, i1 false) ret void diff --git a/test/CodeGen/SystemZ/or-08.ll b/test/CodeGen/SystemZ/or-08.ll new file mode 100644 index 0000000..8f5bf31 --- /dev/null +++ b/test/CodeGen/SystemZ/or-08.ll @@ -0,0 +1,57 @@ +; Test memory-to-memory ORs. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test the simple i8 case. +define void @f1(i8 *%ptr1) { +; CHECK-LABEL: f1: +; CHECK: oc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i8 *%ptr1, i64 1 + %val = load i8 *%ptr1 + %old = load i8 *%ptr2 + %or = or i8 %val, %old + store i8 %or, i8 *%ptr2 + ret void +} + +; Test the simple i16 case. 
+define void @f2(i16 *%ptr1) { +; CHECK-LABEL: f2: +; CHECK: oc 2(2,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i16 *%ptr1, i64 1 + %val = load i16 *%ptr1 + %old = load i16 *%ptr2 + %or = or i16 %val, %old + store i16 %or, i16 *%ptr2 + ret void +} + +; Test the simple i32 case. +define void @f3(i32 *%ptr1) { +; CHECK-LABEL: f3: +; CHECK: oc 4(4,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i32 *%ptr1, i64 1 + %val = load i32 *%ptr1 + %old = load i32 *%ptr2 + %or = or i32 %old, %val + store i32 %or, i32 *%ptr2 + ret void +} + +; Test the i64 case. +define void @f4(i64 *%ptr1) { +; CHECK-LABEL: f4: +; CHECK: oc 8(8,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i64 *%ptr1, i64 1 + %val = load i64 *%ptr1 + %old = load i64 *%ptr2 + %or = or i64 %old, %val + store i64 %or, i64 *%ptr2 + ret void +} + +; Leave other more complicated tests to and-08.ll. diff --git a/test/CodeGen/SystemZ/prefetch-01.ll b/test/CodeGen/SystemZ/prefetch-01.ll new file mode 100644 index 0000000..bb7fea9 --- /dev/null +++ b/test/CodeGen/SystemZ/prefetch-01.ll @@ -0,0 +1,87 @@ +; Test data prefetching. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare void @llvm.prefetch(i8*, i32, i32, i32) + +@g = global [4096 x i8] zeroinitializer + +; Check that instruction read prefetches are ignored. +define void @f1(i8 *%ptr) { +; CHECK-LABEL: f1: +; CHECK-NOT: %r2 +; CHECK: br %r14 + call void @llvm.prefetch(i8 *%ptr, i32 0, i32 0, i32 0) + ret void +} + +; Check that instruction write prefetches are ignored. +define void @f2(i8 *%ptr) { +; CHECK-LABEL: f2: +; CHECK-NOT: %r2 +; CHECK: br %r14 + call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 0) + ret void +} + +; Check data read prefetches. +define void @f3(i8 *%ptr) { +; CHECK-LABEL: f3: +; CHECK: pfd 1, 0(%r2) +; CHECK: br %r14 + call void @llvm.prefetch(i8 *%ptr, i32 0, i32 0, i32 1) + ret void +} + +; Check data write prefetches. 
+define void @f4(i8 *%ptr) { +; CHECK-LABEL: f4: +; CHECK: pfd 2, 0(%r2) +; CHECK: br %r14 + call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1) + ret void +} + +; Check an address at the negative end of the range. +define void @f5(i8 *%base, i64 %index) { +; CHECK-LABEL: f5: +; CHECK: pfd 2, -524288({{%r2,%r3|%r3,%r2}}) +; CHECK: br %r14 + %add = add i64 %index, -524288 + %ptr = getelementptr i8 *%base, i64 %add + call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1) + ret void +} + +; Check an address at the positive end of the range. +define void @f6(i8 *%base, i64 %index) { +; CHECK-LABEL: f6: +; CHECK: pfd 2, 524287({{%r2,%r3|%r3,%r2}}) +; CHECK: br %r14 + %add = add i64 %index, 524287 + %ptr = getelementptr i8 *%base, i64 %add + call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1) + ret void +} + +; Check that the next address up still compiles. +define void @f7(i8 *%base, i64 %index) { +; CHECK-LABEL: f7: +; CHECK: 524288 +; CHECK: pfd 2, +; CHECK: br %r14 + %add = add i64 %index, 524288 + %ptr = getelementptr i8 *%base, i64 %add + call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1) + ret void +} + +; Check pc-relative prefetches. +define void @f8() { +; CHECK-LABEL: f8: +; CHECK: pfdrl 2, g +; CHECK: br %r14 + %ptr = getelementptr [4096 x i8] *@g, i64 0, i64 0 + call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1) + ret void +} diff --git a/test/CodeGen/SystemZ/risbg-01.ll b/test/CodeGen/SystemZ/risbg-01.ll index 85de6dc..a4d11fd 100644 --- a/test/CodeGen/SystemZ/risbg-01.ll +++ b/test/CodeGen/SystemZ/risbg-01.ll @@ -1,6 +1,7 @@ ; Test sequences that can use RISBG with a zeroed first operand. +; The tests here assume that RISBLG isn't available. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; Test an extraction of bit 0 from a right-shifted value. 
define i32 @f1(i32 %foo) { @@ -455,3 +456,17 @@ define i64 @f40(i64 %foo, i64 *%dest) { %and = and i64 %shl, 2147483647 ret i64 %and } + +; In this case the sign extension is converted to a pair of 32-bit shifts, +; which is then extended to 64 bits. We previously used the wrong bit size +; when testing whether the shifted-in bits of the shift right were significant. +define i64 @f41(i1 %x) { +; CHECK-LABEL: f41: +; CHECK: sll %r2, 31 +; CHECK: sra %r2, 31 +; CHECK: llgcr %r2, %r2 +; CHECK: br %r14 + %ext = sext i1 %x to i8 + %ext2 = zext i8 %ext to i64 + ret i64 %ext2 +} diff --git a/test/CodeGen/SystemZ/setcc-01.ll b/test/CodeGen/SystemZ/setcc-01.ll new file mode 100644 index 0000000..4626760 --- /dev/null +++ b/test/CodeGen/SystemZ/setcc-01.ll @@ -0,0 +1,74 @@ +; Test SETCC for every integer condition. The tests here assume that +; RISBLG isn't available. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s + +; Test CC in { 0 }, with 3 don't care. +define i32 @f1(i32 %a, i32 %b) { +; CHECK-LABEL: f1: +; CHECK: ipm %r2 +; CHECK-NEXT: afi %r2, -268435456 +; CHECK-NEXT: srl %r2, 31 +; CHECK: br %r14 + %cond = icmp eq i32 %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 1 }, with 3 don't care. +define i32 @f2(i32 %a, i32 %b) { +; CHECK-LABEL: f2: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK-NEXT: risbg %r2, [[REG]], 63, 191, 36 +; CHECK: br %r14 + %cond = icmp slt i32 %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 0, 1 }, with 3 don't care. +define i32 @f3(i32 %a, i32 %b) { +; CHECK-LABEL: f3: +; CHECK: ipm %r2 +; CHECK-NEXT: afi %r2, -536870912 +; CHECK-NEXT: srl %r2, 31 +; CHECK: br %r14 + %cond = icmp sle i32 %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 2 }, with 3 don't care. 
+define i32 @f4(i32 %a, i32 %b) { +; CHECK-LABEL: f4: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK-NEXT: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %cond = icmp sgt i32 %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 0, 2 }, with 3 don't care. +define i32 @f5(i32 %a, i32 %b) { +; CHECK-LABEL: f5: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK-NEXT: xilf [[REG]], 4294967295 +; CHECK-NEXT: risbg %r2, [[REG]], 63, 191, 36 +; CHECK: br %r14 + %cond = icmp sge i32 %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 1, 2 }, with 3 don't care. +define i32 @f6(i32 %a, i32 %b) { +; CHECK-LABEL: f6: +; CHECK: ipm %r2 +; CHECK-NEXT: afi %r2, 1879048192 +; CHECK-NEXT: srl %r2, 31 +; CHECK: br %r14 + %cond = icmp ne i32 %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/setcc-02.ll b/test/CodeGen/SystemZ/setcc-02.ll new file mode 100644 index 0000000..6a7be47 --- /dev/null +++ b/test/CodeGen/SystemZ/setcc-02.ll @@ -0,0 +1,174 @@ +; Test SETCC for every floating-point condition. The tests here assume that +; RISBLG isn't available. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s + +; Test CC in { 0 } +define i32 @f1(float %a, float %b) { +; CHECK-LABEL: f1: +; CHECK: ipm %r2 +; CHECK-NEXT: afi %r2, -268435456 +; CHECK-NEXT: srl %r2, 31 +; CHECK: br %r14 + %cond = fcmp oeq float %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 1 } +define i32 @f2(float %a, float %b) { +; CHECK-LABEL: f2: +; CHECK: ipm %r2 +; CHECK-NEXT: xilf %r2, 268435456 +; CHECK-NEXT: afi %r2, -268435456 +; CHECK-NEXT: srl %r2, 31 +; CHECK: br %r14 + %cond = fcmp olt float %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 0, 1 } +define i32 @f3(float %a, float %b) { +; CHECK-LABEL: f3: +; CHECK: ipm %r2 +; CHECK-NEXT: afi %r2, -536870912 +; CHECK-NEXT: srl %r2, 31 +; CHECK: br %r14 + %cond = fcmp ole float %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 2 } +define i32 @f4(float %a, float %b) { +; CHECK-LABEL: f4: +; CHECK: ipm %r2 +; CHECK-NEXT: xilf %r2, 268435456 +; CHECK-NEXT: afi %r2, 1342177280 +; CHECK-NEXT: srl %r2, 31 +; CHECK: br %r14 + %cond = fcmp ogt float %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 0, 2 } +define i32 @f5(float %a, float %b) { +; CHECK-LABEL: f5: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK-NEXT: xilf [[REG]], 4294967295 +; CHECK-NEXT: risbg %r2, [[REG]], 63, 191, 36 +; CHECK: br %r14 + %cond = fcmp oge float %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 1, 2 } +define i32 @f6(float %a, float %b) { +; CHECK-LABEL: f6: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK-NEXT: afi [[REG]], 268435456 +; CHECK-NEXT: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %cond = fcmp one float %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 0, 1, 2 } +define i32 @f7(float %a, float %b) { +; CHECK-LABEL: f7: +; CHECK: ipm %r2 +; CHECK-NEXT: afi %r2, -805306368 +; CHECK-NEXT: srl %r2, 31 +; CHECK: br %r14 + %cond = fcmp ord float %a, %b + %res = zext 
i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 3 } +define i32 @f8(float %a, float %b) { +; CHECK-LABEL: f8: +; CHECK: ipm %r2 +; CHECK-NEXT: afi %r2, 1342177280 +; CHECK-NEXT: srl %r2, 31 +; CHECK: br %r14 + %cond = fcmp uno float %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 0, 3 } +define i32 @f9(float %a, float %b) { +; CHECK-LABEL: f9: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK-NEXT: afi [[REG]], -268435456 +; CHECK-NEXT: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %cond = fcmp ueq float %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 1, 3 } +define i32 @f10(float %a, float %b) { +; CHECK-LABEL: f10: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK-NEXT: risbg %r2, [[REG]], 63, 191, 36 +; CHECK: br %r14 + %cond = fcmp ult float %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 0, 1, 3 } +define i32 @f11(float %a, float %b) { +; CHECK-LABEL: f11: +; CHECK: ipm %r2 +; CHECK-NEXT: xilf %r2, 268435456 +; CHECK-NEXT: afi %r2, -805306368 +; CHECK-NEXT: srl %r2, 31 +; CHECK: br %r14 + %cond = fcmp ule float %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 2, 3 } +define i32 @f12(float %a, float %b) { +; CHECK-LABEL: f12: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK-NEXT: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %cond = fcmp ugt float %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 0, 2, 3 } +define i32 @f13(float %a, float %b) { +; CHECK-LABEL: f13: +; CHECK: ipm %r2 +; CHECK-NEXT: xilf %r2, 268435456 +; CHECK-NEXT: afi %r2, 1879048192 +; CHECK-NEXT: srl %r2, 31 +; CHECK: br %r14 + %cond = fcmp uge float %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} + +; Test CC in { 1, 2, 3 } +define i32 @f14(float %a, float %b) { +; CHECK-LABEL: f14: +; CHECK: ipm %r2 +; CHECK-NEXT: afi %r2, 1879048192 +; CHECK-NEXT: srl %r2, 31 +; CHECK: br %r14 + %cond = fcmp une float %a, %b + %res = zext i1 %cond to i32 + ret i32 %res +} diff --git 
a/test/CodeGen/SystemZ/shift-10.ll b/test/CodeGen/SystemZ/shift-10.ll new file mode 100644 index 0000000..46ed218 --- /dev/null +++ b/test/CodeGen/SystemZ/shift-10.ll @@ -0,0 +1,78 @@ +; Test compound shifts. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test a shift right followed by a sign extension. This can use two shifts. +define i64 @f1(i32 %a) { +; CHECK-LABEL: f1: +; CHECK: sllg [[REG:%r[0-5]]], %r2, 62 +; CHECK: srag %r2, [[REG]], 63 +; CHECK: br %r14 + %shr = lshr i32 %a, 1 + %trunc = trunc i32 %shr to i1 + %ext = sext i1 %trunc to i64 + ret i64 %ext +} + +; ...and again with the highest shift count. +define i64 @f2(i32 %a) { +; CHECK-LABEL: f2: +; CHECK: sllg [[REG:%r[0-5]]], %r2, 32 +; CHECK: srag %r2, [[REG]], 63 +; CHECK: br %r14 + %shr = lshr i32 %a, 31 + %trunc = trunc i32 %shr to i1 + %ext = sext i1 %trunc to i64 + ret i64 %ext +} + +; Test a left shift that of an extended right shift in a case where folding +; is possible. +define i64 @f3(i32 %a) { +; CHECK-LABEL: f3: +; CHECK: risbg %r2, %r2, 27, 181, 9 +; CHECK: br %r14 + %shr = lshr i32 %a, 1 + %ext = zext i32 %shr to i64 + %shl = shl i64 %ext, 10 + %and = and i64 %shl, 137438952960 + ret i64 %and +} + +; ...and again with a larger right shift. +define i64 @f4(i32 %a) { +; CHECK-LABEL: f4: +; CHECK: risbg %r2, %r2, 30, 158, 3 +; CHECK: br %r14 + %shr = lshr i32 %a, 30 + %ext = sext i32 %shr to i64 + %shl = shl i64 %ext, 33 + %and = and i64 %shl, 8589934592 + ret i64 %and +} + +; Repeat the previous test in a case where all bits outside the +; bottom 3 matter. +define i64 @f5(i32 %a) { +; CHECK-LABEL: f5: +; CHECK: risbg %r2, %r2, 29, 158, 3 +; CHECK: lhi %r2, 7 +; CHECK: br %r14 + %shr = lshr i32 %a, 30 + %ext = sext i32 %shr to i64 + %shl = shl i64 %ext, 33 + %or = or i64 %shl, 7 + ret i64 %or +} + +; Test that SRA gets replaced with SRL if the sign bit is the only one +; that matters. 
+define i64 @f6(i64 %a) { +; CHECK-LABEL: f6: +; CHECK: risbg %r2, %r2, 55, 183, 19 +; CHECK: br %r14 + %shl = shl i64 %a, 10 + %shr = ashr i64 %shl, 60 + %and = and i64 %shr, 256 + ret i64 %and +} diff --git a/test/CodeGen/SystemZ/spill-01.ll b/test/CodeGen/SystemZ/spill-01.ll index 9de89d6..ca64a88 100644 --- a/test/CodeGen/SystemZ/spill-01.ll +++ b/test/CodeGen/SystemZ/spill-01.ll @@ -1,6 +1,7 @@ -; Test spilling using MVC. +; Test spilling using MVC. The tests here assume z10 register pressure, +; without the high words being available. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s declare void @foo() diff --git a/test/CodeGen/SystemZ/strcmp-01.ll b/test/CodeGen/SystemZ/strcmp-01.ll new file mode 100644 index 0000000..122c160 --- /dev/null +++ b/test/CodeGen/SystemZ/strcmp-01.ll @@ -0,0 +1,70 @@ +; Test strcmp using CLST, i32 version. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare signext i32 @strcmp(i8 *%src1, i8 *%src2) + +; Check a case where the result is used as an integer. +define i32 @f1(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f1: +; CHECK: lhi %r0, 0 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: clst %r2, %r3 +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NEXT: BB#{{[0-9]+}} +; CHECK-NEXT: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: rll %r2, [[REG]], 31 +; CHECK: br %r14 + %res = call i32 @strcmp(i8 *%src1, i8 *%src2) + ret i32 %res +} + +; Check a case where the result is tested for equality. 
+define void @f2(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f2: +; CHECK: lhi %r0, 0 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: clst %r2, %r3 +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NEXT: BB#{{[0-9]+}} +; CHECK-NEXT: je {{\.L.*}} +; CHECK: br %r14 + %res = call i32 @strcmp(i8 *%src1, i8 *%src2) + %cmp = icmp eq i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret void +} + +; Test a case where the result is used both as an integer and for +; branching. +define i32 @f3(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f3: +; CHECK: lhi %r0, 0 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: clst %r2, %r3 +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NEXT: BB#{{[0-9]+}} +; CHECK-NEXT: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: rll %r2, [[REG]], 31 +; CHECK: jl {{\.L*}} +; CHECK: br %r14 +entry: + %res = call i32 @strcmp(i8 *%src1, i8 *%src2) + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/strcmp-02.ll b/test/CodeGen/SystemZ/strcmp-02.ll new file mode 100644 index 0000000..27bd00b --- /dev/null +++ b/test/CodeGen/SystemZ/strcmp-02.ll @@ -0,0 +1,72 @@ +; Test strcmp using CLST, i64 version. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @strcmp(i8 *%src1, i8 *%src2) + +; Check a case where the result is used as an integer. +define i64 @f1(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f1: +; CHECK: lhi %r0, 0 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: clst %r2, %r3 +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NEXT: BB#{{[0-9]+}} +; CHECK-NEXT: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: rll [[REG]], [[REG]], 31 +; CHECK: lgfr %r2, [[REG]] +; CHECK: br %r14 + %res = call i64 @strcmp(i8 *%src1, i8 *%src2) + ret i64 %res +} + +; Check a case where the result is tested for equality. 
+define void @f2(i8 *%src1, i8 *%src2, i64 *%dest) { +; CHECK-LABEL: f2: +; CHECK: lhi %r0, 0 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: clst %r2, %r3 +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NEXT: BB#{{[0-9]+}} +; CHECK-NEXT: je {{\.L.*}} +; CHECK: br %r14 + %res = call i64 @strcmp(i8 *%src1, i8 *%src2) + %cmp = icmp eq i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 0, i64 *%dest + br label %exit + +exit: + ret void +} + +; Test a case where the result is used both as an integer and for +; branching. +define i64 @f3(i8 *%src1, i8 *%src2, i64 *%dest) { +; CHECK-LABEL: f3: +; CHECK: lhi %r0, 0 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: clst %r2, %r3 +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NEXT: BB#{{[0-9]+}} +; CHECK-NEXT: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: rll [[REG]], [[REG]], 31 +; CHECK: lgfr %r2, [[REG]] +; CHECK: jl {{\.L*}} +; CHECK: br %r14 +entry: + %res = call i64 @strcmp(i8 *%src1, i8 *%src2) + %cmp = icmp slt i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 0, i64 *%dest + br label %exit + +exit: + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/strcpy-01.ll b/test/CodeGen/SystemZ/strcpy-01.ll new file mode 100644 index 0000000..29bab62 --- /dev/null +++ b/test/CodeGen/SystemZ/strcpy-01.ll @@ -0,0 +1,50 @@ +; Test strcpy using MVST. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i8 *@strcpy(i8 *%dest, i8 *%src) +declare i8 *@stpcpy(i8 *%dest, i8 *%src) + +; Check strcpy. +define i8 *@f1(i8 *%dest, i8 *%src) { +; CHECK-LABEL: f1: +; CHECK-DAG: lhi %r0, 0 +; CHECK-DAG: lgr [[REG:%r[145]]], %r2 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK-NEXT: mvst [[REG]], %r3 +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NOT: %r2 +; CHECK: br %r14 + %res = call i8 *@strcpy(i8 *%dest, i8 *%src) + ret i8 *%res +} + +; Check stpcpy. 
+define i8 *@f2(i8 *%dest, i8 *%src) { +; CHECK-LABEL: f2: +; CHECK: lhi %r0, 0 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK-NEXT: mvst %r2, %r3 +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NOT: %r2 +; CHECK: br %r14 + %res = call i8 *@stpcpy(i8 *%dest, i8 *%src) + ret i8 *%res +} + +; Check correct operation with other loads and stores. The load must +; come before the loop and the store afterwards. +define i32 @f3(i32 %dummy, i8 *%dest, i8 *%src, i32 *%resptr, i32 *%storeptr) { +; CHECK-LABEL: f3: +; CHECK-DAG: lhi %r0, 0 +; CHECK-DAG: l %r2, 0(%r5) +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK-NEXT: mvst %r3, %r4 +; CHECK-NEXT: jo [[LABEL]] +; CHECK: mvhi 0(%r6), 0 +; CHECK: br %r14 + %res = load i32 *%resptr + %unused = call i8 *@strcpy(i8 *%dest, i8 *%src) + store i32 0, i32 *%storeptr + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/strlen-01.ll b/test/CodeGen/SystemZ/strlen-01.ll new file mode 100644 index 0000000..16161d4 --- /dev/null +++ b/test/CodeGen/SystemZ/strlen-01.ll @@ -0,0 +1,39 @@ +; Test strlen using SRST, i64 version. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @strlen(i8 *%src) +declare i64 @strnlen(i8 *%src, i64 %len) + +; Test strlen with its proper i64 prototype. It would also be valid for +; the uses of %r3 and REG after the LGR to be swapped. +define i64 @f1(i32 %dummy, i8 *%src) { +; CHECK-LABEL: f1: +; CHECK-DAG: lhi %r0, 0 +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: lgr [[REG:%r[145]]], %r3 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK-NEXT: srst %r2, [[REG]] +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NEXT: BB#{{[0-9]+}} +; CHECK-NEXT: sgr %r2, %r3 +; CHECK: br %r14 + %res = call i64 @strlen(i8 *%src) + ret i64 %res +} + +; Test strnlen with its proper i64 prototype. 
+define i64 @f2(i64 %len, i8 *%src) { +; CHECK-LABEL: f2: +; CHECK-DAG: agr %r2, %r3 +; CHECK-DAG: lhi %r0, 0 +; CHECK-DAG: lgr [[REG:%r[145]]], %r3 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK-NEXT: srst %r2, [[REG]] +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NEXT: BB#{{[0-9]+}} +; CHECK-NEXT: sgr %r2, %r3 +; CHECK: br %r14 + %res = call i64 @strnlen(i8 *%src, i64 %len) + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/strlen-02.ll b/test/CodeGen/SystemZ/strlen-02.ll new file mode 100644 index 0000000..e1abbff --- /dev/null +++ b/test/CodeGen/SystemZ/strlen-02.ll @@ -0,0 +1,39 @@ +; Test strlen using SRST, i32 version. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @strlen(i8 *%src) +declare i32 @strnlen(i8 *%src, i32 %len) + +; Test strlen with an i32-based prototype. It would also be valid for +; the uses of %r3 and REG after the LGR to be swapped. +define i32 @f1(i32 %dummy, i8 *%src) { +; CHECK-LABEL: f1: +; CHECK-DAG: lhi %r0, 0 +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: lgr [[REG:%r[145]]], %r3 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK-NEXT: srst %r2, [[REG]] +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NEXT: BB#{{[0-9]+}} +; CHECK-NEXT: sgr %r2, %r3 +; CHECK: br %r14 + %res = call i32 @strlen(i8 *%src) + ret i32 %res +} + +; Test strnlen with an i32-based prototype. +define i32 @f2(i32 zeroext %len, i8 *%src) { +; CHECK-LABEL: f2: +; CHECK-DAG: agr %r2, %r3 +; CHECK-DAG: lhi %r0, 0 +; CHECK-DAG: lgr [[REG:%r[145]]], %r3 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK-NEXT: srst %r2, [[REG]] +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NEXT: BB#{{[0-9]+}} +; CHECK-NEXT: sgr %r2, %r3 +; CHECK: br %r14 + %res = call i32 @strnlen(i8 *%src, i32 %len) + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/unaligned-01.ll b/test/CodeGen/SystemZ/unaligned-01.ll index 621069d..526a068 100644 --- a/test/CodeGen/SystemZ/unaligned-01.ll +++ b/test/CodeGen/SystemZ/unaligned-01.ll @@ -1,7 +1,10 @@ ; Check that unaligned accesses are allowed in general. 
We check the ; few exceptions (like CRL) in their respective test files. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; FIXME: -combiner-alias-analysis (the default for SystemZ) stops +; f1 from being optimized. +; RUN: llc < %s -mtriple=s390x-linux-gnu -combiner-alias-analysis=false \ +; RUN: | FileCheck %s ; Check that these four byte stores become a single word store. define void @f1(i8 *%ptr) { diff --git a/test/CodeGen/SystemZ/xor-08.ll b/test/CodeGen/SystemZ/xor-08.ll new file mode 100644 index 0000000..8cba41e --- /dev/null +++ b/test/CodeGen/SystemZ/xor-08.ll @@ -0,0 +1,57 @@ +; Test memory-to-memory XORs. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test the simple i8 case. +define void @f1(i8 *%ptr1) { +; CHECK-LABEL: f1: +; CHECK: xc 1(1,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i8 *%ptr1, i64 1 + %val = load i8 *%ptr1 + %old = load i8 *%ptr2 + %xor = xor i8 %val, %old + store i8 %xor, i8 *%ptr2 + ret void +} + +; Test the simple i16 case. +define void @f2(i16 *%ptr1) { +; CHECK-LABEL: f2: +; CHECK: xc 2(2,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i16 *%ptr1, i64 1 + %val = load i16 *%ptr1 + %old = load i16 *%ptr2 + %xor = xor i16 %val, %old + store i16 %xor, i16 *%ptr2 + ret void +} + +; Test the simple i32 case. +define void @f3(i32 *%ptr1) { +; CHECK-LABEL: f3: +; CHECK: xc 4(4,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i32 *%ptr1, i64 1 + %val = load i32 *%ptr1 + %old = load i32 *%ptr2 + %xor = xor i32 %old, %val + store i32 %xor, i32 *%ptr2 + ret void +} + +; Test the i64 case. +define void @f4(i64 *%ptr1) { +; CHECK-LABEL: f4: +; CHECK: xc 8(8,%r2), 0(%r2) +; CHECK: br %r14 + %ptr2 = getelementptr i64 *%ptr1, i64 1 + %val = load i64 *%ptr1 + %old = load i64 *%ptr2 + %xor = xor i64 %old, %val + store i64 %xor, i64 *%ptr2 + ret void +} + +; Leave other more complicated tests to and-08.ll. 
diff --git a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll index f5b3739..b87bf24 100644 --- a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll +++ b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll @@ -47,25 +47,26 @@ declare double @sqrt(double) nounwind readonly declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!5} +!llvm.module.flags = !{!104} !0 = metadata !{i32 46, i32 0, metadata !1, null} !1 = metadata !{i32 524299, metadata !101, metadata !2, i32 44, i32 0, i32 0} ; [ DW_TAG_lexical_block ] !2 = metadata !{i32 524299, metadata !101, metadata !3, i32 44, i32 0, i32 0} ; [ DW_TAG_lexical_block ] !3 = metadata !{i32 524334, metadata !101, null, metadata !"getClosestDiagonal3", metadata !"getClosestDiagonal3", metadata !"_Z19getClosestDiagonal3ii", i32 44, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !4 = metadata !{i32 524329, metadata !101} ; [ DW_TAG_file_type ] !5 = metadata !{i32 524305, metadata !101, i32 4, metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build 00)", i1 true, metadata !"", i32 0, metadata !102, metadata !102, metadata !103, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!6 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ] +!6 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !7 = metadata !{metadata !8, metadata !22, metadata !22} -!8 = metadata !{i32 524307, metadata !99, null, metadata !"ggVector3", i32 66, i64 192, i64 32, i64 0, i32 0, null, metadata !10, i32 0, null} ; [ DW_TAG_structure_type ] +!8 = metadata !{i32 524307, metadata !99, null, metadata !"ggVector3", i32 66, i64 192, i64 32, i64 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [ggVector3] [line 66, size 192, align 32, offset 0] [def] [from ] !9 = metadata !{i32 524329, metadata !"ggVector3.h", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", metadata !5} ; [ DW_TAG_file_type ] !99 = metadata !{metadata !"ggVector3.h", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src"} !10 = metadata !{metadata !11, metadata !16, metadata !23, metadata !26, metadata !29, metadata !30, metadata !35, metadata !36, metadata !37, metadata !41, metadata !42, metadata !43, metadata !46, metadata !47, metadata !48, metadata !52, metadata !53, metadata !54, metadata !57, metadata !60, metadata !63, metadata !66, metadata !70, metadata !71, metadata !74, metadata !75, metadata !76, metadata !77, metadata !78, metadata !81, metadata !82, metadata !83, metadata !84, metadata !85, metadata !88, metadata !89, metadata !90} !11 = metadata !{i32 524301, metadata !99, metadata !8, metadata !"e", i32 160, i64 192, i64 32, i64 0, 
i32 0, metadata !12} ; [ DW_TAG_member ] -!12 = metadata !{i32 524289, metadata !101, metadata !4, metadata !"", i32 0, i64 192, i64 32, i64 0, i32 0, metadata !13, metadata !14, i32 0, null} ; [ DW_TAG_array_type ] +!12 = metadata !{i32 524289, metadata !101, metadata !4, metadata !"", i32 0, i64 192, i64 32, i64 0, i32 0, metadata !13, metadata !14, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 192, align 32, offset 0] [from double] !13 = metadata !{i32 524324, metadata !101, metadata !4, metadata !"double", i32 0, i64 64, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] !14 = metadata !{metadata !15} !15 = metadata !{i32 524321, i64 0, i64 3} ; [ DW_TAG_subrange_type ] !16 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", i32 72, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!17 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null} ; [ DW_TAG_subroutine_type ] +!17 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !18 = metadata !{null, metadata !19, metadata !20} !19 = metadata !{i32 524303, metadata !101, metadata !4, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 64, metadata !8} ; [ DW_TAG_pointer_type ] !20 = metadata !{i32 524310, metadata !100, null, metadata !"ggBoolean", i32 478, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_typedef ] @@ -73,69 +74,69 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !100 = metadata !{metadata !"math.h", metadata !"/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS4.2.Internal.sdk/usr/include/architecture/arm"} !22 = metadata !{i32 524324, metadata !101, metadata !4, 
metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !23 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", i32 73, metadata !24, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!24 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null} ; [ DW_TAG_subroutine_type ] +!24 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !25 = metadata !{null, metadata !19} !26 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", i32 74, metadata !27, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!27 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !28, i32 0, null} ; [ DW_TAG_subroutine_type ] +!27 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !28, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !28 = metadata !{null, metadata !19, metadata !13, metadata !13, metadata !13} !29 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"Set", metadata !"Set", metadata !"_ZN9ggVector33SetEddd", i32 81, metadata !27, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !30 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"x", metadata !"x", metadata !"_ZNK9ggVector31xEv", i32 82, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ 
DW_TAG_subprogram ] -!31 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !32, i32 0, null} ; [ DW_TAG_subroutine_type ] +!31 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !32, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !32 = metadata !{metadata !13, metadata !33} !33 = metadata !{i32 524303, metadata !101, metadata !4, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 64, metadata !34} ; [ DW_TAG_pointer_type ] !34 = metadata !{i32 524326, metadata !101, metadata !4, metadata !"", i32 0, i64 192, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ] !35 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"y", metadata !"y", metadata !"_ZNK9ggVector31yEv", i32 83, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !36 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"z", metadata !"z", metadata !"_ZNK9ggVector31zEv", i32 84, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !37 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"x", metadata !"x", metadata !"_ZN9ggVector31xEv", i32 85, metadata !38, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!38 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !39, i32 0, null} ; [ DW_TAG_subroutine_type ] +!38 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !39, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !39 = metadata !{metadata !40, metadata !19} !40 = metadata !{i32 524304, metadata !101, 
metadata !4, metadata !"double", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !13} ; [ DW_TAG_reference_type ] !41 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"y", metadata !"y", metadata !"_ZN9ggVector31yEv", i32 86, metadata !38, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !42 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"z", metadata !"z", metadata !"_ZN9ggVector31zEv", i32 87, metadata !38, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !43 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"SetX", metadata !"SetX", metadata !"_ZN9ggVector34SetXEd", i32 88, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!44 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !45, i32 0, null} ; [ DW_TAG_subroutine_type ] +!44 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !45, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !45 = metadata !{null, metadata !19, metadata !13} !46 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"SetY", metadata !"SetY", metadata !"_ZN9ggVector34SetYEd", i32 89, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !47 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"SetZ", metadata !"SetZ", metadata !"_ZN9ggVector34SetZEd", i32 90, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !48 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", i32 92, metadata !49, i1 false, i1 
false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!49 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !50, i32 0, null} ; [ DW_TAG_subroutine_type ] +!49 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !50, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !50 = metadata !{null, metadata !19, metadata !51} !51 = metadata !{i32 524304, metadata !101, metadata !4, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !34} ; [ DW_TAG_reference_type ] !52 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"tolerance", metadata !"tolerance", metadata !"_ZNK9ggVector39toleranceEv", i32 100, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !53 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"tolerance", metadata !"tolerance", metadata !"_ZN9ggVector39toleranceEv", i32 101, metadata !38, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !54 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"operator+", metadata !"operator+", metadata !"_ZNK9ggVector3psEv", i32 107, metadata !55, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!55 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !56, i32 0, null} ; [ DW_TAG_subroutine_type ] +!55 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !56, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !56 = metadata !{metadata !51, metadata !33} !57 = metadata !{i32 524334, 
metadata !9, metadata !8, metadata !"operator-", metadata !"operator-", metadata !"_ZNK9ggVector3ngEv", i32 108, metadata !58, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!58 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !59, i32 0, null} ; [ DW_TAG_subroutine_type ] +!58 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !59, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !59 = metadata !{metadata !8, metadata !33} !60 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"operator[]", metadata !"operator[]", metadata !"_ZNK9ggVector3ixEi", i32 290, metadata !61, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!61 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !62, i32 0, null} ; [ DW_TAG_subroutine_type ] +!61 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !62, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !62 = metadata !{metadata !13, metadata !33, metadata !22} !63 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"operator[]", metadata !"operator[]", metadata !"_ZN9ggVector3ixEi", i32 278, metadata !64, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!64 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !65, i32 0, null} ; [ DW_TAG_subroutine_type ] +!64 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !65, i32 0, null, null, null} 
; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !65 = metadata !{metadata !40, metadata !19, metadata !22} !66 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"operator+=", metadata !"operator+=", metadata !"_ZN9ggVector3pLERKS_", i32 303, metadata !67, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!67 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !68, i32 0, null} ; [ DW_TAG_subroutine_type ] +!67 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !68, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !68 = metadata !{metadata !69, metadata !19, metadata !51} !69 = metadata !{i32 524304, metadata !101, metadata !4, metadata !"ggVector3", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_reference_type ] !70 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"operator-=", metadata !"operator-=", metadata !"_ZN9ggVector3mIERKS_", i32 310, metadata !67, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !71 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"operator*=", metadata !"operator*=", metadata !"_ZN9ggVector3mLEd", i32 317, metadata !72, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!72 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !73, i32 0, null} ; [ DW_TAG_subroutine_type ] +!72 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !73, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !73 = metadata !{metadata !69, metadata 
!19, metadata !13} !74 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"operator/=", metadata !"operator/=", metadata !"_ZN9ggVector3dVEd", i32 324, metadata !72, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !75 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"length", metadata !"length", metadata !"_ZNK9ggVector36lengthEv", i32 121, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !76 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"squaredLength", metadata !"squaredLength", metadata !"_ZNK9ggVector313squaredLengthEv", i32 122, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !77 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"MakeUnitVector", metadata !"MakeUnitVector", metadata !"_ZN9ggVector314MakeUnitVectorEv", i32 217, metadata !24, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !78 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"Perturb", metadata !"Perturb", metadata !"_ZNK9ggVector37PerturbEdd", i32 126, metadata !79, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!79 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !80, i32 0, null} ; [ DW_TAG_subroutine_type ] +!79 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !80, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !80 = metadata !{metadata !8, metadata !33, metadata !13, metadata !13} !81 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"maxComponent", metadata !"maxComponent", 
metadata !"_ZNK9ggVector312maxComponentEv", i32 128, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !82 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"minComponent", metadata !"minComponent", metadata !"_ZNK9ggVector312minComponentEv", i32 129, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !83 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"maxAbsComponent", metadata !"maxAbsComponent", metadata !"_ZNK9ggVector315maxAbsComponentEv", i32 131, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !84 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"minAbsComponent", metadata !"minAbsComponent", metadata !"_ZNK9ggVector315minAbsComponentEv", i32 132, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !85 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"indexOfMinComponent", metadata !"indexOfMinComponent", metadata !"_ZNK9ggVector319indexOfMinComponentEv", i32 133, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!86 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !87, i32 0, null} ; [ DW_TAG_subroutine_type ] +!86 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !87, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !87 = metadata !{metadata !22, metadata !33} !88 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"indexOfMinAbsComponent", metadata !"indexOfMinAbsComponent", metadata !"_ZNK9ggVector322indexOfMinAbsComponentEv", 
i32 137, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !89 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"indexOfMaxComponent", metadata !"indexOfMaxComponent", metadata !"_ZNK9ggVector319indexOfMaxComponentEv", i32 146, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] @@ -151,3 +152,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !101 = metadata !{metadata !"ggEdgeDiscrepancy.cc", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src"} !102 = metadata !{i32 0} !103 = metadata !{metadata !3} +!104 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/Thumb/PR17309.ll b/test/CodeGen/Thumb/PR17309.ll new file mode 100644 index 0000000..b7b08e9 --- /dev/null +++ b/test/CodeGen/Thumb/PR17309.ll @@ -0,0 +1,57 @@ +; RUN: llc -mtriple thumbv5-none-linux-gnueabi < %s | FileCheck %s + +%struct.C = type { [1000 x i8] } +%struct.S = type { [1000 x i16] } +%struct.I = type { [1000 x i32] } + +;CHECK-LABEL: pass_C: +;CHECK-NOT: ldrb r{{[0-9]+}}, [{{.*}}], #1 +;CHECK-NOT: strb r{{[0-9]+}}, [{{.*}}], #1 +define void @pass_C() #0 { +entry: + %c = alloca %struct.C, align 1 + %0 = getelementptr inbounds %struct.C* %c, i32 0, i32 0, i32 0 + call void @llvm.lifetime.start(i64 1000, i8* %0) #1 + call void @use_C(%struct.C* byval %c) #3 + call void @llvm.lifetime.end(i64 1000, i8* %0) #1 + ret void +} + +;CHECK-LABEL: pass_S: +;CHECK-NOT: ldrh r{{[0-9]+}}, [{{.*}}], #2 +;CHECK-NOT: strh r{{[0-9]+}}, [{{.*}}], #2 +define void @pass_S() #0 { +entry: + %s = alloca %struct.S, align 2 + %0 = bitcast %struct.S* %s to i8* + call void @llvm.lifetime.start(i64 2000, i8* %0) #1 + call void @use_S(%struct.S* byval %s) #3 + call void @llvm.lifetime.end(i64 2000, i8* %0) #1 + ret void +} + +;CHECK-LABEL: pass_I: 
+;CHECK-NOT: ldr r{{[0-9]+}}, [{{.*}}], #4 +;CHECK-NOT: str r{{[0-9]+}}, [{{.*}}], #4 +define void @pass_I() #0 { +entry: + %i = alloca %struct.I, align 4 + %0 = bitcast %struct.I* %i to i8* + call void @llvm.lifetime.start(i64 4000, i8* %0) #1 + call void @use_I(%struct.I* byval %i) #3 + call void @llvm.lifetime.end(i64 4000, i8* %0) #1 + ret void +} + +declare void @use_C(%struct.C* byval) #2 +declare void @use_S(%struct.S* byval) #2 +declare void @use_I(%struct.I* byval) #2 + +declare void @llvm.lifetime.start(i64, i8* nocapture) #1 +declare void @llvm.lifetime.end(i64, i8* nocapture) #1 + + +attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind } +attributes #2 = { optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind optsize } diff --git a/test/CodeGen/Thumb/barrier.ll b/test/CodeGen/Thumb/barrier.ll index 8fca273..1c27fa0 100644 --- a/test/CodeGen/Thumb/barrier.ll +++ b/test/CodeGen/Thumb/barrier.ll @@ -7,7 +7,7 @@ define void @t1() { ; V6: blx {{_*}}sync_synchronize ; V6M-LABEL: t1: -; V6M: dmb ish +; V6M: dmb sy fence seq_cst ret void } diff --git a/test/CodeGen/Thumb/lit.local.cfg b/test/CodeGen/Thumb/lit.local.cfg index 4d75f58..8a3ba96 100644 --- a/test/CodeGen/Thumb/lit.local.cfg +++ b/test/CodeGen/Thumb/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp', '.test'] - targets = set(config.root.targets_to_build.split()) if not 'ARM' in targets: config.unsupported = True diff --git a/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll b/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll 
index 486c064..1b8bdb1 100644 --- a/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll +++ b/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll @@ -8,10 +8,10 @@ define void @t() nounwind ssp { entry: ; CHECK-LABEL: t: %size = mul i32 8, 2 -; CHECK: subs r0, #16 +; CHECK: sub.w r0, sp, #16 ; CHECK: mov sp, r0 %vla_a = alloca i8, i32 %size, align 8 -; CHECK: subs r0, #16 +; CHECK: sub.w r0, sp, #16 ; CHECK: mov sp, r0 %vla_b = alloca i8, i32 %size, align 8 unreachable diff --git a/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll b/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll index 244d0bb..810bfb7 100644 --- a/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll +++ b/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll @@ -40,7 +40,7 @@ entry: ; CHECK: pop ; CHECK: pop ; Do not convert into single stream code. BranchProbability Analysis assumes -; that branches which goes to "ret" intruction have lower probabilities. +; that branches which goes to "ret" instruction have lower probabilities. switch i32 undef, label %bb7 [ i32 37, label %bb43 i32 48, label %bb5 diff --git a/test/CodeGen/Thumb2/lit.local.cfg b/test/CodeGen/Thumb2/lit.local.cfg index cb77b09..8a3ba96 100644 --- a/test/CodeGen/Thumb2/lit.local.cfg +++ b/test/CodeGen/Thumb2/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - targets = set(config.root.targets_to_build.split()) if not 'ARM' in targets: config.unsupported = True diff --git a/test/CodeGen/Thumb2/tail-call-r9.ll b/test/CodeGen/Thumb2/tail-call-r9.ll new file mode 100644 index 0000000..24c76c9 --- /dev/null +++ b/test/CodeGen/Thumb2/tail-call-r9.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-m3 | FileCheck %s + +@foo = common global void ()* null, align 4 + +; Make sure in the presence of a tail call, r9 doesn't get used to hold +; the destination address. It's callee-saved in AAPCS. 
+define arm_aapcscc void @test(i32 %a) nounwind { +; CHECK-LABEL: test: +; CHECK-NOT bx r9 + %tmp = load void ()** @foo, align 4 + tail call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r12}"() nounwind + tail call arm_aapcscc void %tmp() nounwind + ret void +} diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt1.ll b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll index 85943cf..13a1ca2 100644 --- a/test/CodeGen/Thumb2/thumb2-ifcvt1.ll +++ b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s - +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-default-it | FileCheck %s +; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it |FileCheck %s define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; CHECK-LABEL: t1: ; CHECK: ittt ne @@ -74,7 +75,7 @@ entry: ; CHECK-LABEL: t3: ; CHECK: itt ge ; CHECK: movge r0, r1 -; CHECK: blge _foo +; CHECK: blge {{_?}}foo %tmp1 = icmp sgt i32 %a, 10 ; <i1> [#uses=1] br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll index 788fa06..403cd48 100644 --- a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll +++ b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll @@ -1,4 +1,6 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-default-it | FileCheck %s +; RUN: llc < %s -mtriple=thumbv8-apple-ios -arm-no-restrict-it | FileCheck %s define void @foo(i32 %X, i32 %Y) { entry: diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt3.ll b/test/CodeGen/Thumb2/thumb2-ifcvt3.ll index bcf10ef..a71aa3f 100644 --- a/test/CodeGen/Thumb2/thumb2-ifcvt3.ll +++ b/test/CodeGen/Thumb2/thumb2-ifcvt3.ll @@ -1,4 +1,6 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-default-it | FileCheck %s +; RUN: llc < %s -mtriple=thumbv8-apple-darwin -arm-no-restrict-it | FileCheck %s ; There shouldn't be a unconditional branch at end of bb52. 
; rdar://7184787 diff --git a/test/CodeGen/Thumb2/thumb2-select.ll b/test/CodeGen/Thumb2/thumb2-select.ll index 0feaf95..5f5fa19 100644 --- a/test/CodeGen/Thumb2/thumb2-select.ll +++ b/test/CodeGen/Thumb2/thumb2-select.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s +; RUN: llc < %s -march=thumb -mattr=+thumb2 -show-mc-encoding | FileCheck %s define i32 @f1(i32 %a.s) { entry: @@ -66,7 +66,7 @@ define i32 @f7(i32 %a, i32 %b, i32 %c) { entry: ; CHECK-LABEL: f7: ; CHECK: it hi -; CHECK: lsrhi.w +; CHECK: lsrhi {{r[0-9]+}} %tmp1 = icmp ugt i32 %a, %b %tmp2 = udiv i32 %c, 3 %tmp3 = select i1 %tmp1, i32 %tmp2, i32 3 @@ -77,7 +77,7 @@ define i32 @f8(i32 %a, i32 %b, i32 %c) { entry: ; CHECK-LABEL: f8: ; CHECK: it lo -; CHECK: lsllo.w +; CHECK: lsllo {{r[0-9]+}} %tmp1 = icmp ult i32 %a, %b %tmp2 = mul i32 %c, 4 %tmp3 = select i1 %tmp1, i32 %tmp2, i32 3 @@ -96,3 +96,20 @@ entry: %tmp5 = select i1 %tmp1, i32 %tmp4, i32 3 ret i32 %tmp5 } + +define i32 @f10(i32 %a, i32 %b) { +; CHECK-LABEL: f10: +; CHECK: movwne {{r[0-9]+}}, #1234 @ encoding: [0x40,0xf2,0xd2,0x4{{[0-9a-f]+}}] + %tst = icmp ne i32 %a, %b + %val = select i1 %tst, i32 1234, i32 12345 + ret i32 %val +} + +; Make sure we pick the Thumb encoding for movw/movt +define i32 @f11(i32 %a, i32 %b) { +; CHECK-LABEL: f11: +; CHECK: movwne {{r[0-9]+}}, #50033 @ encoding: [0x4c,0xf2,0x71,0x3{{[0-9a-f]+}}] + %tst = icmp ne i32 %a, %b + %val = select i1 %tst, i32 123454321, i32 543212345 + ret i32 %val +} diff --git a/test/CodeGen/Thumb2/v8_IT_1.ll b/test/CodeGen/Thumb2/v8_IT_1.ll new file mode 100644 index 0000000..30dbb48 --- /dev/null +++ b/test/CodeGen/Thumb2/v8_IT_1.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -mtriple=thumbv8 -mattr=+neon | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7 -mattr=+neon -arm-restrict-it | FileCheck %s + +;CHECK-LABEL: select_s_v_v: +;CHECK-NOT: it +;CHECK: bx +define <16 x i8> @select_s_v_v(i32 %avail, i8* %bar) { +entry: + %vld1 = call <16 x i8> 
@llvm.arm.neon.vld1.v16i8(i8* %bar, i32 1) + %and = and i32 %avail, 1 + %tobool = icmp eq i32 %and, 0 + %vld1. = select i1 %tobool, <16 x i8> %vld1, <16 x i8> zeroinitializer + ret <16 x i8> %vld1. +} + +declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* , i32 ) + diff --git a/test/CodeGen/Thumb2/v8_IT_2.ll b/test/CodeGen/Thumb2/v8_IT_2.ll new file mode 100644 index 0000000..170b413 --- /dev/null +++ b/test/CodeGen/Thumb2/v8_IT_2.ll @@ -0,0 +1,38 @@ +; RUN: llc < %s -mtriple=thumbv8 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7 -arm-restrict-it | FileCheck %s + + %struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* } + +define fastcc i32 @CountTree(%struct.quad_struct* %tree) { +entry: +; CHECK-LABEL: CountTree: +; CHECK: bne +; CHECK: cmp +; CHECK: it eq +; CHECK: cmpeq +; CHECK: bne +; CHECK: mov +; CHECK: pop + br label %tailrecurse + +tailrecurse: ; preds = %bb, %entry + %tmp6 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1] + %tmp9 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=2] + %tmp12 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1] + %tmp14 = icmp eq %struct.quad_struct* null, null ; <i1> [#uses=1] + %tmp17 = icmp eq %struct.quad_struct* %tmp6, null ; <i1> [#uses=1] + %tmp23 = icmp eq %struct.quad_struct* %tmp9, null ; <i1> [#uses=1] + %tmp29 = icmp eq %struct.quad_struct* %tmp12, null ; <i1> [#uses=1] + %bothcond = and i1 %tmp17, %tmp14 ; <i1> [#uses=1] + %bothcond1 = and i1 %bothcond, %tmp23 ; <i1> [#uses=1] + %bothcond2 = and i1 %bothcond1, %tmp29 ; <i1> [#uses=1] + br i1 %bothcond2, label %return, label %bb + +bb: ; preds = %tailrecurse + %tmp41 = tail call fastcc i32 @CountTree( %struct.quad_struct* %tmp9 ) ; <i32> [#uses=0] + br label %tailrecurse + +return: ; preds = %tailrecurse + ret i32 0 +} + diff --git a/test/CodeGen/Thumb2/v8_IT_3.ll b/test/CodeGen/Thumb2/v8_IT_3.ll new file mode 100644 
index 0000000..4dca246 --- /dev/null +++ b/test/CodeGen/Thumb2/v8_IT_3.ll @@ -0,0 +1,77 @@ +; RUN: llc < %s -mtriple=thumbv8 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7 -arm-restrict-it | FileCheck %s +; RUN: llc < %s -mtriple=thumbv8 -relocation-model=pic | FileCheck %s --check-prefix=CHECK-PIC +; RUN: llc < %s -mtriple=thumbv7 -arm-restrict-it -relocation-model=pic | FileCheck %s --check-prefix=CHECK-PIC + +%struct.FF = type { i32 (i32*)*, i32 (i32*, i32*, i32, i32, i32, i32)*, i32 (i32, i32, i8*)*, void ()*, i32 (i32, i8*, i32*)*, i32 ()* } +%struct.BD = type { %struct.BD*, i32, i32, i32, i32, i64, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i64, i32)*, [16 x i8], i64, i64 } + +@FuncPtr = external hidden unnamed_addr global %struct.FF* +@.str1 = external hidden unnamed_addr constant [6 x i8], align 4 +@G = external unnamed_addr global i32 +@.str2 = external hidden unnamed_addr constant [58 x i8], align 4 +@.str3 = external hidden unnamed_addr constant [58 x i8], align 4 + +define i32 @test() nounwind optsize ssp { +entry: +; CHECK-LABEL: test: +; CHECK: push +; CHECK-NOT: push + %block_size = alloca i32, align 4 + %block_count = alloca i32, align 4 + %index_cache = alloca i32, align 4 + store i32 0, i32* %index_cache, align 4 + %tmp = load i32* @G, align 4 + %tmp1 = call i32 @bar(i32 0, i32 0, i32 %tmp) nounwind + switch i32 %tmp1, label %bb8 [ + i32 0, label %bb + i32 536870913, label %bb4 + i32 536870914, label %bb6 + ] + +bb: + %tmp2 = load i32* @G, align 4 + %tmp4 = icmp eq i32 %tmp2, 0 + br i1 %tmp4, label %bb1, label %bb8 + +bb1: +; CHECK: %bb6 +; CHECK: it eq +; CHECK-NEXT: ldreq +; CHECK-NEXT: it eq +; CHECK-NEXT: cmpeq +; CHECK: %bb1 + %tmp5 = load i32* %block_size, align 4 + %tmp6 = load i32* %block_count, align 4 + %tmp7 = call %struct.FF* @Get() nounwind + store %struct.FF* %tmp7, %struct.FF** @FuncPtr, align 4 + %tmp10 = zext i32 
%tmp6 to i64 + %tmp11 = zext i32 %tmp5 to i64 + %tmp12 = mul nsw i64 %tmp10, %tmp11 + %tmp13 = call i32 @foo(i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0), i64 %tmp12, i32 %tmp5) nounwind + br label %bb8 + +bb4: +; CHECK-PIC: cmp +; CHECK-PIC: cmp +; CHECK-PIC-NEXT: bne +; CHECK-PIC-NEXT: %bb4 +; CHECK-PIC-NEXT: movs +; CHECK-PIC-NEXT: add +; CHECK-PIC-NEXT: pop + ret i32 0 + +bb6: + ret i32 1 + +bb8: + ret i32 -1 +} + +declare i32 @printf(i8*, ...) + +declare %struct.FF* @Get() + +declare i32 @foo(i8*, i64, i32) + +declare i32 @bar(i32, i32, i32) diff --git a/test/CodeGen/Thumb2/v8_IT_4.ll b/test/CodeGen/Thumb2/v8_IT_4.ll new file mode 100644 index 0000000..5a80d8c --- /dev/null +++ b/test/CodeGen/Thumb2/v8_IT_4.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s -mtriple=thumbv8-eabi -float-abi=hard | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-eabi -float-abi=hard -arm-restrict-it | FileCheck %s +; RUN: llc < %s -mtriple=thumbv8-eabi -float-abi=hard -regalloc=basic | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-eabi -float-abi=hard -regalloc=basic -arm-restrict-it | FileCheck %s + +%"struct.__gnu_cxx::__normal_iterator<char*,std::basic_string<char, std::char_traits<char>, std::allocator<char> > >" = type { i8* } +%"struct.__gnu_cxx::new_allocator<char>" = type <{ i8 }> +%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >" = type { %"struct.__gnu_cxx::__normal_iterator<char*,std::basic_string<char, std::char_traits<char>, std::allocator<char> > >" } +%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep_base" } +%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep_base" = type { i32, i32, i32 } + + +define weak arm_aapcs_vfpcc i32 @_ZNKSs7compareERKSs(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this, 
%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) { +; CHECK-LABEL: _ZNKSs7compareERKSs: +; CHECK: cbnz r0, +; CHECK-NEXT: %bb +; CHECK-NEXT: sub{{(.w)?}} r0, r{{[0-9]+}}, r{{[0-9]+}} +; CHECK-NEXT: %bb1 +; CHECK-NEXT: pop.w +entry: + %0 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i32> [#uses=3] + %1 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) ; <i32> [#uses=3] + %2 = icmp ult i32 %1, %0 ; <i1> [#uses=1] + %3 = select i1 %2, i32 %1, i32 %0 ; <i32> [#uses=1] + %4 = tail call arm_aapcs_vfpcc i8* @_ZNKSs7_M_dataEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i8*> [#uses=1] + %5 = tail call arm_aapcs_vfpcc i8* @_ZNKSs4dataEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) ; <i8*> [#uses=1] + %6 = tail call arm_aapcs_vfpcc i32 @memcmp(i8* %4, i8* %5, i32 %3) nounwind readonly ; <i32> [#uses=2] + %7 = icmp eq i32 %6, 0 ; <i1> [#uses=1] + br i1 %7, label %bb, label %bb1 + +bb: ; preds = %entry + %8 = sub i32 %0, %1 ; <i32> [#uses=1] + ret i32 %8 + +bb1: ; preds = %entry + ret i32 %6 +} + +declare arm_aapcs_vfpcc i32 @memcmp(i8* nocapture, i8* nocapture, i32) nounwind readonly + +declare arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) + +declare arm_aapcs_vfpcc i8* @_ZNKSs7_M_dataEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) + +declare arm_aapcs_vfpcc i8* @_ZNKSs4dataEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) diff --git a/test/CodeGen/Thumb2/v8_IT_5.ll b/test/CodeGen/Thumb2/v8_IT_5.ll new file mode 100644 index 0000000..30250c8 --- /dev/null +++ b/test/CodeGen/Thumb2/v8_IT_5.ll @@ -0,0 +1,63 @@ +; RUN: llc < 
%s -mtriple=thumbv8 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7 -arm-restrict-it | FileCheck %s +; CHECK: it ne +; CHECK-NEXT: cmpne +; CHECK-NEXT: beq +; CHECK: cmp +; CHECK-NEXT: beq +; CHECK-NEXT: %if.else163 +; CHECK-NEXT: mov.w +; CHECK-NEXT: b +; CHECK-NEXT: %if.else145 +; CHECK-NEXT: mov.w + +%struct.hc = type { i32, i32, i32, i32 } + +define i32 @t(i32 %type) optsize { +entry: + br i1 undef, label %if.then, label %if.else + +if.then: + unreachable + +if.else: + br i1 undef, label %if.then15, label %if.else18 + +if.then15: + unreachable + +if.else18: + switch i32 %type, label %if.else173 [ + i32 3, label %if.then115 + i32 1, label %if.then102 + ] + +if.then102: + br i1 undef, label %cond.true10.i, label %t.exit + +cond.true10.i: + br label %t.exit + +t.exit: + unreachable + +if.then115: + br i1 undef, label %if.else163, label %if.else145 + +if.else145: + %call150 = call fastcc %struct.hc* @foo(%struct.hc* undef, i32 34865152) optsize + br label %while.body172 + +if.else163: + %call168 = call fastcc %struct.hc* @foo(%struct.hc* undef, i32 34078720) optsize + br label %while.body172 + +while.body172: + br label %while.body172 + +if.else173: + ret i32 -1 +} + +declare hidden fastcc %struct.hc* @foo(%struct.hc* nocapture, i32) nounwind optsize + diff --git a/test/CodeGen/X86/2006-05-02-InstrSched1.ll b/test/CodeGen/X86/2006-05-02-InstrSched1.ll index 0afddd8..69266dc 100644 --- a/test/CodeGen/X86/2006-05-02-InstrSched1.ll +++ b/test/CodeGen/X86/2006-05-02-InstrSched1.ll @@ -1,7 +1,10 @@ ; REQUIRES: asserts ; RUN: llc < %s -march=x86 -relocation-model=static -stats 2>&1 | \ -; RUN: grep asm-printer | grep 14 +; RUN: grep asm-printer | grep 16 ; +; It's possible to schedule this in 14 instructions by avoiding +; callee-save registers, but the scheduler isn't currently that +; conervative with registers. 
@size20 = external global i32 ; <i32*> [#uses=1] @in5 = external global i8* ; <i8**> [#uses=1] @@ -21,4 +24,3 @@ define i32 @compare(i8* %a, i8* %b) nounwind { } declare i32 @memcmp(i8*, i8*, i32) - diff --git a/test/CodeGen/X86/2007-01-08-InstrSched.ll b/test/CodeGen/X86/2007-01-08-InstrSched.ll index 24aa5b9..4ec7039 100644 --- a/test/CodeGen/X86/2007-01-08-InstrSched.ll +++ b/test/CodeGen/X86/2007-01-08-InstrSched.ll @@ -13,10 +13,10 @@ define float @foo(float %x) nounwind { ; CHECK: mulss ; CHECK: mulss -; CHECK: addss ; CHECK: mulss -; CHECK: addss ; CHECK: mulss ; CHECK: addss +; CHECK: addss +; CHECK: addss ; CHECK: ret } diff --git a/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll b/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll index c5d2a46..638d399 100644 --- a/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll +++ b/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll @@ -1,4 +1,13 @@ -; RUN: llc < %s -march=x86 | grep weak | count 2 +; RUN: llc < %s -mtriple=i686-pc-linux-gnu | FileCheck %s + @__gthrw_pthread_once = alias weak i32 (i32*, void ()*)* @pthread_once ; <i32 (i32*, void ()*)*> [#uses=0] -declare extern_weak i32 @pthread_once(i32*, void ()*) +define weak i32 @pthread_once(i32*, void ()*) { + ret i32 0 +} + +; CHECK: .weak pthread_once +; CHECK: pthread_once: + +; CHECK: .weak __gthrw_pthread_once +; CHECK: __gthrw_pthread_once = pthread_once diff --git a/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll b/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll index 7a3d72d..1ec9c70 100644 --- a/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll +++ b/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 | grep sarl | not grep esp +; RUN: llc < %s -march=x86 -mcpu=corei7 | grep sarl | not grep esp define signext i16 @t(i16* %qmatrix, i16* %dct, i16* %acBaseTable, i16* %acExtTable, i16 signext %acBaseRes, i16 signext %acMaskRes, i16 signext %acExtRes, i32* %bitptr, i32* %source, i32 %markerPrefix, i8** %byteptr, i32 %scale, i32 %round, i32 
%bits) { entry: diff --git a/test/CodeGen/X86/2008-03-14-SpillerCrash.ll b/test/CodeGen/X86/2008-03-14-SpillerCrash.ll index 8946415..18b3714 100644 --- a/test/CodeGen/X86/2008-03-14-SpillerCrash.ll +++ b/test/CodeGen/X86/2008-03-14-SpillerCrash.ll @@ -45,4 +45,6 @@ bb383: ; preds = %bb374.us, %bb311.split ret i64 0 } -declare i64 @__wcstoll_l(i32*, i32**, i32, %struct.__locale_struct*) nounwind +define i64 @__wcstoll_l(i32*, i32**, i32, %struct.__locale_struct*) nounwind { + ret i64 0 +} diff --git a/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll b/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll index 4eaca17..86bce8e 100644 --- a/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll +++ b/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mattr=+sse41 +; RUN: llc < %s -mattr=+sse4.1 ; rdar://5886601 ; gcc testsuite: gcc.target/i386/sse4_1-pblendw.c target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" diff --git a/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll index ecd8663..296f0ca 100644 --- a/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll +++ b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll @@ -67,17 +67,16 @@ declare i64 @strlen(i8*) nounwind readonly declare void @llvm.stackrestore(i8*) nounwind !0 = metadata !{i32 459009, metadata !1, metadata !"s1", metadata !2, i32 2, metadata !6} ; [ DW_TAG_arg_variable ] -!1 = metadata !{i32 458798, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", i32 2, metadata !3, i1 false, i1 true, - i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 458798, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !2 = metadata !{i32 458769, metadata !17, 
i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !18, metadata !18, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 458773, null, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 458773, null, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5, metadata !6} !5 = metadata !{i32 458788, null, metadata !2, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] !6 = metadata !{i32 458767, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ] !7 = metadata !{i32 2, i32 0, metadata !1, null} !8 = metadata !{i32 459008, metadata !1, metadata !"str.0", metadata !2, i32 3, metadata !9} ; [ DW_TAG_auto_variable ] !9 = metadata !{i32 458767, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ] -!10 = metadata !{i32 458753, null, metadata !2, metadata !"", i32 0, i64 8, i64 8, i64 0, i32 0, metadata !5, metadata !11, i32 0, null} ; [ DW_TAG_array_type ] +!10 = metadata !{i32 458753, null, metadata !2, metadata !"", i32 0, i64 8, i64 8, i64 0, i32 0, metadata !5, metadata !11, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 8, align 8, offset 0] [from char] !11 = metadata !{metadata !12} !12 = metadata !{i32 458785, i64 0, i64 1} ; [ DW_TAG_subrange_type ] !13 = metadata !{i32 3, i32 0, metadata !14, null} diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll index 8174fbd..764c2cd 100644 --- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll +++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts -; RUN: 
llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn -stats 2>&1 | grep "4 machine-licm" -; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn | FileCheck %s +; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse4.1 -mcpu=penryn -stats 2>&1 | grep "4 machine-licm" +; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse4.1 -mcpu=penryn | FileCheck %s ; rdar://6627786 ; rdar://7792037 @@ -17,9 +17,9 @@ bb4: ; preds = %bb.i, %bb26, %bb4, %entry ; CHECK: %bb4 ; CHECK: xorl ; CHECK: callq -; CHECK: movq ; CHECK: xorl ; CHECK: xorl +; CHECK: movq %0 = call i32 (...)* @xxGetOffsetForCode(i32 undef) nounwind ; <i32> [#uses=0] %ins = or i64 %p, 2097152 ; <i64> [#uses=1] diff --git a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll index 5cb05e8..e1930e0 100644 --- a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll +++ b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=i386-apple-darwin10.0 -relocation-model=pic -asm-verbose=false \ -; RUN: -mcpu=generic -disable-fp-elim -mattr=-sse41,-sse3,+sse2 -post-RA-scheduler=false -regalloc=basic < %s | \ +; RUN: -mcpu=generic -disable-fp-elim -mattr=-sse4.1,-sse3,+sse2 -post-RA-scheduler=false -regalloc=basic < %s | \ ; RUN: FileCheck %s ; rdar://6808032 diff --git a/test/CodeGen/X86/2009-10-16-Scope.ll b/test/CodeGen/X86/2009-10-16-Scope.ll index ae2e9ac..a936edc 100644 --- a/test/CodeGen/X86/2009-10-16-Scope.ll +++ b/test/CodeGen/X86/2009-10-16-Scope.ll @@ -24,8 +24,7 @@ declare i32 @foo(i32) ssp !0 = metadata !{i32 5, i32 2, metadata !1, null} !1 = metadata !{i32 458763, null, metadata !2, i32 1, i32 1, i32 0}; [DW_TAG_lexical_block ] -!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"bar", metadata !"bar", metadata !"bar", i32 4, null, i1 false, i1 true, - i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"bar", metadata !"bar", metadata !"bar", i32 
4, null, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !3 = metadata !{i32 458769, metadata !8, i32 12, metadata !"clang 1.1", i1 true, metadata !"", i32 0, null, metadata !9, null, null, null, metadata !""}; [DW_TAG_compile_unit ] !4 = metadata !{i32 459008, metadata !5, metadata !"count_", metadata !3, i32 5, metadata !6}; [ DW_TAG_auto_variable ] !5 = metadata !{i32 458763, null, metadata !1, i32 1, i32 1, i32 0}; [DW_TAG_lexical_block ] diff --git a/test/CodeGen/X86/2010-01-18-DbgValue.ll b/test/CodeGen/X86/2010-01-18-DbgValue.ll index c54f030..f99e682 100644 --- a/test/CodeGen/X86/2010-01-18-DbgValue.ll +++ b/test/CodeGen/X86/2010-01-18-DbgValue.ll @@ -29,18 +29,19 @@ return: ; preds = %entry declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!3} +!llvm.module.flags = !{!21} !0 = metadata !{i32 786689, metadata !1, metadata !"my_r0", metadata !2, i32 11, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ] !1 = metadata !{i32 786478, metadata !19, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (%struct.Rect*)* @foo, null, null, null, i32 11} ; [ DW_TAG_subprogram ] !2 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ] !3 = metadata !{i32 786449, metadata !19, i32 1, metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !20, metadata !20, metadata !18, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!4 = metadata !{i32 786453, metadata !19, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{i32 786453, metadata !19, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !5 = metadata !{metadata !6, metadata !7} !6 = metadata !{i32 786468, metadata !19, metadata !2, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] -!7 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Rect", i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_structure_type ] +!7 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Rect", i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [Rect] [line 6, size 256, align 64, offset 0] [def] [from ] !8 = metadata !{metadata !9, metadata !14} !9 = metadata !{i32 786445, metadata !19, metadata !7, metadata !"P1", i32 7, i64 128, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ] -!10 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Pt", i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ] +!10 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Pt", i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [Pt] [line 1, size 128, align 64, offset 0] [def] [from ] !11 = metadata !{metadata !12, metadata !13} !12 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"x", i32 2, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] !13 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"y", 
i32 3, i64 64, i64 64, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ] @@ -51,3 +52,4 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !18 = metadata !{metadata !1} !19 = metadata !{metadata !"b2.c", metadata !"/tmp/"} !20 = metadata !{i32 0} +!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll b/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll index 71c7b65..4d4e8c1 100644 --- a/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll +++ b/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll @@ -18,7 +18,7 @@ declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect. !0 = metadata !{i32 458769, metadata !15, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !16, metadata !16, null, null, null, i32 0} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 458790, metadata !15, metadata !0, metadata !"", i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ] -!2 = metadata !{i32 458771, metadata !15, metadata !0, metadata !"C", i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ] +!2 = metadata !{i32 458771, metadata !15, metadata !0, metadata !"C", i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [C] [line 1, size 192, align 64, offset 0] [def] [from ] !3 = metadata !{metadata !4, metadata !6, metadata !7} !4 = metadata !{i32 458765, metadata !15, metadata !2, metadata !"x", i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ] !5 = metadata !{i32 458788, metadata !15, metadata !0, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] @@ -27,7 +27,7 @@ declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect. 
!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ] !9 = metadata !{i32 458763, null, metadata !10, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] !10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!11 = metadata !{i32 458773, metadata !15, metadata !0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ] +!11 = metadata !{i32 458773, metadata !15, metadata !0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !12 = metadata !{metadata !13} !13 = metadata !{i32 458788, metadata !15, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest} diff --git a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll index d4a74c9..060c535 100644 --- a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll +++ b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll @@ -1,9 +1,9 @@ -; RUN: llc -mcpu=generic -mtriple=i386-apple-darwin -tailcallopt < %s | FileCheck %s +; RUN: llc -mcpu=generic -mtriple=i386-apple-darwin -tailcallopt -enable-misched=false < %s | FileCheck %s ; Check that lowered argumens do not overwrite the return address before it is moved. ; Bug 6225 ; ; If a call is a fastcc tail call and tail call optimization is enabled, the -; caller frame is replaced by the callee frame. This can require that arguments are +; caller frame is replaced by the callee frame. This can require that arguments are ; placed on the former return address stack slot. 
Special care needs to be taken ; taken that the return address is moved / or stored in a register before ; lowering of arguments potentially overwrites the value. @@ -51,5 +51,3 @@ false: tail call fastcc void @l298(i32 %r10, i32 %r9, i32 %r4) noreturn nounwind ret void } - - diff --git a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll index 00ac71a..7faee99 100644 --- a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll +++ b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll @@ -199,12 +199,13 @@ declare float @copysignf(float, float) nounwind readnone declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!3} +!llvm.module.flags = !{!48} !0 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 1921, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ] !1 = metadata !{i32 786478, metadata !45, metadata !2, metadata !"__divsc3", metadata !"__divsc3", metadata !"__divsc3", i32 1922, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, %0 (float, float, float, float)* @__divsc3, null, null, metadata !43, i32 1922} ; [ DW_TAG_subprogram ] !2 = metadata !{i32 786473, metadata !45} ; [ DW_TAG_file_type ] !3 = metadata !{i32 786449, metadata !45, i32 1, metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !47, metadata !47, metadata !44, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!4 = metadata !{i32 786453, metadata !45, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{i32 786453, metadata !45, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !5 = metadata !{metadata !6, metadata !9, metadata !9, metadata !9, metadata !9} !6 = metadata !{i32 786454, metadata !46, metadata !7, metadata !"SCtype", i32 170, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ] !7 = metadata !{i32 786473, metadata !46} ; [ DW_TAG_file_type ] @@ -248,3 +249,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !45 = metadata !{metadata !"libgcc2.c", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc"} !46 = metadata !{metadata !"libgcc2.h", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc"} !47 = metadata !{i32 0} +!48 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll index 4b1dfb3..c5736eb 100644 --- a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll +++ b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll @@ -22,6 +22,7 @@ declare void @foo(i32) nounwind optsize noinline ssp declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!38} !0 = metadata !{i32 786484, i32 0, metadata !1, metadata !"ret", metadata !"ret", metadata !"", metadata !1, i32 7, metadata !3, i1 false, i1 true, null, null} ; [ DW_TAG_variable ] !1 = metadata !{i32 786473, metadata !36} ; [ DW_TAG_file_type ] @@ -29,21 +30,21 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !3 = metadata !{i32 786468, 
metadata !36, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !4 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !1, i32 12, metadata !3, i32 0, null} ; [ DW_TAG_arg_variable ] !5 = metadata !{i32 786478, metadata !36, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 13, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, void (i32)* @foo, null, null, metadata !33, i32 13} ; [ DW_TAG_subprogram ] -!6 = metadata !{i32 786453, metadata !36, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ] +!6 = metadata !{i32 786453, metadata !36, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !7 = metadata !{null, metadata !3} !8 = metadata !{i32 786689, metadata !9, metadata !"myvar", metadata !1, i32 17, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ] !9 = metadata !{i32 786478, metadata !36, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", i32 17, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i8* (%struct.a*)* @bar, null, null, metadata !34, i32 17} ; [ DW_TAG_subprogram ] -!10 = metadata !{i32 786453, metadata !36, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ] +!10 = metadata !{i32 786453, metadata !36, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !11 = metadata !{metadata !12, metadata !13} !12 = metadata !{i32 786447, metadata !36, metadata !1, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] !13 = metadata !{i32 786447, metadata !36, metadata !1, metadata !"", i32 0, i64 64, i64 64, i64 
0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ] -!14 = metadata !{i32 786451, metadata !36, metadata !1, metadata !"a", i32 2, i64 128, i64 64, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_structure_type ] +!14 = metadata !{i32 786451, metadata !36, metadata !1, metadata !"a", i32 2, i64 128, i64 64, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [a] [line 2, size 128, align 64, offset 0] [def] [from ] !15 = metadata !{metadata !16, metadata !17} !16 = metadata !{i32 786445, metadata !36, metadata !14, metadata !"c", i32 3, i64 32, i64 32, i64 0, i32 0, metadata !3} ; [ DW_TAG_member ] !17 = metadata !{i32 786445, metadata !36, metadata !14, metadata !"d", i32 4, i64 64, i64 64, i64 64, i32 0, metadata !13} ; [ DW_TAG_member ] !18 = metadata !{i32 786689, metadata !19, metadata !"argc", metadata !1, i32 22, metadata !3, i32 0, null} ; [ DW_TAG_arg_variable ] !19 = metadata !{i32 786478, metadata !36, metadata !1, metadata !"main", metadata !"main", metadata !"main", i32 22, metadata !20, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, metadata !35, i32 22} ; [ DW_TAG_subprogram ] -!20 = metadata !{i32 786453, metadata !36, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !21, i32 0, null} ; [ DW_TAG_subroutine_type ] +!20 = metadata !{i32 786453, metadata !36, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !21, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !21 = metadata !{metadata !3, metadata !3, metadata !22} !22 = metadata !{i32 786447, metadata !36, metadata !1, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ] !23 = metadata !{i32 786447, metadata !36, metadata !1, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_pointer_type ] @@ -86,3 +87,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) 
nounwind readnone ; CHECK-NEXT: Ltmp{{.*}}: ; CHECK-NEXT: .byte 83 ; CHECK-NEXT: Ltmp{{.*}}: +!38 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/2010-05-28-Crash.ll b/test/CodeGen/X86/2010-05-28-Crash.ll index d5c0ead..1114c8d 100644 --- a/test/CodeGen/X86/2010-05-28-Crash.ll +++ b/test/CodeGen/X86/2010-05-28-Crash.ll @@ -23,12 +23,13 @@ entry: } !llvm.dbg.cu = !{!3} +!llvm.module.flags = !{!20} !0 = metadata !{i32 786689, metadata !1, metadata !"y", metadata !2, i32 2, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] !1 = metadata !{i32 786478, metadata !18, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @foo, null, null, metadata !15, i32 2} ; [ DW_TAG_subprogram ] !2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ] !3 = metadata !{i32 786449, metadata !18, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !19, metadata !19, metadata !17, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!4 = metadata !{i32 786453, metadata !18, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{i32 786453, metadata !18, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !5 = metadata !{metadata !6, metadata !6} !6 = metadata !{i32 786468, metadata !18, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !7 = metadata !{i32 786689, metadata !8, metadata !"x", metadata !2, i32 6, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] @@ -48,3 +49,4 @@ entry: ;CHECK: DEBUG_VALUE: bar:x <- E ;CHECK: Ltmp ;CHECK: DEBUG_VALUE: foo:y <- 1{{$}} +!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} 
diff --git a/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll index 1571a58..b45ac22 100644 --- a/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll +++ b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll @@ -20,18 +20,19 @@ entry: declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!4} +!llvm.module.flags = !{!34} !llvm.dbg.lv = !{!0, !14, !15, !16, !17, !24, !25, !28} !0 = metadata !{i32 786689, metadata !1, metadata !"this", metadata !3, i32 11, metadata !12, i32 0, null} ; [ DW_TAG_arg_variable ] !1 = metadata !{i32 786478, metadata !31, metadata !2, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEi", i32 11, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 (%struct.foo*, i32)* @_ZN3foo3bazEi, null, null, null, i32 11} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 786451, metadata !31, metadata !3, metadata !"foo", i32 3, i64 32, i64 32, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_structure_type ] +!2 = metadata !{i32 786451, metadata !31, metadata !3, metadata !"foo", i32 3, i64 32, i64 32, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [foo] [line 3, size 32, align 32, offset 0] [def] [from ] !3 = metadata !{i32 786473, metadata !31} ; [ DW_TAG_file_type ] !4 = metadata !{i32 786449, metadata !31, i32 4, metadata !"4.2.1 LLVM build", i1 true, metadata !"", i32 0, metadata !32, metadata !32, metadata !33, null, null, metadata !""} ; [ DW_TAG_compile_unit ] !5 = metadata !{metadata !6, metadata !1, metadata !8} !6 = metadata !{i32 786445, metadata !31, metadata !2, metadata !"y", i32 8, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_member ] !7 = metadata !{i32 786468, metadata !31, metadata !3, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !8 = metadata !{i32 786478, metadata !31, metadata !2, metadata !"baz", metadata !"baz", metadata !"_ZN3foo3bazEi", i32 
15, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 (%struct.foo*, i32)* @_ZN3foo3bazEi, null, null, null, i32 15} ; [ DW_TAG_subprogram ] -!9 = metadata !{i32 786453, metadata !31, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null} ; [ DW_TAG_subroutine_type ] +!9 = metadata !{i32 786453, metadata !31, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !10 = metadata !{metadata !7, metadata !11, metadata !7} !11 = metadata !{i32 786447, metadata !31, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !2} ; [ DW_TAG_pointer_type ] !12 = metadata !{i32 786470, metadata !31, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !13} ; [ DW_TAG_const_type ] @@ -41,7 +42,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !16 = metadata !{i32 786689, metadata !8, metadata !"x", metadata !3, i32 15, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ] !17 = metadata !{i32 786689, metadata !18, metadata !"argc", metadata !3, i32 19, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ] !18 = metadata !{i32 786478, metadata !31, metadata !3, metadata !"main", metadata !"main", metadata !"main", i32 19, metadata !19, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, null, null, null, null, i32 19} ; [ DW_TAG_subprogram ] -!19 = metadata !{i32 786453, metadata !31, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !20, i32 0, null} ; [ DW_TAG_subroutine_type ] +!19 = metadata !{i32 786453, metadata !31, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !20, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !20 = metadata !{metadata !7, metadata !7, metadata !21} !21 = metadata !{i32 786447, metadata !31, 
metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ] !22 = metadata !{i32 786447, metadata !31, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ] @@ -56,3 +57,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !31 = metadata !{metadata !"foo.cp", metadata !"/tmp/"} !32 = metadata !{i32 0} !33 = metadata !{metadata !1, metadata !8, metadata !18} +!34 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/2010-07-06-DbgCrash.ll b/test/CodeGen/X86/2010-07-06-DbgCrash.ll index e91cd76..b49aec3 100644 --- a/test/CodeGen/X86/2010-07-06-DbgCrash.ll +++ b/test/CodeGen/X86/2010-07-06-DbgCrash.ll @@ -7,15 +7,14 @@ !39 = metadata !{i32 524305, metadata !109, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", i1 true, metadata !"", i32 0, metadata !108, metadata !108, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ] !46 = metadata !{i32 524303, metadata !109, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !47} ; [ DW_TAG_pointer_type ] !47 = metadata !{i32 524324, metadata !109, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] -!97 = metadata !{i32 524334, i32 0, metadata !39, metadata !"main", metadata !"main", metadata !"main", i32 73, metadata !98, i1 false, i1 true, - i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!98 = metadata !{i32 524309, metadata !109, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !99, i32 0, null} ; [ DW_TAG_subroutine_type ] +!97 = metadata !{i32 524334, i32 0, metadata !39, metadata !"main", metadata !"main", metadata !"main", i32 73, metadata !98, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!98 = metadata !{i32 524309, metadata !109, null, metadata !"", i32 0, i64 
0, i64 0, i64 0, i32 0, null, metadata !99, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !99 = metadata !{metadata !100} !100 = metadata !{i32 524324, metadata !109, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !101 = metadata !{[2 x i8*]* @C.9.2167} !102 = metadata !{i32 524544, metadata !103, metadata !"find_strings", metadata !38, i32 75, metadata !104, i32 0, i32 0} ; [ DW_TAG_auto_variable ] !103 = metadata !{i32 524299, null, metadata !97, i32 73, i32 0, i32 0} ; [ DW_TAG_lexical_block ] -!104 = metadata !{i32 524289, metadata !109, null, metadata !"", i32 0, i64 85312, i64 64, i64 0, i32 0, metadata !46, metadata !105, i32 0, null} ; [ DW_TAG_array_type ] +!104 = metadata !{i32 524289, metadata !109, null, metadata !"", i32 0, i64 85312, i64 64, i64 0, i32 0, metadata !46, metadata !105, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 85312, align 64, offset 0] [from ] !105 = metadata !{metadata !106} !106 = metadata !{i32 524321, i64 0, i64 1333} ; [ DW_TAG_subrange_type ] !107 = metadata !{i32 73, i32 0, metadata !103, null} diff --git a/test/CodeGen/X86/2010-08-04-StackVariable.ll b/test/CodeGen/X86/2010-08-04-StackVariable.ll index c6e1654..91fec3b 100644 --- a/test/CodeGen/X86/2010-08-04-StackVariable.ll +++ b/test/CodeGen/X86/2010-08-04-StackVariable.ll @@ -75,10 +75,11 @@ return: ; preds = %entry declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!3} +!llvm.module.flags = !{!49} !46 = metadata !{metadata !0, metadata !9, metadata !16, metadata !17, metadata !20} !0 = metadata !{i32 786478, metadata !47, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 11} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 786451, metadata !47, metadata !2, metadata !"SVal", i32 1, i64 128, i64 64, i64 0, 
i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_structure_type ] +!1 = metadata !{i32 786451, metadata !47, metadata !2, metadata !"SVal", i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [SVal] [line 1, size 128, align 64, offset 0] [def] [from ] !2 = metadata !{i32 786473, metadata !47} ; [ DW_TAG_file_type ] !3 = metadata !{i32 786449, metadata !47, i32 4, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !48, metadata !48, metadata !46, null, null, metadata !""} ; [ DW_TAG_compile_unit ] !4 = metadata !{metadata !5, metadata !7, metadata !0, metadata !9} @@ -87,18 +88,18 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !7 = metadata !{i32 786445, metadata !47, metadata !1, metadata !"Kind", i32 8, i64 32, i64 32, i64 64, i32 0, metadata !8} ; [ DW_TAG_member ] !8 = metadata !{i32 786468, metadata !47, metadata !2, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] !9 = metadata !{i32 786478, metadata !47, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 12} ; [ DW_TAG_subprogram ] -!10 = metadata !{i32 786453, metadata !47, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ] +!10 = metadata !{i32 786453, metadata !47, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !11 = metadata !{null, metadata !12, metadata !13} !12 = metadata !{i32 786447, metadata !47, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ] !13 = metadata !{i32 786468, metadata !47, metadata !2, metadata !"int", i32 0, i64 32, i64 32, 
i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!14 = metadata !{i32 786453, metadata !47, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_subroutine_type ] +!14 = metadata !{i32 786453, metadata !47, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !15 = metadata !{null, metadata !12} !16 = metadata !{i32 786478, metadata !47, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev, null, null, null, i32 11} ; [ DW_TAG_subprogram ] !17 = metadata !{i32 786478, metadata !47, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal, null, null, null, i32 16} ; [ DW_TAG_subprogram ] -!18 = metadata !{i32 786453, metadata !47, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null} ; [ DW_TAG_subroutine_type ] +!18 = metadata !{i32 786453, metadata !47, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !19 = metadata !{metadata !13, metadata !13, metadata !1} !20 = metadata !{i32 786478, metadata !47, metadata !2, metadata !"main", metadata !"main", metadata !"main", i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main, null, null, null, i32 23} ; [ DW_TAG_subprogram ] -!21 = metadata !{i32 786453, metadata !47, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_subroutine_type ] +!21 = metadata !{i32 786453, metadata !47, metadata !2, metadata !"", i32 0, 
i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !22 = metadata !{metadata !13} !23 = metadata !{i32 786689, metadata !17, metadata !"i", metadata !2, i32 16, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ] !24 = metadata !{i32 16, i32 0, metadata !17, null} @@ -125,3 +126,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !45 = metadata !{i32 27, i32 0, metadata !39, null} !47 = metadata !{metadata !"small.cc", metadata !"/Users/manav/R8248330"} !48 = metadata !{i32 0} +!49 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/2010-09-16-EmptyFilename.ll b/test/CodeGen/X86/2010-09-16-EmptyFilename.ll index 831fe66..9aa41c3 100644 --- a/test/CodeGen/X86/2010-09-16-EmptyFilename.ll +++ b/test/CodeGen/X86/2010-09-16-EmptyFilename.ll @@ -13,11 +13,12 @@ entry: } !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!17} !0 = metadata !{i32 786478, metadata !14, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 53, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 786473, metadata !14} ; [ DW_TAG_file_type ] !2 = metadata !{i32 786449, metadata !15, i32 12, metadata !"clang version 2.9 (trunk 114084)", i1 false, metadata !"", i32 0, metadata !16, metadata !16, metadata !13, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !14, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, metadata !13, null} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !14, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, metadata 
!14, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !6 = metadata !{i32 786478, metadata !15, metadata !7, metadata !"bar", metadata !"bar", metadata !"bar", i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ] @@ -31,3 +32,4 @@ entry: !14 = metadata !{metadata !"", metadata !"/private/tmp"} !15 = metadata !{metadata !"bug.c", metadata !"/private/tmp"} !16 = metadata !{i32 0} +!17 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll b/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll index 1b33977..39d89e3 100644 --- a/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll +++ b/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll @@ -19,8 +19,8 @@ entry: } ; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip) -; CHECK: movb 38(%rsp), [[R0:%.+]] -; CHECK: movb 8(%rsp), [[R1:%.+]] -; CHECK: movb [[R1]], 8(%rsp) -; CHECK: movb [[R0]], 38(%rsp) +; CHECK: movb (%rsp), [[R1:%.+]] +; CHECK: movb 30(%rsp), [[R0:%.+]] +; CHECK: movb [[R1]], (%rsp) +; CHECK: movb [[R0]], 30(%rsp) ; CHECK: callq ___stack_chk_fail diff --git a/test/CodeGen/X86/2010-11-02-DbgParameter.ll b/test/CodeGen/X86/2010-11-02-DbgParameter.ll index e118e80..21ac7c9 100644 --- a/test/CodeGen/X86/2010-11-02-DbgParameter.ll +++ b/test/CodeGen/X86/2010-11-02-DbgParameter.ll @@ -16,16 +16,17 @@ entry: declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!19} !0 = metadata !{i32 786478, metadata !17, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (%struct.bar*)* @foo, null, null, metadata !16, i32 3} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 786473, metadata !17} ; [ DW_TAG_file_type ] !2 = metadata !{i32 786449, metadata !17, i32 12, metadata !"clang version 
2.9 (trunk 117922)", i1 true, metadata !"", i32 0, metadata !18, metadata !18, metadata !15, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !17, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !17, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, metadata !17, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !6 = metadata !{i32 786689, metadata !0, metadata !"i", metadata !1, i32 3, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ] !7 = metadata !{i32 786447, metadata !17, metadata !1, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] -!8 = metadata !{i32 786451, metadata !17, metadata !1, metadata !"bar", i32 2, i64 64, i64 32, i64 0, i32 0, null, metadata !9, i32 0, null} ; [ DW_TAG_structure_type ] +!8 = metadata !{i32 786451, metadata !17, metadata !1, metadata !"bar", i32 2, i64 64, i64 32, i64 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [bar] [line 2, size 64, align 32, offset 0] [def] [from ] !9 = metadata !{metadata !10, metadata !11} !10 = metadata !{i32 786445, metadata !17, metadata !1, metadata !"x", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ] !11 = metadata !{i32 786445, metadata !17, metadata !1, metadata !"y", i32 2, i64 32, i64 32, i64 32, i32 0, metadata !5} ; [ DW_TAG_member ] @@ -36,3 +37,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !16 = metadata !{metadata !6} !17 = metadata !{metadata !"one.c", metadata !"/private/tmp"} !18 = metadata !{i32 0} +!19 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git 
a/test/CodeGen/X86/2010-12-02-MC-Set.ll b/test/CodeGen/X86/2010-12-02-MC-Set.ll index 1a4c586..5a407d3 100644 --- a/test/CodeGen/X86/2010-12-02-MC-Set.ll +++ b/test/CodeGen/X86/2010-12-02-MC-Set.ll @@ -7,12 +7,13 @@ entry: } !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!10} !7 = metadata !{metadata !0} !0 = metadata !{i32 786478, metadata !9, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 786473, metadata !9} ; [ DW_TAG_file_type ] !2 = metadata !{i32 786449, metadata !9, i32 12, metadata !"clang version 2.9 (trunk 120563)", i1 false, metadata !"", i32 0, metadata !8, metadata !8, metadata !7, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !9, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !9, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{null} !5 = metadata !{i32 5, i32 1, metadata !6, null} !6 = metadata !{i32 786443, metadata !9, metadata !0, i32 3, i32 16, i32 0} ; [ DW_TAG_lexical_block ] @@ -23,3 +24,4 @@ entry: ; CHECK-NEXT: __debug_line ; CHECK-NEXT: Lline_table_start0 ; CHECK-NEXT: Ltmp{{[0-9]}} = (Ltmp +!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll index 3e0fbca..d534030 100644 --- a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll +++ b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll @@ -70,15 +70,16 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone declare i32 @puts(i8* nocapture) nounwind !llvm.dbg.cu = !{!2} 
+!llvm.module.flags = !{!33} -!0 = metadata !{i32 786478, metadata !31, metadata !1, metadata !"gcd", metadata !"gcd", metadata !"", i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i64 (i64, i64)* @gcd, null, null, metadata !29, i32 0} ; [ DW_TAG_subprogram ] +!0 = metadata !{i32 786478, metadata !31, metadata !1, metadata !"gcd", metadata !"gcd", metadata !"", i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i64 (i64, i64)* @gcd, null, null, metadata !29, i32 0} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 0] [gcd] !1 = metadata !{i32 786473, metadata !31} ; [ DW_TAG_file_type ] !2 = metadata !{i32 786449, metadata !31, i32 12, metadata !"clang version 2.9 (trunk 124117)", i1 true, metadata !"", i32 0, metadata !32, metadata !32, metadata !28, null, null, null} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !31, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !31, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, null, metadata !2, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 786478, metadata !31, metadata !1, metadata !"main", metadata !"main", metadata !"", i32 25, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @main, null, null, metadata !30, i32 0} ; [ DW_TAG_subprogram ] -!7 = metadata !{i32 786453, metadata !31, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!6 = metadata !{i32 786478, metadata !31, metadata !1, metadata !"main", metadata !"main", metadata !"", i32 25, metadata !7, i1 false, i1 true, i32 0, i32 0, 
null, i32 0, i1 true, i32 ()* @main, null, null, metadata !30, i32 0} ; [ DW_TAG_subprogram ] [line 25] [def] [scope 0] [main] +!7 = metadata !{i32 786453, metadata !31, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9} !9 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !10 = metadata !{i32 786689, metadata !0, metadata !"a", metadata !1, i32 5, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] @@ -104,3 +105,4 @@ declare i32 @puts(i8* nocapture) nounwind !30 = metadata !{metadata !14, metadata !17} !31 = metadata !{metadata !"rem_small.c", metadata !"/private/tmp"} !32 = metadata !{i32 0} +!33 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/2011-05-26-UnreachableBlockElim.ll b/test/CodeGen/X86/2011-05-26-UnreachableBlockElim.ll index 0f18f09..91cd208 100644 --- a/test/CodeGen/X86/2011-05-26-UnreachableBlockElim.ll +++ b/test/CodeGen/X86/2011-05-26-UnreachableBlockElim.ll @@ -8,7 +8,7 @@ target triple = "x86_64-apple-macosx10.6.0" @aux_temp = external global %struct.dfa, align 8 -declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readnone +declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) nounwind readnone declare void @__memset_chk() nounwind @@ -21,12 +21,12 @@ if.end.i: ; preds = %entry br i1 undef, label %land.end.thread.i, label %land.end.i land.end.thread.i: ; preds = %if.end.i - %0 = call i64 @llvm.objectsize.i64(i8* undef, i1 false) nounwind + %0 = call i64 @llvm.objectsize.i64.p0i8(i8* undef, i1 false) nounwind %cmp1710.i = icmp eq i64 %0, -1 br i1 %cmp1710.i, label %cond.false156.i, label %cond.true138.i land.end.i: ; preds = %if.end.i - %1 = call i64 @llvm.objectsize.i64(i8* undef, i1 false) nounwind + %1 = call i64 @llvm.objectsize.i64.p0i8(i8* undef, i1 false) nounwind %cmp17.i = 
icmp eq i64 %1, -1 br i1 %cmp17.i, label %cond.false156.i, label %cond.true138.i @@ -41,13 +41,8 @@ cond.false156.i: ; preds = %for.end.i, %land.en cond.end166.i: ; preds = %cond.false156.i, %cond.true138.i %idxprom1113.i = phi i64 [ %idxprom1114.i, %cond.false156.i ], [ undef, %cond.true138.i ] - %tmp235.i = load %struct.state** getelementptr inbounds (%struct.dfa* @aux_temp, i64 0, i32 2), align 8, !tbaa !0 + %tmp235.i = load %struct.state** getelementptr inbounds (%struct.dfa* @aux_temp, i64 0, i32 2), align 8 %att.i = getelementptr inbounds %struct.state* %tmp235.i, i64 %idxprom1113.i, i32 0 - store i32 0, i32* %att.i, align 4, !tbaa !3 + store i32 0, i32* %att.i, align 4 ret void } - -!0 = metadata !{metadata !"any pointer", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA", null} -!3 = metadata !{metadata !"int", metadata !1} diff --git a/test/CodeGen/X86/2011-06-03-x87chain.ll b/test/CodeGen/X86/2011-06-03-x87chain.ll index ce63c74..5275b68 100644 --- a/test/CodeGen/X86/2011-06-03-x87chain.ll +++ b/test/CodeGen/X86/2011-06-03-x87chain.ll @@ -29,3 +29,21 @@ entry: store float %conv, float* %f, align 4 ret float %conv } + +define void @PR17495() { +entry: + br i1 undef, label %while.end, label %while.body + +while.body: ; preds = %while.body, %entry + %x.1.copyload = load i24* undef, align 1 + %conv = sitofp i24 %x.1.copyload to float + %div = fmul float %conv, 0x3E80000000000000 + store float %div, float* undef, align 4 + br i1 false, label %while.end, label %while.body + +while.end: ; preds = %while.body, %entry + ret void + +; CHECK-LABEL: @PR17495 +; CHECK-NOT: fildll +} diff --git a/test/CodeGen/X86/2011-09-18-sse2cmp.ll b/test/CodeGen/X86/2011-09-18-sse2cmp.ll index a6f428f..89de648 100644 --- a/test/CodeGen/X86/2011-09-18-sse2cmp.ll +++ b/test/CodeGen/X86/2011-09-18-sse2cmp.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse41 | FileCheck %s +;RUN: llc < %s 
-march=x86 -mcpu=yonah -mattr=+sse2,-sse4.1 | FileCheck %s ;CHECK: @max ;CHECK: cmplepd diff --git a/test/CodeGen/X86/2011-09-21-setcc-bug.ll b/test/CodeGen/X86/2011-09-21-setcc-bug.ll index 4daf678..a67c3f3 100644 --- a/test/CodeGen/X86/2011-09-21-setcc-bug.ll +++ b/test/CodeGen/X86/2011-09-21-setcc-bug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse41 +; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse4.1 ; Make sure we are not crashing on this code. diff --git a/test/CodeGen/X86/2011-10-11-srl.ll b/test/CodeGen/X86/2011-10-11-srl.ll index 6c6d340..434f88c 100644 --- a/test/CodeGen/X86/2011-10-11-srl.ll +++ b/test/CodeGen/X86/2011-10-11-srl.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=-sse41 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=-sse4.1 target triple = "x86_64-unknown-linux-gnu" diff --git a/test/CodeGen/X86/2011-10-12-MachineCSE.ll b/test/CodeGen/X86/2011-10-12-MachineCSE.ll index cd15f84..72e672a 100644 --- a/test/CodeGen/X86/2011-10-12-MachineCSE.ll +++ b/test/CodeGen/X86/2011-10-12-MachineCSE.ll @@ -20,11 +20,11 @@ entry: %2 = lshr i32 %1, 16 %bf.clear = and i32 %2, 255 %idxprom = sext i32 %bf.clear to i64 - %3 = load %struct.optab** getelementptr inbounds ([49 x %struct.optab*]* @optab_table, i32 0, i64 0), align 8, !tbaa !0 + %3 = load %struct.optab** getelementptr inbounds ([49 x %struct.optab*]* @optab_table, i32 0, i64 0), align 8 %handlers = getelementptr inbounds %struct.optab* %3, i32 0, i32 1 %arrayidx = getelementptr inbounds [59 x %struct.anon.3]* %handlers, i32 0, i64 %idxprom %insn_code = getelementptr inbounds %struct.anon.3* %arrayidx, i32 0, i32 0 - %4 = load i32* %insn_code, align 4, !tbaa !3 + %4 = load i32* %insn_code, align 4 %cmp = icmp eq i32 %4, 1317 br i1 %cmp, label %if.then, label %lor.lhs.false @@ -32,14 +32,14 @@ lor.lhs.false: ; preds = %entry %idxprom1 = sext i32 %4 to i64 %arrayidx2 = getelementptr inbounds [0 x 
%struct.insn_data]* @insn_data, i32 0, i64 %idxprom1 %operand = getelementptr inbounds %struct.insn_data* %arrayidx2, i32 0, i32 3 - %5 = load %struct.insn_operand_data** %operand, align 8, !tbaa !0 + %5 = load %struct.insn_operand_data** %operand, align 8 %arrayidx3 = getelementptr inbounds %struct.insn_operand_data* %5, i64 0 %predicate = getelementptr inbounds %struct.insn_operand_data* %arrayidx3, i32 0, i32 0 - %6 = load i32 (%struct.rtx_def*, i32)** %predicate, align 8, !tbaa !0 + %6 = load i32 (%struct.rtx_def*, i32)** %predicate, align 8 %idxprom4 = sext i32 %4 to i64 %arrayidx5 = getelementptr inbounds [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom4 %operand6 = getelementptr inbounds %struct.insn_data* %arrayidx5, i32 0, i32 3 - %7 = load %struct.insn_operand_data** %operand6, align 8, !tbaa !0 + %7 = load %struct.insn_operand_data** %operand6, align 8 %arrayidx7 = getelementptr inbounds %struct.insn_operand_data* %7, i64 0 %8 = bitcast %struct.insn_operand_data* %arrayidx7 to i8* %bf.field.offs = getelementptr i8* %8, i32 16 @@ -54,14 +54,14 @@ lor.lhs.false9: ; preds = %lor.lhs.false %idxprom10 = sext i32 %4 to i64 %arrayidx11 = getelementptr inbounds [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom10 %operand12 = getelementptr inbounds %struct.insn_data* %arrayidx11, i32 0, i32 3 - %11 = load %struct.insn_operand_data** %operand12, align 8, !tbaa !0 + %11 = load %struct.insn_operand_data** %operand12, align 8 %arrayidx13 = getelementptr inbounds %struct.insn_operand_data* %11, i64 1 %predicate14 = getelementptr inbounds %struct.insn_operand_data* %arrayidx13, i32 0, i32 0 - %12 = load i32 (%struct.rtx_def*, i32)** %predicate14, align 8, !tbaa !0 + %12 = load i32 (%struct.rtx_def*, i32)** %predicate14, align 8 %idxprom15 = sext i32 %4 to i64 %arrayidx16 = getelementptr inbounds [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom15 %operand17 = getelementptr inbounds %struct.insn_data* %arrayidx16, i32 0, i32 3 - %13 = load 
%struct.insn_operand_data** %operand17, align 8, !tbaa !0 + %13 = load %struct.insn_operand_data** %operand17, align 8 %arrayidx18 = getelementptr inbounds %struct.insn_operand_data* %13, i64 1 %14 = bitcast %struct.insn_operand_data* %arrayidx18 to i8* %bf.field.offs19 = getelementptr i8* %14, i32 16 @@ -76,14 +76,14 @@ lor.lhs.false23: ; preds = %lor.lhs.false9 %idxprom24 = sext i32 %4 to i64 %arrayidx25 = getelementptr inbounds [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom24 %operand26 = getelementptr inbounds %struct.insn_data* %arrayidx25, i32 0, i32 3 - %17 = load %struct.insn_operand_data** %operand26, align 8, !tbaa !0 + %17 = load %struct.insn_operand_data** %operand26, align 8 %arrayidx27 = getelementptr inbounds %struct.insn_operand_data* %17, i64 2 %predicate28 = getelementptr inbounds %struct.insn_operand_data* %arrayidx27, i32 0, i32 0 - %18 = load i32 (%struct.rtx_def*, i32)** %predicate28, align 8, !tbaa !0 + %18 = load i32 (%struct.rtx_def*, i32)** %predicate28, align 8 %idxprom29 = sext i32 %4 to i64 %arrayidx30 = getelementptr inbounds [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom29 %operand31 = getelementptr inbounds %struct.insn_data* %arrayidx30, i32 0, i32 3 - %19 = load %struct.insn_operand_data** %operand31, align 8, !tbaa !0 + %19 = load %struct.insn_operand_data** %operand31, align 8 %arrayidx32 = getelementptr inbounds %struct.insn_operand_data* %19, i64 2 %20 = bitcast %struct.insn_operand_data* %arrayidx32 to i8* %bf.field.offs33 = getelementptr i8* %20, i32 16 @@ -101,7 +101,7 @@ if.end: ; preds = %lor.lhs.false23 %idxprom37 = sext i32 %4 to i64 %arrayidx38 = getelementptr inbounds [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom37 %genfun = getelementptr inbounds %struct.insn_data* %arrayidx38, i32 0, i32 2 - %23 = load %struct.rtx_def* (%struct.rtx_def*, ...)** %genfun, align 8, !tbaa !0 + %23 = load %struct.rtx_def* (%struct.rtx_def*, ...)** %genfun, align 8 %call39 = tail call %struct.rtx_def* 
(%struct.rtx_def*, ...)* %23(%struct.rtx_def* %r0, %struct.rtx_def* %r1, %struct.rtx_def* %c) br label %return @@ -109,8 +109,3 @@ return: ; preds = %if.end, %if.then %24 = phi %struct.rtx_def* [ %call39, %if.end ], [ null, %if.then ] ret %struct.rtx_def* %24 } - -!0 = metadata !{metadata !"any pointer", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA", null} -!3 = metadata !{metadata !"_ZTS9insn_code", metadata !1} diff --git a/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll b/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll index da734d4..07a6910 100644 --- a/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll +++ b/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll @@ -16,8 +16,8 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK: main define i32 @main() nounwind uwtable { entry: -; CHECK: pmovsxbq j(%rip), % ; CHECK: pmovsxbq i(%rip), % +; CHECK: pmovsxbq j(%rip), % %0 = load <2 x i8>* @i, align 8 %1 = load <2 x i8>* @j, align 8 %div = sdiv <2 x i8> %1, %0 @@ -25,4 +25,3 @@ entry: ret i32 0 ; CHECK: ret } - diff --git a/test/CodeGen/X86/2011-12-15-vec_shift.ll b/test/CodeGen/X86/2011-12-15-vec_shift.ll index dc3a08b..0183e10 100644 --- a/test/CodeGen/X86/2011-12-15-vec_shift.ll +++ b/test/CodeGen/X86/2011-12-15-vec_shift.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=x86-64 -mattr=+sse41 -mcpu=penryn < %s | FileCheck %s -check-prefix=CHECK-W-SSE4 -; RUN: llc -march=x86-64 -mattr=-sse41 -mcpu=penryn < %s | FileCheck %s -check-prefix=CHECK-WO-SSE4 +; RUN: llc -march=x86-64 -mattr=+sse4.1 -mcpu=penryn < %s | FileCheck %s -check-prefix=CHECK-W-SSE4 +; RUN: llc -march=x86-64 -mattr=-sse4.1 -mcpu=penryn < %s | FileCheck %s -check-prefix=CHECK-WO-SSE4 ; Test case for r146671 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.7" diff --git 
a/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll b/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll index 7515e80..14643e4 100644 --- a/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll +++ b/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=x86-64 -mattr=-sse42,+sse41 < %s | FileCheck %s +; RUN: llc -march=x86-64 -mattr=-sse4.2,+sse4.1 < %s | FileCheck %s ; Make sure we don't load from the location pointed to by %p ; twice: it has non-obvious performance implications, and ; the relevant transformation doesn't know how to update diff --git a/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll b/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll index a883d79..cd8a16f 100644 --- a/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll +++ b/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll @@ -15,7 +15,7 @@ entry: ; CHECK: lock ; CHECK-NEXT: orl {{.*}}, (%esp) -; CHECK-NEXT: cmpl $0 +; CHECK-NEXT: testl [[REG:%e[a-z]+]], [[REG]] if.then: ; preds = %entry tail call void bitcast (void (...)* @foo to void ()*)() nounwind diff --git a/test/CodeGen/X86/2012-04-26-sdglue.ll b/test/CodeGen/X86/2012-04-26-sdglue.ll index 186fafb..16706ae 100644 --- a/test/CodeGen/X86/2012-04-26-sdglue.ll +++ b/test/CodeGen/X86/2012-04-26-sdglue.ll @@ -5,8 +5,8 @@ ; It's hard to test for the ISEL condition because CodeGen optimizes ; away the bugpointed code. Just ensure the basics are still there. 
;CHECK-LABEL: func: -;CHECK: vxorps -;CHECK: vinsertf128 +;CHECK: vpxor +;CHECK: vinserti128 ;CHECK: vpshufd ;CHECK: vpshufd ;CHECK: vmulps diff --git a/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll b/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll index 503aab4..d41b432 100644 --- a/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll +++ b/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll @@ -6,7 +6,7 @@ ; ; CHECK: %entry ; CHECK: DEBUG_VALUE: hg -; CHECK: je +; CHECK: j %struct.node.0.27 = type { i16, double, [3 x double], i32, i32 } %struct.hgstruct.2.29 = type { %struct.bnode.1.28*, [3 x double], double, [3 x double] } @@ -36,6 +36,7 @@ return: ; preds = %for.cond.preheader, declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!12} !0 = metadata !{i32 786449, metadata !11, i32 12, metadata !"clang version 3.3 (trunk 168918) (llvm/trunk 168920)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, null, metadata !""} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/Olden/bh/newbh.c] [DW_LANG_C99] !1 = metadata !{metadata !2} @@ -44,5 +45,6 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !4 = metadata !{i32 786689, null, metadata !"hg", metadata !5, i32 67109589, metadata !6, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [hg] [line 725] !5 = metadata !{i32 786473, metadata !11} ; [ DW_TAG_file_type ] !6 = metadata !{i32 786454, metadata !11, null, metadata !"hgstruct", i32 492, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ] [hgstruct] [line 492, size 0, align 0, offset 0] [from ] -!7 = metadata !{i32 786451, metadata !11, null, metadata !"", i32 487, i64 512, i64 64, i32 0, i32 0, null, null, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [line 487, size 512, align 64, offset 0] [from ] +!7 = metadata !{i32 786451, metadata !11, null, metadata !"", i32 487, i64 512, i64 64, i32 0, i32 0, null, null, i32 0, null, i32 0, null} ; [ 
DW_TAG_structure_type ] [line 487, size 512, align 64, offset 0] [def] [from ] !11 = metadata !{metadata !"MultiSource/Benchmarks/Olden/bh/newbh.c", metadata !"MultiSource/Benchmarks/Olden/bh"} +!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/2012-11-30-misched-dbg.ll b/test/CodeGen/X86/2012-11-30-misched-dbg.ll index 21e105d..7befa6b 100644 --- a/test/CodeGen/X86/2012-11-30-misched-dbg.ll +++ b/test/CodeGen/X86/2012-11-30-misched-dbg.ll @@ -63,6 +63,7 @@ if.else4114: ; preds = %if.then4073 declare i32 @__sprintf_chk(i8*, i32, i64, i8*, ...) !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!35} !0 = metadata !{i32 786449, metadata !19, i32 12, metadata !"clang version 3.3 (trunk 168918) (llvm/trunk 168920)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/MiBench/consumer-typeset/MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c] [DW_LANG_C99] !1 = metadata !{metadata !2} @@ -79,7 +80,7 @@ declare i32 @__sprintf_chk(i8*, i32, i64, i8*, ...) 
!12 = metadata !{i32 786443, metadata !13, i32 249, i32 0, metadata !14, i32 23} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c] !13 = metadata !{i32 786443, metadata !3, i32 221, i32 0, metadata !14, i32 19} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c] !14 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ] -!15 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 160, i64 8, i32 0, i32 0, metadata !16, metadata !17, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 160, align 8, offset 0] [from char] +!15 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 160, i64 8, i32 0, i32 0, metadata !16, metadata !17, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 160, align 8, offset 0] [from char] !16 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char] !17 = metadata !{metadata !18} !18 = metadata !{i32 786465, i64 0, i64 20} ; [ DW_TAG_subrange_type ] [0, 19] @@ -134,3 +135,4 @@ declare void @_Znwm() !32 = metadata !{i32 786454, metadata !34, null, metadata !"HM", i32 28, i64 0, i64 0, i64 0, i32 0, null} ; [ DW_TAG_typedef ] [HM] [line 28, size 0, align 0, offset 0] [from ] !33 = metadata !{i32 786473, metadata !34} ; [ DW_TAG_file_type ] !34 = metadata !{metadata !"SingleSource/Benchmarks/Shootout-C++/hash.cpp", metadata !"SingleSource/Benchmarks/Shootout-C++"} +!35 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/2012-11-30-regpres-dbg.ll b/test/CodeGen/X86/2012-11-30-regpres-dbg.ll index dcbe109..5aec3d9 100644 --- a/test/CodeGen/X86/2012-11-30-regpres-dbg.ll +++ b/test/CodeGen/X86/2012-11-30-regpres-dbg.ll @@ -34,11 +34,13 @@ invoke.cont44: ; preds = %if.end } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!8} !0 = metadata !{i32 786449, metadata !6, i32 4, metadata !"clang 
version 3.3 (trunk 168984) (llvm/trunk 168983)", i1 true, metadata !"", i32 0, metadata !2, metadata !7, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/Bullet/MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp] [DW_LANG_C_plus_plus] !2 = metadata !{null} !3 = metadata !{i32 786688, null, metadata !"callback", null, i32 214, metadata !4, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [callback] [line 214] -!4 = metadata !{i32 786451, metadata !6, null, metadata !"btCompoundLeafCallback", i32 90, i64 512, i64 64, i32 0, i32 0, null, null, i32 0, null, null} ; [ DW_TAG_structure_type ] [btCompoundLeafCallback] [line 90, size 512, align 64, offset 0] [from ] +!4 = metadata !{i32 786451, metadata !6, null, metadata !"btCompoundLeafCallback", i32 90, i64 512, i64 64, i32 0, i32 0, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [btCompoundLeafCallback] [line 90, size 512, align 64, offset 0] [def] [from ] !5 = metadata !{i32 786473, metadata !6} ; [ DW_TAG_file_type ] !6 = metadata !{metadata !"MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp", metadata !"MultiSource/Benchmarks/Bullet"} !7 = metadata !{i32 0} +!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll b/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll index f0c7781..0ff9d39 100644 --- a/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll +++ b/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll @@ -23,6 +23,6 @@ entry: declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) #1 -attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" 
"unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind } diff --git a/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll b/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll new file mode 100644 index 0000000..3455b68 --- /dev/null +++ b/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll @@ -0,0 +1,132 @@ +; RUN: llc -mtriple x86_64-apple-darwin -O0 < %s -o - | FileCheck %s +; +; During X86 fastisel, the address of indirect call was resolved +; through bitcast, ptrtoint, and inttoptr instructions. This is valid +; only if the related instructions are in that same basic block, otherwise +; we may reference variables that were not live accross basic blocks +; resulting in undefined virtual registers. +; +; In this example, this is illustrated by a the spill/reload of the +; LOADED_PTR_SLOT. +; +; Before this patch, the compiler was accessing two different spill +; slots. +; <rdar://problem/15192473> + +; CHECK-LABEL: @test_bitcast +; Load the value of the function pointer: %loaded_ptr +; CHECK: movq (%rdi), [[LOADED_PTR:%[a-z]+]] +; Spill %arg2. +; CHECK: movq %rdx, [[ARG2_SLOT:[0-9]*\(%[a-z]+\)]] +; Spill %loaded_ptr. +; CHECK: movq [[LOADED_PTR]], [[LOADED_PTR_SLOT:[0-9]*\(%[a-z]+\)]] +; Perform the indirect call. +; Load the first argument +; CHECK: movq [[ARG2_SLOT]], %rdi +; Load the second argument +; CHECK: movq [[ARG2_SLOT]], %rsi +; Load the thrid argument +; CHECK: movq [[ARG2_SLOT]], %rdx +; Load the function pointer. +; CHECK: movq [[LOADED_PTR_SLOT]], [[FCT_PTR:%[a-z]+]] +; Call. 
+; CHECK: callq *[[FCT_PTR]] +; CHECK: ret +define i64 @test_bitcast(i64 (i64, i64, i64)** %arg, i1 %bool, i64 %arg2) { +entry: + %loaded_ptr = load i64 (i64, i64, i64)** %arg, align 8 + %raw = bitcast i64 (i64, i64, i64)* %loaded_ptr to i8* + switch i1 %bool, label %default [ + i1 true, label %label_true + i1 false, label %label_end + ] +default: + unreachable + +label_true: + br label %label_end + +label_end: + %fct_ptr = bitcast i8* %raw to i64 (i64, i64, i64)* + %res = call i64 %fct_ptr(i64 %arg2, i64 %arg2, i64 %arg2) + ret i64 %res +} + +; CHECK-LABEL: @test_inttoptr +; Load the value of the function pointer: %loaded_ptr +; CHECK: movq (%rdi), [[LOADED_PTR:%[a-z]+]] +; Spill %arg2. +; CHECK: movq %rdx, [[ARG2_SLOT:[0-9]*\(%[a-z]+\)]] +; Spill %loaded_ptr. +; CHECK: movq [[LOADED_PTR]], [[LOADED_PTR_SLOT:[0-9]*\(%[a-z]+\)]] +; Perform the indirect call. +; Load the first argument +; CHECK: movq [[ARG2_SLOT]], %rdi +; Load the second argument +; CHECK: movq [[ARG2_SLOT]], %rsi +; Load the thrid argument +; CHECK: movq [[ARG2_SLOT]], %rdx +; Load the function pointer. +; CHECK: movq [[LOADED_PTR_SLOT]], [[FCT_PTR:%[a-z]+]] +; Call. +; CHECK: callq *[[FCT_PTR]] +; CHECK: ret +define i64 @test_inttoptr(i64 (i64, i64, i64)** %arg, i1 %bool, i64 %arg2) { +entry: + %loaded_ptr = load i64 (i64, i64, i64)** %arg, align 8 + %raw = ptrtoint i64 (i64, i64, i64)* %loaded_ptr to i64 + switch i1 %bool, label %default [ + i1 true, label %label_true + i1 false, label %label_end + ] +default: + unreachable + +label_true: + br label %label_end + +label_end: + %fct_ptr = inttoptr i64 %raw to i64 (i64, i64, i64)* + %res = call i64 %fct_ptr(i64 %arg2, i64 %arg2, i64 %arg2) + ret i64 %res +} + +; CHECK-LABEL: @test_ptrtoint +; Load the value of the function pointer: %loaded_ptr +; CHECK: movq (%rdi), [[LOADED_PTR:%[a-z]+]] +; Spill %arg2. +; CHECK: movq %rdx, [[ARG2_SLOT:[0-9]*\(%[a-z]+\)]] +; Spill %loaded_ptr. 
+; CHECK: movq [[LOADED_PTR]], [[LOADED_PTR_SLOT:[0-9]*\(%[a-z]+\)]] +; Perform the indirect call. +; Load the first argument +; CHECK: movq [[ARG2_SLOT]], %rdi +; Load the second argument +; CHECK: movq [[ARG2_SLOT]], %rsi +; Load the thrid argument +; CHECK: movq [[ARG2_SLOT]], %rdx +; Load the function pointer. +; CHECK: movq [[LOADED_PTR_SLOT]], [[FCT_PTR:%[a-z]+]] +; Call. +; CHECK: callq *[[FCT_PTR]] +; CHECK: ret +define i64 @test_ptrtoint(i64 (i64, i64, i64)** %arg, i1 %bool, i64 %arg2) { +entry: + %loaded_ptr = load i64 (i64, i64, i64)** %arg, align 8 + %raw = bitcast i64 (i64, i64, i64)* %loaded_ptr to i8* + switch i1 %bool, label %default [ + i1 true, label %label_true + i1 false, label %label_end + ] +default: + unreachable + +label_true: + br label %label_end + +label_end: + %fct_int = ptrtoint i8* %raw to i64 + %fct_ptr = inttoptr i64 %fct_int to i64 (i64, i64, i64)* + %res = call i64 %fct_ptr(i64 %arg2, i64 %arg2, i64 %arg2) + ret i64 %res +} diff --git a/test/CodeGen/X86/3addr-16bit.ll b/test/CodeGen/X86/3addr-16bit.ll index 77c3c16..fafdfdb 100644 --- a/test/CodeGen/X86/3addr-16bit.ll +++ b/test/CodeGen/X86/3addr-16bit.ll @@ -34,7 +34,8 @@ entry: ; 64BIT-LABEL: t2: ; 64BIT-NOT: movw %si, %ax -; 64BIT: leal -1(%rsi), %eax +; 64BIT: decl %eax +; 64BIT: movzwl %ax %0 = icmp eq i16 %k, %c ; <i1> [#uses=1] %1 = add i16 %k, -1 ; <i16> [#uses=3] br i1 %0, label %bb, label %bb1 @@ -58,7 +59,7 @@ entry: ; 64BIT-LABEL: t3: ; 64BIT-NOT: movw %si, %ax -; 64BIT: leal 2(%rsi), %eax +; 64BIT: addl $2, %eax %0 = add i16 %k, 2 ; <i16> [#uses=3] %1 = icmp eq i16 %k, %c ; <i1> [#uses=1] br i1 %1, label %bb, label %bb1 @@ -81,7 +82,7 @@ entry: ; 64BIT-LABEL: t4: ; 64BIT-NOT: movw %si, %ax -; 64BIT: leal (%rsi,%rdi), %eax +; 64BIT: addl %edi, %eax %0 = add i16 %k, %c ; <i16> [#uses=3] %1 = icmp eq i16 %k, %c ; <i1> [#uses=1] br i1 %1, label %bb, label %bb1 diff --git a/test/CodeGen/X86/GC/lit.local.cfg b/test/CodeGen/X86/GC/lit.local.cfg index a8ad0f1..ba763cf 100644 
--- a/test/CodeGen/X86/GC/lit.local.cfg +++ b/test/CodeGen/X86/GC/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - targets = set(config.root.targets_to_build.split()) if not 'X86' in targets: config.unsupported = True diff --git a/test/CodeGen/X86/GC/ocaml-gc-assert.ll b/test/CodeGen/X86/GC/ocaml-gc-assert.ll new file mode 100644 index 0000000..b32ceca --- /dev/null +++ b/test/CodeGen/X86/GC/ocaml-gc-assert.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +; PR3168 + +; CHECK-LABEL: append + +define i32* @append() gc "ocaml" { +entry: + switch i32 0, label %L2 [i32 0, label %L1] +L1: + %var8 = alloca i8* + call void @llvm.gcroot(i8** %var8,i8* null) + br label %L3 +L2: + call ccc void @oread_runtime_casenotcovered() + unreachable +L3: + ret i32* null +} + +declare ccc void @oread_runtime_casenotcovered() +declare void @llvm.gcroot(i8**,i8*) diff --git a/test/CodeGen/X86/GC/ocaml-gc.ll b/test/CodeGen/X86/GC/ocaml-gc.ll index 44241a9..6d5f8ae 100644 --- a/test/CodeGen/X86/GC/ocaml-gc.ll +++ b/test/CodeGen/X86/GC/ocaml-gc.ll @@ -2,23 +2,23 @@ define i32 @main(i32 %x) nounwind gc "ocaml" { ; CHECK: .text -; CHECK-NEXT: .globl caml_3C_stdin_3E___code_begin -; CHECK-NEXT: caml_3C_stdin_3E___code_begin: +; CHECK-NEXT: .globl "caml<stdin>__code_begin" +; CHECK-NEXT: "caml<stdin>__code_begin": ; CHECK-NEXT: .data -; CHECK-NEXT: .globl caml_3C_stdin_3E___data_begin -; CHECK-NEXT: caml_3C_stdin_3E___data_begin: +; CHECK-NEXT: .globl "caml<stdin>__data_begin" +; CHECK-NEXT: "caml<stdin>__data_begin": %puts = tail call i32 @foo(i32 %x) ret i32 0 -; CHECK: .globl caml_3C_stdin_3E___code_end -; CHECK-NEXT: caml_3C_stdin_3E___code_end: +; CHECK: .globl "caml<stdin>__code_end" +; CHECK-NEXT: "caml<stdin>__code_end": ; CHECK-NEXT: .data -; CHECK-NEXT: .globl caml_3C_stdin_3E___data_end -; CHECK-NEXT: caml_3C_stdin_3E___data_end: +; CHECK-NEXT: .globl "caml<stdin>__data_end" +; CHECK-NEXT: "caml<stdin>__data_end": ; CHECK-NEXT: 
.quad 0 -; CHECK-NEXT: .globl caml_3C_stdin_3E___frametable -; CHECK-NEXT: caml_3C_stdin_3E___frametable: +; CHECK-NEXT: .globl "caml<stdin>__frametable" +; CHECK-NEXT: "caml<stdin>__frametable": ; CHECK-NEXT: .short 1 ; CHECK-NEXT: .align 8 ; CHECK-NEXT: # live roots for main diff --git a/test/CodeGen/X86/MachineSink-DbgValue.ll b/test/CodeGen/X86/MachineSink-DbgValue.ll index df9580c..584e644 100644 --- a/test/CodeGen/X86/MachineSink-DbgValue.ll +++ b/test/CodeGen/X86/MachineSink-DbgValue.ll @@ -26,11 +26,12 @@ bb2: declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!22} !0 = metadata !{i32 786449, metadata !20, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, metadata !21, metadata !21, metadata !18, null, null, null} ; [ DW_TAG_compile_unit ] -!1 = metadata !{i32 786478, metadata !20, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i32*)* @foo, null, null, metadata !19, i32 0} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786478, metadata !20, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i32*)* @foo, null, null, metadata !19, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [foo] !2 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 786453, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, null, metadata !0, metadata 
!"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !6 = metadata !{i32 786689, metadata !1, metadata !"i", metadata !2, i32 16777218, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] @@ -49,3 +50,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !19 = metadata !{metadata !6, metadata !7, metadata !10} !20 = metadata !{metadata !"a.c", metadata !"/private/tmp"} !21 = metadata !{i32 0} +!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/StackColoring-dbg.ll b/test/CodeGen/X86/StackColoring-dbg.ll index 8b67a44..51d0d17 100644 --- a/test/CodeGen/X86/StackColoring-dbg.ll +++ b/test/CodeGen/X86/StackColoring-dbg.ll @@ -25,6 +25,11 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!23} +!0 = metadata !{i32 524305, metadata !1, i32 1, metadata !"clang", i1 true, metadata !"", i32 0, metadata !2, metadata !2, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ] +!1 = metadata !{metadata !"t.c", metadata !""} !16 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} !2 = metadata !{i32 0} !22 = metadata !{i32 786688, null, metadata !"x", metadata !2, i32 16, metadata !16, i32 0, i32 0} +!23 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/StackColoring.ll b/test/CodeGen/X86/StackColoring.ll index f1d9296..a8e3537 100644 --- a/test/CodeGen/X86/StackColoring.ll +++ b/test/CodeGen/X86/StackColoring.ll @@ -4,8 +4,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" -;YESCOLOR: subq $136, %rsp -;NOCOLOR: subq $264, %rsp +;YESCOLOR: subq $144, %rsp +;NOCOLOR: subq $272, %rsp define i32 @myCall_w2(i32 %in) { entry: 
diff --git a/test/CodeGen/X86/abi-isel.ll b/test/CodeGen/X86/abi-isel.ll index 3b84231..633e70f 100644 --- a/test/CodeGen/X86/abi-isel.ll +++ b/test/CodeGen/X86/abi-isel.ll @@ -1,16 +1,16 @@ -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-32-STATIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-32-PIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-64-STATIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-64-PIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic 
-code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-32-STATIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-32-PIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-64-STATIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-64-PIC @src = external global [131072 x 
i32] @dst = external global [131072 x i32] diff --git a/test/CodeGen/X86/add.ll b/test/CodeGen/X86/add.ll index f36577b..62a62a4 100644 --- a/test/CodeGen/X86/add.ll +++ b/test/CodeGen/X86/add.ll @@ -9,7 +9,7 @@ define i32 @test1(i32 inreg %a) nounwind { %b = add i32 %a, 128 ret i32 %b ; X32: subl $-128, %eax -; X64: subl $-128, +; X64: subl $-128, } define i64 @test2(i64 inreg %a) nounwind { %b = add i64 %a, 2147483648 @@ -20,7 +20,7 @@ define i64 @test2(i64 inreg %a) nounwind { define i64 @test3(i64 inreg %a) nounwind { %b = add i64 %a, 128 ret i64 %b - + ; X32: addl $128, %eax ; X64: subq $-128, } @@ -38,7 +38,7 @@ normal: overflow: ret i1 false - + ; X32-LABEL: test4: ; X32: addl ; X32-NEXT: jo @@ -82,11 +82,11 @@ define i64 @test6(i64 %A, i32 %B) nounwind { ret i64 %tmp5 ; X32-LABEL: test6: -; X32: movl 12(%esp), %edx +; X32: movl 4(%esp), %eax +; X32-NEXT: movl 12(%esp), %edx ; X32-NEXT: addl 8(%esp), %edx -; X32-NEXT: movl 4(%esp), %eax ; X32-NEXT: ret - + ; X64-LABEL: test6: ; X64: shlq $32, %r[[A1]] ; X64: leaq (%r[[A1]],%r[[A0]]), %rax diff --git a/test/CodeGen/X86/aes_intrinsics.ll b/test/CodeGen/X86/aes_intrinsics.ll new file mode 100644 index 0000000..fc1a2cc --- /dev/null +++ b/test/CodeGen/X86/aes_intrinsics.ll @@ -0,0 +1,48 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+aes,-avx | FileCheck %s + +define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: aesdec + %res = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone + + +define <2 x i64> @test_x86_aesni_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: aesdeclast + %res = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone + + +define <2 x i64> 
@test_x86_aesni_aesenc(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: aesenc + %res = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone + + +define <2 x i64> @test_x86_aesni_aesenclast(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: aesenclast + %res = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone + + +define <2 x i64> @test_x86_aesni_aesimc(<2 x i64> %a0) { + ; CHECK: aesimc + %res = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone + + +define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) { + ; CHECK: aeskeygenassist + %res = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone diff --git a/test/CodeGen/X86/alias-error.ll b/test/CodeGen/X86/alias-error.ll new file mode 100644 index 0000000..8f01dcf --- /dev/null +++ b/test/CodeGen/X86/alias-error.ll @@ -0,0 +1,5 @@ +; RUN: not llc -mtriple=i686-pc-linux-gnu %s -o /dev/null 2>&1 | FileCheck %s + +@a = external global i32 +@b = alias i32* @a +; CHECK: b: Target doesn't support aliases to declarations diff --git a/test/CodeGen/X86/aliases.ll b/test/CodeGen/X86/aliases.ll index f920279..d0a262d 100644 --- a/test/CodeGen/X86/aliases.ll +++ b/test/CodeGen/X86/aliases.ll @@ -1,26 +1,38 @@ -; RUN: llc < %s -mtriple=i686-pc-linux-gnu -asm-verbose=false -o %t -; RUN: grep globl %t | count 6 -; RUN: grep weak %t | count 1 -; RUN: grep hidden %t | count 1 -; RUN: grep protected %t | count 1 +; RUN: llc < %s -mtriple=i686-pc-linux-gnu -asm-verbose=false | 
FileCheck %s -@bar = external global i32 +@bar = global i32 42 + +; CHECK-DAG: .globl foo1 @foo1 = alias i32* @bar + +; CHECK-DAG: .globl foo2 @foo2 = alias i32* @bar %FunTy = type i32() -declare i32 @foo_f() +define i32 @foo_f() { + ret i32 0 +} +; CHECK-DAG: .weak bar_f @bar_f = alias weak %FunTy* @foo_f +@bar_l = alias linkonce_odr i32* @bar +; CHECK-DAG: .weak bar_l + @bar_i = alias internal i32* @bar +; CHECK-DAG: .globl A @A = alias bitcast (i32* @bar to i64*) +; CHECK-DAG: .globl bar_h +; CHECK-DAG: .hidden bar_h @bar_h = hidden alias i32* @bar +; CHECK-DAG: .globl bar_p +; CHECK-DAG: .protected bar_p @bar_p = protected alias i32* @bar +; CHECK-DAG: .globl test define i32 @test() { entry: %tmp = load i32* @foo1 diff --git a/test/CodeGen/X86/alloca-align-rounding.ll b/test/CodeGen/X86/alloca-align-rounding.ll index 3d76fb0..74b9470 100644 --- a/test/CodeGen/X86/alloca-align-rounding.ll +++ b/test/CodeGen/X86/alloca-align-rounding.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux | FileCheck %s +; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux -enable-misched=false | FileCheck %s declare void @bar(<2 x i64>* %n) diff --git a/test/CodeGen/X86/anyregcc-crash.ll b/test/CodeGen/X86/anyregcc-crash.ll new file mode 100644 index 0000000..cf6f6ed --- /dev/null +++ b/test/CodeGen/X86/anyregcc-crash.ll @@ -0,0 +1,17 @@ +; RUN: not llc < %s -mtriple=x86_64-apple-darwin 2>&1 | FileCheck %s +; +; Check that misuse of anyregcc results in a compile time error. 
+ +; CHECK: LLVM ERROR: ran out of registers during register allocation +define i64 @anyreglimit(i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6, + i64 %v7, i64 %v8, i64 %v9, i64 %v10, i64 %v11, i64 %v12, + i64 %v13, i64 %v14, i64 %v15, i64 %v16) { +entry: + %result = tail call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 16, + i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6, + i64 %v7, i64 %v8, i64 %v9, i64 %v10, i64 %v11, i64 %v12, + i64 %v13, i64 %v14, i64 %v15, i64 %v16) + ret i64 %result +} + +declare i64 @llvm.experimental.patchpoint.i64(i32, i32, i8*, i32, ...) diff --git a/test/CodeGen/X86/anyregcc.ll b/test/CodeGen/X86/anyregcc.ll new file mode 100644 index 0000000..8109f87 --- /dev/null +++ b/test/CodeGen/X86/anyregcc.ll @@ -0,0 +1,348 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s + +; Stackmap Header: no constants - 6 callsites +; CHECK-LABEL: .section __LLVM_STACKMAPS,__llvm_stackmaps +; CHECK-NEXT: __LLVM_StackMaps: +; Header +; CHECK-NEXT: .long 0 +; Num Constants +; CHECK-NEXT: .long 0 +; Num Callsites +; CHECK-NEXT: .long 8 + +; test +; CHECK-NEXT: .long 0 +; CHECK-LABEL: .long L{{.*}}-_test +; CHECK-NEXT: .short 0 +; 3 locations +; CHECK-NEXT: .short 3 +; Loc 0: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 2: Constant 3 +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 3 +define i64 @test() nounwind ssp uwtable { +entry: + call anyregcc void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 0, i32 15, i8* null, i32 2, i32 1, i32 2, i64 3) + ret i64 0 +} + +; property access 1 - %obj is an anyreg call argument and should therefore be in a register +; CHECK-NEXT: .long 1 +; CHECK-LABEL: .long 
L{{.*}}-_property_access1 +; CHECK-NEXT: .short 0 +; 2 locations +; CHECK-NEXT: .short 2 +; Loc 0: Register <-- this is the return register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +define i64 @property_access1(i8* %obj) nounwind ssp uwtable { +entry: + %f = inttoptr i64 12297829382473034410 to i8* + %ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 1, i32 15, i8* %f, i32 1, i8* %obj) + ret i64 %ret +} + +; property access 2 - %obj is an anyreg call argument and should therefore be in a register +; CHECK-NEXT: .long 2 +; CHECK-LABEL: .long L{{.*}}-_property_access2 +; CHECK-NEXT: .short 0 +; 2 locations +; CHECK-NEXT: .short 2 +; Loc 0: Register <-- this is the return register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +define i64 @property_access2() nounwind ssp uwtable { +entry: + %obj = alloca i64, align 8 + %f = inttoptr i64 12297829382473034410 to i8* + %ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 2, i32 15, i8* %f, i32 1, i64* %obj) + ret i64 %ret +} + +; property access 3 - %obj is a frame index +; CHECK-NEXT: .long 3 +; CHECK-LABEL: .long L{{.*}}-_property_access3 +; CHECK-NEXT: .short 0 +; 2 locations +; CHECK-NEXT: .short 2 +; Loc 0: Register <-- this is the return register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register <-- this will be folded once folding for FI is implemented +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +define i64 @property_access3() nounwind ssp uwtable { +entry: + 
%obj = alloca i64, align 8 + %f = inttoptr i64 12297829382473034410 to i8* + %ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 3, i32 15, i8* %f, i32 0, i64* %obj) + ret i64 %ret +} + +; anyreg_test1 +; CHECK-NEXT: .long 4 +; CHECK-LABEL: .long L{{.*}}-_anyreg_test1 +; CHECK-NEXT: .short 0 +; 14 locations +; CHECK-NEXT: .short 14 +; Loc 0: Register <-- this is the return register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 2: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 3: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 4: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 5: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 6: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 7: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 8: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 9: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 10: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 11: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 12: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 13: Register 
+; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +define i64 @anyreg_test1(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable { +entry: + %f = inttoptr i64 12297829382473034410 to i8* + %ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 4, i32 15, i8* %f, i32 13, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) + ret i64 %ret +} + +; anyreg_test2 +; CHECK-NEXT: .long 5 +; CHECK-LABEL: .long L{{.*}}-_anyreg_test2 +; CHECK-NEXT: .short 0 +; 14 locations +; CHECK-NEXT: .short 14 +; Loc 0: Register <-- this is the return register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 2: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 3: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 4: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 5: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 6: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 7: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 8: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 9: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 10: Register 
+; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 11: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 12: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 13: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +define i64 @anyreg_test2(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable { +entry: + %f = inttoptr i64 12297829382473034410 to i8* + %ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 5, i32 15, i8* %f, i32 8, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) + ret i64 %ret +} + +; Test spilling the return value of an anyregcc call. +; +; <rdar://problem/15432754> [JS] Assertion: "Folded a def to a non-store!" 
+; +; CHECK-LABEL: .long 12 +; CHECK-LABEL: .long L{{.*}}-_patchpoint_spilldef +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 3 +; Loc 0: Register (some register that will be spilled to the stack) +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register RDI +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short 5 +; CHECK-NEXT: .long 0 +; Loc 1: Register RSI +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short 4 +; CHECK-NEXT: .long 0 +define i64 @patchpoint_spilldef(i64 %p1, i64 %p2, i64 %p3, i64 %p4) { +entry: + %result = tail call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2) + tail call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() nounwind + ret i64 %result +} + +; Test spilling the arguments of an anyregcc call. +; +; <rdar://problem/15487687> [JS] AnyRegCC argument ends up being spilled +; +; CHECK-LABEL: .long 13 +; CHECK-LABEL: .long L{{.*}}-_patchpoint_spillargs +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 5 +; Loc 0: Return a register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Arg0 in a Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 2: Arg1 in a Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 3: Arg2 spilled to RBP + +; CHECK-NEXT: .byte 3 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short 7 +; CHECK-NEXT: .long {{[0-9]+}} +; Loc 4: Arg3 spilled to RBP + +; CHECK-NEXT: .byte 3 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short 7 +; CHECK-NEXT: .long {{[0-9]+}} +define i64 @patchpoint_spillargs(i64 %p1, i64 %p2, i64 %p3, i64 %p4) { +entry: + tail call void asm sideeffect "nop", 
"~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() nounwind + %result = tail call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 13, i32 15, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2, i64 %p3, i64 %p4) + ret i64 %result +} + +declare void @llvm.experimental.patchpoint.void(i32, i32, i8*, i32, ...) +declare i64 @llvm.experimental.patchpoint.i64(i32, i32, i8*, i32, ...) diff --git a/test/CodeGen/X86/atom-call-reg-indirect.ll b/test/CodeGen/X86/atom-call-reg-indirect.ll index 933b98b..48f2d4c 100644 --- a/test/CodeGen/X86/atom-call-reg-indirect.ll +++ b/test/CodeGen/X86/atom-call-reg-indirect.ll @@ -2,6 +2,8 @@ ; RUN: llc < %s -mcpu=core2 -mtriple=i686-linux | FileCheck -check-prefix=ATOM-NOT32 %s ; RUN: llc < %s -mcpu=atom -mtriple=x86_64-linux | FileCheck -check-prefix=ATOM64 %s ; RUN: llc < %s -mcpu=core2 -mtriple=x86_64-linux | FileCheck -check-prefix=ATOM-NOT64 %s +; RUN: llc < %s -mcpu=slm -mtriple=i686-linux | FileCheck -check-prefix=SLM32 %s +; RUN: llc < %s -mcpu=slm -mtriple=x86_64-linux | FileCheck -check-prefix=SLM64 %s ; fn_ptr.ll @@ -20,6 +22,10 @@ entry: ;ATOM64: movq (%rcx), %rcx ;ATOM64: callq *%rcx ;ATOM-NOT64: callq *(%rcx) + ;SLM32: movl (%ecx), %ecx + ;SLM32: calll *%ecx + ;SLM64: movq (%rcx), %rcx + ;SLM64: callq *%rcx tail call void %1(%class.A* %call) ret i32 0 } @@ -40,6 +46,10 @@ entry: ;ATOM64: movq (%rax), %rax ;ATOM64: callq *%rax ;ATOM-NOT64: callq *(%rax) + ;SLM32: movl (%eax), %eax + ;SLM32: calll *%eax + ;SLM64: movq (%rax), %rax + ;SLM64: callq *%rax tail call void %1(i32 2) ret i32 0 } diff --git a/test/CodeGen/X86/atom-lea-addw-bug.ll b/test/CodeGen/X86/atom-lea-addw-bug.ll new file mode 100644 index 0000000..5cda2df --- /dev/null +++ b/test/CodeGen/X86/atom-lea-addw-bug.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -mcpu=atom | FileCheck %s + +; ModuleID = 'bugpoint-reduced-simplified.bc' +target triple = "x86_64-apple-darwin12.5.0" + +define i32 
@DoLayout() { +entry: + %tmp1 = load i16* undef, align 2 + %tmp17 = load i16* null, align 2 + %tmp19 = load i16* undef, align 2 + %shl = shl i16 %tmp19, 1 + %add55 = add i16 %tmp17, %tmp1 + %add57 = add i16 %add55, %shl + %conv60 = zext i16 %add57 to i32 + %add61 = add nsw i32 %conv60, 0 + %conv63 = and i32 %add61, 65535 + ret i32 %conv63 +; CHECK: addw +} diff --git a/test/CodeGen/X86/atom-sched.ll b/test/CodeGen/X86/atom-sched.ll index 0d97e85..fd18472 100644 --- a/test/CodeGen/X86/atom-sched.ll +++ b/test/CodeGen/X86/atom-sched.ll @@ -1,4 +1,5 @@ ; RUN: llc <%s -O2 -mcpu=atom -march=x86 -relocation-model=static | FileCheck -check-prefix=atom %s +; RUN: llc <%s -O2 -mcpu=slm -march=x86 -relocation-model=static | FileCheck -check-prefix=slm %s ; RUN: llc <%s -O2 -mcpu=core2 -march=x86 -relocation-model=static | FileCheck %s ; @@ -13,6 +14,9 @@ define void @func() nounwind uwtable { ; atom: imull ; atom-NOT: movl ; atom: imull +; slm: imull +; slm-NOT: movl +; slm: imull ; CHECK: imull ; CHECK: movl ; CHECK: imull diff --git a/test/CodeGen/X86/atomic-dagsched.ll b/test/CodeGen/X86/atomic-dagsched.ll index 05e630b..aa05757 100644 --- a/test/CodeGen/X86/atomic-dagsched.ll +++ b/test/CodeGen/X86/atomic-dagsched.ll @@ -34,8 +34,8 @@ dim_0_vector_pre_head.i: ; preds = %loop vector_kernel_entry.i: ; preds = %vector_kernel_entry.i, %dim_0_vector_pre_head.i %asr.iv9 = phi i8* [ %scevgep10, %vector_kernel_entry.i ], [ %asr.iv6, %dim_0_vector_pre_head.i ] %asr.iv = phi i64 [ %asr.iv.next, %vector_kernel_entry.i ], [ %vector.size.i, %dim_0_vector_pre_head.i ] - %8 = bitcast i8* %ptrtoarg4 to i32 addrspace(1)* - %asr.iv911 = bitcast i8* %asr.iv9 to <8 x i32> addrspace(1)* + %8 = addrspacecast i8* %ptrtoarg4 to i32 addrspace(1)* + %asr.iv911 = addrspacecast i8* %asr.iv9 to <8 x i32> addrspace(1)* %9 = load <8 x i32> addrspace(1)* %asr.iv911, align 4 %extract8vector_func.i = extractelement <8 x i32> %9, i32 0 %extract9vector_func.i = extractelement <8 x i32> %9, i32 1 @@ -73,8 
+73,8 @@ dim_0_pre_head.i: ; preds = %scalarIf.i scalar_kernel_entry.i: ; preds = %scalar_kernel_entry.i, %dim_0_pre_head.i %asr.iv12 = phi i64 [ %asr.iv.next13, %scalar_kernel_entry.i ], [ %22, %dim_0_pre_head.i ] - %23 = bitcast i8* %asr.iv6 to i32 addrspace(1)* - %24 = bitcast i8* %ptrtoarg4 to i32 addrspace(1)* + %23 = addrspacecast i8* %asr.iv6 to i32 addrspace(1)* + %24 = addrspacecast i8* %ptrtoarg4 to i32 addrspace(1)* %scevgep16 = getelementptr i32 addrspace(1)* %23, i64 %asr.iv12 %25 = load i32 addrspace(1)* %scevgep16, align 4 %26 = atomicrmw min i32 addrspace(1)* %24, i32 %25 seq_cst diff --git a/test/CodeGen/X86/avx-arith.ll b/test/CodeGen/X86/avx-arith.ll index 4aa3370..a9da1ec 100644 --- a/test/CodeGen/X86/avx-arith.ll +++ b/test/CodeGen/X86/avx-arith.ll @@ -240,15 +240,15 @@ define <16 x i16> @vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone { ; CHECK-NEXT: vpmuludq %xmm ; CHECK-NEXT: vpsllq $32, %xmm ; CHECK-NEXT: vpaddq %xmm -; CHECK-NEXT: vpmuludq %xmm ; CHECK-NEXT: vpsrlq $32, %xmm ; CHECK-NEXT: vpmuludq %xmm ; CHECK-NEXT: vpsllq $32, %xmm +; CHECK-NEXT: vpaddq %xmm +; CHECK-NEXT: vpmuludq %xmm ; CHECK-NEXT: vpsrlq $32, %xmm ; CHECK-NEXT: vpmuludq %xmm ; CHECK-NEXT: vpsllq $32, %xmm ; CHECK-NEXT: vpaddq %xmm -; CHECK-NEXT: vpaddq %xmm ; CHECK-NEXT: vpsrlq $32, %xmm ; CHECK-NEXT: vpmuludq %xmm ; CHECK-NEXT: vpsllq $32, %xmm @@ -269,4 +269,3 @@ define <4 x float> @int_sqrt_ss() { %x2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %x1) nounwind ret <4 x float> %x2 } - diff --git a/test/CodeGen/X86/avx-basic.ll b/test/CodeGen/X86/avx-basic.ll index 64c4627..1fd9085 100644 --- a/test/CodeGen/X86/avx-basic.ll +++ b/test/CodeGen/X86/avx-basic.ll @@ -122,10 +122,10 @@ define <16 x i16> @build_vec_16x16(i16 %a) nounwind readonly { ret <16 x i16> %res } -;;; Check that VMOVPQIto64rr generates the assembly string "vmovd". Previously +;;; Check that VMOVPQIto64rr generates the assembly string "vmovq". 
Previously ;;; an incorrect mnemonic of "movd" was printed for this instruction. ; CHECK: VMOVPQIto64rr -; CHECK: vmovd +; CHECK: vmovq define i64 @VMOVPQIto64rr(<2 x i64> %a) { entry: %vecext.i = extractelement <2 x i64> %a, i32 0 diff --git a/test/CodeGen/X86/avx-bitcast.ll b/test/CodeGen/X86/avx-bitcast.ll index ecc71be..c9d828c 100644 --- a/test/CodeGen/X86/avx-bitcast.ll +++ b/test/CodeGen/X86/avx-bitcast.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -O0 -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s ; CHECK: vmovsd (% -; CHECK-NEXT: vmovd %xmm +; CHECK-NEXT: vmovq %xmm define i64 @bitcasti64tof64() { %a = load double* undef %b = bitcast double %a to i64 diff --git a/test/CodeGen/X86/avx-intel-ocl.ll b/test/CodeGen/X86/avx-intel-ocl.ll index 0550720..7337815 100644 --- a/test/CodeGen/X86/avx-intel-ocl.ll +++ b/test/CodeGen/X86/avx-intel-ocl.ll @@ -32,7 +32,7 @@ declare i32 @func_int(i32, i32) define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind { %y = alloca <16 x float>, align 16 %x = fadd <16 x float> %a, %b - %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) + %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) %2 = load <16 x float>* %y, align 16 %3 = fadd <16 x float> %2, %1 ret <16 x float> %3 @@ -43,21 +43,21 @@ define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind { ; preserved ymm6-ymm15 ; WIN64: testf16_regs ; WIN64: call -; WIN64: vaddps {{%ymm[6-7]}}, %ymm0, %ymm0 -; WIN64: vaddps {{%ymm[6-7]}}, %ymm1, %ymm1 +; WIN64: vaddps {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}} +; WIN64: vaddps {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}} ; WIN64: ret ; preserved ymm8-ymm15 ; X64: testf16_regs ; X64: call -; X64: vaddps {{%ymm[8-9]}}, %ymm0, %ymm0 -; X64: vaddps {{%ymm[8-9]}}, %ymm1, %ymm1 +; X64: vaddps {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}} +; X64: vaddps {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}} ; X64: ret 
define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind { %y = alloca <16 x float>, align 16 %x = fadd <16 x float> %a, %b - %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) + %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) %2 = load <16 x float>* %y, align 16 %3 = fadd <16 x float> %1, %b %4 = fadd <16 x float> %2, %3 @@ -166,4 +166,3 @@ entry: %8 = shufflevector <8 x float> %3, <8 x float> %7, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> ret <8 x float> %8 } - diff --git a/test/CodeGen/X86/avx-sext.ll b/test/CodeGen/X86/avx-sext.ll index b9c7000..fb2287f 100644 --- a/test/CodeGen/X86/avx-sext.ll +++ b/test/CodeGen/X86/avx-sext.ll @@ -154,6 +154,17 @@ define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) { ret <4 x i64> %extmask } +; AVX-LABEL: sext_16i8_to_16i16 +; AVX: vpmovsxbw +; AVX: vmovhlps +; AVX: vpmovsxbw +; AVX: ret +define <16 x i16> @sext_16i8_to_16i16(<16 x i8> *%ptr) { + %X = load <16 x i8>* %ptr + %Y = sext <16 x i8> %X to <16 x i16> + ret <16 x i16> %Y +} + ; AVX: sext_4i8_to_4i64 ; AVX: vpslld $24 ; AVX: vpsrad $24 diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll index a625601..0956361 100644 --- a/test/CodeGen/X86/avx-shuffle.ll +++ b/test/CodeGen/X86/avx-shuffle.ll @@ -81,7 +81,7 @@ entry: define i32 @test9(<4 x i32> %a) nounwind { ; CHECK: test9 ; CHECK: vpextrd - %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 undef, i32 4> + %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 undef, i32 4> %r = extractelement <8 x i32> %b, i32 2 ; CHECK: ret ret i32 %r @@ -251,8 +251,8 @@ define <8 x float> @test19(<8 x float> %A, <8 x float>%B) nounwind { ; CHECK: swap8doubles ; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}} ; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}} -; CHECK: 
vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}} -; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}} +; CHECK: vinsertf128 $1, {{[0-9]*}}(%rdi), %ymm{{[0-9]+}} +; CHECK: vinsertf128 $1, {{[0-9]*}}(%rdi), %ymm{{[0-9]+}} ; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}} ; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}} ; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi) diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll index 5c01c2c..5d07815 100644 --- a/test/CodeGen/X86/avx-splat.ll +++ b/test/CodeGen/X86/avx-splat.ll @@ -20,7 +20,7 @@ entry: ret <16 x i16> %shuffle } -; CHECK: vmovd +; CHECK: vmovq ; CHECK-NEXT: vmovlhps %xmm ; CHECK-NEXT: vinsertf128 $1 define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp { diff --git a/test/CodeGen/X86/avx-trunc.ll b/test/CodeGen/X86/avx-trunc.ll index d007736..58d0a35 100644 --- a/test/CodeGen/X86/avx-trunc.ll +++ b/test/CodeGen/X86/avx-trunc.ll @@ -12,4 +12,9 @@ define <8 x i16> @trunc_32_16(<8 x i32> %A) nounwind uwtable readnone ssp{ %B = trunc <8 x i32> %A to <8 x i16> ret <8 x i16>%B } - +define <16 x i8> @trunc_16_8(<16 x i16> %A) nounwind uwtable readnone ssp{ +; CHECK-LABEL: trunc_16_8 +; CHECK: pshufb + %B = trunc <16 x i16> %A to <16 x i8> + ret <16 x i8> %B +} diff --git a/test/CodeGen/X86/avx-zext.ll b/test/CodeGen/X86/avx-zext.ll index e2b6c55..7511746 100644 --- a/test/CodeGen/X86/avx-zext.ll +++ b/test/CodeGen/X86/avx-zext.ll @@ -27,3 +27,15 @@ define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) { %t = zext <8 x i8> %z to <8 x i32> ret <8 x i32> %t } + +; PR17654 +define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %z) { +; CHECK-LABEL: zext_16i8_to_16i16: +; CHECK: vpxor +; CHECK: vpunpckhbw +; CHECK: vpunpcklbw +; CHECK: vinsertf128 +; CHECK: ret + %t = zext <16 x i8> %z to <16 x i16> + ret <16 x i16> %t +} diff --git a/test/CodeGen/X86/avx2-arith.ll b/test/CodeGen/X86/avx2-arith.ll index 997fa19..72bdd9d 100644 --- a/test/CodeGen/X86/avx2-arith.ll +++ b/test/CodeGen/X86/avx2-arith.ll @@ -148,3 
+148,21 @@ define <8 x i32> @mul_const9(<8 x i32> %x) { %y = mul <8 x i32> %x, <i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> ret <8 x i32> %y } + +; CHECK: mul_const10 +; CHECK: vpmulld +; CHECK: ret +define <4 x i32> @mul_const10(<4 x i32> %x) { + ; %x * 0x01010101 + %m = mul <4 x i32> %x, <i32 16843009, i32 16843009, i32 16843009, i32 16843009> + ret <4 x i32> %m +} + +; CHECK: mul_const11 +; CHECK: vpmulld +; CHECK: ret +define <4 x i32> @mul_const11(<4 x i32> %x) { + ; %x * 0x80808080 + %m = mul <4 x i32> %x, <i32 2155905152, i32 2155905152, i32 2155905152, i32 2155905152> + ret <4 x i32> %m +} diff --git a/test/CodeGen/X86/avx2-conversions.ll b/test/CodeGen/X86/avx2-conversions.ll index 3ce08dc..f49718e 100644 --- a/test/CodeGen/X86/avx2-conversions.ll +++ b/test/CodeGen/X86/avx2-conversions.ll @@ -63,6 +63,34 @@ define <8 x i32> @zext_8i8_8i32(<8 x i8> %A) nounwind { ret <8 x i32>%B } +; CHECK-LABEL: zext_16i8_16i16: +; CHECK: vpmovzxbw +; CHECK-NOT: vinsert +; CHECK: ret +define <16 x i16> @zext_16i8_16i16(<16 x i8> %z) { + %t = zext <16 x i8> %z to <16 x i16> + ret <16 x i16> %t +} + +; CHECK-LABEL: sext_16i8_16i16: +; CHECK: vpmovsxbw +; CHECK-NOT: vinsert +; CHECK: ret +define <16 x i16> @sext_16i8_16i16(<16 x i8> %z) { + %t = sext <16 x i8> %z to <16 x i16> + ret <16 x i16> %t +} + +; CHECK-LABEL: trunc_16i16_16i8: +; CHECK: vpshufb +; CHECK: vpshufb +; CHECK: vpor +; CHECK: ret +define <16 x i8> @trunc_16i16_16i8(<16 x i16> %z) { + %t = trunc <16 x i16> %z to <16 x i8> + ret <16 x i8> %t +} + ; CHECK: load_sext_test1 ; CHECK: vpmovsxdq (%r{{[^,]*}}), %ymm{{.*}} ; CHECK: ret diff --git a/test/CodeGen/X86/avx2-palignr.ll b/test/CodeGen/X86/avx2-palignr.ll index 176e02c..83573dc 100644 --- a/test/CodeGen/X86/avx2-palignr.ll +++ b/test/CodeGen/X86/avx2-palignr.ll @@ -51,7 +51,7 @@ define <16 x i16> @test7(<16 x i16> %A, <16 x i16> %B) nounwind { define <32 x i8> @test8(<32 x i8> %A, <32 x i8> %B) nounwind { ; CHECK-LABEL: test8: -; CHECK: palignr 
$5 +; CHECK: vpalignr $5 %C = shufflevector <32 x i8> %A, <32 x i8> %B, <32 x i32> <i32 5, i32 6, i32 7, i32 undef, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52> ret <32 x i8> %C } diff --git a/test/CodeGen/X86/avx2-vector-shifts.ll b/test/CodeGen/X86/avx2-vector-shifts.ll index a978d93..5592e6c 100644 --- a/test/CodeGen/X86/avx2-vector-shifts.ll +++ b/test/CodeGen/X86/avx2-vector-shifts.ll @@ -121,7 +121,7 @@ entry: } ; CHECK-LABEL: test_sraw_3: -; CHECK: vpsraw $16, %ymm0, %ymm0 +; CHECK: vpsraw $15, %ymm0, %ymm0 ; CHECK: ret define <8 x i32> @test_srad_1(<8 x i32> %InVec) { @@ -151,7 +151,7 @@ entry: } ; CHECK-LABEL: test_srad_3: -; CHECK: vpsrad $32, %ymm0, %ymm0 +; CHECK: vpsrad $31, %ymm0, %ymm0 ; CHECK: ret ; SSE Logical Shift Right diff --git a/test/CodeGen/X86/avx512-arith.ll b/test/CodeGen/X86/avx512-arith.ll new file mode 100644 index 0000000..e27600e --- /dev/null +++ b/test/CodeGen/X86/avx512-arith.ll @@ -0,0 +1,271 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s + +; CHECK-LABEL: addpd512 +; CHECK: vaddpd +; CHECK: ret +define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) { +entry: + %add.i = fadd <8 x double> %x, %y + ret <8 x double> %add.i +} + +; CHECK-LABEL: addpd512fold +; CHECK: vaddpd LCP{{.*}}(%rip) +; CHECK: ret +define <8 x double> @addpd512fold(<8 x double> %y) { +entry: + %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00> + ret <8 x double> %add.i +} + +; CHECK-LABEL: addps512 +; CHECK: vaddps +; CHECK: ret +define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) { +entry: + %add.i = fadd <16 x float> %x, %y + ret <16 x float> %add.i +} + +; CHECK-LABEL: addps512fold +; 
CHECK: vaddps LCP{{.*}}(%rip) +; CHECK: ret +define <16 x float> @addps512fold(<16 x float> %y) { +entry: + %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000> + ret <16 x float> %add.i +} + +; CHECK-LABEL: subpd512 +; CHECK: vsubpd +; CHECK: ret +define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) { +entry: + %sub.i = fsub <8 x double> %x, %y + ret <8 x double> %sub.i +} + +; CHECK-LABEL: @subpd512fold +; CHECK: vsubpd (% +; CHECK: ret +define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) { +entry: + %tmp2 = load <8 x double>* %x, align 8 + %sub.i = fsub <8 x double> %y, %tmp2 + ret <8 x double> %sub.i +} + +; CHECK-LABEL: @subps512 +; CHECK: vsubps +; CHECK: ret +define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) { +entry: + %sub.i = fsub <16 x float> %x, %y + ret <16 x float> %sub.i +} + +; CHECK-LABEL: subps512fold +; CHECK: vsubps (% +; CHECK: ret +define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) { +entry: + %tmp2 = load <16 x float>* %x, align 4 + %sub.i = fsub <16 x float> %y, %tmp2 + ret <16 x float> %sub.i +} + +; CHECK-LABEL: imulq512 +; CHECK: vpmuludq +; CHECK: vpmuludq +; CHECK: ret +define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) { + %z = mul <8 x i64>%x, %y + ret <8 x i64>%z +} + +; CHECK-LABEL: mulpd512 +; CHECK: vmulpd +; CHECK: ret +define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) { +entry: + %mul.i = fmul <8 x double> %x, %y + ret <8 x double> %mul.i +} + +; CHECK-LABEL: mulpd512fold +; CHECK: vmulpd LCP{{.*}}(%rip) +; CHECK: ret +define <8 x double> @mulpd512fold(<8 x double> %y) { +entry: + %mul.i = fmul 
<8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00> + ret <8 x double> %mul.i +} + +; CHECK-LABEL: mulps512 +; CHECK: vmulps +; CHECK: ret +define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) { +entry: + %mul.i = fmul <16 x float> %x, %y + ret <16 x float> %mul.i +} + +; CHECK-LABEL: mulps512fold +; CHECK: vmulps LCP{{.*}}(%rip) +; CHECK: ret +define <16 x float> @mulps512fold(<16 x float> %y) { +entry: + %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000> + ret <16 x float> %mul.i +} + +; CHECK-LABEL: divpd512 +; CHECK: vdivpd +; CHECK: ret +define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) { +entry: + %div.i = fdiv <8 x double> %x, %y + ret <8 x double> %div.i +} + +; CHECK-LABEL: divpd512fold +; CHECK: vdivpd LCP{{.*}}(%rip) +; CHECK: ret +define <8 x double> @divpd512fold(<8 x double> %y) { +entry: + %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00> + ret <8 x double> %div.i +} + +; CHECK-LABEL: divps512 +; CHECK: vdivps +; CHECK: ret +define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) { +entry: + %div.i = fdiv <16 x float> %x, %y + ret <16 x float> %div.i +} + +; CHECK-LABEL: divps512fold +; CHECK: vdivps LCP{{.*}}(%rip) +; CHECK: ret +define <16 x float> @divps512fold(<16 x float> %y) { +entry: + %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, 
float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000> + ret <16 x float> %div.i +} + +; CHECK-LABEL: vpaddq_test +; CHECK: vpaddq %zmm +; CHECK: ret +define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone { + %x = add <8 x i64> %i, %j + ret <8 x i64> %x +} + +; CHECK-LABEL: vpaddd_test +; CHECK: vpaddd %zmm +; CHECK: ret +define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone { + %x = add <16 x i32> %i, %j + ret <16 x i32> %x +} + +; CHECK-LABEL: vpsubq_test +; CHECK: vpsubq %zmm +; CHECK: ret +define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone { + %x = sub <8 x i64> %i, %j + ret <8 x i64> %x +} + +; CHECK-LABEL: vpsubd_test +; CHECK: vpsubd +; CHECK: ret +define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone { + %x = sub <16 x i32> %i, %j + ret <16 x i32> %x +} + +; CHECK-LABEL: vpmulld_test +; CHECK: vpmulld %zmm +; CHECK: ret +define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) { + %x = mul <16 x i32> %i, %j + ret <16 x i32> %x +} + +; CHECK-LABEL: sqrtA +; CHECK: vsqrtssz +; CHECK: ret +declare float @sqrtf(float) readnone +define float @sqrtA(float %a) nounwind uwtable readnone ssp { +entry: + %conv1 = tail call float @sqrtf(float %a) nounwind readnone + ret float %conv1 +} + +; CHECK-LABEL: sqrtB +; CHECK: vsqrtsdz +; CHECK: ret +declare double @sqrt(double) readnone +define double @sqrtB(double %a) nounwind uwtable readnone ssp { +entry: + %call = tail call double @sqrt(double %a) nounwind readnone + ret double %call +} + +; CHECK-LABEL: sqrtC +; CHECK: vsqrtssz +; CHECK: ret +declare float @llvm.sqrt.f32(float) +define float @sqrtC(float %a) nounwind { + %b = call float 
@llvm.sqrt.f32(float %a) + ret float %b +} + +; CHECK-LABEL: fadd_broadcast +; CHECK: LCP{{.*}}(%rip){1to16}, %zmm0, %zmm0 +; CHECK: ret +define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind { + %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000> + ret <16 x float> %b +} + +; CHECK-LABEL: addq_broadcast +; CHECK: vpaddq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0 +; CHECK: ret +define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind { + %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2> + ret <8 x i64> %b +} + +; CHECK-LABEL: orq_broadcast +; CHECK: vporq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0 +; CHECK: ret +define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind { + %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2> + ret <8 x i64> %b +} + +; CHECK-LABEL: andd512fold +; CHECK: vpandd (% +; CHECK: ret +define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) { +entry: + %a = load <16 x i32>* %x, align 4 + %b = and <16 x i32> %y, %a + ret <16 x i32> %b +} + +; CHECK-LABEL: andqbrst +; CHECK: vpandq (%rdi){1to8}, %zmm +; CHECK: ret +define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) { +entry: + %a = load i64* %ap, align 8 + %b = insertelement <8 x i64> undef, i64 %a, i32 0 + %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer + %d = and <8 x i64> %p1, %c + ret <8 x i64>%d +} diff --git a/test/CodeGen/X86/avx512-build-vector.ll b/test/CodeGen/X86/avx512-build-vector.ll new file mode 100644 index 0000000..bc4560b --- /dev/null +++ b/test/CodeGen/X86/avx512-build-vector.ll @@ -0,0 +1,18 @@ +; RUN: llc < 
%s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s + +; CHECK-LABEL: test1 +; CHECK: vpxord +; CHECK: ret +define <16 x i32> @test1(i32* %x) { + %y = load i32* %x, align 4 + %res = insertelement <16 x i32>zeroinitializer, i32 %y, i32 4 + ret <16 x i32>%res +} + +; CHECK-LABEL: test2 +; CHECK: vpaddd LCP{{.*}}(%rip){1to16} +; CHECK: ret +define <16 x i32> @test2(<16 x i32> %x) { + %res = add <16 x i32><i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, %x + ret <16 x i32>%res +}
\ No newline at end of file diff --git a/test/CodeGen/X86/avx512-cmp.ll b/test/CodeGen/X86/avx512-cmp.ll new file mode 100644 index 0000000..ba52745 --- /dev/null +++ b/test/CodeGen/X86/avx512-cmp.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s + +; CHECK: vucomisdz +define double @test1(double %a, double %b) nounwind { + %tobool = fcmp une double %a, %b + br i1 %tobool, label %l1, label %l2 + +l1: + %c = fsub double %a, %b + ret double %c +l2: + %c1 = fadd double %a, %b + ret double %c1 +} + +; CHECK: vucomissz +define float @test2(float %a, float %b) nounwind { + %tobool = fcmp olt float %a, %b + br i1 %tobool, label %l1, label %l2 + +l1: + %c = fsub float %a, %b + ret float %c +l2: + %c1 = fadd float %a, %b + ret float %c1 +} diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll new file mode 100644 index 0000000..ed68ff7 --- /dev/null +++ b/test/CodeGen/X86/avx512-cvt.ll @@ -0,0 +1,217 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s + +; CHECK-LABEL: sitof32 +; CHECK: vcvtdq2ps %zmm +; CHECK: ret +define <16 x float> @sitof32(<16 x i32> %a) nounwind { + %b = sitofp <16 x i32> %a to <16 x float> + ret <16 x float> %b +} + +; CHECK-LABEL: fptosi00 +; CHECK: vcvttps2dq %zmm +; CHECK: ret +define <16 x i32> @fptosi00(<16 x float> %a) nounwind { + %b = fptosi <16 x float> %a to <16 x i32> + ret <16 x i32> %b +} + +; CHECK-LABEL: fptoui00 +; CHECK: vcvttps2udq +; CHECK: ret +define <16 x i32> @fptoui00(<16 x float> %a) nounwind { + %b = fptoui <16 x float> %a to <16 x i32> + ret <16 x i32> %b +} + +; CHECK-LABEL: fptoui01 +; CHECK: vcvttpd2udq +; CHECK: ret +define <8 x i32> @fptoui01(<8 x double> %a) nounwind { + %b = fptoui <8 x double> %a to <8 x i32> + ret <8 x i32> %b +} + +; CHECK-LABEL: sitof64 +; CHECK: vcvtdq2pd %ymm +; CHECK: ret +define <8 x double> @sitof64(<8 x i32> %a) { + %b = sitofp <8 x i32> %a to <8 x double> + ret <8 x double> %b +} + +; CHECK-LABEL: 
fptosi01 +; CHECK: vcvttpd2dq %zmm +; CHECK: ret +define <8 x i32> @fptosi01(<8 x double> %a) { + %b = fptosi <8 x double> %a to <8 x i32> + ret <8 x i32> %b +} + +; CHECK-LABEL: fptrunc00 +; CHECK: vcvtpd2ps %zmm +; CHECK-NEXT: vcvtpd2ps %zmm +; CHECK-NEXT: vinsertf64x4 $1 +; CHECK: ret +define <16 x float> @fptrunc00(<16 x double> %b) nounwind { + %a = fptrunc <16 x double> %b to <16 x float> + ret <16 x float> %a +} + +; CHECK-LABEL: fpext00 +; CHECK: vcvtps2pd %ymm0, %zmm0 +; CHECK: ret +define <8 x double> @fpext00(<8 x float> %b) nounwind { + %a = fpext <8 x float> %b to <8 x double> + ret <8 x double> %a +} + +; CHECK-LABEL: funcA +; CHECK: vcvtsi2sdqz (% +; CHECK: ret +define double @funcA(i64* nocapture %e) { +entry: + %tmp1 = load i64* %e, align 8 + %conv = sitofp i64 %tmp1 to double + ret double %conv +} + +; CHECK-LABEL: funcB +; CHECK: vcvtsi2sdlz (% +; CHECK: ret +define double @funcB(i32* %e) { +entry: + %tmp1 = load i32* %e, align 4 + %conv = sitofp i32 %tmp1 to double + ret double %conv +} + +; CHECK-LABEL: funcC +; CHECK: vcvtsi2sslz (% +; CHECK: ret +define float @funcC(i32* %e) { +entry: + %tmp1 = load i32* %e, align 4 + %conv = sitofp i32 %tmp1 to float + ret float %conv +} + +; CHECK-LABEL: i64tof32 +; CHECK: vcvtsi2ssqz (% +; CHECK: ret +define float @i64tof32(i64* %e) { +entry: + %tmp1 = load i64* %e, align 8 + %conv = sitofp i64 %tmp1 to float + ret float %conv +} + +; CHECK-LABEL: fpext +; CHECK: vcvtss2sdz +; CHECK: ret +define void @fpext() { +entry: + %f = alloca float, align 4 + %d = alloca double, align 8 + %tmp = load float* %f, align 4 + %conv = fpext float %tmp to double + store double %conv, double* %d, align 8 + ret void +} + +; CHECK-LABEL: fpround_scalar +; CHECK: vmovsdz +; CHECK: vcvtsd2ssz +; CHECK: vmovssz +; CHECK: ret +define void @fpround_scalar() nounwind uwtable { +entry: + %f = alloca float, align 4 + %d = alloca double, align 8 + %tmp = load double* %d, align 8 + %conv = fptrunc double %tmp to float + store float 
%conv, float* %f, align 4 + ret void +} + +; CHECK-LABEL: long_to_double +; CHECK: vmovqz +; CHECK: ret +define double @long_to_double(i64 %x) { + %res = bitcast i64 %x to double + ret double %res +} + +; CHECK-LABEL: double_to_long +; CHECK: vmovqz +; CHECK: ret +define i64 @double_to_long(double %x) { + %res = bitcast double %x to i64 + ret i64 %res +} + +; CHECK-LABEL: int_to_float +; CHECK: vmovdz +; CHECK: ret +define float @int_to_float(i32 %x) { + %res = bitcast i32 %x to float + ret float %res +} + +; CHECK-LABEL: float_to_int +; CHECK: vmovdz +; CHECK: ret +define i32 @float_to_int(float %x) { + %res = bitcast float %x to i32 + ret i32 %res +} + +; CHECK-LABEL: uitof64 +; CHECK: vcvtudq2pd +; CHECK: vextracti64x4 +; CHECK: vcvtudq2pd +; CHECK: ret +define <16 x double> @uitof64(<16 x i32> %a) nounwind { + %b = uitofp <16 x i32> %a to <16 x double> + ret <16 x double> %b +} + +; CHECK-LABEL: uitof32 +; CHECK: vcvtudq2ps +; CHECK: ret +define <16 x float> @uitof32(<16 x i32> %a) nounwind { + %b = uitofp <16 x i32> %a to <16 x float> + ret <16 x float> %b +} + +; CHECK-LABEL: @fptosi02 +; CHECK vcvttss2siz +; CHECK: ret +define i32 @fptosi02(float %a) nounwind { + %b = fptosi float %a to i32 + ret i32 %b +} + +; CHECK-LABEL: @fptoui02 +; CHECK vcvttss2usiz +; CHECK: ret +define i32 @fptoui02(float %a) nounwind { + %b = fptoui float %a to i32 + ret i32 %b +} + +; CHECK-LABEL: @uitofp02 +; CHECK vcvtusi2ss +; CHECK: ret +define float @uitofp02(i32 %a) nounwind { + %b = uitofp i32 %a to float + ret float %b +} + +; CHECK-LABEL: @uitofp03 +; CHECK vcvtusi2sd +; CHECK: ret +define double @uitofp03(i32 %a) nounwind { + %b = uitofp i32 %a to double + ret double %b +} diff --git a/test/CodeGen/X86/avx512-fma-intrinsics.ll b/test/CodeGen/X86/avx512-fma-intrinsics.ll new file mode 100644 index 0000000..ce3d759 --- /dev/null +++ b/test/CodeGen/X86/avx512-fma-intrinsics.ll @@ -0,0 +1,97 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s + +define 
<16 x float> @test_x86_vfmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { + ; CHECK-LABEL: test_x86_vfmadd_ps_z + ; CHECK: vfmadd213ps %zmm + %res = call <16 x float> @llvm.x86.fma.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) nounwind + ret <16 x float> %res +} +declare <16 x float> @llvm.x86.fma.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>) nounwind readnone + +define <8 x double> @test_x86_vfmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { + ; CHECK-LABEL: test_x86_vfmadd_pd_z + ; CHECK: vfmadd213pd %zmm + %res = call <8 x double> @llvm.x86.fma.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) nounwind + ret <8 x double> %res +} +declare <8 x double> @llvm.x86.fma.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>) nounwind readnone + +define <16 x float> @test_x86_vfmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { + ; CHECK-LABEL: test_x86_vfmsubps_z + ; CHECK: vfmsub213ps %zmm + %res = call <16 x float> @llvm.x86.fma.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) nounwind + ret <16 x float> %res +} +declare <16 x float> @llvm.x86.fma.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>) nounwind readnone + +define <8 x double> @test_x86_vfmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { + ; CHECK-LABEL: test_x86_vfmsubpd_z + ; CHECK: vfmsub213pd %zmm + %res = call <8 x double> @llvm.x86.fma.vfmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) nounwind + ret <8 x double> %res +} +declare <8 x double> @llvm.x86.fma.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>) nounwind readnone + +define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { + ; CHECK-LABEL: test_x86_vfnmadd_ps_z + ; CHECK: vfnmadd213ps %zmm + %res = call <16 x float> @llvm.x86.fma.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) nounwind + ret <16 x float> %res +} +declare 
<16 x float> @llvm.x86.fma.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>) nounwind readnone + +define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { + ; CHECK-LABEL: test_x86_vfnmadd_pd_z + ; CHECK: vfnmadd213pd %zmm + %res = call <8 x double> @llvm.x86.fma.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) nounwind + ret <8 x double> %res +} +declare <8 x double> @llvm.x86.fma.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>) nounwind readnone + +define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { + ; CHECK-LABEL: test_x86_vfnmsubps_z + ; CHECK: vfnmsub213ps %zmm + %res = call <16 x float> @llvm.x86.fma.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) nounwind + ret <16 x float> %res +} +declare <16 x float> @llvm.x86.fma.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>) nounwind readnone + +define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { + ; CHECK-LABEL: test_x86_vfnmsubpd_z + ; CHECK: vfnmsub213pd %zmm + %res = call <8 x double> @llvm.x86.fma.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) nounwind + ret <8 x double> %res +} +declare <8 x double> @llvm.x86.fma.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>) nounwind readnone + +define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { + ; CHECK-LABEL: test_x86_vfmaddsubps_z + ; CHECK: vfmaddsub213ps %zmm + %res = call <16 x float> @llvm.x86.fma.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) nounwind + ret <16 x float> %res +} +declare <16 x float> @llvm.x86.fma.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>) nounwind readnone + +define <8 x double> @test_x86_vfmaddsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { + ; CHECK-LABEL: test_x86_vfmaddsubpd_z + ; CHECK: vfmaddsub213pd %zmm + %res = 
call <8 x double> @llvm.x86.fma.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) nounwind + ret <8 x double> %res +} +declare <8 x double> @llvm.x86.fma.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>) nounwind readnone + +define <16 x float> @test_x86_vfmsubaddps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { + ; CHECK-LABEL: test_x86_vfmsubaddps_z + ; CHECK: vfmsubadd213ps %zmm + %res = call <16 x float> @llvm.x86.fma.vfmsubadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) nounwind + ret <16 x float> %res +} +declare <16 x float> @llvm.x86.fma.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>) nounwind readnone + +define <8 x double> @test_x86_vfmsubaddpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { + ; CHECK-LABEL: test_x86_vfmsubaddpd_z + ; CHECK: vfmsubadd213pd %zmm + %res = call <8 x double> @llvm.x86.fma.vfmsubadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) nounwind + ret <8 x double> %res +} +declare <8 x double> @llvm.x86.fma.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>) nounwind readnone diff --git a/test/CodeGen/X86/avx512-fma.ll b/test/CodeGen/X86/avx512-fma.ll new file mode 100644 index 0000000..d6926e2 --- /dev/null +++ b/test/CodeGen/X86/avx512-fma.ll @@ -0,0 +1,83 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -fp-contract=fast | FileCheck %s + +; CHECK-LABEL: test_x86_fmadd_ps_z +; CHECK: vfmadd213ps %zmm2, %zmm1, %zmm0 +; CHECK: ret +define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { + %x = fmul <16 x float> %a0, %a1 + %res = fadd <16 x float> %x, %a2 + ret <16 x float> %res +} + +; CHECK-LABEL: test_x86_fmsub_ps_z +; CHECK: vfmsub213ps %zmm2, %zmm1, %zmm0 +; CHECK: ret +define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { + %x = fmul <16 x float> %a0, %a1 + %res = fsub <16 x float> %x, %a2 + ret <16 x float> %res +} + +; CHECK-LABEL: 
test_x86_fnmadd_ps_z +; CHECK: vfnmadd213ps %zmm2, %zmm1, %zmm0 +; CHECK: ret +define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { + %x = fmul <16 x float> %a0, %a1 + %res = fsub <16 x float> %a2, %x + ret <16 x float> %res +} + +; CHECK-LABEL: test_x86_fnmsub_ps_z +; CHECK: vfnmsub213ps %zmm2, %zmm1, %zmm0 +; CHECK: ret +define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { + %x = fmul <16 x float> %a0, %a1 + %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, + float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, + float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, + float -0.000000e+00>, %x + %res = fsub <16 x float> %y, %a2 + ret <16 x float> %res +} + +; CHECK-LABEL: test_x86_fmadd_pd_z +; CHECK: vfmadd213pd %zmm2, %zmm1, %zmm0 +; CHECK: ret +define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { + %x = fmul <8 x double> %a0, %a1 + %res = fadd <8 x double> %x, %a2 + ret <8 x double> %res +} + +; CHECK-LABEL: test_x86_fmsub_pd_z +; CHECK: vfmsub213pd %zmm2, %zmm1, %zmm0 +; CHECK: ret +define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { + %x = fmul <8 x double> %a0, %a1 + %res = fsub <8 x double> %x, %a2 + ret <8 x double> %res +} + +define double @test_x86_fmsub_sd_z(double %a0, double %a1, double %a2) { + %x = fmul double %a0, %a1 + %res = fsub double %x, %a2 + ret double %res +} + +;CHECK-LABEL: test132_br +;CHECK: vfmadd132ps LCP{{.*}}(%rip){1to16} +;CHECK: ret +define <16 x float> @test132_br(<16 x float> %a1, <16 x float> %a2) nounwind { + %b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 
0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000> + %b2 = fadd <16 x float> %b1, %a2 + ret <16 x float> %b2 +} + +;CHECK-LABEL: test213_br +;CHECK: vfmadd213ps LCP{{.*}}(%rip){1to16} +;CHECK: ret +define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind { + %b1 = fmul <16 x float> %a1, %a2 + %b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000> + ret <16 x float> %b2 +} diff --git a/test/CodeGen/X86/avx512-gather-scatter-intrin.ll b/test/CodeGen/X86/avx512-gather-scatter-intrin.ll new file mode 100644 index 0000000..0321e95 --- /dev/null +++ b/test/CodeGen/X86/avx512-gather-scatter-intrin.ll @@ -0,0 +1,225 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s + +declare <16 x float> @llvm.x86.avx512.gather.dps.mask.512 (<16 x float>, i16, <16 x i32>, i8*, i32) +declare void @llvm.x86.avx512.scatter.dps.mask.512 (i8*, i16, <16 x i32>, <16 x float>, i32) +declare <8 x double> @llvm.x86.avx512.gather.dpd.mask.512 (<8 x double>, i8, <8 x i32>, i8*, i32) +declare void @llvm.x86.avx512.scatter.dpd.mask.512 (i8*, i8, <8 x i32>, <8 x double>, i32) + +declare <8 x float> @llvm.x86.avx512.gather.qps.mask.512 (<8 x float>, i8, <8 x i64>, i8*, i32) +declare void @llvm.x86.avx512.scatter.qps.mask.512 (i8*, i8, <8 x i64>, <8 x float>, i32) +declare <8 x double> @llvm.x86.avx512.gather.qpd.mask.512 (<8 x double>, i8, <8 x i64>, 
i8*, i32) +declare void @llvm.x86.avx512.scatter.qpd.mask.512 (i8*, i8, <8 x i64>, <8 x double>, i32) + +;CHECK-LABEL: gather_mask_dps +;CHECK: kmovw +;CHECK: vgatherdps +;CHECK: vpadd +;CHECK: vscatterdps +;CHECK: ret +define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base, i8* %stbuf) { + %x = call <16 x float> @llvm.x86.avx512.gather.dps.mask.512 (<16 x float> %src, i16 %mask, <16 x i32>%ind, i8* %base, i32 4) + %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> + call void @llvm.x86.avx512.scatter.dps.mask.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x float> %x, i32 4) + ret void +} + +;CHECK-LABEL: gather_mask_dpd +;CHECK: kmovw +;CHECK: vgatherdpd +;CHECK: vpadd +;CHECK: vscatterdpd +;CHECK: ret +define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) { + %x = call <8 x double> @llvm.x86.avx512.gather.dpd.mask.512 (<8 x double> %src, i8 %mask, <8 x i32>%ind, i8* %base, i32 4) + %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> + call void @llvm.x86.avx512.scatter.dpd.mask.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x double> %x, i32 4) + ret void +} + +;CHECK-LABEL: gather_mask_qps +;CHECK: kmovw +;CHECK: vgatherqps +;CHECK: vpadd +;CHECK: vscatterqps +;CHECK: ret +define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base, i8* %stbuf) { + %x = call <8 x float> @llvm.x86.avx512.gather.qps.mask.512 (<8 x float> %src, i8 %mask, <8 x i64>%ind, i8* %base, i32 4) + %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3> + call void @llvm.x86.avx512.scatter.qps.mask.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x float> %x, i32 4) + ret void +} + +;CHECK-LABEL: gather_mask_qpd +;CHECK: kmovw +;CHECK: vgatherqpd +;CHECK: vpadd +;CHECK: vscatterqpd +;CHECK: ret +define void @gather_mask_qpd(<8 x i64> %ind, <8 x 
double> %src, i8 %mask, i8* %base, i8* %stbuf) { + %x = call <8 x double> @llvm.x86.avx512.gather.qpd.mask.512 (<8 x double> %src, i8 %mask, <8 x i64>%ind, i8* %base, i32 4) + %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3> + call void @llvm.x86.avx512.scatter.qpd.mask.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x double> %x, i32 4) + ret void +} +;; +;; Integer Gather/Scatter +;; +declare <16 x i32> @llvm.x86.avx512.gather.dpi.mask.512 (<16 x i32>, i16, <16 x i32>, i8*, i32) +declare void @llvm.x86.avx512.scatter.dpi.mask.512 (i8*, i16, <16 x i32>, <16 x i32>, i32) +declare <8 x i64> @llvm.x86.avx512.gather.dpq.mask.512 (<8 x i64>, i8, <8 x i32>, i8*, i32) +declare void @llvm.x86.avx512.scatter.dpq.mask.512 (i8*, i8, <8 x i32>, <8 x i64>, i32) + +declare <8 x i32> @llvm.x86.avx512.gather.qpi.mask.512 (<8 x i32>, i8, <8 x i64>, i8*, i32) +declare void @llvm.x86.avx512.scatter.qpi.mask.512 (i8*, i8, <8 x i64>, <8 x i32>, i32) +declare <8 x i64> @llvm.x86.avx512.gather.qpq.mask.512 (<8 x i64>, i8, <8 x i64>, i8*, i32) +declare void @llvm.x86.avx512.scatter.qpq.mask.512 (i8*, i8, <8 x i64>, <8 x i64>, i32) + +;CHECK-LABEL: gather_mask_dd +;CHECK: kmovw +;CHECK: vpgatherdd +;CHECK: vpadd +;CHECK: vpscatterdd +;CHECK: ret +define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %base, i8* %stbuf) { + %x = call <16 x i32> @llvm.x86.avx512.gather.dpi.mask.512 (<16 x i32> %src, i16 %mask, <16 x i32>%ind, i8* %base, i32 4) + %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> + call void @llvm.x86.avx512.scatter.dpi.mask.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x i32> %x, i32 4) + ret void +} + +;CHECK-LABEL: gather_mask_qd +;CHECK: kmovw +;CHECK: vpgatherqd +;CHECK: vpadd +;CHECK: vpscatterqd +;CHECK: ret +define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, i8* %base, i8* %stbuf) { + %x = call <8 x 
i32> @llvm.x86.avx512.gather.qpi.mask.512 (<8 x i32> %src, i8 %mask, <8 x i64>%ind, i8* %base, i32 4) + %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3> + call void @llvm.x86.avx512.scatter.qpi.mask.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i32> %x, i32 4) + ret void +} + +;CHECK-LABEL: gather_mask_qq +;CHECK: kmovw +;CHECK: vpgatherqq +;CHECK: vpadd +;CHECK: vpscatterqq +;CHECK: ret +define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) { + %x = call <8 x i64> @llvm.x86.avx512.gather.qpq.mask.512 (<8 x i64> %src, i8 %mask, <8 x i64>%ind, i8* %base, i32 4) + %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3> + call void @llvm.x86.avx512.scatter.qpq.mask.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i64> %x, i32 4) + ret void +} + +;CHECK-LABEL: gather_mask_dq +;CHECK: kmovw +;CHECK: vpgatherdq +;CHECK: vpadd +;CHECK: vpscatterdq +;CHECK: ret +define void @gather_mask_dq(<8 x i32> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) { + %x = call <8 x i64> @llvm.x86.avx512.gather.dpq.mask.512 (<8 x i64> %src, i8 %mask, <8 x i32>%ind, i8* %base, i32 4) + %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> + call void @llvm.x86.avx512.scatter.dpq.mask.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x i64> %x, i32 4) + ret void +} + +;; FP Intinsics without masks + +declare <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x i32>, i8*, i32) +declare void @llvm.x86.avx512.scatter.dps.512 (i8*, <16 x i32>, <16 x float>, i32) +declare <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x i64>, i8*, i32) +declare void @llvm.x86.avx512.scatter.qps.512 (i8*, <8 x i64>, <8 x float>, i32) +declare <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x i64>, i8*, i32) +declare void @llvm.x86.avx512.scatter.qpd.512 (i8*, <8 x i64>, <8 x double>, i32) + +;CHECK-LABEL: gather_dps +;CHECK: kxnorw +;CHECK: vgatherdps +;CHECK: vscatterdps 
+;CHECK: ret +define void @gather_dps(<16 x i32> %ind, i8* %base, i8* %stbuf) { + %x = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x i32>%ind, i8* %base, i32 4) + %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> + call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, <16 x i32>%ind2, <16 x float> %x, i32 4) + ret void +} + +;CHECK-LABEL: gather_qps +;CHECK: kxnorw +;CHECK: vgatherqps +;CHECK: vscatterqps +;CHECK: ret +define void @gather_qps(<8 x i64> %ind, i8* %base, i8* %stbuf) { + %x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x i64>%ind, i8* %base, i32 4) + %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3> + call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, <8 x i64>%ind2, <8 x float> %x, i32 4) + ret void +} + +;CHECK-LABEL: gather_qpd +;CHECK: kxnorw +;CHECK: vgatherqpd +;CHECK: vpadd +;CHECK: vscatterqpd +;CHECK: ret +define void @gather_qpd(<8 x i64> %ind, i8* %base, i8* %stbuf) { + %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x i64>%ind, i8* %base, i32 4) + %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3> + call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, <8 x i64>%ind2, <8 x double> %x, i32 4) + ret void +} + +;; Integer Intinsics without masks + +declare <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32>, i8*, i32) +declare void @llvm.x86.avx512.scatter.dpi.512 (i8*, <16 x i32>, <16 x i32>, i32) +declare <8 x i64> @llvm.x86.avx512.gather.dpq.512 (<8 x i32>, i8*, i32) +declare void @llvm.x86.avx512.scatter.dpq.512 (i8*, <8 x i32>, <8 x i64>, i32) + +declare <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i64>, i8*, i32) +declare void @llvm.x86.avx512.scatter.qpi.512 (i8*, <8 x i64>, <8 x i32>, i32) +declare <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64>, i8*, i32) +declare void @llvm.x86.avx512.scatter.qpq.512 (i8*, <8 x i64>, <8 x 
i64>, i32) + +;CHECK-LABEL: gather_dpi +;CHECK: kxnorw +;CHECK: vpgatherdd +;CHECK: vpscatterdd +;CHECK: ret +define void @gather_dpi(<16 x i32> %ind, i8* %base, i8* %stbuf) { + %x = call <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32>%ind, i8* %base, i32 4) + %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> + call void @llvm.x86.avx512.scatter.dpi.512 (i8* %stbuf, <16 x i32>%ind2, <16 x i32> %x, i32 4) + ret void +} + +;CHECK-LABEL: gather_qpq +;CHECK: vpxord %zmm +;CHECK: kxnorw +;CHECK: vpgatherqq +;CHECK: vpadd +;CHECK: vpscatterqq +;CHECK: ret +define void @gather_qpq(<8 x i64> %ind, i8* %base, i8* %stbuf) { + %x = call <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64>%ind, i8* %base, i32 4) + %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3> + call void @llvm.x86.avx512.scatter.qpq.512 (i8* %stbuf, <8 x i64>%ind2, <8 x i64> %x, i32 4) + ret void +} + +;CHECK-LABEL: gather_qpi +;CHECK: vpxor %ymm +;CHECK: kxnorw +;CHECK: vpgatherqd +;CHECK: vpadd +;CHECK: vpscatterqd +;CHECK: ret +define void @gather_qpi(<8 x i64> %ind, i8* %base, i8* %stbuf) { + %x = call <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i64>%ind, i8* %base, i32 4) + %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3> + call void @llvm.x86.avx512.scatter.qpi.512 (i8* %stbuf, <8 x i64>%ind2, <8 x i32> %x, i32 4) + ret void +} diff --git a/test/CodeGen/X86/avx512-insert-extract.ll b/test/CodeGen/X86/avx512-insert-extract.ll index 189bdd7..3f06740 100644 --- a/test/CodeGen/X86/avx512-insert-extract.ll +++ b/test/CodeGen/X86/avx512-insert-extract.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s -;CHECK: test1 +;CHECK-LABEL: test1: ;CHECK: vinsertps ;CHECK: vinsertf32x4 ;CHECK: ret @@ -11,7 +11,7 @@ define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind { ret 
<16 x float> %rrr3 } -;CHECK: test2 +;CHECK-LABEL: test2: ;CHECK: vinsertf32x4 ;CHECK: vextractf32x4 ;CHECK: vinsertf32x4 @@ -23,7 +23,7 @@ define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind { ret <8 x double> %rrr3 } -;CHECK: test3 +;CHECK-LABEL: test3: ;CHECK: vextractf32x4 ;CHECK: vinsertf32x4 ;CHECK: ret @@ -33,7 +33,7 @@ define <16 x float> @test3(<16 x float> %x) nounwind { ret <16 x float> %rrr2 } -;CHECK: test4 +;CHECK-LABEL: test4: ;CHECK: vextracti32x4 ;CHECK: vinserti32x4 ;CHECK: ret @@ -43,7 +43,7 @@ define <8 x i64> @test4(<8 x i64> %x) nounwind { ret <8 x i64> %rrr2 } -;CHECK: test5 +;CHECK-LABEL: test5: ;CHECK: vextractpsz ;CHECK: ret define i32 @test5(<4 x float> %x) nounwind { @@ -52,7 +52,7 @@ define i32 @test5(<4 x float> %x) nounwind { ret i32 %ei } -;CHECK: test6 +;CHECK-LABEL: test6: ;CHECK: vextractpsz {{.*}}, (%rdi) ;CHECK: ret define void @test6(<4 x float> %x, float* %out) nounwind { @@ -61,3 +61,65 @@ define void @test6(<4 x float> %x, float* %out) nounwind { ret void } +;CHECK-LABEL: test7 +;CHECK: vmovdz +;CHECK: vpermps %zmm +;CHECK: ret +define float @test7(<16 x float> %x, i32 %ind) nounwind { + %e = extractelement <16 x float> %x, i32 %ind + ret float %e +} + +;CHECK-LABEL: test8 +;CHECK: vmovqz +;CHECK: vpermpd %zmm +;CHECK: ret +define double @test8(<8 x double> %x, i32 %ind) nounwind { + %e = extractelement <8 x double> %x, i32 %ind + ret double %e +} + +;CHECK-LABEL: test9 +;CHECK: vmovd +;CHECK: vpermps %ymm +;CHECK: ret +define float @test9(<8 x float> %x, i32 %ind) nounwind { + %e = extractelement <8 x float> %x, i32 %ind + ret float %e +} + +;CHECK-LABEL: test10 +;CHECK: vmovdz +;CHECK: vpermd %zmm +;CHEKK: vmovdz %xmm0, %eax +;CHECK: ret +define i32 @test10(<16 x i32> %x, i32 %ind) nounwind { + %e = extractelement <16 x i32> %x, i32 %ind + ret i32 %e +} + +;CHECK-LABEL: test11 +;CHECK: movl $260 +;CHECK: bextrl +;CHECK: movl $268 +;CHECK: bextrl +;CHECK: ret +define <16 x i32> @test11(<16 x i32>%a, 
<16 x i32>%b) { + %cmp_res = icmp ult <16 x i32> %a, %b + %ia = extractelement <16 x i1> %cmp_res, i32 4 + %ib = extractelement <16 x i1> %cmp_res, i32 12 + + br i1 %ia, label %A, label %B + + A: + ret <16 x i32>%b + B: + %c = add <16 x i32>%b, %a + br i1 %ib, label %C, label %D + C: + %c1 = sub <16 x i32>%c, %a + ret <16 x i32>%c1 + D: + %c2 = mul <16 x i32>%c, %a + ret <16 x i32>%c2 +} diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll new file mode 100644 index 0000000..5bdabf2 --- /dev/null +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -0,0 +1,374 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s + +declare i32 @llvm.x86.avx512.kortestz(i16, i16) nounwind readnone +; CHECK: test_kortestz +; CHECK: kortestw +; CHECK: sete +define i32 @test_kortestz(i16 %a0, i16 %a1) { + %res = call i32 @llvm.x86.avx512.kortestz(i16 %a0, i16 %a1) + ret i32 %res +} + +declare i32 @llvm.x86.avx512.kortestc(i16, i16) nounwind readnone +; CHECK: test_kortestc +; CHECK: kortestw +; CHECK: sbbl +define i32 @test_kortestc(i16 %a0, i16 %a1) { + %res = call i32 @llvm.x86.avx512.kortestc(i16 %a0, i16 %a1) + ret i32 %res +} + +define <16 x float> @test_rcp_ps_512(<16 x float> %a0) { + ; CHECK: vrcp14ps + %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1] + ret <16 x float> %res +} +declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>) nounwind readnone + +define <8 x double> @test_rcp_pd_512(<8 x double> %a0) { + ; CHECK: vrcp14pd + %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0) ; <<8 x double>> [#uses=1] + ret <8 x double> %res +} +declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>) nounwind readnone + +define <16 x float> @test_rcp28_ps_512(<16 x float> %a0) { + ; CHECK: vrcp28ps + %res = call <16 x float> @llvm.x86.avx512.rcp28.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1] + ret <16 x float> %res +} +declare <16 x float> 
@llvm.x86.avx512.rcp28.ps.512(<16 x float>) nounwind readnone + +define <8 x double> @test_rcp28_pd_512(<8 x double> %a0) { + ; CHECK: vrcp28pd + %res = call <8 x double> @llvm.x86.avx512.rcp28.pd.512(<8 x double> %a0) ; <<8 x double>> [#uses=1] + ret <8 x double> %res +} +declare <8 x double> @llvm.x86.avx512.rcp28.pd.512(<8 x double>) nounwind readnone + +define <8 x double> @test_rndscale_pd_512(<8 x double> %a0) { + ; CHECK: vrndscale + %res = call <8 x double> @llvm.x86.avx512.rndscale.pd.512(<8 x double> %a0, i32 7) ; <<8 x double>> [#uses=1] + ret <8 x double> %res +} +declare <8 x double> @llvm.x86.avx512.rndscale.pd.512(<8 x double>, i32) nounwind readnone + + +define <16 x float> @test_rndscale_ps_512(<16 x float> %a0) { + ; CHECK: vrndscale + %res = call <16 x float> @llvm.x86.avx512.rndscale.ps.512(<16 x float> %a0, i32 7) ; <<16 x float>> [#uses=1] + ret <16 x float> %res +} +declare <16 x float> @llvm.x86.avx512.rndscale.ps.512(<16 x float>, i32) nounwind readnone + + +define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) { + ; CHECK: vrsqrt14ps + %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1] + ret <16 x float> %res +} +declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>) nounwind readnone + +define <16 x float> @test_rsqrt28_ps_512(<16 x float> %a0) { + ; CHECK: vrsqrt28ps + %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1] + ret <16 x float> %res +} +declare <16 x float> @llvm.x86.avx512.rsqrt28.ps.512(<16 x float>) nounwind readnone + +define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) { + ; CHECK: vrsqrt14ss + %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>) nounwind readnone + +define <4 x float> @test_rsqrt28_ss(<4 x float> %a0) { + ; CHECK: vrsqrt28ss + %res = call <4 x float> 
@llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float>) nounwind readnone + +define <4 x float> @test_rcp14_ss(<4 x float> %a0) { + ; CHECK: vrcp14ss + %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>) nounwind readnone + +define <4 x float> @test_rcp28_ss(<4 x float> %a0) { + ; CHECK: vrcp28ss + %res = call <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float>) nounwind readnone + +define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) { + ; CHECK: vsqrtpd + %res = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0) ; <<8 x double>> [#uses=1] + ret <8 x double> %res +} +declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>) nounwind readnone + +define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) { + ; CHECK: vsqrtps + %res = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1] + ret <16 x float> %res +} +declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>) nounwind readnone + +define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: vsqrtssz + %res = call <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float>, <4 x float>) nounwind readnone + +define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: vsqrtsdz + %res = call <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double>, <2 x double>) nounwind readnone + +define i64 
@test_x86_sse2_cvtsd2si64(<2 x double> %a0) { + ; CHECK: vcvtsd2siz + %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1] + ret i64 %res +} +declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone + +define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) { + ; CHECK: vcvtsi2sdqz + %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone + +define <2 x double> @test_x86_avx512_cvtusi642sd(<2 x double> %a0, i64 %a1) { + ; CHECK: vcvtusi2sdqz + %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64) nounwind readnone + +define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) { + ; CHECK: vcvttsd2siz + %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1] + ret i64 %res +} +declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone + + +define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) { + ; CHECK: vcvtss2siz + %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1] + ret i64 %res +} +declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) { + ; CHECK: vcvtsi2ssqz + %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone + + +define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) { + ; CHECK: vcvttss2siz + %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; <i64> [#uses=1] + ret i64 %res +} +declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone + +define i64 @test_x86_avx512_cvtsd2usi64(<2 x 
double> %a0) { + ; CHECK: vcvtsd2usiz + %res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1] + ret i64 %res +} +declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone + +define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) { + ; CHECK: vcvtph2ps + %res = call <16 x float> @llvm.x86.avx512.vcvtph2ps.512(<16 x i16> %a0) + ret <16 x float> %res +} +declare <16 x float> @llvm.x86.avx512.vcvtph2ps.512(<16 x i16>) nounwind readonly + + +define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0) { + ; CHECK: vcvtps2ph + %res = call <16 x i16> @llvm.x86.avx512.vcvtps2ph.512(<16 x float> %a0, i32 0) + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx512.vcvtps2ph.512(<16 x float>, i32) nounwind readonly + +define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) { + ; CHECK: vbroadcastss + %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1] + ret <16 x float> %res +} +declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly + +define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) { + ; CHECK: vbroadcastsd + %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1] + ret <8 x double> %res +} +declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly + +define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0) { + ; CHECK: vbroadcastss + %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float> %a0) ; <<16 x float>> [#uses=1] + ret <16 x float> %res +} +declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float>) nounwind readonly + +define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) { + ; CHECK: vbroadcastsd + %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double> %a0) ; <<8 x double>> [#uses=1] + ret <8 x double> %res +} +declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly + 
+define <16 x i32> @test_x86_pbroadcastd_512(<4 x i32> %a0) { + ; CHECK: vpbroadcastd + %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %a0) ; <<16 x i32>> [#uses=1] + ret <16 x i32> %res +} +declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>) nounwind readonly + +define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) { + ; CHECK: vpbroadcastd + %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32 %a0) ; <<16 x i32>> [#uses=1] + ret <16 x i32> %res +} +declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly + +define <8 x i64> @test_x86_pbroadcastq_512(<2 x i64> %a0) { + ; CHECK: vpbroadcastq + %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %a0) ; <<8 x i64>> [#uses=1] + ret <8 x i64> %res +} +declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>) nounwind readonly + +define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) { + ; CHECK: vpbroadcastq + %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64 %a0) ; <<8 x i64>> [#uses=1] + ret <8 x i64> %res +} +declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly + +define <16 x i32> @test_x86_pmaxu_d(<16 x i32> %a0, <16 x i32> %a1) { + ; CHECK: vpmaxud + %res = call <16 x i32> @llvm.x86.avx512.pmaxu.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1] + ret <16 x i32> %res +} +declare <16 x i32> @llvm.x86.avx512.pmaxu.d(<16 x i32>, <16 x i32>) nounwind readonly + +define <8 x i64> @test_x86_pmaxu_q(<8 x i64> %a0, <8 x i64> %a1) { + ; CHECK: vpmaxuq + %res = call <8 x i64> @llvm.x86.avx512.pmaxu.q(<8 x i64> %a0, <8 x i64> %a1) ; <<8 x i64>> [#uses=1] + ret <8 x i64> %res +} +declare <8 x i64> @llvm.x86.avx512.pmaxu.q(<8 x i64>, <8 x i64>) nounwind readonly + +define <16 x i32> @test_x86_pmaxs_d(<16 x i32> %a0, <16 x i32> %a1) { + ; CHECK: vpmaxsd + %res = call <16 x i32> @llvm.x86.avx512.pmaxs.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1] + ret <16 x i32> %res +} +declare <16 x 
i32> @llvm.x86.avx512.pmaxs.d(<16 x i32>, <16 x i32>) nounwind readonly + +define <8 x i64> @test_x86_pmaxs_q(<8 x i64> %a0, <8 x i64> %a1) { + ; CHECK: vpmaxsq + %res = call <8 x i64> @llvm.x86.avx512.pmaxs.q(<8 x i64> %a0, <8 x i64> %a1) ; <<8 x i64>> [#uses=1] + ret <8 x i64> %res +} +declare <8 x i64> @llvm.x86.avx512.pmaxs.q(<8 x i64>, <8 x i64>) nounwind readonly + +define <16 x i32> @test_x86_pminu_d(<16 x i32> %a0, <16 x i32> %a1) { + ; CHECK: vpminud + %res = call <16 x i32> @llvm.x86.avx512.pminu.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1] + ret <16 x i32> %res +} +declare <16 x i32> @llvm.x86.avx512.pminu.d(<16 x i32>, <16 x i32>) nounwind readonly + +define <8 x i64> @test_x86_pminu_q(<8 x i64> %a0, <8 x i64> %a1) { + ; CHECK: vpminuq + %res = call <8 x i64> @llvm.x86.avx512.pminu.q(<8 x i64> %a0, <8 x i64> %a1) ; <<8 x i64>> [#uses=1] + ret <8 x i64> %res +} +declare <8 x i64> @llvm.x86.avx512.pminu.q(<8 x i64>, <8 x i64>) nounwind readonly + +define <16 x i32> @test_x86_pmins_d(<16 x i32> %a0, <16 x i32> %a1) { + ; CHECK: vpminsd + %res = call <16 x i32> @llvm.x86.avx512.pmins.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1] + ret <16 x i32> %res +} +declare <16 x i32> @llvm.x86.avx512.pmins.d(<16 x i32>, <16 x i32>) nounwind readonly + +define <8 x i64> @test_x86_pmins_q(<8 x i64> %a0, <8 x i64> %a1) { + ; CHECK: vpminsq + %res = call <8 x i64> @llvm.x86.avx512.pmins.q(<8 x i64> %a0, <8 x i64> %a1) ; <<8 x i64>> [#uses=1] + ret <8 x i64> %res +} +declare <8 x i64> @llvm.x86.avx512.pmins.q(<8 x i64>, <8 x i64>) nounwind readonly + +define <16 x i32> @test_conflict_d(<16 x i32> %a) { + ; CHECK: vpconflictd + %res = call <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32> %a) + ret <16 x i32> %res +} +declare <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32>) nounwind readonly + +define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) { + ; CHECK: vpconflictd %zmm0, %zmm0 {%k1} {z} + %vmask = bitcast i16 %mask 
to <16 x i1> + %res = call <16 x i32> @llvm.x86.avx512.conflict.d.maskz.512(<16 x i1> %vmask, <16 x i32> %a) + ret <16 x i32> %res +} +declare <16 x i32> @llvm.x86.avx512.conflict.d.maskz.512(<16 x i1>,<16 x i32>) nounwind readonly + +define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { + ; CHECK: vpconflictq {{.*}} {%k1} + %vmask = bitcast i8 %mask to <8 x i1> + %res = call <8 x i64> @llvm.x86.avx512.conflict.q.mask.512(<8 x i64> %b, <8 x i1> %vmask, <8 x i64> %a) + ret <8 x i64> %res +} +declare <8 x i64> @llvm.x86.avx512.conflict.q.mask.512(<8 x i64>, <8 x i1>,<8 x i64>) nounwind readonly + +define <16 x float> @test_x86_mskblend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) { + ; CHECK: vblendmps + %m0 = bitcast i16 %a0 to <16 x i1> + %res = call <16 x float> @llvm.x86.avx512.mskblend.ps.512(<16 x i1> %m0, <16 x float> %a1, <16 x float> %a2) ; <<16 x float>> [#uses=1] + ret <16 x float> %res +} +declare <16 x float> @llvm.x86.avx512.mskblend.ps.512(<16 x i1> %a0, <16 x float> %a1, <16 x float> %a2) nounwind readonly + +define <8 x double> @test_x86_mskblend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) { + ; CHECK: vblendmpd + %m0 = bitcast i8 %a0 to <8 x i1> + %res = call <8 x double> @llvm.x86.avx512.mskblend.pd.512(<8 x i1> %m0, <8 x double> %a1, <8 x double> %a2) ; <<8 x double>> [#uses=1] + ret <8 x double> %res +} +declare <8 x double> @llvm.x86.avx512.mskblend.pd.512(<8 x i1> %a0, <8 x double> %a1, <8 x double> %a2) nounwind readonly + +define <16 x i32> @test_x86_mskblend_d_512(i16 %a0, <16 x i32> %a1, <16 x i32> %a2) { + ; CHECK: vpblendmd + %m0 = bitcast i16 %a0 to <16 x i1> + %res = call <16 x i32> @llvm.x86.avx512.mskblend.d.512(<16 x i1> %m0, <16 x i32> %a1, <16 x i32> %a2) ; <<16 x i32>> [#uses=1] + ret <16 x i32> %res +} +declare <16 x i32> @llvm.x86.avx512.mskblend.d.512(<16 x i1> %a0, <16 x i32> %a1, <16 x i32> %a2) nounwind readonly + +define <8 x i64> @test_x86_mskblend_q_512(i8 %a0, <8 x i64> %a1, <8 x 
i64> %a2) { + ; CHECK: vpblendmq + %m0 = bitcast i8 %a0 to <8 x i1> + %res = call <8 x i64> @llvm.x86.avx512.mskblend.q.512(<8 x i1> %m0, <8 x i64> %a1, <8 x i64> %a2) ; <<8 x i64>> [#uses=1] + ret <8 x i64> %res +} +declare <8 x i64> @llvm.x86.avx512.mskblend.q.512(<8 x i1> %a0, <8 x i64> %a1, <8 x i64> %a2) nounwind readonly diff --git a/test/CodeGen/X86/avx512-mask-op.ll b/test/CodeGen/X86/avx512-mask-op.ll index eec8873..ef5cb56 100644 --- a/test/CodeGen/X86/avx512-mask-op.ll +++ b/test/CodeGen/X86/avx512-mask-op.ll @@ -27,8 +27,8 @@ define i16 @mand16(i16 %x, i16 %y) { %md = xor <16 x i1> %ma, %mb %me = or <16 x i1> %mc, %md %ret = bitcast <16 x i1> %me to i16 -; CHECK: kxorw ; CHECK: kandw +; CHECK: kxorw ; CHECK: korw ret i16 %ret } @@ -55,4 +55,3 @@ define i8 @shuf_test1(i16 %v) nounwind { %mask1 = bitcast <8 x i1> %mask to i8 ret i8 %mask1 } - diff --git a/test/CodeGen/X86/avx512-mov.ll b/test/CodeGen/X86/avx512-mov.ll new file mode 100644 index 0000000..91242b1 --- /dev/null +++ b/test/CodeGen/X86/avx512-mov.ll @@ -0,0 +1,155 @@ +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s + +; CHECK-LABEL: @test1 +; CHECK: vmovdz %xmm0, %eax +; CHECK: ret +define i32 @test1(float %x) { + %res = bitcast float %x to i32 + ret i32 %res +} + +; CHECK-LABEL: @test2 +; CHECK: vmovdz %edi +; CHECK: ret +define <4 x i32> @test2(i32 %x) { + %res = insertelement <4 x i32>undef, i32 %x, i32 0 + ret <4 x i32>%res +} + +; CHECK-LABEL: @test3 +; CHECK: vmovqz %rdi +; CHECK: ret +define <2 x i64> @test3(i64 %x) { + %res = insertelement <2 x i64>undef, i64 %x, i32 0 + ret <2 x i64>%res +} + +; CHECK-LABEL: @test4 +; CHECK: vmovdz (%rdi) +; CHECK: ret +define <4 x i32> @test4(i32* %x) { + %y = load i32* %x + %res = insertelement <4 x i32>undef, i32 %y, i32 0 + ret <4 x i32>%res +} + +; CHECK-LABEL: @test5 +; CHECK: vmovssz %xmm0, (%rdi) +; CHECK: ret +define void @test5(float %x, float* %y) { + store float %x, float* %y, align 4 + ret void +} + +; 
CHECK-LABEL: @test6 +; CHECK: vmovsdz %xmm0, (%rdi) +; CHECK: ret +define void @test6(double %x, double* %y) { + store double %x, double* %y, align 8 + ret void +} + +; CHECK-LABEL: @test7 +; CHECK: vmovssz (%rdi), %xmm0 +; CHECK: ret +define float @test7(i32* %x) { + %y = load i32* %x + %res = bitcast i32 %y to float + ret float %res +} + +; CHECK-LABEL: @test8 +; CHECK: vmovdz %xmm0, %eax +; CHECK: ret +define i32 @test8(<4 x i32> %x) { + %res = extractelement <4 x i32> %x, i32 0 + ret i32 %res +} + +; CHECK-LABEL: @test9 +; CHECK: vmovqz %xmm0, %rax +; CHECK: ret +define i64 @test9(<2 x i64> %x) { + %res = extractelement <2 x i64> %x, i32 0 + ret i64 %res +} + +; CHECK-LABEL: @test10 +; CHECK: vmovdz (%rdi) +; CHECK: ret +define <4 x i32> @test10(i32* %x) { + %y = load i32* %x, align 4 + %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0 + ret <4 x i32>%res +} + +; CHECK-LABEL: @test11 +; CHECK: vmovssz (%rdi) +; CHECK: ret +define <4 x float> @test11(float* %x) { + %y = load float* %x, align 4 + %res = insertelement <4 x float>zeroinitializer, float %y, i32 0 + ret <4 x float>%res +} + +; CHECK-LABEL: @test12 +; CHECK: vmovsdz (%rdi) +; CHECK: ret +define <2 x double> @test12(double* %x) { + %y = load double* %x, align 8 + %res = insertelement <2 x double>zeroinitializer, double %y, i32 0 + ret <2 x double>%res +} + +; CHECK-LABEL: @test13 +; CHECK: vmovqz %rdi +; CHECK: ret +define <2 x i64> @test13(i64 %x) { + %res = insertelement <2 x i64>zeroinitializer, i64 %x, i32 0 + ret <2 x i64>%res +} + +; CHECK-LABEL: @test14 +; CHECK: vmovdz %edi +; CHECK: ret +define <4 x i32> @test14(i32 %x) { + %res = insertelement <4 x i32>zeroinitializer, i32 %x, i32 0 + ret <4 x i32>%res +} + +; CHECK-LABEL: @test15 +; CHECK: vmovdz (%rdi) +; CHECK: ret +define <4 x i32> @test15(i32* %x) { + %y = load i32* %x, align 4 + %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0 + ret <4 x i32>%res +} + +; CHECK-LABEL: test16 +; CHECK: vmovdqu32 +; CHECK: ret 
+define <16 x i32> @test16(i8 * %addr) { + %vaddr = bitcast i8* %addr to <16 x i32>* + %res = load <16 x i32>* %vaddr, align 1 + ret <16 x i32>%res +} + +; CHECK-LABEL: test17 +; CHECK: vmovdqa32 +; CHECK: ret +define <16 x i32> @test17(i8 * %addr) { + %vaddr = bitcast i8* %addr to <16 x i32>* + %res = load <16 x i32>* %vaddr, align 64 + ret <16 x i32>%res +} + +; CHECK-LABEL: test18 +; CHECK: vmovdqa64 +; CHECK: ret +define void @test18(i8 * %addr, <8 x i64> %data) { + %vaddr = bitcast i8* %addr to <8 x i64>* + store <8 x i64>%data, <8 x i64>* %vaddr, align 64 + ret void +} + diff --git a/test/CodeGen/X86/avx512-select.ll b/test/CodeGen/X86/avx512-select.ll new file mode 100644 index 0000000..d2d6681 --- /dev/null +++ b/test/CodeGen/X86/avx512-select.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s + +; CHECK-LABEL: select00 +; CHECK: vmovaps +; CHECK-NEXT: LBB +define <16 x i32> @select00(i32 %a, <16 x i32> %b) nounwind { + %cmpres = icmp eq i32 %a, 255 + %selres = select i1 %cmpres, <16 x i32> zeroinitializer, <16 x i32> %b + %res = xor <16 x i32> %b, %selres + ret <16 x i32> %res +} + +; CHECK-LABEL: select01 +; CHECK: vmovaps +; CHECK-NEXT: LBB +define <8 x i64> @select01(i32 %a, <8 x i64> %b) nounwind { + %cmpres = icmp eq i32 %a, 255 + %selres = select i1 %cmpres, <8 x i64> zeroinitializer, <8 x i64> %b + %res = xor <8 x i64> %b, %selres + ret <8 x i64> %res +} + diff --git a/test/CodeGen/X86/avx512-shift.ll b/test/CodeGen/X86/avx512-shift.ll new file mode 100644 index 0000000..8cdcf8a --- /dev/null +++ b/test/CodeGen/X86/avx512-shift.ll @@ -0,0 +1,108 @@ +;RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s + +;CHECK-LABEL: shift_16_i32 +;CHECK: vpsrld +;CHECK: vpslld +;CHECK: vpsrad +;CHECK: ret +define <16 x i32> @shift_16_i32(<16 x i32> %a) { + %b = lshr <16 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + %c = shl <16 x 
i32> %b, <i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12> + %d = ashr <16 x i32> %c, <i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12> + ret <16 x i32> %d; +} + +;CHECK-LABEL: shift_8_i64 +;CHECK: vpsrlq +;CHECK: vpsllq +;CHECK: vpsraq +;CHECK: ret +define <8 x i64> @shift_8_i64(<8 x i64> %a) { + %b = lshr <8 x i64> %a, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1> + %c = shl <8 x i64> %b, <i64 12, i64 12, i64 12, i64 12, i64 12, i64 12, i64 12, i64 12> + %d = ashr <8 x i64> %c, <i64 12, i64 12, i64 12, i64 12, i64 12, i64 12, i64 12, i64 12> + ret <8 x i64> %d; +} + +; CHECK-LABEL: variable_shl4 +; CHECK: vpsllvq %zmm +; CHECK: ret +define <8 x i64> @variable_shl4(<8 x i64> %x, <8 x i64> %y) { + %k = shl <8 x i64> %x, %y + ret <8 x i64> %k +} + +; CHECK-LABEL: variable_shl5 +; CHECK: vpsllvd %zmm +; CHECK: ret +define <16 x i32> @variable_shl5(<16 x i32> %x, <16 x i32> %y) { + %k = shl <16 x i32> %x, %y + ret <16 x i32> %k +} + +; CHECK-LABEL: variable_srl0 +; CHECK: vpsrlvd +; CHECK: ret +define <16 x i32> @variable_srl0(<16 x i32> %x, <16 x i32> %y) { + %k = lshr <16 x i32> %x, %y + ret <16 x i32> %k +} + +; CHECK-LABEL: variable_srl2 +; CHECK: psrlvq +; CHECK: ret +define <8 x i64> @variable_srl2(<8 x i64> %x, <8 x i64> %y) { + %k = lshr <8 x i64> %x, %y + ret <8 x i64> %k +} + +; CHECK-LABEL: variable_sra1 +; CHECK: vpsravd +; CHECK: ret +define <16 x i32> @variable_sra1(<16 x i32> %x, <16 x i32> %y) { + %k = ashr <16 x i32> %x, %y + ret <16 x i32> %k +} + +; CHECK-LABEL: variable_sra2 +; CHECK: vpsravq %zmm +; CHECK: ret +define <8 x i64> @variable_sra2(<8 x i64> %x, <8 x i64> %y) { + %k = ashr <8 x i64> %x, %y + ret <8 x i64> %k +} + +; CHECK-LABEL: variable_sra01_load +; CHECK: vpsravd (% +; CHECK: ret +define <16 x i32> @variable_sra01_load(<16 x i32> %x, <16 x i32>* %y) { + %y1 = load <16 
x i32>* %y + %k = ashr <16 x i32> %x, %y1 + ret <16 x i32> %k +} + +; CHECK-LABEL: variable_shl1_load +; CHECK: vpsllvd (% +; CHECK: ret +define <16 x i32> @variable_shl1_load(<16 x i32> %x, <16 x i32>* %y) { + %y1 = load <16 x i32>* %y + %k = shl <16 x i32> %x, %y1 + ret <16 x i32> %k +} +; CHECK-LABEL: variable_srl0_load +; CHECK: vpsrlvd (% +; CHECK: ret +define <16 x i32> @variable_srl0_load(<16 x i32> %x, <16 x i32>* %y) { + %y1 = load <16 x i32>* %y + %k = lshr <16 x i32> %x, %y1 + ret <16 x i32> %k +} + +; CHECK-LABEL: variable_srl3_load +; CHECK: vpsrlvq (% +; CHECK: ret +define <8 x i64> @variable_srl3_load(<8 x i64> %x, <8 x i64>* %y) { + %y1 = load <8 x i64>* %y + %k = lshr <8 x i64> %x, %y1 + ret <8 x i64> %k +} diff --git a/test/CodeGen/X86/avx512-shuffle.ll b/test/CodeGen/X86/avx512-shuffle.ll new file mode 100644 index 0000000..c9e0c2b --- /dev/null +++ b/test/CodeGen/X86/avx512-shuffle.ll @@ -0,0 +1,226 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s +; CHECK: LCP +; CHECK: .long 2 +; CHECK: .long 5 +; CHECK: .long 0 +; CHECK: .long 0 +; CHECK: .long 7 +; CHECK: .long 0 +; CHECK: .long 10 +; CHECK: .long 1 +; CHECK: .long 0 +; CHECK: .long 5 +; CHECK: .long 0 +; CHECK: .long 4 +; CHECK: .long 7 +; CHECK: .long 0 +; CHECK: .long 10 +; CHECK: .long 1 +; CHECK-LABEL: test1: +; CHECK: vpermps +; CHECK: ret +define <16 x float> @test1(<16 x float> %a) nounwind { + %c = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1> + ret <16 x float> %c +} + +; CHECK-LABEL: test2: +; CHECK: vpermd +; CHECK: ret +define <16 x i32> @test2(<16 x i32> %a) nounwind { + %c = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1> + ret <16 x i32> %c +} + +; CHECK-LABEL: test3: +; 
CHECK: vpermq +; CHECK: ret +define <8 x i64> @test3(<8 x i64> %a) nounwind { + %c = shufflevector <8 x i64> %a, <8 x i64> undef, <8 x i32> <i32 2, i32 5, i32 1, i32 undef, i32 7, i32 undef, i32 3, i32 1> + ret <8 x i64> %c +} + +; CHECK-LABEL: test4: +; CHECK: vpermpd +; CHECK: ret +define <8 x double> @test4(<8 x double> %a) nounwind { + %c = shufflevector <8 x double> %a, <8 x double> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + ret <8 x double> %c +} + +; CHECK-LABEL: test5: +; CHECK: vpermi2pd +; CHECK: ret +define <8 x double> @test5(<8 x double> %a, <8 x double> %b) nounwind { + %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5> + ret <8 x double> %c +} + +; CHECK-LABEL: test6: +; CHECK: vpermq $30 +; CHECK: ret +define <8 x i64> @test6(<8 x i64> %a) nounwind { + %c = shufflevector <8 x i64> %a, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 1, i32 0, i32 6, i32 7, i32 5, i32 4> + ret <8 x i64> %c +} + +; CHECK-LABEL: test7: +; CHECK: vpermi2q +; CHECK: ret +define <8 x i64> @test7(<8 x i64> %a, <8 x i64> %b) nounwind { + %c = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5> + ret <8 x i64> %c +} + +; CHECK-LABEL: test8: +; CHECK: vpermi2d +; CHECK: ret +define <16 x i32> @test8(<16 x i32> %a, <16 x i32> %b) nounwind { + %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24> + ret <16 x i32> %c +} + +; CHECK-LABEL: test9: +; CHECK: vpermi2ps +; CHECK: ret +define <16 x float> @test9(<16 x float> %a, <16 x float> %b) nounwind { + %c = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24> + ret <16 x float> %c +} + +; 
CHECK-LABEL: test10: +; CHECK: vpermi2ps ( +; CHECK: ret +define <16 x float> @test10(<16 x float> %a, <16 x float>* %b) nounwind { + %c = load <16 x float>* %b + %d = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24> + ret <16 x float> %d +} + +; CHECK-LABEL: test11: +; CHECK: vpermi2d ( +; CHECK: ret +define <16 x i32> @test11(<16 x i32> %a, <16 x i32>* %b) nounwind { + %c = load <16 x i32>* %b + %d = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24> + ret <16 x i32> %d +} + +; CHECK-LABEL: test12 +; CHECK: vmovlhpsz %xmm +; CHECK: ret +define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) nounwind { + %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5> + ret <4 x i32> %c +} + +; CHECK-LABEL: test13 +; CHECK: vpermilps $-79, %zmm +; CHECK: ret +define <16 x float> @test13(<16 x float> %a) { + %b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32><i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14> + ret <16 x float> %b +} + +; CHECK-LABEL: test14 +; CHECK: vpermilpd $-53, %zmm +; CHECK: ret +define <8 x double> @test14(<8 x double> %a) { + %b = shufflevector <8 x double> %a, <8 x double> undef, <8 x i32><i32 1, i32 1, i32 2, i32 3, i32 4, i32 4, i32 7, i32 7> + ret <8 x double> %b +} + +; CHECK-LABEL: test15 +; CHECK: vpshufd $-79, %zmm +; CHECK: ret +define <16 x i32> @test15(<16 x i32> %a) { + %b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32><i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14> + ret <16 x i32> %b +} +; CHECK-LABEL: test16 +; CHECK: valignq $2, %zmm0, %zmm1 +; CHECK: ret +define <8 x double> 
@test16(<8 x double> %a, <8 x double> %b) nounwind { + %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9> + ret <8 x double> %c +} + +; CHECK-LABEL: test17 +; CHECK: vshufpd $19, %zmm1, %zmm0 +; CHECK: ret +define <8 x double> @test17(<8 x double> %a, <8 x double> %b) nounwind { + %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 9, i32 2, i32 10, i32 5, i32 undef, i32 undef, i32 undef> + ret <8 x double> %c +} + +; CHECK-LABEL: test18 +; CHECK: vpunpckhdq %zmm +; CHECK: ret +define <16 x i32> @test18(<16 x i32> %a, <16 x i32> %c) { + %b = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32><i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15, i32 18, i32 26, i32 19, i32 27, i32 22, i32 30, i32 23, i32 31> + ret <16 x i32> %b +} + +; CHECK-LABEL: test19 +; CHECK: vpunpckldq %zmm +; CHECK: ret +define <16 x i32> @test19(<16 x i32> %a, <16 x i32> %c) { + %b = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32><i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13, i32 16, i32 24, i32 17, i32 25, i32 20, i32 28, i32 21, i32 29> + ret <16 x i32> %b +} + +; CHECK-LABEL: test20 +; CHECK: vpunpckhqdq %zmm +; CHECK: ret +define <8 x i64> @test20(<8 x i64> %a, <8 x i64> %c) { + %b = shufflevector <8 x i64> %a, <8 x i64> %c, <8 x i32><i32 1, i32 5, i32 3, i32 7, i32 9, i32 13, i32 11, i32 15> + ret <8 x i64> %b +} + +; CHECK-LABEL: test21 +; CHECK: vunpcklps %zmm +; CHECK: ret +define <16 x float> @test21(<16 x float> %a, <16 x float> %c) { + %b = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32><i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13, i32 16, i32 24, i32 17, i32 25, i32 20, i32 28, i32 21, i32 29> + ret <16 x float> %b +} + +; CHECK-LABEL: test22 +; CHECK: vmovhlpsz %xmm +; CHECK: ret +define <4 x i32> @test22(<4 x i32> %a, <4 x i32> %b) nounwind { + %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 6, i32 7> + ret 
<4 x i32> %c +} + +; CHECK-LABEL: @test23 +; CHECK: vshufps $-112, %zmm +; CHECK: ret +define <16 x float> @test23(<16 x float> %a, <16 x float> %c) { + %b = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32><i32 0, i32 0, i32 17, i32 18, i32 4, i32 4, i32 21, i32 22, i32 8, i32 8, i32 25, i32 26, i32 12, i32 12, i32 29, i32 30> + ret <16 x float> %b +} + +; CHECK-LABEL: @test24 +; CHECK: vpermi2d +; CHECK: ret +define <16 x i32> @test24(<16 x i32> %a, <16 x i32> %b) nounwind { + %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + ret <16 x i32> %c +} + +; CHECK-LABEL: @test25 +; CHECK: vshufps $52 +; CHECK: ret +define <16 x i32> @test25(<16 x i32> %a, <16 x i32> %b) nounwind { + %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 19, i32 undef, i32 4, i32 5, i32 23, i32 undef, i32 8, i32 9, i32 27, i32 undef, i32 12, i32 13, i32 undef, i32 undef> + ret <16 x i32> %c +} + +; CHECK-LABEL: @test26 +; CHECK: vmovshdup +; CHECK: ret +define <16 x i32> @test26(<16 x i32> %a) nounwind { + %c = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 undef, i32 9, i32 9, i32 undef, i32 11, i32 13, i32 undef, i32 undef, i32 undef> + ret <16 x i32> %c +}
\ No newline at end of file diff --git a/test/CodeGen/X86/avx512-trunc-ext.ll b/test/CodeGen/X86/avx512-trunc-ext.ll new file mode 100644 index 0000000..31db68c --- /dev/null +++ b/test/CodeGen/X86/avx512-trunc-ext.ll @@ -0,0 +1,127 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s + +; CHECK-LABEL: trunc_16x32_to_16x8 +; CHECK: vpmovdb +; CHECK: ret +define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) nounwind readnone { + %x = trunc <16 x i32> %i to <16 x i8> + ret <16 x i8> %x +} + +; CHECK-LABEL: trunc_8x64_to_8x16 +; CHECK: vpmovqw +; CHECK: ret +define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) nounwind readnone { + %x = trunc <8 x i64> %i to <8 x i16> + ret <8 x i16> %x +} + + +; CHECK-LABEL: zext_16x8_to_16x32 +; CHECK: vpmovzxbd {{.*}}%zmm +; CHECK: ret +define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone { + %x = zext <16 x i8> %i to <16 x i32> + ret <16 x i32> %x +} + +; CHECK-LABEL: sext_16x8_to_16x32 +; CHECK: vpmovsxbd {{.*}}%zmm +; CHECK: ret +define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone { + %x = sext <16 x i8> %i to <16 x i32> + ret <16 x i32> %x +} + + +; CHECK-LABEL: zext_16x16_to_16x32 +; CHECK: vpmovzxwd {{.*}}%zmm +; CHECK: ret +define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %i) nounwind readnone { + %x = zext <16 x i16> %i to <16 x i32> + ret <16 x i32> %x +} + +; CHECK-LABEL: zext_8x16_to_8x64 +; CHECK: vpmovzxwq +; CHECK: ret +define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %i) nounwind readnone { + %x = zext <8 x i16> %i to <8 x i64> + ret <8 x i64> %x +} + +;CHECK-LABEL: fptrunc_test +;CHECK: vcvtpd2ps {{.*}}%zmm +;CHECK: ret +define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone { + %b = fptrunc <8 x double> %a to <8 x float> + ret <8 x float> %b +} + +;CHECK-LABEL: fpext_test +;CHECK: vcvtps2pd {{.*}}%zmm +;CHECK: ret +define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone { + %b = fpext <8 x float> %a to <8 x double> + ret <8 x double> %b +} + 
+; CHECK-LABEL: zext_16i1_to_16xi32 +; CHECK: vpbroadcastd LCP{{.*}}(%rip), %zmm0 {%k1} {z} +; CHECK: ret +define <16 x i32> @zext_16i1_to_16xi32(i16 %b) { + %a = bitcast i16 %b to <16 x i1> + %c = zext <16 x i1> %a to <16 x i32> + ret <16 x i32> %c +} + +; CHECK-LABEL: zext_8i1_to_8xi64 +; CHECK: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z} +; CHECK: ret +define <8 x i64> @zext_8i1_to_8xi64(i8 %b) { + %a = bitcast i8 %b to <8 x i1> + %c = zext <8 x i1> %a to <8 x i64> + ret <8 x i64> %c +} + +; CHECK-LABEL: trunc_16i8_to_16i1 +; CHECK: vpmovsxbd +; CHECK: vpandd +; CHECK: vptestmd +; CHECK: ret +define i16 @trunc_16i8_to_16i1(<16 x i8> %a) { + %mask_b = trunc <16 x i8>%a to <16 x i1> + %mask = bitcast <16 x i1> %mask_b to i16 + ret i16 %mask +} + +; CHECK-LABEL: trunc_16i32_to_16i1 +; CHECK: vpandd +; CHECK: vptestmd +; CHECK: ret +define i16 @trunc_16i32_to_16i1(<16 x i32> %a) { + %mask_b = trunc <16 x i32>%a to <16 x i1> + %mask = bitcast <16 x i1> %mask_b to i16 + ret i16 %mask +} + +; CHECK-LABEL: trunc_8i16_to_8i1 +; CHECK: vpmovsxwq +; CHECK: vpandq LCP{{.*}}(%rip){1to8} +; CHECK: vptestmq +; CHECK: ret +define i8 @trunc_8i16_to_8i1(<8 x i16> %a) { + %mask_b = trunc <8 x i16>%a to <8 x i1> + %mask = bitcast <8 x i1> %mask_b to i8 + ret i8 %mask +} + +; CHECK-LABEL: sext_8i1_8i32 +; CHECK: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z} +; CHECK: ret +define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind { + %x = icmp slt <8 x i32> %a1, %a2 + %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> + %y = sext <8 x i1> %x1 to <8 x i32> + ret <8 x i32> %y +} diff --git a/test/CodeGen/X86/avx512-vbroadcast.ll b/test/CodeGen/X86/avx512-vbroadcast.ll index 4f07f94..6f89d6c 100644 --- a/test/CodeGen/X86/avx512-vbroadcast.ll +++ b/test/CodeGen/X86/avx512-vbroadcast.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s -;CHECK: _inreg16xi32 +;CHECK-LABEL: _inreg16xi32: ;CHECK: 
vpbroadcastd {{.*}}, %zmm ;CHECK: ret define <16 x i32> @_inreg16xi32(i32 %a) { @@ -9,7 +9,7 @@ define <16 x i32> @_inreg16xi32(i32 %a) { ret <16 x i32> %c } -;CHECK: _inreg8xi64 +;CHECK-LABEL: _inreg8xi64: ;CHECK: vpbroadcastq {{.*}}, %zmm ;CHECK: ret define <8 x i64> @_inreg8xi64(i64 %a) { @@ -18,7 +18,7 @@ define <8 x i64> @_inreg8xi64(i64 %a) { ret <8 x i64> %c } -;CHECK: _inreg16xfloat +;CHECK-LABEL: _inreg16xfloat: ;CHECK: vbroadcastssz {{.*}}, %zmm ;CHECK: ret define <16 x float> @_inreg16xfloat(float %a) { @@ -27,7 +27,7 @@ define <16 x float> @_inreg16xfloat(float %a) { ret <16 x float> %c } -;CHECK: _inreg8xdouble +;CHECK-LABEL: _inreg8xdouble: ;CHECK: vbroadcastsdz {{.*}}, %zmm ;CHECK: ret define <8 x double> @_inreg8xdouble(double %a) { @@ -35,3 +35,19 @@ define <8 x double> @_inreg8xdouble(double %a) { %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer ret <8 x double> %c } + +;CHECK-LABEL: _xmm16xi32 +;CHECK: vpbroadcastd +;CHECK: ret +define <16 x i32> @_xmm16xi32(<16 x i32> %a) { + %b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> zeroinitializer + ret <16 x i32> %b +} + +;CHECK-LABEL: _xmm16xfloat +;CHECK: vbroadcastssz +;CHECK: ret +define <16 x float> @_xmm16xfloat(<16 x float> %a) { + %b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> zeroinitializer + ret <16 x float> %b +} diff --git a/test/CodeGen/X86/avx512-vec-cmp.ll b/test/CodeGen/X86/avx512-vec-cmp.ll new file mode 100644 index 0000000..6ca5bcc --- /dev/null +++ b/test/CodeGen/X86/avx512-vec-cmp.ll @@ -0,0 +1,113 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s + +; CHECK-LABEL: test1 +; CHECK: vcmpleps +; CHECK: vmovups +; CHECK: ret +define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind { + %mask = fcmp ole <16 x float> %x, %y + %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y + ret <16 x float> %max +} + +; CHECK-LABEL: test2 +; CHECK: vcmplepd +; CHECK: vmovupd +; CHECK: 
ret +define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind { + %mask = fcmp ole <8 x double> %x, %y + %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y + ret <8 x double> %max +} + +; CHECK-LABEL: test3 +; CHECK: vpcmpeqd (%rdi) +; CHECK: vmovdqu32 +; CHECK: ret +define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwind { + %y = load <16 x i32>* %yp, align 4 + %mask = icmp eq <16 x i32> %x, %y + %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1 + ret <16 x i32> %max +} + +; CHECK-LABEL: @test4_unsigned +; CHECK: vpcmpnltud +; CHECK: vmovdqu32 +; CHECK: ret +define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y) nounwind { + %mask = icmp uge <16 x i32> %x, %y + %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y + ret <16 x i32> %max +} + +; CHECK-LABEL: test5 +; CHECK: vpcmpeqq {{.*}}%k1 +; CHECK: vmovdqu64 {{.*}}%k1 +; CHECK: ret +define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind { + %mask = icmp eq <8 x i64> %x, %y + %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y + ret <8 x i64> %max +} + +; CHECK-LABEL: test6_unsigned +; CHECK: vpcmpnleuq {{.*}}%k1 +; CHECK: vmovdqu64 {{.*}}%k1 +; CHECK: ret +define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y) nounwind { + %mask = icmp ugt <8 x i64> %x, %y + %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y + ret <8 x i64> %max +} + +; CHECK-LABEL: test7 +; CHECK: xor +; CHECK: vcmpltps +; CHECK: vblendvps +; CHECK: ret +define <4 x float> @test7(<4 x float> %a, <4 x float> %b) { + %mask = fcmp olt <4 x float> %a, zeroinitializer + %c = select <4 x i1>%mask, <4 x float>%a, <4 x float>%b + ret <4 x float>%c +} + +; CHECK-LABEL: test8 +; CHECK: xor +; CHECK: vcmpltpd +; CHECK: vblendvpd +; CHECK: ret +define <2 x double> @test8(<2 x double> %a, <2 x double> %b) { + %mask = fcmp olt <2 x double> %a, zeroinitializer + %c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b + ret <2 x double>%c +} + +; CHECK-LABEL: 
test9 +; CHECK: vpcmpeqd +; CHECK: vpblendmd +; CHECK: ret +define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind { + %mask = icmp eq <8 x i32> %x, %y + %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y + ret <8 x i32> %max +} + +; CHECK-LABEL: test10 +; CHECK: vcmpeqps +; CHECK: vblendmps +; CHECK: ret +define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind { + %mask = fcmp oeq <8 x float> %x, %y + %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y + ret <8 x float> %max +} + +; CHECK-LABEL: test11_unsigned +; CHECK: vpmaxud +; CHECK: ret +define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind { + %mask = icmp ugt <8 x i32> %x, %y + %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y + ret <8 x i32> %max +} diff --git a/test/CodeGen/X86/bc-extract.ll b/test/CodeGen/X86/bc-extract.ll index ceabcb7..a1c0f5a 100644 --- a/test/CodeGen/X86/bc-extract.ll +++ b/test/CodeGen/X86/bc-extract.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mattr=+sse42 | FileCheck %s +; RUN: llc < %s -march=x86-64 -mattr=+sse4.2 | FileCheck %s define float @extractFloat1() nounwind { diff --git a/test/CodeGen/X86/bitcast2.ll b/test/CodeGen/X86/bitcast2.ll index 48922b5..12aa863 100644 --- a/test/CodeGen/X86/bitcast2.ll +++ b/test/CodeGen/X86/bitcast2.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=x86-64 | grep movd | count 2 -; RUN: llc < %s -march=x86-64 | not grep rsp +; RUN: llc < %s -march=x86-64 -mattr=-avx | grep movd | count 2 +; RUN: llc < %s -march=x86-64 -mattr=-avx | not grep rsp define i64 @test1(double %A) { %B = bitcast double %A to i64 diff --git a/test/CodeGen/X86/blend-msb.ll b/test/CodeGen/X86/blend-msb.ll index fa775bd..4f2060f 100644 --- a/test/CodeGen/X86/blend-msb.ll +++ b/test/CodeGen/X86/blend-msb.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse41 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s ; In this test we 
check that sign-extend of the mask bit is performed by diff --git a/test/CodeGen/X86/bmi.ll b/test/CodeGen/X86/bmi.ll index 4eda888..242075a 100644 --- a/test/CodeGen/X86/bmi.ll +++ b/test/CodeGen/X86/bmi.ll @@ -111,6 +111,23 @@ define i32 @bextr32_load(i32* %x, i32 %y) nounwind readnone { declare i32 @llvm.x86.bmi.bextr.32(i32, i32) nounwind readnone +define i32 @bextr32b(i32 %x) nounwind uwtable readnone ssp { + %1 = lshr i32 %x, 4 + %2 = and i32 %1, 4095 + ret i32 %2 +; CHECK-LABEL: bextr32b: +; CHECK: bextrl +} + +define i32 @bextr32b_load(i32* %x) nounwind uwtable readnone ssp { + %1 = load i32* %x + %2 = lshr i32 %1, 4 + %3 = and i32 %2, 4095 + ret i32 %3 +; CHECK-LABEL: bextr32b_load: +; CHECK: bextrl {{.*}}, ({{.*}}), {{.*}} +} + define i64 @bextr64(i64 %x, i64 %y) nounwind readnone { %tmp = tail call i64 @llvm.x86.bmi.bextr.64(i64 %x, i64 %y) ret i64 %tmp @@ -120,6 +137,14 @@ define i64 @bextr64(i64 %x, i64 %y) nounwind readnone { declare i64 @llvm.x86.bmi.bextr.64(i64, i64) nounwind readnone +define i64 @bextr64b(i64 %x) nounwind uwtable readnone ssp { + %1 = lshr i64 %x, 4 + %2 = and i64 %1, 4095 + ret i64 %2 +; CHECK-LABEL: bextr64b: +; CHECK: bextrq +} + define i32 @bzhi32(i32 %x, i32 %y) nounwind readnone { %tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x, i32 %y) ret i32 %tmp @@ -146,6 +171,51 @@ define i64 @bzhi64(i64 %x, i64 %y) nounwind readnone { declare i64 @llvm.x86.bmi.bzhi.64(i64, i64) nounwind readnone +define i32 @bzhi32b(i32 %x, i8 zeroext %index) #0 { +entry: + %conv = zext i8 %index to i32 + %shl = shl i32 1, %conv + %sub = add nsw i32 %shl, -1 + %and = and i32 %sub, %x + ret i32 %and +; CHECK-LABEL: bzhi32b: +; CHECK: bzhil +} + +define i32 @bzhi32b_load(i32* %w, i8 zeroext %index) #0 { +entry: + %x = load i32* %w + %conv = zext i8 %index to i32 + %shl = shl i32 1, %conv + %sub = add nsw i32 %shl, -1 + %and = and i32 %sub, %x + ret i32 %and +; CHECK-LABEL: bzhi32b_load: +; CHECK: bzhil {{.*}}, ({{.*}}), {{.*}} +} + +define i32 
@bzhi32c(i32 %x, i8 zeroext %index) #0 { +entry: + %conv = zext i8 %index to i32 + %shl = shl i32 1, %conv + %sub = add nsw i32 %shl, -1 + %and = and i32 %x, %sub + ret i32 %and +; CHECK-LABEL: bzhi32c: +; CHECK: bzhil +} + +define i64 @bzhi64b(i64 %x, i8 zeroext %index) #0 { +entry: + %conv = zext i8 %index to i64 + %shl = shl i64 1, %conv + %sub = add nsw i64 %shl, -1 + %and = and i64 %x, %sub + ret i64 %and +; CHECK-LABEL: bzhi64b: +; CHECK: bzhiq +} + define i32 @blsi32(i32 %x) nounwind readnone { %tmp = sub i32 0, %x %tmp2 = and i32 %x, %tmp diff --git a/test/CodeGen/X86/bool-simplify.ll b/test/CodeGen/X86/bool-simplify.ll index fa6f6e8..a0a1c36 100644 --- a/test/CodeGen/X86/bool-simplify.ll +++ b/test/CodeGen/X86/bool-simplify.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mattr=+sse41,-avx,+rdrand,+rdseed | FileCheck %s +; RUN: llc < %s -march=x86-64 -mattr=+sse4.1,-avx,+rdrnd,+rdseed | FileCheck %s define i32 @foo(<2 x i64> %c, i32 %a, i32 %b) { %t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %c) diff --git a/test/CodeGen/X86/break-anti-dependencies.ll b/test/CodeGen/X86/break-anti-dependencies.ll index c942614..614d0ad 100644 --- a/test/CodeGen/X86/break-anti-dependencies.ll +++ b/test/CodeGen/X86/break-anti-dependencies.ll @@ -1,7 +1,7 @@ ; Without list-burr scheduling we may not see the difference in codegen here. ; Use a subtarget that has post-RA scheduling enabled because the anti-dependency ; breaker requires liveness information to be kept. 
-; RUN: llc < %s -march=x86-64 -mcpu=atom -post-RA-scheduler -pre-RA-sched=list-burr -break-anti-dependencies=none > %t +; RUN: llc < %s -march=x86-64 -mcpu=atom -enable-misched=false -post-RA-scheduler -pre-RA-sched=list-burr -break-anti-dependencies=none > %t ; RUN: grep "%xmm0" %t | count 14 ; RUN: not grep "%xmm1" %t ; RUN: llc < %s -march=x86-64 -mcpu=atom -post-RA-scheduler -break-anti-dependencies=critical > %t diff --git a/test/CodeGen/X86/break-avx-dep.ll b/test/CodeGen/X86/break-avx-dep.ll new file mode 100644 index 0000000..210bda1 --- /dev/null +++ b/test/CodeGen/X86/break-avx-dep.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s +; +; rdar:15221834 False AVX register dependencies cause 5x slowdown on +; flops-6. Make sure the unused register read by vcvtsi2sdq is zeroed +; to avoid cyclic dependence on a write to the same register in a +; previous iteration. + +; CHECK-LABEL: t1: +; CHECK-LABEL: %loop +; CHECK: vxorps %[[REG:xmm.]], %{{xmm.}}, %{{xmm.}} +; CHECK: vcvtsi2sdq %{{r[0-9a-x]+}}, %[[REG]], %{{xmm.}} +define i64 @t1(i64* nocapture %x, double* nocapture %y) nounwind { +entry: + %vx = load i64* %x + br label %loop +loop: + %i = phi i64 [ 1, %entry ], [ %inc, %loop ] + %s1 = phi i64 [ %vx, %entry ], [ %s2, %loop ] + %fi = sitofp i64 %i to double + %vy = load double* %y + %fipy = fadd double %fi, %vy + %iipy = fptosi double %fipy to i64 + %s2 = add i64 %s1, %iipy + %inc = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %inc, 156250000 + br i1 %exitcond, label %ret, label %loop +ret: + ret i64 %s2 +} diff --git a/test/CodeGen/X86/bswap.ll b/test/CodeGen/X86/bswap.ll index 9e46592..e6a456c 100644 --- a/test/CodeGen/X86/bswap.ll +++ b/test/CodeGen/X86/bswap.ll @@ -1,6 +1,7 @@ ; bswap should be constant folded when it is passed a constant argument ; RUN: llc < %s -march=x86 -mcpu=i686 | FileCheck %s +; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK64 declare i16 @llvm.bswap.i16(i16) @@ -11,6 +12,9 @@ declare 
i64 @llvm.bswap.i64(i64) define i16 @W(i16 %A) { ; CHECK-LABEL: W: ; CHECK: rolw $8, %ax + +; CHECK64-LABEL: W: +; CHECK64: rolw $8, % %Z = call i16 @llvm.bswap.i16( i16 %A ) ; <i16> [#uses=1] ret i16 %Z } @@ -18,6 +22,9 @@ define i16 @W(i16 %A) { define i32 @X(i32 %A) { ; CHECK-LABEL: X: ; CHECK: bswapl %eax + +; CHECK64-LABEL: X: +; CHECK64: bswapl % %Z = call i32 @llvm.bswap.i32( i32 %A ) ; <i32> [#uses=1] ret i32 %Z } @@ -26,6 +33,9 @@ define i64 @Y(i64 %A) { ; CHECK-LABEL: Y: ; CHECK: bswapl %eax ; CHECK: bswapl %edx + +; CHECK64-LABEL: Y: +; CHECK64: bswapq % %Z = call i64 @llvm.bswap.i64( i64 %A ) ; <i64> [#uses=1] ret i64 %Z } @@ -33,9 +43,13 @@ define i64 @Y(i64 %A) { ; rdar://9164521 define i32 @test1(i32 %a) nounwind readnone { entry: -; CHECK: test1 -; CHECK: bswapl %eax -; CHECK: shrl $16, %eax +; CHECK-LABEL: test1: +; CHECK: bswapl [[REG:%.*]] +; CHECK: shrl $16, [[REG]] + +; CHECK64-LABEL: test1: +; CHECK64: bswapl [[REG:%.*]] +; CHECK64: shrl $16, [[REG]] %and = lshr i32 %a, 8 %shr3 = and i32 %and, 255 %and2 = shl i32 %a, 8 @@ -46,9 +60,13 @@ entry: define i32 @test2(i32 %a) nounwind readnone { entry: -; CHECK: test2 -; CHECK: bswapl %eax -; CHECK: sarl $16, %eax +; CHECK-LABEL: test2: +; CHECK: bswapl [[REG:%.*]] +; CHECK: sarl $16, [[REG]] + +; CHECK64-LABEL: test2: +; CHECK64: bswapl [[REG:%.*]] +; CHECK64: sarl $16, [[REG]] %and = lshr i32 %a, 8 %shr4 = and i32 %and, 255 %and2 = shl i32 %a, 8 @@ -57,3 +75,80 @@ entry: %conv3 = ashr exact i32 %sext, 16 ret i32 %conv3 } + +@var8 = global i8 0 +@var16 = global i16 0 + +; The "shl" below can move bits into the high parts of the value, so the +; operation is not a "bswap, shr" pair. 
+ +; rdar://problem/14814049 +define i64 @not_bswap() { +; CHECK-LABEL: not_bswap: +; CHECK-NOT: bswapl +; CHECK: ret + +; CHECK64-LABEL: not_bswap: +; CHECK64-NOT: bswapq +; CHECK64: ret + %init = load i16* @var16 + %big = zext i16 %init to i64 + + %hishifted = lshr i64 %big, 8 + %loshifted = shl i64 %big, 8 + + %notswapped = or i64 %hishifted, %loshifted + + ret i64 %notswapped +} + +; This time, the lshr (and subsequent or) is completely useless. While it's +; technically correct to convert this into a "bswap, shr", it's suboptimal. A +; simple shl works better. + +define i64 @not_useful_bswap() { +; CHECK-LABEL: not_useful_bswap: +; CHECK-NOT: bswapl +; CHECK: ret + +; CHECK64-LABEL: not_useful_bswap: +; CHECK64-NOT: bswapq +; CHECK64: ret + + %init = load i8* @var8 + %big = zext i8 %init to i64 + + %hishifted = lshr i64 %big, 8 + %loshifted = shl i64 %big, 8 + + %notswapped = or i64 %hishifted, %loshifted + + ret i64 %notswapped +} + +; Finally, it *is* OK to just mask off the shl if we know that the value is zero +; beyond 16 bits anyway. This is a legitimate bswap. 
+ +define i64 @finally_useful_bswap() { +; CHECK-LABEL: finally_useful_bswap: +; CHECK: bswapl [[REG:%.*]] +; CHECK: shrl $16, [[REG]] +; CHECK: ret + +; CHECK64-LABEL: finally_useful_bswap: +; CHECK64: bswapq [[REG:%.*]] +; CHECK64: shrq $48, [[REG]] +; CHECK64: ret + + %init = load i16* @var16 + %big = zext i16 %init to i64 + + %hishifted = lshr i64 %big, 8 + %lomasked = and i64 %big, 255 + %loshifted = shl i64 %lomasked, 8 + + %swapped = or i64 %hishifted, %loshifted + + ret i64 %swapped +} + diff --git a/test/CodeGen/X86/bt.ll b/test/CodeGen/X86/bt.ll index e28923b..f12a354 100644 --- a/test/CodeGen/X86/bt.ll +++ b/test/CodeGen/X86/bt.ll @@ -38,7 +38,7 @@ UnifiedReturnBlock: ; preds = %entry define void @test2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: test2b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 1, %tmp29 @@ -56,7 +56,7 @@ UnifiedReturnBlock: ; preds = %entry define void @atest2(i32 %x, i32 %n) nounwind { entry: ; CHECK: atest2 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1] @@ -74,7 +74,7 @@ UnifiedReturnBlock: ; preds = %entry define void @atest2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: atest2b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 1, %tmp29 %tmp4 = icmp eq i32 %tmp3, 0 ; <i1> [#uses=1] @@ -91,7 +91,7 @@ UnifiedReturnBlock: ; preds = %entry define void @test3(i32 %x, i32 %n) nounwind { entry: ; CHECK: test3 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1] @@ -109,7 +109,7 @@ UnifiedReturnBlock: ; preds = %entry define void @test3b(i32 %x, i32 %n) nounwind { entry: ; CHECK: test3b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = shl i32 1, %n ; 
<i32> [#uses=1] %tmp3 = and i32 %x, %tmp29 @@ -127,7 +127,7 @@ UnifiedReturnBlock: ; preds = %entry define void @testne2(i32 %x, i32 %n) nounwind { entry: ; CHECK: testne2 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1] @@ -145,7 +145,7 @@ UnifiedReturnBlock: ; preds = %entry define void @testne2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: testne2b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 1, %tmp29 @@ -163,7 +163,7 @@ UnifiedReturnBlock: ; preds = %entry define void @atestne2(i32 %x, i32 %n) nounwind { entry: ; CHECK: atestne2 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1] @@ -181,7 +181,7 @@ UnifiedReturnBlock: ; preds = %entry define void @atestne2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: atestne2b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 1, %tmp29 @@ -199,7 +199,7 @@ UnifiedReturnBlock: ; preds = %entry define void @testne3(i32 %x, i32 %n) nounwind { entry: ; CHECK: testne3 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1] @@ -217,7 +217,7 @@ UnifiedReturnBlock: ; preds = %entry define void @testne3b(i32 %x, i32 %n) nounwind { entry: ; CHECK: testne3b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %x, %tmp29 @@ -235,7 +235,7 @@ UnifiedReturnBlock: ; preds = %entry define void @query2(i32 %x, i32 %n) nounwind { entry: ; CHECK: query2 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, 1 ; <i32> 
[#uses=1] @@ -253,7 +253,7 @@ UnifiedReturnBlock: ; preds = %entry define void @query2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: query2b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 1, %tmp29 @@ -271,7 +271,7 @@ UnifiedReturnBlock: ; preds = %entry define void @aquery2(i32 %x, i32 %n) nounwind { entry: ; CHECK: aquery2 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1] @@ -289,7 +289,7 @@ UnifiedReturnBlock: ; preds = %entry define void @aquery2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: aquery2b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 1, %tmp29 @@ -307,7 +307,7 @@ UnifiedReturnBlock: ; preds = %entry define void @query3(i32 %x, i32 %n) nounwind { entry: ; CHECK: query3 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1] @@ -325,7 +325,7 @@ UnifiedReturnBlock: ; preds = %entry define void @query3b(i32 %x, i32 %n) nounwind { entry: ; CHECK: query3b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %x, %tmp29 @@ -343,7 +343,7 @@ UnifiedReturnBlock: ; preds = %entry define void @query3x(i32 %x, i32 %n) nounwind { entry: ; CHECK: query3x -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1] @@ -361,7 +361,7 @@ UnifiedReturnBlock: ; preds = %entry define void @query3bx(i32 %x, i32 %n) nounwind { entry: ; CHECK: query3bx -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %x, %tmp29 @@ -379,7 +379,7 @@ UnifiedReturnBlock: ; preds = 
%entry define void @queryne2(i32 %x, i32 %n) nounwind { entry: ; CHECK: queryne2 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1] @@ -397,7 +397,7 @@ UnifiedReturnBlock: ; preds = %entry define void @queryne2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: queryne2b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 1, %tmp29 @@ -415,7 +415,7 @@ UnifiedReturnBlock: ; preds = %entry define void @aqueryne2(i32 %x, i32 %n) nounwind { entry: ; CHECK: aqueryne2 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1] @@ -433,7 +433,7 @@ UnifiedReturnBlock: ; preds = %entry define void @aqueryne2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: aqueryne2b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 1, %tmp29 @@ -451,7 +451,7 @@ UnifiedReturnBlock: ; preds = %entry define void @queryne3(i32 %x, i32 %n) nounwind { entry: ; CHECK: queryne3 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1] @@ -469,7 +469,7 @@ UnifiedReturnBlock: ; preds = %entry define void @queryne3b(i32 %x, i32 %n) nounwind { entry: ; CHECK: queryne3b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %x, %tmp29 @@ -487,7 +487,7 @@ UnifiedReturnBlock: ; preds = %entry define void @queryne3x(i32 %x, i32 %n) nounwind { entry: ; CHECK: queryne3x -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1] @@ -505,7 +505,7 @@ UnifiedReturnBlock: ; preds = %entry define void 
@queryne3bx(i32 %x, i32 %n) nounwind { entry: ; CHECK: queryne3bx -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %x, %tmp29 diff --git a/test/CodeGen/X86/byval7.ll b/test/CodeGen/X86/byval7.ll index 8a96e41..42751d7 100644 --- a/test/CodeGen/X86/byval7.ll +++ b/test/CodeGen/X86/byval7.ll @@ -7,14 +7,14 @@ define i32 @main() nounwind { entry: ; CHECK-LABEL: main: -; CHECK: movl $1, (%esp) ; CHECK: leal 16(%esp), %edi ; CHECK: leal 160(%esp), %esi ; CHECK: rep;movsl +; CHECK: movl $1, (%esp) %s = alloca %struct.S ; <%struct.S*> [#uses=2] %tmp15 = getelementptr %struct.S* %s, i32 0, i32 0 ; <<2 x i64>*> [#uses=1] store <2 x i64> < i64 8589934595, i64 1 >, <2 x i64>* %tmp15, align 16 - call void @t( i32 1, %struct.S* byval %s ) nounwind + call void @t( i32 1, %struct.S* byval %s ) nounwind ret i32 0 } diff --git a/test/CodeGen/X86/chain_order.ll b/test/CodeGen/X86/chain_order.ll index 8c1c864..c88726e 100644 --- a/test/CodeGen/X86/chain_order.ll +++ b/test/CodeGen/X86/chain_order.ll @@ -3,8 +3,8 @@ ;CHECK-LABEL: cftx020: ;CHECK: vmovsd (%rdi), %xmm{{.*}} ;CHECK: vmovsd 16(%rdi), %xmm{{.*}} -;CHECK: vmovhpd 8(%rdi), %xmm{{.*}} ;CHECK: vmovsd 24(%rdi), %xmm{{.*}} +;CHECK: vmovhpd 8(%rdi), %xmm{{.*}} ;CHECK: vmovupd %xmm{{.*}}, (%rdi) ;CHECK: vmovupd %xmm{{.*}}, 16(%rdi) ;CHECK: ret @@ -35,4 +35,3 @@ entry: store <2 x double> %14, <2 x double>* %15, align 8 ret void } - diff --git a/test/CodeGen/X86/cmov.ll b/test/CodeGen/X86/cmov.ll index 92c0445..215b862 100644 --- a/test/CodeGen/X86/cmov.ll +++ b/test/CodeGen/X86/cmov.ll @@ -4,8 +4,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define i32 @test1(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone { entry: ; CHECK-LABEL: test1: -; CHECK: movl $12, %eax -; CHECK-NEXT: btl +; CHECK: btl +; CHECK-NEXT: movl $12, %eax ; CHECK-NEXT: cmovael (%rcx), %eax ; CHECK-NEXT: ret @@ -19,8 +19,8 @@ entry: define i32 
@test2(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone { entry: ; CHECK-LABEL: test2: -; CHECK: movl $12, %eax -; CHECK-NEXT: btl +; CHECK: btl +; CHECK-NEXT: movl $12, %eax ; CHECK-NEXT: cmovbl (%rcx), %eax ; CHECK-NEXT: ret @@ -92,7 +92,7 @@ bb.i.i.i: ; preds = %entry ; CHECK: testb ; CHECK-NOT: xor ; CHECK: setne -; CHECK-NEXT: testb +; CHECK: testb func_4.exit.i: ; preds = %bb.i.i.i, %entry %.not.i = xor i1 %2, true ; <i1> [#uses=1] diff --git a/test/CodeGen/X86/coalesce-implicitdef.ll b/test/CodeGen/X86/coalesce-implicitdef.ll index 19cd08c..9be0452 100644 --- a/test/CodeGen/X86/coalesce-implicitdef.ll +++ b/test/CodeGen/X86/coalesce-implicitdef.ll @@ -26,7 +26,7 @@ for.cond: ; preds = %for.inc34, %entry br i1 %tobool, label %for.end36, label %for.body for.body: ; preds = %for.cond - store i32 0, i32* @c, align 4, !tbaa !0 + store i32 0, i32* @c, align 4 br label %for.body2 for.body2: ; preds = %for.body, %for.inc @@ -35,7 +35,7 @@ for.body2: ; preds = %for.body, %for.inc br i1 %tobool3, label %if.then10, label %if.then if.then: ; preds = %for.body2 - store i32 0, i32* %i, align 4, !tbaa !0 + store i32 0, i32* %i, align 4 br label %for.body6 for.body6: ; preds = %if.then, %for.body6 @@ -43,7 +43,7 @@ for.body6: ; preds = %if.then, %for.body6 br i1 true, label %for.body6, label %for.inc if.then10: ; preds = %for.body2 - store i32 1, i32* @b, align 4, !tbaa !0 + store i32 1, i32* @b, align 4 ret void for.inc: ; preds = %for.body6 @@ -66,30 +66,30 @@ while.end: ; preds = %while.cond for.inc27.backedge: ; preds = %while.end, %if.then22 %inc28 = add nsw i32 %0, 1 - store i32 %inc28, i32* @b, align 4, !tbaa !0 + store i32 %inc28, i32* @b, align 4 %tobool17 = icmp eq i32 %inc28, 0 br i1 %tobool17, label %for.inc27.if.end30.loopexit56_crit_edge, label %while.condthread-pre-split if.then22: ; preds = %while.end - %1 = load i16* %p2.1, align 2, !tbaa !3 + %1 = load i16* %p2.1, align 2 %tobool23 = icmp eq i16 %1, 0 br i1 %tobool23, label %for.inc27.backedge, label 
%label.loopexit label.loopexit: ; preds = %if.then22 - store i32 %inc20, i32* @a, align 4, !tbaa !0 + store i32 %inc20, i32* @a, align 4 %inc2858 = add nsw i32 %0, 1 - store i32 %inc2858, i32* @b, align 4, !tbaa !0 + store i32 %inc2858, i32* @b, align 4 %tobool1759 = icmp eq i32 %inc2858, 0 br i1 %tobool1759, label %if.end30, label %while.condthread-pre-split for.inc27.if.end30.loopexit56_crit_edge: ; preds = %for.inc27.backedge - store i32 %inc20, i32* @a, align 4, !tbaa !0 + store i32 %inc20, i32* @a, align 4 br label %if.end30 if.end30: ; preds = %for.inc27.if.end30.loopexit56_crit_edge, %label.loopexit, %label.preheader, %for.inc %i.0.load46 = phi i32 [ 0, %for.inc ], [ %i.0.load4669, %label.preheader ], [ %i.0.load4669, %label.loopexit ], [ %i.0.load4669, %for.inc27.if.end30.loopexit56_crit_edge ] %pi.4 = phi i32* [ %i, %for.inc ], [ %pi.3.ph, %label.preheader ], [ %pi.3.ph, %label.loopexit ], [ %pi.3.ph, %for.inc27.if.end30.loopexit56_crit_edge ] - %2 = load i32* %pi.4, align 4, !tbaa !0 + %2 = load i32* %pi.4, align 4 %tobool31 = icmp eq i32 %2, 0 br i1 %tobool31, label %for.inc34, label %label.preheader @@ -100,31 +100,26 @@ for.inc34: ; preds = %if.end30 for.end36: ; preds = %for.cond store i32 1, i32* %i, align 4 - %3 = load i32* @c, align 4, !tbaa !0 + %3 = load i32* @c, align 4 %tobool37 = icmp eq i32 %3, 0 br i1 %tobool37, label %label.preheader, label %land.rhs land.rhs: ; preds = %for.end36 - store i32 0, i32* @a, align 4, !tbaa !0 + store i32 0, i32* @a, align 4 br label %label.preheader label.preheader: ; preds = %for.end36, %if.end30, %land.rhs %i.0.load4669 = phi i32 [ 1, %land.rhs ], [ %i.0.load46, %if.end30 ], [ 1, %for.end36 ] %pi.3.ph = phi i32* [ %pi.0, %land.rhs ], [ %pi.4, %if.end30 ], [ %pi.0, %for.end36 ] - %4 = load i32* @b, align 4, !tbaa !0 + %4 = load i32* @b, align 4 %inc285863 = add nsw i32 %4, 1 - store i32 %inc285863, i32* @b, align 4, !tbaa !0 + store i32 %inc285863, i32* @b, align 4 %tobool175964 = icmp eq i32 %inc285863, 0 br 
i1 %tobool175964, label %if.end30, label %while.condthread-pre-split.lr.ph.lr.ph while.condthread-pre-split.lr.ph.lr.ph: ; preds = %label.preheader - %.pr50 = load i32* @d, align 4, !tbaa !0 + %.pr50 = load i32* @d, align 4 %tobool19 = icmp eq i32 %.pr50, 0 - %a.promoted.pre = load i32* @a, align 4, !tbaa !0 + %a.promoted.pre = load i32* @a, align 4 br label %while.condthread-pre-split } - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} -!3 = metadata !{metadata !"short", metadata !1} diff --git a/test/CodeGen/X86/coff-feat00.ll b/test/CodeGen/X86/coff-feat00.ll new file mode 100644 index 0000000..1dcd427 --- /dev/null +++ b/test/CodeGen/X86/coff-feat00.ll @@ -0,0 +1,7 @@ +; RUN: llc -O0 -mtriple=i386-pc-win32 -filetype=asm -o - %s | FileCheck %s + +define i32 @foo() { + ret i32 0 +} + +; CHECK: @feat.00 = 1 diff --git a/test/CodeGen/X86/commute-two-addr.ll b/test/CodeGen/X86/commute-two-addr.ll index eb44e08..656c385 100644 --- a/test/CodeGen/X86/commute-two-addr.ll +++ b/test/CodeGen/X86/commute-two-addr.ll @@ -38,10 +38,10 @@ define i32 @t2(i32 %X, i32 %Y) nounwind { define %0 @t3(i32 %lb, i8 zeroext %has_lb, i8 zeroext %lb_inclusive, i32 %ub, i8 zeroext %has_ub, i8 zeroext %ub_inclusive) nounwind { entry: ; DARWIN-LABEL: t3: -; DARWIN: shll $16 ; DARWIN: shlq $32, %rcx +; DARWIN-NEXT: orq %rcx, %rax +; DARWIN-NEXT: shll $8 ; DARWIN-NOT: leaq -; DARWIN: orq %rcx, %rax %tmp21 = zext i32 %lb to i64 %tmp23 = zext i32 %ub to i64 %tmp24 = shl i64 %tmp23, 32 diff --git a/test/CodeGen/X86/compact-unwind.ll b/test/CodeGen/X86/compact-unwind.ll index 8c4fa27..9d3a125 100644 --- a/test/CodeGen/X86/compact-unwind.ll +++ b/test/CodeGen/X86/compact-unwind.ll @@ -1,18 +1,29 @@ -; RUN: llc < %s -disable-cfi -disable-fp-elim -mtriple x86_64-apple-darwin11 | FileCheck %s +; RUN: llc < %s -disable-fp-elim -mtriple x86_64-apple-darwin11 -mcpu corei7 | FileCheck 
-check-prefix=ASM %s +; RUN: llc < %s -disable-fp-elim -mtriple x86_64-apple-darwin11 -mcpu corei7 -filetype=obj -o - \ +; RUN: | llvm-objdump -triple x86_64-apple-darwin11 -s - \ +; RUN: | FileCheck -check-prefix=CU %s +; RUN: llc < %s -disable-fp-elim -mtriple x86_64-apple-darwin11 -mcpu corei7 \ +; RUN: | llvm-mc -triple x86_64-apple-darwin11 -filetype=obj -o - \ +; RUN: | llvm-objdump -triple x86_64-apple-darwin11 -s - \ +; RUN: | FileCheck -check-prefix=FROM-ASM %s %ty = type { i8* } @gv = external global i32 ; This is aligning the stack with a push of a random register. -; CHECK: pushq %rax +; ASM: pushq %rax ; Even though we can't encode %rax into the compact unwind, We still want to be ; able to generate a compact unwind encoding in this particular case. -; -; CHECK: __LD,__compact_unwind -; CHECK: _foo ## Range Start -; CHECK: 16842753 ## Compact Unwind Encoding: 0x1010001 + +; CU: Contents of section __compact_unwind: +; CU-NEXT: 0020 00000000 00000000 1e000000 01000101 +; CU-NEXT: 0030 00000000 00000000 00000000 00000000 + +; FROM-ASM: Contents of section __compact_unwind: +; FROM-ASM-NEXT: 0020 00000000 00000000 1e000000 01000101 +; FROM-ASM-NEXT: 0030 00000000 00000000 00000000 00000000 define i8* @foo(i64 %size) { %addr = alloca i64, align 8 diff --git a/test/CodeGen/X86/crash-nosse.ll b/test/CodeGen/X86/crash-nosse.ll index 7a15a47..b1e01f9 100644 --- a/test/CodeGen/X86/crash-nosse.ll +++ b/test/CodeGen/X86/crash-nosse.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mcpu=corei7 -mattr=-sse2,-sse41 -verify-machineinstrs +; RUN: llc < %s -mcpu=corei7 -mattr=-sse2,-sse4.1 -verify-machineinstrs target triple = "x86_64-unknown-linux-gnu" ; PR10503 diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll index b0a0e24..051150e 100644 --- a/test/CodeGen/X86/crash.ll +++ b/test/CodeGen/X86/crash.ll @@ -204,7 +204,7 @@ entry: ; <rdar://problem/9187792> define fastcc void @func_61() nounwind sspreq { entry: - %t1 = tail call i64 @llvm.objectsize.i64(i8* undef, 
i1 false) + %t1 = tail call i64 @llvm.objectsize.i64.p0i8(i8* undef, i1 false) %t2 = icmp eq i64 %t1, -1 br i1 %t2, label %bb2, label %bb1 @@ -215,7 +215,7 @@ bb2: ret void } -declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readnone +declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) nounwind readnone ; PR10277 ; This test has dead code elimination caused by remat during spilling. diff --git a/test/CodeGen/X86/dagcombine-shifts.ll b/test/CodeGen/X86/dagcombine-shifts.ll new file mode 100644 index 0000000..905cf05 --- /dev/null +++ b/test/CodeGen/X86/dagcombine-shifts.ll @@ -0,0 +1,209 @@ +; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s + +; fold (shl (zext (lshr (A, X))), X) -> (zext (shl (lshr (A, X)), X)) + +; Canolicalize the sequence shl/zext/lshr performing the zeroextend +; as the last instruction of the sequence. +; This will help DAGCombiner to identify and then fold the sequence +; of shifts into a single AND. +; This transformation is profitable if the shift amounts are the same +; and if there is only one use of the zext. 
+ +define i16 @fun1(i8 zeroext %v) { +entry: + %shr = lshr i8 %v, 4 + %ext = zext i8 %shr to i16 + %shl = shl i16 %ext, 4 + ret i16 %shl +} + +; CHECK-LABEL: @fun1 +; CHECK: and +; CHECK-NOT: shr +; CHECK-NOT: shl +; CHECK: ret + +define i32 @fun2(i8 zeroext %v) { +entry: + %shr = lshr i8 %v, 4 + %ext = zext i8 %shr to i32 + %shl = shl i32 %ext, 4 + ret i32 %shl +} + +; CHECK-LABEL: @fun2 +; CHECK: and +; CHECK-NOT: shr +; CHECK-NOT: shl +; CHECK: ret + +define i32 @fun3(i16 zeroext %v) { +entry: + %shr = lshr i16 %v, 4 + %ext = zext i16 %shr to i32 + %shl = shl i32 %ext, 4 + ret i32 %shl +} + +; CHECK-LABEL: @fun3 +; CHECK: and +; CHECK-NOT: shr +; CHECK-NOT: shl +; CHECK: ret + +define i64 @fun4(i8 zeroext %v) { +entry: + %shr = lshr i8 %v, 4 + %ext = zext i8 %shr to i64 + %shl = shl i64 %ext, 4 + ret i64 %shl +} + +; CHECK-LABEL: @fun4 +; CHECK: and +; CHECK-NOT: shr +; CHECK-NOT: shl +; CHECK: ret + +define i64 @fun5(i16 zeroext %v) { +entry: + %shr = lshr i16 %v, 4 + %ext = zext i16 %shr to i64 + %shl = shl i64 %ext, 4 + ret i64 %shl +} + +; CHECK-LABEL: @fun5 +; CHECK: and +; CHECK-NOT: shr +; CHECK-NOT: shl +; CHECK: ret + +define i64 @fun6(i32 zeroext %v) { +entry: + %shr = lshr i32 %v, 4 + %ext = zext i32 %shr to i64 + %shl = shl i64 %ext, 4 + ret i64 %shl +} + +; CHECK-LABEL: @fun6 +; CHECK: and +; CHECK-NOT: shr +; CHECK-NOT: shl +; CHECK: ret + +; Don't fold the pattern if we use arithmetic shifts. 
+ +define i64 @fun7(i8 zeroext %v) { +entry: + %shr = ashr i8 %v, 4 + %ext = zext i8 %shr to i64 + %shl = shl i64 %ext, 4 + ret i64 %shl +} + +; CHECK-LABEL: @fun7 +; CHECK: sar +; CHECK: shl +; CHECK: ret + +define i64 @fun8(i16 zeroext %v) { +entry: + %shr = ashr i16 %v, 4 + %ext = zext i16 %shr to i64 + %shl = shl i64 %ext, 4 + ret i64 %shl +} + +; CHECK-LABEL: @fun8 +; CHECK: sar +; CHECK: shl +; CHECK: ret + +define i64 @fun9(i32 zeroext %v) { +entry: + %shr = ashr i32 %v, 4 + %ext = zext i32 %shr to i64 + %shl = shl i64 %ext, 4 + ret i64 %shl +} + +; CHECK-LABEL: @fun9 +; CHECK: sar +; CHECK: shl +; CHECK: ret + +; Don't fold the pattern if there is more than one use of the +; operand in input to the shift left. + +define i64 @fun10(i8 zeroext %v) { +entry: + %shr = lshr i8 %v, 4 + %ext = zext i8 %shr to i64 + %shl = shl i64 %ext, 4 + %add = add i64 %shl, %ext + ret i64 %add +} + +; CHECK-LABEL: @fun10 +; CHECK: shr +; CHECK: shl +; CHECK: ret + +define i64 @fun11(i16 zeroext %v) { +entry: + %shr = lshr i16 %v, 4 + %ext = zext i16 %shr to i64 + %shl = shl i64 %ext, 4 + %add = add i64 %shl, %ext + ret i64 %add +} + +; CHECK-LABEL: @fun11 +; CHECK: shr +; CHECK: shl +; CHECK: ret + +define i64 @fun12(i32 zeroext %v) { +entry: + %shr = lshr i32 %v, 4 + %ext = zext i32 %shr to i64 + %shl = shl i64 %ext, 4 + %add = add i64 %shl, %ext + ret i64 %add +} + +; CHECK-LABEL: @fun12 +; CHECK: shr +; CHECK: shl +; CHECK: ret + +; PR17380 +; Make sure that the combined dags are legal if we run the DAGCombiner after +; Legalization took place. The add instruction is redundant and increases by +; one the number of uses of the zext. This prevents the transformation from +; firing before dags are legalized and optimized. +; Once the add is removed, the number of uses becomes one and therefore the +; dags are canonicalized. After Legalization, we need to make sure that the +; valuetype for the shift count is legal. 
+; Verify also that we correctly fold the shl-shr sequence into an +; AND with bitmask. + +define void @g(i32 %a) { + %b = lshr i32 %a, 2 + %c = zext i32 %b to i64 + %d = add i64 %c, 1 + %e = shl i64 %c, 2 + tail call void @f(i64 %e) + ret void +} + +; CHECK-LABEL: @g +; CHECK-NOT: shr +; CHECK-NOT: shl +; CHECK: and +; CHECK-NEXT: jmp + +declare void @f(i64) + diff --git a/test/CodeGen/X86/dagcombine_unsafe_math.ll b/test/CodeGen/X86/dagcombine-unsafe-math.ll index 592cf1b..f06d9f1 100644 --- a/test/CodeGen/X86/dagcombine_unsafe_math.ll +++ b/test/CodeGen/X86/dagcombine-unsafe-math.ll @@ -43,7 +43,7 @@ define float @test4(float %x, float %y) { ; rdar://13445387 ; "x + x + x => 3.0 * x" should be disabled after legalization because -; Instruction-Selection dosen't know how to handle "3.0" +; Instruction-Selection doesn't know how to handle "3.0" ; define float @test5() { %mul.i.i151 = fmul <4 x float> zeroinitializer, zeroinitializer diff --git a/test/CodeGen/X86/dwarf-comp-dir.ll b/test/CodeGen/X86/dwarf-comp-dir.ll index b746dec..3b4a868 100644 --- a/test/CodeGen/X86/dwarf-comp-dir.ll +++ b/test/CodeGen/X86/dwarf-comp-dir.ll @@ -5,6 +5,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 target triple = "x86_64-unknown-linux-gnu" !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!5} !0 = metadata !{i32 720913, metadata !4, i32 12, metadata !"clang version 3.1 (trunk 143523)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ] !2 = metadata !{i32 0} @@ -15,3 +16,4 @@ target triple = "x86_64-unknown-linux-gnu" ; Dir Mod Time File Len File Name ; ---- ---------- ---------- --------------------------- ; CHECK: file_names[ 1] 0 0x00000000 0x00000000 empty.c +!5 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/dyn_alloca_aligned.ll b/test/CodeGen/X86/dyn_alloca_aligned.ll new file mode 100644 index 0000000..993f4d2 --- 
/dev/null +++ b/test/CodeGen/X86/dyn_alloca_aligned.ll @@ -0,0 +1,9 @@ +; RUN: llc -mtriple=x86_64-linux < %s | FileCheck %s +define i32 @A(i32 %Size) { +; CHECK: subq %rcx, %rax +; CHECK: andq $-128, %rax +; CHECK: movq %rax, %rsp + %A = alloca i8, i32 %Size, align 128 + %A_addr = ptrtoint i8* %A to i32 + ret i32 %A_addr +} diff --git a/test/CodeGen/X86/fast-isel-mem.ll b/test/CodeGen/X86/fast-isel-mem.ll index 7fcef03..cd2dc1d 100644 --- a/test/CodeGen/X86/fast-isel-mem.ll +++ b/test/CodeGen/X86/fast-isel-mem.ll @@ -40,7 +40,7 @@ entry: ; CHECK: movl L_LotsStuff$non_lazy_ptr, %ecx ; ATOM: _t: -; ATOM: movl L_LotsStuff$non_lazy_ptr, %ecx -; ATOM: movl $0, %eax +; ATOM: movl L_LotsStuff$non_lazy_ptr, %e{{..}} +; ATOM: movl $0, %e{{..}} } diff --git a/test/CodeGen/X86/fastcc.ll b/test/CodeGen/X86/fastcc.ll index 705ab7b..a362f8d 100644 --- a/test/CodeGen/X86/fastcc.ll +++ b/test/CodeGen/X86/fastcc.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 -post-RA-scheduler=false | FileCheck %s -; CHECK: movsd %xmm0, 8(%esp) -; CHECK: xorl %ecx, %ecx +; CHECK: movsd %xmm{{[0-9]}}, 8(%esp) +; CHECK: xorl %eax, %eax @d = external global double ; <double*> [#uses=1] @c = external global double ; <double*> [#uses=1] diff --git a/test/CodeGen/X86/fastisel-gep-promote-before-add.ll b/test/CodeGen/X86/fastisel-gep-promote-before-add.ll new file mode 100644 index 0000000..f87a34c --- /dev/null +++ b/test/CodeGen/X86/fastisel-gep-promote-before-add.ll @@ -0,0 +1,37 @@ +; fastisel should not fold add with non-pointer bitwidth +; sext(a) + sext(b) != sext(a + b) +; RUN: llc -mtriple=x86_64-apple-darwin %s -O0 -o - | FileCheck %s + +define zeroext i8 @gep_promotion(i8* %ptr) nounwind uwtable ssp { +entry: + %ptr.addr = alloca i8*, align 8 + %add = add i8 64, 64 ; 0x40 + 0x40 + %0 = load i8** %ptr.addr, align 8 + + ; CHECK-LABEL: _gep_promotion: + ; CHECK: movzbl ({{.*}}) + %arrayidx = getelementptr inbounds i8* %0, i8 %add + + %1 = load i8* %arrayidx, align 1 + 
ret i8 %1 +} + +define zeroext i8 @gep_promotion_nonconst(i8 %i, i8* %ptr) nounwind uwtable ssp { +entry: + %i.addr = alloca i8, align 4 + %ptr.addr = alloca i8*, align 8 + store i8 %i, i8* %i.addr, align 4 + store i8* %ptr, i8** %ptr.addr, align 8 + %0 = load i8* %i.addr, align 4 + ; CHECK-LABEL: _gep_promotion_nonconst: + ; CHECK: movzbl ({{.*}}) + %xor = xor i8 %0, -128 ; %0 ^ 0x80 + %add = add i8 %xor, -127 ; %xor + 0x81 + %1 = load i8** %ptr.addr, align 8 + + %arrayidx = getelementptr inbounds i8* %1, i8 %add + + %2 = load i8* %arrayidx, align 1 + ret i8 %2 +} + diff --git a/test/CodeGen/X86/floor-soft-float.ll b/test/CodeGen/X86/floor-soft-float.ll index 8e7ee09..5644509 100644 --- a/test/CodeGen/X86/floor-soft-float.ll +++ b/test/CodeGen/X86/floor-soft-float.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=x86-64 -mattr=+sse41,-avx -soft-float=0 | FileCheck %s --check-prefix=CHECK-HARD-FLOAT -; RUN: llc < %s -march=x86-64 -mattr=+sse41,-avx -soft-float=1 | FileCheck %s --check-prefix=CHECK-SOFT-FLOAT +; RUN: llc < %s -march=x86-64 -mattr=+sse4.1,-avx -soft-float=0 | FileCheck %s --check-prefix=CHECK-HARD-FLOAT +; RUN: llc < %s -march=x86-64 -mattr=+sse4.1,-avx -soft-float=1 | FileCheck %s --check-prefix=CHECK-SOFT-FLOAT target triple = "x86_64-unknown-linux-gnu" diff --git a/test/CodeGen/X86/fold-load-vec.ll b/test/CodeGen/X86/fold-load-vec.ll index 47100be..e85d8f7 100644 --- a/test/CodeGen/X86/fold-load-vec.ll +++ b/test/CodeGen/X86/fold-load-vec.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse41 | FileCheck %s +; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s ; rdar://12721174 ; We should not fold movss into pshufd since pshufd expects m128 while movss diff --git a/test/CodeGen/X86/fold-load.ll b/test/CodeGen/X86/fold-load.ll index 495acd9..dde0a2d 100644 --- a/test/CodeGen/X86/fold-load.ll +++ b/test/CodeGen/X86/fold-load.ll @@ -38,10 +38,10 @@ L: store i16 %A, i16* %Q ret i32 %D - + ; CHECK-LABEL: test2: ; 
CHECK: movl 4(%esp), %eax -; CHECK-NEXT: movzwl (%eax), %ecx +; CHECK-NEXT: movzwl (%eax), %e{{..}} } @@ -49,10 +49,10 @@ L: ; xor in exit block will be CSE'ed and load will be folded to xor in entry. define i1 @test3(i32* %P, i32* %Q) nounwind { ; CHECK-LABEL: test3: -; CHECK: movl 8(%esp), %eax -; CHECK: xorl (%eax), +; CHECK: movl 8(%esp), %e +; CHECK: movl 4(%esp), %e +; CHECK: xorl (%e ; CHECK: j -; CHECK-NOT: xor entry: %0 = load i32* %P, align 4 %1 = load i32* %Q, align 4 diff --git a/test/CodeGen/X86/fold-pcmpeqd-2.ll b/test/CodeGen/X86/fold-pcmpeqd-2.ll index 0a3afb7..60a6844 100644 --- a/test/CodeGen/X86/fold-pcmpeqd-2.ll +++ b/test/CodeGen/X86/fold-pcmpeqd-2.ll @@ -54,22 +54,27 @@ forbody: ; preds = %forcond %mul310 = fmul <4 x float> %bitcast204.i104, zeroinitializer ; <<4 x float>> [#uses=2] %mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer ; <<4 x float>> [#uses=1] %cmpunord.i11 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i8 3) nounwind ; <<4 x float>> [#uses=1] + %tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1] + %bitcast.i3 = bitcast <4 x float> %mul310 to <4 x i32> ; <<4 x i32>> [#uses=1] + %andps.i5 = and <4 x i32> %bitcast.i3, zeroinitializer ; <<4 x i32>> [#uses=1] + + call void null(<4 x float> %mul313, <4 x float> %cmpunord.i11, <4 x float> %tmp83, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind + + %tmp84 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul313, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1] + %bitcast6.i13 = bitcast <4 x float> %cmpunord.i11 to <4 x i32> ; <<4 x i32>> [#uses=2] %andps.i14 = add <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %bitcast6.i13 ; <<4 x i32>> [#uses=1] %not.i16 = xor <4 x i32> %bitcast6.i13, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] %andnps.i17 = add <4 x i32> <i32 1, i32 1, i32 
1, i32 1>, %not.i16 ; <<4 x i32>> [#uses=1] %orps.i18 = or <4 x i32> %andnps.i17, %andps.i14 ; <<4 x i32>> [#uses=1] %bitcast17.i19 = bitcast <4 x i32> %orps.i18 to <4 x float> ; <<4 x float>> [#uses=1] - %tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1] - %bitcast.i3 = bitcast <4 x float> %mul310 to <4 x i32> ; <<4 x i32>> [#uses=1] - %andps.i5 = and <4 x i32> %bitcast.i3, zeroinitializer ; <<4 x i32>> [#uses=1] + %bitcast11.i6 = bitcast <4 x float> %tmp83 to <4 x i32> ; <<4 x i32>> [#uses=1] %not.i7 = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] %andnps.i8 = and <4 x i32> %bitcast11.i6, %not.i7 ; <<4 x i32>> [#uses=1] - call void null(<4 x float> %mul313, <4 x float> %cmpunord.i11, <4 x float> %tmp83, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind %orps.i9 = or <4 x i32> %andnps.i8, %andps.i5 ; <<4 x i32>> [#uses=1] %bitcast17.i10 = bitcast <4 x i32> %orps.i9 to <4 x float> ; <<4 x float>> [#uses=1] - %tmp84 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul313, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1] + %bitcast6.i = bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=2] %andps.i = and <4 x i32> zeroinitializer, %bitcast6.i ; <<4 x i32>> [#uses=1] %bitcast11.i = bitcast <4 x float> %tmp84 to <4 x i32> ; <<4 x i32>> [#uses=1] diff --git a/test/CodeGen/X86/fp-elim.ll b/test/CodeGen/X86/fp-elim.ll index 583388c..2c50bd1 100644 --- a/test/CodeGen/X86/fp-elim.ll +++ b/test/CodeGen/X86/fp-elim.ll @@ -4,7 +4,7 @@ ; Implement -momit-leaf-frame-pointer ; rdar://7886181 -define i32 @t1() "no-frame-pointer-elim-non-leaf"="false" nounwind readnone { +define i32 @t1() nounwind readnone { entry: ; FP-ELIM-LABEL: t1: ; FP-ELIM-NEXT: movl @@ -17,7 +17,7 @@ entry: ret i32 10 } -define void @t2() "no-frame-pointer-elim-non-leaf"="false" nounwind { +define void 
@t2() nounwind { entry: ; FP-ELIM-LABEL: t2: ; FP-ELIM-NOT: pushl %ebp @@ -31,7 +31,7 @@ entry: ret void } -define i32 @t3() "no-frame-pointer-elim-non-leaf"="true" nounwind readnone { +define i32 @t3() "no-frame-pointer-elim-non-leaf" nounwind readnone { entry: ; FP-ELIM-LABEL: t3: ; FP-ELIM-NEXT: movl @@ -44,7 +44,7 @@ entry: ret i32 10 } -define void @t4() "no-frame-pointer-elim-non-leaf"="true" nounwind { +define void @t4() "no-frame-pointer-elim-non-leaf" nounwind { entry: ; FP-ELIM-LABEL: t4: ; FP-ELIM-NEXT: pushl %ebp diff --git a/test/CodeGen/X86/fp-une-cmp.ll b/test/CodeGen/X86/fp-une-cmp.ll new file mode 100644 index 0000000..7f772d1 --- /dev/null +++ b/test/CodeGen/X86/fp-une-cmp.ll @@ -0,0 +1,43 @@ +; RUN: llc < %s -march=x86 -mattr=sse4.1 | FileCheck %s +; <rdar://problem/7859988> + +; Make sure we don't generate more jumps than we need to. We used to generate +; something like this: +; +; jne LBB0_1 +; jnp LBB0_2 +; LBB0_1: +; jmp LBB0_3 +; LBB0_2: +; addsd ... +; LBB0_3: +; +; Now we generate this: +; +; jne LBB0_2 +; jp LBB0_2 +; addsd ... +; LBB0_2: + +; CHECK: func +; CHECK: jne [[LABEL:.*]] +; CHECK-NEXT: jp [[LABEL]] +; CHECK-NOT: jmp + +define float @func(float %x, float %y) nounwind readnone optsize ssp { +entry: + %0 = fpext float %x to double + %1 = fpext float %y to double + %2 = fmul double %0, %1 + %3 = fcmp une double %2, 0.000000e+00 + br i1 %3, label %bb2, label %bb1 + +bb1: + %4 = fadd double %2, -1.000000e+00 + br label %bb2 + +bb2: + %.0.in = phi double [ %4, %bb1 ], [ %2, %entry ] + %.0 = fptrunc double %.0.in to float + ret float %.0 +} diff --git a/test/CodeGen/X86/frame-base.ll b/test/CodeGen/X86/frame-base.ll new file mode 100644 index 0000000..a6bd2a5 --- /dev/null +++ b/test/CodeGen/X86/frame-base.ll @@ -0,0 +1,22 @@ +; RUN: llc -mtriple=x86_64-apple-macosx -o - %s | FileCheck %s + +; The issue here was a conflict between forming a %rip-relative lea and a +; FrameIndex lea. 
The %rip sanity-checks didn't consider that a base register +; had been set if we'd already matched a FrameIndex, when it has in reality. + +@var = global i32 0 + +define void @test_frame_rip_conflict() { +; CHECK-LABEL: test_frame_rip_conflict: +; CHECK: leaq _var(%rip), [[TMPADDR:%r.*]] +; CHECK: leaq {{-?[0-9]+}}(%rsp,[[TMPADDR]]), + %stackvar = alloca i32 + + %stackint = ptrtoint i32* %stackvar to i64 + %addr = add i64 ptrtoint(i32* @var to i64), %stackint + + call void @eat_i64(i64 %addr) + ret void +} + +declare void @eat_i64(i64) diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll index 0729dda..cbcc62a 100644 --- a/test/CodeGen/X86/full-lsr.ll +++ b/test/CodeGen/X86/full-lsr.ll @@ -4,7 +4,7 @@ define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind { ; ATOM: foo ; ATOM: addl -; ATOM: leal +; ATOM: addl ; ATOM: leal ; CHECK: foo diff --git a/test/CodeGen/X86/gather-addresses.ll b/test/CodeGen/X86/gather-addresses.ll index 72a5096..5f48b1e 100644 --- a/test/CodeGen/X86/gather-addresses.ll +++ b/test/CodeGen/X86/gather-addresses.ll @@ -1,21 +1,35 @@ -; RUN: llc -mtriple=x86_64-linux -mcpu=nehalem < %s | FileCheck %s -; RUN: llc -mtriple=x86_64-win32 -mcpu=nehalem < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-linux -mcpu=nehalem < %s | FileCheck %s --check-prefix=LIN +; RUN: llc -mtriple=x86_64-win32 -mcpu=nehalem < %s | FileCheck %s --check-prefix=WIN ; rdar://7398554 ; When doing vector gather-scatter index calculation with 32-bit indices, ; bounce the vector off of cache rather than shuffling each individual ; element out of the index vector. 
-; CHECK: andps ([[H:%rdx|%r8]]), %xmm0 -; CHECK: movaps %xmm0, {{(-24)?}}(%rsp) -; CHECK: movslq {{(-24)?}}(%rsp), %rax -; CHECK: movsd ([[P:%rdi|%rcx]],%rax,8), %xmm0 -; CHECK: movslq {{-20|4}}(%rsp), %rax -; CHECK: movhpd ([[P]],%rax,8), %xmm0 -; CHECK: movslq {{-16|8}}(%rsp), %rax -; CHECK: movsd ([[P]],%rax,8), %xmm1 -; CHECK: movslq {{-12|12}}(%rsp), %rax -; CHECK: movhpd ([[P]],%rax,8), %xmm1 +; CHECK: foo: +; LIN: movaps (%rsi), %xmm0 +; LIN: andps (%rdx), %xmm0 +; LIN: movaps %xmm0, -24(%rsp) +; LIN: movslq -24(%rsp), %[[REG1:r.+]] +; LIN: movslq -20(%rsp), %[[REG2:r.+]] +; LIN: movslq -16(%rsp), %[[REG3:r.+]] +; LIN: movslq -12(%rsp), %[[REG4:r.+]] +; LIN: movsd (%rdi,%[[REG1]],8), %xmm0 +; LIN: movhpd (%rdi,%[[REG2]],8), %xmm0 +; LIN: movsd (%rdi,%[[REG3]],8), %xmm1 +; LIN: movhpd (%rdi,%[[REG4]],8), %xmm1 + +; WIN: movaps (%rdx), %xmm0 +; WIN: andps (%r8), %xmm0 +; WIN: movaps %xmm0, (%rsp) +; WIN: movslq (%rsp), %[[REG1:r.+]] +; WIN: movslq 4(%rsp), %[[REG2:r.+]] +; WIN: movslq 8(%rsp), %[[REG3:r.+]] +; WIN: movslq 12(%rsp), %[[REG4:r.+]] +; WIN: movsd (%rcx,%[[REG1]],8), %xmm0 +; WIN: movhpd (%rcx,%[[REG2]],8), %xmm0 +; WIN: movsd (%rcx,%[[REG3]],8), %xmm1 +; WIN: movhpd (%rcx,%[[REG4]],8), %xmm1 define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind { %a = load <4 x i32>* %i diff --git a/test/CodeGen/X86/ghc-cc.ll b/test/CodeGen/X86/ghc-cc.ll index 0e65cfd..4dba2c0 100644 --- a/test/CodeGen/X86/ghc-cc.ll +++ b/test/CodeGen/X86/ghc-cc.ll @@ -28,10 +28,10 @@ entry: define cc 10 void @foo() nounwind { entry: - ; CHECK: movl base, %ebx - ; CHECK-NEXT: movl sp, %ebp + ; CHECK: movl r1, %esi ; CHECK-NEXT: movl hp, %edi - ; CHECK-NEXT: movl r1, %esi + ; CHECK-NEXT: movl sp, %ebp + ; CHECK-NEXT: movl base, %ebx %0 = load i32* @r1 %1 = load i32* @hp %2 = load i32* @sp @@ -42,4 +42,3 @@ entry: } declare cc 10 void @bar(i32, i32, i32, i32) - diff --git a/test/CodeGen/X86/ghc-cc64.ll b/test/CodeGen/X86/ghc-cc64.ll index fcf7e17..403391e 
100644 --- a/test/CodeGen/X86/ghc-cc64.ll +++ b/test/CodeGen/X86/ghc-cc64.ll @@ -41,22 +41,22 @@ entry: define cc 10 void @foo() nounwind { entry: - ; CHECK: movq base(%rip), %r13 - ; CHECK-NEXT: movq sp(%rip), %rbp - ; CHECK-NEXT: movq hp(%rip), %r12 - ; CHECK-NEXT: movq r1(%rip), %rbx - ; CHECK-NEXT: movq r2(%rip), %r14 - ; CHECK-NEXT: movq r3(%rip), %rsi - ; CHECK-NEXT: movq r4(%rip), %rdi - ; CHECK-NEXT: movq r5(%rip), %r8 - ; CHECK-NEXT: movq r6(%rip), %r9 - ; CHECK-NEXT: movq splim(%rip), %r15 - ; CHECK-NEXT: movss f1(%rip), %xmm1 - ; CHECK-NEXT: movss f2(%rip), %xmm2 - ; CHECK-NEXT: movss f3(%rip), %xmm3 - ; CHECK-NEXT: movss f4(%rip), %xmm4 + ; CHECK: movsd d2(%rip), %xmm6 ; CHECK-NEXT: movsd d1(%rip), %xmm5 - ; CHECK-NEXT: movsd d2(%rip), %xmm6 + ; CHECK-NEXT: movss f4(%rip), %xmm4 + ; CHECK-NEXT: movss f3(%rip), %xmm3 + ; CHECK-NEXT: movss f2(%rip), %xmm2 + ; CHECK-NEXT: movss f1(%rip), %xmm1 + ; CHECK-NEXT: movq splim(%rip), %r15 + ; CHECK-NEXT: movq r6(%rip), %r9 + ; CHECK-NEXT: movq r5(%rip), %r8 + ; CHECK-NEXT: movq r4(%rip), %rdi + ; CHECK-NEXT: movq r3(%rip), %rsi + ; CHECK-NEXT: movq r2(%rip), %r14 + ; CHECK-NEXT: movq r1(%rip), %rbx + ; CHECK-NEXT: movq hp(%rip), %r12 + ; CHECK-NEXT: movq sp(%rip), %rbp + ; CHECK-NEXT: movq base(%rip), %r13 %0 = load double* @d2 %1 = load double* @d1 %2 = load float* @f4 @@ -83,4 +83,3 @@ entry: declare cc 10 void @bar(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, float, float, float, float, double, double) - diff --git a/test/CodeGen/X86/global-sections.ll b/test/CodeGen/X86/global-sections.ll index 194f597..d8743ac 100644 --- a/test/CodeGen/X86/global-sections.ll +++ b/test/CodeGen/X86/global-sections.ll @@ -65,10 +65,10 @@ ; PR4584 @"foo bar" = linkonce global i32 42 -; LINUX: .type foo_20_bar,@object -; LINUX: .section .data.foo_20_bar,"aGw",@progbits,foo_20_bar,comdat -; LINUX: .weak foo_20_bar -; LINUX: foo_20_bar: +; LINUX: .type "foo bar",@object +; LINUX: .section ".data.foo bar","aGw",@progbits,"foo 
bar",comdat +; LINUX: .weak "foo bar" +; LINUX: "foo bar": ; DARWIN: .section __DATA,__datacoal_nt,coalesced ; DARWIN: .globl "_foo bar" diff --git a/test/CodeGen/X86/h-register-addressing-32.ll b/test/CodeGen/X86/h-register-addressing-32.ll index 968a9e8..68e8c60 100644 --- a/test/CodeGen/X86/h-register-addressing-32.ll +++ b/test/CodeGen/X86/h-register-addressing-32.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 | grep "movzbl %[abcd]h," | count 7 +; RUN: llc < %s -march=x86 -mattr=-bmi | FileCheck %s ; Use h-register extract and zero-extend. @@ -9,6 +9,9 @@ define double @foo8(double* nocapture inreg %p, i32 inreg %x) nounwind readonly %t3 = load double* %t2, align 8 ret double %t3 } +; CHECK: foo8: +; CHECK: movzbl %{{[abcd]}}h, %e + define float @foo4(float* nocapture inreg %p, i32 inreg %x) nounwind readonly { %t0 = lshr i32 %x, 8 %t1 = and i32 %t0, 255 @@ -16,6 +19,9 @@ define float @foo4(float* nocapture inreg %p, i32 inreg %x) nounwind readonly { %t3 = load float* %t2, align 8 ret float %t3 } +; CHECK: foo4: +; CHECK: movzbl %{{[abcd]}}h, %e + define i16 @foo2(i16* nocapture inreg %p, i32 inreg %x) nounwind readonly { %t0 = lshr i32 %x, 8 %t1 = and i32 %t0, 255 @@ -23,6 +29,9 @@ define i16 @foo2(i16* nocapture inreg %p, i32 inreg %x) nounwind readonly { %t3 = load i16* %t2, align 8 ret i16 %t3 } +; CHECK: foo2: +; CHECK: movzbl %{{[abcd]}}h, %e + define i8 @foo1(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly { %t0 = lshr i32 %x, 8 %t1 = and i32 %t0, 255 @@ -30,6 +39,9 @@ define i8 @foo1(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly { %t3 = load i8* %t2, align 8 ret i8 %t3 } +; CHECK: foo1: +; CHECK: movzbl %{{[abcd]}}h, %e + define i8 @bar8(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly { %t0 = lshr i32 %x, 5 %t1 = and i32 %t0, 2040 @@ -37,6 +49,9 @@ define i8 @bar8(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly { %t3 = load i8* %t2, align 8 ret i8 %t3 } +; CHECK: bar8: +; CHECK: movzbl %{{[abcd]}}h, %e + define i8 
@bar4(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly { %t0 = lshr i32 %x, 6 %t1 = and i32 %t0, 1020 @@ -44,6 +59,9 @@ define i8 @bar4(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly { %t3 = load i8* %t2, align 8 ret i8 %t3 } +; CHECK: bar4: +; CHECK: movzbl %{{[abcd]}}h, %e + define i8 @bar2(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly { %t0 = lshr i32 %x, 7 %t1 = and i32 %t0, 510 @@ -51,3 +69,6 @@ define i8 @bar2(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly { %t3 = load i8* %t2, align 8 ret i8 %t3 } +; CHECK: bar2: +; CHECK: movzbl %{{[abcd]}}h, %e +; CHECK: ret diff --git a/test/CodeGen/X86/h-register-addressing-64.ll b/test/CodeGen/X86/h-register-addressing-64.ll index a19fca5..3f549d2 100644 --- a/test/CodeGen/X86/h-register-addressing-64.ll +++ b/test/CodeGen/X86/h-register-addressing-64.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 | grep "movzbl %[abcd]h," | count 7 +; RUN: llc < %s -march=x86-64 -mattr=-bmi | FileCheck %s ; Use h-register extract and zero-extend. 
@@ -9,6 +9,9 @@ define double @foo8(double* nocapture inreg %p, i64 inreg %x) nounwind readonly %t3 = load double* %t2, align 8 ret double %t3 } +; CHECK: foo8: +; CHECK: movzbl %{{[abcd]}}h, %e + define float @foo4(float* nocapture inreg %p, i64 inreg %x) nounwind readonly { %t0 = lshr i64 %x, 8 %t1 = and i64 %t0, 255 @@ -16,6 +19,9 @@ define float @foo4(float* nocapture inreg %p, i64 inreg %x) nounwind readonly { %t3 = load float* %t2, align 8 ret float %t3 } +; CHECK: foo4: +; CHECK: movzbl %{{[abcd]}}h, %e + define i16 @foo2(i16* nocapture inreg %p, i64 inreg %x) nounwind readonly { %t0 = lshr i64 %x, 8 %t1 = and i64 %t0, 255 @@ -23,6 +29,9 @@ define i16 @foo2(i16* nocapture inreg %p, i64 inreg %x) nounwind readonly { %t3 = load i16* %t2, align 8 ret i16 %t3 } +; CHECK: foo2: +; CHECK: movzbl %{{[abcd]}}h, %e + define i8 @foo1(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly { %t0 = lshr i64 %x, 8 %t1 = and i64 %t0, 255 @@ -30,6 +39,9 @@ define i8 @foo1(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly { %t3 = load i8* %t2, align 8 ret i8 %t3 } +; CHECK: foo1: +; CHECK: movzbl %{{[abcd]}}h, %e + define i8 @bar8(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly { %t0 = lshr i64 %x, 5 %t1 = and i64 %t0, 2040 @@ -37,6 +49,9 @@ define i8 @bar8(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly { %t3 = load i8* %t2, align 8 ret i8 %t3 } +; CHECK: bar8: +; CHECK: movzbl %{{[abcd]}}h, %e + define i8 @bar4(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly { %t0 = lshr i64 %x, 6 %t1 = and i64 %t0, 1020 @@ -44,6 +59,9 @@ define i8 @bar4(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly { %t3 = load i8* %t2, align 8 ret i8 %t3 } +; CHECK: bar4: +; CHECK: movzbl %{{[abcd]}}h, %e + define i8 @bar2(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly { %t0 = lshr i64 %x, 7 %t1 = and i64 %t0, 510 @@ -51,3 +69,6 @@ define i8 @bar2(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly { %t3 = load i8* %t2, align 8 ret i8 %t3 } +; CHECK: 
bar2: +; CHECK: movzbl %{{[abcd]}}h, %e +; CHECK: ret diff --git a/test/CodeGen/X86/h-registers-0.ll b/test/CodeGen/X86/h-registers-0.ll index 71b3b43..6a5ccaa 100644 --- a/test/CodeGen/X86/h-registers-0.ll +++ b/test/CodeGen/X86/h-registers-0.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X86-64 -; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64 -; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X86-32 +; RUN: llc < %s -mattr=-bmi -mtriple=x86_64-linux | FileCheck %s -check-prefix=X86-64 +; RUN: llc < %s -mattr=-bmi -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64 +; RUN: llc < %s -mattr=-bmi -march=x86 | FileCheck %s -check-prefix=X86-32 ; Use h registers. On x86-64, codegen doesn't support general allocation ; of h registers yet, due to x86 encoding complications. diff --git a/test/CodeGen/X86/h-registers-1.ll b/test/CodeGen/X86/h-registers-1.ll index 903c453..7254325 100644 --- a/test/CodeGen/X86/h-registers-1.ll +++ b/test/CodeGen/X86/h-registers-1.ll @@ -1,12 +1,21 @@ -; RUN: llc < %s -mtriple=x86_64-linux > %t -; RUN: grep "movzbl %[abcd]h," %t | count 8 -; RUN: grep "%[abcd]h" %t | not grep "%r[[:digit:]]*d" +; RUN: llc -mattr=-bmi < %s -mtriple=x86_64-linux | FileCheck %s ; LLVM creates virtual registers for values live across blocks ; based on the type of the value. Make sure that the extracts ; here use the GR64_NOREX register class for their result, ; instead of plain GR64. 
+; CHECK: foo: +; CHECK: movzbl %{{[abcd]}}h, %e +; CHECK: movzbl %{{[abcd]}}h, %e +; CHECK: movzbl %{{[abcd]}}h, %e +; CHECK: movzbl %{{[abcd]}}h, %e +; CHECK: movzbl %{{[abcd]}}h, %e +; CHECK: movzbl %{{[abcd]}}h, %e +; CHECK: movzbl %{{[abcd]}}h, %e +; CHECK: movzbl %{{[abcd]}}h, %e +; CHECK: ret + define i64 @foo(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h) { %sa = lshr i64 %a, 8 diff --git a/test/CodeGen/X86/hipe-cc.ll b/test/CodeGen/X86/hipe-cc.ll index 76d17a0..b34417e 100644 --- a/test/CodeGen/X86/hipe-cc.ll +++ b/test/CodeGen/X86/hipe-cc.ll @@ -49,10 +49,10 @@ entry: store i32 %arg1, i32* %arg1_var store i32 %arg2, i32* %arg2_var - ; CHECK: movl 4(%esp), %edx - ; CHECK-NEXT: movl 8(%esp), %eax + ; CHECK: movl 16(%esp), %esi ; CHECK-NEXT: movl 12(%esp), %ebp - ; CHECK-NEXT: movl 16(%esp), %esi + ; CHECK-NEXT: movl 8(%esp), %eax + ; CHECK-NEXT: movl 4(%esp), %edx %0 = load i32* %hp_var %1 = load i32* %p_var %2 = load i32* %arg0_var diff --git a/test/CodeGen/X86/hipe-cc64.ll b/test/CodeGen/X86/hipe-cc64.ll index 5dbb5a2..27e1c72 100644 --- a/test/CodeGen/X86/hipe-cc64.ll +++ b/test/CodeGen/X86/hipe-cc64.ll @@ -5,10 +5,10 @@ define void @zap(i64 %a, i64 %b) nounwind { entry: ; CHECK: movq %rsi, %rax - ; CHECK-NEXT: movq %rdi, %rsi - ; CHECK-NEXT: movq %rax, %rdx ; CHECK-NEXT: movl $8, %ecx ; CHECK-NEXT: movl $9, %r8d + ; CHECK-NEXT: movq %rdi, %rsi + ; CHECK-NEXT: movq %rax, %rdx ; CHECK-NEXT: callq addfour %0 = call cc 11 {i64, i64, i64} @addfour(i64 undef, i64 undef, i64 %a, i64 %b, i64 8, i64 9) %res = extractvalue {i64, i64, i64} %0, 2 @@ -57,11 +57,11 @@ entry: store i64 %arg2, i64* %arg2_var store i64 %arg3, i64* %arg3_var - ; CHECK: movq 8(%rsp), %rcx - ; CHECK-NEXT: movq 16(%rsp), %rdx - ; CHECK-NEXT: movq 24(%rsp), %rsi + ; CHECK: movq 40(%rsp), %r15 ; CHECK-NEXT: movq 32(%rsp), %rbp - ; CHECK-NEXT: movq 40(%rsp), %r15 + ; CHECK-NEXT: movq 24(%rsp), %rsi + ; CHECK-NEXT: movq 16(%rsp), %rdx + ; CHECK-NEXT: movq 8(%rsp), %rcx %0 = 
load i64* %hp_var %1 = load i64* %p_var %2 = load i64* %arg0_var diff --git a/test/CodeGen/X86/hoist-common.ll b/test/CodeGen/X86/hoist-common.ll index 6b26876..01d1b8c 100644 --- a/test/CodeGen/X86/hoist-common.ll +++ b/test/CodeGen/X86/hoist-common.ll @@ -1,4 +1,14 @@ ; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s +; This is supposed to be testing BranchFolding's common +; code hoisting logic, but has been erroneously passing due +; to there being a redundant xorl in the entry block +; and no common code to hoist. +; However, now that MachineSink sinks the redundant xor +; hoist-common looks at it and rejects it for hoisting, +; which causes this test to fail. +; Since it seems this test is broken, marking XFAIL for now +; until someone decides to remove it or fix what it tests. +; XFAIL: * ; Common "xorb al, al" instruction in the two successor blocks should be ; moved to the entry block above the test + je. diff --git a/test/CodeGen/X86/i128-mul.ll b/test/CodeGen/X86/i128-mul.ll index c0b85df..8cfda85 100644 --- a/test/CodeGen/X86/i128-mul.ll +++ b/test/CodeGen/X86/i128-mul.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s +; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s ; PR1198 define i64 @foo(i64 %x, i64 %y) { diff --git a/test/CodeGen/X86/i486-fence-loop.ll b/test/CodeGen/X86/i486-fence-loop.ll new file mode 100644 index 0000000..d809619 --- /dev/null +++ b/test/CodeGen/X86/i486-fence-loop.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=x86 -mcpu=i486 -o - %s | FileCheck %s + +; Main test here was that ISelDAG could cope with a MachineNode in the chain +; from the first load to the "X86ISD::SUB". Previously it thought that meant no +; cycle could be formed so it tried to use "sub (%eax), [[RHS]]". 
+ +define void @gst_atomic_queue_push(i32* %addr) { +; CHECK-LABEL: gst_atomic_queue_push: +; CHECK: movl (%eax), [[LHS:%e[a-z]+]] +; CHECK: lock +; CHECK-NEXT: orl +; CHECK: movl (%eax), [[RHS:%e[a-z]+]] +; CHECK: cmpl [[LHS]], [[RHS]] + +entry: + br label %while.body + +while.body: + %0 = load volatile i32* %addr, align 4 + fence seq_cst + %1 = load volatile i32* %addr, align 4 + %cmp = icmp sgt i32 %1, %0 + br i1 %cmp, label %while.body, label %if.then + +if.then: + ret void +}
\ No newline at end of file diff --git a/test/CodeGen/X86/ident-metadata.ll b/test/CodeGen/X86/ident-metadata.ll new file mode 100644 index 0000000..a568673 --- /dev/null +++ b/test/CodeGen/X86/ident-metadata.ll @@ -0,0 +1,9 @@ +; RUN: llc -mtriple=x86_64-linux < %s | FileCheck %s +; Verify that llvm.ident metadata is emitted as .ident +; directives in assembly files, and in the .comment section in ELF object files. + +; CHECK: .ident "clang version x.x" +; CHECK-NEXT: .ident "something else" +!llvm.ident = !{!0, !1} +!0 = metadata !{metadata !"clang version x.x"} +!1 = metadata !{metadata !"something else"} diff --git a/test/CodeGen/X86/inline-asm-error.ll b/test/CodeGen/X86/inline-asm-error.ll index 747a589..31fb190 100644 --- a/test/CodeGen/X86/inline-asm-error.ll +++ b/test/CodeGen/X86/inline-asm-error.ll @@ -6,7 +6,7 @@ ; RUN: FileCheck %s < %t3 ; The register allocator must fail on this function. -; CHECK: error: ran out of registers during register allocation +; CHECK: error: inline assembly requires more registers than available define void @f(i32 %x0, i32 %x1, i32 %x2, i32 %x3, i32 %x4, i32 %x5, i32 %x6, i32 %x7, i32 %x8, i32 %x9) nounwind ssp { entry: diff --git a/test/CodeGen/X86/inline-asm-flag-clobber.ll b/test/CodeGen/X86/inline-asm-flag-clobber.ll index 51ea843..45f4d2f 100644 --- a/test/CodeGen/X86/inline-asm-flag-clobber.ll +++ b/test/CodeGen/X86/inline-asm-flag-clobber.ll @@ -2,18 +2,31 @@ ; PR3701 define i64 @t(i64* %arg) nounwind { - br i1 true, label %1, label %5 + br i1 true, label %1, label %5 -; <label>:1 ; preds = %0 - %2 = icmp eq i64* null, %arg ; <i1> [#uses=1] - %3 = tail call i64* asm sideeffect "movl %fs:0,$0", "=r,~{dirflag},~{fpsr},~{flags}"() nounwind ; <%struct.thread*> [#uses=0] +; <label>:1 ; preds = %0 + %2 = icmp eq i64* null, %arg ; <i1> [#uses=1] + %3 = tail call i64* asm sideeffect "movl %fs:0,$0", "=r,~{dirflag},~{fpsr},~{flags}"() nounwind ; <%struct.thread*> [#uses=0] ; CHECK: test ; CHECK-NEXT: j - br i1 %2, label %4, 
label %5 + br i1 %2, label %4, label %5 -; <label>:4 ; preds = %1 - ret i64 1 +; <label>:4 ; preds = %1 + ret i64 1 -; <label>:5 ; preds = %1 - ret i64 0 +; <label>:5 ; preds = %1 + ret i64 0 } + +; Make sure that we translate this to the bswap intrinsic which lowers down without the +; inline assembly. +; CHECK-NOT: #APP +define i32 @s(i32 %argc, i8** nocapture %argv) unnamed_addr nounwind { +entry: + %0 = trunc i32 %argc to i16 + %asmtmp = tail call i16 asm "rorw $$8, ${0:w}", "=r,0,~{fpsr},~{flags},~{cc}"(i16 %0) nounwind, !srcloc !0 + %1 = zext i16 %asmtmp to i32 + ret i32 %1 +} + +!0 = metadata !{i64 935930} diff --git a/test/CodeGen/X86/ins_subreg_coalesce-1.ll b/test/CodeGen/X86/ins_subreg_coalesce-1.ll index bec98a2..a74e3f2 100644 --- a/test/CodeGen/X86/ins_subreg_coalesce-1.ll +++ b/test/CodeGen/X86/ins_subreg_coalesce-1.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=-bmi | FileCheck %s define fastcc i32 @t() nounwind { entry: diff --git a/test/CodeGen/X86/isel-optnone.ll b/test/CodeGen/X86/isel-optnone.ll new file mode 100644 index 0000000..d2f0628 --- /dev/null +++ b/test/CodeGen/X86/isel-optnone.ll @@ -0,0 +1,42 @@ +; RUN: llc -O2 -march=x86 < %s | FileCheck %s + +define i32* @fooOptnone(i32* %p, i32* %q, i32** %z) #0 { +entry: + %r = load i32* %p + %s = load i32* %q + %y = load i32** %z + + %t0 = add i32 %r, %s + %t1 = add i32 %t0, 1 + %t2 = getelementptr i32* %y, i32 1 + %t3 = getelementptr i32* %t2, i32 %t1 + + ret i32* %t3 + +; 'optnone' should use fast-isel which will not produce 'lea'. +; CHECK-LABEL: fooOptnone: +; CHECK-NOT: lea +; CHECK: ret +} + +define i32* @fooNormal(i32* %p, i32* %q, i32** %z) #1 { +entry: + %r = load i32* %p + %s = load i32* %q + %y = load i32** %z + + %t0 = add i32 %r, %s + %t1 = add i32 %t0, 1 + %t2 = getelementptr i32* %y, i32 1 + %t3 = getelementptr i32* %t2, i32 %t1 + + ret i32* %t3 + +; Normal ISel will produce 'lea'. 
+; CHECK-LABEL: fooNormal: +; CHECK: lea +; CHECK: ret +} + +attributes #0 = { nounwind optnone noinline } +attributes #1 = { nounwind } diff --git a/test/CodeGen/X86/large-gep-chain.ll b/test/CodeGen/X86/large-gep-chain.ll new file mode 100644 index 0000000..5cf4661 --- /dev/null +++ b/test/CodeGen/X86/large-gep-chain.ll @@ -0,0 +1,25607 @@ +; RUN: llc < %s -O0 -march x86 -o /dev/null +; <rdar://problem/12445434> + +%0 = type { i32, float* } + +@0 = external unnamed_addr constant [27 x i8], align 1 +@1 = external unnamed_addr constant [26 x i8], align 1 +@2 = external unnamed_addr constant [18 x i8], align 1 +@3 = external unnamed_addr constant [15 x i8], align 1 +@4 = external unnamed_addr constant [20 x i8], align 1 +@5 = external unnamed_addr constant [21 x i8], align 1 +@6 = external unnamed_addr constant [12 x i8], align 1 +@7 = external unnamed_addr constant [27 x i8], align 1 +@8 = external unnamed_addr constant [63 x i8], align 1 + +define void @main() uwtable ssp { +bb: + br i1 undef, label %bb1, label %bb2 + +bb1: ; preds = %bb + br label %bb25362 + +bb2: ; preds = %bb + %tmp = getelementptr inbounds float* null, i64 1 + %tmp3 = getelementptr inbounds float* %tmp, i64 1 + %tmp4 = getelementptr inbounds float* %tmp3, i64 1 + %tmp5 = getelementptr inbounds float* %tmp4, i64 1 + %tmp6 = getelementptr inbounds float* %tmp5, i64 1 + %tmp7 = getelementptr inbounds float* %tmp6, i64 1 + %tmp8 = getelementptr inbounds float* %tmp7, i64 1 + %tmp9 = getelementptr inbounds float* %tmp8, i64 1 + %tmp10 = getelementptr inbounds float* %tmp9, i64 1 + %tmp11 = getelementptr inbounds float* %tmp10, i64 1 + %tmp12 = getelementptr inbounds float* %tmp11, i64 1 + %tmp13 = getelementptr inbounds float* %tmp12, i64 1 + %tmp14 = getelementptr inbounds float* %tmp13, i64 1 + %tmp15 = getelementptr inbounds float* %tmp14, i64 1 + %tmp16 = getelementptr inbounds float* %tmp15, i64 1 + %tmp17 = getelementptr inbounds float* %tmp16, i64 1 + %tmp18 = getelementptr inbounds float* 
%tmp17, i64 1 + %tmp19 = getelementptr inbounds float* %tmp18, i64 1 + %tmp20 = getelementptr inbounds float* %tmp19, i64 1 + %tmp21 = getelementptr inbounds float* %tmp20, i64 1 + %tmp22 = getelementptr inbounds float* %tmp21, i64 1 + %tmp23 = getelementptr inbounds float* %tmp22, i64 1 + %tmp24 = getelementptr inbounds float* %tmp23, i64 1 + %tmp25 = getelementptr inbounds float* %tmp24, i64 1 + %tmp26 = getelementptr inbounds float* %tmp25, i64 1 + %tmp27 = getelementptr inbounds float* %tmp26, i64 1 + %tmp28 = getelementptr inbounds float* %tmp27, i64 1 + %tmp29 = getelementptr inbounds float* %tmp28, i64 1 + %tmp30 = getelementptr inbounds float* %tmp29, i64 1 + %tmp31 = getelementptr inbounds float* %tmp30, i64 1 + %tmp32 = getelementptr inbounds float* %tmp31, i64 1 + %tmp33 = getelementptr inbounds float* %tmp32, i64 1 + %tmp34 = getelementptr inbounds float* %tmp33, i64 1 + %tmp35 = getelementptr inbounds float* %tmp34, i64 1 + %tmp36 = getelementptr inbounds float* %tmp35, i64 1 + %tmp37 = getelementptr inbounds float* %tmp36, i64 1 + %tmp38 = getelementptr inbounds float* %tmp37, i64 1 + %tmp39 = getelementptr inbounds float* %tmp38, i64 1 + %tmp40 = getelementptr inbounds float* %tmp39, i64 1 + %tmp41 = getelementptr inbounds float* %tmp40, i64 1 + %tmp42 = getelementptr inbounds float* %tmp41, i64 1 + %tmp43 = getelementptr inbounds float* %tmp42, i64 1 + %tmp44 = getelementptr inbounds float* %tmp43, i64 1 + %tmp45 = getelementptr inbounds float* %tmp44, i64 1 + %tmp46 = getelementptr inbounds float* %tmp45, i64 1 + %tmp47 = getelementptr inbounds float* %tmp46, i64 1 + %tmp48 = getelementptr inbounds float* %tmp47, i64 1 + %tmp49 = getelementptr inbounds float* %tmp48, i64 1 + %tmp50 = getelementptr inbounds float* %tmp49, i64 1 + %tmp51 = getelementptr inbounds float* %tmp50, i64 1 + %tmp52 = getelementptr inbounds float* %tmp51, i64 1 + %tmp53 = getelementptr inbounds float* %tmp52, i64 1 + %tmp54 = getelementptr inbounds float* %tmp53, i64 1 + 
%tmp55 = getelementptr inbounds float* %tmp54, i64 1 + %tmp56 = getelementptr inbounds float* %tmp55, i64 1 + %tmp57 = getelementptr inbounds float* %tmp56, i64 1 + %tmp58 = getelementptr inbounds float* %tmp57, i64 1 + %tmp59 = getelementptr inbounds float* %tmp58, i64 1 + %tmp60 = getelementptr inbounds float* %tmp59, i64 1 + %tmp61 = getelementptr inbounds float* %tmp60, i64 1 + %tmp62 = getelementptr inbounds float* %tmp61, i64 1 + %tmp63 = getelementptr inbounds float* %tmp62, i64 1 + %tmp64 = getelementptr inbounds float* %tmp63, i64 1 + %tmp65 = getelementptr inbounds float* %tmp64, i64 1 + %tmp66 = getelementptr inbounds float* %tmp65, i64 1 + %tmp67 = getelementptr inbounds float* %tmp66, i64 1 + %tmp68 = getelementptr inbounds float* %tmp67, i64 1 + %tmp69 = getelementptr inbounds float* %tmp68, i64 1 + %tmp70 = getelementptr inbounds float* %tmp69, i64 1 + %tmp71 = getelementptr inbounds float* %tmp70, i64 1 + %tmp72 = getelementptr inbounds float* %tmp71, i64 1 + %tmp73 = getelementptr inbounds float* %tmp72, i64 1 + %tmp74 = getelementptr inbounds float* %tmp73, i64 1 + %tmp75 = getelementptr inbounds float* %tmp74, i64 1 + %tmp76 = getelementptr inbounds float* %tmp75, i64 1 + %tmp77 = getelementptr inbounds float* %tmp76, i64 1 + %tmp78 = getelementptr inbounds float* %tmp77, i64 1 + %tmp79 = getelementptr inbounds float* %tmp78, i64 1 + %tmp80 = getelementptr inbounds float* %tmp79, i64 1 + %tmp81 = getelementptr inbounds float* %tmp80, i64 1 + %tmp82 = getelementptr inbounds float* %tmp81, i64 1 + %tmp83 = getelementptr inbounds float* %tmp82, i64 1 + %tmp84 = getelementptr inbounds float* %tmp83, i64 1 + %tmp85 = getelementptr inbounds float* %tmp84, i64 1 + %tmp86 = getelementptr inbounds float* %tmp85, i64 1 + %tmp87 = getelementptr inbounds float* %tmp86, i64 1 + %tmp88 = getelementptr inbounds float* %tmp87, i64 1 + %tmp89 = getelementptr inbounds float* %tmp88, i64 1 + %tmp90 = getelementptr inbounds float* %tmp89, i64 1 + %tmp91 = 
getelementptr inbounds float* %tmp90, i64 1 + %tmp92 = getelementptr inbounds float* %tmp91, i64 1 + %tmp93 = getelementptr inbounds float* %tmp92, i64 1 + %tmp94 = getelementptr inbounds float* %tmp93, i64 1 + %tmp95 = getelementptr inbounds float* %tmp94, i64 1 + %tmp96 = getelementptr inbounds float* %tmp95, i64 1 + %tmp97 = getelementptr inbounds float* %tmp96, i64 1 + %tmp98 = getelementptr inbounds float* %tmp97, i64 1 + %tmp99 = getelementptr inbounds float* %tmp98, i64 1 + %tmp100 = getelementptr inbounds float* %tmp99, i64 1 + %tmp101 = getelementptr inbounds float* %tmp100, i64 1 + %tmp102 = getelementptr inbounds float* %tmp101, i64 1 + %tmp103 = getelementptr inbounds float* %tmp102, i64 1 + %tmp104 = getelementptr inbounds float* %tmp103, i64 1 + %tmp105 = getelementptr inbounds float* %tmp104, i64 1 + %tmp106 = getelementptr inbounds float* %tmp105, i64 1 + %tmp107 = getelementptr inbounds float* %tmp106, i64 1 + %tmp108 = getelementptr inbounds float* %tmp107, i64 1 + %tmp109 = getelementptr inbounds float* %tmp108, i64 1 + %tmp110 = getelementptr inbounds float* %tmp109, i64 1 + %tmp111 = getelementptr inbounds float* %tmp110, i64 1 + %tmp112 = getelementptr inbounds float* %tmp111, i64 1 + %tmp113 = getelementptr inbounds float* %tmp112, i64 1 + %tmp114 = getelementptr inbounds float* %tmp113, i64 1 + %tmp115 = getelementptr inbounds float* %tmp114, i64 1 + %tmp116 = getelementptr inbounds float* %tmp115, i64 1 + %tmp117 = getelementptr inbounds float* %tmp116, i64 1 + %tmp118 = getelementptr inbounds float* %tmp117, i64 1 + %tmp119 = getelementptr inbounds float* %tmp118, i64 1 + %tmp120 = getelementptr inbounds float* %tmp119, i64 1 + %tmp121 = getelementptr inbounds float* %tmp120, i64 1 + %tmp122 = getelementptr inbounds float* %tmp121, i64 1 + %tmp123 = getelementptr inbounds float* %tmp122, i64 1 + %tmp124 = getelementptr inbounds float* %tmp123, i64 1 + %tmp125 = getelementptr inbounds float* %tmp124, i64 1 + %tmp126 = getelementptr inbounds 
float* %tmp125, i64 1 + %tmp127 = getelementptr inbounds float* %tmp126, i64 1 + %tmp128 = getelementptr inbounds float* %tmp127, i64 1 + %tmp129 = getelementptr inbounds float* %tmp128, i64 1 + %tmp130 = getelementptr inbounds float* %tmp129, i64 1 + %tmp131 = getelementptr inbounds float* %tmp130, i64 1 + %tmp132 = getelementptr inbounds float* %tmp131, i64 1 + %tmp133 = getelementptr inbounds float* %tmp132, i64 1 + %tmp134 = getelementptr inbounds float* %tmp133, i64 1 + %tmp135 = getelementptr inbounds float* %tmp134, i64 1 + %tmp136 = getelementptr inbounds float* %tmp135, i64 1 + %tmp137 = getelementptr inbounds float* %tmp136, i64 1 + %tmp138 = getelementptr inbounds float* %tmp137, i64 1 + %tmp139 = getelementptr inbounds float* %tmp138, i64 1 + %tmp140 = getelementptr inbounds float* %tmp139, i64 1 + %tmp141 = getelementptr inbounds float* %tmp140, i64 1 + %tmp142 = getelementptr inbounds float* %tmp141, i64 1 + %tmp143 = getelementptr inbounds float* %tmp142, i64 1 + %tmp144 = getelementptr inbounds float* %tmp143, i64 1 + %tmp145 = getelementptr inbounds float* %tmp144, i64 1 + %tmp146 = getelementptr inbounds float* %tmp145, i64 1 + %tmp147 = getelementptr inbounds float* %tmp146, i64 1 + %tmp148 = getelementptr inbounds float* %tmp147, i64 1 + %tmp149 = getelementptr inbounds float* %tmp148, i64 1 + %tmp150 = getelementptr inbounds float* %tmp149, i64 1 + %tmp151 = getelementptr inbounds float* %tmp150, i64 1 + %tmp152 = getelementptr inbounds float* %tmp151, i64 1 + %tmp153 = getelementptr inbounds float* %tmp152, i64 1 + %tmp154 = getelementptr inbounds float* %tmp153, i64 1 + %tmp155 = getelementptr inbounds float* %tmp154, i64 1 + %tmp156 = getelementptr inbounds float* %tmp155, i64 1 + %tmp157 = getelementptr inbounds float* %tmp156, i64 1 + %tmp158 = getelementptr inbounds float* %tmp157, i64 1 + %tmp159 = getelementptr inbounds float* %tmp158, i64 1 + %tmp160 = getelementptr inbounds float* %tmp159, i64 1 + %tmp161 = getelementptr inbounds 
float* %tmp160, i64 1 + %tmp162 = getelementptr inbounds float* %tmp161, i64 1 + %tmp163 = getelementptr inbounds float* %tmp162, i64 1 + %tmp164 = getelementptr inbounds float* %tmp163, i64 1 + %tmp165 = getelementptr inbounds float* %tmp164, i64 1 + %tmp166 = getelementptr inbounds float* %tmp165, i64 1 + %tmp167 = getelementptr inbounds float* %tmp166, i64 1 + %tmp168 = getelementptr inbounds float* %tmp167, i64 1 + %tmp169 = getelementptr inbounds float* %tmp168, i64 1 + %tmp170 = getelementptr inbounds float* %tmp169, i64 1 + %tmp171 = getelementptr inbounds float* %tmp170, i64 1 + %tmp172 = getelementptr inbounds float* %tmp171, i64 1 + %tmp173 = getelementptr inbounds float* %tmp172, i64 1 + %tmp174 = getelementptr inbounds float* %tmp173, i64 1 + %tmp175 = getelementptr inbounds float* %tmp174, i64 1 + %tmp176 = getelementptr inbounds float* %tmp175, i64 1 + %tmp177 = getelementptr inbounds float* %tmp176, i64 1 + %tmp178 = getelementptr inbounds float* %tmp177, i64 1 + %tmp179 = getelementptr inbounds float* %tmp178, i64 1 + %tmp180 = getelementptr inbounds float* %tmp179, i64 1 + %tmp181 = getelementptr inbounds float* %tmp180, i64 1 + %tmp182 = getelementptr inbounds float* %tmp181, i64 1 + %tmp183 = getelementptr inbounds float* %tmp182, i64 1 + %tmp184 = getelementptr inbounds float* %tmp183, i64 1 + %tmp185 = getelementptr inbounds float* %tmp184, i64 1 + %tmp186 = getelementptr inbounds float* %tmp185, i64 1 + %tmp187 = getelementptr inbounds float* %tmp186, i64 1 + %tmp188 = getelementptr inbounds float* %tmp187, i64 1 + %tmp189 = getelementptr inbounds float* %tmp188, i64 1 + %tmp190 = getelementptr inbounds float* %tmp189, i64 1 + %tmp191 = getelementptr inbounds float* %tmp190, i64 1 + %tmp192 = getelementptr inbounds float* %tmp191, i64 1 + %tmp193 = getelementptr inbounds float* %tmp192, i64 1 + %tmp194 = getelementptr inbounds float* %tmp193, i64 1 + %tmp195 = getelementptr inbounds float* %tmp194, i64 1 + %tmp196 = getelementptr inbounds 
float* %tmp195, i64 1 + %tmp197 = getelementptr inbounds float* %tmp196, i64 1 + %tmp198 = getelementptr inbounds float* %tmp197, i64 1 + %tmp199 = getelementptr inbounds float* %tmp198, i64 1 + %tmp200 = getelementptr inbounds float* %tmp199, i64 1 + %tmp201 = getelementptr inbounds float* %tmp200, i64 1 + %tmp202 = getelementptr inbounds float* %tmp201, i64 1 + %tmp203 = getelementptr inbounds float* %tmp202, i64 1 + %tmp204 = getelementptr inbounds float* %tmp203, i64 1 + %tmp205 = getelementptr inbounds float* %tmp204, i64 1 + %tmp206 = getelementptr inbounds float* %tmp205, i64 1 + %tmp207 = getelementptr inbounds float* %tmp206, i64 1 + %tmp208 = getelementptr inbounds float* %tmp207, i64 1 + %tmp209 = getelementptr inbounds float* %tmp208, i64 1 + %tmp210 = getelementptr inbounds float* %tmp209, i64 1 + %tmp211 = getelementptr inbounds float* %tmp210, i64 1 + %tmp212 = getelementptr inbounds float* %tmp211, i64 1 + %tmp213 = getelementptr inbounds float* %tmp212, i64 1 + %tmp214 = getelementptr inbounds float* %tmp213, i64 1 + %tmp215 = getelementptr inbounds float* %tmp214, i64 1 + %tmp216 = getelementptr inbounds float* %tmp215, i64 1 + %tmp217 = getelementptr inbounds float* %tmp216, i64 1 + %tmp218 = getelementptr inbounds float* %tmp217, i64 1 + %tmp219 = getelementptr inbounds float* %tmp218, i64 1 + %tmp220 = getelementptr inbounds float* %tmp219, i64 1 + %tmp221 = getelementptr inbounds float* %tmp220, i64 1 + %tmp222 = getelementptr inbounds float* %tmp221, i64 1 + %tmp223 = getelementptr inbounds float* %tmp222, i64 1 + %tmp224 = getelementptr inbounds float* %tmp223, i64 1 + %tmp225 = getelementptr inbounds float* %tmp224, i64 1 + %tmp226 = getelementptr inbounds float* %tmp225, i64 1 + %tmp227 = getelementptr inbounds float* %tmp226, i64 1 + %tmp228 = getelementptr inbounds float* %tmp227, i64 1 + %tmp229 = getelementptr inbounds float* %tmp228, i64 1 + %tmp230 = getelementptr inbounds float* %tmp229, i64 1 + %tmp231 = getelementptr inbounds 
float* %tmp230, i64 1 + %tmp232 = getelementptr inbounds float* %tmp231, i64 1 + %tmp233 = getelementptr inbounds float* %tmp232, i64 1 + %tmp234 = getelementptr inbounds float* %tmp233, i64 1 + %tmp235 = getelementptr inbounds float* %tmp234, i64 1 + %tmp236 = getelementptr inbounds float* %tmp235, i64 1 + %tmp237 = getelementptr inbounds float* %tmp236, i64 1 + %tmp238 = getelementptr inbounds float* %tmp237, i64 1 + %tmp239 = getelementptr inbounds float* %tmp238, i64 1 + %tmp240 = getelementptr inbounds float* %tmp239, i64 1 + %tmp241 = getelementptr inbounds float* %tmp240, i64 1 + %tmp242 = getelementptr inbounds float* %tmp241, i64 1 + %tmp243 = getelementptr inbounds float* %tmp242, i64 1 + %tmp244 = getelementptr inbounds float* %tmp243, i64 1 + %tmp245 = getelementptr inbounds float* %tmp244, i64 1 + %tmp246 = getelementptr inbounds float* %tmp245, i64 1 + %tmp247 = getelementptr inbounds float* %tmp246, i64 1 + %tmp248 = getelementptr inbounds float* %tmp247, i64 1 + %tmp249 = getelementptr inbounds float* %tmp248, i64 1 + %tmp250 = getelementptr inbounds float* %tmp249, i64 1 + %tmp251 = getelementptr inbounds float* %tmp250, i64 1 + %tmp252 = getelementptr inbounds float* %tmp251, i64 1 + %tmp253 = getelementptr inbounds float* %tmp252, i64 1 + %tmp254 = getelementptr inbounds float* %tmp253, i64 1 + %tmp255 = getelementptr inbounds float* %tmp254, i64 1 + %tmp256 = getelementptr inbounds float* %tmp255, i64 1 + %tmp257 = getelementptr inbounds float* %tmp256, i64 1 + %tmp258 = getelementptr inbounds float* %tmp257, i64 1 + %tmp259 = getelementptr inbounds float* %tmp258, i64 1 + %tmp260 = getelementptr inbounds float* %tmp259, i64 1 + %tmp261 = getelementptr inbounds float* %tmp260, i64 1 + %tmp262 = getelementptr inbounds float* %tmp261, i64 1 + %tmp263 = getelementptr inbounds float* %tmp262, i64 1 + %tmp264 = getelementptr inbounds float* %tmp263, i64 1 + %tmp265 = getelementptr inbounds float* %tmp264, i64 1 + %tmp266 = getelementptr inbounds 
float* %tmp265, i64 1 + %tmp267 = getelementptr inbounds float* %tmp266, i64 1 + %tmp268 = getelementptr inbounds float* %tmp267, i64 1 + %tmp269 = getelementptr inbounds float* %tmp268, i64 1 + %tmp270 = getelementptr inbounds float* %tmp269, i64 1 + %tmp271 = getelementptr inbounds float* %tmp270, i64 1 + %tmp272 = getelementptr inbounds float* %tmp271, i64 1 + %tmp273 = getelementptr inbounds float* %tmp272, i64 1 + %tmp274 = getelementptr inbounds float* %tmp273, i64 1 + %tmp275 = getelementptr inbounds float* %tmp274, i64 1 + %tmp276 = getelementptr inbounds float* %tmp275, i64 1 + %tmp277 = getelementptr inbounds float* %tmp276, i64 1 + %tmp278 = getelementptr inbounds float* %tmp277, i64 1 + %tmp279 = getelementptr inbounds float* %tmp278, i64 1 + %tmp280 = getelementptr inbounds float* %tmp279, i64 1 + %tmp281 = getelementptr inbounds float* %tmp280, i64 1 + %tmp282 = getelementptr inbounds float* %tmp281, i64 1 + %tmp283 = getelementptr inbounds float* %tmp282, i64 1 + %tmp284 = getelementptr inbounds float* %tmp283, i64 1 + %tmp285 = getelementptr inbounds float* %tmp284, i64 1 + %tmp286 = getelementptr inbounds float* %tmp285, i64 1 + %tmp287 = getelementptr inbounds float* %tmp286, i64 1 + %tmp288 = getelementptr inbounds float* %tmp287, i64 1 + %tmp289 = getelementptr inbounds float* %tmp288, i64 1 + %tmp290 = getelementptr inbounds float* %tmp289, i64 1 + %tmp291 = getelementptr inbounds float* %tmp290, i64 1 + %tmp292 = getelementptr inbounds float* %tmp291, i64 1 + %tmp293 = getelementptr inbounds float* %tmp292, i64 1 + %tmp294 = getelementptr inbounds float* %tmp293, i64 1 + %tmp295 = getelementptr inbounds float* %tmp294, i64 1 + %tmp296 = getelementptr inbounds float* %tmp295, i64 1 + %tmp297 = getelementptr inbounds float* %tmp296, i64 1 + %tmp298 = getelementptr inbounds float* %tmp297, i64 1 + %tmp299 = getelementptr inbounds float* %tmp298, i64 1 + %tmp300 = getelementptr inbounds float* %tmp299, i64 1 + %tmp301 = getelementptr inbounds 
float* %tmp300, i64 1 + %tmp302 = getelementptr inbounds float* %tmp301, i64 1 + %tmp303 = getelementptr inbounds float* %tmp302, i64 1 + %tmp304 = getelementptr inbounds float* %tmp303, i64 1 + %tmp305 = getelementptr inbounds float* %tmp304, i64 1 + %tmp306 = getelementptr inbounds float* %tmp305, i64 1 + %tmp307 = getelementptr inbounds float* %tmp306, i64 1 + %tmp308 = getelementptr inbounds float* %tmp307, i64 1 + %tmp309 = getelementptr inbounds float* %tmp308, i64 1 + %tmp310 = getelementptr inbounds float* %tmp309, i64 1 + %tmp311 = getelementptr inbounds float* %tmp310, i64 1 + %tmp312 = getelementptr inbounds float* %tmp311, i64 1 + %tmp313 = getelementptr inbounds float* %tmp312, i64 1 + %tmp314 = getelementptr inbounds float* %tmp313, i64 1 + %tmp315 = getelementptr inbounds float* %tmp314, i64 1 + %tmp316 = getelementptr inbounds float* %tmp315, i64 1 + %tmp317 = getelementptr inbounds float* %tmp316, i64 1 + %tmp318 = getelementptr inbounds float* %tmp317, i64 1 + %tmp319 = getelementptr inbounds float* %tmp318, i64 1 + %tmp320 = getelementptr inbounds float* %tmp319, i64 1 + %tmp321 = getelementptr inbounds float* %tmp320, i64 1 + %tmp322 = getelementptr inbounds float* %tmp321, i64 1 + %tmp323 = getelementptr inbounds float* %tmp322, i64 1 + %tmp324 = getelementptr inbounds float* %tmp323, i64 1 + %tmp325 = getelementptr inbounds float* %tmp324, i64 1 + %tmp326 = getelementptr inbounds float* %tmp325, i64 1 + %tmp327 = getelementptr inbounds float* %tmp326, i64 1 + %tmp328 = getelementptr inbounds float* %tmp327, i64 1 + %tmp329 = getelementptr inbounds float* %tmp328, i64 1 + %tmp330 = getelementptr inbounds float* %tmp329, i64 1 + %tmp331 = getelementptr inbounds float* %tmp330, i64 1 + %tmp332 = getelementptr inbounds float* %tmp331, i64 1 + %tmp333 = getelementptr inbounds float* %tmp332, i64 1 + %tmp334 = getelementptr inbounds float* %tmp333, i64 1 + %tmp335 = getelementptr inbounds float* %tmp334, i64 1 + %tmp336 = getelementptr inbounds 
float* %tmp335, i64 1 + %tmp337 = getelementptr inbounds float* %tmp336, i64 1 + %tmp338 = getelementptr inbounds float* %tmp337, i64 1 + %tmp339 = getelementptr inbounds float* %tmp338, i64 1 + %tmp340 = getelementptr inbounds float* %tmp339, i64 1 + %tmp341 = getelementptr inbounds float* %tmp340, i64 1 + %tmp342 = getelementptr inbounds float* %tmp341, i64 1 + %tmp343 = getelementptr inbounds float* %tmp342, i64 1 + %tmp344 = getelementptr inbounds float* %tmp343, i64 1 + %tmp345 = getelementptr inbounds float* %tmp344, i64 1 + %tmp346 = getelementptr inbounds float* %tmp345, i64 1 + %tmp347 = getelementptr inbounds float* %tmp346, i64 1 + %tmp348 = getelementptr inbounds float* %tmp347, i64 1 + %tmp349 = getelementptr inbounds float* %tmp348, i64 1 + %tmp350 = getelementptr inbounds float* %tmp349, i64 1 + %tmp351 = getelementptr inbounds float* %tmp350, i64 1 + %tmp352 = getelementptr inbounds float* %tmp351, i64 1 + %tmp353 = getelementptr inbounds float* %tmp352, i64 1 + %tmp354 = getelementptr inbounds float* %tmp353, i64 1 + %tmp355 = getelementptr inbounds float* %tmp354, i64 1 + %tmp356 = getelementptr inbounds float* %tmp355, i64 1 + %tmp357 = getelementptr inbounds float* %tmp356, i64 1 + %tmp358 = getelementptr inbounds float* %tmp357, i64 1 + %tmp359 = getelementptr inbounds float* %tmp358, i64 1 + %tmp360 = getelementptr inbounds float* %tmp359, i64 1 + %tmp361 = getelementptr inbounds float* %tmp360, i64 1 + %tmp362 = getelementptr inbounds float* %tmp361, i64 1 + %tmp363 = getelementptr inbounds float* %tmp362, i64 1 + %tmp364 = getelementptr inbounds float* %tmp363, i64 1 + %tmp365 = getelementptr inbounds float* %tmp364, i64 1 + %tmp366 = getelementptr inbounds float* %tmp365, i64 1 + %tmp367 = getelementptr inbounds float* %tmp366, i64 1 + %tmp368 = getelementptr inbounds float* %tmp367, i64 1 + %tmp369 = getelementptr inbounds float* %tmp368, i64 1 + %tmp370 = getelementptr inbounds float* %tmp369, i64 1 + %tmp371 = getelementptr inbounds 
float* %tmp370, i64 1 + %tmp372 = getelementptr inbounds float* %tmp371, i64 1 + %tmp373 = getelementptr inbounds float* %tmp372, i64 1 + %tmp374 = getelementptr inbounds float* %tmp373, i64 1 + %tmp375 = getelementptr inbounds float* %tmp374, i64 1 + %tmp376 = getelementptr inbounds float* %tmp375, i64 1 + %tmp377 = getelementptr inbounds float* %tmp376, i64 1 + %tmp378 = getelementptr inbounds float* %tmp377, i64 1 + %tmp379 = getelementptr inbounds float* %tmp378, i64 1 + %tmp380 = getelementptr inbounds float* %tmp379, i64 1 + %tmp381 = getelementptr inbounds float* %tmp380, i64 1 + %tmp382 = getelementptr inbounds float* %tmp381, i64 1 + %tmp383 = getelementptr inbounds float* %tmp382, i64 1 + %tmp384 = getelementptr inbounds float* %tmp383, i64 1 + %tmp385 = getelementptr inbounds float* %tmp384, i64 1 + %tmp386 = getelementptr inbounds float* %tmp385, i64 1 + %tmp387 = getelementptr inbounds float* %tmp386, i64 1 + %tmp388 = getelementptr inbounds float* %tmp387, i64 1 + %tmp389 = getelementptr inbounds float* %tmp388, i64 1 + %tmp390 = getelementptr inbounds float* %tmp389, i64 1 + %tmp391 = getelementptr inbounds float* %tmp390, i64 1 + %tmp392 = getelementptr inbounds float* %tmp391, i64 1 + %tmp393 = getelementptr inbounds float* %tmp392, i64 1 + %tmp394 = getelementptr inbounds float* %tmp393, i64 1 + %tmp395 = getelementptr inbounds float* %tmp394, i64 1 + %tmp396 = getelementptr inbounds float* %tmp395, i64 1 + %tmp397 = getelementptr inbounds float* %tmp396, i64 1 + %tmp398 = getelementptr inbounds float* %tmp397, i64 1 + %tmp399 = getelementptr inbounds float* %tmp398, i64 1 + %tmp400 = getelementptr inbounds float* %tmp399, i64 1 + %tmp401 = getelementptr inbounds float* %tmp400, i64 1 + %tmp402 = getelementptr inbounds float* %tmp401, i64 1 + %tmp403 = getelementptr inbounds float* %tmp402, i64 1 + %tmp404 = getelementptr inbounds float* %tmp403, i64 1 + %tmp405 = getelementptr inbounds float* %tmp404, i64 1 + %tmp406 = getelementptr inbounds 
float* %tmp405, i64 1 + %tmp407 = getelementptr inbounds float* %tmp406, i64 1 + %tmp408 = getelementptr inbounds float* %tmp407, i64 1 + %tmp409 = getelementptr inbounds float* %tmp408, i64 1 + %tmp410 = getelementptr inbounds float* %tmp409, i64 1 + %tmp411 = getelementptr inbounds float* %tmp410, i64 1 + %tmp412 = getelementptr inbounds float* %tmp411, i64 1 + %tmp413 = getelementptr inbounds float* %tmp412, i64 1 + %tmp414 = getelementptr inbounds float* %tmp413, i64 1 + %tmp415 = getelementptr inbounds float* %tmp414, i64 1 + %tmp416 = getelementptr inbounds float* %tmp415, i64 1 + %tmp417 = getelementptr inbounds float* %tmp416, i64 1 + %tmp418 = getelementptr inbounds float* %tmp417, i64 1 + %tmp419 = getelementptr inbounds float* %tmp418, i64 1 + %tmp420 = getelementptr inbounds float* %tmp419, i64 1 + %tmp421 = getelementptr inbounds float* %tmp420, i64 1 + %tmp422 = getelementptr inbounds float* %tmp421, i64 1 + %tmp423 = getelementptr inbounds float* %tmp422, i64 1 + %tmp424 = getelementptr inbounds float* %tmp423, i64 1 + %tmp425 = getelementptr inbounds float* %tmp424, i64 1 + %tmp426 = getelementptr inbounds float* %tmp425, i64 1 + %tmp427 = getelementptr inbounds float* %tmp426, i64 1 + %tmp428 = getelementptr inbounds float* %tmp427, i64 1 + %tmp429 = getelementptr inbounds float* %tmp428, i64 1 + %tmp430 = getelementptr inbounds float* %tmp429, i64 1 + %tmp431 = getelementptr inbounds float* %tmp430, i64 1 + %tmp432 = getelementptr inbounds float* %tmp431, i64 1 + %tmp433 = getelementptr inbounds float* %tmp432, i64 1 + %tmp434 = getelementptr inbounds float* %tmp433, i64 1 + %tmp435 = getelementptr inbounds float* %tmp434, i64 1 + %tmp436 = getelementptr inbounds float* %tmp435, i64 1 + %tmp437 = getelementptr inbounds float* %tmp436, i64 1 + %tmp438 = getelementptr inbounds float* %tmp437, i64 1 + %tmp439 = getelementptr inbounds float* %tmp438, i64 1 + %tmp440 = getelementptr inbounds float* %tmp439, i64 1 + %tmp441 = getelementptr inbounds 
float* %tmp440, i64 1 + %tmp442 = getelementptr inbounds float* %tmp441, i64 1 + %tmp443 = getelementptr inbounds float* %tmp442, i64 1 + %tmp444 = getelementptr inbounds float* %tmp443, i64 1 + %tmp445 = getelementptr inbounds float* %tmp444, i64 1 + %tmp446 = getelementptr inbounds float* %tmp445, i64 1 + %tmp447 = getelementptr inbounds float* %tmp446, i64 1 + %tmp448 = getelementptr inbounds float* %tmp447, i64 1 + %tmp449 = getelementptr inbounds float* %tmp448, i64 1 + %tmp450 = getelementptr inbounds float* %tmp449, i64 1 + %tmp451 = getelementptr inbounds float* %tmp450, i64 1 + %tmp452 = getelementptr inbounds float* %tmp451, i64 1 + %tmp453 = getelementptr inbounds float* %tmp452, i64 1 + %tmp454 = getelementptr inbounds float* %tmp453, i64 1 + %tmp455 = getelementptr inbounds float* %tmp454, i64 1 + %tmp456 = getelementptr inbounds float* %tmp455, i64 1 + %tmp457 = getelementptr inbounds float* %tmp456, i64 1 + %tmp458 = getelementptr inbounds float* %tmp457, i64 1 + %tmp459 = getelementptr inbounds float* %tmp458, i64 1 + %tmp460 = getelementptr inbounds float* %tmp459, i64 1 + %tmp461 = getelementptr inbounds float* %tmp460, i64 1 + %tmp462 = getelementptr inbounds float* %tmp461, i64 1 + %tmp463 = getelementptr inbounds float* %tmp462, i64 1 + %tmp464 = getelementptr inbounds float* %tmp463, i64 1 + %tmp465 = getelementptr inbounds float* %tmp464, i64 1 + %tmp466 = getelementptr inbounds float* %tmp465, i64 1 + %tmp467 = getelementptr inbounds float* %tmp466, i64 1 + %tmp468 = getelementptr inbounds float* %tmp467, i64 1 + %tmp469 = getelementptr inbounds float* %tmp468, i64 1 + %tmp470 = getelementptr inbounds float* %tmp469, i64 1 + %tmp471 = getelementptr inbounds float* %tmp470, i64 1 + %tmp472 = getelementptr inbounds float* %tmp471, i64 1 + %tmp473 = getelementptr inbounds float* %tmp472, i64 1 + %tmp474 = getelementptr inbounds float* %tmp473, i64 1 + %tmp475 = getelementptr inbounds float* %tmp474, i64 1 + %tmp476 = getelementptr inbounds 
float* %tmp475, i64 1 + %tmp477 = getelementptr inbounds float* %tmp476, i64 1 + %tmp478 = getelementptr inbounds float* %tmp477, i64 1 + %tmp479 = getelementptr inbounds float* %tmp478, i64 1 + %tmp480 = getelementptr inbounds float* %tmp479, i64 1 + %tmp481 = getelementptr inbounds float* %tmp480, i64 1 + %tmp482 = getelementptr inbounds float* %tmp481, i64 1 + %tmp483 = getelementptr inbounds float* %tmp482, i64 1 + %tmp484 = getelementptr inbounds float* %tmp483, i64 1 + %tmp485 = getelementptr inbounds float* %tmp484, i64 1 + %tmp486 = getelementptr inbounds float* %tmp485, i64 1 + %tmp487 = getelementptr inbounds float* %tmp486, i64 1 + %tmp488 = getelementptr inbounds float* %tmp487, i64 1 + %tmp489 = getelementptr inbounds float* %tmp488, i64 1 + %tmp490 = getelementptr inbounds float* %tmp489, i64 1 + %tmp491 = getelementptr inbounds float* %tmp490, i64 1 + %tmp492 = getelementptr inbounds float* %tmp491, i64 1 + %tmp493 = getelementptr inbounds float* %tmp492, i64 1 + %tmp494 = getelementptr inbounds float* %tmp493, i64 1 + %tmp495 = getelementptr inbounds float* %tmp494, i64 1 + %tmp496 = getelementptr inbounds float* %tmp495, i64 1 + %tmp497 = getelementptr inbounds float* %tmp496, i64 1 + %tmp498 = getelementptr inbounds float* %tmp497, i64 1 + %tmp499 = getelementptr inbounds float* %tmp498, i64 1 + %tmp500 = getelementptr inbounds float* %tmp499, i64 1 + %tmp501 = getelementptr inbounds float* %tmp500, i64 1 + %tmp502 = getelementptr inbounds float* %tmp501, i64 1 + %tmp503 = getelementptr inbounds float* %tmp502, i64 1 + %tmp504 = getelementptr inbounds float* %tmp503, i64 1 + %tmp505 = getelementptr inbounds float* %tmp504, i64 1 + %tmp506 = getelementptr inbounds float* %tmp505, i64 1 + %tmp507 = getelementptr inbounds float* %tmp506, i64 1 + %tmp508 = getelementptr inbounds float* %tmp507, i64 1 + %tmp509 = getelementptr inbounds float* %tmp508, i64 1 + %tmp510 = getelementptr inbounds float* %tmp509, i64 1 + %tmp511 = getelementptr inbounds 
float* %tmp510, i64 1 + %tmp512 = getelementptr inbounds float* %tmp511, i64 1 + %tmp513 = getelementptr inbounds float* %tmp512, i64 1 + %tmp514 = getelementptr inbounds float* %tmp513, i64 1 + %tmp515 = getelementptr inbounds float* %tmp514, i64 1 + %tmp516 = getelementptr inbounds float* %tmp515, i64 1 + %tmp517 = getelementptr inbounds float* %tmp516, i64 1 + %tmp518 = getelementptr inbounds float* %tmp517, i64 1 + %tmp519 = getelementptr inbounds float* %tmp518, i64 1 + %tmp520 = getelementptr inbounds float* %tmp519, i64 1 + %tmp521 = getelementptr inbounds float* %tmp520, i64 1 + %tmp522 = getelementptr inbounds float* %tmp521, i64 1 + %tmp523 = getelementptr inbounds float* %tmp522, i64 1 + %tmp524 = getelementptr inbounds float* %tmp523, i64 1 + %tmp525 = getelementptr inbounds float* %tmp524, i64 1 + %tmp526 = getelementptr inbounds float* %tmp525, i64 1 + %tmp527 = getelementptr inbounds float* %tmp526, i64 1 + %tmp528 = getelementptr inbounds float* %tmp527, i64 1 + %tmp529 = getelementptr inbounds float* %tmp528, i64 1 + %tmp530 = getelementptr inbounds float* %tmp529, i64 1 + %tmp531 = getelementptr inbounds float* %tmp530, i64 1 + %tmp532 = getelementptr inbounds float* %tmp531, i64 1 + %tmp533 = getelementptr inbounds float* %tmp532, i64 1 + %tmp534 = getelementptr inbounds float* %tmp533, i64 1 + %tmp535 = getelementptr inbounds float* %tmp534, i64 1 + %tmp536 = getelementptr inbounds float* %tmp535, i64 1 + %tmp537 = getelementptr inbounds float* %tmp536, i64 1 + %tmp538 = getelementptr inbounds float* %tmp537, i64 1 + %tmp539 = getelementptr inbounds float* %tmp538, i64 1 + %tmp540 = getelementptr inbounds float* %tmp539, i64 1 + %tmp541 = getelementptr inbounds float* %tmp540, i64 1 + %tmp542 = getelementptr inbounds float* %tmp541, i64 1 + %tmp543 = getelementptr inbounds float* %tmp542, i64 1 + %tmp544 = getelementptr inbounds float* %tmp543, i64 1 + %tmp545 = getelementptr inbounds float* %tmp544, i64 1 + %tmp546 = getelementptr inbounds 
float* %tmp545, i64 1 + %tmp547 = getelementptr inbounds float* %tmp546, i64 1 + %tmp548 = getelementptr inbounds float* %tmp547, i64 1 + %tmp549 = getelementptr inbounds float* %tmp548, i64 1 + %tmp550 = getelementptr inbounds float* %tmp549, i64 1 + %tmp551 = getelementptr inbounds float* %tmp550, i64 1 + %tmp552 = getelementptr inbounds float* %tmp551, i64 1 + %tmp553 = getelementptr inbounds float* %tmp552, i64 1 + %tmp554 = getelementptr inbounds float* %tmp553, i64 1 + %tmp555 = getelementptr inbounds float* %tmp554, i64 1 + %tmp556 = getelementptr inbounds float* %tmp555, i64 1 + %tmp557 = getelementptr inbounds float* %tmp556, i64 1 + %tmp558 = getelementptr inbounds float* %tmp557, i64 1 + %tmp559 = getelementptr inbounds float* %tmp558, i64 1 + %tmp560 = getelementptr inbounds float* %tmp559, i64 1 + %tmp561 = getelementptr inbounds float* %tmp560, i64 1 + %tmp562 = getelementptr inbounds float* %tmp561, i64 1 + %tmp563 = getelementptr inbounds float* %tmp562, i64 1 + %tmp564 = getelementptr inbounds float* %tmp563, i64 1 + %tmp565 = getelementptr inbounds float* %tmp564, i64 1 + %tmp566 = getelementptr inbounds float* %tmp565, i64 1 + %tmp567 = getelementptr inbounds float* %tmp566, i64 1 + %tmp568 = getelementptr inbounds float* %tmp567, i64 1 + %tmp569 = getelementptr inbounds float* %tmp568, i64 1 + %tmp570 = getelementptr inbounds float* %tmp569, i64 1 + %tmp571 = getelementptr inbounds float* %tmp570, i64 1 + %tmp572 = getelementptr inbounds float* %tmp571, i64 1 + %tmp573 = getelementptr inbounds float* %tmp572, i64 1 + %tmp574 = getelementptr inbounds float* %tmp573, i64 1 + %tmp575 = getelementptr inbounds float* %tmp574, i64 1 + %tmp576 = getelementptr inbounds float* %tmp575, i64 1 + %tmp577 = getelementptr inbounds float* %tmp576, i64 1 + %tmp578 = getelementptr inbounds float* %tmp577, i64 1 + %tmp579 = getelementptr inbounds float* %tmp578, i64 1 + %tmp580 = getelementptr inbounds float* %tmp579, i64 1 + %tmp581 = getelementptr inbounds 
float* %tmp580, i64 1 + %tmp582 = getelementptr inbounds float* %tmp581, i64 1 + %tmp583 = getelementptr inbounds float* %tmp582, i64 1 + %tmp584 = getelementptr inbounds float* %tmp583, i64 1 + %tmp585 = getelementptr inbounds float* %tmp584, i64 1 + %tmp586 = getelementptr inbounds float* %tmp585, i64 1 + %tmp587 = getelementptr inbounds float* %tmp586, i64 1 + %tmp588 = getelementptr inbounds float* %tmp587, i64 1 + %tmp589 = getelementptr inbounds float* %tmp588, i64 1 + %tmp590 = getelementptr inbounds float* %tmp589, i64 1 + %tmp591 = getelementptr inbounds float* %tmp590, i64 1 + %tmp592 = getelementptr inbounds float* %tmp591, i64 1 + %tmp593 = getelementptr inbounds float* %tmp592, i64 1 + %tmp594 = getelementptr inbounds float* %tmp593, i64 1 + %tmp595 = getelementptr inbounds float* %tmp594, i64 1 + %tmp596 = getelementptr inbounds float* %tmp595, i64 1 + %tmp597 = getelementptr inbounds float* %tmp596, i64 1 + %tmp598 = getelementptr inbounds float* %tmp597, i64 1 + %tmp599 = getelementptr inbounds float* %tmp598, i64 1 + %tmp600 = getelementptr inbounds float* %tmp599, i64 1 + %tmp601 = getelementptr inbounds float* %tmp600, i64 1 + %tmp602 = getelementptr inbounds float* %tmp601, i64 1 + %tmp603 = getelementptr inbounds float* %tmp602, i64 1 + %tmp604 = getelementptr inbounds float* %tmp603, i64 1 + %tmp605 = getelementptr inbounds float* %tmp604, i64 1 + %tmp606 = getelementptr inbounds float* %tmp605, i64 1 + %tmp607 = getelementptr inbounds float* %tmp606, i64 1 + %tmp608 = getelementptr inbounds float* %tmp607, i64 1 + %tmp609 = getelementptr inbounds float* %tmp608, i64 1 + %tmp610 = getelementptr inbounds float* %tmp609, i64 1 + %tmp611 = getelementptr inbounds float* %tmp610, i64 1 + %tmp612 = getelementptr inbounds float* %tmp611, i64 1 + %tmp613 = getelementptr inbounds float* %tmp612, i64 1 + %tmp614 = getelementptr inbounds float* %tmp613, i64 1 + %tmp615 = getelementptr inbounds float* %tmp614, i64 1 + %tmp616 = getelementptr inbounds 
float* %tmp615, i64 1 + %tmp617 = getelementptr inbounds float* %tmp616, i64 1 + %tmp618 = getelementptr inbounds float* %tmp617, i64 1 + %tmp619 = getelementptr inbounds float* %tmp618, i64 1 + %tmp620 = getelementptr inbounds float* %tmp619, i64 1 + %tmp621 = getelementptr inbounds float* %tmp620, i64 1 + %tmp622 = getelementptr inbounds float* %tmp621, i64 1 + %tmp623 = getelementptr inbounds float* %tmp622, i64 1 + %tmp624 = getelementptr inbounds float* %tmp623, i64 1 + %tmp625 = getelementptr inbounds float* %tmp624, i64 1 + %tmp626 = getelementptr inbounds float* %tmp625, i64 1 + %tmp627 = getelementptr inbounds float* %tmp626, i64 1 + %tmp628 = getelementptr inbounds float* %tmp627, i64 1 + %tmp629 = getelementptr inbounds float* %tmp628, i64 1 + %tmp630 = getelementptr inbounds float* %tmp629, i64 1 + %tmp631 = getelementptr inbounds float* %tmp630, i64 1 + %tmp632 = getelementptr inbounds float* %tmp631, i64 1 + %tmp633 = getelementptr inbounds float* %tmp632, i64 1 + %tmp634 = getelementptr inbounds float* %tmp633, i64 1 + %tmp635 = getelementptr inbounds float* %tmp634, i64 1 + %tmp636 = getelementptr inbounds float* %tmp635, i64 1 + %tmp637 = getelementptr inbounds float* %tmp636, i64 1 + %tmp638 = getelementptr inbounds float* %tmp637, i64 1 + %tmp639 = getelementptr inbounds float* %tmp638, i64 1 + %tmp640 = getelementptr inbounds float* %tmp639, i64 1 + %tmp641 = getelementptr inbounds float* %tmp640, i64 1 + %tmp642 = getelementptr inbounds float* %tmp641, i64 1 + %tmp643 = getelementptr inbounds float* %tmp642, i64 1 + %tmp644 = getelementptr inbounds float* %tmp643, i64 1 + %tmp645 = getelementptr inbounds float* %tmp644, i64 1 + %tmp646 = getelementptr inbounds float* %tmp645, i64 1 + %tmp647 = getelementptr inbounds float* %tmp646, i64 1 + %tmp648 = getelementptr inbounds float* %tmp647, i64 1 + %tmp649 = getelementptr inbounds float* %tmp648, i64 1 + %tmp650 = getelementptr inbounds float* %tmp649, i64 1 + %tmp651 = getelementptr inbounds 
float* %tmp650, i64 1 + %tmp652 = getelementptr inbounds float* %tmp651, i64 1 + %tmp653 = getelementptr inbounds float* %tmp652, i64 1 + %tmp654 = getelementptr inbounds float* %tmp653, i64 1 + %tmp655 = getelementptr inbounds float* %tmp654, i64 1 + %tmp656 = getelementptr inbounds float* %tmp655, i64 1 + %tmp657 = getelementptr inbounds float* %tmp656, i64 1 + %tmp658 = getelementptr inbounds float* %tmp657, i64 1 + %tmp659 = getelementptr inbounds float* %tmp658, i64 1 + %tmp660 = getelementptr inbounds float* %tmp659, i64 1 + %tmp661 = getelementptr inbounds float* %tmp660, i64 1 + %tmp662 = getelementptr inbounds float* %tmp661, i64 1 + %tmp663 = getelementptr inbounds float* %tmp662, i64 1 + %tmp664 = getelementptr inbounds float* %tmp663, i64 1 + %tmp665 = getelementptr inbounds float* %tmp664, i64 1 + %tmp666 = getelementptr inbounds float* %tmp665, i64 1 + %tmp667 = getelementptr inbounds float* %tmp666, i64 1 + %tmp668 = getelementptr inbounds float* %tmp667, i64 1 + %tmp669 = getelementptr inbounds float* %tmp668, i64 1 + %tmp670 = getelementptr inbounds float* %tmp669, i64 1 + %tmp671 = getelementptr inbounds float* %tmp670, i64 1 + %tmp672 = getelementptr inbounds float* %tmp671, i64 1 + %tmp673 = getelementptr inbounds float* %tmp672, i64 1 + %tmp674 = getelementptr inbounds float* %tmp673, i64 1 + %tmp675 = getelementptr inbounds float* %tmp674, i64 1 + %tmp676 = getelementptr inbounds float* %tmp675, i64 1 + %tmp677 = getelementptr inbounds float* %tmp676, i64 1 + %tmp678 = getelementptr inbounds float* %tmp677, i64 1 + %tmp679 = getelementptr inbounds float* %tmp678, i64 1 + %tmp680 = getelementptr inbounds float* %tmp679, i64 1 + %tmp681 = getelementptr inbounds float* %tmp680, i64 1 + %tmp682 = getelementptr inbounds float* %tmp681, i64 1 + %tmp683 = getelementptr inbounds float* %tmp682, i64 1 + %tmp684 = getelementptr inbounds float* %tmp683, i64 1 + %tmp685 = getelementptr inbounds float* %tmp684, i64 1 + %tmp686 = getelementptr inbounds 
float* %tmp685, i64 1 + %tmp687 = getelementptr inbounds float* %tmp686, i64 1 + %tmp688 = getelementptr inbounds float* %tmp687, i64 1 + %tmp689 = getelementptr inbounds float* %tmp688, i64 1 + %tmp690 = getelementptr inbounds float* %tmp689, i64 1 + %tmp691 = getelementptr inbounds float* %tmp690, i64 1 + %tmp692 = getelementptr inbounds float* %tmp691, i64 1 + %tmp693 = getelementptr inbounds float* %tmp692, i64 1 + %tmp694 = getelementptr inbounds float* %tmp693, i64 1 + %tmp695 = getelementptr inbounds float* %tmp694, i64 1 + %tmp696 = getelementptr inbounds float* %tmp695, i64 1 + %tmp697 = getelementptr inbounds float* %tmp696, i64 1 + %tmp698 = getelementptr inbounds float* %tmp697, i64 1 + %tmp699 = getelementptr inbounds float* %tmp698, i64 1 + %tmp700 = getelementptr inbounds float* %tmp699, i64 1 + %tmp701 = getelementptr inbounds float* %tmp700, i64 1 + %tmp702 = getelementptr inbounds float* %tmp701, i64 1 + %tmp703 = getelementptr inbounds float* %tmp702, i64 1 + %tmp704 = getelementptr inbounds float* %tmp703, i64 1 + %tmp705 = getelementptr inbounds float* %tmp704, i64 1 + %tmp706 = getelementptr inbounds float* %tmp705, i64 1 + %tmp707 = getelementptr inbounds float* %tmp706, i64 1 + %tmp708 = getelementptr inbounds float* %tmp707, i64 1 + %tmp709 = getelementptr inbounds float* %tmp708, i64 1 + %tmp710 = getelementptr inbounds float* %tmp709, i64 1 + %tmp711 = getelementptr inbounds float* %tmp710, i64 1 + %tmp712 = getelementptr inbounds float* %tmp711, i64 1 + %tmp713 = getelementptr inbounds float* %tmp712, i64 1 + %tmp714 = getelementptr inbounds float* %tmp713, i64 1 + %tmp715 = getelementptr inbounds float* %tmp714, i64 1 + %tmp716 = getelementptr inbounds float* %tmp715, i64 1 + %tmp717 = getelementptr inbounds float* %tmp716, i64 1 + %tmp718 = getelementptr inbounds float* %tmp717, i64 1 + %tmp719 = getelementptr inbounds float* %tmp718, i64 1 + %tmp720 = getelementptr inbounds float* %tmp719, i64 1 + %tmp721 = getelementptr inbounds 
float* %tmp720, i64 1 + %tmp722 = getelementptr inbounds float* %tmp721, i64 1 + %tmp723 = getelementptr inbounds float* %tmp722, i64 1 + %tmp724 = getelementptr inbounds float* %tmp723, i64 1 + %tmp725 = getelementptr inbounds float* %tmp724, i64 1 + %tmp726 = getelementptr inbounds float* %tmp725, i64 1 + %tmp727 = getelementptr inbounds float* %tmp726, i64 1 + %tmp728 = getelementptr inbounds float* %tmp727, i64 1 + %tmp729 = getelementptr inbounds float* %tmp728, i64 1 + %tmp730 = getelementptr inbounds float* %tmp729, i64 1 + %tmp731 = getelementptr inbounds float* %tmp730, i64 1 + %tmp732 = getelementptr inbounds float* %tmp731, i64 1 + %tmp733 = getelementptr inbounds float* %tmp732, i64 1 + %tmp734 = getelementptr inbounds float* %tmp733, i64 1 + %tmp735 = getelementptr inbounds float* %tmp734, i64 1 + %tmp736 = getelementptr inbounds float* %tmp735, i64 1 + %tmp737 = getelementptr inbounds float* %tmp736, i64 1 + %tmp738 = getelementptr inbounds float* %tmp737, i64 1 + %tmp739 = getelementptr inbounds float* %tmp738, i64 1 + %tmp740 = getelementptr inbounds float* %tmp739, i64 1 + %tmp741 = getelementptr inbounds float* %tmp740, i64 1 + %tmp742 = getelementptr inbounds float* %tmp741, i64 1 + %tmp743 = getelementptr inbounds float* %tmp742, i64 1 + %tmp744 = getelementptr inbounds float* %tmp743, i64 1 + %tmp745 = getelementptr inbounds float* %tmp744, i64 1 + %tmp746 = getelementptr inbounds float* %tmp745, i64 1 + %tmp747 = getelementptr inbounds float* %tmp746, i64 1 + %tmp748 = getelementptr inbounds float* %tmp747, i64 1 + %tmp749 = getelementptr inbounds float* %tmp748, i64 1 + %tmp750 = getelementptr inbounds float* %tmp749, i64 1 + %tmp751 = getelementptr inbounds float* %tmp750, i64 1 + %tmp752 = getelementptr inbounds float* %tmp751, i64 1 + %tmp753 = getelementptr inbounds float* %tmp752, i64 1 + %tmp754 = getelementptr inbounds float* %tmp753, i64 1 + %tmp755 = getelementptr inbounds float* %tmp754, i64 1 + %tmp756 = getelementptr inbounds 
float* %tmp755, i64 1 + %tmp757 = getelementptr inbounds float* %tmp756, i64 1 + %tmp758 = getelementptr inbounds float* %tmp757, i64 1 + %tmp759 = getelementptr inbounds float* %tmp758, i64 1 + %tmp760 = getelementptr inbounds float* %tmp759, i64 1 + %tmp761 = getelementptr inbounds float* %tmp760, i64 1 + %tmp762 = getelementptr inbounds float* %tmp761, i64 1 + %tmp763 = getelementptr inbounds float* %tmp762, i64 1 + %tmp764 = getelementptr inbounds float* %tmp763, i64 1 + %tmp765 = getelementptr inbounds float* %tmp764, i64 1 + %tmp766 = getelementptr inbounds float* %tmp765, i64 1 + %tmp767 = getelementptr inbounds float* %tmp766, i64 1 + %tmp768 = getelementptr inbounds float* %tmp767, i64 1 + %tmp769 = getelementptr inbounds float* %tmp768, i64 1 + %tmp770 = getelementptr inbounds float* %tmp769, i64 1 + %tmp771 = getelementptr inbounds float* %tmp770, i64 1 + %tmp772 = getelementptr inbounds float* %tmp771, i64 1 + %tmp773 = getelementptr inbounds float* %tmp772, i64 1 + %tmp774 = getelementptr inbounds float* %tmp773, i64 1 + %tmp775 = getelementptr inbounds float* %tmp774, i64 1 + %tmp776 = getelementptr inbounds float* %tmp775, i64 1 + %tmp777 = getelementptr inbounds float* %tmp776, i64 1 + %tmp778 = getelementptr inbounds float* %tmp777, i64 1 + %tmp779 = getelementptr inbounds float* %tmp778, i64 1 + %tmp780 = getelementptr inbounds float* %tmp779, i64 1 + %tmp781 = getelementptr inbounds float* %tmp780, i64 1 + %tmp782 = getelementptr inbounds float* %tmp781, i64 1 + %tmp783 = getelementptr inbounds float* %tmp782, i64 1 + %tmp784 = getelementptr inbounds float* %tmp783, i64 1 + %tmp785 = getelementptr inbounds float* %tmp784, i64 1 + %tmp786 = getelementptr inbounds float* %tmp785, i64 1 + %tmp787 = getelementptr inbounds float* %tmp786, i64 1 + %tmp788 = getelementptr inbounds float* %tmp787, i64 1 + %tmp789 = getelementptr inbounds float* %tmp788, i64 1 + %tmp790 = getelementptr inbounds float* %tmp789, i64 1 + %tmp791 = getelementptr inbounds 
float* %tmp790, i64 1 + %tmp792 = getelementptr inbounds float* %tmp791, i64 1 + %tmp793 = getelementptr inbounds float* %tmp792, i64 1 + %tmp794 = getelementptr inbounds float* %tmp793, i64 1 + %tmp795 = getelementptr inbounds float* %tmp794, i64 1 + %tmp796 = getelementptr inbounds float* %tmp795, i64 1 + %tmp797 = getelementptr inbounds float* %tmp796, i64 1 + %tmp798 = getelementptr inbounds float* %tmp797, i64 1 + %tmp799 = getelementptr inbounds float* %tmp798, i64 1 + %tmp800 = getelementptr inbounds float* %tmp799, i64 1 + %tmp801 = getelementptr inbounds float* %tmp800, i64 1 + %tmp802 = getelementptr inbounds float* %tmp801, i64 1 + %tmp803 = getelementptr inbounds float* %tmp802, i64 1 + %tmp804 = getelementptr inbounds float* %tmp803, i64 1 + %tmp805 = getelementptr inbounds float* %tmp804, i64 1 + %tmp806 = getelementptr inbounds float* %tmp805, i64 1 + %tmp807 = getelementptr inbounds float* %tmp806, i64 1 + %tmp808 = getelementptr inbounds float* %tmp807, i64 1 + %tmp809 = getelementptr inbounds float* %tmp808, i64 1 + %tmp810 = getelementptr inbounds float* %tmp809, i64 1 + %tmp811 = getelementptr inbounds float* %tmp810, i64 1 + %tmp812 = getelementptr inbounds float* %tmp811, i64 1 + %tmp813 = getelementptr inbounds float* %tmp812, i64 1 + %tmp814 = getelementptr inbounds float* %tmp813, i64 1 + %tmp815 = getelementptr inbounds float* %tmp814, i64 1 + %tmp816 = getelementptr inbounds float* %tmp815, i64 1 + %tmp817 = getelementptr inbounds float* %tmp816, i64 1 + %tmp818 = getelementptr inbounds float* %tmp817, i64 1 + %tmp819 = getelementptr inbounds float* %tmp818, i64 1 + %tmp820 = getelementptr inbounds float* %tmp819, i64 1 + %tmp821 = getelementptr inbounds float* %tmp820, i64 1 + %tmp822 = getelementptr inbounds float* %tmp821, i64 1 + %tmp823 = getelementptr inbounds float* %tmp822, i64 1 + %tmp824 = getelementptr inbounds float* %tmp823, i64 1 + %tmp825 = getelementptr inbounds float* %tmp824, i64 1 + %tmp826 = getelementptr inbounds 
float* %tmp825, i64 1 + %tmp827 = getelementptr inbounds float* %tmp826, i64 1 + %tmp828 = getelementptr inbounds float* %tmp827, i64 1 + %tmp829 = getelementptr inbounds float* %tmp828, i64 1 + %tmp830 = getelementptr inbounds float* %tmp829, i64 1 + %tmp831 = getelementptr inbounds float* %tmp830, i64 1 + %tmp832 = getelementptr inbounds float* %tmp831, i64 1 + %tmp833 = getelementptr inbounds float* %tmp832, i64 1 + %tmp834 = getelementptr inbounds float* %tmp833, i64 1 + %tmp835 = getelementptr inbounds float* %tmp834, i64 1 + %tmp836 = getelementptr inbounds float* %tmp835, i64 1 + %tmp837 = getelementptr inbounds float* %tmp836, i64 1 + %tmp838 = getelementptr inbounds float* %tmp837, i64 1 + %tmp839 = getelementptr inbounds float* %tmp838, i64 1 + %tmp840 = getelementptr inbounds float* %tmp839, i64 1 + %tmp841 = getelementptr inbounds float* %tmp840, i64 1 + %tmp842 = getelementptr inbounds float* %tmp841, i64 1 + %tmp843 = getelementptr inbounds float* %tmp842, i64 1 + %tmp844 = getelementptr inbounds float* %tmp843, i64 1 + %tmp845 = getelementptr inbounds float* %tmp844, i64 1 + %tmp846 = getelementptr inbounds float* %tmp845, i64 1 + %tmp847 = getelementptr inbounds float* %tmp846, i64 1 + %tmp848 = getelementptr inbounds float* %tmp847, i64 1 + %tmp849 = getelementptr inbounds float* %tmp848, i64 1 + %tmp850 = getelementptr inbounds float* %tmp849, i64 1 + %tmp851 = getelementptr inbounds float* %tmp850, i64 1 + %tmp852 = getelementptr inbounds float* %tmp851, i64 1 + %tmp853 = getelementptr inbounds float* %tmp852, i64 1 + %tmp854 = getelementptr inbounds float* %tmp853, i64 1 + %tmp855 = getelementptr inbounds float* %tmp854, i64 1 + %tmp856 = getelementptr inbounds float* %tmp855, i64 1 + %tmp857 = getelementptr inbounds float* %tmp856, i64 1 + %tmp858 = getelementptr inbounds float* %tmp857, i64 1 + %tmp859 = getelementptr inbounds float* %tmp858, i64 1 + %tmp860 = getelementptr inbounds float* %tmp859, i64 1 + %tmp861 = getelementptr inbounds 
float* %tmp860, i64 1 + %tmp862 = getelementptr inbounds float* %tmp861, i64 1 + %tmp863 = getelementptr inbounds float* %tmp862, i64 1 + %tmp864 = getelementptr inbounds float* %tmp863, i64 1 + %tmp865 = getelementptr inbounds float* %tmp864, i64 1 + %tmp866 = getelementptr inbounds float* %tmp865, i64 1 + %tmp867 = getelementptr inbounds float* %tmp866, i64 1 + %tmp868 = getelementptr inbounds float* %tmp867, i64 1 + %tmp869 = getelementptr inbounds float* %tmp868, i64 1 + %tmp870 = getelementptr inbounds float* %tmp869, i64 1 + %tmp871 = getelementptr inbounds float* %tmp870, i64 1 + %tmp872 = getelementptr inbounds float* %tmp871, i64 1 + %tmp873 = getelementptr inbounds float* %tmp872, i64 1 + %tmp874 = getelementptr inbounds float* %tmp873, i64 1 + %tmp875 = getelementptr inbounds float* %tmp874, i64 1 + %tmp876 = getelementptr inbounds float* %tmp875, i64 1 + %tmp877 = getelementptr inbounds float* %tmp876, i64 1 + %tmp878 = getelementptr inbounds float* %tmp877, i64 1 + %tmp879 = getelementptr inbounds float* %tmp878, i64 1 + %tmp880 = getelementptr inbounds float* %tmp879, i64 1 + %tmp881 = getelementptr inbounds float* %tmp880, i64 1 + %tmp882 = getelementptr inbounds float* %tmp881, i64 1 + %tmp883 = getelementptr inbounds float* %tmp882, i64 1 + %tmp884 = getelementptr inbounds float* %tmp883, i64 1 + %tmp885 = getelementptr inbounds float* %tmp884, i64 1 + %tmp886 = getelementptr inbounds float* %tmp885, i64 1 + %tmp887 = getelementptr inbounds float* %tmp886, i64 1 + %tmp888 = getelementptr inbounds float* %tmp887, i64 1 + %tmp889 = getelementptr inbounds float* %tmp888, i64 1 + %tmp890 = getelementptr inbounds float* %tmp889, i64 1 + %tmp891 = getelementptr inbounds float* %tmp890, i64 1 + %tmp892 = getelementptr inbounds float* %tmp891, i64 1 + %tmp893 = getelementptr inbounds float* %tmp892, i64 1 + %tmp894 = getelementptr inbounds float* %tmp893, i64 1 + %tmp895 = getelementptr inbounds float* %tmp894, i64 1 + %tmp896 = getelementptr inbounds 
float* %tmp895, i64 1 + %tmp897 = getelementptr inbounds float* %tmp896, i64 1 + %tmp898 = getelementptr inbounds float* %tmp897, i64 1 + %tmp899 = getelementptr inbounds float* %tmp898, i64 1 + %tmp900 = getelementptr inbounds float* %tmp899, i64 1 + %tmp901 = getelementptr inbounds float* %tmp900, i64 1 + %tmp902 = getelementptr inbounds float* %tmp901, i64 1 + %tmp903 = getelementptr inbounds float* %tmp902, i64 1 + %tmp904 = getelementptr inbounds float* %tmp903, i64 1 + %tmp905 = getelementptr inbounds float* %tmp904, i64 1 + %tmp906 = getelementptr inbounds float* %tmp905, i64 1 + %tmp907 = getelementptr inbounds float* %tmp906, i64 1 + %tmp908 = getelementptr inbounds float* %tmp907, i64 1 + %tmp909 = getelementptr inbounds float* %tmp908, i64 1 + %tmp910 = getelementptr inbounds float* %tmp909, i64 1 + %tmp911 = getelementptr inbounds float* %tmp910, i64 1 + %tmp912 = getelementptr inbounds float* %tmp911, i64 1 + %tmp913 = getelementptr inbounds float* %tmp912, i64 1 + %tmp914 = getelementptr inbounds float* %tmp913, i64 1 + %tmp915 = getelementptr inbounds float* %tmp914, i64 1 + %tmp916 = getelementptr inbounds float* %tmp915, i64 1 + %tmp917 = getelementptr inbounds float* %tmp916, i64 1 + %tmp918 = getelementptr inbounds float* %tmp917, i64 1 + %tmp919 = getelementptr inbounds float* %tmp918, i64 1 + %tmp920 = getelementptr inbounds float* %tmp919, i64 1 + %tmp921 = getelementptr inbounds float* %tmp920, i64 1 + %tmp922 = getelementptr inbounds float* %tmp921, i64 1 + %tmp923 = getelementptr inbounds float* %tmp922, i64 1 + %tmp924 = getelementptr inbounds float* %tmp923, i64 1 + %tmp925 = getelementptr inbounds float* %tmp924, i64 1 + %tmp926 = getelementptr inbounds float* %tmp925, i64 1 + %tmp927 = getelementptr inbounds float* %tmp926, i64 1 + %tmp928 = getelementptr inbounds float* %tmp927, i64 1 + %tmp929 = getelementptr inbounds float* %tmp928, i64 1 + %tmp930 = getelementptr inbounds float* %tmp929, i64 1 + %tmp931 = getelementptr inbounds 
float* %tmp930, i64 1 + %tmp932 = getelementptr inbounds float* %tmp931, i64 1 + %tmp933 = getelementptr inbounds float* %tmp932, i64 1 + %tmp934 = getelementptr inbounds float* %tmp933, i64 1 + %tmp935 = getelementptr inbounds float* %tmp934, i64 1 + %tmp936 = getelementptr inbounds float* %tmp935, i64 1 + %tmp937 = getelementptr inbounds float* %tmp936, i64 1 + %tmp938 = getelementptr inbounds float* %tmp937, i64 1 + %tmp939 = getelementptr inbounds float* %tmp938, i64 1 + %tmp940 = getelementptr inbounds float* %tmp939, i64 1 + %tmp941 = getelementptr inbounds float* %tmp940, i64 1 + %tmp942 = getelementptr inbounds float* %tmp941, i64 1 + %tmp943 = getelementptr inbounds float* %tmp942, i64 1 + %tmp944 = getelementptr inbounds float* %tmp943, i64 1 + %tmp945 = getelementptr inbounds float* %tmp944, i64 1 + %tmp946 = getelementptr inbounds float* %tmp945, i64 1 + %tmp947 = getelementptr inbounds float* %tmp946, i64 1 + %tmp948 = getelementptr inbounds float* %tmp947, i64 1 + %tmp949 = getelementptr inbounds float* %tmp948, i64 1 + %tmp950 = getelementptr inbounds float* %tmp949, i64 1 + %tmp951 = getelementptr inbounds float* %tmp950, i64 1 + %tmp952 = getelementptr inbounds float* %tmp951, i64 1 + %tmp953 = getelementptr inbounds float* %tmp952, i64 1 + %tmp954 = getelementptr inbounds float* %tmp953, i64 1 + %tmp955 = getelementptr inbounds float* %tmp954, i64 1 + %tmp956 = getelementptr inbounds float* %tmp955, i64 1 + %tmp957 = getelementptr inbounds float* %tmp956, i64 1 + %tmp958 = getelementptr inbounds float* %tmp957, i64 1 + %tmp959 = getelementptr inbounds float* %tmp958, i64 1 + %tmp960 = getelementptr inbounds float* %tmp959, i64 1 + %tmp961 = getelementptr inbounds float* %tmp960, i64 1 + %tmp962 = getelementptr inbounds float* %tmp961, i64 1 + %tmp963 = getelementptr inbounds float* %tmp962, i64 1 + %tmp964 = getelementptr inbounds float* %tmp963, i64 1 + %tmp965 = getelementptr inbounds float* %tmp964, i64 1 + %tmp966 = getelementptr inbounds 
float* %tmp965, i64 1 + %tmp967 = getelementptr inbounds float* %tmp966, i64 1 + %tmp968 = getelementptr inbounds float* %tmp967, i64 1 + %tmp969 = getelementptr inbounds float* %tmp968, i64 1 + %tmp970 = getelementptr inbounds float* %tmp969, i64 1 + %tmp971 = getelementptr inbounds float* %tmp970, i64 1 + %tmp972 = getelementptr inbounds float* %tmp971, i64 1 + %tmp973 = getelementptr inbounds float* %tmp972, i64 1 + %tmp974 = getelementptr inbounds float* %tmp973, i64 1 + %tmp975 = getelementptr inbounds float* %tmp974, i64 1 + %tmp976 = getelementptr inbounds float* %tmp975, i64 1 + %tmp977 = getelementptr inbounds float* %tmp976, i64 1 + %tmp978 = getelementptr inbounds float* %tmp977, i64 1 + %tmp979 = getelementptr inbounds float* %tmp978, i64 1 + %tmp980 = getelementptr inbounds float* %tmp979, i64 1 + %tmp981 = getelementptr inbounds float* %tmp980, i64 1 + %tmp982 = getelementptr inbounds float* %tmp981, i64 1 + %tmp983 = getelementptr inbounds float* %tmp982, i64 1 + %tmp984 = getelementptr inbounds float* %tmp983, i64 1 + %tmp985 = getelementptr inbounds float* %tmp984, i64 1 + %tmp986 = getelementptr inbounds float* %tmp985, i64 1 + %tmp987 = getelementptr inbounds float* %tmp986, i64 1 + %tmp988 = getelementptr inbounds float* %tmp987, i64 1 + %tmp989 = getelementptr inbounds float* %tmp988, i64 1 + %tmp990 = getelementptr inbounds float* %tmp989, i64 1 + %tmp991 = getelementptr inbounds float* %tmp990, i64 1 + %tmp992 = getelementptr inbounds float* %tmp991, i64 1 + %tmp993 = getelementptr inbounds float* %tmp992, i64 1 + %tmp994 = getelementptr inbounds float* %tmp993, i64 1 + %tmp995 = getelementptr inbounds float* %tmp994, i64 1 + %tmp996 = getelementptr inbounds float* %tmp995, i64 1 + %tmp997 = getelementptr inbounds float* %tmp996, i64 1 + %tmp998 = getelementptr inbounds float* %tmp997, i64 1 + %tmp999 = getelementptr inbounds float* %tmp998, i64 1 + %tmp1000 = getelementptr inbounds float* %tmp999, i64 1 + %tmp1001 = getelementptr inbounds 
float* %tmp1000, i64 1 + %tmp1002 = getelementptr inbounds float* %tmp1001, i64 1 + %tmp1003 = getelementptr inbounds float* %tmp1002, i64 1 + %tmp1004 = getelementptr inbounds float* %tmp1003, i64 1 + %tmp1005 = getelementptr inbounds float* %tmp1004, i64 1 + %tmp1006 = getelementptr inbounds float* %tmp1005, i64 1 + %tmp1007 = getelementptr inbounds float* %tmp1006, i64 1 + %tmp1008 = getelementptr inbounds float* %tmp1007, i64 1 + %tmp1009 = getelementptr inbounds float* %tmp1008, i64 1 + %tmp1010 = getelementptr inbounds float* %tmp1009, i64 1 + %tmp1011 = getelementptr inbounds float* %tmp1010, i64 1 + %tmp1012 = getelementptr inbounds float* %tmp1011, i64 1 + %tmp1013 = getelementptr inbounds float* %tmp1012, i64 1 + %tmp1014 = getelementptr inbounds float* %tmp1013, i64 1 + %tmp1015 = getelementptr inbounds float* %tmp1014, i64 1 + %tmp1016 = getelementptr inbounds float* %tmp1015, i64 1 + %tmp1017 = getelementptr inbounds float* %tmp1016, i64 1 + %tmp1018 = getelementptr inbounds float* %tmp1017, i64 1 + %tmp1019 = getelementptr inbounds float* %tmp1018, i64 1 + %tmp1020 = getelementptr inbounds float* %tmp1019, i64 1 + %tmp1021 = getelementptr inbounds float* %tmp1020, i64 1 + %tmp1022 = getelementptr inbounds float* %tmp1021, i64 1 + %tmp1023 = getelementptr inbounds float* %tmp1022, i64 1 + %tmp1024 = getelementptr inbounds float* %tmp1023, i64 1 + %tmp1025 = getelementptr inbounds float* %tmp1024, i64 1 + %tmp1026 = getelementptr inbounds float* %tmp1025, i64 1 + %tmp1027 = getelementptr inbounds float* %tmp1026, i64 1 + %tmp1028 = getelementptr inbounds float* %tmp1027, i64 1 + %tmp1029 = getelementptr inbounds float* %tmp1028, i64 1 + %tmp1030 = getelementptr inbounds float* %tmp1029, i64 1 + %tmp1031 = getelementptr inbounds float* %tmp1030, i64 1 + %tmp1032 = getelementptr inbounds float* %tmp1031, i64 1 + %tmp1033 = getelementptr inbounds float* %tmp1032, i64 1 + %tmp1034 = getelementptr inbounds float* %tmp1033, i64 1 + %tmp1035 = getelementptr 
inbounds float* %tmp1034, i64 1 + %tmp1036 = getelementptr inbounds float* %tmp1035, i64 1 + %tmp1037 = getelementptr inbounds float* %tmp1036, i64 1 + %tmp1038 = getelementptr inbounds float* %tmp1037, i64 1 + %tmp1039 = getelementptr inbounds float* %tmp1038, i64 1 + %tmp1040 = getelementptr inbounds float* %tmp1039, i64 1 + %tmp1041 = getelementptr inbounds float* %tmp1040, i64 1 + %tmp1042 = getelementptr inbounds float* %tmp1041, i64 1 + %tmp1043 = getelementptr inbounds float* %tmp1042, i64 1 + %tmp1044 = getelementptr inbounds float* %tmp1043, i64 1 + %tmp1045 = getelementptr inbounds float* %tmp1044, i64 1 + %tmp1046 = getelementptr inbounds float* %tmp1045, i64 1 + %tmp1047 = getelementptr inbounds float* %tmp1046, i64 1 + %tmp1048 = getelementptr inbounds float* %tmp1047, i64 1 + %tmp1049 = getelementptr inbounds float* %tmp1048, i64 1 + %tmp1050 = getelementptr inbounds float* %tmp1049, i64 1 + %tmp1051 = getelementptr inbounds float* %tmp1050, i64 1 + %tmp1052 = getelementptr inbounds float* %tmp1051, i64 1 + %tmp1053 = getelementptr inbounds float* %tmp1052, i64 1 + %tmp1054 = getelementptr inbounds float* %tmp1053, i64 1 + %tmp1055 = getelementptr inbounds float* %tmp1054, i64 1 + %tmp1056 = getelementptr inbounds float* %tmp1055, i64 1 + %tmp1057 = getelementptr inbounds float* %tmp1056, i64 1 + %tmp1058 = getelementptr inbounds float* %tmp1057, i64 1 + %tmp1059 = getelementptr inbounds float* %tmp1058, i64 1 + %tmp1060 = getelementptr inbounds float* %tmp1059, i64 1 + %tmp1061 = getelementptr inbounds float* %tmp1060, i64 1 + %tmp1062 = getelementptr inbounds float* %tmp1061, i64 1 + %tmp1063 = getelementptr inbounds float* %tmp1062, i64 1 + %tmp1064 = getelementptr inbounds float* %tmp1063, i64 1 + %tmp1065 = getelementptr inbounds float* %tmp1064, i64 1 + %tmp1066 = getelementptr inbounds float* %tmp1065, i64 1 + %tmp1067 = getelementptr inbounds float* %tmp1066, i64 1 + %tmp1068 = getelementptr inbounds float* %tmp1067, i64 1 + %tmp1069 = 
getelementptr inbounds float* %tmp1068, i64 1 + %tmp1070 = getelementptr inbounds float* %tmp1069, i64 1 + %tmp1071 = getelementptr inbounds float* %tmp1070, i64 1 + %tmp1072 = getelementptr inbounds float* %tmp1071, i64 1 + %tmp1073 = getelementptr inbounds float* %tmp1072, i64 1 + %tmp1074 = getelementptr inbounds float* %tmp1073, i64 1 + %tmp1075 = getelementptr inbounds float* %tmp1074, i64 1 + %tmp1076 = getelementptr inbounds float* %tmp1075, i64 1 + %tmp1077 = getelementptr inbounds float* %tmp1076, i64 1 + %tmp1078 = getelementptr inbounds float* %tmp1077, i64 1 + %tmp1079 = getelementptr inbounds float* %tmp1078, i64 1 + %tmp1080 = getelementptr inbounds float* %tmp1079, i64 1 + %tmp1081 = getelementptr inbounds float* %tmp1080, i64 1 + %tmp1082 = getelementptr inbounds float* %tmp1081, i64 1 + %tmp1083 = getelementptr inbounds float* %tmp1082, i64 1 + %tmp1084 = getelementptr inbounds float* %tmp1083, i64 1 + %tmp1085 = getelementptr inbounds float* %tmp1084, i64 1 + %tmp1086 = getelementptr inbounds float* %tmp1085, i64 1 + %tmp1087 = getelementptr inbounds float* %tmp1086, i64 1 + %tmp1088 = getelementptr inbounds float* %tmp1087, i64 1 + %tmp1089 = getelementptr inbounds float* %tmp1088, i64 1 + %tmp1090 = getelementptr inbounds float* %tmp1089, i64 1 + %tmp1091 = getelementptr inbounds float* %tmp1090, i64 1 + %tmp1092 = getelementptr inbounds float* %tmp1091, i64 1 + %tmp1093 = getelementptr inbounds float* %tmp1092, i64 1 + %tmp1094 = getelementptr inbounds float* %tmp1093, i64 1 + %tmp1095 = getelementptr inbounds float* %tmp1094, i64 1 + %tmp1096 = getelementptr inbounds float* %tmp1095, i64 1 + %tmp1097 = getelementptr inbounds float* %tmp1096, i64 1 + %tmp1098 = getelementptr inbounds float* %tmp1097, i64 1 + %tmp1099 = getelementptr inbounds float* %tmp1098, i64 1 + %tmp1100 = getelementptr inbounds float* %tmp1099, i64 1 + %tmp1101 = getelementptr inbounds float* %tmp1100, i64 1 + %tmp1102 = getelementptr inbounds float* %tmp1101, i64 1 + 
%tmp1103 = getelementptr inbounds float* %tmp1102, i64 1 + %tmp1104 = getelementptr inbounds float* %tmp1103, i64 1 + %tmp1105 = getelementptr inbounds float* %tmp1104, i64 1 + %tmp1106 = getelementptr inbounds float* %tmp1105, i64 1 + %tmp1107 = getelementptr inbounds float* %tmp1106, i64 1 + %tmp1108 = getelementptr inbounds float* %tmp1107, i64 1 + %tmp1109 = getelementptr inbounds float* %tmp1108, i64 1 + %tmp1110 = getelementptr inbounds float* %tmp1109, i64 1 + %tmp1111 = getelementptr inbounds float* %tmp1110, i64 1 + %tmp1112 = getelementptr inbounds float* %tmp1111, i64 1 + %tmp1113 = getelementptr inbounds float* %tmp1112, i64 1 + %tmp1114 = getelementptr inbounds float* %tmp1113, i64 1 + %tmp1115 = getelementptr inbounds float* %tmp1114, i64 1 + %tmp1116 = getelementptr inbounds float* %tmp1115, i64 1 + %tmp1117 = getelementptr inbounds float* %tmp1116, i64 1 + %tmp1118 = getelementptr inbounds float* %tmp1117, i64 1 + %tmp1119 = getelementptr inbounds float* %tmp1118, i64 1 + %tmp1120 = getelementptr inbounds float* %tmp1119, i64 1 + %tmp1121 = getelementptr inbounds float* %tmp1120, i64 1 + %tmp1122 = getelementptr inbounds float* %tmp1121, i64 1 + %tmp1123 = getelementptr inbounds float* %tmp1122, i64 1 + %tmp1124 = getelementptr inbounds float* %tmp1123, i64 1 + %tmp1125 = getelementptr inbounds float* %tmp1124, i64 1 + %tmp1126 = getelementptr inbounds float* %tmp1125, i64 1 + %tmp1127 = getelementptr inbounds float* %tmp1126, i64 1 + %tmp1128 = getelementptr inbounds float* %tmp1127, i64 1 + %tmp1129 = getelementptr inbounds float* %tmp1128, i64 1 + %tmp1130 = getelementptr inbounds float* %tmp1129, i64 1 + %tmp1131 = getelementptr inbounds float* %tmp1130, i64 1 + %tmp1132 = getelementptr inbounds float* %tmp1131, i64 1 + %tmp1133 = getelementptr inbounds float* %tmp1132, i64 1 + %tmp1134 = getelementptr inbounds float* %tmp1133, i64 1 + %tmp1135 = getelementptr inbounds float* %tmp1134, i64 1 + %tmp1136 = getelementptr inbounds float* %tmp1135, 
i64 1 + %tmp1137 = getelementptr inbounds float* %tmp1136, i64 1 + %tmp1138 = getelementptr inbounds float* %tmp1137, i64 1 + %tmp1139 = getelementptr inbounds float* %tmp1138, i64 1 + %tmp1140 = getelementptr inbounds float* %tmp1139, i64 1 + %tmp1141 = getelementptr inbounds float* %tmp1140, i64 1 + %tmp1142 = getelementptr inbounds float* %tmp1141, i64 1 + %tmp1143 = getelementptr inbounds float* %tmp1142, i64 1 + %tmp1144 = getelementptr inbounds float* %tmp1143, i64 1 + %tmp1145 = getelementptr inbounds float* %tmp1144, i64 1 + %tmp1146 = getelementptr inbounds float* %tmp1145, i64 1 + %tmp1147 = getelementptr inbounds float* %tmp1146, i64 1 + %tmp1148 = getelementptr inbounds float* %tmp1147, i64 1 + %tmp1149 = getelementptr inbounds float* %tmp1148, i64 1 + %tmp1150 = getelementptr inbounds float* %tmp1149, i64 1 + %tmp1151 = getelementptr inbounds float* %tmp1150, i64 1 + %tmp1152 = getelementptr inbounds float* %tmp1151, i64 1 + %tmp1153 = getelementptr inbounds float* %tmp1152, i64 1 + %tmp1154 = getelementptr inbounds float* %tmp1153, i64 1 + %tmp1155 = getelementptr inbounds float* %tmp1154, i64 1 + %tmp1156 = getelementptr inbounds float* %tmp1155, i64 1 + %tmp1157 = getelementptr inbounds float* %tmp1156, i64 1 + %tmp1158 = getelementptr inbounds float* %tmp1157, i64 1 + %tmp1159 = getelementptr inbounds float* %tmp1158, i64 1 + %tmp1160 = getelementptr inbounds float* %tmp1159, i64 1 + %tmp1161 = getelementptr inbounds float* %tmp1160, i64 1 + %tmp1162 = getelementptr inbounds float* %tmp1161, i64 1 + %tmp1163 = getelementptr inbounds float* %tmp1162, i64 1 + %tmp1164 = getelementptr inbounds float* %tmp1163, i64 1 + %tmp1165 = getelementptr inbounds float* %tmp1164, i64 1 + %tmp1166 = getelementptr inbounds float* %tmp1165, i64 1 + %tmp1167 = getelementptr inbounds float* %tmp1166, i64 1 + %tmp1168 = getelementptr inbounds float* %tmp1167, i64 1 + %tmp1169 = getelementptr inbounds float* %tmp1168, i64 1 + %tmp1170 = getelementptr inbounds float* 
%tmp1169, i64 1 + %tmp1171 = getelementptr inbounds float* %tmp1170, i64 1 + %tmp1172 = getelementptr inbounds float* %tmp1171, i64 1 + %tmp1173 = getelementptr inbounds float* %tmp1172, i64 1 + %tmp1174 = getelementptr inbounds float* %tmp1173, i64 1 + %tmp1175 = getelementptr inbounds float* %tmp1174, i64 1 + %tmp1176 = getelementptr inbounds float* %tmp1175, i64 1 + %tmp1177 = getelementptr inbounds float* %tmp1176, i64 1 + %tmp1178 = getelementptr inbounds float* %tmp1177, i64 1 + %tmp1179 = getelementptr inbounds float* %tmp1178, i64 1 + %tmp1180 = getelementptr inbounds float* %tmp1179, i64 1 + %tmp1181 = getelementptr inbounds float* %tmp1180, i64 1 + %tmp1182 = getelementptr inbounds float* %tmp1181, i64 1 + %tmp1183 = getelementptr inbounds float* %tmp1182, i64 1 + %tmp1184 = getelementptr inbounds float* %tmp1183, i64 1 + %tmp1185 = getelementptr inbounds float* %tmp1184, i64 1 + %tmp1186 = getelementptr inbounds float* %tmp1185, i64 1 + %tmp1187 = getelementptr inbounds float* %tmp1186, i64 1 + %tmp1188 = getelementptr inbounds float* %tmp1187, i64 1 + %tmp1189 = getelementptr inbounds float* %tmp1188, i64 1 + %tmp1190 = getelementptr inbounds float* %tmp1189, i64 1 + %tmp1191 = getelementptr inbounds float* %tmp1190, i64 1 + %tmp1192 = getelementptr inbounds float* %tmp1191, i64 1 + %tmp1193 = getelementptr inbounds float* %tmp1192, i64 1 + %tmp1194 = getelementptr inbounds float* %tmp1193, i64 1 + %tmp1195 = getelementptr inbounds float* %tmp1194, i64 1 + %tmp1196 = getelementptr inbounds float* %tmp1195, i64 1 + %tmp1197 = getelementptr inbounds float* %tmp1196, i64 1 + %tmp1198 = getelementptr inbounds float* %tmp1197, i64 1 + %tmp1199 = getelementptr inbounds float* %tmp1198, i64 1 + %tmp1200 = getelementptr inbounds float* %tmp1199, i64 1 + %tmp1201 = getelementptr inbounds float* %tmp1200, i64 1 + %tmp1202 = getelementptr inbounds float* %tmp1201, i64 1 + %tmp1203 = getelementptr inbounds float* %tmp1202, i64 1 + %tmp1204 = getelementptr inbounds 
float* %tmp1203, i64 1 + %tmp1205 = getelementptr inbounds float* %tmp1204, i64 1 + %tmp1206 = getelementptr inbounds float* %tmp1205, i64 1 + %tmp1207 = getelementptr inbounds float* %tmp1206, i64 1 + %tmp1208 = getelementptr inbounds float* %tmp1207, i64 1 + %tmp1209 = getelementptr inbounds float* %tmp1208, i64 1 + %tmp1210 = getelementptr inbounds float* %tmp1209, i64 1 + %tmp1211 = getelementptr inbounds float* %tmp1210, i64 1 + %tmp1212 = getelementptr inbounds float* %tmp1211, i64 1 + %tmp1213 = getelementptr inbounds float* %tmp1212, i64 1 + %tmp1214 = getelementptr inbounds float* %tmp1213, i64 1 + %tmp1215 = getelementptr inbounds float* %tmp1214, i64 1 + %tmp1216 = getelementptr inbounds float* %tmp1215, i64 1 + %tmp1217 = getelementptr inbounds float* %tmp1216, i64 1 + %tmp1218 = getelementptr inbounds float* %tmp1217, i64 1 + %tmp1219 = getelementptr inbounds float* %tmp1218, i64 1 + %tmp1220 = getelementptr inbounds float* %tmp1219, i64 1 + %tmp1221 = getelementptr inbounds float* %tmp1220, i64 1 + %tmp1222 = getelementptr inbounds float* %tmp1221, i64 1 + %tmp1223 = getelementptr inbounds float* %tmp1222, i64 1 + %tmp1224 = getelementptr inbounds float* %tmp1223, i64 1 + %tmp1225 = getelementptr inbounds float* %tmp1224, i64 1 + %tmp1226 = getelementptr inbounds float* %tmp1225, i64 1 + %tmp1227 = getelementptr inbounds float* %tmp1226, i64 1 + %tmp1228 = getelementptr inbounds float* %tmp1227, i64 1 + %tmp1229 = getelementptr inbounds float* %tmp1228, i64 1 + %tmp1230 = getelementptr inbounds float* %tmp1229, i64 1 + %tmp1231 = getelementptr inbounds float* %tmp1230, i64 1 + %tmp1232 = getelementptr inbounds float* %tmp1231, i64 1 + %tmp1233 = getelementptr inbounds float* %tmp1232, i64 1 + %tmp1234 = getelementptr inbounds float* %tmp1233, i64 1 + %tmp1235 = getelementptr inbounds float* %tmp1234, i64 1 + %tmp1236 = getelementptr inbounds float* %tmp1235, i64 1 + %tmp1237 = getelementptr inbounds float* %tmp1236, i64 1 + %tmp1238 = getelementptr 
inbounds float* %tmp1237, i64 1 + %tmp1239 = getelementptr inbounds float* %tmp1238, i64 1 + %tmp1240 = getelementptr inbounds float* %tmp1239, i64 1 + %tmp1241 = getelementptr inbounds float* %tmp1240, i64 1 + %tmp1242 = getelementptr inbounds float* %tmp1241, i64 1 + %tmp1243 = getelementptr inbounds float* %tmp1242, i64 1 + %tmp1244 = getelementptr inbounds float* %tmp1243, i64 1 + %tmp1245 = getelementptr inbounds float* %tmp1244, i64 1 + %tmp1246 = getelementptr inbounds float* %tmp1245, i64 1 + %tmp1247 = getelementptr inbounds float* %tmp1246, i64 1 + %tmp1248 = getelementptr inbounds float* %tmp1247, i64 1 + %tmp1249 = getelementptr inbounds float* %tmp1248, i64 1 + %tmp1250 = getelementptr inbounds float* %tmp1249, i64 1 + %tmp1251 = getelementptr inbounds float* %tmp1250, i64 1 + %tmp1252 = getelementptr inbounds float* %tmp1251, i64 1 + %tmp1253 = getelementptr inbounds float* %tmp1252, i64 1 + %tmp1254 = getelementptr inbounds float* %tmp1253, i64 1 + %tmp1255 = getelementptr inbounds float* %tmp1254, i64 1 + %tmp1256 = getelementptr inbounds float* %tmp1255, i64 1 + %tmp1257 = getelementptr inbounds float* %tmp1256, i64 1 + %tmp1258 = getelementptr inbounds float* %tmp1257, i64 1 + %tmp1259 = getelementptr inbounds float* %tmp1258, i64 1 + %tmp1260 = getelementptr inbounds float* %tmp1259, i64 1 + %tmp1261 = getelementptr inbounds float* %tmp1260, i64 1 + %tmp1262 = getelementptr inbounds float* %tmp1261, i64 1 + %tmp1263 = getelementptr inbounds float* %tmp1262, i64 1 + %tmp1264 = getelementptr inbounds float* %tmp1263, i64 1 + %tmp1265 = getelementptr inbounds float* %tmp1264, i64 1 + %tmp1266 = getelementptr inbounds float* %tmp1265, i64 1 + %tmp1267 = getelementptr inbounds float* %tmp1266, i64 1 + %tmp1268 = getelementptr inbounds float* %tmp1267, i64 1 + %tmp1269 = getelementptr inbounds float* %tmp1268, i64 1 + %tmp1270 = getelementptr inbounds float* %tmp1269, i64 1 + %tmp1271 = getelementptr inbounds float* %tmp1270, i64 1 + %tmp1272 = 
getelementptr inbounds float* %tmp1271, i64 1 + %tmp1273 = getelementptr inbounds float* %tmp1272, i64 1 + %tmp1274 = getelementptr inbounds float* %tmp1273, i64 1 + %tmp1275 = getelementptr inbounds float* %tmp1274, i64 1 + %tmp1276 = getelementptr inbounds float* %tmp1275, i64 1 + %tmp1277 = getelementptr inbounds float* %tmp1276, i64 1 + %tmp1278 = getelementptr inbounds float* %tmp1277, i64 1 + %tmp1279 = getelementptr inbounds float* %tmp1278, i64 1 + %tmp1280 = getelementptr inbounds float* %tmp1279, i64 1 + %tmp1281 = getelementptr inbounds float* %tmp1280, i64 1 + %tmp1282 = getelementptr inbounds float* %tmp1281, i64 1 + %tmp1283 = getelementptr inbounds float* %tmp1282, i64 1 + %tmp1284 = getelementptr inbounds float* %tmp1283, i64 1 + %tmp1285 = getelementptr inbounds float* %tmp1284, i64 1 + %tmp1286 = getelementptr inbounds float* %tmp1285, i64 1 + %tmp1287 = getelementptr inbounds float* %tmp1286, i64 1 + %tmp1288 = getelementptr inbounds float* %tmp1287, i64 1 + %tmp1289 = getelementptr inbounds float* %tmp1288, i64 1 + %tmp1290 = getelementptr inbounds float* %tmp1289, i64 1 + %tmp1291 = getelementptr inbounds float* %tmp1290, i64 1 + %tmp1292 = getelementptr inbounds float* %tmp1291, i64 1 + %tmp1293 = getelementptr inbounds float* %tmp1292, i64 1 + %tmp1294 = getelementptr inbounds float* %tmp1293, i64 1 + %tmp1295 = getelementptr inbounds float* %tmp1294, i64 1 + %tmp1296 = getelementptr inbounds float* %tmp1295, i64 1 + %tmp1297 = getelementptr inbounds float* %tmp1296, i64 1 + %tmp1298 = getelementptr inbounds float* %tmp1297, i64 1 + %tmp1299 = getelementptr inbounds float* %tmp1298, i64 1 + %tmp1300 = getelementptr inbounds float* %tmp1299, i64 1 + %tmp1301 = getelementptr inbounds float* %tmp1300, i64 1 + %tmp1302 = getelementptr inbounds float* %tmp1301, i64 1 + %tmp1303 = getelementptr inbounds float* %tmp1302, i64 1 + %tmp1304 = getelementptr inbounds float* %tmp1303, i64 1 + %tmp1305 = getelementptr inbounds float* %tmp1304, i64 1 + 
%tmp1306 = getelementptr inbounds float* %tmp1305, i64 1 + %tmp1307 = getelementptr inbounds float* %tmp1306, i64 1 + %tmp1308 = getelementptr inbounds float* %tmp1307, i64 1 + %tmp1309 = getelementptr inbounds float* %tmp1308, i64 1 + %tmp1310 = getelementptr inbounds float* %tmp1309, i64 1 + %tmp1311 = getelementptr inbounds float* %tmp1310, i64 1 + %tmp1312 = getelementptr inbounds float* %tmp1311, i64 1 + %tmp1313 = getelementptr inbounds float* %tmp1312, i64 1 + %tmp1314 = getelementptr inbounds float* %tmp1313, i64 1 + %tmp1315 = getelementptr inbounds float* %tmp1314, i64 1 + %tmp1316 = getelementptr inbounds float* %tmp1315, i64 1 + %tmp1317 = getelementptr inbounds float* %tmp1316, i64 1 + %tmp1318 = getelementptr inbounds float* %tmp1317, i64 1 + %tmp1319 = getelementptr inbounds float* %tmp1318, i64 1 + %tmp1320 = getelementptr inbounds float* %tmp1319, i64 1 + %tmp1321 = getelementptr inbounds float* %tmp1320, i64 1 + %tmp1322 = getelementptr inbounds float* %tmp1321, i64 1 + %tmp1323 = getelementptr inbounds float* %tmp1322, i64 1 + %tmp1324 = getelementptr inbounds float* %tmp1323, i64 1 + %tmp1325 = getelementptr inbounds float* %tmp1324, i64 1 + %tmp1326 = getelementptr inbounds float* %tmp1325, i64 1 + %tmp1327 = getelementptr inbounds float* %tmp1326, i64 1 + %tmp1328 = getelementptr inbounds float* %tmp1327, i64 1 + %tmp1329 = getelementptr inbounds float* %tmp1328, i64 1 + %tmp1330 = getelementptr inbounds float* %tmp1329, i64 1 + %tmp1331 = getelementptr inbounds float* %tmp1330, i64 1 + %tmp1332 = getelementptr inbounds float* %tmp1331, i64 1 + %tmp1333 = getelementptr inbounds float* %tmp1332, i64 1 + %tmp1334 = getelementptr inbounds float* %tmp1333, i64 1 + %tmp1335 = getelementptr inbounds float* %tmp1334, i64 1 + %tmp1336 = getelementptr inbounds float* %tmp1335, i64 1 + %tmp1337 = getelementptr inbounds float* %tmp1336, i64 1 + %tmp1338 = getelementptr inbounds float* %tmp1337, i64 1 + %tmp1339 = getelementptr inbounds float* %tmp1338, 
i64 1 + %tmp1340 = getelementptr inbounds float* %tmp1339, i64 1 + %tmp1341 = getelementptr inbounds float* %tmp1340, i64 1 + %tmp1342 = getelementptr inbounds float* %tmp1341, i64 1 + %tmp1343 = getelementptr inbounds float* %tmp1342, i64 1 + %tmp1344 = getelementptr inbounds float* %tmp1343, i64 1 + %tmp1345 = getelementptr inbounds float* %tmp1344, i64 1 + %tmp1346 = getelementptr inbounds float* %tmp1345, i64 1 + %tmp1347 = getelementptr inbounds float* %tmp1346, i64 1 + %tmp1348 = getelementptr inbounds float* %tmp1347, i64 1 + %tmp1349 = getelementptr inbounds float* %tmp1348, i64 1 + %tmp1350 = getelementptr inbounds float* %tmp1349, i64 1 + %tmp1351 = getelementptr inbounds float* %tmp1350, i64 1 + %tmp1352 = getelementptr inbounds float* %tmp1351, i64 1 + %tmp1353 = getelementptr inbounds float* %tmp1352, i64 1 + %tmp1354 = getelementptr inbounds float* %tmp1353, i64 1 + %tmp1355 = getelementptr inbounds float* %tmp1354, i64 1 + %tmp1356 = getelementptr inbounds float* %tmp1355, i64 1 + %tmp1357 = getelementptr inbounds float* %tmp1356, i64 1 + %tmp1358 = getelementptr inbounds float* %tmp1357, i64 1 + %tmp1359 = getelementptr inbounds float* %tmp1358, i64 1 + %tmp1360 = getelementptr inbounds float* %tmp1359, i64 1 + %tmp1361 = getelementptr inbounds float* %tmp1360, i64 1 + %tmp1362 = getelementptr inbounds float* %tmp1361, i64 1 + %tmp1363 = getelementptr inbounds float* %tmp1362, i64 1 + %tmp1364 = getelementptr inbounds float* %tmp1363, i64 1 + %tmp1365 = getelementptr inbounds float* %tmp1364, i64 1 + %tmp1366 = getelementptr inbounds float* %tmp1365, i64 1 + %tmp1367 = getelementptr inbounds float* %tmp1366, i64 1 + %tmp1368 = getelementptr inbounds float* %tmp1367, i64 1 + %tmp1369 = getelementptr inbounds float* %tmp1368, i64 1 + %tmp1370 = getelementptr inbounds float* %tmp1369, i64 1 + %tmp1371 = getelementptr inbounds float* %tmp1370, i64 1 + %tmp1372 = getelementptr inbounds float* %tmp1371, i64 1 + %tmp1373 = getelementptr inbounds float* 
%tmp1372, i64 1 + %tmp1374 = getelementptr inbounds float* %tmp1373, i64 1 + %tmp1375 = getelementptr inbounds float* %tmp1374, i64 1 + %tmp1376 = getelementptr inbounds float* %tmp1375, i64 1 + %tmp1377 = getelementptr inbounds float* %tmp1376, i64 1 + %tmp1378 = getelementptr inbounds float* %tmp1377, i64 1 + %tmp1379 = getelementptr inbounds float* %tmp1378, i64 1 + %tmp1380 = getelementptr inbounds float* %tmp1379, i64 1 + %tmp1381 = getelementptr inbounds float* %tmp1380, i64 1 + %tmp1382 = getelementptr inbounds float* %tmp1381, i64 1 + %tmp1383 = getelementptr inbounds float* %tmp1382, i64 1 + %tmp1384 = getelementptr inbounds float* %tmp1383, i64 1 + %tmp1385 = getelementptr inbounds float* %tmp1384, i64 1 + %tmp1386 = getelementptr inbounds float* %tmp1385, i64 1 + %tmp1387 = getelementptr inbounds float* %tmp1386, i64 1 + %tmp1388 = getelementptr inbounds float* %tmp1387, i64 1 + %tmp1389 = getelementptr inbounds float* %tmp1388, i64 1 + %tmp1390 = getelementptr inbounds float* %tmp1389, i64 1 + %tmp1391 = getelementptr inbounds float* %tmp1390, i64 1 + %tmp1392 = getelementptr inbounds float* %tmp1391, i64 1 + %tmp1393 = getelementptr inbounds float* %tmp1392, i64 1 + %tmp1394 = getelementptr inbounds float* %tmp1393, i64 1 + %tmp1395 = getelementptr inbounds float* %tmp1394, i64 1 + %tmp1396 = getelementptr inbounds float* %tmp1395, i64 1 + %tmp1397 = getelementptr inbounds float* %tmp1396, i64 1 + %tmp1398 = getelementptr inbounds float* %tmp1397, i64 1 + %tmp1399 = getelementptr inbounds float* %tmp1398, i64 1 + %tmp1400 = getelementptr inbounds float* %tmp1399, i64 1 + %tmp1401 = getelementptr inbounds float* %tmp1400, i64 1 + %tmp1402 = getelementptr inbounds float* %tmp1401, i64 1 + %tmp1403 = getelementptr inbounds float* %tmp1402, i64 1 + %tmp1404 = getelementptr inbounds float* %tmp1403, i64 1 + %tmp1405 = getelementptr inbounds float* %tmp1404, i64 1 + %tmp1406 = getelementptr inbounds float* %tmp1405, i64 1 + %tmp1407 = getelementptr inbounds 
float* %tmp1406, i64 1 + %tmp1408 = getelementptr inbounds float* %tmp1407, i64 1 + %tmp1409 = getelementptr inbounds float* %tmp1408, i64 1 + %tmp1410 = getelementptr inbounds float* %tmp1409, i64 1 + %tmp1411 = getelementptr inbounds float* %tmp1410, i64 1 + %tmp1412 = getelementptr inbounds float* %tmp1411, i64 1 + %tmp1413 = getelementptr inbounds float* %tmp1412, i64 1 + %tmp1414 = getelementptr inbounds float* %tmp1413, i64 1 + %tmp1415 = getelementptr inbounds float* %tmp1414, i64 1 + %tmp1416 = getelementptr inbounds float* %tmp1415, i64 1 + %tmp1417 = getelementptr inbounds float* %tmp1416, i64 1 + %tmp1418 = getelementptr inbounds float* %tmp1417, i64 1 + %tmp1419 = getelementptr inbounds float* %tmp1418, i64 1 + %tmp1420 = getelementptr inbounds float* %tmp1419, i64 1 + %tmp1421 = getelementptr inbounds float* %tmp1420, i64 1 + %tmp1422 = getelementptr inbounds float* %tmp1421, i64 1 + %tmp1423 = getelementptr inbounds float* %tmp1422, i64 1 + %tmp1424 = getelementptr inbounds float* %tmp1423, i64 1 + %tmp1425 = getelementptr inbounds float* %tmp1424, i64 1 + %tmp1426 = getelementptr inbounds float* %tmp1425, i64 1 + %tmp1427 = getelementptr inbounds float* %tmp1426, i64 1 + %tmp1428 = getelementptr inbounds float* %tmp1427, i64 1 + %tmp1429 = getelementptr inbounds float* %tmp1428, i64 1 + %tmp1430 = getelementptr inbounds float* %tmp1429, i64 1 + %tmp1431 = getelementptr inbounds float* %tmp1430, i64 1 + %tmp1432 = getelementptr inbounds float* %tmp1431, i64 1 + %tmp1433 = getelementptr inbounds float* %tmp1432, i64 1 + %tmp1434 = getelementptr inbounds float* %tmp1433, i64 1 + %tmp1435 = getelementptr inbounds float* %tmp1434, i64 1 + %tmp1436 = getelementptr inbounds float* %tmp1435, i64 1 + %tmp1437 = getelementptr inbounds float* %tmp1436, i64 1 + %tmp1438 = getelementptr inbounds float* %tmp1437, i64 1 + %tmp1439 = getelementptr inbounds float* %tmp1438, i64 1 + %tmp1440 = getelementptr inbounds float* %tmp1439, i64 1 + %tmp1441 = getelementptr 
inbounds float* %tmp1440, i64 1 + %tmp1442 = getelementptr inbounds float* %tmp1441, i64 1 + %tmp1443 = getelementptr inbounds float* %tmp1442, i64 1 + %tmp1444 = getelementptr inbounds float* %tmp1443, i64 1 + %tmp1445 = getelementptr inbounds float* %tmp1444, i64 1 + %tmp1446 = getelementptr inbounds float* %tmp1445, i64 1 + %tmp1447 = getelementptr inbounds float* %tmp1446, i64 1 + %tmp1448 = getelementptr inbounds float* %tmp1447, i64 1 + %tmp1449 = getelementptr inbounds float* %tmp1448, i64 1 + %tmp1450 = getelementptr inbounds float* %tmp1449, i64 1 + %tmp1451 = getelementptr inbounds float* %tmp1450, i64 1 + %tmp1452 = getelementptr inbounds float* %tmp1451, i64 1 + %tmp1453 = getelementptr inbounds float* %tmp1452, i64 1 + %tmp1454 = getelementptr inbounds float* %tmp1453, i64 1 + %tmp1455 = getelementptr inbounds float* %tmp1454, i64 1 + %tmp1456 = getelementptr inbounds float* %tmp1455, i64 1 + %tmp1457 = getelementptr inbounds float* %tmp1456, i64 1 + %tmp1458 = getelementptr inbounds float* %tmp1457, i64 1 + %tmp1459 = getelementptr inbounds float* %tmp1458, i64 1 + %tmp1460 = getelementptr inbounds float* %tmp1459, i64 1 + %tmp1461 = getelementptr inbounds float* %tmp1460, i64 1 + %tmp1462 = getelementptr inbounds float* %tmp1461, i64 1 + %tmp1463 = getelementptr inbounds float* %tmp1462, i64 1 + %tmp1464 = getelementptr inbounds float* %tmp1463, i64 1 + %tmp1465 = getelementptr inbounds float* %tmp1464, i64 1 + %tmp1466 = getelementptr inbounds float* %tmp1465, i64 1 + %tmp1467 = getelementptr inbounds float* %tmp1466, i64 1 + %tmp1468 = getelementptr inbounds float* %tmp1467, i64 1 + %tmp1469 = getelementptr inbounds float* %tmp1468, i64 1 + %tmp1470 = getelementptr inbounds float* %tmp1469, i64 1 + %tmp1471 = getelementptr inbounds float* %tmp1470, i64 1 + %tmp1472 = getelementptr inbounds float* %tmp1471, i64 1 + %tmp1473 = getelementptr inbounds float* %tmp1472, i64 1 + %tmp1474 = getelementptr inbounds float* %tmp1473, i64 1 + %tmp1475 = 
getelementptr inbounds float* %tmp1474, i64 1 + %tmp1476 = getelementptr inbounds float* %tmp1475, i64 1 + %tmp1477 = getelementptr inbounds float* %tmp1476, i64 1 + %tmp1478 = getelementptr inbounds float* %tmp1477, i64 1 + %tmp1479 = getelementptr inbounds float* %tmp1478, i64 1 + %tmp1480 = getelementptr inbounds float* %tmp1479, i64 1 + %tmp1481 = getelementptr inbounds float* %tmp1480, i64 1 + %tmp1482 = getelementptr inbounds float* %tmp1481, i64 1 + %tmp1483 = getelementptr inbounds float* %tmp1482, i64 1 + %tmp1484 = getelementptr inbounds float* %tmp1483, i64 1 + %tmp1485 = getelementptr inbounds float* %tmp1484, i64 1 + %tmp1486 = getelementptr inbounds float* %tmp1485, i64 1 + %tmp1487 = getelementptr inbounds float* %tmp1486, i64 1 + %tmp1488 = getelementptr inbounds float* %tmp1487, i64 1 + %tmp1489 = getelementptr inbounds float* %tmp1488, i64 1 + %tmp1490 = getelementptr inbounds float* %tmp1489, i64 1 + %tmp1491 = getelementptr inbounds float* %tmp1490, i64 1 + %tmp1492 = getelementptr inbounds float* %tmp1491, i64 1 + %tmp1493 = getelementptr inbounds float* %tmp1492, i64 1 + %tmp1494 = getelementptr inbounds float* %tmp1493, i64 1 + %tmp1495 = getelementptr inbounds float* %tmp1494, i64 1 + %tmp1496 = getelementptr inbounds float* %tmp1495, i64 1 + %tmp1497 = getelementptr inbounds float* %tmp1496, i64 1 + %tmp1498 = getelementptr inbounds float* %tmp1497, i64 1 + %tmp1499 = getelementptr inbounds float* %tmp1498, i64 1 + %tmp1500 = getelementptr inbounds float* %tmp1499, i64 1 + %tmp1501 = getelementptr inbounds float* %tmp1500, i64 1 + %tmp1502 = getelementptr inbounds float* %tmp1501, i64 1 + %tmp1503 = getelementptr inbounds float* %tmp1502, i64 1 + %tmp1504 = getelementptr inbounds float* %tmp1503, i64 1 + %tmp1505 = getelementptr inbounds float* %tmp1504, i64 1 + %tmp1506 = getelementptr inbounds float* %tmp1505, i64 1 + %tmp1507 = getelementptr inbounds float* %tmp1506, i64 1 + %tmp1508 = getelementptr inbounds float* %tmp1507, i64 1 + 
%tmp1509 = getelementptr inbounds float* %tmp1508, i64 1 + %tmp1510 = getelementptr inbounds float* %tmp1509, i64 1 + %tmp1511 = getelementptr inbounds float* %tmp1510, i64 1 + %tmp1512 = getelementptr inbounds float* %tmp1511, i64 1 + %tmp1513 = getelementptr inbounds float* %tmp1512, i64 1 + %tmp1514 = getelementptr inbounds float* %tmp1513, i64 1 + %tmp1515 = getelementptr inbounds float* %tmp1514, i64 1 + %tmp1516 = getelementptr inbounds float* %tmp1515, i64 1 + %tmp1517 = getelementptr inbounds float* %tmp1516, i64 1 + %tmp1518 = getelementptr inbounds float* %tmp1517, i64 1 + %tmp1519 = getelementptr inbounds float* %tmp1518, i64 1 + %tmp1520 = getelementptr inbounds float* %tmp1519, i64 1 + %tmp1521 = getelementptr inbounds float* %tmp1520, i64 1 + %tmp1522 = getelementptr inbounds float* %tmp1521, i64 1 + %tmp1523 = getelementptr inbounds float* %tmp1522, i64 1 + %tmp1524 = getelementptr inbounds float* %tmp1523, i64 1 + %tmp1525 = getelementptr inbounds float* %tmp1524, i64 1 + %tmp1526 = getelementptr inbounds float* %tmp1525, i64 1 + %tmp1527 = getelementptr inbounds float* %tmp1526, i64 1 + %tmp1528 = getelementptr inbounds float* %tmp1527, i64 1 + %tmp1529 = getelementptr inbounds float* %tmp1528, i64 1 + %tmp1530 = getelementptr inbounds float* %tmp1529, i64 1 + %tmp1531 = getelementptr inbounds float* %tmp1530, i64 1 + %tmp1532 = getelementptr inbounds float* %tmp1531, i64 1 + %tmp1533 = getelementptr inbounds float* %tmp1532, i64 1 + %tmp1534 = getelementptr inbounds float* %tmp1533, i64 1 + %tmp1535 = getelementptr inbounds float* %tmp1534, i64 1 + %tmp1536 = getelementptr inbounds float* %tmp1535, i64 1 + %tmp1537 = getelementptr inbounds float* %tmp1536, i64 1 + %tmp1538 = getelementptr inbounds float* %tmp1537, i64 1 + %tmp1539 = getelementptr inbounds float* %tmp1538, i64 1 + %tmp1540 = getelementptr inbounds float* %tmp1539, i64 1 + %tmp1541 = getelementptr inbounds float* %tmp1540, i64 1 + %tmp1542 = getelementptr inbounds float* %tmp1541, 
i64 1 + %tmp1543 = getelementptr inbounds float* %tmp1542, i64 1 + %tmp1544 = getelementptr inbounds float* %tmp1543, i64 1 + %tmp1545 = getelementptr inbounds float* %tmp1544, i64 1 + %tmp1546 = getelementptr inbounds float* %tmp1545, i64 1 + %tmp1547 = getelementptr inbounds float* %tmp1546, i64 1 + %tmp1548 = getelementptr inbounds float* %tmp1547, i64 1 + %tmp1549 = getelementptr inbounds float* %tmp1548, i64 1 + %tmp1550 = getelementptr inbounds float* %tmp1549, i64 1 + %tmp1551 = getelementptr inbounds float* %tmp1550, i64 1 + %tmp1552 = getelementptr inbounds float* %tmp1551, i64 1 + %tmp1553 = getelementptr inbounds float* %tmp1552, i64 1 + %tmp1554 = getelementptr inbounds float* %tmp1553, i64 1 + %tmp1555 = getelementptr inbounds float* %tmp1554, i64 1 + %tmp1556 = getelementptr inbounds float* %tmp1555, i64 1 + %tmp1557 = getelementptr inbounds float* %tmp1556, i64 1 + %tmp1558 = getelementptr inbounds float* %tmp1557, i64 1 + %tmp1559 = getelementptr inbounds float* %tmp1558, i64 1 + %tmp1560 = getelementptr inbounds float* %tmp1559, i64 1 + %tmp1561 = getelementptr inbounds float* %tmp1560, i64 1 + %tmp1562 = getelementptr inbounds float* %tmp1561, i64 1 + %tmp1563 = getelementptr inbounds float* %tmp1562, i64 1 + %tmp1564 = getelementptr inbounds float* %tmp1563, i64 1 + %tmp1565 = getelementptr inbounds float* %tmp1564, i64 1 + %tmp1566 = getelementptr inbounds float* %tmp1565, i64 1 + %tmp1567 = getelementptr inbounds float* %tmp1566, i64 1 + %tmp1568 = getelementptr inbounds float* %tmp1567, i64 1 + %tmp1569 = getelementptr inbounds float* %tmp1568, i64 1 + %tmp1570 = getelementptr inbounds float* %tmp1569, i64 1 + %tmp1571 = getelementptr inbounds float* %tmp1570, i64 1 + %tmp1572 = getelementptr inbounds float* %tmp1571, i64 1 + %tmp1573 = getelementptr inbounds float* %tmp1572, i64 1 + %tmp1574 = getelementptr inbounds float* %tmp1573, i64 1 + %tmp1575 = getelementptr inbounds float* %tmp1574, i64 1 + %tmp1576 = getelementptr inbounds float* 
%tmp1575, i64 1 + %tmp1577 = getelementptr inbounds float* %tmp1576, i64 1 + %tmp1578 = getelementptr inbounds float* %tmp1577, i64 1 + %tmp1579 = getelementptr inbounds float* %tmp1578, i64 1 + %tmp1580 = getelementptr inbounds float* %tmp1579, i64 1 + %tmp1581 = getelementptr inbounds float* %tmp1580, i64 1 + %tmp1582 = getelementptr inbounds float* %tmp1581, i64 1 + %tmp1583 = getelementptr inbounds float* %tmp1582, i64 1 + %tmp1584 = getelementptr inbounds float* %tmp1583, i64 1 + %tmp1585 = getelementptr inbounds float* %tmp1584, i64 1 + %tmp1586 = getelementptr inbounds float* %tmp1585, i64 1 + %tmp1587 = getelementptr inbounds float* %tmp1586, i64 1 + %tmp1588 = getelementptr inbounds float* %tmp1587, i64 1 + %tmp1589 = getelementptr inbounds float* %tmp1588, i64 1 + %tmp1590 = getelementptr inbounds float* %tmp1589, i64 1 + %tmp1591 = getelementptr inbounds float* %tmp1590, i64 1 + %tmp1592 = getelementptr inbounds float* %tmp1591, i64 1 + %tmp1593 = getelementptr inbounds float* %tmp1592, i64 1 + %tmp1594 = getelementptr inbounds float* %tmp1593, i64 1 + %tmp1595 = getelementptr inbounds float* %tmp1594, i64 1 + %tmp1596 = getelementptr inbounds float* %tmp1595, i64 1 + %tmp1597 = getelementptr inbounds float* %tmp1596, i64 1 + %tmp1598 = getelementptr inbounds float* %tmp1597, i64 1 + %tmp1599 = getelementptr inbounds float* %tmp1598, i64 1 + %tmp1600 = getelementptr inbounds float* %tmp1599, i64 1 + %tmp1601 = getelementptr inbounds float* %tmp1600, i64 1 + %tmp1602 = getelementptr inbounds float* %tmp1601, i64 1 + %tmp1603 = getelementptr inbounds float* %tmp1602, i64 1 + %tmp1604 = getelementptr inbounds float* %tmp1603, i64 1 + %tmp1605 = getelementptr inbounds float* %tmp1604, i64 1 + %tmp1606 = getelementptr inbounds float* %tmp1605, i64 1 + %tmp1607 = getelementptr inbounds float* %tmp1606, i64 1 + %tmp1608 = getelementptr inbounds float* %tmp1607, i64 1 + %tmp1609 = getelementptr inbounds float* %tmp1608, i64 1 + %tmp1610 = getelementptr inbounds 
float* %tmp1609, i64 1 + %tmp1611 = getelementptr inbounds float* %tmp1610, i64 1 + %tmp1612 = getelementptr inbounds float* %tmp1611, i64 1 + %tmp1613 = getelementptr inbounds float* %tmp1612, i64 1 + %tmp1614 = getelementptr inbounds float* %tmp1613, i64 1 + %tmp1615 = getelementptr inbounds float* %tmp1614, i64 1 + %tmp1616 = getelementptr inbounds float* %tmp1615, i64 1 + %tmp1617 = getelementptr inbounds float* %tmp1616, i64 1 + %tmp1618 = getelementptr inbounds float* %tmp1617, i64 1 + %tmp1619 = getelementptr inbounds float* %tmp1618, i64 1 + %tmp1620 = getelementptr inbounds float* %tmp1619, i64 1 + %tmp1621 = getelementptr inbounds float* %tmp1620, i64 1 + %tmp1622 = getelementptr inbounds float* %tmp1621, i64 1 + %tmp1623 = getelementptr inbounds float* %tmp1622, i64 1 + %tmp1624 = getelementptr inbounds float* %tmp1623, i64 1 + %tmp1625 = getelementptr inbounds float* %tmp1624, i64 1 + %tmp1626 = getelementptr inbounds float* %tmp1625, i64 1 + %tmp1627 = getelementptr inbounds float* %tmp1626, i64 1 + %tmp1628 = getelementptr inbounds float* %tmp1627, i64 1 + %tmp1629 = getelementptr inbounds float* %tmp1628, i64 1 + %tmp1630 = getelementptr inbounds float* %tmp1629, i64 1 + %tmp1631 = getelementptr inbounds float* %tmp1630, i64 1 + %tmp1632 = getelementptr inbounds float* %tmp1631, i64 1 + %tmp1633 = getelementptr inbounds float* %tmp1632, i64 1 + %tmp1634 = getelementptr inbounds float* %tmp1633, i64 1 + %tmp1635 = getelementptr inbounds float* %tmp1634, i64 1 + %tmp1636 = getelementptr inbounds float* %tmp1635, i64 1 + %tmp1637 = getelementptr inbounds float* %tmp1636, i64 1 + %tmp1638 = getelementptr inbounds float* %tmp1637, i64 1 + %tmp1639 = getelementptr inbounds float* %tmp1638, i64 1 + %tmp1640 = getelementptr inbounds float* %tmp1639, i64 1 + %tmp1641 = getelementptr inbounds float* %tmp1640, i64 1 + %tmp1642 = getelementptr inbounds float* %tmp1641, i64 1 + %tmp1643 = getelementptr inbounds float* %tmp1642, i64 1 + %tmp1644 = getelementptr 
inbounds float* %tmp1643, i64 1 + %tmp1645 = getelementptr inbounds float* %tmp1644, i64 1 + %tmp1646 = getelementptr inbounds float* %tmp1645, i64 1 + %tmp1647 = getelementptr inbounds float* %tmp1646, i64 1 + %tmp1648 = getelementptr inbounds float* %tmp1647, i64 1 + %tmp1649 = getelementptr inbounds float* %tmp1648, i64 1 + %tmp1650 = getelementptr inbounds float* %tmp1649, i64 1 + %tmp1651 = getelementptr inbounds float* %tmp1650, i64 1 + %tmp1652 = getelementptr inbounds float* %tmp1651, i64 1 + %tmp1653 = getelementptr inbounds float* %tmp1652, i64 1 + %tmp1654 = getelementptr inbounds float* %tmp1653, i64 1 + %tmp1655 = getelementptr inbounds float* %tmp1654, i64 1 + %tmp1656 = getelementptr inbounds float* %tmp1655, i64 1 + %tmp1657 = getelementptr inbounds float* %tmp1656, i64 1 + %tmp1658 = getelementptr inbounds float* %tmp1657, i64 1 + %tmp1659 = getelementptr inbounds float* %tmp1658, i64 1 + %tmp1660 = getelementptr inbounds float* %tmp1659, i64 1 + %tmp1661 = getelementptr inbounds float* %tmp1660, i64 1 + %tmp1662 = getelementptr inbounds float* %tmp1661, i64 1 + %tmp1663 = getelementptr inbounds float* %tmp1662, i64 1 + %tmp1664 = getelementptr inbounds float* %tmp1663, i64 1 + %tmp1665 = getelementptr inbounds float* %tmp1664, i64 1 + %tmp1666 = getelementptr inbounds float* %tmp1665, i64 1 + %tmp1667 = getelementptr inbounds float* %tmp1666, i64 1 + %tmp1668 = getelementptr inbounds float* %tmp1667, i64 1 + %tmp1669 = getelementptr inbounds float* %tmp1668, i64 1 + %tmp1670 = getelementptr inbounds float* %tmp1669, i64 1 + %tmp1671 = getelementptr inbounds float* %tmp1670, i64 1 + %tmp1672 = getelementptr inbounds float* %tmp1671, i64 1 + %tmp1673 = getelementptr inbounds float* %tmp1672, i64 1 + %tmp1674 = getelementptr inbounds float* %tmp1673, i64 1 + %tmp1675 = getelementptr inbounds float* %tmp1674, i64 1 + %tmp1676 = getelementptr inbounds float* %tmp1675, i64 1 + %tmp1677 = getelementptr inbounds float* %tmp1676, i64 1 + %tmp1678 = 
getelementptr inbounds float* %tmp1677, i64 1 + %tmp1679 = getelementptr inbounds float* %tmp1678, i64 1 + %tmp1680 = getelementptr inbounds float* %tmp1679, i64 1 + %tmp1681 = getelementptr inbounds float* %tmp1680, i64 1 + %tmp1682 = getelementptr inbounds float* %tmp1681, i64 1 + %tmp1683 = getelementptr inbounds float* %tmp1682, i64 1 + %tmp1684 = getelementptr inbounds float* %tmp1683, i64 1 + %tmp1685 = getelementptr inbounds float* %tmp1684, i64 1 + %tmp1686 = getelementptr inbounds float* %tmp1685, i64 1 + %tmp1687 = getelementptr inbounds float* %tmp1686, i64 1 + %tmp1688 = getelementptr inbounds float* %tmp1687, i64 1 + %tmp1689 = getelementptr inbounds float* %tmp1688, i64 1 + %tmp1690 = getelementptr inbounds float* %tmp1689, i64 1 + %tmp1691 = getelementptr inbounds float* %tmp1690, i64 1 + %tmp1692 = getelementptr inbounds float* %tmp1691, i64 1 + %tmp1693 = getelementptr inbounds float* %tmp1692, i64 1 + %tmp1694 = getelementptr inbounds float* %tmp1693, i64 1 + %tmp1695 = getelementptr inbounds float* %tmp1694, i64 1 + %tmp1696 = getelementptr inbounds float* %tmp1695, i64 1 + %tmp1697 = getelementptr inbounds float* %tmp1696, i64 1 + %tmp1698 = getelementptr inbounds float* %tmp1697, i64 1 + %tmp1699 = getelementptr inbounds float* %tmp1698, i64 1 + %tmp1700 = getelementptr inbounds float* %tmp1699, i64 1 + %tmp1701 = getelementptr inbounds float* %tmp1700, i64 1 + %tmp1702 = getelementptr inbounds float* %tmp1701, i64 1 + %tmp1703 = getelementptr inbounds float* %tmp1702, i64 1 + %tmp1704 = getelementptr inbounds float* %tmp1703, i64 1 + %tmp1705 = getelementptr inbounds float* %tmp1704, i64 1 + %tmp1706 = getelementptr inbounds float* %tmp1705, i64 1 + %tmp1707 = getelementptr inbounds float* %tmp1706, i64 1 + %tmp1708 = getelementptr inbounds float* %tmp1707, i64 1 + %tmp1709 = getelementptr inbounds float* %tmp1708, i64 1 + %tmp1710 = getelementptr inbounds float* %tmp1709, i64 1 + %tmp1711 = getelementptr inbounds float* %tmp1710, i64 1 + 
%tmp1712 = getelementptr inbounds float* %tmp1711, i64 1 + %tmp1713 = getelementptr inbounds float* %tmp1712, i64 1 + %tmp1714 = getelementptr inbounds float* %tmp1713, i64 1 + %tmp1715 = getelementptr inbounds float* %tmp1714, i64 1 + %tmp1716 = getelementptr inbounds float* %tmp1715, i64 1 + %tmp1717 = getelementptr inbounds float* %tmp1716, i64 1 + %tmp1718 = getelementptr inbounds float* %tmp1717, i64 1 + %tmp1719 = getelementptr inbounds float* %tmp1718, i64 1 + %tmp1720 = getelementptr inbounds float* %tmp1719, i64 1 + %tmp1721 = getelementptr inbounds float* %tmp1720, i64 1 + %tmp1722 = getelementptr inbounds float* %tmp1721, i64 1 + %tmp1723 = getelementptr inbounds float* %tmp1722, i64 1 + %tmp1724 = getelementptr inbounds float* %tmp1723, i64 1 + %tmp1725 = getelementptr inbounds float* %tmp1724, i64 1 + %tmp1726 = getelementptr inbounds float* %tmp1725, i64 1 + %tmp1727 = getelementptr inbounds float* %tmp1726, i64 1 + %tmp1728 = getelementptr inbounds float* %tmp1727, i64 1 + %tmp1729 = getelementptr inbounds float* %tmp1728, i64 1 + %tmp1730 = getelementptr inbounds float* %tmp1729, i64 1 + %tmp1731 = getelementptr inbounds float* %tmp1730, i64 1 + %tmp1732 = getelementptr inbounds float* %tmp1731, i64 1 + %tmp1733 = getelementptr inbounds float* %tmp1732, i64 1 + %tmp1734 = getelementptr inbounds float* %tmp1733, i64 1 + %tmp1735 = getelementptr inbounds float* %tmp1734, i64 1 + %tmp1736 = getelementptr inbounds float* %tmp1735, i64 1 + %tmp1737 = getelementptr inbounds float* %tmp1736, i64 1 + %tmp1738 = getelementptr inbounds float* %tmp1737, i64 1 + %tmp1739 = getelementptr inbounds float* %tmp1738, i64 1 + %tmp1740 = getelementptr inbounds float* %tmp1739, i64 1 + %tmp1741 = getelementptr inbounds float* %tmp1740, i64 1 + %tmp1742 = getelementptr inbounds float* %tmp1741, i64 1 + %tmp1743 = getelementptr inbounds float* %tmp1742, i64 1 + %tmp1744 = getelementptr inbounds float* %tmp1743, i64 1 + %tmp1745 = getelementptr inbounds float* %tmp1744, 
i64 1 + %tmp1746 = getelementptr inbounds float* %tmp1745, i64 1 + %tmp1747 = getelementptr inbounds float* %tmp1746, i64 1 + %tmp1748 = getelementptr inbounds float* %tmp1747, i64 1 + %tmp1749 = getelementptr inbounds float* %tmp1748, i64 1 + %tmp1750 = getelementptr inbounds float* %tmp1749, i64 1 + %tmp1751 = getelementptr inbounds float* %tmp1750, i64 1 + %tmp1752 = getelementptr inbounds float* %tmp1751, i64 1 + %tmp1753 = getelementptr inbounds float* %tmp1752, i64 1 + %tmp1754 = getelementptr inbounds float* %tmp1753, i64 1 + %tmp1755 = getelementptr inbounds float* %tmp1754, i64 1 + %tmp1756 = getelementptr inbounds float* %tmp1755, i64 1 + %tmp1757 = getelementptr inbounds float* %tmp1756, i64 1 + %tmp1758 = getelementptr inbounds float* %tmp1757, i64 1 + %tmp1759 = getelementptr inbounds float* %tmp1758, i64 1 + %tmp1760 = getelementptr inbounds float* %tmp1759, i64 1 + %tmp1761 = getelementptr inbounds float* %tmp1760, i64 1 + %tmp1762 = getelementptr inbounds float* %tmp1761, i64 1 + %tmp1763 = getelementptr inbounds float* %tmp1762, i64 1 + %tmp1764 = getelementptr inbounds float* %tmp1763, i64 1 + %tmp1765 = getelementptr inbounds float* %tmp1764, i64 1 + %tmp1766 = getelementptr inbounds float* %tmp1765, i64 1 + %tmp1767 = getelementptr inbounds float* %tmp1766, i64 1 + %tmp1768 = getelementptr inbounds float* %tmp1767, i64 1 + %tmp1769 = getelementptr inbounds float* %tmp1768, i64 1 + %tmp1770 = getelementptr inbounds float* %tmp1769, i64 1 + %tmp1771 = getelementptr inbounds float* %tmp1770, i64 1 + %tmp1772 = getelementptr inbounds float* %tmp1771, i64 1 + %tmp1773 = getelementptr inbounds float* %tmp1772, i64 1 + %tmp1774 = getelementptr inbounds float* %tmp1773, i64 1 + %tmp1775 = getelementptr inbounds float* %tmp1774, i64 1 + %tmp1776 = getelementptr inbounds float* %tmp1775, i64 1 + %tmp1777 = getelementptr inbounds float* %tmp1776, i64 1 + %tmp1778 = getelementptr inbounds float* %tmp1777, i64 1 + %tmp1779 = getelementptr inbounds float* 
%tmp1778, i64 1 + %tmp1780 = getelementptr inbounds float* %tmp1779, i64 1 + %tmp1781 = getelementptr inbounds float* %tmp1780, i64 1 + %tmp1782 = getelementptr inbounds float* %tmp1781, i64 1 + %tmp1783 = getelementptr inbounds float* %tmp1782, i64 1 + %tmp1784 = getelementptr inbounds float* %tmp1783, i64 1 + %tmp1785 = getelementptr inbounds float* %tmp1784, i64 1 + %tmp1786 = getelementptr inbounds float* %tmp1785, i64 1 + %tmp1787 = getelementptr inbounds float* %tmp1786, i64 1 + %tmp1788 = getelementptr inbounds float* %tmp1787, i64 1 + %tmp1789 = getelementptr inbounds float* %tmp1788, i64 1 + %tmp1790 = getelementptr inbounds float* %tmp1789, i64 1 + %tmp1791 = getelementptr inbounds float* %tmp1790, i64 1 + %tmp1792 = getelementptr inbounds float* %tmp1791, i64 1 + %tmp1793 = getelementptr inbounds float* %tmp1792, i64 1 + %tmp1794 = getelementptr inbounds float* %tmp1793, i64 1 + %tmp1795 = getelementptr inbounds float* %tmp1794, i64 1 + %tmp1796 = getelementptr inbounds float* %tmp1795, i64 1 + %tmp1797 = getelementptr inbounds float* %tmp1796, i64 1 + %tmp1798 = getelementptr inbounds float* %tmp1797, i64 1 + %tmp1799 = getelementptr inbounds float* %tmp1798, i64 1 + %tmp1800 = getelementptr inbounds float* %tmp1799, i64 1 + %tmp1801 = getelementptr inbounds float* %tmp1800, i64 1 + %tmp1802 = getelementptr inbounds float* %tmp1801, i64 1 + %tmp1803 = getelementptr inbounds float* %tmp1802, i64 1 + %tmp1804 = getelementptr inbounds float* %tmp1803, i64 1 + %tmp1805 = getelementptr inbounds float* %tmp1804, i64 1 + %tmp1806 = getelementptr inbounds float* %tmp1805, i64 1 + %tmp1807 = getelementptr inbounds float* %tmp1806, i64 1 + %tmp1808 = getelementptr inbounds float* %tmp1807, i64 1 + %tmp1809 = getelementptr inbounds float* %tmp1808, i64 1 + %tmp1810 = getelementptr inbounds float* %tmp1809, i64 1 + %tmp1811 = getelementptr inbounds float* %tmp1810, i64 1 + %tmp1812 = getelementptr inbounds float* %tmp1811, i64 1 + %tmp1813 = getelementptr inbounds 
float* %tmp1812, i64 1 + %tmp1814 = getelementptr inbounds float* %tmp1813, i64 1 + %tmp1815 = getelementptr inbounds float* %tmp1814, i64 1 + %tmp1816 = getelementptr inbounds float* %tmp1815, i64 1 + %tmp1817 = getelementptr inbounds float* %tmp1816, i64 1 + %tmp1818 = getelementptr inbounds float* %tmp1817, i64 1 + %tmp1819 = getelementptr inbounds float* %tmp1818, i64 1 + %tmp1820 = getelementptr inbounds float* %tmp1819, i64 1 + %tmp1821 = getelementptr inbounds float* %tmp1820, i64 1 + %tmp1822 = getelementptr inbounds float* %tmp1821, i64 1 + %tmp1823 = getelementptr inbounds float* %tmp1822, i64 1 + %tmp1824 = getelementptr inbounds float* %tmp1823, i64 1 + %tmp1825 = getelementptr inbounds float* %tmp1824, i64 1 + %tmp1826 = getelementptr inbounds float* %tmp1825, i64 1 + %tmp1827 = getelementptr inbounds float* %tmp1826, i64 1 + %tmp1828 = getelementptr inbounds float* %tmp1827, i64 1 + %tmp1829 = getelementptr inbounds float* %tmp1828, i64 1 + %tmp1830 = getelementptr inbounds float* %tmp1829, i64 1 + %tmp1831 = getelementptr inbounds float* %tmp1830, i64 1 + %tmp1832 = getelementptr inbounds float* %tmp1831, i64 1 + %tmp1833 = getelementptr inbounds float* %tmp1832, i64 1 + %tmp1834 = getelementptr inbounds float* %tmp1833, i64 1 + %tmp1835 = getelementptr inbounds float* %tmp1834, i64 1 + %tmp1836 = getelementptr inbounds float* %tmp1835, i64 1 + %tmp1837 = getelementptr inbounds float* %tmp1836, i64 1 + %tmp1838 = getelementptr inbounds float* %tmp1837, i64 1 + %tmp1839 = getelementptr inbounds float* %tmp1838, i64 1 + %tmp1840 = getelementptr inbounds float* %tmp1839, i64 1 + %tmp1841 = getelementptr inbounds float* %tmp1840, i64 1 + %tmp1842 = getelementptr inbounds float* %tmp1841, i64 1 + %tmp1843 = getelementptr inbounds float* %tmp1842, i64 1 + %tmp1844 = getelementptr inbounds float* %tmp1843, i64 1 + %tmp1845 = getelementptr inbounds float* %tmp1844, i64 1 + %tmp1846 = getelementptr inbounds float* %tmp1845, i64 1 + %tmp1847 = getelementptr 
inbounds float* %tmp1846, i64 1 + %tmp1848 = getelementptr inbounds float* %tmp1847, i64 1 + %tmp1849 = getelementptr inbounds float* %tmp1848, i64 1 + %tmp1850 = getelementptr inbounds float* %tmp1849, i64 1 + %tmp1851 = getelementptr inbounds float* %tmp1850, i64 1 + %tmp1852 = getelementptr inbounds float* %tmp1851, i64 1 + %tmp1853 = getelementptr inbounds float* %tmp1852, i64 1 + %tmp1854 = getelementptr inbounds float* %tmp1853, i64 1 + %tmp1855 = getelementptr inbounds float* %tmp1854, i64 1 + %tmp1856 = getelementptr inbounds float* %tmp1855, i64 1 + %tmp1857 = getelementptr inbounds float* %tmp1856, i64 1 + %tmp1858 = getelementptr inbounds float* %tmp1857, i64 1 + %tmp1859 = getelementptr inbounds float* %tmp1858, i64 1 + %tmp1860 = getelementptr inbounds float* %tmp1859, i64 1 + %tmp1861 = getelementptr inbounds float* %tmp1860, i64 1 + %tmp1862 = getelementptr inbounds float* %tmp1861, i64 1 + %tmp1863 = getelementptr inbounds float* %tmp1862, i64 1 + %tmp1864 = getelementptr inbounds float* %tmp1863, i64 1 + %tmp1865 = getelementptr inbounds float* %tmp1864, i64 1 + %tmp1866 = getelementptr inbounds float* %tmp1865, i64 1 + %tmp1867 = getelementptr inbounds float* %tmp1866, i64 1 + %tmp1868 = getelementptr inbounds float* %tmp1867, i64 1 + %tmp1869 = getelementptr inbounds float* %tmp1868, i64 1 + %tmp1870 = getelementptr inbounds float* %tmp1869, i64 1 + %tmp1871 = getelementptr inbounds float* %tmp1870, i64 1 + %tmp1872 = getelementptr inbounds float* %tmp1871, i64 1 + %tmp1873 = getelementptr inbounds float* %tmp1872, i64 1 + %tmp1874 = getelementptr inbounds float* %tmp1873, i64 1 + %tmp1875 = getelementptr inbounds float* %tmp1874, i64 1 + %tmp1876 = getelementptr inbounds float* %tmp1875, i64 1 + %tmp1877 = getelementptr inbounds float* %tmp1876, i64 1 + %tmp1878 = getelementptr inbounds float* %tmp1877, i64 1 + %tmp1879 = getelementptr inbounds float* %tmp1878, i64 1 + %tmp1880 = getelementptr inbounds float* %tmp1879, i64 1 + %tmp1881 = 
getelementptr inbounds float* %tmp1880, i64 1 + %tmp1882 = getelementptr inbounds float* %tmp1881, i64 1 + %tmp1883 = getelementptr inbounds float* %tmp1882, i64 1 + %tmp1884 = getelementptr inbounds float* %tmp1883, i64 1 + %tmp1885 = getelementptr inbounds float* %tmp1884, i64 1 + %tmp1886 = getelementptr inbounds float* %tmp1885, i64 1 + %tmp1887 = getelementptr inbounds float* %tmp1886, i64 1 + %tmp1888 = getelementptr inbounds float* %tmp1887, i64 1 + %tmp1889 = getelementptr inbounds float* %tmp1888, i64 1 + %tmp1890 = getelementptr inbounds float* %tmp1889, i64 1 + %tmp1891 = getelementptr inbounds float* %tmp1890, i64 1 + %tmp1892 = getelementptr inbounds float* %tmp1891, i64 1 + %tmp1893 = getelementptr inbounds float* %tmp1892, i64 1 + %tmp1894 = getelementptr inbounds float* %tmp1893, i64 1 + %tmp1895 = getelementptr inbounds float* %tmp1894, i64 1 + %tmp1896 = getelementptr inbounds float* %tmp1895, i64 1 + %tmp1897 = getelementptr inbounds float* %tmp1896, i64 1 + %tmp1898 = getelementptr inbounds float* %tmp1897, i64 1 + %tmp1899 = getelementptr inbounds float* %tmp1898, i64 1 + %tmp1900 = getelementptr inbounds float* %tmp1899, i64 1 + %tmp1901 = getelementptr inbounds float* %tmp1900, i64 1 + %tmp1902 = getelementptr inbounds float* %tmp1901, i64 1 + %tmp1903 = getelementptr inbounds float* %tmp1902, i64 1 + %tmp1904 = getelementptr inbounds float* %tmp1903, i64 1 + %tmp1905 = getelementptr inbounds float* %tmp1904, i64 1 + %tmp1906 = getelementptr inbounds float* %tmp1905, i64 1 + %tmp1907 = getelementptr inbounds float* %tmp1906, i64 1 + %tmp1908 = getelementptr inbounds float* %tmp1907, i64 1 + %tmp1909 = getelementptr inbounds float* %tmp1908, i64 1 + %tmp1910 = getelementptr inbounds float* %tmp1909, i64 1 + %tmp1911 = getelementptr inbounds float* %tmp1910, i64 1 + %tmp1912 = getelementptr inbounds float* %tmp1911, i64 1 + %tmp1913 = getelementptr inbounds float* %tmp1912, i64 1 + %tmp1914 = getelementptr inbounds float* %tmp1913, i64 1 + 
%tmp1915 = getelementptr inbounds float* %tmp1914, i64 1 + %tmp1916 = getelementptr inbounds float* %tmp1915, i64 1 + %tmp1917 = getelementptr inbounds float* %tmp1916, i64 1 + %tmp1918 = getelementptr inbounds float* %tmp1917, i64 1 + %tmp1919 = getelementptr inbounds float* %tmp1918, i64 1 + %tmp1920 = getelementptr inbounds float* %tmp1919, i64 1 + %tmp1921 = getelementptr inbounds float* %tmp1920, i64 1 + %tmp1922 = getelementptr inbounds float* %tmp1921, i64 1 + %tmp1923 = getelementptr inbounds float* %tmp1922, i64 1 + %tmp1924 = getelementptr inbounds float* %tmp1923, i64 1 + %tmp1925 = getelementptr inbounds float* %tmp1924, i64 1 + %tmp1926 = getelementptr inbounds float* %tmp1925, i64 1 + %tmp1927 = getelementptr inbounds float* %tmp1926, i64 1 + %tmp1928 = getelementptr inbounds float* %tmp1927, i64 1 + %tmp1929 = getelementptr inbounds float* %tmp1928, i64 1 + %tmp1930 = getelementptr inbounds float* %tmp1929, i64 1 + %tmp1931 = getelementptr inbounds float* %tmp1930, i64 1 + %tmp1932 = getelementptr inbounds float* %tmp1931, i64 1 + %tmp1933 = getelementptr inbounds float* %tmp1932, i64 1 + %tmp1934 = getelementptr inbounds float* %tmp1933, i64 1 + %tmp1935 = getelementptr inbounds float* %tmp1934, i64 1 + %tmp1936 = getelementptr inbounds float* %tmp1935, i64 1 + %tmp1937 = getelementptr inbounds float* %tmp1936, i64 1 + %tmp1938 = getelementptr inbounds float* %tmp1937, i64 1 + %tmp1939 = getelementptr inbounds float* %tmp1938, i64 1 + %tmp1940 = getelementptr inbounds float* %tmp1939, i64 1 + %tmp1941 = getelementptr inbounds float* %tmp1940, i64 1 + %tmp1942 = getelementptr inbounds float* %tmp1941, i64 1 + %tmp1943 = getelementptr inbounds float* %tmp1942, i64 1 + %tmp1944 = getelementptr inbounds float* %tmp1943, i64 1 + %tmp1945 = getelementptr inbounds float* %tmp1944, i64 1 + %tmp1946 = getelementptr inbounds float* %tmp1945, i64 1 + %tmp1947 = getelementptr inbounds float* %tmp1946, i64 1 + %tmp1948 = getelementptr inbounds float* %tmp1947, 
i64 1 + %tmp1949 = getelementptr inbounds float* %tmp1948, i64 1 + %tmp1950 = getelementptr inbounds float* %tmp1949, i64 1 + %tmp1951 = getelementptr inbounds float* %tmp1950, i64 1 + %tmp1952 = getelementptr inbounds float* %tmp1951, i64 1 + %tmp1953 = getelementptr inbounds float* %tmp1952, i64 1 + %tmp1954 = getelementptr inbounds float* %tmp1953, i64 1 + %tmp1955 = getelementptr inbounds float* %tmp1954, i64 1 + %tmp1956 = getelementptr inbounds float* %tmp1955, i64 1 + %tmp1957 = getelementptr inbounds float* %tmp1956, i64 1 + %tmp1958 = getelementptr inbounds float* %tmp1957, i64 1 + %tmp1959 = getelementptr inbounds float* %tmp1958, i64 1 + %tmp1960 = getelementptr inbounds float* %tmp1959, i64 1 + %tmp1961 = getelementptr inbounds float* %tmp1960, i64 1 + %tmp1962 = getelementptr inbounds float* %tmp1961, i64 1 + %tmp1963 = getelementptr inbounds float* %tmp1962, i64 1 + %tmp1964 = getelementptr inbounds float* %tmp1963, i64 1 + %tmp1965 = getelementptr inbounds float* %tmp1964, i64 1 + %tmp1966 = getelementptr inbounds float* %tmp1965, i64 1 + %tmp1967 = getelementptr inbounds float* %tmp1966, i64 1 + %tmp1968 = getelementptr inbounds float* %tmp1967, i64 1 + %tmp1969 = getelementptr inbounds float* %tmp1968, i64 1 + %tmp1970 = getelementptr inbounds float* %tmp1969, i64 1 + %tmp1971 = getelementptr inbounds float* %tmp1970, i64 1 + %tmp1972 = getelementptr inbounds float* %tmp1971, i64 1 + %tmp1973 = getelementptr inbounds float* %tmp1972, i64 1 + %tmp1974 = getelementptr inbounds float* %tmp1973, i64 1 + %tmp1975 = getelementptr inbounds float* %tmp1974, i64 1 + %tmp1976 = getelementptr inbounds float* %tmp1975, i64 1 + %tmp1977 = getelementptr inbounds float* %tmp1976, i64 1 + %tmp1978 = getelementptr inbounds float* %tmp1977, i64 1 + %tmp1979 = getelementptr inbounds float* %tmp1978, i64 1 + %tmp1980 = getelementptr inbounds float* %tmp1979, i64 1 + %tmp1981 = getelementptr inbounds float* %tmp1980, i64 1 + %tmp1982 = getelementptr inbounds float* 
%tmp1981, i64 1 + %tmp1983 = getelementptr inbounds float* %tmp1982, i64 1 + %tmp1984 = getelementptr inbounds float* %tmp1983, i64 1 + %tmp1985 = getelementptr inbounds float* %tmp1984, i64 1 + %tmp1986 = getelementptr inbounds float* %tmp1985, i64 1 + %tmp1987 = getelementptr inbounds float* %tmp1986, i64 1 + %tmp1988 = getelementptr inbounds float* %tmp1987, i64 1 + %tmp1989 = getelementptr inbounds float* %tmp1988, i64 1 + %tmp1990 = getelementptr inbounds float* %tmp1989, i64 1 + %tmp1991 = getelementptr inbounds float* %tmp1990, i64 1 + %tmp1992 = getelementptr inbounds float* %tmp1991, i64 1 + %tmp1993 = getelementptr inbounds float* %tmp1992, i64 1 + %tmp1994 = getelementptr inbounds float* %tmp1993, i64 1 + %tmp1995 = getelementptr inbounds float* %tmp1994, i64 1 + %tmp1996 = getelementptr inbounds float* %tmp1995, i64 1 + %tmp1997 = getelementptr inbounds float* %tmp1996, i64 1 + %tmp1998 = getelementptr inbounds float* %tmp1997, i64 1 + %tmp1999 = getelementptr inbounds float* %tmp1998, i64 1 + %tmp2000 = getelementptr inbounds float* %tmp1999, i64 1 + %tmp2001 = getelementptr inbounds float* %tmp2000, i64 1 + %tmp2002 = getelementptr inbounds float* %tmp2001, i64 1 + %tmp2003 = getelementptr inbounds float* %tmp2002, i64 1 + %tmp2004 = getelementptr inbounds float* %tmp2003, i64 1 + %tmp2005 = getelementptr inbounds float* %tmp2004, i64 1 + %tmp2006 = getelementptr inbounds float* %tmp2005, i64 1 + %tmp2007 = getelementptr inbounds float* %tmp2006, i64 1 + %tmp2008 = getelementptr inbounds float* %tmp2007, i64 1 + %tmp2009 = getelementptr inbounds float* %tmp2008, i64 1 + %tmp2010 = getelementptr inbounds float* %tmp2009, i64 1 + %tmp2011 = getelementptr inbounds float* %tmp2010, i64 1 + %tmp2012 = getelementptr inbounds float* %tmp2011, i64 1 + %tmp2013 = getelementptr inbounds float* %tmp2012, i64 1 + %tmp2014 = getelementptr inbounds float* %tmp2013, i64 1 + %tmp2015 = getelementptr inbounds float* %tmp2014, i64 1 + %tmp2016 = getelementptr inbounds 
float* %tmp2015, i64 1 + %tmp2017 = getelementptr inbounds float* %tmp2016, i64 1 + %tmp2018 = getelementptr inbounds float* %tmp2017, i64 1 + %tmp2019 = getelementptr inbounds float* %tmp2018, i64 1 + %tmp2020 = getelementptr inbounds float* %tmp2019, i64 1 + %tmp2021 = getelementptr inbounds float* %tmp2020, i64 1 + %tmp2022 = getelementptr inbounds float* %tmp2021, i64 1 + %tmp2023 = getelementptr inbounds float* %tmp2022, i64 1 + %tmp2024 = getelementptr inbounds float* %tmp2023, i64 1 + %tmp2025 = getelementptr inbounds float* %tmp2024, i64 1 + %tmp2026 = getelementptr inbounds float* %tmp2025, i64 1 + %tmp2027 = getelementptr inbounds float* %tmp2026, i64 1 + %tmp2028 = getelementptr inbounds float* %tmp2027, i64 1 + %tmp2029 = getelementptr inbounds float* %tmp2028, i64 1 + %tmp2030 = getelementptr inbounds float* %tmp2029, i64 1 + %tmp2031 = getelementptr inbounds float* %tmp2030, i64 1 + %tmp2032 = getelementptr inbounds float* %tmp2031, i64 1 + %tmp2033 = getelementptr inbounds float* %tmp2032, i64 1 + %tmp2034 = getelementptr inbounds float* %tmp2033, i64 1 + %tmp2035 = getelementptr inbounds float* %tmp2034, i64 1 + %tmp2036 = getelementptr inbounds float* %tmp2035, i64 1 + %tmp2037 = getelementptr inbounds float* %tmp2036, i64 1 + %tmp2038 = getelementptr inbounds float* %tmp2037, i64 1 + %tmp2039 = getelementptr inbounds float* %tmp2038, i64 1 + %tmp2040 = getelementptr inbounds float* %tmp2039, i64 1 + %tmp2041 = getelementptr inbounds float* %tmp2040, i64 1 + %tmp2042 = getelementptr inbounds float* %tmp2041, i64 1 + %tmp2043 = getelementptr inbounds float* %tmp2042, i64 1 + %tmp2044 = getelementptr inbounds float* %tmp2043, i64 1 + %tmp2045 = getelementptr inbounds float* %tmp2044, i64 1 + %tmp2046 = getelementptr inbounds float* %tmp2045, i64 1 + %tmp2047 = getelementptr inbounds float* %tmp2046, i64 1 + %tmp2048 = getelementptr inbounds float* %tmp2047, i64 1 + %tmp2049 = getelementptr inbounds float* %tmp2048, i64 1 + %tmp2050 = getelementptr 
inbounds float* %tmp2049, i64 1 + %tmp2051 = getelementptr inbounds float* %tmp2050, i64 1 + %tmp2052 = getelementptr inbounds float* %tmp2051, i64 1 + %tmp2053 = getelementptr inbounds float* %tmp2052, i64 1 + %tmp2054 = getelementptr inbounds float* %tmp2053, i64 1 + %tmp2055 = getelementptr inbounds float* %tmp2054, i64 1 + %tmp2056 = getelementptr inbounds float* %tmp2055, i64 1 + %tmp2057 = getelementptr inbounds float* %tmp2056, i64 1 + %tmp2058 = getelementptr inbounds float* %tmp2057, i64 1 + %tmp2059 = getelementptr inbounds float* %tmp2058, i64 1 + %tmp2060 = getelementptr inbounds float* %tmp2059, i64 1 + %tmp2061 = getelementptr inbounds float* %tmp2060, i64 1 + %tmp2062 = getelementptr inbounds float* %tmp2061, i64 1 + %tmp2063 = getelementptr inbounds float* %tmp2062, i64 1 + %tmp2064 = getelementptr inbounds float* %tmp2063, i64 1 + %tmp2065 = getelementptr inbounds float* %tmp2064, i64 1 + %tmp2066 = getelementptr inbounds float* %tmp2065, i64 1 + %tmp2067 = getelementptr inbounds float* %tmp2066, i64 1 + %tmp2068 = getelementptr inbounds float* %tmp2067, i64 1 + %tmp2069 = getelementptr inbounds float* %tmp2068, i64 1 + %tmp2070 = getelementptr inbounds float* %tmp2069, i64 1 + %tmp2071 = getelementptr inbounds float* %tmp2070, i64 1 + %tmp2072 = getelementptr inbounds float* %tmp2071, i64 1 + %tmp2073 = getelementptr inbounds float* %tmp2072, i64 1 + %tmp2074 = getelementptr inbounds float* %tmp2073, i64 1 + %tmp2075 = getelementptr inbounds float* %tmp2074, i64 1 + %tmp2076 = getelementptr inbounds float* %tmp2075, i64 1 + %tmp2077 = getelementptr inbounds float* %tmp2076, i64 1 + %tmp2078 = getelementptr inbounds float* %tmp2077, i64 1 + %tmp2079 = getelementptr inbounds float* %tmp2078, i64 1 + %tmp2080 = getelementptr inbounds float* %tmp2079, i64 1 + %tmp2081 = getelementptr inbounds float* %tmp2080, i64 1 + %tmp2082 = getelementptr inbounds float* %tmp2081, i64 1 + %tmp2083 = getelementptr inbounds float* %tmp2082, i64 1 + %tmp2084 = 
getelementptr inbounds float* %tmp2083, i64 1 + %tmp2085 = getelementptr inbounds float* %tmp2084, i64 1 + %tmp2086 = getelementptr inbounds float* %tmp2085, i64 1 + %tmp2087 = getelementptr inbounds float* %tmp2086, i64 1 + %tmp2088 = getelementptr inbounds float* %tmp2087, i64 1 + %tmp2089 = getelementptr inbounds float* %tmp2088, i64 1 + %tmp2090 = getelementptr inbounds float* %tmp2089, i64 1 + %tmp2091 = getelementptr inbounds float* %tmp2090, i64 1 + %tmp2092 = getelementptr inbounds float* %tmp2091, i64 1 + %tmp2093 = getelementptr inbounds float* %tmp2092, i64 1 + %tmp2094 = getelementptr inbounds float* %tmp2093, i64 1 + %tmp2095 = getelementptr inbounds float* %tmp2094, i64 1 + %tmp2096 = getelementptr inbounds float* %tmp2095, i64 1 + %tmp2097 = getelementptr inbounds float* %tmp2096, i64 1 + %tmp2098 = getelementptr inbounds float* %tmp2097, i64 1 + %tmp2099 = getelementptr inbounds float* %tmp2098, i64 1 + %tmp2100 = getelementptr inbounds float* %tmp2099, i64 1 + %tmp2101 = getelementptr inbounds float* %tmp2100, i64 1 + %tmp2102 = getelementptr inbounds float* %tmp2101, i64 1 + %tmp2103 = getelementptr inbounds float* %tmp2102, i64 1 + %tmp2104 = getelementptr inbounds float* %tmp2103, i64 1 + %tmp2105 = getelementptr inbounds float* %tmp2104, i64 1 + %tmp2106 = getelementptr inbounds float* %tmp2105, i64 1 + %tmp2107 = getelementptr inbounds float* %tmp2106, i64 1 + %tmp2108 = getelementptr inbounds float* %tmp2107, i64 1 + %tmp2109 = getelementptr inbounds float* %tmp2108, i64 1 + %tmp2110 = getelementptr inbounds float* %tmp2109, i64 1 + %tmp2111 = getelementptr inbounds float* %tmp2110, i64 1 + %tmp2112 = getelementptr inbounds float* %tmp2111, i64 1 + %tmp2113 = getelementptr inbounds float* %tmp2112, i64 1 + %tmp2114 = getelementptr inbounds float* %tmp2113, i64 1 + %tmp2115 = getelementptr inbounds float* %tmp2114, i64 1 + %tmp2116 = getelementptr inbounds float* %tmp2115, i64 1 + %tmp2117 = getelementptr inbounds float* %tmp2116, i64 1 + 
%tmp2118 = getelementptr inbounds float* %tmp2117, i64 1 + %tmp2119 = getelementptr inbounds float* %tmp2118, i64 1 + %tmp2120 = getelementptr inbounds float* %tmp2119, i64 1 + %tmp2121 = getelementptr inbounds float* %tmp2120, i64 1 + %tmp2122 = getelementptr inbounds float* %tmp2121, i64 1 + %tmp2123 = getelementptr inbounds float* %tmp2122, i64 1 + %tmp2124 = getelementptr inbounds float* %tmp2123, i64 1 + %tmp2125 = getelementptr inbounds float* %tmp2124, i64 1 + %tmp2126 = getelementptr inbounds float* %tmp2125, i64 1 + %tmp2127 = getelementptr inbounds float* %tmp2126, i64 1 + %tmp2128 = getelementptr inbounds float* %tmp2127, i64 1 + %tmp2129 = getelementptr inbounds float* %tmp2128, i64 1 + %tmp2130 = getelementptr inbounds float* %tmp2129, i64 1 + %tmp2131 = getelementptr inbounds float* %tmp2130, i64 1 + %tmp2132 = getelementptr inbounds float* %tmp2131, i64 1 + %tmp2133 = getelementptr inbounds float* %tmp2132, i64 1 + %tmp2134 = getelementptr inbounds float* %tmp2133, i64 1 + %tmp2135 = getelementptr inbounds float* %tmp2134, i64 1 + %tmp2136 = getelementptr inbounds float* %tmp2135, i64 1 + %tmp2137 = getelementptr inbounds float* %tmp2136, i64 1 + %tmp2138 = getelementptr inbounds float* %tmp2137, i64 1 + %tmp2139 = getelementptr inbounds float* %tmp2138, i64 1 + %tmp2140 = getelementptr inbounds float* %tmp2139, i64 1 + %tmp2141 = getelementptr inbounds float* %tmp2140, i64 1 + %tmp2142 = getelementptr inbounds float* %tmp2141, i64 1 + %tmp2143 = getelementptr inbounds float* %tmp2142, i64 1 + %tmp2144 = getelementptr inbounds float* %tmp2143, i64 1 + %tmp2145 = getelementptr inbounds float* %tmp2144, i64 1 + %tmp2146 = getelementptr inbounds float* %tmp2145, i64 1 + %tmp2147 = getelementptr inbounds float* %tmp2146, i64 1 + %tmp2148 = getelementptr inbounds float* %tmp2147, i64 1 + %tmp2149 = getelementptr inbounds float* %tmp2148, i64 1 + %tmp2150 = getelementptr inbounds float* %tmp2149, i64 1 + %tmp2151 = getelementptr inbounds float* %tmp2150, 
i64 1 + %tmp2152 = getelementptr inbounds float* %tmp2151, i64 1 + %tmp2153 = getelementptr inbounds float* %tmp2152, i64 1 + %tmp2154 = getelementptr inbounds float* %tmp2153, i64 1 + %tmp2155 = getelementptr inbounds float* %tmp2154, i64 1 + %tmp2156 = getelementptr inbounds float* %tmp2155, i64 1 + %tmp2157 = getelementptr inbounds float* %tmp2156, i64 1 + %tmp2158 = getelementptr inbounds float* %tmp2157, i64 1 + %tmp2159 = getelementptr inbounds float* %tmp2158, i64 1 + %tmp2160 = getelementptr inbounds float* %tmp2159, i64 1 + %tmp2161 = getelementptr inbounds float* %tmp2160, i64 1 + %tmp2162 = getelementptr inbounds float* %tmp2161, i64 1 + %tmp2163 = getelementptr inbounds float* %tmp2162, i64 1 + %tmp2164 = getelementptr inbounds float* %tmp2163, i64 1 + %tmp2165 = getelementptr inbounds float* %tmp2164, i64 1 + %tmp2166 = getelementptr inbounds float* %tmp2165, i64 1 + %tmp2167 = getelementptr inbounds float* %tmp2166, i64 1 + %tmp2168 = getelementptr inbounds float* %tmp2167, i64 1 + %tmp2169 = getelementptr inbounds float* %tmp2168, i64 1 + %tmp2170 = getelementptr inbounds float* %tmp2169, i64 1 + %tmp2171 = getelementptr inbounds float* %tmp2170, i64 1 + %tmp2172 = getelementptr inbounds float* %tmp2171, i64 1 + %tmp2173 = getelementptr inbounds float* %tmp2172, i64 1 + %tmp2174 = getelementptr inbounds float* %tmp2173, i64 1 + %tmp2175 = getelementptr inbounds float* %tmp2174, i64 1 + %tmp2176 = getelementptr inbounds float* %tmp2175, i64 1 + %tmp2177 = getelementptr inbounds float* %tmp2176, i64 1 + %tmp2178 = getelementptr inbounds float* %tmp2177, i64 1 + %tmp2179 = getelementptr inbounds float* %tmp2178, i64 1 + %tmp2180 = getelementptr inbounds float* %tmp2179, i64 1 + %tmp2181 = getelementptr inbounds float* %tmp2180, i64 1 + %tmp2182 = getelementptr inbounds float* %tmp2181, i64 1 + %tmp2183 = getelementptr inbounds float* %tmp2182, i64 1 + %tmp2184 = getelementptr inbounds float* %tmp2183, i64 1 + %tmp2185 = getelementptr inbounds float* 
%tmp2184, i64 1 + %tmp2186 = getelementptr inbounds float* %tmp2185, i64 1 + %tmp2187 = getelementptr inbounds float* %tmp2186, i64 1 + %tmp2188 = getelementptr inbounds float* %tmp2187, i64 1 + %tmp2189 = getelementptr inbounds float* %tmp2188, i64 1 + %tmp2190 = getelementptr inbounds float* %tmp2189, i64 1 + %tmp2191 = getelementptr inbounds float* %tmp2190, i64 1 + %tmp2192 = getelementptr inbounds float* %tmp2191, i64 1 + %tmp2193 = getelementptr inbounds float* %tmp2192, i64 1 + %tmp2194 = getelementptr inbounds float* %tmp2193, i64 1 + %tmp2195 = getelementptr inbounds float* %tmp2194, i64 1 + %tmp2196 = getelementptr inbounds float* %tmp2195, i64 1 + %tmp2197 = getelementptr inbounds float* %tmp2196, i64 1 + %tmp2198 = getelementptr inbounds float* %tmp2197, i64 1 + %tmp2199 = getelementptr inbounds float* %tmp2198, i64 1 + %tmp2200 = getelementptr inbounds float* %tmp2199, i64 1 + %tmp2201 = getelementptr inbounds float* %tmp2200, i64 1 + %tmp2202 = getelementptr inbounds float* %tmp2201, i64 1 + %tmp2203 = getelementptr inbounds float* %tmp2202, i64 1 + %tmp2204 = getelementptr inbounds float* %tmp2203, i64 1 + %tmp2205 = getelementptr inbounds float* %tmp2204, i64 1 + %tmp2206 = getelementptr inbounds float* %tmp2205, i64 1 + %tmp2207 = getelementptr inbounds float* %tmp2206, i64 1 + %tmp2208 = getelementptr inbounds float* %tmp2207, i64 1 + %tmp2209 = getelementptr inbounds float* %tmp2208, i64 1 + %tmp2210 = getelementptr inbounds float* %tmp2209, i64 1 + %tmp2211 = getelementptr inbounds float* %tmp2210, i64 1 + %tmp2212 = getelementptr inbounds float* %tmp2211, i64 1 + %tmp2213 = getelementptr inbounds float* %tmp2212, i64 1 + %tmp2214 = getelementptr inbounds float* %tmp2213, i64 1 + %tmp2215 = getelementptr inbounds float* %tmp2214, i64 1 + %tmp2216 = getelementptr inbounds float* %tmp2215, i64 1 + %tmp2217 = getelementptr inbounds float* %tmp2216, i64 1 + %tmp2218 = getelementptr inbounds float* %tmp2217, i64 1 + %tmp2219 = getelementptr inbounds 
float* %tmp2218, i64 1 + %tmp2220 = getelementptr inbounds float* %tmp2219, i64 1 + %tmp2221 = getelementptr inbounds float* %tmp2220, i64 1 + %tmp2222 = getelementptr inbounds float* %tmp2221, i64 1 + %tmp2223 = getelementptr inbounds float* %tmp2222, i64 1 + %tmp2224 = getelementptr inbounds float* %tmp2223, i64 1 + %tmp2225 = getelementptr inbounds float* %tmp2224, i64 1 + %tmp2226 = getelementptr inbounds float* %tmp2225, i64 1 + %tmp2227 = getelementptr inbounds float* %tmp2226, i64 1 + %tmp2228 = getelementptr inbounds float* %tmp2227, i64 1 + %tmp2229 = getelementptr inbounds float* %tmp2228, i64 1 + %tmp2230 = getelementptr inbounds float* %tmp2229, i64 1 + %tmp2231 = getelementptr inbounds float* %tmp2230, i64 1 + %tmp2232 = getelementptr inbounds float* %tmp2231, i64 1 + %tmp2233 = getelementptr inbounds float* %tmp2232, i64 1 + %tmp2234 = getelementptr inbounds float* %tmp2233, i64 1 + %tmp2235 = getelementptr inbounds float* %tmp2234, i64 1 + %tmp2236 = getelementptr inbounds float* %tmp2235, i64 1 + %tmp2237 = getelementptr inbounds float* %tmp2236, i64 1 + %tmp2238 = getelementptr inbounds float* %tmp2237, i64 1 + %tmp2239 = getelementptr inbounds float* %tmp2238, i64 1 + %tmp2240 = getelementptr inbounds float* %tmp2239, i64 1 + %tmp2241 = getelementptr inbounds float* %tmp2240, i64 1 + %tmp2242 = getelementptr inbounds float* %tmp2241, i64 1 + %tmp2243 = getelementptr inbounds float* %tmp2242, i64 1 + %tmp2244 = getelementptr inbounds float* %tmp2243, i64 1 + %tmp2245 = getelementptr inbounds float* %tmp2244, i64 1 + %tmp2246 = getelementptr inbounds float* %tmp2245, i64 1 + %tmp2247 = getelementptr inbounds float* %tmp2246, i64 1 + %tmp2248 = getelementptr inbounds float* %tmp2247, i64 1 + %tmp2249 = getelementptr inbounds float* %tmp2248, i64 1 + %tmp2250 = getelementptr inbounds float* %tmp2249, i64 1 + %tmp2251 = getelementptr inbounds float* %tmp2250, i64 1 + %tmp2252 = getelementptr inbounds float* %tmp2251, i64 1 + %tmp2253 = getelementptr 
inbounds float* %tmp2252, i64 1 + %tmp2254 = getelementptr inbounds float* %tmp2253, i64 1 + %tmp2255 = getelementptr inbounds float* %tmp2254, i64 1 + %tmp2256 = getelementptr inbounds float* %tmp2255, i64 1 + %tmp2257 = getelementptr inbounds float* %tmp2256, i64 1 + %tmp2258 = getelementptr inbounds float* %tmp2257, i64 1 + %tmp2259 = getelementptr inbounds float* %tmp2258, i64 1 + %tmp2260 = getelementptr inbounds float* %tmp2259, i64 1 + %tmp2261 = getelementptr inbounds float* %tmp2260, i64 1 + %tmp2262 = getelementptr inbounds float* %tmp2261, i64 1 + %tmp2263 = getelementptr inbounds float* %tmp2262, i64 1 + %tmp2264 = getelementptr inbounds float* %tmp2263, i64 1 + %tmp2265 = getelementptr inbounds float* %tmp2264, i64 1 + %tmp2266 = getelementptr inbounds float* %tmp2265, i64 1 + %tmp2267 = getelementptr inbounds float* %tmp2266, i64 1 + %tmp2268 = getelementptr inbounds float* %tmp2267, i64 1 + %tmp2269 = getelementptr inbounds float* %tmp2268, i64 1 + %tmp2270 = getelementptr inbounds float* %tmp2269, i64 1 + %tmp2271 = getelementptr inbounds float* %tmp2270, i64 1 + %tmp2272 = getelementptr inbounds float* %tmp2271, i64 1 + %tmp2273 = getelementptr inbounds float* %tmp2272, i64 1 + %tmp2274 = getelementptr inbounds float* %tmp2273, i64 1 + %tmp2275 = getelementptr inbounds float* %tmp2274, i64 1 + %tmp2276 = getelementptr inbounds float* %tmp2275, i64 1 + %tmp2277 = getelementptr inbounds float* %tmp2276, i64 1 + %tmp2278 = getelementptr inbounds float* %tmp2277, i64 1 + %tmp2279 = getelementptr inbounds float* %tmp2278, i64 1 + %tmp2280 = getelementptr inbounds float* %tmp2279, i64 1 + %tmp2281 = getelementptr inbounds float* %tmp2280, i64 1 + %tmp2282 = getelementptr inbounds float* %tmp2281, i64 1 + %tmp2283 = getelementptr inbounds float* %tmp2282, i64 1 + %tmp2284 = getelementptr inbounds float* %tmp2283, i64 1 + %tmp2285 = getelementptr inbounds float* %tmp2284, i64 1 + %tmp2286 = getelementptr inbounds float* %tmp2285, i64 1 + %tmp2287 = 
getelementptr inbounds float* %tmp2286, i64 1 + %tmp2288 = getelementptr inbounds float* %tmp2287, i64 1 + %tmp2289 = getelementptr inbounds float* %tmp2288, i64 1 + %tmp2290 = getelementptr inbounds float* %tmp2289, i64 1 + %tmp2291 = getelementptr inbounds float* %tmp2290, i64 1 + %tmp2292 = getelementptr inbounds float* %tmp2291, i64 1 + %tmp2293 = getelementptr inbounds float* %tmp2292, i64 1 + %tmp2294 = getelementptr inbounds float* %tmp2293, i64 1 + %tmp2295 = getelementptr inbounds float* %tmp2294, i64 1 + %tmp2296 = getelementptr inbounds float* %tmp2295, i64 1 + %tmp2297 = getelementptr inbounds float* %tmp2296, i64 1 + %tmp2298 = getelementptr inbounds float* %tmp2297, i64 1 + %tmp2299 = getelementptr inbounds float* %tmp2298, i64 1 + %tmp2300 = getelementptr inbounds float* %tmp2299, i64 1 + %tmp2301 = getelementptr inbounds float* %tmp2300, i64 1 + %tmp2302 = getelementptr inbounds float* %tmp2301, i64 1 + %tmp2303 = getelementptr inbounds float* %tmp2302, i64 1 + %tmp2304 = getelementptr inbounds float* %tmp2303, i64 1 + %tmp2305 = getelementptr inbounds float* %tmp2304, i64 1 + %tmp2306 = getelementptr inbounds float* %tmp2305, i64 1 + %tmp2307 = getelementptr inbounds float* %tmp2306, i64 1 + %tmp2308 = getelementptr inbounds float* %tmp2307, i64 1 + %tmp2309 = getelementptr inbounds float* %tmp2308, i64 1 + %tmp2310 = getelementptr inbounds float* %tmp2309, i64 1 + %tmp2311 = getelementptr inbounds float* %tmp2310, i64 1 + %tmp2312 = getelementptr inbounds float* %tmp2311, i64 1 + %tmp2313 = getelementptr inbounds float* %tmp2312, i64 1 + %tmp2314 = getelementptr inbounds float* %tmp2313, i64 1 + %tmp2315 = getelementptr inbounds float* %tmp2314, i64 1 + %tmp2316 = getelementptr inbounds float* %tmp2315, i64 1 + %tmp2317 = getelementptr inbounds float* %tmp2316, i64 1 + %tmp2318 = getelementptr inbounds float* %tmp2317, i64 1 + %tmp2319 = getelementptr inbounds float* %tmp2318, i64 1 + %tmp2320 = getelementptr inbounds float* %tmp2319, i64 1 + 
%tmp2321 = getelementptr inbounds float* %tmp2320, i64 1 + %tmp2322 = getelementptr inbounds float* %tmp2321, i64 1 + %tmp2323 = getelementptr inbounds float* %tmp2322, i64 1 + %tmp2324 = getelementptr inbounds float* %tmp2323, i64 1 + %tmp2325 = getelementptr inbounds float* %tmp2324, i64 1 + %tmp2326 = getelementptr inbounds float* %tmp2325, i64 1 + %tmp2327 = getelementptr inbounds float* %tmp2326, i64 1 + %tmp2328 = getelementptr inbounds float* %tmp2327, i64 1 + %tmp2329 = getelementptr inbounds float* %tmp2328, i64 1 + %tmp2330 = getelementptr inbounds float* %tmp2329, i64 1 + %tmp2331 = getelementptr inbounds float* %tmp2330, i64 1 + %tmp2332 = getelementptr inbounds float* %tmp2331, i64 1 + %tmp2333 = getelementptr inbounds float* %tmp2332, i64 1 + %tmp2334 = getelementptr inbounds float* %tmp2333, i64 1 + %tmp2335 = getelementptr inbounds float* %tmp2334, i64 1 + %tmp2336 = getelementptr inbounds float* %tmp2335, i64 1 + %tmp2337 = getelementptr inbounds float* %tmp2336, i64 1 + %tmp2338 = getelementptr inbounds float* %tmp2337, i64 1 + %tmp2339 = getelementptr inbounds float* %tmp2338, i64 1 + %tmp2340 = getelementptr inbounds float* %tmp2339, i64 1 + %tmp2341 = getelementptr inbounds float* %tmp2340, i64 1 + %tmp2342 = getelementptr inbounds float* %tmp2341, i64 1 + %tmp2343 = getelementptr inbounds float* %tmp2342, i64 1 + %tmp2344 = getelementptr inbounds float* %tmp2343, i64 1 + %tmp2345 = getelementptr inbounds float* %tmp2344, i64 1 + %tmp2346 = getelementptr inbounds float* %tmp2345, i64 1 + %tmp2347 = getelementptr inbounds float* %tmp2346, i64 1 + %tmp2348 = getelementptr inbounds float* %tmp2347, i64 1 + %tmp2349 = getelementptr inbounds float* %tmp2348, i64 1 + %tmp2350 = getelementptr inbounds float* %tmp2349, i64 1 + %tmp2351 = getelementptr inbounds float* %tmp2350, i64 1 + %tmp2352 = getelementptr inbounds float* %tmp2351, i64 1 + %tmp2353 = getelementptr inbounds float* %tmp2352, i64 1 + %tmp2354 = getelementptr inbounds float* %tmp2353, 
i64 1 + %tmp2355 = getelementptr inbounds float* %tmp2354, i64 1 + %tmp2356 = getelementptr inbounds float* %tmp2355, i64 1 + %tmp2357 = getelementptr inbounds float* %tmp2356, i64 1 + %tmp2358 = getelementptr inbounds float* %tmp2357, i64 1 + %tmp2359 = getelementptr inbounds float* %tmp2358, i64 1 + %tmp2360 = getelementptr inbounds float* %tmp2359, i64 1 + %tmp2361 = getelementptr inbounds float* %tmp2360, i64 1 + %tmp2362 = getelementptr inbounds float* %tmp2361, i64 1 + %tmp2363 = getelementptr inbounds float* %tmp2362, i64 1 + %tmp2364 = getelementptr inbounds float* %tmp2363, i64 1 + %tmp2365 = getelementptr inbounds float* %tmp2364, i64 1 + %tmp2366 = getelementptr inbounds float* %tmp2365, i64 1 + %tmp2367 = getelementptr inbounds float* %tmp2366, i64 1 + %tmp2368 = getelementptr inbounds float* %tmp2367, i64 1 + %tmp2369 = getelementptr inbounds float* %tmp2368, i64 1 + %tmp2370 = getelementptr inbounds float* %tmp2369, i64 1 + %tmp2371 = getelementptr inbounds float* %tmp2370, i64 1 + %tmp2372 = getelementptr inbounds float* %tmp2371, i64 1 + %tmp2373 = getelementptr inbounds float* %tmp2372, i64 1 + %tmp2374 = getelementptr inbounds float* %tmp2373, i64 1 + %tmp2375 = getelementptr inbounds float* %tmp2374, i64 1 + %tmp2376 = getelementptr inbounds float* %tmp2375, i64 1 + %tmp2377 = getelementptr inbounds float* %tmp2376, i64 1 + %tmp2378 = getelementptr inbounds float* %tmp2377, i64 1 + %tmp2379 = getelementptr inbounds float* %tmp2378, i64 1 + %tmp2380 = getelementptr inbounds float* %tmp2379, i64 1 + %tmp2381 = getelementptr inbounds float* %tmp2380, i64 1 + %tmp2382 = getelementptr inbounds float* %tmp2381, i64 1 + %tmp2383 = getelementptr inbounds float* %tmp2382, i64 1 + %tmp2384 = getelementptr inbounds float* %tmp2383, i64 1 + %tmp2385 = getelementptr inbounds float* %tmp2384, i64 1 + %tmp2386 = getelementptr inbounds float* %tmp2385, i64 1 + %tmp2387 = getelementptr inbounds float* %tmp2386, i64 1 + %tmp2388 = getelementptr inbounds float* 
%tmp2387, i64 1 + %tmp2389 = getelementptr inbounds float* %tmp2388, i64 1 + %tmp2390 = getelementptr inbounds float* %tmp2389, i64 1 + %tmp2391 = getelementptr inbounds float* %tmp2390, i64 1 + %tmp2392 = getelementptr inbounds float* %tmp2391, i64 1 + %tmp2393 = getelementptr inbounds float* %tmp2392, i64 1 + %tmp2394 = getelementptr inbounds float* %tmp2393, i64 1 + %tmp2395 = getelementptr inbounds float* %tmp2394, i64 1 + %tmp2396 = getelementptr inbounds float* %tmp2395, i64 1 + %tmp2397 = getelementptr inbounds float* %tmp2396, i64 1 + %tmp2398 = getelementptr inbounds float* %tmp2397, i64 1 + %tmp2399 = getelementptr inbounds float* %tmp2398, i64 1 + %tmp2400 = getelementptr inbounds float* %tmp2399, i64 1 + %tmp2401 = getelementptr inbounds float* %tmp2400, i64 1 + %tmp2402 = getelementptr inbounds float* %tmp2401, i64 1 + %tmp2403 = getelementptr inbounds float* %tmp2402, i64 1 + %tmp2404 = getelementptr inbounds float* %tmp2403, i64 1 + %tmp2405 = getelementptr inbounds float* %tmp2404, i64 1 + %tmp2406 = getelementptr inbounds float* %tmp2405, i64 1 + %tmp2407 = getelementptr inbounds float* %tmp2406, i64 1 + %tmp2408 = getelementptr inbounds float* %tmp2407, i64 1 + %tmp2409 = getelementptr inbounds float* %tmp2408, i64 1 + %tmp2410 = getelementptr inbounds float* %tmp2409, i64 1 + %tmp2411 = getelementptr inbounds float* %tmp2410, i64 1 + %tmp2412 = getelementptr inbounds float* %tmp2411, i64 1 + %tmp2413 = getelementptr inbounds float* %tmp2412, i64 1 + %tmp2414 = getelementptr inbounds float* %tmp2413, i64 1 + %tmp2415 = getelementptr inbounds float* %tmp2414, i64 1 + %tmp2416 = getelementptr inbounds float* %tmp2415, i64 1 + %tmp2417 = getelementptr inbounds float* %tmp2416, i64 1 + %tmp2418 = getelementptr inbounds float* %tmp2417, i64 1 + %tmp2419 = getelementptr inbounds float* %tmp2418, i64 1 + %tmp2420 = getelementptr inbounds float* %tmp2419, i64 1 + %tmp2421 = getelementptr inbounds float* %tmp2420, i64 1 + %tmp2422 = getelementptr inbounds 
float* %tmp2421, i64 1 + %tmp2423 = getelementptr inbounds float* %tmp2422, i64 1 + %tmp2424 = getelementptr inbounds float* %tmp2423, i64 1 + %tmp2425 = getelementptr inbounds float* %tmp2424, i64 1 + %tmp2426 = getelementptr inbounds float* %tmp2425, i64 1 + %tmp2427 = getelementptr inbounds float* %tmp2426, i64 1 + %tmp2428 = getelementptr inbounds float* %tmp2427, i64 1 + %tmp2429 = getelementptr inbounds float* %tmp2428, i64 1 + %tmp2430 = getelementptr inbounds float* %tmp2429, i64 1 + %tmp2431 = getelementptr inbounds float* %tmp2430, i64 1 + %tmp2432 = getelementptr inbounds float* %tmp2431, i64 1 + %tmp2433 = getelementptr inbounds float* %tmp2432, i64 1 + %tmp2434 = getelementptr inbounds float* %tmp2433, i64 1 + %tmp2435 = getelementptr inbounds float* %tmp2434, i64 1 + %tmp2436 = getelementptr inbounds float* %tmp2435, i64 1 + %tmp2437 = getelementptr inbounds float* %tmp2436, i64 1 + %tmp2438 = getelementptr inbounds float* %tmp2437, i64 1 + %tmp2439 = getelementptr inbounds float* %tmp2438, i64 1 + %tmp2440 = getelementptr inbounds float* %tmp2439, i64 1 + %tmp2441 = getelementptr inbounds float* %tmp2440, i64 1 + %tmp2442 = getelementptr inbounds float* %tmp2441, i64 1 + %tmp2443 = getelementptr inbounds float* %tmp2442, i64 1 + %tmp2444 = getelementptr inbounds float* %tmp2443, i64 1 + %tmp2445 = getelementptr inbounds float* %tmp2444, i64 1 + %tmp2446 = getelementptr inbounds float* %tmp2445, i64 1 + %tmp2447 = getelementptr inbounds float* %tmp2446, i64 1 + %tmp2448 = getelementptr inbounds float* %tmp2447, i64 1 + %tmp2449 = getelementptr inbounds float* %tmp2448, i64 1 + %tmp2450 = getelementptr inbounds float* %tmp2449, i64 1 + %tmp2451 = getelementptr inbounds float* %tmp2450, i64 1 + %tmp2452 = getelementptr inbounds float* %tmp2451, i64 1 + %tmp2453 = getelementptr inbounds float* %tmp2452, i64 1 + %tmp2454 = getelementptr inbounds float* %tmp2453, i64 1 + %tmp2455 = getelementptr inbounds float* %tmp2454, i64 1 + %tmp2456 = getelementptr 
inbounds float* %tmp2455, i64 1 + %tmp2457 = getelementptr inbounds float* %tmp2456, i64 1 + %tmp2458 = getelementptr inbounds float* %tmp2457, i64 1 + %tmp2459 = getelementptr inbounds float* %tmp2458, i64 1 + %tmp2460 = getelementptr inbounds float* %tmp2459, i64 1 + %tmp2461 = getelementptr inbounds float* %tmp2460, i64 1 + %tmp2462 = getelementptr inbounds float* %tmp2461, i64 1 + %tmp2463 = getelementptr inbounds float* %tmp2462, i64 1 + %tmp2464 = getelementptr inbounds float* %tmp2463, i64 1 + %tmp2465 = getelementptr inbounds float* %tmp2464, i64 1 + %tmp2466 = getelementptr inbounds float* %tmp2465, i64 1 + %tmp2467 = getelementptr inbounds float* %tmp2466, i64 1 + %tmp2468 = getelementptr inbounds float* %tmp2467, i64 1 + %tmp2469 = getelementptr inbounds float* %tmp2468, i64 1 + %tmp2470 = getelementptr inbounds float* %tmp2469, i64 1 + %tmp2471 = getelementptr inbounds float* %tmp2470, i64 1 + %tmp2472 = getelementptr inbounds float* %tmp2471, i64 1 + %tmp2473 = getelementptr inbounds float* %tmp2472, i64 1 + %tmp2474 = getelementptr inbounds float* %tmp2473, i64 1 + %tmp2475 = getelementptr inbounds float* %tmp2474, i64 1 + %tmp2476 = getelementptr inbounds float* %tmp2475, i64 1 + %tmp2477 = getelementptr inbounds float* %tmp2476, i64 1 + %tmp2478 = getelementptr inbounds float* %tmp2477, i64 1 + %tmp2479 = getelementptr inbounds float* %tmp2478, i64 1 + %tmp2480 = getelementptr inbounds float* %tmp2479, i64 1 + %tmp2481 = getelementptr inbounds float* %tmp2480, i64 1 + %tmp2482 = getelementptr inbounds float* %tmp2481, i64 1 + %tmp2483 = getelementptr inbounds float* %tmp2482, i64 1 + %tmp2484 = getelementptr inbounds float* %tmp2483, i64 1 + %tmp2485 = getelementptr inbounds float* %tmp2484, i64 1 + %tmp2486 = getelementptr inbounds float* %tmp2485, i64 1 + %tmp2487 = getelementptr inbounds float* %tmp2486, i64 1 + %tmp2488 = getelementptr inbounds float* %tmp2487, i64 1 + %tmp2489 = getelementptr inbounds float* %tmp2488, i64 1 + %tmp2490 = 
getelementptr inbounds float* %tmp2489, i64 1 + %tmp2491 = getelementptr inbounds float* %tmp2490, i64 1 + %tmp2492 = getelementptr inbounds float* %tmp2491, i64 1 + %tmp2493 = getelementptr inbounds float* %tmp2492, i64 1 + %tmp2494 = getelementptr inbounds float* %tmp2493, i64 1 + %tmp2495 = getelementptr inbounds float* %tmp2494, i64 1 + %tmp2496 = getelementptr inbounds float* %tmp2495, i64 1 + %tmp2497 = getelementptr inbounds float* %tmp2496, i64 1 + %tmp2498 = getelementptr inbounds float* %tmp2497, i64 1 + %tmp2499 = getelementptr inbounds float* %tmp2498, i64 1 + %tmp2500 = getelementptr inbounds float* %tmp2499, i64 1 + %tmp2501 = getelementptr inbounds float* %tmp2500, i64 1 + %tmp2502 = getelementptr inbounds float* %tmp2501, i64 1 + %tmp2503 = getelementptr inbounds float* %tmp2502, i64 1 + %tmp2504 = getelementptr inbounds float* %tmp2503, i64 1 + %tmp2505 = getelementptr inbounds float* %tmp2504, i64 1 + %tmp2506 = getelementptr inbounds float* %tmp2505, i64 1 + %tmp2507 = getelementptr inbounds float* %tmp2506, i64 1 + %tmp2508 = getelementptr inbounds float* %tmp2507, i64 1 + %tmp2509 = getelementptr inbounds float* %tmp2508, i64 1 + %tmp2510 = getelementptr inbounds float* %tmp2509, i64 1 + %tmp2511 = getelementptr inbounds float* %tmp2510, i64 1 + %tmp2512 = getelementptr inbounds float* %tmp2511, i64 1 + %tmp2513 = getelementptr inbounds float* %tmp2512, i64 1 + %tmp2514 = getelementptr inbounds float* %tmp2513, i64 1 + %tmp2515 = getelementptr inbounds float* %tmp2514, i64 1 + %tmp2516 = getelementptr inbounds float* %tmp2515, i64 1 + %tmp2517 = getelementptr inbounds float* %tmp2516, i64 1 + %tmp2518 = getelementptr inbounds float* %tmp2517, i64 1 + %tmp2519 = getelementptr inbounds float* %tmp2518, i64 1 + %tmp2520 = getelementptr inbounds float* %tmp2519, i64 1 + %tmp2521 = getelementptr inbounds float* %tmp2520, i64 1 + %tmp2522 = getelementptr inbounds float* %tmp2521, i64 1 + %tmp2523 = getelementptr inbounds float* %tmp2522, i64 1 + 
%tmp2524 = getelementptr inbounds float* %tmp2523, i64 1 + %tmp2525 = getelementptr inbounds float* %tmp2524, i64 1 + %tmp2526 = getelementptr inbounds float* %tmp2525, i64 1 + %tmp2527 = getelementptr inbounds float* %tmp2526, i64 1 + %tmp2528 = getelementptr inbounds float* %tmp2527, i64 1 + %tmp2529 = getelementptr inbounds float* %tmp2528, i64 1 + %tmp2530 = getelementptr inbounds float* %tmp2529, i64 1 + %tmp2531 = getelementptr inbounds float* %tmp2530, i64 1 + %tmp2532 = getelementptr inbounds float* %tmp2531, i64 1 + %tmp2533 = getelementptr inbounds float* %tmp2532, i64 1 + %tmp2534 = getelementptr inbounds float* %tmp2533, i64 1 + %tmp2535 = getelementptr inbounds float* %tmp2534, i64 1 + %tmp2536 = getelementptr inbounds float* %tmp2535, i64 1 + %tmp2537 = getelementptr inbounds float* %tmp2536, i64 1 + %tmp2538 = getelementptr inbounds float* %tmp2537, i64 1 + %tmp2539 = getelementptr inbounds float* %tmp2538, i64 1 + %tmp2540 = getelementptr inbounds float* %tmp2539, i64 1 + %tmp2541 = getelementptr inbounds float* %tmp2540, i64 1 + %tmp2542 = getelementptr inbounds float* %tmp2541, i64 1 + %tmp2543 = getelementptr inbounds float* %tmp2542, i64 1 + %tmp2544 = getelementptr inbounds float* %tmp2543, i64 1 + %tmp2545 = getelementptr inbounds float* %tmp2544, i64 1 + %tmp2546 = getelementptr inbounds float* %tmp2545, i64 1 + %tmp2547 = getelementptr inbounds float* %tmp2546, i64 1 + %tmp2548 = getelementptr inbounds float* %tmp2547, i64 1 + %tmp2549 = getelementptr inbounds float* %tmp2548, i64 1 + %tmp2550 = getelementptr inbounds float* %tmp2549, i64 1 + %tmp2551 = getelementptr inbounds float* %tmp2550, i64 1 + %tmp2552 = getelementptr inbounds float* %tmp2551, i64 1 + %tmp2553 = getelementptr inbounds float* %tmp2552, i64 1 + %tmp2554 = getelementptr inbounds float* %tmp2553, i64 1 + %tmp2555 = getelementptr inbounds float* %tmp2554, i64 1 + %tmp2556 = getelementptr inbounds float* %tmp2555, i64 1 + %tmp2557 = getelementptr inbounds float* %tmp2556, 
i64 1 + %tmp2558 = getelementptr inbounds float* %tmp2557, i64 1 + %tmp2559 = getelementptr inbounds float* %tmp2558, i64 1 + %tmp2560 = getelementptr inbounds float* %tmp2559, i64 1 + %tmp2561 = getelementptr inbounds float* %tmp2560, i64 1 + %tmp2562 = getelementptr inbounds float* %tmp2561, i64 1 + %tmp2563 = getelementptr inbounds float* %tmp2562, i64 1 + %tmp2564 = getelementptr inbounds float* %tmp2563, i64 1 + %tmp2565 = getelementptr inbounds float* %tmp2564, i64 1 + %tmp2566 = getelementptr inbounds float* %tmp2565, i64 1 + %tmp2567 = getelementptr inbounds float* %tmp2566, i64 1 + %tmp2568 = getelementptr inbounds float* %tmp2567, i64 1 + %tmp2569 = getelementptr inbounds float* %tmp2568, i64 1 + %tmp2570 = getelementptr inbounds float* %tmp2569, i64 1 + %tmp2571 = getelementptr inbounds float* %tmp2570, i64 1 + %tmp2572 = getelementptr inbounds float* %tmp2571, i64 1 + %tmp2573 = getelementptr inbounds float* %tmp2572, i64 1 + %tmp2574 = getelementptr inbounds float* %tmp2573, i64 1 + %tmp2575 = getelementptr inbounds float* %tmp2574, i64 1 + %tmp2576 = getelementptr inbounds float* %tmp2575, i64 1 + %tmp2577 = getelementptr inbounds float* %tmp2576, i64 1 + %tmp2578 = getelementptr inbounds float* %tmp2577, i64 1 + %tmp2579 = getelementptr inbounds float* %tmp2578, i64 1 + %tmp2580 = getelementptr inbounds float* %tmp2579, i64 1 + %tmp2581 = getelementptr inbounds float* %tmp2580, i64 1 + %tmp2582 = getelementptr inbounds float* %tmp2581, i64 1 + %tmp2583 = getelementptr inbounds float* %tmp2582, i64 1 + %tmp2584 = getelementptr inbounds float* %tmp2583, i64 1 + %tmp2585 = getelementptr inbounds float* %tmp2584, i64 1 + %tmp2586 = getelementptr inbounds float* %tmp2585, i64 1 + %tmp2587 = getelementptr inbounds float* %tmp2586, i64 1 + %tmp2588 = getelementptr inbounds float* %tmp2587, i64 1 + %tmp2589 = getelementptr inbounds float* %tmp2588, i64 1 + %tmp2590 = getelementptr inbounds float* %tmp2589, i64 1 + %tmp2591 = getelementptr inbounds float* 
%tmp2590, i64 1 + %tmp2592 = getelementptr inbounds float* %tmp2591, i64 1 + %tmp2593 = getelementptr inbounds float* %tmp2592, i64 1 + %tmp2594 = getelementptr inbounds float* %tmp2593, i64 1 + %tmp2595 = getelementptr inbounds float* %tmp2594, i64 1 + %tmp2596 = getelementptr inbounds float* %tmp2595, i64 1 + %tmp2597 = getelementptr inbounds float* %tmp2596, i64 1 + %tmp2598 = getelementptr inbounds float* %tmp2597, i64 1 + %tmp2599 = getelementptr inbounds float* %tmp2598, i64 1 + %tmp2600 = getelementptr inbounds float* %tmp2599, i64 1 + %tmp2601 = getelementptr inbounds float* %tmp2600, i64 1 + %tmp2602 = getelementptr inbounds float* %tmp2601, i64 1 + %tmp2603 = getelementptr inbounds float* %tmp2602, i64 1 + %tmp2604 = getelementptr inbounds float* %tmp2603, i64 1 + %tmp2605 = getelementptr inbounds float* %tmp2604, i64 1 + %tmp2606 = getelementptr inbounds float* %tmp2605, i64 1 + %tmp2607 = getelementptr inbounds float* %tmp2606, i64 1 + %tmp2608 = getelementptr inbounds float* %tmp2607, i64 1 + %tmp2609 = getelementptr inbounds float* %tmp2608, i64 1 + %tmp2610 = getelementptr inbounds float* %tmp2609, i64 1 + %tmp2611 = getelementptr inbounds float* %tmp2610, i64 1 + %tmp2612 = getelementptr inbounds float* %tmp2611, i64 1 + %tmp2613 = getelementptr inbounds float* %tmp2612, i64 1 + %tmp2614 = getelementptr inbounds float* %tmp2613, i64 1 + %tmp2615 = getelementptr inbounds float* %tmp2614, i64 1 + %tmp2616 = getelementptr inbounds float* %tmp2615, i64 1 + %tmp2617 = getelementptr inbounds float* %tmp2616, i64 1 + %tmp2618 = getelementptr inbounds float* %tmp2617, i64 1 + %tmp2619 = getelementptr inbounds float* %tmp2618, i64 1 + %tmp2620 = getelementptr inbounds float* %tmp2619, i64 1 + %tmp2621 = getelementptr inbounds float* %tmp2620, i64 1 + %tmp2622 = getelementptr inbounds float* %tmp2621, i64 1 + %tmp2623 = getelementptr inbounds float* %tmp2622, i64 1 + %tmp2624 = getelementptr inbounds float* %tmp2623, i64 1 + %tmp2625 = getelementptr inbounds 
float* %tmp2624, i64 1 + %tmp2626 = getelementptr inbounds float* %tmp2625, i64 1 + %tmp2627 = getelementptr inbounds float* %tmp2626, i64 1 + %tmp2628 = getelementptr inbounds float* %tmp2627, i64 1 + %tmp2629 = getelementptr inbounds float* %tmp2628, i64 1 + %tmp2630 = getelementptr inbounds float* %tmp2629, i64 1 + %tmp2631 = getelementptr inbounds float* %tmp2630, i64 1 + %tmp2632 = getelementptr inbounds float* %tmp2631, i64 1 + %tmp2633 = getelementptr inbounds float* %tmp2632, i64 1 + %tmp2634 = getelementptr inbounds float* %tmp2633, i64 1 + %tmp2635 = getelementptr inbounds float* %tmp2634, i64 1 + %tmp2636 = getelementptr inbounds float* %tmp2635, i64 1 + %tmp2637 = getelementptr inbounds float* %tmp2636, i64 1 + %tmp2638 = getelementptr inbounds float* %tmp2637, i64 1 + %tmp2639 = getelementptr inbounds float* %tmp2638, i64 1 + %tmp2640 = getelementptr inbounds float* %tmp2639, i64 1 + %tmp2641 = getelementptr inbounds float* %tmp2640, i64 1 + %tmp2642 = getelementptr inbounds float* %tmp2641, i64 1 + %tmp2643 = getelementptr inbounds float* %tmp2642, i64 1 + %tmp2644 = getelementptr inbounds float* %tmp2643, i64 1 + %tmp2645 = getelementptr inbounds float* %tmp2644, i64 1 + %tmp2646 = getelementptr inbounds float* %tmp2645, i64 1 + %tmp2647 = getelementptr inbounds float* %tmp2646, i64 1 + %tmp2648 = getelementptr inbounds float* %tmp2647, i64 1 + %tmp2649 = getelementptr inbounds float* %tmp2648, i64 1 + %tmp2650 = getelementptr inbounds float* %tmp2649, i64 1 + %tmp2651 = getelementptr inbounds float* %tmp2650, i64 1 + %tmp2652 = getelementptr inbounds float* %tmp2651, i64 1 + %tmp2653 = getelementptr inbounds float* %tmp2652, i64 1 + %tmp2654 = getelementptr inbounds float* %tmp2653, i64 1 + %tmp2655 = getelementptr inbounds float* %tmp2654, i64 1 + %tmp2656 = getelementptr inbounds float* %tmp2655, i64 1 + %tmp2657 = getelementptr inbounds float* %tmp2656, i64 1 + %tmp2658 = getelementptr inbounds float* %tmp2657, i64 1 + %tmp2659 = getelementptr 
inbounds float* %tmp2658, i64 1 + %tmp2660 = getelementptr inbounds float* %tmp2659, i64 1 + %tmp2661 = getelementptr inbounds float* %tmp2660, i64 1 + %tmp2662 = getelementptr inbounds float* %tmp2661, i64 1 + %tmp2663 = getelementptr inbounds float* %tmp2662, i64 1 + %tmp2664 = getelementptr inbounds float* %tmp2663, i64 1 + %tmp2665 = getelementptr inbounds float* %tmp2664, i64 1 + %tmp2666 = getelementptr inbounds float* %tmp2665, i64 1 + %tmp2667 = getelementptr inbounds float* %tmp2666, i64 1 + %tmp2668 = getelementptr inbounds float* %tmp2667, i64 1 + %tmp2669 = getelementptr inbounds float* %tmp2668, i64 1 + %tmp2670 = getelementptr inbounds float* %tmp2669, i64 1 + %tmp2671 = getelementptr inbounds float* %tmp2670, i64 1 + %tmp2672 = getelementptr inbounds float* %tmp2671, i64 1 + %tmp2673 = getelementptr inbounds float* %tmp2672, i64 1 + %tmp2674 = getelementptr inbounds float* %tmp2673, i64 1 + %tmp2675 = getelementptr inbounds float* %tmp2674, i64 1 + %tmp2676 = getelementptr inbounds float* %tmp2675, i64 1 + %tmp2677 = getelementptr inbounds float* %tmp2676, i64 1 + %tmp2678 = getelementptr inbounds float* %tmp2677, i64 1 + %tmp2679 = getelementptr inbounds float* %tmp2678, i64 1 + %tmp2680 = getelementptr inbounds float* %tmp2679, i64 1 + %tmp2681 = getelementptr inbounds float* %tmp2680, i64 1 + %tmp2682 = getelementptr inbounds float* %tmp2681, i64 1 + %tmp2683 = getelementptr inbounds float* %tmp2682, i64 1 + %tmp2684 = getelementptr inbounds float* %tmp2683, i64 1 + %tmp2685 = getelementptr inbounds float* %tmp2684, i64 1 + %tmp2686 = getelementptr inbounds float* %tmp2685, i64 1 + %tmp2687 = getelementptr inbounds float* %tmp2686, i64 1 + %tmp2688 = getelementptr inbounds float* %tmp2687, i64 1 + %tmp2689 = getelementptr inbounds float* %tmp2688, i64 1 + %tmp2690 = getelementptr inbounds float* %tmp2689, i64 1 + %tmp2691 = getelementptr inbounds float* %tmp2690, i64 1 + %tmp2692 = getelementptr inbounds float* %tmp2691, i64 1 + %tmp2693 = 
getelementptr inbounds float* %tmp2692, i64 1 + %tmp2694 = getelementptr inbounds float* %tmp2693, i64 1 + %tmp2695 = getelementptr inbounds float* %tmp2694, i64 1 + %tmp2696 = getelementptr inbounds float* %tmp2695, i64 1 + %tmp2697 = getelementptr inbounds float* %tmp2696, i64 1 + %tmp2698 = getelementptr inbounds float* %tmp2697, i64 1 + %tmp2699 = getelementptr inbounds float* %tmp2698, i64 1 + %tmp2700 = getelementptr inbounds float* %tmp2699, i64 1 + %tmp2701 = getelementptr inbounds float* %tmp2700, i64 1 + %tmp2702 = getelementptr inbounds float* %tmp2701, i64 1 + %tmp2703 = getelementptr inbounds float* %tmp2702, i64 1 + %tmp2704 = getelementptr inbounds float* %tmp2703, i64 1 + %tmp2705 = getelementptr inbounds float* %tmp2704, i64 1 + %tmp2706 = getelementptr inbounds float* %tmp2705, i64 1 + %tmp2707 = getelementptr inbounds float* %tmp2706, i64 1 + %tmp2708 = getelementptr inbounds float* %tmp2707, i64 1 + %tmp2709 = getelementptr inbounds float* %tmp2708, i64 1 + %tmp2710 = getelementptr inbounds float* %tmp2709, i64 1 + %tmp2711 = getelementptr inbounds float* %tmp2710, i64 1 + %tmp2712 = getelementptr inbounds float* %tmp2711, i64 1 + %tmp2713 = getelementptr inbounds float* %tmp2712, i64 1 + %tmp2714 = getelementptr inbounds float* %tmp2713, i64 1 + %tmp2715 = getelementptr inbounds float* %tmp2714, i64 1 + %tmp2716 = getelementptr inbounds float* %tmp2715, i64 1 + %tmp2717 = getelementptr inbounds float* %tmp2716, i64 1 + %tmp2718 = getelementptr inbounds float* %tmp2717, i64 1 + %tmp2719 = getelementptr inbounds float* %tmp2718, i64 1 + %tmp2720 = getelementptr inbounds float* %tmp2719, i64 1 + %tmp2721 = getelementptr inbounds float* %tmp2720, i64 1 + %tmp2722 = getelementptr inbounds float* %tmp2721, i64 1 + %tmp2723 = getelementptr inbounds float* %tmp2722, i64 1 + %tmp2724 = getelementptr inbounds float* %tmp2723, i64 1 + %tmp2725 = getelementptr inbounds float* %tmp2724, i64 1 + %tmp2726 = getelementptr inbounds float* %tmp2725, i64 1 + 
%tmp2727 = getelementptr inbounds float* %tmp2726, i64 1 + %tmp2728 = getelementptr inbounds float* %tmp2727, i64 1 + %tmp2729 = getelementptr inbounds float* %tmp2728, i64 1 + %tmp2730 = getelementptr inbounds float* %tmp2729, i64 1 + %tmp2731 = getelementptr inbounds float* %tmp2730, i64 1 + %tmp2732 = getelementptr inbounds float* %tmp2731, i64 1 + %tmp2733 = getelementptr inbounds float* %tmp2732, i64 1 + %tmp2734 = getelementptr inbounds float* %tmp2733, i64 1 + %tmp2735 = getelementptr inbounds float* %tmp2734, i64 1 + %tmp2736 = getelementptr inbounds float* %tmp2735, i64 1 + %tmp2737 = getelementptr inbounds float* %tmp2736, i64 1 + %tmp2738 = getelementptr inbounds float* %tmp2737, i64 1 + %tmp2739 = getelementptr inbounds float* %tmp2738, i64 1 + %tmp2740 = getelementptr inbounds float* %tmp2739, i64 1 + %tmp2741 = getelementptr inbounds float* %tmp2740, i64 1 + %tmp2742 = getelementptr inbounds float* %tmp2741, i64 1 + %tmp2743 = getelementptr inbounds float* %tmp2742, i64 1 + %tmp2744 = getelementptr inbounds float* %tmp2743, i64 1 + %tmp2745 = getelementptr inbounds float* %tmp2744, i64 1 + %tmp2746 = getelementptr inbounds float* %tmp2745, i64 1 + %tmp2747 = getelementptr inbounds float* %tmp2746, i64 1 + %tmp2748 = getelementptr inbounds float* %tmp2747, i64 1 + %tmp2749 = getelementptr inbounds float* %tmp2748, i64 1 + %tmp2750 = getelementptr inbounds float* %tmp2749, i64 1 + %tmp2751 = getelementptr inbounds float* %tmp2750, i64 1 + %tmp2752 = getelementptr inbounds float* %tmp2751, i64 1 + %tmp2753 = getelementptr inbounds float* %tmp2752, i64 1 + %tmp2754 = getelementptr inbounds float* %tmp2753, i64 1 + %tmp2755 = getelementptr inbounds float* %tmp2754, i64 1 + %tmp2756 = getelementptr inbounds float* %tmp2755, i64 1 + %tmp2757 = getelementptr inbounds float* %tmp2756, i64 1 + %tmp2758 = getelementptr inbounds float* %tmp2757, i64 1 + %tmp2759 = getelementptr inbounds float* %tmp2758, i64 1 + %tmp2760 = getelementptr inbounds float* %tmp2759, 
i64 1 + %tmp2761 = getelementptr inbounds float* %tmp2760, i64 1 + %tmp2762 = getelementptr inbounds float* %tmp2761, i64 1 + %tmp2763 = getelementptr inbounds float* %tmp2762, i64 1 + %tmp2764 = getelementptr inbounds float* %tmp2763, i64 1 + %tmp2765 = getelementptr inbounds float* %tmp2764, i64 1 + %tmp2766 = getelementptr inbounds float* %tmp2765, i64 1 + %tmp2767 = getelementptr inbounds float* %tmp2766, i64 1 + %tmp2768 = getelementptr inbounds float* %tmp2767, i64 1 + %tmp2769 = getelementptr inbounds float* %tmp2768, i64 1 + %tmp2770 = getelementptr inbounds float* %tmp2769, i64 1 + %tmp2771 = getelementptr inbounds float* %tmp2770, i64 1 + %tmp2772 = getelementptr inbounds float* %tmp2771, i64 1 + %tmp2773 = getelementptr inbounds float* %tmp2772, i64 1 + %tmp2774 = getelementptr inbounds float* %tmp2773, i64 1 + %tmp2775 = getelementptr inbounds float* %tmp2774, i64 1 + %tmp2776 = getelementptr inbounds float* %tmp2775, i64 1 + %tmp2777 = getelementptr inbounds float* %tmp2776, i64 1 + %tmp2778 = getelementptr inbounds float* %tmp2777, i64 1 + %tmp2779 = getelementptr inbounds float* %tmp2778, i64 1 + %tmp2780 = getelementptr inbounds float* %tmp2779, i64 1 + %tmp2781 = getelementptr inbounds float* %tmp2780, i64 1 + %tmp2782 = getelementptr inbounds float* %tmp2781, i64 1 + %tmp2783 = getelementptr inbounds float* %tmp2782, i64 1 + %tmp2784 = getelementptr inbounds float* %tmp2783, i64 1 + %tmp2785 = getelementptr inbounds float* %tmp2784, i64 1 + %tmp2786 = getelementptr inbounds float* %tmp2785, i64 1 + %tmp2787 = getelementptr inbounds float* %tmp2786, i64 1 + %tmp2788 = getelementptr inbounds float* %tmp2787, i64 1 + %tmp2789 = getelementptr inbounds float* %tmp2788, i64 1 + %tmp2790 = getelementptr inbounds float* %tmp2789, i64 1 + %tmp2791 = getelementptr inbounds float* %tmp2790, i64 1 + %tmp2792 = getelementptr inbounds float* %tmp2791, i64 1 + %tmp2793 = getelementptr inbounds float* %tmp2792, i64 1 + %tmp2794 = getelementptr inbounds float* 
%tmp2793, i64 1 + %tmp2795 = getelementptr inbounds float* %tmp2794, i64 1 + %tmp2796 = getelementptr inbounds float* %tmp2795, i64 1 + %tmp2797 = getelementptr inbounds float* %tmp2796, i64 1 + %tmp2798 = getelementptr inbounds float* %tmp2797, i64 1 + %tmp2799 = getelementptr inbounds float* %tmp2798, i64 1 + %tmp2800 = getelementptr inbounds float* %tmp2799, i64 1 + %tmp2801 = getelementptr inbounds float* %tmp2800, i64 1 + %tmp2802 = getelementptr inbounds float* %tmp2801, i64 1 + %tmp2803 = getelementptr inbounds float* %tmp2802, i64 1 + %tmp2804 = getelementptr inbounds float* %tmp2803, i64 1 + %tmp2805 = getelementptr inbounds float* %tmp2804, i64 1 + %tmp2806 = getelementptr inbounds float* %tmp2805, i64 1 + %tmp2807 = getelementptr inbounds float* %tmp2806, i64 1 + %tmp2808 = getelementptr inbounds float* %tmp2807, i64 1 + %tmp2809 = getelementptr inbounds float* %tmp2808, i64 1 + %tmp2810 = getelementptr inbounds float* %tmp2809, i64 1 + %tmp2811 = getelementptr inbounds float* %tmp2810, i64 1 + %tmp2812 = getelementptr inbounds float* %tmp2811, i64 1 + %tmp2813 = getelementptr inbounds float* %tmp2812, i64 1 + %tmp2814 = getelementptr inbounds float* %tmp2813, i64 1 + %tmp2815 = getelementptr inbounds float* %tmp2814, i64 1 + %tmp2816 = getelementptr inbounds float* %tmp2815, i64 1 + %tmp2817 = getelementptr inbounds float* %tmp2816, i64 1 + %tmp2818 = getelementptr inbounds float* %tmp2817, i64 1 + %tmp2819 = getelementptr inbounds float* %tmp2818, i64 1 + %tmp2820 = getelementptr inbounds float* %tmp2819, i64 1 + %tmp2821 = getelementptr inbounds float* %tmp2820, i64 1 + %tmp2822 = getelementptr inbounds float* %tmp2821, i64 1 + %tmp2823 = getelementptr inbounds float* %tmp2822, i64 1 + %tmp2824 = getelementptr inbounds float* %tmp2823, i64 1 + %tmp2825 = getelementptr inbounds float* %tmp2824, i64 1 + %tmp2826 = getelementptr inbounds float* %tmp2825, i64 1 + %tmp2827 = getelementptr inbounds float* %tmp2826, i64 1 + %tmp2828 = getelementptr inbounds 
float* %tmp2827, i64 1 + %tmp2829 = getelementptr inbounds float* %tmp2828, i64 1 + %tmp2830 = getelementptr inbounds float* %tmp2829, i64 1 + %tmp2831 = getelementptr inbounds float* %tmp2830, i64 1 + %tmp2832 = getelementptr inbounds float* %tmp2831, i64 1 + %tmp2833 = getelementptr inbounds float* %tmp2832, i64 1 + %tmp2834 = getelementptr inbounds float* %tmp2833, i64 1 + %tmp2835 = getelementptr inbounds float* %tmp2834, i64 1 + %tmp2836 = getelementptr inbounds float* %tmp2835, i64 1 + %tmp2837 = getelementptr inbounds float* %tmp2836, i64 1 + %tmp2838 = getelementptr inbounds float* %tmp2837, i64 1 + %tmp2839 = getelementptr inbounds float* %tmp2838, i64 1 + %tmp2840 = getelementptr inbounds float* %tmp2839, i64 1 + %tmp2841 = getelementptr inbounds float* %tmp2840, i64 1 + %tmp2842 = getelementptr inbounds float* %tmp2841, i64 1 + %tmp2843 = getelementptr inbounds float* %tmp2842, i64 1 + %tmp2844 = getelementptr inbounds float* %tmp2843, i64 1 + %tmp2845 = getelementptr inbounds float* %tmp2844, i64 1 + %tmp2846 = getelementptr inbounds float* %tmp2845, i64 1 + %tmp2847 = getelementptr inbounds float* %tmp2846, i64 1 + %tmp2848 = getelementptr inbounds float* %tmp2847, i64 1 + %tmp2849 = getelementptr inbounds float* %tmp2848, i64 1 + %tmp2850 = getelementptr inbounds float* %tmp2849, i64 1 + %tmp2851 = getelementptr inbounds float* %tmp2850, i64 1 + %tmp2852 = getelementptr inbounds float* %tmp2851, i64 1 + %tmp2853 = getelementptr inbounds float* %tmp2852, i64 1 + %tmp2854 = getelementptr inbounds float* %tmp2853, i64 1 + %tmp2855 = getelementptr inbounds float* %tmp2854, i64 1 + %tmp2856 = getelementptr inbounds float* %tmp2855, i64 1 + %tmp2857 = getelementptr inbounds float* %tmp2856, i64 1 + %tmp2858 = getelementptr inbounds float* %tmp2857, i64 1 + %tmp2859 = getelementptr inbounds float* %tmp2858, i64 1 + %tmp2860 = getelementptr inbounds float* %tmp2859, i64 1 + %tmp2861 = getelementptr inbounds float* %tmp2860, i64 1 + %tmp2862 = getelementptr 
inbounds float* %tmp2861, i64 1 + %tmp2863 = getelementptr inbounds float* %tmp2862, i64 1 + %tmp2864 = getelementptr inbounds float* %tmp2863, i64 1 + %tmp2865 = getelementptr inbounds float* %tmp2864, i64 1 + %tmp2866 = getelementptr inbounds float* %tmp2865, i64 1 + %tmp2867 = getelementptr inbounds float* %tmp2866, i64 1 + %tmp2868 = getelementptr inbounds float* %tmp2867, i64 1 + %tmp2869 = getelementptr inbounds float* %tmp2868, i64 1 + %tmp2870 = getelementptr inbounds float* %tmp2869, i64 1 + %tmp2871 = getelementptr inbounds float* %tmp2870, i64 1 + %tmp2872 = getelementptr inbounds float* %tmp2871, i64 1 + %tmp2873 = getelementptr inbounds float* %tmp2872, i64 1 + %tmp2874 = getelementptr inbounds float* %tmp2873, i64 1 + %tmp2875 = getelementptr inbounds float* %tmp2874, i64 1 + %tmp2876 = getelementptr inbounds float* %tmp2875, i64 1 + %tmp2877 = getelementptr inbounds float* %tmp2876, i64 1 + %tmp2878 = getelementptr inbounds float* %tmp2877, i64 1 + %tmp2879 = getelementptr inbounds float* %tmp2878, i64 1 + %tmp2880 = getelementptr inbounds float* %tmp2879, i64 1 + %tmp2881 = getelementptr inbounds float* %tmp2880, i64 1 + %tmp2882 = getelementptr inbounds float* %tmp2881, i64 1 + %tmp2883 = getelementptr inbounds float* %tmp2882, i64 1 + %tmp2884 = getelementptr inbounds float* %tmp2883, i64 1 + %tmp2885 = getelementptr inbounds float* %tmp2884, i64 1 + %tmp2886 = getelementptr inbounds float* %tmp2885, i64 1 + %tmp2887 = getelementptr inbounds float* %tmp2886, i64 1 + %tmp2888 = getelementptr inbounds float* %tmp2887, i64 1 + %tmp2889 = getelementptr inbounds float* %tmp2888, i64 1 + %tmp2890 = getelementptr inbounds float* %tmp2889, i64 1 + %tmp2891 = getelementptr inbounds float* %tmp2890, i64 1 + %tmp2892 = getelementptr inbounds float* %tmp2891, i64 1 + %tmp2893 = getelementptr inbounds float* %tmp2892, i64 1 + %tmp2894 = getelementptr inbounds float* %tmp2893, i64 1 + %tmp2895 = getelementptr inbounds float* %tmp2894, i64 1 + %tmp2896 = 
getelementptr inbounds float* %tmp2895, i64 1 + %tmp2897 = getelementptr inbounds float* %tmp2896, i64 1 + %tmp2898 = getelementptr inbounds float* %tmp2897, i64 1 + %tmp2899 = getelementptr inbounds float* %tmp2898, i64 1 + %tmp2900 = getelementptr inbounds float* %tmp2899, i64 1 + %tmp2901 = getelementptr inbounds float* %tmp2900, i64 1 + %tmp2902 = getelementptr inbounds float* %tmp2901, i64 1 + %tmp2903 = getelementptr inbounds float* %tmp2902, i64 1 + %tmp2904 = getelementptr inbounds float* %tmp2903, i64 1 + %tmp2905 = getelementptr inbounds float* %tmp2904, i64 1 + %tmp2906 = getelementptr inbounds float* %tmp2905, i64 1 + %tmp2907 = getelementptr inbounds float* %tmp2906, i64 1 + %tmp2908 = getelementptr inbounds float* %tmp2907, i64 1 + %tmp2909 = getelementptr inbounds float* %tmp2908, i64 1 + %tmp2910 = getelementptr inbounds float* %tmp2909, i64 1 + %tmp2911 = getelementptr inbounds float* %tmp2910, i64 1 + %tmp2912 = getelementptr inbounds float* %tmp2911, i64 1 + %tmp2913 = getelementptr inbounds float* %tmp2912, i64 1 + %tmp2914 = getelementptr inbounds float* %tmp2913, i64 1 + %tmp2915 = getelementptr inbounds float* %tmp2914, i64 1 + %tmp2916 = getelementptr inbounds float* %tmp2915, i64 1 + %tmp2917 = getelementptr inbounds float* %tmp2916, i64 1 + %tmp2918 = getelementptr inbounds float* %tmp2917, i64 1 + %tmp2919 = getelementptr inbounds float* %tmp2918, i64 1 + %tmp2920 = getelementptr inbounds float* %tmp2919, i64 1 + %tmp2921 = getelementptr inbounds float* %tmp2920, i64 1 + %tmp2922 = getelementptr inbounds float* %tmp2921, i64 1 + %tmp2923 = getelementptr inbounds float* %tmp2922, i64 1 + %tmp2924 = getelementptr inbounds float* %tmp2923, i64 1 + %tmp2925 = getelementptr inbounds float* %tmp2924, i64 1 + %tmp2926 = getelementptr inbounds float* %tmp2925, i64 1 + %tmp2927 = getelementptr inbounds float* %tmp2926, i64 1 + %tmp2928 = getelementptr inbounds float* %tmp2927, i64 1 + %tmp2929 = getelementptr inbounds float* %tmp2928, i64 1 + 
%tmp2930 = getelementptr inbounds float* %tmp2929, i64 1 + %tmp2931 = getelementptr inbounds float* %tmp2930, i64 1 + %tmp2932 = getelementptr inbounds float* %tmp2931, i64 1 + %tmp2933 = getelementptr inbounds float* %tmp2932, i64 1 + %tmp2934 = getelementptr inbounds float* %tmp2933, i64 1 + %tmp2935 = getelementptr inbounds float* %tmp2934, i64 1 + %tmp2936 = getelementptr inbounds float* %tmp2935, i64 1 + %tmp2937 = getelementptr inbounds float* %tmp2936, i64 1 + %tmp2938 = getelementptr inbounds float* %tmp2937, i64 1 + %tmp2939 = getelementptr inbounds float* %tmp2938, i64 1 + %tmp2940 = getelementptr inbounds float* %tmp2939, i64 1 + %tmp2941 = getelementptr inbounds float* %tmp2940, i64 1 + %tmp2942 = getelementptr inbounds float* %tmp2941, i64 1 + %tmp2943 = getelementptr inbounds float* %tmp2942, i64 1 + %tmp2944 = getelementptr inbounds float* %tmp2943, i64 1 + %tmp2945 = getelementptr inbounds float* %tmp2944, i64 1 + %tmp2946 = getelementptr inbounds float* %tmp2945, i64 1 + %tmp2947 = getelementptr inbounds float* %tmp2946, i64 1 + %tmp2948 = getelementptr inbounds float* %tmp2947, i64 1 + %tmp2949 = getelementptr inbounds float* %tmp2948, i64 1 + %tmp2950 = getelementptr inbounds float* %tmp2949, i64 1 + %tmp2951 = getelementptr inbounds float* %tmp2950, i64 1 + %tmp2952 = getelementptr inbounds float* %tmp2951, i64 1 + %tmp2953 = getelementptr inbounds float* %tmp2952, i64 1 + %tmp2954 = getelementptr inbounds float* %tmp2953, i64 1 + %tmp2955 = getelementptr inbounds float* %tmp2954, i64 1 + %tmp2956 = getelementptr inbounds float* %tmp2955, i64 1 + %tmp2957 = getelementptr inbounds float* %tmp2956, i64 1 + %tmp2958 = getelementptr inbounds float* %tmp2957, i64 1 + %tmp2959 = getelementptr inbounds float* %tmp2958, i64 1 + %tmp2960 = getelementptr inbounds float* %tmp2959, i64 1 + %tmp2961 = getelementptr inbounds float* %tmp2960, i64 1 + %tmp2962 = getelementptr inbounds float* %tmp2961, i64 1 + %tmp2963 = getelementptr inbounds float* %tmp2962, 
i64 1 + %tmp2964 = getelementptr inbounds float* %tmp2963, i64 1 + %tmp2965 = getelementptr inbounds float* %tmp2964, i64 1 + %tmp2966 = getelementptr inbounds float* %tmp2965, i64 1 + %tmp2967 = getelementptr inbounds float* %tmp2966, i64 1 + %tmp2968 = getelementptr inbounds float* %tmp2967, i64 1 + %tmp2969 = getelementptr inbounds float* %tmp2968, i64 1 + %tmp2970 = getelementptr inbounds float* %tmp2969, i64 1 + %tmp2971 = getelementptr inbounds float* %tmp2970, i64 1 + %tmp2972 = getelementptr inbounds float* %tmp2971, i64 1 + %tmp2973 = getelementptr inbounds float* %tmp2972, i64 1 + %tmp2974 = getelementptr inbounds float* %tmp2973, i64 1 + %tmp2975 = getelementptr inbounds float* %tmp2974, i64 1 + %tmp2976 = getelementptr inbounds float* %tmp2975, i64 1 + %tmp2977 = getelementptr inbounds float* %tmp2976, i64 1 + %tmp2978 = getelementptr inbounds float* %tmp2977, i64 1 + %tmp2979 = getelementptr inbounds float* %tmp2978, i64 1 + %tmp2980 = getelementptr inbounds float* %tmp2979, i64 1 + %tmp2981 = getelementptr inbounds float* %tmp2980, i64 1 + %tmp2982 = getelementptr inbounds float* %tmp2981, i64 1 + %tmp2983 = getelementptr inbounds float* %tmp2982, i64 1 + %tmp2984 = getelementptr inbounds float* %tmp2983, i64 1 + %tmp2985 = getelementptr inbounds float* %tmp2984, i64 1 + %tmp2986 = getelementptr inbounds float* %tmp2985, i64 1 + %tmp2987 = getelementptr inbounds float* %tmp2986, i64 1 + %tmp2988 = getelementptr inbounds float* %tmp2987, i64 1 + %tmp2989 = getelementptr inbounds float* %tmp2988, i64 1 + %tmp2990 = getelementptr inbounds float* %tmp2989, i64 1 + %tmp2991 = getelementptr inbounds float* %tmp2990, i64 1 + %tmp2992 = getelementptr inbounds float* %tmp2991, i64 1 + %tmp2993 = getelementptr inbounds float* %tmp2992, i64 1 + %tmp2994 = getelementptr inbounds float* %tmp2993, i64 1 + %tmp2995 = getelementptr inbounds float* %tmp2994, i64 1 + %tmp2996 = getelementptr inbounds float* %tmp2995, i64 1 + %tmp2997 = getelementptr inbounds float* 
%tmp2996, i64 1 + %tmp2998 = getelementptr inbounds float* %tmp2997, i64 1 + %tmp2999 = getelementptr inbounds float* %tmp2998, i64 1 + %tmp3000 = getelementptr inbounds float* %tmp2999, i64 1 + %tmp3001 = getelementptr inbounds float* %tmp3000, i64 1 + %tmp3002 = getelementptr inbounds float* %tmp3001, i64 1 + %tmp3003 = getelementptr inbounds float* %tmp3002, i64 1 + %tmp3004 = getelementptr inbounds float* %tmp3003, i64 1 + %tmp3005 = getelementptr inbounds float* %tmp3004, i64 1 + %tmp3006 = getelementptr inbounds float* %tmp3005, i64 1 + %tmp3007 = getelementptr inbounds float* %tmp3006, i64 1 + %tmp3008 = getelementptr inbounds float* %tmp3007, i64 1 + %tmp3009 = getelementptr inbounds float* %tmp3008, i64 1 + %tmp3010 = getelementptr inbounds float* %tmp3009, i64 1 + %tmp3011 = getelementptr inbounds float* %tmp3010, i64 1 + %tmp3012 = getelementptr inbounds float* %tmp3011, i64 1 + %tmp3013 = getelementptr inbounds float* %tmp3012, i64 1 + %tmp3014 = getelementptr inbounds float* %tmp3013, i64 1 + %tmp3015 = getelementptr inbounds float* %tmp3014, i64 1 + %tmp3016 = getelementptr inbounds float* %tmp3015, i64 1 + %tmp3017 = getelementptr inbounds float* %tmp3016, i64 1 + %tmp3018 = getelementptr inbounds float* %tmp3017, i64 1 + %tmp3019 = getelementptr inbounds float* %tmp3018, i64 1 + %tmp3020 = getelementptr inbounds float* %tmp3019, i64 1 + %tmp3021 = getelementptr inbounds float* %tmp3020, i64 1 + %tmp3022 = getelementptr inbounds float* %tmp3021, i64 1 + %tmp3023 = getelementptr inbounds float* %tmp3022, i64 1 + %tmp3024 = getelementptr inbounds float* %tmp3023, i64 1 + %tmp3025 = getelementptr inbounds float* %tmp3024, i64 1 + %tmp3026 = getelementptr inbounds float* %tmp3025, i64 1 + %tmp3027 = getelementptr inbounds float* %tmp3026, i64 1 + %tmp3028 = getelementptr inbounds float* %tmp3027, i64 1 + %tmp3029 = getelementptr inbounds float* %tmp3028, i64 1 + %tmp3030 = getelementptr inbounds float* %tmp3029, i64 1 + %tmp3031 = getelementptr inbounds 
float* %tmp3030, i64 1 + %tmp3032 = getelementptr inbounds float* %tmp3031, i64 1 + %tmp3033 = getelementptr inbounds float* %tmp3032, i64 1 + %tmp3034 = getelementptr inbounds float* %tmp3033, i64 1 + %tmp3035 = getelementptr inbounds float* %tmp3034, i64 1 + %tmp3036 = getelementptr inbounds float* %tmp3035, i64 1 + %tmp3037 = getelementptr inbounds float* %tmp3036, i64 1 + %tmp3038 = getelementptr inbounds float* %tmp3037, i64 1 + %tmp3039 = getelementptr inbounds float* %tmp3038, i64 1 + %tmp3040 = getelementptr inbounds float* %tmp3039, i64 1 + %tmp3041 = getelementptr inbounds float* %tmp3040, i64 1 + %tmp3042 = getelementptr inbounds float* %tmp3041, i64 1 + %tmp3043 = getelementptr inbounds float* %tmp3042, i64 1 + %tmp3044 = getelementptr inbounds float* %tmp3043, i64 1 + %tmp3045 = getelementptr inbounds float* %tmp3044, i64 1 + %tmp3046 = getelementptr inbounds float* %tmp3045, i64 1 + %tmp3047 = getelementptr inbounds float* %tmp3046, i64 1 + %tmp3048 = getelementptr inbounds float* %tmp3047, i64 1 + %tmp3049 = getelementptr inbounds float* %tmp3048, i64 1 + %tmp3050 = getelementptr inbounds float* %tmp3049, i64 1 + %tmp3051 = getelementptr inbounds float* %tmp3050, i64 1 + %tmp3052 = getelementptr inbounds float* %tmp3051, i64 1 + %tmp3053 = getelementptr inbounds float* %tmp3052, i64 1 + %tmp3054 = getelementptr inbounds float* %tmp3053, i64 1 + %tmp3055 = getelementptr inbounds float* %tmp3054, i64 1 + %tmp3056 = getelementptr inbounds float* %tmp3055, i64 1 + %tmp3057 = getelementptr inbounds float* %tmp3056, i64 1 + %tmp3058 = getelementptr inbounds float* %tmp3057, i64 1 + %tmp3059 = getelementptr inbounds float* %tmp3058, i64 1 + %tmp3060 = getelementptr inbounds float* %tmp3059, i64 1 + %tmp3061 = getelementptr inbounds float* %tmp3060, i64 1 + %tmp3062 = getelementptr inbounds float* %tmp3061, i64 1 + %tmp3063 = getelementptr inbounds float* %tmp3062, i64 1 + %tmp3064 = getelementptr inbounds float* %tmp3063, i64 1 + %tmp3065 = getelementptr 
inbounds float* %tmp3064, i64 1 + %tmp3066 = getelementptr inbounds float* %tmp3065, i64 1 + %tmp3067 = getelementptr inbounds float* %tmp3066, i64 1 + %tmp3068 = getelementptr inbounds float* %tmp3067, i64 1 + %tmp3069 = getelementptr inbounds float* %tmp3068, i64 1 + %tmp3070 = getelementptr inbounds float* %tmp3069, i64 1 + %tmp3071 = getelementptr inbounds float* %tmp3070, i64 1 + %tmp3072 = getelementptr inbounds float* %tmp3071, i64 1 + %tmp3073 = getelementptr inbounds float* %tmp3072, i64 1 + %tmp3074 = getelementptr inbounds float* %tmp3073, i64 1 + %tmp3075 = getelementptr inbounds float* %tmp3074, i64 1 + %tmp3076 = getelementptr inbounds float* %tmp3075, i64 1 + %tmp3077 = getelementptr inbounds float* %tmp3076, i64 1 + %tmp3078 = getelementptr inbounds float* %tmp3077, i64 1 + %tmp3079 = getelementptr inbounds float* %tmp3078, i64 1 + %tmp3080 = getelementptr inbounds float* %tmp3079, i64 1 + %tmp3081 = getelementptr inbounds float* %tmp3080, i64 1 + %tmp3082 = getelementptr inbounds float* %tmp3081, i64 1 + %tmp3083 = getelementptr inbounds float* %tmp3082, i64 1 + %tmp3084 = getelementptr inbounds float* %tmp3083, i64 1 + %tmp3085 = getelementptr inbounds float* %tmp3084, i64 1 + %tmp3086 = getelementptr inbounds float* %tmp3085, i64 1 + %tmp3087 = getelementptr inbounds float* %tmp3086, i64 1 + %tmp3088 = getelementptr inbounds float* %tmp3087, i64 1 + %tmp3089 = getelementptr inbounds float* %tmp3088, i64 1 + %tmp3090 = getelementptr inbounds float* %tmp3089, i64 1 + %tmp3091 = getelementptr inbounds float* %tmp3090, i64 1 + %tmp3092 = getelementptr inbounds float* %tmp3091, i64 1 + %tmp3093 = getelementptr inbounds float* %tmp3092, i64 1 + %tmp3094 = getelementptr inbounds float* %tmp3093, i64 1 + %tmp3095 = getelementptr inbounds float* %tmp3094, i64 1 + %tmp3096 = getelementptr inbounds float* %tmp3095, i64 1 + %tmp3097 = getelementptr inbounds float* %tmp3096, i64 1 + %tmp3098 = getelementptr inbounds float* %tmp3097, i64 1 + %tmp3099 = 
getelementptr inbounds float* %tmp3098, i64 1 + %tmp3100 = getelementptr inbounds float* %tmp3099, i64 1 + %tmp3101 = getelementptr inbounds float* %tmp3100, i64 1 + %tmp3102 = getelementptr inbounds float* %tmp3101, i64 1 + %tmp3103 = getelementptr inbounds float* %tmp3102, i64 1 + %tmp3104 = getelementptr inbounds float* %tmp3103, i64 1 + %tmp3105 = getelementptr inbounds float* %tmp3104, i64 1 + %tmp3106 = getelementptr inbounds float* %tmp3105, i64 1 + %tmp3107 = getelementptr inbounds float* %tmp3106, i64 1 + %tmp3108 = getelementptr inbounds float* %tmp3107, i64 1 + %tmp3109 = getelementptr inbounds float* %tmp3108, i64 1 + %tmp3110 = getelementptr inbounds float* %tmp3109, i64 1 + %tmp3111 = getelementptr inbounds float* %tmp3110, i64 1 + %tmp3112 = getelementptr inbounds float* %tmp3111, i64 1 + %tmp3113 = getelementptr inbounds float* %tmp3112, i64 1 + %tmp3114 = getelementptr inbounds float* %tmp3113, i64 1 + %tmp3115 = getelementptr inbounds float* %tmp3114, i64 1 + %tmp3116 = getelementptr inbounds float* %tmp3115, i64 1 + %tmp3117 = getelementptr inbounds float* %tmp3116, i64 1 + %tmp3118 = getelementptr inbounds float* %tmp3117, i64 1 + %tmp3119 = getelementptr inbounds float* %tmp3118, i64 1 + %tmp3120 = getelementptr inbounds float* %tmp3119, i64 1 + %tmp3121 = getelementptr inbounds float* %tmp3120, i64 1 + %tmp3122 = getelementptr inbounds float* %tmp3121, i64 1 + %tmp3123 = getelementptr inbounds float* %tmp3122, i64 1 + %tmp3124 = getelementptr inbounds float* %tmp3123, i64 1 + %tmp3125 = getelementptr inbounds float* %tmp3124, i64 1 + %tmp3126 = getelementptr inbounds float* %tmp3125, i64 1 + %tmp3127 = getelementptr inbounds float* %tmp3126, i64 1 + %tmp3128 = getelementptr inbounds float* %tmp3127, i64 1 + %tmp3129 = getelementptr inbounds float* %tmp3128, i64 1 + %tmp3130 = getelementptr inbounds float* %tmp3129, i64 1 + %tmp3131 = getelementptr inbounds float* %tmp3130, i64 1 + %tmp3132 = getelementptr inbounds float* %tmp3131, i64 1 + 
%tmp3133 = getelementptr inbounds float* %tmp3132, i64 1 + %tmp3134 = getelementptr inbounds float* %tmp3133, i64 1 + %tmp3135 = getelementptr inbounds float* %tmp3134, i64 1 + %tmp3136 = getelementptr inbounds float* %tmp3135, i64 1 + %tmp3137 = getelementptr inbounds float* %tmp3136, i64 1 + %tmp3138 = getelementptr inbounds float* %tmp3137, i64 1 + %tmp3139 = getelementptr inbounds float* %tmp3138, i64 1 + %tmp3140 = getelementptr inbounds float* %tmp3139, i64 1 + %tmp3141 = getelementptr inbounds float* %tmp3140, i64 1 + %tmp3142 = getelementptr inbounds float* %tmp3141, i64 1 + %tmp3143 = getelementptr inbounds float* %tmp3142, i64 1 + %tmp3144 = getelementptr inbounds float* %tmp3143, i64 1 + %tmp3145 = getelementptr inbounds float* %tmp3144, i64 1 + %tmp3146 = getelementptr inbounds float* %tmp3145, i64 1 + %tmp3147 = getelementptr inbounds float* %tmp3146, i64 1 + %tmp3148 = getelementptr inbounds float* %tmp3147, i64 1 + %tmp3149 = getelementptr inbounds float* %tmp3148, i64 1 + %tmp3150 = getelementptr inbounds float* %tmp3149, i64 1 + %tmp3151 = getelementptr inbounds float* %tmp3150, i64 1 + %tmp3152 = getelementptr inbounds float* %tmp3151, i64 1 + %tmp3153 = getelementptr inbounds float* %tmp3152, i64 1 + %tmp3154 = getelementptr inbounds float* %tmp3153, i64 1 + %tmp3155 = getelementptr inbounds float* %tmp3154, i64 1 + %tmp3156 = getelementptr inbounds float* %tmp3155, i64 1 + %tmp3157 = getelementptr inbounds float* %tmp3156, i64 1 + %tmp3158 = getelementptr inbounds float* %tmp3157, i64 1 + %tmp3159 = getelementptr inbounds float* %tmp3158, i64 1 + %tmp3160 = getelementptr inbounds float* %tmp3159, i64 1 + %tmp3161 = getelementptr inbounds float* %tmp3160, i64 1 + %tmp3162 = getelementptr inbounds float* %tmp3161, i64 1 + %tmp3163 = getelementptr inbounds float* %tmp3162, i64 1 + %tmp3164 = getelementptr inbounds float* %tmp3163, i64 1 + %tmp3165 = getelementptr inbounds float* %tmp3164, i64 1 + %tmp3166 = getelementptr inbounds float* %tmp3165, 
i64 1 + %tmp3167 = getelementptr inbounds float* %tmp3166, i64 1 + %tmp3168 = getelementptr inbounds float* %tmp3167, i64 1 + %tmp3169 = getelementptr inbounds float* %tmp3168, i64 1 + %tmp3170 = getelementptr inbounds float* %tmp3169, i64 1 + %tmp3171 = getelementptr inbounds float* %tmp3170, i64 1 + %tmp3172 = getelementptr inbounds float* %tmp3171, i64 1 + %tmp3173 = getelementptr inbounds float* %tmp3172, i64 1 + %tmp3174 = getelementptr inbounds float* %tmp3173, i64 1 + %tmp3175 = getelementptr inbounds float* %tmp3174, i64 1 + %tmp3176 = getelementptr inbounds float* %tmp3175, i64 1 + %tmp3177 = getelementptr inbounds float* %tmp3176, i64 1 + %tmp3178 = getelementptr inbounds float* %tmp3177, i64 1 + %tmp3179 = getelementptr inbounds float* %tmp3178, i64 1 + %tmp3180 = getelementptr inbounds float* %tmp3179, i64 1 + %tmp3181 = getelementptr inbounds float* %tmp3180, i64 1 + %tmp3182 = getelementptr inbounds float* %tmp3181, i64 1 + %tmp3183 = getelementptr inbounds float* %tmp3182, i64 1 + %tmp3184 = getelementptr inbounds float* %tmp3183, i64 1 + %tmp3185 = getelementptr inbounds float* %tmp3184, i64 1 + %tmp3186 = getelementptr inbounds float* %tmp3185, i64 1 + %tmp3187 = getelementptr inbounds float* %tmp3186, i64 1 + %tmp3188 = getelementptr inbounds float* %tmp3187, i64 1 + %tmp3189 = getelementptr inbounds float* %tmp3188, i64 1 + %tmp3190 = getelementptr inbounds float* %tmp3189, i64 1 + %tmp3191 = getelementptr inbounds float* %tmp3190, i64 1 + %tmp3192 = getelementptr inbounds float* %tmp3191, i64 1 + %tmp3193 = getelementptr inbounds float* %tmp3192, i64 1 + %tmp3194 = getelementptr inbounds float* %tmp3193, i64 1 + %tmp3195 = getelementptr inbounds float* %tmp3194, i64 1 + %tmp3196 = getelementptr inbounds float* %tmp3195, i64 1 + %tmp3197 = getelementptr inbounds float* %tmp3196, i64 1 + %tmp3198 = getelementptr inbounds float* %tmp3197, i64 1 + %tmp3199 = getelementptr inbounds float* %tmp3198, i64 1 + %tmp3200 = getelementptr inbounds float* 
%tmp3199, i64 1 + %tmp3201 = getelementptr inbounds float* %tmp3200, i64 1 + %tmp3202 = getelementptr inbounds float* %tmp3201, i64 1 + %tmp3203 = getelementptr inbounds float* %tmp3202, i64 1 + %tmp3204 = getelementptr inbounds float* %tmp3203, i64 1 + %tmp3205 = getelementptr inbounds float* %tmp3204, i64 1 + %tmp3206 = getelementptr inbounds float* %tmp3205, i64 1 + %tmp3207 = getelementptr inbounds float* %tmp3206, i64 1 + %tmp3208 = getelementptr inbounds float* %tmp3207, i64 1 + %tmp3209 = getelementptr inbounds float* %tmp3208, i64 1 + %tmp3210 = getelementptr inbounds float* %tmp3209, i64 1 + %tmp3211 = getelementptr inbounds float* %tmp3210, i64 1 + %tmp3212 = getelementptr inbounds float* %tmp3211, i64 1 + %tmp3213 = getelementptr inbounds float* %tmp3212, i64 1 + %tmp3214 = getelementptr inbounds float* %tmp3213, i64 1 + %tmp3215 = getelementptr inbounds float* %tmp3214, i64 1 + %tmp3216 = getelementptr inbounds float* %tmp3215, i64 1 + %tmp3217 = getelementptr inbounds float* %tmp3216, i64 1 + %tmp3218 = getelementptr inbounds float* %tmp3217, i64 1 + %tmp3219 = getelementptr inbounds float* %tmp3218, i64 1 + %tmp3220 = getelementptr inbounds float* %tmp3219, i64 1 + %tmp3221 = getelementptr inbounds float* %tmp3220, i64 1 + %tmp3222 = getelementptr inbounds float* %tmp3221, i64 1 + %tmp3223 = getelementptr inbounds float* %tmp3222, i64 1 + %tmp3224 = getelementptr inbounds float* %tmp3223, i64 1 + %tmp3225 = getelementptr inbounds float* %tmp3224, i64 1 + %tmp3226 = getelementptr inbounds float* %tmp3225, i64 1 + %tmp3227 = getelementptr inbounds float* %tmp3226, i64 1 + %tmp3228 = getelementptr inbounds float* %tmp3227, i64 1 + %tmp3229 = getelementptr inbounds float* %tmp3228, i64 1 + %tmp3230 = getelementptr inbounds float* %tmp3229, i64 1 + %tmp3231 = getelementptr inbounds float* %tmp3230, i64 1 + %tmp3232 = getelementptr inbounds float* %tmp3231, i64 1 + %tmp3233 = getelementptr inbounds float* %tmp3232, i64 1 + %tmp3234 = getelementptr inbounds 
float* %tmp3233, i64 1 + %tmp3235 = getelementptr inbounds float* %tmp3234, i64 1 + %tmp3236 = getelementptr inbounds float* %tmp3235, i64 1 + %tmp3237 = getelementptr inbounds float* %tmp3236, i64 1 + %tmp3238 = getelementptr inbounds float* %tmp3237, i64 1 + %tmp3239 = getelementptr inbounds float* %tmp3238, i64 1 + %tmp3240 = getelementptr inbounds float* %tmp3239, i64 1 + %tmp3241 = getelementptr inbounds float* %tmp3240, i64 1 + %tmp3242 = getelementptr inbounds float* %tmp3241, i64 1 + %tmp3243 = getelementptr inbounds float* %tmp3242, i64 1 + %tmp3244 = getelementptr inbounds float* %tmp3243, i64 1 + %tmp3245 = getelementptr inbounds float* %tmp3244, i64 1 + %tmp3246 = getelementptr inbounds float* %tmp3245, i64 1 + %tmp3247 = getelementptr inbounds float* %tmp3246, i64 1 + %tmp3248 = getelementptr inbounds float* %tmp3247, i64 1 + %tmp3249 = getelementptr inbounds float* %tmp3248, i64 1 + %tmp3250 = getelementptr inbounds float* %tmp3249, i64 1 + %tmp3251 = getelementptr inbounds float* %tmp3250, i64 1 + %tmp3252 = getelementptr inbounds float* %tmp3251, i64 1 + %tmp3253 = getelementptr inbounds float* %tmp3252, i64 1 + %tmp3254 = getelementptr inbounds float* %tmp3253, i64 1 + %tmp3255 = getelementptr inbounds float* %tmp3254, i64 1 + %tmp3256 = getelementptr inbounds float* %tmp3255, i64 1 + %tmp3257 = getelementptr inbounds float* %tmp3256, i64 1 + %tmp3258 = getelementptr inbounds float* %tmp3257, i64 1 + %tmp3259 = getelementptr inbounds float* %tmp3258, i64 1 + %tmp3260 = getelementptr inbounds float* %tmp3259, i64 1 + %tmp3261 = getelementptr inbounds float* %tmp3260, i64 1 + %tmp3262 = getelementptr inbounds float* %tmp3261, i64 1 + %tmp3263 = getelementptr inbounds float* %tmp3262, i64 1 + %tmp3264 = getelementptr inbounds float* %tmp3263, i64 1 + %tmp3265 = getelementptr inbounds float* %tmp3264, i64 1 + %tmp3266 = getelementptr inbounds float* %tmp3265, i64 1 + %tmp3267 = getelementptr inbounds float* %tmp3266, i64 1 + %tmp3268 = getelementptr 
inbounds float* %tmp3267, i64 1 + %tmp3269 = getelementptr inbounds float* %tmp3268, i64 1 + %tmp3270 = getelementptr inbounds float* %tmp3269, i64 1 + %tmp3271 = getelementptr inbounds float* %tmp3270, i64 1 + %tmp3272 = getelementptr inbounds float* %tmp3271, i64 1 + %tmp3273 = getelementptr inbounds float* %tmp3272, i64 1 + %tmp3274 = getelementptr inbounds float* %tmp3273, i64 1 + %tmp3275 = getelementptr inbounds float* %tmp3274, i64 1 + %tmp3276 = getelementptr inbounds float* %tmp3275, i64 1 + %tmp3277 = getelementptr inbounds float* %tmp3276, i64 1 + %tmp3278 = getelementptr inbounds float* %tmp3277, i64 1 + %tmp3279 = getelementptr inbounds float* %tmp3278, i64 1 + %tmp3280 = getelementptr inbounds float* %tmp3279, i64 1 + %tmp3281 = getelementptr inbounds float* %tmp3280, i64 1 + %tmp3282 = getelementptr inbounds float* %tmp3281, i64 1 + %tmp3283 = getelementptr inbounds float* %tmp3282, i64 1 + %tmp3284 = getelementptr inbounds float* %tmp3283, i64 1 + %tmp3285 = getelementptr inbounds float* %tmp3284, i64 1 + %tmp3286 = getelementptr inbounds float* %tmp3285, i64 1 + %tmp3287 = getelementptr inbounds float* %tmp3286, i64 1 + %tmp3288 = getelementptr inbounds float* %tmp3287, i64 1 + %tmp3289 = getelementptr inbounds float* %tmp3288, i64 1 + %tmp3290 = getelementptr inbounds float* %tmp3289, i64 1 + %tmp3291 = getelementptr inbounds float* %tmp3290, i64 1 + %tmp3292 = getelementptr inbounds float* %tmp3291, i64 1 + %tmp3293 = getelementptr inbounds float* %tmp3292, i64 1 + %tmp3294 = getelementptr inbounds float* %tmp3293, i64 1 + %tmp3295 = getelementptr inbounds float* %tmp3294, i64 1 + %tmp3296 = getelementptr inbounds float* %tmp3295, i64 1 + %tmp3297 = getelementptr inbounds float* %tmp3296, i64 1 + %tmp3298 = getelementptr inbounds float* %tmp3297, i64 1 + %tmp3299 = getelementptr inbounds float* %tmp3298, i64 1 + %tmp3300 = getelementptr inbounds float* %tmp3299, i64 1 + %tmp3301 = getelementptr inbounds float* %tmp3300, i64 1 + %tmp3302 = 
getelementptr inbounds float* %tmp3301, i64 1 + %tmp3303 = getelementptr inbounds float* %tmp3302, i64 1 + %tmp3304 = getelementptr inbounds float* %tmp3303, i64 1 + %tmp3305 = getelementptr inbounds float* %tmp3304, i64 1 + %tmp3306 = getelementptr inbounds float* %tmp3305, i64 1 + %tmp3307 = getelementptr inbounds float* %tmp3306, i64 1 + %tmp3308 = getelementptr inbounds float* %tmp3307, i64 1 + %tmp3309 = getelementptr inbounds float* %tmp3308, i64 1 + %tmp3310 = getelementptr inbounds float* %tmp3309, i64 1 + %tmp3311 = getelementptr inbounds float* %tmp3310, i64 1 + %tmp3312 = getelementptr inbounds float* %tmp3311, i64 1 + %tmp3313 = getelementptr inbounds float* %tmp3312, i64 1 + %tmp3314 = getelementptr inbounds float* %tmp3313, i64 1 + %tmp3315 = getelementptr inbounds float* %tmp3314, i64 1 + %tmp3316 = getelementptr inbounds float* %tmp3315, i64 1 + %tmp3317 = getelementptr inbounds float* %tmp3316, i64 1 + %tmp3318 = getelementptr inbounds float* %tmp3317, i64 1 + %tmp3319 = getelementptr inbounds float* %tmp3318, i64 1 + %tmp3320 = getelementptr inbounds float* %tmp3319, i64 1 + %tmp3321 = getelementptr inbounds float* %tmp3320, i64 1 + %tmp3322 = getelementptr inbounds float* %tmp3321, i64 1 + %tmp3323 = getelementptr inbounds float* %tmp3322, i64 1 + %tmp3324 = getelementptr inbounds float* %tmp3323, i64 1 + %tmp3325 = getelementptr inbounds float* %tmp3324, i64 1 + %tmp3326 = getelementptr inbounds float* %tmp3325, i64 1 + %tmp3327 = getelementptr inbounds float* %tmp3326, i64 1 + %tmp3328 = getelementptr inbounds float* %tmp3327, i64 1 + %tmp3329 = getelementptr inbounds float* %tmp3328, i64 1 + %tmp3330 = getelementptr inbounds float* %tmp3329, i64 1 + %tmp3331 = getelementptr inbounds float* %tmp3330, i64 1 + %tmp3332 = getelementptr inbounds float* %tmp3331, i64 1 + %tmp3333 = getelementptr inbounds float* %tmp3332, i64 1 + %tmp3334 = getelementptr inbounds float* %tmp3333, i64 1 + %tmp3335 = getelementptr inbounds float* %tmp3334, i64 1 + 
%tmp3336 = getelementptr inbounds float* %tmp3335, i64 1 + %tmp3337 = getelementptr inbounds float* %tmp3336, i64 1 + %tmp3338 = getelementptr inbounds float* %tmp3337, i64 1 + %tmp3339 = getelementptr inbounds float* %tmp3338, i64 1 + %tmp3340 = getelementptr inbounds float* %tmp3339, i64 1 + %tmp3341 = getelementptr inbounds float* %tmp3340, i64 1 + %tmp3342 = getelementptr inbounds float* %tmp3341, i64 1 + %tmp3343 = getelementptr inbounds float* %tmp3342, i64 1 + %tmp3344 = getelementptr inbounds float* %tmp3343, i64 1 + %tmp3345 = getelementptr inbounds float* %tmp3344, i64 1 + %tmp3346 = getelementptr inbounds float* %tmp3345, i64 1 + %tmp3347 = getelementptr inbounds float* %tmp3346, i64 1 + %tmp3348 = getelementptr inbounds float* %tmp3347, i64 1 + %tmp3349 = getelementptr inbounds float* %tmp3348, i64 1 + %tmp3350 = getelementptr inbounds float* %tmp3349, i64 1 + %tmp3351 = getelementptr inbounds float* %tmp3350, i64 1 + %tmp3352 = getelementptr inbounds float* %tmp3351, i64 1 + %tmp3353 = getelementptr inbounds float* %tmp3352, i64 1 + %tmp3354 = getelementptr inbounds float* %tmp3353, i64 1 + %tmp3355 = getelementptr inbounds float* %tmp3354, i64 1 + %tmp3356 = getelementptr inbounds float* %tmp3355, i64 1 + %tmp3357 = getelementptr inbounds float* %tmp3356, i64 1 + %tmp3358 = getelementptr inbounds float* %tmp3357, i64 1 + %tmp3359 = getelementptr inbounds float* %tmp3358, i64 1 + %tmp3360 = getelementptr inbounds float* %tmp3359, i64 1 + %tmp3361 = getelementptr inbounds float* %tmp3360, i64 1 + %tmp3362 = getelementptr inbounds float* %tmp3361, i64 1 + %tmp3363 = getelementptr inbounds float* %tmp3362, i64 1 + %tmp3364 = getelementptr inbounds float* %tmp3363, i64 1 + %tmp3365 = getelementptr inbounds float* %tmp3364, i64 1 + %tmp3366 = getelementptr inbounds float* %tmp3365, i64 1 + %tmp3367 = getelementptr inbounds float* %tmp3366, i64 1 + %tmp3368 = getelementptr inbounds float* %tmp3367, i64 1 + %tmp3369 = getelementptr inbounds float* %tmp3368, 
i64 1 + %tmp3370 = getelementptr inbounds float* %tmp3369, i64 1 + %tmp3371 = getelementptr inbounds float* %tmp3370, i64 1 + %tmp3372 = getelementptr inbounds float* %tmp3371, i64 1 + %tmp3373 = getelementptr inbounds float* %tmp3372, i64 1 + %tmp3374 = getelementptr inbounds float* %tmp3373, i64 1 + %tmp3375 = getelementptr inbounds float* %tmp3374, i64 1 + %tmp3376 = getelementptr inbounds float* %tmp3375, i64 1 + %tmp3377 = getelementptr inbounds float* %tmp3376, i64 1 + %tmp3378 = getelementptr inbounds float* %tmp3377, i64 1 + %tmp3379 = getelementptr inbounds float* %tmp3378, i64 1 + %tmp3380 = getelementptr inbounds float* %tmp3379, i64 1 + %tmp3381 = getelementptr inbounds float* %tmp3380, i64 1 + %tmp3382 = getelementptr inbounds float* %tmp3381, i64 1 + %tmp3383 = getelementptr inbounds float* %tmp3382, i64 1 + %tmp3384 = getelementptr inbounds float* %tmp3383, i64 1 + %tmp3385 = getelementptr inbounds float* %tmp3384, i64 1 + %tmp3386 = getelementptr inbounds float* %tmp3385, i64 1 + %tmp3387 = getelementptr inbounds float* %tmp3386, i64 1 + %tmp3388 = getelementptr inbounds float* %tmp3387, i64 1 + %tmp3389 = getelementptr inbounds float* %tmp3388, i64 1 + %tmp3390 = getelementptr inbounds float* %tmp3389, i64 1 + %tmp3391 = getelementptr inbounds float* %tmp3390, i64 1 + %tmp3392 = getelementptr inbounds float* %tmp3391, i64 1 + %tmp3393 = getelementptr inbounds float* %tmp3392, i64 1 + %tmp3394 = getelementptr inbounds float* %tmp3393, i64 1 + %tmp3395 = getelementptr inbounds float* %tmp3394, i64 1 + %tmp3396 = getelementptr inbounds float* %tmp3395, i64 1 + %tmp3397 = getelementptr inbounds float* %tmp3396, i64 1 + %tmp3398 = getelementptr inbounds float* %tmp3397, i64 1 + %tmp3399 = getelementptr inbounds float* %tmp3398, i64 1 + %tmp3400 = getelementptr inbounds float* %tmp3399, i64 1 + %tmp3401 = getelementptr inbounds float* %tmp3400, i64 1 + %tmp3402 = getelementptr inbounds float* %tmp3401, i64 1 + %tmp3403 = getelementptr inbounds float* 
%tmp3402, i64 1 + %tmp3404 = getelementptr inbounds float* %tmp3403, i64 1 + %tmp3405 = getelementptr inbounds float* %tmp3404, i64 1 + %tmp3406 = getelementptr inbounds float* %tmp3405, i64 1 + %tmp3407 = getelementptr inbounds float* %tmp3406, i64 1 + %tmp3408 = getelementptr inbounds float* %tmp3407, i64 1 + %tmp3409 = getelementptr inbounds float* %tmp3408, i64 1 + %tmp3410 = getelementptr inbounds float* %tmp3409, i64 1 + %tmp3411 = getelementptr inbounds float* %tmp3410, i64 1 + %tmp3412 = getelementptr inbounds float* %tmp3411, i64 1 + %tmp3413 = getelementptr inbounds float* %tmp3412, i64 1 + %tmp3414 = getelementptr inbounds float* %tmp3413, i64 1 + %tmp3415 = getelementptr inbounds float* %tmp3414, i64 1 + %tmp3416 = getelementptr inbounds float* %tmp3415, i64 1 + %tmp3417 = getelementptr inbounds float* %tmp3416, i64 1 + %tmp3418 = getelementptr inbounds float* %tmp3417, i64 1 + %tmp3419 = getelementptr inbounds float* %tmp3418, i64 1 + %tmp3420 = getelementptr inbounds float* %tmp3419, i64 1 + %tmp3421 = getelementptr inbounds float* %tmp3420, i64 1 + %tmp3422 = getelementptr inbounds float* %tmp3421, i64 1 + %tmp3423 = getelementptr inbounds float* %tmp3422, i64 1 + %tmp3424 = getelementptr inbounds float* %tmp3423, i64 1 + %tmp3425 = getelementptr inbounds float* %tmp3424, i64 1 + %tmp3426 = getelementptr inbounds float* %tmp3425, i64 1 + %tmp3427 = getelementptr inbounds float* %tmp3426, i64 1 + %tmp3428 = getelementptr inbounds float* %tmp3427, i64 1 + %tmp3429 = getelementptr inbounds float* %tmp3428, i64 1 + %tmp3430 = getelementptr inbounds float* %tmp3429, i64 1 + %tmp3431 = getelementptr inbounds float* %tmp3430, i64 1 + %tmp3432 = getelementptr inbounds float* %tmp3431, i64 1 + %tmp3433 = getelementptr inbounds float* %tmp3432, i64 1 + %tmp3434 = getelementptr inbounds float* %tmp3433, i64 1 + %tmp3435 = getelementptr inbounds float* %tmp3434, i64 1 + %tmp3436 = getelementptr inbounds float* %tmp3435, i64 1 + %tmp3437 = getelementptr inbounds 
float* %tmp3436, i64 1 + %tmp3438 = getelementptr inbounds float* %tmp3437, i64 1 + %tmp3439 = getelementptr inbounds float* %tmp3438, i64 1 + %tmp3440 = getelementptr inbounds float* %tmp3439, i64 1 + %tmp3441 = getelementptr inbounds float* %tmp3440, i64 1 + %tmp3442 = getelementptr inbounds float* %tmp3441, i64 1 + %tmp3443 = getelementptr inbounds float* %tmp3442, i64 1 + %tmp3444 = getelementptr inbounds float* %tmp3443, i64 1 + %tmp3445 = getelementptr inbounds float* %tmp3444, i64 1 + %tmp3446 = getelementptr inbounds float* %tmp3445, i64 1 + %tmp3447 = getelementptr inbounds float* %tmp3446, i64 1 + %tmp3448 = getelementptr inbounds float* %tmp3447, i64 1 + %tmp3449 = getelementptr inbounds float* %tmp3448, i64 1 + %tmp3450 = getelementptr inbounds float* %tmp3449, i64 1 + %tmp3451 = getelementptr inbounds float* %tmp3450, i64 1 + %tmp3452 = getelementptr inbounds float* %tmp3451, i64 1 + %tmp3453 = getelementptr inbounds float* %tmp3452, i64 1 + %tmp3454 = getelementptr inbounds float* %tmp3453, i64 1 + %tmp3455 = getelementptr inbounds float* %tmp3454, i64 1 + %tmp3456 = getelementptr inbounds float* %tmp3455, i64 1 + %tmp3457 = getelementptr inbounds float* %tmp3456, i64 1 + %tmp3458 = getelementptr inbounds float* %tmp3457, i64 1 + %tmp3459 = getelementptr inbounds float* %tmp3458, i64 1 + %tmp3460 = getelementptr inbounds float* %tmp3459, i64 1 + %tmp3461 = getelementptr inbounds float* %tmp3460, i64 1 + %tmp3462 = getelementptr inbounds float* %tmp3461, i64 1 + %tmp3463 = getelementptr inbounds float* %tmp3462, i64 1 + %tmp3464 = getelementptr inbounds float* %tmp3463, i64 1 + %tmp3465 = getelementptr inbounds float* %tmp3464, i64 1 + %tmp3466 = getelementptr inbounds float* %tmp3465, i64 1 + %tmp3467 = getelementptr inbounds float* %tmp3466, i64 1 + %tmp3468 = getelementptr inbounds float* %tmp3467, i64 1 + %tmp3469 = getelementptr inbounds float* %tmp3468, i64 1 + %tmp3470 = getelementptr inbounds float* %tmp3469, i64 1 + %tmp3471 = getelementptr 
inbounds float* %tmp3470, i64 1 + %tmp3472 = getelementptr inbounds float* %tmp3471, i64 1 + %tmp3473 = getelementptr inbounds float* %tmp3472, i64 1 + %tmp3474 = getelementptr inbounds float* %tmp3473, i64 1 + %tmp3475 = getelementptr inbounds float* %tmp3474, i64 1 + %tmp3476 = getelementptr inbounds float* %tmp3475, i64 1 + %tmp3477 = getelementptr inbounds float* %tmp3476, i64 1 + %tmp3478 = getelementptr inbounds float* %tmp3477, i64 1 + %tmp3479 = getelementptr inbounds float* %tmp3478, i64 1 + %tmp3480 = getelementptr inbounds float* %tmp3479, i64 1 + %tmp3481 = getelementptr inbounds float* %tmp3480, i64 1 + %tmp3482 = getelementptr inbounds float* %tmp3481, i64 1 + %tmp3483 = getelementptr inbounds float* %tmp3482, i64 1 + %tmp3484 = getelementptr inbounds float* %tmp3483, i64 1 + %tmp3485 = getelementptr inbounds float* %tmp3484, i64 1 + %tmp3486 = getelementptr inbounds float* %tmp3485, i64 1 + %tmp3487 = getelementptr inbounds float* %tmp3486, i64 1 + %tmp3488 = getelementptr inbounds float* %tmp3487, i64 1 + %tmp3489 = getelementptr inbounds float* %tmp3488, i64 1 + %tmp3490 = getelementptr inbounds float* %tmp3489, i64 1 + %tmp3491 = getelementptr inbounds float* %tmp3490, i64 1 + %tmp3492 = getelementptr inbounds float* %tmp3491, i64 1 + %tmp3493 = getelementptr inbounds float* %tmp3492, i64 1 + %tmp3494 = getelementptr inbounds float* %tmp3493, i64 1 + %tmp3495 = getelementptr inbounds float* %tmp3494, i64 1 + %tmp3496 = getelementptr inbounds float* %tmp3495, i64 1 + %tmp3497 = getelementptr inbounds float* %tmp3496, i64 1 + %tmp3498 = getelementptr inbounds float* %tmp3497, i64 1 + %tmp3499 = getelementptr inbounds float* %tmp3498, i64 1 + %tmp3500 = getelementptr inbounds float* %tmp3499, i64 1 + %tmp3501 = getelementptr inbounds float* %tmp3500, i64 1 + %tmp3502 = getelementptr inbounds float* %tmp3501, i64 1 + %tmp3503 = getelementptr inbounds float* %tmp3502, i64 1 + %tmp3504 = getelementptr inbounds float* %tmp3503, i64 1 + %tmp3505 = 
getelementptr inbounds float* %tmp3504, i64 1 + %tmp3506 = getelementptr inbounds float* %tmp3505, i64 1 + %tmp3507 = getelementptr inbounds float* %tmp3506, i64 1 + %tmp3508 = getelementptr inbounds float* %tmp3507, i64 1 + %tmp3509 = getelementptr inbounds float* %tmp3508, i64 1 + %tmp3510 = getelementptr inbounds float* %tmp3509, i64 1 + %tmp3511 = getelementptr inbounds float* %tmp3510, i64 1 + %tmp3512 = getelementptr inbounds float* %tmp3511, i64 1 + %tmp3513 = getelementptr inbounds float* %tmp3512, i64 1 + %tmp3514 = getelementptr inbounds float* %tmp3513, i64 1 + %tmp3515 = getelementptr inbounds float* %tmp3514, i64 1 + %tmp3516 = getelementptr inbounds float* %tmp3515, i64 1 + %tmp3517 = getelementptr inbounds float* %tmp3516, i64 1 + %tmp3518 = getelementptr inbounds float* %tmp3517, i64 1 + %tmp3519 = getelementptr inbounds float* %tmp3518, i64 1 + %tmp3520 = getelementptr inbounds float* %tmp3519, i64 1 + %tmp3521 = getelementptr inbounds float* %tmp3520, i64 1 + %tmp3522 = getelementptr inbounds float* %tmp3521, i64 1 + %tmp3523 = getelementptr inbounds float* %tmp3522, i64 1 + %tmp3524 = getelementptr inbounds float* %tmp3523, i64 1 + %tmp3525 = getelementptr inbounds float* %tmp3524, i64 1 + %tmp3526 = getelementptr inbounds float* %tmp3525, i64 1 + %tmp3527 = getelementptr inbounds float* %tmp3526, i64 1 + %tmp3528 = getelementptr inbounds float* %tmp3527, i64 1 + %tmp3529 = getelementptr inbounds float* %tmp3528, i64 1 + %tmp3530 = getelementptr inbounds float* %tmp3529, i64 1 + %tmp3531 = getelementptr inbounds float* %tmp3530, i64 1 + %tmp3532 = getelementptr inbounds float* %tmp3531, i64 1 + %tmp3533 = getelementptr inbounds float* %tmp3532, i64 1 + %tmp3534 = getelementptr inbounds float* %tmp3533, i64 1 + %tmp3535 = getelementptr inbounds float* %tmp3534, i64 1 + %tmp3536 = getelementptr inbounds float* %tmp3535, i64 1 + %tmp3537 = getelementptr inbounds float* %tmp3536, i64 1 + %tmp3538 = getelementptr inbounds float* %tmp3537, i64 1 + 
%tmp3539 = getelementptr inbounds float* %tmp3538, i64 1 + %tmp3540 = getelementptr inbounds float* %tmp3539, i64 1 + %tmp3541 = getelementptr inbounds float* %tmp3540, i64 1 + %tmp3542 = getelementptr inbounds float* %tmp3541, i64 1 + %tmp3543 = getelementptr inbounds float* %tmp3542, i64 1 + %tmp3544 = getelementptr inbounds float* %tmp3543, i64 1 + %tmp3545 = getelementptr inbounds float* %tmp3544, i64 1 + %tmp3546 = getelementptr inbounds float* %tmp3545, i64 1 + %tmp3547 = getelementptr inbounds float* %tmp3546, i64 1 + %tmp3548 = getelementptr inbounds float* %tmp3547, i64 1 + %tmp3549 = getelementptr inbounds float* %tmp3548, i64 1 + %tmp3550 = getelementptr inbounds float* %tmp3549, i64 1 + %tmp3551 = getelementptr inbounds float* %tmp3550, i64 1 + %tmp3552 = getelementptr inbounds float* %tmp3551, i64 1 + %tmp3553 = getelementptr inbounds float* %tmp3552, i64 1 + %tmp3554 = getelementptr inbounds float* %tmp3553, i64 1 + %tmp3555 = getelementptr inbounds float* %tmp3554, i64 1 + %tmp3556 = getelementptr inbounds float* %tmp3555, i64 1 + %tmp3557 = getelementptr inbounds float* %tmp3556, i64 1 + %tmp3558 = getelementptr inbounds float* %tmp3557, i64 1 + %tmp3559 = getelementptr inbounds float* %tmp3558, i64 1 + %tmp3560 = getelementptr inbounds float* %tmp3559, i64 1 + %tmp3561 = getelementptr inbounds float* %tmp3560, i64 1 + %tmp3562 = getelementptr inbounds float* %tmp3561, i64 1 + %tmp3563 = getelementptr inbounds float* %tmp3562, i64 1 + %tmp3564 = getelementptr inbounds float* %tmp3563, i64 1 + %tmp3565 = getelementptr inbounds float* %tmp3564, i64 1 + %tmp3566 = getelementptr inbounds float* %tmp3565, i64 1 + %tmp3567 = getelementptr inbounds float* %tmp3566, i64 1 + %tmp3568 = getelementptr inbounds float* %tmp3567, i64 1 + %tmp3569 = getelementptr inbounds float* %tmp3568, i64 1 + %tmp3570 = getelementptr inbounds float* %tmp3569, i64 1 + %tmp3571 = getelementptr inbounds float* %tmp3570, i64 1 + %tmp3572 = getelementptr inbounds float* %tmp3571, 
i64 1 + %tmp3573 = getelementptr inbounds float* %tmp3572, i64 1 + %tmp3574 = getelementptr inbounds float* %tmp3573, i64 1 + %tmp3575 = getelementptr inbounds float* %tmp3574, i64 1 + %tmp3576 = getelementptr inbounds float* %tmp3575, i64 1 + %tmp3577 = getelementptr inbounds float* %tmp3576, i64 1 + %tmp3578 = getelementptr inbounds float* %tmp3577, i64 1 + %tmp3579 = getelementptr inbounds float* %tmp3578, i64 1 + %tmp3580 = getelementptr inbounds float* %tmp3579, i64 1 + %tmp3581 = getelementptr inbounds float* %tmp3580, i64 1 + %tmp3582 = getelementptr inbounds float* %tmp3581, i64 1 + %tmp3583 = getelementptr inbounds float* %tmp3582, i64 1 + %tmp3584 = getelementptr inbounds float* %tmp3583, i64 1 + %tmp3585 = getelementptr inbounds float* %tmp3584, i64 1 + %tmp3586 = getelementptr inbounds float* %tmp3585, i64 1 + %tmp3587 = getelementptr inbounds float* %tmp3586, i64 1 + %tmp3588 = getelementptr inbounds float* %tmp3587, i64 1 + %tmp3589 = getelementptr inbounds float* %tmp3588, i64 1 + %tmp3590 = getelementptr inbounds float* %tmp3589, i64 1 + %tmp3591 = getelementptr inbounds float* %tmp3590, i64 1 + %tmp3592 = getelementptr inbounds float* %tmp3591, i64 1 + %tmp3593 = getelementptr inbounds float* %tmp3592, i64 1 + %tmp3594 = getelementptr inbounds float* %tmp3593, i64 1 + %tmp3595 = getelementptr inbounds float* %tmp3594, i64 1 + %tmp3596 = getelementptr inbounds float* %tmp3595, i64 1 + %tmp3597 = getelementptr inbounds float* %tmp3596, i64 1 + %tmp3598 = getelementptr inbounds float* %tmp3597, i64 1 + %tmp3599 = getelementptr inbounds float* %tmp3598, i64 1 + %tmp3600 = getelementptr inbounds float* %tmp3599, i64 1 + %tmp3601 = getelementptr inbounds float* %tmp3600, i64 1 + %tmp3602 = getelementptr inbounds float* %tmp3601, i64 1 + %tmp3603 = getelementptr inbounds float* %tmp3602, i64 1 + %tmp3604 = getelementptr inbounds float* %tmp3603, i64 1 + %tmp3605 = getelementptr inbounds float* %tmp3604, i64 1 + %tmp3606 = getelementptr inbounds float* 
%tmp3605, i64 1 + %tmp3607 = getelementptr inbounds float* %tmp3606, i64 1 + %tmp3608 = getelementptr inbounds float* %tmp3607, i64 1 + %tmp3609 = getelementptr inbounds float* %tmp3608, i64 1 + %tmp3610 = getelementptr inbounds float* %tmp3609, i64 1 + %tmp3611 = getelementptr inbounds float* %tmp3610, i64 1 + %tmp3612 = getelementptr inbounds float* %tmp3611, i64 1 + %tmp3613 = getelementptr inbounds float* %tmp3612, i64 1 + %tmp3614 = getelementptr inbounds float* %tmp3613, i64 1 + %tmp3615 = getelementptr inbounds float* %tmp3614, i64 1 + %tmp3616 = getelementptr inbounds float* %tmp3615, i64 1 + %tmp3617 = getelementptr inbounds float* %tmp3616, i64 1 + %tmp3618 = getelementptr inbounds float* %tmp3617, i64 1 + %tmp3619 = getelementptr inbounds float* %tmp3618, i64 1 + %tmp3620 = getelementptr inbounds float* %tmp3619, i64 1 + %tmp3621 = getelementptr inbounds float* %tmp3620, i64 1 + %tmp3622 = getelementptr inbounds float* %tmp3621, i64 1 + %tmp3623 = getelementptr inbounds float* %tmp3622, i64 1 + %tmp3624 = getelementptr inbounds float* %tmp3623, i64 1 + %tmp3625 = getelementptr inbounds float* %tmp3624, i64 1 + %tmp3626 = getelementptr inbounds float* %tmp3625, i64 1 + %tmp3627 = getelementptr inbounds float* %tmp3626, i64 1 + %tmp3628 = getelementptr inbounds float* %tmp3627, i64 1 + %tmp3629 = getelementptr inbounds float* %tmp3628, i64 1 + %tmp3630 = getelementptr inbounds float* %tmp3629, i64 1 + %tmp3631 = getelementptr inbounds float* %tmp3630, i64 1 + %tmp3632 = getelementptr inbounds float* %tmp3631, i64 1 + %tmp3633 = getelementptr inbounds float* %tmp3632, i64 1 + %tmp3634 = getelementptr inbounds float* %tmp3633, i64 1 + %tmp3635 = getelementptr inbounds float* %tmp3634, i64 1 + %tmp3636 = getelementptr inbounds float* %tmp3635, i64 1 + %tmp3637 = getelementptr inbounds float* %tmp3636, i64 1 + %tmp3638 = getelementptr inbounds float* %tmp3637, i64 1 + %tmp3639 = getelementptr inbounds float* %tmp3638, i64 1 + %tmp3640 = getelementptr inbounds 
float* %tmp3639, i64 1 + %tmp3641 = getelementptr inbounds float* %tmp3640, i64 1 + %tmp3642 = getelementptr inbounds float* %tmp3641, i64 1 + %tmp3643 = getelementptr inbounds float* %tmp3642, i64 1 + %tmp3644 = getelementptr inbounds float* %tmp3643, i64 1 + %tmp3645 = getelementptr inbounds float* %tmp3644, i64 1 + %tmp3646 = getelementptr inbounds float* %tmp3645, i64 1 + %tmp3647 = getelementptr inbounds float* %tmp3646, i64 1 + %tmp3648 = getelementptr inbounds float* %tmp3647, i64 1 + %tmp3649 = getelementptr inbounds float* %tmp3648, i64 1 + %tmp3650 = getelementptr inbounds float* %tmp3649, i64 1 + %tmp3651 = getelementptr inbounds float* %tmp3650, i64 1 + %tmp3652 = getelementptr inbounds float* %tmp3651, i64 1 + %tmp3653 = getelementptr inbounds float* %tmp3652, i64 1 + %tmp3654 = getelementptr inbounds float* %tmp3653, i64 1 + %tmp3655 = getelementptr inbounds float* %tmp3654, i64 1 + %tmp3656 = getelementptr inbounds float* %tmp3655, i64 1 + %tmp3657 = getelementptr inbounds float* %tmp3656, i64 1 + %tmp3658 = getelementptr inbounds float* %tmp3657, i64 1 + %tmp3659 = getelementptr inbounds float* %tmp3658, i64 1 + %tmp3660 = getelementptr inbounds float* %tmp3659, i64 1 + %tmp3661 = getelementptr inbounds float* %tmp3660, i64 1 + %tmp3662 = getelementptr inbounds float* %tmp3661, i64 1 + %tmp3663 = getelementptr inbounds float* %tmp3662, i64 1 + %tmp3664 = getelementptr inbounds float* %tmp3663, i64 1 + %tmp3665 = getelementptr inbounds float* %tmp3664, i64 1 + %tmp3666 = getelementptr inbounds float* %tmp3665, i64 1 + %tmp3667 = getelementptr inbounds float* %tmp3666, i64 1 + %tmp3668 = getelementptr inbounds float* %tmp3667, i64 1 + %tmp3669 = getelementptr inbounds float* %tmp3668, i64 1 + %tmp3670 = getelementptr inbounds float* %tmp3669, i64 1 + %tmp3671 = getelementptr inbounds float* %tmp3670, i64 1 + %tmp3672 = getelementptr inbounds float* %tmp3671, i64 1 + %tmp3673 = getelementptr inbounds float* %tmp3672, i64 1 + %tmp3674 = getelementptr 
inbounds float* %tmp3673, i64 1 + %tmp3675 = getelementptr inbounds float* %tmp3674, i64 1 + %tmp3676 = getelementptr inbounds float* %tmp3675, i64 1 + %tmp3677 = getelementptr inbounds float* %tmp3676, i64 1 + %tmp3678 = getelementptr inbounds float* %tmp3677, i64 1 + %tmp3679 = getelementptr inbounds float* %tmp3678, i64 1 + %tmp3680 = getelementptr inbounds float* %tmp3679, i64 1 + %tmp3681 = getelementptr inbounds float* %tmp3680, i64 1 + %tmp3682 = getelementptr inbounds float* %tmp3681, i64 1 + %tmp3683 = getelementptr inbounds float* %tmp3682, i64 1 + %tmp3684 = getelementptr inbounds float* %tmp3683, i64 1 + %tmp3685 = getelementptr inbounds float* %tmp3684, i64 1 + %tmp3686 = getelementptr inbounds float* %tmp3685, i64 1 + %tmp3687 = getelementptr inbounds float* %tmp3686, i64 1 + %tmp3688 = getelementptr inbounds float* %tmp3687, i64 1 + %tmp3689 = getelementptr inbounds float* %tmp3688, i64 1 + %tmp3690 = getelementptr inbounds float* %tmp3689, i64 1 + %tmp3691 = getelementptr inbounds float* %tmp3690, i64 1 + %tmp3692 = getelementptr inbounds float* %tmp3691, i64 1 + %tmp3693 = getelementptr inbounds float* %tmp3692, i64 1 + %tmp3694 = getelementptr inbounds float* %tmp3693, i64 1 + %tmp3695 = getelementptr inbounds float* %tmp3694, i64 1 + %tmp3696 = getelementptr inbounds float* %tmp3695, i64 1 + %tmp3697 = getelementptr inbounds float* %tmp3696, i64 1 + %tmp3698 = getelementptr inbounds float* %tmp3697, i64 1 + %tmp3699 = getelementptr inbounds float* %tmp3698, i64 1 + %tmp3700 = getelementptr inbounds float* %tmp3699, i64 1 + %tmp3701 = getelementptr inbounds float* %tmp3700, i64 1 + %tmp3702 = getelementptr inbounds float* %tmp3701, i64 1 + %tmp3703 = getelementptr inbounds float* %tmp3702, i64 1 + %tmp3704 = getelementptr inbounds float* %tmp3703, i64 1 + %tmp3705 = getelementptr inbounds float* %tmp3704, i64 1 + %tmp3706 = getelementptr inbounds float* %tmp3705, i64 1 + %tmp3707 = getelementptr inbounds float* %tmp3706, i64 1 + %tmp3708 = 
getelementptr inbounds float* %tmp3707, i64 1 + %tmp3709 = getelementptr inbounds float* %tmp3708, i64 1 + %tmp3710 = getelementptr inbounds float* %tmp3709, i64 1 + %tmp3711 = getelementptr inbounds float* %tmp3710, i64 1 + %tmp3712 = getelementptr inbounds float* %tmp3711, i64 1 + %tmp3713 = getelementptr inbounds float* %tmp3712, i64 1 + %tmp3714 = getelementptr inbounds float* %tmp3713, i64 1 + %tmp3715 = getelementptr inbounds float* %tmp3714, i64 1 + %tmp3716 = getelementptr inbounds float* %tmp3715, i64 1 + %tmp3717 = getelementptr inbounds float* %tmp3716, i64 1 + %tmp3718 = getelementptr inbounds float* %tmp3717, i64 1 + %tmp3719 = getelementptr inbounds float* %tmp3718, i64 1 + %tmp3720 = getelementptr inbounds float* %tmp3719, i64 1 + %tmp3721 = getelementptr inbounds float* %tmp3720, i64 1 + %tmp3722 = getelementptr inbounds float* %tmp3721, i64 1 + %tmp3723 = getelementptr inbounds float* %tmp3722, i64 1 + %tmp3724 = getelementptr inbounds float* %tmp3723, i64 1 + %tmp3725 = getelementptr inbounds float* %tmp3724, i64 1 + %tmp3726 = getelementptr inbounds float* %tmp3725, i64 1 + %tmp3727 = getelementptr inbounds float* %tmp3726, i64 1 + %tmp3728 = getelementptr inbounds float* %tmp3727, i64 1 + %tmp3729 = getelementptr inbounds float* %tmp3728, i64 1 + %tmp3730 = getelementptr inbounds float* %tmp3729, i64 1 + %tmp3731 = getelementptr inbounds float* %tmp3730, i64 1 + %tmp3732 = getelementptr inbounds float* %tmp3731, i64 1 + %tmp3733 = getelementptr inbounds float* %tmp3732, i64 1 + %tmp3734 = getelementptr inbounds float* %tmp3733, i64 1 + %tmp3735 = getelementptr inbounds float* %tmp3734, i64 1 + %tmp3736 = getelementptr inbounds float* %tmp3735, i64 1 + %tmp3737 = getelementptr inbounds float* %tmp3736, i64 1 + %tmp3738 = getelementptr inbounds float* %tmp3737, i64 1 + %tmp3739 = getelementptr inbounds float* %tmp3738, i64 1 + %tmp3740 = getelementptr inbounds float* %tmp3739, i64 1 + %tmp3741 = getelementptr inbounds float* %tmp3740, i64 1 + 
%tmp3742 = getelementptr inbounds float* %tmp3741, i64 1 + %tmp3743 = getelementptr inbounds float* %tmp3742, i64 1 + %tmp3744 = getelementptr inbounds float* %tmp3743, i64 1 + %tmp3745 = getelementptr inbounds float* %tmp3744, i64 1 + %tmp3746 = getelementptr inbounds float* %tmp3745, i64 1 + %tmp3747 = getelementptr inbounds float* %tmp3746, i64 1 + %tmp3748 = getelementptr inbounds float* %tmp3747, i64 1 + %tmp3749 = getelementptr inbounds float* %tmp3748, i64 1 + %tmp3750 = getelementptr inbounds float* %tmp3749, i64 1 + %tmp3751 = getelementptr inbounds float* %tmp3750, i64 1 + %tmp3752 = getelementptr inbounds float* %tmp3751, i64 1 + %tmp3753 = getelementptr inbounds float* %tmp3752, i64 1 + %tmp3754 = getelementptr inbounds float* %tmp3753, i64 1 + %tmp3755 = getelementptr inbounds float* %tmp3754, i64 1 + %tmp3756 = getelementptr inbounds float* %tmp3755, i64 1 + %tmp3757 = getelementptr inbounds float* %tmp3756, i64 1 + %tmp3758 = getelementptr inbounds float* %tmp3757, i64 1 + %tmp3759 = getelementptr inbounds float* %tmp3758, i64 1 + %tmp3760 = getelementptr inbounds float* %tmp3759, i64 1 + %tmp3761 = getelementptr inbounds float* %tmp3760, i64 1 + %tmp3762 = getelementptr inbounds float* %tmp3761, i64 1 + %tmp3763 = getelementptr inbounds float* %tmp3762, i64 1 + %tmp3764 = getelementptr inbounds float* %tmp3763, i64 1 + %tmp3765 = getelementptr inbounds float* %tmp3764, i64 1 + %tmp3766 = getelementptr inbounds float* %tmp3765, i64 1 + %tmp3767 = getelementptr inbounds float* %tmp3766, i64 1 + %tmp3768 = getelementptr inbounds float* %tmp3767, i64 1 + %tmp3769 = getelementptr inbounds float* %tmp3768, i64 1 + %tmp3770 = getelementptr inbounds float* %tmp3769, i64 1 + %tmp3771 = getelementptr inbounds float* %tmp3770, i64 1 + %tmp3772 = getelementptr inbounds float* %tmp3771, i64 1 + %tmp3773 = getelementptr inbounds float* %tmp3772, i64 1 + %tmp3774 = getelementptr inbounds float* %tmp3773, i64 1 + %tmp3775 = getelementptr inbounds float* %tmp3774, 
i64 1 + %tmp3776 = getelementptr inbounds float* %tmp3775, i64 1 + %tmp3777 = getelementptr inbounds float* %tmp3776, i64 1 + %tmp3778 = getelementptr inbounds float* %tmp3777, i64 1 + %tmp3779 = getelementptr inbounds float* %tmp3778, i64 1 + %tmp3780 = getelementptr inbounds float* %tmp3779, i64 1 + %tmp3781 = getelementptr inbounds float* %tmp3780, i64 1 + %tmp3782 = getelementptr inbounds float* %tmp3781, i64 1 + %tmp3783 = getelementptr inbounds float* %tmp3782, i64 1 + %tmp3784 = getelementptr inbounds float* %tmp3783, i64 1 + %tmp3785 = getelementptr inbounds float* %tmp3784, i64 1 + %tmp3786 = getelementptr inbounds float* %tmp3785, i64 1 + %tmp3787 = getelementptr inbounds float* %tmp3786, i64 1 + %tmp3788 = getelementptr inbounds float* %tmp3787, i64 1 + %tmp3789 = getelementptr inbounds float* %tmp3788, i64 1 + %tmp3790 = getelementptr inbounds float* %tmp3789, i64 1 + %tmp3791 = getelementptr inbounds float* %tmp3790, i64 1 + %tmp3792 = getelementptr inbounds float* %tmp3791, i64 1 + %tmp3793 = getelementptr inbounds float* %tmp3792, i64 1 + %tmp3794 = getelementptr inbounds float* %tmp3793, i64 1 + %tmp3795 = getelementptr inbounds float* %tmp3794, i64 1 + %tmp3796 = getelementptr inbounds float* %tmp3795, i64 1 + %tmp3797 = getelementptr inbounds float* %tmp3796, i64 1 + %tmp3798 = getelementptr inbounds float* %tmp3797, i64 1 + %tmp3799 = getelementptr inbounds float* %tmp3798, i64 1 + %tmp3800 = getelementptr inbounds float* %tmp3799, i64 1 + %tmp3801 = getelementptr inbounds float* %tmp3800, i64 1 + %tmp3802 = getelementptr inbounds float* %tmp3801, i64 1 + %tmp3803 = getelementptr inbounds float* %tmp3802, i64 1 + %tmp3804 = getelementptr inbounds float* %tmp3803, i64 1 + %tmp3805 = getelementptr inbounds float* %tmp3804, i64 1 + %tmp3806 = getelementptr inbounds float* %tmp3805, i64 1 + %tmp3807 = getelementptr inbounds float* %tmp3806, i64 1 + %tmp3808 = getelementptr inbounds float* %tmp3807, i64 1 + %tmp3809 = getelementptr inbounds float* 
%tmp3808, i64 1 + %tmp3810 = getelementptr inbounds float* %tmp3809, i64 1 + %tmp3811 = getelementptr inbounds float* %tmp3810, i64 1 + %tmp3812 = getelementptr inbounds float* %tmp3811, i64 1 + %tmp3813 = getelementptr inbounds float* %tmp3812, i64 1 + %tmp3814 = getelementptr inbounds float* %tmp3813, i64 1 + %tmp3815 = getelementptr inbounds float* %tmp3814, i64 1 + %tmp3816 = getelementptr inbounds float* %tmp3815, i64 1 + %tmp3817 = getelementptr inbounds float* %tmp3816, i64 1 + %tmp3818 = getelementptr inbounds float* %tmp3817, i64 1 + %tmp3819 = getelementptr inbounds float* %tmp3818, i64 1 + %tmp3820 = getelementptr inbounds float* %tmp3819, i64 1 + %tmp3821 = getelementptr inbounds float* %tmp3820, i64 1 + %tmp3822 = getelementptr inbounds float* %tmp3821, i64 1 + %tmp3823 = getelementptr inbounds float* %tmp3822, i64 1 + %tmp3824 = getelementptr inbounds float* %tmp3823, i64 1 + %tmp3825 = getelementptr inbounds float* %tmp3824, i64 1 + %tmp3826 = getelementptr inbounds float* %tmp3825, i64 1 + %tmp3827 = getelementptr inbounds float* %tmp3826, i64 1 + %tmp3828 = getelementptr inbounds float* %tmp3827, i64 1 + %tmp3829 = getelementptr inbounds float* %tmp3828, i64 1 + %tmp3830 = getelementptr inbounds float* %tmp3829, i64 1 + %tmp3831 = getelementptr inbounds float* %tmp3830, i64 1 + %tmp3832 = getelementptr inbounds float* %tmp3831, i64 1 + %tmp3833 = getelementptr inbounds float* %tmp3832, i64 1 + %tmp3834 = getelementptr inbounds float* %tmp3833, i64 1 + %tmp3835 = getelementptr inbounds float* %tmp3834, i64 1 + %tmp3836 = getelementptr inbounds float* %tmp3835, i64 1 + %tmp3837 = getelementptr inbounds float* %tmp3836, i64 1 + %tmp3838 = getelementptr inbounds float* %tmp3837, i64 1 + %tmp3839 = getelementptr inbounds float* %tmp3838, i64 1 + %tmp3840 = getelementptr inbounds float* %tmp3839, i64 1 + %tmp3841 = getelementptr inbounds float* %tmp3840, i64 1 + %tmp3842 = getelementptr inbounds float* %tmp3841, i64 1 + %tmp3843 = getelementptr inbounds 
float* %tmp3842, i64 1 + %tmp3844 = getelementptr inbounds float* %tmp3843, i64 1 + %tmp3845 = getelementptr inbounds float* %tmp3844, i64 1 + %tmp3846 = getelementptr inbounds float* %tmp3845, i64 1 + %tmp3847 = getelementptr inbounds float* %tmp3846, i64 1 + %tmp3848 = getelementptr inbounds float* %tmp3847, i64 1 + %tmp3849 = getelementptr inbounds float* %tmp3848, i64 1 + %tmp3850 = getelementptr inbounds float* %tmp3849, i64 1 + %tmp3851 = getelementptr inbounds float* %tmp3850, i64 1 + %tmp3852 = getelementptr inbounds float* %tmp3851, i64 1 + %tmp3853 = getelementptr inbounds float* %tmp3852, i64 1 + %tmp3854 = getelementptr inbounds float* %tmp3853, i64 1 + %tmp3855 = getelementptr inbounds float* %tmp3854, i64 1 + %tmp3856 = getelementptr inbounds float* %tmp3855, i64 1 + %tmp3857 = getelementptr inbounds float* %tmp3856, i64 1 + %tmp3858 = getelementptr inbounds float* %tmp3857, i64 1 + %tmp3859 = getelementptr inbounds float* %tmp3858, i64 1 + %tmp3860 = getelementptr inbounds float* %tmp3859, i64 1 + %tmp3861 = getelementptr inbounds float* %tmp3860, i64 1 + %tmp3862 = getelementptr inbounds float* %tmp3861, i64 1 + %tmp3863 = getelementptr inbounds float* %tmp3862, i64 1 + %tmp3864 = getelementptr inbounds float* %tmp3863, i64 1 + %tmp3865 = getelementptr inbounds float* %tmp3864, i64 1 + %tmp3866 = getelementptr inbounds float* %tmp3865, i64 1 + %tmp3867 = getelementptr inbounds float* %tmp3866, i64 1 + %tmp3868 = getelementptr inbounds float* %tmp3867, i64 1 + %tmp3869 = getelementptr inbounds float* %tmp3868, i64 1 + %tmp3870 = getelementptr inbounds float* %tmp3869, i64 1 + %tmp3871 = getelementptr inbounds float* %tmp3870, i64 1 + %tmp3872 = getelementptr inbounds float* %tmp3871, i64 1 + %tmp3873 = getelementptr inbounds float* %tmp3872, i64 1 + %tmp3874 = getelementptr inbounds float* %tmp3873, i64 1 + %tmp3875 = getelementptr inbounds float* %tmp3874, i64 1 + %tmp3876 = getelementptr inbounds float* %tmp3875, i64 1 + %tmp3877 = getelementptr 
inbounds float* %tmp3876, i64 1 + %tmp3878 = getelementptr inbounds float* %tmp3877, i64 1 + %tmp3879 = getelementptr inbounds float* %tmp3878, i64 1 + %tmp3880 = getelementptr inbounds float* %tmp3879, i64 1 + %tmp3881 = getelementptr inbounds float* %tmp3880, i64 1 + %tmp3882 = getelementptr inbounds float* %tmp3881, i64 1 + %tmp3883 = getelementptr inbounds float* %tmp3882, i64 1 + %tmp3884 = getelementptr inbounds float* %tmp3883, i64 1 + %tmp3885 = getelementptr inbounds float* %tmp3884, i64 1 + %tmp3886 = getelementptr inbounds float* %tmp3885, i64 1 + %tmp3887 = getelementptr inbounds float* %tmp3886, i64 1 + %tmp3888 = getelementptr inbounds float* %tmp3887, i64 1 + %tmp3889 = getelementptr inbounds float* %tmp3888, i64 1 + %tmp3890 = getelementptr inbounds float* %tmp3889, i64 1 + %tmp3891 = getelementptr inbounds float* %tmp3890, i64 1 + %tmp3892 = getelementptr inbounds float* %tmp3891, i64 1 + %tmp3893 = getelementptr inbounds float* %tmp3892, i64 1 + %tmp3894 = getelementptr inbounds float* %tmp3893, i64 1 + %tmp3895 = getelementptr inbounds float* %tmp3894, i64 1 + %tmp3896 = getelementptr inbounds float* %tmp3895, i64 1 + %tmp3897 = getelementptr inbounds float* %tmp3896, i64 1 + %tmp3898 = getelementptr inbounds float* %tmp3897, i64 1 + %tmp3899 = getelementptr inbounds float* %tmp3898, i64 1 + %tmp3900 = getelementptr inbounds float* %tmp3899, i64 1 + %tmp3901 = getelementptr inbounds float* %tmp3900, i64 1 + %tmp3902 = getelementptr inbounds float* %tmp3901, i64 1 + %tmp3903 = getelementptr inbounds float* %tmp3902, i64 1 + %tmp3904 = getelementptr inbounds float* %tmp3903, i64 1 + %tmp3905 = getelementptr inbounds float* %tmp3904, i64 1 + %tmp3906 = getelementptr inbounds float* %tmp3905, i64 1 + %tmp3907 = getelementptr inbounds float* %tmp3906, i64 1 + %tmp3908 = getelementptr inbounds float* %tmp3907, i64 1 + %tmp3909 = getelementptr inbounds float* %tmp3908, i64 1 + %tmp3910 = getelementptr inbounds float* %tmp3909, i64 1 + %tmp3911 = 
getelementptr inbounds float* %tmp3910, i64 1 + %tmp3912 = getelementptr inbounds float* %tmp3911, i64 1 + %tmp3913 = getelementptr inbounds float* %tmp3912, i64 1 + %tmp3914 = getelementptr inbounds float* %tmp3913, i64 1 + %tmp3915 = getelementptr inbounds float* %tmp3914, i64 1 + %tmp3916 = getelementptr inbounds float* %tmp3915, i64 1 + %tmp3917 = getelementptr inbounds float* %tmp3916, i64 1 + %tmp3918 = getelementptr inbounds float* %tmp3917, i64 1 + %tmp3919 = getelementptr inbounds float* %tmp3918, i64 1 + %tmp3920 = getelementptr inbounds float* %tmp3919, i64 1 + %tmp3921 = getelementptr inbounds float* %tmp3920, i64 1 + %tmp3922 = getelementptr inbounds float* %tmp3921, i64 1 + %tmp3923 = getelementptr inbounds float* %tmp3922, i64 1 + %tmp3924 = getelementptr inbounds float* %tmp3923, i64 1 + %tmp3925 = getelementptr inbounds float* %tmp3924, i64 1 + %tmp3926 = getelementptr inbounds float* %tmp3925, i64 1 + %tmp3927 = getelementptr inbounds float* %tmp3926, i64 1 + %tmp3928 = getelementptr inbounds float* %tmp3927, i64 1 + %tmp3929 = getelementptr inbounds float* %tmp3928, i64 1 + %tmp3930 = getelementptr inbounds float* %tmp3929, i64 1 + %tmp3931 = getelementptr inbounds float* %tmp3930, i64 1 + %tmp3932 = getelementptr inbounds float* %tmp3931, i64 1 + %tmp3933 = getelementptr inbounds float* %tmp3932, i64 1 + %tmp3934 = getelementptr inbounds float* %tmp3933, i64 1 + %tmp3935 = getelementptr inbounds float* %tmp3934, i64 1 + %tmp3936 = getelementptr inbounds float* %tmp3935, i64 1 + %tmp3937 = getelementptr inbounds float* %tmp3936, i64 1 + %tmp3938 = getelementptr inbounds float* %tmp3937, i64 1 + %tmp3939 = getelementptr inbounds float* %tmp3938, i64 1 + %tmp3940 = getelementptr inbounds float* %tmp3939, i64 1 + %tmp3941 = getelementptr inbounds float* %tmp3940, i64 1 + %tmp3942 = getelementptr inbounds float* %tmp3941, i64 1 + %tmp3943 = getelementptr inbounds float* %tmp3942, i64 1 + %tmp3944 = getelementptr inbounds float* %tmp3943, i64 1 + 
%tmp3945 = getelementptr inbounds float* %tmp3944, i64 1 + %tmp3946 = getelementptr inbounds float* %tmp3945, i64 1 + %tmp3947 = getelementptr inbounds float* %tmp3946, i64 1 + %tmp3948 = getelementptr inbounds float* %tmp3947, i64 1 + %tmp3949 = getelementptr inbounds float* %tmp3948, i64 1 + %tmp3950 = getelementptr inbounds float* %tmp3949, i64 1 + %tmp3951 = getelementptr inbounds float* %tmp3950, i64 1 + %tmp3952 = getelementptr inbounds float* %tmp3951, i64 1 + %tmp3953 = getelementptr inbounds float* %tmp3952, i64 1 + %tmp3954 = getelementptr inbounds float* %tmp3953, i64 1 + %tmp3955 = getelementptr inbounds float* %tmp3954, i64 1 + %tmp3956 = getelementptr inbounds float* %tmp3955, i64 1 + %tmp3957 = getelementptr inbounds float* %tmp3956, i64 1 + %tmp3958 = getelementptr inbounds float* %tmp3957, i64 1 + %tmp3959 = getelementptr inbounds float* %tmp3958, i64 1 + %tmp3960 = getelementptr inbounds float* %tmp3959, i64 1 + %tmp3961 = getelementptr inbounds float* %tmp3960, i64 1 + %tmp3962 = getelementptr inbounds float* %tmp3961, i64 1 + %tmp3963 = getelementptr inbounds float* %tmp3962, i64 1 + %tmp3964 = getelementptr inbounds float* %tmp3963, i64 1 + %tmp3965 = getelementptr inbounds float* %tmp3964, i64 1 + %tmp3966 = getelementptr inbounds float* %tmp3965, i64 1 + %tmp3967 = getelementptr inbounds float* %tmp3966, i64 1 + %tmp3968 = getelementptr inbounds float* %tmp3967, i64 1 + %tmp3969 = getelementptr inbounds float* %tmp3968, i64 1 + %tmp3970 = getelementptr inbounds float* %tmp3969, i64 1 + %tmp3971 = getelementptr inbounds float* %tmp3970, i64 1 + %tmp3972 = getelementptr inbounds float* %tmp3971, i64 1 + %tmp3973 = getelementptr inbounds float* %tmp3972, i64 1 + %tmp3974 = getelementptr inbounds float* %tmp3973, i64 1 + %tmp3975 = getelementptr inbounds float* %tmp3974, i64 1 + %tmp3976 = getelementptr inbounds float* %tmp3975, i64 1 + %tmp3977 = getelementptr inbounds float* %tmp3976, i64 1 + %tmp3978 = getelementptr inbounds float* %tmp3977, 
i64 1 + %tmp3979 = getelementptr inbounds float* %tmp3978, i64 1 + %tmp3980 = getelementptr inbounds float* %tmp3979, i64 1 + %tmp3981 = getelementptr inbounds float* %tmp3980, i64 1 + %tmp3982 = getelementptr inbounds float* %tmp3981, i64 1 + %tmp3983 = getelementptr inbounds float* %tmp3982, i64 1 + %tmp3984 = getelementptr inbounds float* %tmp3983, i64 1 + %tmp3985 = getelementptr inbounds float* %tmp3984, i64 1 + %tmp3986 = getelementptr inbounds float* %tmp3985, i64 1 + %tmp3987 = getelementptr inbounds float* %tmp3986, i64 1 + %tmp3988 = getelementptr inbounds float* %tmp3987, i64 1 + %tmp3989 = getelementptr inbounds float* %tmp3988, i64 1 + %tmp3990 = getelementptr inbounds float* %tmp3989, i64 1 + %tmp3991 = getelementptr inbounds float* %tmp3990, i64 1 + %tmp3992 = getelementptr inbounds float* %tmp3991, i64 1 + %tmp3993 = getelementptr inbounds float* %tmp3992, i64 1 + %tmp3994 = getelementptr inbounds float* %tmp3993, i64 1 + %tmp3995 = getelementptr inbounds float* %tmp3994, i64 1 + %tmp3996 = getelementptr inbounds float* %tmp3995, i64 1 + %tmp3997 = getelementptr inbounds float* %tmp3996, i64 1 + %tmp3998 = getelementptr inbounds float* %tmp3997, i64 1 + %tmp3999 = getelementptr inbounds float* %tmp3998, i64 1 + %tmp4000 = getelementptr inbounds float* %tmp3999, i64 1 + %tmp4001 = getelementptr inbounds float* %tmp4000, i64 1 + %tmp4002 = getelementptr inbounds float* %tmp4001, i64 1 + %tmp4003 = getelementptr inbounds float* %tmp4002, i64 1 + %tmp4004 = getelementptr inbounds float* %tmp4003, i64 1 + %tmp4005 = getelementptr inbounds float* %tmp4004, i64 1 + %tmp4006 = getelementptr inbounds float* %tmp4005, i64 1 + %tmp4007 = getelementptr inbounds float* %tmp4006, i64 1 + %tmp4008 = getelementptr inbounds float* %tmp4007, i64 1 + %tmp4009 = getelementptr inbounds float* %tmp4008, i64 1 + %tmp4010 = getelementptr inbounds float* %tmp4009, i64 1 + %tmp4011 = getelementptr inbounds float* %tmp4010, i64 1 + %tmp4012 = getelementptr inbounds float* 
%tmp4011, i64 1 + %tmp4013 = getelementptr inbounds float* %tmp4012, i64 1 + %tmp4014 = getelementptr inbounds float* %tmp4013, i64 1 + %tmp4015 = getelementptr inbounds float* %tmp4014, i64 1 + %tmp4016 = getelementptr inbounds float* %tmp4015, i64 1 + %tmp4017 = getelementptr inbounds float* %tmp4016, i64 1 + %tmp4018 = getelementptr inbounds float* %tmp4017, i64 1 + %tmp4019 = getelementptr inbounds float* %tmp4018, i64 1 + %tmp4020 = getelementptr inbounds float* %tmp4019, i64 1 + %tmp4021 = getelementptr inbounds float* %tmp4020, i64 1 + %tmp4022 = getelementptr inbounds float* %tmp4021, i64 1 + %tmp4023 = getelementptr inbounds float* %tmp4022, i64 1 + %tmp4024 = getelementptr inbounds float* %tmp4023, i64 1 + %tmp4025 = getelementptr inbounds float* %tmp4024, i64 1 + %tmp4026 = getelementptr inbounds float* %tmp4025, i64 1 + %tmp4027 = getelementptr inbounds float* %tmp4026, i64 1 + %tmp4028 = getelementptr inbounds float* %tmp4027, i64 1 + %tmp4029 = getelementptr inbounds float* %tmp4028, i64 1 + %tmp4030 = getelementptr inbounds float* %tmp4029, i64 1 + %tmp4031 = getelementptr inbounds float* %tmp4030, i64 1 + %tmp4032 = getelementptr inbounds float* %tmp4031, i64 1 + %tmp4033 = getelementptr inbounds float* %tmp4032, i64 1 + %tmp4034 = getelementptr inbounds float* %tmp4033, i64 1 + %tmp4035 = getelementptr inbounds float* %tmp4034, i64 1 + %tmp4036 = getelementptr inbounds float* %tmp4035, i64 1 + %tmp4037 = getelementptr inbounds float* %tmp4036, i64 1 + %tmp4038 = getelementptr inbounds float* %tmp4037, i64 1 + %tmp4039 = getelementptr inbounds float* %tmp4038, i64 1 + %tmp4040 = getelementptr inbounds float* %tmp4039, i64 1 + %tmp4041 = getelementptr inbounds float* %tmp4040, i64 1 + %tmp4042 = getelementptr inbounds float* %tmp4041, i64 1 + %tmp4043 = getelementptr inbounds float* %tmp4042, i64 1 + %tmp4044 = getelementptr inbounds float* %tmp4043, i64 1 + %tmp4045 = getelementptr inbounds float* %tmp4044, i64 1 + %tmp4046 = getelementptr inbounds 
float* %tmp4045, i64 1 + %tmp4047 = getelementptr inbounds float* %tmp4046, i64 1 + %tmp4048 = getelementptr inbounds float* %tmp4047, i64 1 + %tmp4049 = getelementptr inbounds float* %tmp4048, i64 1 + %tmp4050 = getelementptr inbounds float* %tmp4049, i64 1 + %tmp4051 = getelementptr inbounds float* %tmp4050, i64 1 + %tmp4052 = getelementptr inbounds float* %tmp4051, i64 1 + %tmp4053 = getelementptr inbounds float* %tmp4052, i64 1 + %tmp4054 = getelementptr inbounds float* %tmp4053, i64 1 + %tmp4055 = getelementptr inbounds float* %tmp4054, i64 1 + %tmp4056 = getelementptr inbounds float* %tmp4055, i64 1 + %tmp4057 = getelementptr inbounds float* %tmp4056, i64 1 + %tmp4058 = getelementptr inbounds float* %tmp4057, i64 1 + %tmp4059 = getelementptr inbounds float* %tmp4058, i64 1 + %tmp4060 = getelementptr inbounds float* %tmp4059, i64 1 + %tmp4061 = getelementptr inbounds float* %tmp4060, i64 1 + %tmp4062 = getelementptr inbounds float* %tmp4061, i64 1 + %tmp4063 = getelementptr inbounds float* %tmp4062, i64 1 + %tmp4064 = getelementptr inbounds float* %tmp4063, i64 1 + %tmp4065 = getelementptr inbounds float* %tmp4064, i64 1 + %tmp4066 = getelementptr inbounds float* %tmp4065, i64 1 + %tmp4067 = getelementptr inbounds float* %tmp4066, i64 1 + %tmp4068 = getelementptr inbounds float* %tmp4067, i64 1 + %tmp4069 = getelementptr inbounds float* %tmp4068, i64 1 + %tmp4070 = getelementptr inbounds float* %tmp4069, i64 1 + %tmp4071 = getelementptr inbounds float* %tmp4070, i64 1 + %tmp4072 = getelementptr inbounds float* %tmp4071, i64 1 + %tmp4073 = getelementptr inbounds float* %tmp4072, i64 1 + %tmp4074 = getelementptr inbounds float* %tmp4073, i64 1 + %tmp4075 = getelementptr inbounds float* %tmp4074, i64 1 + %tmp4076 = getelementptr inbounds float* %tmp4075, i64 1 + %tmp4077 = getelementptr inbounds float* %tmp4076, i64 1 + %tmp4078 = getelementptr inbounds float* %tmp4077, i64 1 + %tmp4079 = getelementptr inbounds float* %tmp4078, i64 1 + %tmp4080 = getelementptr 
inbounds float* %tmp4079, i64 1 + %tmp4081 = getelementptr inbounds float* %tmp4080, i64 1 + %tmp4082 = getelementptr inbounds float* %tmp4081, i64 1 + %tmp4083 = getelementptr inbounds float* %tmp4082, i64 1 + %tmp4084 = getelementptr inbounds float* %tmp4083, i64 1 + %tmp4085 = getelementptr inbounds float* %tmp4084, i64 1 + %tmp4086 = getelementptr inbounds float* %tmp4085, i64 1 + %tmp4087 = getelementptr inbounds float* %tmp4086, i64 1 + %tmp4088 = getelementptr inbounds float* %tmp4087, i64 1 + %tmp4089 = getelementptr inbounds float* %tmp4088, i64 1 + %tmp4090 = getelementptr inbounds float* %tmp4089, i64 1 + %tmp4091 = getelementptr inbounds float* %tmp4090, i64 1 + %tmp4092 = getelementptr inbounds float* %tmp4091, i64 1 + %tmp4093 = getelementptr inbounds float* %tmp4092, i64 1 + %tmp4094 = getelementptr inbounds float* %tmp4093, i64 1 + %tmp4095 = getelementptr inbounds float* %tmp4094, i64 1 + %tmp4096 = getelementptr inbounds float* %tmp4095, i64 1 + %tmp4097 = getelementptr inbounds float* %tmp4096, i64 1 + %tmp4098 = getelementptr inbounds float* %tmp4097, i64 1 + %tmp4099 = getelementptr inbounds float* %tmp4098, i64 1 + %tmp4100 = getelementptr inbounds float* %tmp4099, i64 1 + %tmp4101 = getelementptr inbounds float* %tmp4100, i64 1 + %tmp4102 = getelementptr inbounds float* %tmp4101, i64 1 + %tmp4103 = getelementptr inbounds float* %tmp4102, i64 1 + %tmp4104 = getelementptr inbounds float* %tmp4103, i64 1 + %tmp4105 = getelementptr inbounds float* %tmp4104, i64 1 + %tmp4106 = getelementptr inbounds float* %tmp4105, i64 1 + %tmp4107 = getelementptr inbounds float* %tmp4106, i64 1 + %tmp4108 = getelementptr inbounds float* %tmp4107, i64 1 + %tmp4109 = getelementptr inbounds float* %tmp4108, i64 1 + %tmp4110 = getelementptr inbounds float* %tmp4109, i64 1 + %tmp4111 = getelementptr inbounds float* %tmp4110, i64 1 + %tmp4112 = getelementptr inbounds float* %tmp4111, i64 1 + %tmp4113 = getelementptr inbounds float* %tmp4112, i64 1 + %tmp4114 = 
getelementptr inbounds float* %tmp4113, i64 1 + %tmp4115 = getelementptr inbounds float* %tmp4114, i64 1 + %tmp4116 = getelementptr inbounds float* %tmp4115, i64 1 + %tmp4117 = getelementptr inbounds float* %tmp4116, i64 1 + %tmp4118 = getelementptr inbounds float* %tmp4117, i64 1 + %tmp4119 = getelementptr inbounds float* %tmp4118, i64 1 + %tmp4120 = getelementptr inbounds float* %tmp4119, i64 1 + %tmp4121 = getelementptr inbounds float* %tmp4120, i64 1 + %tmp4122 = getelementptr inbounds float* %tmp4121, i64 1 + %tmp4123 = getelementptr inbounds float* %tmp4122, i64 1 + %tmp4124 = getelementptr inbounds float* %tmp4123, i64 1 + %tmp4125 = getelementptr inbounds float* %tmp4124, i64 1 + %tmp4126 = getelementptr inbounds float* %tmp4125, i64 1 + %tmp4127 = getelementptr inbounds float* %tmp4126, i64 1 + %tmp4128 = getelementptr inbounds float* %tmp4127, i64 1 + %tmp4129 = getelementptr inbounds float* %tmp4128, i64 1 + %tmp4130 = getelementptr inbounds float* %tmp4129, i64 1 + %tmp4131 = getelementptr inbounds float* %tmp4130, i64 1 + %tmp4132 = getelementptr inbounds float* %tmp4131, i64 1 + %tmp4133 = getelementptr inbounds float* %tmp4132, i64 1 + %tmp4134 = getelementptr inbounds float* %tmp4133, i64 1 + %tmp4135 = getelementptr inbounds float* %tmp4134, i64 1 + %tmp4136 = getelementptr inbounds float* %tmp4135, i64 1 + %tmp4137 = getelementptr inbounds float* %tmp4136, i64 1 + %tmp4138 = getelementptr inbounds float* %tmp4137, i64 1 + %tmp4139 = getelementptr inbounds float* %tmp4138, i64 1 + %tmp4140 = getelementptr inbounds float* %tmp4139, i64 1 + %tmp4141 = getelementptr inbounds float* %tmp4140, i64 1 + %tmp4142 = getelementptr inbounds float* %tmp4141, i64 1 + %tmp4143 = getelementptr inbounds float* %tmp4142, i64 1 + %tmp4144 = getelementptr inbounds float* %tmp4143, i64 1 + %tmp4145 = getelementptr inbounds float* %tmp4144, i64 1 + %tmp4146 = getelementptr inbounds float* %tmp4145, i64 1 + %tmp4147 = getelementptr inbounds float* %tmp4146, i64 1 + 
%tmp4148 = getelementptr inbounds float* %tmp4147, i64 1 + %tmp4149 = getelementptr inbounds float* %tmp4148, i64 1 + %tmp4150 = getelementptr inbounds float* %tmp4149, i64 1 + %tmp4151 = getelementptr inbounds float* %tmp4150, i64 1 + %tmp4152 = getelementptr inbounds float* %tmp4151, i64 1 + %tmp4153 = getelementptr inbounds float* %tmp4152, i64 1 + %tmp4154 = getelementptr inbounds float* %tmp4153, i64 1 + %tmp4155 = getelementptr inbounds float* %tmp4154, i64 1 + %tmp4156 = getelementptr inbounds float* %tmp4155, i64 1 + %tmp4157 = getelementptr inbounds float* %tmp4156, i64 1 + %tmp4158 = getelementptr inbounds float* %tmp4157, i64 1 + %tmp4159 = getelementptr inbounds float* %tmp4158, i64 1 + %tmp4160 = getelementptr inbounds float* %tmp4159, i64 1 + %tmp4161 = getelementptr inbounds float* %tmp4160, i64 1 + %tmp4162 = getelementptr inbounds float* %tmp4161, i64 1 + %tmp4163 = getelementptr inbounds float* %tmp4162, i64 1 + %tmp4164 = getelementptr inbounds float* %tmp4163, i64 1 + %tmp4165 = getelementptr inbounds float* %tmp4164, i64 1 + %tmp4166 = getelementptr inbounds float* %tmp4165, i64 1 + %tmp4167 = getelementptr inbounds float* %tmp4166, i64 1 + %tmp4168 = getelementptr inbounds float* %tmp4167, i64 1 + %tmp4169 = getelementptr inbounds float* %tmp4168, i64 1 + %tmp4170 = getelementptr inbounds float* %tmp4169, i64 1 + %tmp4171 = getelementptr inbounds float* %tmp4170, i64 1 + %tmp4172 = getelementptr inbounds float* %tmp4171, i64 1 + %tmp4173 = getelementptr inbounds float* %tmp4172, i64 1 + %tmp4174 = getelementptr inbounds float* %tmp4173, i64 1 + %tmp4175 = getelementptr inbounds float* %tmp4174, i64 1 + %tmp4176 = getelementptr inbounds float* %tmp4175, i64 1 + %tmp4177 = getelementptr inbounds float* %tmp4176, i64 1 + %tmp4178 = getelementptr inbounds float* %tmp4177, i64 1 + %tmp4179 = getelementptr inbounds float* %tmp4178, i64 1 + %tmp4180 = getelementptr inbounds float* %tmp4179, i64 1 + %tmp4181 = getelementptr inbounds float* %tmp4180, 
i64 1 + %tmp4182 = getelementptr inbounds float* %tmp4181, i64 1 + %tmp4183 = getelementptr inbounds float* %tmp4182, i64 1 + %tmp4184 = getelementptr inbounds float* %tmp4183, i64 1 + %tmp4185 = getelementptr inbounds float* %tmp4184, i64 1 + %tmp4186 = getelementptr inbounds float* %tmp4185, i64 1 + %tmp4187 = getelementptr inbounds float* %tmp4186, i64 1 + %tmp4188 = getelementptr inbounds float* %tmp4187, i64 1 + %tmp4189 = getelementptr inbounds float* %tmp4188, i64 1 + %tmp4190 = getelementptr inbounds float* %tmp4189, i64 1 + %tmp4191 = getelementptr inbounds float* %tmp4190, i64 1 + %tmp4192 = getelementptr inbounds float* %tmp4191, i64 1 + %tmp4193 = getelementptr inbounds float* %tmp4192, i64 1 + %tmp4194 = getelementptr inbounds float* %tmp4193, i64 1 + %tmp4195 = getelementptr inbounds float* %tmp4194, i64 1 + %tmp4196 = getelementptr inbounds float* %tmp4195, i64 1 + %tmp4197 = getelementptr inbounds float* %tmp4196, i64 1 + %tmp4198 = getelementptr inbounds float* %tmp4197, i64 1 + %tmp4199 = getelementptr inbounds float* %tmp4198, i64 1 + %tmp4200 = getelementptr inbounds float* %tmp4199, i64 1 + %tmp4201 = getelementptr inbounds float* %tmp4200, i64 1 + %tmp4202 = getelementptr inbounds float* %tmp4201, i64 1 + %tmp4203 = getelementptr inbounds float* %tmp4202, i64 1 + %tmp4204 = getelementptr inbounds float* %tmp4203, i64 1 + %tmp4205 = getelementptr inbounds float* %tmp4204, i64 1 + %tmp4206 = getelementptr inbounds float* %tmp4205, i64 1 + %tmp4207 = getelementptr inbounds float* %tmp4206, i64 1 + %tmp4208 = getelementptr inbounds float* %tmp4207, i64 1 + %tmp4209 = getelementptr inbounds float* %tmp4208, i64 1 + %tmp4210 = getelementptr inbounds float* %tmp4209, i64 1 + %tmp4211 = getelementptr inbounds float* %tmp4210, i64 1 + %tmp4212 = getelementptr inbounds float* %tmp4211, i64 1 + %tmp4213 = getelementptr inbounds float* %tmp4212, i64 1 + %tmp4214 = getelementptr inbounds float* %tmp4213, i64 1 + %tmp4215 = getelementptr inbounds float* 
%tmp4214, i64 1 + %tmp4216 = getelementptr inbounds float* %tmp4215, i64 1 + %tmp4217 = getelementptr inbounds float* %tmp4216, i64 1 + %tmp4218 = getelementptr inbounds float* %tmp4217, i64 1 + %tmp4219 = getelementptr inbounds float* %tmp4218, i64 1 + %tmp4220 = getelementptr inbounds float* %tmp4219, i64 1 + %tmp4221 = getelementptr inbounds float* %tmp4220, i64 1 + %tmp4222 = getelementptr inbounds float* %tmp4221, i64 1 + %tmp4223 = getelementptr inbounds float* %tmp4222, i64 1 + %tmp4224 = getelementptr inbounds float* %tmp4223, i64 1 + %tmp4225 = getelementptr inbounds float* %tmp4224, i64 1 + %tmp4226 = getelementptr inbounds float* %tmp4225, i64 1 + %tmp4227 = getelementptr inbounds float* %tmp4226, i64 1 + %tmp4228 = getelementptr inbounds float* %tmp4227, i64 1 + %tmp4229 = getelementptr inbounds float* %tmp4228, i64 1 + %tmp4230 = getelementptr inbounds float* %tmp4229, i64 1 + %tmp4231 = getelementptr inbounds float* %tmp4230, i64 1 + %tmp4232 = getelementptr inbounds float* %tmp4231, i64 1 + %tmp4233 = getelementptr inbounds float* %tmp4232, i64 1 + %tmp4234 = getelementptr inbounds float* %tmp4233, i64 1 + %tmp4235 = getelementptr inbounds float* %tmp4234, i64 1 + %tmp4236 = getelementptr inbounds float* %tmp4235, i64 1 + %tmp4237 = getelementptr inbounds float* %tmp4236, i64 1 + %tmp4238 = getelementptr inbounds float* %tmp4237, i64 1 + %tmp4239 = getelementptr inbounds float* %tmp4238, i64 1 + %tmp4240 = getelementptr inbounds float* %tmp4239, i64 1 + %tmp4241 = getelementptr inbounds float* %tmp4240, i64 1 + %tmp4242 = getelementptr inbounds float* %tmp4241, i64 1 + %tmp4243 = getelementptr inbounds float* %tmp4242, i64 1 + %tmp4244 = getelementptr inbounds float* %tmp4243, i64 1 + %tmp4245 = getelementptr inbounds float* %tmp4244, i64 1 + %tmp4246 = getelementptr inbounds float* %tmp4245, i64 1 + %tmp4247 = getelementptr inbounds float* %tmp4246, i64 1 + %tmp4248 = getelementptr inbounds float* %tmp4247, i64 1 + %tmp4249 = getelementptr inbounds 
float* %tmp4248, i64 1 + %tmp4250 = getelementptr inbounds float* %tmp4249, i64 1 + %tmp4251 = getelementptr inbounds float* %tmp4250, i64 1 + %tmp4252 = getelementptr inbounds float* %tmp4251, i64 1 + %tmp4253 = getelementptr inbounds float* %tmp4252, i64 1 + %tmp4254 = getelementptr inbounds float* %tmp4253, i64 1 + %tmp4255 = getelementptr inbounds float* %tmp4254, i64 1 + %tmp4256 = getelementptr inbounds float* %tmp4255, i64 1 + %tmp4257 = getelementptr inbounds float* %tmp4256, i64 1 + %tmp4258 = getelementptr inbounds float* %tmp4257, i64 1 + %tmp4259 = getelementptr inbounds float* %tmp4258, i64 1 + %tmp4260 = getelementptr inbounds float* %tmp4259, i64 1 + %tmp4261 = getelementptr inbounds float* %tmp4260, i64 1 + %tmp4262 = getelementptr inbounds float* %tmp4261, i64 1 + %tmp4263 = getelementptr inbounds float* %tmp4262, i64 1 + %tmp4264 = getelementptr inbounds float* %tmp4263, i64 1 + %tmp4265 = getelementptr inbounds float* %tmp4264, i64 1 + %tmp4266 = getelementptr inbounds float* %tmp4265, i64 1 + %tmp4267 = getelementptr inbounds float* %tmp4266, i64 1 + %tmp4268 = getelementptr inbounds float* %tmp4267, i64 1 + %tmp4269 = getelementptr inbounds float* %tmp4268, i64 1 + %tmp4270 = getelementptr inbounds float* %tmp4269, i64 1 + %tmp4271 = getelementptr inbounds float* %tmp4270, i64 1 + %tmp4272 = getelementptr inbounds float* %tmp4271, i64 1 + %tmp4273 = getelementptr inbounds float* %tmp4272, i64 1 + %tmp4274 = getelementptr inbounds float* %tmp4273, i64 1 + %tmp4275 = getelementptr inbounds float* %tmp4274, i64 1 + %tmp4276 = getelementptr inbounds float* %tmp4275, i64 1 + %tmp4277 = getelementptr inbounds float* %tmp4276, i64 1 + %tmp4278 = getelementptr inbounds float* %tmp4277, i64 1 + %tmp4279 = getelementptr inbounds float* %tmp4278, i64 1 + %tmp4280 = getelementptr inbounds float* %tmp4279, i64 1 + %tmp4281 = getelementptr inbounds float* %tmp4280, i64 1 + %tmp4282 = getelementptr inbounds float* %tmp4281, i64 1 + %tmp4283 = getelementptr 
inbounds float* %tmp4282, i64 1 + %tmp4284 = getelementptr inbounds float* %tmp4283, i64 1 + %tmp4285 = getelementptr inbounds float* %tmp4284, i64 1 + %tmp4286 = getelementptr inbounds float* %tmp4285, i64 1 + %tmp4287 = getelementptr inbounds float* %tmp4286, i64 1 + %tmp4288 = getelementptr inbounds float* %tmp4287, i64 1 + %tmp4289 = getelementptr inbounds float* %tmp4288, i64 1 + %tmp4290 = getelementptr inbounds float* %tmp4289, i64 1 + %tmp4291 = getelementptr inbounds float* %tmp4290, i64 1 + %tmp4292 = getelementptr inbounds float* %tmp4291, i64 1 + %tmp4293 = getelementptr inbounds float* %tmp4292, i64 1 + %tmp4294 = getelementptr inbounds float* %tmp4293, i64 1 + %tmp4295 = getelementptr inbounds float* %tmp4294, i64 1 + %tmp4296 = getelementptr inbounds float* %tmp4295, i64 1 + %tmp4297 = getelementptr inbounds float* %tmp4296, i64 1 + %tmp4298 = getelementptr inbounds float* %tmp4297, i64 1 + %tmp4299 = getelementptr inbounds float* %tmp4298, i64 1 + %tmp4300 = getelementptr inbounds float* %tmp4299, i64 1 + %tmp4301 = getelementptr inbounds float* %tmp4300, i64 1 + %tmp4302 = getelementptr inbounds float* %tmp4301, i64 1 + %tmp4303 = getelementptr inbounds float* %tmp4302, i64 1 + %tmp4304 = getelementptr inbounds float* %tmp4303, i64 1 + %tmp4305 = getelementptr inbounds float* %tmp4304, i64 1 + %tmp4306 = getelementptr inbounds float* %tmp4305, i64 1 + %tmp4307 = getelementptr inbounds float* %tmp4306, i64 1 + %tmp4308 = getelementptr inbounds float* %tmp4307, i64 1 + %tmp4309 = getelementptr inbounds float* %tmp4308, i64 1 + %tmp4310 = getelementptr inbounds float* %tmp4309, i64 1 + %tmp4311 = getelementptr inbounds float* %tmp4310, i64 1 + %tmp4312 = getelementptr inbounds float* %tmp4311, i64 1 + %tmp4313 = getelementptr inbounds float* %tmp4312, i64 1 + %tmp4314 = getelementptr inbounds float* %tmp4313, i64 1 + %tmp4315 = getelementptr inbounds float* %tmp4314, i64 1 + %tmp4316 = getelementptr inbounds float* %tmp4315, i64 1 + %tmp4317 = 
getelementptr inbounds float* %tmp4316, i64 1 + %tmp4318 = getelementptr inbounds float* %tmp4317, i64 1 + %tmp4319 = getelementptr inbounds float* %tmp4318, i64 1 + %tmp4320 = getelementptr inbounds float* %tmp4319, i64 1 + %tmp4321 = getelementptr inbounds float* %tmp4320, i64 1 + %tmp4322 = getelementptr inbounds float* %tmp4321, i64 1 + %tmp4323 = getelementptr inbounds float* %tmp4322, i64 1 + %tmp4324 = getelementptr inbounds float* %tmp4323, i64 1 + %tmp4325 = getelementptr inbounds float* %tmp4324, i64 1 + %tmp4326 = getelementptr inbounds float* %tmp4325, i64 1 + %tmp4327 = getelementptr inbounds float* %tmp4326, i64 1 + %tmp4328 = getelementptr inbounds float* %tmp4327, i64 1 + %tmp4329 = getelementptr inbounds float* %tmp4328, i64 1 + %tmp4330 = getelementptr inbounds float* %tmp4329, i64 1 + %tmp4331 = getelementptr inbounds float* %tmp4330, i64 1 + %tmp4332 = getelementptr inbounds float* %tmp4331, i64 1 + %tmp4333 = getelementptr inbounds float* %tmp4332, i64 1 + %tmp4334 = getelementptr inbounds float* %tmp4333, i64 1 + %tmp4335 = getelementptr inbounds float* %tmp4334, i64 1 + %tmp4336 = getelementptr inbounds float* %tmp4335, i64 1 + %tmp4337 = getelementptr inbounds float* %tmp4336, i64 1 + %tmp4338 = getelementptr inbounds float* %tmp4337, i64 1 + %tmp4339 = getelementptr inbounds float* %tmp4338, i64 1 + %tmp4340 = getelementptr inbounds float* %tmp4339, i64 1 + %tmp4341 = getelementptr inbounds float* %tmp4340, i64 1 + %tmp4342 = getelementptr inbounds float* %tmp4341, i64 1 + %tmp4343 = getelementptr inbounds float* %tmp4342, i64 1 + %tmp4344 = getelementptr inbounds float* %tmp4343, i64 1 + %tmp4345 = getelementptr inbounds float* %tmp4344, i64 1 + %tmp4346 = getelementptr inbounds float* %tmp4345, i64 1 + %tmp4347 = getelementptr inbounds float* %tmp4346, i64 1 + %tmp4348 = getelementptr inbounds float* %tmp4347, i64 1 + %tmp4349 = getelementptr inbounds float* %tmp4348, i64 1 + %tmp4350 = getelementptr inbounds float* %tmp4349, i64 1 + 
%tmp4351 = getelementptr inbounds float* %tmp4350, i64 1 + %tmp4352 = getelementptr inbounds float* %tmp4351, i64 1 + %tmp4353 = getelementptr inbounds float* %tmp4352, i64 1 + %tmp4354 = getelementptr inbounds float* %tmp4353, i64 1 + %tmp4355 = getelementptr inbounds float* %tmp4354, i64 1 + %tmp4356 = getelementptr inbounds float* %tmp4355, i64 1 + %tmp4357 = getelementptr inbounds float* %tmp4356, i64 1 + %tmp4358 = getelementptr inbounds float* %tmp4357, i64 1 + %tmp4359 = getelementptr inbounds float* %tmp4358, i64 1 + %tmp4360 = getelementptr inbounds float* %tmp4359, i64 1 + %tmp4361 = getelementptr inbounds float* %tmp4360, i64 1 + %tmp4362 = getelementptr inbounds float* %tmp4361, i64 1 + %tmp4363 = getelementptr inbounds float* %tmp4362, i64 1 + %tmp4364 = getelementptr inbounds float* %tmp4363, i64 1 + %tmp4365 = getelementptr inbounds float* %tmp4364, i64 1 + %tmp4366 = getelementptr inbounds float* %tmp4365, i64 1 + %tmp4367 = getelementptr inbounds float* %tmp4366, i64 1 + %tmp4368 = getelementptr inbounds float* %tmp4367, i64 1 + %tmp4369 = getelementptr inbounds float* %tmp4368, i64 1 + %tmp4370 = getelementptr inbounds float* %tmp4369, i64 1 + %tmp4371 = getelementptr inbounds float* %tmp4370, i64 1 + %tmp4372 = getelementptr inbounds float* %tmp4371, i64 1 + %tmp4373 = getelementptr inbounds float* %tmp4372, i64 1 + %tmp4374 = getelementptr inbounds float* %tmp4373, i64 1 + %tmp4375 = getelementptr inbounds float* %tmp4374, i64 1 + %tmp4376 = getelementptr inbounds float* %tmp4375, i64 1 + %tmp4377 = getelementptr inbounds float* %tmp4376, i64 1 + %tmp4378 = getelementptr inbounds float* %tmp4377, i64 1 + %tmp4379 = getelementptr inbounds float* %tmp4378, i64 1 + %tmp4380 = getelementptr inbounds float* %tmp4379, i64 1 + %tmp4381 = getelementptr inbounds float* %tmp4380, i64 1 + %tmp4382 = getelementptr inbounds float* %tmp4381, i64 1 + %tmp4383 = getelementptr inbounds float* %tmp4382, i64 1 + %tmp4384 = getelementptr inbounds float* %tmp4383, 
i64 1 + %tmp4385 = getelementptr inbounds float* %tmp4384, i64 1 + %tmp4386 = getelementptr inbounds float* %tmp4385, i64 1 + %tmp4387 = getelementptr inbounds float* %tmp4386, i64 1 + %tmp4388 = getelementptr inbounds float* %tmp4387, i64 1 + %tmp4389 = getelementptr inbounds float* %tmp4388, i64 1 + %tmp4390 = getelementptr inbounds float* %tmp4389, i64 1 + %tmp4391 = getelementptr inbounds float* %tmp4390, i64 1 + %tmp4392 = getelementptr inbounds float* %tmp4391, i64 1 + %tmp4393 = getelementptr inbounds float* %tmp4392, i64 1 + %tmp4394 = getelementptr inbounds float* %tmp4393, i64 1 + %tmp4395 = getelementptr inbounds float* %tmp4394, i64 1 + %tmp4396 = getelementptr inbounds float* %tmp4395, i64 1 + %tmp4397 = getelementptr inbounds float* %tmp4396, i64 1 + %tmp4398 = getelementptr inbounds float* %tmp4397, i64 1 + %tmp4399 = getelementptr inbounds float* %tmp4398, i64 1 + %tmp4400 = getelementptr inbounds float* %tmp4399, i64 1 + %tmp4401 = getelementptr inbounds float* %tmp4400, i64 1 + %tmp4402 = getelementptr inbounds float* %tmp4401, i64 1 + %tmp4403 = getelementptr inbounds float* %tmp4402, i64 1 + %tmp4404 = getelementptr inbounds float* %tmp4403, i64 1 + %tmp4405 = getelementptr inbounds float* %tmp4404, i64 1 + %tmp4406 = getelementptr inbounds float* %tmp4405, i64 1 + %tmp4407 = getelementptr inbounds float* %tmp4406, i64 1 + %tmp4408 = getelementptr inbounds float* %tmp4407, i64 1 + %tmp4409 = getelementptr inbounds float* %tmp4408, i64 1 + %tmp4410 = getelementptr inbounds float* %tmp4409, i64 1 + %tmp4411 = getelementptr inbounds float* %tmp4410, i64 1 + %tmp4412 = getelementptr inbounds float* %tmp4411, i64 1 + %tmp4413 = getelementptr inbounds float* %tmp4412, i64 1 + %tmp4414 = getelementptr inbounds float* %tmp4413, i64 1 + %tmp4415 = getelementptr inbounds float* %tmp4414, i64 1 + %tmp4416 = getelementptr inbounds float* %tmp4415, i64 1 + %tmp4417 = getelementptr inbounds float* %tmp4416, i64 1 + %tmp4418 = getelementptr inbounds float* 
%tmp4417, i64 1 + %tmp4419 = getelementptr inbounds float* %tmp4418, i64 1 + %tmp4420 = getelementptr inbounds float* %tmp4419, i64 1 + %tmp4421 = getelementptr inbounds float* %tmp4420, i64 1 + %tmp4422 = getelementptr inbounds float* %tmp4421, i64 1 + %tmp4423 = getelementptr inbounds float* %tmp4422, i64 1 + %tmp4424 = getelementptr inbounds float* %tmp4423, i64 1 + %tmp4425 = getelementptr inbounds float* %tmp4424, i64 1 + %tmp4426 = getelementptr inbounds float* %tmp4425, i64 1 + %tmp4427 = getelementptr inbounds float* %tmp4426, i64 1 + %tmp4428 = getelementptr inbounds float* %tmp4427, i64 1 + %tmp4429 = getelementptr inbounds float* %tmp4428, i64 1 + %tmp4430 = getelementptr inbounds float* %tmp4429, i64 1 + %tmp4431 = getelementptr inbounds float* %tmp4430, i64 1 + %tmp4432 = getelementptr inbounds float* %tmp4431, i64 1 + %tmp4433 = getelementptr inbounds float* %tmp4432, i64 1 + %tmp4434 = getelementptr inbounds float* %tmp4433, i64 1 + %tmp4435 = getelementptr inbounds float* %tmp4434, i64 1 + %tmp4436 = getelementptr inbounds float* %tmp4435, i64 1 + %tmp4437 = getelementptr inbounds float* %tmp4436, i64 1 + %tmp4438 = getelementptr inbounds float* %tmp4437, i64 1 + %tmp4439 = getelementptr inbounds float* %tmp4438, i64 1 + %tmp4440 = getelementptr inbounds float* %tmp4439, i64 1 + %tmp4441 = getelementptr inbounds float* %tmp4440, i64 1 + %tmp4442 = getelementptr inbounds float* %tmp4441, i64 1 + %tmp4443 = getelementptr inbounds float* %tmp4442, i64 1 + %tmp4444 = getelementptr inbounds float* %tmp4443, i64 1 + %tmp4445 = getelementptr inbounds float* %tmp4444, i64 1 + %tmp4446 = getelementptr inbounds float* %tmp4445, i64 1 + %tmp4447 = getelementptr inbounds float* %tmp4446, i64 1 + %tmp4448 = getelementptr inbounds float* %tmp4447, i64 1 + %tmp4449 = getelementptr inbounds float* %tmp4448, i64 1 + %tmp4450 = getelementptr inbounds float* %tmp4449, i64 1 + %tmp4451 = getelementptr inbounds float* %tmp4450, i64 1 + %tmp4452 = getelementptr inbounds 
float* %tmp4451, i64 1 + %tmp4453 = getelementptr inbounds float* %tmp4452, i64 1 + %tmp4454 = getelementptr inbounds float* %tmp4453, i64 1 + %tmp4455 = getelementptr inbounds float* %tmp4454, i64 1 + %tmp4456 = getelementptr inbounds float* %tmp4455, i64 1 + %tmp4457 = getelementptr inbounds float* %tmp4456, i64 1 + %tmp4458 = getelementptr inbounds float* %tmp4457, i64 1 + %tmp4459 = getelementptr inbounds float* %tmp4458, i64 1 + %tmp4460 = getelementptr inbounds float* %tmp4459, i64 1 + %tmp4461 = getelementptr inbounds float* %tmp4460, i64 1 + %tmp4462 = getelementptr inbounds float* %tmp4461, i64 1 + %tmp4463 = getelementptr inbounds float* %tmp4462, i64 1 + %tmp4464 = getelementptr inbounds float* %tmp4463, i64 1 + %tmp4465 = getelementptr inbounds float* %tmp4464, i64 1 + %tmp4466 = getelementptr inbounds float* %tmp4465, i64 1 + %tmp4467 = getelementptr inbounds float* %tmp4466, i64 1 + %tmp4468 = getelementptr inbounds float* %tmp4467, i64 1 + %tmp4469 = getelementptr inbounds float* %tmp4468, i64 1 + %tmp4470 = getelementptr inbounds float* %tmp4469, i64 1 + %tmp4471 = getelementptr inbounds float* %tmp4470, i64 1 + %tmp4472 = getelementptr inbounds float* %tmp4471, i64 1 + %tmp4473 = getelementptr inbounds float* %tmp4472, i64 1 + %tmp4474 = getelementptr inbounds float* %tmp4473, i64 1 + %tmp4475 = getelementptr inbounds float* %tmp4474, i64 1 + %tmp4476 = getelementptr inbounds float* %tmp4475, i64 1 + %tmp4477 = getelementptr inbounds float* %tmp4476, i64 1 + %tmp4478 = getelementptr inbounds float* %tmp4477, i64 1 + %tmp4479 = getelementptr inbounds float* %tmp4478, i64 1 + %tmp4480 = getelementptr inbounds float* %tmp4479, i64 1 + %tmp4481 = getelementptr inbounds float* %tmp4480, i64 1 + %tmp4482 = getelementptr inbounds float* %tmp4481, i64 1 + %tmp4483 = getelementptr inbounds float* %tmp4482, i64 1 + %tmp4484 = getelementptr inbounds float* %tmp4483, i64 1 + %tmp4485 = getelementptr inbounds float* %tmp4484, i64 1 + %tmp4486 = getelementptr 
inbounds float* %tmp4485, i64 1 + %tmp4487 = getelementptr inbounds float* %tmp4486, i64 1 + %tmp4488 = getelementptr inbounds float* %tmp4487, i64 1 + %tmp4489 = getelementptr inbounds float* %tmp4488, i64 1 + %tmp4490 = getelementptr inbounds float* %tmp4489, i64 1 + %tmp4491 = getelementptr inbounds float* %tmp4490, i64 1 + %tmp4492 = getelementptr inbounds float* %tmp4491, i64 1 + %tmp4493 = getelementptr inbounds float* %tmp4492, i64 1 + %tmp4494 = getelementptr inbounds float* %tmp4493, i64 1 + %tmp4495 = getelementptr inbounds float* %tmp4494, i64 1 + %tmp4496 = getelementptr inbounds float* %tmp4495, i64 1 + %tmp4497 = getelementptr inbounds float* %tmp4496, i64 1 + %tmp4498 = getelementptr inbounds float* %tmp4497, i64 1 + %tmp4499 = getelementptr inbounds float* %tmp4498, i64 1 + %tmp4500 = getelementptr inbounds float* %tmp4499, i64 1 + %tmp4501 = getelementptr inbounds float* %tmp4500, i64 1 + %tmp4502 = getelementptr inbounds float* %tmp4501, i64 1 + %tmp4503 = getelementptr inbounds float* %tmp4502, i64 1 + %tmp4504 = getelementptr inbounds float* %tmp4503, i64 1 + %tmp4505 = getelementptr inbounds float* %tmp4504, i64 1 + %tmp4506 = getelementptr inbounds float* %tmp4505, i64 1 + %tmp4507 = getelementptr inbounds float* %tmp4506, i64 1 + %tmp4508 = getelementptr inbounds float* %tmp4507, i64 1 + %tmp4509 = getelementptr inbounds float* %tmp4508, i64 1 + %tmp4510 = getelementptr inbounds float* %tmp4509, i64 1 + %tmp4511 = getelementptr inbounds float* %tmp4510, i64 1 + %tmp4512 = getelementptr inbounds float* %tmp4511, i64 1 + %tmp4513 = getelementptr inbounds float* %tmp4512, i64 1 + %tmp4514 = getelementptr inbounds float* %tmp4513, i64 1 + %tmp4515 = getelementptr inbounds float* %tmp4514, i64 1 + %tmp4516 = getelementptr inbounds float* %tmp4515, i64 1 + %tmp4517 = getelementptr inbounds float* %tmp4516, i64 1 + %tmp4518 = getelementptr inbounds float* %tmp4517, i64 1 + %tmp4519 = getelementptr inbounds float* %tmp4518, i64 1 + %tmp4520 = 
getelementptr inbounds float* %tmp4519, i64 1 + %tmp4521 = getelementptr inbounds float* %tmp4520, i64 1 + %tmp4522 = getelementptr inbounds float* %tmp4521, i64 1 + %tmp4523 = getelementptr inbounds float* %tmp4522, i64 1 + %tmp4524 = getelementptr inbounds float* %tmp4523, i64 1 + %tmp4525 = getelementptr inbounds float* %tmp4524, i64 1 + %tmp4526 = getelementptr inbounds float* %tmp4525, i64 1 + %tmp4527 = getelementptr inbounds float* %tmp4526, i64 1 + %tmp4528 = getelementptr inbounds float* %tmp4527, i64 1 + %tmp4529 = getelementptr inbounds float* %tmp4528, i64 1 + %tmp4530 = getelementptr inbounds float* %tmp4529, i64 1 + %tmp4531 = getelementptr inbounds float* %tmp4530, i64 1 + %tmp4532 = getelementptr inbounds float* %tmp4531, i64 1 + %tmp4533 = getelementptr inbounds float* %tmp4532, i64 1 + %tmp4534 = getelementptr inbounds float* %tmp4533, i64 1 + %tmp4535 = getelementptr inbounds float* %tmp4534, i64 1 + %tmp4536 = getelementptr inbounds float* %tmp4535, i64 1 + %tmp4537 = getelementptr inbounds float* %tmp4536, i64 1 + %tmp4538 = getelementptr inbounds float* %tmp4537, i64 1 + %tmp4539 = getelementptr inbounds float* %tmp4538, i64 1 + %tmp4540 = getelementptr inbounds float* %tmp4539, i64 1 + %tmp4541 = getelementptr inbounds float* %tmp4540, i64 1 + %tmp4542 = getelementptr inbounds float* %tmp4541, i64 1 + %tmp4543 = getelementptr inbounds float* %tmp4542, i64 1 + %tmp4544 = getelementptr inbounds float* %tmp4543, i64 1 + %tmp4545 = getelementptr inbounds float* %tmp4544, i64 1 + %tmp4546 = getelementptr inbounds float* %tmp4545, i64 1 + %tmp4547 = getelementptr inbounds float* %tmp4546, i64 1 + %tmp4548 = getelementptr inbounds float* %tmp4547, i64 1 + %tmp4549 = getelementptr inbounds float* %tmp4548, i64 1 + %tmp4550 = getelementptr inbounds float* %tmp4549, i64 1 + %tmp4551 = getelementptr inbounds float* %tmp4550, i64 1 + %tmp4552 = getelementptr inbounds float* %tmp4551, i64 1 + %tmp4553 = getelementptr inbounds float* %tmp4552, i64 1 + 
%tmp4554 = getelementptr inbounds float* %tmp4553, i64 1 + %tmp4555 = getelementptr inbounds float* %tmp4554, i64 1 + %tmp4556 = getelementptr inbounds float* %tmp4555, i64 1 + %tmp4557 = getelementptr inbounds float* %tmp4556, i64 1 + %tmp4558 = getelementptr inbounds float* %tmp4557, i64 1 + %tmp4559 = getelementptr inbounds float* %tmp4558, i64 1 + %tmp4560 = getelementptr inbounds float* %tmp4559, i64 1 + %tmp4561 = getelementptr inbounds float* %tmp4560, i64 1 + %tmp4562 = getelementptr inbounds float* %tmp4561, i64 1 + %tmp4563 = getelementptr inbounds float* %tmp4562, i64 1 + %tmp4564 = getelementptr inbounds float* %tmp4563, i64 1 + %tmp4565 = getelementptr inbounds float* %tmp4564, i64 1 + %tmp4566 = getelementptr inbounds float* %tmp4565, i64 1 + %tmp4567 = getelementptr inbounds float* %tmp4566, i64 1 + %tmp4568 = getelementptr inbounds float* %tmp4567, i64 1 + %tmp4569 = getelementptr inbounds float* %tmp4568, i64 1 + %tmp4570 = getelementptr inbounds float* %tmp4569, i64 1 + %tmp4571 = getelementptr inbounds float* %tmp4570, i64 1 + %tmp4572 = getelementptr inbounds float* %tmp4571, i64 1 + %tmp4573 = getelementptr inbounds float* %tmp4572, i64 1 + %tmp4574 = getelementptr inbounds float* %tmp4573, i64 1 + %tmp4575 = getelementptr inbounds float* %tmp4574, i64 1 + %tmp4576 = getelementptr inbounds float* %tmp4575, i64 1 + %tmp4577 = getelementptr inbounds float* %tmp4576, i64 1 + %tmp4578 = getelementptr inbounds float* %tmp4577, i64 1 + %tmp4579 = getelementptr inbounds float* %tmp4578, i64 1 + %tmp4580 = getelementptr inbounds float* %tmp4579, i64 1 + %tmp4581 = getelementptr inbounds float* %tmp4580, i64 1 + %tmp4582 = getelementptr inbounds float* %tmp4581, i64 1 + %tmp4583 = getelementptr inbounds float* %tmp4582, i64 1 + %tmp4584 = getelementptr inbounds float* %tmp4583, i64 1 + %tmp4585 = getelementptr inbounds float* %tmp4584, i64 1 + %tmp4586 = getelementptr inbounds float* %tmp4585, i64 1 + %tmp4587 = getelementptr inbounds float* %tmp4586, 
i64 1 + %tmp4588 = getelementptr inbounds float* %tmp4587, i64 1 + %tmp4589 = getelementptr inbounds float* %tmp4588, i64 1 + %tmp4590 = getelementptr inbounds float* %tmp4589, i64 1 + %tmp4591 = getelementptr inbounds float* %tmp4590, i64 1 + %tmp4592 = getelementptr inbounds float* %tmp4591, i64 1 + %tmp4593 = getelementptr inbounds float* %tmp4592, i64 1 + %tmp4594 = getelementptr inbounds float* %tmp4593, i64 1 + %tmp4595 = getelementptr inbounds float* %tmp4594, i64 1 + %tmp4596 = getelementptr inbounds float* %tmp4595, i64 1 + %tmp4597 = getelementptr inbounds float* %tmp4596, i64 1 + %tmp4598 = getelementptr inbounds float* %tmp4597, i64 1 + %tmp4599 = getelementptr inbounds float* %tmp4598, i64 1 + %tmp4600 = getelementptr inbounds float* %tmp4599, i64 1 + %tmp4601 = getelementptr inbounds float* %tmp4600, i64 1 + %tmp4602 = getelementptr inbounds float* %tmp4601, i64 1 + %tmp4603 = getelementptr inbounds float* %tmp4602, i64 1 + %tmp4604 = getelementptr inbounds float* %tmp4603, i64 1 + %tmp4605 = getelementptr inbounds float* %tmp4604, i64 1 + %tmp4606 = getelementptr inbounds float* %tmp4605, i64 1 + %tmp4607 = getelementptr inbounds float* %tmp4606, i64 1 + %tmp4608 = getelementptr inbounds float* %tmp4607, i64 1 + %tmp4609 = getelementptr inbounds float* %tmp4608, i64 1 + %tmp4610 = getelementptr inbounds float* %tmp4609, i64 1 + %tmp4611 = getelementptr inbounds float* %tmp4610, i64 1 + %tmp4612 = getelementptr inbounds float* %tmp4611, i64 1 + %tmp4613 = getelementptr inbounds float* %tmp4612, i64 1 + %tmp4614 = getelementptr inbounds float* %tmp4613, i64 1 + %tmp4615 = getelementptr inbounds float* %tmp4614, i64 1 + %tmp4616 = getelementptr inbounds float* %tmp4615, i64 1 + %tmp4617 = getelementptr inbounds float* %tmp4616, i64 1 + %tmp4618 = getelementptr inbounds float* %tmp4617, i64 1 + %tmp4619 = getelementptr inbounds float* %tmp4618, i64 1 + %tmp4620 = getelementptr inbounds float* %tmp4619, i64 1 + %tmp4621 = getelementptr inbounds float* 
%tmp4620, i64 1 + %tmp4622 = getelementptr inbounds float* %tmp4621, i64 1 + %tmp4623 = getelementptr inbounds float* %tmp4622, i64 1 + %tmp4624 = getelementptr inbounds float* %tmp4623, i64 1 + %tmp4625 = getelementptr inbounds float* %tmp4624, i64 1 + %tmp4626 = getelementptr inbounds float* %tmp4625, i64 1 + %tmp4627 = getelementptr inbounds float* %tmp4626, i64 1 + %tmp4628 = getelementptr inbounds float* %tmp4627, i64 1 + %tmp4629 = getelementptr inbounds float* %tmp4628, i64 1 + %tmp4630 = getelementptr inbounds float* %tmp4629, i64 1 + %tmp4631 = getelementptr inbounds float* %tmp4630, i64 1 + %tmp4632 = getelementptr inbounds float* %tmp4631, i64 1 + %tmp4633 = getelementptr inbounds float* %tmp4632, i64 1 + %tmp4634 = getelementptr inbounds float* %tmp4633, i64 1 + %tmp4635 = getelementptr inbounds float* %tmp4634, i64 1 + %tmp4636 = getelementptr inbounds float* %tmp4635, i64 1 + %tmp4637 = getelementptr inbounds float* %tmp4636, i64 1 + %tmp4638 = getelementptr inbounds float* %tmp4637, i64 1 + %tmp4639 = getelementptr inbounds float* %tmp4638, i64 1 + %tmp4640 = getelementptr inbounds float* %tmp4639, i64 1 + %tmp4641 = getelementptr inbounds float* %tmp4640, i64 1 + %tmp4642 = getelementptr inbounds float* %tmp4641, i64 1 + %tmp4643 = getelementptr inbounds float* %tmp4642, i64 1 + %tmp4644 = getelementptr inbounds float* %tmp4643, i64 1 + %tmp4645 = getelementptr inbounds float* %tmp4644, i64 1 + %tmp4646 = getelementptr inbounds float* %tmp4645, i64 1 + %tmp4647 = getelementptr inbounds float* %tmp4646, i64 1 + %tmp4648 = getelementptr inbounds float* %tmp4647, i64 1 + %tmp4649 = getelementptr inbounds float* %tmp4648, i64 1 + %tmp4650 = getelementptr inbounds float* %tmp4649, i64 1 + %tmp4651 = getelementptr inbounds float* %tmp4650, i64 1 + %tmp4652 = getelementptr inbounds float* %tmp4651, i64 1 + %tmp4653 = getelementptr inbounds float* %tmp4652, i64 1 + %tmp4654 = getelementptr inbounds float* %tmp4653, i64 1 + %tmp4655 = getelementptr inbounds 
float* %tmp4654, i64 1 + %tmp4656 = getelementptr inbounds float* %tmp4655, i64 1 + %tmp4657 = getelementptr inbounds float* %tmp4656, i64 1 + %tmp4658 = getelementptr inbounds float* %tmp4657, i64 1 + %tmp4659 = getelementptr inbounds float* %tmp4658, i64 1 + %tmp4660 = getelementptr inbounds float* %tmp4659, i64 1 + %tmp4661 = getelementptr inbounds float* %tmp4660, i64 1 + %tmp4662 = getelementptr inbounds float* %tmp4661, i64 1 + %tmp4663 = getelementptr inbounds float* %tmp4662, i64 1 + %tmp4664 = getelementptr inbounds float* %tmp4663, i64 1 + %tmp4665 = getelementptr inbounds float* %tmp4664, i64 1 + %tmp4666 = getelementptr inbounds float* %tmp4665, i64 1 + %tmp4667 = getelementptr inbounds float* %tmp4666, i64 1 + %tmp4668 = getelementptr inbounds float* %tmp4667, i64 1 + %tmp4669 = getelementptr inbounds float* %tmp4668, i64 1 + %tmp4670 = getelementptr inbounds float* %tmp4669, i64 1 + %tmp4671 = getelementptr inbounds float* %tmp4670, i64 1 + %tmp4672 = getelementptr inbounds float* %tmp4671, i64 1 + %tmp4673 = getelementptr inbounds float* %tmp4672, i64 1 + %tmp4674 = getelementptr inbounds float* %tmp4673, i64 1 + %tmp4675 = getelementptr inbounds float* %tmp4674, i64 1 + %tmp4676 = getelementptr inbounds float* %tmp4675, i64 1 + %tmp4677 = getelementptr inbounds float* %tmp4676, i64 1 + %tmp4678 = getelementptr inbounds float* %tmp4677, i64 1 + %tmp4679 = getelementptr inbounds float* %tmp4678, i64 1 + %tmp4680 = getelementptr inbounds float* %tmp4679, i64 1 + %tmp4681 = getelementptr inbounds float* %tmp4680, i64 1 + %tmp4682 = getelementptr inbounds float* %tmp4681, i64 1 + %tmp4683 = getelementptr inbounds float* %tmp4682, i64 1 + %tmp4684 = getelementptr inbounds float* %tmp4683, i64 1 + %tmp4685 = getelementptr inbounds float* %tmp4684, i64 1 + %tmp4686 = getelementptr inbounds float* %tmp4685, i64 1 + %tmp4687 = getelementptr inbounds float* %tmp4686, i64 1 + %tmp4688 = getelementptr inbounds float* %tmp4687, i64 1 + %tmp4689 = getelementptr 
inbounds float* %tmp4688, i64 1 + %tmp4690 = getelementptr inbounds float* %tmp4689, i64 1 + %tmp4691 = getelementptr inbounds float* %tmp4690, i64 1 + %tmp4692 = getelementptr inbounds float* %tmp4691, i64 1 + %tmp4693 = getelementptr inbounds float* %tmp4692, i64 1 + %tmp4694 = getelementptr inbounds float* %tmp4693, i64 1 + %tmp4695 = getelementptr inbounds float* %tmp4694, i64 1 + %tmp4696 = getelementptr inbounds float* %tmp4695, i64 1 + %tmp4697 = getelementptr inbounds float* %tmp4696, i64 1 + %tmp4698 = getelementptr inbounds float* %tmp4697, i64 1 + %tmp4699 = getelementptr inbounds float* %tmp4698, i64 1 + %tmp4700 = getelementptr inbounds float* %tmp4699, i64 1 + %tmp4701 = getelementptr inbounds float* %tmp4700, i64 1 + %tmp4702 = getelementptr inbounds float* %tmp4701, i64 1 + %tmp4703 = getelementptr inbounds float* %tmp4702, i64 1 + %tmp4704 = getelementptr inbounds float* %tmp4703, i64 1 + %tmp4705 = getelementptr inbounds float* %tmp4704, i64 1 + %tmp4706 = getelementptr inbounds float* %tmp4705, i64 1 + %tmp4707 = getelementptr inbounds float* %tmp4706, i64 1 + %tmp4708 = getelementptr inbounds float* %tmp4707, i64 1 + %tmp4709 = getelementptr inbounds float* %tmp4708, i64 1 + %tmp4710 = getelementptr inbounds float* %tmp4709, i64 1 + %tmp4711 = getelementptr inbounds float* %tmp4710, i64 1 + %tmp4712 = getelementptr inbounds float* %tmp4711, i64 1 + %tmp4713 = getelementptr inbounds float* %tmp4712, i64 1 + %tmp4714 = getelementptr inbounds float* %tmp4713, i64 1 + %tmp4715 = getelementptr inbounds float* %tmp4714, i64 1 + %tmp4716 = getelementptr inbounds float* %tmp4715, i64 1 + %tmp4717 = getelementptr inbounds float* %tmp4716, i64 1 + %tmp4718 = getelementptr inbounds float* %tmp4717, i64 1 + %tmp4719 = getelementptr inbounds float* %tmp4718, i64 1 + %tmp4720 = getelementptr inbounds float* %tmp4719, i64 1 + %tmp4721 = getelementptr inbounds float* %tmp4720, i64 1 + %tmp4722 = getelementptr inbounds float* %tmp4721, i64 1 + %tmp4723 = 
getelementptr inbounds float* %tmp4722, i64 1 + %tmp4724 = getelementptr inbounds float* %tmp4723, i64 1 + %tmp4725 = getelementptr inbounds float* %tmp4724, i64 1 + %tmp4726 = getelementptr inbounds float* %tmp4725, i64 1 + %tmp4727 = getelementptr inbounds float* %tmp4726, i64 1 + %tmp4728 = getelementptr inbounds float* %tmp4727, i64 1 + %tmp4729 = getelementptr inbounds float* %tmp4728, i64 1 + %tmp4730 = getelementptr inbounds float* %tmp4729, i64 1 + %tmp4731 = getelementptr inbounds float* %tmp4730, i64 1 + %tmp4732 = getelementptr inbounds float* %tmp4731, i64 1 + %tmp4733 = getelementptr inbounds float* %tmp4732, i64 1 + %tmp4734 = getelementptr inbounds float* %tmp4733, i64 1 + %tmp4735 = getelementptr inbounds float* %tmp4734, i64 1 + %tmp4736 = getelementptr inbounds float* %tmp4735, i64 1 + %tmp4737 = getelementptr inbounds float* %tmp4736, i64 1 + %tmp4738 = getelementptr inbounds float* %tmp4737, i64 1 + %tmp4739 = getelementptr inbounds float* %tmp4738, i64 1 + %tmp4740 = getelementptr inbounds float* %tmp4739, i64 1 + %tmp4741 = getelementptr inbounds float* %tmp4740, i64 1 + %tmp4742 = getelementptr inbounds float* %tmp4741, i64 1 + %tmp4743 = getelementptr inbounds float* %tmp4742, i64 1 + %tmp4744 = getelementptr inbounds float* %tmp4743, i64 1 + %tmp4745 = getelementptr inbounds float* %tmp4744, i64 1 + %tmp4746 = getelementptr inbounds float* %tmp4745, i64 1 + %tmp4747 = getelementptr inbounds float* %tmp4746, i64 1 + %tmp4748 = getelementptr inbounds float* %tmp4747, i64 1 + %tmp4749 = getelementptr inbounds float* %tmp4748, i64 1 + %tmp4750 = getelementptr inbounds float* %tmp4749, i64 1 + %tmp4751 = getelementptr inbounds float* %tmp4750, i64 1 + %tmp4752 = getelementptr inbounds float* %tmp4751, i64 1 + %tmp4753 = getelementptr inbounds float* %tmp4752, i64 1 + %tmp4754 = getelementptr inbounds float* %tmp4753, i64 1 + %tmp4755 = getelementptr inbounds float* %tmp4754, i64 1 + %tmp4756 = getelementptr inbounds float* %tmp4755, i64 1 + 
%tmp4757 = getelementptr inbounds float* %tmp4756, i64 1 + %tmp4758 = getelementptr inbounds float* %tmp4757, i64 1 + %tmp4759 = getelementptr inbounds float* %tmp4758, i64 1 + %tmp4760 = getelementptr inbounds float* %tmp4759, i64 1 + %tmp4761 = getelementptr inbounds float* %tmp4760, i64 1 + %tmp4762 = getelementptr inbounds float* %tmp4761, i64 1 + %tmp4763 = getelementptr inbounds float* %tmp4762, i64 1 + %tmp4764 = getelementptr inbounds float* %tmp4763, i64 1 + %tmp4765 = getelementptr inbounds float* %tmp4764, i64 1 + %tmp4766 = getelementptr inbounds float* %tmp4765, i64 1 + %tmp4767 = getelementptr inbounds float* %tmp4766, i64 1 + %tmp4768 = getelementptr inbounds float* %tmp4767, i64 1 + %tmp4769 = getelementptr inbounds float* %tmp4768, i64 1 + %tmp4770 = getelementptr inbounds float* %tmp4769, i64 1 + %tmp4771 = getelementptr inbounds float* %tmp4770, i64 1 + %tmp4772 = getelementptr inbounds float* %tmp4771, i64 1 + %tmp4773 = getelementptr inbounds float* %tmp4772, i64 1 + %tmp4774 = getelementptr inbounds float* %tmp4773, i64 1 + %tmp4775 = getelementptr inbounds float* %tmp4774, i64 1 + %tmp4776 = getelementptr inbounds float* %tmp4775, i64 1 + %tmp4777 = getelementptr inbounds float* %tmp4776, i64 1 + %tmp4778 = getelementptr inbounds float* %tmp4777, i64 1 + %tmp4779 = getelementptr inbounds float* %tmp4778, i64 1 + %tmp4780 = getelementptr inbounds float* %tmp4779, i64 1 + %tmp4781 = getelementptr inbounds float* %tmp4780, i64 1 + %tmp4782 = getelementptr inbounds float* %tmp4781, i64 1 + %tmp4783 = getelementptr inbounds float* %tmp4782, i64 1 + %tmp4784 = getelementptr inbounds float* %tmp4783, i64 1 + %tmp4785 = getelementptr inbounds float* %tmp4784, i64 1 + %tmp4786 = getelementptr inbounds float* %tmp4785, i64 1 + %tmp4787 = getelementptr inbounds float* %tmp4786, i64 1 + %tmp4788 = getelementptr inbounds float* %tmp4787, i64 1 + %tmp4789 = getelementptr inbounds float* %tmp4788, i64 1 + %tmp4790 = getelementptr inbounds float* %tmp4789, 
i64 1 + %tmp4791 = getelementptr inbounds float* %tmp4790, i64 1 + %tmp4792 = getelementptr inbounds float* %tmp4791, i64 1 + %tmp4793 = getelementptr inbounds float* %tmp4792, i64 1 + %tmp4794 = getelementptr inbounds float* %tmp4793, i64 1 + %tmp4795 = getelementptr inbounds float* %tmp4794, i64 1 + %tmp4796 = getelementptr inbounds float* %tmp4795, i64 1 + %tmp4797 = getelementptr inbounds float* %tmp4796, i64 1 + %tmp4798 = getelementptr inbounds float* %tmp4797, i64 1 + %tmp4799 = getelementptr inbounds float* %tmp4798, i64 1 + %tmp4800 = getelementptr inbounds float* %tmp4799, i64 1 + %tmp4801 = getelementptr inbounds float* %tmp4800, i64 1 + %tmp4802 = getelementptr inbounds float* %tmp4801, i64 1 + %tmp4803 = getelementptr inbounds float* %tmp4802, i64 1 + %tmp4804 = getelementptr inbounds float* %tmp4803, i64 1 + %tmp4805 = getelementptr inbounds float* %tmp4804, i64 1 + %tmp4806 = getelementptr inbounds float* %tmp4805, i64 1 + %tmp4807 = getelementptr inbounds float* %tmp4806, i64 1 + %tmp4808 = getelementptr inbounds float* %tmp4807, i64 1 + %tmp4809 = getelementptr inbounds float* %tmp4808, i64 1 + %tmp4810 = getelementptr inbounds float* %tmp4809, i64 1 + %tmp4811 = getelementptr inbounds float* %tmp4810, i64 1 + %tmp4812 = getelementptr inbounds float* %tmp4811, i64 1 + %tmp4813 = getelementptr inbounds float* %tmp4812, i64 1 + %tmp4814 = getelementptr inbounds float* %tmp4813, i64 1 + %tmp4815 = getelementptr inbounds float* %tmp4814, i64 1 + %tmp4816 = getelementptr inbounds float* %tmp4815, i64 1 + %tmp4817 = getelementptr inbounds float* %tmp4816, i64 1 + %tmp4818 = getelementptr inbounds float* %tmp4817, i64 1 + %tmp4819 = getelementptr inbounds float* %tmp4818, i64 1 + %tmp4820 = getelementptr inbounds float* %tmp4819, i64 1 + %tmp4821 = getelementptr inbounds float* %tmp4820, i64 1 + %tmp4822 = getelementptr inbounds float* %tmp4821, i64 1 + %tmp4823 = getelementptr inbounds float* %tmp4822, i64 1 + %tmp4824 = getelementptr inbounds float* 
%tmp4823, i64 1 + %tmp4825 = getelementptr inbounds float* %tmp4824, i64 1 + %tmp4826 = getelementptr inbounds float* %tmp4825, i64 1 + %tmp4827 = getelementptr inbounds float* %tmp4826, i64 1 + %tmp4828 = getelementptr inbounds float* %tmp4827, i64 1 + %tmp4829 = getelementptr inbounds float* %tmp4828, i64 1 + %tmp4830 = getelementptr inbounds float* %tmp4829, i64 1 + %tmp4831 = getelementptr inbounds float* %tmp4830, i64 1 + %tmp4832 = getelementptr inbounds float* %tmp4831, i64 1 + %tmp4833 = getelementptr inbounds float* %tmp4832, i64 1 + %tmp4834 = getelementptr inbounds float* %tmp4833, i64 1 + %tmp4835 = getelementptr inbounds float* %tmp4834, i64 1 + %tmp4836 = getelementptr inbounds float* %tmp4835, i64 1 + %tmp4837 = getelementptr inbounds float* %tmp4836, i64 1 + %tmp4838 = getelementptr inbounds float* %tmp4837, i64 1 + %tmp4839 = getelementptr inbounds float* %tmp4838, i64 1 + %tmp4840 = getelementptr inbounds float* %tmp4839, i64 1 + %tmp4841 = getelementptr inbounds float* %tmp4840, i64 1 + %tmp4842 = getelementptr inbounds float* %tmp4841, i64 1 + %tmp4843 = getelementptr inbounds float* %tmp4842, i64 1 + %tmp4844 = getelementptr inbounds float* %tmp4843, i64 1 + %tmp4845 = getelementptr inbounds float* %tmp4844, i64 1 + %tmp4846 = getelementptr inbounds float* %tmp4845, i64 1 + %tmp4847 = getelementptr inbounds float* %tmp4846, i64 1 + %tmp4848 = getelementptr inbounds float* %tmp4847, i64 1 + %tmp4849 = getelementptr inbounds float* %tmp4848, i64 1 + %tmp4850 = getelementptr inbounds float* %tmp4849, i64 1 + %tmp4851 = getelementptr inbounds float* %tmp4850, i64 1 + %tmp4852 = getelementptr inbounds float* %tmp4851, i64 1 + %tmp4853 = getelementptr inbounds float* %tmp4852, i64 1 + %tmp4854 = getelementptr inbounds float* %tmp4853, i64 1 + %tmp4855 = getelementptr inbounds float* %tmp4854, i64 1 + %tmp4856 = getelementptr inbounds float* %tmp4855, i64 1 + %tmp4857 = getelementptr inbounds float* %tmp4856, i64 1 + %tmp4858 = getelementptr inbounds 
float* %tmp4857, i64 1 + %tmp4859 = getelementptr inbounds float* %tmp4858, i64 1 + %tmp4860 = getelementptr inbounds float* %tmp4859, i64 1 + %tmp4861 = getelementptr inbounds float* %tmp4860, i64 1 + %tmp4862 = getelementptr inbounds float* %tmp4861, i64 1 + %tmp4863 = getelementptr inbounds float* %tmp4862, i64 1 + %tmp4864 = getelementptr inbounds float* %tmp4863, i64 1 + %tmp4865 = getelementptr inbounds float* %tmp4864, i64 1 + %tmp4866 = getelementptr inbounds float* %tmp4865, i64 1 + %tmp4867 = getelementptr inbounds float* %tmp4866, i64 1 + %tmp4868 = getelementptr inbounds float* %tmp4867, i64 1 + %tmp4869 = getelementptr inbounds float* %tmp4868, i64 1 + %tmp4870 = getelementptr inbounds float* %tmp4869, i64 1 + %tmp4871 = getelementptr inbounds float* %tmp4870, i64 1 + %tmp4872 = getelementptr inbounds float* %tmp4871, i64 1 + %tmp4873 = getelementptr inbounds float* %tmp4872, i64 1 + %tmp4874 = getelementptr inbounds float* %tmp4873, i64 1 + %tmp4875 = getelementptr inbounds float* %tmp4874, i64 1 + %tmp4876 = getelementptr inbounds float* %tmp4875, i64 1 + %tmp4877 = getelementptr inbounds float* %tmp4876, i64 1 + %tmp4878 = getelementptr inbounds float* %tmp4877, i64 1 + %tmp4879 = getelementptr inbounds float* %tmp4878, i64 1 + %tmp4880 = getelementptr inbounds float* %tmp4879, i64 1 + %tmp4881 = getelementptr inbounds float* %tmp4880, i64 1 + %tmp4882 = getelementptr inbounds float* %tmp4881, i64 1 + %tmp4883 = getelementptr inbounds float* %tmp4882, i64 1 + %tmp4884 = getelementptr inbounds float* %tmp4883, i64 1 + %tmp4885 = getelementptr inbounds float* %tmp4884, i64 1 + %tmp4886 = getelementptr inbounds float* %tmp4885, i64 1 + %tmp4887 = getelementptr inbounds float* %tmp4886, i64 1 + %tmp4888 = getelementptr inbounds float* %tmp4887, i64 1 + %tmp4889 = getelementptr inbounds float* %tmp4888, i64 1 + %tmp4890 = getelementptr inbounds float* %tmp4889, i64 1 + %tmp4891 = getelementptr inbounds float* %tmp4890, i64 1 + %tmp4892 = getelementptr 
inbounds float* %tmp4891, i64 1 + %tmp4893 = getelementptr inbounds float* %tmp4892, i64 1 + %tmp4894 = getelementptr inbounds float* %tmp4893, i64 1 + %tmp4895 = getelementptr inbounds float* %tmp4894, i64 1 + %tmp4896 = getelementptr inbounds float* %tmp4895, i64 1 + %tmp4897 = getelementptr inbounds float* %tmp4896, i64 1 + %tmp4898 = getelementptr inbounds float* %tmp4897, i64 1 + %tmp4899 = getelementptr inbounds float* %tmp4898, i64 1 + %tmp4900 = getelementptr inbounds float* %tmp4899, i64 1 + %tmp4901 = getelementptr inbounds float* %tmp4900, i64 1 + %tmp4902 = getelementptr inbounds float* %tmp4901, i64 1 + %tmp4903 = getelementptr inbounds float* %tmp4902, i64 1 + %tmp4904 = getelementptr inbounds float* %tmp4903, i64 1 + %tmp4905 = getelementptr inbounds float* %tmp4904, i64 1 + %tmp4906 = getelementptr inbounds float* %tmp4905, i64 1 + %tmp4907 = getelementptr inbounds float* %tmp4906, i64 1 + %tmp4908 = getelementptr inbounds float* %tmp4907, i64 1 + %tmp4909 = getelementptr inbounds float* %tmp4908, i64 1 + %tmp4910 = getelementptr inbounds float* %tmp4909, i64 1 + %tmp4911 = getelementptr inbounds float* %tmp4910, i64 1 + %tmp4912 = getelementptr inbounds float* %tmp4911, i64 1 + %tmp4913 = getelementptr inbounds float* %tmp4912, i64 1 + %tmp4914 = getelementptr inbounds float* %tmp4913, i64 1 + %tmp4915 = getelementptr inbounds float* %tmp4914, i64 1 + %tmp4916 = getelementptr inbounds float* %tmp4915, i64 1 + %tmp4917 = getelementptr inbounds float* %tmp4916, i64 1 + %tmp4918 = getelementptr inbounds float* %tmp4917, i64 1 + %tmp4919 = getelementptr inbounds float* %tmp4918, i64 1 + %tmp4920 = getelementptr inbounds float* %tmp4919, i64 1 + %tmp4921 = getelementptr inbounds float* %tmp4920, i64 1 + %tmp4922 = getelementptr inbounds float* %tmp4921, i64 1 + %tmp4923 = getelementptr inbounds float* %tmp4922, i64 1 + %tmp4924 = getelementptr inbounds float* %tmp4923, i64 1 + %tmp4925 = getelementptr inbounds float* %tmp4924, i64 1 + %tmp4926 = 
getelementptr inbounds float* %tmp4925, i64 1 + %tmp4927 = getelementptr inbounds float* %tmp4926, i64 1 + %tmp4928 = getelementptr inbounds float* %tmp4927, i64 1 + %tmp4929 = getelementptr inbounds float* %tmp4928, i64 1 + %tmp4930 = getelementptr inbounds float* %tmp4929, i64 1 + %tmp4931 = getelementptr inbounds float* %tmp4930, i64 1 + %tmp4932 = getelementptr inbounds float* %tmp4931, i64 1 + %tmp4933 = getelementptr inbounds float* %tmp4932, i64 1 + %tmp4934 = getelementptr inbounds float* %tmp4933, i64 1 + %tmp4935 = getelementptr inbounds float* %tmp4934, i64 1 + %tmp4936 = getelementptr inbounds float* %tmp4935, i64 1 + %tmp4937 = getelementptr inbounds float* %tmp4936, i64 1 + %tmp4938 = getelementptr inbounds float* %tmp4937, i64 1 + %tmp4939 = getelementptr inbounds float* %tmp4938, i64 1 + %tmp4940 = getelementptr inbounds float* %tmp4939, i64 1 + %tmp4941 = getelementptr inbounds float* %tmp4940, i64 1 + %tmp4942 = getelementptr inbounds float* %tmp4941, i64 1 + %tmp4943 = getelementptr inbounds float* %tmp4942, i64 1 + %tmp4944 = getelementptr inbounds float* %tmp4943, i64 1 + %tmp4945 = getelementptr inbounds float* %tmp4944, i64 1 + %tmp4946 = getelementptr inbounds float* %tmp4945, i64 1 + %tmp4947 = getelementptr inbounds float* %tmp4946, i64 1 + %tmp4948 = getelementptr inbounds float* %tmp4947, i64 1 + %tmp4949 = getelementptr inbounds float* %tmp4948, i64 1 + %tmp4950 = getelementptr inbounds float* %tmp4949, i64 1 + %tmp4951 = getelementptr inbounds float* %tmp4950, i64 1 + %tmp4952 = getelementptr inbounds float* %tmp4951, i64 1 + %tmp4953 = getelementptr inbounds float* %tmp4952, i64 1 + %tmp4954 = getelementptr inbounds float* %tmp4953, i64 1 + %tmp4955 = getelementptr inbounds float* %tmp4954, i64 1 + %tmp4956 = getelementptr inbounds float* %tmp4955, i64 1 + %tmp4957 = getelementptr inbounds float* %tmp4956, i64 1 + %tmp4958 = getelementptr inbounds float* %tmp4957, i64 1 + %tmp4959 = getelementptr inbounds float* %tmp4958, i64 1 + 
%tmp4960 = getelementptr inbounds float* %tmp4959, i64 1 + %tmp4961 = getelementptr inbounds float* %tmp4960, i64 1 + %tmp4962 = getelementptr inbounds float* %tmp4961, i64 1 + %tmp4963 = getelementptr inbounds float* %tmp4962, i64 1 + %tmp4964 = getelementptr inbounds float* %tmp4963, i64 1 + %tmp4965 = getelementptr inbounds float* %tmp4964, i64 1 + %tmp4966 = getelementptr inbounds float* %tmp4965, i64 1 + %tmp4967 = getelementptr inbounds float* %tmp4966, i64 1 + %tmp4968 = getelementptr inbounds float* %tmp4967, i64 1 + %tmp4969 = getelementptr inbounds float* %tmp4968, i64 1 + %tmp4970 = getelementptr inbounds float* %tmp4969, i64 1 + %tmp4971 = getelementptr inbounds float* %tmp4970, i64 1 + %tmp4972 = getelementptr inbounds float* %tmp4971, i64 1 + %tmp4973 = getelementptr inbounds float* %tmp4972, i64 1 + %tmp4974 = getelementptr inbounds float* %tmp4973, i64 1 + %tmp4975 = getelementptr inbounds float* %tmp4974, i64 1 + %tmp4976 = getelementptr inbounds float* %tmp4975, i64 1 + %tmp4977 = getelementptr inbounds float* %tmp4976, i64 1 + %tmp4978 = getelementptr inbounds float* %tmp4977, i64 1 + %tmp4979 = getelementptr inbounds float* %tmp4978, i64 1 + %tmp4980 = getelementptr inbounds float* %tmp4979, i64 1 + %tmp4981 = getelementptr inbounds float* %tmp4980, i64 1 + %tmp4982 = getelementptr inbounds float* %tmp4981, i64 1 + %tmp4983 = getelementptr inbounds float* %tmp4982, i64 1 + %tmp4984 = getelementptr inbounds float* %tmp4983, i64 1 + %tmp4985 = getelementptr inbounds float* %tmp4984, i64 1 + %tmp4986 = getelementptr inbounds float* %tmp4985, i64 1 + %tmp4987 = getelementptr inbounds float* %tmp4986, i64 1 + %tmp4988 = getelementptr inbounds float* %tmp4987, i64 1 + %tmp4989 = getelementptr inbounds float* %tmp4988, i64 1 + %tmp4990 = getelementptr inbounds float* %tmp4989, i64 1 + %tmp4991 = getelementptr inbounds float* %tmp4990, i64 1 + %tmp4992 = getelementptr inbounds float* %tmp4991, i64 1 + %tmp4993 = getelementptr inbounds float* %tmp4992, 
i64 1 + %tmp4994 = getelementptr inbounds float* %tmp4993, i64 1 + %tmp4995 = getelementptr inbounds float* %tmp4994, i64 1 + %tmp4996 = getelementptr inbounds float* %tmp4995, i64 1 + %tmp4997 = getelementptr inbounds float* %tmp4996, i64 1 + %tmp4998 = getelementptr inbounds float* %tmp4997, i64 1 + %tmp4999 = getelementptr inbounds float* %tmp4998, i64 1 + %tmp5000 = getelementptr inbounds float* %tmp4999, i64 1 + %tmp5001 = getelementptr inbounds float* %tmp5000, i64 1 + %tmp5002 = getelementptr inbounds float* %tmp5001, i64 1 + %tmp5003 = getelementptr inbounds float* %tmp5002, i64 1 + %tmp5004 = getelementptr inbounds float* %tmp5003, i64 1 + %tmp5005 = getelementptr inbounds float* %tmp5004, i64 1 + %tmp5006 = getelementptr inbounds float* %tmp5005, i64 1 + %tmp5007 = getelementptr inbounds float* %tmp5006, i64 1 + %tmp5008 = getelementptr inbounds float* %tmp5007, i64 1 + %tmp5009 = getelementptr inbounds float* %tmp5008, i64 1 + %tmp5010 = getelementptr inbounds float* %tmp5009, i64 1 + %tmp5011 = getelementptr inbounds float* %tmp5010, i64 1 + %tmp5012 = getelementptr inbounds float* %tmp5011, i64 1 + %tmp5013 = getelementptr inbounds float* %tmp5012, i64 1 + %tmp5014 = getelementptr inbounds float* %tmp5013, i64 1 + %tmp5015 = getelementptr inbounds float* %tmp5014, i64 1 + %tmp5016 = getelementptr inbounds float* %tmp5015, i64 1 + %tmp5017 = getelementptr inbounds float* %tmp5016, i64 1 + %tmp5018 = getelementptr inbounds float* %tmp5017, i64 1 + %tmp5019 = getelementptr inbounds float* %tmp5018, i64 1 + %tmp5020 = getelementptr inbounds float* %tmp5019, i64 1 + %tmp5021 = getelementptr inbounds float* %tmp5020, i64 1 + %tmp5022 = getelementptr inbounds float* %tmp5021, i64 1 + %tmp5023 = getelementptr inbounds float* %tmp5022, i64 1 + %tmp5024 = getelementptr inbounds float* %tmp5023, i64 1 + %tmp5025 = getelementptr inbounds float* %tmp5024, i64 1 + %tmp5026 = getelementptr inbounds float* %tmp5025, i64 1 + %tmp5027 = getelementptr inbounds float* 
%tmp5026, i64 1 + %tmp5028 = getelementptr inbounds float* %tmp5027, i64 1 + %tmp5029 = getelementptr inbounds float* %tmp5028, i64 1 + %tmp5030 = getelementptr inbounds float* %tmp5029, i64 1 + %tmp5031 = getelementptr inbounds float* %tmp5030, i64 1 + %tmp5032 = getelementptr inbounds float* %tmp5031, i64 1 + %tmp5033 = getelementptr inbounds float* %tmp5032, i64 1 + %tmp5034 = getelementptr inbounds float* %tmp5033, i64 1 + %tmp5035 = getelementptr inbounds float* %tmp5034, i64 1 + %tmp5036 = getelementptr inbounds float* %tmp5035, i64 1 + %tmp5037 = getelementptr inbounds float* %tmp5036, i64 1 + %tmp5038 = getelementptr inbounds float* %tmp5037, i64 1 + %tmp5039 = getelementptr inbounds float* %tmp5038, i64 1 + %tmp5040 = getelementptr inbounds float* %tmp5039, i64 1 + %tmp5041 = getelementptr inbounds float* %tmp5040, i64 1 + %tmp5042 = getelementptr inbounds float* %tmp5041, i64 1 + %tmp5043 = getelementptr inbounds float* %tmp5042, i64 1 + %tmp5044 = getelementptr inbounds float* %tmp5043, i64 1 + %tmp5045 = getelementptr inbounds float* %tmp5044, i64 1 + %tmp5046 = getelementptr inbounds float* %tmp5045, i64 1 + %tmp5047 = getelementptr inbounds float* %tmp5046, i64 1 + %tmp5048 = getelementptr inbounds float* %tmp5047, i64 1 + %tmp5049 = getelementptr inbounds float* %tmp5048, i64 1 + %tmp5050 = getelementptr inbounds float* %tmp5049, i64 1 + %tmp5051 = getelementptr inbounds float* %tmp5050, i64 1 + %tmp5052 = getelementptr inbounds float* %tmp5051, i64 1 + %tmp5053 = getelementptr inbounds float* %tmp5052, i64 1 + %tmp5054 = getelementptr inbounds float* %tmp5053, i64 1 + %tmp5055 = getelementptr inbounds float* %tmp5054, i64 1 + %tmp5056 = getelementptr inbounds float* %tmp5055, i64 1 + %tmp5057 = getelementptr inbounds float* %tmp5056, i64 1 + %tmp5058 = getelementptr inbounds float* %tmp5057, i64 1 + %tmp5059 = getelementptr inbounds float* %tmp5058, i64 1 + %tmp5060 = getelementptr inbounds float* %tmp5059, i64 1 + %tmp5061 = getelementptr inbounds 
float* %tmp5060, i64 1 + %tmp5062 = getelementptr inbounds float* %tmp5061, i64 1 + %tmp5063 = getelementptr inbounds float* %tmp5062, i64 1 + %tmp5064 = getelementptr inbounds float* %tmp5063, i64 1 + %tmp5065 = getelementptr inbounds float* %tmp5064, i64 1 + %tmp5066 = getelementptr inbounds float* %tmp5065, i64 1 + %tmp5067 = getelementptr inbounds float* %tmp5066, i64 1 + %tmp5068 = getelementptr inbounds float* %tmp5067, i64 1 + %tmp5069 = getelementptr inbounds float* %tmp5068, i64 1 + %tmp5070 = getelementptr inbounds float* %tmp5069, i64 1 + %tmp5071 = getelementptr inbounds float* %tmp5070, i64 1 + %tmp5072 = getelementptr inbounds float* %tmp5071, i64 1 + %tmp5073 = getelementptr inbounds float* %tmp5072, i64 1 + %tmp5074 = getelementptr inbounds float* %tmp5073, i64 1 + %tmp5075 = getelementptr inbounds float* %tmp5074, i64 1 + %tmp5076 = getelementptr inbounds float* %tmp5075, i64 1 + %tmp5077 = getelementptr inbounds float* %tmp5076, i64 1 + %tmp5078 = getelementptr inbounds float* %tmp5077, i64 1 + %tmp5079 = getelementptr inbounds float* %tmp5078, i64 1 + %tmp5080 = getelementptr inbounds float* %tmp5079, i64 1 + %tmp5081 = getelementptr inbounds float* %tmp5080, i64 1 + %tmp5082 = getelementptr inbounds float* %tmp5081, i64 1 + %tmp5083 = getelementptr inbounds float* %tmp5082, i64 1 + %tmp5084 = getelementptr inbounds float* %tmp5083, i64 1 + %tmp5085 = getelementptr inbounds float* %tmp5084, i64 1 + %tmp5086 = getelementptr inbounds float* %tmp5085, i64 1 + %tmp5087 = getelementptr inbounds float* %tmp5086, i64 1 + %tmp5088 = getelementptr inbounds float* %tmp5087, i64 1 + %tmp5089 = getelementptr inbounds float* %tmp5088, i64 1 + %tmp5090 = getelementptr inbounds float* %tmp5089, i64 1 + %tmp5091 = getelementptr inbounds float* %tmp5090, i64 1 + %tmp5092 = getelementptr inbounds float* %tmp5091, i64 1 + %tmp5093 = getelementptr inbounds float* %tmp5092, i64 1 + %tmp5094 = getelementptr inbounds float* %tmp5093, i64 1 + %tmp5095 = getelementptr 
inbounds float* %tmp5094, i64 1 + %tmp5096 = getelementptr inbounds float* %tmp5095, i64 1 + %tmp5097 = getelementptr inbounds float* %tmp5096, i64 1 + %tmp5098 = getelementptr inbounds float* %tmp5097, i64 1 + %tmp5099 = getelementptr inbounds float* %tmp5098, i64 1 + %tmp5100 = getelementptr inbounds float* %tmp5099, i64 1 + %tmp5101 = getelementptr inbounds float* %tmp5100, i64 1 + %tmp5102 = getelementptr inbounds float* %tmp5101, i64 1 + %tmp5103 = getelementptr inbounds float* %tmp5102, i64 1 + %tmp5104 = getelementptr inbounds float* %tmp5103, i64 1 + %tmp5105 = getelementptr inbounds float* %tmp5104, i64 1 + %tmp5106 = getelementptr inbounds float* %tmp5105, i64 1 + %tmp5107 = getelementptr inbounds float* %tmp5106, i64 1 + %tmp5108 = getelementptr inbounds float* %tmp5107, i64 1 + %tmp5109 = getelementptr inbounds float* %tmp5108, i64 1 + %tmp5110 = getelementptr inbounds float* %tmp5109, i64 1 + %tmp5111 = getelementptr inbounds float* %tmp5110, i64 1 + %tmp5112 = getelementptr inbounds float* %tmp5111, i64 1 + %tmp5113 = getelementptr inbounds float* %tmp5112, i64 1 + %tmp5114 = getelementptr inbounds float* %tmp5113, i64 1 + %tmp5115 = getelementptr inbounds float* %tmp5114, i64 1 + %tmp5116 = getelementptr inbounds float* %tmp5115, i64 1 + %tmp5117 = getelementptr inbounds float* %tmp5116, i64 1 + %tmp5118 = getelementptr inbounds float* %tmp5117, i64 1 + %tmp5119 = getelementptr inbounds float* %tmp5118, i64 1 + %tmp5120 = getelementptr inbounds float* %tmp5119, i64 1 + %tmp5121 = getelementptr inbounds float* %tmp5120, i64 1 + %tmp5122 = getelementptr inbounds float* %tmp5121, i64 1 + %tmp5123 = getelementptr inbounds float* %tmp5122, i64 1 + %tmp5124 = getelementptr inbounds float* %tmp5123, i64 1 + %tmp5125 = getelementptr inbounds float* %tmp5124, i64 1 + %tmp5126 = getelementptr inbounds float* %tmp5125, i64 1 + %tmp5127 = getelementptr inbounds float* %tmp5126, i64 1 + %tmp5128 = getelementptr inbounds float* %tmp5127, i64 1 + %tmp5129 = 
getelementptr inbounds float* %tmp5128, i64 1 + %tmp5130 = getelementptr inbounds float* %tmp5129, i64 1 + %tmp5131 = getelementptr inbounds float* %tmp5130, i64 1 + %tmp5132 = getelementptr inbounds float* %tmp5131, i64 1 + %tmp5133 = getelementptr inbounds float* %tmp5132, i64 1 + %tmp5134 = getelementptr inbounds float* %tmp5133, i64 1 + %tmp5135 = getelementptr inbounds float* %tmp5134, i64 1 + %tmp5136 = getelementptr inbounds float* %tmp5135, i64 1 + %tmp5137 = getelementptr inbounds float* %tmp5136, i64 1 + %tmp5138 = getelementptr inbounds float* %tmp5137, i64 1 + %tmp5139 = getelementptr inbounds float* %tmp5138, i64 1 + %tmp5140 = getelementptr inbounds float* %tmp5139, i64 1 + %tmp5141 = getelementptr inbounds float* %tmp5140, i64 1 + %tmp5142 = getelementptr inbounds float* %tmp5141, i64 1 + %tmp5143 = getelementptr inbounds float* %tmp5142, i64 1 + %tmp5144 = getelementptr inbounds float* %tmp5143, i64 1 + %tmp5145 = getelementptr inbounds float* %tmp5144, i64 1 + %tmp5146 = getelementptr inbounds float* %tmp5145, i64 1 + %tmp5147 = getelementptr inbounds float* %tmp5146, i64 1 + %tmp5148 = getelementptr inbounds float* %tmp5147, i64 1 + %tmp5149 = getelementptr inbounds float* %tmp5148, i64 1 + %tmp5150 = getelementptr inbounds float* %tmp5149, i64 1 + %tmp5151 = getelementptr inbounds float* %tmp5150, i64 1 + %tmp5152 = getelementptr inbounds float* %tmp5151, i64 1 + %tmp5153 = getelementptr inbounds float* %tmp5152, i64 1 + %tmp5154 = getelementptr inbounds float* %tmp5153, i64 1 + %tmp5155 = getelementptr inbounds float* %tmp5154, i64 1 + %tmp5156 = getelementptr inbounds float* %tmp5155, i64 1 + %tmp5157 = getelementptr inbounds float* %tmp5156, i64 1 + %tmp5158 = getelementptr inbounds float* %tmp5157, i64 1 + %tmp5159 = getelementptr inbounds float* %tmp5158, i64 1 + %tmp5160 = getelementptr inbounds float* %tmp5159, i64 1 + %tmp5161 = getelementptr inbounds float* %tmp5160, i64 1 + %tmp5162 = getelementptr inbounds float* %tmp5161, i64 1 + 
%tmp5163 = getelementptr inbounds float* %tmp5162, i64 1 + %tmp5164 = getelementptr inbounds float* %tmp5163, i64 1 + %tmp5165 = getelementptr inbounds float* %tmp5164, i64 1 + %tmp5166 = getelementptr inbounds float* %tmp5165, i64 1 + %tmp5167 = getelementptr inbounds float* %tmp5166, i64 1 + %tmp5168 = getelementptr inbounds float* %tmp5167, i64 1 + %tmp5169 = getelementptr inbounds float* %tmp5168, i64 1 + %tmp5170 = getelementptr inbounds float* %tmp5169, i64 1 + %tmp5171 = getelementptr inbounds float* %tmp5170, i64 1 + %tmp5172 = getelementptr inbounds float* %tmp5171, i64 1 + %tmp5173 = getelementptr inbounds float* %tmp5172, i64 1 + %tmp5174 = getelementptr inbounds float* %tmp5173, i64 1 + %tmp5175 = getelementptr inbounds float* %tmp5174, i64 1 + %tmp5176 = getelementptr inbounds float* %tmp5175, i64 1 + %tmp5177 = getelementptr inbounds float* %tmp5176, i64 1 + %tmp5178 = getelementptr inbounds float* %tmp5177, i64 1 + %tmp5179 = getelementptr inbounds float* %tmp5178, i64 1 + %tmp5180 = getelementptr inbounds float* %tmp5179, i64 1 + %tmp5181 = getelementptr inbounds float* %tmp5180, i64 1 + %tmp5182 = getelementptr inbounds float* %tmp5181, i64 1 + %tmp5183 = getelementptr inbounds float* %tmp5182, i64 1 + %tmp5184 = getelementptr inbounds float* %tmp5183, i64 1 + %tmp5185 = getelementptr inbounds float* %tmp5184, i64 1 + %tmp5186 = getelementptr inbounds float* %tmp5185, i64 1 + %tmp5187 = getelementptr inbounds float* %tmp5186, i64 1 + %tmp5188 = getelementptr inbounds float* %tmp5187, i64 1 + %tmp5189 = getelementptr inbounds float* %tmp5188, i64 1 + %tmp5190 = getelementptr inbounds float* %tmp5189, i64 1 + %tmp5191 = getelementptr inbounds float* %tmp5190, i64 1 + %tmp5192 = getelementptr inbounds float* %tmp5191, i64 1 + %tmp5193 = getelementptr inbounds float* %tmp5192, i64 1 + %tmp5194 = getelementptr inbounds float* %tmp5193, i64 1 + %tmp5195 = getelementptr inbounds float* %tmp5194, i64 1 + %tmp5196 = getelementptr inbounds float* %tmp5195, 
i64 1 + %tmp5197 = getelementptr inbounds float* %tmp5196, i64 1 + %tmp5198 = getelementptr inbounds float* %tmp5197, i64 1 + %tmp5199 = getelementptr inbounds float* %tmp5198, i64 1 + %tmp5200 = getelementptr inbounds float* %tmp5199, i64 1 + %tmp5201 = getelementptr inbounds float* %tmp5200, i64 1 + %tmp5202 = getelementptr inbounds float* %tmp5201, i64 1 + %tmp5203 = getelementptr inbounds float* %tmp5202, i64 1 + %tmp5204 = getelementptr inbounds float* %tmp5203, i64 1 + %tmp5205 = getelementptr inbounds float* %tmp5204, i64 1 + %tmp5206 = getelementptr inbounds float* %tmp5205, i64 1 + %tmp5207 = getelementptr inbounds float* %tmp5206, i64 1 + %tmp5208 = getelementptr inbounds float* %tmp5207, i64 1 + %tmp5209 = getelementptr inbounds float* %tmp5208, i64 1 + %tmp5210 = getelementptr inbounds float* %tmp5209, i64 1 + %tmp5211 = getelementptr inbounds float* %tmp5210, i64 1 + %tmp5212 = getelementptr inbounds float* %tmp5211, i64 1 + %tmp5213 = getelementptr inbounds float* %tmp5212, i64 1 + %tmp5214 = getelementptr inbounds float* %tmp5213, i64 1 + %tmp5215 = getelementptr inbounds float* %tmp5214, i64 1 + %tmp5216 = getelementptr inbounds float* %tmp5215, i64 1 + %tmp5217 = getelementptr inbounds float* %tmp5216, i64 1 + %tmp5218 = getelementptr inbounds float* %tmp5217, i64 1 + %tmp5219 = getelementptr inbounds float* %tmp5218, i64 1 + %tmp5220 = getelementptr inbounds float* %tmp5219, i64 1 + %tmp5221 = getelementptr inbounds float* %tmp5220, i64 1 + %tmp5222 = getelementptr inbounds float* %tmp5221, i64 1 + %tmp5223 = getelementptr inbounds float* %tmp5222, i64 1 + %tmp5224 = getelementptr inbounds float* %tmp5223, i64 1 + %tmp5225 = getelementptr inbounds float* %tmp5224, i64 1 + %tmp5226 = getelementptr inbounds float* %tmp5225, i64 1 + %tmp5227 = getelementptr inbounds float* %tmp5226, i64 1 + %tmp5228 = getelementptr inbounds float* %tmp5227, i64 1 + %tmp5229 = getelementptr inbounds float* %tmp5228, i64 1 + %tmp5230 = getelementptr inbounds float* 
%tmp5229, i64 1 + %tmp5231 = getelementptr inbounds float* %tmp5230, i64 1 + %tmp5232 = getelementptr inbounds float* %tmp5231, i64 1 + %tmp5233 = getelementptr inbounds float* %tmp5232, i64 1 + %tmp5234 = getelementptr inbounds float* %tmp5233, i64 1 + %tmp5235 = getelementptr inbounds float* %tmp5234, i64 1 + %tmp5236 = getelementptr inbounds float* %tmp5235, i64 1 + %tmp5237 = getelementptr inbounds float* %tmp5236, i64 1 + %tmp5238 = getelementptr inbounds float* %tmp5237, i64 1 + %tmp5239 = getelementptr inbounds float* %tmp5238, i64 1 + %tmp5240 = getelementptr inbounds float* %tmp5239, i64 1 + %tmp5241 = getelementptr inbounds float* %tmp5240, i64 1 + %tmp5242 = getelementptr inbounds float* %tmp5241, i64 1 + %tmp5243 = getelementptr inbounds float* %tmp5242, i64 1 + %tmp5244 = getelementptr inbounds float* %tmp5243, i64 1 + %tmp5245 = getelementptr inbounds float* %tmp5244, i64 1 + %tmp5246 = getelementptr inbounds float* %tmp5245, i64 1 + %tmp5247 = getelementptr inbounds float* %tmp5246, i64 1 + %tmp5248 = getelementptr inbounds float* %tmp5247, i64 1 + %tmp5249 = getelementptr inbounds float* %tmp5248, i64 1 + %tmp5250 = getelementptr inbounds float* %tmp5249, i64 1 + %tmp5251 = getelementptr inbounds float* %tmp5250, i64 1 + %tmp5252 = getelementptr inbounds float* %tmp5251, i64 1 + %tmp5253 = getelementptr inbounds float* %tmp5252, i64 1 + %tmp5254 = getelementptr inbounds float* %tmp5253, i64 1 + %tmp5255 = getelementptr inbounds float* %tmp5254, i64 1 + %tmp5256 = getelementptr inbounds float* %tmp5255, i64 1 + %tmp5257 = getelementptr inbounds float* %tmp5256, i64 1 + %tmp5258 = getelementptr inbounds float* %tmp5257, i64 1 + %tmp5259 = getelementptr inbounds float* %tmp5258, i64 1 + %tmp5260 = getelementptr inbounds float* %tmp5259, i64 1 + %tmp5261 = getelementptr inbounds float* %tmp5260, i64 1 + %tmp5262 = getelementptr inbounds float* %tmp5261, i64 1 + %tmp5263 = getelementptr inbounds float* %tmp5262, i64 1 + %tmp5264 = getelementptr inbounds 
float* %tmp5263, i64 1 + %tmp5265 = getelementptr inbounds float* %tmp5264, i64 1 + %tmp5266 = getelementptr inbounds float* %tmp5265, i64 1 + %tmp5267 = getelementptr inbounds float* %tmp5266, i64 1 + %tmp5268 = getelementptr inbounds float* %tmp5267, i64 1 + %tmp5269 = getelementptr inbounds float* %tmp5268, i64 1 + %tmp5270 = getelementptr inbounds float* %tmp5269, i64 1 + %tmp5271 = getelementptr inbounds float* %tmp5270, i64 1 + %tmp5272 = getelementptr inbounds float* %tmp5271, i64 1 + %tmp5273 = getelementptr inbounds float* %tmp5272, i64 1 + %tmp5274 = getelementptr inbounds float* %tmp5273, i64 1 + %tmp5275 = getelementptr inbounds float* %tmp5274, i64 1 + %tmp5276 = getelementptr inbounds float* %tmp5275, i64 1 + %tmp5277 = getelementptr inbounds float* %tmp5276, i64 1 + %tmp5278 = getelementptr inbounds float* %tmp5277, i64 1 + %tmp5279 = getelementptr inbounds float* %tmp5278, i64 1 + %tmp5280 = getelementptr inbounds float* %tmp5279, i64 1 + %tmp5281 = getelementptr inbounds float* %tmp5280, i64 1 + %tmp5282 = getelementptr inbounds float* %tmp5281, i64 1 + %tmp5283 = getelementptr inbounds float* %tmp5282, i64 1 + %tmp5284 = getelementptr inbounds float* %tmp5283, i64 1 + %tmp5285 = getelementptr inbounds float* %tmp5284, i64 1 + %tmp5286 = getelementptr inbounds float* %tmp5285, i64 1 + %tmp5287 = getelementptr inbounds float* %tmp5286, i64 1 + %tmp5288 = getelementptr inbounds float* %tmp5287, i64 1 + %tmp5289 = getelementptr inbounds float* %tmp5288, i64 1 + %tmp5290 = getelementptr inbounds float* %tmp5289, i64 1 + %tmp5291 = getelementptr inbounds float* %tmp5290, i64 1 + %tmp5292 = getelementptr inbounds float* %tmp5291, i64 1 + %tmp5293 = getelementptr inbounds float* %tmp5292, i64 1 + %tmp5294 = getelementptr inbounds float* %tmp5293, i64 1 + %tmp5295 = getelementptr inbounds float* %tmp5294, i64 1 + %tmp5296 = getelementptr inbounds float* %tmp5295, i64 1 + %tmp5297 = getelementptr inbounds float* %tmp5296, i64 1 + %tmp5298 = getelementptr 
inbounds float* %tmp5297, i64 1 + %tmp5299 = getelementptr inbounds float* %tmp5298, i64 1 + %tmp5300 = getelementptr inbounds float* %tmp5299, i64 1 + %tmp5301 = getelementptr inbounds float* %tmp5300, i64 1 + %tmp5302 = getelementptr inbounds float* %tmp5301, i64 1 + %tmp5303 = getelementptr inbounds float* %tmp5302, i64 1 + %tmp5304 = getelementptr inbounds float* %tmp5303, i64 1 + %tmp5305 = getelementptr inbounds float* %tmp5304, i64 1 + %tmp5306 = getelementptr inbounds float* %tmp5305, i64 1 + %tmp5307 = getelementptr inbounds float* %tmp5306, i64 1 + %tmp5308 = getelementptr inbounds float* %tmp5307, i64 1 + %tmp5309 = getelementptr inbounds float* %tmp5308, i64 1 + %tmp5310 = getelementptr inbounds float* %tmp5309, i64 1 + %tmp5311 = getelementptr inbounds float* %tmp5310, i64 1 + %tmp5312 = getelementptr inbounds float* %tmp5311, i64 1 + %tmp5313 = getelementptr inbounds float* %tmp5312, i64 1 + %tmp5314 = getelementptr inbounds float* %tmp5313, i64 1 + %tmp5315 = getelementptr inbounds float* %tmp5314, i64 1 + %tmp5316 = getelementptr inbounds float* %tmp5315, i64 1 + %tmp5317 = getelementptr inbounds float* %tmp5316, i64 1 + %tmp5318 = getelementptr inbounds float* %tmp5317, i64 1 + %tmp5319 = getelementptr inbounds float* %tmp5318, i64 1 + %tmp5320 = getelementptr inbounds float* %tmp5319, i64 1 + %tmp5321 = getelementptr inbounds float* %tmp5320, i64 1 + %tmp5322 = getelementptr inbounds float* %tmp5321, i64 1 + %tmp5323 = getelementptr inbounds float* %tmp5322, i64 1 + %tmp5324 = getelementptr inbounds float* %tmp5323, i64 1 + %tmp5325 = getelementptr inbounds float* %tmp5324, i64 1 + %tmp5326 = getelementptr inbounds float* %tmp5325, i64 1 + %tmp5327 = getelementptr inbounds float* %tmp5326, i64 1 + %tmp5328 = getelementptr inbounds float* %tmp5327, i64 1 + %tmp5329 = getelementptr inbounds float* %tmp5328, i64 1 + %tmp5330 = getelementptr inbounds float* %tmp5329, i64 1 + %tmp5331 = getelementptr inbounds float* %tmp5330, i64 1 + %tmp5332 = 
getelementptr inbounds float* %tmp5331, i64 1 + %tmp5333 = getelementptr inbounds float* %tmp5332, i64 1 + %tmp5334 = getelementptr inbounds float* %tmp5333, i64 1 + %tmp5335 = getelementptr inbounds float* %tmp5334, i64 1 + %tmp5336 = getelementptr inbounds float* %tmp5335, i64 1 + %tmp5337 = getelementptr inbounds float* %tmp5336, i64 1 + %tmp5338 = getelementptr inbounds float* %tmp5337, i64 1 + %tmp5339 = getelementptr inbounds float* %tmp5338, i64 1 + %tmp5340 = getelementptr inbounds float* %tmp5339, i64 1 + %tmp5341 = getelementptr inbounds float* %tmp5340, i64 1 + %tmp5342 = getelementptr inbounds float* %tmp5341, i64 1 + %tmp5343 = getelementptr inbounds float* %tmp5342, i64 1 + %tmp5344 = getelementptr inbounds float* %tmp5343, i64 1 + %tmp5345 = getelementptr inbounds float* %tmp5344, i64 1 + %tmp5346 = getelementptr inbounds float* %tmp5345, i64 1 + %tmp5347 = getelementptr inbounds float* %tmp5346, i64 1 + %tmp5348 = getelementptr inbounds float* %tmp5347, i64 1 + %tmp5349 = getelementptr inbounds float* %tmp5348, i64 1 + %tmp5350 = getelementptr inbounds float* %tmp5349, i64 1 + %tmp5351 = getelementptr inbounds float* %tmp5350, i64 1 + %tmp5352 = getelementptr inbounds float* %tmp5351, i64 1 + %tmp5353 = getelementptr inbounds float* %tmp5352, i64 1 + %tmp5354 = getelementptr inbounds float* %tmp5353, i64 1 + %tmp5355 = getelementptr inbounds float* %tmp5354, i64 1 + %tmp5356 = getelementptr inbounds float* %tmp5355, i64 1 + %tmp5357 = getelementptr inbounds float* %tmp5356, i64 1 + %tmp5358 = getelementptr inbounds float* %tmp5357, i64 1 + %tmp5359 = getelementptr inbounds float* %tmp5358, i64 1 + %tmp5360 = getelementptr inbounds float* %tmp5359, i64 1 + %tmp5361 = getelementptr inbounds float* %tmp5360, i64 1 + %tmp5362 = getelementptr inbounds float* %tmp5361, i64 1 + %tmp5363 = getelementptr inbounds float* %tmp5362, i64 1 + %tmp5364 = getelementptr inbounds float* %tmp5363, i64 1 + %tmp5365 = getelementptr inbounds float* %tmp5364, i64 1 + 
%tmp5366 = getelementptr inbounds float* %tmp5365, i64 1 + %tmp5367 = getelementptr inbounds float* %tmp5366, i64 1 + %tmp5368 = getelementptr inbounds float* %tmp5367, i64 1 + %tmp5369 = getelementptr inbounds float* %tmp5368, i64 1 + %tmp5370 = getelementptr inbounds float* %tmp5369, i64 1 + %tmp5371 = getelementptr inbounds float* %tmp5370, i64 1 + %tmp5372 = getelementptr inbounds float* %tmp5371, i64 1 + %tmp5373 = getelementptr inbounds float* %tmp5372, i64 1 + %tmp5374 = getelementptr inbounds float* %tmp5373, i64 1 + %tmp5375 = getelementptr inbounds float* %tmp5374, i64 1 + %tmp5376 = getelementptr inbounds float* %tmp5375, i64 1 + %tmp5377 = getelementptr inbounds float* %tmp5376, i64 1 + %tmp5378 = getelementptr inbounds float* %tmp5377, i64 1 + %tmp5379 = getelementptr inbounds float* %tmp5378, i64 1 + %tmp5380 = getelementptr inbounds float* %tmp5379, i64 1 + %tmp5381 = getelementptr inbounds float* %tmp5380, i64 1 + %tmp5382 = getelementptr inbounds float* %tmp5381, i64 1 + %tmp5383 = getelementptr inbounds float* %tmp5382, i64 1 + %tmp5384 = getelementptr inbounds float* %tmp5383, i64 1 + %tmp5385 = getelementptr inbounds float* %tmp5384, i64 1 + %tmp5386 = getelementptr inbounds float* %tmp5385, i64 1 + %tmp5387 = getelementptr inbounds float* %tmp5386, i64 1 + %tmp5388 = getelementptr inbounds float* %tmp5387, i64 1 + %tmp5389 = getelementptr inbounds float* %tmp5388, i64 1 + %tmp5390 = getelementptr inbounds float* %tmp5389, i64 1 + %tmp5391 = getelementptr inbounds float* %tmp5390, i64 1 + %tmp5392 = getelementptr inbounds float* %tmp5391, i64 1 + %tmp5393 = getelementptr inbounds float* %tmp5392, i64 1 + %tmp5394 = getelementptr inbounds float* %tmp5393, i64 1 + %tmp5395 = getelementptr inbounds float* %tmp5394, i64 1 + %tmp5396 = getelementptr inbounds float* %tmp5395, i64 1 + %tmp5397 = getelementptr inbounds float* %tmp5396, i64 1 + %tmp5398 = getelementptr inbounds float* %tmp5397, i64 1 + %tmp5399 = getelementptr inbounds float* %tmp5398, 
i64 1 + %tmp5400 = getelementptr inbounds float* %tmp5399, i64 1 + %tmp5401 = getelementptr inbounds float* %tmp5400, i64 1 + %tmp5402 = getelementptr inbounds float* %tmp5401, i64 1 + %tmp5403 = getelementptr inbounds float* %tmp5402, i64 1 + %tmp5404 = getelementptr inbounds float* %tmp5403, i64 1 + %tmp5405 = getelementptr inbounds float* %tmp5404, i64 1 + %tmp5406 = getelementptr inbounds float* %tmp5405, i64 1 + %tmp5407 = getelementptr inbounds float* %tmp5406, i64 1 + %tmp5408 = getelementptr inbounds float* %tmp5407, i64 1 + %tmp5409 = getelementptr inbounds float* %tmp5408, i64 1 + %tmp5410 = getelementptr inbounds float* %tmp5409, i64 1 + %tmp5411 = getelementptr inbounds float* %tmp5410, i64 1 + %tmp5412 = getelementptr inbounds float* %tmp5411, i64 1 + %tmp5413 = getelementptr inbounds float* %tmp5412, i64 1 + %tmp5414 = getelementptr inbounds float* %tmp5413, i64 1 + %tmp5415 = getelementptr inbounds float* %tmp5414, i64 1 + %tmp5416 = getelementptr inbounds float* %tmp5415, i64 1 + %tmp5417 = getelementptr inbounds float* %tmp5416, i64 1 + %tmp5418 = getelementptr inbounds float* %tmp5417, i64 1 + %tmp5419 = getelementptr inbounds float* %tmp5418, i64 1 + %tmp5420 = getelementptr inbounds float* %tmp5419, i64 1 + %tmp5421 = getelementptr inbounds float* %tmp5420, i64 1 + %tmp5422 = getelementptr inbounds float* %tmp5421, i64 1 + %tmp5423 = getelementptr inbounds float* %tmp5422, i64 1 + %tmp5424 = getelementptr inbounds float* %tmp5423, i64 1 + %tmp5425 = getelementptr inbounds float* %tmp5424, i64 1 + %tmp5426 = getelementptr inbounds float* %tmp5425, i64 1 + %tmp5427 = getelementptr inbounds float* %tmp5426, i64 1 + %tmp5428 = getelementptr inbounds float* %tmp5427, i64 1 + %tmp5429 = getelementptr inbounds float* %tmp5428, i64 1 + %tmp5430 = getelementptr inbounds float* %tmp5429, i64 1 + %tmp5431 = getelementptr inbounds float* %tmp5430, i64 1 + %tmp5432 = getelementptr inbounds float* %tmp5431, i64 1 + %tmp5433 = getelementptr inbounds float* 
%tmp5432, i64 1 + %tmp5434 = getelementptr inbounds float* %tmp5433, i64 1 + %tmp5435 = getelementptr inbounds float* %tmp5434, i64 1 + %tmp5436 = getelementptr inbounds float* %tmp5435, i64 1 + %tmp5437 = getelementptr inbounds float* %tmp5436, i64 1 + %tmp5438 = getelementptr inbounds float* %tmp5437, i64 1 + %tmp5439 = getelementptr inbounds float* %tmp5438, i64 1 + %tmp5440 = getelementptr inbounds float* %tmp5439, i64 1 + %tmp5441 = getelementptr inbounds float* %tmp5440, i64 1 + %tmp5442 = getelementptr inbounds float* %tmp5441, i64 1 + %tmp5443 = getelementptr inbounds float* %tmp5442, i64 1 + %tmp5444 = getelementptr inbounds float* %tmp5443, i64 1 + %tmp5445 = getelementptr inbounds float* %tmp5444, i64 1 + %tmp5446 = getelementptr inbounds float* %tmp5445, i64 1 + %tmp5447 = getelementptr inbounds float* %tmp5446, i64 1 + %tmp5448 = getelementptr inbounds float* %tmp5447, i64 1 + %tmp5449 = getelementptr inbounds float* %tmp5448, i64 1 + %tmp5450 = getelementptr inbounds float* %tmp5449, i64 1 + %tmp5451 = getelementptr inbounds float* %tmp5450, i64 1 + %tmp5452 = getelementptr inbounds float* %tmp5451, i64 1 + %tmp5453 = getelementptr inbounds float* %tmp5452, i64 1 + %tmp5454 = getelementptr inbounds float* %tmp5453, i64 1 + %tmp5455 = getelementptr inbounds float* %tmp5454, i64 1 + %tmp5456 = getelementptr inbounds float* %tmp5455, i64 1 + %tmp5457 = getelementptr inbounds float* %tmp5456, i64 1 + %tmp5458 = getelementptr inbounds float* %tmp5457, i64 1 + %tmp5459 = getelementptr inbounds float* %tmp5458, i64 1 + %tmp5460 = getelementptr inbounds float* %tmp5459, i64 1 + %tmp5461 = getelementptr inbounds float* %tmp5460, i64 1 + %tmp5462 = getelementptr inbounds float* %tmp5461, i64 1 + %tmp5463 = getelementptr inbounds float* %tmp5462, i64 1 + %tmp5464 = getelementptr inbounds float* %tmp5463, i64 1 + %tmp5465 = getelementptr inbounds float* %tmp5464, i64 1 + %tmp5466 = getelementptr inbounds float* %tmp5465, i64 1 + %tmp5467 = getelementptr inbounds 
float* %tmp5466, i64 1 + %tmp5468 = getelementptr inbounds float* %tmp5467, i64 1 + %tmp5469 = getelementptr inbounds float* %tmp5468, i64 1 + %tmp5470 = getelementptr inbounds float* %tmp5469, i64 1 + %tmp5471 = getelementptr inbounds float* %tmp5470, i64 1 + %tmp5472 = getelementptr inbounds float* %tmp5471, i64 1 + %tmp5473 = getelementptr inbounds float* %tmp5472, i64 1 + %tmp5474 = getelementptr inbounds float* %tmp5473, i64 1 + %tmp5475 = getelementptr inbounds float* %tmp5474, i64 1 + %tmp5476 = getelementptr inbounds float* %tmp5475, i64 1 + %tmp5477 = getelementptr inbounds float* %tmp5476, i64 1 + %tmp5478 = getelementptr inbounds float* %tmp5477, i64 1 + %tmp5479 = getelementptr inbounds float* %tmp5478, i64 1 + %tmp5480 = getelementptr inbounds float* %tmp5479, i64 1 + %tmp5481 = getelementptr inbounds float* %tmp5480, i64 1 + %tmp5482 = getelementptr inbounds float* %tmp5481, i64 1 + %tmp5483 = getelementptr inbounds float* %tmp5482, i64 1 + %tmp5484 = getelementptr inbounds float* %tmp5483, i64 1 + %tmp5485 = getelementptr inbounds float* %tmp5484, i64 1 + %tmp5486 = getelementptr inbounds float* %tmp5485, i64 1 + %tmp5487 = getelementptr inbounds float* %tmp5486, i64 1 + %tmp5488 = getelementptr inbounds float* %tmp5487, i64 1 + %tmp5489 = getelementptr inbounds float* %tmp5488, i64 1 + %tmp5490 = getelementptr inbounds float* %tmp5489, i64 1 + %tmp5491 = getelementptr inbounds float* %tmp5490, i64 1 + %tmp5492 = getelementptr inbounds float* %tmp5491, i64 1 + %tmp5493 = getelementptr inbounds float* %tmp5492, i64 1 + %tmp5494 = getelementptr inbounds float* %tmp5493, i64 1 + %tmp5495 = getelementptr inbounds float* %tmp5494, i64 1 + %tmp5496 = getelementptr inbounds float* %tmp5495, i64 1 + %tmp5497 = getelementptr inbounds float* %tmp5496, i64 1 + %tmp5498 = getelementptr inbounds float* %tmp5497, i64 1 + %tmp5499 = getelementptr inbounds float* %tmp5498, i64 1 + %tmp5500 = getelementptr inbounds float* %tmp5499, i64 1 + %tmp5501 = getelementptr 
inbounds float* %tmp5500, i64 1 + %tmp5502 = getelementptr inbounds float* %tmp5501, i64 1 + %tmp5503 = getelementptr inbounds float* %tmp5502, i64 1 + %tmp5504 = getelementptr inbounds float* %tmp5503, i64 1 + %tmp5505 = getelementptr inbounds float* %tmp5504, i64 1 + %tmp5506 = getelementptr inbounds float* %tmp5505, i64 1 + %tmp5507 = getelementptr inbounds float* %tmp5506, i64 1 + %tmp5508 = getelementptr inbounds float* %tmp5507, i64 1 + %tmp5509 = getelementptr inbounds float* %tmp5508, i64 1 + %tmp5510 = getelementptr inbounds float* %tmp5509, i64 1 + %tmp5511 = getelementptr inbounds float* %tmp5510, i64 1 + %tmp5512 = getelementptr inbounds float* %tmp5511, i64 1 + %tmp5513 = getelementptr inbounds float* %tmp5512, i64 1 + %tmp5514 = getelementptr inbounds float* %tmp5513, i64 1 + %tmp5515 = getelementptr inbounds float* %tmp5514, i64 1 + %tmp5516 = getelementptr inbounds float* %tmp5515, i64 1 + %tmp5517 = getelementptr inbounds float* %tmp5516, i64 1 + %tmp5518 = getelementptr inbounds float* %tmp5517, i64 1 + %tmp5519 = getelementptr inbounds float* %tmp5518, i64 1 + %tmp5520 = getelementptr inbounds float* %tmp5519, i64 1 + %tmp5521 = getelementptr inbounds float* %tmp5520, i64 1 + %tmp5522 = getelementptr inbounds float* %tmp5521, i64 1 + %tmp5523 = getelementptr inbounds float* %tmp5522, i64 1 + %tmp5524 = getelementptr inbounds float* %tmp5523, i64 1 + %tmp5525 = getelementptr inbounds float* %tmp5524, i64 1 + %tmp5526 = getelementptr inbounds float* %tmp5525, i64 1 + %tmp5527 = getelementptr inbounds float* %tmp5526, i64 1 + %tmp5528 = getelementptr inbounds float* %tmp5527, i64 1 + %tmp5529 = getelementptr inbounds float* %tmp5528, i64 1 + %tmp5530 = getelementptr inbounds float* %tmp5529, i64 1 + %tmp5531 = getelementptr inbounds float* %tmp5530, i64 1 + %tmp5532 = getelementptr inbounds float* %tmp5531, i64 1 + %tmp5533 = getelementptr inbounds float* %tmp5532, i64 1 + %tmp5534 = getelementptr inbounds float* %tmp5533, i64 1 + %tmp5535 = 
getelementptr inbounds float* %tmp5534, i64 1 + %tmp5536 = getelementptr inbounds float* %tmp5535, i64 1 + %tmp5537 = getelementptr inbounds float* %tmp5536, i64 1 + %tmp5538 = getelementptr inbounds float* %tmp5537, i64 1 + %tmp5539 = getelementptr inbounds float* %tmp5538, i64 1 + %tmp5540 = getelementptr inbounds float* %tmp5539, i64 1 + %tmp5541 = getelementptr inbounds float* %tmp5540, i64 1 + %tmp5542 = getelementptr inbounds float* %tmp5541, i64 1 + %tmp5543 = getelementptr inbounds float* %tmp5542, i64 1 + %tmp5544 = getelementptr inbounds float* %tmp5543, i64 1 + %tmp5545 = getelementptr inbounds float* %tmp5544, i64 1 + %tmp5546 = getelementptr inbounds float* %tmp5545, i64 1 + %tmp5547 = getelementptr inbounds float* %tmp5546, i64 1 + %tmp5548 = getelementptr inbounds float* %tmp5547, i64 1 + %tmp5549 = getelementptr inbounds float* %tmp5548, i64 1 + %tmp5550 = getelementptr inbounds float* %tmp5549, i64 1 + %tmp5551 = getelementptr inbounds float* %tmp5550, i64 1 + %tmp5552 = getelementptr inbounds float* %tmp5551, i64 1 + %tmp5553 = getelementptr inbounds float* %tmp5552, i64 1 + %tmp5554 = getelementptr inbounds float* %tmp5553, i64 1 + %tmp5555 = getelementptr inbounds float* %tmp5554, i64 1 + %tmp5556 = getelementptr inbounds float* %tmp5555, i64 1 + %tmp5557 = getelementptr inbounds float* %tmp5556, i64 1 + %tmp5558 = getelementptr inbounds float* %tmp5557, i64 1 + %tmp5559 = getelementptr inbounds float* %tmp5558, i64 1 + %tmp5560 = getelementptr inbounds float* %tmp5559, i64 1 + %tmp5561 = getelementptr inbounds float* %tmp5560, i64 1 + %tmp5562 = getelementptr inbounds float* %tmp5561, i64 1 + %tmp5563 = getelementptr inbounds float* %tmp5562, i64 1 + %tmp5564 = getelementptr inbounds float* %tmp5563, i64 1 + %tmp5565 = getelementptr inbounds float* %tmp5564, i64 1 + %tmp5566 = getelementptr inbounds float* %tmp5565, i64 1 + %tmp5567 = getelementptr inbounds float* %tmp5566, i64 1 + %tmp5568 = getelementptr inbounds float* %tmp5567, i64 1 + 
%tmp5569 = getelementptr inbounds float* %tmp5568, i64 1 + %tmp5570 = getelementptr inbounds float* %tmp5569, i64 1 + %tmp5571 = getelementptr inbounds float* %tmp5570, i64 1 + %tmp5572 = getelementptr inbounds float* %tmp5571, i64 1 + %tmp5573 = getelementptr inbounds float* %tmp5572, i64 1 + %tmp5574 = getelementptr inbounds float* %tmp5573, i64 1 + %tmp5575 = getelementptr inbounds float* %tmp5574, i64 1 + %tmp5576 = getelementptr inbounds float* %tmp5575, i64 1 + %tmp5577 = getelementptr inbounds float* %tmp5576, i64 1 + %tmp5578 = getelementptr inbounds float* %tmp5577, i64 1 + %tmp5579 = getelementptr inbounds float* %tmp5578, i64 1 + %tmp5580 = getelementptr inbounds float* %tmp5579, i64 1 + %tmp5581 = getelementptr inbounds float* %tmp5580, i64 1 + %tmp5582 = getelementptr inbounds float* %tmp5581, i64 1 + %tmp5583 = getelementptr inbounds float* %tmp5582, i64 1 + %tmp5584 = getelementptr inbounds float* %tmp5583, i64 1 + %tmp5585 = getelementptr inbounds float* %tmp5584, i64 1 + %tmp5586 = getelementptr inbounds float* %tmp5585, i64 1 + %tmp5587 = getelementptr inbounds float* %tmp5586, i64 1 + %tmp5588 = getelementptr inbounds float* %tmp5587, i64 1 + %tmp5589 = getelementptr inbounds float* %tmp5588, i64 1 + %tmp5590 = getelementptr inbounds float* %tmp5589, i64 1 + %tmp5591 = getelementptr inbounds float* %tmp5590, i64 1 + %tmp5592 = getelementptr inbounds float* %tmp5591, i64 1 + %tmp5593 = getelementptr inbounds float* %tmp5592, i64 1 + %tmp5594 = getelementptr inbounds float* %tmp5593, i64 1 + %tmp5595 = getelementptr inbounds float* %tmp5594, i64 1 + %tmp5596 = getelementptr inbounds float* %tmp5595, i64 1 + %tmp5597 = getelementptr inbounds float* %tmp5596, i64 1 + %tmp5598 = getelementptr inbounds float* %tmp5597, i64 1 + %tmp5599 = getelementptr inbounds float* %tmp5598, i64 1 + %tmp5600 = getelementptr inbounds float* %tmp5599, i64 1 + %tmp5601 = getelementptr inbounds float* %tmp5600, i64 1 + %tmp5602 = getelementptr inbounds float* %tmp5601, 
i64 1 + %tmp5603 = getelementptr inbounds float* %tmp5602, i64 1 + %tmp5604 = getelementptr inbounds float* %tmp5603, i64 1 + %tmp5605 = getelementptr inbounds float* %tmp5604, i64 1 + %tmp5606 = getelementptr inbounds float* %tmp5605, i64 1 + %tmp5607 = getelementptr inbounds float* %tmp5606, i64 1 + %tmp5608 = getelementptr inbounds float* %tmp5607, i64 1 + %tmp5609 = getelementptr inbounds float* %tmp5608, i64 1 + %tmp5610 = getelementptr inbounds float* %tmp5609, i64 1 + %tmp5611 = getelementptr inbounds float* %tmp5610, i64 1 + %tmp5612 = getelementptr inbounds float* %tmp5611, i64 1 + %tmp5613 = getelementptr inbounds float* %tmp5612, i64 1 + %tmp5614 = getelementptr inbounds float* %tmp5613, i64 1 + %tmp5615 = getelementptr inbounds float* %tmp5614, i64 1 + %tmp5616 = getelementptr inbounds float* %tmp5615, i64 1 + %tmp5617 = getelementptr inbounds float* %tmp5616, i64 1 + %tmp5618 = getelementptr inbounds float* %tmp5617, i64 1 + %tmp5619 = getelementptr inbounds float* %tmp5618, i64 1 + %tmp5620 = getelementptr inbounds float* %tmp5619, i64 1 + %tmp5621 = getelementptr inbounds float* %tmp5620, i64 1 + %tmp5622 = getelementptr inbounds float* %tmp5621, i64 1 + %tmp5623 = getelementptr inbounds float* %tmp5622, i64 1 + %tmp5624 = getelementptr inbounds float* %tmp5623, i64 1 + %tmp5625 = getelementptr inbounds float* %tmp5624, i64 1 + %tmp5626 = getelementptr inbounds float* %tmp5625, i64 1 + %tmp5627 = getelementptr inbounds float* %tmp5626, i64 1 + %tmp5628 = getelementptr inbounds float* %tmp5627, i64 1 + %tmp5629 = getelementptr inbounds float* %tmp5628, i64 1 + %tmp5630 = getelementptr inbounds float* %tmp5629, i64 1 + %tmp5631 = getelementptr inbounds float* %tmp5630, i64 1 + %tmp5632 = getelementptr inbounds float* %tmp5631, i64 1 + %tmp5633 = getelementptr inbounds float* %tmp5632, i64 1 + %tmp5634 = getelementptr inbounds float* %tmp5633, i64 1 + %tmp5635 = getelementptr inbounds float* %tmp5634, i64 1 + %tmp5636 = getelementptr inbounds float* 
%tmp5635, i64 1 + %tmp5637 = getelementptr inbounds float* %tmp5636, i64 1 + %tmp5638 = getelementptr inbounds float* %tmp5637, i64 1 + %tmp5639 = getelementptr inbounds float* %tmp5638, i64 1 + %tmp5640 = getelementptr inbounds float* %tmp5639, i64 1 + %tmp5641 = getelementptr inbounds float* %tmp5640, i64 1 + %tmp5642 = getelementptr inbounds float* %tmp5641, i64 1 + %tmp5643 = getelementptr inbounds float* %tmp5642, i64 1 + %tmp5644 = getelementptr inbounds float* %tmp5643, i64 1 + %tmp5645 = getelementptr inbounds float* %tmp5644, i64 1 + %tmp5646 = getelementptr inbounds float* %tmp5645, i64 1 + %tmp5647 = getelementptr inbounds float* %tmp5646, i64 1 + %tmp5648 = getelementptr inbounds float* %tmp5647, i64 1 + %tmp5649 = getelementptr inbounds float* %tmp5648, i64 1 + %tmp5650 = getelementptr inbounds float* %tmp5649, i64 1 + %tmp5651 = getelementptr inbounds float* %tmp5650, i64 1 + %tmp5652 = getelementptr inbounds float* %tmp5651, i64 1 + %tmp5653 = getelementptr inbounds float* %tmp5652, i64 1 + %tmp5654 = getelementptr inbounds float* %tmp5653, i64 1 + %tmp5655 = getelementptr inbounds float* %tmp5654, i64 1 + %tmp5656 = getelementptr inbounds float* %tmp5655, i64 1 + %tmp5657 = getelementptr inbounds float* %tmp5656, i64 1 + %tmp5658 = getelementptr inbounds float* %tmp5657, i64 1 + %tmp5659 = getelementptr inbounds float* %tmp5658, i64 1 + %tmp5660 = getelementptr inbounds float* %tmp5659, i64 1 + %tmp5661 = getelementptr inbounds float* %tmp5660, i64 1 + %tmp5662 = getelementptr inbounds float* %tmp5661, i64 1 + %tmp5663 = getelementptr inbounds float* %tmp5662, i64 1 + %tmp5664 = getelementptr inbounds float* %tmp5663, i64 1 + %tmp5665 = getelementptr inbounds float* %tmp5664, i64 1 + %tmp5666 = getelementptr inbounds float* %tmp5665, i64 1 + %tmp5667 = getelementptr inbounds float* %tmp5666, i64 1 + %tmp5668 = getelementptr inbounds float* %tmp5667, i64 1 + %tmp5669 = getelementptr inbounds float* %tmp5668, i64 1 + %tmp5670 = getelementptr inbounds 
float* %tmp5669, i64 1 + %tmp5671 = getelementptr inbounds float* %tmp5670, i64 1 + %tmp5672 = getelementptr inbounds float* %tmp5671, i64 1 + %tmp5673 = getelementptr inbounds float* %tmp5672, i64 1 + %tmp5674 = getelementptr inbounds float* %tmp5673, i64 1 + %tmp5675 = getelementptr inbounds float* %tmp5674, i64 1 + %tmp5676 = getelementptr inbounds float* %tmp5675, i64 1 + %tmp5677 = getelementptr inbounds float* %tmp5676, i64 1 + %tmp5678 = getelementptr inbounds float* %tmp5677, i64 1 + %tmp5679 = getelementptr inbounds float* %tmp5678, i64 1 + %tmp5680 = getelementptr inbounds float* %tmp5679, i64 1 + %tmp5681 = getelementptr inbounds float* %tmp5680, i64 1 + %tmp5682 = getelementptr inbounds float* %tmp5681, i64 1 + %tmp5683 = getelementptr inbounds float* %tmp5682, i64 1 + %tmp5684 = getelementptr inbounds float* %tmp5683, i64 1 + %tmp5685 = getelementptr inbounds float* %tmp5684, i64 1 + %tmp5686 = getelementptr inbounds float* %tmp5685, i64 1 + %tmp5687 = getelementptr inbounds float* %tmp5686, i64 1 + %tmp5688 = getelementptr inbounds float* %tmp5687, i64 1 + %tmp5689 = getelementptr inbounds float* %tmp5688, i64 1 + %tmp5690 = getelementptr inbounds float* %tmp5689, i64 1 + %tmp5691 = getelementptr inbounds float* %tmp5690, i64 1 + %tmp5692 = getelementptr inbounds float* %tmp5691, i64 1 + %tmp5693 = getelementptr inbounds float* %tmp5692, i64 1 + %tmp5694 = getelementptr inbounds float* %tmp5693, i64 1 + %tmp5695 = getelementptr inbounds float* %tmp5694, i64 1 + %tmp5696 = getelementptr inbounds float* %tmp5695, i64 1 + %tmp5697 = getelementptr inbounds float* %tmp5696, i64 1 + %tmp5698 = getelementptr inbounds float* %tmp5697, i64 1 + %tmp5699 = getelementptr inbounds float* %tmp5698, i64 1 + %tmp5700 = getelementptr inbounds float* %tmp5699, i64 1 + %tmp5701 = getelementptr inbounds float* %tmp5700, i64 1 + %tmp5702 = getelementptr inbounds float* %tmp5701, i64 1 + %tmp5703 = getelementptr inbounds float* %tmp5702, i64 1 + %tmp5704 = getelementptr 
inbounds float* %tmp5703, i64 1 + %tmp5705 = getelementptr inbounds float* %tmp5704, i64 1 + %tmp5706 = getelementptr inbounds float* %tmp5705, i64 1 + %tmp5707 = getelementptr inbounds float* %tmp5706, i64 1 + %tmp5708 = getelementptr inbounds float* %tmp5707, i64 1 + %tmp5709 = getelementptr inbounds float* %tmp5708, i64 1 + %tmp5710 = getelementptr inbounds float* %tmp5709, i64 1 + %tmp5711 = getelementptr inbounds float* %tmp5710, i64 1 + %tmp5712 = getelementptr inbounds float* %tmp5711, i64 1 + %tmp5713 = getelementptr inbounds float* %tmp5712, i64 1 + %tmp5714 = getelementptr inbounds float* %tmp5713, i64 1 + %tmp5715 = getelementptr inbounds float* %tmp5714, i64 1 + %tmp5716 = getelementptr inbounds float* %tmp5715, i64 1 + %tmp5717 = getelementptr inbounds float* %tmp5716, i64 1 + %tmp5718 = getelementptr inbounds float* %tmp5717, i64 1 + %tmp5719 = getelementptr inbounds float* %tmp5718, i64 1 + %tmp5720 = getelementptr inbounds float* %tmp5719, i64 1 + %tmp5721 = getelementptr inbounds float* %tmp5720, i64 1 + %tmp5722 = getelementptr inbounds float* %tmp5721, i64 1 + %tmp5723 = getelementptr inbounds float* %tmp5722, i64 1 + %tmp5724 = getelementptr inbounds float* %tmp5723, i64 1 + %tmp5725 = getelementptr inbounds float* %tmp5724, i64 1 + %tmp5726 = getelementptr inbounds float* %tmp5725, i64 1 + %tmp5727 = getelementptr inbounds float* %tmp5726, i64 1 + %tmp5728 = getelementptr inbounds float* %tmp5727, i64 1 + %tmp5729 = getelementptr inbounds float* %tmp5728, i64 1 + %tmp5730 = getelementptr inbounds float* %tmp5729, i64 1 + %tmp5731 = getelementptr inbounds float* %tmp5730, i64 1 + %tmp5732 = getelementptr inbounds float* %tmp5731, i64 1 + %tmp5733 = getelementptr inbounds float* %tmp5732, i64 1 + %tmp5734 = getelementptr inbounds float* %tmp5733, i64 1 + %tmp5735 = getelementptr inbounds float* %tmp5734, i64 1 + %tmp5736 = getelementptr inbounds float* %tmp5735, i64 1 + %tmp5737 = getelementptr inbounds float* %tmp5736, i64 1 + %tmp5738 = 
getelementptr inbounds float* %tmp5737, i64 1 + %tmp5739 = getelementptr inbounds float* %tmp5738, i64 1 + %tmp5740 = getelementptr inbounds float* %tmp5739, i64 1 + %tmp5741 = getelementptr inbounds float* %tmp5740, i64 1 + %tmp5742 = getelementptr inbounds float* %tmp5741, i64 1 + %tmp5743 = getelementptr inbounds float* %tmp5742, i64 1 + %tmp5744 = getelementptr inbounds float* %tmp5743, i64 1 + %tmp5745 = getelementptr inbounds float* %tmp5744, i64 1 + %tmp5746 = getelementptr inbounds float* %tmp5745, i64 1 + %tmp5747 = getelementptr inbounds float* %tmp5746, i64 1 + %tmp5748 = getelementptr inbounds float* %tmp5747, i64 1 + %tmp5749 = getelementptr inbounds float* %tmp5748, i64 1 + %tmp5750 = getelementptr inbounds float* %tmp5749, i64 1 + %tmp5751 = getelementptr inbounds float* %tmp5750, i64 1 + %tmp5752 = getelementptr inbounds float* %tmp5751, i64 1 + %tmp5753 = getelementptr inbounds float* %tmp5752, i64 1 + %tmp5754 = getelementptr inbounds float* %tmp5753, i64 1 + %tmp5755 = getelementptr inbounds float* %tmp5754, i64 1 + %tmp5756 = getelementptr inbounds float* %tmp5755, i64 1 + %tmp5757 = getelementptr inbounds float* %tmp5756, i64 1 + %tmp5758 = getelementptr inbounds float* %tmp5757, i64 1 + %tmp5759 = getelementptr inbounds float* %tmp5758, i64 1 + %tmp5760 = getelementptr inbounds float* %tmp5759, i64 1 + %tmp5761 = getelementptr inbounds float* %tmp5760, i64 1 + %tmp5762 = getelementptr inbounds float* %tmp5761, i64 1 + %tmp5763 = getelementptr inbounds float* %tmp5762, i64 1 + %tmp5764 = getelementptr inbounds float* %tmp5763, i64 1 + %tmp5765 = getelementptr inbounds float* %tmp5764, i64 1 + %tmp5766 = getelementptr inbounds float* %tmp5765, i64 1 + %tmp5767 = getelementptr inbounds float* %tmp5766, i64 1 + %tmp5768 = getelementptr inbounds float* %tmp5767, i64 1 + %tmp5769 = getelementptr inbounds float* %tmp5768, i64 1 + %tmp5770 = getelementptr inbounds float* %tmp5769, i64 1 + %tmp5771 = getelementptr inbounds float* %tmp5770, i64 1 + 
%tmp5772 = getelementptr inbounds float* %tmp5771, i64 1 + %tmp5773 = getelementptr inbounds float* %tmp5772, i64 1 + %tmp5774 = getelementptr inbounds float* %tmp5773, i64 1 + %tmp5775 = getelementptr inbounds float* %tmp5774, i64 1 + %tmp5776 = getelementptr inbounds float* %tmp5775, i64 1 + %tmp5777 = getelementptr inbounds float* %tmp5776, i64 1 + %tmp5778 = getelementptr inbounds float* %tmp5777, i64 1 + %tmp5779 = getelementptr inbounds float* %tmp5778, i64 1 + %tmp5780 = getelementptr inbounds float* %tmp5779, i64 1 + %tmp5781 = getelementptr inbounds float* %tmp5780, i64 1 + %tmp5782 = getelementptr inbounds float* %tmp5781, i64 1 + %tmp5783 = getelementptr inbounds float* %tmp5782, i64 1 + %tmp5784 = getelementptr inbounds float* %tmp5783, i64 1 + %tmp5785 = getelementptr inbounds float* %tmp5784, i64 1 + %tmp5786 = getelementptr inbounds float* %tmp5785, i64 1 + %tmp5787 = getelementptr inbounds float* %tmp5786, i64 1 + %tmp5788 = getelementptr inbounds float* %tmp5787, i64 1 + %tmp5789 = getelementptr inbounds float* %tmp5788, i64 1 + %tmp5790 = getelementptr inbounds float* %tmp5789, i64 1 + %tmp5791 = getelementptr inbounds float* %tmp5790, i64 1 + %tmp5792 = getelementptr inbounds float* %tmp5791, i64 1 + %tmp5793 = getelementptr inbounds float* %tmp5792, i64 1 + %tmp5794 = getelementptr inbounds float* %tmp5793, i64 1 + %tmp5795 = getelementptr inbounds float* %tmp5794, i64 1 + %tmp5796 = getelementptr inbounds float* %tmp5795, i64 1 + %tmp5797 = getelementptr inbounds float* %tmp5796, i64 1 + %tmp5798 = getelementptr inbounds float* %tmp5797, i64 1 + %tmp5799 = getelementptr inbounds float* %tmp5798, i64 1 + %tmp5800 = getelementptr inbounds float* %tmp5799, i64 1 + %tmp5801 = getelementptr inbounds float* %tmp5800, i64 1 + %tmp5802 = getelementptr inbounds float* %tmp5801, i64 1 + %tmp5803 = getelementptr inbounds float* %tmp5802, i64 1 + %tmp5804 = getelementptr inbounds float* %tmp5803, i64 1 + %tmp5805 = getelementptr inbounds float* %tmp5804, 
i64 1 + %tmp5806 = getelementptr inbounds float* %tmp5805, i64 1 + %tmp5807 = getelementptr inbounds float* %tmp5806, i64 1 + %tmp5808 = getelementptr inbounds float* %tmp5807, i64 1 + %tmp5809 = getelementptr inbounds float* %tmp5808, i64 1 + %tmp5810 = getelementptr inbounds float* %tmp5809, i64 1 + %tmp5811 = getelementptr inbounds float* %tmp5810, i64 1 + %tmp5812 = getelementptr inbounds float* %tmp5811, i64 1 + %tmp5813 = getelementptr inbounds float* %tmp5812, i64 1 + %tmp5814 = getelementptr inbounds float* %tmp5813, i64 1 + %tmp5815 = getelementptr inbounds float* %tmp5814, i64 1 + %tmp5816 = getelementptr inbounds float* %tmp5815, i64 1 + %tmp5817 = getelementptr inbounds float* %tmp5816, i64 1 + %tmp5818 = getelementptr inbounds float* %tmp5817, i64 1 + %tmp5819 = getelementptr inbounds float* %tmp5818, i64 1 + %tmp5820 = getelementptr inbounds float* %tmp5819, i64 1 + %tmp5821 = getelementptr inbounds float* %tmp5820, i64 1 + %tmp5822 = getelementptr inbounds float* %tmp5821, i64 1 + %tmp5823 = getelementptr inbounds float* %tmp5822, i64 1 + %tmp5824 = getelementptr inbounds float* %tmp5823, i64 1 + %tmp5825 = getelementptr inbounds float* %tmp5824, i64 1 + %tmp5826 = getelementptr inbounds float* %tmp5825, i64 1 + %tmp5827 = getelementptr inbounds float* %tmp5826, i64 1 + %tmp5828 = getelementptr inbounds float* %tmp5827, i64 1 + %tmp5829 = getelementptr inbounds float* %tmp5828, i64 1 + %tmp5830 = getelementptr inbounds float* %tmp5829, i64 1 + %tmp5831 = getelementptr inbounds float* %tmp5830, i64 1 + %tmp5832 = getelementptr inbounds float* %tmp5831, i64 1 + %tmp5833 = getelementptr inbounds float* %tmp5832, i64 1 + %tmp5834 = getelementptr inbounds float* %tmp5833, i64 1 + %tmp5835 = getelementptr inbounds float* %tmp5834, i64 1 + %tmp5836 = getelementptr inbounds float* %tmp5835, i64 1 + %tmp5837 = getelementptr inbounds float* %tmp5836, i64 1 + %tmp5838 = getelementptr inbounds float* %tmp5837, i64 1 + %tmp5839 = getelementptr inbounds float* 
%tmp5838, i64 1 + %tmp5840 = getelementptr inbounds float* %tmp5839, i64 1 + %tmp5841 = getelementptr inbounds float* %tmp5840, i64 1 + %tmp5842 = getelementptr inbounds float* %tmp5841, i64 1 + %tmp5843 = getelementptr inbounds float* %tmp5842, i64 1 + %tmp5844 = getelementptr inbounds float* %tmp5843, i64 1 + %tmp5845 = getelementptr inbounds float* %tmp5844, i64 1 + %tmp5846 = getelementptr inbounds float* %tmp5845, i64 1 + %tmp5847 = getelementptr inbounds float* %tmp5846, i64 1 + %tmp5848 = getelementptr inbounds float* %tmp5847, i64 1 + %tmp5849 = getelementptr inbounds float* %tmp5848, i64 1 + %tmp5850 = getelementptr inbounds float* %tmp5849, i64 1 + %tmp5851 = getelementptr inbounds float* %tmp5850, i64 1 + %tmp5852 = getelementptr inbounds float* %tmp5851, i64 1 + %tmp5853 = getelementptr inbounds float* %tmp5852, i64 1 + %tmp5854 = getelementptr inbounds float* %tmp5853, i64 1 + %tmp5855 = getelementptr inbounds float* %tmp5854, i64 1 + %tmp5856 = getelementptr inbounds float* %tmp5855, i64 1 + %tmp5857 = getelementptr inbounds float* %tmp5856, i64 1 + %tmp5858 = getelementptr inbounds float* %tmp5857, i64 1 + %tmp5859 = getelementptr inbounds float* %tmp5858, i64 1 + %tmp5860 = getelementptr inbounds float* %tmp5859, i64 1 + %tmp5861 = getelementptr inbounds float* %tmp5860, i64 1 + %tmp5862 = getelementptr inbounds float* %tmp5861, i64 1 + %tmp5863 = getelementptr inbounds float* %tmp5862, i64 1 + %tmp5864 = getelementptr inbounds float* %tmp5863, i64 1 + %tmp5865 = getelementptr inbounds float* %tmp5864, i64 1 + %tmp5866 = getelementptr inbounds float* %tmp5865, i64 1 + %tmp5867 = getelementptr inbounds float* %tmp5866, i64 1 + %tmp5868 = getelementptr inbounds float* %tmp5867, i64 1 + %tmp5869 = getelementptr inbounds float* %tmp5868, i64 1 + %tmp5870 = getelementptr inbounds float* %tmp5869, i64 1 + %tmp5871 = getelementptr inbounds float* %tmp5870, i64 1 + %tmp5872 = getelementptr inbounds float* %tmp5871, i64 1 + %tmp5873 = getelementptr inbounds 
float* %tmp5872, i64 1 + %tmp5874 = getelementptr inbounds float* %tmp5873, i64 1 + %tmp5875 = getelementptr inbounds float* %tmp5874, i64 1 + %tmp5876 = getelementptr inbounds float* %tmp5875, i64 1 + %tmp5877 = getelementptr inbounds float* %tmp5876, i64 1 + %tmp5878 = getelementptr inbounds float* %tmp5877, i64 1 + %tmp5879 = getelementptr inbounds float* %tmp5878, i64 1 + %tmp5880 = getelementptr inbounds float* %tmp5879, i64 1 + %tmp5881 = getelementptr inbounds float* %tmp5880, i64 1 + %tmp5882 = getelementptr inbounds float* %tmp5881, i64 1 + %tmp5883 = getelementptr inbounds float* %tmp5882, i64 1 + %tmp5884 = getelementptr inbounds float* %tmp5883, i64 1 + %tmp5885 = getelementptr inbounds float* %tmp5884, i64 1 + %tmp5886 = getelementptr inbounds float* %tmp5885, i64 1 + %tmp5887 = getelementptr inbounds float* %tmp5886, i64 1 + %tmp5888 = getelementptr inbounds float* %tmp5887, i64 1 + %tmp5889 = getelementptr inbounds float* %tmp5888, i64 1 + %tmp5890 = getelementptr inbounds float* %tmp5889, i64 1 + %tmp5891 = getelementptr inbounds float* %tmp5890, i64 1 + %tmp5892 = getelementptr inbounds float* %tmp5891, i64 1 + %tmp5893 = getelementptr inbounds float* %tmp5892, i64 1 + %tmp5894 = getelementptr inbounds float* %tmp5893, i64 1 + %tmp5895 = getelementptr inbounds float* %tmp5894, i64 1 + %tmp5896 = getelementptr inbounds float* %tmp5895, i64 1 + %tmp5897 = getelementptr inbounds float* %tmp5896, i64 1 + %tmp5898 = getelementptr inbounds float* %tmp5897, i64 1 + %tmp5899 = getelementptr inbounds float* %tmp5898, i64 1 + %tmp5900 = getelementptr inbounds float* %tmp5899, i64 1 + %tmp5901 = getelementptr inbounds float* %tmp5900, i64 1 + %tmp5902 = getelementptr inbounds float* %tmp5901, i64 1 + %tmp5903 = getelementptr inbounds float* %tmp5902, i64 1 + %tmp5904 = getelementptr inbounds float* %tmp5903, i64 1 + %tmp5905 = getelementptr inbounds float* %tmp5904, i64 1 + %tmp5906 = getelementptr inbounds float* %tmp5905, i64 1 + %tmp5907 = getelementptr 
inbounds float* %tmp5906, i64 1 + %tmp5908 = getelementptr inbounds float* %tmp5907, i64 1 + %tmp5909 = getelementptr inbounds float* %tmp5908, i64 1 + %tmp5910 = getelementptr inbounds float* %tmp5909, i64 1 + %tmp5911 = getelementptr inbounds float* %tmp5910, i64 1 + %tmp5912 = getelementptr inbounds float* %tmp5911, i64 1 + %tmp5913 = getelementptr inbounds float* %tmp5912, i64 1 + %tmp5914 = getelementptr inbounds float* %tmp5913, i64 1 + %tmp5915 = getelementptr inbounds float* %tmp5914, i64 1 + %tmp5916 = getelementptr inbounds float* %tmp5915, i64 1 + %tmp5917 = getelementptr inbounds float* %tmp5916, i64 1 + %tmp5918 = getelementptr inbounds float* %tmp5917, i64 1 + %tmp5919 = getelementptr inbounds float* %tmp5918, i64 1 + %tmp5920 = getelementptr inbounds float* %tmp5919, i64 1 + %tmp5921 = getelementptr inbounds float* %tmp5920, i64 1 + %tmp5922 = getelementptr inbounds float* %tmp5921, i64 1 + %tmp5923 = getelementptr inbounds float* %tmp5922, i64 1 + %tmp5924 = getelementptr inbounds float* %tmp5923, i64 1 + %tmp5925 = getelementptr inbounds float* %tmp5924, i64 1 + %tmp5926 = getelementptr inbounds float* %tmp5925, i64 1 + %tmp5927 = getelementptr inbounds float* %tmp5926, i64 1 + %tmp5928 = getelementptr inbounds float* %tmp5927, i64 1 + %tmp5929 = getelementptr inbounds float* %tmp5928, i64 1 + %tmp5930 = getelementptr inbounds float* %tmp5929, i64 1 + %tmp5931 = getelementptr inbounds float* %tmp5930, i64 1 + %tmp5932 = getelementptr inbounds float* %tmp5931, i64 1 + %tmp5933 = getelementptr inbounds float* %tmp5932, i64 1 + %tmp5934 = getelementptr inbounds float* %tmp5933, i64 1 + %tmp5935 = getelementptr inbounds float* %tmp5934, i64 1 + %tmp5936 = getelementptr inbounds float* %tmp5935, i64 1 + %tmp5937 = getelementptr inbounds float* %tmp5936, i64 1 + %tmp5938 = getelementptr inbounds float* %tmp5937, i64 1 + %tmp5939 = getelementptr inbounds float* %tmp5938, i64 1 + %tmp5940 = getelementptr inbounds float* %tmp5939, i64 1 + %tmp5941 = 
getelementptr inbounds float* %tmp5940, i64 1 + %tmp5942 = getelementptr inbounds float* %tmp5941, i64 1 + %tmp5943 = getelementptr inbounds float* %tmp5942, i64 1 + %tmp5944 = getelementptr inbounds float* %tmp5943, i64 1 + %tmp5945 = getelementptr inbounds float* %tmp5944, i64 1 + %tmp5946 = getelementptr inbounds float* %tmp5945, i64 1 + %tmp5947 = getelementptr inbounds float* %tmp5946, i64 1 + %tmp5948 = getelementptr inbounds float* %tmp5947, i64 1 + %tmp5949 = getelementptr inbounds float* %tmp5948, i64 1 + %tmp5950 = getelementptr inbounds float* %tmp5949, i64 1 + %tmp5951 = getelementptr inbounds float* %tmp5950, i64 1 + %tmp5952 = getelementptr inbounds float* %tmp5951, i64 1 + %tmp5953 = getelementptr inbounds float* %tmp5952, i64 1 + %tmp5954 = getelementptr inbounds float* %tmp5953, i64 1 + %tmp5955 = getelementptr inbounds float* %tmp5954, i64 1 + %tmp5956 = getelementptr inbounds float* %tmp5955, i64 1 + %tmp5957 = getelementptr inbounds float* %tmp5956, i64 1 + %tmp5958 = getelementptr inbounds float* %tmp5957, i64 1 + %tmp5959 = getelementptr inbounds float* %tmp5958, i64 1 + %tmp5960 = getelementptr inbounds float* %tmp5959, i64 1 + %tmp5961 = getelementptr inbounds float* %tmp5960, i64 1 + %tmp5962 = getelementptr inbounds float* %tmp5961, i64 1 + %tmp5963 = getelementptr inbounds float* %tmp5962, i64 1 + %tmp5964 = getelementptr inbounds float* %tmp5963, i64 1 + %tmp5965 = getelementptr inbounds float* %tmp5964, i64 1 + %tmp5966 = getelementptr inbounds float* %tmp5965, i64 1 + %tmp5967 = getelementptr inbounds float* %tmp5966, i64 1 + %tmp5968 = getelementptr inbounds float* %tmp5967, i64 1 + %tmp5969 = getelementptr inbounds float* %tmp5968, i64 1 + %tmp5970 = getelementptr inbounds float* %tmp5969, i64 1 + %tmp5971 = getelementptr inbounds float* %tmp5970, i64 1 + %tmp5972 = getelementptr inbounds float* %tmp5971, i64 1 + %tmp5973 = getelementptr inbounds float* %tmp5972, i64 1 + %tmp5974 = getelementptr inbounds float* %tmp5973, i64 1 + 
%tmp5975 = getelementptr inbounds float* %tmp5974, i64 1 + %tmp5976 = getelementptr inbounds float* %tmp5975, i64 1 + %tmp5977 = getelementptr inbounds float* %tmp5976, i64 1 + %tmp5978 = getelementptr inbounds float* %tmp5977, i64 1 + %tmp5979 = getelementptr inbounds float* %tmp5978, i64 1 + %tmp5980 = getelementptr inbounds float* %tmp5979, i64 1 + %tmp5981 = getelementptr inbounds float* %tmp5980, i64 1 + %tmp5982 = getelementptr inbounds float* %tmp5981, i64 1 + %tmp5983 = getelementptr inbounds float* %tmp5982, i64 1 + %tmp5984 = getelementptr inbounds float* %tmp5983, i64 1 + %tmp5985 = getelementptr inbounds float* %tmp5984, i64 1 + %tmp5986 = getelementptr inbounds float* %tmp5985, i64 1 + %tmp5987 = getelementptr inbounds float* %tmp5986, i64 1 + %tmp5988 = getelementptr inbounds float* %tmp5987, i64 1 + %tmp5989 = getelementptr inbounds float* %tmp5988, i64 1 + %tmp5990 = getelementptr inbounds float* %tmp5989, i64 1 + %tmp5991 = getelementptr inbounds float* %tmp5990, i64 1 + %tmp5992 = getelementptr inbounds float* %tmp5991, i64 1 + %tmp5993 = getelementptr inbounds float* %tmp5992, i64 1 + %tmp5994 = getelementptr inbounds float* %tmp5993, i64 1 + %tmp5995 = getelementptr inbounds float* %tmp5994, i64 1 + %tmp5996 = getelementptr inbounds float* %tmp5995, i64 1 + %tmp5997 = getelementptr inbounds float* %tmp5996, i64 1 + %tmp5998 = getelementptr inbounds float* %tmp5997, i64 1 + %tmp5999 = getelementptr inbounds float* %tmp5998, i64 1 + %tmp6000 = getelementptr inbounds float* %tmp5999, i64 1 + %tmp6001 = getelementptr inbounds float* %tmp6000, i64 1 + %tmp6002 = getelementptr inbounds float* %tmp6001, i64 1 + %tmp6003 = getelementptr inbounds float* %tmp6002, i64 1 + %tmp6004 = getelementptr inbounds float* %tmp6003, i64 1 + %tmp6005 = getelementptr inbounds float* %tmp6004, i64 1 + %tmp6006 = getelementptr inbounds float* %tmp6005, i64 1 + %tmp6007 = getelementptr inbounds float* %tmp6006, i64 1 + %tmp6008 = getelementptr inbounds float* %tmp6007, 
i64 1 + %tmp6009 = getelementptr inbounds float* %tmp6008, i64 1 + %tmp6010 = getelementptr inbounds float* %tmp6009, i64 1 + %tmp6011 = getelementptr inbounds float* %tmp6010, i64 1 + %tmp6012 = getelementptr inbounds float* %tmp6011, i64 1 + %tmp6013 = getelementptr inbounds float* %tmp6012, i64 1 + %tmp6014 = getelementptr inbounds float* %tmp6013, i64 1 + %tmp6015 = getelementptr inbounds float* %tmp6014, i64 1 + %tmp6016 = getelementptr inbounds float* %tmp6015, i64 1 + %tmp6017 = getelementptr inbounds float* %tmp6016, i64 1 + %tmp6018 = getelementptr inbounds float* %tmp6017, i64 1 + %tmp6019 = getelementptr inbounds float* %tmp6018, i64 1 + %tmp6020 = getelementptr inbounds float* %tmp6019, i64 1 + %tmp6021 = getelementptr inbounds float* %tmp6020, i64 1 + %tmp6022 = getelementptr inbounds float* %tmp6021, i64 1 + %tmp6023 = getelementptr inbounds float* %tmp6022, i64 1 + %tmp6024 = getelementptr inbounds float* %tmp6023, i64 1 + %tmp6025 = getelementptr inbounds float* %tmp6024, i64 1 + %tmp6026 = getelementptr inbounds float* %tmp6025, i64 1 + %tmp6027 = getelementptr inbounds float* %tmp6026, i64 1 + %tmp6028 = getelementptr inbounds float* %tmp6027, i64 1 + %tmp6029 = getelementptr inbounds float* %tmp6028, i64 1 + %tmp6030 = getelementptr inbounds float* %tmp6029, i64 1 + %tmp6031 = getelementptr inbounds float* %tmp6030, i64 1 + %tmp6032 = getelementptr inbounds float* %tmp6031, i64 1 + %tmp6033 = getelementptr inbounds float* %tmp6032, i64 1 + %tmp6034 = getelementptr inbounds float* %tmp6033, i64 1 + %tmp6035 = getelementptr inbounds float* %tmp6034, i64 1 + %tmp6036 = getelementptr inbounds float* %tmp6035, i64 1 + %tmp6037 = getelementptr inbounds float* %tmp6036, i64 1 + %tmp6038 = getelementptr inbounds float* %tmp6037, i64 1 + %tmp6039 = getelementptr inbounds float* %tmp6038, i64 1 + %tmp6040 = getelementptr inbounds float* %tmp6039, i64 1 + %tmp6041 = getelementptr inbounds float* %tmp6040, i64 1 + %tmp6042 = getelementptr inbounds float* 
%tmp6041, i64 1 + %tmp6043 = getelementptr inbounds float* %tmp6042, i64 1 + %tmp6044 = getelementptr inbounds float* %tmp6043, i64 1 + %tmp6045 = getelementptr inbounds float* %tmp6044, i64 1 + %tmp6046 = getelementptr inbounds float* %tmp6045, i64 1 + %tmp6047 = getelementptr inbounds float* %tmp6046, i64 1 + %tmp6048 = getelementptr inbounds float* %tmp6047, i64 1 + %tmp6049 = getelementptr inbounds float* %tmp6048, i64 1 + %tmp6050 = getelementptr inbounds float* %tmp6049, i64 1 + %tmp6051 = getelementptr inbounds float* %tmp6050, i64 1 + %tmp6052 = getelementptr inbounds float* %tmp6051, i64 1 + %tmp6053 = getelementptr inbounds float* %tmp6052, i64 1 + %tmp6054 = getelementptr inbounds float* %tmp6053, i64 1 + %tmp6055 = getelementptr inbounds float* %tmp6054, i64 1 + %tmp6056 = getelementptr inbounds float* %tmp6055, i64 1 + %tmp6057 = getelementptr inbounds float* %tmp6056, i64 1 + %tmp6058 = getelementptr inbounds float* %tmp6057, i64 1 + %tmp6059 = getelementptr inbounds float* %tmp6058, i64 1 + %tmp6060 = getelementptr inbounds float* %tmp6059, i64 1 + %tmp6061 = getelementptr inbounds float* %tmp6060, i64 1 + %tmp6062 = getelementptr inbounds float* %tmp6061, i64 1 + %tmp6063 = getelementptr inbounds float* %tmp6062, i64 1 + %tmp6064 = getelementptr inbounds float* %tmp6063, i64 1 + %tmp6065 = getelementptr inbounds float* %tmp6064, i64 1 + %tmp6066 = getelementptr inbounds float* %tmp6065, i64 1 + %tmp6067 = getelementptr inbounds float* %tmp6066, i64 1 + %tmp6068 = getelementptr inbounds float* %tmp6067, i64 1 + %tmp6069 = getelementptr inbounds float* %tmp6068, i64 1 + %tmp6070 = getelementptr inbounds float* %tmp6069, i64 1 + %tmp6071 = getelementptr inbounds float* %tmp6070, i64 1 + %tmp6072 = getelementptr inbounds float* %tmp6071, i64 1 + %tmp6073 = getelementptr inbounds float* %tmp6072, i64 1 + %tmp6074 = getelementptr inbounds float* %tmp6073, i64 1 + %tmp6075 = getelementptr inbounds float* %tmp6074, i64 1 + %tmp6076 = getelementptr inbounds 
float* %tmp6075, i64 1 + %tmp6077 = getelementptr inbounds float* %tmp6076, i64 1 + %tmp6078 = getelementptr inbounds float* %tmp6077, i64 1 + %tmp6079 = getelementptr inbounds float* %tmp6078, i64 1 + %tmp6080 = getelementptr inbounds float* %tmp6079, i64 1 + %tmp6081 = getelementptr inbounds float* %tmp6080, i64 1 + %tmp6082 = getelementptr inbounds float* %tmp6081, i64 1 + %tmp6083 = getelementptr inbounds float* %tmp6082, i64 1 + %tmp6084 = getelementptr inbounds float* %tmp6083, i64 1 + %tmp6085 = getelementptr inbounds float* %tmp6084, i64 1 + %tmp6086 = getelementptr inbounds float* %tmp6085, i64 1 + %tmp6087 = getelementptr inbounds float* %tmp6086, i64 1 + %tmp6088 = getelementptr inbounds float* %tmp6087, i64 1 + %tmp6089 = getelementptr inbounds float* %tmp6088, i64 1 + %tmp6090 = getelementptr inbounds float* %tmp6089, i64 1 + %tmp6091 = getelementptr inbounds float* %tmp6090, i64 1 + %tmp6092 = getelementptr inbounds float* %tmp6091, i64 1 + %tmp6093 = getelementptr inbounds float* %tmp6092, i64 1 + %tmp6094 = getelementptr inbounds float* %tmp6093, i64 1 + %tmp6095 = getelementptr inbounds float* %tmp6094, i64 1 + %tmp6096 = getelementptr inbounds float* %tmp6095, i64 1 + %tmp6097 = getelementptr inbounds float* %tmp6096, i64 1 + %tmp6098 = getelementptr inbounds float* %tmp6097, i64 1 + %tmp6099 = getelementptr inbounds float* %tmp6098, i64 1 + %tmp6100 = getelementptr inbounds float* %tmp6099, i64 1 + %tmp6101 = getelementptr inbounds float* %tmp6100, i64 1 + %tmp6102 = getelementptr inbounds float* %tmp6101, i64 1 + %tmp6103 = getelementptr inbounds float* %tmp6102, i64 1 + %tmp6104 = getelementptr inbounds float* %tmp6103, i64 1 + %tmp6105 = getelementptr inbounds float* %tmp6104, i64 1 + %tmp6106 = getelementptr inbounds float* %tmp6105, i64 1 + %tmp6107 = getelementptr inbounds float* %tmp6106, i64 1 + %tmp6108 = getelementptr inbounds float* %tmp6107, i64 1 + %tmp6109 = getelementptr inbounds float* %tmp6108, i64 1 + %tmp6110 = getelementptr 
inbounds float* %tmp6109, i64 1 + %tmp6111 = getelementptr inbounds float* %tmp6110, i64 1 + %tmp6112 = getelementptr inbounds float* %tmp6111, i64 1 + %tmp6113 = getelementptr inbounds float* %tmp6112, i64 1 + %tmp6114 = getelementptr inbounds float* %tmp6113, i64 1 + %tmp6115 = getelementptr inbounds float* %tmp6114, i64 1 + %tmp6116 = getelementptr inbounds float* %tmp6115, i64 1 + %tmp6117 = getelementptr inbounds float* %tmp6116, i64 1 + %tmp6118 = getelementptr inbounds float* %tmp6117, i64 1 + %tmp6119 = getelementptr inbounds float* %tmp6118, i64 1 + %tmp6120 = getelementptr inbounds float* %tmp6119, i64 1 + %tmp6121 = getelementptr inbounds float* %tmp6120, i64 1 + %tmp6122 = getelementptr inbounds float* %tmp6121, i64 1 + %tmp6123 = getelementptr inbounds float* %tmp6122, i64 1 + %tmp6124 = getelementptr inbounds float* %tmp6123, i64 1 + %tmp6125 = getelementptr inbounds float* %tmp6124, i64 1 + %tmp6126 = getelementptr inbounds float* %tmp6125, i64 1 + %tmp6127 = getelementptr inbounds float* %tmp6126, i64 1 + %tmp6128 = getelementptr inbounds float* %tmp6127, i64 1 + %tmp6129 = getelementptr inbounds float* %tmp6128, i64 1 + %tmp6130 = getelementptr inbounds float* %tmp6129, i64 1 + %tmp6131 = getelementptr inbounds float* %tmp6130, i64 1 + %tmp6132 = getelementptr inbounds float* %tmp6131, i64 1 + %tmp6133 = getelementptr inbounds float* %tmp6132, i64 1 + %tmp6134 = getelementptr inbounds float* %tmp6133, i64 1 + %tmp6135 = getelementptr inbounds float* %tmp6134, i64 1 + %tmp6136 = getelementptr inbounds float* %tmp6135, i64 1 + %tmp6137 = getelementptr inbounds float* %tmp6136, i64 1 + %tmp6138 = getelementptr inbounds float* %tmp6137, i64 1 + %tmp6139 = getelementptr inbounds float* %tmp6138, i64 1 + %tmp6140 = getelementptr inbounds float* %tmp6139, i64 1 + %tmp6141 = getelementptr inbounds float* %tmp6140, i64 1 + %tmp6142 = getelementptr inbounds float* %tmp6141, i64 1 + %tmp6143 = getelementptr inbounds float* %tmp6142, i64 1 + %tmp6144 = 
getelementptr inbounds float* %tmp6143, i64 1 + %tmp6145 = getelementptr inbounds float* %tmp6144, i64 1 + %tmp6146 = getelementptr inbounds float* %tmp6145, i64 1 + %tmp6147 = getelementptr inbounds float* %tmp6146, i64 1 + %tmp6148 = getelementptr inbounds float* %tmp6147, i64 1 + %tmp6149 = getelementptr inbounds float* %tmp6148, i64 1 + %tmp6150 = getelementptr inbounds float* %tmp6149, i64 1 + %tmp6151 = getelementptr inbounds float* %tmp6150, i64 1 + %tmp6152 = getelementptr inbounds float* %tmp6151, i64 1 + %tmp6153 = getelementptr inbounds float* %tmp6152, i64 1 + %tmp6154 = getelementptr inbounds float* %tmp6153, i64 1 + %tmp6155 = getelementptr inbounds float* %tmp6154, i64 1 + %tmp6156 = getelementptr inbounds float* %tmp6155, i64 1 + %tmp6157 = getelementptr inbounds float* %tmp6156, i64 1 + %tmp6158 = getelementptr inbounds float* %tmp6157, i64 1 + %tmp6159 = getelementptr inbounds float* %tmp6158, i64 1 + %tmp6160 = getelementptr inbounds float* %tmp6159, i64 1 + %tmp6161 = getelementptr inbounds float* %tmp6160, i64 1 + %tmp6162 = getelementptr inbounds float* %tmp6161, i64 1 + %tmp6163 = getelementptr inbounds float* %tmp6162, i64 1 + %tmp6164 = getelementptr inbounds float* %tmp6163, i64 1 + %tmp6165 = getelementptr inbounds float* %tmp6164, i64 1 + %tmp6166 = getelementptr inbounds float* %tmp6165, i64 1 + %tmp6167 = getelementptr inbounds float* %tmp6166, i64 1 + %tmp6168 = getelementptr inbounds float* %tmp6167, i64 1 + %tmp6169 = getelementptr inbounds float* %tmp6168, i64 1 + %tmp6170 = getelementptr inbounds float* %tmp6169, i64 1 + %tmp6171 = getelementptr inbounds float* %tmp6170, i64 1 + %tmp6172 = getelementptr inbounds float* %tmp6171, i64 1 + %tmp6173 = getelementptr inbounds float* %tmp6172, i64 1 + %tmp6174 = getelementptr inbounds float* %tmp6173, i64 1 + %tmp6175 = getelementptr inbounds float* %tmp6174, i64 1 + %tmp6176 = getelementptr inbounds float* %tmp6175, i64 1 + %tmp6177 = getelementptr inbounds float* %tmp6176, i64 1 + 
%tmp6178 = getelementptr inbounds float* %tmp6177, i64 1 + %tmp6179 = getelementptr inbounds float* %tmp6178, i64 1 + %tmp6180 = getelementptr inbounds float* %tmp6179, i64 1 + %tmp6181 = getelementptr inbounds float* %tmp6180, i64 1 + %tmp6182 = getelementptr inbounds float* %tmp6181, i64 1 + %tmp6183 = getelementptr inbounds float* %tmp6182, i64 1 + %tmp6184 = getelementptr inbounds float* %tmp6183, i64 1 + %tmp6185 = getelementptr inbounds float* %tmp6184, i64 1 + %tmp6186 = getelementptr inbounds float* %tmp6185, i64 1 + %tmp6187 = getelementptr inbounds float* %tmp6186, i64 1 + %tmp6188 = getelementptr inbounds float* %tmp6187, i64 1 + %tmp6189 = getelementptr inbounds float* %tmp6188, i64 1 + %tmp6190 = getelementptr inbounds float* %tmp6189, i64 1 + %tmp6191 = getelementptr inbounds float* %tmp6190, i64 1 + %tmp6192 = getelementptr inbounds float* %tmp6191, i64 1 + %tmp6193 = getelementptr inbounds float* %tmp6192, i64 1 + %tmp6194 = getelementptr inbounds float* %tmp6193, i64 1 + %tmp6195 = getelementptr inbounds float* %tmp6194, i64 1 + %tmp6196 = getelementptr inbounds float* %tmp6195, i64 1 + %tmp6197 = getelementptr inbounds float* %tmp6196, i64 1 + %tmp6198 = getelementptr inbounds float* %tmp6197, i64 1 + %tmp6199 = getelementptr inbounds float* %tmp6198, i64 1 + %tmp6200 = getelementptr inbounds float* %tmp6199, i64 1 + %tmp6201 = getelementptr inbounds float* %tmp6200, i64 1 + %tmp6202 = getelementptr inbounds float* %tmp6201, i64 1 + %tmp6203 = getelementptr inbounds float* %tmp6202, i64 1 + %tmp6204 = getelementptr inbounds float* %tmp6203, i64 1 + %tmp6205 = getelementptr inbounds float* %tmp6204, i64 1 + %tmp6206 = getelementptr inbounds float* %tmp6205, i64 1 + %tmp6207 = getelementptr inbounds float* %tmp6206, i64 1 + %tmp6208 = getelementptr inbounds float* %tmp6207, i64 1 + %tmp6209 = getelementptr inbounds float* %tmp6208, i64 1 + %tmp6210 = getelementptr inbounds float* %tmp6209, i64 1 + %tmp6211 = getelementptr inbounds float* %tmp6210, 
i64 1 + %tmp6212 = getelementptr inbounds float* %tmp6211, i64 1 + %tmp6213 = getelementptr inbounds float* %tmp6212, i64 1 + %tmp6214 = getelementptr inbounds float* %tmp6213, i64 1 + %tmp6215 = getelementptr inbounds float* %tmp6214, i64 1 + %tmp6216 = getelementptr inbounds float* %tmp6215, i64 1 + %tmp6217 = getelementptr inbounds float* %tmp6216, i64 1 + %tmp6218 = getelementptr inbounds float* %tmp6217, i64 1 + %tmp6219 = getelementptr inbounds float* %tmp6218, i64 1 + %tmp6220 = getelementptr inbounds float* %tmp6219, i64 1 + %tmp6221 = getelementptr inbounds float* %tmp6220, i64 1 + %tmp6222 = getelementptr inbounds float* %tmp6221, i64 1 + %tmp6223 = getelementptr inbounds float* %tmp6222, i64 1 + %tmp6224 = getelementptr inbounds float* %tmp6223, i64 1 + %tmp6225 = getelementptr inbounds float* %tmp6224, i64 1 + %tmp6226 = getelementptr inbounds float* %tmp6225, i64 1 + %tmp6227 = getelementptr inbounds float* %tmp6226, i64 1 + %tmp6228 = getelementptr inbounds float* %tmp6227, i64 1 + %tmp6229 = getelementptr inbounds float* %tmp6228, i64 1 + %tmp6230 = getelementptr inbounds float* %tmp6229, i64 1 + %tmp6231 = getelementptr inbounds float* %tmp6230, i64 1 + %tmp6232 = getelementptr inbounds float* %tmp6231, i64 1 + %tmp6233 = getelementptr inbounds float* %tmp6232, i64 1 + %tmp6234 = getelementptr inbounds float* %tmp6233, i64 1 + %tmp6235 = getelementptr inbounds float* %tmp6234, i64 1 + %tmp6236 = getelementptr inbounds float* %tmp6235, i64 1 + %tmp6237 = getelementptr inbounds float* %tmp6236, i64 1 + %tmp6238 = getelementptr inbounds float* %tmp6237, i64 1 + %tmp6239 = getelementptr inbounds float* %tmp6238, i64 1 + %tmp6240 = getelementptr inbounds float* %tmp6239, i64 1 + %tmp6241 = getelementptr inbounds float* %tmp6240, i64 1 + %tmp6242 = getelementptr inbounds float* %tmp6241, i64 1 + %tmp6243 = getelementptr inbounds float* %tmp6242, i64 1 + %tmp6244 = getelementptr inbounds float* %tmp6243, i64 1 + %tmp6245 = getelementptr inbounds float* 
%tmp6244, i64 1 + %tmp6246 = getelementptr inbounds float* %tmp6245, i64 1 + %tmp6247 = getelementptr inbounds float* %tmp6246, i64 1 + %tmp6248 = getelementptr inbounds float* %tmp6247, i64 1 + %tmp6249 = getelementptr inbounds float* %tmp6248, i64 1 + %tmp6250 = getelementptr inbounds float* %tmp6249, i64 1 + %tmp6251 = getelementptr inbounds float* %tmp6250, i64 1 + %tmp6252 = getelementptr inbounds float* %tmp6251, i64 1 + %tmp6253 = getelementptr inbounds float* %tmp6252, i64 1 + %tmp6254 = getelementptr inbounds float* %tmp6253, i64 1 + %tmp6255 = getelementptr inbounds float* %tmp6254, i64 1 + %tmp6256 = getelementptr inbounds float* %tmp6255, i64 1 + %tmp6257 = getelementptr inbounds float* %tmp6256, i64 1 + %tmp6258 = getelementptr inbounds float* %tmp6257, i64 1 + %tmp6259 = getelementptr inbounds float* %tmp6258, i64 1 + %tmp6260 = getelementptr inbounds float* %tmp6259, i64 1 + %tmp6261 = getelementptr inbounds float* %tmp6260, i64 1 + %tmp6262 = getelementptr inbounds float* %tmp6261, i64 1 + %tmp6263 = getelementptr inbounds float* %tmp6262, i64 1 + %tmp6264 = getelementptr inbounds float* %tmp6263, i64 1 + %tmp6265 = getelementptr inbounds float* %tmp6264, i64 1 + %tmp6266 = getelementptr inbounds float* %tmp6265, i64 1 + %tmp6267 = getelementptr inbounds float* %tmp6266, i64 1 + %tmp6268 = getelementptr inbounds float* %tmp6267, i64 1 + %tmp6269 = getelementptr inbounds float* %tmp6268, i64 1 + %tmp6270 = getelementptr inbounds float* %tmp6269, i64 1 + %tmp6271 = getelementptr inbounds float* %tmp6270, i64 1 + %tmp6272 = getelementptr inbounds float* %tmp6271, i64 1 + %tmp6273 = getelementptr inbounds float* %tmp6272, i64 1 + %tmp6274 = getelementptr inbounds float* %tmp6273, i64 1 + %tmp6275 = getelementptr inbounds float* %tmp6274, i64 1 + %tmp6276 = getelementptr inbounds float* %tmp6275, i64 1 + %tmp6277 = getelementptr inbounds float* %tmp6276, i64 1 + %tmp6278 = getelementptr inbounds float* %tmp6277, i64 1 + %tmp6279 = getelementptr inbounds 
float* %tmp6278, i64 1 + %tmp6280 = getelementptr inbounds float* %tmp6279, i64 1 + %tmp6281 = getelementptr inbounds float* %tmp6280, i64 1 + %tmp6282 = getelementptr inbounds float* %tmp6281, i64 1 + %tmp6283 = getelementptr inbounds float* %tmp6282, i64 1 + %tmp6284 = getelementptr inbounds float* %tmp6283, i64 1 + %tmp6285 = getelementptr inbounds float* %tmp6284, i64 1 + %tmp6286 = getelementptr inbounds float* %tmp6285, i64 1 + %tmp6287 = getelementptr inbounds float* %tmp6286, i64 1 + %tmp6288 = getelementptr inbounds float* %tmp6287, i64 1 + %tmp6289 = getelementptr inbounds float* %tmp6288, i64 1 + %tmp6290 = getelementptr inbounds float* %tmp6289, i64 1 + %tmp6291 = getelementptr inbounds float* %tmp6290, i64 1 + %tmp6292 = getelementptr inbounds float* %tmp6291, i64 1 + %tmp6293 = getelementptr inbounds float* %tmp6292, i64 1 + %tmp6294 = getelementptr inbounds float* %tmp6293, i64 1 + %tmp6295 = getelementptr inbounds float* %tmp6294, i64 1 + %tmp6296 = getelementptr inbounds float* %tmp6295, i64 1 + %tmp6297 = getelementptr inbounds float* %tmp6296, i64 1 + %tmp6298 = getelementptr inbounds float* %tmp6297, i64 1 + %tmp6299 = getelementptr inbounds float* %tmp6298, i64 1 + %tmp6300 = getelementptr inbounds float* %tmp6299, i64 1 + %tmp6301 = getelementptr inbounds float* %tmp6300, i64 1 + %tmp6302 = getelementptr inbounds float* %tmp6301, i64 1 + %tmp6303 = getelementptr inbounds float* %tmp6302, i64 1 + %tmp6304 = getelementptr inbounds float* %tmp6303, i64 1 + %tmp6305 = getelementptr inbounds float* %tmp6304, i64 1 + %tmp6306 = getelementptr inbounds float* %tmp6305, i64 1 + %tmp6307 = getelementptr inbounds float* %tmp6306, i64 1 + %tmp6308 = getelementptr inbounds float* %tmp6307, i64 1 + %tmp6309 = getelementptr inbounds float* %tmp6308, i64 1 + %tmp6310 = getelementptr inbounds float* %tmp6309, i64 1 + %tmp6311 = getelementptr inbounds float* %tmp6310, i64 1 + %tmp6312 = getelementptr inbounds float* %tmp6311, i64 1 + %tmp6313 = getelementptr 
inbounds float* %tmp6312, i64 1 + %tmp6314 = getelementptr inbounds float* %tmp6313, i64 1 + %tmp6315 = getelementptr inbounds float* %tmp6314, i64 1 + %tmp6316 = getelementptr inbounds float* %tmp6315, i64 1 + %tmp6317 = getelementptr inbounds float* %tmp6316, i64 1 + %tmp6318 = getelementptr inbounds float* %tmp6317, i64 1 + %tmp6319 = getelementptr inbounds float* %tmp6318, i64 1 + %tmp6320 = getelementptr inbounds float* %tmp6319, i64 1 + %tmp6321 = getelementptr inbounds float* %tmp6320, i64 1 + %tmp6322 = getelementptr inbounds float* %tmp6321, i64 1 + %tmp6323 = getelementptr inbounds float* %tmp6322, i64 1 + %tmp6324 = getelementptr inbounds float* %tmp6323, i64 1 + %tmp6325 = getelementptr inbounds float* %tmp6324, i64 1 + %tmp6326 = getelementptr inbounds float* %tmp6325, i64 1 + %tmp6327 = getelementptr inbounds float* %tmp6326, i64 1 + %tmp6328 = getelementptr inbounds float* %tmp6327, i64 1 + %tmp6329 = getelementptr inbounds float* %tmp6328, i64 1 + %tmp6330 = getelementptr inbounds float* %tmp6329, i64 1 + %tmp6331 = getelementptr inbounds float* %tmp6330, i64 1 + %tmp6332 = getelementptr inbounds float* %tmp6331, i64 1 + %tmp6333 = getelementptr inbounds float* %tmp6332, i64 1 + %tmp6334 = getelementptr inbounds float* %tmp6333, i64 1 + %tmp6335 = getelementptr inbounds float* %tmp6334, i64 1 + %tmp6336 = getelementptr inbounds float* %tmp6335, i64 1 + %tmp6337 = getelementptr inbounds float* %tmp6336, i64 1 + %tmp6338 = getelementptr inbounds float* %tmp6337, i64 1 + %tmp6339 = getelementptr inbounds float* %tmp6338, i64 1 + %tmp6340 = getelementptr inbounds float* %tmp6339, i64 1 + %tmp6341 = getelementptr inbounds float* %tmp6340, i64 1 + %tmp6342 = getelementptr inbounds float* %tmp6341, i64 1 + %tmp6343 = getelementptr inbounds float* %tmp6342, i64 1 + %tmp6344 = getelementptr inbounds float* %tmp6343, i64 1 + %tmp6345 = getelementptr inbounds float* %tmp6344, i64 1 + %tmp6346 = getelementptr inbounds float* %tmp6345, i64 1 + %tmp6347 = 
getelementptr inbounds float* %tmp6346, i64 1 + %tmp6348 = getelementptr inbounds float* %tmp6347, i64 1 + %tmp6349 = getelementptr inbounds float* %tmp6348, i64 1 + %tmp6350 = getelementptr inbounds float* %tmp6349, i64 1 + %tmp6351 = getelementptr inbounds float* %tmp6350, i64 1 + %tmp6352 = getelementptr inbounds float* %tmp6351, i64 1 + %tmp6353 = getelementptr inbounds float* %tmp6352, i64 1 + %tmp6354 = getelementptr inbounds float* %tmp6353, i64 1 + %tmp6355 = getelementptr inbounds float* %tmp6354, i64 1 + %tmp6356 = getelementptr inbounds float* %tmp6355, i64 1 + %tmp6357 = getelementptr inbounds float* %tmp6356, i64 1 + %tmp6358 = getelementptr inbounds float* %tmp6357, i64 1 + %tmp6359 = getelementptr inbounds float* %tmp6358, i64 1 + %tmp6360 = getelementptr inbounds float* %tmp6359, i64 1 + %tmp6361 = getelementptr inbounds float* %tmp6360, i64 1 + %tmp6362 = getelementptr inbounds float* %tmp6361, i64 1 + %tmp6363 = getelementptr inbounds float* %tmp6362, i64 1 + %tmp6364 = getelementptr inbounds float* %tmp6363, i64 1 + %tmp6365 = getelementptr inbounds float* %tmp6364, i64 1 + %tmp6366 = getelementptr inbounds float* %tmp6365, i64 1 + %tmp6367 = getelementptr inbounds float* %tmp6366, i64 1 + %tmp6368 = getelementptr inbounds float* %tmp6367, i64 1 + %tmp6369 = getelementptr inbounds float* %tmp6368, i64 1 + %tmp6370 = getelementptr inbounds float* %tmp6369, i64 1 + %tmp6371 = getelementptr inbounds float* %tmp6370, i64 1 + %tmp6372 = getelementptr inbounds float* %tmp6371, i64 1 + %tmp6373 = getelementptr inbounds float* %tmp6372, i64 1 + %tmp6374 = getelementptr inbounds float* %tmp6373, i64 1 + %tmp6375 = getelementptr inbounds float* %tmp6374, i64 1 + %tmp6376 = getelementptr inbounds float* %tmp6375, i64 1 + %tmp6377 = getelementptr inbounds float* %tmp6376, i64 1 + %tmp6378 = getelementptr inbounds float* %tmp6377, i64 1 + %tmp6379 = getelementptr inbounds float* %tmp6378, i64 1 + %tmp6380 = getelementptr inbounds float* %tmp6379, i64 1 + 
%tmp6381 = getelementptr inbounds float* %tmp6380, i64 1 + %tmp6382 = getelementptr inbounds float* %tmp6381, i64 1 + %tmp6383 = getelementptr inbounds float* %tmp6382, i64 1 + %tmp6384 = getelementptr inbounds float* %tmp6383, i64 1 + %tmp6385 = getelementptr inbounds float* %tmp6384, i64 1 + %tmp6386 = getelementptr inbounds float* %tmp6385, i64 1 + %tmp6387 = getelementptr inbounds float* %tmp6386, i64 1 + %tmp6388 = getelementptr inbounds float* %tmp6387, i64 1 + %tmp6389 = getelementptr inbounds float* %tmp6388, i64 1 + %tmp6390 = getelementptr inbounds float* %tmp6389, i64 1 + %tmp6391 = getelementptr inbounds float* %tmp6390, i64 1 + %tmp6392 = getelementptr inbounds float* %tmp6391, i64 1 + %tmp6393 = getelementptr inbounds float* %tmp6392, i64 1 + %tmp6394 = getelementptr inbounds float* %tmp6393, i64 1 + %tmp6395 = getelementptr inbounds float* %tmp6394, i64 1 + %tmp6396 = getelementptr inbounds float* %tmp6395, i64 1 + %tmp6397 = getelementptr inbounds float* %tmp6396, i64 1 + %tmp6398 = getelementptr inbounds float* %tmp6397, i64 1 + %tmp6399 = getelementptr inbounds float* %tmp6398, i64 1 + %tmp6400 = getelementptr inbounds float* %tmp6399, i64 1 + %tmp6401 = getelementptr inbounds float* %tmp6400, i64 1 + %tmp6402 = getelementptr inbounds float* %tmp6401, i64 1 + %tmp6403 = getelementptr inbounds float* %tmp6402, i64 1 + %tmp6404 = getelementptr inbounds float* %tmp6403, i64 1 + %tmp6405 = getelementptr inbounds float* %tmp6404, i64 1 + %tmp6406 = getelementptr inbounds float* %tmp6405, i64 1 + %tmp6407 = getelementptr inbounds float* %tmp6406, i64 1 + %tmp6408 = getelementptr inbounds float* %tmp6407, i64 1 + %tmp6409 = getelementptr inbounds float* %tmp6408, i64 1 + %tmp6410 = getelementptr inbounds float* %tmp6409, i64 1 + %tmp6411 = getelementptr inbounds float* %tmp6410, i64 1 + %tmp6412 = getelementptr inbounds float* %tmp6411, i64 1 + %tmp6413 = getelementptr inbounds float* %tmp6412, i64 1 + %tmp6414 = getelementptr inbounds float* %tmp6413, 
i64 1 + %tmp6415 = getelementptr inbounds float* %tmp6414, i64 1 + %tmp6416 = getelementptr inbounds float* %tmp6415, i64 1 + %tmp6417 = getelementptr inbounds float* %tmp6416, i64 1 + %tmp6418 = getelementptr inbounds float* %tmp6417, i64 1 + %tmp6419 = getelementptr inbounds float* %tmp6418, i64 1 + %tmp6420 = getelementptr inbounds float* %tmp6419, i64 1 + %tmp6421 = getelementptr inbounds float* %tmp6420, i64 1 + %tmp6422 = getelementptr inbounds float* %tmp6421, i64 1 + %tmp6423 = getelementptr inbounds float* %tmp6422, i64 1 + %tmp6424 = getelementptr inbounds float* %tmp6423, i64 1 + %tmp6425 = getelementptr inbounds float* %tmp6424, i64 1 + %tmp6426 = getelementptr inbounds float* %tmp6425, i64 1 + %tmp6427 = getelementptr inbounds float* %tmp6426, i64 1 + %tmp6428 = getelementptr inbounds float* %tmp6427, i64 1 + %tmp6429 = getelementptr inbounds float* %tmp6428, i64 1 + %tmp6430 = getelementptr inbounds float* %tmp6429, i64 1 + %tmp6431 = getelementptr inbounds float* %tmp6430, i64 1 + %tmp6432 = getelementptr inbounds float* %tmp6431, i64 1 + %tmp6433 = getelementptr inbounds float* %tmp6432, i64 1 + %tmp6434 = getelementptr inbounds float* %tmp6433, i64 1 + %tmp6435 = getelementptr inbounds float* %tmp6434, i64 1 + %tmp6436 = getelementptr inbounds float* %tmp6435, i64 1 + %tmp6437 = getelementptr inbounds float* %tmp6436, i64 1 + %tmp6438 = getelementptr inbounds float* %tmp6437, i64 1 + %tmp6439 = getelementptr inbounds float* %tmp6438, i64 1 + %tmp6440 = getelementptr inbounds float* %tmp6439, i64 1 + %tmp6441 = getelementptr inbounds float* %tmp6440, i64 1 + %tmp6442 = getelementptr inbounds float* %tmp6441, i64 1 + %tmp6443 = getelementptr inbounds float* %tmp6442, i64 1 + %tmp6444 = getelementptr inbounds float* %tmp6443, i64 1 + %tmp6445 = getelementptr inbounds float* %tmp6444, i64 1 + %tmp6446 = getelementptr inbounds float* %tmp6445, i64 1 + %tmp6447 = getelementptr inbounds float* %tmp6446, i64 1 + %tmp6448 = getelementptr inbounds float* 
%tmp6447, i64 1 + %tmp6449 = getelementptr inbounds float* %tmp6448, i64 1 + %tmp6450 = getelementptr inbounds float* %tmp6449, i64 1 + %tmp6451 = getelementptr inbounds float* %tmp6450, i64 1 + %tmp6452 = getelementptr inbounds float* %tmp6451, i64 1 + %tmp6453 = getelementptr inbounds float* %tmp6452, i64 1 + %tmp6454 = getelementptr inbounds float* %tmp6453, i64 1 + %tmp6455 = getelementptr inbounds float* %tmp6454, i64 1 + %tmp6456 = getelementptr inbounds float* %tmp6455, i64 1 + %tmp6457 = getelementptr inbounds float* %tmp6456, i64 1 + %tmp6458 = getelementptr inbounds float* %tmp6457, i64 1 + %tmp6459 = getelementptr inbounds float* %tmp6458, i64 1 + %tmp6460 = getelementptr inbounds float* %tmp6459, i64 1 + %tmp6461 = getelementptr inbounds float* %tmp6460, i64 1 + %tmp6462 = getelementptr inbounds float* %tmp6461, i64 1 + %tmp6463 = getelementptr inbounds float* %tmp6462, i64 1 + %tmp6464 = getelementptr inbounds float* %tmp6463, i64 1 + %tmp6465 = getelementptr inbounds float* %tmp6464, i64 1 + %tmp6466 = getelementptr inbounds float* %tmp6465, i64 1 + %tmp6467 = getelementptr inbounds float* %tmp6466, i64 1 + %tmp6468 = getelementptr inbounds float* %tmp6467, i64 1 + %tmp6469 = getelementptr inbounds float* %tmp6468, i64 1 + %tmp6470 = getelementptr inbounds float* %tmp6469, i64 1 + %tmp6471 = getelementptr inbounds float* %tmp6470, i64 1 + %tmp6472 = getelementptr inbounds float* %tmp6471, i64 1 + %tmp6473 = getelementptr inbounds float* %tmp6472, i64 1 + %tmp6474 = getelementptr inbounds float* %tmp6473, i64 1 + %tmp6475 = getelementptr inbounds float* %tmp6474, i64 1 + %tmp6476 = getelementptr inbounds float* %tmp6475, i64 1 + %tmp6477 = getelementptr inbounds float* %tmp6476, i64 1 + %tmp6478 = getelementptr inbounds float* %tmp6477, i64 1 + %tmp6479 = getelementptr inbounds float* %tmp6478, i64 1 + %tmp6480 = getelementptr inbounds float* %tmp6479, i64 1 + %tmp6481 = getelementptr inbounds float* %tmp6480, i64 1 + %tmp6482 = getelementptr inbounds 
float* %tmp6481, i64 1 + %tmp6483 = getelementptr inbounds float* %tmp6482, i64 1 + %tmp6484 = getelementptr inbounds float* %tmp6483, i64 1 + %tmp6485 = getelementptr inbounds float* %tmp6484, i64 1 + %tmp6486 = getelementptr inbounds float* %tmp6485, i64 1 + %tmp6487 = getelementptr inbounds float* %tmp6486, i64 1 + %tmp6488 = getelementptr inbounds float* %tmp6487, i64 1 + %tmp6489 = getelementptr inbounds float* %tmp6488, i64 1 + %tmp6490 = getelementptr inbounds float* %tmp6489, i64 1 + %tmp6491 = getelementptr inbounds float* %tmp6490, i64 1 + %tmp6492 = getelementptr inbounds float* %tmp6491, i64 1 + %tmp6493 = getelementptr inbounds float* %tmp6492, i64 1 + %tmp6494 = getelementptr inbounds float* %tmp6493, i64 1 + %tmp6495 = getelementptr inbounds float* %tmp6494, i64 1 + %tmp6496 = getelementptr inbounds float* %tmp6495, i64 1 + %tmp6497 = getelementptr inbounds float* %tmp6496, i64 1 + %tmp6498 = getelementptr inbounds float* %tmp6497, i64 1 + %tmp6499 = getelementptr inbounds float* %tmp6498, i64 1 + %tmp6500 = getelementptr inbounds float* %tmp6499, i64 1 + %tmp6501 = getelementptr inbounds float* %tmp6500, i64 1 + %tmp6502 = getelementptr inbounds float* %tmp6501, i64 1 + %tmp6503 = getelementptr inbounds float* %tmp6502, i64 1 + %tmp6504 = getelementptr inbounds float* %tmp6503, i64 1 + %tmp6505 = getelementptr inbounds float* %tmp6504, i64 1 + %tmp6506 = getelementptr inbounds float* %tmp6505, i64 1 + %tmp6507 = getelementptr inbounds float* %tmp6506, i64 1 + %tmp6508 = getelementptr inbounds float* %tmp6507, i64 1 + %tmp6509 = getelementptr inbounds float* %tmp6508, i64 1 + %tmp6510 = getelementptr inbounds float* %tmp6509, i64 1 + %tmp6511 = getelementptr inbounds float* %tmp6510, i64 1 + %tmp6512 = getelementptr inbounds float* %tmp6511, i64 1 + %tmp6513 = getelementptr inbounds float* %tmp6512, i64 1 + %tmp6514 = getelementptr inbounds float* %tmp6513, i64 1 + %tmp6515 = getelementptr inbounds float* %tmp6514, i64 1 + %tmp6516 = getelementptr 
inbounds float* %tmp6515, i64 1 + %tmp6517 = getelementptr inbounds float* %tmp6516, i64 1 + %tmp6518 = getelementptr inbounds float* %tmp6517, i64 1 + %tmp6519 = getelementptr inbounds float* %tmp6518, i64 1 + %tmp6520 = getelementptr inbounds float* %tmp6519, i64 1 + %tmp6521 = getelementptr inbounds float* %tmp6520, i64 1 + %tmp6522 = getelementptr inbounds float* %tmp6521, i64 1 + %tmp6523 = getelementptr inbounds float* %tmp6522, i64 1 + %tmp6524 = getelementptr inbounds float* %tmp6523, i64 1 + %tmp6525 = getelementptr inbounds float* %tmp6524, i64 1 + %tmp6526 = getelementptr inbounds float* %tmp6525, i64 1 + %tmp6527 = getelementptr inbounds float* %tmp6526, i64 1 + %tmp6528 = getelementptr inbounds float* %tmp6527, i64 1 + %tmp6529 = getelementptr inbounds float* %tmp6528, i64 1 + %tmp6530 = getelementptr inbounds float* %tmp6529, i64 1 + %tmp6531 = getelementptr inbounds float* %tmp6530, i64 1 + %tmp6532 = getelementptr inbounds float* %tmp6531, i64 1 + %tmp6533 = getelementptr inbounds float* %tmp6532, i64 1 + %tmp6534 = getelementptr inbounds float* %tmp6533, i64 1 + %tmp6535 = getelementptr inbounds float* %tmp6534, i64 1 + %tmp6536 = getelementptr inbounds float* %tmp6535, i64 1 + %tmp6537 = getelementptr inbounds float* %tmp6536, i64 1 + %tmp6538 = getelementptr inbounds float* %tmp6537, i64 1 + %tmp6539 = getelementptr inbounds float* %tmp6538, i64 1 + %tmp6540 = getelementptr inbounds float* %tmp6539, i64 1 + %tmp6541 = getelementptr inbounds float* %tmp6540, i64 1 + %tmp6542 = getelementptr inbounds float* %tmp6541, i64 1 + %tmp6543 = getelementptr inbounds float* %tmp6542, i64 1 + %tmp6544 = getelementptr inbounds float* %tmp6543, i64 1 + %tmp6545 = getelementptr inbounds float* %tmp6544, i64 1 + %tmp6546 = getelementptr inbounds float* %tmp6545, i64 1 + %tmp6547 = getelementptr inbounds float* %tmp6546, i64 1 + %tmp6548 = getelementptr inbounds float* %tmp6547, i64 1 + %tmp6549 = getelementptr inbounds float* %tmp6548, i64 1 + %tmp6550 = 
getelementptr inbounds float* %tmp6549, i64 1 + %tmp6551 = getelementptr inbounds float* %tmp6550, i64 1 + %tmp6552 = getelementptr inbounds float* %tmp6551, i64 1 + %tmp6553 = getelementptr inbounds float* %tmp6552, i64 1 + %tmp6554 = getelementptr inbounds float* %tmp6553, i64 1 + %tmp6555 = getelementptr inbounds float* %tmp6554, i64 1 + %tmp6556 = getelementptr inbounds float* %tmp6555, i64 1 + %tmp6557 = getelementptr inbounds float* %tmp6556, i64 1 + %tmp6558 = getelementptr inbounds float* %tmp6557, i64 1 + %tmp6559 = getelementptr inbounds float* %tmp6558, i64 1 + %tmp6560 = getelementptr inbounds float* %tmp6559, i64 1 + %tmp6561 = getelementptr inbounds float* %tmp6560, i64 1 + %tmp6562 = getelementptr inbounds float* %tmp6561, i64 1 + %tmp6563 = getelementptr inbounds float* %tmp6562, i64 1 + %tmp6564 = getelementptr inbounds float* %tmp6563, i64 1 + %tmp6565 = getelementptr inbounds float* %tmp6564, i64 1 + %tmp6566 = getelementptr inbounds float* %tmp6565, i64 1 + %tmp6567 = getelementptr inbounds float* %tmp6566, i64 1 + %tmp6568 = getelementptr inbounds float* %tmp6567, i64 1 + %tmp6569 = getelementptr inbounds float* %tmp6568, i64 1 + %tmp6570 = getelementptr inbounds float* %tmp6569, i64 1 + %tmp6571 = getelementptr inbounds float* %tmp6570, i64 1 + %tmp6572 = getelementptr inbounds float* %tmp6571, i64 1 + %tmp6573 = getelementptr inbounds float* %tmp6572, i64 1 + %tmp6574 = getelementptr inbounds float* %tmp6573, i64 1 + %tmp6575 = getelementptr inbounds float* %tmp6574, i64 1 + %tmp6576 = getelementptr inbounds float* %tmp6575, i64 1 + %tmp6577 = getelementptr inbounds float* %tmp6576, i64 1 + %tmp6578 = getelementptr inbounds float* %tmp6577, i64 1 + %tmp6579 = getelementptr inbounds float* %tmp6578, i64 1 + %tmp6580 = getelementptr inbounds float* %tmp6579, i64 1 + %tmp6581 = getelementptr inbounds float* %tmp6580, i64 1 + %tmp6582 = getelementptr inbounds float* %tmp6581, i64 1 + %tmp6583 = getelementptr inbounds float* %tmp6582, i64 1 + 
%tmp6584 = getelementptr inbounds float* %tmp6583, i64 1 + %tmp6585 = getelementptr inbounds float* %tmp6584, i64 1 + %tmp6586 = getelementptr inbounds float* %tmp6585, i64 1 + %tmp6587 = getelementptr inbounds float* %tmp6586, i64 1 + %tmp6588 = getelementptr inbounds float* %tmp6587, i64 1 + %tmp6589 = getelementptr inbounds float* %tmp6588, i64 1 + %tmp6590 = getelementptr inbounds float* %tmp6589, i64 1 + %tmp6591 = getelementptr inbounds float* %tmp6590, i64 1 + %tmp6592 = getelementptr inbounds float* %tmp6591, i64 1 + %tmp6593 = getelementptr inbounds float* %tmp6592, i64 1 + %tmp6594 = getelementptr inbounds float* %tmp6593, i64 1 + %tmp6595 = getelementptr inbounds float* %tmp6594, i64 1 + %tmp6596 = getelementptr inbounds float* %tmp6595, i64 1 + %tmp6597 = getelementptr inbounds float* %tmp6596, i64 1 + %tmp6598 = getelementptr inbounds float* %tmp6597, i64 1 + %tmp6599 = getelementptr inbounds float* %tmp6598, i64 1 + %tmp6600 = getelementptr inbounds float* %tmp6599, i64 1 + %tmp6601 = getelementptr inbounds float* %tmp6600, i64 1 + %tmp6602 = getelementptr inbounds float* %tmp6601, i64 1 + %tmp6603 = getelementptr inbounds float* %tmp6602, i64 1 + %tmp6604 = getelementptr inbounds float* %tmp6603, i64 1 + %tmp6605 = getelementptr inbounds float* %tmp6604, i64 1 + %tmp6606 = getelementptr inbounds float* %tmp6605, i64 1 + %tmp6607 = getelementptr inbounds float* %tmp6606, i64 1 + %tmp6608 = getelementptr inbounds float* %tmp6607, i64 1 + %tmp6609 = getelementptr inbounds float* %tmp6608, i64 1 + %tmp6610 = getelementptr inbounds float* %tmp6609, i64 1 + %tmp6611 = getelementptr inbounds float* %tmp6610, i64 1 + %tmp6612 = getelementptr inbounds float* %tmp6611, i64 1 + %tmp6613 = getelementptr inbounds float* %tmp6612, i64 1 + %tmp6614 = getelementptr inbounds float* %tmp6613, i64 1 + %tmp6615 = getelementptr inbounds float* %tmp6614, i64 1 + %tmp6616 = getelementptr inbounds float* %tmp6615, i64 1 + %tmp6617 = getelementptr inbounds float* %tmp6616, 
i64 1 + %tmp6618 = getelementptr inbounds float* %tmp6617, i64 1 + %tmp6619 = getelementptr inbounds float* %tmp6618, i64 1 + %tmp6620 = getelementptr inbounds float* %tmp6619, i64 1 + %tmp6621 = getelementptr inbounds float* %tmp6620, i64 1 + %tmp6622 = getelementptr inbounds float* %tmp6621, i64 1 + %tmp6623 = getelementptr inbounds float* %tmp6622, i64 1 + %tmp6624 = getelementptr inbounds float* %tmp6623, i64 1 + %tmp6625 = getelementptr inbounds float* %tmp6624, i64 1 + %tmp6626 = getelementptr inbounds float* %tmp6625, i64 1 + %tmp6627 = getelementptr inbounds float* %tmp6626, i64 1 + %tmp6628 = getelementptr inbounds float* %tmp6627, i64 1 + %tmp6629 = getelementptr inbounds float* %tmp6628, i64 1 + %tmp6630 = getelementptr inbounds float* %tmp6629, i64 1 + %tmp6631 = getelementptr inbounds float* %tmp6630, i64 1 + %tmp6632 = getelementptr inbounds float* %tmp6631, i64 1 + %tmp6633 = getelementptr inbounds float* %tmp6632, i64 1 + %tmp6634 = getelementptr inbounds float* %tmp6633, i64 1 + %tmp6635 = getelementptr inbounds float* %tmp6634, i64 1 + %tmp6636 = getelementptr inbounds float* %tmp6635, i64 1 + %tmp6637 = getelementptr inbounds float* %tmp6636, i64 1 + %tmp6638 = getelementptr inbounds float* %tmp6637, i64 1 + %tmp6639 = getelementptr inbounds float* %tmp6638, i64 1 + %tmp6640 = getelementptr inbounds float* %tmp6639, i64 1 + %tmp6641 = getelementptr inbounds float* %tmp6640, i64 1 + %tmp6642 = getelementptr inbounds float* %tmp6641, i64 1 + %tmp6643 = getelementptr inbounds float* %tmp6642, i64 1 + %tmp6644 = getelementptr inbounds float* %tmp6643, i64 1 + %tmp6645 = getelementptr inbounds float* %tmp6644, i64 1 + %tmp6646 = getelementptr inbounds float* %tmp6645, i64 1 + %tmp6647 = getelementptr inbounds float* %tmp6646, i64 1 + %tmp6648 = getelementptr inbounds float* %tmp6647, i64 1 + %tmp6649 = getelementptr inbounds float* %tmp6648, i64 1 + %tmp6650 = getelementptr inbounds float* %tmp6649, i64 1 + %tmp6651 = getelementptr inbounds float* 
%tmp6650, i64 1 + %tmp6652 = getelementptr inbounds float* %tmp6651, i64 1 + %tmp6653 = getelementptr inbounds float* %tmp6652, i64 1 + %tmp6654 = getelementptr inbounds float* %tmp6653, i64 1 + %tmp6655 = getelementptr inbounds float* %tmp6654, i64 1 + %tmp6656 = getelementptr inbounds float* %tmp6655, i64 1 + %tmp6657 = getelementptr inbounds float* %tmp6656, i64 1 + %tmp6658 = getelementptr inbounds float* %tmp6657, i64 1 + %tmp6659 = getelementptr inbounds float* %tmp6658, i64 1 + %tmp6660 = getelementptr inbounds float* %tmp6659, i64 1 + %tmp6661 = getelementptr inbounds float* %tmp6660, i64 1 + %tmp6662 = getelementptr inbounds float* %tmp6661, i64 1 + %tmp6663 = getelementptr inbounds float* %tmp6662, i64 1 + %tmp6664 = getelementptr inbounds float* %tmp6663, i64 1 + %tmp6665 = getelementptr inbounds float* %tmp6664, i64 1 + %tmp6666 = getelementptr inbounds float* %tmp6665, i64 1 + %tmp6667 = getelementptr inbounds float* %tmp6666, i64 1 + %tmp6668 = getelementptr inbounds float* %tmp6667, i64 1 + %tmp6669 = getelementptr inbounds float* %tmp6668, i64 1 + %tmp6670 = getelementptr inbounds float* %tmp6669, i64 1 + %tmp6671 = getelementptr inbounds float* %tmp6670, i64 1 + %tmp6672 = getelementptr inbounds float* %tmp6671, i64 1 + %tmp6673 = getelementptr inbounds float* %tmp6672, i64 1 + %tmp6674 = getelementptr inbounds float* %tmp6673, i64 1 + %tmp6675 = getelementptr inbounds float* %tmp6674, i64 1 + %tmp6676 = getelementptr inbounds float* %tmp6675, i64 1 + %tmp6677 = getelementptr inbounds float* %tmp6676, i64 1 + %tmp6678 = getelementptr inbounds float* %tmp6677, i64 1 + %tmp6679 = getelementptr inbounds float* %tmp6678, i64 1 + %tmp6680 = getelementptr inbounds float* %tmp6679, i64 1 + %tmp6681 = getelementptr inbounds float* %tmp6680, i64 1 + %tmp6682 = getelementptr inbounds float* %tmp6681, i64 1 + %tmp6683 = getelementptr inbounds float* %tmp6682, i64 1 + %tmp6684 = getelementptr inbounds float* %tmp6683, i64 1 + %tmp6685 = getelementptr inbounds 
float* %tmp6684, i64 1 + %tmp6686 = getelementptr inbounds float* %tmp6685, i64 1 + %tmp6687 = getelementptr inbounds float* %tmp6686, i64 1 + %tmp6688 = getelementptr inbounds float* %tmp6687, i64 1 + %tmp6689 = getelementptr inbounds float* %tmp6688, i64 1 + %tmp6690 = getelementptr inbounds float* %tmp6689, i64 1 + %tmp6691 = getelementptr inbounds float* %tmp6690, i64 1 + %tmp6692 = getelementptr inbounds float* %tmp6691, i64 1 + %tmp6693 = getelementptr inbounds float* %tmp6692, i64 1 + %tmp6694 = getelementptr inbounds float* %tmp6693, i64 1 + %tmp6695 = getelementptr inbounds float* %tmp6694, i64 1 + %tmp6696 = getelementptr inbounds float* %tmp6695, i64 1 + %tmp6697 = getelementptr inbounds float* %tmp6696, i64 1 + %tmp6698 = getelementptr inbounds float* %tmp6697, i64 1 + %tmp6699 = getelementptr inbounds float* %tmp6698, i64 1 + %tmp6700 = getelementptr inbounds float* %tmp6699, i64 1 + %tmp6701 = getelementptr inbounds float* %tmp6700, i64 1 + %tmp6702 = getelementptr inbounds float* %tmp6701, i64 1 + %tmp6703 = getelementptr inbounds float* %tmp6702, i64 1 + %tmp6704 = getelementptr inbounds float* %tmp6703, i64 1 + %tmp6705 = getelementptr inbounds float* %tmp6704, i64 1 + %tmp6706 = getelementptr inbounds float* %tmp6705, i64 1 + %tmp6707 = getelementptr inbounds float* %tmp6706, i64 1 + %tmp6708 = getelementptr inbounds float* %tmp6707, i64 1 + %tmp6709 = getelementptr inbounds float* %tmp6708, i64 1 + %tmp6710 = getelementptr inbounds float* %tmp6709, i64 1 + %tmp6711 = getelementptr inbounds float* %tmp6710, i64 1 + %tmp6712 = getelementptr inbounds float* %tmp6711, i64 1 + %tmp6713 = getelementptr inbounds float* %tmp6712, i64 1 + %tmp6714 = getelementptr inbounds float* %tmp6713, i64 1 + %tmp6715 = getelementptr inbounds float* %tmp6714, i64 1 + %tmp6716 = getelementptr inbounds float* %tmp6715, i64 1 + %tmp6717 = getelementptr inbounds float* %tmp6716, i64 1 + %tmp6718 = getelementptr inbounds float* %tmp6717, i64 1 + %tmp6719 = getelementptr 
inbounds float* %tmp6718, i64 1 + %tmp6720 = getelementptr inbounds float* %tmp6719, i64 1 + %tmp6721 = getelementptr inbounds float* %tmp6720, i64 1 + %tmp6722 = getelementptr inbounds float* %tmp6721, i64 1 + %tmp6723 = getelementptr inbounds float* %tmp6722, i64 1 + %tmp6724 = getelementptr inbounds float* %tmp6723, i64 1 + %tmp6725 = getelementptr inbounds float* %tmp6724, i64 1 + %tmp6726 = getelementptr inbounds float* %tmp6725, i64 1 + %tmp6727 = getelementptr inbounds float* %tmp6726, i64 1 + %tmp6728 = getelementptr inbounds float* %tmp6727, i64 1 + %tmp6729 = getelementptr inbounds float* %tmp6728, i64 1 + %tmp6730 = getelementptr inbounds float* %tmp6729, i64 1 + %tmp6731 = getelementptr inbounds float* %tmp6730, i64 1 + %tmp6732 = getelementptr inbounds float* %tmp6731, i64 1 + %tmp6733 = getelementptr inbounds float* %tmp6732, i64 1 + %tmp6734 = getelementptr inbounds float* %tmp6733, i64 1 + %tmp6735 = getelementptr inbounds float* %tmp6734, i64 1 + %tmp6736 = getelementptr inbounds float* %tmp6735, i64 1 + %tmp6737 = getelementptr inbounds float* %tmp6736, i64 1 + %tmp6738 = getelementptr inbounds float* %tmp6737, i64 1 + %tmp6739 = getelementptr inbounds float* %tmp6738, i64 1 + %tmp6740 = getelementptr inbounds float* %tmp6739, i64 1 + %tmp6741 = getelementptr inbounds float* %tmp6740, i64 1 + %tmp6742 = getelementptr inbounds float* %tmp6741, i64 1 + %tmp6743 = getelementptr inbounds float* %tmp6742, i64 1 + %tmp6744 = getelementptr inbounds float* %tmp6743, i64 1 + %tmp6745 = getelementptr inbounds float* %tmp6744, i64 1 + %tmp6746 = getelementptr inbounds float* %tmp6745, i64 1 + %tmp6747 = getelementptr inbounds float* %tmp6746, i64 1 + %tmp6748 = getelementptr inbounds float* %tmp6747, i64 1 + %tmp6749 = getelementptr inbounds float* %tmp6748, i64 1 + %tmp6750 = getelementptr inbounds float* %tmp6749, i64 1 + %tmp6751 = getelementptr inbounds float* %tmp6750, i64 1 + %tmp6752 = getelementptr inbounds float* %tmp6751, i64 1 + %tmp6753 = 
getelementptr inbounds float* %tmp6752, i64 1 + %tmp6754 = getelementptr inbounds float* %tmp6753, i64 1 + %tmp6755 = getelementptr inbounds float* %tmp6754, i64 1 + %tmp6756 = getelementptr inbounds float* %tmp6755, i64 1 + %tmp6757 = getelementptr inbounds float* %tmp6756, i64 1 + %tmp6758 = getelementptr inbounds float* %tmp6757, i64 1 + %tmp6759 = getelementptr inbounds float* %tmp6758, i64 1 + %tmp6760 = getelementptr inbounds float* %tmp6759, i64 1 + %tmp6761 = getelementptr inbounds float* %tmp6760, i64 1 + %tmp6762 = getelementptr inbounds float* %tmp6761, i64 1 + %tmp6763 = getelementptr inbounds float* %tmp6762, i64 1 + %tmp6764 = getelementptr inbounds float* %tmp6763, i64 1 + %tmp6765 = getelementptr inbounds float* %tmp6764, i64 1 + %tmp6766 = getelementptr inbounds float* %tmp6765, i64 1 + %tmp6767 = getelementptr inbounds float* %tmp6766, i64 1 + %tmp6768 = getelementptr inbounds float* %tmp6767, i64 1 + %tmp6769 = getelementptr inbounds float* %tmp6768, i64 1 + %tmp6770 = getelementptr inbounds float* %tmp6769, i64 1 + %tmp6771 = getelementptr inbounds float* %tmp6770, i64 1 + %tmp6772 = getelementptr inbounds float* %tmp6771, i64 1 + %tmp6773 = getelementptr inbounds float* %tmp6772, i64 1 + %tmp6774 = getelementptr inbounds float* %tmp6773, i64 1 + %tmp6775 = getelementptr inbounds float* %tmp6774, i64 1 + %tmp6776 = getelementptr inbounds float* %tmp6775, i64 1 + %tmp6777 = getelementptr inbounds float* %tmp6776, i64 1 + %tmp6778 = getelementptr inbounds float* %tmp6777, i64 1 + %tmp6779 = getelementptr inbounds float* %tmp6778, i64 1 + %tmp6780 = getelementptr inbounds float* %tmp6779, i64 1 + %tmp6781 = getelementptr inbounds float* %tmp6780, i64 1 + %tmp6782 = getelementptr inbounds float* %tmp6781, i64 1 + %tmp6783 = getelementptr inbounds float* %tmp6782, i64 1 + %tmp6784 = getelementptr inbounds float* %tmp6783, i64 1 + %tmp6785 = getelementptr inbounds float* %tmp6784, i64 1 + %tmp6786 = getelementptr inbounds float* %tmp6785, i64 1 + 
%tmp6787 = getelementptr inbounds float* %tmp6786, i64 1 + %tmp6788 = getelementptr inbounds float* %tmp6787, i64 1 + %tmp6789 = getelementptr inbounds float* %tmp6788, i64 1 + %tmp6790 = getelementptr inbounds float* %tmp6789, i64 1 + %tmp6791 = getelementptr inbounds float* %tmp6790, i64 1 + %tmp6792 = getelementptr inbounds float* %tmp6791, i64 1 + %tmp6793 = getelementptr inbounds float* %tmp6792, i64 1 + %tmp6794 = getelementptr inbounds float* %tmp6793, i64 1 + %tmp6795 = getelementptr inbounds float* %tmp6794, i64 1 + %tmp6796 = getelementptr inbounds float* %tmp6795, i64 1 + %tmp6797 = getelementptr inbounds float* %tmp6796, i64 1 + %tmp6798 = getelementptr inbounds float* %tmp6797, i64 1 + %tmp6799 = getelementptr inbounds float* %tmp6798, i64 1 + %tmp6800 = getelementptr inbounds float* %tmp6799, i64 1 + %tmp6801 = getelementptr inbounds float* %tmp6800, i64 1 + %tmp6802 = getelementptr inbounds float* %tmp6801, i64 1 + %tmp6803 = getelementptr inbounds float* %tmp6802, i64 1 + %tmp6804 = getelementptr inbounds float* %tmp6803, i64 1 + %tmp6805 = getelementptr inbounds float* %tmp6804, i64 1 + %tmp6806 = getelementptr inbounds float* %tmp6805, i64 1 + %tmp6807 = getelementptr inbounds float* %tmp6806, i64 1 + %tmp6808 = getelementptr inbounds float* %tmp6807, i64 1 + %tmp6809 = getelementptr inbounds float* %tmp6808, i64 1 + %tmp6810 = getelementptr inbounds float* %tmp6809, i64 1 + %tmp6811 = getelementptr inbounds float* %tmp6810, i64 1 + %tmp6812 = getelementptr inbounds float* %tmp6811, i64 1 + %tmp6813 = getelementptr inbounds float* %tmp6812, i64 1 + %tmp6814 = getelementptr inbounds float* %tmp6813, i64 1 + %tmp6815 = getelementptr inbounds float* %tmp6814, i64 1 + %tmp6816 = getelementptr inbounds float* %tmp6815, i64 1 + %tmp6817 = getelementptr inbounds float* %tmp6816, i64 1 + %tmp6818 = getelementptr inbounds float* %tmp6817, i64 1 + %tmp6819 = getelementptr inbounds float* %tmp6818, i64 1 + %tmp6820 = getelementptr inbounds float* %tmp6819, 
i64 1 + %tmp6821 = getelementptr inbounds float* %tmp6820, i64 1 + %tmp6822 = getelementptr inbounds float* %tmp6821, i64 1 + %tmp6823 = getelementptr inbounds float* %tmp6822, i64 1 + %tmp6824 = getelementptr inbounds float* %tmp6823, i64 1 + %tmp6825 = getelementptr inbounds float* %tmp6824, i64 1 + %tmp6826 = getelementptr inbounds float* %tmp6825, i64 1 + %tmp6827 = getelementptr inbounds float* %tmp6826, i64 1 + %tmp6828 = getelementptr inbounds float* %tmp6827, i64 1 + %tmp6829 = getelementptr inbounds float* %tmp6828, i64 1 + %tmp6830 = getelementptr inbounds float* %tmp6829, i64 1 + %tmp6831 = getelementptr inbounds float* %tmp6830, i64 1 + %tmp6832 = getelementptr inbounds float* %tmp6831, i64 1 + %tmp6833 = getelementptr inbounds float* %tmp6832, i64 1 + %tmp6834 = getelementptr inbounds float* %tmp6833, i64 1 + %tmp6835 = getelementptr inbounds float* %tmp6834, i64 1 + %tmp6836 = getelementptr inbounds float* %tmp6835, i64 1 + %tmp6837 = getelementptr inbounds float* %tmp6836, i64 1 + %tmp6838 = getelementptr inbounds float* %tmp6837, i64 1 + %tmp6839 = getelementptr inbounds float* %tmp6838, i64 1 + %tmp6840 = getelementptr inbounds float* %tmp6839, i64 1 + %tmp6841 = getelementptr inbounds float* %tmp6840, i64 1 + %tmp6842 = getelementptr inbounds float* %tmp6841, i64 1 + %tmp6843 = getelementptr inbounds float* %tmp6842, i64 1 + %tmp6844 = getelementptr inbounds float* %tmp6843, i64 1 + %tmp6845 = getelementptr inbounds float* %tmp6844, i64 1 + %tmp6846 = getelementptr inbounds float* %tmp6845, i64 1 + %tmp6847 = getelementptr inbounds float* %tmp6846, i64 1 + %tmp6848 = getelementptr inbounds float* %tmp6847, i64 1 + %tmp6849 = getelementptr inbounds float* %tmp6848, i64 1 + %tmp6850 = getelementptr inbounds float* %tmp6849, i64 1 + %tmp6851 = getelementptr inbounds float* %tmp6850, i64 1 + %tmp6852 = getelementptr inbounds float* %tmp6851, i64 1 + %tmp6853 = getelementptr inbounds float* %tmp6852, i64 1 + %tmp6854 = getelementptr inbounds float* 
%tmp6853, i64 1 + %tmp6855 = getelementptr inbounds float* %tmp6854, i64 1 + %tmp6856 = getelementptr inbounds float* %tmp6855, i64 1 + %tmp6857 = getelementptr inbounds float* %tmp6856, i64 1 + %tmp6858 = getelementptr inbounds float* %tmp6857, i64 1 + %tmp6859 = getelementptr inbounds float* %tmp6858, i64 1 + %tmp6860 = getelementptr inbounds float* %tmp6859, i64 1 + %tmp6861 = getelementptr inbounds float* %tmp6860, i64 1 + %tmp6862 = getelementptr inbounds float* %tmp6861, i64 1 + %tmp6863 = getelementptr inbounds float* %tmp6862, i64 1 + %tmp6864 = getelementptr inbounds float* %tmp6863, i64 1 + %tmp6865 = getelementptr inbounds float* %tmp6864, i64 1 + %tmp6866 = getelementptr inbounds float* %tmp6865, i64 1 + %tmp6867 = getelementptr inbounds float* %tmp6866, i64 1 + %tmp6868 = getelementptr inbounds float* %tmp6867, i64 1 + %tmp6869 = getelementptr inbounds float* %tmp6868, i64 1 + %tmp6870 = getelementptr inbounds float* %tmp6869, i64 1 + %tmp6871 = getelementptr inbounds float* %tmp6870, i64 1 + %tmp6872 = getelementptr inbounds float* %tmp6871, i64 1 + %tmp6873 = getelementptr inbounds float* %tmp6872, i64 1 + %tmp6874 = getelementptr inbounds float* %tmp6873, i64 1 + %tmp6875 = getelementptr inbounds float* %tmp6874, i64 1 + %tmp6876 = getelementptr inbounds float* %tmp6875, i64 1 + %tmp6877 = getelementptr inbounds float* %tmp6876, i64 1 + %tmp6878 = getelementptr inbounds float* %tmp6877, i64 1 + %tmp6879 = getelementptr inbounds float* %tmp6878, i64 1 + %tmp6880 = getelementptr inbounds float* %tmp6879, i64 1 + %tmp6881 = getelementptr inbounds float* %tmp6880, i64 1 + %tmp6882 = getelementptr inbounds float* %tmp6881, i64 1 + %tmp6883 = getelementptr inbounds float* %tmp6882, i64 1 + %tmp6884 = getelementptr inbounds float* %tmp6883, i64 1 + %tmp6885 = getelementptr inbounds float* %tmp6884, i64 1 + %tmp6886 = getelementptr inbounds float* %tmp6885, i64 1 + %tmp6887 = getelementptr inbounds float* %tmp6886, i64 1 + %tmp6888 = getelementptr inbounds 
float* %tmp6887, i64 1 + %tmp6889 = getelementptr inbounds float* %tmp6888, i64 1 + %tmp6890 = getelementptr inbounds float* %tmp6889, i64 1 + %tmp6891 = getelementptr inbounds float* %tmp6890, i64 1 + %tmp6892 = getelementptr inbounds float* %tmp6891, i64 1 + %tmp6893 = getelementptr inbounds float* %tmp6892, i64 1 + %tmp6894 = getelementptr inbounds float* %tmp6893, i64 1 + %tmp6895 = getelementptr inbounds float* %tmp6894, i64 1 + %tmp6896 = getelementptr inbounds float* %tmp6895, i64 1 + %tmp6897 = getelementptr inbounds float* %tmp6896, i64 1 + %tmp6898 = getelementptr inbounds float* %tmp6897, i64 1 + %tmp6899 = getelementptr inbounds float* %tmp6898, i64 1 + %tmp6900 = getelementptr inbounds float* %tmp6899, i64 1 + %tmp6901 = getelementptr inbounds float* %tmp6900, i64 1 + %tmp6902 = getelementptr inbounds float* %tmp6901, i64 1 + %tmp6903 = getelementptr inbounds float* %tmp6902, i64 1 + %tmp6904 = getelementptr inbounds float* %tmp6903, i64 1 + %tmp6905 = getelementptr inbounds float* %tmp6904, i64 1 + %tmp6906 = getelementptr inbounds float* %tmp6905, i64 1 + %tmp6907 = getelementptr inbounds float* %tmp6906, i64 1 + %tmp6908 = getelementptr inbounds float* %tmp6907, i64 1 + %tmp6909 = getelementptr inbounds float* %tmp6908, i64 1 + %tmp6910 = getelementptr inbounds float* %tmp6909, i64 1 + %tmp6911 = getelementptr inbounds float* %tmp6910, i64 1 + %tmp6912 = getelementptr inbounds float* %tmp6911, i64 1 + %tmp6913 = getelementptr inbounds float* %tmp6912, i64 1 + %tmp6914 = getelementptr inbounds float* %tmp6913, i64 1 + %tmp6915 = getelementptr inbounds float* %tmp6914, i64 1 + %tmp6916 = getelementptr inbounds float* %tmp6915, i64 1 + %tmp6917 = getelementptr inbounds float* %tmp6916, i64 1 + %tmp6918 = getelementptr inbounds float* %tmp6917, i64 1 + %tmp6919 = getelementptr inbounds float* %tmp6918, i64 1 + %tmp6920 = getelementptr inbounds float* %tmp6919, i64 1 + %tmp6921 = getelementptr inbounds float* %tmp6920, i64 1 + %tmp6922 = getelementptr 
inbounds float* %tmp6921, i64 1 + %tmp6923 = getelementptr inbounds float* %tmp6922, i64 1 + %tmp6924 = getelementptr inbounds float* %tmp6923, i64 1 + %tmp6925 = getelementptr inbounds float* %tmp6924, i64 1 + %tmp6926 = getelementptr inbounds float* %tmp6925, i64 1 + %tmp6927 = getelementptr inbounds float* %tmp6926, i64 1 + %tmp6928 = getelementptr inbounds float* %tmp6927, i64 1 + %tmp6929 = getelementptr inbounds float* %tmp6928, i64 1 + %tmp6930 = getelementptr inbounds float* %tmp6929, i64 1 + %tmp6931 = getelementptr inbounds float* %tmp6930, i64 1 + %tmp6932 = getelementptr inbounds float* %tmp6931, i64 1 + %tmp6933 = getelementptr inbounds float* %tmp6932, i64 1 + %tmp6934 = getelementptr inbounds float* %tmp6933, i64 1 + %tmp6935 = getelementptr inbounds float* %tmp6934, i64 1 + %tmp6936 = getelementptr inbounds float* %tmp6935, i64 1 + %tmp6937 = getelementptr inbounds float* %tmp6936, i64 1 + %tmp6938 = getelementptr inbounds float* %tmp6937, i64 1 + %tmp6939 = getelementptr inbounds float* %tmp6938, i64 1 + %tmp6940 = getelementptr inbounds float* %tmp6939, i64 1 + %tmp6941 = getelementptr inbounds float* %tmp6940, i64 1 + %tmp6942 = getelementptr inbounds float* %tmp6941, i64 1 + %tmp6943 = getelementptr inbounds float* %tmp6942, i64 1 + %tmp6944 = getelementptr inbounds float* %tmp6943, i64 1 + %tmp6945 = getelementptr inbounds float* %tmp6944, i64 1 + %tmp6946 = getelementptr inbounds float* %tmp6945, i64 1 + %tmp6947 = getelementptr inbounds float* %tmp6946, i64 1 + %tmp6948 = getelementptr inbounds float* %tmp6947, i64 1 + %tmp6949 = getelementptr inbounds float* %tmp6948, i64 1 + %tmp6950 = getelementptr inbounds float* %tmp6949, i64 1 + %tmp6951 = getelementptr inbounds float* %tmp6950, i64 1 + %tmp6952 = getelementptr inbounds float* %tmp6951, i64 1 + %tmp6953 = getelementptr inbounds float* %tmp6952, i64 1 + %tmp6954 = getelementptr inbounds float* %tmp6953, i64 1 + %tmp6955 = getelementptr inbounds float* %tmp6954, i64 1 + %tmp6956 = 
getelementptr inbounds float* %tmp6955, i64 1 + %tmp6957 = getelementptr inbounds float* %tmp6956, i64 1 + %tmp6958 = getelementptr inbounds float* %tmp6957, i64 1 + %tmp6959 = getelementptr inbounds float* %tmp6958, i64 1 + %tmp6960 = getelementptr inbounds float* %tmp6959, i64 1 + %tmp6961 = getelementptr inbounds float* %tmp6960, i64 1 + %tmp6962 = getelementptr inbounds float* %tmp6961, i64 1 + %tmp6963 = getelementptr inbounds float* %tmp6962, i64 1 + %tmp6964 = getelementptr inbounds float* %tmp6963, i64 1 + %tmp6965 = getelementptr inbounds float* %tmp6964, i64 1 + %tmp6966 = getelementptr inbounds float* %tmp6965, i64 1 + %tmp6967 = getelementptr inbounds float* %tmp6966, i64 1 + %tmp6968 = getelementptr inbounds float* %tmp6967, i64 1 + %tmp6969 = getelementptr inbounds float* %tmp6968, i64 1 + %tmp6970 = getelementptr inbounds float* %tmp6969, i64 1 + %tmp6971 = getelementptr inbounds float* %tmp6970, i64 1 + %tmp6972 = getelementptr inbounds float* %tmp6971, i64 1 + %tmp6973 = getelementptr inbounds float* %tmp6972, i64 1 + %tmp6974 = getelementptr inbounds float* %tmp6973, i64 1 + %tmp6975 = getelementptr inbounds float* %tmp6974, i64 1 + %tmp6976 = getelementptr inbounds float* %tmp6975, i64 1 + %tmp6977 = getelementptr inbounds float* %tmp6976, i64 1 + %tmp6978 = getelementptr inbounds float* %tmp6977, i64 1 + %tmp6979 = getelementptr inbounds float* %tmp6978, i64 1 + %tmp6980 = getelementptr inbounds float* %tmp6979, i64 1 + %tmp6981 = getelementptr inbounds float* %tmp6980, i64 1 + %tmp6982 = getelementptr inbounds float* %tmp6981, i64 1 + %tmp6983 = getelementptr inbounds float* %tmp6982, i64 1 + %tmp6984 = getelementptr inbounds float* %tmp6983, i64 1 + %tmp6985 = getelementptr inbounds float* %tmp6984, i64 1 + %tmp6986 = getelementptr inbounds float* %tmp6985, i64 1 + %tmp6987 = getelementptr inbounds float* %tmp6986, i64 1 + %tmp6988 = getelementptr inbounds float* %tmp6987, i64 1 + %tmp6989 = getelementptr inbounds float* %tmp6988, i64 1 + 
%tmp6990 = getelementptr inbounds float* %tmp6989, i64 1 + %tmp6991 = getelementptr inbounds float* %tmp6990, i64 1 + %tmp6992 = getelementptr inbounds float* %tmp6991, i64 1 + %tmp6993 = getelementptr inbounds float* %tmp6992, i64 1 + %tmp6994 = getelementptr inbounds float* %tmp6993, i64 1 + %tmp6995 = getelementptr inbounds float* %tmp6994, i64 1 + %tmp6996 = getelementptr inbounds float* %tmp6995, i64 1 + %tmp6997 = getelementptr inbounds float* %tmp6996, i64 1 + %tmp6998 = getelementptr inbounds float* %tmp6997, i64 1 + %tmp6999 = getelementptr inbounds float* %tmp6998, i64 1 + %tmp7000 = getelementptr inbounds float* %tmp6999, i64 1 + %tmp7001 = getelementptr inbounds float* %tmp7000, i64 1 + %tmp7002 = getelementptr inbounds float* %tmp7001, i64 1 + %tmp7003 = getelementptr inbounds float* %tmp7002, i64 1 + %tmp7004 = getelementptr inbounds float* %tmp7003, i64 1 + %tmp7005 = getelementptr inbounds float* %tmp7004, i64 1 + %tmp7006 = getelementptr inbounds float* %tmp7005, i64 1 + %tmp7007 = getelementptr inbounds float* %tmp7006, i64 1 + %tmp7008 = getelementptr inbounds float* %tmp7007, i64 1 + %tmp7009 = getelementptr inbounds float* %tmp7008, i64 1 + %tmp7010 = getelementptr inbounds float* %tmp7009, i64 1 + %tmp7011 = getelementptr inbounds float* %tmp7010, i64 1 + %tmp7012 = getelementptr inbounds float* %tmp7011, i64 1 + %tmp7013 = getelementptr inbounds float* %tmp7012, i64 1 + %tmp7014 = getelementptr inbounds float* %tmp7013, i64 1 + %tmp7015 = getelementptr inbounds float* %tmp7014, i64 1 + %tmp7016 = getelementptr inbounds float* %tmp7015, i64 1 + %tmp7017 = getelementptr inbounds float* %tmp7016, i64 1 + %tmp7018 = getelementptr inbounds float* %tmp7017, i64 1 + %tmp7019 = getelementptr inbounds float* %tmp7018, i64 1 + %tmp7020 = getelementptr inbounds float* %tmp7019, i64 1 + %tmp7021 = getelementptr inbounds float* %tmp7020, i64 1 + %tmp7022 = getelementptr inbounds float* %tmp7021, i64 1 + %tmp7023 = getelementptr inbounds float* %tmp7022, 
i64 1 + %tmp7024 = getelementptr inbounds float* %tmp7023, i64 1 + %tmp7025 = getelementptr inbounds float* %tmp7024, i64 1 + %tmp7026 = getelementptr inbounds float* %tmp7025, i64 1 + %tmp7027 = getelementptr inbounds float* %tmp7026, i64 1 + %tmp7028 = getelementptr inbounds float* %tmp7027, i64 1 + %tmp7029 = getelementptr inbounds float* %tmp7028, i64 1 + %tmp7030 = getelementptr inbounds float* %tmp7029, i64 1 + %tmp7031 = getelementptr inbounds float* %tmp7030, i64 1 + %tmp7032 = getelementptr inbounds float* %tmp7031, i64 1 + %tmp7033 = getelementptr inbounds float* %tmp7032, i64 1 + %tmp7034 = getelementptr inbounds float* %tmp7033, i64 1 + %tmp7035 = getelementptr inbounds float* %tmp7034, i64 1 + %tmp7036 = getelementptr inbounds float* %tmp7035, i64 1 + %tmp7037 = getelementptr inbounds float* %tmp7036, i64 1 + %tmp7038 = getelementptr inbounds float* %tmp7037, i64 1 + %tmp7039 = getelementptr inbounds float* %tmp7038, i64 1 + %tmp7040 = getelementptr inbounds float* %tmp7039, i64 1 + %tmp7041 = getelementptr inbounds float* %tmp7040, i64 1 + %tmp7042 = getelementptr inbounds float* %tmp7041, i64 1 + %tmp7043 = getelementptr inbounds float* %tmp7042, i64 1 + %tmp7044 = getelementptr inbounds float* %tmp7043, i64 1 + %tmp7045 = getelementptr inbounds float* %tmp7044, i64 1 + %tmp7046 = getelementptr inbounds float* %tmp7045, i64 1 + %tmp7047 = getelementptr inbounds float* %tmp7046, i64 1 + %tmp7048 = getelementptr inbounds float* %tmp7047, i64 1 + %tmp7049 = getelementptr inbounds float* %tmp7048, i64 1 + %tmp7050 = getelementptr inbounds float* %tmp7049, i64 1 + %tmp7051 = getelementptr inbounds float* %tmp7050, i64 1 + %tmp7052 = getelementptr inbounds float* %tmp7051, i64 1 + %tmp7053 = getelementptr inbounds float* %tmp7052, i64 1 + %tmp7054 = getelementptr inbounds float* %tmp7053, i64 1 + %tmp7055 = getelementptr inbounds float* %tmp7054, i64 1 + %tmp7056 = getelementptr inbounds float* %tmp7055, i64 1 + %tmp7057 = getelementptr inbounds float* 
%tmp7056, i64 1 + %tmp7058 = getelementptr inbounds float* %tmp7057, i64 1 + %tmp7059 = getelementptr inbounds float* %tmp7058, i64 1 + %tmp7060 = getelementptr inbounds float* %tmp7059, i64 1 + %tmp7061 = getelementptr inbounds float* %tmp7060, i64 1 + %tmp7062 = getelementptr inbounds float* %tmp7061, i64 1 + %tmp7063 = getelementptr inbounds float* %tmp7062, i64 1 + %tmp7064 = getelementptr inbounds float* %tmp7063, i64 1 + %tmp7065 = getelementptr inbounds float* %tmp7064, i64 1 + %tmp7066 = getelementptr inbounds float* %tmp7065, i64 1 + %tmp7067 = getelementptr inbounds float* %tmp7066, i64 1 + %tmp7068 = getelementptr inbounds float* %tmp7067, i64 1 + %tmp7069 = getelementptr inbounds float* %tmp7068, i64 1 + %tmp7070 = getelementptr inbounds float* %tmp7069, i64 1 + %tmp7071 = getelementptr inbounds float* %tmp7070, i64 1 + %tmp7072 = getelementptr inbounds float* %tmp7071, i64 1 + %tmp7073 = getelementptr inbounds float* %tmp7072, i64 1 + %tmp7074 = getelementptr inbounds float* %tmp7073, i64 1 + %tmp7075 = getelementptr inbounds float* %tmp7074, i64 1 + %tmp7076 = getelementptr inbounds float* %tmp7075, i64 1 + %tmp7077 = getelementptr inbounds float* %tmp7076, i64 1 + %tmp7078 = getelementptr inbounds float* %tmp7077, i64 1 + %tmp7079 = getelementptr inbounds float* %tmp7078, i64 1 + %tmp7080 = getelementptr inbounds float* %tmp7079, i64 1 + %tmp7081 = getelementptr inbounds float* %tmp7080, i64 1 + %tmp7082 = getelementptr inbounds float* %tmp7081, i64 1 + %tmp7083 = getelementptr inbounds float* %tmp7082, i64 1 + %tmp7084 = getelementptr inbounds float* %tmp7083, i64 1 + %tmp7085 = getelementptr inbounds float* %tmp7084, i64 1 + %tmp7086 = getelementptr inbounds float* %tmp7085, i64 1 + %tmp7087 = getelementptr inbounds float* %tmp7086, i64 1 + %tmp7088 = getelementptr inbounds float* %tmp7087, i64 1 + %tmp7089 = getelementptr inbounds float* %tmp7088, i64 1 + %tmp7090 = getelementptr inbounds float* %tmp7089, i64 1 + %tmp7091 = getelementptr inbounds 
float* %tmp7090, i64 1 + %tmp7092 = getelementptr inbounds float* %tmp7091, i64 1 + %tmp7093 = getelementptr inbounds float* %tmp7092, i64 1 + %tmp7094 = getelementptr inbounds float* %tmp7093, i64 1 + %tmp7095 = getelementptr inbounds float* %tmp7094, i64 1 + %tmp7096 = getelementptr inbounds float* %tmp7095, i64 1 + %tmp7097 = getelementptr inbounds float* %tmp7096, i64 1 + %tmp7098 = getelementptr inbounds float* %tmp7097, i64 1 + %tmp7099 = getelementptr inbounds float* %tmp7098, i64 1 + %tmp7100 = getelementptr inbounds float* %tmp7099, i64 1 + %tmp7101 = getelementptr inbounds float* %tmp7100, i64 1 + %tmp7102 = getelementptr inbounds float* %tmp7101, i64 1 + %tmp7103 = getelementptr inbounds float* %tmp7102, i64 1 + %tmp7104 = getelementptr inbounds float* %tmp7103, i64 1 + %tmp7105 = getelementptr inbounds float* %tmp7104, i64 1 + %tmp7106 = getelementptr inbounds float* %tmp7105, i64 1 + %tmp7107 = getelementptr inbounds float* %tmp7106, i64 1 + %tmp7108 = getelementptr inbounds float* %tmp7107, i64 1 + %tmp7109 = getelementptr inbounds float* %tmp7108, i64 1 + %tmp7110 = getelementptr inbounds float* %tmp7109, i64 1 + %tmp7111 = getelementptr inbounds float* %tmp7110, i64 1 + %tmp7112 = getelementptr inbounds float* %tmp7111, i64 1 + %tmp7113 = getelementptr inbounds float* %tmp7112, i64 1 + %tmp7114 = getelementptr inbounds float* %tmp7113, i64 1 + %tmp7115 = getelementptr inbounds float* %tmp7114, i64 1 + %tmp7116 = getelementptr inbounds float* %tmp7115, i64 1 + %tmp7117 = getelementptr inbounds float* %tmp7116, i64 1 + %tmp7118 = getelementptr inbounds float* %tmp7117, i64 1 + %tmp7119 = getelementptr inbounds float* %tmp7118, i64 1 + %tmp7120 = getelementptr inbounds float* %tmp7119, i64 1 + %tmp7121 = getelementptr inbounds float* %tmp7120, i64 1 + %tmp7122 = getelementptr inbounds float* %tmp7121, i64 1 + %tmp7123 = getelementptr inbounds float* %tmp7122, i64 1 + %tmp7124 = getelementptr inbounds float* %tmp7123, i64 1 + %tmp7125 = getelementptr 
inbounds float* %tmp7124, i64 1 + %tmp7126 = getelementptr inbounds float* %tmp7125, i64 1 + %tmp7127 = getelementptr inbounds float* %tmp7126, i64 1 + %tmp7128 = getelementptr inbounds float* %tmp7127, i64 1 + %tmp7129 = getelementptr inbounds float* %tmp7128, i64 1 + %tmp7130 = getelementptr inbounds float* %tmp7129, i64 1 + %tmp7131 = getelementptr inbounds float* %tmp7130, i64 1 + %tmp7132 = getelementptr inbounds float* %tmp7131, i64 1 + %tmp7133 = getelementptr inbounds float* %tmp7132, i64 1 + %tmp7134 = getelementptr inbounds float* %tmp7133, i64 1 + %tmp7135 = getelementptr inbounds float* %tmp7134, i64 1 + %tmp7136 = getelementptr inbounds float* %tmp7135, i64 1 + %tmp7137 = getelementptr inbounds float* %tmp7136, i64 1 + %tmp7138 = getelementptr inbounds float* %tmp7137, i64 1 + %tmp7139 = getelementptr inbounds float* %tmp7138, i64 1 + %tmp7140 = getelementptr inbounds float* %tmp7139, i64 1 + %tmp7141 = getelementptr inbounds float* %tmp7140, i64 1 + %tmp7142 = getelementptr inbounds float* %tmp7141, i64 1 + %tmp7143 = getelementptr inbounds float* %tmp7142, i64 1 + %tmp7144 = getelementptr inbounds float* %tmp7143, i64 1 + %tmp7145 = getelementptr inbounds float* %tmp7144, i64 1 + %tmp7146 = getelementptr inbounds float* %tmp7145, i64 1 + %tmp7147 = getelementptr inbounds float* %tmp7146, i64 1 + %tmp7148 = getelementptr inbounds float* %tmp7147, i64 1 + %tmp7149 = getelementptr inbounds float* %tmp7148, i64 1 + %tmp7150 = getelementptr inbounds float* %tmp7149, i64 1 + %tmp7151 = getelementptr inbounds float* %tmp7150, i64 1 + %tmp7152 = getelementptr inbounds float* %tmp7151, i64 1 + %tmp7153 = getelementptr inbounds float* %tmp7152, i64 1 + %tmp7154 = getelementptr inbounds float* %tmp7153, i64 1 + %tmp7155 = getelementptr inbounds float* %tmp7154, i64 1 + %tmp7156 = getelementptr inbounds float* %tmp7155, i64 1 + %tmp7157 = getelementptr inbounds float* %tmp7156, i64 1 + %tmp7158 = getelementptr inbounds float* %tmp7157, i64 1 + %tmp7159 = 
getelementptr inbounds float* %tmp7158, i64 1 + %tmp7160 = getelementptr inbounds float* %tmp7159, i64 1 + %tmp7161 = getelementptr inbounds float* %tmp7160, i64 1 + %tmp7162 = getelementptr inbounds float* %tmp7161, i64 1 + %tmp7163 = getelementptr inbounds float* %tmp7162, i64 1 + %tmp7164 = getelementptr inbounds float* %tmp7163, i64 1 + %tmp7165 = getelementptr inbounds float* %tmp7164, i64 1 + %tmp7166 = getelementptr inbounds float* %tmp7165, i64 1 + %tmp7167 = getelementptr inbounds float* %tmp7166, i64 1 + %tmp7168 = getelementptr inbounds float* %tmp7167, i64 1 + %tmp7169 = getelementptr inbounds float* %tmp7168, i64 1 + %tmp7170 = getelementptr inbounds float* %tmp7169, i64 1 + %tmp7171 = getelementptr inbounds float* %tmp7170, i64 1 + %tmp7172 = getelementptr inbounds float* %tmp7171, i64 1 + %tmp7173 = getelementptr inbounds float* %tmp7172, i64 1 + %tmp7174 = getelementptr inbounds float* %tmp7173, i64 1 + %tmp7175 = getelementptr inbounds float* %tmp7174, i64 1 + %tmp7176 = getelementptr inbounds float* %tmp7175, i64 1 + %tmp7177 = getelementptr inbounds float* %tmp7176, i64 1 + %tmp7178 = getelementptr inbounds float* %tmp7177, i64 1 + %tmp7179 = getelementptr inbounds float* %tmp7178, i64 1 + %tmp7180 = getelementptr inbounds float* %tmp7179, i64 1 + %tmp7181 = getelementptr inbounds float* %tmp7180, i64 1 + %tmp7182 = getelementptr inbounds float* %tmp7181, i64 1 + %tmp7183 = getelementptr inbounds float* %tmp7182, i64 1 + %tmp7184 = getelementptr inbounds float* %tmp7183, i64 1 + %tmp7185 = getelementptr inbounds float* %tmp7184, i64 1 + %tmp7186 = getelementptr inbounds float* %tmp7185, i64 1 + %tmp7187 = getelementptr inbounds float* %tmp7186, i64 1 + %tmp7188 = getelementptr inbounds float* %tmp7187, i64 1 + %tmp7189 = getelementptr inbounds float* %tmp7188, i64 1 + %tmp7190 = getelementptr inbounds float* %tmp7189, i64 1 + %tmp7191 = getelementptr inbounds float* %tmp7190, i64 1 + %tmp7192 = getelementptr inbounds float* %tmp7191, i64 1 + 
%tmp7193 = getelementptr inbounds float* %tmp7192, i64 1 + %tmp7194 = getelementptr inbounds float* %tmp7193, i64 1 + %tmp7195 = getelementptr inbounds float* %tmp7194, i64 1 + %tmp7196 = getelementptr inbounds float* %tmp7195, i64 1 + %tmp7197 = getelementptr inbounds float* %tmp7196, i64 1 + %tmp7198 = getelementptr inbounds float* %tmp7197, i64 1 + %tmp7199 = getelementptr inbounds float* %tmp7198, i64 1 + %tmp7200 = getelementptr inbounds float* %tmp7199, i64 1 + %tmp7201 = getelementptr inbounds float* %tmp7200, i64 1 + %tmp7202 = getelementptr inbounds float* %tmp7201, i64 1 + %tmp7203 = getelementptr inbounds float* %tmp7202, i64 1 + %tmp7204 = getelementptr inbounds float* %tmp7203, i64 1 + %tmp7205 = getelementptr inbounds float* %tmp7204, i64 1 + %tmp7206 = getelementptr inbounds float* %tmp7205, i64 1 + %tmp7207 = getelementptr inbounds float* %tmp7206, i64 1 + %tmp7208 = getelementptr inbounds float* %tmp7207, i64 1 + %tmp7209 = getelementptr inbounds float* %tmp7208, i64 1 + %tmp7210 = getelementptr inbounds float* %tmp7209, i64 1 + %tmp7211 = getelementptr inbounds float* %tmp7210, i64 1 + %tmp7212 = getelementptr inbounds float* %tmp7211, i64 1 + %tmp7213 = getelementptr inbounds float* %tmp7212, i64 1 + %tmp7214 = getelementptr inbounds float* %tmp7213, i64 1 + %tmp7215 = getelementptr inbounds float* %tmp7214, i64 1 + %tmp7216 = getelementptr inbounds float* %tmp7215, i64 1 + %tmp7217 = getelementptr inbounds float* %tmp7216, i64 1 + %tmp7218 = getelementptr inbounds float* %tmp7217, i64 1 + %tmp7219 = getelementptr inbounds float* %tmp7218, i64 1 + %tmp7220 = getelementptr inbounds float* %tmp7219, i64 1 + %tmp7221 = getelementptr inbounds float* %tmp7220, i64 1 + %tmp7222 = getelementptr inbounds float* %tmp7221, i64 1 + %tmp7223 = getelementptr inbounds float* %tmp7222, i64 1 + %tmp7224 = getelementptr inbounds float* %tmp7223, i64 1 + %tmp7225 = getelementptr inbounds float* %tmp7224, i64 1 + %tmp7226 = getelementptr inbounds float* %tmp7225, 
i64 1 + %tmp7227 = getelementptr inbounds float* %tmp7226, i64 1 + %tmp7228 = getelementptr inbounds float* %tmp7227, i64 1 + %tmp7229 = getelementptr inbounds float* %tmp7228, i64 1 + %tmp7230 = getelementptr inbounds float* %tmp7229, i64 1 + %tmp7231 = getelementptr inbounds float* %tmp7230, i64 1 + %tmp7232 = getelementptr inbounds float* %tmp7231, i64 1 + %tmp7233 = getelementptr inbounds float* %tmp7232, i64 1 + %tmp7234 = getelementptr inbounds float* %tmp7233, i64 1 + %tmp7235 = getelementptr inbounds float* %tmp7234, i64 1 + %tmp7236 = getelementptr inbounds float* %tmp7235, i64 1 + %tmp7237 = getelementptr inbounds float* %tmp7236, i64 1 + %tmp7238 = getelementptr inbounds float* %tmp7237, i64 1 + %tmp7239 = getelementptr inbounds float* %tmp7238, i64 1 + %tmp7240 = getelementptr inbounds float* %tmp7239, i64 1 + %tmp7241 = getelementptr inbounds float* %tmp7240, i64 1 + %tmp7242 = getelementptr inbounds float* %tmp7241, i64 1 + %tmp7243 = getelementptr inbounds float* %tmp7242, i64 1 + %tmp7244 = getelementptr inbounds float* %tmp7243, i64 1 + %tmp7245 = getelementptr inbounds float* %tmp7244, i64 1 + %tmp7246 = getelementptr inbounds float* %tmp7245, i64 1 + %tmp7247 = getelementptr inbounds float* %tmp7246, i64 1 + %tmp7248 = getelementptr inbounds float* %tmp7247, i64 1 + %tmp7249 = getelementptr inbounds float* %tmp7248, i64 1 + %tmp7250 = getelementptr inbounds float* %tmp7249, i64 1 + %tmp7251 = getelementptr inbounds float* %tmp7250, i64 1 + %tmp7252 = getelementptr inbounds float* %tmp7251, i64 1 + %tmp7253 = getelementptr inbounds float* %tmp7252, i64 1 + %tmp7254 = getelementptr inbounds float* %tmp7253, i64 1 + %tmp7255 = getelementptr inbounds float* %tmp7254, i64 1 + %tmp7256 = getelementptr inbounds float* %tmp7255, i64 1 + %tmp7257 = getelementptr inbounds float* %tmp7256, i64 1 + %tmp7258 = getelementptr inbounds float* %tmp7257, i64 1 + %tmp7259 = getelementptr inbounds float* %tmp7258, i64 1 + %tmp7260 = getelementptr inbounds float* 
%tmp7259, i64 1 + %tmp7261 = getelementptr inbounds float* %tmp7260, i64 1 + %tmp7262 = getelementptr inbounds float* %tmp7261, i64 1 + %tmp7263 = getelementptr inbounds float* %tmp7262, i64 1 + %tmp7264 = getelementptr inbounds float* %tmp7263, i64 1 + %tmp7265 = getelementptr inbounds float* %tmp7264, i64 1 + %tmp7266 = getelementptr inbounds float* %tmp7265, i64 1 + %tmp7267 = getelementptr inbounds float* %tmp7266, i64 1 + %tmp7268 = getelementptr inbounds float* %tmp7267, i64 1 + %tmp7269 = getelementptr inbounds float* %tmp7268, i64 1 + %tmp7270 = getelementptr inbounds float* %tmp7269, i64 1 + %tmp7271 = getelementptr inbounds float* %tmp7270, i64 1 + %tmp7272 = getelementptr inbounds float* %tmp7271, i64 1 + %tmp7273 = getelementptr inbounds float* %tmp7272, i64 1 + %tmp7274 = getelementptr inbounds float* %tmp7273, i64 1 + %tmp7275 = getelementptr inbounds float* %tmp7274, i64 1 + %tmp7276 = getelementptr inbounds float* %tmp7275, i64 1 + %tmp7277 = getelementptr inbounds float* %tmp7276, i64 1 + %tmp7278 = getelementptr inbounds float* %tmp7277, i64 1 + %tmp7279 = getelementptr inbounds float* %tmp7278, i64 1 + %tmp7280 = getelementptr inbounds float* %tmp7279, i64 1 + %tmp7281 = getelementptr inbounds float* %tmp7280, i64 1 + %tmp7282 = getelementptr inbounds float* %tmp7281, i64 1 + %tmp7283 = getelementptr inbounds float* %tmp7282, i64 1 + %tmp7284 = getelementptr inbounds float* %tmp7283, i64 1 + %tmp7285 = getelementptr inbounds float* %tmp7284, i64 1 + %tmp7286 = getelementptr inbounds float* %tmp7285, i64 1 + %tmp7287 = getelementptr inbounds float* %tmp7286, i64 1 + %tmp7288 = getelementptr inbounds float* %tmp7287, i64 1 + %tmp7289 = getelementptr inbounds float* %tmp7288, i64 1 + %tmp7290 = getelementptr inbounds float* %tmp7289, i64 1 + %tmp7291 = getelementptr inbounds float* %tmp7290, i64 1 + %tmp7292 = getelementptr inbounds float* %tmp7291, i64 1 + %tmp7293 = getelementptr inbounds float* %tmp7292, i64 1 + %tmp7294 = getelementptr inbounds 
float* %tmp7293, i64 1 + %tmp7295 = getelementptr inbounds float* %tmp7294, i64 1 + %tmp7296 = getelementptr inbounds float* %tmp7295, i64 1 + %tmp7297 = getelementptr inbounds float* %tmp7296, i64 1 + %tmp7298 = getelementptr inbounds float* %tmp7297, i64 1 + %tmp7299 = getelementptr inbounds float* %tmp7298, i64 1 + %tmp7300 = getelementptr inbounds float* %tmp7299, i64 1 + %tmp7301 = getelementptr inbounds float* %tmp7300, i64 1 + %tmp7302 = getelementptr inbounds float* %tmp7301, i64 1 + %tmp7303 = getelementptr inbounds float* %tmp7302, i64 1 + %tmp7304 = getelementptr inbounds float* %tmp7303, i64 1 + %tmp7305 = getelementptr inbounds float* %tmp7304, i64 1 + %tmp7306 = getelementptr inbounds float* %tmp7305, i64 1 + %tmp7307 = getelementptr inbounds float* %tmp7306, i64 1 + %tmp7308 = getelementptr inbounds float* %tmp7307, i64 1 + %tmp7309 = getelementptr inbounds float* %tmp7308, i64 1 + %tmp7310 = getelementptr inbounds float* %tmp7309, i64 1 + %tmp7311 = getelementptr inbounds float* %tmp7310, i64 1 + %tmp7312 = getelementptr inbounds float* %tmp7311, i64 1 + %tmp7313 = getelementptr inbounds float* %tmp7312, i64 1 + %tmp7314 = getelementptr inbounds float* %tmp7313, i64 1 + %tmp7315 = getelementptr inbounds float* %tmp7314, i64 1 + %tmp7316 = getelementptr inbounds float* %tmp7315, i64 1 + %tmp7317 = getelementptr inbounds float* %tmp7316, i64 1 + %tmp7318 = getelementptr inbounds float* %tmp7317, i64 1 + %tmp7319 = getelementptr inbounds float* %tmp7318, i64 1 + %tmp7320 = getelementptr inbounds float* %tmp7319, i64 1 + %tmp7321 = getelementptr inbounds float* %tmp7320, i64 1 + %tmp7322 = getelementptr inbounds float* %tmp7321, i64 1 + %tmp7323 = getelementptr inbounds float* %tmp7322, i64 1 + %tmp7324 = getelementptr inbounds float* %tmp7323, i64 1 + %tmp7325 = getelementptr inbounds float* %tmp7324, i64 1 + %tmp7326 = getelementptr inbounds float* %tmp7325, i64 1 + %tmp7327 = getelementptr inbounds float* %tmp7326, i64 1 + %tmp7328 = getelementptr 
inbounds float* %tmp7327, i64 1 + %tmp7329 = getelementptr inbounds float* %tmp7328, i64 1 + %tmp7330 = getelementptr inbounds float* %tmp7329, i64 1 + %tmp7331 = getelementptr inbounds float* %tmp7330, i64 1 + %tmp7332 = getelementptr inbounds float* %tmp7331, i64 1 + %tmp7333 = getelementptr inbounds float* %tmp7332, i64 1 + %tmp7334 = getelementptr inbounds float* %tmp7333, i64 1 + %tmp7335 = getelementptr inbounds float* %tmp7334, i64 1 + %tmp7336 = getelementptr inbounds float* %tmp7335, i64 1 + %tmp7337 = getelementptr inbounds float* %tmp7336, i64 1 + %tmp7338 = getelementptr inbounds float* %tmp7337, i64 1 + %tmp7339 = getelementptr inbounds float* %tmp7338, i64 1 + %tmp7340 = getelementptr inbounds float* %tmp7339, i64 1 + %tmp7341 = getelementptr inbounds float* %tmp7340, i64 1 + %tmp7342 = getelementptr inbounds float* %tmp7341, i64 1 + %tmp7343 = getelementptr inbounds float* %tmp7342, i64 1 + %tmp7344 = getelementptr inbounds float* %tmp7343, i64 1 + %tmp7345 = getelementptr inbounds float* %tmp7344, i64 1 + %tmp7346 = getelementptr inbounds float* %tmp7345, i64 1 + %tmp7347 = getelementptr inbounds float* %tmp7346, i64 1 + %tmp7348 = getelementptr inbounds float* %tmp7347, i64 1 + %tmp7349 = getelementptr inbounds float* %tmp7348, i64 1 + %tmp7350 = getelementptr inbounds float* %tmp7349, i64 1 + %tmp7351 = getelementptr inbounds float* %tmp7350, i64 1 + %tmp7352 = getelementptr inbounds float* %tmp7351, i64 1 + %tmp7353 = getelementptr inbounds float* %tmp7352, i64 1 + %tmp7354 = getelementptr inbounds float* %tmp7353, i64 1 + %tmp7355 = getelementptr inbounds float* %tmp7354, i64 1 + %tmp7356 = getelementptr inbounds float* %tmp7355, i64 1 + %tmp7357 = getelementptr inbounds float* %tmp7356, i64 1 + %tmp7358 = getelementptr inbounds float* %tmp7357, i64 1 + %tmp7359 = getelementptr inbounds float* %tmp7358, i64 1 + %tmp7360 = getelementptr inbounds float* %tmp7359, i64 1 + %tmp7361 = getelementptr inbounds float* %tmp7360, i64 1 + %tmp7362 = 
getelementptr inbounds float* %tmp7361, i64 1 + %tmp7363 = getelementptr inbounds float* %tmp7362, i64 1 + %tmp7364 = getelementptr inbounds float* %tmp7363, i64 1 + %tmp7365 = getelementptr inbounds float* %tmp7364, i64 1 + %tmp7366 = getelementptr inbounds float* %tmp7365, i64 1 + %tmp7367 = getelementptr inbounds float* %tmp7366, i64 1 + %tmp7368 = getelementptr inbounds float* %tmp7367, i64 1 + %tmp7369 = getelementptr inbounds float* %tmp7368, i64 1 + %tmp7370 = getelementptr inbounds float* %tmp7369, i64 1 + %tmp7371 = getelementptr inbounds float* %tmp7370, i64 1 + %tmp7372 = getelementptr inbounds float* %tmp7371, i64 1 + %tmp7373 = getelementptr inbounds float* %tmp7372, i64 1 + %tmp7374 = getelementptr inbounds float* %tmp7373, i64 1 + %tmp7375 = getelementptr inbounds float* %tmp7374, i64 1 + %tmp7376 = getelementptr inbounds float* %tmp7375, i64 1 + %tmp7377 = getelementptr inbounds float* %tmp7376, i64 1 + %tmp7378 = getelementptr inbounds float* %tmp7377, i64 1 + %tmp7379 = getelementptr inbounds float* %tmp7378, i64 1 + %tmp7380 = getelementptr inbounds float* %tmp7379, i64 1 + %tmp7381 = getelementptr inbounds float* %tmp7380, i64 1 + %tmp7382 = getelementptr inbounds float* %tmp7381, i64 1 + %tmp7383 = getelementptr inbounds float* %tmp7382, i64 1 + %tmp7384 = getelementptr inbounds float* %tmp7383, i64 1 + %tmp7385 = getelementptr inbounds float* %tmp7384, i64 1 + %tmp7386 = getelementptr inbounds float* %tmp7385, i64 1 + %tmp7387 = getelementptr inbounds float* %tmp7386, i64 1 + %tmp7388 = getelementptr inbounds float* %tmp7387, i64 1 + %tmp7389 = getelementptr inbounds float* %tmp7388, i64 1 + %tmp7390 = getelementptr inbounds float* %tmp7389, i64 1 + %tmp7391 = getelementptr inbounds float* %tmp7390, i64 1 + %tmp7392 = getelementptr inbounds float* %tmp7391, i64 1 + %tmp7393 = getelementptr inbounds float* %tmp7392, i64 1 + %tmp7394 = getelementptr inbounds float* %tmp7393, i64 1 + %tmp7395 = getelementptr inbounds float* %tmp7394, i64 1 + 
%tmp7396 = getelementptr inbounds float* %tmp7395, i64 1 + %tmp7397 = getelementptr inbounds float* %tmp7396, i64 1 + %tmp7398 = getelementptr inbounds float* %tmp7397, i64 1 + %tmp7399 = getelementptr inbounds float* %tmp7398, i64 1 + %tmp7400 = getelementptr inbounds float* %tmp7399, i64 1 + %tmp7401 = getelementptr inbounds float* %tmp7400, i64 1 + %tmp7402 = getelementptr inbounds float* %tmp7401, i64 1 + %tmp7403 = getelementptr inbounds float* %tmp7402, i64 1 + %tmp7404 = getelementptr inbounds float* %tmp7403, i64 1 + %tmp7405 = getelementptr inbounds float* %tmp7404, i64 1 + %tmp7406 = getelementptr inbounds float* %tmp7405, i64 1 + %tmp7407 = getelementptr inbounds float* %tmp7406, i64 1 + %tmp7408 = getelementptr inbounds float* %tmp7407, i64 1 + %tmp7409 = getelementptr inbounds float* %tmp7408, i64 1 + %tmp7410 = getelementptr inbounds float* %tmp7409, i64 1 + %tmp7411 = getelementptr inbounds float* %tmp7410, i64 1 + %tmp7412 = getelementptr inbounds float* %tmp7411, i64 1 + %tmp7413 = getelementptr inbounds float* %tmp7412, i64 1 + %tmp7414 = getelementptr inbounds float* %tmp7413, i64 1 + %tmp7415 = getelementptr inbounds float* %tmp7414, i64 1 + %tmp7416 = getelementptr inbounds float* %tmp7415, i64 1 + %tmp7417 = getelementptr inbounds float* %tmp7416, i64 1 + %tmp7418 = getelementptr inbounds float* %tmp7417, i64 1 + %tmp7419 = getelementptr inbounds float* %tmp7418, i64 1 + %tmp7420 = getelementptr inbounds float* %tmp7419, i64 1 + %tmp7421 = getelementptr inbounds float* %tmp7420, i64 1 + %tmp7422 = getelementptr inbounds float* %tmp7421, i64 1 + %tmp7423 = getelementptr inbounds float* %tmp7422, i64 1 + %tmp7424 = getelementptr inbounds float* %tmp7423, i64 1 + %tmp7425 = getelementptr inbounds float* %tmp7424, i64 1 + %tmp7426 = getelementptr inbounds float* %tmp7425, i64 1 + %tmp7427 = getelementptr inbounds float* %tmp7426, i64 1 + %tmp7428 = getelementptr inbounds float* %tmp7427, i64 1 + %tmp7429 = getelementptr inbounds float* %tmp7428, 
i64 1 + %tmp7430 = getelementptr inbounds float* %tmp7429, i64 1 + %tmp7431 = getelementptr inbounds float* %tmp7430, i64 1 + %tmp7432 = getelementptr inbounds float* %tmp7431, i64 1 + %tmp7433 = getelementptr inbounds float* %tmp7432, i64 1 + %tmp7434 = getelementptr inbounds float* %tmp7433, i64 1 + %tmp7435 = getelementptr inbounds float* %tmp7434, i64 1 + %tmp7436 = getelementptr inbounds float* %tmp7435, i64 1 + %tmp7437 = getelementptr inbounds float* %tmp7436, i64 1 + %tmp7438 = getelementptr inbounds float* %tmp7437, i64 1 + %tmp7439 = getelementptr inbounds float* %tmp7438, i64 1 + %tmp7440 = getelementptr inbounds float* %tmp7439, i64 1 + %tmp7441 = getelementptr inbounds float* %tmp7440, i64 1 + %tmp7442 = getelementptr inbounds float* %tmp7441, i64 1 + %tmp7443 = getelementptr inbounds float* %tmp7442, i64 1 + %tmp7444 = getelementptr inbounds float* %tmp7443, i64 1 + %tmp7445 = getelementptr inbounds float* %tmp7444, i64 1 + %tmp7446 = getelementptr inbounds float* %tmp7445, i64 1 + %tmp7447 = getelementptr inbounds float* %tmp7446, i64 1 + %tmp7448 = getelementptr inbounds float* %tmp7447, i64 1 + %tmp7449 = getelementptr inbounds float* %tmp7448, i64 1 + %tmp7450 = getelementptr inbounds float* %tmp7449, i64 1 + %tmp7451 = getelementptr inbounds float* %tmp7450, i64 1 + %tmp7452 = getelementptr inbounds float* %tmp7451, i64 1 + %tmp7453 = getelementptr inbounds float* %tmp7452, i64 1 + %tmp7454 = getelementptr inbounds float* %tmp7453, i64 1 + %tmp7455 = getelementptr inbounds float* %tmp7454, i64 1 + %tmp7456 = getelementptr inbounds float* %tmp7455, i64 1 + %tmp7457 = getelementptr inbounds float* %tmp7456, i64 1 + %tmp7458 = getelementptr inbounds float* %tmp7457, i64 1 + %tmp7459 = getelementptr inbounds float* %tmp7458, i64 1 + %tmp7460 = getelementptr inbounds float* %tmp7459, i64 1 + %tmp7461 = getelementptr inbounds float* %tmp7460, i64 1 + %tmp7462 = getelementptr inbounds float* %tmp7461, i64 1 + %tmp7463 = getelementptr inbounds float* 
%tmp7462, i64 1 + %tmp7464 = getelementptr inbounds float* %tmp7463, i64 1 + %tmp7465 = getelementptr inbounds float* %tmp7464, i64 1 + %tmp7466 = getelementptr inbounds float* %tmp7465, i64 1 + %tmp7467 = getelementptr inbounds float* %tmp7466, i64 1 + %tmp7468 = getelementptr inbounds float* %tmp7467, i64 1 + %tmp7469 = getelementptr inbounds float* %tmp7468, i64 1 + %tmp7470 = getelementptr inbounds float* %tmp7469, i64 1 + %tmp7471 = getelementptr inbounds float* %tmp7470, i64 1 + %tmp7472 = getelementptr inbounds float* %tmp7471, i64 1 + %tmp7473 = getelementptr inbounds float* %tmp7472, i64 1 + %tmp7474 = getelementptr inbounds float* %tmp7473, i64 1 + %tmp7475 = getelementptr inbounds float* %tmp7474, i64 1 + %tmp7476 = getelementptr inbounds float* %tmp7475, i64 1 + %tmp7477 = getelementptr inbounds float* %tmp7476, i64 1 + %tmp7478 = getelementptr inbounds float* %tmp7477, i64 1 + %tmp7479 = getelementptr inbounds float* %tmp7478, i64 1 + %tmp7480 = getelementptr inbounds float* %tmp7479, i64 1 + %tmp7481 = getelementptr inbounds float* %tmp7480, i64 1 + %tmp7482 = getelementptr inbounds float* %tmp7481, i64 1 + %tmp7483 = getelementptr inbounds float* %tmp7482, i64 1 + %tmp7484 = getelementptr inbounds float* %tmp7483, i64 1 + %tmp7485 = getelementptr inbounds float* %tmp7484, i64 1 + %tmp7486 = getelementptr inbounds float* %tmp7485, i64 1 + %tmp7487 = getelementptr inbounds float* %tmp7486, i64 1 + %tmp7488 = getelementptr inbounds float* %tmp7487, i64 1 + %tmp7489 = getelementptr inbounds float* %tmp7488, i64 1 + %tmp7490 = getelementptr inbounds float* %tmp7489, i64 1 + %tmp7491 = getelementptr inbounds float* %tmp7490, i64 1 + %tmp7492 = getelementptr inbounds float* %tmp7491, i64 1 + %tmp7493 = getelementptr inbounds float* %tmp7492, i64 1 + %tmp7494 = getelementptr inbounds float* %tmp7493, i64 1 + %tmp7495 = getelementptr inbounds float* %tmp7494, i64 1 + %tmp7496 = getelementptr inbounds float* %tmp7495, i64 1 + %tmp7497 = getelementptr inbounds 
float* %tmp7496, i64 1 + %tmp7498 = getelementptr inbounds float* %tmp7497, i64 1 + %tmp7499 = getelementptr inbounds float* %tmp7498, i64 1 + %tmp7500 = getelementptr inbounds float* %tmp7499, i64 1 + %tmp7501 = getelementptr inbounds float* %tmp7500, i64 1 + %tmp7502 = getelementptr inbounds float* %tmp7501, i64 1 + %tmp7503 = getelementptr inbounds float* %tmp7502, i64 1 + %tmp7504 = getelementptr inbounds float* %tmp7503, i64 1 + %tmp7505 = getelementptr inbounds float* %tmp7504, i64 1 + %tmp7506 = getelementptr inbounds float* %tmp7505, i64 1 + %tmp7507 = getelementptr inbounds float* %tmp7506, i64 1 + %tmp7508 = getelementptr inbounds float* %tmp7507, i64 1 + %tmp7509 = getelementptr inbounds float* %tmp7508, i64 1 + %tmp7510 = getelementptr inbounds float* %tmp7509, i64 1 + %tmp7511 = getelementptr inbounds float* %tmp7510, i64 1 + %tmp7512 = getelementptr inbounds float* %tmp7511, i64 1 + %tmp7513 = getelementptr inbounds float* %tmp7512, i64 1 + %tmp7514 = getelementptr inbounds float* %tmp7513, i64 1 + %tmp7515 = getelementptr inbounds float* %tmp7514, i64 1 + %tmp7516 = getelementptr inbounds float* %tmp7515, i64 1 + %tmp7517 = getelementptr inbounds float* %tmp7516, i64 1 + %tmp7518 = getelementptr inbounds float* %tmp7517, i64 1 + %tmp7519 = getelementptr inbounds float* %tmp7518, i64 1 + %tmp7520 = getelementptr inbounds float* %tmp7519, i64 1 + %tmp7521 = getelementptr inbounds float* %tmp7520, i64 1 + %tmp7522 = getelementptr inbounds float* %tmp7521, i64 1 + %tmp7523 = getelementptr inbounds float* %tmp7522, i64 1 + %tmp7524 = getelementptr inbounds float* %tmp7523, i64 1 + %tmp7525 = getelementptr inbounds float* %tmp7524, i64 1 + %tmp7526 = getelementptr inbounds float* %tmp7525, i64 1 + %tmp7527 = getelementptr inbounds float* %tmp7526, i64 1 + %tmp7528 = getelementptr inbounds float* %tmp7527, i64 1 + %tmp7529 = getelementptr inbounds float* %tmp7528, i64 1 + %tmp7530 = getelementptr inbounds float* %tmp7529, i64 1 + %tmp7531 = getelementptr 
inbounds float* %tmp7530, i64 1 + %tmp7532 = getelementptr inbounds float* %tmp7531, i64 1 + %tmp7533 = getelementptr inbounds float* %tmp7532, i64 1 + %tmp7534 = getelementptr inbounds float* %tmp7533, i64 1 + %tmp7535 = getelementptr inbounds float* %tmp7534, i64 1 + %tmp7536 = getelementptr inbounds float* %tmp7535, i64 1 + %tmp7537 = getelementptr inbounds float* %tmp7536, i64 1 + %tmp7538 = getelementptr inbounds float* %tmp7537, i64 1 + %tmp7539 = getelementptr inbounds float* %tmp7538, i64 1 + %tmp7540 = getelementptr inbounds float* %tmp7539, i64 1 + %tmp7541 = getelementptr inbounds float* %tmp7540, i64 1 + %tmp7542 = getelementptr inbounds float* %tmp7541, i64 1 + %tmp7543 = getelementptr inbounds float* %tmp7542, i64 1 + %tmp7544 = getelementptr inbounds float* %tmp7543, i64 1 + %tmp7545 = getelementptr inbounds float* %tmp7544, i64 1 + %tmp7546 = getelementptr inbounds float* %tmp7545, i64 1 + %tmp7547 = getelementptr inbounds float* %tmp7546, i64 1 + %tmp7548 = getelementptr inbounds float* %tmp7547, i64 1 + %tmp7549 = getelementptr inbounds float* %tmp7548, i64 1 + %tmp7550 = getelementptr inbounds float* %tmp7549, i64 1 + %tmp7551 = getelementptr inbounds float* %tmp7550, i64 1 + %tmp7552 = getelementptr inbounds float* %tmp7551, i64 1 + %tmp7553 = getelementptr inbounds float* %tmp7552, i64 1 + %tmp7554 = getelementptr inbounds float* %tmp7553, i64 1 + %tmp7555 = getelementptr inbounds float* %tmp7554, i64 1 + %tmp7556 = getelementptr inbounds float* %tmp7555, i64 1 + %tmp7557 = getelementptr inbounds float* %tmp7556, i64 1 + %tmp7558 = getelementptr inbounds float* %tmp7557, i64 1 + %tmp7559 = getelementptr inbounds float* %tmp7558, i64 1 + %tmp7560 = getelementptr inbounds float* %tmp7559, i64 1 + %tmp7561 = getelementptr inbounds float* %tmp7560, i64 1 + %tmp7562 = getelementptr inbounds float* %tmp7561, i64 1 + %tmp7563 = getelementptr inbounds float* %tmp7562, i64 1 + %tmp7564 = getelementptr inbounds float* %tmp7563, i64 1 + %tmp7565 = 
getelementptr inbounds float* %tmp7564, i64 1 + %tmp7566 = getelementptr inbounds float* %tmp7565, i64 1 + %tmp7567 = getelementptr inbounds float* %tmp7566, i64 1 + %tmp7568 = getelementptr inbounds float* %tmp7567, i64 1 + %tmp7569 = getelementptr inbounds float* %tmp7568, i64 1 + %tmp7570 = getelementptr inbounds float* %tmp7569, i64 1 + %tmp7571 = getelementptr inbounds float* %tmp7570, i64 1 + %tmp7572 = getelementptr inbounds float* %tmp7571, i64 1 + %tmp7573 = getelementptr inbounds float* %tmp7572, i64 1 + %tmp7574 = getelementptr inbounds float* %tmp7573, i64 1 + %tmp7575 = getelementptr inbounds float* %tmp7574, i64 1 + %tmp7576 = getelementptr inbounds float* %tmp7575, i64 1 + %tmp7577 = getelementptr inbounds float* %tmp7576, i64 1 + %tmp7578 = getelementptr inbounds float* %tmp7577, i64 1 + %tmp7579 = getelementptr inbounds float* %tmp7578, i64 1 + %tmp7580 = getelementptr inbounds float* %tmp7579, i64 1 + %tmp7581 = getelementptr inbounds float* %tmp7580, i64 1 + %tmp7582 = getelementptr inbounds float* %tmp7581, i64 1 + %tmp7583 = getelementptr inbounds float* %tmp7582, i64 1 + %tmp7584 = getelementptr inbounds float* %tmp7583, i64 1 + %tmp7585 = getelementptr inbounds float* %tmp7584, i64 1 + %tmp7586 = getelementptr inbounds float* %tmp7585, i64 1 + %tmp7587 = getelementptr inbounds float* %tmp7586, i64 1 + %tmp7588 = getelementptr inbounds float* %tmp7587, i64 1 + %tmp7589 = getelementptr inbounds float* %tmp7588, i64 1 + %tmp7590 = getelementptr inbounds float* %tmp7589, i64 1 + %tmp7591 = getelementptr inbounds float* %tmp7590, i64 1 + %tmp7592 = getelementptr inbounds float* %tmp7591, i64 1 + %tmp7593 = getelementptr inbounds float* %tmp7592, i64 1 + %tmp7594 = getelementptr inbounds float* %tmp7593, i64 1 + %tmp7595 = getelementptr inbounds float* %tmp7594, i64 1 + %tmp7596 = getelementptr inbounds float* %tmp7595, i64 1 + %tmp7597 = getelementptr inbounds float* %tmp7596, i64 1 + %tmp7598 = getelementptr inbounds float* %tmp7597, i64 1 + 
%tmp7599 = getelementptr inbounds float* %tmp7598, i64 1 + %tmp7600 = getelementptr inbounds float* %tmp7599, i64 1 + %tmp7601 = getelementptr inbounds float* %tmp7600, i64 1 + %tmp7602 = getelementptr inbounds float* %tmp7601, i64 1 + %tmp7603 = getelementptr inbounds float* %tmp7602, i64 1 + %tmp7604 = getelementptr inbounds float* %tmp7603, i64 1 + %tmp7605 = getelementptr inbounds float* %tmp7604, i64 1 + %tmp7606 = getelementptr inbounds float* %tmp7605, i64 1 + %tmp7607 = getelementptr inbounds float* %tmp7606, i64 1 + %tmp7608 = getelementptr inbounds float* %tmp7607, i64 1 + %tmp7609 = getelementptr inbounds float* %tmp7608, i64 1 + %tmp7610 = getelementptr inbounds float* %tmp7609, i64 1 + %tmp7611 = getelementptr inbounds float* %tmp7610, i64 1 + %tmp7612 = getelementptr inbounds float* %tmp7611, i64 1 + %tmp7613 = getelementptr inbounds float* %tmp7612, i64 1 + %tmp7614 = getelementptr inbounds float* %tmp7613, i64 1 + %tmp7615 = getelementptr inbounds float* %tmp7614, i64 1 + %tmp7616 = getelementptr inbounds float* %tmp7615, i64 1 + %tmp7617 = getelementptr inbounds float* %tmp7616, i64 1 + %tmp7618 = getelementptr inbounds float* %tmp7617, i64 1 + %tmp7619 = getelementptr inbounds float* %tmp7618, i64 1 + %tmp7620 = getelementptr inbounds float* %tmp7619, i64 1 + %tmp7621 = getelementptr inbounds float* %tmp7620, i64 1 + %tmp7622 = getelementptr inbounds float* %tmp7621, i64 1 + %tmp7623 = getelementptr inbounds float* %tmp7622, i64 1 + %tmp7624 = getelementptr inbounds float* %tmp7623, i64 1 + %tmp7625 = getelementptr inbounds float* %tmp7624, i64 1 + %tmp7626 = getelementptr inbounds float* %tmp7625, i64 1 + %tmp7627 = getelementptr inbounds float* %tmp7626, i64 1 + %tmp7628 = getelementptr inbounds float* %tmp7627, i64 1 + %tmp7629 = getelementptr inbounds float* %tmp7628, i64 1 + %tmp7630 = getelementptr inbounds float* %tmp7629, i64 1 + %tmp7631 = getelementptr inbounds float* %tmp7630, i64 1 + %tmp7632 = getelementptr inbounds float* %tmp7631, 
i64 1 + %tmp7633 = getelementptr inbounds float* %tmp7632, i64 1 + %tmp7634 = getelementptr inbounds float* %tmp7633, i64 1 + %tmp7635 = getelementptr inbounds float* %tmp7634, i64 1 + %tmp7636 = getelementptr inbounds float* %tmp7635, i64 1 + %tmp7637 = getelementptr inbounds float* %tmp7636, i64 1 + %tmp7638 = getelementptr inbounds float* %tmp7637, i64 1 + %tmp7639 = getelementptr inbounds float* %tmp7638, i64 1 + %tmp7640 = getelementptr inbounds float* %tmp7639, i64 1 + %tmp7641 = getelementptr inbounds float* %tmp7640, i64 1 + %tmp7642 = getelementptr inbounds float* %tmp7641, i64 1 + %tmp7643 = getelementptr inbounds float* %tmp7642, i64 1 + %tmp7644 = getelementptr inbounds float* %tmp7643, i64 1 + %tmp7645 = getelementptr inbounds float* %tmp7644, i64 1 + %tmp7646 = getelementptr inbounds float* %tmp7645, i64 1 + %tmp7647 = getelementptr inbounds float* %tmp7646, i64 1 + %tmp7648 = getelementptr inbounds float* %tmp7647, i64 1 + %tmp7649 = getelementptr inbounds float* %tmp7648, i64 1 + %tmp7650 = getelementptr inbounds float* %tmp7649, i64 1 + %tmp7651 = getelementptr inbounds float* %tmp7650, i64 1 + %tmp7652 = getelementptr inbounds float* %tmp7651, i64 1 + %tmp7653 = getelementptr inbounds float* %tmp7652, i64 1 + %tmp7654 = getelementptr inbounds float* %tmp7653, i64 1 + %tmp7655 = getelementptr inbounds float* %tmp7654, i64 1 + %tmp7656 = getelementptr inbounds float* %tmp7655, i64 1 + %tmp7657 = getelementptr inbounds float* %tmp7656, i64 1 + %tmp7658 = getelementptr inbounds float* %tmp7657, i64 1 + %tmp7659 = getelementptr inbounds float* %tmp7658, i64 1 + %tmp7660 = getelementptr inbounds float* %tmp7659, i64 1 + %tmp7661 = getelementptr inbounds float* %tmp7660, i64 1 + %tmp7662 = getelementptr inbounds float* %tmp7661, i64 1 + %tmp7663 = getelementptr inbounds float* %tmp7662, i64 1 + %tmp7664 = getelementptr inbounds float* %tmp7663, i64 1 + %tmp7665 = getelementptr inbounds float* %tmp7664, i64 1 + %tmp7666 = getelementptr inbounds float* 
%tmp7665, i64 1 + %tmp7667 = getelementptr inbounds float* %tmp7666, i64 1 + %tmp7668 = getelementptr inbounds float* %tmp7667, i64 1 + %tmp7669 = getelementptr inbounds float* %tmp7668, i64 1 + %tmp7670 = getelementptr inbounds float* %tmp7669, i64 1 + %tmp7671 = getelementptr inbounds float* %tmp7670, i64 1 + %tmp7672 = getelementptr inbounds float* %tmp7671, i64 1 + %tmp7673 = getelementptr inbounds float* %tmp7672, i64 1 + %tmp7674 = getelementptr inbounds float* %tmp7673, i64 1 + %tmp7675 = getelementptr inbounds float* %tmp7674, i64 1 + %tmp7676 = getelementptr inbounds float* %tmp7675, i64 1 + %tmp7677 = getelementptr inbounds float* %tmp7676, i64 1 + %tmp7678 = getelementptr inbounds float* %tmp7677, i64 1 + %tmp7679 = getelementptr inbounds float* %tmp7678, i64 1 + %tmp7680 = getelementptr inbounds float* %tmp7679, i64 1 + %tmp7681 = getelementptr inbounds float* %tmp7680, i64 1 + %tmp7682 = getelementptr inbounds float* %tmp7681, i64 1 + %tmp7683 = getelementptr inbounds float* %tmp7682, i64 1 + %tmp7684 = getelementptr inbounds float* %tmp7683, i64 1 + %tmp7685 = getelementptr inbounds float* %tmp7684, i64 1 + %tmp7686 = getelementptr inbounds float* %tmp7685, i64 1 + %tmp7687 = getelementptr inbounds float* %tmp7686, i64 1 + %tmp7688 = getelementptr inbounds float* %tmp7687, i64 1 + %tmp7689 = getelementptr inbounds float* %tmp7688, i64 1 + %tmp7690 = getelementptr inbounds float* %tmp7689, i64 1 + %tmp7691 = getelementptr inbounds float* %tmp7690, i64 1 + %tmp7692 = getelementptr inbounds float* %tmp7691, i64 1 + %tmp7693 = getelementptr inbounds float* %tmp7692, i64 1 + %tmp7694 = getelementptr inbounds float* %tmp7693, i64 1 + %tmp7695 = getelementptr inbounds float* %tmp7694, i64 1 + %tmp7696 = getelementptr inbounds float* %tmp7695, i64 1 + %tmp7697 = getelementptr inbounds float* %tmp7696, i64 1 + %tmp7698 = getelementptr inbounds float* %tmp7697, i64 1 + %tmp7699 = getelementptr inbounds float* %tmp7698, i64 1 + %tmp7700 = getelementptr inbounds 
float* %tmp7699, i64 1 + %tmp7701 = getelementptr inbounds float* %tmp7700, i64 1 + %tmp7702 = getelementptr inbounds float* %tmp7701, i64 1 + %tmp7703 = getelementptr inbounds float* %tmp7702, i64 1 + %tmp7704 = getelementptr inbounds float* %tmp7703, i64 1 + %tmp7705 = getelementptr inbounds float* %tmp7704, i64 1 + %tmp7706 = getelementptr inbounds float* %tmp7705, i64 1 + %tmp7707 = getelementptr inbounds float* %tmp7706, i64 1 + %tmp7708 = getelementptr inbounds float* %tmp7707, i64 1 + %tmp7709 = getelementptr inbounds float* %tmp7708, i64 1 + %tmp7710 = getelementptr inbounds float* %tmp7709, i64 1 + %tmp7711 = getelementptr inbounds float* %tmp7710, i64 1 + %tmp7712 = getelementptr inbounds float* %tmp7711, i64 1 + %tmp7713 = getelementptr inbounds float* %tmp7712, i64 1 + %tmp7714 = getelementptr inbounds float* %tmp7713, i64 1 + %tmp7715 = getelementptr inbounds float* %tmp7714, i64 1 + %tmp7716 = getelementptr inbounds float* %tmp7715, i64 1 + %tmp7717 = getelementptr inbounds float* %tmp7716, i64 1 + %tmp7718 = getelementptr inbounds float* %tmp7717, i64 1 + %tmp7719 = getelementptr inbounds float* %tmp7718, i64 1 + %tmp7720 = getelementptr inbounds float* %tmp7719, i64 1 + %tmp7721 = getelementptr inbounds float* %tmp7720, i64 1 + %tmp7722 = getelementptr inbounds float* %tmp7721, i64 1 + %tmp7723 = getelementptr inbounds float* %tmp7722, i64 1 + %tmp7724 = getelementptr inbounds float* %tmp7723, i64 1 + %tmp7725 = getelementptr inbounds float* %tmp7724, i64 1 + %tmp7726 = getelementptr inbounds float* %tmp7725, i64 1 + %tmp7727 = getelementptr inbounds float* %tmp7726, i64 1 + %tmp7728 = getelementptr inbounds float* %tmp7727, i64 1 + %tmp7729 = getelementptr inbounds float* %tmp7728, i64 1 + %tmp7730 = getelementptr inbounds float* %tmp7729, i64 1 + %tmp7731 = getelementptr inbounds float* %tmp7730, i64 1 + %tmp7732 = getelementptr inbounds float* %tmp7731, i64 1 + %tmp7733 = getelementptr inbounds float* %tmp7732, i64 1 + %tmp7734 = getelementptr 
inbounds float* %tmp7733, i64 1 + %tmp7735 = getelementptr inbounds float* %tmp7734, i64 1 + %tmp7736 = getelementptr inbounds float* %tmp7735, i64 1 + %tmp7737 = getelementptr inbounds float* %tmp7736, i64 1 + %tmp7738 = getelementptr inbounds float* %tmp7737, i64 1 + %tmp7739 = getelementptr inbounds float* %tmp7738, i64 1 + %tmp7740 = getelementptr inbounds float* %tmp7739, i64 1 + %tmp7741 = getelementptr inbounds float* %tmp7740, i64 1 + %tmp7742 = getelementptr inbounds float* %tmp7741, i64 1 + %tmp7743 = getelementptr inbounds float* %tmp7742, i64 1 + %tmp7744 = getelementptr inbounds float* %tmp7743, i64 1 + %tmp7745 = getelementptr inbounds float* %tmp7744, i64 1 + %tmp7746 = getelementptr inbounds float* %tmp7745, i64 1 + %tmp7747 = getelementptr inbounds float* %tmp7746, i64 1 + %tmp7748 = getelementptr inbounds float* %tmp7747, i64 1 + %tmp7749 = getelementptr inbounds float* %tmp7748, i64 1 + %tmp7750 = getelementptr inbounds float* %tmp7749, i64 1 + %tmp7751 = getelementptr inbounds float* %tmp7750, i64 1 + %tmp7752 = getelementptr inbounds float* %tmp7751, i64 1 + %tmp7753 = getelementptr inbounds float* %tmp7752, i64 1 + %tmp7754 = getelementptr inbounds float* %tmp7753, i64 1 + %tmp7755 = getelementptr inbounds float* %tmp7754, i64 1 + %tmp7756 = getelementptr inbounds float* %tmp7755, i64 1 + %tmp7757 = getelementptr inbounds float* %tmp7756, i64 1 + %tmp7758 = getelementptr inbounds float* %tmp7757, i64 1 + %tmp7759 = getelementptr inbounds float* %tmp7758, i64 1 + %tmp7760 = getelementptr inbounds float* %tmp7759, i64 1 + %tmp7761 = getelementptr inbounds float* %tmp7760, i64 1 + %tmp7762 = getelementptr inbounds float* %tmp7761, i64 1 + %tmp7763 = getelementptr inbounds float* %tmp7762, i64 1 + %tmp7764 = getelementptr inbounds float* %tmp7763, i64 1 + %tmp7765 = getelementptr inbounds float* %tmp7764, i64 1 + %tmp7766 = getelementptr inbounds float* %tmp7765, i64 1 + %tmp7767 = getelementptr inbounds float* %tmp7766, i64 1 + %tmp7768 = 
getelementptr inbounds float* %tmp7767, i64 1 + %tmp7769 = getelementptr inbounds float* %tmp7768, i64 1 + %tmp7770 = getelementptr inbounds float* %tmp7769, i64 1 + %tmp7771 = getelementptr inbounds float* %tmp7770, i64 1 + %tmp7772 = getelementptr inbounds float* %tmp7771, i64 1 + %tmp7773 = getelementptr inbounds float* %tmp7772, i64 1 + %tmp7774 = getelementptr inbounds float* %tmp7773, i64 1 + %tmp7775 = getelementptr inbounds float* %tmp7774, i64 1 + %tmp7776 = getelementptr inbounds float* %tmp7775, i64 1 + %tmp7777 = getelementptr inbounds float* %tmp7776, i64 1 + %tmp7778 = getelementptr inbounds float* %tmp7777, i64 1 + %tmp7779 = getelementptr inbounds float* %tmp7778, i64 1 + %tmp7780 = getelementptr inbounds float* %tmp7779, i64 1 + %tmp7781 = getelementptr inbounds float* %tmp7780, i64 1 + %tmp7782 = getelementptr inbounds float* %tmp7781, i64 1 + %tmp7783 = getelementptr inbounds float* %tmp7782, i64 1 + %tmp7784 = getelementptr inbounds float* %tmp7783, i64 1 + %tmp7785 = getelementptr inbounds float* %tmp7784, i64 1 + %tmp7786 = getelementptr inbounds float* %tmp7785, i64 1 + %tmp7787 = getelementptr inbounds float* %tmp7786, i64 1 + %tmp7788 = getelementptr inbounds float* %tmp7787, i64 1 + %tmp7789 = getelementptr inbounds float* %tmp7788, i64 1 + %tmp7790 = getelementptr inbounds float* %tmp7789, i64 1 + %tmp7791 = getelementptr inbounds float* %tmp7790, i64 1 + %tmp7792 = getelementptr inbounds float* %tmp7791, i64 1 + %tmp7793 = getelementptr inbounds float* %tmp7792, i64 1 + %tmp7794 = getelementptr inbounds float* %tmp7793, i64 1 + %tmp7795 = getelementptr inbounds float* %tmp7794, i64 1 + %tmp7796 = getelementptr inbounds float* %tmp7795, i64 1 + %tmp7797 = getelementptr inbounds float* %tmp7796, i64 1 + %tmp7798 = getelementptr inbounds float* %tmp7797, i64 1 + %tmp7799 = getelementptr inbounds float* %tmp7798, i64 1 + %tmp7800 = getelementptr inbounds float* %tmp7799, i64 1 + %tmp7801 = getelementptr inbounds float* %tmp7800, i64 1 + 
%tmp7802 = getelementptr inbounds float* %tmp7801, i64 1 + %tmp7803 = getelementptr inbounds float* %tmp7802, i64 1 + %tmp7804 = getelementptr inbounds float* %tmp7803, i64 1 + %tmp7805 = getelementptr inbounds float* %tmp7804, i64 1 + %tmp7806 = getelementptr inbounds float* %tmp7805, i64 1 + %tmp7807 = getelementptr inbounds float* %tmp7806, i64 1 + %tmp7808 = getelementptr inbounds float* %tmp7807, i64 1 + %tmp7809 = getelementptr inbounds float* %tmp7808, i64 1 + %tmp7810 = getelementptr inbounds float* %tmp7809, i64 1 + %tmp7811 = getelementptr inbounds float* %tmp7810, i64 1 + %tmp7812 = getelementptr inbounds float* %tmp7811, i64 1 + %tmp7813 = getelementptr inbounds float* %tmp7812, i64 1 + %tmp7814 = getelementptr inbounds float* %tmp7813, i64 1 + %tmp7815 = getelementptr inbounds float* %tmp7814, i64 1 + %tmp7816 = getelementptr inbounds float* %tmp7815, i64 1 + %tmp7817 = getelementptr inbounds float* %tmp7816, i64 1 + %tmp7818 = getelementptr inbounds float* %tmp7817, i64 1 + %tmp7819 = getelementptr inbounds float* %tmp7818, i64 1 + %tmp7820 = getelementptr inbounds float* %tmp7819, i64 1 + %tmp7821 = getelementptr inbounds float* %tmp7820, i64 1 + %tmp7822 = getelementptr inbounds float* %tmp7821, i64 1 + %tmp7823 = getelementptr inbounds float* %tmp7822, i64 1 + %tmp7824 = getelementptr inbounds float* %tmp7823, i64 1 + %tmp7825 = getelementptr inbounds float* %tmp7824, i64 1 + %tmp7826 = getelementptr inbounds float* %tmp7825, i64 1 + %tmp7827 = getelementptr inbounds float* %tmp7826, i64 1 + %tmp7828 = getelementptr inbounds float* %tmp7827, i64 1 + %tmp7829 = getelementptr inbounds float* %tmp7828, i64 1 + %tmp7830 = getelementptr inbounds float* %tmp7829, i64 1 + %tmp7831 = getelementptr inbounds float* %tmp7830, i64 1 + %tmp7832 = getelementptr inbounds float* %tmp7831, i64 1 + %tmp7833 = getelementptr inbounds float* %tmp7832, i64 1 + %tmp7834 = getelementptr inbounds float* %tmp7833, i64 1 + %tmp7835 = getelementptr inbounds float* %tmp7834, 
i64 1 + %tmp7836 = getelementptr inbounds float* %tmp7835, i64 1 + %tmp7837 = getelementptr inbounds float* %tmp7836, i64 1 + %tmp7838 = getelementptr inbounds float* %tmp7837, i64 1 + %tmp7839 = getelementptr inbounds float* %tmp7838, i64 1 + %tmp7840 = getelementptr inbounds float* %tmp7839, i64 1 + %tmp7841 = getelementptr inbounds float* %tmp7840, i64 1 + %tmp7842 = getelementptr inbounds float* %tmp7841, i64 1 + %tmp7843 = getelementptr inbounds float* %tmp7842, i64 1 + %tmp7844 = getelementptr inbounds float* %tmp7843, i64 1 + %tmp7845 = getelementptr inbounds float* %tmp7844, i64 1 + %tmp7846 = getelementptr inbounds float* %tmp7845, i64 1 + %tmp7847 = getelementptr inbounds float* %tmp7846, i64 1 + %tmp7848 = getelementptr inbounds float* %tmp7847, i64 1 + %tmp7849 = getelementptr inbounds float* %tmp7848, i64 1 + %tmp7850 = getelementptr inbounds float* %tmp7849, i64 1 + %tmp7851 = getelementptr inbounds float* %tmp7850, i64 1 + %tmp7852 = getelementptr inbounds float* %tmp7851, i64 1 + %tmp7853 = getelementptr inbounds float* %tmp7852, i64 1 + %tmp7854 = getelementptr inbounds float* %tmp7853, i64 1 + %tmp7855 = getelementptr inbounds float* %tmp7854, i64 1 + %tmp7856 = getelementptr inbounds float* %tmp7855, i64 1 + %tmp7857 = getelementptr inbounds float* %tmp7856, i64 1 + %tmp7858 = getelementptr inbounds float* %tmp7857, i64 1 + %tmp7859 = getelementptr inbounds float* %tmp7858, i64 1 + %tmp7860 = getelementptr inbounds float* %tmp7859, i64 1 + %tmp7861 = getelementptr inbounds float* %tmp7860, i64 1 + %tmp7862 = getelementptr inbounds float* %tmp7861, i64 1 + %tmp7863 = getelementptr inbounds float* %tmp7862, i64 1 + %tmp7864 = getelementptr inbounds float* %tmp7863, i64 1 + %tmp7865 = getelementptr inbounds float* %tmp7864, i64 1 + %tmp7866 = getelementptr inbounds float* %tmp7865, i64 1 + %tmp7867 = getelementptr inbounds float* %tmp7866, i64 1 + %tmp7868 = getelementptr inbounds float* %tmp7867, i64 1 + %tmp7869 = getelementptr inbounds float* 
%tmp7868, i64 1 + %tmp7870 = getelementptr inbounds float* %tmp7869, i64 1 + %tmp7871 = getelementptr inbounds float* %tmp7870, i64 1 + %tmp7872 = getelementptr inbounds float* %tmp7871, i64 1 + %tmp7873 = getelementptr inbounds float* %tmp7872, i64 1 + %tmp7874 = getelementptr inbounds float* %tmp7873, i64 1 + %tmp7875 = getelementptr inbounds float* %tmp7874, i64 1 + %tmp7876 = getelementptr inbounds float* %tmp7875, i64 1 + %tmp7877 = getelementptr inbounds float* %tmp7876, i64 1 + %tmp7878 = getelementptr inbounds float* %tmp7877, i64 1 + %tmp7879 = getelementptr inbounds float* %tmp7878, i64 1 + %tmp7880 = getelementptr inbounds float* %tmp7879, i64 1 + %tmp7881 = getelementptr inbounds float* %tmp7880, i64 1 + %tmp7882 = getelementptr inbounds float* %tmp7881, i64 1 + %tmp7883 = getelementptr inbounds float* %tmp7882, i64 1 + %tmp7884 = getelementptr inbounds float* %tmp7883, i64 1 + %tmp7885 = getelementptr inbounds float* %tmp7884, i64 1 + %tmp7886 = getelementptr inbounds float* %tmp7885, i64 1 + %tmp7887 = getelementptr inbounds float* %tmp7886, i64 1 + %tmp7888 = getelementptr inbounds float* %tmp7887, i64 1 + %tmp7889 = getelementptr inbounds float* %tmp7888, i64 1 + %tmp7890 = getelementptr inbounds float* %tmp7889, i64 1 + %tmp7891 = getelementptr inbounds float* %tmp7890, i64 1 + %tmp7892 = getelementptr inbounds float* %tmp7891, i64 1 + %tmp7893 = getelementptr inbounds float* %tmp7892, i64 1 + %tmp7894 = getelementptr inbounds float* %tmp7893, i64 1 + %tmp7895 = getelementptr inbounds float* %tmp7894, i64 1 + %tmp7896 = getelementptr inbounds float* %tmp7895, i64 1 + %tmp7897 = getelementptr inbounds float* %tmp7896, i64 1 + %tmp7898 = getelementptr inbounds float* %tmp7897, i64 1 + %tmp7899 = getelementptr inbounds float* %tmp7898, i64 1 + %tmp7900 = getelementptr inbounds float* %tmp7899, i64 1 + %tmp7901 = getelementptr inbounds float* %tmp7900, i64 1 + %tmp7902 = getelementptr inbounds float* %tmp7901, i64 1 + %tmp7903 = getelementptr inbounds 
float* %tmp7902, i64 1 + %tmp7904 = getelementptr inbounds float* %tmp7903, i64 1 + %tmp7905 = getelementptr inbounds float* %tmp7904, i64 1 + %tmp7906 = getelementptr inbounds float* %tmp7905, i64 1 + %tmp7907 = getelementptr inbounds float* %tmp7906, i64 1 + %tmp7908 = getelementptr inbounds float* %tmp7907, i64 1 + %tmp7909 = getelementptr inbounds float* %tmp7908, i64 1 + %tmp7910 = getelementptr inbounds float* %tmp7909, i64 1 + %tmp7911 = getelementptr inbounds float* %tmp7910, i64 1 + %tmp7912 = getelementptr inbounds float* %tmp7911, i64 1 + %tmp7913 = getelementptr inbounds float* %tmp7912, i64 1 + %tmp7914 = getelementptr inbounds float* %tmp7913, i64 1 + %tmp7915 = getelementptr inbounds float* %tmp7914, i64 1 + %tmp7916 = getelementptr inbounds float* %tmp7915, i64 1 + %tmp7917 = getelementptr inbounds float* %tmp7916, i64 1 + %tmp7918 = getelementptr inbounds float* %tmp7917, i64 1 + %tmp7919 = getelementptr inbounds float* %tmp7918, i64 1 + %tmp7920 = getelementptr inbounds float* %tmp7919, i64 1 + %tmp7921 = getelementptr inbounds float* %tmp7920, i64 1 + %tmp7922 = getelementptr inbounds float* %tmp7921, i64 1 + %tmp7923 = getelementptr inbounds float* %tmp7922, i64 1 + %tmp7924 = getelementptr inbounds float* %tmp7923, i64 1 + %tmp7925 = getelementptr inbounds float* %tmp7924, i64 1 + %tmp7926 = getelementptr inbounds float* %tmp7925, i64 1 + %tmp7927 = getelementptr inbounds float* %tmp7926, i64 1 + %tmp7928 = getelementptr inbounds float* %tmp7927, i64 1 + %tmp7929 = getelementptr inbounds float* %tmp7928, i64 1 + %tmp7930 = getelementptr inbounds float* %tmp7929, i64 1 + %tmp7931 = getelementptr inbounds float* %tmp7930, i64 1 + %tmp7932 = getelementptr inbounds float* %tmp7931, i64 1 + %tmp7933 = getelementptr inbounds float* %tmp7932, i64 1 + %tmp7934 = getelementptr inbounds float* %tmp7933, i64 1 + %tmp7935 = getelementptr inbounds float* %tmp7934, i64 1 + %tmp7936 = getelementptr inbounds float* %tmp7935, i64 1 + %tmp7937 = getelementptr 
inbounds float* %tmp7936, i64 1 + %tmp7938 = getelementptr inbounds float* %tmp7937, i64 1 + %tmp7939 = getelementptr inbounds float* %tmp7938, i64 1 + %tmp7940 = getelementptr inbounds float* %tmp7939, i64 1 + %tmp7941 = getelementptr inbounds float* %tmp7940, i64 1 + %tmp7942 = getelementptr inbounds float* %tmp7941, i64 1 + %tmp7943 = getelementptr inbounds float* %tmp7942, i64 1 + %tmp7944 = getelementptr inbounds float* %tmp7943, i64 1 + %tmp7945 = getelementptr inbounds float* %tmp7944, i64 1 + %tmp7946 = getelementptr inbounds float* %tmp7945, i64 1 + %tmp7947 = getelementptr inbounds float* %tmp7946, i64 1 + %tmp7948 = getelementptr inbounds float* %tmp7947, i64 1 + %tmp7949 = getelementptr inbounds float* %tmp7948, i64 1 + %tmp7950 = getelementptr inbounds float* %tmp7949, i64 1 + %tmp7951 = getelementptr inbounds float* %tmp7950, i64 1 + %tmp7952 = getelementptr inbounds float* %tmp7951, i64 1 + %tmp7953 = getelementptr inbounds float* %tmp7952, i64 1 + %tmp7954 = getelementptr inbounds float* %tmp7953, i64 1 + %tmp7955 = getelementptr inbounds float* %tmp7954, i64 1 + %tmp7956 = getelementptr inbounds float* %tmp7955, i64 1 + %tmp7957 = getelementptr inbounds float* %tmp7956, i64 1 + %tmp7958 = getelementptr inbounds float* %tmp7957, i64 1 + %tmp7959 = getelementptr inbounds float* %tmp7958, i64 1 + %tmp7960 = getelementptr inbounds float* %tmp7959, i64 1 + %tmp7961 = getelementptr inbounds float* %tmp7960, i64 1 + %tmp7962 = getelementptr inbounds float* %tmp7961, i64 1 + %tmp7963 = getelementptr inbounds float* %tmp7962, i64 1 + %tmp7964 = getelementptr inbounds float* %tmp7963, i64 1 + %tmp7965 = getelementptr inbounds float* %tmp7964, i64 1 + %tmp7966 = getelementptr inbounds float* %tmp7965, i64 1 + %tmp7967 = getelementptr inbounds float* %tmp7966, i64 1 + %tmp7968 = getelementptr inbounds float* %tmp7967, i64 1 + %tmp7969 = getelementptr inbounds float* %tmp7968, i64 1 + %tmp7970 = getelementptr inbounds float* %tmp7969, i64 1 + %tmp7971 = 
getelementptr inbounds float* %tmp7970, i64 1 + %tmp7972 = getelementptr inbounds float* %tmp7971, i64 1 + %tmp7973 = getelementptr inbounds float* %tmp7972, i64 1 + %tmp7974 = getelementptr inbounds float* %tmp7973, i64 1 + %tmp7975 = getelementptr inbounds float* %tmp7974, i64 1 + %tmp7976 = getelementptr inbounds float* %tmp7975, i64 1 + %tmp7977 = getelementptr inbounds float* %tmp7976, i64 1 + %tmp7978 = getelementptr inbounds float* %tmp7977, i64 1 + %tmp7979 = getelementptr inbounds float* %tmp7978, i64 1 + %tmp7980 = getelementptr inbounds float* %tmp7979, i64 1 + %tmp7981 = getelementptr inbounds float* %tmp7980, i64 1 + %tmp7982 = getelementptr inbounds float* %tmp7981, i64 1 + %tmp7983 = getelementptr inbounds float* %tmp7982, i64 1 + %tmp7984 = getelementptr inbounds float* %tmp7983, i64 1 + %tmp7985 = getelementptr inbounds float* %tmp7984, i64 1 + %tmp7986 = getelementptr inbounds float* %tmp7985, i64 1 + %tmp7987 = getelementptr inbounds float* %tmp7986, i64 1 + %tmp7988 = getelementptr inbounds float* %tmp7987, i64 1 + %tmp7989 = getelementptr inbounds float* %tmp7988, i64 1 + %tmp7990 = getelementptr inbounds float* %tmp7989, i64 1 + %tmp7991 = getelementptr inbounds float* %tmp7990, i64 1 + %tmp7992 = getelementptr inbounds float* %tmp7991, i64 1 + %tmp7993 = getelementptr inbounds float* %tmp7992, i64 1 + %tmp7994 = getelementptr inbounds float* %tmp7993, i64 1 + %tmp7995 = getelementptr inbounds float* %tmp7994, i64 1 + %tmp7996 = getelementptr inbounds float* %tmp7995, i64 1 + %tmp7997 = getelementptr inbounds float* %tmp7996, i64 1 + %tmp7998 = getelementptr inbounds float* %tmp7997, i64 1 + %tmp7999 = getelementptr inbounds float* %tmp7998, i64 1 + %tmp8000 = getelementptr inbounds float* %tmp7999, i64 1 + %tmp8001 = getelementptr inbounds float* %tmp8000, i64 1 + %tmp8002 = getelementptr inbounds float* %tmp8001, i64 1 + %tmp8003 = getelementptr inbounds float* %tmp8002, i64 1 + %tmp8004 = getelementptr inbounds float* %tmp8003, i64 1 + 
%tmp8005 = getelementptr inbounds float* %tmp8004, i64 1 + %tmp8006 = getelementptr inbounds float* %tmp8005, i64 1 + %tmp8007 = getelementptr inbounds float* %tmp8006, i64 1 + %tmp8008 = getelementptr inbounds float* %tmp8007, i64 1 + %tmp8009 = getelementptr inbounds float* %tmp8008, i64 1 + %tmp8010 = getelementptr inbounds float* %tmp8009, i64 1 + %tmp8011 = getelementptr inbounds float* %tmp8010, i64 1 + %tmp8012 = getelementptr inbounds float* %tmp8011, i64 1 + %tmp8013 = getelementptr inbounds float* %tmp8012, i64 1 + %tmp8014 = getelementptr inbounds float* %tmp8013, i64 1 + %tmp8015 = getelementptr inbounds float* %tmp8014, i64 1 + %tmp8016 = getelementptr inbounds float* %tmp8015, i64 1 + %tmp8017 = getelementptr inbounds float* %tmp8016, i64 1 + %tmp8018 = getelementptr inbounds float* %tmp8017, i64 1 + %tmp8019 = getelementptr inbounds float* %tmp8018, i64 1 + %tmp8020 = getelementptr inbounds float* %tmp8019, i64 1 + %tmp8021 = getelementptr inbounds float* %tmp8020, i64 1 + %tmp8022 = getelementptr inbounds float* %tmp8021, i64 1 + %tmp8023 = getelementptr inbounds float* %tmp8022, i64 1 + %tmp8024 = getelementptr inbounds float* %tmp8023, i64 1 + %tmp8025 = getelementptr inbounds float* %tmp8024, i64 1 + %tmp8026 = getelementptr inbounds float* %tmp8025, i64 1 + %tmp8027 = getelementptr inbounds float* %tmp8026, i64 1 + %tmp8028 = getelementptr inbounds float* %tmp8027, i64 1 + %tmp8029 = getelementptr inbounds float* %tmp8028, i64 1 + %tmp8030 = getelementptr inbounds float* %tmp8029, i64 1 + %tmp8031 = getelementptr inbounds float* %tmp8030, i64 1 + %tmp8032 = getelementptr inbounds float* %tmp8031, i64 1 + %tmp8033 = getelementptr inbounds float* %tmp8032, i64 1 + %tmp8034 = getelementptr inbounds float* %tmp8033, i64 1 + %tmp8035 = getelementptr inbounds float* %tmp8034, i64 1 + %tmp8036 = getelementptr inbounds float* %tmp8035, i64 1 + %tmp8037 = getelementptr inbounds float* %tmp8036, i64 1 + %tmp8038 = getelementptr inbounds float* %tmp8037, 
i64 1 + %tmp8039 = getelementptr inbounds float* %tmp8038, i64 1 + %tmp8040 = getelementptr inbounds float* %tmp8039, i64 1 + %tmp8041 = getelementptr inbounds float* %tmp8040, i64 1 + %tmp8042 = getelementptr inbounds float* %tmp8041, i64 1 + %tmp8043 = getelementptr inbounds float* %tmp8042, i64 1 + %tmp8044 = getelementptr inbounds float* %tmp8043, i64 1 + %tmp8045 = getelementptr inbounds float* %tmp8044, i64 1 + %tmp8046 = getelementptr inbounds float* %tmp8045, i64 1 + %tmp8047 = getelementptr inbounds float* %tmp8046, i64 1 + %tmp8048 = getelementptr inbounds float* %tmp8047, i64 1 + %tmp8049 = getelementptr inbounds float* %tmp8048, i64 1 + %tmp8050 = getelementptr inbounds float* %tmp8049, i64 1 + %tmp8051 = getelementptr inbounds float* %tmp8050, i64 1 + %tmp8052 = getelementptr inbounds float* %tmp8051, i64 1 + %tmp8053 = getelementptr inbounds float* %tmp8052, i64 1 + %tmp8054 = getelementptr inbounds float* %tmp8053, i64 1 + %tmp8055 = getelementptr inbounds float* %tmp8054, i64 1 + %tmp8056 = getelementptr inbounds float* %tmp8055, i64 1 + %tmp8057 = getelementptr inbounds float* %tmp8056, i64 1 + %tmp8058 = getelementptr inbounds float* %tmp8057, i64 1 + %tmp8059 = getelementptr inbounds float* %tmp8058, i64 1 + %tmp8060 = getelementptr inbounds float* %tmp8059, i64 1 + %tmp8061 = getelementptr inbounds float* %tmp8060, i64 1 + %tmp8062 = getelementptr inbounds float* %tmp8061, i64 1 + %tmp8063 = getelementptr inbounds float* %tmp8062, i64 1 + %tmp8064 = getelementptr inbounds float* %tmp8063, i64 1 + %tmp8065 = getelementptr inbounds float* %tmp8064, i64 1 + %tmp8066 = getelementptr inbounds float* %tmp8065, i64 1 + %tmp8067 = getelementptr inbounds float* %tmp8066, i64 1 + %tmp8068 = getelementptr inbounds float* %tmp8067, i64 1 + %tmp8069 = getelementptr inbounds float* %tmp8068, i64 1 + %tmp8070 = getelementptr inbounds float* %tmp8069, i64 1 + %tmp8071 = getelementptr inbounds float* %tmp8070, i64 1 + %tmp8072 = getelementptr inbounds float* 
%tmp8071, i64 1 + %tmp8073 = getelementptr inbounds float* %tmp8072, i64 1 + %tmp8074 = getelementptr inbounds float* %tmp8073, i64 1 + %tmp8075 = getelementptr inbounds float* %tmp8074, i64 1 + %tmp8076 = getelementptr inbounds float* %tmp8075, i64 1 + %tmp8077 = getelementptr inbounds float* %tmp8076, i64 1 + %tmp8078 = getelementptr inbounds float* %tmp8077, i64 1 + %tmp8079 = getelementptr inbounds float* %tmp8078, i64 1 + %tmp8080 = getelementptr inbounds float* %tmp8079, i64 1 + %tmp8081 = getelementptr inbounds float* %tmp8080, i64 1 + %tmp8082 = getelementptr inbounds float* %tmp8081, i64 1 + %tmp8083 = getelementptr inbounds float* %tmp8082, i64 1 + %tmp8084 = getelementptr inbounds float* %tmp8083, i64 1 + %tmp8085 = getelementptr inbounds float* %tmp8084, i64 1 + %tmp8086 = getelementptr inbounds float* %tmp8085, i64 1 + %tmp8087 = getelementptr inbounds float* %tmp8086, i64 1 + %tmp8088 = getelementptr inbounds float* %tmp8087, i64 1 + %tmp8089 = getelementptr inbounds float* %tmp8088, i64 1 + %tmp8090 = getelementptr inbounds float* %tmp8089, i64 1 + %tmp8091 = getelementptr inbounds float* %tmp8090, i64 1 + %tmp8092 = getelementptr inbounds float* %tmp8091, i64 1 + %tmp8093 = getelementptr inbounds float* %tmp8092, i64 1 + %tmp8094 = getelementptr inbounds float* %tmp8093, i64 1 + %tmp8095 = getelementptr inbounds float* %tmp8094, i64 1 + %tmp8096 = getelementptr inbounds float* %tmp8095, i64 1 + %tmp8097 = getelementptr inbounds float* %tmp8096, i64 1 + %tmp8098 = getelementptr inbounds float* %tmp8097, i64 1 + %tmp8099 = getelementptr inbounds float* %tmp8098, i64 1 + %tmp8100 = getelementptr inbounds float* %tmp8099, i64 1 + %tmp8101 = getelementptr inbounds float* %tmp8100, i64 1 + %tmp8102 = getelementptr inbounds float* %tmp8101, i64 1 + %tmp8103 = getelementptr inbounds float* %tmp8102, i64 1 + %tmp8104 = getelementptr inbounds float* %tmp8103, i64 1 + %tmp8105 = getelementptr inbounds float* %tmp8104, i64 1 + %tmp8106 = getelementptr inbounds 
float* %tmp8105, i64 1 + %tmp8107 = getelementptr inbounds float* %tmp8106, i64 1 + %tmp8108 = getelementptr inbounds float* %tmp8107, i64 1 + %tmp8109 = getelementptr inbounds float* %tmp8108, i64 1 + %tmp8110 = getelementptr inbounds float* %tmp8109, i64 1 + %tmp8111 = getelementptr inbounds float* %tmp8110, i64 1 + %tmp8112 = getelementptr inbounds float* %tmp8111, i64 1 + %tmp8113 = getelementptr inbounds float* %tmp8112, i64 1 + %tmp8114 = getelementptr inbounds float* %tmp8113, i64 1 + %tmp8115 = getelementptr inbounds float* %tmp8114, i64 1 + %tmp8116 = getelementptr inbounds float* %tmp8115, i64 1 + %tmp8117 = getelementptr inbounds float* %tmp8116, i64 1 + %tmp8118 = getelementptr inbounds float* %tmp8117, i64 1 + %tmp8119 = getelementptr inbounds float* %tmp8118, i64 1 + %tmp8120 = getelementptr inbounds float* %tmp8119, i64 1 + %tmp8121 = getelementptr inbounds float* %tmp8120, i64 1 + %tmp8122 = getelementptr inbounds float* %tmp8121, i64 1 + %tmp8123 = getelementptr inbounds float* %tmp8122, i64 1 + %tmp8124 = getelementptr inbounds float* %tmp8123, i64 1 + %tmp8125 = getelementptr inbounds float* %tmp8124, i64 1 + %tmp8126 = getelementptr inbounds float* %tmp8125, i64 1 + %tmp8127 = getelementptr inbounds float* %tmp8126, i64 1 + %tmp8128 = getelementptr inbounds float* %tmp8127, i64 1 + %tmp8129 = getelementptr inbounds float* %tmp8128, i64 1 + %tmp8130 = getelementptr inbounds float* %tmp8129, i64 1 + %tmp8131 = getelementptr inbounds float* %tmp8130, i64 1 + %tmp8132 = getelementptr inbounds float* %tmp8131, i64 1 + %tmp8133 = getelementptr inbounds float* %tmp8132, i64 1 + %tmp8134 = getelementptr inbounds float* %tmp8133, i64 1 + %tmp8135 = getelementptr inbounds float* %tmp8134, i64 1 + %tmp8136 = getelementptr inbounds float* %tmp8135, i64 1 + %tmp8137 = getelementptr inbounds float* %tmp8136, i64 1 + %tmp8138 = getelementptr inbounds float* %tmp8137, i64 1 + %tmp8139 = getelementptr inbounds float* %tmp8138, i64 1 + %tmp8140 = getelementptr 
inbounds float* %tmp8139, i64 1 + %tmp8141 = getelementptr inbounds float* %tmp8140, i64 1 + %tmp8142 = getelementptr inbounds float* %tmp8141, i64 1 + %tmp8143 = getelementptr inbounds float* %tmp8142, i64 1 + %tmp8144 = getelementptr inbounds float* %tmp8143, i64 1 + %tmp8145 = getelementptr inbounds float* %tmp8144, i64 1 + %tmp8146 = getelementptr inbounds float* %tmp8145, i64 1 + %tmp8147 = getelementptr inbounds float* %tmp8146, i64 1 + %tmp8148 = getelementptr inbounds float* %tmp8147, i64 1 + %tmp8149 = getelementptr inbounds float* %tmp8148, i64 1 + %tmp8150 = getelementptr inbounds float* %tmp8149, i64 1 + %tmp8151 = getelementptr inbounds float* %tmp8150, i64 1 + %tmp8152 = getelementptr inbounds float* %tmp8151, i64 1 + %tmp8153 = getelementptr inbounds float* %tmp8152, i64 1 + %tmp8154 = getelementptr inbounds float* %tmp8153, i64 1 + %tmp8155 = getelementptr inbounds float* %tmp8154, i64 1 + %tmp8156 = getelementptr inbounds float* %tmp8155, i64 1 + %tmp8157 = getelementptr inbounds float* %tmp8156, i64 1 + %tmp8158 = getelementptr inbounds float* %tmp8157, i64 1 + %tmp8159 = getelementptr inbounds float* %tmp8158, i64 1 + %tmp8160 = getelementptr inbounds float* %tmp8159, i64 1 + %tmp8161 = getelementptr inbounds float* %tmp8160, i64 1 + %tmp8162 = getelementptr inbounds float* %tmp8161, i64 1 + %tmp8163 = getelementptr inbounds float* %tmp8162, i64 1 + %tmp8164 = getelementptr inbounds float* %tmp8163, i64 1 + %tmp8165 = getelementptr inbounds float* %tmp8164, i64 1 + %tmp8166 = getelementptr inbounds float* %tmp8165, i64 1 + %tmp8167 = getelementptr inbounds float* %tmp8166, i64 1 + %tmp8168 = getelementptr inbounds float* %tmp8167, i64 1 + %tmp8169 = getelementptr inbounds float* %tmp8168, i64 1 + %tmp8170 = getelementptr inbounds float* %tmp8169, i64 1 + %tmp8171 = getelementptr inbounds float* %tmp8170, i64 1 + %tmp8172 = getelementptr inbounds float* %tmp8171, i64 1 + %tmp8173 = getelementptr inbounds float* %tmp8172, i64 1 + %tmp8174 = 
getelementptr inbounds float* %tmp8173, i64 1 + %tmp8175 = getelementptr inbounds float* %tmp8174, i64 1 + %tmp8176 = getelementptr inbounds float* %tmp8175, i64 1 + %tmp8177 = getelementptr inbounds float* %tmp8176, i64 1 + %tmp8178 = getelementptr inbounds float* %tmp8177, i64 1 + %tmp8179 = getelementptr inbounds float* %tmp8178, i64 1 + %tmp8180 = getelementptr inbounds float* %tmp8179, i64 1 + %tmp8181 = getelementptr inbounds float* %tmp8180, i64 1 + %tmp8182 = getelementptr inbounds float* %tmp8181, i64 1 + %tmp8183 = getelementptr inbounds float* %tmp8182, i64 1 + %tmp8184 = getelementptr inbounds float* %tmp8183, i64 1 + %tmp8185 = getelementptr inbounds float* %tmp8184, i64 1 + %tmp8186 = getelementptr inbounds float* %tmp8185, i64 1 + %tmp8187 = getelementptr inbounds float* %tmp8186, i64 1 + %tmp8188 = getelementptr inbounds float* %tmp8187, i64 1 + %tmp8189 = getelementptr inbounds float* %tmp8188, i64 1 + %tmp8190 = getelementptr inbounds float* %tmp8189, i64 1 + %tmp8191 = getelementptr inbounds float* %tmp8190, i64 1 + %tmp8192 = getelementptr inbounds float* %tmp8191, i64 1 + %tmp8193 = getelementptr inbounds float* %tmp8192, i64 1 + %tmp8194 = getelementptr inbounds float* %tmp8193, i64 1 + %tmp8195 = getelementptr inbounds float* %tmp8194, i64 1 + %tmp8196 = getelementptr inbounds float* %tmp8195, i64 1 + %tmp8197 = getelementptr inbounds float* %tmp8196, i64 1 + %tmp8198 = getelementptr inbounds float* %tmp8197, i64 1 + %tmp8199 = getelementptr inbounds float* %tmp8198, i64 1 + %tmp8200 = getelementptr inbounds float* %tmp8199, i64 1 + %tmp8201 = getelementptr inbounds float* %tmp8200, i64 1 + %tmp8202 = getelementptr inbounds float* %tmp8201, i64 1 + %tmp8203 = getelementptr inbounds float* %tmp8202, i64 1 + %tmp8204 = getelementptr inbounds float* %tmp8203, i64 1 + %tmp8205 = getelementptr inbounds float* %tmp8204, i64 1 + %tmp8206 = getelementptr inbounds float* %tmp8205, i64 1 + %tmp8207 = getelementptr inbounds float* %tmp8206, i64 1 + 
%tmp8208 = getelementptr inbounds float* %tmp8207, i64 1 + %tmp8209 = getelementptr inbounds float* %tmp8208, i64 1 + %tmp8210 = getelementptr inbounds float* %tmp8209, i64 1 + %tmp8211 = getelementptr inbounds float* %tmp8210, i64 1 + %tmp8212 = getelementptr inbounds float* %tmp8211, i64 1 + %tmp8213 = getelementptr inbounds float* %tmp8212, i64 1 + %tmp8214 = getelementptr inbounds float* %tmp8213, i64 1 + %tmp8215 = getelementptr inbounds float* %tmp8214, i64 1 + %tmp8216 = getelementptr inbounds float* %tmp8215, i64 1 + %tmp8217 = getelementptr inbounds float* %tmp8216, i64 1 + %tmp8218 = getelementptr inbounds float* %tmp8217, i64 1 + %tmp8219 = getelementptr inbounds float* %tmp8218, i64 1 + %tmp8220 = getelementptr inbounds float* %tmp8219, i64 1 + %tmp8221 = getelementptr inbounds float* %tmp8220, i64 1 + %tmp8222 = getelementptr inbounds float* %tmp8221, i64 1 + %tmp8223 = getelementptr inbounds float* %tmp8222, i64 1 + %tmp8224 = getelementptr inbounds float* %tmp8223, i64 1 + %tmp8225 = getelementptr inbounds float* %tmp8224, i64 1 + %tmp8226 = getelementptr inbounds float* %tmp8225, i64 1 + %tmp8227 = getelementptr inbounds float* %tmp8226, i64 1 + %tmp8228 = getelementptr inbounds float* %tmp8227, i64 1 + %tmp8229 = getelementptr inbounds float* %tmp8228, i64 1 + %tmp8230 = getelementptr inbounds float* %tmp8229, i64 1 + %tmp8231 = getelementptr inbounds float* %tmp8230, i64 1 + %tmp8232 = getelementptr inbounds float* %tmp8231, i64 1 + %tmp8233 = getelementptr inbounds float* %tmp8232, i64 1 + %tmp8234 = getelementptr inbounds float* %tmp8233, i64 1 + %tmp8235 = getelementptr inbounds float* %tmp8234, i64 1 + %tmp8236 = getelementptr inbounds float* %tmp8235, i64 1 + %tmp8237 = getelementptr inbounds float* %tmp8236, i64 1 + %tmp8238 = getelementptr inbounds float* %tmp8237, i64 1 + %tmp8239 = getelementptr inbounds float* %tmp8238, i64 1 + %tmp8240 = getelementptr inbounds float* %tmp8239, i64 1 + %tmp8241 = getelementptr inbounds float* %tmp8240, 
i64 1 + %tmp8242 = getelementptr inbounds float* %tmp8241, i64 1 + %tmp8243 = getelementptr inbounds float* %tmp8242, i64 1 + %tmp8244 = getelementptr inbounds float* %tmp8243, i64 1 + %tmp8245 = getelementptr inbounds float* %tmp8244, i64 1 + %tmp8246 = getelementptr inbounds float* %tmp8245, i64 1 + %tmp8247 = getelementptr inbounds float* %tmp8246, i64 1 + %tmp8248 = getelementptr inbounds float* %tmp8247, i64 1 + %tmp8249 = getelementptr inbounds float* %tmp8248, i64 1 + %tmp8250 = getelementptr inbounds float* %tmp8249, i64 1 + %tmp8251 = getelementptr inbounds float* %tmp8250, i64 1 + %tmp8252 = getelementptr inbounds float* %tmp8251, i64 1 + %tmp8253 = getelementptr inbounds float* %tmp8252, i64 1 + %tmp8254 = getelementptr inbounds float* %tmp8253, i64 1 + %tmp8255 = getelementptr inbounds float* %tmp8254, i64 1 + %tmp8256 = getelementptr inbounds float* %tmp8255, i64 1 + %tmp8257 = getelementptr inbounds float* %tmp8256, i64 1 + %tmp8258 = getelementptr inbounds float* %tmp8257, i64 1 + %tmp8259 = getelementptr inbounds float* %tmp8258, i64 1 + %tmp8260 = getelementptr inbounds float* %tmp8259, i64 1 + %tmp8261 = getelementptr inbounds float* %tmp8260, i64 1 + %tmp8262 = getelementptr inbounds float* %tmp8261, i64 1 + %tmp8263 = getelementptr inbounds float* %tmp8262, i64 1 + %tmp8264 = getelementptr inbounds float* %tmp8263, i64 1 + %tmp8265 = getelementptr inbounds float* %tmp8264, i64 1 + %tmp8266 = getelementptr inbounds float* %tmp8265, i64 1 + %tmp8267 = getelementptr inbounds float* %tmp8266, i64 1 + %tmp8268 = getelementptr inbounds float* %tmp8267, i64 1 + %tmp8269 = getelementptr inbounds float* %tmp8268, i64 1 + %tmp8270 = getelementptr inbounds float* %tmp8269, i64 1 + %tmp8271 = getelementptr inbounds float* %tmp8270, i64 1 + %tmp8272 = getelementptr inbounds float* %tmp8271, i64 1 + %tmp8273 = getelementptr inbounds float* %tmp8272, i64 1 + %tmp8274 = getelementptr inbounds float* %tmp8273, i64 1 + %tmp8275 = getelementptr inbounds float* 
%tmp8274, i64 1 + %tmp8276 = getelementptr inbounds float* %tmp8275, i64 1 + %tmp8277 = getelementptr inbounds float* %tmp8276, i64 1 + %tmp8278 = getelementptr inbounds float* %tmp8277, i64 1 + %tmp8279 = getelementptr inbounds float* %tmp8278, i64 1 + %tmp8280 = getelementptr inbounds float* %tmp8279, i64 1 + %tmp8281 = getelementptr inbounds float* %tmp8280, i64 1 + %tmp8282 = getelementptr inbounds float* %tmp8281, i64 1 + %tmp8283 = getelementptr inbounds float* %tmp8282, i64 1 + %tmp8284 = getelementptr inbounds float* %tmp8283, i64 1 + %tmp8285 = getelementptr inbounds float* %tmp8284, i64 1 + %tmp8286 = getelementptr inbounds float* %tmp8285, i64 1 + %tmp8287 = getelementptr inbounds float* %tmp8286, i64 1 + %tmp8288 = getelementptr inbounds float* %tmp8287, i64 1 + %tmp8289 = getelementptr inbounds float* %tmp8288, i64 1 + %tmp8290 = getelementptr inbounds float* %tmp8289, i64 1 + %tmp8291 = getelementptr inbounds float* %tmp8290, i64 1 + %tmp8292 = getelementptr inbounds float* %tmp8291, i64 1 + %tmp8293 = getelementptr inbounds float* %tmp8292, i64 1 + %tmp8294 = getelementptr inbounds float* %tmp8293, i64 1 + %tmp8295 = getelementptr inbounds float* %tmp8294, i64 1 + %tmp8296 = getelementptr inbounds float* %tmp8295, i64 1 + %tmp8297 = getelementptr inbounds float* %tmp8296, i64 1 + %tmp8298 = getelementptr inbounds float* %tmp8297, i64 1 + %tmp8299 = getelementptr inbounds float* %tmp8298, i64 1 + %tmp8300 = getelementptr inbounds float* %tmp8299, i64 1 + %tmp8301 = getelementptr inbounds float* %tmp8300, i64 1 + %tmp8302 = getelementptr inbounds float* %tmp8301, i64 1 + %tmp8303 = getelementptr inbounds float* %tmp8302, i64 1 + %tmp8304 = getelementptr inbounds float* %tmp8303, i64 1 + %tmp8305 = getelementptr inbounds float* %tmp8304, i64 1 + %tmp8306 = getelementptr inbounds float* %tmp8305, i64 1 + %tmp8307 = getelementptr inbounds float* %tmp8306, i64 1 + %tmp8308 = getelementptr inbounds float* %tmp8307, i64 1 + %tmp8309 = getelementptr inbounds 
float* %tmp8308, i64 1 + %tmp8310 = getelementptr inbounds float* %tmp8309, i64 1 + %tmp8311 = getelementptr inbounds float* %tmp8310, i64 1 + %tmp8312 = getelementptr inbounds float* %tmp8311, i64 1 + %tmp8313 = getelementptr inbounds float* %tmp8312, i64 1 + %tmp8314 = getelementptr inbounds float* %tmp8313, i64 1 + %tmp8315 = getelementptr inbounds float* %tmp8314, i64 1 + %tmp8316 = getelementptr inbounds float* %tmp8315, i64 1 + %tmp8317 = getelementptr inbounds float* %tmp8316, i64 1 + %tmp8318 = getelementptr inbounds float* %tmp8317, i64 1 + %tmp8319 = getelementptr inbounds float* %tmp8318, i64 1 + %tmp8320 = getelementptr inbounds float* %tmp8319, i64 1 + %tmp8321 = getelementptr inbounds float* %tmp8320, i64 1 + %tmp8322 = getelementptr inbounds float* %tmp8321, i64 1 + %tmp8323 = getelementptr inbounds float* %tmp8322, i64 1 + %tmp8324 = getelementptr inbounds float* %tmp8323, i64 1 + %tmp8325 = getelementptr inbounds float* %tmp8324, i64 1 + %tmp8326 = getelementptr inbounds float* %tmp8325, i64 1 + %tmp8327 = getelementptr inbounds float* %tmp8326, i64 1 + %tmp8328 = getelementptr inbounds float* %tmp8327, i64 1 + %tmp8329 = getelementptr inbounds float* %tmp8328, i64 1 + %tmp8330 = getelementptr inbounds float* %tmp8329, i64 1 + %tmp8331 = getelementptr inbounds float* %tmp8330, i64 1 + %tmp8332 = getelementptr inbounds float* %tmp8331, i64 1 + %tmp8333 = getelementptr inbounds float* %tmp8332, i64 1 + %tmp8334 = getelementptr inbounds float* %tmp8333, i64 1 + %tmp8335 = getelementptr inbounds float* %tmp8334, i64 1 + %tmp8336 = getelementptr inbounds float* %tmp8335, i64 1 + %tmp8337 = getelementptr inbounds float* %tmp8336, i64 1 + %tmp8338 = getelementptr inbounds float* %tmp8337, i64 1 + %tmp8339 = getelementptr inbounds float* %tmp8338, i64 1 + %tmp8340 = getelementptr inbounds float* %tmp8339, i64 1 + %tmp8341 = getelementptr inbounds float* %tmp8340, i64 1 + %tmp8342 = getelementptr inbounds float* %tmp8341, i64 1 + %tmp8343 = getelementptr 
inbounds float* %tmp8342, i64 1 + %tmp8344 = getelementptr inbounds float* %tmp8343, i64 1 + %tmp8345 = getelementptr inbounds float* %tmp8344, i64 1 + %tmp8346 = getelementptr inbounds float* %tmp8345, i64 1 + %tmp8347 = getelementptr inbounds float* %tmp8346, i64 1 + %tmp8348 = getelementptr inbounds float* %tmp8347, i64 1 + %tmp8349 = getelementptr inbounds float* %tmp8348, i64 1 + %tmp8350 = getelementptr inbounds float* %tmp8349, i64 1 + %tmp8351 = getelementptr inbounds float* %tmp8350, i64 1 + %tmp8352 = getelementptr inbounds float* %tmp8351, i64 1 + %tmp8353 = getelementptr inbounds float* %tmp8352, i64 1 + %tmp8354 = getelementptr inbounds float* %tmp8353, i64 1 + %tmp8355 = getelementptr inbounds float* %tmp8354, i64 1 + %tmp8356 = getelementptr inbounds float* %tmp8355, i64 1 + %tmp8357 = getelementptr inbounds float* %tmp8356, i64 1 + %tmp8358 = getelementptr inbounds float* %tmp8357, i64 1 + %tmp8359 = getelementptr inbounds float* %tmp8358, i64 1 + %tmp8360 = getelementptr inbounds float* %tmp8359, i64 1 + %tmp8361 = getelementptr inbounds float* %tmp8360, i64 1 + %tmp8362 = getelementptr inbounds float* %tmp8361, i64 1 + %tmp8363 = getelementptr inbounds float* %tmp8362, i64 1 + %tmp8364 = getelementptr inbounds float* %tmp8363, i64 1 + %tmp8365 = getelementptr inbounds float* %tmp8364, i64 1 + %tmp8366 = getelementptr inbounds float* %tmp8365, i64 1 + %tmp8367 = getelementptr inbounds float* %tmp8366, i64 1 + %tmp8368 = getelementptr inbounds float* %tmp8367, i64 1 + %tmp8369 = getelementptr inbounds float* %tmp8368, i64 1 + %tmp8370 = getelementptr inbounds float* %tmp8369, i64 1 + %tmp8371 = getelementptr inbounds float* %tmp8370, i64 1 + %tmp8372 = getelementptr inbounds float* %tmp8371, i64 1 + %tmp8373 = getelementptr inbounds float* %tmp8372, i64 1 + %tmp8374 = getelementptr inbounds float* %tmp8373, i64 1 + %tmp8375 = getelementptr inbounds float* %tmp8374, i64 1 + %tmp8376 = getelementptr inbounds float* %tmp8375, i64 1 + %tmp8377 = 
getelementptr inbounds float* %tmp8376, i64 1 + %tmp8378 = getelementptr inbounds float* %tmp8377, i64 1 + %tmp8379 = getelementptr inbounds float* %tmp8378, i64 1 + %tmp8380 = getelementptr inbounds float* %tmp8379, i64 1 + %tmp8381 = getelementptr inbounds float* %tmp8380, i64 1 + %tmp8382 = getelementptr inbounds float* %tmp8381, i64 1 + %tmp8383 = getelementptr inbounds float* %tmp8382, i64 1 + %tmp8384 = getelementptr inbounds float* %tmp8383, i64 1 + %tmp8385 = getelementptr inbounds float* %tmp8384, i64 1 + %tmp8386 = getelementptr inbounds float* %tmp8385, i64 1 + %tmp8387 = getelementptr inbounds float* %tmp8386, i64 1 + %tmp8388 = getelementptr inbounds float* %tmp8387, i64 1 + %tmp8389 = getelementptr inbounds float* %tmp8388, i64 1 + %tmp8390 = getelementptr inbounds float* %tmp8389, i64 1 + %tmp8391 = getelementptr inbounds float* %tmp8390, i64 1 + %tmp8392 = getelementptr inbounds float* %tmp8391, i64 1 + %tmp8393 = getelementptr inbounds float* %tmp8392, i64 1 + %tmp8394 = getelementptr inbounds float* %tmp8393, i64 1 + %tmp8395 = getelementptr inbounds float* %tmp8394, i64 1 + %tmp8396 = getelementptr inbounds float* %tmp8395, i64 1 + %tmp8397 = getelementptr inbounds float* %tmp8396, i64 1 + %tmp8398 = getelementptr inbounds float* %tmp8397, i64 1 + %tmp8399 = getelementptr inbounds float* %tmp8398, i64 1 + %tmp8400 = getelementptr inbounds float* %tmp8399, i64 1 + %tmp8401 = getelementptr inbounds float* %tmp8400, i64 1 + %tmp8402 = getelementptr inbounds float* %tmp8401, i64 1 + %tmp8403 = getelementptr inbounds float* %tmp8402, i64 1 + %tmp8404 = getelementptr inbounds float* %tmp8403, i64 1 + %tmp8405 = getelementptr inbounds float* %tmp8404, i64 1 + %tmp8406 = getelementptr inbounds float* %tmp8405, i64 1 + %tmp8407 = getelementptr inbounds float* %tmp8406, i64 1 + %tmp8408 = getelementptr inbounds float* %tmp8407, i64 1 + %tmp8409 = getelementptr inbounds float* %tmp8408, i64 1 + %tmp8410 = getelementptr inbounds float* %tmp8409, i64 1 + 
%tmp8411 = getelementptr inbounds float* %tmp8410, i64 1 + %tmp8412 = getelementptr inbounds float* %tmp8411, i64 1 + %tmp8413 = getelementptr inbounds float* %tmp8412, i64 1 + %tmp8414 = getelementptr inbounds float* %tmp8413, i64 1 + %tmp8415 = getelementptr inbounds float* %tmp8414, i64 1 + %tmp8416 = getelementptr inbounds float* %tmp8415, i64 1 + %tmp8417 = getelementptr inbounds float* %tmp8416, i64 1 + %tmp8418 = getelementptr inbounds float* %tmp8417, i64 1 + %tmp8419 = getelementptr inbounds float* %tmp8418, i64 1 + %tmp8420 = getelementptr inbounds float* %tmp8419, i64 1 + %tmp8421 = getelementptr inbounds float* %tmp8420, i64 1 + %tmp8422 = getelementptr inbounds float* %tmp8421, i64 1 + %tmp8423 = getelementptr inbounds float* %tmp8422, i64 1 + %tmp8424 = getelementptr inbounds float* %tmp8423, i64 1 + %tmp8425 = getelementptr inbounds float* %tmp8424, i64 1 + %tmp8426 = getelementptr inbounds float* %tmp8425, i64 1 + %tmp8427 = getelementptr inbounds float* %tmp8426, i64 1 + %tmp8428 = getelementptr inbounds float* %tmp8427, i64 1 + %tmp8429 = getelementptr inbounds float* %tmp8428, i64 1 + %tmp8430 = getelementptr inbounds float* %tmp8429, i64 1 + %tmp8431 = getelementptr inbounds float* %tmp8430, i64 1 + %tmp8432 = getelementptr inbounds float* %tmp8431, i64 1 + %tmp8433 = getelementptr inbounds float* %tmp8432, i64 1 + %tmp8434 = getelementptr inbounds float* %tmp8433, i64 1 + %tmp8435 = getelementptr inbounds float* %tmp8434, i64 1 + %tmp8436 = getelementptr inbounds float* %tmp8435, i64 1 + %tmp8437 = getelementptr inbounds float* %tmp8436, i64 1 + %tmp8438 = getelementptr inbounds float* %tmp8437, i64 1 + %tmp8439 = getelementptr inbounds float* %tmp8438, i64 1 + %tmp8440 = getelementptr inbounds float* %tmp8439, i64 1 + %tmp8441 = getelementptr inbounds float* %tmp8440, i64 1 + %tmp8442 = getelementptr inbounds float* %tmp8441, i64 1 + %tmp8443 = getelementptr inbounds float* %tmp8442, i64 1 + %tmp8444 = getelementptr inbounds float* %tmp8443, 
i64 1 + %tmp8445 = getelementptr inbounds float* %tmp8444, i64 1 + %tmp8446 = getelementptr inbounds float* %tmp8445, i64 1 + %tmp8447 = getelementptr inbounds float* %tmp8446, i64 1 + %tmp8448 = getelementptr inbounds float* %tmp8447, i64 1 + %tmp8449 = getelementptr inbounds float* %tmp8448, i64 1 + %tmp8450 = getelementptr inbounds float* %tmp8449, i64 1 + %tmp8451 = getelementptr inbounds float* %tmp8450, i64 1 + %tmp8452 = getelementptr inbounds float* %tmp8451, i64 1 + %tmp8453 = getelementptr inbounds float* %tmp8452, i64 1 + %tmp8454 = getelementptr inbounds float* %tmp8453, i64 1 + %tmp8455 = getelementptr inbounds float* %tmp8454, i64 1 + %tmp8456 = getelementptr inbounds float* %tmp8455, i64 1 + %tmp8457 = getelementptr inbounds float* %tmp8456, i64 1 + %tmp8458 = getelementptr inbounds float* %tmp8457, i64 1 + %tmp8459 = getelementptr inbounds float* %tmp8458, i64 1 + %tmp8460 = getelementptr inbounds float* %tmp8459, i64 1 + %tmp8461 = getelementptr inbounds float* %tmp8460, i64 1 + %tmp8462 = getelementptr inbounds float* %tmp8461, i64 1 + %tmp8463 = getelementptr inbounds float* %tmp8462, i64 1 + %tmp8464 = getelementptr inbounds float* %tmp8463, i64 1 + %tmp8465 = getelementptr inbounds float* %tmp8464, i64 1 + %tmp8466 = getelementptr inbounds float* %tmp8465, i64 1 + %tmp8467 = getelementptr inbounds float* %tmp8466, i64 1 + %tmp8468 = getelementptr inbounds float* %tmp8467, i64 1 + %tmp8469 = getelementptr inbounds float* %tmp8468, i64 1 + %tmp8470 = getelementptr inbounds float* %tmp8469, i64 1 + %tmp8471 = getelementptr inbounds float* %tmp8470, i64 1 + %tmp8472 = getelementptr inbounds float* %tmp8471, i64 1 + %tmp8473 = getelementptr inbounds float* %tmp8472, i64 1 + %tmp8474 = getelementptr inbounds float* %tmp8473, i64 1 + %tmp8475 = getelementptr inbounds float* %tmp8474, i64 1 + %tmp8476 = getelementptr inbounds float* %tmp8475, i64 1 + %tmp8477 = getelementptr inbounds float* %tmp8476, i64 1 + %tmp8478 = getelementptr inbounds float* 
%tmp8477, i64 1 + %tmp8479 = getelementptr inbounds float* %tmp8478, i64 1 + %tmp8480 = getelementptr inbounds float* %tmp8479, i64 1 + %tmp8481 = getelementptr inbounds float* %tmp8480, i64 1 + %tmp8482 = getelementptr inbounds float* %tmp8481, i64 1 + %tmp8483 = getelementptr inbounds float* %tmp8482, i64 1 + %tmp8484 = getelementptr inbounds float* %tmp8483, i64 1 + %tmp8485 = getelementptr inbounds float* %tmp8484, i64 1 + %tmp8486 = getelementptr inbounds float* %tmp8485, i64 1 + %tmp8487 = getelementptr inbounds float* %tmp8486, i64 1 + %tmp8488 = getelementptr inbounds float* %tmp8487, i64 1 + %tmp8489 = getelementptr inbounds float* %tmp8488, i64 1 + %tmp8490 = getelementptr inbounds float* %tmp8489, i64 1 + %tmp8491 = getelementptr inbounds float* %tmp8490, i64 1 + %tmp8492 = getelementptr inbounds float* %tmp8491, i64 1 + %tmp8493 = getelementptr inbounds float* %tmp8492, i64 1 + %tmp8494 = getelementptr inbounds float* %tmp8493, i64 1 + %tmp8495 = getelementptr inbounds float* %tmp8494, i64 1 + %tmp8496 = getelementptr inbounds float* %tmp8495, i64 1 + %tmp8497 = getelementptr inbounds float* %tmp8496, i64 1 + %tmp8498 = getelementptr inbounds float* %tmp8497, i64 1 + %tmp8499 = getelementptr inbounds float* %tmp8498, i64 1 + %tmp8500 = getelementptr inbounds float* %tmp8499, i64 1 + %tmp8501 = getelementptr inbounds float* %tmp8500, i64 1 + %tmp8502 = getelementptr inbounds float* %tmp8501, i64 1 + %tmp8503 = getelementptr inbounds float* %tmp8502, i64 1 + %tmp8504 = getelementptr inbounds float* %tmp8503, i64 1 + %tmp8505 = getelementptr inbounds float* %tmp8504, i64 1 + %tmp8506 = getelementptr inbounds float* %tmp8505, i64 1 + %tmp8507 = getelementptr inbounds float* %tmp8506, i64 1 + %tmp8508 = getelementptr inbounds float* %tmp8507, i64 1 + %tmp8509 = getelementptr inbounds float* %tmp8508, i64 1 + %tmp8510 = getelementptr inbounds float* %tmp8509, i64 1 + %tmp8511 = getelementptr inbounds float* %tmp8510, i64 1 + %tmp8512 = getelementptr inbounds 
float* %tmp8511, i64 1 + %tmp8513 = getelementptr inbounds float* %tmp8512, i64 1 + %tmp8514 = getelementptr inbounds float* %tmp8513, i64 1 + %tmp8515 = getelementptr inbounds float* %tmp8514, i64 1 + %tmp8516 = getelementptr inbounds float* %tmp8515, i64 1 + %tmp8517 = getelementptr inbounds float* %tmp8516, i64 1 + %tmp8518 = getelementptr inbounds float* %tmp8517, i64 1 + %tmp8519 = getelementptr inbounds float* %tmp8518, i64 1 + %tmp8520 = getelementptr inbounds float* %tmp8519, i64 1 + %tmp8521 = getelementptr inbounds float* %tmp8520, i64 1 + %tmp8522 = getelementptr inbounds float* %tmp8521, i64 1 + %tmp8523 = getelementptr inbounds float* %tmp8522, i64 1 + %tmp8524 = getelementptr inbounds float* %tmp8523, i64 1 + %tmp8525 = getelementptr inbounds float* %tmp8524, i64 1 + %tmp8526 = getelementptr inbounds float* %tmp8525, i64 1 + %tmp8527 = getelementptr inbounds float* %tmp8526, i64 1 + %tmp8528 = getelementptr inbounds float* %tmp8527, i64 1 + %tmp8529 = getelementptr inbounds float* %tmp8528, i64 1 + %tmp8530 = getelementptr inbounds float* %tmp8529, i64 1 + %tmp8531 = getelementptr inbounds float* %tmp8530, i64 1 + %tmp8532 = getelementptr inbounds float* %tmp8531, i64 1 + %tmp8533 = getelementptr inbounds float* %tmp8532, i64 1 + %tmp8534 = getelementptr inbounds float* %tmp8533, i64 1 + %tmp8535 = getelementptr inbounds float* %tmp8534, i64 1 + %tmp8536 = getelementptr inbounds float* %tmp8535, i64 1 + %tmp8537 = getelementptr inbounds float* %tmp8536, i64 1 + %tmp8538 = getelementptr inbounds float* %tmp8537, i64 1 + %tmp8539 = getelementptr inbounds float* %tmp8538, i64 1 + %tmp8540 = getelementptr inbounds float* %tmp8539, i64 1 + %tmp8541 = getelementptr inbounds float* %tmp8540, i64 1 + %tmp8542 = getelementptr inbounds float* %tmp8541, i64 1 + %tmp8543 = getelementptr inbounds float* %tmp8542, i64 1 + %tmp8544 = getelementptr inbounds float* %tmp8543, i64 1 + %tmp8545 = getelementptr inbounds float* %tmp8544, i64 1 + %tmp8546 = getelementptr 
inbounds float* %tmp8545, i64 1 + %tmp8547 = getelementptr inbounds float* %tmp8546, i64 1 + %tmp8548 = getelementptr inbounds float* %tmp8547, i64 1 + %tmp8549 = getelementptr inbounds float* %tmp8548, i64 1 + %tmp8550 = getelementptr inbounds float* %tmp8549, i64 1 + %tmp8551 = getelementptr inbounds float* %tmp8550, i64 1 + %tmp8552 = getelementptr inbounds float* %tmp8551, i64 1 + %tmp8553 = getelementptr inbounds float* %tmp8552, i64 1 + %tmp8554 = getelementptr inbounds float* %tmp8553, i64 1 + %tmp8555 = getelementptr inbounds float* %tmp8554, i64 1 + %tmp8556 = getelementptr inbounds float* %tmp8555, i64 1 + %tmp8557 = getelementptr inbounds float* %tmp8556, i64 1 + %tmp8558 = getelementptr inbounds float* %tmp8557, i64 1 + %tmp8559 = getelementptr inbounds float* %tmp8558, i64 1 + %tmp8560 = getelementptr inbounds float* %tmp8559, i64 1 + %tmp8561 = getelementptr inbounds float* %tmp8560, i64 1 + %tmp8562 = getelementptr inbounds float* %tmp8561, i64 1 + %tmp8563 = getelementptr inbounds float* %tmp8562, i64 1 + %tmp8564 = getelementptr inbounds float* %tmp8563, i64 1 + %tmp8565 = getelementptr inbounds float* %tmp8564, i64 1 + %tmp8566 = getelementptr inbounds float* %tmp8565, i64 1 + %tmp8567 = getelementptr inbounds float* %tmp8566, i64 1 + %tmp8568 = getelementptr inbounds float* %tmp8567, i64 1 + %tmp8569 = getelementptr inbounds float* %tmp8568, i64 1 + %tmp8570 = getelementptr inbounds float* %tmp8569, i64 1 + %tmp8571 = getelementptr inbounds float* %tmp8570, i64 1 + %tmp8572 = getelementptr inbounds float* %tmp8571, i64 1 + %tmp8573 = getelementptr inbounds float* %tmp8572, i64 1 + %tmp8574 = getelementptr inbounds float* %tmp8573, i64 1 + %tmp8575 = getelementptr inbounds float* %tmp8574, i64 1 + %tmp8576 = getelementptr inbounds float* %tmp8575, i64 1 + %tmp8577 = getelementptr inbounds float* %tmp8576, i64 1 + %tmp8578 = getelementptr inbounds float* %tmp8577, i64 1 + %tmp8579 = getelementptr inbounds float* %tmp8578, i64 1 + %tmp8580 = 
getelementptr inbounds float* %tmp8579, i64 1 + %tmp8581 = getelementptr inbounds float* %tmp8580, i64 1 + %tmp8582 = getelementptr inbounds float* %tmp8581, i64 1 + %tmp8583 = getelementptr inbounds float* %tmp8582, i64 1 + %tmp8584 = getelementptr inbounds float* %tmp8583, i64 1 + %tmp8585 = getelementptr inbounds float* %tmp8584, i64 1 + %tmp8586 = getelementptr inbounds float* %tmp8585, i64 1 + %tmp8587 = getelementptr inbounds float* %tmp8586, i64 1 + %tmp8588 = getelementptr inbounds float* %tmp8587, i64 1 + %tmp8589 = getelementptr inbounds float* %tmp8588, i64 1 + %tmp8590 = getelementptr inbounds float* %tmp8589, i64 1 + %tmp8591 = getelementptr inbounds float* %tmp8590, i64 1 + %tmp8592 = getelementptr inbounds float* %tmp8591, i64 1 + %tmp8593 = getelementptr inbounds float* %tmp8592, i64 1 + %tmp8594 = getelementptr inbounds float* %tmp8593, i64 1 + %tmp8595 = getelementptr inbounds float* %tmp8594, i64 1 + %tmp8596 = getelementptr inbounds float* %tmp8595, i64 1 + %tmp8597 = getelementptr inbounds float* %tmp8596, i64 1 + %tmp8598 = getelementptr inbounds float* %tmp8597, i64 1 + %tmp8599 = getelementptr inbounds float* %tmp8598, i64 1 + %tmp8600 = getelementptr inbounds float* %tmp8599, i64 1 + %tmp8601 = getelementptr inbounds float* %tmp8600, i64 1 + %tmp8602 = getelementptr inbounds float* %tmp8601, i64 1 + %tmp8603 = getelementptr inbounds float* %tmp8602, i64 1 + %tmp8604 = getelementptr inbounds float* %tmp8603, i64 1 + %tmp8605 = getelementptr inbounds float* %tmp8604, i64 1 + %tmp8606 = getelementptr inbounds float* %tmp8605, i64 1 + %tmp8607 = getelementptr inbounds float* %tmp8606, i64 1 + %tmp8608 = getelementptr inbounds float* %tmp8607, i64 1 + %tmp8609 = getelementptr inbounds float* %tmp8608, i64 1 + %tmp8610 = getelementptr inbounds float* %tmp8609, i64 1 + %tmp8611 = getelementptr inbounds float* %tmp8610, i64 1 + %tmp8612 = getelementptr inbounds float* %tmp8611, i64 1 + %tmp8613 = getelementptr inbounds float* %tmp8612, i64 1 + 
%tmp8614 = getelementptr inbounds float* %tmp8613, i64 1 + %tmp8615 = getelementptr inbounds float* %tmp8614, i64 1 + %tmp8616 = getelementptr inbounds float* %tmp8615, i64 1 + %tmp8617 = getelementptr inbounds float* %tmp8616, i64 1 + %tmp8618 = getelementptr inbounds float* %tmp8617, i64 1 + %tmp8619 = getelementptr inbounds float* %tmp8618, i64 1 + %tmp8620 = getelementptr inbounds float* %tmp8619, i64 1 + %tmp8621 = getelementptr inbounds float* %tmp8620, i64 1 + %tmp8622 = getelementptr inbounds float* %tmp8621, i64 1 + %tmp8623 = getelementptr inbounds float* %tmp8622, i64 1 + %tmp8624 = getelementptr inbounds float* %tmp8623, i64 1 + %tmp8625 = getelementptr inbounds float* %tmp8624, i64 1 + %tmp8626 = getelementptr inbounds float* %tmp8625, i64 1 + %tmp8627 = getelementptr inbounds float* %tmp8626, i64 1 + %tmp8628 = getelementptr inbounds float* %tmp8627, i64 1 + %tmp8629 = getelementptr inbounds float* %tmp8628, i64 1 + %tmp8630 = getelementptr inbounds float* %tmp8629, i64 1 + %tmp8631 = getelementptr inbounds float* %tmp8630, i64 1 + %tmp8632 = getelementptr inbounds float* %tmp8631, i64 1 + %tmp8633 = getelementptr inbounds float* %tmp8632, i64 1 + %tmp8634 = getelementptr inbounds float* %tmp8633, i64 1 + %tmp8635 = getelementptr inbounds float* %tmp8634, i64 1 + %tmp8636 = getelementptr inbounds float* %tmp8635, i64 1 + %tmp8637 = getelementptr inbounds float* %tmp8636, i64 1 + %tmp8638 = getelementptr inbounds float* %tmp8637, i64 1 + %tmp8639 = getelementptr inbounds float* %tmp8638, i64 1 + %tmp8640 = getelementptr inbounds float* %tmp8639, i64 1 + %tmp8641 = getelementptr inbounds float* %tmp8640, i64 1 + %tmp8642 = getelementptr inbounds float* %tmp8641, i64 1 + %tmp8643 = getelementptr inbounds float* %tmp8642, i64 1 + %tmp8644 = getelementptr inbounds float* %tmp8643, i64 1 + %tmp8645 = getelementptr inbounds float* %tmp8644, i64 1 + %tmp8646 = getelementptr inbounds float* %tmp8645, i64 1 + %tmp8647 = getelementptr inbounds float* %tmp8646, 
i64 1 + %tmp8648 = getelementptr inbounds float* %tmp8647, i64 1 + %tmp8649 = getelementptr inbounds float* %tmp8648, i64 1 + %tmp8650 = getelementptr inbounds float* %tmp8649, i64 1 + %tmp8651 = getelementptr inbounds float* %tmp8650, i64 1 + %tmp8652 = getelementptr inbounds float* %tmp8651, i64 1 + %tmp8653 = getelementptr inbounds float* %tmp8652, i64 1 + %tmp8654 = getelementptr inbounds float* %tmp8653, i64 1 + %tmp8655 = getelementptr inbounds float* %tmp8654, i64 1 + %tmp8656 = getelementptr inbounds float* %tmp8655, i64 1 + %tmp8657 = getelementptr inbounds float* %tmp8656, i64 1 + %tmp8658 = getelementptr inbounds float* %tmp8657, i64 1 + %tmp8659 = getelementptr inbounds float* %tmp8658, i64 1 + %tmp8660 = getelementptr inbounds float* %tmp8659, i64 1 + %tmp8661 = getelementptr inbounds float* %tmp8660, i64 1 + %tmp8662 = getelementptr inbounds float* %tmp8661, i64 1 + %tmp8663 = getelementptr inbounds float* %tmp8662, i64 1 + %tmp8664 = getelementptr inbounds float* %tmp8663, i64 1 + %tmp8665 = getelementptr inbounds float* %tmp8664, i64 1 + %tmp8666 = getelementptr inbounds float* %tmp8665, i64 1 + %tmp8667 = getelementptr inbounds float* %tmp8666, i64 1 + %tmp8668 = getelementptr inbounds float* %tmp8667, i64 1 + %tmp8669 = getelementptr inbounds float* %tmp8668, i64 1 + %tmp8670 = getelementptr inbounds float* %tmp8669, i64 1 + %tmp8671 = getelementptr inbounds float* %tmp8670, i64 1 + %tmp8672 = getelementptr inbounds float* %tmp8671, i64 1 + %tmp8673 = getelementptr inbounds float* %tmp8672, i64 1 + %tmp8674 = getelementptr inbounds float* %tmp8673, i64 1 + %tmp8675 = getelementptr inbounds float* %tmp8674, i64 1 + %tmp8676 = getelementptr inbounds float* %tmp8675, i64 1 + %tmp8677 = getelementptr inbounds float* %tmp8676, i64 1 + %tmp8678 = getelementptr inbounds float* %tmp8677, i64 1 + %tmp8679 = getelementptr inbounds float* %tmp8678, i64 1 + %tmp8680 = getelementptr inbounds float* %tmp8679, i64 1 + %tmp8681 = getelementptr inbounds float* 
%tmp8680, i64 1 + %tmp8682 = getelementptr inbounds float* %tmp8681, i64 1 + %tmp8683 = getelementptr inbounds float* %tmp8682, i64 1 + %tmp8684 = getelementptr inbounds float* %tmp8683, i64 1 + %tmp8685 = getelementptr inbounds float* %tmp8684, i64 1 + %tmp8686 = getelementptr inbounds float* %tmp8685, i64 1 + %tmp8687 = getelementptr inbounds float* %tmp8686, i64 1 + %tmp8688 = getelementptr inbounds float* %tmp8687, i64 1 + %tmp8689 = getelementptr inbounds float* %tmp8688, i64 1 + %tmp8690 = getelementptr inbounds float* %tmp8689, i64 1 + %tmp8691 = getelementptr inbounds float* %tmp8690, i64 1 + %tmp8692 = getelementptr inbounds float* %tmp8691, i64 1 + %tmp8693 = getelementptr inbounds float* %tmp8692, i64 1 + %tmp8694 = getelementptr inbounds float* %tmp8693, i64 1 + %tmp8695 = getelementptr inbounds float* %tmp8694, i64 1 + %tmp8696 = getelementptr inbounds float* %tmp8695, i64 1 + %tmp8697 = getelementptr inbounds float* %tmp8696, i64 1 + %tmp8698 = getelementptr inbounds float* %tmp8697, i64 1 + %tmp8699 = getelementptr inbounds float* %tmp8698, i64 1 + %tmp8700 = getelementptr inbounds float* %tmp8699, i64 1 + %tmp8701 = getelementptr inbounds float* %tmp8700, i64 1 + %tmp8702 = getelementptr inbounds float* %tmp8701, i64 1 + %tmp8703 = getelementptr inbounds float* %tmp8702, i64 1 + %tmp8704 = getelementptr inbounds float* %tmp8703, i64 1 + %tmp8705 = getelementptr inbounds float* %tmp8704, i64 1 + %tmp8706 = getelementptr inbounds float* %tmp8705, i64 1 + %tmp8707 = getelementptr inbounds float* %tmp8706, i64 1 + %tmp8708 = getelementptr inbounds float* %tmp8707, i64 1 + %tmp8709 = getelementptr inbounds float* %tmp8708, i64 1 + %tmp8710 = getelementptr inbounds float* %tmp8709, i64 1 + %tmp8711 = getelementptr inbounds float* %tmp8710, i64 1 + %tmp8712 = getelementptr inbounds float* %tmp8711, i64 1 + %tmp8713 = getelementptr inbounds float* %tmp8712, i64 1 + %tmp8714 = getelementptr inbounds float* %tmp8713, i64 1 + %tmp8715 = getelementptr inbounds 
float* %tmp8714, i64 1 + %tmp8716 = getelementptr inbounds float* %tmp8715, i64 1 + %tmp8717 = getelementptr inbounds float* %tmp8716, i64 1 + %tmp8718 = getelementptr inbounds float* %tmp8717, i64 1 + %tmp8719 = getelementptr inbounds float* %tmp8718, i64 1 + %tmp8720 = getelementptr inbounds float* %tmp8719, i64 1 + %tmp8721 = getelementptr inbounds float* %tmp8720, i64 1 + %tmp8722 = getelementptr inbounds float* %tmp8721, i64 1 + %tmp8723 = getelementptr inbounds float* %tmp8722, i64 1 + %tmp8724 = getelementptr inbounds float* %tmp8723, i64 1 + %tmp8725 = getelementptr inbounds float* %tmp8724, i64 1 + %tmp8726 = getelementptr inbounds float* %tmp8725, i64 1 + %tmp8727 = getelementptr inbounds float* %tmp8726, i64 1 + %tmp8728 = getelementptr inbounds float* %tmp8727, i64 1 + %tmp8729 = getelementptr inbounds float* %tmp8728, i64 1 + %tmp8730 = getelementptr inbounds float* %tmp8729, i64 1 + %tmp8731 = getelementptr inbounds float* %tmp8730, i64 1 + %tmp8732 = getelementptr inbounds float* %tmp8731, i64 1 + %tmp8733 = getelementptr inbounds float* %tmp8732, i64 1 + %tmp8734 = getelementptr inbounds float* %tmp8733, i64 1 + %tmp8735 = getelementptr inbounds float* %tmp8734, i64 1 + %tmp8736 = getelementptr inbounds float* %tmp8735, i64 1 + %tmp8737 = getelementptr inbounds float* %tmp8736, i64 1 + %tmp8738 = getelementptr inbounds float* %tmp8737, i64 1 + %tmp8739 = getelementptr inbounds float* %tmp8738, i64 1 + %tmp8740 = getelementptr inbounds float* %tmp8739, i64 1 + %tmp8741 = getelementptr inbounds float* %tmp8740, i64 1 + %tmp8742 = getelementptr inbounds float* %tmp8741, i64 1 + %tmp8743 = getelementptr inbounds float* %tmp8742, i64 1 + %tmp8744 = getelementptr inbounds float* %tmp8743, i64 1 + %tmp8745 = getelementptr inbounds float* %tmp8744, i64 1 + %tmp8746 = getelementptr inbounds float* %tmp8745, i64 1 + %tmp8747 = getelementptr inbounds float* %tmp8746, i64 1 + %tmp8748 = getelementptr inbounds float* %tmp8747, i64 1 + %tmp8749 = getelementptr 
inbounds float* %tmp8748, i64 1 + %tmp8750 = getelementptr inbounds float* %tmp8749, i64 1 + %tmp8751 = getelementptr inbounds float* %tmp8750, i64 1 + %tmp8752 = getelementptr inbounds float* %tmp8751, i64 1 + %tmp8753 = getelementptr inbounds float* %tmp8752, i64 1 + %tmp8754 = getelementptr inbounds float* %tmp8753, i64 1 + %tmp8755 = getelementptr inbounds float* %tmp8754, i64 1 + %tmp8756 = getelementptr inbounds float* %tmp8755, i64 1 + %tmp8757 = getelementptr inbounds float* %tmp8756, i64 1 + %tmp8758 = getelementptr inbounds float* %tmp8757, i64 1 + %tmp8759 = getelementptr inbounds float* %tmp8758, i64 1 + %tmp8760 = getelementptr inbounds float* %tmp8759, i64 1 + %tmp8761 = getelementptr inbounds float* %tmp8760, i64 1 + %tmp8762 = getelementptr inbounds float* %tmp8761, i64 1 + %tmp8763 = getelementptr inbounds float* %tmp8762, i64 1 + %tmp8764 = getelementptr inbounds float* %tmp8763, i64 1 + %tmp8765 = getelementptr inbounds float* %tmp8764, i64 1 + %tmp8766 = getelementptr inbounds float* %tmp8765, i64 1 + %tmp8767 = getelementptr inbounds float* %tmp8766, i64 1 + %tmp8768 = getelementptr inbounds float* %tmp8767, i64 1 + %tmp8769 = getelementptr inbounds float* %tmp8768, i64 1 + %tmp8770 = getelementptr inbounds float* %tmp8769, i64 1 + %tmp8771 = getelementptr inbounds float* %tmp8770, i64 1 + %tmp8772 = getelementptr inbounds float* %tmp8771, i64 1 + %tmp8773 = getelementptr inbounds float* %tmp8772, i64 1 + %tmp8774 = getelementptr inbounds float* %tmp8773, i64 1 + %tmp8775 = getelementptr inbounds float* %tmp8774, i64 1 + %tmp8776 = getelementptr inbounds float* %tmp8775, i64 1 + %tmp8777 = getelementptr inbounds float* %tmp8776, i64 1 + %tmp8778 = getelementptr inbounds float* %tmp8777, i64 1 + %tmp8779 = getelementptr inbounds float* %tmp8778, i64 1 + %tmp8780 = getelementptr inbounds float* %tmp8779, i64 1 + %tmp8781 = getelementptr inbounds float* %tmp8780, i64 1 + %tmp8782 = getelementptr inbounds float* %tmp8781, i64 1 + %tmp8783 = 
getelementptr inbounds float* %tmp8782, i64 1 + %tmp8784 = getelementptr inbounds float* %tmp8783, i64 1 + %tmp8785 = getelementptr inbounds float* %tmp8784, i64 1 + %tmp8786 = getelementptr inbounds float* %tmp8785, i64 1 + %tmp8787 = getelementptr inbounds float* %tmp8786, i64 1 + %tmp8788 = getelementptr inbounds float* %tmp8787, i64 1 + %tmp8789 = getelementptr inbounds float* %tmp8788, i64 1 + %tmp8790 = getelementptr inbounds float* %tmp8789, i64 1 + %tmp8791 = getelementptr inbounds float* %tmp8790, i64 1 + %tmp8792 = getelementptr inbounds float* %tmp8791, i64 1 + %tmp8793 = getelementptr inbounds float* %tmp8792, i64 1 + %tmp8794 = getelementptr inbounds float* %tmp8793, i64 1 + %tmp8795 = getelementptr inbounds float* %tmp8794, i64 1 + %tmp8796 = getelementptr inbounds float* %tmp8795, i64 1 + %tmp8797 = getelementptr inbounds float* %tmp8796, i64 1 + %tmp8798 = getelementptr inbounds float* %tmp8797, i64 1 + %tmp8799 = getelementptr inbounds float* %tmp8798, i64 1 + %tmp8800 = getelementptr inbounds float* %tmp8799, i64 1 + %tmp8801 = getelementptr inbounds float* %tmp8800, i64 1 + %tmp8802 = getelementptr inbounds float* %tmp8801, i64 1 + %tmp8803 = getelementptr inbounds float* %tmp8802, i64 1 + %tmp8804 = getelementptr inbounds float* %tmp8803, i64 1 + %tmp8805 = getelementptr inbounds float* %tmp8804, i64 1 + %tmp8806 = getelementptr inbounds float* %tmp8805, i64 1 + %tmp8807 = getelementptr inbounds float* %tmp8806, i64 1 + %tmp8808 = getelementptr inbounds float* %tmp8807, i64 1 + %tmp8809 = getelementptr inbounds float* %tmp8808, i64 1 + %tmp8810 = getelementptr inbounds float* %tmp8809, i64 1 + %tmp8811 = getelementptr inbounds float* %tmp8810, i64 1 + %tmp8812 = getelementptr inbounds float* %tmp8811, i64 1 + %tmp8813 = getelementptr inbounds float* %tmp8812, i64 1 + %tmp8814 = getelementptr inbounds float* %tmp8813, i64 1 + %tmp8815 = getelementptr inbounds float* %tmp8814, i64 1 + %tmp8816 = getelementptr inbounds float* %tmp8815, i64 1 + 
%tmp8817 = getelementptr inbounds float* %tmp8816, i64 1 + %tmp8818 = getelementptr inbounds float* %tmp8817, i64 1 + %tmp8819 = getelementptr inbounds float* %tmp8818, i64 1 + %tmp8820 = getelementptr inbounds float* %tmp8819, i64 1 + %tmp8821 = getelementptr inbounds float* %tmp8820, i64 1 + %tmp8822 = getelementptr inbounds float* %tmp8821, i64 1 + %tmp8823 = getelementptr inbounds float* %tmp8822, i64 1 + %tmp8824 = getelementptr inbounds float* %tmp8823, i64 1 + %tmp8825 = getelementptr inbounds float* %tmp8824, i64 1 + %tmp8826 = getelementptr inbounds float* %tmp8825, i64 1 + %tmp8827 = getelementptr inbounds float* %tmp8826, i64 1 + %tmp8828 = getelementptr inbounds float* %tmp8827, i64 1 + %tmp8829 = getelementptr inbounds float* %tmp8828, i64 1 + %tmp8830 = getelementptr inbounds float* %tmp8829, i64 1 + %tmp8831 = getelementptr inbounds float* %tmp8830, i64 1 + %tmp8832 = getelementptr inbounds float* %tmp8831, i64 1 + %tmp8833 = getelementptr inbounds float* %tmp8832, i64 1 + %tmp8834 = getelementptr inbounds float* %tmp8833, i64 1 + %tmp8835 = getelementptr inbounds float* %tmp8834, i64 1 + %tmp8836 = getelementptr inbounds float* %tmp8835, i64 1 + %tmp8837 = getelementptr inbounds float* %tmp8836, i64 1 + %tmp8838 = getelementptr inbounds float* %tmp8837, i64 1 + %tmp8839 = getelementptr inbounds float* %tmp8838, i64 1 + %tmp8840 = getelementptr inbounds float* %tmp8839, i64 1 + %tmp8841 = getelementptr inbounds float* %tmp8840, i64 1 + %tmp8842 = getelementptr inbounds float* %tmp8841, i64 1 + %tmp8843 = getelementptr inbounds float* %tmp8842, i64 1 + %tmp8844 = getelementptr inbounds float* %tmp8843, i64 1 + %tmp8845 = getelementptr inbounds float* %tmp8844, i64 1 + %tmp8846 = getelementptr inbounds float* %tmp8845, i64 1 + %tmp8847 = getelementptr inbounds float* %tmp8846, i64 1 + %tmp8848 = getelementptr inbounds float* %tmp8847, i64 1 + %tmp8849 = getelementptr inbounds float* %tmp8848, i64 1 + %tmp8850 = getelementptr inbounds float* %tmp8849, 
i64 1 + %tmp8851 = getelementptr inbounds float* %tmp8850, i64 1 + %tmp8852 = getelementptr inbounds float* %tmp8851, i64 1 + %tmp8853 = getelementptr inbounds float* %tmp8852, i64 1 + %tmp8854 = getelementptr inbounds float* %tmp8853, i64 1 + %tmp8855 = getelementptr inbounds float* %tmp8854, i64 1 + %tmp8856 = getelementptr inbounds float* %tmp8855, i64 1 + %tmp8857 = getelementptr inbounds float* %tmp8856, i64 1 + %tmp8858 = getelementptr inbounds float* %tmp8857, i64 1 + %tmp8859 = getelementptr inbounds float* %tmp8858, i64 1 + %tmp8860 = getelementptr inbounds float* %tmp8859, i64 1 + %tmp8861 = getelementptr inbounds float* %tmp8860, i64 1 + %tmp8862 = getelementptr inbounds float* %tmp8861, i64 1 + %tmp8863 = getelementptr inbounds float* %tmp8862, i64 1 + %tmp8864 = getelementptr inbounds float* %tmp8863, i64 1 + %tmp8865 = getelementptr inbounds float* %tmp8864, i64 1 + %tmp8866 = getelementptr inbounds float* %tmp8865, i64 1 + %tmp8867 = getelementptr inbounds float* %tmp8866, i64 1 + %tmp8868 = getelementptr inbounds float* %tmp8867, i64 1 + %tmp8869 = getelementptr inbounds float* %tmp8868, i64 1 + %tmp8870 = getelementptr inbounds float* %tmp8869, i64 1 + %tmp8871 = getelementptr inbounds float* %tmp8870, i64 1 + %tmp8872 = getelementptr inbounds float* %tmp8871, i64 1 + %tmp8873 = getelementptr inbounds float* %tmp8872, i64 1 + %tmp8874 = getelementptr inbounds float* %tmp8873, i64 1 + %tmp8875 = getelementptr inbounds float* %tmp8874, i64 1 + %tmp8876 = getelementptr inbounds float* %tmp8875, i64 1 + %tmp8877 = getelementptr inbounds float* %tmp8876, i64 1 + %tmp8878 = getelementptr inbounds float* %tmp8877, i64 1 + %tmp8879 = getelementptr inbounds float* %tmp8878, i64 1 + %tmp8880 = getelementptr inbounds float* %tmp8879, i64 1 + %tmp8881 = getelementptr inbounds float* %tmp8880, i64 1 + %tmp8882 = getelementptr inbounds float* %tmp8881, i64 1 + %tmp8883 = getelementptr inbounds float* %tmp8882, i64 1 + %tmp8884 = getelementptr inbounds float* 
%tmp8883, i64 1 + %tmp8885 = getelementptr inbounds float* %tmp8884, i64 1 + %tmp8886 = getelementptr inbounds float* %tmp8885, i64 1 + %tmp8887 = getelementptr inbounds float* %tmp8886, i64 1 + %tmp8888 = getelementptr inbounds float* %tmp8887, i64 1 + %tmp8889 = getelementptr inbounds float* %tmp8888, i64 1 + %tmp8890 = getelementptr inbounds float* %tmp8889, i64 1 + %tmp8891 = getelementptr inbounds float* %tmp8890, i64 1 + %tmp8892 = getelementptr inbounds float* %tmp8891, i64 1 + %tmp8893 = getelementptr inbounds float* %tmp8892, i64 1 + %tmp8894 = getelementptr inbounds float* %tmp8893, i64 1 + %tmp8895 = getelementptr inbounds float* %tmp8894, i64 1 + %tmp8896 = getelementptr inbounds float* %tmp8895, i64 1 + %tmp8897 = getelementptr inbounds float* %tmp8896, i64 1 + %tmp8898 = getelementptr inbounds float* %tmp8897, i64 1 + %tmp8899 = getelementptr inbounds float* %tmp8898, i64 1 + %tmp8900 = getelementptr inbounds float* %tmp8899, i64 1 + %tmp8901 = getelementptr inbounds float* %tmp8900, i64 1 + %tmp8902 = getelementptr inbounds float* %tmp8901, i64 1 + %tmp8903 = getelementptr inbounds float* %tmp8902, i64 1 + %tmp8904 = getelementptr inbounds float* %tmp8903, i64 1 + %tmp8905 = getelementptr inbounds float* %tmp8904, i64 1 + %tmp8906 = getelementptr inbounds float* %tmp8905, i64 1 + %tmp8907 = getelementptr inbounds float* %tmp8906, i64 1 + %tmp8908 = getelementptr inbounds float* %tmp8907, i64 1 + %tmp8909 = getelementptr inbounds float* %tmp8908, i64 1 + %tmp8910 = getelementptr inbounds float* %tmp8909, i64 1 + %tmp8911 = getelementptr inbounds float* %tmp8910, i64 1 + %tmp8912 = getelementptr inbounds float* %tmp8911, i64 1 + %tmp8913 = getelementptr inbounds float* %tmp8912, i64 1 + %tmp8914 = getelementptr inbounds float* %tmp8913, i64 1 + %tmp8915 = getelementptr inbounds float* %tmp8914, i64 1 + %tmp8916 = getelementptr inbounds float* %tmp8915, i64 1 + %tmp8917 = getelementptr inbounds float* %tmp8916, i64 1 + %tmp8918 = getelementptr inbounds 
float* %tmp8917, i64 1 + %tmp8919 = getelementptr inbounds float* %tmp8918, i64 1 + %tmp8920 = getelementptr inbounds float* %tmp8919, i64 1 + %tmp8921 = getelementptr inbounds float* %tmp8920, i64 1 + %tmp8922 = getelementptr inbounds float* %tmp8921, i64 1 + %tmp8923 = getelementptr inbounds float* %tmp8922, i64 1 + %tmp8924 = getelementptr inbounds float* %tmp8923, i64 1 + %tmp8925 = getelementptr inbounds float* %tmp8924, i64 1 + %tmp8926 = getelementptr inbounds float* %tmp8925, i64 1 + %tmp8927 = getelementptr inbounds float* %tmp8926, i64 1 + %tmp8928 = getelementptr inbounds float* %tmp8927, i64 1 + %tmp8929 = getelementptr inbounds float* %tmp8928, i64 1 + %tmp8930 = getelementptr inbounds float* %tmp8929, i64 1 + %tmp8931 = getelementptr inbounds float* %tmp8930, i64 1 + %tmp8932 = getelementptr inbounds float* %tmp8931, i64 1 + %tmp8933 = getelementptr inbounds float* %tmp8932, i64 1 + %tmp8934 = getelementptr inbounds float* %tmp8933, i64 1 + %tmp8935 = getelementptr inbounds float* %tmp8934, i64 1 + %tmp8936 = getelementptr inbounds float* %tmp8935, i64 1 + %tmp8937 = getelementptr inbounds float* %tmp8936, i64 1 + %tmp8938 = getelementptr inbounds float* %tmp8937, i64 1 + %tmp8939 = getelementptr inbounds float* %tmp8938, i64 1 + %tmp8940 = getelementptr inbounds float* %tmp8939, i64 1 + %tmp8941 = getelementptr inbounds float* %tmp8940, i64 1 + %tmp8942 = getelementptr inbounds float* %tmp8941, i64 1 + %tmp8943 = getelementptr inbounds float* %tmp8942, i64 1 + %tmp8944 = getelementptr inbounds float* %tmp8943, i64 1 + %tmp8945 = getelementptr inbounds float* %tmp8944, i64 1 + %tmp8946 = getelementptr inbounds float* %tmp8945, i64 1 + %tmp8947 = getelementptr inbounds float* %tmp8946, i64 1 + %tmp8948 = getelementptr inbounds float* %tmp8947, i64 1 + %tmp8949 = getelementptr inbounds float* %tmp8948, i64 1 + %tmp8950 = getelementptr inbounds float* %tmp8949, i64 1 + %tmp8951 = getelementptr inbounds float* %tmp8950, i64 1 + %tmp8952 = getelementptr 
inbounds float* %tmp8951, i64 1 + %tmp8953 = getelementptr inbounds float* %tmp8952, i64 1 + %tmp8954 = getelementptr inbounds float* %tmp8953, i64 1 + %tmp8955 = getelementptr inbounds float* %tmp8954, i64 1 + %tmp8956 = getelementptr inbounds float* %tmp8955, i64 1 + %tmp8957 = getelementptr inbounds float* %tmp8956, i64 1 + %tmp8958 = getelementptr inbounds float* %tmp8957, i64 1 + %tmp8959 = getelementptr inbounds float* %tmp8958, i64 1 + %tmp8960 = getelementptr inbounds float* %tmp8959, i64 1 + %tmp8961 = getelementptr inbounds float* %tmp8960, i64 1 + %tmp8962 = getelementptr inbounds float* %tmp8961, i64 1 + %tmp8963 = getelementptr inbounds float* %tmp8962, i64 1 + %tmp8964 = getelementptr inbounds float* %tmp8963, i64 1 + %tmp8965 = getelementptr inbounds float* %tmp8964, i64 1 + %tmp8966 = getelementptr inbounds float* %tmp8965, i64 1 + %tmp8967 = getelementptr inbounds float* %tmp8966, i64 1 + %tmp8968 = getelementptr inbounds float* %tmp8967, i64 1 + %tmp8969 = getelementptr inbounds float* %tmp8968, i64 1 + %tmp8970 = getelementptr inbounds float* %tmp8969, i64 1 + %tmp8971 = getelementptr inbounds float* %tmp8970, i64 1 + %tmp8972 = getelementptr inbounds float* %tmp8971, i64 1 + %tmp8973 = getelementptr inbounds float* %tmp8972, i64 1 + %tmp8974 = getelementptr inbounds float* %tmp8973, i64 1 + %tmp8975 = getelementptr inbounds float* %tmp8974, i64 1 + %tmp8976 = getelementptr inbounds float* %tmp8975, i64 1 + %tmp8977 = getelementptr inbounds float* %tmp8976, i64 1 + %tmp8978 = getelementptr inbounds float* %tmp8977, i64 1 + %tmp8979 = getelementptr inbounds float* %tmp8978, i64 1 + %tmp8980 = getelementptr inbounds float* %tmp8979, i64 1 + %tmp8981 = getelementptr inbounds float* %tmp8980, i64 1 + %tmp8982 = getelementptr inbounds float* %tmp8981, i64 1 + %tmp8983 = getelementptr inbounds float* %tmp8982, i64 1 + %tmp8984 = getelementptr inbounds float* %tmp8983, i64 1 + %tmp8985 = getelementptr inbounds float* %tmp8984, i64 1 + %tmp8986 = 
getelementptr inbounds float* %tmp8985, i64 1 + %tmp8987 = getelementptr inbounds float* %tmp8986, i64 1 + %tmp8988 = getelementptr inbounds float* %tmp8987, i64 1 + %tmp8989 = getelementptr inbounds float* %tmp8988, i64 1 + %tmp8990 = getelementptr inbounds float* %tmp8989, i64 1 + %tmp8991 = getelementptr inbounds float* %tmp8990, i64 1 + %tmp8992 = getelementptr inbounds float* %tmp8991, i64 1 + %tmp8993 = getelementptr inbounds float* %tmp8992, i64 1 + %tmp8994 = getelementptr inbounds float* %tmp8993, i64 1 + %tmp8995 = getelementptr inbounds float* %tmp8994, i64 1 + %tmp8996 = getelementptr inbounds float* %tmp8995, i64 1 + %tmp8997 = getelementptr inbounds float* %tmp8996, i64 1 + %tmp8998 = getelementptr inbounds float* %tmp8997, i64 1 + %tmp8999 = getelementptr inbounds float* %tmp8998, i64 1 + %tmp9000 = getelementptr inbounds float* %tmp8999, i64 1 + %tmp9001 = getelementptr inbounds float* %tmp9000, i64 1 + %tmp9002 = getelementptr inbounds float* %tmp9001, i64 1 + %tmp9003 = getelementptr inbounds float* %tmp9002, i64 1 + %tmp9004 = getelementptr inbounds float* %tmp9003, i64 1 + %tmp9005 = getelementptr inbounds float* %tmp9004, i64 1 + %tmp9006 = getelementptr inbounds float* %tmp9005, i64 1 + %tmp9007 = getelementptr inbounds float* %tmp9006, i64 1 + %tmp9008 = getelementptr inbounds float* %tmp9007, i64 1 + %tmp9009 = getelementptr inbounds float* %tmp9008, i64 1 + %tmp9010 = getelementptr inbounds float* %tmp9009, i64 1 + %tmp9011 = getelementptr inbounds float* %tmp9010, i64 1 + %tmp9012 = getelementptr inbounds float* %tmp9011, i64 1 + %tmp9013 = getelementptr inbounds float* %tmp9012, i64 1 + %tmp9014 = getelementptr inbounds float* %tmp9013, i64 1 + %tmp9015 = getelementptr inbounds float* %tmp9014, i64 1 + %tmp9016 = getelementptr inbounds float* %tmp9015, i64 1 + %tmp9017 = getelementptr inbounds float* %tmp9016, i64 1 + %tmp9018 = getelementptr inbounds float* %tmp9017, i64 1 + %tmp9019 = getelementptr inbounds float* %tmp9018, i64 1 + 
%tmp9020 = getelementptr inbounds float* %tmp9019, i64 1 + %tmp9021 = getelementptr inbounds float* %tmp9020, i64 1 + %tmp9022 = getelementptr inbounds float* %tmp9021, i64 1 + %tmp9023 = getelementptr inbounds float* %tmp9022, i64 1 + %tmp9024 = getelementptr inbounds float* %tmp9023, i64 1 + %tmp9025 = getelementptr inbounds float* %tmp9024, i64 1 + %tmp9026 = getelementptr inbounds float* %tmp9025, i64 1 + %tmp9027 = getelementptr inbounds float* %tmp9026, i64 1 + %tmp9028 = getelementptr inbounds float* %tmp9027, i64 1 + %tmp9029 = getelementptr inbounds float* %tmp9028, i64 1 + %tmp9030 = getelementptr inbounds float* %tmp9029, i64 1 + %tmp9031 = getelementptr inbounds float* %tmp9030, i64 1 + %tmp9032 = getelementptr inbounds float* %tmp9031, i64 1 + %tmp9033 = getelementptr inbounds float* %tmp9032, i64 1 + %tmp9034 = getelementptr inbounds float* %tmp9033, i64 1 + %tmp9035 = getelementptr inbounds float* %tmp9034, i64 1 + %tmp9036 = getelementptr inbounds float* %tmp9035, i64 1 + %tmp9037 = getelementptr inbounds float* %tmp9036, i64 1 + %tmp9038 = getelementptr inbounds float* %tmp9037, i64 1 + %tmp9039 = getelementptr inbounds float* %tmp9038, i64 1 + %tmp9040 = getelementptr inbounds float* %tmp9039, i64 1 + %tmp9041 = getelementptr inbounds float* %tmp9040, i64 1 + %tmp9042 = getelementptr inbounds float* %tmp9041, i64 1 + %tmp9043 = getelementptr inbounds float* %tmp9042, i64 1 + %tmp9044 = getelementptr inbounds float* %tmp9043, i64 1 + %tmp9045 = getelementptr inbounds float* %tmp9044, i64 1 + %tmp9046 = getelementptr inbounds float* %tmp9045, i64 1 + %tmp9047 = getelementptr inbounds float* %tmp9046, i64 1 + %tmp9048 = getelementptr inbounds float* %tmp9047, i64 1 + %tmp9049 = getelementptr inbounds float* %tmp9048, i64 1 + %tmp9050 = getelementptr inbounds float* %tmp9049, i64 1 + %tmp9051 = getelementptr inbounds float* %tmp9050, i64 1 + %tmp9052 = getelementptr inbounds float* %tmp9051, i64 1 + %tmp9053 = getelementptr inbounds float* %tmp9052, 
i64 1 + %tmp9054 = getelementptr inbounds float* %tmp9053, i64 1 + %tmp9055 = getelementptr inbounds float* %tmp9054, i64 1 + %tmp9056 = getelementptr inbounds float* %tmp9055, i64 1 + %tmp9057 = getelementptr inbounds float* %tmp9056, i64 1 + %tmp9058 = getelementptr inbounds float* %tmp9057, i64 1 + %tmp9059 = getelementptr inbounds float* %tmp9058, i64 1 + %tmp9060 = getelementptr inbounds float* %tmp9059, i64 1 + %tmp9061 = getelementptr inbounds float* %tmp9060, i64 1 + %tmp9062 = getelementptr inbounds float* %tmp9061, i64 1 + %tmp9063 = getelementptr inbounds float* %tmp9062, i64 1 + %tmp9064 = getelementptr inbounds float* %tmp9063, i64 1 + %tmp9065 = getelementptr inbounds float* %tmp9064, i64 1 + %tmp9066 = getelementptr inbounds float* %tmp9065, i64 1 + %tmp9067 = getelementptr inbounds float* %tmp9066, i64 1 + %tmp9068 = getelementptr inbounds float* %tmp9067, i64 1 + %tmp9069 = getelementptr inbounds float* %tmp9068, i64 1 + %tmp9070 = getelementptr inbounds float* %tmp9069, i64 1 + %tmp9071 = getelementptr inbounds float* %tmp9070, i64 1 + %tmp9072 = getelementptr inbounds float* %tmp9071, i64 1 + %tmp9073 = getelementptr inbounds float* %tmp9072, i64 1 + %tmp9074 = getelementptr inbounds float* %tmp9073, i64 1 + %tmp9075 = getelementptr inbounds float* %tmp9074, i64 1 + %tmp9076 = getelementptr inbounds float* %tmp9075, i64 1 + %tmp9077 = getelementptr inbounds float* %tmp9076, i64 1 + %tmp9078 = getelementptr inbounds float* %tmp9077, i64 1 + %tmp9079 = getelementptr inbounds float* %tmp9078, i64 1 + %tmp9080 = getelementptr inbounds float* %tmp9079, i64 1 + %tmp9081 = getelementptr inbounds float* %tmp9080, i64 1 + %tmp9082 = getelementptr inbounds float* %tmp9081, i64 1 + %tmp9083 = getelementptr inbounds float* %tmp9082, i64 1 + %tmp9084 = getelementptr inbounds float* %tmp9083, i64 1 + %tmp9085 = getelementptr inbounds float* %tmp9084, i64 1 + %tmp9086 = getelementptr inbounds float* %tmp9085, i64 1 + %tmp9087 = getelementptr inbounds float* 
%tmp9086, i64 1 + %tmp9088 = getelementptr inbounds float* %tmp9087, i64 1 + %tmp9089 = getelementptr inbounds float* %tmp9088, i64 1 + %tmp9090 = getelementptr inbounds float* %tmp9089, i64 1 + %tmp9091 = getelementptr inbounds float* %tmp9090, i64 1 + %tmp9092 = getelementptr inbounds float* %tmp9091, i64 1 + %tmp9093 = getelementptr inbounds float* %tmp9092, i64 1 + %tmp9094 = getelementptr inbounds float* %tmp9093, i64 1 + %tmp9095 = getelementptr inbounds float* %tmp9094, i64 1 + %tmp9096 = getelementptr inbounds float* %tmp9095, i64 1 + %tmp9097 = getelementptr inbounds float* %tmp9096, i64 1 + %tmp9098 = getelementptr inbounds float* %tmp9097, i64 1 + %tmp9099 = getelementptr inbounds float* %tmp9098, i64 1 + %tmp9100 = getelementptr inbounds float* %tmp9099, i64 1 + %tmp9101 = getelementptr inbounds float* %tmp9100, i64 1 + %tmp9102 = getelementptr inbounds float* %tmp9101, i64 1 + %tmp9103 = getelementptr inbounds float* %tmp9102, i64 1 + %tmp9104 = getelementptr inbounds float* %tmp9103, i64 1 + %tmp9105 = getelementptr inbounds float* %tmp9104, i64 1 + %tmp9106 = getelementptr inbounds float* %tmp9105, i64 1 + %tmp9107 = getelementptr inbounds float* %tmp9106, i64 1 + %tmp9108 = getelementptr inbounds float* %tmp9107, i64 1 + %tmp9109 = getelementptr inbounds float* %tmp9108, i64 1 + %tmp9110 = getelementptr inbounds float* %tmp9109, i64 1 + %tmp9111 = getelementptr inbounds float* %tmp9110, i64 1 + %tmp9112 = getelementptr inbounds float* %tmp9111, i64 1 + %tmp9113 = getelementptr inbounds float* %tmp9112, i64 1 + %tmp9114 = getelementptr inbounds float* %tmp9113, i64 1 + %tmp9115 = getelementptr inbounds float* %tmp9114, i64 1 + %tmp9116 = getelementptr inbounds float* %tmp9115, i64 1 + %tmp9117 = getelementptr inbounds float* %tmp9116, i64 1 + %tmp9118 = getelementptr inbounds float* %tmp9117, i64 1 + %tmp9119 = getelementptr inbounds float* %tmp9118, i64 1 + %tmp9120 = getelementptr inbounds float* %tmp9119, i64 1 + %tmp9121 = getelementptr inbounds 
float* %tmp9120, i64 1 + %tmp9122 = getelementptr inbounds float* %tmp9121, i64 1 + %tmp9123 = getelementptr inbounds float* %tmp9122, i64 1 + %tmp9124 = getelementptr inbounds float* %tmp9123, i64 1 + %tmp9125 = getelementptr inbounds float* %tmp9124, i64 1 + %tmp9126 = getelementptr inbounds float* %tmp9125, i64 1 + %tmp9127 = getelementptr inbounds float* %tmp9126, i64 1 + %tmp9128 = getelementptr inbounds float* %tmp9127, i64 1 + %tmp9129 = getelementptr inbounds float* %tmp9128, i64 1 + %tmp9130 = getelementptr inbounds float* %tmp9129, i64 1 + %tmp9131 = getelementptr inbounds float* %tmp9130, i64 1 + %tmp9132 = getelementptr inbounds float* %tmp9131, i64 1 + %tmp9133 = getelementptr inbounds float* %tmp9132, i64 1 + %tmp9134 = getelementptr inbounds float* %tmp9133, i64 1 + %tmp9135 = getelementptr inbounds float* %tmp9134, i64 1 + %tmp9136 = getelementptr inbounds float* %tmp9135, i64 1 + %tmp9137 = getelementptr inbounds float* %tmp9136, i64 1 + %tmp9138 = getelementptr inbounds float* %tmp9137, i64 1 + %tmp9139 = getelementptr inbounds float* %tmp9138, i64 1 + %tmp9140 = getelementptr inbounds float* %tmp9139, i64 1 + %tmp9141 = getelementptr inbounds float* %tmp9140, i64 1 + %tmp9142 = getelementptr inbounds float* %tmp9141, i64 1 + %tmp9143 = getelementptr inbounds float* %tmp9142, i64 1 + %tmp9144 = getelementptr inbounds float* %tmp9143, i64 1 + %tmp9145 = getelementptr inbounds float* %tmp9144, i64 1 + %tmp9146 = getelementptr inbounds float* %tmp9145, i64 1 + %tmp9147 = getelementptr inbounds float* %tmp9146, i64 1 + %tmp9148 = getelementptr inbounds float* %tmp9147, i64 1 + %tmp9149 = getelementptr inbounds float* %tmp9148, i64 1 + %tmp9150 = getelementptr inbounds float* %tmp9149, i64 1 + %tmp9151 = getelementptr inbounds float* %tmp9150, i64 1 + %tmp9152 = getelementptr inbounds float* %tmp9151, i64 1 + %tmp9153 = getelementptr inbounds float* %tmp9152, i64 1 + %tmp9154 = getelementptr inbounds float* %tmp9153, i64 1 + %tmp9155 = getelementptr 
inbounds float* %tmp9154, i64 1 + %tmp9156 = getelementptr inbounds float* %tmp9155, i64 1 + %tmp9157 = getelementptr inbounds float* %tmp9156, i64 1 + %tmp9158 = getelementptr inbounds float* %tmp9157, i64 1 + %tmp9159 = getelementptr inbounds float* %tmp9158, i64 1 + %tmp9160 = getelementptr inbounds float* %tmp9159, i64 1 + %tmp9161 = getelementptr inbounds float* %tmp9160, i64 1 + %tmp9162 = getelementptr inbounds float* %tmp9161, i64 1 + %tmp9163 = getelementptr inbounds float* %tmp9162, i64 1 + %tmp9164 = getelementptr inbounds float* %tmp9163, i64 1 + %tmp9165 = getelementptr inbounds float* %tmp9164, i64 1 + %tmp9166 = getelementptr inbounds float* %tmp9165, i64 1 + %tmp9167 = getelementptr inbounds float* %tmp9166, i64 1 + %tmp9168 = getelementptr inbounds float* %tmp9167, i64 1 + %tmp9169 = getelementptr inbounds float* %tmp9168, i64 1 + %tmp9170 = getelementptr inbounds float* %tmp9169, i64 1 + %tmp9171 = getelementptr inbounds float* %tmp9170, i64 1 + %tmp9172 = getelementptr inbounds float* %tmp9171, i64 1 + %tmp9173 = getelementptr inbounds float* %tmp9172, i64 1 + %tmp9174 = getelementptr inbounds float* %tmp9173, i64 1 + %tmp9175 = getelementptr inbounds float* %tmp9174, i64 1 + %tmp9176 = getelementptr inbounds float* %tmp9175, i64 1 + %tmp9177 = getelementptr inbounds float* %tmp9176, i64 1 + %tmp9178 = getelementptr inbounds float* %tmp9177, i64 1 + %tmp9179 = getelementptr inbounds float* %tmp9178, i64 1 + %tmp9180 = getelementptr inbounds float* %tmp9179, i64 1 + %tmp9181 = getelementptr inbounds float* %tmp9180, i64 1 + %tmp9182 = getelementptr inbounds float* %tmp9181, i64 1 + %tmp9183 = getelementptr inbounds float* %tmp9182, i64 1 + %tmp9184 = getelementptr inbounds float* %tmp9183, i64 1 + %tmp9185 = getelementptr inbounds float* %tmp9184, i64 1 + %tmp9186 = getelementptr inbounds float* %tmp9185, i64 1 + %tmp9187 = getelementptr inbounds float* %tmp9186, i64 1 + %tmp9188 = getelementptr inbounds float* %tmp9187, i64 1 + %tmp9189 = 
getelementptr inbounds float* %tmp9188, i64 1 + %tmp9190 = getelementptr inbounds float* %tmp9189, i64 1 + %tmp9191 = getelementptr inbounds float* %tmp9190, i64 1 + %tmp9192 = getelementptr inbounds float* %tmp9191, i64 1 + %tmp9193 = getelementptr inbounds float* %tmp9192, i64 1 + %tmp9194 = getelementptr inbounds float* %tmp9193, i64 1 + %tmp9195 = getelementptr inbounds float* %tmp9194, i64 1 + %tmp9196 = getelementptr inbounds float* %tmp9195, i64 1 + %tmp9197 = getelementptr inbounds float* %tmp9196, i64 1 + %tmp9198 = getelementptr inbounds float* %tmp9197, i64 1 + %tmp9199 = getelementptr inbounds float* %tmp9198, i64 1 + %tmp9200 = getelementptr inbounds float* %tmp9199, i64 1 + %tmp9201 = getelementptr inbounds float* %tmp9200, i64 1 + %tmp9202 = getelementptr inbounds float* %tmp9201, i64 1 + %tmp9203 = getelementptr inbounds float* %tmp9202, i64 1 + %tmp9204 = getelementptr inbounds float* %tmp9203, i64 1 + %tmp9205 = getelementptr inbounds float* %tmp9204, i64 1 + %tmp9206 = getelementptr inbounds float* %tmp9205, i64 1 + %tmp9207 = getelementptr inbounds float* %tmp9206, i64 1 + %tmp9208 = getelementptr inbounds float* %tmp9207, i64 1 + %tmp9209 = getelementptr inbounds float* %tmp9208, i64 1 + %tmp9210 = getelementptr inbounds float* %tmp9209, i64 1 + %tmp9211 = getelementptr inbounds float* %tmp9210, i64 1 + %tmp9212 = getelementptr inbounds float* %tmp9211, i64 1 + %tmp9213 = getelementptr inbounds float* %tmp9212, i64 1 + %tmp9214 = getelementptr inbounds float* %tmp9213, i64 1 + %tmp9215 = getelementptr inbounds float* %tmp9214, i64 1 + %tmp9216 = getelementptr inbounds float* %tmp9215, i64 1 + %tmp9217 = getelementptr inbounds float* %tmp9216, i64 1 + %tmp9218 = getelementptr inbounds float* %tmp9217, i64 1 + %tmp9219 = getelementptr inbounds float* %tmp9218, i64 1 + %tmp9220 = getelementptr inbounds float* %tmp9219, i64 1 + %tmp9221 = getelementptr inbounds float* %tmp9220, i64 1 + %tmp9222 = getelementptr inbounds float* %tmp9221, i64 1 + 
%tmp9223 = getelementptr inbounds float* %tmp9222, i64 1 + %tmp9224 = getelementptr inbounds float* %tmp9223, i64 1 + %tmp9225 = getelementptr inbounds float* %tmp9224, i64 1 + %tmp9226 = getelementptr inbounds float* %tmp9225, i64 1 + %tmp9227 = getelementptr inbounds float* %tmp9226, i64 1 + %tmp9228 = getelementptr inbounds float* %tmp9227, i64 1 + %tmp9229 = getelementptr inbounds float* %tmp9228, i64 1 + %tmp9230 = getelementptr inbounds float* %tmp9229, i64 1 + %tmp9231 = getelementptr inbounds float* %tmp9230, i64 1 + %tmp9232 = getelementptr inbounds float* %tmp9231, i64 1 + %tmp9233 = getelementptr inbounds float* %tmp9232, i64 1 + %tmp9234 = getelementptr inbounds float* %tmp9233, i64 1 + %tmp9235 = getelementptr inbounds float* %tmp9234, i64 1 + %tmp9236 = getelementptr inbounds float* %tmp9235, i64 1 + %tmp9237 = getelementptr inbounds float* %tmp9236, i64 1 + %tmp9238 = getelementptr inbounds float* %tmp9237, i64 1 + %tmp9239 = getelementptr inbounds float* %tmp9238, i64 1 + %tmp9240 = getelementptr inbounds float* %tmp9239, i64 1 + %tmp9241 = getelementptr inbounds float* %tmp9240, i64 1 + %tmp9242 = getelementptr inbounds float* %tmp9241, i64 1 + %tmp9243 = getelementptr inbounds float* %tmp9242, i64 1 + %tmp9244 = getelementptr inbounds float* %tmp9243, i64 1 + %tmp9245 = getelementptr inbounds float* %tmp9244, i64 1 + %tmp9246 = getelementptr inbounds float* %tmp9245, i64 1 + %tmp9247 = getelementptr inbounds float* %tmp9246, i64 1 + %tmp9248 = getelementptr inbounds float* %tmp9247, i64 1 + %tmp9249 = getelementptr inbounds float* %tmp9248, i64 1 + %tmp9250 = getelementptr inbounds float* %tmp9249, i64 1 + %tmp9251 = getelementptr inbounds float* %tmp9250, i64 1 + %tmp9252 = getelementptr inbounds float* %tmp9251, i64 1 + %tmp9253 = getelementptr inbounds float* %tmp9252, i64 1 + %tmp9254 = getelementptr inbounds float* %tmp9253, i64 1 + %tmp9255 = getelementptr inbounds float* %tmp9254, i64 1 + %tmp9256 = getelementptr inbounds float* %tmp9255, 
i64 1 + %tmp9257 = getelementptr inbounds float* %tmp9256, i64 1 + %tmp9258 = getelementptr inbounds float* %tmp9257, i64 1 + %tmp9259 = getelementptr inbounds float* %tmp9258, i64 1 + %tmp9260 = getelementptr inbounds float* %tmp9259, i64 1 + %tmp9261 = getelementptr inbounds float* %tmp9260, i64 1 + %tmp9262 = getelementptr inbounds float* %tmp9261, i64 1 + %tmp9263 = getelementptr inbounds float* %tmp9262, i64 1 + %tmp9264 = getelementptr inbounds float* %tmp9263, i64 1 + %tmp9265 = getelementptr inbounds float* %tmp9264, i64 1 + %tmp9266 = getelementptr inbounds float* %tmp9265, i64 1 + %tmp9267 = getelementptr inbounds float* %tmp9266, i64 1 + %tmp9268 = getelementptr inbounds float* %tmp9267, i64 1 + %tmp9269 = getelementptr inbounds float* %tmp9268, i64 1 + %tmp9270 = getelementptr inbounds float* %tmp9269, i64 1 + %tmp9271 = getelementptr inbounds float* %tmp9270, i64 1 + %tmp9272 = getelementptr inbounds float* %tmp9271, i64 1 + %tmp9273 = getelementptr inbounds float* %tmp9272, i64 1 + %tmp9274 = getelementptr inbounds float* %tmp9273, i64 1 + %tmp9275 = getelementptr inbounds float* %tmp9274, i64 1 + %tmp9276 = getelementptr inbounds float* %tmp9275, i64 1 + %tmp9277 = getelementptr inbounds float* %tmp9276, i64 1 + %tmp9278 = getelementptr inbounds float* %tmp9277, i64 1 + %tmp9279 = getelementptr inbounds float* %tmp9278, i64 1 + %tmp9280 = getelementptr inbounds float* %tmp9279, i64 1 + %tmp9281 = getelementptr inbounds float* %tmp9280, i64 1 + %tmp9282 = getelementptr inbounds float* %tmp9281, i64 1 + %tmp9283 = getelementptr inbounds float* %tmp9282, i64 1 + %tmp9284 = getelementptr inbounds float* %tmp9283, i64 1 + %tmp9285 = getelementptr inbounds float* %tmp9284, i64 1 + %tmp9286 = getelementptr inbounds float* %tmp9285, i64 1 + %tmp9287 = getelementptr inbounds float* %tmp9286, i64 1 + %tmp9288 = getelementptr inbounds float* %tmp9287, i64 1 + %tmp9289 = getelementptr inbounds float* %tmp9288, i64 1 + %tmp9290 = getelementptr inbounds float* 
%tmp9289, i64 1 + %tmp9291 = getelementptr inbounds float* %tmp9290, i64 1 + %tmp9292 = getelementptr inbounds float* %tmp9291, i64 1 + %tmp9293 = getelementptr inbounds float* %tmp9292, i64 1 + %tmp9294 = getelementptr inbounds float* %tmp9293, i64 1 + %tmp9295 = getelementptr inbounds float* %tmp9294, i64 1 + %tmp9296 = getelementptr inbounds float* %tmp9295, i64 1 + %tmp9297 = getelementptr inbounds float* %tmp9296, i64 1 + %tmp9298 = getelementptr inbounds float* %tmp9297, i64 1 + %tmp9299 = getelementptr inbounds float* %tmp9298, i64 1 + %tmp9300 = getelementptr inbounds float* %tmp9299, i64 1 + %tmp9301 = getelementptr inbounds float* %tmp9300, i64 1 + %tmp9302 = getelementptr inbounds float* %tmp9301, i64 1 + %tmp9303 = getelementptr inbounds float* %tmp9302, i64 1 + %tmp9304 = getelementptr inbounds float* %tmp9303, i64 1 + %tmp9305 = getelementptr inbounds float* %tmp9304, i64 1 + %tmp9306 = getelementptr inbounds float* %tmp9305, i64 1 + %tmp9307 = getelementptr inbounds float* %tmp9306, i64 1 + %tmp9308 = getelementptr inbounds float* %tmp9307, i64 1 + %tmp9309 = getelementptr inbounds float* %tmp9308, i64 1 + %tmp9310 = getelementptr inbounds float* %tmp9309, i64 1 + %tmp9311 = getelementptr inbounds float* %tmp9310, i64 1 + %tmp9312 = getelementptr inbounds float* %tmp9311, i64 1 + %tmp9313 = getelementptr inbounds float* %tmp9312, i64 1 + %tmp9314 = getelementptr inbounds float* %tmp9313, i64 1 + %tmp9315 = getelementptr inbounds float* %tmp9314, i64 1 + %tmp9316 = getelementptr inbounds float* %tmp9315, i64 1 + %tmp9317 = getelementptr inbounds float* %tmp9316, i64 1 + %tmp9318 = getelementptr inbounds float* %tmp9317, i64 1 + %tmp9319 = getelementptr inbounds float* %tmp9318, i64 1 + %tmp9320 = getelementptr inbounds float* %tmp9319, i64 1 + %tmp9321 = getelementptr inbounds float* %tmp9320, i64 1 + %tmp9322 = getelementptr inbounds float* %tmp9321, i64 1 + %tmp9323 = getelementptr inbounds float* %tmp9322, i64 1 + %tmp9324 = getelementptr inbounds 
float* %tmp9323, i64 1 + %tmp9325 = getelementptr inbounds float* %tmp9324, i64 1 + %tmp9326 = getelementptr inbounds float* %tmp9325, i64 1 + %tmp9327 = getelementptr inbounds float* %tmp9326, i64 1 + %tmp9328 = getelementptr inbounds float* %tmp9327, i64 1 + %tmp9329 = getelementptr inbounds float* %tmp9328, i64 1 + %tmp9330 = getelementptr inbounds float* %tmp9329, i64 1 + %tmp9331 = getelementptr inbounds float* %tmp9330, i64 1 + %tmp9332 = getelementptr inbounds float* %tmp9331, i64 1 + %tmp9333 = getelementptr inbounds float* %tmp9332, i64 1 + %tmp9334 = getelementptr inbounds float* %tmp9333, i64 1 + %tmp9335 = getelementptr inbounds float* %tmp9334, i64 1 + %tmp9336 = getelementptr inbounds float* %tmp9335, i64 1 + %tmp9337 = getelementptr inbounds float* %tmp9336, i64 1 + %tmp9338 = getelementptr inbounds float* %tmp9337, i64 1 + %tmp9339 = getelementptr inbounds float* %tmp9338, i64 1 + %tmp9340 = getelementptr inbounds float* %tmp9339, i64 1 + %tmp9341 = getelementptr inbounds float* %tmp9340, i64 1 + %tmp9342 = getelementptr inbounds float* %tmp9341, i64 1 + %tmp9343 = getelementptr inbounds float* %tmp9342, i64 1 + %tmp9344 = getelementptr inbounds float* %tmp9343, i64 1 + %tmp9345 = getelementptr inbounds float* %tmp9344, i64 1 + %tmp9346 = getelementptr inbounds float* %tmp9345, i64 1 + %tmp9347 = getelementptr inbounds float* %tmp9346, i64 1 + %tmp9348 = getelementptr inbounds float* %tmp9347, i64 1 + %tmp9349 = getelementptr inbounds float* %tmp9348, i64 1 + %tmp9350 = getelementptr inbounds float* %tmp9349, i64 1 + %tmp9351 = getelementptr inbounds float* %tmp9350, i64 1 + %tmp9352 = getelementptr inbounds float* %tmp9351, i64 1 + %tmp9353 = getelementptr inbounds float* %tmp9352, i64 1 + %tmp9354 = getelementptr inbounds float* %tmp9353, i64 1 + %tmp9355 = getelementptr inbounds float* %tmp9354, i64 1 + %tmp9356 = getelementptr inbounds float* %tmp9355, i64 1 + %tmp9357 = getelementptr inbounds float* %tmp9356, i64 1 + %tmp9358 = getelementptr 
inbounds float* %tmp9357, i64 1 + %tmp9359 = getelementptr inbounds float* %tmp9358, i64 1 + %tmp9360 = getelementptr inbounds float* %tmp9359, i64 1 + %tmp9361 = getelementptr inbounds float* %tmp9360, i64 1 + %tmp9362 = getelementptr inbounds float* %tmp9361, i64 1 + %tmp9363 = getelementptr inbounds float* %tmp9362, i64 1 + %tmp9364 = getelementptr inbounds float* %tmp9363, i64 1 + %tmp9365 = getelementptr inbounds float* %tmp9364, i64 1 + %tmp9366 = getelementptr inbounds float* %tmp9365, i64 1 + %tmp9367 = getelementptr inbounds float* %tmp9366, i64 1 + %tmp9368 = getelementptr inbounds float* %tmp9367, i64 1 + %tmp9369 = getelementptr inbounds float* %tmp9368, i64 1 + %tmp9370 = getelementptr inbounds float* %tmp9369, i64 1 + %tmp9371 = getelementptr inbounds float* %tmp9370, i64 1 + %tmp9372 = getelementptr inbounds float* %tmp9371, i64 1 + %tmp9373 = getelementptr inbounds float* %tmp9372, i64 1 + %tmp9374 = getelementptr inbounds float* %tmp9373, i64 1 + %tmp9375 = getelementptr inbounds float* %tmp9374, i64 1 + %tmp9376 = getelementptr inbounds float* %tmp9375, i64 1 + %tmp9377 = getelementptr inbounds float* %tmp9376, i64 1 + %tmp9378 = getelementptr inbounds float* %tmp9377, i64 1 + %tmp9379 = getelementptr inbounds float* %tmp9378, i64 1 + %tmp9380 = getelementptr inbounds float* %tmp9379, i64 1 + %tmp9381 = getelementptr inbounds float* %tmp9380, i64 1 + %tmp9382 = getelementptr inbounds float* %tmp9381, i64 1 + %tmp9383 = getelementptr inbounds float* %tmp9382, i64 1 + %tmp9384 = getelementptr inbounds float* %tmp9383, i64 1 + %tmp9385 = getelementptr inbounds float* %tmp9384, i64 1 + %tmp9386 = getelementptr inbounds float* %tmp9385, i64 1 + %tmp9387 = getelementptr inbounds float* %tmp9386, i64 1 + %tmp9388 = getelementptr inbounds float* %tmp9387, i64 1 + %tmp9389 = getelementptr inbounds float* %tmp9388, i64 1 + %tmp9390 = getelementptr inbounds float* %tmp9389, i64 1 + %tmp9391 = getelementptr inbounds float* %tmp9390, i64 1 + %tmp9392 = 
getelementptr inbounds float* %tmp9391, i64 1 + %tmp9393 = getelementptr inbounds float* %tmp9392, i64 1 + %tmp9394 = getelementptr inbounds float* %tmp9393, i64 1 + %tmp9395 = getelementptr inbounds float* %tmp9394, i64 1 + %tmp9396 = getelementptr inbounds float* %tmp9395, i64 1 + %tmp9397 = getelementptr inbounds float* %tmp9396, i64 1 + %tmp9398 = getelementptr inbounds float* %tmp9397, i64 1 + %tmp9399 = getelementptr inbounds float* %tmp9398, i64 1 + %tmp9400 = getelementptr inbounds float* %tmp9399, i64 1 + %tmp9401 = getelementptr inbounds float* %tmp9400, i64 1 + %tmp9402 = getelementptr inbounds float* %tmp9401, i64 1 + %tmp9403 = getelementptr inbounds float* %tmp9402, i64 1 + %tmp9404 = getelementptr inbounds float* %tmp9403, i64 1 + %tmp9405 = getelementptr inbounds float* %tmp9404, i64 1 + %tmp9406 = getelementptr inbounds float* %tmp9405, i64 1 + %tmp9407 = getelementptr inbounds float* %tmp9406, i64 1 + %tmp9408 = getelementptr inbounds float* %tmp9407, i64 1 + %tmp9409 = getelementptr inbounds float* %tmp9408, i64 1 + %tmp9410 = getelementptr inbounds float* %tmp9409, i64 1 + %tmp9411 = getelementptr inbounds float* %tmp9410, i64 1 + %tmp9412 = getelementptr inbounds float* %tmp9411, i64 1 + %tmp9413 = getelementptr inbounds float* %tmp9412, i64 1 + %tmp9414 = getelementptr inbounds float* %tmp9413, i64 1 + %tmp9415 = getelementptr inbounds float* %tmp9414, i64 1 + %tmp9416 = getelementptr inbounds float* %tmp9415, i64 1 + %tmp9417 = getelementptr inbounds float* %tmp9416, i64 1 + %tmp9418 = getelementptr inbounds float* %tmp9417, i64 1 + %tmp9419 = getelementptr inbounds float* %tmp9418, i64 1 + %tmp9420 = getelementptr inbounds float* %tmp9419, i64 1 + %tmp9421 = getelementptr inbounds float* %tmp9420, i64 1 + %tmp9422 = getelementptr inbounds float* %tmp9421, i64 1 + %tmp9423 = getelementptr inbounds float* %tmp9422, i64 1 + %tmp9424 = getelementptr inbounds float* %tmp9423, i64 1 + %tmp9425 = getelementptr inbounds float* %tmp9424, i64 1 + 
%tmp9426 = getelementptr inbounds float* %tmp9425, i64 1 + %tmp9427 = getelementptr inbounds float* %tmp9426, i64 1 + %tmp9428 = getelementptr inbounds float* %tmp9427, i64 1 + %tmp9429 = getelementptr inbounds float* %tmp9428, i64 1 + %tmp9430 = getelementptr inbounds float* %tmp9429, i64 1 + %tmp9431 = getelementptr inbounds float* %tmp9430, i64 1 + %tmp9432 = getelementptr inbounds float* %tmp9431, i64 1 + %tmp9433 = getelementptr inbounds float* %tmp9432, i64 1 + %tmp9434 = getelementptr inbounds float* %tmp9433, i64 1 + %tmp9435 = getelementptr inbounds float* %tmp9434, i64 1 + %tmp9436 = getelementptr inbounds float* %tmp9435, i64 1 + %tmp9437 = getelementptr inbounds float* %tmp9436, i64 1 + %tmp9438 = getelementptr inbounds float* %tmp9437, i64 1 + %tmp9439 = getelementptr inbounds float* %tmp9438, i64 1 + %tmp9440 = getelementptr inbounds float* %tmp9439, i64 1 + %tmp9441 = getelementptr inbounds float* %tmp9440, i64 1 + %tmp9442 = getelementptr inbounds float* %tmp9441, i64 1 + %tmp9443 = getelementptr inbounds float* %tmp9442, i64 1 + %tmp9444 = getelementptr inbounds float* %tmp9443, i64 1 + %tmp9445 = getelementptr inbounds float* %tmp9444, i64 1 + %tmp9446 = getelementptr inbounds float* %tmp9445, i64 1 + %tmp9447 = getelementptr inbounds float* %tmp9446, i64 1 + %tmp9448 = getelementptr inbounds float* %tmp9447, i64 1 + %tmp9449 = getelementptr inbounds float* %tmp9448, i64 1 + %tmp9450 = getelementptr inbounds float* %tmp9449, i64 1 + %tmp9451 = getelementptr inbounds float* %tmp9450, i64 1 + %tmp9452 = getelementptr inbounds float* %tmp9451, i64 1 + %tmp9453 = getelementptr inbounds float* %tmp9452, i64 1 + %tmp9454 = getelementptr inbounds float* %tmp9453, i64 1 + %tmp9455 = getelementptr inbounds float* %tmp9454, i64 1 + %tmp9456 = getelementptr inbounds float* %tmp9455, i64 1 + %tmp9457 = getelementptr inbounds float* %tmp9456, i64 1 + %tmp9458 = getelementptr inbounds float* %tmp9457, i64 1 + %tmp9459 = getelementptr inbounds float* %tmp9458, 
i64 1 + %tmp9460 = getelementptr inbounds float* %tmp9459, i64 1 + %tmp9461 = getelementptr inbounds float* %tmp9460, i64 1 + %tmp9462 = getelementptr inbounds float* %tmp9461, i64 1 + %tmp9463 = getelementptr inbounds float* %tmp9462, i64 1 + %tmp9464 = getelementptr inbounds float* %tmp9463, i64 1 + %tmp9465 = getelementptr inbounds float* %tmp9464, i64 1 + %tmp9466 = getelementptr inbounds float* %tmp9465, i64 1 + %tmp9467 = getelementptr inbounds float* %tmp9466, i64 1 + %tmp9468 = getelementptr inbounds float* %tmp9467, i64 1 + %tmp9469 = getelementptr inbounds float* %tmp9468, i64 1 + %tmp9470 = getelementptr inbounds float* %tmp9469, i64 1 + %tmp9471 = getelementptr inbounds float* %tmp9470, i64 1 + %tmp9472 = getelementptr inbounds float* %tmp9471, i64 1 + %tmp9473 = getelementptr inbounds float* %tmp9472, i64 1 + %tmp9474 = getelementptr inbounds float* %tmp9473, i64 1 + %tmp9475 = getelementptr inbounds float* %tmp9474, i64 1 + %tmp9476 = getelementptr inbounds float* %tmp9475, i64 1 + %tmp9477 = getelementptr inbounds float* %tmp9476, i64 1 + %tmp9478 = getelementptr inbounds float* %tmp9477, i64 1 + %tmp9479 = getelementptr inbounds float* %tmp9478, i64 1 + %tmp9480 = getelementptr inbounds float* %tmp9479, i64 1 + %tmp9481 = getelementptr inbounds float* %tmp9480, i64 1 + %tmp9482 = getelementptr inbounds float* %tmp9481, i64 1 + %tmp9483 = getelementptr inbounds float* %tmp9482, i64 1 + %tmp9484 = getelementptr inbounds float* %tmp9483, i64 1 + %tmp9485 = getelementptr inbounds float* %tmp9484, i64 1 + %tmp9486 = getelementptr inbounds float* %tmp9485, i64 1 + %tmp9487 = getelementptr inbounds float* %tmp9486, i64 1 + %tmp9488 = getelementptr inbounds float* %tmp9487, i64 1 + %tmp9489 = getelementptr inbounds float* %tmp9488, i64 1 + %tmp9490 = getelementptr inbounds float* %tmp9489, i64 1 + %tmp9491 = getelementptr inbounds float* %tmp9490, i64 1 + %tmp9492 = getelementptr inbounds float* %tmp9491, i64 1 + %tmp9493 = getelementptr inbounds float* 
%tmp9492, i64 1 + %tmp9494 = getelementptr inbounds float* %tmp9493, i64 1 + %tmp9495 = getelementptr inbounds float* %tmp9494, i64 1 + %tmp9496 = getelementptr inbounds float* %tmp9495, i64 1 + %tmp9497 = getelementptr inbounds float* %tmp9496, i64 1 + %tmp9498 = getelementptr inbounds float* %tmp9497, i64 1 + %tmp9499 = getelementptr inbounds float* %tmp9498, i64 1 + %tmp9500 = getelementptr inbounds float* %tmp9499, i64 1 + %tmp9501 = getelementptr inbounds float* %tmp9500, i64 1 + %tmp9502 = getelementptr inbounds float* %tmp9501, i64 1 + %tmp9503 = getelementptr inbounds float* %tmp9502, i64 1 + %tmp9504 = getelementptr inbounds float* %tmp9503, i64 1 + %tmp9505 = getelementptr inbounds float* %tmp9504, i64 1 + %tmp9506 = getelementptr inbounds float* %tmp9505, i64 1 + %tmp9507 = getelementptr inbounds float* %tmp9506, i64 1 + %tmp9508 = getelementptr inbounds float* %tmp9507, i64 1 + %tmp9509 = getelementptr inbounds float* %tmp9508, i64 1 + %tmp9510 = getelementptr inbounds float* %tmp9509, i64 1 + %tmp9511 = getelementptr inbounds float* %tmp9510, i64 1 + %tmp9512 = getelementptr inbounds float* %tmp9511, i64 1 + %tmp9513 = getelementptr inbounds float* %tmp9512, i64 1 + %tmp9514 = getelementptr inbounds float* %tmp9513, i64 1 + %tmp9515 = getelementptr inbounds float* %tmp9514, i64 1 + %tmp9516 = getelementptr inbounds float* %tmp9515, i64 1 + %tmp9517 = getelementptr inbounds float* %tmp9516, i64 1 + %tmp9518 = getelementptr inbounds float* %tmp9517, i64 1 + %tmp9519 = getelementptr inbounds float* %tmp9518, i64 1 + %tmp9520 = getelementptr inbounds float* %tmp9519, i64 1 + %tmp9521 = getelementptr inbounds float* %tmp9520, i64 1 + %tmp9522 = getelementptr inbounds float* %tmp9521, i64 1 + %tmp9523 = getelementptr inbounds float* %tmp9522, i64 1 + %tmp9524 = getelementptr inbounds float* %tmp9523, i64 1 + %tmp9525 = getelementptr inbounds float* %tmp9524, i64 1 + %tmp9526 = getelementptr inbounds float* %tmp9525, i64 1 + %tmp9527 = getelementptr inbounds 
float* %tmp9526, i64 1 + %tmp9528 = getelementptr inbounds float* %tmp9527, i64 1 + %tmp9529 = getelementptr inbounds float* %tmp9528, i64 1 + %tmp9530 = getelementptr inbounds float* %tmp9529, i64 1 + %tmp9531 = getelementptr inbounds float* %tmp9530, i64 1 + %tmp9532 = getelementptr inbounds float* %tmp9531, i64 1 + %tmp9533 = getelementptr inbounds float* %tmp9532, i64 1 + %tmp9534 = getelementptr inbounds float* %tmp9533, i64 1 + %tmp9535 = getelementptr inbounds float* %tmp9534, i64 1 + %tmp9536 = getelementptr inbounds float* %tmp9535, i64 1 + %tmp9537 = getelementptr inbounds float* %tmp9536, i64 1 + %tmp9538 = getelementptr inbounds float* %tmp9537, i64 1 + %tmp9539 = getelementptr inbounds float* %tmp9538, i64 1 + %tmp9540 = getelementptr inbounds float* %tmp9539, i64 1 + %tmp9541 = getelementptr inbounds float* %tmp9540, i64 1 + %tmp9542 = getelementptr inbounds float* %tmp9541, i64 1 + %tmp9543 = getelementptr inbounds float* %tmp9542, i64 1 + %tmp9544 = getelementptr inbounds float* %tmp9543, i64 1 + %tmp9545 = getelementptr inbounds float* %tmp9544, i64 1 + %tmp9546 = getelementptr inbounds float* %tmp9545, i64 1 + %tmp9547 = getelementptr inbounds float* %tmp9546, i64 1 + %tmp9548 = getelementptr inbounds float* %tmp9547, i64 1 + %tmp9549 = getelementptr inbounds float* %tmp9548, i64 1 + %tmp9550 = getelementptr inbounds float* %tmp9549, i64 1 + %tmp9551 = getelementptr inbounds float* %tmp9550, i64 1 + %tmp9552 = getelementptr inbounds float* %tmp9551, i64 1 + %tmp9553 = getelementptr inbounds float* %tmp9552, i64 1 + %tmp9554 = getelementptr inbounds float* %tmp9553, i64 1 + %tmp9555 = getelementptr inbounds float* %tmp9554, i64 1 + %tmp9556 = getelementptr inbounds float* %tmp9555, i64 1 + %tmp9557 = getelementptr inbounds float* %tmp9556, i64 1 + %tmp9558 = getelementptr inbounds float* %tmp9557, i64 1 + %tmp9559 = getelementptr inbounds float* %tmp9558, i64 1 + %tmp9560 = getelementptr inbounds float* %tmp9559, i64 1 + %tmp9561 = getelementptr 
inbounds float* %tmp9560, i64 1 + %tmp9562 = getelementptr inbounds float* %tmp9561, i64 1 + %tmp9563 = getelementptr inbounds float* %tmp9562, i64 1 + %tmp9564 = getelementptr inbounds float* %tmp9563, i64 1 + %tmp9565 = getelementptr inbounds float* %tmp9564, i64 1 + %tmp9566 = getelementptr inbounds float* %tmp9565, i64 1 + %tmp9567 = getelementptr inbounds float* %tmp9566, i64 1 + %tmp9568 = getelementptr inbounds float* %tmp9567, i64 1 + %tmp9569 = getelementptr inbounds float* %tmp9568, i64 1 + %tmp9570 = getelementptr inbounds float* %tmp9569, i64 1 + %tmp9571 = getelementptr inbounds float* %tmp9570, i64 1 + %tmp9572 = getelementptr inbounds float* %tmp9571, i64 1 + %tmp9573 = getelementptr inbounds float* %tmp9572, i64 1 + %tmp9574 = getelementptr inbounds float* %tmp9573, i64 1 + %tmp9575 = getelementptr inbounds float* %tmp9574, i64 1 + %tmp9576 = getelementptr inbounds float* %tmp9575, i64 1 + %tmp9577 = getelementptr inbounds float* %tmp9576, i64 1 + %tmp9578 = getelementptr inbounds float* %tmp9577, i64 1 + %tmp9579 = getelementptr inbounds float* %tmp9578, i64 1 + %tmp9580 = getelementptr inbounds float* %tmp9579, i64 1 + %tmp9581 = getelementptr inbounds float* %tmp9580, i64 1 + %tmp9582 = getelementptr inbounds float* %tmp9581, i64 1 + %tmp9583 = getelementptr inbounds float* %tmp9582, i64 1 + %tmp9584 = getelementptr inbounds float* %tmp9583, i64 1 + %tmp9585 = getelementptr inbounds float* %tmp9584, i64 1 + %tmp9586 = getelementptr inbounds float* %tmp9585, i64 1 + %tmp9587 = getelementptr inbounds float* %tmp9586, i64 1 + %tmp9588 = getelementptr inbounds float* %tmp9587, i64 1 + %tmp9589 = getelementptr inbounds float* %tmp9588, i64 1 + %tmp9590 = getelementptr inbounds float* %tmp9589, i64 1 + %tmp9591 = getelementptr inbounds float* %tmp9590, i64 1 + %tmp9592 = getelementptr inbounds float* %tmp9591, i64 1 + %tmp9593 = getelementptr inbounds float* %tmp9592, i64 1 + %tmp9594 = getelementptr inbounds float* %tmp9593, i64 1 + %tmp9595 = 
getelementptr inbounds float* %tmp9594, i64 1 + %tmp9596 = getelementptr inbounds float* %tmp9595, i64 1 + %tmp9597 = getelementptr inbounds float* %tmp9596, i64 1 + %tmp9598 = getelementptr inbounds float* %tmp9597, i64 1 + %tmp9599 = getelementptr inbounds float* %tmp9598, i64 1 + %tmp9600 = getelementptr inbounds float* %tmp9599, i64 1 + %tmp9601 = getelementptr inbounds float* %tmp9600, i64 1 + %tmp9602 = getelementptr inbounds float* %tmp9601, i64 1 + %tmp9603 = getelementptr inbounds float* %tmp9602, i64 1 + %tmp9604 = getelementptr inbounds float* %tmp9603, i64 1 + %tmp9605 = getelementptr inbounds float* %tmp9604, i64 1 + %tmp9606 = getelementptr inbounds float* %tmp9605, i64 1 + %tmp9607 = getelementptr inbounds float* %tmp9606, i64 1 + %tmp9608 = getelementptr inbounds float* %tmp9607, i64 1 + %tmp9609 = getelementptr inbounds float* %tmp9608, i64 1 + %tmp9610 = getelementptr inbounds float* %tmp9609, i64 1 + %tmp9611 = getelementptr inbounds float* %tmp9610, i64 1 + %tmp9612 = getelementptr inbounds float* %tmp9611, i64 1 + %tmp9613 = getelementptr inbounds float* %tmp9612, i64 1 + %tmp9614 = getelementptr inbounds float* %tmp9613, i64 1 + %tmp9615 = getelementptr inbounds float* %tmp9614, i64 1 + %tmp9616 = getelementptr inbounds float* %tmp9615, i64 1 + %tmp9617 = getelementptr inbounds float* %tmp9616, i64 1 + %tmp9618 = getelementptr inbounds float* %tmp9617, i64 1 + %tmp9619 = getelementptr inbounds float* %tmp9618, i64 1 + %tmp9620 = getelementptr inbounds float* %tmp9619, i64 1 + %tmp9621 = getelementptr inbounds float* %tmp9620, i64 1 + %tmp9622 = getelementptr inbounds float* %tmp9621, i64 1 + %tmp9623 = getelementptr inbounds float* %tmp9622, i64 1 + %tmp9624 = getelementptr inbounds float* %tmp9623, i64 1 + %tmp9625 = getelementptr inbounds float* %tmp9624, i64 1 + %tmp9626 = getelementptr inbounds float* %tmp9625, i64 1 + %tmp9627 = getelementptr inbounds float* %tmp9626, i64 1 + %tmp9628 = getelementptr inbounds float* %tmp9627, i64 1 + 
%tmp9629 = getelementptr inbounds float* %tmp9628, i64 1 + %tmp9630 = getelementptr inbounds float* %tmp9629, i64 1 + %tmp9631 = getelementptr inbounds float* %tmp9630, i64 1 + %tmp9632 = getelementptr inbounds float* %tmp9631, i64 1 + %tmp9633 = getelementptr inbounds float* %tmp9632, i64 1 + %tmp9634 = getelementptr inbounds float* %tmp9633, i64 1 + %tmp9635 = getelementptr inbounds float* %tmp9634, i64 1 + %tmp9636 = getelementptr inbounds float* %tmp9635, i64 1 + %tmp9637 = getelementptr inbounds float* %tmp9636, i64 1 + %tmp9638 = getelementptr inbounds float* %tmp9637, i64 1 + %tmp9639 = getelementptr inbounds float* %tmp9638, i64 1 + %tmp9640 = getelementptr inbounds float* %tmp9639, i64 1 + %tmp9641 = getelementptr inbounds float* %tmp9640, i64 1 + %tmp9642 = getelementptr inbounds float* %tmp9641, i64 1 + %tmp9643 = getelementptr inbounds float* %tmp9642, i64 1 + %tmp9644 = getelementptr inbounds float* %tmp9643, i64 1 + %tmp9645 = getelementptr inbounds float* %tmp9644, i64 1 + %tmp9646 = getelementptr inbounds float* %tmp9645, i64 1 + %tmp9647 = getelementptr inbounds float* %tmp9646, i64 1 + %tmp9648 = getelementptr inbounds float* %tmp9647, i64 1 + %tmp9649 = getelementptr inbounds float* %tmp9648, i64 1 + %tmp9650 = getelementptr inbounds float* %tmp9649, i64 1 + %tmp9651 = getelementptr inbounds float* %tmp9650, i64 1 + %tmp9652 = getelementptr inbounds float* %tmp9651, i64 1 + %tmp9653 = getelementptr inbounds float* %tmp9652, i64 1 + %tmp9654 = getelementptr inbounds float* %tmp9653, i64 1 + %tmp9655 = getelementptr inbounds float* %tmp9654, i64 1 + %tmp9656 = getelementptr inbounds float* %tmp9655, i64 1 + %tmp9657 = getelementptr inbounds float* %tmp9656, i64 1 + %tmp9658 = getelementptr inbounds float* %tmp9657, i64 1 + %tmp9659 = getelementptr inbounds float* %tmp9658, i64 1 + %tmp9660 = getelementptr inbounds float* %tmp9659, i64 1 + %tmp9661 = getelementptr inbounds float* %tmp9660, i64 1 + %tmp9662 = getelementptr inbounds float* %tmp9661, 
i64 1 + %tmp9663 = getelementptr inbounds float* %tmp9662, i64 1 + %tmp9664 = getelementptr inbounds float* %tmp9663, i64 1 + %tmp9665 = getelementptr inbounds float* %tmp9664, i64 1 + %tmp9666 = getelementptr inbounds float* %tmp9665, i64 1 + %tmp9667 = getelementptr inbounds float* %tmp9666, i64 1 + %tmp9668 = getelementptr inbounds float* %tmp9667, i64 1 + %tmp9669 = getelementptr inbounds float* %tmp9668, i64 1 + %tmp9670 = getelementptr inbounds float* %tmp9669, i64 1 + %tmp9671 = getelementptr inbounds float* %tmp9670, i64 1 + %tmp9672 = getelementptr inbounds float* %tmp9671, i64 1 + %tmp9673 = getelementptr inbounds float* %tmp9672, i64 1 + %tmp9674 = getelementptr inbounds float* %tmp9673, i64 1 + %tmp9675 = getelementptr inbounds float* %tmp9674, i64 1 + %tmp9676 = getelementptr inbounds float* %tmp9675, i64 1 + %tmp9677 = getelementptr inbounds float* %tmp9676, i64 1 + %tmp9678 = getelementptr inbounds float* %tmp9677, i64 1 + %tmp9679 = getelementptr inbounds float* %tmp9678, i64 1 + %tmp9680 = getelementptr inbounds float* %tmp9679, i64 1 + %tmp9681 = getelementptr inbounds float* %tmp9680, i64 1 + %tmp9682 = getelementptr inbounds float* %tmp9681, i64 1 + %tmp9683 = getelementptr inbounds float* %tmp9682, i64 1 + %tmp9684 = getelementptr inbounds float* %tmp9683, i64 1 + %tmp9685 = getelementptr inbounds float* %tmp9684, i64 1 + %tmp9686 = getelementptr inbounds float* %tmp9685, i64 1 + %tmp9687 = getelementptr inbounds float* %tmp9686, i64 1 + %tmp9688 = getelementptr inbounds float* %tmp9687, i64 1 + %tmp9689 = getelementptr inbounds float* %tmp9688, i64 1 + %tmp9690 = getelementptr inbounds float* %tmp9689, i64 1 + %tmp9691 = getelementptr inbounds float* %tmp9690, i64 1 + %tmp9692 = getelementptr inbounds float* %tmp9691, i64 1 + %tmp9693 = getelementptr inbounds float* %tmp9692, i64 1 + %tmp9694 = getelementptr inbounds float* %tmp9693, i64 1 + %tmp9695 = getelementptr inbounds float* %tmp9694, i64 1 + %tmp9696 = getelementptr inbounds float* 
%tmp9695, i64 1 + %tmp9697 = getelementptr inbounds float* %tmp9696, i64 1 + %tmp9698 = getelementptr inbounds float* %tmp9697, i64 1 + %tmp9699 = getelementptr inbounds float* %tmp9698, i64 1 + %tmp9700 = getelementptr inbounds float* %tmp9699, i64 1 + %tmp9701 = getelementptr inbounds float* %tmp9700, i64 1 + %tmp9702 = getelementptr inbounds float* %tmp9701, i64 1 + %tmp9703 = getelementptr inbounds float* %tmp9702, i64 1 + %tmp9704 = getelementptr inbounds float* %tmp9703, i64 1 + %tmp9705 = getelementptr inbounds float* %tmp9704, i64 1 + %tmp9706 = getelementptr inbounds float* %tmp9705, i64 1 + %tmp9707 = getelementptr inbounds float* %tmp9706, i64 1 + %tmp9708 = getelementptr inbounds float* %tmp9707, i64 1 + %tmp9709 = getelementptr inbounds float* %tmp9708, i64 1 + %tmp9710 = getelementptr inbounds float* %tmp9709, i64 1 + %tmp9711 = getelementptr inbounds float* %tmp9710, i64 1 + %tmp9712 = getelementptr inbounds float* %tmp9711, i64 1 + %tmp9713 = getelementptr inbounds float* %tmp9712, i64 1 + %tmp9714 = getelementptr inbounds float* %tmp9713, i64 1 + %tmp9715 = getelementptr inbounds float* %tmp9714, i64 1 + %tmp9716 = getelementptr inbounds float* %tmp9715, i64 1 + %tmp9717 = getelementptr inbounds float* %tmp9716, i64 1 + %tmp9718 = getelementptr inbounds float* %tmp9717, i64 1 + %tmp9719 = getelementptr inbounds float* %tmp9718, i64 1 + %tmp9720 = getelementptr inbounds float* %tmp9719, i64 1 + %tmp9721 = getelementptr inbounds float* %tmp9720, i64 1 + %tmp9722 = getelementptr inbounds float* %tmp9721, i64 1 + %tmp9723 = getelementptr inbounds float* %tmp9722, i64 1 + %tmp9724 = getelementptr inbounds float* %tmp9723, i64 1 + %tmp9725 = getelementptr inbounds float* %tmp9724, i64 1 + %tmp9726 = getelementptr inbounds float* %tmp9725, i64 1 + %tmp9727 = getelementptr inbounds float* %tmp9726, i64 1 + %tmp9728 = getelementptr inbounds float* %tmp9727, i64 1 + %tmp9729 = getelementptr inbounds float* %tmp9728, i64 1 + %tmp9730 = getelementptr inbounds 
float* %tmp9729, i64 1 + %tmp9731 = getelementptr inbounds float* %tmp9730, i64 1 + %tmp9732 = getelementptr inbounds float* %tmp9731, i64 1 + %tmp9733 = getelementptr inbounds float* %tmp9732, i64 1 + %tmp9734 = getelementptr inbounds float* %tmp9733, i64 1 + %tmp9735 = getelementptr inbounds float* %tmp9734, i64 1 + %tmp9736 = getelementptr inbounds float* %tmp9735, i64 1 + %tmp9737 = getelementptr inbounds float* %tmp9736, i64 1 + %tmp9738 = getelementptr inbounds float* %tmp9737, i64 1 + %tmp9739 = getelementptr inbounds float* %tmp9738, i64 1 + %tmp9740 = getelementptr inbounds float* %tmp9739, i64 1 + %tmp9741 = getelementptr inbounds float* %tmp9740, i64 1 + %tmp9742 = getelementptr inbounds float* %tmp9741, i64 1 + %tmp9743 = getelementptr inbounds float* %tmp9742, i64 1 + %tmp9744 = getelementptr inbounds float* %tmp9743, i64 1 + %tmp9745 = getelementptr inbounds float* %tmp9744, i64 1 + %tmp9746 = getelementptr inbounds float* %tmp9745, i64 1 + %tmp9747 = getelementptr inbounds float* %tmp9746, i64 1 + %tmp9748 = getelementptr inbounds float* %tmp9747, i64 1 + %tmp9749 = getelementptr inbounds float* %tmp9748, i64 1 + %tmp9750 = getelementptr inbounds float* %tmp9749, i64 1 + %tmp9751 = getelementptr inbounds float* %tmp9750, i64 1 + %tmp9752 = getelementptr inbounds float* %tmp9751, i64 1 + %tmp9753 = getelementptr inbounds float* %tmp9752, i64 1 + %tmp9754 = getelementptr inbounds float* %tmp9753, i64 1 + %tmp9755 = getelementptr inbounds float* %tmp9754, i64 1 + %tmp9756 = getelementptr inbounds float* %tmp9755, i64 1 + %tmp9757 = getelementptr inbounds float* %tmp9756, i64 1 + %tmp9758 = getelementptr inbounds float* %tmp9757, i64 1 + %tmp9759 = getelementptr inbounds float* %tmp9758, i64 1 + %tmp9760 = getelementptr inbounds float* %tmp9759, i64 1 + %tmp9761 = getelementptr inbounds float* %tmp9760, i64 1 + %tmp9762 = getelementptr inbounds float* %tmp9761, i64 1 + %tmp9763 = getelementptr inbounds float* %tmp9762, i64 1 + %tmp9764 = getelementptr 
inbounds float* %tmp9763, i64 1 + %tmp9765 = getelementptr inbounds float* %tmp9764, i64 1 + %tmp9766 = getelementptr inbounds float* %tmp9765, i64 1 + %tmp9767 = getelementptr inbounds float* %tmp9766, i64 1 + %tmp9768 = getelementptr inbounds float* %tmp9767, i64 1 + %tmp9769 = getelementptr inbounds float* %tmp9768, i64 1 + %tmp9770 = getelementptr inbounds float* %tmp9769, i64 1 + %tmp9771 = getelementptr inbounds float* %tmp9770, i64 1 + %tmp9772 = getelementptr inbounds float* %tmp9771, i64 1 + %tmp9773 = getelementptr inbounds float* %tmp9772, i64 1 + %tmp9774 = getelementptr inbounds float* %tmp9773, i64 1 + %tmp9775 = getelementptr inbounds float* %tmp9774, i64 1 + %tmp9776 = getelementptr inbounds float* %tmp9775, i64 1 + %tmp9777 = getelementptr inbounds float* %tmp9776, i64 1 + %tmp9778 = getelementptr inbounds float* %tmp9777, i64 1 + %tmp9779 = getelementptr inbounds float* %tmp9778, i64 1 + %tmp9780 = getelementptr inbounds float* %tmp9779, i64 1 + %tmp9781 = getelementptr inbounds float* %tmp9780, i64 1 + %tmp9782 = getelementptr inbounds float* %tmp9781, i64 1 + %tmp9783 = getelementptr inbounds float* %tmp9782, i64 1 + %tmp9784 = getelementptr inbounds float* %tmp9783, i64 1 + %tmp9785 = getelementptr inbounds float* %tmp9784, i64 1 + %tmp9786 = getelementptr inbounds float* %tmp9785, i64 1 + %tmp9787 = getelementptr inbounds float* %tmp9786, i64 1 + %tmp9788 = getelementptr inbounds float* %tmp9787, i64 1 + %tmp9789 = getelementptr inbounds float* %tmp9788, i64 1 + %tmp9790 = getelementptr inbounds float* %tmp9789, i64 1 + %tmp9791 = getelementptr inbounds float* %tmp9790, i64 1 + %tmp9792 = getelementptr inbounds float* %tmp9791, i64 1 + %tmp9793 = getelementptr inbounds float* %tmp9792, i64 1 + %tmp9794 = getelementptr inbounds float* %tmp9793, i64 1 + %tmp9795 = getelementptr inbounds float* %tmp9794, i64 1 + %tmp9796 = getelementptr inbounds float* %tmp9795, i64 1 + %tmp9797 = getelementptr inbounds float* %tmp9796, i64 1 + %tmp9798 = 
getelementptr inbounds float* %tmp9797, i64 1 + %tmp9799 = getelementptr inbounds float* %tmp9798, i64 1 + %tmp9800 = getelementptr inbounds float* %tmp9799, i64 1 + %tmp9801 = getelementptr inbounds float* %tmp9800, i64 1 + %tmp9802 = getelementptr inbounds float* %tmp9801, i64 1 + %tmp9803 = getelementptr inbounds float* %tmp9802, i64 1 + %tmp9804 = getelementptr inbounds float* %tmp9803, i64 1 + %tmp9805 = getelementptr inbounds float* %tmp9804, i64 1 + %tmp9806 = getelementptr inbounds float* %tmp9805, i64 1 + %tmp9807 = getelementptr inbounds float* %tmp9806, i64 1 + %tmp9808 = getelementptr inbounds float* %tmp9807, i64 1 + %tmp9809 = getelementptr inbounds float* %tmp9808, i64 1 + %tmp9810 = getelementptr inbounds float* %tmp9809, i64 1 + %tmp9811 = getelementptr inbounds float* %tmp9810, i64 1 + %tmp9812 = getelementptr inbounds float* %tmp9811, i64 1 + %tmp9813 = getelementptr inbounds float* %tmp9812, i64 1 + %tmp9814 = getelementptr inbounds float* %tmp9813, i64 1 + %tmp9815 = getelementptr inbounds float* %tmp9814, i64 1 + %tmp9816 = getelementptr inbounds float* %tmp9815, i64 1 + %tmp9817 = getelementptr inbounds float* %tmp9816, i64 1 + %tmp9818 = getelementptr inbounds float* %tmp9817, i64 1 + %tmp9819 = getelementptr inbounds float* %tmp9818, i64 1 + %tmp9820 = getelementptr inbounds float* %tmp9819, i64 1 + %tmp9821 = getelementptr inbounds float* %tmp9820, i64 1 + %tmp9822 = getelementptr inbounds float* %tmp9821, i64 1 + %tmp9823 = getelementptr inbounds float* %tmp9822, i64 1 + %tmp9824 = getelementptr inbounds float* %tmp9823, i64 1 + %tmp9825 = getelementptr inbounds float* %tmp9824, i64 1 + %tmp9826 = getelementptr inbounds float* %tmp9825, i64 1 + %tmp9827 = getelementptr inbounds float* %tmp9826, i64 1 + %tmp9828 = getelementptr inbounds float* %tmp9827, i64 1 + %tmp9829 = getelementptr inbounds float* %tmp9828, i64 1 + %tmp9830 = getelementptr inbounds float* %tmp9829, i64 1 + %tmp9831 = getelementptr inbounds float* %tmp9830, i64 1 + 
%tmp9832 = getelementptr inbounds float* %tmp9831, i64 1 + %tmp9833 = getelementptr inbounds float* %tmp9832, i64 1 + %tmp9834 = getelementptr inbounds float* %tmp9833, i64 1 + %tmp9835 = getelementptr inbounds float* %tmp9834, i64 1 + %tmp9836 = getelementptr inbounds float* %tmp9835, i64 1 + %tmp9837 = getelementptr inbounds float* %tmp9836, i64 1 + %tmp9838 = getelementptr inbounds float* %tmp9837, i64 1 + %tmp9839 = getelementptr inbounds float* %tmp9838, i64 1 + %tmp9840 = getelementptr inbounds float* %tmp9839, i64 1 + %tmp9841 = getelementptr inbounds float* %tmp9840, i64 1 + %tmp9842 = getelementptr inbounds float* %tmp9841, i64 1 + %tmp9843 = getelementptr inbounds float* %tmp9842, i64 1 + %tmp9844 = getelementptr inbounds float* %tmp9843, i64 1 + %tmp9845 = getelementptr inbounds float* %tmp9844, i64 1 + %tmp9846 = getelementptr inbounds float* %tmp9845, i64 1 + %tmp9847 = getelementptr inbounds float* %tmp9846, i64 1 + %tmp9848 = getelementptr inbounds float* %tmp9847, i64 1 + %tmp9849 = getelementptr inbounds float* %tmp9848, i64 1 + %tmp9850 = getelementptr inbounds float* %tmp9849, i64 1 + %tmp9851 = getelementptr inbounds float* %tmp9850, i64 1 + %tmp9852 = getelementptr inbounds float* %tmp9851, i64 1 + %tmp9853 = getelementptr inbounds float* %tmp9852, i64 1 + %tmp9854 = getelementptr inbounds float* %tmp9853, i64 1 + %tmp9855 = getelementptr inbounds float* %tmp9854, i64 1 + %tmp9856 = getelementptr inbounds float* %tmp9855, i64 1 + %tmp9857 = getelementptr inbounds float* %tmp9856, i64 1 + %tmp9858 = getelementptr inbounds float* %tmp9857, i64 1 + %tmp9859 = getelementptr inbounds float* %tmp9858, i64 1 + %tmp9860 = getelementptr inbounds float* %tmp9859, i64 1 + %tmp9861 = getelementptr inbounds float* %tmp9860, i64 1 + %tmp9862 = getelementptr inbounds float* %tmp9861, i64 1 + %tmp9863 = getelementptr inbounds float* %tmp9862, i64 1 + %tmp9864 = getelementptr inbounds float* %tmp9863, i64 1 + %tmp9865 = getelementptr inbounds float* %tmp9864, 
i64 1 + %tmp9866 = getelementptr inbounds float* %tmp9865, i64 1 + %tmp9867 = getelementptr inbounds float* %tmp9866, i64 1 + %tmp9868 = getelementptr inbounds float* %tmp9867, i64 1 + %tmp9869 = getelementptr inbounds float* %tmp9868, i64 1 + %tmp9870 = getelementptr inbounds float* %tmp9869, i64 1 + %tmp9871 = getelementptr inbounds float* %tmp9870, i64 1 + %tmp9872 = getelementptr inbounds float* %tmp9871, i64 1 + %tmp9873 = getelementptr inbounds float* %tmp9872, i64 1 + %tmp9874 = getelementptr inbounds float* %tmp9873, i64 1 + %tmp9875 = getelementptr inbounds float* %tmp9874, i64 1 + %tmp9876 = getelementptr inbounds float* %tmp9875, i64 1 + %tmp9877 = getelementptr inbounds float* %tmp9876, i64 1 + %tmp9878 = getelementptr inbounds float* %tmp9877, i64 1 + %tmp9879 = getelementptr inbounds float* %tmp9878, i64 1 + %tmp9880 = getelementptr inbounds float* %tmp9879, i64 1 + %tmp9881 = getelementptr inbounds float* %tmp9880, i64 1 + %tmp9882 = getelementptr inbounds float* %tmp9881, i64 1 + %tmp9883 = getelementptr inbounds float* %tmp9882, i64 1 + %tmp9884 = getelementptr inbounds float* %tmp9883, i64 1 + %tmp9885 = getelementptr inbounds float* %tmp9884, i64 1 + %tmp9886 = getelementptr inbounds float* %tmp9885, i64 1 + %tmp9887 = getelementptr inbounds float* %tmp9886, i64 1 + %tmp9888 = getelementptr inbounds float* %tmp9887, i64 1 + %tmp9889 = getelementptr inbounds float* %tmp9888, i64 1 + %tmp9890 = getelementptr inbounds float* %tmp9889, i64 1 + %tmp9891 = getelementptr inbounds float* %tmp9890, i64 1 + %tmp9892 = getelementptr inbounds float* %tmp9891, i64 1 + %tmp9893 = getelementptr inbounds float* %tmp9892, i64 1 + %tmp9894 = getelementptr inbounds float* %tmp9893, i64 1 + %tmp9895 = getelementptr inbounds float* %tmp9894, i64 1 + %tmp9896 = getelementptr inbounds float* %tmp9895, i64 1 + %tmp9897 = getelementptr inbounds float* %tmp9896, i64 1 + %tmp9898 = getelementptr inbounds float* %tmp9897, i64 1 + %tmp9899 = getelementptr inbounds float* 
%tmp9898, i64 1 + %tmp9900 = getelementptr inbounds float* %tmp9899, i64 1 + %tmp9901 = getelementptr inbounds float* %tmp9900, i64 1 + %tmp9902 = getelementptr inbounds float* %tmp9901, i64 1 + %tmp9903 = getelementptr inbounds float* %tmp9902, i64 1 + %tmp9904 = getelementptr inbounds float* %tmp9903, i64 1 + %tmp9905 = getelementptr inbounds float* %tmp9904, i64 1 + %tmp9906 = getelementptr inbounds float* %tmp9905, i64 1 + %tmp9907 = getelementptr inbounds float* %tmp9906, i64 1 + %tmp9908 = getelementptr inbounds float* %tmp9907, i64 1 + %tmp9909 = getelementptr inbounds float* %tmp9908, i64 1 + %tmp9910 = getelementptr inbounds float* %tmp9909, i64 1 + %tmp9911 = getelementptr inbounds float* %tmp9910, i64 1 + %tmp9912 = getelementptr inbounds float* %tmp9911, i64 1 + %tmp9913 = getelementptr inbounds float* %tmp9912, i64 1 + %tmp9914 = getelementptr inbounds float* %tmp9913, i64 1 + %tmp9915 = getelementptr inbounds float* %tmp9914, i64 1 + %tmp9916 = getelementptr inbounds float* %tmp9915, i64 1 + %tmp9917 = getelementptr inbounds float* %tmp9916, i64 1 + %tmp9918 = getelementptr inbounds float* %tmp9917, i64 1 + %tmp9919 = getelementptr inbounds float* %tmp9918, i64 1 + %tmp9920 = getelementptr inbounds float* %tmp9919, i64 1 + %tmp9921 = getelementptr inbounds float* %tmp9920, i64 1 + %tmp9922 = getelementptr inbounds float* %tmp9921, i64 1 + %tmp9923 = getelementptr inbounds float* %tmp9922, i64 1 + %tmp9924 = getelementptr inbounds float* %tmp9923, i64 1 + %tmp9925 = getelementptr inbounds float* %tmp9924, i64 1 + %tmp9926 = getelementptr inbounds float* %tmp9925, i64 1 + %tmp9927 = getelementptr inbounds float* %tmp9926, i64 1 + %tmp9928 = getelementptr inbounds float* %tmp9927, i64 1 + %tmp9929 = getelementptr inbounds float* %tmp9928, i64 1 + %tmp9930 = getelementptr inbounds float* %tmp9929, i64 1 + %tmp9931 = getelementptr inbounds float* %tmp9930, i64 1 + %tmp9932 = getelementptr inbounds float* %tmp9931, i64 1 + %tmp9933 = getelementptr inbounds 
float* %tmp9932, i64 1 + %tmp9934 = getelementptr inbounds float* %tmp9933, i64 1 + %tmp9935 = getelementptr inbounds float* %tmp9934, i64 1 + %tmp9936 = getelementptr inbounds float* %tmp9935, i64 1 + %tmp9937 = getelementptr inbounds float* %tmp9936, i64 1 + %tmp9938 = getelementptr inbounds float* %tmp9937, i64 1 + %tmp9939 = getelementptr inbounds float* %tmp9938, i64 1 + %tmp9940 = getelementptr inbounds float* %tmp9939, i64 1 + %tmp9941 = getelementptr inbounds float* %tmp9940, i64 1 + %tmp9942 = getelementptr inbounds float* %tmp9941, i64 1 + %tmp9943 = getelementptr inbounds float* %tmp9942, i64 1 + %tmp9944 = getelementptr inbounds float* %tmp9943, i64 1 + %tmp9945 = getelementptr inbounds float* %tmp9944, i64 1 + %tmp9946 = getelementptr inbounds float* %tmp9945, i64 1 + %tmp9947 = getelementptr inbounds float* %tmp9946, i64 1 + %tmp9948 = getelementptr inbounds float* %tmp9947, i64 1 + %tmp9949 = getelementptr inbounds float* %tmp9948, i64 1 + %tmp9950 = getelementptr inbounds float* %tmp9949, i64 1 + %tmp9951 = getelementptr inbounds float* %tmp9950, i64 1 + %tmp9952 = getelementptr inbounds float* %tmp9951, i64 1 + %tmp9953 = getelementptr inbounds float* %tmp9952, i64 1 + %tmp9954 = getelementptr inbounds float* %tmp9953, i64 1 + %tmp9955 = getelementptr inbounds float* %tmp9954, i64 1 + %tmp9956 = getelementptr inbounds float* %tmp9955, i64 1 + %tmp9957 = getelementptr inbounds float* %tmp9956, i64 1 + %tmp9958 = getelementptr inbounds float* %tmp9957, i64 1 + %tmp9959 = getelementptr inbounds float* %tmp9958, i64 1 + %tmp9960 = getelementptr inbounds float* %tmp9959, i64 1 + %tmp9961 = getelementptr inbounds float* %tmp9960, i64 1 + %tmp9962 = getelementptr inbounds float* %tmp9961, i64 1 + %tmp9963 = getelementptr inbounds float* %tmp9962, i64 1 + %tmp9964 = getelementptr inbounds float* %tmp9963, i64 1 + %tmp9965 = getelementptr inbounds float* %tmp9964, i64 1 + %tmp9966 = getelementptr inbounds float* %tmp9965, i64 1 + %tmp9967 = getelementptr 
inbounds float* %tmp9966, i64 1 + %tmp9968 = getelementptr inbounds float* %tmp9967, i64 1 + %tmp9969 = getelementptr inbounds float* %tmp9968, i64 1 + %tmp9970 = getelementptr inbounds float* %tmp9969, i64 1 + %tmp9971 = getelementptr inbounds float* %tmp9970, i64 1 + %tmp9972 = getelementptr inbounds float* %tmp9971, i64 1 + %tmp9973 = getelementptr inbounds float* %tmp9972, i64 1 + %tmp9974 = getelementptr inbounds float* %tmp9973, i64 1 + %tmp9975 = getelementptr inbounds float* %tmp9974, i64 1 + %tmp9976 = getelementptr inbounds float* %tmp9975, i64 1 + %tmp9977 = getelementptr inbounds float* %tmp9976, i64 1 + %tmp9978 = getelementptr inbounds float* %tmp9977, i64 1 + %tmp9979 = getelementptr inbounds float* %tmp9978, i64 1 + %tmp9980 = getelementptr inbounds float* %tmp9979, i64 1 + %tmp9981 = getelementptr inbounds float* %tmp9980, i64 1 + %tmp9982 = getelementptr inbounds float* %tmp9981, i64 1 + %tmp9983 = getelementptr inbounds float* %tmp9982, i64 1 + %tmp9984 = getelementptr inbounds float* %tmp9983, i64 1 + %tmp9985 = getelementptr inbounds float* %tmp9984, i64 1 + %tmp9986 = getelementptr inbounds float* %tmp9985, i64 1 + %tmp9987 = getelementptr inbounds float* %tmp9986, i64 1 + %tmp9988 = getelementptr inbounds float* %tmp9987, i64 1 + %tmp9989 = getelementptr inbounds float* %tmp9988, i64 1 + %tmp9990 = getelementptr inbounds float* %tmp9989, i64 1 + %tmp9991 = getelementptr inbounds float* %tmp9990, i64 1 + %tmp9992 = getelementptr inbounds float* %tmp9991, i64 1 + %tmp9993 = getelementptr inbounds float* %tmp9992, i64 1 + %tmp9994 = getelementptr inbounds float* %tmp9993, i64 1 + %tmp9995 = getelementptr inbounds float* %tmp9994, i64 1 + %tmp9996 = getelementptr inbounds float* %tmp9995, i64 1 + %tmp9997 = getelementptr inbounds float* %tmp9996, i64 1 + %tmp9998 = getelementptr inbounds float* %tmp9997, i64 1 + %tmp9999 = getelementptr inbounds float* %tmp9998, i64 1 + %tmp10000 = getelementptr inbounds float* %tmp9999, i64 1 + %tmp10001 = 
getelementptr inbounds float* %tmp10000, i64 1 + %tmp10002 = getelementptr inbounds float* %tmp10001, i64 1 + %tmp10003 = getelementptr inbounds float* %tmp10002, i64 1 + %tmp10004 = getelementptr inbounds float* %tmp10003, i64 1 + %tmp10005 = getelementptr inbounds float* %tmp10004, i64 1 + %tmp10006 = getelementptr inbounds float* %tmp10005, i64 1 + %tmp10007 = getelementptr inbounds float* %tmp10006, i64 1 + %tmp10008 = getelementptr inbounds float* %tmp10007, i64 1 + %tmp10009 = getelementptr inbounds float* %tmp10008, i64 1 + %tmp10010 = getelementptr inbounds float* %tmp10009, i64 1 + %tmp10011 = getelementptr inbounds float* %tmp10010, i64 1 + %tmp10012 = getelementptr inbounds float* %tmp10011, i64 1 + %tmp10013 = getelementptr inbounds float* %tmp10012, i64 1 + %tmp10014 = getelementptr inbounds float* %tmp10013, i64 1 + %tmp10015 = getelementptr inbounds float* %tmp10014, i64 1 + %tmp10016 = getelementptr inbounds float* %tmp10015, i64 1 + %tmp10017 = getelementptr inbounds float* %tmp10016, i64 1 + %tmp10018 = getelementptr inbounds float* %tmp10017, i64 1 + %tmp10019 = getelementptr inbounds float* %tmp10018, i64 1 + %tmp10020 = getelementptr inbounds float* %tmp10019, i64 1 + %tmp10021 = getelementptr inbounds float* %tmp10020, i64 1 + %tmp10022 = getelementptr inbounds float* %tmp10021, i64 1 + %tmp10023 = getelementptr inbounds float* %tmp10022, i64 1 + %tmp10024 = getelementptr inbounds float* %tmp10023, i64 1 + %tmp10025 = getelementptr inbounds float* %tmp10024, i64 1 + %tmp10026 = getelementptr inbounds float* %tmp10025, i64 1 + %tmp10027 = getelementptr inbounds float* %tmp10026, i64 1 + %tmp10028 = getelementptr inbounds float* %tmp10027, i64 1 + %tmp10029 = getelementptr inbounds float* %tmp10028, i64 1 + %tmp10030 = getelementptr inbounds float* %tmp10029, i64 1 + %tmp10031 = getelementptr inbounds float* %tmp10030, i64 1 + %tmp10032 = getelementptr inbounds float* %tmp10031, i64 1 + %tmp10033 = getelementptr inbounds float* %tmp10032, i64 1 
+ %tmp10034 = getelementptr inbounds float* %tmp10033, i64 1 + %tmp10035 = getelementptr inbounds float* %tmp10034, i64 1 + %tmp10036 = getelementptr inbounds float* %tmp10035, i64 1 + %tmp10037 = getelementptr inbounds float* %tmp10036, i64 1 + %tmp10038 = getelementptr inbounds float* %tmp10037, i64 1 + %tmp10039 = getelementptr inbounds float* %tmp10038, i64 1 + %tmp10040 = getelementptr inbounds float* %tmp10039, i64 1 + %tmp10041 = getelementptr inbounds float* %tmp10040, i64 1 + %tmp10042 = getelementptr inbounds float* %tmp10041, i64 1 + %tmp10043 = getelementptr inbounds float* %tmp10042, i64 1 + %tmp10044 = getelementptr inbounds float* %tmp10043, i64 1 + %tmp10045 = getelementptr inbounds float* %tmp10044, i64 1 + %tmp10046 = getelementptr inbounds float* %tmp10045, i64 1 + %tmp10047 = getelementptr inbounds float* %tmp10046, i64 1 + %tmp10048 = getelementptr inbounds float* %tmp10047, i64 1 + %tmp10049 = getelementptr inbounds float* %tmp10048, i64 1 + %tmp10050 = getelementptr inbounds float* %tmp10049, i64 1 + %tmp10051 = getelementptr inbounds float* %tmp10050, i64 1 + %tmp10052 = getelementptr inbounds float* %tmp10051, i64 1 + %tmp10053 = getelementptr inbounds float* %tmp10052, i64 1 + %tmp10054 = getelementptr inbounds float* %tmp10053, i64 1 + %tmp10055 = getelementptr inbounds float* %tmp10054, i64 1 + %tmp10056 = getelementptr inbounds float* %tmp10055, i64 1 + %tmp10057 = getelementptr inbounds float* %tmp10056, i64 1 + %tmp10058 = getelementptr inbounds float* %tmp10057, i64 1 + %tmp10059 = getelementptr inbounds float* %tmp10058, i64 1 + %tmp10060 = getelementptr inbounds float* %tmp10059, i64 1 + %tmp10061 = getelementptr inbounds float* %tmp10060, i64 1 + %tmp10062 = getelementptr inbounds float* %tmp10061, i64 1 + %tmp10063 = getelementptr inbounds float* %tmp10062, i64 1 + %tmp10064 = getelementptr inbounds float* %tmp10063, i64 1 + %tmp10065 = getelementptr inbounds float* %tmp10064, i64 1 + %tmp10066 = getelementptr inbounds float* 
%tmp10065, i64 1 + %tmp10067 = getelementptr inbounds float* %tmp10066, i64 1 + %tmp10068 = getelementptr inbounds float* %tmp10067, i64 1 + %tmp10069 = getelementptr inbounds float* %tmp10068, i64 1 + %tmp10070 = getelementptr inbounds float* %tmp10069, i64 1 + %tmp10071 = getelementptr inbounds float* %tmp10070, i64 1 + %tmp10072 = getelementptr inbounds float* %tmp10071, i64 1 + %tmp10073 = getelementptr inbounds float* %tmp10072, i64 1 + %tmp10074 = getelementptr inbounds float* %tmp10073, i64 1 + %tmp10075 = getelementptr inbounds float* %tmp10074, i64 1 + %tmp10076 = getelementptr inbounds float* %tmp10075, i64 1 + %tmp10077 = getelementptr inbounds float* %tmp10076, i64 1 + %tmp10078 = getelementptr inbounds float* %tmp10077, i64 1 + %tmp10079 = getelementptr inbounds float* %tmp10078, i64 1 + %tmp10080 = getelementptr inbounds float* %tmp10079, i64 1 + %tmp10081 = getelementptr inbounds float* %tmp10080, i64 1 + %tmp10082 = getelementptr inbounds float* %tmp10081, i64 1 + %tmp10083 = getelementptr inbounds float* %tmp10082, i64 1 + %tmp10084 = getelementptr inbounds float* %tmp10083, i64 1 + %tmp10085 = getelementptr inbounds float* %tmp10084, i64 1 + %tmp10086 = getelementptr inbounds float* %tmp10085, i64 1 + %tmp10087 = getelementptr inbounds float* %tmp10086, i64 1 + %tmp10088 = getelementptr inbounds float* %tmp10087, i64 1 + %tmp10089 = getelementptr inbounds float* %tmp10088, i64 1 + %tmp10090 = getelementptr inbounds float* %tmp10089, i64 1 + %tmp10091 = getelementptr inbounds float* %tmp10090, i64 1 + %tmp10092 = getelementptr inbounds float* %tmp10091, i64 1 + %tmp10093 = getelementptr inbounds float* %tmp10092, i64 1 + %tmp10094 = getelementptr inbounds float* %tmp10093, i64 1 + %tmp10095 = getelementptr inbounds float* %tmp10094, i64 1 + %tmp10096 = getelementptr inbounds float* %tmp10095, i64 1 + %tmp10097 = getelementptr inbounds float* %tmp10096, i64 1 + %tmp10098 = getelementptr inbounds float* %tmp10097, i64 1 + %tmp10099 = getelementptr 
inbounds float* %tmp10098, i64 1 + %tmp10100 = getelementptr inbounds float* %tmp10099, i64 1 + %tmp10101 = getelementptr inbounds float* %tmp10100, i64 1 + %tmp10102 = getelementptr inbounds float* %tmp10101, i64 1 + %tmp10103 = getelementptr inbounds float* %tmp10102, i64 1 + %tmp10104 = getelementptr inbounds float* %tmp10103, i64 1 + %tmp10105 = getelementptr inbounds float* %tmp10104, i64 1 + %tmp10106 = getelementptr inbounds float* %tmp10105, i64 1 + %tmp10107 = getelementptr inbounds float* %tmp10106, i64 1 + %tmp10108 = getelementptr inbounds float* %tmp10107, i64 1 + %tmp10109 = getelementptr inbounds float* %tmp10108, i64 1 + %tmp10110 = getelementptr inbounds float* %tmp10109, i64 1 + %tmp10111 = getelementptr inbounds float* %tmp10110, i64 1 + %tmp10112 = getelementptr inbounds float* %tmp10111, i64 1 + %tmp10113 = getelementptr inbounds float* %tmp10112, i64 1 + %tmp10114 = getelementptr inbounds float* %tmp10113, i64 1 + %tmp10115 = getelementptr inbounds float* %tmp10114, i64 1 + %tmp10116 = getelementptr inbounds float* %tmp10115, i64 1 + %tmp10117 = getelementptr inbounds float* %tmp10116, i64 1 + %tmp10118 = getelementptr inbounds float* %tmp10117, i64 1 + %tmp10119 = getelementptr inbounds float* %tmp10118, i64 1 + %tmp10120 = getelementptr inbounds float* %tmp10119, i64 1 + %tmp10121 = getelementptr inbounds float* %tmp10120, i64 1 + %tmp10122 = getelementptr inbounds float* %tmp10121, i64 1 + %tmp10123 = getelementptr inbounds float* %tmp10122, i64 1 + %tmp10124 = getelementptr inbounds float* %tmp10123, i64 1 + %tmp10125 = getelementptr inbounds float* %tmp10124, i64 1 + %tmp10126 = getelementptr inbounds float* %tmp10125, i64 1 + %tmp10127 = getelementptr inbounds float* %tmp10126, i64 1 + %tmp10128 = getelementptr inbounds float* %tmp10127, i64 1 + %tmp10129 = getelementptr inbounds float* %tmp10128, i64 1 + %tmp10130 = getelementptr inbounds float* %tmp10129, i64 1 + %tmp10131 = getelementptr inbounds float* %tmp10130, i64 1 + %tmp10132 = 
getelementptr inbounds float* %tmp10131, i64 1 + %tmp10133 = getelementptr inbounds float* %tmp10132, i64 1 + %tmp10134 = getelementptr inbounds float* %tmp10133, i64 1 + %tmp10135 = getelementptr inbounds float* %tmp10134, i64 1 + %tmp10136 = getelementptr inbounds float* %tmp10135, i64 1 + %tmp10137 = getelementptr inbounds float* %tmp10136, i64 1 + %tmp10138 = getelementptr inbounds float* %tmp10137, i64 1 + %tmp10139 = getelementptr inbounds float* %tmp10138, i64 1 + %tmp10140 = getelementptr inbounds float* %tmp10139, i64 1 + %tmp10141 = getelementptr inbounds float* %tmp10140, i64 1 + %tmp10142 = getelementptr inbounds float* %tmp10141, i64 1 + %tmp10143 = getelementptr inbounds float* %tmp10142, i64 1 + %tmp10144 = getelementptr inbounds float* %tmp10143, i64 1 + %tmp10145 = getelementptr inbounds float* %tmp10144, i64 1 + %tmp10146 = getelementptr inbounds float* %tmp10145, i64 1 + %tmp10147 = getelementptr inbounds float* %tmp10146, i64 1 + %tmp10148 = getelementptr inbounds float* %tmp10147, i64 1 + %tmp10149 = getelementptr inbounds float* %tmp10148, i64 1 + %tmp10150 = getelementptr inbounds float* %tmp10149, i64 1 + %tmp10151 = getelementptr inbounds float* %tmp10150, i64 1 + %tmp10152 = getelementptr inbounds float* %tmp10151, i64 1 + %tmp10153 = getelementptr inbounds float* %tmp10152, i64 1 + %tmp10154 = getelementptr inbounds float* %tmp10153, i64 1 + %tmp10155 = getelementptr inbounds float* %tmp10154, i64 1 + %tmp10156 = getelementptr inbounds float* %tmp10155, i64 1 + %tmp10157 = getelementptr inbounds float* %tmp10156, i64 1 + %tmp10158 = getelementptr inbounds float* %tmp10157, i64 1 + %tmp10159 = getelementptr inbounds float* %tmp10158, i64 1 + %tmp10160 = getelementptr inbounds float* %tmp10159, i64 1 + %tmp10161 = getelementptr inbounds float* %tmp10160, i64 1 + %tmp10162 = getelementptr inbounds float* %tmp10161, i64 1 + %tmp10163 = getelementptr inbounds float* %tmp10162, i64 1 + %tmp10164 = getelementptr inbounds float* %tmp10163, i64 1 
+ %tmp10165 = getelementptr inbounds float* %tmp10164, i64 1 + %tmp10166 = getelementptr inbounds float* %tmp10165, i64 1 + %tmp10167 = getelementptr inbounds float* %tmp10166, i64 1 + %tmp10168 = getelementptr inbounds float* %tmp10167, i64 1 + %tmp10169 = getelementptr inbounds float* %tmp10168, i64 1 + %tmp10170 = getelementptr inbounds float* %tmp10169, i64 1 + %tmp10171 = getelementptr inbounds float* %tmp10170, i64 1 + %tmp10172 = getelementptr inbounds float* %tmp10171, i64 1 + %tmp10173 = getelementptr inbounds float* %tmp10172, i64 1 + %tmp10174 = getelementptr inbounds float* %tmp10173, i64 1 + %tmp10175 = getelementptr inbounds float* %tmp10174, i64 1 + %tmp10176 = getelementptr inbounds float* %tmp10175, i64 1 + %tmp10177 = getelementptr inbounds float* %tmp10176, i64 1 + %tmp10178 = getelementptr inbounds float* %tmp10177, i64 1 + %tmp10179 = getelementptr inbounds float* %tmp10178, i64 1 + %tmp10180 = getelementptr inbounds float* %tmp10179, i64 1 + %tmp10181 = getelementptr inbounds float* %tmp10180, i64 1 + %tmp10182 = getelementptr inbounds float* %tmp10181, i64 1 + %tmp10183 = getelementptr inbounds float* %tmp10182, i64 1 + %tmp10184 = getelementptr inbounds float* %tmp10183, i64 1 + %tmp10185 = getelementptr inbounds float* %tmp10184, i64 1 + %tmp10186 = getelementptr inbounds float* %tmp10185, i64 1 + %tmp10187 = getelementptr inbounds float* %tmp10186, i64 1 + %tmp10188 = getelementptr inbounds float* %tmp10187, i64 1 + %tmp10189 = getelementptr inbounds float* %tmp10188, i64 1 + %tmp10190 = getelementptr inbounds float* %tmp10189, i64 1 + %tmp10191 = getelementptr inbounds float* %tmp10190, i64 1 + %tmp10192 = getelementptr inbounds float* %tmp10191, i64 1 + %tmp10193 = getelementptr inbounds float* %tmp10192, i64 1 + %tmp10194 = getelementptr inbounds float* %tmp10193, i64 1 + %tmp10195 = getelementptr inbounds float* %tmp10194, i64 1 + %tmp10196 = getelementptr inbounds float* %tmp10195, i64 1 + %tmp10197 = getelementptr inbounds float* 
%tmp10196, i64 1 + %tmp10198 = getelementptr inbounds float* %tmp10197, i64 1 + %tmp10199 = getelementptr inbounds float* %tmp10198, i64 1 + %tmp10200 = getelementptr inbounds float* %tmp10199, i64 1 + %tmp10201 = getelementptr inbounds float* %tmp10200, i64 1 + %tmp10202 = getelementptr inbounds float* %tmp10201, i64 1 + %tmp10203 = getelementptr inbounds float* %tmp10202, i64 1 + %tmp10204 = getelementptr inbounds float* %tmp10203, i64 1 + %tmp10205 = getelementptr inbounds float* %tmp10204, i64 1 + %tmp10206 = getelementptr inbounds float* %tmp10205, i64 1 + %tmp10207 = getelementptr inbounds float* %tmp10206, i64 1 + %tmp10208 = getelementptr inbounds float* %tmp10207, i64 1 + %tmp10209 = getelementptr inbounds float* %tmp10208, i64 1 + %tmp10210 = getelementptr inbounds float* %tmp10209, i64 1 + %tmp10211 = getelementptr inbounds float* %tmp10210, i64 1 + %tmp10212 = getelementptr inbounds float* %tmp10211, i64 1 + %tmp10213 = getelementptr inbounds float* %tmp10212, i64 1 + %tmp10214 = getelementptr inbounds float* %tmp10213, i64 1 + %tmp10215 = getelementptr inbounds float* %tmp10214, i64 1 + %tmp10216 = getelementptr inbounds float* %tmp10215, i64 1 + %tmp10217 = getelementptr inbounds float* %tmp10216, i64 1 + %tmp10218 = getelementptr inbounds float* %tmp10217, i64 1 + %tmp10219 = getelementptr inbounds float* %tmp10218, i64 1 + %tmp10220 = getelementptr inbounds float* %tmp10219, i64 1 + %tmp10221 = getelementptr inbounds float* %tmp10220, i64 1 + %tmp10222 = getelementptr inbounds float* %tmp10221, i64 1 + %tmp10223 = getelementptr inbounds float* %tmp10222, i64 1 + %tmp10224 = getelementptr inbounds float* %tmp10223, i64 1 + %tmp10225 = getelementptr inbounds float* %tmp10224, i64 1 + %tmp10226 = getelementptr inbounds float* %tmp10225, i64 1 + %tmp10227 = getelementptr inbounds float* %tmp10226, i64 1 + %tmp10228 = getelementptr inbounds float* %tmp10227, i64 1 + %tmp10229 = getelementptr inbounds float* %tmp10228, i64 1 + %tmp10230 = getelementptr 
inbounds float* %tmp10229, i64 1 + %tmp10231 = getelementptr inbounds float* %tmp10230, i64 1 + %tmp10232 = getelementptr inbounds float* %tmp10231, i64 1 + %tmp10233 = getelementptr inbounds float* %tmp10232, i64 1 + %tmp10234 = getelementptr inbounds float* %tmp10233, i64 1 + %tmp10235 = getelementptr inbounds float* %tmp10234, i64 1 + %tmp10236 = getelementptr inbounds float* %tmp10235, i64 1 + %tmp10237 = getelementptr inbounds float* %tmp10236, i64 1 + %tmp10238 = getelementptr inbounds float* %tmp10237, i64 1 + %tmp10239 = getelementptr inbounds float* %tmp10238, i64 1 + %tmp10240 = getelementptr inbounds float* %tmp10239, i64 1 + %tmp10241 = getelementptr inbounds float* %tmp10240, i64 1 + %tmp10242 = getelementptr inbounds float* %tmp10241, i64 1 + %tmp10243 = getelementptr inbounds float* %tmp10242, i64 1 + %tmp10244 = getelementptr inbounds float* %tmp10243, i64 1 + %tmp10245 = getelementptr inbounds float* %tmp10244, i64 1 + %tmp10246 = getelementptr inbounds float* %tmp10245, i64 1 + %tmp10247 = getelementptr inbounds float* %tmp10246, i64 1 + %tmp10248 = getelementptr inbounds float* %tmp10247, i64 1 + %tmp10249 = getelementptr inbounds float* %tmp10248, i64 1 + %tmp10250 = getelementptr inbounds float* %tmp10249, i64 1 + %tmp10251 = getelementptr inbounds float* %tmp10250, i64 1 + %tmp10252 = getelementptr inbounds float* %tmp10251, i64 1 + %tmp10253 = getelementptr inbounds float* %tmp10252, i64 1 + %tmp10254 = getelementptr inbounds float* %tmp10253, i64 1 + %tmp10255 = getelementptr inbounds float* %tmp10254, i64 1 + %tmp10256 = getelementptr inbounds float* %tmp10255, i64 1 + %tmp10257 = getelementptr inbounds float* %tmp10256, i64 1 + %tmp10258 = getelementptr inbounds float* %tmp10257, i64 1 + %tmp10259 = getelementptr inbounds float* %tmp10258, i64 1 + %tmp10260 = getelementptr inbounds float* %tmp10259, i64 1 + %tmp10261 = getelementptr inbounds float* %tmp10260, i64 1 + %tmp10262 = getelementptr inbounds float* %tmp10261, i64 1 + %tmp10263 = 
getelementptr inbounds float* %tmp10262, i64 1 + %tmp10264 = getelementptr inbounds float* %tmp10263, i64 1 + %tmp10265 = getelementptr inbounds float* %tmp10264, i64 1 + %tmp10266 = getelementptr inbounds float* %tmp10265, i64 1 + %tmp10267 = getelementptr inbounds float* %tmp10266, i64 1 + %tmp10268 = getelementptr inbounds float* %tmp10267, i64 1 + %tmp10269 = getelementptr inbounds float* %tmp10268, i64 1 + %tmp10270 = getelementptr inbounds float* %tmp10269, i64 1 + %tmp10271 = getelementptr inbounds float* %tmp10270, i64 1 + %tmp10272 = getelementptr inbounds float* %tmp10271, i64 1 + %tmp10273 = getelementptr inbounds float* %tmp10272, i64 1 + %tmp10274 = getelementptr inbounds float* %tmp10273, i64 1 + %tmp10275 = getelementptr inbounds float* %tmp10274, i64 1 + %tmp10276 = getelementptr inbounds float* %tmp10275, i64 1 + %tmp10277 = getelementptr inbounds float* %tmp10276, i64 1 + %tmp10278 = getelementptr inbounds float* %tmp10277, i64 1 + %tmp10279 = getelementptr inbounds float* %tmp10278, i64 1 + %tmp10280 = getelementptr inbounds float* %tmp10279, i64 1 + %tmp10281 = getelementptr inbounds float* %tmp10280, i64 1 + %tmp10282 = getelementptr inbounds float* %tmp10281, i64 1 + %tmp10283 = getelementptr inbounds float* %tmp10282, i64 1 + %tmp10284 = getelementptr inbounds float* %tmp10283, i64 1 + %tmp10285 = getelementptr inbounds float* %tmp10284, i64 1 + %tmp10286 = getelementptr inbounds float* %tmp10285, i64 1 + %tmp10287 = getelementptr inbounds float* %tmp10286, i64 1 + %tmp10288 = getelementptr inbounds float* %tmp10287, i64 1 + %tmp10289 = getelementptr inbounds float* %tmp10288, i64 1 + %tmp10290 = getelementptr inbounds float* %tmp10289, i64 1 + %tmp10291 = getelementptr inbounds float* %tmp10290, i64 1 + %tmp10292 = getelementptr inbounds float* %tmp10291, i64 1 + %tmp10293 = getelementptr inbounds float* %tmp10292, i64 1 + %tmp10294 = getelementptr inbounds float* %tmp10293, i64 1 + %tmp10295 = getelementptr inbounds float* %tmp10294, i64 1 
+ %tmp10296 = getelementptr inbounds float* %tmp10295, i64 1 + %tmp10297 = getelementptr inbounds float* %tmp10296, i64 1 + %tmp10298 = getelementptr inbounds float* %tmp10297, i64 1 + %tmp10299 = getelementptr inbounds float* %tmp10298, i64 1 + %tmp10300 = getelementptr inbounds float* %tmp10299, i64 1 + %tmp10301 = getelementptr inbounds float* %tmp10300, i64 1 + %tmp10302 = getelementptr inbounds float* %tmp10301, i64 1 + %tmp10303 = getelementptr inbounds float* %tmp10302, i64 1 + %tmp10304 = getelementptr inbounds float* %tmp10303, i64 1 + %tmp10305 = getelementptr inbounds float* %tmp10304, i64 1 + %tmp10306 = getelementptr inbounds float* %tmp10305, i64 1 + %tmp10307 = getelementptr inbounds float* %tmp10306, i64 1 + %tmp10308 = getelementptr inbounds float* %tmp10307, i64 1 + %tmp10309 = getelementptr inbounds float* %tmp10308, i64 1 + %tmp10310 = getelementptr inbounds float* %tmp10309, i64 1 + %tmp10311 = getelementptr inbounds float* %tmp10310, i64 1 + %tmp10312 = getelementptr inbounds float* %tmp10311, i64 1 + %tmp10313 = getelementptr inbounds float* %tmp10312, i64 1 + %tmp10314 = getelementptr inbounds float* %tmp10313, i64 1 + %tmp10315 = getelementptr inbounds float* %tmp10314, i64 1 + %tmp10316 = getelementptr inbounds float* %tmp10315, i64 1 + %tmp10317 = getelementptr inbounds float* %tmp10316, i64 1 + %tmp10318 = getelementptr inbounds float* %tmp10317, i64 1 + %tmp10319 = getelementptr inbounds float* %tmp10318, i64 1 + %tmp10320 = getelementptr inbounds float* %tmp10319, i64 1 + %tmp10321 = getelementptr inbounds float* %tmp10320, i64 1 + %tmp10322 = getelementptr inbounds float* %tmp10321, i64 1 + %tmp10323 = getelementptr inbounds float* %tmp10322, i64 1 + %tmp10324 = getelementptr inbounds float* %tmp10323, i64 1 + %tmp10325 = getelementptr inbounds float* %tmp10324, i64 1 + %tmp10326 = getelementptr inbounds float* %tmp10325, i64 1 + %tmp10327 = getelementptr inbounds float* %tmp10326, i64 1 + %tmp10328 = getelementptr inbounds float* 
%tmp10327, i64 1 + %tmp10329 = getelementptr inbounds float* %tmp10328, i64 1 + %tmp10330 = getelementptr inbounds float* %tmp10329, i64 1 + %tmp10331 = getelementptr inbounds float* %tmp10330, i64 1 + %tmp10332 = getelementptr inbounds float* %tmp10331, i64 1 + %tmp10333 = getelementptr inbounds float* %tmp10332, i64 1 + %tmp10334 = getelementptr inbounds float* %tmp10333, i64 1 + %tmp10335 = getelementptr inbounds float* %tmp10334, i64 1 + %tmp10336 = getelementptr inbounds float* %tmp10335, i64 1 + %tmp10337 = getelementptr inbounds float* %tmp10336, i64 1 + %tmp10338 = getelementptr inbounds float* %tmp10337, i64 1 + %tmp10339 = getelementptr inbounds float* %tmp10338, i64 1 + %tmp10340 = getelementptr inbounds float* %tmp10339, i64 1 + %tmp10341 = getelementptr inbounds float* %tmp10340, i64 1 + %tmp10342 = getelementptr inbounds float* %tmp10341, i64 1 + %tmp10343 = getelementptr inbounds float* %tmp10342, i64 1 + %tmp10344 = getelementptr inbounds float* %tmp10343, i64 1 + %tmp10345 = getelementptr inbounds float* %tmp10344, i64 1 + %tmp10346 = getelementptr inbounds float* %tmp10345, i64 1 + %tmp10347 = getelementptr inbounds float* %tmp10346, i64 1 + %tmp10348 = getelementptr inbounds float* %tmp10347, i64 1 + %tmp10349 = getelementptr inbounds float* %tmp10348, i64 1 + %tmp10350 = getelementptr inbounds float* %tmp10349, i64 1 + %tmp10351 = getelementptr inbounds float* %tmp10350, i64 1 + %tmp10352 = getelementptr inbounds float* %tmp10351, i64 1 + %tmp10353 = getelementptr inbounds float* %tmp10352, i64 1 + %tmp10354 = getelementptr inbounds float* %tmp10353, i64 1 + %tmp10355 = getelementptr inbounds float* %tmp10354, i64 1 + %tmp10356 = getelementptr inbounds float* %tmp10355, i64 1 + %tmp10357 = getelementptr inbounds float* %tmp10356, i64 1 + %tmp10358 = getelementptr inbounds float* %tmp10357, i64 1 + %tmp10359 = getelementptr inbounds float* %tmp10358, i64 1 + %tmp10360 = getelementptr inbounds float* %tmp10359, i64 1 + %tmp10361 = getelementptr 
inbounds float* %tmp10360, i64 1 + %tmp10362 = getelementptr inbounds float* %tmp10361, i64 1 + %tmp10363 = getelementptr inbounds float* %tmp10362, i64 1 + %tmp10364 = getelementptr inbounds float* %tmp10363, i64 1 + %tmp10365 = getelementptr inbounds float* %tmp10364, i64 1 + %tmp10366 = getelementptr inbounds float* %tmp10365, i64 1 + %tmp10367 = getelementptr inbounds float* %tmp10366, i64 1 + %tmp10368 = getelementptr inbounds float* %tmp10367, i64 1 + %tmp10369 = getelementptr inbounds float* %tmp10368, i64 1 + %tmp10370 = getelementptr inbounds float* %tmp10369, i64 1 + %tmp10371 = getelementptr inbounds float* %tmp10370, i64 1 + %tmp10372 = getelementptr inbounds float* %tmp10371, i64 1 + %tmp10373 = getelementptr inbounds float* %tmp10372, i64 1 + %tmp10374 = getelementptr inbounds float* %tmp10373, i64 1 + %tmp10375 = getelementptr inbounds float* %tmp10374, i64 1 + %tmp10376 = getelementptr inbounds float* %tmp10375, i64 1 + %tmp10377 = getelementptr inbounds float* %tmp10376, i64 1 + %tmp10378 = getelementptr inbounds float* %tmp10377, i64 1 + %tmp10379 = getelementptr inbounds float* %tmp10378, i64 1 + %tmp10380 = getelementptr inbounds float* %tmp10379, i64 1 + %tmp10381 = getelementptr inbounds float* %tmp10380, i64 1 + %tmp10382 = getelementptr inbounds float* %tmp10381, i64 1 + %tmp10383 = getelementptr inbounds float* %tmp10382, i64 1 + %tmp10384 = getelementptr inbounds float* %tmp10383, i64 1 + %tmp10385 = getelementptr inbounds float* %tmp10384, i64 1 + %tmp10386 = getelementptr inbounds float* %tmp10385, i64 1 + %tmp10387 = getelementptr inbounds float* %tmp10386, i64 1 + %tmp10388 = getelementptr inbounds float* %tmp10387, i64 1 + %tmp10389 = getelementptr inbounds float* %tmp10388, i64 1 + %tmp10390 = getelementptr inbounds float* %tmp10389, i64 1 + %tmp10391 = getelementptr inbounds float* %tmp10390, i64 1 + %tmp10392 = getelementptr inbounds float* %tmp10391, i64 1 + %tmp10393 = getelementptr inbounds float* %tmp10392, i64 1 + %tmp10394 = 
getelementptr inbounds float* %tmp10393, i64 1 + %tmp10395 = getelementptr inbounds float* %tmp10394, i64 1 + %tmp10396 = getelementptr inbounds float* %tmp10395, i64 1 + %tmp10397 = getelementptr inbounds float* %tmp10396, i64 1 + %tmp10398 = getelementptr inbounds float* %tmp10397, i64 1 + %tmp10399 = getelementptr inbounds float* %tmp10398, i64 1 + %tmp10400 = getelementptr inbounds float* %tmp10399, i64 1 + %tmp10401 = getelementptr inbounds float* %tmp10400, i64 1 + %tmp10402 = getelementptr inbounds float* %tmp10401, i64 1 + %tmp10403 = getelementptr inbounds float* %tmp10402, i64 1 + %tmp10404 = getelementptr inbounds float* %tmp10403, i64 1 + %tmp10405 = getelementptr inbounds float* %tmp10404, i64 1 + %tmp10406 = getelementptr inbounds float* %tmp10405, i64 1 + %tmp10407 = getelementptr inbounds float* %tmp10406, i64 1 + %tmp10408 = getelementptr inbounds float* %tmp10407, i64 1 + %tmp10409 = getelementptr inbounds float* %tmp10408, i64 1 + %tmp10410 = getelementptr inbounds float* %tmp10409, i64 1 + %tmp10411 = getelementptr inbounds float* %tmp10410, i64 1 + %tmp10412 = getelementptr inbounds float* %tmp10411, i64 1 + %tmp10413 = getelementptr inbounds float* %tmp10412, i64 1 + %tmp10414 = getelementptr inbounds float* %tmp10413, i64 1 + %tmp10415 = getelementptr inbounds float* %tmp10414, i64 1 + %tmp10416 = getelementptr inbounds float* %tmp10415, i64 1 + %tmp10417 = getelementptr inbounds float* %tmp10416, i64 1 + %tmp10418 = getelementptr inbounds float* %tmp10417, i64 1 + %tmp10419 = getelementptr inbounds float* %tmp10418, i64 1 + %tmp10420 = getelementptr inbounds float* %tmp10419, i64 1 + %tmp10421 = getelementptr inbounds float* %tmp10420, i64 1 + %tmp10422 = getelementptr inbounds float* %tmp10421, i64 1 + %tmp10423 = getelementptr inbounds float* %tmp10422, i64 1 + %tmp10424 = getelementptr inbounds float* %tmp10423, i64 1 + %tmp10425 = getelementptr inbounds float* %tmp10424, i64 1 + %tmp10426 = getelementptr inbounds float* %tmp10425, i64 1 
+ %tmp10427 = getelementptr inbounds float* %tmp10426, i64 1 + %tmp10428 = getelementptr inbounds float* %tmp10427, i64 1 + %tmp10429 = getelementptr inbounds float* %tmp10428, i64 1 + %tmp10430 = getelementptr inbounds float* %tmp10429, i64 1 + %tmp10431 = getelementptr inbounds float* %tmp10430, i64 1 + %tmp10432 = getelementptr inbounds float* %tmp10431, i64 1 + %tmp10433 = getelementptr inbounds float* %tmp10432, i64 1 + %tmp10434 = getelementptr inbounds float* %tmp10433, i64 1 + %tmp10435 = getelementptr inbounds float* %tmp10434, i64 1 + %tmp10436 = getelementptr inbounds float* %tmp10435, i64 1 + %tmp10437 = getelementptr inbounds float* %tmp10436, i64 1 + %tmp10438 = getelementptr inbounds float* %tmp10437, i64 1 + %tmp10439 = getelementptr inbounds float* %tmp10438, i64 1 + %tmp10440 = getelementptr inbounds float* %tmp10439, i64 1 + %tmp10441 = getelementptr inbounds float* %tmp10440, i64 1 + %tmp10442 = getelementptr inbounds float* %tmp10441, i64 1 + %tmp10443 = getelementptr inbounds float* %tmp10442, i64 1 + %tmp10444 = getelementptr inbounds float* %tmp10443, i64 1 + %tmp10445 = getelementptr inbounds float* %tmp10444, i64 1 + %tmp10446 = getelementptr inbounds float* %tmp10445, i64 1 + %tmp10447 = getelementptr inbounds float* %tmp10446, i64 1 + %tmp10448 = getelementptr inbounds float* %tmp10447, i64 1 + %tmp10449 = getelementptr inbounds float* %tmp10448, i64 1 + %tmp10450 = getelementptr inbounds float* %tmp10449, i64 1 + %tmp10451 = getelementptr inbounds float* %tmp10450, i64 1 + %tmp10452 = getelementptr inbounds float* %tmp10451, i64 1 + %tmp10453 = getelementptr inbounds float* %tmp10452, i64 1 + %tmp10454 = getelementptr inbounds float* %tmp10453, i64 1 + %tmp10455 = getelementptr inbounds float* %tmp10454, i64 1 + %tmp10456 = getelementptr inbounds float* %tmp10455, i64 1 + %tmp10457 = getelementptr inbounds float* %tmp10456, i64 1 + %tmp10458 = getelementptr inbounds float* %tmp10457, i64 1 + %tmp10459 = getelementptr inbounds float* 
%tmp10458, i64 1 + %tmp10460 = getelementptr inbounds float* %tmp10459, i64 1 + %tmp10461 = getelementptr inbounds float* %tmp10460, i64 1 + %tmp10462 = getelementptr inbounds float* %tmp10461, i64 1 + %tmp10463 = getelementptr inbounds float* %tmp10462, i64 1 + %tmp10464 = getelementptr inbounds float* %tmp10463, i64 1 + %tmp10465 = getelementptr inbounds float* %tmp10464, i64 1 + %tmp10466 = getelementptr inbounds float* %tmp10465, i64 1 + %tmp10467 = getelementptr inbounds float* %tmp10466, i64 1 + %tmp10468 = getelementptr inbounds float* %tmp10467, i64 1 + %tmp10469 = getelementptr inbounds float* %tmp10468, i64 1 + %tmp10470 = getelementptr inbounds float* %tmp10469, i64 1 + %tmp10471 = getelementptr inbounds float* %tmp10470, i64 1 + %tmp10472 = getelementptr inbounds float* %tmp10471, i64 1 + %tmp10473 = getelementptr inbounds float* %tmp10472, i64 1 + %tmp10474 = getelementptr inbounds float* %tmp10473, i64 1 + %tmp10475 = getelementptr inbounds float* %tmp10474, i64 1 + %tmp10476 = getelementptr inbounds float* %tmp10475, i64 1 + %tmp10477 = getelementptr inbounds float* %tmp10476, i64 1 + %tmp10478 = getelementptr inbounds float* %tmp10477, i64 1 + %tmp10479 = getelementptr inbounds float* %tmp10478, i64 1 + %tmp10480 = getelementptr inbounds float* %tmp10479, i64 1 + %tmp10481 = getelementptr inbounds float* %tmp10480, i64 1 + %tmp10482 = getelementptr inbounds float* %tmp10481, i64 1 + %tmp10483 = getelementptr inbounds float* %tmp10482, i64 1 + %tmp10484 = getelementptr inbounds float* %tmp10483, i64 1 + %tmp10485 = getelementptr inbounds float* %tmp10484, i64 1 + %tmp10486 = getelementptr inbounds float* %tmp10485, i64 1 + %tmp10487 = getelementptr inbounds float* %tmp10486, i64 1 + %tmp10488 = getelementptr inbounds float* %tmp10487, i64 1 + %tmp10489 = getelementptr inbounds float* %tmp10488, i64 1 + %tmp10490 = getelementptr inbounds float* %tmp10489, i64 1 + %tmp10491 = getelementptr inbounds float* %tmp10490, i64 1 + %tmp10492 = getelementptr 
inbounds float* %tmp10491, i64 1 + %tmp10493 = getelementptr inbounds float* %tmp10492, i64 1 + %tmp10494 = getelementptr inbounds float* %tmp10493, i64 1 + %tmp10495 = getelementptr inbounds float* %tmp10494, i64 1 + %tmp10496 = getelementptr inbounds float* %tmp10495, i64 1 + %tmp10497 = getelementptr inbounds float* %tmp10496, i64 1 + %tmp10498 = getelementptr inbounds float* %tmp10497, i64 1 + %tmp10499 = getelementptr inbounds float* %tmp10498, i64 1 + %tmp10500 = getelementptr inbounds float* %tmp10499, i64 1 + %tmp10501 = getelementptr inbounds float* %tmp10500, i64 1 + %tmp10502 = getelementptr inbounds float* %tmp10501, i64 1 + %tmp10503 = getelementptr inbounds float* %tmp10502, i64 1 + %tmp10504 = getelementptr inbounds float* %tmp10503, i64 1 + %tmp10505 = getelementptr inbounds float* %tmp10504, i64 1 + %tmp10506 = getelementptr inbounds float* %tmp10505, i64 1 + %tmp10507 = getelementptr inbounds float* %tmp10506, i64 1 + %tmp10508 = getelementptr inbounds float* %tmp10507, i64 1 + %tmp10509 = getelementptr inbounds float* %tmp10508, i64 1 + %tmp10510 = getelementptr inbounds float* %tmp10509, i64 1 + %tmp10511 = getelementptr inbounds float* %tmp10510, i64 1 + %tmp10512 = getelementptr inbounds float* %tmp10511, i64 1 + %tmp10513 = getelementptr inbounds float* %tmp10512, i64 1 + %tmp10514 = getelementptr inbounds float* %tmp10513, i64 1 + %tmp10515 = getelementptr inbounds float* %tmp10514, i64 1 + %tmp10516 = getelementptr inbounds float* %tmp10515, i64 1 + %tmp10517 = getelementptr inbounds float* %tmp10516, i64 1 + %tmp10518 = getelementptr inbounds float* %tmp10517, i64 1 + %tmp10519 = getelementptr inbounds float* %tmp10518, i64 1 + %tmp10520 = getelementptr inbounds float* %tmp10519, i64 1 + %tmp10521 = getelementptr inbounds float* %tmp10520, i64 1 + %tmp10522 = getelementptr inbounds float* %tmp10521, i64 1 + %tmp10523 = getelementptr inbounds float* %tmp10522, i64 1 + %tmp10524 = getelementptr inbounds float* %tmp10523, i64 1 + %tmp10525 = 
getelementptr inbounds float* %tmp10524, i64 1 + %tmp10526 = getelementptr inbounds float* %tmp10525, i64 1 + %tmp10527 = getelementptr inbounds float* %tmp10526, i64 1 + %tmp10528 = getelementptr inbounds float* %tmp10527, i64 1 + %tmp10529 = getelementptr inbounds float* %tmp10528, i64 1 + %tmp10530 = getelementptr inbounds float* %tmp10529, i64 1 + %tmp10531 = getelementptr inbounds float* %tmp10530, i64 1 + %tmp10532 = getelementptr inbounds float* %tmp10531, i64 1 + %tmp10533 = getelementptr inbounds float* %tmp10532, i64 1 + %tmp10534 = getelementptr inbounds float* %tmp10533, i64 1 + %tmp10535 = getelementptr inbounds float* %tmp10534, i64 1 + %tmp10536 = getelementptr inbounds float* %tmp10535, i64 1 + %tmp10537 = getelementptr inbounds float* %tmp10536, i64 1 + %tmp10538 = getelementptr inbounds float* %tmp10537, i64 1 + %tmp10539 = getelementptr inbounds float* %tmp10538, i64 1 + %tmp10540 = getelementptr inbounds float* %tmp10539, i64 1 + %tmp10541 = getelementptr inbounds float* %tmp10540, i64 1 + %tmp10542 = getelementptr inbounds float* %tmp10541, i64 1 + %tmp10543 = getelementptr inbounds float* %tmp10542, i64 1 + %tmp10544 = getelementptr inbounds float* %tmp10543, i64 1 + %tmp10545 = getelementptr inbounds float* %tmp10544, i64 1 + %tmp10546 = getelementptr inbounds float* %tmp10545, i64 1 + %tmp10547 = getelementptr inbounds float* %tmp10546, i64 1 + %tmp10548 = getelementptr inbounds float* %tmp10547, i64 1 + %tmp10549 = getelementptr inbounds float* %tmp10548, i64 1 + %tmp10550 = getelementptr inbounds float* %tmp10549, i64 1 + %tmp10551 = getelementptr inbounds float* %tmp10550, i64 1 + %tmp10552 = getelementptr inbounds float* %tmp10551, i64 1 + %tmp10553 = getelementptr inbounds float* %tmp10552, i64 1 + %tmp10554 = getelementptr inbounds float* %tmp10553, i64 1 + %tmp10555 = getelementptr inbounds float* %tmp10554, i64 1 + %tmp10556 = getelementptr inbounds float* %tmp10555, i64 1 + %tmp10557 = getelementptr inbounds float* %tmp10556, i64 1 
+ %tmp10558 = getelementptr inbounds float* %tmp10557, i64 1 + %tmp10559 = getelementptr inbounds float* %tmp10558, i64 1 + %tmp10560 = getelementptr inbounds float* %tmp10559, i64 1 + %tmp10561 = getelementptr inbounds float* %tmp10560, i64 1 + %tmp10562 = getelementptr inbounds float* %tmp10561, i64 1 + %tmp10563 = getelementptr inbounds float* %tmp10562, i64 1 + %tmp10564 = getelementptr inbounds float* %tmp10563, i64 1 + %tmp10565 = getelementptr inbounds float* %tmp10564, i64 1 + %tmp10566 = getelementptr inbounds float* %tmp10565, i64 1 + %tmp10567 = getelementptr inbounds float* %tmp10566, i64 1 + %tmp10568 = getelementptr inbounds float* %tmp10567, i64 1 + %tmp10569 = getelementptr inbounds float* %tmp10568, i64 1 + %tmp10570 = getelementptr inbounds float* %tmp10569, i64 1 + %tmp10571 = getelementptr inbounds float* %tmp10570, i64 1 + %tmp10572 = getelementptr inbounds float* %tmp10571, i64 1 + %tmp10573 = getelementptr inbounds float* %tmp10572, i64 1 + %tmp10574 = getelementptr inbounds float* %tmp10573, i64 1 + %tmp10575 = getelementptr inbounds float* %tmp10574, i64 1 + %tmp10576 = getelementptr inbounds float* %tmp10575, i64 1 + %tmp10577 = getelementptr inbounds float* %tmp10576, i64 1 + %tmp10578 = getelementptr inbounds float* %tmp10577, i64 1 + %tmp10579 = getelementptr inbounds float* %tmp10578, i64 1 + %tmp10580 = getelementptr inbounds float* %tmp10579, i64 1 + %tmp10581 = getelementptr inbounds float* %tmp10580, i64 1 + %tmp10582 = getelementptr inbounds float* %tmp10581, i64 1 + %tmp10583 = getelementptr inbounds float* %tmp10582, i64 1 + %tmp10584 = getelementptr inbounds float* %tmp10583, i64 1 + %tmp10585 = getelementptr inbounds float* %tmp10584, i64 1 + %tmp10586 = getelementptr inbounds float* %tmp10585, i64 1 + %tmp10587 = getelementptr inbounds float* %tmp10586, i64 1 + %tmp10588 = getelementptr inbounds float* %tmp10587, i64 1 + %tmp10589 = getelementptr inbounds float* %tmp10588, i64 1 + %tmp10590 = getelementptr inbounds float* 
%tmp10589, i64 1 + %tmp10591 = getelementptr inbounds float* %tmp10590, i64 1 + %tmp10592 = getelementptr inbounds float* %tmp10591, i64 1 + %tmp10593 = getelementptr inbounds float* %tmp10592, i64 1 + %tmp10594 = getelementptr inbounds float* %tmp10593, i64 1 + %tmp10595 = getelementptr inbounds float* %tmp10594, i64 1 + %tmp10596 = getelementptr inbounds float* %tmp10595, i64 1 + %tmp10597 = getelementptr inbounds float* %tmp10596, i64 1 + %tmp10598 = getelementptr inbounds float* %tmp10597, i64 1 + %tmp10599 = getelementptr inbounds float* %tmp10598, i64 1 + %tmp10600 = getelementptr inbounds float* %tmp10599, i64 1 + %tmp10601 = getelementptr inbounds float* %tmp10600, i64 1 + %tmp10602 = getelementptr inbounds float* %tmp10601, i64 1 + %tmp10603 = getelementptr inbounds float* %tmp10602, i64 1 + %tmp10604 = getelementptr inbounds float* %tmp10603, i64 1 + %tmp10605 = getelementptr inbounds float* %tmp10604, i64 1 + %tmp10606 = getelementptr inbounds float* %tmp10605, i64 1 + %tmp10607 = getelementptr inbounds float* %tmp10606, i64 1 + %tmp10608 = getelementptr inbounds float* %tmp10607, i64 1 + %tmp10609 = getelementptr inbounds float* %tmp10608, i64 1 + %tmp10610 = getelementptr inbounds float* %tmp10609, i64 1 + %tmp10611 = getelementptr inbounds float* %tmp10610, i64 1 + %tmp10612 = getelementptr inbounds float* %tmp10611, i64 1 + %tmp10613 = getelementptr inbounds float* %tmp10612, i64 1 + %tmp10614 = getelementptr inbounds float* %tmp10613, i64 1 + %tmp10615 = getelementptr inbounds float* %tmp10614, i64 1 + %tmp10616 = getelementptr inbounds float* %tmp10615, i64 1 + %tmp10617 = getelementptr inbounds float* %tmp10616, i64 1 + %tmp10618 = getelementptr inbounds float* %tmp10617, i64 1 + %tmp10619 = getelementptr inbounds float* %tmp10618, i64 1 + %tmp10620 = getelementptr inbounds float* %tmp10619, i64 1 + %tmp10621 = getelementptr inbounds float* %tmp10620, i64 1 + %tmp10622 = getelementptr inbounds float* %tmp10621, i64 1 + %tmp10623 = getelementptr 
inbounds float* %tmp10622, i64 1 + %tmp10624 = getelementptr inbounds float* %tmp10623, i64 1 + %tmp10625 = getelementptr inbounds float* %tmp10624, i64 1 + %tmp10626 = getelementptr inbounds float* %tmp10625, i64 1 + %tmp10627 = getelementptr inbounds float* %tmp10626, i64 1 + %tmp10628 = getelementptr inbounds float* %tmp10627, i64 1 + %tmp10629 = getelementptr inbounds float* %tmp10628, i64 1 + %tmp10630 = getelementptr inbounds float* %tmp10629, i64 1 + %tmp10631 = getelementptr inbounds float* %tmp10630, i64 1 + %tmp10632 = getelementptr inbounds float* %tmp10631, i64 1 + %tmp10633 = getelementptr inbounds float* %tmp10632, i64 1 + %tmp10634 = getelementptr inbounds float* %tmp10633, i64 1 + %tmp10635 = getelementptr inbounds float* %tmp10634, i64 1 + %tmp10636 = getelementptr inbounds float* %tmp10635, i64 1 + %tmp10637 = getelementptr inbounds float* %tmp10636, i64 1 + %tmp10638 = getelementptr inbounds float* %tmp10637, i64 1 + %tmp10639 = getelementptr inbounds float* %tmp10638, i64 1 + %tmp10640 = getelementptr inbounds float* %tmp10639, i64 1 + %tmp10641 = getelementptr inbounds float* %tmp10640, i64 1 + %tmp10642 = getelementptr inbounds float* %tmp10641, i64 1 + %tmp10643 = getelementptr inbounds float* %tmp10642, i64 1 + %tmp10644 = getelementptr inbounds float* %tmp10643, i64 1 + %tmp10645 = getelementptr inbounds float* %tmp10644, i64 1 + %tmp10646 = getelementptr inbounds float* %tmp10645, i64 1 + %tmp10647 = getelementptr inbounds float* %tmp10646, i64 1 + %tmp10648 = getelementptr inbounds float* %tmp10647, i64 1 + %tmp10649 = getelementptr inbounds float* %tmp10648, i64 1 + %tmp10650 = getelementptr inbounds float* %tmp10649, i64 1 + %tmp10651 = getelementptr inbounds float* %tmp10650, i64 1 + %tmp10652 = getelementptr inbounds float* %tmp10651, i64 1 + %tmp10653 = getelementptr inbounds float* %tmp10652, i64 1 + %tmp10654 = getelementptr inbounds float* %tmp10653, i64 1 + %tmp10655 = getelementptr inbounds float* %tmp10654, i64 1 + %tmp10656 = 
getelementptr inbounds float* %tmp10655, i64 1 + %tmp10657 = getelementptr inbounds float* %tmp10656, i64 1 + %tmp10658 = getelementptr inbounds float* %tmp10657, i64 1 + %tmp10659 = getelementptr inbounds float* %tmp10658, i64 1 + %tmp10660 = getelementptr inbounds float* %tmp10659, i64 1 + %tmp10661 = getelementptr inbounds float* %tmp10660, i64 1 + %tmp10662 = getelementptr inbounds float* %tmp10661, i64 1 + %tmp10663 = getelementptr inbounds float* %tmp10662, i64 1 + %tmp10664 = getelementptr inbounds float* %tmp10663, i64 1 + %tmp10665 = getelementptr inbounds float* %tmp10664, i64 1 + %tmp10666 = getelementptr inbounds float* %tmp10665, i64 1 + %tmp10667 = getelementptr inbounds float* %tmp10666, i64 1 + %tmp10668 = getelementptr inbounds float* %tmp10667, i64 1 + %tmp10669 = getelementptr inbounds float* %tmp10668, i64 1 + %tmp10670 = getelementptr inbounds float* %tmp10669, i64 1 + %tmp10671 = getelementptr inbounds float* %tmp10670, i64 1 + %tmp10672 = getelementptr inbounds float* %tmp10671, i64 1 + %tmp10673 = getelementptr inbounds float* %tmp10672, i64 1 + %tmp10674 = getelementptr inbounds float* %tmp10673, i64 1 + %tmp10675 = getelementptr inbounds float* %tmp10674, i64 1 + %tmp10676 = getelementptr inbounds float* %tmp10675, i64 1 + %tmp10677 = getelementptr inbounds float* %tmp10676, i64 1 + %tmp10678 = getelementptr inbounds float* %tmp10677, i64 1 + %tmp10679 = getelementptr inbounds float* %tmp10678, i64 1 + %tmp10680 = getelementptr inbounds float* %tmp10679, i64 1 + %tmp10681 = getelementptr inbounds float* %tmp10680, i64 1 + %tmp10682 = getelementptr inbounds float* %tmp10681, i64 1 + %tmp10683 = getelementptr inbounds float* %tmp10682, i64 1 + %tmp10684 = getelementptr inbounds float* %tmp10683, i64 1 + %tmp10685 = getelementptr inbounds float* %tmp10684, i64 1 + %tmp10686 = getelementptr inbounds float* %tmp10685, i64 1 + %tmp10687 = getelementptr inbounds float* %tmp10686, i64 1 + %tmp10688 = getelementptr inbounds float* %tmp10687, i64 1 
+ %tmp10689 = getelementptr inbounds float* %tmp10688, i64 1 + %tmp10690 = getelementptr inbounds float* %tmp10689, i64 1 + %tmp10691 = getelementptr inbounds float* %tmp10690, i64 1 + %tmp10692 = getelementptr inbounds float* %tmp10691, i64 1 + %tmp10693 = getelementptr inbounds float* %tmp10692, i64 1 + %tmp10694 = getelementptr inbounds float* %tmp10693, i64 1 + %tmp10695 = getelementptr inbounds float* %tmp10694, i64 1 + %tmp10696 = getelementptr inbounds float* %tmp10695, i64 1 + %tmp10697 = getelementptr inbounds float* %tmp10696, i64 1 + %tmp10698 = getelementptr inbounds float* %tmp10697, i64 1 + %tmp10699 = getelementptr inbounds float* %tmp10698, i64 1 + %tmp10700 = getelementptr inbounds float* %tmp10699, i64 1 + %tmp10701 = getelementptr inbounds float* %tmp10700, i64 1 + %tmp10702 = getelementptr inbounds float* %tmp10701, i64 1 + %tmp10703 = getelementptr inbounds float* %tmp10702, i64 1 + %tmp10704 = getelementptr inbounds float* %tmp10703, i64 1 + %tmp10705 = getelementptr inbounds float* %tmp10704, i64 1 + %tmp10706 = getelementptr inbounds float* %tmp10705, i64 1 + %tmp10707 = getelementptr inbounds float* %tmp10706, i64 1 + %tmp10708 = getelementptr inbounds float* %tmp10707, i64 1 + %tmp10709 = getelementptr inbounds float* %tmp10708, i64 1 + %tmp10710 = getelementptr inbounds float* %tmp10709, i64 1 + %tmp10711 = getelementptr inbounds float* %tmp10710, i64 1 + %tmp10712 = getelementptr inbounds float* %tmp10711, i64 1 + %tmp10713 = getelementptr inbounds float* %tmp10712, i64 1 + %tmp10714 = getelementptr inbounds float* %tmp10713, i64 1 + %tmp10715 = getelementptr inbounds float* %tmp10714, i64 1 + %tmp10716 = getelementptr inbounds float* %tmp10715, i64 1 + %tmp10717 = getelementptr inbounds float* %tmp10716, i64 1 + %tmp10718 = getelementptr inbounds float* %tmp10717, i64 1 + %tmp10719 = getelementptr inbounds float* %tmp10718, i64 1 + %tmp10720 = getelementptr inbounds float* %tmp10719, i64 1 + %tmp10721 = getelementptr inbounds float* 
%tmp10720, i64 1 + %tmp10722 = getelementptr inbounds float* %tmp10721, i64 1 + %tmp10723 = getelementptr inbounds float* %tmp10722, i64 1 + %tmp10724 = getelementptr inbounds float* %tmp10723, i64 1 + %tmp10725 = getelementptr inbounds float* %tmp10724, i64 1 + %tmp10726 = getelementptr inbounds float* %tmp10725, i64 1 + %tmp10727 = getelementptr inbounds float* %tmp10726, i64 1 + %tmp10728 = getelementptr inbounds float* %tmp10727, i64 1 + %tmp10729 = getelementptr inbounds float* %tmp10728, i64 1 + %tmp10730 = getelementptr inbounds float* %tmp10729, i64 1 + %tmp10731 = getelementptr inbounds float* %tmp10730, i64 1 + %tmp10732 = getelementptr inbounds float* %tmp10731, i64 1 + %tmp10733 = getelementptr inbounds float* %tmp10732, i64 1 + %tmp10734 = getelementptr inbounds float* %tmp10733, i64 1 + %tmp10735 = getelementptr inbounds float* %tmp10734, i64 1 + %tmp10736 = getelementptr inbounds float* %tmp10735, i64 1 + %tmp10737 = getelementptr inbounds float* %tmp10736, i64 1 + %tmp10738 = getelementptr inbounds float* %tmp10737, i64 1 + %tmp10739 = getelementptr inbounds float* %tmp10738, i64 1 + %tmp10740 = getelementptr inbounds float* %tmp10739, i64 1 + %tmp10741 = getelementptr inbounds float* %tmp10740, i64 1 + %tmp10742 = getelementptr inbounds float* %tmp10741, i64 1 + %tmp10743 = getelementptr inbounds float* %tmp10742, i64 1 + %tmp10744 = getelementptr inbounds float* %tmp10743, i64 1 + %tmp10745 = getelementptr inbounds float* %tmp10744, i64 1 + %tmp10746 = getelementptr inbounds float* %tmp10745, i64 1 + %tmp10747 = getelementptr inbounds float* %tmp10746, i64 1 + %tmp10748 = getelementptr inbounds float* %tmp10747, i64 1 + %tmp10749 = getelementptr inbounds float* %tmp10748, i64 1 + %tmp10750 = getelementptr inbounds float* %tmp10749, i64 1 + %tmp10751 = getelementptr inbounds float* %tmp10750, i64 1 + %tmp10752 = getelementptr inbounds float* %tmp10751, i64 1 + %tmp10753 = getelementptr inbounds float* %tmp10752, i64 1 + %tmp10754 = getelementptr 
inbounds float* %tmp10753, i64 1 + %tmp10755 = getelementptr inbounds float* %tmp10754, i64 1 + %tmp10756 = getelementptr inbounds float* %tmp10755, i64 1 + %tmp10757 = getelementptr inbounds float* %tmp10756, i64 1 + %tmp10758 = getelementptr inbounds float* %tmp10757, i64 1 + %tmp10759 = getelementptr inbounds float* %tmp10758, i64 1 + %tmp10760 = getelementptr inbounds float* %tmp10759, i64 1 + %tmp10761 = getelementptr inbounds float* %tmp10760, i64 1 + %tmp10762 = getelementptr inbounds float* %tmp10761, i64 1 + %tmp10763 = getelementptr inbounds float* %tmp10762, i64 1 + %tmp10764 = getelementptr inbounds float* %tmp10763, i64 1 + %tmp10765 = getelementptr inbounds float* %tmp10764, i64 1 + %tmp10766 = getelementptr inbounds float* %tmp10765, i64 1 + %tmp10767 = getelementptr inbounds float* %tmp10766, i64 1 + %tmp10768 = getelementptr inbounds float* %tmp10767, i64 1 + %tmp10769 = getelementptr inbounds float* %tmp10768, i64 1 + %tmp10770 = getelementptr inbounds float* %tmp10769, i64 1 + %tmp10771 = getelementptr inbounds float* %tmp10770, i64 1 + %tmp10772 = getelementptr inbounds float* %tmp10771, i64 1 + %tmp10773 = getelementptr inbounds float* %tmp10772, i64 1 + %tmp10774 = getelementptr inbounds float* %tmp10773, i64 1 + %tmp10775 = getelementptr inbounds float* %tmp10774, i64 1 + %tmp10776 = getelementptr inbounds float* %tmp10775, i64 1 + %tmp10777 = getelementptr inbounds float* %tmp10776, i64 1 + %tmp10778 = getelementptr inbounds float* %tmp10777, i64 1 + %tmp10779 = getelementptr inbounds float* %tmp10778, i64 1 + %tmp10780 = getelementptr inbounds float* %tmp10779, i64 1 + %tmp10781 = getelementptr inbounds float* %tmp10780, i64 1 + %tmp10782 = getelementptr inbounds float* %tmp10781, i64 1 + %tmp10783 = getelementptr inbounds float* %tmp10782, i64 1 + %tmp10784 = getelementptr inbounds float* %tmp10783, i64 1 + %tmp10785 = getelementptr inbounds float* %tmp10784, i64 1 + %tmp10786 = getelementptr inbounds float* %tmp10785, i64 1 + %tmp10787 = 
getelementptr inbounds float* %tmp10786, i64 1 + %tmp10788 = getelementptr inbounds float* %tmp10787, i64 1 + %tmp10789 = getelementptr inbounds float* %tmp10788, i64 1 + %tmp10790 = getelementptr inbounds float* %tmp10789, i64 1 + %tmp10791 = getelementptr inbounds float* %tmp10790, i64 1 + %tmp10792 = getelementptr inbounds float* %tmp10791, i64 1 + %tmp10793 = getelementptr inbounds float* %tmp10792, i64 1 + %tmp10794 = getelementptr inbounds float* %tmp10793, i64 1 + %tmp10795 = getelementptr inbounds float* %tmp10794, i64 1 + %tmp10796 = getelementptr inbounds float* %tmp10795, i64 1 + %tmp10797 = getelementptr inbounds float* %tmp10796, i64 1 + %tmp10798 = getelementptr inbounds float* %tmp10797, i64 1 + %tmp10799 = getelementptr inbounds float* %tmp10798, i64 1 + %tmp10800 = getelementptr inbounds float* %tmp10799, i64 1 + %tmp10801 = getelementptr inbounds float* %tmp10800, i64 1 + %tmp10802 = getelementptr inbounds float* %tmp10801, i64 1 + %tmp10803 = getelementptr inbounds float* %tmp10802, i64 1 + %tmp10804 = getelementptr inbounds float* %tmp10803, i64 1 + %tmp10805 = getelementptr inbounds float* %tmp10804, i64 1 + %tmp10806 = getelementptr inbounds float* %tmp10805, i64 1 + %tmp10807 = getelementptr inbounds float* %tmp10806, i64 1 + %tmp10808 = getelementptr inbounds float* %tmp10807, i64 1 + %tmp10809 = getelementptr inbounds float* %tmp10808, i64 1 + %tmp10810 = getelementptr inbounds float* %tmp10809, i64 1 + %tmp10811 = getelementptr inbounds float* %tmp10810, i64 1 + %tmp10812 = getelementptr inbounds float* %tmp10811, i64 1 + %tmp10813 = getelementptr inbounds float* %tmp10812, i64 1 + %tmp10814 = getelementptr inbounds float* %tmp10813, i64 1 + %tmp10815 = getelementptr inbounds float* %tmp10814, i64 1 + %tmp10816 = getelementptr inbounds float* %tmp10815, i64 1 + %tmp10817 = getelementptr inbounds float* %tmp10816, i64 1 + %tmp10818 = getelementptr inbounds float* %tmp10817, i64 1 + %tmp10819 = getelementptr inbounds float* %tmp10818, i64 1 
+ %tmp10820 = getelementptr inbounds float* %tmp10819, i64 1 + %tmp10821 = getelementptr inbounds float* %tmp10820, i64 1 + %tmp10822 = getelementptr inbounds float* %tmp10821, i64 1 + %tmp10823 = getelementptr inbounds float* %tmp10822, i64 1 + %tmp10824 = getelementptr inbounds float* %tmp10823, i64 1 + %tmp10825 = getelementptr inbounds float* %tmp10824, i64 1 + %tmp10826 = getelementptr inbounds float* %tmp10825, i64 1 + %tmp10827 = getelementptr inbounds float* %tmp10826, i64 1 + %tmp10828 = getelementptr inbounds float* %tmp10827, i64 1 + %tmp10829 = getelementptr inbounds float* %tmp10828, i64 1 + %tmp10830 = getelementptr inbounds float* %tmp10829, i64 1 + %tmp10831 = getelementptr inbounds float* %tmp10830, i64 1 + %tmp10832 = getelementptr inbounds float* %tmp10831, i64 1 + %tmp10833 = getelementptr inbounds float* %tmp10832, i64 1 + %tmp10834 = getelementptr inbounds float* %tmp10833, i64 1 + %tmp10835 = getelementptr inbounds float* %tmp10834, i64 1 + %tmp10836 = getelementptr inbounds float* %tmp10835, i64 1 + %tmp10837 = getelementptr inbounds float* %tmp10836, i64 1 + %tmp10838 = getelementptr inbounds float* %tmp10837, i64 1 + %tmp10839 = getelementptr inbounds float* %tmp10838, i64 1 + %tmp10840 = getelementptr inbounds float* %tmp10839, i64 1 + %tmp10841 = getelementptr inbounds float* %tmp10840, i64 1 + %tmp10842 = getelementptr inbounds float* %tmp10841, i64 1 + %tmp10843 = getelementptr inbounds float* %tmp10842, i64 1 + %tmp10844 = getelementptr inbounds float* %tmp10843, i64 1 + %tmp10845 = getelementptr inbounds float* %tmp10844, i64 1 + %tmp10846 = getelementptr inbounds float* %tmp10845, i64 1 + %tmp10847 = getelementptr inbounds float* %tmp10846, i64 1 + %tmp10848 = getelementptr inbounds float* %tmp10847, i64 1 + %tmp10849 = getelementptr inbounds float* %tmp10848, i64 1 + %tmp10850 = getelementptr inbounds float* %tmp10849, i64 1 + %tmp10851 = getelementptr inbounds float* %tmp10850, i64 1 + %tmp10852 = getelementptr inbounds float* 
%tmp10851, i64 1 + %tmp10853 = getelementptr inbounds float* %tmp10852, i64 1 + %tmp10854 = getelementptr inbounds float* %tmp10853, i64 1 + %tmp10855 = getelementptr inbounds float* %tmp10854, i64 1 + %tmp10856 = getelementptr inbounds float* %tmp10855, i64 1 + %tmp10857 = getelementptr inbounds float* %tmp10856, i64 1 + %tmp10858 = getelementptr inbounds float* %tmp10857, i64 1 + %tmp10859 = getelementptr inbounds float* %tmp10858, i64 1 + %tmp10860 = getelementptr inbounds float* %tmp10859, i64 1 + %tmp10861 = getelementptr inbounds float* %tmp10860, i64 1 + %tmp10862 = getelementptr inbounds float* %tmp10861, i64 1 + %tmp10863 = getelementptr inbounds float* %tmp10862, i64 1 + %tmp10864 = getelementptr inbounds float* %tmp10863, i64 1 + %tmp10865 = getelementptr inbounds float* %tmp10864, i64 1 + %tmp10866 = getelementptr inbounds float* %tmp10865, i64 1 + %tmp10867 = getelementptr inbounds float* %tmp10866, i64 1 + %tmp10868 = getelementptr inbounds float* %tmp10867, i64 1 + %tmp10869 = getelementptr inbounds float* %tmp10868, i64 1 + %tmp10870 = getelementptr inbounds float* %tmp10869, i64 1 + %tmp10871 = getelementptr inbounds float* %tmp10870, i64 1 + %tmp10872 = getelementptr inbounds float* %tmp10871, i64 1 + %tmp10873 = getelementptr inbounds float* %tmp10872, i64 1 + %tmp10874 = getelementptr inbounds float* %tmp10873, i64 1 + %tmp10875 = getelementptr inbounds float* %tmp10874, i64 1 + %tmp10876 = getelementptr inbounds float* %tmp10875, i64 1 + %tmp10877 = getelementptr inbounds float* %tmp10876, i64 1 + %tmp10878 = getelementptr inbounds float* %tmp10877, i64 1 + %tmp10879 = getelementptr inbounds float* %tmp10878, i64 1 + %tmp10880 = getelementptr inbounds float* %tmp10879, i64 1 + %tmp10881 = getelementptr inbounds float* %tmp10880, i64 1 + %tmp10882 = getelementptr inbounds float* %tmp10881, i64 1 + %tmp10883 = getelementptr inbounds float* %tmp10882, i64 1 + %tmp10884 = getelementptr inbounds float* %tmp10883, i64 1 + %tmp10885 = getelementptr 
inbounds float* %tmp10884, i64 1 + %tmp10886 = getelementptr inbounds float* %tmp10885, i64 1 + %tmp10887 = getelementptr inbounds float* %tmp10886, i64 1 + %tmp10888 = getelementptr inbounds float* %tmp10887, i64 1 + %tmp10889 = getelementptr inbounds float* %tmp10888, i64 1 + %tmp10890 = getelementptr inbounds float* %tmp10889, i64 1 + %tmp10891 = getelementptr inbounds float* %tmp10890, i64 1 + %tmp10892 = getelementptr inbounds float* %tmp10891, i64 1 + %tmp10893 = getelementptr inbounds float* %tmp10892, i64 1 + %tmp10894 = getelementptr inbounds float* %tmp10893, i64 1 + %tmp10895 = getelementptr inbounds float* %tmp10894, i64 1 + %tmp10896 = getelementptr inbounds float* %tmp10895, i64 1 + %tmp10897 = getelementptr inbounds float* %tmp10896, i64 1 + %tmp10898 = getelementptr inbounds float* %tmp10897, i64 1 + %tmp10899 = getelementptr inbounds float* %tmp10898, i64 1 + %tmp10900 = getelementptr inbounds float* %tmp10899, i64 1 + %tmp10901 = getelementptr inbounds float* %tmp10900, i64 1 + %tmp10902 = getelementptr inbounds float* %tmp10901, i64 1 + %tmp10903 = getelementptr inbounds float* %tmp10902, i64 1 + %tmp10904 = getelementptr inbounds float* %tmp10903, i64 1 + %tmp10905 = getelementptr inbounds float* %tmp10904, i64 1 + %tmp10906 = getelementptr inbounds float* %tmp10905, i64 1 + %tmp10907 = getelementptr inbounds float* %tmp10906, i64 1 + %tmp10908 = getelementptr inbounds float* %tmp10907, i64 1 + %tmp10909 = getelementptr inbounds float* %tmp10908, i64 1 + %tmp10910 = getelementptr inbounds float* %tmp10909, i64 1 + %tmp10911 = getelementptr inbounds float* %tmp10910, i64 1 + %tmp10912 = getelementptr inbounds float* %tmp10911, i64 1 + %tmp10913 = getelementptr inbounds float* %tmp10912, i64 1 + %tmp10914 = getelementptr inbounds float* %tmp10913, i64 1 + %tmp10915 = getelementptr inbounds float* %tmp10914, i64 1 + %tmp10916 = getelementptr inbounds float* %tmp10915, i64 1 + %tmp10917 = getelementptr inbounds float* %tmp10916, i64 1 + %tmp10918 = 
getelementptr inbounds float* %tmp10917, i64 1 + %tmp10919 = getelementptr inbounds float* %tmp10918, i64 1 + %tmp10920 = getelementptr inbounds float* %tmp10919, i64 1 + %tmp10921 = getelementptr inbounds float* %tmp10920, i64 1 + %tmp10922 = getelementptr inbounds float* %tmp10921, i64 1 + %tmp10923 = getelementptr inbounds float* %tmp10922, i64 1 + %tmp10924 = getelementptr inbounds float* %tmp10923, i64 1 + %tmp10925 = getelementptr inbounds float* %tmp10924, i64 1 + %tmp10926 = getelementptr inbounds float* %tmp10925, i64 1 + %tmp10927 = getelementptr inbounds float* %tmp10926, i64 1 + %tmp10928 = getelementptr inbounds float* %tmp10927, i64 1 + %tmp10929 = getelementptr inbounds float* %tmp10928, i64 1 + %tmp10930 = getelementptr inbounds float* %tmp10929, i64 1 + %tmp10931 = getelementptr inbounds float* %tmp10930, i64 1 + %tmp10932 = getelementptr inbounds float* %tmp10931, i64 1 + %tmp10933 = getelementptr inbounds float* %tmp10932, i64 1 + %tmp10934 = getelementptr inbounds float* %tmp10933, i64 1 + %tmp10935 = getelementptr inbounds float* %tmp10934, i64 1 + %tmp10936 = getelementptr inbounds float* %tmp10935, i64 1 + %tmp10937 = getelementptr inbounds float* %tmp10936, i64 1 + %tmp10938 = getelementptr inbounds float* %tmp10937, i64 1 + %tmp10939 = getelementptr inbounds float* %tmp10938, i64 1 + %tmp10940 = getelementptr inbounds float* %tmp10939, i64 1 + %tmp10941 = getelementptr inbounds float* %tmp10940, i64 1 + %tmp10942 = getelementptr inbounds float* %tmp10941, i64 1 + %tmp10943 = getelementptr inbounds float* %tmp10942, i64 1 + %tmp10944 = getelementptr inbounds float* %tmp10943, i64 1 + %tmp10945 = getelementptr inbounds float* %tmp10944, i64 1 + %tmp10946 = getelementptr inbounds float* %tmp10945, i64 1 + %tmp10947 = getelementptr inbounds float* %tmp10946, i64 1 + %tmp10948 = getelementptr inbounds float* %tmp10947, i64 1 + %tmp10949 = getelementptr inbounds float* %tmp10948, i64 1 + %tmp10950 = getelementptr inbounds float* %tmp10949, i64 1 
+ %tmp10951 = getelementptr inbounds float* %tmp10950, i64 1 + %tmp10952 = getelementptr inbounds float* %tmp10951, i64 1 + %tmp10953 = getelementptr inbounds float* %tmp10952, i64 1 + %tmp10954 = getelementptr inbounds float* %tmp10953, i64 1 + %tmp10955 = getelementptr inbounds float* %tmp10954, i64 1 + %tmp10956 = getelementptr inbounds float* %tmp10955, i64 1 + %tmp10957 = getelementptr inbounds float* %tmp10956, i64 1 + %tmp10958 = getelementptr inbounds float* %tmp10957, i64 1 + %tmp10959 = getelementptr inbounds float* %tmp10958, i64 1 + %tmp10960 = getelementptr inbounds float* %tmp10959, i64 1 + %tmp10961 = getelementptr inbounds float* %tmp10960, i64 1 + %tmp10962 = getelementptr inbounds float* %tmp10961, i64 1 + %tmp10963 = getelementptr inbounds float* %tmp10962, i64 1 + %tmp10964 = getelementptr inbounds float* %tmp10963, i64 1 + %tmp10965 = getelementptr inbounds float* %tmp10964, i64 1 + %tmp10966 = getelementptr inbounds float* %tmp10965, i64 1 + %tmp10967 = getelementptr inbounds float* %tmp10966, i64 1 + %tmp10968 = getelementptr inbounds float* %tmp10967, i64 1 + %tmp10969 = getelementptr inbounds float* %tmp10968, i64 1 + %tmp10970 = getelementptr inbounds float* %tmp10969, i64 1 + %tmp10971 = getelementptr inbounds float* %tmp10970, i64 1 + %tmp10972 = getelementptr inbounds float* %tmp10971, i64 1 + %tmp10973 = getelementptr inbounds float* %tmp10972, i64 1 + %tmp10974 = getelementptr inbounds float* %tmp10973, i64 1 + %tmp10975 = getelementptr inbounds float* %tmp10974, i64 1 + %tmp10976 = getelementptr inbounds float* %tmp10975, i64 1 + %tmp10977 = getelementptr inbounds float* %tmp10976, i64 1 + %tmp10978 = getelementptr inbounds float* %tmp10977, i64 1 + %tmp10979 = getelementptr inbounds float* %tmp10978, i64 1 + %tmp10980 = getelementptr inbounds float* %tmp10979, i64 1 + %tmp10981 = getelementptr inbounds float* %tmp10980, i64 1 + %tmp10982 = getelementptr inbounds float* %tmp10981, i64 1 + %tmp10983 = getelementptr inbounds float* 
%tmp10982, i64 1 + %tmp10984 = getelementptr inbounds float* %tmp10983, i64 1 + %tmp10985 = getelementptr inbounds float* %tmp10984, i64 1 + %tmp10986 = getelementptr inbounds float* %tmp10985, i64 1 + %tmp10987 = getelementptr inbounds float* %tmp10986, i64 1 + %tmp10988 = getelementptr inbounds float* %tmp10987, i64 1 + %tmp10989 = getelementptr inbounds float* %tmp10988, i64 1 + %tmp10990 = getelementptr inbounds float* %tmp10989, i64 1 + %tmp10991 = getelementptr inbounds float* %tmp10990, i64 1 + %tmp10992 = getelementptr inbounds float* %tmp10991, i64 1 + %tmp10993 = getelementptr inbounds float* %tmp10992, i64 1 + %tmp10994 = getelementptr inbounds float* %tmp10993, i64 1 + %tmp10995 = getelementptr inbounds float* %tmp10994, i64 1 + %tmp10996 = getelementptr inbounds float* %tmp10995, i64 1 + %tmp10997 = getelementptr inbounds float* %tmp10996, i64 1 + %tmp10998 = getelementptr inbounds float* %tmp10997, i64 1 + %tmp10999 = getelementptr inbounds float* %tmp10998, i64 1 + %tmp11000 = getelementptr inbounds float* %tmp10999, i64 1 + %tmp11001 = getelementptr inbounds float* %tmp11000, i64 1 + %tmp11002 = getelementptr inbounds float* %tmp11001, i64 1 + %tmp11003 = getelementptr inbounds float* %tmp11002, i64 1 + %tmp11004 = getelementptr inbounds float* %tmp11003, i64 1 + %tmp11005 = getelementptr inbounds float* %tmp11004, i64 1 + %tmp11006 = getelementptr inbounds float* %tmp11005, i64 1 + %tmp11007 = getelementptr inbounds float* %tmp11006, i64 1 + %tmp11008 = getelementptr inbounds float* %tmp11007, i64 1 + %tmp11009 = getelementptr inbounds float* %tmp11008, i64 1 + %tmp11010 = getelementptr inbounds float* %tmp11009, i64 1 + %tmp11011 = getelementptr inbounds float* %tmp11010, i64 1 + %tmp11012 = getelementptr inbounds float* %tmp11011, i64 1 + %tmp11013 = getelementptr inbounds float* %tmp11012, i64 1 + %tmp11014 = getelementptr inbounds float* %tmp11013, i64 1 + %tmp11015 = getelementptr inbounds float* %tmp11014, i64 1 + %tmp11016 = getelementptr 
inbounds float* %tmp11015, i64 1 + %tmp11017 = getelementptr inbounds float* %tmp11016, i64 1 + %tmp11018 = getelementptr inbounds float* %tmp11017, i64 1 + %tmp11019 = getelementptr inbounds float* %tmp11018, i64 1 + %tmp11020 = getelementptr inbounds float* %tmp11019, i64 1 + %tmp11021 = getelementptr inbounds float* %tmp11020, i64 1 + %tmp11022 = getelementptr inbounds float* %tmp11021, i64 1 + %tmp11023 = getelementptr inbounds float* %tmp11022, i64 1 + %tmp11024 = getelementptr inbounds float* %tmp11023, i64 1 + %tmp11025 = getelementptr inbounds float* %tmp11024, i64 1 + %tmp11026 = getelementptr inbounds float* %tmp11025, i64 1 + %tmp11027 = getelementptr inbounds float* %tmp11026, i64 1 + %tmp11028 = getelementptr inbounds float* %tmp11027, i64 1 + %tmp11029 = getelementptr inbounds float* %tmp11028, i64 1 + %tmp11030 = getelementptr inbounds float* %tmp11029, i64 1 + %tmp11031 = getelementptr inbounds float* %tmp11030, i64 1 + %tmp11032 = getelementptr inbounds float* %tmp11031, i64 1 + %tmp11033 = getelementptr inbounds float* %tmp11032, i64 1 + %tmp11034 = getelementptr inbounds float* %tmp11033, i64 1 + %tmp11035 = getelementptr inbounds float* %tmp11034, i64 1 + %tmp11036 = getelementptr inbounds float* %tmp11035, i64 1 + %tmp11037 = getelementptr inbounds float* %tmp11036, i64 1 + %tmp11038 = getelementptr inbounds float* %tmp11037, i64 1 + %tmp11039 = getelementptr inbounds float* %tmp11038, i64 1 + %tmp11040 = getelementptr inbounds float* %tmp11039, i64 1 + %tmp11041 = getelementptr inbounds float* %tmp11040, i64 1 + %tmp11042 = getelementptr inbounds float* %tmp11041, i64 1 + %tmp11043 = getelementptr inbounds float* %tmp11042, i64 1 + %tmp11044 = getelementptr inbounds float* %tmp11043, i64 1 + %tmp11045 = getelementptr inbounds float* %tmp11044, i64 1 + %tmp11046 = getelementptr inbounds float* %tmp11045, i64 1 + %tmp11047 = getelementptr inbounds float* %tmp11046, i64 1 + %tmp11048 = getelementptr inbounds float* %tmp11047, i64 1 + %tmp11049 = 
getelementptr inbounds float* %tmp11048, i64 1 + %tmp11050 = getelementptr inbounds float* %tmp11049, i64 1 + %tmp11051 = getelementptr inbounds float* %tmp11050, i64 1 + %tmp11052 = getelementptr inbounds float* %tmp11051, i64 1 + %tmp11053 = getelementptr inbounds float* %tmp11052, i64 1 + %tmp11054 = getelementptr inbounds float* %tmp11053, i64 1 + %tmp11055 = getelementptr inbounds float* %tmp11054, i64 1 + %tmp11056 = getelementptr inbounds float* %tmp11055, i64 1 + %tmp11057 = getelementptr inbounds float* %tmp11056, i64 1 + %tmp11058 = getelementptr inbounds float* %tmp11057, i64 1 + %tmp11059 = getelementptr inbounds float* %tmp11058, i64 1 + %tmp11060 = getelementptr inbounds float* %tmp11059, i64 1 + %tmp11061 = getelementptr inbounds float* %tmp11060, i64 1 + %tmp11062 = getelementptr inbounds float* %tmp11061, i64 1 + %tmp11063 = getelementptr inbounds float* %tmp11062, i64 1 + %tmp11064 = getelementptr inbounds float* %tmp11063, i64 1 + %tmp11065 = getelementptr inbounds float* %tmp11064, i64 1 + %tmp11066 = getelementptr inbounds float* %tmp11065, i64 1 + %tmp11067 = getelementptr inbounds float* %tmp11066, i64 1 + %tmp11068 = getelementptr inbounds float* %tmp11067, i64 1 + %tmp11069 = getelementptr inbounds float* %tmp11068, i64 1 + %tmp11070 = getelementptr inbounds float* %tmp11069, i64 1 + %tmp11071 = getelementptr inbounds float* %tmp11070, i64 1 + %tmp11072 = getelementptr inbounds float* %tmp11071, i64 1 + %tmp11073 = getelementptr inbounds float* %tmp11072, i64 1 + %tmp11074 = getelementptr inbounds float* %tmp11073, i64 1 + %tmp11075 = getelementptr inbounds float* %tmp11074, i64 1 + %tmp11076 = getelementptr inbounds float* %tmp11075, i64 1 + %tmp11077 = getelementptr inbounds float* %tmp11076, i64 1 + %tmp11078 = getelementptr inbounds float* %tmp11077, i64 1 + %tmp11079 = getelementptr inbounds float* %tmp11078, i64 1 + %tmp11080 = getelementptr inbounds float* %tmp11079, i64 1 + %tmp11081 = getelementptr inbounds float* %tmp11080, i64 1 
+ %tmp11082 = getelementptr inbounds float* %tmp11081, i64 1 + %tmp11083 = getelementptr inbounds float* %tmp11082, i64 1 + %tmp11084 = getelementptr inbounds float* %tmp11083, i64 1 + %tmp11085 = getelementptr inbounds float* %tmp11084, i64 1 + %tmp11086 = getelementptr inbounds float* %tmp11085, i64 1 + %tmp11087 = getelementptr inbounds float* %tmp11086, i64 1 + %tmp11088 = getelementptr inbounds float* %tmp11087, i64 1 + %tmp11089 = getelementptr inbounds float* %tmp11088, i64 1 + %tmp11090 = getelementptr inbounds float* %tmp11089, i64 1 + %tmp11091 = getelementptr inbounds float* %tmp11090, i64 1 + %tmp11092 = getelementptr inbounds float* %tmp11091, i64 1 + %tmp11093 = getelementptr inbounds float* %tmp11092, i64 1 + %tmp11094 = getelementptr inbounds float* %tmp11093, i64 1 + %tmp11095 = getelementptr inbounds float* %tmp11094, i64 1 + %tmp11096 = getelementptr inbounds float* %tmp11095, i64 1 + %tmp11097 = getelementptr inbounds float* %tmp11096, i64 1 + %tmp11098 = getelementptr inbounds float* %tmp11097, i64 1 + %tmp11099 = getelementptr inbounds float* %tmp11098, i64 1 + %tmp11100 = getelementptr inbounds float* %tmp11099, i64 1 + %tmp11101 = getelementptr inbounds float* %tmp11100, i64 1 + %tmp11102 = getelementptr inbounds float* %tmp11101, i64 1 + %tmp11103 = getelementptr inbounds float* %tmp11102, i64 1 + %tmp11104 = getelementptr inbounds float* %tmp11103, i64 1 + %tmp11105 = getelementptr inbounds float* %tmp11104, i64 1 + %tmp11106 = getelementptr inbounds float* %tmp11105, i64 1 + %tmp11107 = getelementptr inbounds float* %tmp11106, i64 1 + %tmp11108 = getelementptr inbounds float* %tmp11107, i64 1 + %tmp11109 = getelementptr inbounds float* %tmp11108, i64 1 + %tmp11110 = getelementptr inbounds float* %tmp11109, i64 1 + %tmp11111 = getelementptr inbounds float* %tmp11110, i64 1 + %tmp11112 = getelementptr inbounds float* %tmp11111, i64 1 + %tmp11113 = getelementptr inbounds float* %tmp11112, i64 1 + %tmp11114 = getelementptr inbounds float* 
%tmp11113, i64 1 + %tmp11115 = getelementptr inbounds float* %tmp11114, i64 1 + %tmp11116 = getelementptr inbounds float* %tmp11115, i64 1 + %tmp11117 = getelementptr inbounds float* %tmp11116, i64 1 + %tmp11118 = getelementptr inbounds float* %tmp11117, i64 1 + %tmp11119 = getelementptr inbounds float* %tmp11118, i64 1 + %tmp11120 = getelementptr inbounds float* %tmp11119, i64 1 + %tmp11121 = getelementptr inbounds float* %tmp11120, i64 1 + %tmp11122 = getelementptr inbounds float* %tmp11121, i64 1 + %tmp11123 = getelementptr inbounds float* %tmp11122, i64 1 + %tmp11124 = getelementptr inbounds float* %tmp11123, i64 1 + %tmp11125 = getelementptr inbounds float* %tmp11124, i64 1 + %tmp11126 = getelementptr inbounds float* %tmp11125, i64 1 + %tmp11127 = getelementptr inbounds float* %tmp11126, i64 1 + %tmp11128 = getelementptr inbounds float* %tmp11127, i64 1 + %tmp11129 = getelementptr inbounds float* %tmp11128, i64 1 + %tmp11130 = getelementptr inbounds float* %tmp11129, i64 1 + %tmp11131 = getelementptr inbounds float* %tmp11130, i64 1 + %tmp11132 = getelementptr inbounds float* %tmp11131, i64 1 + %tmp11133 = getelementptr inbounds float* %tmp11132, i64 1 + %tmp11134 = getelementptr inbounds float* %tmp11133, i64 1 + %tmp11135 = getelementptr inbounds float* %tmp11134, i64 1 + %tmp11136 = getelementptr inbounds float* %tmp11135, i64 1 + %tmp11137 = getelementptr inbounds float* %tmp11136, i64 1 + %tmp11138 = getelementptr inbounds float* %tmp11137, i64 1 + %tmp11139 = getelementptr inbounds float* %tmp11138, i64 1 + %tmp11140 = getelementptr inbounds float* %tmp11139, i64 1 + %tmp11141 = getelementptr inbounds float* %tmp11140, i64 1 + %tmp11142 = getelementptr inbounds float* %tmp11141, i64 1 + %tmp11143 = getelementptr inbounds float* %tmp11142, i64 1 + %tmp11144 = getelementptr inbounds float* %tmp11143, i64 1 + %tmp11145 = getelementptr inbounds float* %tmp11144, i64 1 + %tmp11146 = getelementptr inbounds float* %tmp11145, i64 1 + %tmp11147 = getelementptr 
inbounds float* %tmp11146, i64 1 + %tmp11148 = getelementptr inbounds float* %tmp11147, i64 1 + %tmp11149 = getelementptr inbounds float* %tmp11148, i64 1 + %tmp11150 = getelementptr inbounds float* %tmp11149, i64 1 + %tmp11151 = getelementptr inbounds float* %tmp11150, i64 1 + %tmp11152 = getelementptr inbounds float* %tmp11151, i64 1 + %tmp11153 = getelementptr inbounds float* %tmp11152, i64 1 + %tmp11154 = getelementptr inbounds float* %tmp11153, i64 1 + %tmp11155 = getelementptr inbounds float* %tmp11154, i64 1 + %tmp11156 = getelementptr inbounds float* %tmp11155, i64 1 + %tmp11157 = getelementptr inbounds float* %tmp11156, i64 1 + %tmp11158 = getelementptr inbounds float* %tmp11157, i64 1 + %tmp11159 = getelementptr inbounds float* %tmp11158, i64 1 + %tmp11160 = getelementptr inbounds float* %tmp11159, i64 1 + %tmp11161 = getelementptr inbounds float* %tmp11160, i64 1 + %tmp11162 = getelementptr inbounds float* %tmp11161, i64 1 + %tmp11163 = getelementptr inbounds float* %tmp11162, i64 1 + %tmp11164 = getelementptr inbounds float* %tmp11163, i64 1 + %tmp11165 = getelementptr inbounds float* %tmp11164, i64 1 + %tmp11166 = getelementptr inbounds float* %tmp11165, i64 1 + %tmp11167 = getelementptr inbounds float* %tmp11166, i64 1 + %tmp11168 = getelementptr inbounds float* %tmp11167, i64 1 + %tmp11169 = getelementptr inbounds float* %tmp11168, i64 1 + %tmp11170 = getelementptr inbounds float* %tmp11169, i64 1 + %tmp11171 = getelementptr inbounds float* %tmp11170, i64 1 + %tmp11172 = getelementptr inbounds float* %tmp11171, i64 1 + %tmp11173 = getelementptr inbounds float* %tmp11172, i64 1 + %tmp11174 = getelementptr inbounds float* %tmp11173, i64 1 + %tmp11175 = getelementptr inbounds float* %tmp11174, i64 1 + %tmp11176 = getelementptr inbounds float* %tmp11175, i64 1 + %tmp11177 = getelementptr inbounds float* %tmp11176, i64 1 + %tmp11178 = getelementptr inbounds float* %tmp11177, i64 1 + %tmp11179 = getelementptr inbounds float* %tmp11178, i64 1 + %tmp11180 = 
getelementptr inbounds float* %tmp11179, i64 1 + %tmp11181 = getelementptr inbounds float* %tmp11180, i64 1 + %tmp11182 = getelementptr inbounds float* %tmp11181, i64 1 + %tmp11183 = getelementptr inbounds float* %tmp11182, i64 1 + %tmp11184 = getelementptr inbounds float* %tmp11183, i64 1 + %tmp11185 = getelementptr inbounds float* %tmp11184, i64 1 + %tmp11186 = getelementptr inbounds float* %tmp11185, i64 1 + %tmp11187 = getelementptr inbounds float* %tmp11186, i64 1 + %tmp11188 = getelementptr inbounds float* %tmp11187, i64 1 + %tmp11189 = getelementptr inbounds float* %tmp11188, i64 1 + %tmp11190 = getelementptr inbounds float* %tmp11189, i64 1 + %tmp11191 = getelementptr inbounds float* %tmp11190, i64 1 + %tmp11192 = getelementptr inbounds float* %tmp11191, i64 1 + %tmp11193 = getelementptr inbounds float* %tmp11192, i64 1 + %tmp11194 = getelementptr inbounds float* %tmp11193, i64 1 + %tmp11195 = getelementptr inbounds float* %tmp11194, i64 1 + %tmp11196 = getelementptr inbounds float* %tmp11195, i64 1 + %tmp11197 = getelementptr inbounds float* %tmp11196, i64 1 + %tmp11198 = getelementptr inbounds float* %tmp11197, i64 1 + %tmp11199 = getelementptr inbounds float* %tmp11198, i64 1 + %tmp11200 = getelementptr inbounds float* %tmp11199, i64 1 + %tmp11201 = getelementptr inbounds float* %tmp11200, i64 1 + %tmp11202 = getelementptr inbounds float* %tmp11201, i64 1 + %tmp11203 = getelementptr inbounds float* %tmp11202, i64 1 + %tmp11204 = getelementptr inbounds float* %tmp11203, i64 1 + %tmp11205 = getelementptr inbounds float* %tmp11204, i64 1 + %tmp11206 = getelementptr inbounds float* %tmp11205, i64 1 + %tmp11207 = getelementptr inbounds float* %tmp11206, i64 1 + %tmp11208 = getelementptr inbounds float* %tmp11207, i64 1 + %tmp11209 = getelementptr inbounds float* %tmp11208, i64 1 + %tmp11210 = getelementptr inbounds float* %tmp11209, i64 1 + %tmp11211 = getelementptr inbounds float* %tmp11210, i64 1 + %tmp11212 = getelementptr inbounds float* %tmp11211, i64 1 
+ %tmp11213 = getelementptr inbounds float* %tmp11212, i64 1 + %tmp11214 = getelementptr inbounds float* %tmp11213, i64 1 + %tmp11215 = getelementptr inbounds float* %tmp11214, i64 1 + %tmp11216 = getelementptr inbounds float* %tmp11215, i64 1 + %tmp11217 = getelementptr inbounds float* %tmp11216, i64 1 + %tmp11218 = getelementptr inbounds float* %tmp11217, i64 1 + %tmp11219 = getelementptr inbounds float* %tmp11218, i64 1 + %tmp11220 = getelementptr inbounds float* %tmp11219, i64 1 + %tmp11221 = getelementptr inbounds float* %tmp11220, i64 1 + %tmp11222 = getelementptr inbounds float* %tmp11221, i64 1 + %tmp11223 = getelementptr inbounds float* %tmp11222, i64 1 + %tmp11224 = getelementptr inbounds float* %tmp11223, i64 1 + %tmp11225 = getelementptr inbounds float* %tmp11224, i64 1 + %tmp11226 = getelementptr inbounds float* %tmp11225, i64 1 + %tmp11227 = getelementptr inbounds float* %tmp11226, i64 1 + %tmp11228 = getelementptr inbounds float* %tmp11227, i64 1 + %tmp11229 = getelementptr inbounds float* %tmp11228, i64 1 + %tmp11230 = getelementptr inbounds float* %tmp11229, i64 1 + %tmp11231 = getelementptr inbounds float* %tmp11230, i64 1 + %tmp11232 = getelementptr inbounds float* %tmp11231, i64 1 + %tmp11233 = getelementptr inbounds float* %tmp11232, i64 1 + %tmp11234 = getelementptr inbounds float* %tmp11233, i64 1 + %tmp11235 = getelementptr inbounds float* %tmp11234, i64 1 + %tmp11236 = getelementptr inbounds float* %tmp11235, i64 1 + %tmp11237 = getelementptr inbounds float* %tmp11236, i64 1 + %tmp11238 = getelementptr inbounds float* %tmp11237, i64 1 + %tmp11239 = getelementptr inbounds float* %tmp11238, i64 1 + %tmp11240 = getelementptr inbounds float* %tmp11239, i64 1 + %tmp11241 = getelementptr inbounds float* %tmp11240, i64 1 + %tmp11242 = getelementptr inbounds float* %tmp11241, i64 1 + %tmp11243 = getelementptr inbounds float* %tmp11242, i64 1 + %tmp11244 = getelementptr inbounds float* %tmp11243, i64 1 + %tmp11245 = getelementptr inbounds float* 
%tmp11244, i64 1 + %tmp11246 = getelementptr inbounds float* %tmp11245, i64 1 + %tmp11247 = getelementptr inbounds float* %tmp11246, i64 1 + %tmp11248 = getelementptr inbounds float* %tmp11247, i64 1 + %tmp11249 = getelementptr inbounds float* %tmp11248, i64 1 + %tmp11250 = getelementptr inbounds float* %tmp11249, i64 1 + %tmp11251 = getelementptr inbounds float* %tmp11250, i64 1 + %tmp11252 = getelementptr inbounds float* %tmp11251, i64 1 + %tmp11253 = getelementptr inbounds float* %tmp11252, i64 1 + %tmp11254 = getelementptr inbounds float* %tmp11253, i64 1 + %tmp11255 = getelementptr inbounds float* %tmp11254, i64 1 + %tmp11256 = getelementptr inbounds float* %tmp11255, i64 1 + %tmp11257 = getelementptr inbounds float* %tmp11256, i64 1 + %tmp11258 = getelementptr inbounds float* %tmp11257, i64 1 + %tmp11259 = getelementptr inbounds float* %tmp11258, i64 1 + %tmp11260 = getelementptr inbounds float* %tmp11259, i64 1 + %tmp11261 = getelementptr inbounds float* %tmp11260, i64 1 + %tmp11262 = getelementptr inbounds float* %tmp11261, i64 1 + %tmp11263 = getelementptr inbounds float* %tmp11262, i64 1 + %tmp11264 = getelementptr inbounds float* %tmp11263, i64 1 + %tmp11265 = getelementptr inbounds float* %tmp11264, i64 1 + %tmp11266 = getelementptr inbounds float* %tmp11265, i64 1 + %tmp11267 = getelementptr inbounds float* %tmp11266, i64 1 + %tmp11268 = getelementptr inbounds float* %tmp11267, i64 1 + %tmp11269 = getelementptr inbounds float* %tmp11268, i64 1 + %tmp11270 = getelementptr inbounds float* %tmp11269, i64 1 + %tmp11271 = getelementptr inbounds float* %tmp11270, i64 1 + %tmp11272 = getelementptr inbounds float* %tmp11271, i64 1 + %tmp11273 = getelementptr inbounds float* %tmp11272, i64 1 + %tmp11274 = getelementptr inbounds float* %tmp11273, i64 1 + %tmp11275 = getelementptr inbounds float* %tmp11274, i64 1 + %tmp11276 = getelementptr inbounds float* %tmp11275, i64 1 + %tmp11277 = getelementptr inbounds float* %tmp11276, i64 1 + %tmp11278 = getelementptr 
inbounds float* %tmp11277, i64 1 + %tmp11279 = getelementptr inbounds float* %tmp11278, i64 1 + %tmp11280 = getelementptr inbounds float* %tmp11279, i64 1 + %tmp11281 = getelementptr inbounds float* %tmp11280, i64 1 + %tmp11282 = getelementptr inbounds float* %tmp11281, i64 1 + %tmp11283 = getelementptr inbounds float* %tmp11282, i64 1 + %tmp11284 = getelementptr inbounds float* %tmp11283, i64 1 + %tmp11285 = getelementptr inbounds float* %tmp11284, i64 1 + %tmp11286 = getelementptr inbounds float* %tmp11285, i64 1 + %tmp11287 = getelementptr inbounds float* %tmp11286, i64 1 + %tmp11288 = getelementptr inbounds float* %tmp11287, i64 1 + %tmp11289 = getelementptr inbounds float* %tmp11288, i64 1 + %tmp11290 = getelementptr inbounds float* %tmp11289, i64 1 + %tmp11291 = getelementptr inbounds float* %tmp11290, i64 1 + %tmp11292 = getelementptr inbounds float* %tmp11291, i64 1 + %tmp11293 = getelementptr inbounds float* %tmp11292, i64 1 + %tmp11294 = getelementptr inbounds float* %tmp11293, i64 1 + %tmp11295 = getelementptr inbounds float* %tmp11294, i64 1 + %tmp11296 = getelementptr inbounds float* %tmp11295, i64 1 + %tmp11297 = getelementptr inbounds float* %tmp11296, i64 1 + %tmp11298 = getelementptr inbounds float* %tmp11297, i64 1 + %tmp11299 = getelementptr inbounds float* %tmp11298, i64 1 + %tmp11300 = getelementptr inbounds float* %tmp11299, i64 1 + %tmp11301 = getelementptr inbounds float* %tmp11300, i64 1 + %tmp11302 = getelementptr inbounds float* %tmp11301, i64 1 + %tmp11303 = getelementptr inbounds float* %tmp11302, i64 1 + %tmp11304 = getelementptr inbounds float* %tmp11303, i64 1 + %tmp11305 = getelementptr inbounds float* %tmp11304, i64 1 + %tmp11306 = getelementptr inbounds float* %tmp11305, i64 1 + %tmp11307 = getelementptr inbounds float* %tmp11306, i64 1 + %tmp11308 = getelementptr inbounds float* %tmp11307, i64 1 + %tmp11309 = getelementptr inbounds float* %tmp11308, i64 1 + %tmp11310 = getelementptr inbounds float* %tmp11309, i64 1 + %tmp11311 = 
getelementptr inbounds float* %tmp11310, i64 1 + %tmp11312 = getelementptr inbounds float* %tmp11311, i64 1 + %tmp11313 = getelementptr inbounds float* %tmp11312, i64 1 + %tmp11314 = getelementptr inbounds float* %tmp11313, i64 1 + %tmp11315 = getelementptr inbounds float* %tmp11314, i64 1 + %tmp11316 = getelementptr inbounds float* %tmp11315, i64 1 + %tmp11317 = getelementptr inbounds float* %tmp11316, i64 1 + %tmp11318 = getelementptr inbounds float* %tmp11317, i64 1 + %tmp11319 = getelementptr inbounds float* %tmp11318, i64 1 + %tmp11320 = getelementptr inbounds float* %tmp11319, i64 1 + %tmp11321 = getelementptr inbounds float* %tmp11320, i64 1 + %tmp11322 = getelementptr inbounds float* %tmp11321, i64 1 + %tmp11323 = getelementptr inbounds float* %tmp11322, i64 1 + %tmp11324 = getelementptr inbounds float* %tmp11323, i64 1 + %tmp11325 = getelementptr inbounds float* %tmp11324, i64 1 + %tmp11326 = getelementptr inbounds float* %tmp11325, i64 1 + %tmp11327 = getelementptr inbounds float* %tmp11326, i64 1 + %tmp11328 = getelementptr inbounds float* %tmp11327, i64 1 + %tmp11329 = getelementptr inbounds float* %tmp11328, i64 1 + %tmp11330 = getelementptr inbounds float* %tmp11329, i64 1 + %tmp11331 = getelementptr inbounds float* %tmp11330, i64 1 + %tmp11332 = getelementptr inbounds float* %tmp11331, i64 1 + %tmp11333 = getelementptr inbounds float* %tmp11332, i64 1 + %tmp11334 = getelementptr inbounds float* %tmp11333, i64 1 + %tmp11335 = getelementptr inbounds float* %tmp11334, i64 1 + %tmp11336 = getelementptr inbounds float* %tmp11335, i64 1 + %tmp11337 = getelementptr inbounds float* %tmp11336, i64 1 + %tmp11338 = getelementptr inbounds float* %tmp11337, i64 1 + %tmp11339 = getelementptr inbounds float* %tmp11338, i64 1 + %tmp11340 = getelementptr inbounds float* %tmp11339, i64 1 + %tmp11341 = getelementptr inbounds float* %tmp11340, i64 1 + %tmp11342 = getelementptr inbounds float* %tmp11341, i64 1 + %tmp11343 = getelementptr inbounds float* %tmp11342, i64 1 
+ %tmp11344 = getelementptr inbounds float* %tmp11343, i64 1 + %tmp11345 = getelementptr inbounds float* %tmp11344, i64 1 + %tmp11346 = getelementptr inbounds float* %tmp11345, i64 1 + %tmp11347 = getelementptr inbounds float* %tmp11346, i64 1 + %tmp11348 = getelementptr inbounds float* %tmp11347, i64 1 + %tmp11349 = getelementptr inbounds float* %tmp11348, i64 1 + %tmp11350 = getelementptr inbounds float* %tmp11349, i64 1 + %tmp11351 = getelementptr inbounds float* %tmp11350, i64 1 + %tmp11352 = getelementptr inbounds float* %tmp11351, i64 1 + %tmp11353 = getelementptr inbounds float* %tmp11352, i64 1 + %tmp11354 = getelementptr inbounds float* %tmp11353, i64 1 + %tmp11355 = getelementptr inbounds float* %tmp11354, i64 1 + %tmp11356 = getelementptr inbounds float* %tmp11355, i64 1 + %tmp11357 = getelementptr inbounds float* %tmp11356, i64 1 + %tmp11358 = getelementptr inbounds float* %tmp11357, i64 1 + %tmp11359 = getelementptr inbounds float* %tmp11358, i64 1 + %tmp11360 = getelementptr inbounds float* %tmp11359, i64 1 + %tmp11361 = getelementptr inbounds float* %tmp11360, i64 1 + %tmp11362 = getelementptr inbounds float* %tmp11361, i64 1 + %tmp11363 = getelementptr inbounds float* %tmp11362, i64 1 + %tmp11364 = getelementptr inbounds float* %tmp11363, i64 1 + %tmp11365 = getelementptr inbounds float* %tmp11364, i64 1 + %tmp11366 = getelementptr inbounds float* %tmp11365, i64 1 + %tmp11367 = getelementptr inbounds float* %tmp11366, i64 1 + %tmp11368 = getelementptr inbounds float* %tmp11367, i64 1 + %tmp11369 = getelementptr inbounds float* %tmp11368, i64 1 + %tmp11370 = getelementptr inbounds float* %tmp11369, i64 1 + %tmp11371 = getelementptr inbounds float* %tmp11370, i64 1 + %tmp11372 = getelementptr inbounds float* %tmp11371, i64 1 + %tmp11373 = getelementptr inbounds float* %tmp11372, i64 1 + %tmp11374 = getelementptr inbounds float* %tmp11373, i64 1 + %tmp11375 = getelementptr inbounds float* %tmp11374, i64 1 + %tmp11376 = getelementptr inbounds float* 
%tmp11375, i64 1 + %tmp11377 = getelementptr inbounds float* %tmp11376, i64 1 + %tmp11378 = getelementptr inbounds float* %tmp11377, i64 1 + %tmp11379 = getelementptr inbounds float* %tmp11378, i64 1 + %tmp11380 = getelementptr inbounds float* %tmp11379, i64 1 + %tmp11381 = getelementptr inbounds float* %tmp11380, i64 1 + %tmp11382 = getelementptr inbounds float* %tmp11381, i64 1 + %tmp11383 = getelementptr inbounds float* %tmp11382, i64 1 + %tmp11384 = getelementptr inbounds float* %tmp11383, i64 1 + %tmp11385 = getelementptr inbounds float* %tmp11384, i64 1 + %tmp11386 = getelementptr inbounds float* %tmp11385, i64 1 + %tmp11387 = getelementptr inbounds float* %tmp11386, i64 1 + %tmp11388 = getelementptr inbounds float* %tmp11387, i64 1 + %tmp11389 = getelementptr inbounds float* %tmp11388, i64 1 + %tmp11390 = getelementptr inbounds float* %tmp11389, i64 1 + %tmp11391 = getelementptr inbounds float* %tmp11390, i64 1 + %tmp11392 = getelementptr inbounds float* %tmp11391, i64 1 + %tmp11393 = getelementptr inbounds float* %tmp11392, i64 1 + %tmp11394 = getelementptr inbounds float* %tmp11393, i64 1 + %tmp11395 = getelementptr inbounds float* %tmp11394, i64 1 + %tmp11396 = getelementptr inbounds float* %tmp11395, i64 1 + %tmp11397 = getelementptr inbounds float* %tmp11396, i64 1 + %tmp11398 = getelementptr inbounds float* %tmp11397, i64 1 + %tmp11399 = getelementptr inbounds float* %tmp11398, i64 1 + %tmp11400 = getelementptr inbounds float* %tmp11399, i64 1 + %tmp11401 = getelementptr inbounds float* %tmp11400, i64 1 + %tmp11402 = getelementptr inbounds float* %tmp11401, i64 1 + %tmp11403 = getelementptr inbounds float* %tmp11402, i64 1 + %tmp11404 = getelementptr inbounds float* %tmp11403, i64 1 + %tmp11405 = getelementptr inbounds float* %tmp11404, i64 1 + %tmp11406 = getelementptr inbounds float* %tmp11405, i64 1 + %tmp11407 = getelementptr inbounds float* %tmp11406, i64 1 + %tmp11408 = getelementptr inbounds float* %tmp11407, i64 1 + %tmp11409 = getelementptr 
inbounds float* %tmp11408, i64 1 + %tmp11410 = getelementptr inbounds float* %tmp11409, i64 1 + %tmp11411 = getelementptr inbounds float* %tmp11410, i64 1 + %tmp11412 = getelementptr inbounds float* %tmp11411, i64 1 + %tmp11413 = getelementptr inbounds float* %tmp11412, i64 1 + %tmp11414 = getelementptr inbounds float* %tmp11413, i64 1 + %tmp11415 = getelementptr inbounds float* %tmp11414, i64 1 + %tmp11416 = getelementptr inbounds float* %tmp11415, i64 1 + %tmp11417 = getelementptr inbounds float* %tmp11416, i64 1 + %tmp11418 = getelementptr inbounds float* %tmp11417, i64 1 + %tmp11419 = getelementptr inbounds float* %tmp11418, i64 1 + %tmp11420 = getelementptr inbounds float* %tmp11419, i64 1 + %tmp11421 = getelementptr inbounds float* %tmp11420, i64 1 + %tmp11422 = getelementptr inbounds float* %tmp11421, i64 1 + %tmp11423 = getelementptr inbounds float* %tmp11422, i64 1 + %tmp11424 = getelementptr inbounds float* %tmp11423, i64 1 + %tmp11425 = getelementptr inbounds float* %tmp11424, i64 1 + %tmp11426 = getelementptr inbounds float* %tmp11425, i64 1 + %tmp11427 = getelementptr inbounds float* %tmp11426, i64 1 + %tmp11428 = getelementptr inbounds float* %tmp11427, i64 1 + %tmp11429 = getelementptr inbounds float* %tmp11428, i64 1 + %tmp11430 = getelementptr inbounds float* %tmp11429, i64 1 + %tmp11431 = getelementptr inbounds float* %tmp11430, i64 1 + %tmp11432 = getelementptr inbounds float* %tmp11431, i64 1 + %tmp11433 = getelementptr inbounds float* %tmp11432, i64 1 + %tmp11434 = getelementptr inbounds float* %tmp11433, i64 1 + %tmp11435 = getelementptr inbounds float* %tmp11434, i64 1 + %tmp11436 = getelementptr inbounds float* %tmp11435, i64 1 + %tmp11437 = getelementptr inbounds float* %tmp11436, i64 1 + %tmp11438 = getelementptr inbounds float* %tmp11437, i64 1 + %tmp11439 = getelementptr inbounds float* %tmp11438, i64 1 + %tmp11440 = getelementptr inbounds float* %tmp11439, i64 1 + %tmp11441 = getelementptr inbounds float* %tmp11440, i64 1 + %tmp11442 = 
getelementptr inbounds float* %tmp11441, i64 1 + %tmp11443 = getelementptr inbounds float* %tmp11442, i64 1 + %tmp11444 = getelementptr inbounds float* %tmp11443, i64 1 + %tmp11445 = getelementptr inbounds float* %tmp11444, i64 1 + %tmp11446 = getelementptr inbounds float* %tmp11445, i64 1 + %tmp11447 = getelementptr inbounds float* %tmp11446, i64 1 + %tmp11448 = getelementptr inbounds float* %tmp11447, i64 1 + %tmp11449 = getelementptr inbounds float* %tmp11448, i64 1 + %tmp11450 = getelementptr inbounds float* %tmp11449, i64 1 + %tmp11451 = getelementptr inbounds float* %tmp11450, i64 1 + %tmp11452 = getelementptr inbounds float* %tmp11451, i64 1 + %tmp11453 = getelementptr inbounds float* %tmp11452, i64 1 + %tmp11454 = getelementptr inbounds float* %tmp11453, i64 1 + %tmp11455 = getelementptr inbounds float* %tmp11454, i64 1 + %tmp11456 = getelementptr inbounds float* %tmp11455, i64 1 + %tmp11457 = getelementptr inbounds float* %tmp11456, i64 1 + %tmp11458 = getelementptr inbounds float* %tmp11457, i64 1 + %tmp11459 = getelementptr inbounds float* %tmp11458, i64 1 + %tmp11460 = getelementptr inbounds float* %tmp11459, i64 1 + %tmp11461 = getelementptr inbounds float* %tmp11460, i64 1 + %tmp11462 = getelementptr inbounds float* %tmp11461, i64 1 + %tmp11463 = getelementptr inbounds float* %tmp11462, i64 1 + %tmp11464 = getelementptr inbounds float* %tmp11463, i64 1 + %tmp11465 = getelementptr inbounds float* %tmp11464, i64 1 + %tmp11466 = getelementptr inbounds float* %tmp11465, i64 1 + %tmp11467 = getelementptr inbounds float* %tmp11466, i64 1 + %tmp11468 = getelementptr inbounds float* %tmp11467, i64 1 + %tmp11469 = getelementptr inbounds float* %tmp11468, i64 1 + %tmp11470 = getelementptr inbounds float* %tmp11469, i64 1 + %tmp11471 = getelementptr inbounds float* %tmp11470, i64 1 + %tmp11472 = getelementptr inbounds float* %tmp11471, i64 1 + %tmp11473 = getelementptr inbounds float* %tmp11472, i64 1 + %tmp11474 = getelementptr inbounds float* %tmp11473, i64 1 
+ %tmp11475 = getelementptr inbounds float* %tmp11474, i64 1 + %tmp11476 = getelementptr inbounds float* %tmp11475, i64 1 + %tmp11477 = getelementptr inbounds float* %tmp11476, i64 1 + %tmp11478 = getelementptr inbounds float* %tmp11477, i64 1 + %tmp11479 = getelementptr inbounds float* %tmp11478, i64 1 + %tmp11480 = getelementptr inbounds float* %tmp11479, i64 1 + %tmp11481 = getelementptr inbounds float* %tmp11480, i64 1 + %tmp11482 = getelementptr inbounds float* %tmp11481, i64 1 + %tmp11483 = getelementptr inbounds float* %tmp11482, i64 1 + %tmp11484 = getelementptr inbounds float* %tmp11483, i64 1 + %tmp11485 = getelementptr inbounds float* %tmp11484, i64 1 + %tmp11486 = getelementptr inbounds float* %tmp11485, i64 1 + %tmp11487 = getelementptr inbounds float* %tmp11486, i64 1 + %tmp11488 = getelementptr inbounds float* %tmp11487, i64 1 + %tmp11489 = getelementptr inbounds float* %tmp11488, i64 1 + %tmp11490 = getelementptr inbounds float* %tmp11489, i64 1 + %tmp11491 = getelementptr inbounds float* %tmp11490, i64 1 + %tmp11492 = getelementptr inbounds float* %tmp11491, i64 1 + %tmp11493 = getelementptr inbounds float* %tmp11492, i64 1 + %tmp11494 = getelementptr inbounds float* %tmp11493, i64 1 + %tmp11495 = getelementptr inbounds float* %tmp11494, i64 1 + %tmp11496 = getelementptr inbounds float* %tmp11495, i64 1 + %tmp11497 = getelementptr inbounds float* %tmp11496, i64 1 + %tmp11498 = getelementptr inbounds float* %tmp11497, i64 1 + %tmp11499 = getelementptr inbounds float* %tmp11498, i64 1 + %tmp11500 = getelementptr inbounds float* %tmp11499, i64 1 + %tmp11501 = getelementptr inbounds float* %tmp11500, i64 1 + %tmp11502 = getelementptr inbounds float* %tmp11501, i64 1 + %tmp11503 = getelementptr inbounds float* %tmp11502, i64 1 + %tmp11504 = getelementptr inbounds float* %tmp11503, i64 1 + %tmp11505 = getelementptr inbounds float* %tmp11504, i64 1 + %tmp11506 = getelementptr inbounds float* %tmp11505, i64 1 + %tmp11507 = getelementptr inbounds float* 
%tmp11506, i64 1 + %tmp11508 = getelementptr inbounds float* %tmp11507, i64 1 + %tmp11509 = getelementptr inbounds float* %tmp11508, i64 1 + %tmp11510 = getelementptr inbounds float* %tmp11509, i64 1 + %tmp11511 = getelementptr inbounds float* %tmp11510, i64 1 + %tmp11512 = getelementptr inbounds float* %tmp11511, i64 1 + %tmp11513 = getelementptr inbounds float* %tmp11512, i64 1 + %tmp11514 = getelementptr inbounds float* %tmp11513, i64 1 + %tmp11515 = getelementptr inbounds float* %tmp11514, i64 1 + %tmp11516 = getelementptr inbounds float* %tmp11515, i64 1 + %tmp11517 = getelementptr inbounds float* %tmp11516, i64 1 + %tmp11518 = getelementptr inbounds float* %tmp11517, i64 1 + %tmp11519 = getelementptr inbounds float* %tmp11518, i64 1 + %tmp11520 = getelementptr inbounds float* %tmp11519, i64 1 + %tmp11521 = getelementptr inbounds float* %tmp11520, i64 1 + %tmp11522 = getelementptr inbounds float* %tmp11521, i64 1 + %tmp11523 = getelementptr inbounds float* %tmp11522, i64 1 + %tmp11524 = getelementptr inbounds float* %tmp11523, i64 1 + %tmp11525 = getelementptr inbounds float* %tmp11524, i64 1 + %tmp11526 = getelementptr inbounds float* %tmp11525, i64 1 + %tmp11527 = getelementptr inbounds float* %tmp11526, i64 1 + %tmp11528 = getelementptr inbounds float* %tmp11527, i64 1 + %tmp11529 = getelementptr inbounds float* %tmp11528, i64 1 + %tmp11530 = getelementptr inbounds float* %tmp11529, i64 1 + %tmp11531 = getelementptr inbounds float* %tmp11530, i64 1 + %tmp11532 = getelementptr inbounds float* %tmp11531, i64 1 + %tmp11533 = getelementptr inbounds float* %tmp11532, i64 1 + %tmp11534 = getelementptr inbounds float* %tmp11533, i64 1 + %tmp11535 = getelementptr inbounds float* %tmp11534, i64 1 + %tmp11536 = getelementptr inbounds float* %tmp11535, i64 1 + %tmp11537 = getelementptr inbounds float* %tmp11536, i64 1 + %tmp11538 = getelementptr inbounds float* %tmp11537, i64 1 + %tmp11539 = getelementptr inbounds float* %tmp11538, i64 1 + %tmp11540 = getelementptr 
inbounds float* %tmp11539, i64 1 + %tmp11541 = getelementptr inbounds float* %tmp11540, i64 1 + %tmp11542 = getelementptr inbounds float* %tmp11541, i64 1 + %tmp11543 = getelementptr inbounds float* %tmp11542, i64 1 + %tmp11544 = getelementptr inbounds float* %tmp11543, i64 1 + %tmp11545 = getelementptr inbounds float* %tmp11544, i64 1 + %tmp11546 = getelementptr inbounds float* %tmp11545, i64 1 + %tmp11547 = getelementptr inbounds float* %tmp11546, i64 1 + %tmp11548 = getelementptr inbounds float* %tmp11547, i64 1 + %tmp11549 = getelementptr inbounds float* %tmp11548, i64 1 + %tmp11550 = getelementptr inbounds float* %tmp11549, i64 1 + %tmp11551 = getelementptr inbounds float* %tmp11550, i64 1 + %tmp11552 = getelementptr inbounds float* %tmp11551, i64 1 + %tmp11553 = getelementptr inbounds float* %tmp11552, i64 1 + %tmp11554 = getelementptr inbounds float* %tmp11553, i64 1 + %tmp11555 = getelementptr inbounds float* %tmp11554, i64 1 + %tmp11556 = getelementptr inbounds float* %tmp11555, i64 1 + %tmp11557 = getelementptr inbounds float* %tmp11556, i64 1 + %tmp11558 = getelementptr inbounds float* %tmp11557, i64 1 + %tmp11559 = getelementptr inbounds float* %tmp11558, i64 1 + %tmp11560 = getelementptr inbounds float* %tmp11559, i64 1 + %tmp11561 = getelementptr inbounds float* %tmp11560, i64 1 + %tmp11562 = getelementptr inbounds float* %tmp11561, i64 1 + %tmp11563 = getelementptr inbounds float* %tmp11562, i64 1 + %tmp11564 = getelementptr inbounds float* %tmp11563, i64 1 + %tmp11565 = getelementptr inbounds float* %tmp11564, i64 1 + %tmp11566 = getelementptr inbounds float* %tmp11565, i64 1 + %tmp11567 = getelementptr inbounds float* %tmp11566, i64 1 + %tmp11568 = getelementptr inbounds float* %tmp11567, i64 1 + %tmp11569 = getelementptr inbounds float* %tmp11568, i64 1 + %tmp11570 = getelementptr inbounds float* %tmp11569, i64 1 + %tmp11571 = getelementptr inbounds float* %tmp11570, i64 1 + %tmp11572 = getelementptr inbounds float* %tmp11571, i64 1 + %tmp11573 = 
getelementptr inbounds float* %tmp11572, i64 1 + %tmp11574 = getelementptr inbounds float* %tmp11573, i64 1 + %tmp11575 = getelementptr inbounds float* %tmp11574, i64 1 + %tmp11576 = getelementptr inbounds float* %tmp11575, i64 1 + %tmp11577 = getelementptr inbounds float* %tmp11576, i64 1 + %tmp11578 = getelementptr inbounds float* %tmp11577, i64 1 + %tmp11579 = getelementptr inbounds float* %tmp11578, i64 1 + %tmp11580 = getelementptr inbounds float* %tmp11579, i64 1 + %tmp11581 = getelementptr inbounds float* %tmp11580, i64 1 + %tmp11582 = getelementptr inbounds float* %tmp11581, i64 1 + %tmp11583 = getelementptr inbounds float* %tmp11582, i64 1 + %tmp11584 = getelementptr inbounds float* %tmp11583, i64 1 + %tmp11585 = getelementptr inbounds float* %tmp11584, i64 1 + %tmp11586 = getelementptr inbounds float* %tmp11585, i64 1 + %tmp11587 = getelementptr inbounds float* %tmp11586, i64 1 + %tmp11588 = getelementptr inbounds float* %tmp11587, i64 1 + %tmp11589 = getelementptr inbounds float* %tmp11588, i64 1 + %tmp11590 = getelementptr inbounds float* %tmp11589, i64 1 + %tmp11591 = getelementptr inbounds float* %tmp11590, i64 1 + %tmp11592 = getelementptr inbounds float* %tmp11591, i64 1 + %tmp11593 = getelementptr inbounds float* %tmp11592, i64 1 + %tmp11594 = getelementptr inbounds float* %tmp11593, i64 1 + %tmp11595 = getelementptr inbounds float* %tmp11594, i64 1 + %tmp11596 = getelementptr inbounds float* %tmp11595, i64 1 + %tmp11597 = getelementptr inbounds float* %tmp11596, i64 1 + %tmp11598 = getelementptr inbounds float* %tmp11597, i64 1 + %tmp11599 = getelementptr inbounds float* %tmp11598, i64 1 + %tmp11600 = getelementptr inbounds float* %tmp11599, i64 1 + %tmp11601 = getelementptr inbounds float* %tmp11600, i64 1 + %tmp11602 = getelementptr inbounds float* %tmp11601, i64 1 + %tmp11603 = getelementptr inbounds float* %tmp11602, i64 1 + %tmp11604 = getelementptr inbounds float* %tmp11603, i64 1 + %tmp11605 = getelementptr inbounds float* %tmp11604, i64 1 
+ %tmp11606 = getelementptr inbounds float* %tmp11605, i64 1 + %tmp11607 = getelementptr inbounds float* %tmp11606, i64 1 + %tmp11608 = getelementptr inbounds float* %tmp11607, i64 1 + %tmp11609 = getelementptr inbounds float* %tmp11608, i64 1 + %tmp11610 = getelementptr inbounds float* %tmp11609, i64 1 + %tmp11611 = getelementptr inbounds float* %tmp11610, i64 1 + %tmp11612 = getelementptr inbounds float* %tmp11611, i64 1 + %tmp11613 = getelementptr inbounds float* %tmp11612, i64 1 + %tmp11614 = getelementptr inbounds float* %tmp11613, i64 1 + %tmp11615 = getelementptr inbounds float* %tmp11614, i64 1 + %tmp11616 = getelementptr inbounds float* %tmp11615, i64 1 + %tmp11617 = getelementptr inbounds float* %tmp11616, i64 1 + %tmp11618 = getelementptr inbounds float* %tmp11617, i64 1 + %tmp11619 = getelementptr inbounds float* %tmp11618, i64 1 + %tmp11620 = getelementptr inbounds float* %tmp11619, i64 1 + %tmp11621 = getelementptr inbounds float* %tmp11620, i64 1 + %tmp11622 = getelementptr inbounds float* %tmp11621, i64 1 + %tmp11623 = getelementptr inbounds float* %tmp11622, i64 1 + %tmp11624 = getelementptr inbounds float* %tmp11623, i64 1 + %tmp11625 = getelementptr inbounds float* %tmp11624, i64 1 + %tmp11626 = getelementptr inbounds float* %tmp11625, i64 1 + %tmp11627 = getelementptr inbounds float* %tmp11626, i64 1 + %tmp11628 = getelementptr inbounds float* %tmp11627, i64 1 + %tmp11629 = getelementptr inbounds float* %tmp11628, i64 1 + %tmp11630 = getelementptr inbounds float* %tmp11629, i64 1 + %tmp11631 = getelementptr inbounds float* %tmp11630, i64 1 + %tmp11632 = getelementptr inbounds float* %tmp11631, i64 1 + %tmp11633 = getelementptr inbounds float* %tmp11632, i64 1 + %tmp11634 = getelementptr inbounds float* %tmp11633, i64 1 + %tmp11635 = getelementptr inbounds float* %tmp11634, i64 1 + %tmp11636 = getelementptr inbounds float* %tmp11635, i64 1 + %tmp11637 = getelementptr inbounds float* %tmp11636, i64 1 + %tmp11638 = getelementptr inbounds float* 
%tmp11637, i64 1 + %tmp11639 = getelementptr inbounds float* %tmp11638, i64 1 + %tmp11640 = getelementptr inbounds float* %tmp11639, i64 1 + %tmp11641 = getelementptr inbounds float* %tmp11640, i64 1 + %tmp11642 = getelementptr inbounds float* %tmp11641, i64 1 + %tmp11643 = getelementptr inbounds float* %tmp11642, i64 1 + %tmp11644 = getelementptr inbounds float* %tmp11643, i64 1 + %tmp11645 = getelementptr inbounds float* %tmp11644, i64 1 + %tmp11646 = getelementptr inbounds float* %tmp11645, i64 1 + %tmp11647 = getelementptr inbounds float* %tmp11646, i64 1 + %tmp11648 = getelementptr inbounds float* %tmp11647, i64 1 + %tmp11649 = getelementptr inbounds float* %tmp11648, i64 1 + %tmp11650 = getelementptr inbounds float* %tmp11649, i64 1 + %tmp11651 = getelementptr inbounds float* %tmp11650, i64 1 + %tmp11652 = getelementptr inbounds float* %tmp11651, i64 1 + %tmp11653 = getelementptr inbounds float* %tmp11652, i64 1 + %tmp11654 = getelementptr inbounds float* %tmp11653, i64 1 + %tmp11655 = getelementptr inbounds float* %tmp11654, i64 1 + %tmp11656 = getelementptr inbounds float* %tmp11655, i64 1 + %tmp11657 = getelementptr inbounds float* %tmp11656, i64 1 + %tmp11658 = getelementptr inbounds float* %tmp11657, i64 1 + %tmp11659 = getelementptr inbounds float* %tmp11658, i64 1 + %tmp11660 = getelementptr inbounds float* %tmp11659, i64 1 + %tmp11661 = getelementptr inbounds float* %tmp11660, i64 1 + %tmp11662 = getelementptr inbounds float* %tmp11661, i64 1 + %tmp11663 = getelementptr inbounds float* %tmp11662, i64 1 + %tmp11664 = getelementptr inbounds float* %tmp11663, i64 1 + %tmp11665 = getelementptr inbounds float* %tmp11664, i64 1 + %tmp11666 = getelementptr inbounds float* %tmp11665, i64 1 + %tmp11667 = getelementptr inbounds float* %tmp11666, i64 1 + %tmp11668 = getelementptr inbounds float* %tmp11667, i64 1 + %tmp11669 = getelementptr inbounds float* %tmp11668, i64 1 + %tmp11670 = getelementptr inbounds float* %tmp11669, i64 1 + %tmp11671 = getelementptr 
inbounds float* %tmp11670, i64 1 + %tmp11672 = getelementptr inbounds float* %tmp11671, i64 1 + %tmp11673 = getelementptr inbounds float* %tmp11672, i64 1 + %tmp11674 = getelementptr inbounds float* %tmp11673, i64 1 + %tmp11675 = getelementptr inbounds float* %tmp11674, i64 1 + %tmp11676 = getelementptr inbounds float* %tmp11675, i64 1 + %tmp11677 = getelementptr inbounds float* %tmp11676, i64 1 + %tmp11678 = getelementptr inbounds float* %tmp11677, i64 1 + %tmp11679 = getelementptr inbounds float* %tmp11678, i64 1 + %tmp11680 = getelementptr inbounds float* %tmp11679, i64 1 + %tmp11681 = getelementptr inbounds float* %tmp11680, i64 1 + %tmp11682 = getelementptr inbounds float* %tmp11681, i64 1 + %tmp11683 = getelementptr inbounds float* %tmp11682, i64 1 + %tmp11684 = getelementptr inbounds float* %tmp11683, i64 1 + %tmp11685 = getelementptr inbounds float* %tmp11684, i64 1 + %tmp11686 = getelementptr inbounds float* %tmp11685, i64 1 + %tmp11687 = getelementptr inbounds float* %tmp11686, i64 1 + %tmp11688 = getelementptr inbounds float* %tmp11687, i64 1 + %tmp11689 = getelementptr inbounds float* %tmp11688, i64 1 + %tmp11690 = getelementptr inbounds float* %tmp11689, i64 1 + %tmp11691 = getelementptr inbounds float* %tmp11690, i64 1 + %tmp11692 = getelementptr inbounds float* %tmp11691, i64 1 + %tmp11693 = getelementptr inbounds float* %tmp11692, i64 1 + %tmp11694 = getelementptr inbounds float* %tmp11693, i64 1 + %tmp11695 = getelementptr inbounds float* %tmp11694, i64 1 + %tmp11696 = getelementptr inbounds float* %tmp11695, i64 1 + %tmp11697 = getelementptr inbounds float* %tmp11696, i64 1 + %tmp11698 = getelementptr inbounds float* %tmp11697, i64 1 + %tmp11699 = getelementptr inbounds float* %tmp11698, i64 1 + %tmp11700 = getelementptr inbounds float* %tmp11699, i64 1 + %tmp11701 = getelementptr inbounds float* %tmp11700, i64 1 + %tmp11702 = getelementptr inbounds float* %tmp11701, i64 1 + %tmp11703 = getelementptr inbounds float* %tmp11702, i64 1 + %tmp11704 = 
getelementptr inbounds float* %tmp11703, i64 1 + %tmp11705 = getelementptr inbounds float* %tmp11704, i64 1 + %tmp11706 = getelementptr inbounds float* %tmp11705, i64 1 + %tmp11707 = getelementptr inbounds float* %tmp11706, i64 1 + %tmp11708 = getelementptr inbounds float* %tmp11707, i64 1 + %tmp11709 = getelementptr inbounds float* %tmp11708, i64 1 + %tmp11710 = getelementptr inbounds float* %tmp11709, i64 1 + %tmp11711 = getelementptr inbounds float* %tmp11710, i64 1 + %tmp11712 = getelementptr inbounds float* %tmp11711, i64 1 + %tmp11713 = getelementptr inbounds float* %tmp11712, i64 1 + %tmp11714 = getelementptr inbounds float* %tmp11713, i64 1 + %tmp11715 = getelementptr inbounds float* %tmp11714, i64 1 + %tmp11716 = getelementptr inbounds float* %tmp11715, i64 1 + %tmp11717 = getelementptr inbounds float* %tmp11716, i64 1 + %tmp11718 = getelementptr inbounds float* %tmp11717, i64 1 + %tmp11719 = getelementptr inbounds float* %tmp11718, i64 1 + %tmp11720 = getelementptr inbounds float* %tmp11719, i64 1 + %tmp11721 = getelementptr inbounds float* %tmp11720, i64 1 + %tmp11722 = getelementptr inbounds float* %tmp11721, i64 1 + %tmp11723 = getelementptr inbounds float* %tmp11722, i64 1 + %tmp11724 = getelementptr inbounds float* %tmp11723, i64 1 + %tmp11725 = getelementptr inbounds float* %tmp11724, i64 1 + %tmp11726 = getelementptr inbounds float* %tmp11725, i64 1 + %tmp11727 = getelementptr inbounds float* %tmp11726, i64 1 + %tmp11728 = getelementptr inbounds float* %tmp11727, i64 1 + %tmp11729 = getelementptr inbounds float* %tmp11728, i64 1 + %tmp11730 = getelementptr inbounds float* %tmp11729, i64 1 + %tmp11731 = getelementptr inbounds float* %tmp11730, i64 1 + %tmp11732 = getelementptr inbounds float* %tmp11731, i64 1 + %tmp11733 = getelementptr inbounds float* %tmp11732, i64 1 + %tmp11734 = getelementptr inbounds float* %tmp11733, i64 1 + %tmp11735 = getelementptr inbounds float* %tmp11734, i64 1 + %tmp11736 = getelementptr inbounds float* %tmp11735, i64 1 
+ %tmp11737 = getelementptr inbounds float* %tmp11736, i64 1 + %tmp11738 = getelementptr inbounds float* %tmp11737, i64 1 + %tmp11739 = getelementptr inbounds float* %tmp11738, i64 1 + %tmp11740 = getelementptr inbounds float* %tmp11739, i64 1 + %tmp11741 = getelementptr inbounds float* %tmp11740, i64 1 + %tmp11742 = getelementptr inbounds float* %tmp11741, i64 1 + %tmp11743 = getelementptr inbounds float* %tmp11742, i64 1 + %tmp11744 = getelementptr inbounds float* %tmp11743, i64 1 + %tmp11745 = getelementptr inbounds float* %tmp11744, i64 1 + %tmp11746 = getelementptr inbounds float* %tmp11745, i64 1 + %tmp11747 = getelementptr inbounds float* %tmp11746, i64 1 + %tmp11748 = getelementptr inbounds float* %tmp11747, i64 1 + %tmp11749 = getelementptr inbounds float* %tmp11748, i64 1 + %tmp11750 = getelementptr inbounds float* %tmp11749, i64 1 + %tmp11751 = getelementptr inbounds float* %tmp11750, i64 1 + %tmp11752 = getelementptr inbounds float* %tmp11751, i64 1 + %tmp11753 = getelementptr inbounds float* %tmp11752, i64 1 + %tmp11754 = getelementptr inbounds float* %tmp11753, i64 1 + %tmp11755 = getelementptr inbounds float* %tmp11754, i64 1 + %tmp11756 = getelementptr inbounds float* %tmp11755, i64 1 + %tmp11757 = getelementptr inbounds float* %tmp11756, i64 1 + %tmp11758 = getelementptr inbounds float* %tmp11757, i64 1 + %tmp11759 = getelementptr inbounds float* %tmp11758, i64 1 + %tmp11760 = getelementptr inbounds float* %tmp11759, i64 1 + %tmp11761 = getelementptr inbounds float* %tmp11760, i64 1 + %tmp11762 = getelementptr inbounds float* %tmp11761, i64 1 + %tmp11763 = getelementptr inbounds float* %tmp11762, i64 1 + %tmp11764 = getelementptr inbounds float* %tmp11763, i64 1 + %tmp11765 = getelementptr inbounds float* %tmp11764, i64 1 + %tmp11766 = getelementptr inbounds float* %tmp11765, i64 1 + %tmp11767 = getelementptr inbounds float* %tmp11766, i64 1 + %tmp11768 = getelementptr inbounds float* %tmp11767, i64 1 + %tmp11769 = getelementptr inbounds float* 
%tmp11768, i64 1 + %tmp11770 = getelementptr inbounds float* %tmp11769, i64 1 + %tmp11771 = getelementptr inbounds float* %tmp11770, i64 1 + %tmp11772 = getelementptr inbounds float* %tmp11771, i64 1 + %tmp11773 = getelementptr inbounds float* %tmp11772, i64 1 + %tmp11774 = getelementptr inbounds float* %tmp11773, i64 1 + %tmp11775 = getelementptr inbounds float* %tmp11774, i64 1 + %tmp11776 = getelementptr inbounds float* %tmp11775, i64 1 + %tmp11777 = getelementptr inbounds float* %tmp11776, i64 1 + %tmp11778 = getelementptr inbounds float* %tmp11777, i64 1 + %tmp11779 = getelementptr inbounds float* %tmp11778, i64 1 + %tmp11780 = getelementptr inbounds float* %tmp11779, i64 1 + %tmp11781 = getelementptr inbounds float* %tmp11780, i64 1 + %tmp11782 = getelementptr inbounds float* %tmp11781, i64 1 + %tmp11783 = getelementptr inbounds float* %tmp11782, i64 1 + %tmp11784 = getelementptr inbounds float* %tmp11783, i64 1 + %tmp11785 = getelementptr inbounds float* %tmp11784, i64 1 + %tmp11786 = getelementptr inbounds float* %tmp11785, i64 1 + %tmp11787 = getelementptr inbounds float* %tmp11786, i64 1 + %tmp11788 = getelementptr inbounds float* %tmp11787, i64 1 + %tmp11789 = getelementptr inbounds float* %tmp11788, i64 1 + %tmp11790 = getelementptr inbounds float* %tmp11789, i64 1 + %tmp11791 = getelementptr inbounds float* %tmp11790, i64 1 + %tmp11792 = getelementptr inbounds float* %tmp11791, i64 1 + %tmp11793 = getelementptr inbounds float* %tmp11792, i64 1 + %tmp11794 = getelementptr inbounds float* %tmp11793, i64 1 + %tmp11795 = getelementptr inbounds float* %tmp11794, i64 1 + %tmp11796 = getelementptr inbounds float* %tmp11795, i64 1 + %tmp11797 = getelementptr inbounds float* %tmp11796, i64 1 + %tmp11798 = getelementptr inbounds float* %tmp11797, i64 1 + %tmp11799 = getelementptr inbounds float* %tmp11798, i64 1 + %tmp11800 = getelementptr inbounds float* %tmp11799, i64 1 + %tmp11801 = getelementptr inbounds float* %tmp11800, i64 1 + %tmp11802 = getelementptr 
inbounds float* %tmp11801, i64 1 + %tmp11803 = getelementptr inbounds float* %tmp11802, i64 1 + %tmp11804 = getelementptr inbounds float* %tmp11803, i64 1 + %tmp11805 = getelementptr inbounds float* %tmp11804, i64 1 + %tmp11806 = getelementptr inbounds float* %tmp11805, i64 1 + %tmp11807 = getelementptr inbounds float* %tmp11806, i64 1 + %tmp11808 = getelementptr inbounds float* %tmp11807, i64 1 + %tmp11809 = getelementptr inbounds float* %tmp11808, i64 1 + %tmp11810 = getelementptr inbounds float* %tmp11809, i64 1 + %tmp11811 = getelementptr inbounds float* %tmp11810, i64 1 + %tmp11812 = getelementptr inbounds float* %tmp11811, i64 1 + %tmp11813 = getelementptr inbounds float* %tmp11812, i64 1 + %tmp11814 = getelementptr inbounds float* %tmp11813, i64 1 + %tmp11815 = getelementptr inbounds float* %tmp11814, i64 1 + %tmp11816 = getelementptr inbounds float* %tmp11815, i64 1 + %tmp11817 = getelementptr inbounds float* %tmp11816, i64 1 + %tmp11818 = getelementptr inbounds float* %tmp11817, i64 1 + %tmp11819 = getelementptr inbounds float* %tmp11818, i64 1 + %tmp11820 = getelementptr inbounds float* %tmp11819, i64 1 + %tmp11821 = getelementptr inbounds float* %tmp11820, i64 1 + %tmp11822 = getelementptr inbounds float* %tmp11821, i64 1 + %tmp11823 = getelementptr inbounds float* %tmp11822, i64 1 + %tmp11824 = getelementptr inbounds float* %tmp11823, i64 1 + %tmp11825 = getelementptr inbounds float* %tmp11824, i64 1 + %tmp11826 = getelementptr inbounds float* %tmp11825, i64 1 + %tmp11827 = getelementptr inbounds float* %tmp11826, i64 1 + %tmp11828 = getelementptr inbounds float* %tmp11827, i64 1 + %tmp11829 = getelementptr inbounds float* %tmp11828, i64 1 + %tmp11830 = getelementptr inbounds float* %tmp11829, i64 1 + %tmp11831 = getelementptr inbounds float* %tmp11830, i64 1 + %tmp11832 = getelementptr inbounds float* %tmp11831, i64 1 + %tmp11833 = getelementptr inbounds float* %tmp11832, i64 1 + %tmp11834 = getelementptr inbounds float* %tmp11833, i64 1 + %tmp11835 = 
getelementptr inbounds float* %tmp11834, i64 1 + %tmp11836 = getelementptr inbounds float* %tmp11835, i64 1 + %tmp11837 = getelementptr inbounds float* %tmp11836, i64 1 + %tmp11838 = getelementptr inbounds float* %tmp11837, i64 1 + %tmp11839 = getelementptr inbounds float* %tmp11838, i64 1 + %tmp11840 = getelementptr inbounds float* %tmp11839, i64 1 + %tmp11841 = getelementptr inbounds float* %tmp11840, i64 1 + %tmp11842 = getelementptr inbounds float* %tmp11841, i64 1 + %tmp11843 = getelementptr inbounds float* %tmp11842, i64 1 + %tmp11844 = getelementptr inbounds float* %tmp11843, i64 1 + %tmp11845 = getelementptr inbounds float* %tmp11844, i64 1 + %tmp11846 = getelementptr inbounds float* %tmp11845, i64 1 + %tmp11847 = getelementptr inbounds float* %tmp11846, i64 1 + %tmp11848 = getelementptr inbounds float* %tmp11847, i64 1 + %tmp11849 = getelementptr inbounds float* %tmp11848, i64 1 + %tmp11850 = getelementptr inbounds float* %tmp11849, i64 1 + %tmp11851 = getelementptr inbounds float* %tmp11850, i64 1 + %tmp11852 = getelementptr inbounds float* %tmp11851, i64 1 + %tmp11853 = getelementptr inbounds float* %tmp11852, i64 1 + %tmp11854 = getelementptr inbounds float* %tmp11853, i64 1 + %tmp11855 = getelementptr inbounds float* %tmp11854, i64 1 + %tmp11856 = getelementptr inbounds float* %tmp11855, i64 1 + %tmp11857 = getelementptr inbounds float* %tmp11856, i64 1 + %tmp11858 = getelementptr inbounds float* %tmp11857, i64 1 + %tmp11859 = getelementptr inbounds float* %tmp11858, i64 1 + %tmp11860 = getelementptr inbounds float* %tmp11859, i64 1 + %tmp11861 = getelementptr inbounds float* %tmp11860, i64 1 + %tmp11862 = getelementptr inbounds float* %tmp11861, i64 1 + %tmp11863 = getelementptr inbounds float* %tmp11862, i64 1 + %tmp11864 = getelementptr inbounds float* %tmp11863, i64 1 + %tmp11865 = getelementptr inbounds float* %tmp11864, i64 1 + %tmp11866 = getelementptr inbounds float* %tmp11865, i64 1 + %tmp11867 = getelementptr inbounds float* %tmp11866, i64 1 
+ %tmp11868 = getelementptr inbounds float* %tmp11867, i64 1 + %tmp11869 = getelementptr inbounds float* %tmp11868, i64 1 + %tmp11870 = getelementptr inbounds float* %tmp11869, i64 1 + %tmp11871 = getelementptr inbounds float* %tmp11870, i64 1 + %tmp11872 = getelementptr inbounds float* %tmp11871, i64 1 + %tmp11873 = getelementptr inbounds float* %tmp11872, i64 1 + %tmp11874 = getelementptr inbounds float* %tmp11873, i64 1 + %tmp11875 = getelementptr inbounds float* %tmp11874, i64 1 + %tmp11876 = getelementptr inbounds float* %tmp11875, i64 1 + %tmp11877 = getelementptr inbounds float* %tmp11876, i64 1 + %tmp11878 = getelementptr inbounds float* %tmp11877, i64 1 + %tmp11879 = getelementptr inbounds float* %tmp11878, i64 1 + %tmp11880 = getelementptr inbounds float* %tmp11879, i64 1 + %tmp11881 = getelementptr inbounds float* %tmp11880, i64 1 + %tmp11882 = getelementptr inbounds float* %tmp11881, i64 1 + %tmp11883 = getelementptr inbounds float* %tmp11882, i64 1 + %tmp11884 = getelementptr inbounds float* %tmp11883, i64 1 + %tmp11885 = getelementptr inbounds float* %tmp11884, i64 1 + %tmp11886 = getelementptr inbounds float* %tmp11885, i64 1 + %tmp11887 = getelementptr inbounds float* %tmp11886, i64 1 + %tmp11888 = getelementptr inbounds float* %tmp11887, i64 1 + %tmp11889 = getelementptr inbounds float* %tmp11888, i64 1 + %tmp11890 = getelementptr inbounds float* %tmp11889, i64 1 + %tmp11891 = getelementptr inbounds float* %tmp11890, i64 1 + %tmp11892 = getelementptr inbounds float* %tmp11891, i64 1 + %tmp11893 = getelementptr inbounds float* %tmp11892, i64 1 + %tmp11894 = getelementptr inbounds float* %tmp11893, i64 1 + %tmp11895 = getelementptr inbounds float* %tmp11894, i64 1 + %tmp11896 = getelementptr inbounds float* %tmp11895, i64 1 + %tmp11897 = getelementptr inbounds float* %tmp11896, i64 1 + %tmp11898 = getelementptr inbounds float* %tmp11897, i64 1 + %tmp11899 = getelementptr inbounds float* %tmp11898, i64 1 + %tmp11900 = getelementptr inbounds float* 
%tmp11899, i64 1 + %tmp11901 = getelementptr inbounds float* %tmp11900, i64 1 + %tmp11902 = getelementptr inbounds float* %tmp11901, i64 1 + %tmp11903 = getelementptr inbounds float* %tmp11902, i64 1 + %tmp11904 = getelementptr inbounds float* %tmp11903, i64 1 + %tmp11905 = getelementptr inbounds float* %tmp11904, i64 1 + %tmp11906 = getelementptr inbounds float* %tmp11905, i64 1 + %tmp11907 = getelementptr inbounds float* %tmp11906, i64 1 + %tmp11908 = getelementptr inbounds float* %tmp11907, i64 1 + %tmp11909 = getelementptr inbounds float* %tmp11908, i64 1 + %tmp11910 = getelementptr inbounds float* %tmp11909, i64 1 + %tmp11911 = getelementptr inbounds float* %tmp11910, i64 1 + %tmp11912 = getelementptr inbounds float* %tmp11911, i64 1 + %tmp11913 = getelementptr inbounds float* %tmp11912, i64 1 + %tmp11914 = getelementptr inbounds float* %tmp11913, i64 1 + %tmp11915 = getelementptr inbounds float* %tmp11914, i64 1 + %tmp11916 = getelementptr inbounds float* %tmp11915, i64 1 + %tmp11917 = getelementptr inbounds float* %tmp11916, i64 1 + %tmp11918 = getelementptr inbounds float* %tmp11917, i64 1 + %tmp11919 = getelementptr inbounds float* %tmp11918, i64 1 + %tmp11920 = getelementptr inbounds float* %tmp11919, i64 1 + %tmp11921 = getelementptr inbounds float* %tmp11920, i64 1 + %tmp11922 = getelementptr inbounds float* %tmp11921, i64 1 + %tmp11923 = getelementptr inbounds float* %tmp11922, i64 1 + %tmp11924 = getelementptr inbounds float* %tmp11923, i64 1 + %tmp11925 = getelementptr inbounds float* %tmp11924, i64 1 + %tmp11926 = getelementptr inbounds float* %tmp11925, i64 1 + %tmp11927 = getelementptr inbounds float* %tmp11926, i64 1 + %tmp11928 = getelementptr inbounds float* %tmp11927, i64 1 + %tmp11929 = getelementptr inbounds float* %tmp11928, i64 1 + %tmp11930 = getelementptr inbounds float* %tmp11929, i64 1 + %tmp11931 = getelementptr inbounds float* %tmp11930, i64 1 + %tmp11932 = getelementptr inbounds float* %tmp11931, i64 1 + %tmp11933 = getelementptr 
inbounds float* %tmp11932, i64 1 + %tmp11934 = getelementptr inbounds float* %tmp11933, i64 1 + %tmp11935 = getelementptr inbounds float* %tmp11934, i64 1 + %tmp11936 = getelementptr inbounds float* %tmp11935, i64 1 + %tmp11937 = getelementptr inbounds float* %tmp11936, i64 1 + %tmp11938 = getelementptr inbounds float* %tmp11937, i64 1 + %tmp11939 = getelementptr inbounds float* %tmp11938, i64 1 + %tmp11940 = getelementptr inbounds float* %tmp11939, i64 1 + %tmp11941 = getelementptr inbounds float* %tmp11940, i64 1 + %tmp11942 = getelementptr inbounds float* %tmp11941, i64 1 + %tmp11943 = getelementptr inbounds float* %tmp11942, i64 1 + %tmp11944 = getelementptr inbounds float* %tmp11943, i64 1 + %tmp11945 = getelementptr inbounds float* %tmp11944, i64 1 + %tmp11946 = getelementptr inbounds float* %tmp11945, i64 1 + %tmp11947 = getelementptr inbounds float* %tmp11946, i64 1 + %tmp11948 = getelementptr inbounds float* %tmp11947, i64 1 + %tmp11949 = getelementptr inbounds float* %tmp11948, i64 1 + %tmp11950 = getelementptr inbounds float* %tmp11949, i64 1 + %tmp11951 = getelementptr inbounds float* %tmp11950, i64 1 + %tmp11952 = getelementptr inbounds float* %tmp11951, i64 1 + %tmp11953 = getelementptr inbounds float* %tmp11952, i64 1 + %tmp11954 = getelementptr inbounds float* %tmp11953, i64 1 + %tmp11955 = getelementptr inbounds float* %tmp11954, i64 1 + %tmp11956 = getelementptr inbounds float* %tmp11955, i64 1 + %tmp11957 = getelementptr inbounds float* %tmp11956, i64 1 + %tmp11958 = getelementptr inbounds float* %tmp11957, i64 1 + %tmp11959 = getelementptr inbounds float* %tmp11958, i64 1 + %tmp11960 = getelementptr inbounds float* %tmp11959, i64 1 + %tmp11961 = getelementptr inbounds float* %tmp11960, i64 1 + %tmp11962 = getelementptr inbounds float* %tmp11961, i64 1 + %tmp11963 = getelementptr inbounds float* %tmp11962, i64 1 + %tmp11964 = getelementptr inbounds float* %tmp11963, i64 1 + %tmp11965 = getelementptr inbounds float* %tmp11964, i64 1 + %tmp11966 = 
getelementptr inbounds float* %tmp11965, i64 1 + %tmp11967 = getelementptr inbounds float* %tmp11966, i64 1 + %tmp11968 = getelementptr inbounds float* %tmp11967, i64 1 + %tmp11969 = getelementptr inbounds float* %tmp11968, i64 1 + %tmp11970 = getelementptr inbounds float* %tmp11969, i64 1 + %tmp11971 = getelementptr inbounds float* %tmp11970, i64 1 + %tmp11972 = getelementptr inbounds float* %tmp11971, i64 1 + %tmp11973 = getelementptr inbounds float* %tmp11972, i64 1 + %tmp11974 = getelementptr inbounds float* %tmp11973, i64 1 + %tmp11975 = getelementptr inbounds float* %tmp11974, i64 1 + %tmp11976 = getelementptr inbounds float* %tmp11975, i64 1 + %tmp11977 = getelementptr inbounds float* %tmp11976, i64 1 + %tmp11978 = getelementptr inbounds float* %tmp11977, i64 1 + %tmp11979 = getelementptr inbounds float* %tmp11978, i64 1 + %tmp11980 = getelementptr inbounds float* %tmp11979, i64 1 + %tmp11981 = getelementptr inbounds float* %tmp11980, i64 1 + %tmp11982 = getelementptr inbounds float* %tmp11981, i64 1 + %tmp11983 = getelementptr inbounds float* %tmp11982, i64 1 + %tmp11984 = getelementptr inbounds float* %tmp11983, i64 1 + %tmp11985 = getelementptr inbounds float* %tmp11984, i64 1 + %tmp11986 = getelementptr inbounds float* %tmp11985, i64 1 + %tmp11987 = getelementptr inbounds float* %tmp11986, i64 1 + %tmp11988 = getelementptr inbounds float* %tmp11987, i64 1 + %tmp11989 = getelementptr inbounds float* %tmp11988, i64 1 + %tmp11990 = getelementptr inbounds float* %tmp11989, i64 1 + %tmp11991 = getelementptr inbounds float* %tmp11990, i64 1 + %tmp11992 = getelementptr inbounds float* %tmp11991, i64 1 + %tmp11993 = getelementptr inbounds float* %tmp11992, i64 1 + %tmp11994 = getelementptr inbounds float* %tmp11993, i64 1 + %tmp11995 = getelementptr inbounds float* %tmp11994, i64 1 + %tmp11996 = getelementptr inbounds float* %tmp11995, i64 1 + %tmp11997 = getelementptr inbounds float* %tmp11996, i64 1 + %tmp11998 = getelementptr inbounds float* %tmp11997, i64 1 
+ %tmp11999 = getelementptr inbounds float* %tmp11998, i64 1 + %tmp12000 = getelementptr inbounds float* %tmp11999, i64 1 + %tmp12001 = getelementptr inbounds float* %tmp12000, i64 1 + %tmp12002 = getelementptr inbounds float* %tmp12001, i64 1 + %tmp12003 = getelementptr inbounds float* %tmp12002, i64 1 + %tmp12004 = getelementptr inbounds float* %tmp12003, i64 1 + %tmp12005 = getelementptr inbounds float* %tmp12004, i64 1 + %tmp12006 = getelementptr inbounds float* %tmp12005, i64 1 + %tmp12007 = getelementptr inbounds float* %tmp12006, i64 1 + %tmp12008 = getelementptr inbounds float* %tmp12007, i64 1 + %tmp12009 = getelementptr inbounds float* %tmp12008, i64 1 + %tmp12010 = getelementptr inbounds float* %tmp12009, i64 1 + %tmp12011 = getelementptr inbounds float* %tmp12010, i64 1 + %tmp12012 = getelementptr inbounds float* %tmp12011, i64 1 + %tmp12013 = getelementptr inbounds float* %tmp12012, i64 1 + %tmp12014 = getelementptr inbounds float* %tmp12013, i64 1 + %tmp12015 = getelementptr inbounds float* %tmp12014, i64 1 + %tmp12016 = getelementptr inbounds float* %tmp12015, i64 1 + %tmp12017 = getelementptr inbounds float* %tmp12016, i64 1 + %tmp12018 = getelementptr inbounds float* %tmp12017, i64 1 + %tmp12019 = getelementptr inbounds float* %tmp12018, i64 1 + %tmp12020 = getelementptr inbounds float* %tmp12019, i64 1 + %tmp12021 = getelementptr inbounds float* %tmp12020, i64 1 + %tmp12022 = getelementptr inbounds float* %tmp12021, i64 1 + %tmp12023 = getelementptr inbounds float* %tmp12022, i64 1 + %tmp12024 = getelementptr inbounds float* %tmp12023, i64 1 + %tmp12025 = getelementptr inbounds float* %tmp12024, i64 1 + %tmp12026 = getelementptr inbounds float* %tmp12025, i64 1 + %tmp12027 = getelementptr inbounds float* %tmp12026, i64 1 + %tmp12028 = getelementptr inbounds float* %tmp12027, i64 1 + %tmp12029 = getelementptr inbounds float* %tmp12028, i64 1 + %tmp12030 = getelementptr inbounds float* %tmp12029, i64 1 + %tmp12031 = getelementptr inbounds float* 
%tmp12030, i64 1 + %tmp12032 = getelementptr inbounds float* %tmp12031, i64 1 + %tmp12033 = getelementptr inbounds float* %tmp12032, i64 1 + %tmp12034 = getelementptr inbounds float* %tmp12033, i64 1 + %tmp12035 = getelementptr inbounds float* %tmp12034, i64 1 + %tmp12036 = getelementptr inbounds float* %tmp12035, i64 1 + %tmp12037 = getelementptr inbounds float* %tmp12036, i64 1 + %tmp12038 = getelementptr inbounds float* %tmp12037, i64 1 + %tmp12039 = getelementptr inbounds float* %tmp12038, i64 1 + %tmp12040 = getelementptr inbounds float* %tmp12039, i64 1 + %tmp12041 = getelementptr inbounds float* %tmp12040, i64 1 + %tmp12042 = getelementptr inbounds float* %tmp12041, i64 1 + %tmp12043 = getelementptr inbounds float* %tmp12042, i64 1 + %tmp12044 = getelementptr inbounds float* %tmp12043, i64 1 + %tmp12045 = getelementptr inbounds float* %tmp12044, i64 1 + %tmp12046 = getelementptr inbounds float* %tmp12045, i64 1 + %tmp12047 = getelementptr inbounds float* %tmp12046, i64 1 + %tmp12048 = getelementptr inbounds float* %tmp12047, i64 1 + %tmp12049 = getelementptr inbounds float* %tmp12048, i64 1 + %tmp12050 = getelementptr inbounds float* %tmp12049, i64 1 + %tmp12051 = getelementptr inbounds float* %tmp12050, i64 1 + %tmp12052 = getelementptr inbounds float* %tmp12051, i64 1 + %tmp12053 = getelementptr inbounds float* %tmp12052, i64 1 + %tmp12054 = getelementptr inbounds float* %tmp12053, i64 1 + %tmp12055 = getelementptr inbounds float* %tmp12054, i64 1 + %tmp12056 = getelementptr inbounds float* %tmp12055, i64 1 + %tmp12057 = getelementptr inbounds float* %tmp12056, i64 1 + %tmp12058 = getelementptr inbounds float* %tmp12057, i64 1 + %tmp12059 = getelementptr inbounds float* %tmp12058, i64 1 + %tmp12060 = getelementptr inbounds float* %tmp12059, i64 1 + %tmp12061 = getelementptr inbounds float* %tmp12060, i64 1 + %tmp12062 = getelementptr inbounds float* %tmp12061, i64 1 + %tmp12063 = getelementptr inbounds float* %tmp12062, i64 1 + %tmp12064 = getelementptr 
inbounds float* %tmp12063, i64 1 + %tmp12065 = getelementptr inbounds float* %tmp12064, i64 1 + %tmp12066 = getelementptr inbounds float* %tmp12065, i64 1 + %tmp12067 = getelementptr inbounds float* %tmp12066, i64 1 + %tmp12068 = getelementptr inbounds float* %tmp12067, i64 1 + %tmp12069 = getelementptr inbounds float* %tmp12068, i64 1 + %tmp12070 = getelementptr inbounds float* %tmp12069, i64 1 + %tmp12071 = getelementptr inbounds float* %tmp12070, i64 1 + %tmp12072 = getelementptr inbounds float* %tmp12071, i64 1 + %tmp12073 = getelementptr inbounds float* %tmp12072, i64 1 + %tmp12074 = getelementptr inbounds float* %tmp12073, i64 1 + %tmp12075 = getelementptr inbounds float* %tmp12074, i64 1 + %tmp12076 = getelementptr inbounds float* %tmp12075, i64 1 + %tmp12077 = getelementptr inbounds float* %tmp12076, i64 1 + %tmp12078 = getelementptr inbounds float* %tmp12077, i64 1 + %tmp12079 = getelementptr inbounds float* %tmp12078, i64 1 + %tmp12080 = getelementptr inbounds float* %tmp12079, i64 1 + %tmp12081 = getelementptr inbounds float* %tmp12080, i64 1 + %tmp12082 = getelementptr inbounds float* %tmp12081, i64 1 + %tmp12083 = getelementptr inbounds float* %tmp12082, i64 1 + %tmp12084 = getelementptr inbounds float* %tmp12083, i64 1 + %tmp12085 = getelementptr inbounds float* %tmp12084, i64 1 + %tmp12086 = getelementptr inbounds float* %tmp12085, i64 1 + %tmp12087 = getelementptr inbounds float* %tmp12086, i64 1 + %tmp12088 = getelementptr inbounds float* %tmp12087, i64 1 + %tmp12089 = getelementptr inbounds float* %tmp12088, i64 1 + %tmp12090 = getelementptr inbounds float* %tmp12089, i64 1 + %tmp12091 = getelementptr inbounds float* %tmp12090, i64 1 + %tmp12092 = getelementptr inbounds float* %tmp12091, i64 1 + %tmp12093 = getelementptr inbounds float* %tmp12092, i64 1 + %tmp12094 = getelementptr inbounds float* %tmp12093, i64 1 + %tmp12095 = getelementptr inbounds float* %tmp12094, i64 1 + %tmp12096 = getelementptr inbounds float* %tmp12095, i64 1 + %tmp12097 = 
getelementptr inbounds float* %tmp12096, i64 1 + %tmp12098 = getelementptr inbounds float* %tmp12097, i64 1 + %tmp12099 = getelementptr inbounds float* %tmp12098, i64 1 + %tmp12100 = getelementptr inbounds float* %tmp12099, i64 1 + %tmp12101 = getelementptr inbounds float* %tmp12100, i64 1 + %tmp12102 = getelementptr inbounds float* %tmp12101, i64 1 + %tmp12103 = getelementptr inbounds float* %tmp12102, i64 1 + %tmp12104 = getelementptr inbounds float* %tmp12103, i64 1 + %tmp12105 = getelementptr inbounds float* %tmp12104, i64 1 + %tmp12106 = getelementptr inbounds float* %tmp12105, i64 1 + %tmp12107 = getelementptr inbounds float* %tmp12106, i64 1 + %tmp12108 = getelementptr inbounds float* %tmp12107, i64 1 + %tmp12109 = getelementptr inbounds float* %tmp12108, i64 1 + %tmp12110 = getelementptr inbounds float* %tmp12109, i64 1 + %tmp12111 = getelementptr inbounds float* %tmp12110, i64 1 + %tmp12112 = getelementptr inbounds float* %tmp12111, i64 1 + %tmp12113 = getelementptr inbounds float* %tmp12112, i64 1 + %tmp12114 = getelementptr inbounds float* %tmp12113, i64 1 + %tmp12115 = getelementptr inbounds float* %tmp12114, i64 1 + %tmp12116 = getelementptr inbounds float* %tmp12115, i64 1 + %tmp12117 = getelementptr inbounds float* %tmp12116, i64 1 + %tmp12118 = getelementptr inbounds float* %tmp12117, i64 1 + %tmp12119 = getelementptr inbounds float* %tmp12118, i64 1 + %tmp12120 = getelementptr inbounds float* %tmp12119, i64 1 + %tmp12121 = getelementptr inbounds float* %tmp12120, i64 1 + %tmp12122 = getelementptr inbounds float* %tmp12121, i64 1 + %tmp12123 = getelementptr inbounds float* %tmp12122, i64 1 + %tmp12124 = getelementptr inbounds float* %tmp12123, i64 1 + %tmp12125 = getelementptr inbounds float* %tmp12124, i64 1 + %tmp12126 = getelementptr inbounds float* %tmp12125, i64 1 + %tmp12127 = getelementptr inbounds float* %tmp12126, i64 1 + %tmp12128 = getelementptr inbounds float* %tmp12127, i64 1 + %tmp12129 = getelementptr inbounds float* %tmp12128, i64 1 
+ %tmp12130 = getelementptr inbounds float* %tmp12129, i64 1 + %tmp12131 = getelementptr inbounds float* %tmp12130, i64 1 + %tmp12132 = getelementptr inbounds float* %tmp12131, i64 1 + %tmp12133 = getelementptr inbounds float* %tmp12132, i64 1 + %tmp12134 = getelementptr inbounds float* %tmp12133, i64 1 + %tmp12135 = getelementptr inbounds float* %tmp12134, i64 1 + %tmp12136 = getelementptr inbounds float* %tmp12135, i64 1 + %tmp12137 = getelementptr inbounds float* %tmp12136, i64 1 + %tmp12138 = getelementptr inbounds float* %tmp12137, i64 1 + %tmp12139 = getelementptr inbounds float* %tmp12138, i64 1 + %tmp12140 = getelementptr inbounds float* %tmp12139, i64 1 + %tmp12141 = getelementptr inbounds float* %tmp12140, i64 1 + %tmp12142 = getelementptr inbounds float* %tmp12141, i64 1 + %tmp12143 = getelementptr inbounds float* %tmp12142, i64 1 + %tmp12144 = getelementptr inbounds float* %tmp12143, i64 1 + %tmp12145 = getelementptr inbounds float* %tmp12144, i64 1 + %tmp12146 = getelementptr inbounds float* %tmp12145, i64 1 + %tmp12147 = getelementptr inbounds float* %tmp12146, i64 1 + %tmp12148 = getelementptr inbounds float* %tmp12147, i64 1 + %tmp12149 = getelementptr inbounds float* %tmp12148, i64 1 + %tmp12150 = getelementptr inbounds float* %tmp12149, i64 1 + %tmp12151 = getelementptr inbounds float* %tmp12150, i64 1 + %tmp12152 = getelementptr inbounds float* %tmp12151, i64 1 + %tmp12153 = getelementptr inbounds float* %tmp12152, i64 1 + %tmp12154 = getelementptr inbounds float* %tmp12153, i64 1 + %tmp12155 = getelementptr inbounds float* %tmp12154, i64 1 + %tmp12156 = getelementptr inbounds float* %tmp12155, i64 1 + %tmp12157 = getelementptr inbounds float* %tmp12156, i64 1 + %tmp12158 = getelementptr inbounds float* %tmp12157, i64 1 + %tmp12159 = getelementptr inbounds float* %tmp12158, i64 1 + %tmp12160 = getelementptr inbounds float* %tmp12159, i64 1 + %tmp12161 = getelementptr inbounds float* %tmp12160, i64 1 + %tmp12162 = getelementptr inbounds float* 
%tmp12161, i64 1 + %tmp12163 = getelementptr inbounds float* %tmp12162, i64 1 + %tmp12164 = getelementptr inbounds float* %tmp12163, i64 1 + %tmp12165 = getelementptr inbounds float* %tmp12164, i64 1 + %tmp12166 = getelementptr inbounds float* %tmp12165, i64 1 + %tmp12167 = getelementptr inbounds float* %tmp12166, i64 1 + %tmp12168 = getelementptr inbounds float* %tmp12167, i64 1 + %tmp12169 = getelementptr inbounds float* %tmp12168, i64 1 + %tmp12170 = getelementptr inbounds float* %tmp12169, i64 1 + %tmp12171 = getelementptr inbounds float* %tmp12170, i64 1 + %tmp12172 = getelementptr inbounds float* %tmp12171, i64 1 + %tmp12173 = getelementptr inbounds float* %tmp12172, i64 1 + %tmp12174 = getelementptr inbounds float* %tmp12173, i64 1 + %tmp12175 = getelementptr inbounds float* %tmp12174, i64 1 + %tmp12176 = getelementptr inbounds float* %tmp12175, i64 1 + %tmp12177 = getelementptr inbounds float* %tmp12176, i64 1 + %tmp12178 = getelementptr inbounds float* %tmp12177, i64 1 + %tmp12179 = getelementptr inbounds float* %tmp12178, i64 1 + %tmp12180 = getelementptr inbounds float* %tmp12179, i64 1 + %tmp12181 = getelementptr inbounds float* %tmp12180, i64 1 + %tmp12182 = getelementptr inbounds float* %tmp12181, i64 1 + %tmp12183 = getelementptr inbounds float* %tmp12182, i64 1 + %tmp12184 = getelementptr inbounds float* %tmp12183, i64 1 + %tmp12185 = getelementptr inbounds float* %tmp12184, i64 1 + %tmp12186 = getelementptr inbounds float* %tmp12185, i64 1 + %tmp12187 = getelementptr inbounds float* %tmp12186, i64 1 + %tmp12188 = getelementptr inbounds float* %tmp12187, i64 1 + %tmp12189 = getelementptr inbounds float* %tmp12188, i64 1 + %tmp12190 = getelementptr inbounds float* %tmp12189, i64 1 + %tmp12191 = getelementptr inbounds float* %tmp12190, i64 1 + %tmp12192 = getelementptr inbounds float* %tmp12191, i64 1 + %tmp12193 = getelementptr inbounds float* %tmp12192, i64 1 + %tmp12194 = getelementptr inbounds float* %tmp12193, i64 1 + %tmp12195 = getelementptr 
inbounds float* %tmp12194, i64 1 + %tmp12196 = getelementptr inbounds float* %tmp12195, i64 1 + %tmp12197 = getelementptr inbounds float* %tmp12196, i64 1 + %tmp12198 = getelementptr inbounds float* %tmp12197, i64 1 + %tmp12199 = getelementptr inbounds float* %tmp12198, i64 1 + %tmp12200 = getelementptr inbounds float* %tmp12199, i64 1 + %tmp12201 = getelementptr inbounds float* %tmp12200, i64 1 + %tmp12202 = getelementptr inbounds float* %tmp12201, i64 1 + %tmp12203 = getelementptr inbounds float* %tmp12202, i64 1 + %tmp12204 = getelementptr inbounds float* %tmp12203, i64 1 + %tmp12205 = getelementptr inbounds float* %tmp12204, i64 1 + %tmp12206 = getelementptr inbounds float* %tmp12205, i64 1 + %tmp12207 = getelementptr inbounds float* %tmp12206, i64 1 + %tmp12208 = getelementptr inbounds float* %tmp12207, i64 1 + %tmp12209 = getelementptr inbounds float* %tmp12208, i64 1 + %tmp12210 = getelementptr inbounds float* %tmp12209, i64 1 + %tmp12211 = getelementptr inbounds float* %tmp12210, i64 1 + %tmp12212 = getelementptr inbounds float* %tmp12211, i64 1 + %tmp12213 = getelementptr inbounds float* %tmp12212, i64 1 + %tmp12214 = getelementptr inbounds float* %tmp12213, i64 1 + %tmp12215 = getelementptr inbounds float* %tmp12214, i64 1 + %tmp12216 = getelementptr inbounds float* %tmp12215, i64 1 + %tmp12217 = getelementptr inbounds float* %tmp12216, i64 1 + %tmp12218 = getelementptr inbounds float* %tmp12217, i64 1 + %tmp12219 = getelementptr inbounds float* %tmp12218, i64 1 + %tmp12220 = getelementptr inbounds float* %tmp12219, i64 1 + %tmp12221 = getelementptr inbounds float* %tmp12220, i64 1 + %tmp12222 = getelementptr inbounds float* %tmp12221, i64 1 + %tmp12223 = getelementptr inbounds float* %tmp12222, i64 1 + %tmp12224 = getelementptr inbounds float* %tmp12223, i64 1 + %tmp12225 = getelementptr inbounds float* %tmp12224, i64 1 + %tmp12226 = getelementptr inbounds float* %tmp12225, i64 1 + %tmp12227 = getelementptr inbounds float* %tmp12226, i64 1 + %tmp12228 = 
getelementptr inbounds float* %tmp12227, i64 1 + %tmp12229 = getelementptr inbounds float* %tmp12228, i64 1 + %tmp12230 = getelementptr inbounds float* %tmp12229, i64 1 + %tmp12231 = getelementptr inbounds float* %tmp12230, i64 1 + %tmp12232 = getelementptr inbounds float* %tmp12231, i64 1 + %tmp12233 = getelementptr inbounds float* %tmp12232, i64 1 + %tmp12234 = getelementptr inbounds float* %tmp12233, i64 1 + %tmp12235 = getelementptr inbounds float* %tmp12234, i64 1 + %tmp12236 = getelementptr inbounds float* %tmp12235, i64 1 + %tmp12237 = getelementptr inbounds float* %tmp12236, i64 1 + %tmp12238 = getelementptr inbounds float* %tmp12237, i64 1 + %tmp12239 = getelementptr inbounds float* %tmp12238, i64 1 + %tmp12240 = getelementptr inbounds float* %tmp12239, i64 1 + %tmp12241 = getelementptr inbounds float* %tmp12240, i64 1 + %tmp12242 = getelementptr inbounds float* %tmp12241, i64 1 + %tmp12243 = getelementptr inbounds float* %tmp12242, i64 1 + %tmp12244 = getelementptr inbounds float* %tmp12243, i64 1 + %tmp12245 = getelementptr inbounds float* %tmp12244, i64 1 + %tmp12246 = getelementptr inbounds float* %tmp12245, i64 1 + %tmp12247 = getelementptr inbounds float* %tmp12246, i64 1 + %tmp12248 = getelementptr inbounds float* %tmp12247, i64 1 + %tmp12249 = getelementptr inbounds float* %tmp12248, i64 1 + %tmp12250 = getelementptr inbounds float* %tmp12249, i64 1 + %tmp12251 = getelementptr inbounds float* %tmp12250, i64 1 + %tmp12252 = getelementptr inbounds float* %tmp12251, i64 1 + %tmp12253 = getelementptr inbounds float* %tmp12252, i64 1 + %tmp12254 = getelementptr inbounds float* %tmp12253, i64 1 + %tmp12255 = getelementptr inbounds float* %tmp12254, i64 1 + %tmp12256 = getelementptr inbounds float* %tmp12255, i64 1 + %tmp12257 = getelementptr inbounds float* %tmp12256, i64 1 + %tmp12258 = getelementptr inbounds float* %tmp12257, i64 1 + %tmp12259 = getelementptr inbounds float* %tmp12258, i64 1 + %tmp12260 = getelementptr inbounds float* %tmp12259, i64 1 
+ %tmp12261 = getelementptr inbounds float* %tmp12260, i64 1 + %tmp12262 = getelementptr inbounds float* %tmp12261, i64 1 + %tmp12263 = getelementptr inbounds float* %tmp12262, i64 1 + %tmp12264 = getelementptr inbounds float* %tmp12263, i64 1 + %tmp12265 = getelementptr inbounds float* %tmp12264, i64 1 + %tmp12266 = getelementptr inbounds float* %tmp12265, i64 1 + %tmp12267 = getelementptr inbounds float* %tmp12266, i64 1 + %tmp12268 = getelementptr inbounds float* %tmp12267, i64 1 + %tmp12269 = getelementptr inbounds float* %tmp12268, i64 1 + %tmp12270 = getelementptr inbounds float* %tmp12269, i64 1 + %tmp12271 = getelementptr inbounds float* %tmp12270, i64 1 + %tmp12272 = getelementptr inbounds float* %tmp12271, i64 1 + %tmp12273 = getelementptr inbounds float* %tmp12272, i64 1 + %tmp12274 = getelementptr inbounds float* %tmp12273, i64 1 + %tmp12275 = getelementptr inbounds float* %tmp12274, i64 1 + %tmp12276 = getelementptr inbounds float* %tmp12275, i64 1 + %tmp12277 = getelementptr inbounds float* %tmp12276, i64 1 + %tmp12278 = getelementptr inbounds float* %tmp12277, i64 1 + %tmp12279 = getelementptr inbounds float* %tmp12278, i64 1 + %tmp12280 = getelementptr inbounds float* %tmp12279, i64 1 + %tmp12281 = getelementptr inbounds float* %tmp12280, i64 1 + %tmp12282 = getelementptr inbounds float* %tmp12281, i64 1 + %tmp12283 = getelementptr inbounds float* %tmp12282, i64 1 + %tmp12284 = getelementptr inbounds float* %tmp12283, i64 1 + %tmp12285 = getelementptr inbounds float* %tmp12284, i64 1 + %tmp12286 = getelementptr inbounds float* %tmp12285, i64 1 + %tmp12287 = getelementptr inbounds float* %tmp12286, i64 1 + %tmp12288 = getelementptr inbounds float* %tmp12287, i64 1 + %tmp12289 = getelementptr inbounds float* %tmp12288, i64 1 + %tmp12290 = getelementptr inbounds float* %tmp12289, i64 1 + %tmp12291 = getelementptr inbounds float* %tmp12290, i64 1 + %tmp12292 = getelementptr inbounds float* %tmp12291, i64 1 + %tmp12293 = getelementptr inbounds float* 
%tmp12292, i64 1 + %tmp12294 = getelementptr inbounds float* %tmp12293, i64 1 + %tmp12295 = getelementptr inbounds float* %tmp12294, i64 1 + %tmp12296 = getelementptr inbounds float* %tmp12295, i64 1 + %tmp12297 = getelementptr inbounds float* %tmp12296, i64 1 + %tmp12298 = getelementptr inbounds float* %tmp12297, i64 1 + %tmp12299 = getelementptr inbounds float* %tmp12298, i64 1 + %tmp12300 = getelementptr inbounds float* %tmp12299, i64 1 + %tmp12301 = getelementptr inbounds float* %tmp12300, i64 1 + %tmp12302 = getelementptr inbounds float* %tmp12301, i64 1 + %tmp12303 = getelementptr inbounds float* %tmp12302, i64 1 + %tmp12304 = getelementptr inbounds float* %tmp12303, i64 1 + %tmp12305 = getelementptr inbounds float* %tmp12304, i64 1 + %tmp12306 = getelementptr inbounds float* %tmp12305, i64 1 + %tmp12307 = getelementptr inbounds float* %tmp12306, i64 1 + %tmp12308 = getelementptr inbounds float* %tmp12307, i64 1 + %tmp12309 = getelementptr inbounds float* %tmp12308, i64 1 + %tmp12310 = getelementptr inbounds float* %tmp12309, i64 1 + %tmp12311 = getelementptr inbounds float* %tmp12310, i64 1 + %tmp12312 = getelementptr inbounds float* %tmp12311, i64 1 + %tmp12313 = getelementptr inbounds float* %tmp12312, i64 1 + %tmp12314 = getelementptr inbounds float* %tmp12313, i64 1 + %tmp12315 = getelementptr inbounds float* %tmp12314, i64 1 + %tmp12316 = getelementptr inbounds float* %tmp12315, i64 1 + %tmp12317 = getelementptr inbounds float* %tmp12316, i64 1 + %tmp12318 = getelementptr inbounds float* %tmp12317, i64 1 + %tmp12319 = getelementptr inbounds float* %tmp12318, i64 1 + %tmp12320 = getelementptr inbounds float* %tmp12319, i64 1 + %tmp12321 = getelementptr inbounds float* %tmp12320, i64 1 + %tmp12322 = getelementptr inbounds float* %tmp12321, i64 1 + %tmp12323 = getelementptr inbounds float* %tmp12322, i64 1 + %tmp12324 = getelementptr inbounds float* %tmp12323, i64 1 + %tmp12325 = getelementptr inbounds float* %tmp12324, i64 1 + %tmp12326 = getelementptr 
inbounds float* %tmp12325, i64 1 + %tmp12327 = getelementptr inbounds float* %tmp12326, i64 1 + %tmp12328 = getelementptr inbounds float* %tmp12327, i64 1 + %tmp12329 = getelementptr inbounds float* %tmp12328, i64 1 + %tmp12330 = getelementptr inbounds float* %tmp12329, i64 1 + %tmp12331 = getelementptr inbounds float* %tmp12330, i64 1 + %tmp12332 = getelementptr inbounds float* %tmp12331, i64 1 + %tmp12333 = getelementptr inbounds float* %tmp12332, i64 1 + %tmp12334 = getelementptr inbounds float* %tmp12333, i64 1 + %tmp12335 = getelementptr inbounds float* %tmp12334, i64 1 + %tmp12336 = getelementptr inbounds float* %tmp12335, i64 1 + %tmp12337 = getelementptr inbounds float* %tmp12336, i64 1 + %tmp12338 = getelementptr inbounds float* %tmp12337, i64 1 + %tmp12339 = getelementptr inbounds float* %tmp12338, i64 1 + %tmp12340 = getelementptr inbounds float* %tmp12339, i64 1 + %tmp12341 = getelementptr inbounds float* %tmp12340, i64 1 + %tmp12342 = getelementptr inbounds float* %tmp12341, i64 1 + %tmp12343 = getelementptr inbounds float* %tmp12342, i64 1 + %tmp12344 = getelementptr inbounds float* %tmp12343, i64 1 + %tmp12345 = getelementptr inbounds float* %tmp12344, i64 1 + %tmp12346 = getelementptr inbounds float* %tmp12345, i64 1 + %tmp12347 = getelementptr inbounds float* %tmp12346, i64 1 + %tmp12348 = getelementptr inbounds float* %tmp12347, i64 1 + %tmp12349 = getelementptr inbounds float* %tmp12348, i64 1 + %tmp12350 = getelementptr inbounds float* %tmp12349, i64 1 + %tmp12351 = getelementptr inbounds float* %tmp12350, i64 1 + %tmp12352 = getelementptr inbounds float* %tmp12351, i64 1 + %tmp12353 = getelementptr inbounds float* %tmp12352, i64 1 + %tmp12354 = getelementptr inbounds float* %tmp12353, i64 1 + %tmp12355 = getelementptr inbounds float* %tmp12354, i64 1 + %tmp12356 = getelementptr inbounds float* %tmp12355, i64 1 + %tmp12357 = getelementptr inbounds float* %tmp12356, i64 1 + %tmp12358 = getelementptr inbounds float* %tmp12357, i64 1 + %tmp12359 = 
getelementptr inbounds float* %tmp12358, i64 1 + %tmp12360 = getelementptr inbounds float* %tmp12359, i64 1 + %tmp12361 = getelementptr inbounds float* %tmp12360, i64 1 + %tmp12362 = getelementptr inbounds float* %tmp12361, i64 1 + %tmp12363 = getelementptr inbounds float* %tmp12362, i64 1 + %tmp12364 = getelementptr inbounds float* %tmp12363, i64 1 + %tmp12365 = getelementptr inbounds float* %tmp12364, i64 1 + %tmp12366 = getelementptr inbounds float* %tmp12365, i64 1 + %tmp12367 = getelementptr inbounds float* %tmp12366, i64 1 + %tmp12368 = getelementptr inbounds float* %tmp12367, i64 1 + %tmp12369 = getelementptr inbounds float* %tmp12368, i64 1 + %tmp12370 = getelementptr inbounds float* %tmp12369, i64 1 + %tmp12371 = getelementptr inbounds float* %tmp12370, i64 1 + %tmp12372 = getelementptr inbounds float* %tmp12371, i64 1 + %tmp12373 = getelementptr inbounds float* %tmp12372, i64 1 + %tmp12374 = getelementptr inbounds float* %tmp12373, i64 1 + %tmp12375 = getelementptr inbounds float* %tmp12374, i64 1 + %tmp12376 = getelementptr inbounds float* %tmp12375, i64 1 + %tmp12377 = getelementptr inbounds float* %tmp12376, i64 1 + %tmp12378 = getelementptr inbounds float* %tmp12377, i64 1 + %tmp12379 = getelementptr inbounds float* %tmp12378, i64 1 + %tmp12380 = getelementptr inbounds float* %tmp12379, i64 1 + %tmp12381 = getelementptr inbounds float* %tmp12380, i64 1 + %tmp12382 = getelementptr inbounds float* %tmp12381, i64 1 + %tmp12383 = getelementptr inbounds float* %tmp12382, i64 1 + %tmp12384 = getelementptr inbounds float* %tmp12383, i64 1 + %tmp12385 = getelementptr inbounds float* %tmp12384, i64 1 + %tmp12386 = getelementptr inbounds float* %tmp12385, i64 1 + %tmp12387 = getelementptr inbounds float* %tmp12386, i64 1 + %tmp12388 = getelementptr inbounds float* %tmp12387, i64 1 + %tmp12389 = getelementptr inbounds float* %tmp12388, i64 1 + %tmp12390 = getelementptr inbounds float* %tmp12389, i64 1 + %tmp12391 = getelementptr inbounds float* %tmp12390, i64 1 
+ %tmp12392 = getelementptr inbounds float* %tmp12391, i64 1 + %tmp12393 = getelementptr inbounds float* %tmp12392, i64 1 + %tmp12394 = getelementptr inbounds float* %tmp12393, i64 1 + %tmp12395 = getelementptr inbounds float* %tmp12394, i64 1 + %tmp12396 = getelementptr inbounds float* %tmp12395, i64 1 + %tmp12397 = getelementptr inbounds float* %tmp12396, i64 1 + %tmp12398 = getelementptr inbounds float* %tmp12397, i64 1 + %tmp12399 = getelementptr inbounds float* %tmp12398, i64 1 + %tmp12400 = getelementptr inbounds float* %tmp12399, i64 1 + %tmp12401 = getelementptr inbounds float* %tmp12400, i64 1 + %tmp12402 = getelementptr inbounds float* %tmp12401, i64 1 + %tmp12403 = getelementptr inbounds float* %tmp12402, i64 1 + %tmp12404 = getelementptr inbounds float* %tmp12403, i64 1 + %tmp12405 = getelementptr inbounds float* %tmp12404, i64 1 + %tmp12406 = getelementptr inbounds float* %tmp12405, i64 1 + %tmp12407 = getelementptr inbounds float* %tmp12406, i64 1 + %tmp12408 = getelementptr inbounds float* %tmp12407, i64 1 + %tmp12409 = getelementptr inbounds float* %tmp12408, i64 1 + %tmp12410 = getelementptr inbounds float* %tmp12409, i64 1 + %tmp12411 = getelementptr inbounds float* %tmp12410, i64 1 + %tmp12412 = getelementptr inbounds float* %tmp12411, i64 1 + %tmp12413 = getelementptr inbounds float* %tmp12412, i64 1 + %tmp12414 = getelementptr inbounds float* %tmp12413, i64 1 + %tmp12415 = getelementptr inbounds float* %tmp12414, i64 1 + %tmp12416 = getelementptr inbounds float* %tmp12415, i64 1 + %tmp12417 = getelementptr inbounds float* %tmp12416, i64 1 + %tmp12418 = getelementptr inbounds float* %tmp12417, i64 1 + %tmp12419 = getelementptr inbounds float* %tmp12418, i64 1 + %tmp12420 = getelementptr inbounds float* %tmp12419, i64 1 + %tmp12421 = getelementptr inbounds float* %tmp12420, i64 1 + %tmp12422 = getelementptr inbounds float* %tmp12421, i64 1 + %tmp12423 = getelementptr inbounds float* %tmp12422, i64 1 + %tmp12424 = getelementptr inbounds float* 
%tmp12423, i64 1 + %tmp12425 = getelementptr inbounds float* %tmp12424, i64 1 + %tmp12426 = getelementptr inbounds float* %tmp12425, i64 1 + %tmp12427 = getelementptr inbounds float* %tmp12426, i64 1 + %tmp12428 = getelementptr inbounds float* %tmp12427, i64 1 + %tmp12429 = getelementptr inbounds float* %tmp12428, i64 1 + %tmp12430 = getelementptr inbounds float* %tmp12429, i64 1 + %tmp12431 = getelementptr inbounds float* %tmp12430, i64 1 + %tmp12432 = getelementptr inbounds float* %tmp12431, i64 1 + %tmp12433 = getelementptr inbounds float* %tmp12432, i64 1 + %tmp12434 = getelementptr inbounds float* %tmp12433, i64 1 + %tmp12435 = getelementptr inbounds float* %tmp12434, i64 1 + %tmp12436 = getelementptr inbounds float* %tmp12435, i64 1 + %tmp12437 = getelementptr inbounds float* %tmp12436, i64 1 + %tmp12438 = getelementptr inbounds float* %tmp12437, i64 1 + %tmp12439 = getelementptr inbounds float* %tmp12438, i64 1 + %tmp12440 = getelementptr inbounds float* %tmp12439, i64 1 + %tmp12441 = getelementptr inbounds float* %tmp12440, i64 1 + %tmp12442 = getelementptr inbounds float* %tmp12441, i64 1 + %tmp12443 = getelementptr inbounds float* %tmp12442, i64 1 + %tmp12444 = getelementptr inbounds float* %tmp12443, i64 1 + %tmp12445 = getelementptr inbounds float* %tmp12444, i64 1 + %tmp12446 = getelementptr inbounds float* %tmp12445, i64 1 + %tmp12447 = getelementptr inbounds float* %tmp12446, i64 1 + %tmp12448 = getelementptr inbounds float* %tmp12447, i64 1 + %tmp12449 = getelementptr inbounds float* %tmp12448, i64 1 + %tmp12450 = getelementptr inbounds float* %tmp12449, i64 1 + %tmp12451 = getelementptr inbounds float* %tmp12450, i64 1 + %tmp12452 = getelementptr inbounds float* %tmp12451, i64 1 + %tmp12453 = getelementptr inbounds float* %tmp12452, i64 1 + %tmp12454 = getelementptr inbounds float* %tmp12453, i64 1 + %tmp12455 = getelementptr inbounds float* %tmp12454, i64 1 + %tmp12456 = getelementptr inbounds float* %tmp12455, i64 1 + %tmp12457 = getelementptr 
inbounds float* %tmp12456, i64 1 + %tmp12458 = getelementptr inbounds float* %tmp12457, i64 1 + %tmp12459 = getelementptr inbounds float* %tmp12458, i64 1 + %tmp12460 = getelementptr inbounds float* %tmp12459, i64 1 + %tmp12461 = getelementptr inbounds float* %tmp12460, i64 1 + %tmp12462 = getelementptr inbounds float* %tmp12461, i64 1 + %tmp12463 = getelementptr inbounds float* %tmp12462, i64 1 + %tmp12464 = getelementptr inbounds float* %tmp12463, i64 1 + %tmp12465 = getelementptr inbounds float* %tmp12464, i64 1 + %tmp12466 = getelementptr inbounds float* %tmp12465, i64 1 + %tmp12467 = getelementptr inbounds float* %tmp12466, i64 1 + %tmp12468 = getelementptr inbounds float* %tmp12467, i64 1 + %tmp12469 = getelementptr inbounds float* %tmp12468, i64 1 + %tmp12470 = getelementptr inbounds float* %tmp12469, i64 1 + %tmp12471 = getelementptr inbounds float* %tmp12470, i64 1 + %tmp12472 = getelementptr inbounds float* %tmp12471, i64 1 + %tmp12473 = getelementptr inbounds float* %tmp12472, i64 1 + %tmp12474 = getelementptr inbounds float* %tmp12473, i64 1 + %tmp12475 = getelementptr inbounds float* %tmp12474, i64 1 + %tmp12476 = getelementptr inbounds float* %tmp12475, i64 1 + %tmp12477 = getelementptr inbounds float* %tmp12476, i64 1 + %tmp12478 = getelementptr inbounds float* %tmp12477, i64 1 + %tmp12479 = getelementptr inbounds float* %tmp12478, i64 1 + %tmp12480 = getelementptr inbounds float* %tmp12479, i64 1 + %tmp12481 = getelementptr inbounds float* %tmp12480, i64 1 + %tmp12482 = getelementptr inbounds float* %tmp12481, i64 1 + %tmp12483 = getelementptr inbounds float* %tmp12482, i64 1 + %tmp12484 = getelementptr inbounds float* %tmp12483, i64 1 + %tmp12485 = getelementptr inbounds float* %tmp12484, i64 1 + %tmp12486 = getelementptr inbounds float* %tmp12485, i64 1 + %tmp12487 = getelementptr inbounds float* %tmp12486, i64 1 + %tmp12488 = getelementptr inbounds float* %tmp12487, i64 1 + %tmp12489 = getelementptr inbounds float* %tmp12488, i64 1 + %tmp12490 = 
getelementptr inbounds float* %tmp12489, i64 1 + %tmp12491 = getelementptr inbounds float* %tmp12490, i64 1 + %tmp12492 = getelementptr inbounds float* %tmp12491, i64 1 + %tmp12493 = getelementptr inbounds float* %tmp12492, i64 1 + %tmp12494 = getelementptr inbounds float* %tmp12493, i64 1 + %tmp12495 = getelementptr inbounds float* %tmp12494, i64 1 + %tmp12496 = getelementptr inbounds float* %tmp12495, i64 1 + %tmp12497 = getelementptr inbounds float* %tmp12496, i64 1 + %tmp12498 = getelementptr inbounds float* %tmp12497, i64 1 + %tmp12499 = getelementptr inbounds float* %tmp12498, i64 1 + %tmp12500 = getelementptr inbounds float* %tmp12499, i64 1 + %tmp12501 = getelementptr inbounds float* %tmp12500, i64 1 + %tmp12502 = getelementptr inbounds float* %tmp12501, i64 1 + %tmp12503 = getelementptr inbounds float* %tmp12502, i64 1 + %tmp12504 = getelementptr inbounds float* %tmp12503, i64 1 + %tmp12505 = getelementptr inbounds float* %tmp12504, i64 1 + %tmp12506 = getelementptr inbounds float* %tmp12505, i64 1 + %tmp12507 = getelementptr inbounds float* %tmp12506, i64 1 + %tmp12508 = getelementptr inbounds float* %tmp12507, i64 1 + %tmp12509 = getelementptr inbounds float* %tmp12508, i64 1 + %tmp12510 = getelementptr inbounds float* %tmp12509, i64 1 + %tmp12511 = getelementptr inbounds float* %tmp12510, i64 1 + %tmp12512 = getelementptr inbounds float* %tmp12511, i64 1 + %tmp12513 = getelementptr inbounds float* %tmp12512, i64 1 + %tmp12514 = getelementptr inbounds float* %tmp12513, i64 1 + %tmp12515 = getelementptr inbounds float* %tmp12514, i64 1 + %tmp12516 = getelementptr inbounds float* %tmp12515, i64 1 + %tmp12517 = getelementptr inbounds float* %tmp12516, i64 1 + %tmp12518 = getelementptr inbounds float* %tmp12517, i64 1 + %tmp12519 = getelementptr inbounds float* %tmp12518, i64 1 + %tmp12520 = getelementptr inbounds float* %tmp12519, i64 1 + %tmp12521 = getelementptr inbounds float* %tmp12520, i64 1 + %tmp12522 = getelementptr inbounds float* %tmp12521, i64 1 
+ %tmp12523 = getelementptr inbounds float* %tmp12522, i64 1 + %tmp12524 = getelementptr inbounds float* %tmp12523, i64 1 + %tmp12525 = getelementptr inbounds float* %tmp12524, i64 1 + %tmp12526 = getelementptr inbounds float* %tmp12525, i64 1 + %tmp12527 = getelementptr inbounds float* %tmp12526, i64 1 + %tmp12528 = getelementptr inbounds float* %tmp12527, i64 1 + %tmp12529 = getelementptr inbounds float* %tmp12528, i64 1 + %tmp12530 = getelementptr inbounds float* %tmp12529, i64 1 + %tmp12531 = getelementptr inbounds float* %tmp12530, i64 1 + %tmp12532 = getelementptr inbounds float* %tmp12531, i64 1 + %tmp12533 = getelementptr inbounds float* %tmp12532, i64 1 + %tmp12534 = getelementptr inbounds float* %tmp12533, i64 1 + %tmp12535 = getelementptr inbounds float* %tmp12534, i64 1 + %tmp12536 = getelementptr inbounds float* %tmp12535, i64 1 + %tmp12537 = getelementptr inbounds float* %tmp12536, i64 1 + %tmp12538 = getelementptr inbounds float* %tmp12537, i64 1 + %tmp12539 = getelementptr inbounds float* %tmp12538, i64 1 + %tmp12540 = getelementptr inbounds float* %tmp12539, i64 1 + %tmp12541 = getelementptr inbounds float* %tmp12540, i64 1 + %tmp12542 = getelementptr inbounds float* %tmp12541, i64 1 + %tmp12543 = getelementptr inbounds float* %tmp12542, i64 1 + %tmp12544 = getelementptr inbounds float* %tmp12543, i64 1 + %tmp12545 = getelementptr inbounds float* %tmp12544, i64 1 + %tmp12546 = getelementptr inbounds float* %tmp12545, i64 1 + %tmp12547 = getelementptr inbounds float* %tmp12546, i64 1 + %tmp12548 = getelementptr inbounds float* %tmp12547, i64 1 + %tmp12549 = getelementptr inbounds float* %tmp12548, i64 1 + %tmp12550 = getelementptr inbounds float* %tmp12549, i64 1 + %tmp12551 = getelementptr inbounds float* %tmp12550, i64 1 + %tmp12552 = getelementptr inbounds float* %tmp12551, i64 1 + %tmp12553 = getelementptr inbounds float* %tmp12552, i64 1 + %tmp12554 = getelementptr inbounds float* %tmp12553, i64 1 + %tmp12555 = getelementptr inbounds float* 
%tmp12554, i64 1 + %tmp12556 = getelementptr inbounds float* %tmp12555, i64 1 + %tmp12557 = getelementptr inbounds float* %tmp12556, i64 1 + %tmp12558 = getelementptr inbounds float* %tmp12557, i64 1 + %tmp12559 = getelementptr inbounds float* %tmp12558, i64 1 + %tmp12560 = getelementptr inbounds float* %tmp12559, i64 1 + %tmp12561 = getelementptr inbounds float* %tmp12560, i64 1 + %tmp12562 = getelementptr inbounds float* %tmp12561, i64 1 + %tmp12563 = getelementptr inbounds float* %tmp12562, i64 1 + %tmp12564 = getelementptr inbounds float* %tmp12563, i64 1 + %tmp12565 = getelementptr inbounds float* %tmp12564, i64 1 + %tmp12566 = getelementptr inbounds float* %tmp12565, i64 1 + %tmp12567 = getelementptr inbounds float* %tmp12566, i64 1 + %tmp12568 = getelementptr inbounds float* %tmp12567, i64 1 + %tmp12569 = getelementptr inbounds float* %tmp12568, i64 1 + %tmp12570 = getelementptr inbounds float* %tmp12569, i64 1 + %tmp12571 = getelementptr inbounds float* %tmp12570, i64 1 + %tmp12572 = getelementptr inbounds float* %tmp12571, i64 1 + %tmp12573 = getelementptr inbounds float* %tmp12572, i64 1 + %tmp12574 = getelementptr inbounds float* %tmp12573, i64 1 + %tmp12575 = getelementptr inbounds float* %tmp12574, i64 1 + %tmp12576 = getelementptr inbounds float* %tmp12575, i64 1 + %tmp12577 = getelementptr inbounds float* %tmp12576, i64 1 + %tmp12578 = getelementptr inbounds float* %tmp12577, i64 1 + %tmp12579 = getelementptr inbounds float* %tmp12578, i64 1 + %tmp12580 = getelementptr inbounds float* %tmp12579, i64 1 + %tmp12581 = getelementptr inbounds float* %tmp12580, i64 1 + %tmp12582 = getelementptr inbounds float* %tmp12581, i64 1 + %tmp12583 = getelementptr inbounds float* %tmp12582, i64 1 + %tmp12584 = getelementptr inbounds float* %tmp12583, i64 1 + %tmp12585 = getelementptr inbounds float* %tmp12584, i64 1 + %tmp12586 = getelementptr inbounds float* %tmp12585, i64 1 + %tmp12587 = getelementptr inbounds float* %tmp12586, i64 1 + %tmp12588 = getelementptr 
inbounds float* %tmp12587, i64 1 + %tmp12589 = getelementptr inbounds float* %tmp12588, i64 1 + %tmp12590 = getelementptr inbounds float* %tmp12589, i64 1 + %tmp12591 = getelementptr inbounds float* %tmp12590, i64 1 + %tmp12592 = getelementptr inbounds float* %tmp12591, i64 1 + %tmp12593 = getelementptr inbounds float* %tmp12592, i64 1 + %tmp12594 = getelementptr inbounds float* %tmp12593, i64 1 + %tmp12595 = getelementptr inbounds float* %tmp12594, i64 1 + %tmp12596 = getelementptr inbounds float* %tmp12595, i64 1 + %tmp12597 = getelementptr inbounds float* %tmp12596, i64 1 + %tmp12598 = getelementptr inbounds float* %tmp12597, i64 1 + %tmp12599 = getelementptr inbounds float* %tmp12598, i64 1 + %tmp12600 = getelementptr inbounds float* %tmp12599, i64 1 + %tmp12601 = getelementptr inbounds float* %tmp12600, i64 1 + %tmp12602 = getelementptr inbounds float* %tmp12601, i64 1 + %tmp12603 = getelementptr inbounds float* %tmp12602, i64 1 + %tmp12604 = getelementptr inbounds float* %tmp12603, i64 1 + %tmp12605 = getelementptr inbounds float* %tmp12604, i64 1 + %tmp12606 = getelementptr inbounds float* %tmp12605, i64 1 + %tmp12607 = getelementptr inbounds float* %tmp12606, i64 1 + %tmp12608 = getelementptr inbounds float* %tmp12607, i64 1 + %tmp12609 = getelementptr inbounds float* %tmp12608, i64 1 + %tmp12610 = getelementptr inbounds float* %tmp12609, i64 1 + %tmp12611 = getelementptr inbounds float* %tmp12610, i64 1 + %tmp12612 = getelementptr inbounds float* %tmp12611, i64 1 + %tmp12613 = getelementptr inbounds float* %tmp12612, i64 1 + %tmp12614 = getelementptr inbounds float* %tmp12613, i64 1 + %tmp12615 = getelementptr inbounds float* %tmp12614, i64 1 + %tmp12616 = getelementptr inbounds float* %tmp12615, i64 1 + %tmp12617 = getelementptr inbounds float* %tmp12616, i64 1 + %tmp12618 = getelementptr inbounds float* %tmp12617, i64 1 + %tmp12619 = getelementptr inbounds float* %tmp12618, i64 1 + %tmp12620 = getelementptr inbounds float* %tmp12619, i64 1 + %tmp12621 = 
getelementptr inbounds float* %tmp12620, i64 1 + %tmp12622 = getelementptr inbounds float* %tmp12621, i64 1 + %tmp12623 = getelementptr inbounds float* %tmp12622, i64 1 + %tmp12624 = getelementptr inbounds float* %tmp12623, i64 1 + %tmp12625 = getelementptr inbounds float* %tmp12624, i64 1 + %tmp12626 = getelementptr inbounds float* %tmp12625, i64 1 + %tmp12627 = getelementptr inbounds float* %tmp12626, i64 1 + %tmp12628 = getelementptr inbounds float* %tmp12627, i64 1 + %tmp12629 = getelementptr inbounds float* %tmp12628, i64 1 + %tmp12630 = getelementptr inbounds float* %tmp12629, i64 1 + %tmp12631 = getelementptr inbounds float* %tmp12630, i64 1 + %tmp12632 = getelementptr inbounds float* %tmp12631, i64 1 + %tmp12633 = getelementptr inbounds float* %tmp12632, i64 1 + %tmp12634 = getelementptr inbounds float* %tmp12633, i64 1 + %tmp12635 = getelementptr inbounds float* %tmp12634, i64 1 + %tmp12636 = getelementptr inbounds float* %tmp12635, i64 1 + %tmp12637 = getelementptr inbounds float* %tmp12636, i64 1 + %tmp12638 = getelementptr inbounds float* %tmp12637, i64 1 + %tmp12639 = getelementptr inbounds float* %tmp12638, i64 1 + %tmp12640 = getelementptr inbounds float* %tmp12639, i64 1 + %tmp12641 = getelementptr inbounds float* %tmp12640, i64 1 + %tmp12642 = getelementptr inbounds float* %tmp12641, i64 1 + %tmp12643 = getelementptr inbounds float* %tmp12642, i64 1 + %tmp12644 = getelementptr inbounds float* %tmp12643, i64 1 + %tmp12645 = getelementptr inbounds float* %tmp12644, i64 1 + %tmp12646 = getelementptr inbounds float* %tmp12645, i64 1 + %tmp12647 = getelementptr inbounds float* %tmp12646, i64 1 + %tmp12648 = getelementptr inbounds float* %tmp12647, i64 1 + %tmp12649 = getelementptr inbounds float* %tmp12648, i64 1 + %tmp12650 = getelementptr inbounds float* %tmp12649, i64 1 + %tmp12651 = getelementptr inbounds float* %tmp12650, i64 1 + %tmp12652 = getelementptr inbounds float* %tmp12651, i64 1 + %tmp12653 = getelementptr inbounds float* %tmp12652, i64 1 
+ %tmp12654 = getelementptr inbounds float* %tmp12653, i64 1 + %tmp12655 = getelementptr inbounds float* %tmp12654, i64 1 + %tmp12656 = getelementptr inbounds float* %tmp12655, i64 1 + %tmp12657 = getelementptr inbounds float* %tmp12656, i64 1 + %tmp12658 = getelementptr inbounds float* %tmp12657, i64 1 + %tmp12659 = getelementptr inbounds float* %tmp12658, i64 1 + %tmp12660 = getelementptr inbounds float* %tmp12659, i64 1 + %tmp12661 = getelementptr inbounds float* %tmp12660, i64 1 + %tmp12662 = getelementptr inbounds float* %tmp12661, i64 1 + %tmp12663 = getelementptr inbounds float* %tmp12662, i64 1 + %tmp12664 = getelementptr inbounds float* %tmp12663, i64 1 + %tmp12665 = getelementptr inbounds float* %tmp12664, i64 1 + %tmp12666 = getelementptr inbounds float* %tmp12665, i64 1 + %tmp12667 = getelementptr inbounds float* %tmp12666, i64 1 + %tmp12668 = getelementptr inbounds float* %tmp12667, i64 1 + %tmp12669 = getelementptr inbounds float* %tmp12668, i64 1 + %tmp12670 = getelementptr inbounds float* %tmp12669, i64 1 + %tmp12671 = getelementptr inbounds float* %tmp12670, i64 1 + %tmp12672 = getelementptr inbounds float* %tmp12671, i64 1 + %tmp12673 = getelementptr inbounds float* %tmp12672, i64 1 + %tmp12674 = getelementptr inbounds float* %tmp12673, i64 1 + %tmp12675 = getelementptr inbounds float* %tmp12674, i64 1 + %tmp12676 = getelementptr inbounds float* %tmp12675, i64 1 + %tmp12677 = getelementptr inbounds float* %tmp12676, i64 1 + %tmp12678 = getelementptr inbounds float* %tmp12677, i64 1 + %tmp12679 = getelementptr inbounds float* %tmp12678, i64 1 + %tmp12680 = getelementptr inbounds float* %tmp12679, i64 1 + %tmp12681 = getelementptr inbounds float* %tmp12680, i64 1 + %tmp12682 = getelementptr inbounds float* %tmp12681, i64 1 + %tmp12683 = getelementptr inbounds float* %tmp12682, i64 1 + %tmp12684 = getelementptr inbounds float* %tmp12683, i64 1 + %tmp12685 = getelementptr inbounds float* %tmp12684, i64 1 + %tmp12686 = getelementptr inbounds float* 
%tmp12685, i64 1 + %tmp12687 = getelementptr inbounds float* %tmp12686, i64 1 + %tmp12688 = getelementptr inbounds float* %tmp12687, i64 1 + %tmp12689 = getelementptr inbounds float* %tmp12688, i64 1 + %tmp12690 = getelementptr inbounds float* %tmp12689, i64 1 + %tmp12691 = getelementptr inbounds float* %tmp12690, i64 1 + %tmp12692 = getelementptr inbounds float* %tmp12691, i64 1 + %tmp12693 = getelementptr inbounds float* %tmp12692, i64 1 + %tmp12694 = getelementptr inbounds float* %tmp12693, i64 1 + %tmp12695 = getelementptr inbounds float* %tmp12694, i64 1 + %tmp12696 = getelementptr inbounds float* %tmp12695, i64 1 + %tmp12697 = getelementptr inbounds float* %tmp12696, i64 1 + %tmp12698 = getelementptr inbounds float* %tmp12697, i64 1 + %tmp12699 = getelementptr inbounds float* %tmp12698, i64 1 + %tmp12700 = getelementptr inbounds float* %tmp12699, i64 1 + %tmp12701 = getelementptr inbounds float* %tmp12700, i64 1 + %tmp12702 = getelementptr inbounds float* %tmp12701, i64 1 + %tmp12703 = getelementptr inbounds float* %tmp12702, i64 1 + %tmp12704 = getelementptr inbounds float* %tmp12703, i64 1 + %tmp12705 = getelementptr inbounds float* %tmp12704, i64 1 + %tmp12706 = getelementptr inbounds float* %tmp12705, i64 1 + %tmp12707 = getelementptr inbounds float* %tmp12706, i64 1 + %tmp12708 = getelementptr inbounds float* %tmp12707, i64 1 + %tmp12709 = getelementptr inbounds float* %tmp12708, i64 1 + %tmp12710 = getelementptr inbounds float* %tmp12709, i64 1 + %tmp12711 = getelementptr inbounds float* %tmp12710, i64 1 + %tmp12712 = getelementptr inbounds float* %tmp12711, i64 1 + %tmp12713 = getelementptr inbounds float* %tmp12712, i64 1 + %tmp12714 = getelementptr inbounds float* %tmp12713, i64 1 + %tmp12715 = getelementptr inbounds float* %tmp12714, i64 1 + %tmp12716 = getelementptr inbounds float* %tmp12715, i64 1 + %tmp12717 = getelementptr inbounds float* %tmp12716, i64 1 + %tmp12718 = getelementptr inbounds float* %tmp12717, i64 1 + %tmp12719 = getelementptr 
inbounds float* %tmp12718, i64 1 + %tmp12720 = getelementptr inbounds float* %tmp12719, i64 1 + %tmp12721 = getelementptr inbounds float* %tmp12720, i64 1 + %tmp12722 = getelementptr inbounds float* %tmp12721, i64 1 + %tmp12723 = getelementptr inbounds float* %tmp12722, i64 1 + %tmp12724 = getelementptr inbounds float* %tmp12723, i64 1 + %tmp12725 = getelementptr inbounds float* %tmp12724, i64 1 + %tmp12726 = getelementptr inbounds float* %tmp12725, i64 1 + %tmp12727 = getelementptr inbounds float* %tmp12726, i64 1 + %tmp12728 = getelementptr inbounds float* %tmp12727, i64 1 + %tmp12729 = getelementptr inbounds float* %tmp12728, i64 1 + %tmp12730 = getelementptr inbounds float* %tmp12729, i64 1 + %tmp12731 = getelementptr inbounds float* %tmp12730, i64 1 + %tmp12732 = getelementptr inbounds float* %tmp12731, i64 1 + %tmp12733 = getelementptr inbounds float* %tmp12732, i64 1 + %tmp12734 = getelementptr inbounds float* %tmp12733, i64 1 + %tmp12735 = getelementptr inbounds float* %tmp12734, i64 1 + %tmp12736 = getelementptr inbounds float* %tmp12735, i64 1 + %tmp12737 = getelementptr inbounds float* %tmp12736, i64 1 + %tmp12738 = getelementptr inbounds float* %tmp12737, i64 1 + %tmp12739 = getelementptr inbounds float* %tmp12738, i64 1 + %tmp12740 = getelementptr inbounds float* %tmp12739, i64 1 + %tmp12741 = getelementptr inbounds float* %tmp12740, i64 1 + %tmp12742 = getelementptr inbounds float* %tmp12741, i64 1 + %tmp12743 = getelementptr inbounds float* %tmp12742, i64 1 + %tmp12744 = getelementptr inbounds float* %tmp12743, i64 1 + %tmp12745 = getelementptr inbounds float* %tmp12744, i64 1 + %tmp12746 = getelementptr inbounds float* %tmp12745, i64 1 + %tmp12747 = getelementptr inbounds float* %tmp12746, i64 1 + %tmp12748 = getelementptr inbounds float* %tmp12747, i64 1 + %tmp12749 = getelementptr inbounds float* %tmp12748, i64 1 + %tmp12750 = getelementptr inbounds float* %tmp12749, i64 1 + %tmp12751 = getelementptr inbounds float* %tmp12750, i64 1 + %tmp12752 = 
getelementptr inbounds float* %tmp12751, i64 1 + %tmp12753 = getelementptr inbounds float* %tmp12752, i64 1 + %tmp12754 = getelementptr inbounds float* %tmp12753, i64 1 + %tmp12755 = getelementptr inbounds float* %tmp12754, i64 1 + %tmp12756 = getelementptr inbounds float* %tmp12755, i64 1 + %tmp12757 = getelementptr inbounds float* %tmp12756, i64 1 + %tmp12758 = getelementptr inbounds float* %tmp12757, i64 1 + %tmp12759 = getelementptr inbounds float* %tmp12758, i64 1 + %tmp12760 = getelementptr inbounds float* %tmp12759, i64 1 + %tmp12761 = getelementptr inbounds float* %tmp12760, i64 1 + %tmp12762 = getelementptr inbounds float* %tmp12761, i64 1 + %tmp12763 = getelementptr inbounds float* %tmp12762, i64 1 + %tmp12764 = getelementptr inbounds float* %tmp12763, i64 1 + %tmp12765 = getelementptr inbounds float* %tmp12764, i64 1 + %tmp12766 = getelementptr inbounds float* %tmp12765, i64 1 + %tmp12767 = getelementptr inbounds float* %tmp12766, i64 1 + %tmp12768 = getelementptr inbounds float* %tmp12767, i64 1 + %tmp12769 = getelementptr inbounds float* %tmp12768, i64 1 + %tmp12770 = getelementptr inbounds float* %tmp12769, i64 1 + %tmp12771 = getelementptr inbounds float* %tmp12770, i64 1 + %tmp12772 = getelementptr inbounds float* %tmp12771, i64 1 + %tmp12773 = getelementptr inbounds float* %tmp12772, i64 1 + %tmp12774 = getelementptr inbounds float* %tmp12773, i64 1 + %tmp12775 = getelementptr inbounds float* %tmp12774, i64 1 + %tmp12776 = getelementptr inbounds float* %tmp12775, i64 1 + %tmp12777 = getelementptr inbounds float* %tmp12776, i64 1 + %tmp12778 = getelementptr inbounds float* %tmp12777, i64 1 + %tmp12779 = getelementptr inbounds float* %tmp12778, i64 1 + %tmp12780 = getelementptr inbounds float* %tmp12779, i64 1 + %tmp12781 = getelementptr inbounds float* %tmp12780, i64 1 + %tmp12782 = getelementptr inbounds float* %tmp12781, i64 1 + %tmp12783 = getelementptr inbounds float* %tmp12782, i64 1 + %tmp12784 = getelementptr inbounds float* %tmp12783, i64 1 
+ %tmp12785 = getelementptr inbounds float* %tmp12784, i64 1 + %tmp12786 = getelementptr inbounds float* %tmp12785, i64 1 + %tmp12787 = getelementptr inbounds float* %tmp12786, i64 1 + %tmp12788 = getelementptr inbounds float* %tmp12787, i64 1 + %tmp12789 = getelementptr inbounds float* %tmp12788, i64 1 + %tmp12790 = getelementptr inbounds float* %tmp12789, i64 1 + %tmp12791 = getelementptr inbounds float* %tmp12790, i64 1 + %tmp12792 = getelementptr inbounds float* %tmp12791, i64 1 + %tmp12793 = getelementptr inbounds float* %tmp12792, i64 1 + %tmp12794 = getelementptr inbounds float* %tmp12793, i64 1 + %tmp12795 = getelementptr inbounds float* %tmp12794, i64 1 + %tmp12796 = getelementptr inbounds float* %tmp12795, i64 1 + %tmp12797 = getelementptr inbounds float* %tmp12796, i64 1 + %tmp12798 = getelementptr inbounds float* %tmp12797, i64 1 + %tmp12799 = getelementptr inbounds float* %tmp12798, i64 1 + %tmp12800 = getelementptr inbounds float* %tmp12799, i64 1 + %tmp12801 = getelementptr inbounds float* %tmp12800, i64 1 + %tmp12802 = getelementptr inbounds float* %tmp12801, i64 1 + %tmp12803 = getelementptr inbounds float* %tmp12802, i64 1 + %tmp12804 = getelementptr inbounds float* %tmp12803, i64 1 + %tmp12805 = getelementptr inbounds float* %tmp12804, i64 1 + %tmp12806 = getelementptr inbounds float* %tmp12805, i64 1 + %tmp12807 = getelementptr inbounds float* %tmp12806, i64 1 + %tmp12808 = getelementptr inbounds float* %tmp12807, i64 1 + %tmp12809 = getelementptr inbounds float* %tmp12808, i64 1 + %tmp12810 = getelementptr inbounds float* %tmp12809, i64 1 + %tmp12811 = getelementptr inbounds float* %tmp12810, i64 1 + %tmp12812 = getelementptr inbounds float* %tmp12811, i64 1 + %tmp12813 = getelementptr inbounds float* %tmp12812, i64 1 + %tmp12814 = getelementptr inbounds float* %tmp12813, i64 1 + %tmp12815 = getelementptr inbounds float* %tmp12814, i64 1 + %tmp12816 = getelementptr inbounds float* %tmp12815, i64 1 + %tmp12817 = getelementptr inbounds float* 
%tmp12816, i64 1 + %tmp12818 = getelementptr inbounds float* %tmp12817, i64 1 + %tmp12819 = getelementptr inbounds float* %tmp12818, i64 1 + %tmp12820 = getelementptr inbounds float* %tmp12819, i64 1 + %tmp12821 = getelementptr inbounds float* %tmp12820, i64 1 + %tmp12822 = getelementptr inbounds float* %tmp12821, i64 1 + %tmp12823 = getelementptr inbounds float* %tmp12822, i64 1 + %tmp12824 = getelementptr inbounds float* %tmp12823, i64 1 + %tmp12825 = getelementptr inbounds float* %tmp12824, i64 1 + %tmp12826 = getelementptr inbounds float* %tmp12825, i64 1 + %tmp12827 = getelementptr inbounds float* %tmp12826, i64 1 + %tmp12828 = getelementptr inbounds float* %tmp12827, i64 1 + %tmp12829 = getelementptr inbounds float* %tmp12828, i64 1 + %tmp12830 = getelementptr inbounds float* %tmp12829, i64 1 + %tmp12831 = getelementptr inbounds float* %tmp12830, i64 1 + %tmp12832 = getelementptr inbounds float* %tmp12831, i64 1 + %tmp12833 = getelementptr inbounds float* %tmp12832, i64 1 + %tmp12834 = getelementptr inbounds float* %tmp12833, i64 1 + %tmp12835 = getelementptr inbounds float* %tmp12834, i64 1 + %tmp12836 = getelementptr inbounds float* %tmp12835, i64 1 + %tmp12837 = getelementptr inbounds float* %tmp12836, i64 1 + %tmp12838 = getelementptr inbounds float* %tmp12837, i64 1 + %tmp12839 = getelementptr inbounds float* %tmp12838, i64 1 + %tmp12840 = getelementptr inbounds float* %tmp12839, i64 1 + %tmp12841 = getelementptr inbounds float* %tmp12840, i64 1 + %tmp12842 = getelementptr inbounds float* %tmp12841, i64 1 + %tmp12843 = getelementptr inbounds float* %tmp12842, i64 1 + %tmp12844 = getelementptr inbounds float* %tmp12843, i64 1 + %tmp12845 = getelementptr inbounds float* %tmp12844, i64 1 + %tmp12846 = getelementptr inbounds float* %tmp12845, i64 1 + %tmp12847 = getelementptr inbounds float* %tmp12846, i64 1 + %tmp12848 = getelementptr inbounds float* %tmp12847, i64 1 + %tmp12849 = getelementptr inbounds float* %tmp12848, i64 1 + %tmp12850 = getelementptr 
inbounds float* %tmp12849, i64 1 + %tmp12851 = getelementptr inbounds float* %tmp12850, i64 1 + %tmp12852 = getelementptr inbounds float* %tmp12851, i64 1 + %tmp12853 = getelementptr inbounds float* %tmp12852, i64 1 + %tmp12854 = getelementptr inbounds float* %tmp12853, i64 1 + %tmp12855 = getelementptr inbounds float* %tmp12854, i64 1 + %tmp12856 = getelementptr inbounds float* %tmp12855, i64 1 + %tmp12857 = getelementptr inbounds float* %tmp12856, i64 1 + %tmp12858 = getelementptr inbounds float* %tmp12857, i64 1 + %tmp12859 = getelementptr inbounds float* %tmp12858, i64 1 + %tmp12860 = getelementptr inbounds float* %tmp12859, i64 1 + %tmp12861 = getelementptr inbounds float* %tmp12860, i64 1 + %tmp12862 = getelementptr inbounds float* %tmp12861, i64 1 + %tmp12863 = getelementptr inbounds float* %tmp12862, i64 1 + %tmp12864 = getelementptr inbounds float* %tmp12863, i64 1 + %tmp12865 = getelementptr inbounds float* %tmp12864, i64 1 + %tmp12866 = getelementptr inbounds float* %tmp12865, i64 1 + %tmp12867 = getelementptr inbounds float* %tmp12866, i64 1 + %tmp12868 = getelementptr inbounds float* %tmp12867, i64 1 + %tmp12869 = getelementptr inbounds float* %tmp12868, i64 1 + %tmp12870 = getelementptr inbounds float* %tmp12869, i64 1 + %tmp12871 = getelementptr inbounds float* %tmp12870, i64 1 + %tmp12872 = getelementptr inbounds float* %tmp12871, i64 1 + %tmp12873 = getelementptr inbounds float* %tmp12872, i64 1 + %tmp12874 = getelementptr inbounds float* %tmp12873, i64 1 + %tmp12875 = getelementptr inbounds float* %tmp12874, i64 1 + %tmp12876 = getelementptr inbounds float* %tmp12875, i64 1 + %tmp12877 = getelementptr inbounds float* %tmp12876, i64 1 + %tmp12878 = getelementptr inbounds float* %tmp12877, i64 1 + %tmp12879 = getelementptr inbounds float* %tmp12878, i64 1 + %tmp12880 = getelementptr inbounds float* %tmp12879, i64 1 + %tmp12881 = getelementptr inbounds float* %tmp12880, i64 1 + %tmp12882 = getelementptr inbounds float* %tmp12881, i64 1 + %tmp12883 = 
getelementptr inbounds float* %tmp12882, i64 1 + %tmp12884 = getelementptr inbounds float* %tmp12883, i64 1 + %tmp12885 = getelementptr inbounds float* %tmp12884, i64 1 + %tmp12886 = getelementptr inbounds float* %tmp12885, i64 1 + %tmp12887 = getelementptr inbounds float* %tmp12886, i64 1 + %tmp12888 = getelementptr inbounds float* %tmp12887, i64 1 + %tmp12889 = getelementptr inbounds float* %tmp12888, i64 1 + %tmp12890 = getelementptr inbounds float* %tmp12889, i64 1 + %tmp12891 = getelementptr inbounds float* %tmp12890, i64 1 + %tmp12892 = getelementptr inbounds float* %tmp12891, i64 1 + %tmp12893 = getelementptr inbounds float* %tmp12892, i64 1 + %tmp12894 = getelementptr inbounds float* %tmp12893, i64 1 + %tmp12895 = getelementptr inbounds float* %tmp12894, i64 1 + %tmp12896 = getelementptr inbounds float* %tmp12895, i64 1 + %tmp12897 = getelementptr inbounds float* %tmp12896, i64 1 + %tmp12898 = getelementptr inbounds float* %tmp12897, i64 1 + %tmp12899 = getelementptr inbounds float* %tmp12898, i64 1 + %tmp12900 = getelementptr inbounds float* %tmp12899, i64 1 + %tmp12901 = getelementptr inbounds float* %tmp12900, i64 1 + %tmp12902 = getelementptr inbounds float* %tmp12901, i64 1 + %tmp12903 = getelementptr inbounds float* %tmp12902, i64 1 + %tmp12904 = getelementptr inbounds float* %tmp12903, i64 1 + %tmp12905 = getelementptr inbounds float* %tmp12904, i64 1 + %tmp12906 = getelementptr inbounds float* %tmp12905, i64 1 + %tmp12907 = getelementptr inbounds float* %tmp12906, i64 1 + %tmp12908 = getelementptr inbounds float* %tmp12907, i64 1 + %tmp12909 = getelementptr inbounds float* %tmp12908, i64 1 + %tmp12910 = getelementptr inbounds float* %tmp12909, i64 1 + %tmp12911 = getelementptr inbounds float* %tmp12910, i64 1 + %tmp12912 = getelementptr inbounds float* %tmp12911, i64 1 + %tmp12913 = getelementptr inbounds float* %tmp12912, i64 1 + %tmp12914 = getelementptr inbounds float* %tmp12913, i64 1 + %tmp12915 = getelementptr inbounds float* %tmp12914, i64 1 
+ %tmp12916 = getelementptr inbounds float* %tmp12915, i64 1 + %tmp12917 = getelementptr inbounds float* %tmp12916, i64 1 + %tmp12918 = getelementptr inbounds float* %tmp12917, i64 1 + %tmp12919 = getelementptr inbounds float* %tmp12918, i64 1 + %tmp12920 = getelementptr inbounds float* %tmp12919, i64 1 + %tmp12921 = getelementptr inbounds float* %tmp12920, i64 1 + %tmp12922 = getelementptr inbounds float* %tmp12921, i64 1 + %tmp12923 = getelementptr inbounds float* %tmp12922, i64 1 + %tmp12924 = getelementptr inbounds float* %tmp12923, i64 1 + %tmp12925 = getelementptr inbounds float* %tmp12924, i64 1 + %tmp12926 = getelementptr inbounds float* %tmp12925, i64 1 + %tmp12927 = getelementptr inbounds float* %tmp12926, i64 1 + %tmp12928 = getelementptr inbounds float* %tmp12927, i64 1 + %tmp12929 = getelementptr inbounds float* %tmp12928, i64 1 + %tmp12930 = getelementptr inbounds float* %tmp12929, i64 1 + %tmp12931 = getelementptr inbounds float* %tmp12930, i64 1 + %tmp12932 = getelementptr inbounds float* %tmp12931, i64 1 + %tmp12933 = getelementptr inbounds float* %tmp12932, i64 1 + %tmp12934 = getelementptr inbounds float* %tmp12933, i64 1 + %tmp12935 = getelementptr inbounds float* %tmp12934, i64 1 + %tmp12936 = getelementptr inbounds float* %tmp12935, i64 1 + %tmp12937 = getelementptr inbounds float* %tmp12936, i64 1 + %tmp12938 = getelementptr inbounds float* %tmp12937, i64 1 + %tmp12939 = getelementptr inbounds float* %tmp12938, i64 1 + %tmp12940 = getelementptr inbounds float* %tmp12939, i64 1 + %tmp12941 = getelementptr inbounds float* %tmp12940, i64 1 + %tmp12942 = getelementptr inbounds float* %tmp12941, i64 1 + %tmp12943 = getelementptr inbounds float* %tmp12942, i64 1 + %tmp12944 = getelementptr inbounds float* %tmp12943, i64 1 + %tmp12945 = getelementptr inbounds float* %tmp12944, i64 1 + %tmp12946 = getelementptr inbounds float* %tmp12945, i64 1 + %tmp12947 = getelementptr inbounds float* %tmp12946, i64 1 + %tmp12948 = getelementptr inbounds float* 
%tmp12947, i64 1 + %tmp12949 = getelementptr inbounds float* %tmp12948, i64 1 + %tmp12950 = getelementptr inbounds float* %tmp12949, i64 1 + %tmp12951 = getelementptr inbounds float* %tmp12950, i64 1 + %tmp12952 = getelementptr inbounds float* %tmp12951, i64 1 + %tmp12953 = getelementptr inbounds float* %tmp12952, i64 1 + %tmp12954 = getelementptr inbounds float* %tmp12953, i64 1 + %tmp12955 = getelementptr inbounds float* %tmp12954, i64 1 + %tmp12956 = getelementptr inbounds float* %tmp12955, i64 1 + %tmp12957 = getelementptr inbounds float* %tmp12956, i64 1 + %tmp12958 = getelementptr inbounds float* %tmp12957, i64 1 + %tmp12959 = getelementptr inbounds float* %tmp12958, i64 1 + %tmp12960 = getelementptr inbounds float* %tmp12959, i64 1 + %tmp12961 = getelementptr inbounds float* %tmp12960, i64 1 + %tmp12962 = getelementptr inbounds float* %tmp12961, i64 1 + %tmp12963 = getelementptr inbounds float* %tmp12962, i64 1 + %tmp12964 = getelementptr inbounds float* %tmp12963, i64 1 + %tmp12965 = getelementptr inbounds float* %tmp12964, i64 1 + %tmp12966 = getelementptr inbounds float* %tmp12965, i64 1 + %tmp12967 = getelementptr inbounds float* %tmp12966, i64 1 + %tmp12968 = getelementptr inbounds float* %tmp12967, i64 1 + %tmp12969 = getelementptr inbounds float* %tmp12968, i64 1 + %tmp12970 = getelementptr inbounds float* %tmp12969, i64 1 + %tmp12971 = getelementptr inbounds float* %tmp12970, i64 1 + %tmp12972 = getelementptr inbounds float* %tmp12971, i64 1 + %tmp12973 = getelementptr inbounds float* %tmp12972, i64 1 + %tmp12974 = getelementptr inbounds float* %tmp12973, i64 1 + %tmp12975 = getelementptr inbounds float* %tmp12974, i64 1 + %tmp12976 = getelementptr inbounds float* %tmp12975, i64 1 + %tmp12977 = getelementptr inbounds float* %tmp12976, i64 1 + %tmp12978 = getelementptr inbounds float* %tmp12977, i64 1 + %tmp12979 = getelementptr inbounds float* %tmp12978, i64 1 + %tmp12980 = getelementptr inbounds float* %tmp12979, i64 1 + %tmp12981 = getelementptr 
inbounds float* %tmp12980, i64 1 + %tmp12982 = getelementptr inbounds float* %tmp12981, i64 1 + %tmp12983 = getelementptr inbounds float* %tmp12982, i64 1 + %tmp12984 = getelementptr inbounds float* %tmp12983, i64 1 + %tmp12985 = getelementptr inbounds float* %tmp12984, i64 1 + %tmp12986 = getelementptr inbounds float* %tmp12985, i64 1 + %tmp12987 = getelementptr inbounds float* %tmp12986, i64 1 + %tmp12988 = getelementptr inbounds float* %tmp12987, i64 1 + %tmp12989 = getelementptr inbounds float* %tmp12988, i64 1 + %tmp12990 = getelementptr inbounds float* %tmp12989, i64 1 + %tmp12991 = getelementptr inbounds float* %tmp12990, i64 1 + %tmp12992 = getelementptr inbounds float* %tmp12991, i64 1 + %tmp12993 = getelementptr inbounds float* %tmp12992, i64 1 + %tmp12994 = getelementptr inbounds float* %tmp12993, i64 1 + %tmp12995 = getelementptr inbounds float* %tmp12994, i64 1 + %tmp12996 = getelementptr inbounds float* %tmp12995, i64 1 + %tmp12997 = getelementptr inbounds float* %tmp12996, i64 1 + %tmp12998 = getelementptr inbounds float* %tmp12997, i64 1 + %tmp12999 = getelementptr inbounds float* %tmp12998, i64 1 + %tmp13000 = getelementptr inbounds float* %tmp12999, i64 1 + %tmp13001 = getelementptr inbounds float* %tmp13000, i64 1 + %tmp13002 = getelementptr inbounds float* %tmp13001, i64 1 + %tmp13003 = getelementptr inbounds float* %tmp13002, i64 1 + %tmp13004 = getelementptr inbounds float* %tmp13003, i64 1 + %tmp13005 = getelementptr inbounds float* %tmp13004, i64 1 + %tmp13006 = getelementptr inbounds float* %tmp13005, i64 1 + %tmp13007 = getelementptr inbounds float* %tmp13006, i64 1 + %tmp13008 = getelementptr inbounds float* %tmp13007, i64 1 + %tmp13009 = getelementptr inbounds float* %tmp13008, i64 1 + %tmp13010 = getelementptr inbounds float* %tmp13009, i64 1 + %tmp13011 = getelementptr inbounds float* %tmp13010, i64 1 + %tmp13012 = getelementptr inbounds float* %tmp13011, i64 1 + %tmp13013 = getelementptr inbounds float* %tmp13012, i64 1 + %tmp13014 = 
getelementptr inbounds float* %tmp13013, i64 1 + %tmp13015 = getelementptr inbounds float* %tmp13014, i64 1 + %tmp13016 = getelementptr inbounds float* %tmp13015, i64 1 + %tmp13017 = getelementptr inbounds float* %tmp13016, i64 1 + %tmp13018 = getelementptr inbounds float* %tmp13017, i64 1 + %tmp13019 = getelementptr inbounds float* %tmp13018, i64 1 + %tmp13020 = getelementptr inbounds float* %tmp13019, i64 1 + %tmp13021 = getelementptr inbounds float* %tmp13020, i64 1 + %tmp13022 = getelementptr inbounds float* %tmp13021, i64 1 + %tmp13023 = getelementptr inbounds float* %tmp13022, i64 1 + %tmp13024 = getelementptr inbounds float* %tmp13023, i64 1 + %tmp13025 = getelementptr inbounds float* %tmp13024, i64 1 + %tmp13026 = getelementptr inbounds float* %tmp13025, i64 1 + %tmp13027 = getelementptr inbounds float* %tmp13026, i64 1 + %tmp13028 = getelementptr inbounds float* %tmp13027, i64 1 + %tmp13029 = getelementptr inbounds float* %tmp13028, i64 1 + %tmp13030 = getelementptr inbounds float* %tmp13029, i64 1 + %tmp13031 = getelementptr inbounds float* %tmp13030, i64 1 + %tmp13032 = getelementptr inbounds float* %tmp13031, i64 1 + %tmp13033 = getelementptr inbounds float* %tmp13032, i64 1 + %tmp13034 = getelementptr inbounds float* %tmp13033, i64 1 + %tmp13035 = getelementptr inbounds float* %tmp13034, i64 1 + %tmp13036 = getelementptr inbounds float* %tmp13035, i64 1 + %tmp13037 = getelementptr inbounds float* %tmp13036, i64 1 + %tmp13038 = getelementptr inbounds float* %tmp13037, i64 1 + %tmp13039 = getelementptr inbounds float* %tmp13038, i64 1 + %tmp13040 = getelementptr inbounds float* %tmp13039, i64 1 + %tmp13041 = getelementptr inbounds float* %tmp13040, i64 1 + %tmp13042 = getelementptr inbounds float* %tmp13041, i64 1 + %tmp13043 = getelementptr inbounds float* %tmp13042, i64 1 + %tmp13044 = getelementptr inbounds float* %tmp13043, i64 1 + %tmp13045 = getelementptr inbounds float* %tmp13044, i64 1 + %tmp13046 = getelementptr inbounds float* %tmp13045, i64 1 
+ %tmp13047 = getelementptr inbounds float* %tmp13046, i64 1 + %tmp13048 = getelementptr inbounds float* %tmp13047, i64 1 + %tmp13049 = getelementptr inbounds float* %tmp13048, i64 1 + %tmp13050 = getelementptr inbounds float* %tmp13049, i64 1 + %tmp13051 = getelementptr inbounds float* %tmp13050, i64 1 + %tmp13052 = getelementptr inbounds float* %tmp13051, i64 1 + %tmp13053 = getelementptr inbounds float* %tmp13052, i64 1 + %tmp13054 = getelementptr inbounds float* %tmp13053, i64 1 + %tmp13055 = getelementptr inbounds float* %tmp13054, i64 1 + %tmp13056 = getelementptr inbounds float* %tmp13055, i64 1 + %tmp13057 = getelementptr inbounds float* %tmp13056, i64 1 + %tmp13058 = getelementptr inbounds float* %tmp13057, i64 1 + %tmp13059 = getelementptr inbounds float* %tmp13058, i64 1 + %tmp13060 = getelementptr inbounds float* %tmp13059, i64 1 + %tmp13061 = getelementptr inbounds float* %tmp13060, i64 1 + %tmp13062 = getelementptr inbounds float* %tmp13061, i64 1 + %tmp13063 = getelementptr inbounds float* %tmp13062, i64 1 + %tmp13064 = getelementptr inbounds float* %tmp13063, i64 1 + %tmp13065 = getelementptr inbounds float* %tmp13064, i64 1 + %tmp13066 = getelementptr inbounds float* %tmp13065, i64 1 + %tmp13067 = getelementptr inbounds float* %tmp13066, i64 1 + %tmp13068 = getelementptr inbounds float* %tmp13067, i64 1 + %tmp13069 = getelementptr inbounds float* %tmp13068, i64 1 + %tmp13070 = getelementptr inbounds float* %tmp13069, i64 1 + %tmp13071 = getelementptr inbounds float* %tmp13070, i64 1 + %tmp13072 = getelementptr inbounds float* %tmp13071, i64 1 + %tmp13073 = getelementptr inbounds float* %tmp13072, i64 1 + %tmp13074 = getelementptr inbounds float* %tmp13073, i64 1 + %tmp13075 = getelementptr inbounds float* %tmp13074, i64 1 + %tmp13076 = getelementptr inbounds float* %tmp13075, i64 1 + %tmp13077 = getelementptr inbounds float* %tmp13076, i64 1 + %tmp13078 = getelementptr inbounds float* %tmp13077, i64 1 + %tmp13079 = getelementptr inbounds float* 
%tmp13078, i64 1 + %tmp13080 = getelementptr inbounds float* %tmp13079, i64 1 + %tmp13081 = getelementptr inbounds float* %tmp13080, i64 1 + %tmp13082 = getelementptr inbounds float* %tmp13081, i64 1 + %tmp13083 = getelementptr inbounds float* %tmp13082, i64 1 + %tmp13084 = getelementptr inbounds float* %tmp13083, i64 1 + %tmp13085 = getelementptr inbounds float* %tmp13084, i64 1 + %tmp13086 = getelementptr inbounds float* %tmp13085, i64 1 + %tmp13087 = getelementptr inbounds float* %tmp13086, i64 1 + %tmp13088 = getelementptr inbounds float* %tmp13087, i64 1 + %tmp13089 = getelementptr inbounds float* %tmp13088, i64 1 + %tmp13090 = getelementptr inbounds float* %tmp13089, i64 1 + %tmp13091 = getelementptr inbounds float* %tmp13090, i64 1 + %tmp13092 = getelementptr inbounds float* %tmp13091, i64 1 + %tmp13093 = getelementptr inbounds float* %tmp13092, i64 1 + %tmp13094 = getelementptr inbounds float* %tmp13093, i64 1 + %tmp13095 = getelementptr inbounds float* %tmp13094, i64 1 + %tmp13096 = getelementptr inbounds float* %tmp13095, i64 1 + %tmp13097 = getelementptr inbounds float* %tmp13096, i64 1 + %tmp13098 = getelementptr inbounds float* %tmp13097, i64 1 + %tmp13099 = getelementptr inbounds float* %tmp13098, i64 1 + %tmp13100 = getelementptr inbounds float* %tmp13099, i64 1 + %tmp13101 = getelementptr inbounds float* %tmp13100, i64 1 + %tmp13102 = getelementptr inbounds float* %tmp13101, i64 1 + %tmp13103 = getelementptr inbounds float* %tmp13102, i64 1 + %tmp13104 = getelementptr inbounds float* %tmp13103, i64 1 + %tmp13105 = getelementptr inbounds float* %tmp13104, i64 1 + %tmp13106 = getelementptr inbounds float* %tmp13105, i64 1 + %tmp13107 = getelementptr inbounds float* %tmp13106, i64 1 + %tmp13108 = getelementptr inbounds float* %tmp13107, i64 1 + %tmp13109 = getelementptr inbounds float* %tmp13108, i64 1 + %tmp13110 = getelementptr inbounds float* %tmp13109, i64 1 + %tmp13111 = getelementptr inbounds float* %tmp13110, i64 1 + %tmp13112 = getelementptr 
inbounds float* %tmp13111, i64 1 + %tmp13113 = getelementptr inbounds float* %tmp13112, i64 1 + %tmp13114 = getelementptr inbounds float* %tmp13113, i64 1 + %tmp13115 = getelementptr inbounds float* %tmp13114, i64 1 + %tmp13116 = getelementptr inbounds float* %tmp13115, i64 1 + %tmp13117 = getelementptr inbounds float* %tmp13116, i64 1 + %tmp13118 = getelementptr inbounds float* %tmp13117, i64 1 + %tmp13119 = getelementptr inbounds float* %tmp13118, i64 1 + %tmp13120 = getelementptr inbounds float* %tmp13119, i64 1 + %tmp13121 = getelementptr inbounds float* %tmp13120, i64 1 + %tmp13122 = getelementptr inbounds float* %tmp13121, i64 1 + %tmp13123 = getelementptr inbounds float* %tmp13122, i64 1 + %tmp13124 = getelementptr inbounds float* %tmp13123, i64 1 + %tmp13125 = getelementptr inbounds float* %tmp13124, i64 1 + %tmp13126 = getelementptr inbounds float* %tmp13125, i64 1 + %tmp13127 = getelementptr inbounds float* %tmp13126, i64 1 + %tmp13128 = getelementptr inbounds float* %tmp13127, i64 1 + %tmp13129 = getelementptr inbounds float* %tmp13128, i64 1 + %tmp13130 = getelementptr inbounds float* %tmp13129, i64 1 + %tmp13131 = getelementptr inbounds float* %tmp13130, i64 1 + %tmp13132 = getelementptr inbounds float* %tmp13131, i64 1 + %tmp13133 = getelementptr inbounds float* %tmp13132, i64 1 + %tmp13134 = getelementptr inbounds float* %tmp13133, i64 1 + %tmp13135 = getelementptr inbounds float* %tmp13134, i64 1 + %tmp13136 = getelementptr inbounds float* %tmp13135, i64 1 + %tmp13137 = getelementptr inbounds float* %tmp13136, i64 1 + %tmp13138 = getelementptr inbounds float* %tmp13137, i64 1 + %tmp13139 = getelementptr inbounds float* %tmp13138, i64 1 + %tmp13140 = getelementptr inbounds float* %tmp13139, i64 1 + %tmp13141 = getelementptr inbounds float* %tmp13140, i64 1 + %tmp13142 = getelementptr inbounds float* %tmp13141, i64 1 + %tmp13143 = getelementptr inbounds float* %tmp13142, i64 1 + %tmp13144 = getelementptr inbounds float* %tmp13143, i64 1 + %tmp13145 = 
getelementptr inbounds float* %tmp13144, i64 1 + %tmp13146 = getelementptr inbounds float* %tmp13145, i64 1 + %tmp13147 = getelementptr inbounds float* %tmp13146, i64 1 + %tmp13148 = getelementptr inbounds float* %tmp13147, i64 1 + %tmp13149 = getelementptr inbounds float* %tmp13148, i64 1 + %tmp13150 = getelementptr inbounds float* %tmp13149, i64 1 + %tmp13151 = getelementptr inbounds float* %tmp13150, i64 1 + %tmp13152 = getelementptr inbounds float* %tmp13151, i64 1 + %tmp13153 = getelementptr inbounds float* %tmp13152, i64 1 + %tmp13154 = getelementptr inbounds float* %tmp13153, i64 1 + %tmp13155 = getelementptr inbounds float* %tmp13154, i64 1 + %tmp13156 = getelementptr inbounds float* %tmp13155, i64 1 + %tmp13157 = getelementptr inbounds float* %tmp13156, i64 1 + %tmp13158 = getelementptr inbounds float* %tmp13157, i64 1 + %tmp13159 = getelementptr inbounds float* %tmp13158, i64 1 + %tmp13160 = getelementptr inbounds float* %tmp13159, i64 1 + %tmp13161 = getelementptr inbounds float* %tmp13160, i64 1 + %tmp13162 = getelementptr inbounds float* %tmp13161, i64 1 + %tmp13163 = getelementptr inbounds float* %tmp13162, i64 1 + %tmp13164 = getelementptr inbounds float* %tmp13163, i64 1 + %tmp13165 = getelementptr inbounds float* %tmp13164, i64 1 + %tmp13166 = getelementptr inbounds float* %tmp13165, i64 1 + %tmp13167 = getelementptr inbounds float* %tmp13166, i64 1 + %tmp13168 = getelementptr inbounds float* %tmp13167, i64 1 + %tmp13169 = getelementptr inbounds float* %tmp13168, i64 1 + %tmp13170 = getelementptr inbounds float* %tmp13169, i64 1 + %tmp13171 = getelementptr inbounds float* %tmp13170, i64 1 + %tmp13172 = getelementptr inbounds float* %tmp13171, i64 1 + %tmp13173 = getelementptr inbounds float* %tmp13172, i64 1 + %tmp13174 = getelementptr inbounds float* %tmp13173, i64 1 + %tmp13175 = getelementptr inbounds float* %tmp13174, i64 1 + %tmp13176 = getelementptr inbounds float* %tmp13175, i64 1 + %tmp13177 = getelementptr inbounds float* %tmp13176, i64 1 
+ %tmp13178 = getelementptr inbounds float* %tmp13177, i64 1 + %tmp13179 = getelementptr inbounds float* %tmp13178, i64 1 + %tmp13180 = getelementptr inbounds float* %tmp13179, i64 1 + %tmp13181 = getelementptr inbounds float* %tmp13180, i64 1 + %tmp13182 = getelementptr inbounds float* %tmp13181, i64 1 + %tmp13183 = getelementptr inbounds float* %tmp13182, i64 1 + %tmp13184 = getelementptr inbounds float* %tmp13183, i64 1 + %tmp13185 = getelementptr inbounds float* %tmp13184, i64 1 + %tmp13186 = getelementptr inbounds float* %tmp13185, i64 1 + %tmp13187 = getelementptr inbounds float* %tmp13186, i64 1 + %tmp13188 = getelementptr inbounds float* %tmp13187, i64 1 + %tmp13189 = getelementptr inbounds float* %tmp13188, i64 1 + %tmp13190 = getelementptr inbounds float* %tmp13189, i64 1 + %tmp13191 = getelementptr inbounds float* %tmp13190, i64 1 + %tmp13192 = getelementptr inbounds float* %tmp13191, i64 1 + %tmp13193 = getelementptr inbounds float* %tmp13192, i64 1 + %tmp13194 = getelementptr inbounds float* %tmp13193, i64 1 + %tmp13195 = getelementptr inbounds float* %tmp13194, i64 1 + %tmp13196 = getelementptr inbounds float* %tmp13195, i64 1 + %tmp13197 = getelementptr inbounds float* %tmp13196, i64 1 + %tmp13198 = getelementptr inbounds float* %tmp13197, i64 1 + %tmp13199 = getelementptr inbounds float* %tmp13198, i64 1 + %tmp13200 = getelementptr inbounds float* %tmp13199, i64 1 + %tmp13201 = getelementptr inbounds float* %tmp13200, i64 1 + %tmp13202 = getelementptr inbounds float* %tmp13201, i64 1 + %tmp13203 = getelementptr inbounds float* %tmp13202, i64 1 + %tmp13204 = getelementptr inbounds float* %tmp13203, i64 1 + %tmp13205 = getelementptr inbounds float* %tmp13204, i64 1 + %tmp13206 = getelementptr inbounds float* %tmp13205, i64 1 + %tmp13207 = getelementptr inbounds float* %tmp13206, i64 1 + %tmp13208 = getelementptr inbounds float* %tmp13207, i64 1 + %tmp13209 = getelementptr inbounds float* %tmp13208, i64 1 + %tmp13210 = getelementptr inbounds float* 
%tmp13209, i64 1 + %tmp13211 = getelementptr inbounds float* %tmp13210, i64 1 + %tmp13212 = getelementptr inbounds float* %tmp13211, i64 1 + %tmp13213 = getelementptr inbounds float* %tmp13212, i64 1 + %tmp13214 = getelementptr inbounds float* %tmp13213, i64 1 + %tmp13215 = getelementptr inbounds float* %tmp13214, i64 1 + %tmp13216 = getelementptr inbounds float* %tmp13215, i64 1 + %tmp13217 = getelementptr inbounds float* %tmp13216, i64 1 + %tmp13218 = getelementptr inbounds float* %tmp13217, i64 1 + %tmp13219 = getelementptr inbounds float* %tmp13218, i64 1 + %tmp13220 = getelementptr inbounds float* %tmp13219, i64 1 + %tmp13221 = getelementptr inbounds float* %tmp13220, i64 1 + %tmp13222 = getelementptr inbounds float* %tmp13221, i64 1 + %tmp13223 = getelementptr inbounds float* %tmp13222, i64 1 + %tmp13224 = getelementptr inbounds float* %tmp13223, i64 1 + %tmp13225 = getelementptr inbounds float* %tmp13224, i64 1 + %tmp13226 = getelementptr inbounds float* %tmp13225, i64 1 + %tmp13227 = getelementptr inbounds float* %tmp13226, i64 1 + %tmp13228 = getelementptr inbounds float* %tmp13227, i64 1 + %tmp13229 = getelementptr inbounds float* %tmp13228, i64 1 + %tmp13230 = getelementptr inbounds float* %tmp13229, i64 1 + %tmp13231 = getelementptr inbounds float* %tmp13230, i64 1 + %tmp13232 = getelementptr inbounds float* %tmp13231, i64 1 + %tmp13233 = getelementptr inbounds float* %tmp13232, i64 1 + %tmp13234 = getelementptr inbounds float* %tmp13233, i64 1 + %tmp13235 = getelementptr inbounds float* %tmp13234, i64 1 + %tmp13236 = getelementptr inbounds float* %tmp13235, i64 1 + %tmp13237 = getelementptr inbounds float* %tmp13236, i64 1 + %tmp13238 = getelementptr inbounds float* %tmp13237, i64 1 + %tmp13239 = getelementptr inbounds float* %tmp13238, i64 1 + %tmp13240 = getelementptr inbounds float* %tmp13239, i64 1 + %tmp13241 = getelementptr inbounds float* %tmp13240, i64 1 + %tmp13242 = getelementptr inbounds float* %tmp13241, i64 1 + %tmp13243 = getelementptr 
inbounds float* %tmp13242, i64 1 + %tmp13244 = getelementptr inbounds float* %tmp13243, i64 1 + %tmp13245 = getelementptr inbounds float* %tmp13244, i64 1 + %tmp13246 = getelementptr inbounds float* %tmp13245, i64 1 + %tmp13247 = getelementptr inbounds float* %tmp13246, i64 1 + %tmp13248 = getelementptr inbounds float* %tmp13247, i64 1 + %tmp13249 = getelementptr inbounds float* %tmp13248, i64 1 + %tmp13250 = getelementptr inbounds float* %tmp13249, i64 1 + %tmp13251 = getelementptr inbounds float* %tmp13250, i64 1 + %tmp13252 = getelementptr inbounds float* %tmp13251, i64 1 + %tmp13253 = getelementptr inbounds float* %tmp13252, i64 1 + %tmp13254 = getelementptr inbounds float* %tmp13253, i64 1 + %tmp13255 = getelementptr inbounds float* %tmp13254, i64 1 + %tmp13256 = getelementptr inbounds float* %tmp13255, i64 1 + %tmp13257 = getelementptr inbounds float* %tmp13256, i64 1 + %tmp13258 = getelementptr inbounds float* %tmp13257, i64 1 + %tmp13259 = getelementptr inbounds float* %tmp13258, i64 1 + %tmp13260 = getelementptr inbounds float* %tmp13259, i64 1 + %tmp13261 = getelementptr inbounds float* %tmp13260, i64 1 + %tmp13262 = getelementptr inbounds float* %tmp13261, i64 1 + %tmp13263 = getelementptr inbounds float* %tmp13262, i64 1 + %tmp13264 = getelementptr inbounds float* %tmp13263, i64 1 + %tmp13265 = getelementptr inbounds float* %tmp13264, i64 1 + %tmp13266 = getelementptr inbounds float* %tmp13265, i64 1 + %tmp13267 = getelementptr inbounds float* %tmp13266, i64 1 + %tmp13268 = getelementptr inbounds float* %tmp13267, i64 1 + %tmp13269 = getelementptr inbounds float* %tmp13268, i64 1 + %tmp13270 = getelementptr inbounds float* %tmp13269, i64 1 + %tmp13271 = getelementptr inbounds float* %tmp13270, i64 1 + %tmp13272 = getelementptr inbounds float* %tmp13271, i64 1 + %tmp13273 = getelementptr inbounds float* %tmp13272, i64 1 + %tmp13274 = getelementptr inbounds float* %tmp13273, i64 1 + %tmp13275 = getelementptr inbounds float* %tmp13274, i64 1 + %tmp13276 = 
getelementptr inbounds float* %tmp13275, i64 1 + %tmp13277 = getelementptr inbounds float* %tmp13276, i64 1 + %tmp13278 = getelementptr inbounds float* %tmp13277, i64 1 + %tmp13279 = getelementptr inbounds float* %tmp13278, i64 1 + %tmp13280 = getelementptr inbounds float* %tmp13279, i64 1 + %tmp13281 = getelementptr inbounds float* %tmp13280, i64 1 + %tmp13282 = getelementptr inbounds float* %tmp13281, i64 1 + %tmp13283 = getelementptr inbounds float* %tmp13282, i64 1 + %tmp13284 = getelementptr inbounds float* %tmp13283, i64 1 + %tmp13285 = getelementptr inbounds float* %tmp13284, i64 1 + %tmp13286 = getelementptr inbounds float* %tmp13285, i64 1 + %tmp13287 = getelementptr inbounds float* %tmp13286, i64 1 + %tmp13288 = getelementptr inbounds float* %tmp13287, i64 1 + %tmp13289 = getelementptr inbounds float* %tmp13288, i64 1 + %tmp13290 = getelementptr inbounds float* %tmp13289, i64 1 + %tmp13291 = getelementptr inbounds float* %tmp13290, i64 1 + %tmp13292 = getelementptr inbounds float* %tmp13291, i64 1 + %tmp13293 = getelementptr inbounds float* %tmp13292, i64 1 + %tmp13294 = getelementptr inbounds float* %tmp13293, i64 1 + %tmp13295 = getelementptr inbounds float* %tmp13294, i64 1 + %tmp13296 = getelementptr inbounds float* %tmp13295, i64 1 + %tmp13297 = getelementptr inbounds float* %tmp13296, i64 1 + %tmp13298 = getelementptr inbounds float* %tmp13297, i64 1 + %tmp13299 = getelementptr inbounds float* %tmp13298, i64 1 + %tmp13300 = getelementptr inbounds float* %tmp13299, i64 1 + %tmp13301 = getelementptr inbounds float* %tmp13300, i64 1 + %tmp13302 = getelementptr inbounds float* %tmp13301, i64 1 + %tmp13303 = getelementptr inbounds float* %tmp13302, i64 1 + %tmp13304 = getelementptr inbounds float* %tmp13303, i64 1 + %tmp13305 = getelementptr inbounds float* %tmp13304, i64 1 + %tmp13306 = getelementptr inbounds float* %tmp13305, i64 1 + %tmp13307 = getelementptr inbounds float* %tmp13306, i64 1 + %tmp13308 = getelementptr inbounds float* %tmp13307, i64 1 
+ %tmp13309 = getelementptr inbounds float* %tmp13308, i64 1 + %tmp13310 = getelementptr inbounds float* %tmp13309, i64 1 + %tmp13311 = getelementptr inbounds float* %tmp13310, i64 1 + %tmp13312 = getelementptr inbounds float* %tmp13311, i64 1 + %tmp13313 = getelementptr inbounds float* %tmp13312, i64 1 + %tmp13314 = getelementptr inbounds float* %tmp13313, i64 1 + %tmp13315 = getelementptr inbounds float* %tmp13314, i64 1 + %tmp13316 = getelementptr inbounds float* %tmp13315, i64 1 + %tmp13317 = getelementptr inbounds float* %tmp13316, i64 1 + %tmp13318 = getelementptr inbounds float* %tmp13317, i64 1 + %tmp13319 = getelementptr inbounds float* %tmp13318, i64 1 + %tmp13320 = getelementptr inbounds float* %tmp13319, i64 1 + %tmp13321 = getelementptr inbounds float* %tmp13320, i64 1 + %tmp13322 = getelementptr inbounds float* %tmp13321, i64 1 + %tmp13323 = getelementptr inbounds float* %tmp13322, i64 1 + %tmp13324 = getelementptr inbounds float* %tmp13323, i64 1 + %tmp13325 = getelementptr inbounds float* %tmp13324, i64 1 + %tmp13326 = getelementptr inbounds float* %tmp13325, i64 1 + %tmp13327 = getelementptr inbounds float* %tmp13326, i64 1 + %tmp13328 = getelementptr inbounds float* %tmp13327, i64 1 + %tmp13329 = getelementptr inbounds float* %tmp13328, i64 1 + %tmp13330 = getelementptr inbounds float* %tmp13329, i64 1 + %tmp13331 = getelementptr inbounds float* %tmp13330, i64 1 + %tmp13332 = getelementptr inbounds float* %tmp13331, i64 1 + %tmp13333 = getelementptr inbounds float* %tmp13332, i64 1 + %tmp13334 = getelementptr inbounds float* %tmp13333, i64 1 + %tmp13335 = getelementptr inbounds float* %tmp13334, i64 1 + %tmp13336 = getelementptr inbounds float* %tmp13335, i64 1 + %tmp13337 = getelementptr inbounds float* %tmp13336, i64 1 + %tmp13338 = getelementptr inbounds float* %tmp13337, i64 1 + %tmp13339 = getelementptr inbounds float* %tmp13338, i64 1 + %tmp13340 = getelementptr inbounds float* %tmp13339, i64 1 + %tmp13341 = getelementptr inbounds float* 
%tmp13340, i64 1 + %tmp13342 = getelementptr inbounds float* %tmp13341, i64 1 + %tmp13343 = getelementptr inbounds float* %tmp13342, i64 1 + %tmp13344 = getelementptr inbounds float* %tmp13343, i64 1 + %tmp13345 = getelementptr inbounds float* %tmp13344, i64 1 + %tmp13346 = getelementptr inbounds float* %tmp13345, i64 1 + %tmp13347 = getelementptr inbounds float* %tmp13346, i64 1 + %tmp13348 = getelementptr inbounds float* %tmp13347, i64 1 + %tmp13349 = getelementptr inbounds float* %tmp13348, i64 1 + %tmp13350 = getelementptr inbounds float* %tmp13349, i64 1 + %tmp13351 = getelementptr inbounds float* %tmp13350, i64 1 + %tmp13352 = getelementptr inbounds float* %tmp13351, i64 1 + %tmp13353 = getelementptr inbounds float* %tmp13352, i64 1 + %tmp13354 = getelementptr inbounds float* %tmp13353, i64 1 + %tmp13355 = getelementptr inbounds float* %tmp13354, i64 1 + %tmp13356 = getelementptr inbounds float* %tmp13355, i64 1 + %tmp13357 = getelementptr inbounds float* %tmp13356, i64 1 + %tmp13358 = getelementptr inbounds float* %tmp13357, i64 1 + %tmp13359 = getelementptr inbounds float* %tmp13358, i64 1 + %tmp13360 = getelementptr inbounds float* %tmp13359, i64 1 + %tmp13361 = getelementptr inbounds float* %tmp13360, i64 1 + %tmp13362 = getelementptr inbounds float* %tmp13361, i64 1 + %tmp13363 = getelementptr inbounds float* %tmp13362, i64 1 + %tmp13364 = getelementptr inbounds float* %tmp13363, i64 1 + %tmp13365 = getelementptr inbounds float* %tmp13364, i64 1 + %tmp13366 = getelementptr inbounds float* %tmp13365, i64 1 + %tmp13367 = getelementptr inbounds float* %tmp13366, i64 1 + %tmp13368 = getelementptr inbounds float* %tmp13367, i64 1 + %tmp13369 = getelementptr inbounds float* %tmp13368, i64 1 + %tmp13370 = getelementptr inbounds float* %tmp13369, i64 1 + %tmp13371 = getelementptr inbounds float* %tmp13370, i64 1 + %tmp13372 = getelementptr inbounds float* %tmp13371, i64 1 + %tmp13373 = getelementptr inbounds float* %tmp13372, i64 1 + %tmp13374 = getelementptr 
inbounds float* %tmp13373, i64 1 + %tmp13375 = getelementptr inbounds float* %tmp13374, i64 1 + %tmp13376 = getelementptr inbounds float* %tmp13375, i64 1 + %tmp13377 = getelementptr inbounds float* %tmp13376, i64 1 + %tmp13378 = getelementptr inbounds float* %tmp13377, i64 1 + %tmp13379 = getelementptr inbounds float* %tmp13378, i64 1 + %tmp13380 = getelementptr inbounds float* %tmp13379, i64 1 + %tmp13381 = getelementptr inbounds float* %tmp13380, i64 1 + %tmp13382 = getelementptr inbounds float* %tmp13381, i64 1 + %tmp13383 = getelementptr inbounds float* %tmp13382, i64 1 + %tmp13384 = getelementptr inbounds float* %tmp13383, i64 1 + %tmp13385 = getelementptr inbounds float* %tmp13384, i64 1 + %tmp13386 = getelementptr inbounds float* %tmp13385, i64 1 + %tmp13387 = getelementptr inbounds float* %tmp13386, i64 1 + %tmp13388 = getelementptr inbounds float* %tmp13387, i64 1 + %tmp13389 = getelementptr inbounds float* %tmp13388, i64 1 + %tmp13390 = getelementptr inbounds float* %tmp13389, i64 1 + %tmp13391 = getelementptr inbounds float* %tmp13390, i64 1 + %tmp13392 = getelementptr inbounds float* %tmp13391, i64 1 + %tmp13393 = getelementptr inbounds float* %tmp13392, i64 1 + %tmp13394 = getelementptr inbounds float* %tmp13393, i64 1 + %tmp13395 = getelementptr inbounds float* %tmp13394, i64 1 + %tmp13396 = getelementptr inbounds float* %tmp13395, i64 1 + %tmp13397 = getelementptr inbounds float* %tmp13396, i64 1 + %tmp13398 = getelementptr inbounds float* %tmp13397, i64 1 + %tmp13399 = getelementptr inbounds float* %tmp13398, i64 1 + %tmp13400 = getelementptr inbounds float* %tmp13399, i64 1 + %tmp13401 = getelementptr inbounds float* %tmp13400, i64 1 + %tmp13402 = getelementptr inbounds float* %tmp13401, i64 1 + %tmp13403 = getelementptr inbounds float* %tmp13402, i64 1 + %tmp13404 = getelementptr inbounds float* %tmp13403, i64 1 + %tmp13405 = getelementptr inbounds float* %tmp13404, i64 1 + %tmp13406 = getelementptr inbounds float* %tmp13405, i64 1 + %tmp13407 = 
getelementptr inbounds float* %tmp13406, i64 1 + %tmp13408 = getelementptr inbounds float* %tmp13407, i64 1 + %tmp13409 = getelementptr inbounds float* %tmp13408, i64 1 + %tmp13410 = getelementptr inbounds float* %tmp13409, i64 1 + %tmp13411 = getelementptr inbounds float* %tmp13410, i64 1 + %tmp13412 = getelementptr inbounds float* %tmp13411, i64 1 + %tmp13413 = getelementptr inbounds float* %tmp13412, i64 1 + %tmp13414 = getelementptr inbounds float* %tmp13413, i64 1 + %tmp13415 = getelementptr inbounds float* %tmp13414, i64 1 + %tmp13416 = getelementptr inbounds float* %tmp13415, i64 1 + %tmp13417 = getelementptr inbounds float* %tmp13416, i64 1 + %tmp13418 = getelementptr inbounds float* %tmp13417, i64 1 + %tmp13419 = getelementptr inbounds float* %tmp13418, i64 1 + %tmp13420 = getelementptr inbounds float* %tmp13419, i64 1 + %tmp13421 = getelementptr inbounds float* %tmp13420, i64 1 + %tmp13422 = getelementptr inbounds float* %tmp13421, i64 1 + %tmp13423 = getelementptr inbounds float* %tmp13422, i64 1 + %tmp13424 = getelementptr inbounds float* %tmp13423, i64 1 + %tmp13425 = getelementptr inbounds float* %tmp13424, i64 1 + %tmp13426 = getelementptr inbounds float* %tmp13425, i64 1 + %tmp13427 = getelementptr inbounds float* %tmp13426, i64 1 + %tmp13428 = getelementptr inbounds float* %tmp13427, i64 1 + %tmp13429 = getelementptr inbounds float* %tmp13428, i64 1 + %tmp13430 = getelementptr inbounds float* %tmp13429, i64 1 + %tmp13431 = getelementptr inbounds float* %tmp13430, i64 1 + %tmp13432 = getelementptr inbounds float* %tmp13431, i64 1 + %tmp13433 = getelementptr inbounds float* %tmp13432, i64 1 + %tmp13434 = getelementptr inbounds float* %tmp13433, i64 1 + %tmp13435 = getelementptr inbounds float* %tmp13434, i64 1 + %tmp13436 = getelementptr inbounds float* %tmp13435, i64 1 + %tmp13437 = getelementptr inbounds float* %tmp13436, i64 1 + %tmp13438 = getelementptr inbounds float* %tmp13437, i64 1 + %tmp13439 = getelementptr inbounds float* %tmp13438, i64 1 
+ %tmp13440 = getelementptr inbounds float* %tmp13439, i64 1 + %tmp13441 = getelementptr inbounds float* %tmp13440, i64 1 + %tmp13442 = getelementptr inbounds float* %tmp13441, i64 1 + %tmp13443 = getelementptr inbounds float* %tmp13442, i64 1 + %tmp13444 = getelementptr inbounds float* %tmp13443, i64 1 + %tmp13445 = getelementptr inbounds float* %tmp13444, i64 1 + %tmp13446 = getelementptr inbounds float* %tmp13445, i64 1 + %tmp13447 = getelementptr inbounds float* %tmp13446, i64 1 + %tmp13448 = getelementptr inbounds float* %tmp13447, i64 1 + %tmp13449 = getelementptr inbounds float* %tmp13448, i64 1 + %tmp13450 = getelementptr inbounds float* %tmp13449, i64 1 + %tmp13451 = getelementptr inbounds float* %tmp13450, i64 1 + %tmp13452 = getelementptr inbounds float* %tmp13451, i64 1 + %tmp13453 = getelementptr inbounds float* %tmp13452, i64 1 + %tmp13454 = getelementptr inbounds float* %tmp13453, i64 1 + %tmp13455 = getelementptr inbounds float* %tmp13454, i64 1 + %tmp13456 = getelementptr inbounds float* %tmp13455, i64 1 + %tmp13457 = getelementptr inbounds float* %tmp13456, i64 1 + %tmp13458 = getelementptr inbounds float* %tmp13457, i64 1 + %tmp13459 = getelementptr inbounds float* %tmp13458, i64 1 + %tmp13460 = getelementptr inbounds float* %tmp13459, i64 1 + %tmp13461 = getelementptr inbounds float* %tmp13460, i64 1 + %tmp13462 = getelementptr inbounds float* %tmp13461, i64 1 + %tmp13463 = getelementptr inbounds float* %tmp13462, i64 1 + %tmp13464 = getelementptr inbounds float* %tmp13463, i64 1 + %tmp13465 = getelementptr inbounds float* %tmp13464, i64 1 + %tmp13466 = getelementptr inbounds float* %tmp13465, i64 1 + %tmp13467 = getelementptr inbounds float* %tmp13466, i64 1 + %tmp13468 = getelementptr inbounds float* %tmp13467, i64 1 + %tmp13469 = getelementptr inbounds float* %tmp13468, i64 1 + %tmp13470 = getelementptr inbounds float* %tmp13469, i64 1 + %tmp13471 = getelementptr inbounds float* %tmp13470, i64 1 + %tmp13472 = getelementptr inbounds float* 
%tmp13471, i64 1 + %tmp13473 = getelementptr inbounds float* %tmp13472, i64 1 + %tmp13474 = getelementptr inbounds float* %tmp13473, i64 1 + %tmp13475 = getelementptr inbounds float* %tmp13474, i64 1 + %tmp13476 = getelementptr inbounds float* %tmp13475, i64 1 + %tmp13477 = getelementptr inbounds float* %tmp13476, i64 1 + %tmp13478 = getelementptr inbounds float* %tmp13477, i64 1 + %tmp13479 = getelementptr inbounds float* %tmp13478, i64 1 + %tmp13480 = getelementptr inbounds float* %tmp13479, i64 1 + %tmp13481 = getelementptr inbounds float* %tmp13480, i64 1 + %tmp13482 = getelementptr inbounds float* %tmp13481, i64 1 + %tmp13483 = getelementptr inbounds float* %tmp13482, i64 1 + %tmp13484 = getelementptr inbounds float* %tmp13483, i64 1 + %tmp13485 = getelementptr inbounds float* %tmp13484, i64 1 + %tmp13486 = getelementptr inbounds float* %tmp13485, i64 1 + %tmp13487 = getelementptr inbounds float* %tmp13486, i64 1 + %tmp13488 = getelementptr inbounds float* %tmp13487, i64 1 + %tmp13489 = getelementptr inbounds float* %tmp13488, i64 1 + %tmp13490 = getelementptr inbounds float* %tmp13489, i64 1 + %tmp13491 = getelementptr inbounds float* %tmp13490, i64 1 + %tmp13492 = getelementptr inbounds float* %tmp13491, i64 1 + %tmp13493 = getelementptr inbounds float* %tmp13492, i64 1 + %tmp13494 = getelementptr inbounds float* %tmp13493, i64 1 + %tmp13495 = getelementptr inbounds float* %tmp13494, i64 1 + %tmp13496 = getelementptr inbounds float* %tmp13495, i64 1 + %tmp13497 = getelementptr inbounds float* %tmp13496, i64 1 + %tmp13498 = getelementptr inbounds float* %tmp13497, i64 1 + %tmp13499 = getelementptr inbounds float* %tmp13498, i64 1 + %tmp13500 = getelementptr inbounds float* %tmp13499, i64 1 + %tmp13501 = getelementptr inbounds float* %tmp13500, i64 1 + %tmp13502 = getelementptr inbounds float* %tmp13501, i64 1 + %tmp13503 = getelementptr inbounds float* %tmp13502, i64 1 + %tmp13504 = getelementptr inbounds float* %tmp13503, i64 1 + %tmp13505 = getelementptr 
inbounds float* %tmp13504, i64 1 + %tmp13506 = getelementptr inbounds float* %tmp13505, i64 1 + %tmp13507 = getelementptr inbounds float* %tmp13506, i64 1 + %tmp13508 = getelementptr inbounds float* %tmp13507, i64 1 + %tmp13509 = getelementptr inbounds float* %tmp13508, i64 1 + %tmp13510 = getelementptr inbounds float* %tmp13509, i64 1 + %tmp13511 = getelementptr inbounds float* %tmp13510, i64 1 + %tmp13512 = getelementptr inbounds float* %tmp13511, i64 1 + %tmp13513 = getelementptr inbounds float* %tmp13512, i64 1 + %tmp13514 = getelementptr inbounds float* %tmp13513, i64 1 + %tmp13515 = getelementptr inbounds float* %tmp13514, i64 1 + %tmp13516 = getelementptr inbounds float* %tmp13515, i64 1 + %tmp13517 = getelementptr inbounds float* %tmp13516, i64 1 + %tmp13518 = getelementptr inbounds float* %tmp13517, i64 1 + %tmp13519 = getelementptr inbounds float* %tmp13518, i64 1 + %tmp13520 = getelementptr inbounds float* %tmp13519, i64 1 + %tmp13521 = getelementptr inbounds float* %tmp13520, i64 1 + %tmp13522 = getelementptr inbounds float* %tmp13521, i64 1 + %tmp13523 = getelementptr inbounds float* %tmp13522, i64 1 + %tmp13524 = getelementptr inbounds float* %tmp13523, i64 1 + %tmp13525 = getelementptr inbounds float* %tmp13524, i64 1 + %tmp13526 = getelementptr inbounds float* %tmp13525, i64 1 + %tmp13527 = getelementptr inbounds float* %tmp13526, i64 1 + %tmp13528 = getelementptr inbounds float* %tmp13527, i64 1 + %tmp13529 = getelementptr inbounds float* %tmp13528, i64 1 + %tmp13530 = getelementptr inbounds float* %tmp13529, i64 1 + %tmp13531 = getelementptr inbounds float* %tmp13530, i64 1 + %tmp13532 = getelementptr inbounds float* %tmp13531, i64 1 + %tmp13533 = getelementptr inbounds float* %tmp13532, i64 1 + %tmp13534 = getelementptr inbounds float* %tmp13533, i64 1 + %tmp13535 = getelementptr inbounds float* %tmp13534, i64 1 + %tmp13536 = getelementptr inbounds float* %tmp13535, i64 1 + %tmp13537 = getelementptr inbounds float* %tmp13536, i64 1 + %tmp13538 = 
getelementptr inbounds float* %tmp13537, i64 1 + %tmp13539 = getelementptr inbounds float* %tmp13538, i64 1 + %tmp13540 = getelementptr inbounds float* %tmp13539, i64 1 + %tmp13541 = getelementptr inbounds float* %tmp13540, i64 1 + %tmp13542 = getelementptr inbounds float* %tmp13541, i64 1 + %tmp13543 = getelementptr inbounds float* %tmp13542, i64 1 + %tmp13544 = getelementptr inbounds float* %tmp13543, i64 1 + %tmp13545 = getelementptr inbounds float* %tmp13544, i64 1 + %tmp13546 = getelementptr inbounds float* %tmp13545, i64 1 + %tmp13547 = getelementptr inbounds float* %tmp13546, i64 1 + %tmp13548 = getelementptr inbounds float* %tmp13547, i64 1 + %tmp13549 = getelementptr inbounds float* %tmp13548, i64 1 + %tmp13550 = getelementptr inbounds float* %tmp13549, i64 1 + %tmp13551 = getelementptr inbounds float* %tmp13550, i64 1 + %tmp13552 = getelementptr inbounds float* %tmp13551, i64 1 + %tmp13553 = getelementptr inbounds float* %tmp13552, i64 1 + %tmp13554 = getelementptr inbounds float* %tmp13553, i64 1 + %tmp13555 = getelementptr inbounds float* %tmp13554, i64 1 + %tmp13556 = getelementptr inbounds float* %tmp13555, i64 1 + %tmp13557 = getelementptr inbounds float* %tmp13556, i64 1 + %tmp13558 = getelementptr inbounds float* %tmp13557, i64 1 + %tmp13559 = getelementptr inbounds float* %tmp13558, i64 1 + %tmp13560 = getelementptr inbounds float* %tmp13559, i64 1 + %tmp13561 = getelementptr inbounds float* %tmp13560, i64 1 + %tmp13562 = getelementptr inbounds float* %tmp13561, i64 1 + %tmp13563 = getelementptr inbounds float* %tmp13562, i64 1 + %tmp13564 = getelementptr inbounds float* %tmp13563, i64 1 + %tmp13565 = getelementptr inbounds float* %tmp13564, i64 1 + %tmp13566 = getelementptr inbounds float* %tmp13565, i64 1 + %tmp13567 = getelementptr inbounds float* %tmp13566, i64 1 + %tmp13568 = getelementptr inbounds float* %tmp13567, i64 1 + %tmp13569 = getelementptr inbounds float* %tmp13568, i64 1 + %tmp13570 = getelementptr inbounds float* %tmp13569, i64 1 
+ %tmp13571 = getelementptr inbounds float* %tmp13570, i64 1 + %tmp13572 = getelementptr inbounds float* %tmp13571, i64 1 + %tmp13573 = getelementptr inbounds float* %tmp13572, i64 1 + %tmp13574 = getelementptr inbounds float* %tmp13573, i64 1 + %tmp13575 = getelementptr inbounds float* %tmp13574, i64 1 + %tmp13576 = getelementptr inbounds float* %tmp13575, i64 1 + %tmp13577 = getelementptr inbounds float* %tmp13576, i64 1 + %tmp13578 = getelementptr inbounds float* %tmp13577, i64 1 + %tmp13579 = getelementptr inbounds float* %tmp13578, i64 1 + %tmp13580 = getelementptr inbounds float* %tmp13579, i64 1 + %tmp13581 = getelementptr inbounds float* %tmp13580, i64 1 + %tmp13582 = getelementptr inbounds float* %tmp13581, i64 1 + %tmp13583 = getelementptr inbounds float* %tmp13582, i64 1 + %tmp13584 = getelementptr inbounds float* %tmp13583, i64 1 + %tmp13585 = getelementptr inbounds float* %tmp13584, i64 1 + %tmp13586 = getelementptr inbounds float* %tmp13585, i64 1 + %tmp13587 = getelementptr inbounds float* %tmp13586, i64 1 + %tmp13588 = getelementptr inbounds float* %tmp13587, i64 1 + %tmp13589 = getelementptr inbounds float* %tmp13588, i64 1 + %tmp13590 = getelementptr inbounds float* %tmp13589, i64 1 + %tmp13591 = getelementptr inbounds float* %tmp13590, i64 1 + %tmp13592 = getelementptr inbounds float* %tmp13591, i64 1 + %tmp13593 = getelementptr inbounds float* %tmp13592, i64 1 + %tmp13594 = getelementptr inbounds float* %tmp13593, i64 1 + %tmp13595 = getelementptr inbounds float* %tmp13594, i64 1 + %tmp13596 = getelementptr inbounds float* %tmp13595, i64 1 + %tmp13597 = getelementptr inbounds float* %tmp13596, i64 1 + %tmp13598 = getelementptr inbounds float* %tmp13597, i64 1 + %tmp13599 = getelementptr inbounds float* %tmp13598, i64 1 + %tmp13600 = getelementptr inbounds float* %tmp13599, i64 1 + %tmp13601 = getelementptr inbounds float* %tmp13600, i64 1 + %tmp13602 = getelementptr inbounds float* %tmp13601, i64 1 + %tmp13603 = getelementptr inbounds float* 
%tmp13602, i64 1 + %tmp13604 = getelementptr inbounds float* %tmp13603, i64 1 + %tmp13605 = getelementptr inbounds float* %tmp13604, i64 1 + %tmp13606 = getelementptr inbounds float* %tmp13605, i64 1 + %tmp13607 = getelementptr inbounds float* %tmp13606, i64 1 + %tmp13608 = getelementptr inbounds float* %tmp13607, i64 1 + %tmp13609 = getelementptr inbounds float* %tmp13608, i64 1 + %tmp13610 = getelementptr inbounds float* %tmp13609, i64 1 + %tmp13611 = getelementptr inbounds float* %tmp13610, i64 1 + %tmp13612 = getelementptr inbounds float* %tmp13611, i64 1 + %tmp13613 = getelementptr inbounds float* %tmp13612, i64 1 + %tmp13614 = getelementptr inbounds float* %tmp13613, i64 1 + %tmp13615 = getelementptr inbounds float* %tmp13614, i64 1 + %tmp13616 = getelementptr inbounds float* %tmp13615, i64 1 + %tmp13617 = getelementptr inbounds float* %tmp13616, i64 1 + %tmp13618 = getelementptr inbounds float* %tmp13617, i64 1 + %tmp13619 = getelementptr inbounds float* %tmp13618, i64 1 + %tmp13620 = getelementptr inbounds float* %tmp13619, i64 1 + %tmp13621 = getelementptr inbounds float* %tmp13620, i64 1 + %tmp13622 = getelementptr inbounds float* %tmp13621, i64 1 + %tmp13623 = getelementptr inbounds float* %tmp13622, i64 1 + %tmp13624 = getelementptr inbounds float* %tmp13623, i64 1 + %tmp13625 = getelementptr inbounds float* %tmp13624, i64 1 + %tmp13626 = getelementptr inbounds float* %tmp13625, i64 1 + %tmp13627 = getelementptr inbounds float* %tmp13626, i64 1 + %tmp13628 = getelementptr inbounds float* %tmp13627, i64 1 + %tmp13629 = getelementptr inbounds float* %tmp13628, i64 1 + %tmp13630 = getelementptr inbounds float* %tmp13629, i64 1 + %tmp13631 = getelementptr inbounds float* %tmp13630, i64 1 + %tmp13632 = getelementptr inbounds float* %tmp13631, i64 1 + %tmp13633 = getelementptr inbounds float* %tmp13632, i64 1 + %tmp13634 = getelementptr inbounds float* %tmp13633, i64 1 + %tmp13635 = getelementptr inbounds float* %tmp13634, i64 1 + %tmp13636 = getelementptr 
inbounds float* %tmp13635, i64 1 + %tmp13637 = getelementptr inbounds float* %tmp13636, i64 1 + %tmp13638 = getelementptr inbounds float* %tmp13637, i64 1 + %tmp13639 = getelementptr inbounds float* %tmp13638, i64 1 + %tmp13640 = getelementptr inbounds float* %tmp13639, i64 1 + %tmp13641 = getelementptr inbounds float* %tmp13640, i64 1 + %tmp13642 = getelementptr inbounds float* %tmp13641, i64 1 + %tmp13643 = getelementptr inbounds float* %tmp13642, i64 1 + %tmp13644 = getelementptr inbounds float* %tmp13643, i64 1 + %tmp13645 = getelementptr inbounds float* %tmp13644, i64 1 + %tmp13646 = getelementptr inbounds float* %tmp13645, i64 1 + %tmp13647 = getelementptr inbounds float* %tmp13646, i64 1 + %tmp13648 = getelementptr inbounds float* %tmp13647, i64 1 + %tmp13649 = getelementptr inbounds float* %tmp13648, i64 1 + %tmp13650 = getelementptr inbounds float* %tmp13649, i64 1 + %tmp13651 = getelementptr inbounds float* %tmp13650, i64 1 + %tmp13652 = getelementptr inbounds float* %tmp13651, i64 1 + %tmp13653 = getelementptr inbounds float* %tmp13652, i64 1 + %tmp13654 = getelementptr inbounds float* %tmp13653, i64 1 + %tmp13655 = getelementptr inbounds float* %tmp13654, i64 1 + %tmp13656 = getelementptr inbounds float* %tmp13655, i64 1 + %tmp13657 = getelementptr inbounds float* %tmp13656, i64 1 + %tmp13658 = getelementptr inbounds float* %tmp13657, i64 1 + %tmp13659 = getelementptr inbounds float* %tmp13658, i64 1 + %tmp13660 = getelementptr inbounds float* %tmp13659, i64 1 + %tmp13661 = getelementptr inbounds float* %tmp13660, i64 1 + %tmp13662 = getelementptr inbounds float* %tmp13661, i64 1 + %tmp13663 = getelementptr inbounds float* %tmp13662, i64 1 + %tmp13664 = getelementptr inbounds float* %tmp13663, i64 1 + %tmp13665 = getelementptr inbounds float* %tmp13664, i64 1 + %tmp13666 = getelementptr inbounds float* %tmp13665, i64 1 + %tmp13667 = getelementptr inbounds float* %tmp13666, i64 1 + %tmp13668 = getelementptr inbounds float* %tmp13667, i64 1 + %tmp13669 = 
getelementptr inbounds float* %tmp13668, i64 1 + %tmp13670 = getelementptr inbounds float* %tmp13669, i64 1 + %tmp13671 = getelementptr inbounds float* %tmp13670, i64 1 + %tmp13672 = getelementptr inbounds float* %tmp13671, i64 1 + %tmp13673 = getelementptr inbounds float* %tmp13672, i64 1 + %tmp13674 = getelementptr inbounds float* %tmp13673, i64 1 + %tmp13675 = getelementptr inbounds float* %tmp13674, i64 1 + %tmp13676 = getelementptr inbounds float* %tmp13675, i64 1 + %tmp13677 = getelementptr inbounds float* %tmp13676, i64 1 + %tmp13678 = getelementptr inbounds float* %tmp13677, i64 1 + %tmp13679 = getelementptr inbounds float* %tmp13678, i64 1 + %tmp13680 = getelementptr inbounds float* %tmp13679, i64 1 + %tmp13681 = getelementptr inbounds float* %tmp13680, i64 1 + %tmp13682 = getelementptr inbounds float* %tmp13681, i64 1 + %tmp13683 = getelementptr inbounds float* %tmp13682, i64 1 + %tmp13684 = getelementptr inbounds float* %tmp13683, i64 1 + %tmp13685 = getelementptr inbounds float* %tmp13684, i64 1 + %tmp13686 = getelementptr inbounds float* %tmp13685, i64 1 + %tmp13687 = getelementptr inbounds float* %tmp13686, i64 1 + %tmp13688 = getelementptr inbounds float* %tmp13687, i64 1 + %tmp13689 = getelementptr inbounds float* %tmp13688, i64 1 + %tmp13690 = getelementptr inbounds float* %tmp13689, i64 1 + %tmp13691 = getelementptr inbounds float* %tmp13690, i64 1 + %tmp13692 = getelementptr inbounds float* %tmp13691, i64 1 + %tmp13693 = getelementptr inbounds float* %tmp13692, i64 1 + %tmp13694 = getelementptr inbounds float* %tmp13693, i64 1 + %tmp13695 = getelementptr inbounds float* %tmp13694, i64 1 + %tmp13696 = getelementptr inbounds float* %tmp13695, i64 1 + %tmp13697 = getelementptr inbounds float* %tmp13696, i64 1 + %tmp13698 = getelementptr inbounds float* %tmp13697, i64 1 + %tmp13699 = getelementptr inbounds float* %tmp13698, i64 1 + %tmp13700 = getelementptr inbounds float* %tmp13699, i64 1 + %tmp13701 = getelementptr inbounds float* %tmp13700, i64 1 
+ %tmp13702 = getelementptr inbounds float* %tmp13701, i64 1 + %tmp13703 = getelementptr inbounds float* %tmp13702, i64 1 + %tmp13704 = getelementptr inbounds float* %tmp13703, i64 1 + %tmp13705 = getelementptr inbounds float* %tmp13704, i64 1 + %tmp13706 = getelementptr inbounds float* %tmp13705, i64 1 + %tmp13707 = getelementptr inbounds float* %tmp13706, i64 1 + %tmp13708 = getelementptr inbounds float* %tmp13707, i64 1 + %tmp13709 = getelementptr inbounds float* %tmp13708, i64 1 + %tmp13710 = getelementptr inbounds float* %tmp13709, i64 1 + %tmp13711 = getelementptr inbounds float* %tmp13710, i64 1 + %tmp13712 = getelementptr inbounds float* %tmp13711, i64 1 + %tmp13713 = getelementptr inbounds float* %tmp13712, i64 1 + %tmp13714 = getelementptr inbounds float* %tmp13713, i64 1 + %tmp13715 = getelementptr inbounds float* %tmp13714, i64 1 + %tmp13716 = getelementptr inbounds float* %tmp13715, i64 1 + %tmp13717 = getelementptr inbounds float* %tmp13716, i64 1 + %tmp13718 = getelementptr inbounds float* %tmp13717, i64 1 + %tmp13719 = getelementptr inbounds float* %tmp13718, i64 1 + %tmp13720 = getelementptr inbounds float* %tmp13719, i64 1 + %tmp13721 = getelementptr inbounds float* %tmp13720, i64 1 + %tmp13722 = getelementptr inbounds float* %tmp13721, i64 1 + %tmp13723 = getelementptr inbounds float* %tmp13722, i64 1 + %tmp13724 = getelementptr inbounds float* %tmp13723, i64 1 + %tmp13725 = getelementptr inbounds float* %tmp13724, i64 1 + %tmp13726 = getelementptr inbounds float* %tmp13725, i64 1 + %tmp13727 = getelementptr inbounds float* %tmp13726, i64 1 + %tmp13728 = getelementptr inbounds float* %tmp13727, i64 1 + %tmp13729 = getelementptr inbounds float* %tmp13728, i64 1 + %tmp13730 = getelementptr inbounds float* %tmp13729, i64 1 + %tmp13731 = getelementptr inbounds float* %tmp13730, i64 1 + %tmp13732 = getelementptr inbounds float* %tmp13731, i64 1 + %tmp13733 = getelementptr inbounds float* %tmp13732, i64 1 + %tmp13734 = getelementptr inbounds float* 
%tmp13733, i64 1 + %tmp13735 = getelementptr inbounds float* %tmp13734, i64 1 + %tmp13736 = getelementptr inbounds float* %tmp13735, i64 1 + %tmp13737 = getelementptr inbounds float* %tmp13736, i64 1 + %tmp13738 = getelementptr inbounds float* %tmp13737, i64 1 + %tmp13739 = getelementptr inbounds float* %tmp13738, i64 1 + %tmp13740 = getelementptr inbounds float* %tmp13739, i64 1 + %tmp13741 = getelementptr inbounds float* %tmp13740, i64 1 + %tmp13742 = getelementptr inbounds float* %tmp13741, i64 1 + %tmp13743 = getelementptr inbounds float* %tmp13742, i64 1 + %tmp13744 = getelementptr inbounds float* %tmp13743, i64 1 + %tmp13745 = getelementptr inbounds float* %tmp13744, i64 1 + %tmp13746 = getelementptr inbounds float* %tmp13745, i64 1 + %tmp13747 = getelementptr inbounds float* %tmp13746, i64 1 + %tmp13748 = getelementptr inbounds float* %tmp13747, i64 1 + %tmp13749 = getelementptr inbounds float* %tmp13748, i64 1 + %tmp13750 = getelementptr inbounds float* %tmp13749, i64 1 + %tmp13751 = getelementptr inbounds float* %tmp13750, i64 1 + %tmp13752 = getelementptr inbounds float* %tmp13751, i64 1 + %tmp13753 = getelementptr inbounds float* %tmp13752, i64 1 + %tmp13754 = getelementptr inbounds float* %tmp13753, i64 1 + %tmp13755 = getelementptr inbounds float* %tmp13754, i64 1 + %tmp13756 = getelementptr inbounds float* %tmp13755, i64 1 + %tmp13757 = getelementptr inbounds float* %tmp13756, i64 1 + %tmp13758 = getelementptr inbounds float* %tmp13757, i64 1 + %tmp13759 = getelementptr inbounds float* %tmp13758, i64 1 + %tmp13760 = getelementptr inbounds float* %tmp13759, i64 1 + %tmp13761 = getelementptr inbounds float* %tmp13760, i64 1 + %tmp13762 = getelementptr inbounds float* %tmp13761, i64 1 + %tmp13763 = getelementptr inbounds float* %tmp13762, i64 1 + %tmp13764 = getelementptr inbounds float* %tmp13763, i64 1 + %tmp13765 = getelementptr inbounds float* %tmp13764, i64 1 + %tmp13766 = getelementptr inbounds float* %tmp13765, i64 1 + %tmp13767 = getelementptr 
inbounds float* %tmp13766, i64 1 + %tmp13768 = getelementptr inbounds float* %tmp13767, i64 1 + %tmp13769 = getelementptr inbounds float* %tmp13768, i64 1 + %tmp13770 = getelementptr inbounds float* %tmp13769, i64 1 + %tmp13771 = getelementptr inbounds float* %tmp13770, i64 1 + %tmp13772 = getelementptr inbounds float* %tmp13771, i64 1 + %tmp13773 = getelementptr inbounds float* %tmp13772, i64 1 + %tmp13774 = getelementptr inbounds float* %tmp13773, i64 1 + %tmp13775 = getelementptr inbounds float* %tmp13774, i64 1 + %tmp13776 = getelementptr inbounds float* %tmp13775, i64 1 + %tmp13777 = getelementptr inbounds float* %tmp13776, i64 1 + %tmp13778 = getelementptr inbounds float* %tmp13777, i64 1 + %tmp13779 = getelementptr inbounds float* %tmp13778, i64 1 + %tmp13780 = getelementptr inbounds float* %tmp13779, i64 1 + %tmp13781 = getelementptr inbounds float* %tmp13780, i64 1 + %tmp13782 = getelementptr inbounds float* %tmp13781, i64 1 + %tmp13783 = getelementptr inbounds float* %tmp13782, i64 1 + %tmp13784 = getelementptr inbounds float* %tmp13783, i64 1 + %tmp13785 = getelementptr inbounds float* %tmp13784, i64 1 + %tmp13786 = getelementptr inbounds float* %tmp13785, i64 1 + %tmp13787 = getelementptr inbounds float* %tmp13786, i64 1 + %tmp13788 = getelementptr inbounds float* %tmp13787, i64 1 + %tmp13789 = getelementptr inbounds float* %tmp13788, i64 1 + %tmp13790 = getelementptr inbounds float* %tmp13789, i64 1 + %tmp13791 = getelementptr inbounds float* %tmp13790, i64 1 + %tmp13792 = getelementptr inbounds float* %tmp13791, i64 1 + %tmp13793 = getelementptr inbounds float* %tmp13792, i64 1 + %tmp13794 = getelementptr inbounds float* %tmp13793, i64 1 + %tmp13795 = getelementptr inbounds float* %tmp13794, i64 1 + %tmp13796 = getelementptr inbounds float* %tmp13795, i64 1 + %tmp13797 = getelementptr inbounds float* %tmp13796, i64 1 + %tmp13798 = getelementptr inbounds float* %tmp13797, i64 1 + %tmp13799 = getelementptr inbounds float* %tmp13798, i64 1 + %tmp13800 = 
getelementptr inbounds float* %tmp13799, i64 1 + %tmp13801 = getelementptr inbounds float* %tmp13800, i64 1 + %tmp13802 = getelementptr inbounds float* %tmp13801, i64 1 + %tmp13803 = getelementptr inbounds float* %tmp13802, i64 1 + %tmp13804 = getelementptr inbounds float* %tmp13803, i64 1 + %tmp13805 = getelementptr inbounds float* %tmp13804, i64 1 + %tmp13806 = getelementptr inbounds float* %tmp13805, i64 1 + %tmp13807 = getelementptr inbounds float* %tmp13806, i64 1 + %tmp13808 = getelementptr inbounds float* %tmp13807, i64 1 + %tmp13809 = getelementptr inbounds float* %tmp13808, i64 1 + %tmp13810 = getelementptr inbounds float* %tmp13809, i64 1 + %tmp13811 = getelementptr inbounds float* %tmp13810, i64 1 + %tmp13812 = getelementptr inbounds float* %tmp13811, i64 1 + %tmp13813 = getelementptr inbounds float* %tmp13812, i64 1 + %tmp13814 = getelementptr inbounds float* %tmp13813, i64 1 + %tmp13815 = getelementptr inbounds float* %tmp13814, i64 1 + %tmp13816 = getelementptr inbounds float* %tmp13815, i64 1 + %tmp13817 = getelementptr inbounds float* %tmp13816, i64 1 + %tmp13818 = getelementptr inbounds float* %tmp13817, i64 1 + %tmp13819 = getelementptr inbounds float* %tmp13818, i64 1 + %tmp13820 = getelementptr inbounds float* %tmp13819, i64 1 + %tmp13821 = getelementptr inbounds float* %tmp13820, i64 1 + %tmp13822 = getelementptr inbounds float* %tmp13821, i64 1 + %tmp13823 = getelementptr inbounds float* %tmp13822, i64 1 + %tmp13824 = getelementptr inbounds float* %tmp13823, i64 1 + %tmp13825 = getelementptr inbounds float* %tmp13824, i64 1 + %tmp13826 = getelementptr inbounds float* %tmp13825, i64 1 + %tmp13827 = getelementptr inbounds float* %tmp13826, i64 1 + %tmp13828 = getelementptr inbounds float* %tmp13827, i64 1 + %tmp13829 = getelementptr inbounds float* %tmp13828, i64 1 + %tmp13830 = getelementptr inbounds float* %tmp13829, i64 1 + %tmp13831 = getelementptr inbounds float* %tmp13830, i64 1 + %tmp13832 = getelementptr inbounds float* %tmp13831, i64 1 
+ %tmp13833 = getelementptr inbounds float* %tmp13832, i64 1 + %tmp13834 = getelementptr inbounds float* %tmp13833, i64 1 + %tmp13835 = getelementptr inbounds float* %tmp13834, i64 1 + %tmp13836 = getelementptr inbounds float* %tmp13835, i64 1 + %tmp13837 = getelementptr inbounds float* %tmp13836, i64 1 + %tmp13838 = getelementptr inbounds float* %tmp13837, i64 1 + %tmp13839 = getelementptr inbounds float* %tmp13838, i64 1 + %tmp13840 = getelementptr inbounds float* %tmp13839, i64 1 + %tmp13841 = getelementptr inbounds float* %tmp13840, i64 1 + %tmp13842 = getelementptr inbounds float* %tmp13841, i64 1 + %tmp13843 = getelementptr inbounds float* %tmp13842, i64 1 + %tmp13844 = getelementptr inbounds float* %tmp13843, i64 1 + %tmp13845 = getelementptr inbounds float* %tmp13844, i64 1 + %tmp13846 = getelementptr inbounds float* %tmp13845, i64 1 + %tmp13847 = getelementptr inbounds float* %tmp13846, i64 1 + %tmp13848 = getelementptr inbounds float* %tmp13847, i64 1 + %tmp13849 = getelementptr inbounds float* %tmp13848, i64 1 + %tmp13850 = getelementptr inbounds float* %tmp13849, i64 1 + %tmp13851 = getelementptr inbounds float* %tmp13850, i64 1 + %tmp13852 = getelementptr inbounds float* %tmp13851, i64 1 + %tmp13853 = getelementptr inbounds float* %tmp13852, i64 1 + %tmp13854 = getelementptr inbounds float* %tmp13853, i64 1 + %tmp13855 = getelementptr inbounds float* %tmp13854, i64 1 + %tmp13856 = getelementptr inbounds float* %tmp13855, i64 1 + %tmp13857 = getelementptr inbounds float* %tmp13856, i64 1 + %tmp13858 = getelementptr inbounds float* %tmp13857, i64 1 + %tmp13859 = getelementptr inbounds float* %tmp13858, i64 1 + %tmp13860 = getelementptr inbounds float* %tmp13859, i64 1 + %tmp13861 = getelementptr inbounds float* %tmp13860, i64 1 + %tmp13862 = getelementptr inbounds float* %tmp13861, i64 1 + %tmp13863 = getelementptr inbounds float* %tmp13862, i64 1 + %tmp13864 = getelementptr inbounds float* %tmp13863, i64 1 + %tmp13865 = getelementptr inbounds float* 
%tmp13864, i64 1 + %tmp13866 = getelementptr inbounds float* %tmp13865, i64 1 + %tmp13867 = getelementptr inbounds float* %tmp13866, i64 1 + %tmp13868 = getelementptr inbounds float* %tmp13867, i64 1 + %tmp13869 = getelementptr inbounds float* %tmp13868, i64 1 + %tmp13870 = getelementptr inbounds float* %tmp13869, i64 1 + %tmp13871 = getelementptr inbounds float* %tmp13870, i64 1 + %tmp13872 = getelementptr inbounds float* %tmp13871, i64 1 + %tmp13873 = getelementptr inbounds float* %tmp13872, i64 1 + %tmp13874 = getelementptr inbounds float* %tmp13873, i64 1 + %tmp13875 = getelementptr inbounds float* %tmp13874, i64 1 + %tmp13876 = getelementptr inbounds float* %tmp13875, i64 1 + %tmp13877 = getelementptr inbounds float* %tmp13876, i64 1 + %tmp13878 = getelementptr inbounds float* %tmp13877, i64 1 + %tmp13879 = getelementptr inbounds float* %tmp13878, i64 1 + %tmp13880 = getelementptr inbounds float* %tmp13879, i64 1 + %tmp13881 = getelementptr inbounds float* %tmp13880, i64 1 + %tmp13882 = getelementptr inbounds float* %tmp13881, i64 1 + %tmp13883 = getelementptr inbounds float* %tmp13882, i64 1 + %tmp13884 = getelementptr inbounds float* %tmp13883, i64 1 + %tmp13885 = getelementptr inbounds float* %tmp13884, i64 1 + %tmp13886 = getelementptr inbounds float* %tmp13885, i64 1 + %tmp13887 = getelementptr inbounds float* %tmp13886, i64 1 + %tmp13888 = getelementptr inbounds float* %tmp13887, i64 1 + %tmp13889 = getelementptr inbounds float* %tmp13888, i64 1 + %tmp13890 = getelementptr inbounds float* %tmp13889, i64 1 + %tmp13891 = getelementptr inbounds float* %tmp13890, i64 1 + %tmp13892 = getelementptr inbounds float* %tmp13891, i64 1 + %tmp13893 = getelementptr inbounds float* %tmp13892, i64 1 + %tmp13894 = getelementptr inbounds float* %tmp13893, i64 1 + %tmp13895 = getelementptr inbounds float* %tmp13894, i64 1 + %tmp13896 = getelementptr inbounds float* %tmp13895, i64 1 + %tmp13897 = getelementptr inbounds float* %tmp13896, i64 1 + %tmp13898 = getelementptr 
inbounds float* %tmp13897, i64 1 + %tmp13899 = getelementptr inbounds float* %tmp13898, i64 1 + %tmp13900 = getelementptr inbounds float* %tmp13899, i64 1 + %tmp13901 = getelementptr inbounds float* %tmp13900, i64 1 + %tmp13902 = getelementptr inbounds float* %tmp13901, i64 1 + %tmp13903 = getelementptr inbounds float* %tmp13902, i64 1 + %tmp13904 = getelementptr inbounds float* %tmp13903, i64 1 + %tmp13905 = getelementptr inbounds float* %tmp13904, i64 1 + %tmp13906 = getelementptr inbounds float* %tmp13905, i64 1 + %tmp13907 = getelementptr inbounds float* %tmp13906, i64 1 + %tmp13908 = getelementptr inbounds float* %tmp13907, i64 1 + %tmp13909 = getelementptr inbounds float* %tmp13908, i64 1 + %tmp13910 = getelementptr inbounds float* %tmp13909, i64 1 + %tmp13911 = getelementptr inbounds float* %tmp13910, i64 1 + %tmp13912 = getelementptr inbounds float* %tmp13911, i64 1 + %tmp13913 = getelementptr inbounds float* %tmp13912, i64 1 + %tmp13914 = getelementptr inbounds float* %tmp13913, i64 1 + %tmp13915 = getelementptr inbounds float* %tmp13914, i64 1 + %tmp13916 = getelementptr inbounds float* %tmp13915, i64 1 + %tmp13917 = getelementptr inbounds float* %tmp13916, i64 1 + %tmp13918 = getelementptr inbounds float* %tmp13917, i64 1 + %tmp13919 = getelementptr inbounds float* %tmp13918, i64 1 + %tmp13920 = getelementptr inbounds float* %tmp13919, i64 1 + %tmp13921 = getelementptr inbounds float* %tmp13920, i64 1 + %tmp13922 = getelementptr inbounds float* %tmp13921, i64 1 + %tmp13923 = getelementptr inbounds float* %tmp13922, i64 1 + %tmp13924 = getelementptr inbounds float* %tmp13923, i64 1 + %tmp13925 = getelementptr inbounds float* %tmp13924, i64 1 + %tmp13926 = getelementptr inbounds float* %tmp13925, i64 1 + %tmp13927 = getelementptr inbounds float* %tmp13926, i64 1 + %tmp13928 = getelementptr inbounds float* %tmp13927, i64 1 + %tmp13929 = getelementptr inbounds float* %tmp13928, i64 1 + %tmp13930 = getelementptr inbounds float* %tmp13929, i64 1 + %tmp13931 = 
getelementptr inbounds float* %tmp13930, i64 1 + %tmp13932 = getelementptr inbounds float* %tmp13931, i64 1 + %tmp13933 = getelementptr inbounds float* %tmp13932, i64 1 + %tmp13934 = getelementptr inbounds float* %tmp13933, i64 1 + %tmp13935 = getelementptr inbounds float* %tmp13934, i64 1 + %tmp13936 = getelementptr inbounds float* %tmp13935, i64 1 + %tmp13937 = getelementptr inbounds float* %tmp13936, i64 1 + %tmp13938 = getelementptr inbounds float* %tmp13937, i64 1 + %tmp13939 = getelementptr inbounds float* %tmp13938, i64 1 + %tmp13940 = getelementptr inbounds float* %tmp13939, i64 1 + %tmp13941 = getelementptr inbounds float* %tmp13940, i64 1 + %tmp13942 = getelementptr inbounds float* %tmp13941, i64 1 + %tmp13943 = getelementptr inbounds float* %tmp13942, i64 1 + %tmp13944 = getelementptr inbounds float* %tmp13943, i64 1 + %tmp13945 = getelementptr inbounds float* %tmp13944, i64 1 + %tmp13946 = getelementptr inbounds float* %tmp13945, i64 1 + %tmp13947 = getelementptr inbounds float* %tmp13946, i64 1 + %tmp13948 = getelementptr inbounds float* %tmp13947, i64 1 + %tmp13949 = getelementptr inbounds float* %tmp13948, i64 1 + %tmp13950 = getelementptr inbounds float* %tmp13949, i64 1 + %tmp13951 = getelementptr inbounds float* %tmp13950, i64 1 + %tmp13952 = getelementptr inbounds float* %tmp13951, i64 1 + %tmp13953 = getelementptr inbounds float* %tmp13952, i64 1 + %tmp13954 = getelementptr inbounds float* %tmp13953, i64 1 + %tmp13955 = getelementptr inbounds float* %tmp13954, i64 1 + %tmp13956 = getelementptr inbounds float* %tmp13955, i64 1 + %tmp13957 = getelementptr inbounds float* %tmp13956, i64 1 + %tmp13958 = getelementptr inbounds float* %tmp13957, i64 1 + %tmp13959 = getelementptr inbounds float* %tmp13958, i64 1 + %tmp13960 = getelementptr inbounds float* %tmp13959, i64 1 + %tmp13961 = getelementptr inbounds float* %tmp13960, i64 1 + %tmp13962 = getelementptr inbounds float* %tmp13961, i64 1 + %tmp13963 = getelementptr inbounds float* %tmp13962, i64 1 
+ %tmp13964 = getelementptr inbounds float* %tmp13963, i64 1 + %tmp13965 = getelementptr inbounds float* %tmp13964, i64 1 + %tmp13966 = getelementptr inbounds float* %tmp13965, i64 1 + %tmp13967 = getelementptr inbounds float* %tmp13966, i64 1 + %tmp13968 = getelementptr inbounds float* %tmp13967, i64 1 + %tmp13969 = getelementptr inbounds float* %tmp13968, i64 1 + %tmp13970 = getelementptr inbounds float* %tmp13969, i64 1 + %tmp13971 = getelementptr inbounds float* %tmp13970, i64 1 + %tmp13972 = getelementptr inbounds float* %tmp13971, i64 1 + %tmp13973 = getelementptr inbounds float* %tmp13972, i64 1 + %tmp13974 = getelementptr inbounds float* %tmp13973, i64 1 + %tmp13975 = getelementptr inbounds float* %tmp13974, i64 1 + %tmp13976 = getelementptr inbounds float* %tmp13975, i64 1 + %tmp13977 = getelementptr inbounds float* %tmp13976, i64 1 + %tmp13978 = getelementptr inbounds float* %tmp13977, i64 1 + %tmp13979 = getelementptr inbounds float* %tmp13978, i64 1 + %tmp13980 = getelementptr inbounds float* %tmp13979, i64 1 + %tmp13981 = getelementptr inbounds float* %tmp13980, i64 1 + %tmp13982 = getelementptr inbounds float* %tmp13981, i64 1 + %tmp13983 = getelementptr inbounds float* %tmp13982, i64 1 + %tmp13984 = getelementptr inbounds float* %tmp13983, i64 1 + %tmp13985 = getelementptr inbounds float* %tmp13984, i64 1 + %tmp13986 = getelementptr inbounds float* %tmp13985, i64 1 + %tmp13987 = getelementptr inbounds float* %tmp13986, i64 1 + %tmp13988 = getelementptr inbounds float* %tmp13987, i64 1 + %tmp13989 = getelementptr inbounds float* %tmp13988, i64 1 + %tmp13990 = getelementptr inbounds float* %tmp13989, i64 1 + %tmp13991 = getelementptr inbounds float* %tmp13990, i64 1 + %tmp13992 = getelementptr inbounds float* %tmp13991, i64 1 + %tmp13993 = getelementptr inbounds float* %tmp13992, i64 1 + %tmp13994 = getelementptr inbounds float* %tmp13993, i64 1 + %tmp13995 = getelementptr inbounds float* %tmp13994, i64 1 + %tmp13996 = getelementptr inbounds float* 
%tmp13995, i64 1 + %tmp13997 = getelementptr inbounds float* %tmp13996, i64 1 + %tmp13998 = getelementptr inbounds float* %tmp13997, i64 1 + %tmp13999 = getelementptr inbounds float* %tmp13998, i64 1 + %tmp14000 = getelementptr inbounds float* %tmp13999, i64 1 + %tmp14001 = getelementptr inbounds float* %tmp14000, i64 1 + %tmp14002 = getelementptr inbounds float* %tmp14001, i64 1 + %tmp14003 = getelementptr inbounds float* %tmp14002, i64 1 + %tmp14004 = getelementptr inbounds float* %tmp14003, i64 1 + %tmp14005 = getelementptr inbounds float* %tmp14004, i64 1 + %tmp14006 = getelementptr inbounds float* %tmp14005, i64 1 + %tmp14007 = getelementptr inbounds float* %tmp14006, i64 1 + %tmp14008 = getelementptr inbounds float* %tmp14007, i64 1 + %tmp14009 = getelementptr inbounds float* %tmp14008, i64 1 + %tmp14010 = getelementptr inbounds float* %tmp14009, i64 1 + %tmp14011 = getelementptr inbounds float* %tmp14010, i64 1 + %tmp14012 = getelementptr inbounds float* %tmp14011, i64 1 + %tmp14013 = getelementptr inbounds float* %tmp14012, i64 1 + %tmp14014 = getelementptr inbounds float* %tmp14013, i64 1 + %tmp14015 = getelementptr inbounds float* %tmp14014, i64 1 + %tmp14016 = getelementptr inbounds float* %tmp14015, i64 1 + %tmp14017 = getelementptr inbounds float* %tmp14016, i64 1 + %tmp14018 = getelementptr inbounds float* %tmp14017, i64 1 + %tmp14019 = getelementptr inbounds float* %tmp14018, i64 1 + %tmp14020 = getelementptr inbounds float* %tmp14019, i64 1 + %tmp14021 = getelementptr inbounds float* %tmp14020, i64 1 + %tmp14022 = getelementptr inbounds float* %tmp14021, i64 1 + %tmp14023 = getelementptr inbounds float* %tmp14022, i64 1 + %tmp14024 = getelementptr inbounds float* %tmp14023, i64 1 + %tmp14025 = getelementptr inbounds float* %tmp14024, i64 1 + %tmp14026 = getelementptr inbounds float* %tmp14025, i64 1 + %tmp14027 = getelementptr inbounds float* %tmp14026, i64 1 + %tmp14028 = getelementptr inbounds float* %tmp14027, i64 1 + %tmp14029 = getelementptr 
inbounds float* %tmp14028, i64 1 + %tmp14030 = getelementptr inbounds float* %tmp14029, i64 1 + %tmp14031 = getelementptr inbounds float* %tmp14030, i64 1 + %tmp14032 = getelementptr inbounds float* %tmp14031, i64 1 + %tmp14033 = getelementptr inbounds float* %tmp14032, i64 1 + %tmp14034 = getelementptr inbounds float* %tmp14033, i64 1 + %tmp14035 = getelementptr inbounds float* %tmp14034, i64 1 + %tmp14036 = getelementptr inbounds float* %tmp14035, i64 1 + %tmp14037 = getelementptr inbounds float* %tmp14036, i64 1 + %tmp14038 = getelementptr inbounds float* %tmp14037, i64 1 + %tmp14039 = getelementptr inbounds float* %tmp14038, i64 1 + %tmp14040 = getelementptr inbounds float* %tmp14039, i64 1 + %tmp14041 = getelementptr inbounds float* %tmp14040, i64 1 + %tmp14042 = getelementptr inbounds float* %tmp14041, i64 1 + %tmp14043 = getelementptr inbounds float* %tmp14042, i64 1 + %tmp14044 = getelementptr inbounds float* %tmp14043, i64 1 + %tmp14045 = getelementptr inbounds float* %tmp14044, i64 1 + %tmp14046 = getelementptr inbounds float* %tmp14045, i64 1 + %tmp14047 = getelementptr inbounds float* %tmp14046, i64 1 + %tmp14048 = getelementptr inbounds float* %tmp14047, i64 1 + %tmp14049 = getelementptr inbounds float* %tmp14048, i64 1 + %tmp14050 = getelementptr inbounds float* %tmp14049, i64 1 + %tmp14051 = getelementptr inbounds float* %tmp14050, i64 1 + %tmp14052 = getelementptr inbounds float* %tmp14051, i64 1 + %tmp14053 = getelementptr inbounds float* %tmp14052, i64 1 + %tmp14054 = getelementptr inbounds float* %tmp14053, i64 1 + %tmp14055 = getelementptr inbounds float* %tmp14054, i64 1 + %tmp14056 = getelementptr inbounds float* %tmp14055, i64 1 + %tmp14057 = getelementptr inbounds float* %tmp14056, i64 1 + %tmp14058 = getelementptr inbounds float* %tmp14057, i64 1 + %tmp14059 = getelementptr inbounds float* %tmp14058, i64 1 + %tmp14060 = getelementptr inbounds float* %tmp14059, i64 1 + %tmp14061 = getelementptr inbounds float* %tmp14060, i64 1 + %tmp14062 = 
getelementptr inbounds float* %tmp14061, i64 1 + %tmp14063 = getelementptr inbounds float* %tmp14062, i64 1 + %tmp14064 = getelementptr inbounds float* %tmp14063, i64 1 + %tmp14065 = getelementptr inbounds float* %tmp14064, i64 1 + %tmp14066 = getelementptr inbounds float* %tmp14065, i64 1 + %tmp14067 = getelementptr inbounds float* %tmp14066, i64 1 + %tmp14068 = getelementptr inbounds float* %tmp14067, i64 1 + %tmp14069 = getelementptr inbounds float* %tmp14068, i64 1 + %tmp14070 = getelementptr inbounds float* %tmp14069, i64 1 + %tmp14071 = getelementptr inbounds float* %tmp14070, i64 1 + %tmp14072 = getelementptr inbounds float* %tmp14071, i64 1 + %tmp14073 = getelementptr inbounds float* %tmp14072, i64 1 + %tmp14074 = getelementptr inbounds float* %tmp14073, i64 1 + %tmp14075 = getelementptr inbounds float* %tmp14074, i64 1 + %tmp14076 = getelementptr inbounds float* %tmp14075, i64 1 + %tmp14077 = getelementptr inbounds float* %tmp14076, i64 1 + %tmp14078 = getelementptr inbounds float* %tmp14077, i64 1 + %tmp14079 = getelementptr inbounds float* %tmp14078, i64 1 + %tmp14080 = getelementptr inbounds float* %tmp14079, i64 1 + %tmp14081 = getelementptr inbounds float* %tmp14080, i64 1 + %tmp14082 = getelementptr inbounds float* %tmp14081, i64 1 + %tmp14083 = getelementptr inbounds float* %tmp14082, i64 1 + %tmp14084 = getelementptr inbounds float* %tmp14083, i64 1 + %tmp14085 = getelementptr inbounds float* %tmp14084, i64 1 + %tmp14086 = getelementptr inbounds float* %tmp14085, i64 1 + %tmp14087 = getelementptr inbounds float* %tmp14086, i64 1 + %tmp14088 = getelementptr inbounds float* %tmp14087, i64 1 + %tmp14089 = getelementptr inbounds float* %tmp14088, i64 1 + %tmp14090 = getelementptr inbounds float* %tmp14089, i64 1 + %tmp14091 = getelementptr inbounds float* %tmp14090, i64 1 + %tmp14092 = getelementptr inbounds float* %tmp14091, i64 1 + %tmp14093 = getelementptr inbounds float* %tmp14092, i64 1 + %tmp14094 = getelementptr inbounds float* %tmp14093, i64 1 
+ %tmp14095 = getelementptr inbounds float* %tmp14094, i64 1 + %tmp14096 = getelementptr inbounds float* %tmp14095, i64 1 + %tmp14097 = getelementptr inbounds float* %tmp14096, i64 1 + %tmp14098 = getelementptr inbounds float* %tmp14097, i64 1 + %tmp14099 = getelementptr inbounds float* %tmp14098, i64 1 + %tmp14100 = getelementptr inbounds float* %tmp14099, i64 1 + %tmp14101 = getelementptr inbounds float* %tmp14100, i64 1 + %tmp14102 = getelementptr inbounds float* %tmp14101, i64 1 + %tmp14103 = getelementptr inbounds float* %tmp14102, i64 1 + %tmp14104 = getelementptr inbounds float* %tmp14103, i64 1 + %tmp14105 = getelementptr inbounds float* %tmp14104, i64 1 + %tmp14106 = getelementptr inbounds float* %tmp14105, i64 1 + %tmp14107 = getelementptr inbounds float* %tmp14106, i64 1 + %tmp14108 = getelementptr inbounds float* %tmp14107, i64 1 + %tmp14109 = getelementptr inbounds float* %tmp14108, i64 1 + %tmp14110 = getelementptr inbounds float* %tmp14109, i64 1 + %tmp14111 = getelementptr inbounds float* %tmp14110, i64 1 + %tmp14112 = getelementptr inbounds float* %tmp14111, i64 1 + %tmp14113 = getelementptr inbounds float* %tmp14112, i64 1 + %tmp14114 = getelementptr inbounds float* %tmp14113, i64 1 + %tmp14115 = getelementptr inbounds float* %tmp14114, i64 1 + %tmp14116 = getelementptr inbounds float* %tmp14115, i64 1 + %tmp14117 = getelementptr inbounds float* %tmp14116, i64 1 + %tmp14118 = getelementptr inbounds float* %tmp14117, i64 1 + %tmp14119 = getelementptr inbounds float* %tmp14118, i64 1 + %tmp14120 = getelementptr inbounds float* %tmp14119, i64 1 + %tmp14121 = getelementptr inbounds float* %tmp14120, i64 1 + %tmp14122 = getelementptr inbounds float* %tmp14121, i64 1 + %tmp14123 = getelementptr inbounds float* %tmp14122, i64 1 + %tmp14124 = getelementptr inbounds float* %tmp14123, i64 1 + %tmp14125 = getelementptr inbounds float* %tmp14124, i64 1 + %tmp14126 = getelementptr inbounds float* %tmp14125, i64 1 + %tmp14127 = getelementptr inbounds float* 
%tmp14126, i64 1 + %tmp14128 = getelementptr inbounds float* %tmp14127, i64 1 + %tmp14129 = getelementptr inbounds float* %tmp14128, i64 1 + %tmp14130 = getelementptr inbounds float* %tmp14129, i64 1 + %tmp14131 = getelementptr inbounds float* %tmp14130, i64 1 + %tmp14132 = getelementptr inbounds float* %tmp14131, i64 1 + %tmp14133 = getelementptr inbounds float* %tmp14132, i64 1 + %tmp14134 = getelementptr inbounds float* %tmp14133, i64 1 + %tmp14135 = getelementptr inbounds float* %tmp14134, i64 1 + %tmp14136 = getelementptr inbounds float* %tmp14135, i64 1 + %tmp14137 = getelementptr inbounds float* %tmp14136, i64 1 + %tmp14138 = getelementptr inbounds float* %tmp14137, i64 1 + %tmp14139 = getelementptr inbounds float* %tmp14138, i64 1 + %tmp14140 = getelementptr inbounds float* %tmp14139, i64 1 + %tmp14141 = getelementptr inbounds float* %tmp14140, i64 1 + %tmp14142 = getelementptr inbounds float* %tmp14141, i64 1 + %tmp14143 = getelementptr inbounds float* %tmp14142, i64 1 + %tmp14144 = getelementptr inbounds float* %tmp14143, i64 1 + %tmp14145 = getelementptr inbounds float* %tmp14144, i64 1 + %tmp14146 = getelementptr inbounds float* %tmp14145, i64 1 + %tmp14147 = getelementptr inbounds float* %tmp14146, i64 1 + %tmp14148 = getelementptr inbounds float* %tmp14147, i64 1 + %tmp14149 = getelementptr inbounds float* %tmp14148, i64 1 + %tmp14150 = getelementptr inbounds float* %tmp14149, i64 1 + %tmp14151 = getelementptr inbounds float* %tmp14150, i64 1 + %tmp14152 = getelementptr inbounds float* %tmp14151, i64 1 + %tmp14153 = getelementptr inbounds float* %tmp14152, i64 1 + %tmp14154 = getelementptr inbounds float* %tmp14153, i64 1 + %tmp14155 = getelementptr inbounds float* %tmp14154, i64 1 + %tmp14156 = getelementptr inbounds float* %tmp14155, i64 1 + %tmp14157 = getelementptr inbounds float* %tmp14156, i64 1 + %tmp14158 = getelementptr inbounds float* %tmp14157, i64 1 + %tmp14159 = getelementptr inbounds float* %tmp14158, i64 1 + %tmp14160 = getelementptr 
inbounds float* %tmp14159, i64 1 + %tmp14161 = getelementptr inbounds float* %tmp14160, i64 1 + %tmp14162 = getelementptr inbounds float* %tmp14161, i64 1 + %tmp14163 = getelementptr inbounds float* %tmp14162, i64 1 + %tmp14164 = getelementptr inbounds float* %tmp14163, i64 1 + %tmp14165 = getelementptr inbounds float* %tmp14164, i64 1 + %tmp14166 = getelementptr inbounds float* %tmp14165, i64 1 + %tmp14167 = getelementptr inbounds float* %tmp14166, i64 1 + %tmp14168 = getelementptr inbounds float* %tmp14167, i64 1 + %tmp14169 = getelementptr inbounds float* %tmp14168, i64 1 + %tmp14170 = getelementptr inbounds float* %tmp14169, i64 1 + %tmp14171 = getelementptr inbounds float* %tmp14170, i64 1 + %tmp14172 = getelementptr inbounds float* %tmp14171, i64 1 + %tmp14173 = getelementptr inbounds float* %tmp14172, i64 1 + %tmp14174 = getelementptr inbounds float* %tmp14173, i64 1 + %tmp14175 = getelementptr inbounds float* %tmp14174, i64 1 + %tmp14176 = getelementptr inbounds float* %tmp14175, i64 1 + %tmp14177 = getelementptr inbounds float* %tmp14176, i64 1 + %tmp14178 = getelementptr inbounds float* %tmp14177, i64 1 + %tmp14179 = getelementptr inbounds float* %tmp14178, i64 1 + %tmp14180 = getelementptr inbounds float* %tmp14179, i64 1 + %tmp14181 = getelementptr inbounds float* %tmp14180, i64 1 + %tmp14182 = getelementptr inbounds float* %tmp14181, i64 1 + %tmp14183 = getelementptr inbounds float* %tmp14182, i64 1 + %tmp14184 = getelementptr inbounds float* %tmp14183, i64 1 + %tmp14185 = getelementptr inbounds float* %tmp14184, i64 1 + %tmp14186 = getelementptr inbounds float* %tmp14185, i64 1 + %tmp14187 = getelementptr inbounds float* %tmp14186, i64 1 + %tmp14188 = getelementptr inbounds float* %tmp14187, i64 1 + %tmp14189 = getelementptr inbounds float* %tmp14188, i64 1 + %tmp14190 = getelementptr inbounds float* %tmp14189, i64 1 + %tmp14191 = getelementptr inbounds float* %tmp14190, i64 1 + %tmp14192 = getelementptr inbounds float* %tmp14191, i64 1 + %tmp14193 = 
getelementptr inbounds float* %tmp14192, i64 1 + %tmp14194 = getelementptr inbounds float* %tmp14193, i64 1 + %tmp14195 = getelementptr inbounds float* %tmp14194, i64 1 + %tmp14196 = getelementptr inbounds float* %tmp14195, i64 1 + %tmp14197 = getelementptr inbounds float* %tmp14196, i64 1 + %tmp14198 = getelementptr inbounds float* %tmp14197, i64 1 + %tmp14199 = getelementptr inbounds float* %tmp14198, i64 1 + %tmp14200 = getelementptr inbounds float* %tmp14199, i64 1 + %tmp14201 = getelementptr inbounds float* %tmp14200, i64 1 + %tmp14202 = getelementptr inbounds float* %tmp14201, i64 1 + %tmp14203 = getelementptr inbounds float* %tmp14202, i64 1 + %tmp14204 = getelementptr inbounds float* %tmp14203, i64 1 + %tmp14205 = getelementptr inbounds float* %tmp14204, i64 1 + %tmp14206 = getelementptr inbounds float* %tmp14205, i64 1 + %tmp14207 = getelementptr inbounds float* %tmp14206, i64 1 + %tmp14208 = getelementptr inbounds float* %tmp14207, i64 1 + %tmp14209 = getelementptr inbounds float* %tmp14208, i64 1 + %tmp14210 = getelementptr inbounds float* %tmp14209, i64 1 + %tmp14211 = getelementptr inbounds float* %tmp14210, i64 1 + %tmp14212 = getelementptr inbounds float* %tmp14211, i64 1 + %tmp14213 = getelementptr inbounds float* %tmp14212, i64 1 + %tmp14214 = getelementptr inbounds float* %tmp14213, i64 1 + %tmp14215 = getelementptr inbounds float* %tmp14214, i64 1 + %tmp14216 = getelementptr inbounds float* %tmp14215, i64 1 + %tmp14217 = getelementptr inbounds float* %tmp14216, i64 1 + %tmp14218 = getelementptr inbounds float* %tmp14217, i64 1 + %tmp14219 = getelementptr inbounds float* %tmp14218, i64 1 + %tmp14220 = getelementptr inbounds float* %tmp14219, i64 1 + %tmp14221 = getelementptr inbounds float* %tmp14220, i64 1 + %tmp14222 = getelementptr inbounds float* %tmp14221, i64 1 + %tmp14223 = getelementptr inbounds float* %tmp14222, i64 1 + %tmp14224 = getelementptr inbounds float* %tmp14223, i64 1 + %tmp14225 = getelementptr inbounds float* %tmp14224, i64 1 
+ %tmp14226 = getelementptr inbounds float* %tmp14225, i64 1 + %tmp14227 = getelementptr inbounds float* %tmp14226, i64 1 + %tmp14228 = getelementptr inbounds float* %tmp14227, i64 1 + %tmp14229 = getelementptr inbounds float* %tmp14228, i64 1 + %tmp14230 = getelementptr inbounds float* %tmp14229, i64 1 + %tmp14231 = getelementptr inbounds float* %tmp14230, i64 1 + %tmp14232 = getelementptr inbounds float* %tmp14231, i64 1 + %tmp14233 = getelementptr inbounds float* %tmp14232, i64 1 + %tmp14234 = getelementptr inbounds float* %tmp14233, i64 1 + %tmp14235 = getelementptr inbounds float* %tmp14234, i64 1 + %tmp14236 = getelementptr inbounds float* %tmp14235, i64 1 + %tmp14237 = getelementptr inbounds float* %tmp14236, i64 1 + %tmp14238 = getelementptr inbounds float* %tmp14237, i64 1 + %tmp14239 = getelementptr inbounds float* %tmp14238, i64 1 + %tmp14240 = getelementptr inbounds float* %tmp14239, i64 1 + %tmp14241 = getelementptr inbounds float* %tmp14240, i64 1 + %tmp14242 = getelementptr inbounds float* %tmp14241, i64 1 + %tmp14243 = getelementptr inbounds float* %tmp14242, i64 1 + %tmp14244 = getelementptr inbounds float* %tmp14243, i64 1 + %tmp14245 = getelementptr inbounds float* %tmp14244, i64 1 + %tmp14246 = getelementptr inbounds float* %tmp14245, i64 1 + %tmp14247 = getelementptr inbounds float* %tmp14246, i64 1 + %tmp14248 = getelementptr inbounds float* %tmp14247, i64 1 + %tmp14249 = getelementptr inbounds float* %tmp14248, i64 1 + %tmp14250 = getelementptr inbounds float* %tmp14249, i64 1 + %tmp14251 = getelementptr inbounds float* %tmp14250, i64 1 + %tmp14252 = getelementptr inbounds float* %tmp14251, i64 1 + %tmp14253 = getelementptr inbounds float* %tmp14252, i64 1 + %tmp14254 = getelementptr inbounds float* %tmp14253, i64 1 + %tmp14255 = getelementptr inbounds float* %tmp14254, i64 1 + %tmp14256 = getelementptr inbounds float* %tmp14255, i64 1 + %tmp14257 = getelementptr inbounds float* %tmp14256, i64 1 + %tmp14258 = getelementptr inbounds float* 
%tmp14257, i64 1 + %tmp14259 = getelementptr inbounds float* %tmp14258, i64 1 + %tmp14260 = getelementptr inbounds float* %tmp14259, i64 1 + %tmp14261 = getelementptr inbounds float* %tmp14260, i64 1 + %tmp14262 = getelementptr inbounds float* %tmp14261, i64 1 + %tmp14263 = getelementptr inbounds float* %tmp14262, i64 1 + %tmp14264 = getelementptr inbounds float* %tmp14263, i64 1 + %tmp14265 = getelementptr inbounds float* %tmp14264, i64 1 + %tmp14266 = getelementptr inbounds float* %tmp14265, i64 1 + %tmp14267 = getelementptr inbounds float* %tmp14266, i64 1 + %tmp14268 = getelementptr inbounds float* %tmp14267, i64 1 + %tmp14269 = getelementptr inbounds float* %tmp14268, i64 1 + %tmp14270 = getelementptr inbounds float* %tmp14269, i64 1 + %tmp14271 = getelementptr inbounds float* %tmp14270, i64 1 + %tmp14272 = getelementptr inbounds float* %tmp14271, i64 1 + %tmp14273 = getelementptr inbounds float* %tmp14272, i64 1 + %tmp14274 = getelementptr inbounds float* %tmp14273, i64 1 + %tmp14275 = getelementptr inbounds float* %tmp14274, i64 1 + %tmp14276 = getelementptr inbounds float* %tmp14275, i64 1 + %tmp14277 = getelementptr inbounds float* %tmp14276, i64 1 + %tmp14278 = getelementptr inbounds float* %tmp14277, i64 1 + %tmp14279 = getelementptr inbounds float* %tmp14278, i64 1 + %tmp14280 = getelementptr inbounds float* %tmp14279, i64 1 + %tmp14281 = getelementptr inbounds float* %tmp14280, i64 1 + %tmp14282 = getelementptr inbounds float* %tmp14281, i64 1 + %tmp14283 = getelementptr inbounds float* %tmp14282, i64 1 + %tmp14284 = getelementptr inbounds float* %tmp14283, i64 1 + %tmp14285 = getelementptr inbounds float* %tmp14284, i64 1 + %tmp14286 = getelementptr inbounds float* %tmp14285, i64 1 + %tmp14287 = getelementptr inbounds float* %tmp14286, i64 1 + %tmp14288 = getelementptr inbounds float* %tmp14287, i64 1 + %tmp14289 = getelementptr inbounds float* %tmp14288, i64 1 + %tmp14290 = getelementptr inbounds float* %tmp14289, i64 1 + %tmp14291 = getelementptr 
inbounds float* %tmp14290, i64 1 + %tmp14292 = getelementptr inbounds float* %tmp14291, i64 1 + %tmp14293 = getelementptr inbounds float* %tmp14292, i64 1 + %tmp14294 = getelementptr inbounds float* %tmp14293, i64 1 + %tmp14295 = getelementptr inbounds float* %tmp14294, i64 1 + %tmp14296 = getelementptr inbounds float* %tmp14295, i64 1 + %tmp14297 = getelementptr inbounds float* %tmp14296, i64 1 + %tmp14298 = getelementptr inbounds float* %tmp14297, i64 1 + %tmp14299 = getelementptr inbounds float* %tmp14298, i64 1 + %tmp14300 = getelementptr inbounds float* %tmp14299, i64 1 + %tmp14301 = getelementptr inbounds float* %tmp14300, i64 1 + %tmp14302 = getelementptr inbounds float* %tmp14301, i64 1 + %tmp14303 = getelementptr inbounds float* %tmp14302, i64 1 + %tmp14304 = getelementptr inbounds float* %tmp14303, i64 1 + %tmp14305 = getelementptr inbounds float* %tmp14304, i64 1 + %tmp14306 = getelementptr inbounds float* %tmp14305, i64 1 + %tmp14307 = getelementptr inbounds float* %tmp14306, i64 1 + %tmp14308 = getelementptr inbounds float* %tmp14307, i64 1 + %tmp14309 = getelementptr inbounds float* %tmp14308, i64 1 + %tmp14310 = getelementptr inbounds float* %tmp14309, i64 1 + %tmp14311 = getelementptr inbounds float* %tmp14310, i64 1 + %tmp14312 = getelementptr inbounds float* %tmp14311, i64 1 + %tmp14313 = getelementptr inbounds float* %tmp14312, i64 1 + %tmp14314 = getelementptr inbounds float* %tmp14313, i64 1 + %tmp14315 = getelementptr inbounds float* %tmp14314, i64 1 + %tmp14316 = getelementptr inbounds float* %tmp14315, i64 1 + %tmp14317 = getelementptr inbounds float* %tmp14316, i64 1 + %tmp14318 = getelementptr inbounds float* %tmp14317, i64 1 + %tmp14319 = getelementptr inbounds float* %tmp14318, i64 1 + %tmp14320 = getelementptr inbounds float* %tmp14319, i64 1 + %tmp14321 = getelementptr inbounds float* %tmp14320, i64 1 + %tmp14322 = getelementptr inbounds float* %tmp14321, i64 1 + %tmp14323 = getelementptr inbounds float* %tmp14322, i64 1 + %tmp14324 = 
getelementptr inbounds float* %tmp14323, i64 1 + %tmp14325 = getelementptr inbounds float* %tmp14324, i64 1 + %tmp14326 = getelementptr inbounds float* %tmp14325, i64 1 + %tmp14327 = getelementptr inbounds float* %tmp14326, i64 1 + %tmp14328 = getelementptr inbounds float* %tmp14327, i64 1 + %tmp14329 = getelementptr inbounds float* %tmp14328, i64 1 + %tmp14330 = getelementptr inbounds float* %tmp14329, i64 1 + %tmp14331 = getelementptr inbounds float* %tmp14330, i64 1 + %tmp14332 = getelementptr inbounds float* %tmp14331, i64 1 + %tmp14333 = getelementptr inbounds float* %tmp14332, i64 1 + %tmp14334 = getelementptr inbounds float* %tmp14333, i64 1 + %tmp14335 = getelementptr inbounds float* %tmp14334, i64 1 + %tmp14336 = getelementptr inbounds float* %tmp14335, i64 1 + %tmp14337 = getelementptr inbounds float* %tmp14336, i64 1 + %tmp14338 = getelementptr inbounds float* %tmp14337, i64 1 + %tmp14339 = getelementptr inbounds float* %tmp14338, i64 1 + %tmp14340 = getelementptr inbounds float* %tmp14339, i64 1 + %tmp14341 = getelementptr inbounds float* %tmp14340, i64 1 + %tmp14342 = getelementptr inbounds float* %tmp14341, i64 1 + %tmp14343 = getelementptr inbounds float* %tmp14342, i64 1 + %tmp14344 = getelementptr inbounds float* %tmp14343, i64 1 + %tmp14345 = getelementptr inbounds float* %tmp14344, i64 1 + %tmp14346 = getelementptr inbounds float* %tmp14345, i64 1 + %tmp14347 = getelementptr inbounds float* %tmp14346, i64 1 + %tmp14348 = getelementptr inbounds float* %tmp14347, i64 1 + %tmp14349 = getelementptr inbounds float* %tmp14348, i64 1 + %tmp14350 = getelementptr inbounds float* %tmp14349, i64 1 + %tmp14351 = getelementptr inbounds float* %tmp14350, i64 1 + %tmp14352 = getelementptr inbounds float* %tmp14351, i64 1 + %tmp14353 = getelementptr inbounds float* %tmp14352, i64 1 + %tmp14354 = getelementptr inbounds float* %tmp14353, i64 1 + %tmp14355 = getelementptr inbounds float* %tmp14354, i64 1 + %tmp14356 = getelementptr inbounds float* %tmp14355, i64 1 
+ %tmp14357 = getelementptr inbounds float* %tmp14356, i64 1 + %tmp14358 = getelementptr inbounds float* %tmp14357, i64 1 + %tmp14359 = getelementptr inbounds float* %tmp14358, i64 1 + %tmp14360 = getelementptr inbounds float* %tmp14359, i64 1 + %tmp14361 = getelementptr inbounds float* %tmp14360, i64 1 + %tmp14362 = getelementptr inbounds float* %tmp14361, i64 1 + %tmp14363 = getelementptr inbounds float* %tmp14362, i64 1 + %tmp14364 = getelementptr inbounds float* %tmp14363, i64 1 + %tmp14365 = getelementptr inbounds float* %tmp14364, i64 1 + %tmp14366 = getelementptr inbounds float* %tmp14365, i64 1 + %tmp14367 = getelementptr inbounds float* %tmp14366, i64 1 + %tmp14368 = getelementptr inbounds float* %tmp14367, i64 1 + %tmp14369 = getelementptr inbounds float* %tmp14368, i64 1 + %tmp14370 = getelementptr inbounds float* %tmp14369, i64 1 + %tmp14371 = getelementptr inbounds float* %tmp14370, i64 1 + %tmp14372 = getelementptr inbounds float* %tmp14371, i64 1 + %tmp14373 = getelementptr inbounds float* %tmp14372, i64 1 + %tmp14374 = getelementptr inbounds float* %tmp14373, i64 1 + %tmp14375 = getelementptr inbounds float* %tmp14374, i64 1 + %tmp14376 = getelementptr inbounds float* %tmp14375, i64 1 + %tmp14377 = getelementptr inbounds float* %tmp14376, i64 1 + %tmp14378 = getelementptr inbounds float* %tmp14377, i64 1 + %tmp14379 = getelementptr inbounds float* %tmp14378, i64 1 + %tmp14380 = getelementptr inbounds float* %tmp14379, i64 1 + %tmp14381 = getelementptr inbounds float* %tmp14380, i64 1 + %tmp14382 = getelementptr inbounds float* %tmp14381, i64 1 + %tmp14383 = getelementptr inbounds float* %tmp14382, i64 1 + %tmp14384 = getelementptr inbounds float* %tmp14383, i64 1 + %tmp14385 = getelementptr inbounds float* %tmp14384, i64 1 + %tmp14386 = getelementptr inbounds float* %tmp14385, i64 1 + %tmp14387 = getelementptr inbounds float* %tmp14386, i64 1 + %tmp14388 = getelementptr inbounds float* %tmp14387, i64 1 + %tmp14389 = getelementptr inbounds float* 
%tmp14388, i64 1 + %tmp14390 = getelementptr inbounds float* %tmp14389, i64 1 + %tmp14391 = getelementptr inbounds float* %tmp14390, i64 1 + %tmp14392 = getelementptr inbounds float* %tmp14391, i64 1 + %tmp14393 = getelementptr inbounds float* %tmp14392, i64 1 + %tmp14394 = getelementptr inbounds float* %tmp14393, i64 1 + %tmp14395 = getelementptr inbounds float* %tmp14394, i64 1 + %tmp14396 = getelementptr inbounds float* %tmp14395, i64 1 + %tmp14397 = getelementptr inbounds float* %tmp14396, i64 1 + %tmp14398 = getelementptr inbounds float* %tmp14397, i64 1 + %tmp14399 = getelementptr inbounds float* %tmp14398, i64 1 + %tmp14400 = getelementptr inbounds float* %tmp14399, i64 1 + %tmp14401 = getelementptr inbounds float* %tmp14400, i64 1 + %tmp14402 = getelementptr inbounds float* %tmp14401, i64 1 + %tmp14403 = getelementptr inbounds float* %tmp14402, i64 1 + %tmp14404 = getelementptr inbounds float* %tmp14403, i64 1 + %tmp14405 = getelementptr inbounds float* %tmp14404, i64 1 + %tmp14406 = getelementptr inbounds float* %tmp14405, i64 1 + %tmp14407 = getelementptr inbounds float* %tmp14406, i64 1 + %tmp14408 = getelementptr inbounds float* %tmp14407, i64 1 + %tmp14409 = getelementptr inbounds float* %tmp14408, i64 1 + %tmp14410 = getelementptr inbounds float* %tmp14409, i64 1 + %tmp14411 = getelementptr inbounds float* %tmp14410, i64 1 + %tmp14412 = getelementptr inbounds float* %tmp14411, i64 1 + %tmp14413 = getelementptr inbounds float* %tmp14412, i64 1 + %tmp14414 = getelementptr inbounds float* %tmp14413, i64 1 + %tmp14415 = getelementptr inbounds float* %tmp14414, i64 1 + %tmp14416 = getelementptr inbounds float* %tmp14415, i64 1 + %tmp14417 = getelementptr inbounds float* %tmp14416, i64 1 + %tmp14418 = getelementptr inbounds float* %tmp14417, i64 1 + %tmp14419 = getelementptr inbounds float* %tmp14418, i64 1 + %tmp14420 = getelementptr inbounds float* %tmp14419, i64 1 + %tmp14421 = getelementptr inbounds float* %tmp14420, i64 1 + %tmp14422 = getelementptr 
inbounds float* %tmp14421, i64 1 + %tmp14423 = getelementptr inbounds float* %tmp14422, i64 1 + %tmp14424 = getelementptr inbounds float* %tmp14423, i64 1 + %tmp14425 = getelementptr inbounds float* %tmp14424, i64 1 + %tmp14426 = getelementptr inbounds float* %tmp14425, i64 1 + %tmp14427 = getelementptr inbounds float* %tmp14426, i64 1 + %tmp14428 = getelementptr inbounds float* %tmp14427, i64 1 + %tmp14429 = getelementptr inbounds float* %tmp14428, i64 1 + %tmp14430 = getelementptr inbounds float* %tmp14429, i64 1 + %tmp14431 = getelementptr inbounds float* %tmp14430, i64 1 + %tmp14432 = getelementptr inbounds float* %tmp14431, i64 1 + %tmp14433 = getelementptr inbounds float* %tmp14432, i64 1 + %tmp14434 = getelementptr inbounds float* %tmp14433, i64 1 + %tmp14435 = getelementptr inbounds float* %tmp14434, i64 1 + %tmp14436 = getelementptr inbounds float* %tmp14435, i64 1 + %tmp14437 = getelementptr inbounds float* %tmp14436, i64 1 + %tmp14438 = getelementptr inbounds float* %tmp14437, i64 1 + %tmp14439 = getelementptr inbounds float* %tmp14438, i64 1 + %tmp14440 = getelementptr inbounds float* %tmp14439, i64 1 + %tmp14441 = getelementptr inbounds float* %tmp14440, i64 1 + %tmp14442 = getelementptr inbounds float* %tmp14441, i64 1 + %tmp14443 = getelementptr inbounds float* %tmp14442, i64 1 + %tmp14444 = getelementptr inbounds float* %tmp14443, i64 1 + %tmp14445 = getelementptr inbounds float* %tmp14444, i64 1 + %tmp14446 = getelementptr inbounds float* %tmp14445, i64 1 + %tmp14447 = getelementptr inbounds float* %tmp14446, i64 1 + %tmp14448 = getelementptr inbounds float* %tmp14447, i64 1 + %tmp14449 = getelementptr inbounds float* %tmp14448, i64 1 + %tmp14450 = getelementptr inbounds float* %tmp14449, i64 1 + %tmp14451 = getelementptr inbounds float* %tmp14450, i64 1 + %tmp14452 = getelementptr inbounds float* %tmp14451, i64 1 + %tmp14453 = getelementptr inbounds float* %tmp14452, i64 1 + %tmp14454 = getelementptr inbounds float* %tmp14453, i64 1 + %tmp14455 = 
getelementptr inbounds float* %tmp14454, i64 1 + %tmp14456 = getelementptr inbounds float* %tmp14455, i64 1 + %tmp14457 = getelementptr inbounds float* %tmp14456, i64 1 + %tmp14458 = getelementptr inbounds float* %tmp14457, i64 1 + %tmp14459 = getelementptr inbounds float* %tmp14458, i64 1 + %tmp14460 = getelementptr inbounds float* %tmp14459, i64 1 + %tmp14461 = getelementptr inbounds float* %tmp14460, i64 1 + %tmp14462 = getelementptr inbounds float* %tmp14461, i64 1 + %tmp14463 = getelementptr inbounds float* %tmp14462, i64 1 + %tmp14464 = getelementptr inbounds float* %tmp14463, i64 1 + %tmp14465 = getelementptr inbounds float* %tmp14464, i64 1 + %tmp14466 = getelementptr inbounds float* %tmp14465, i64 1 + %tmp14467 = getelementptr inbounds float* %tmp14466, i64 1 + %tmp14468 = getelementptr inbounds float* %tmp14467, i64 1 + %tmp14469 = getelementptr inbounds float* %tmp14468, i64 1 + %tmp14470 = getelementptr inbounds float* %tmp14469, i64 1 + %tmp14471 = getelementptr inbounds float* %tmp14470, i64 1 + %tmp14472 = getelementptr inbounds float* %tmp14471, i64 1 + %tmp14473 = getelementptr inbounds float* %tmp14472, i64 1 + %tmp14474 = getelementptr inbounds float* %tmp14473, i64 1 + %tmp14475 = getelementptr inbounds float* %tmp14474, i64 1 + %tmp14476 = getelementptr inbounds float* %tmp14475, i64 1 + %tmp14477 = getelementptr inbounds float* %tmp14476, i64 1 + %tmp14478 = getelementptr inbounds float* %tmp14477, i64 1 + %tmp14479 = getelementptr inbounds float* %tmp14478, i64 1 + %tmp14480 = getelementptr inbounds float* %tmp14479, i64 1 + %tmp14481 = getelementptr inbounds float* %tmp14480, i64 1 + %tmp14482 = getelementptr inbounds float* %tmp14481, i64 1 + %tmp14483 = getelementptr inbounds float* %tmp14482, i64 1 + %tmp14484 = getelementptr inbounds float* %tmp14483, i64 1 + %tmp14485 = getelementptr inbounds float* %tmp14484, i64 1 + %tmp14486 = getelementptr inbounds float* %tmp14485, i64 1 + %tmp14487 = getelementptr inbounds float* %tmp14486, i64 1 
+ %tmp14488 = getelementptr inbounds float* %tmp14487, i64 1 + %tmp14489 = getelementptr inbounds float* %tmp14488, i64 1 + %tmp14490 = getelementptr inbounds float* %tmp14489, i64 1 + %tmp14491 = getelementptr inbounds float* %tmp14490, i64 1 + %tmp14492 = getelementptr inbounds float* %tmp14491, i64 1 + %tmp14493 = getelementptr inbounds float* %tmp14492, i64 1 + %tmp14494 = getelementptr inbounds float* %tmp14493, i64 1 + %tmp14495 = getelementptr inbounds float* %tmp14494, i64 1 + %tmp14496 = getelementptr inbounds float* %tmp14495, i64 1 + %tmp14497 = getelementptr inbounds float* %tmp14496, i64 1 + %tmp14498 = getelementptr inbounds float* %tmp14497, i64 1 + %tmp14499 = getelementptr inbounds float* %tmp14498, i64 1 + %tmp14500 = getelementptr inbounds float* %tmp14499, i64 1 + %tmp14501 = getelementptr inbounds float* %tmp14500, i64 1 + %tmp14502 = getelementptr inbounds float* %tmp14501, i64 1 + %tmp14503 = getelementptr inbounds float* %tmp14502, i64 1 + %tmp14504 = getelementptr inbounds float* %tmp14503, i64 1 + %tmp14505 = getelementptr inbounds float* %tmp14504, i64 1 + %tmp14506 = getelementptr inbounds float* %tmp14505, i64 1 + %tmp14507 = getelementptr inbounds float* %tmp14506, i64 1 + %tmp14508 = getelementptr inbounds float* %tmp14507, i64 1 + %tmp14509 = getelementptr inbounds float* %tmp14508, i64 1 + %tmp14510 = getelementptr inbounds float* %tmp14509, i64 1 + %tmp14511 = getelementptr inbounds float* %tmp14510, i64 1 + %tmp14512 = getelementptr inbounds float* %tmp14511, i64 1 + %tmp14513 = getelementptr inbounds float* %tmp14512, i64 1 + %tmp14514 = getelementptr inbounds float* %tmp14513, i64 1 + %tmp14515 = getelementptr inbounds float* %tmp14514, i64 1 + %tmp14516 = getelementptr inbounds float* %tmp14515, i64 1 + %tmp14517 = getelementptr inbounds float* %tmp14516, i64 1 + %tmp14518 = getelementptr inbounds float* %tmp14517, i64 1 + %tmp14519 = getelementptr inbounds float* %tmp14518, i64 1 + %tmp14520 = getelementptr inbounds float* 
%tmp14519, i64 1 + %tmp14521 = getelementptr inbounds float* %tmp14520, i64 1 + %tmp14522 = getelementptr inbounds float* %tmp14521, i64 1 + %tmp14523 = getelementptr inbounds float* %tmp14522, i64 1 + %tmp14524 = getelementptr inbounds float* %tmp14523, i64 1 + %tmp14525 = getelementptr inbounds float* %tmp14524, i64 1 + %tmp14526 = getelementptr inbounds float* %tmp14525, i64 1 + %tmp14527 = getelementptr inbounds float* %tmp14526, i64 1 + %tmp14528 = getelementptr inbounds float* %tmp14527, i64 1 + %tmp14529 = getelementptr inbounds float* %tmp14528, i64 1 + %tmp14530 = getelementptr inbounds float* %tmp14529, i64 1 + %tmp14531 = getelementptr inbounds float* %tmp14530, i64 1 + %tmp14532 = getelementptr inbounds float* %tmp14531, i64 1 + %tmp14533 = getelementptr inbounds float* %tmp14532, i64 1 + %tmp14534 = getelementptr inbounds float* %tmp14533, i64 1 + %tmp14535 = getelementptr inbounds float* %tmp14534, i64 1 + %tmp14536 = getelementptr inbounds float* %tmp14535, i64 1 + %tmp14537 = getelementptr inbounds float* %tmp14536, i64 1 + %tmp14538 = getelementptr inbounds float* %tmp14537, i64 1 + %tmp14539 = getelementptr inbounds float* %tmp14538, i64 1 + %tmp14540 = getelementptr inbounds float* %tmp14539, i64 1 + %tmp14541 = getelementptr inbounds float* %tmp14540, i64 1 + %tmp14542 = getelementptr inbounds float* %tmp14541, i64 1 + %tmp14543 = getelementptr inbounds float* %tmp14542, i64 1 + %tmp14544 = getelementptr inbounds float* %tmp14543, i64 1 + %tmp14545 = getelementptr inbounds float* %tmp14544, i64 1 + %tmp14546 = getelementptr inbounds float* %tmp14545, i64 1 + %tmp14547 = getelementptr inbounds float* %tmp14546, i64 1 + %tmp14548 = getelementptr inbounds float* %tmp14547, i64 1 + %tmp14549 = getelementptr inbounds float* %tmp14548, i64 1 + %tmp14550 = getelementptr inbounds float* %tmp14549, i64 1 + %tmp14551 = getelementptr inbounds float* %tmp14550, i64 1 + %tmp14552 = getelementptr inbounds float* %tmp14551, i64 1 + %tmp14553 = getelementptr 
inbounds float* %tmp14552, i64 1 + %tmp14554 = getelementptr inbounds float* %tmp14553, i64 1 + %tmp14555 = getelementptr inbounds float* %tmp14554, i64 1 + %tmp14556 = getelementptr inbounds float* %tmp14555, i64 1 + %tmp14557 = getelementptr inbounds float* %tmp14556, i64 1 + %tmp14558 = getelementptr inbounds float* %tmp14557, i64 1 + %tmp14559 = getelementptr inbounds float* %tmp14558, i64 1 + %tmp14560 = getelementptr inbounds float* %tmp14559, i64 1 + %tmp14561 = getelementptr inbounds float* %tmp14560, i64 1 + %tmp14562 = getelementptr inbounds float* %tmp14561, i64 1 + %tmp14563 = getelementptr inbounds float* %tmp14562, i64 1 + %tmp14564 = getelementptr inbounds float* %tmp14563, i64 1 + %tmp14565 = getelementptr inbounds float* %tmp14564, i64 1 + %tmp14566 = getelementptr inbounds float* %tmp14565, i64 1 + %tmp14567 = getelementptr inbounds float* %tmp14566, i64 1 + %tmp14568 = getelementptr inbounds float* %tmp14567, i64 1 + %tmp14569 = getelementptr inbounds float* %tmp14568, i64 1 + %tmp14570 = getelementptr inbounds float* %tmp14569, i64 1 + %tmp14571 = getelementptr inbounds float* %tmp14570, i64 1 + %tmp14572 = getelementptr inbounds float* %tmp14571, i64 1 + %tmp14573 = getelementptr inbounds float* %tmp14572, i64 1 + %tmp14574 = getelementptr inbounds float* %tmp14573, i64 1 + %tmp14575 = getelementptr inbounds float* %tmp14574, i64 1 + %tmp14576 = getelementptr inbounds float* %tmp14575, i64 1 + %tmp14577 = getelementptr inbounds float* %tmp14576, i64 1 + %tmp14578 = getelementptr inbounds float* %tmp14577, i64 1 + %tmp14579 = getelementptr inbounds float* %tmp14578, i64 1 + %tmp14580 = getelementptr inbounds float* %tmp14579, i64 1 + %tmp14581 = getelementptr inbounds float* %tmp14580, i64 1 + %tmp14582 = getelementptr inbounds float* %tmp14581, i64 1 + %tmp14583 = getelementptr inbounds float* %tmp14582, i64 1 + %tmp14584 = getelementptr inbounds float* %tmp14583, i64 1 + %tmp14585 = getelementptr inbounds float* %tmp14584, i64 1 + %tmp14586 = 
getelementptr inbounds float* %tmp14585, i64 1 + %tmp14587 = getelementptr inbounds float* %tmp14586, i64 1 + %tmp14588 = getelementptr inbounds float* %tmp14587, i64 1 + %tmp14589 = getelementptr inbounds float* %tmp14588, i64 1 + %tmp14590 = getelementptr inbounds float* %tmp14589, i64 1 + %tmp14591 = getelementptr inbounds float* %tmp14590, i64 1 + %tmp14592 = getelementptr inbounds float* %tmp14591, i64 1 + %tmp14593 = getelementptr inbounds float* %tmp14592, i64 1 + %tmp14594 = getelementptr inbounds float* %tmp14593, i64 1 + %tmp14595 = getelementptr inbounds float* %tmp14594, i64 1 + %tmp14596 = getelementptr inbounds float* %tmp14595, i64 1 + %tmp14597 = getelementptr inbounds float* %tmp14596, i64 1 + %tmp14598 = getelementptr inbounds float* %tmp14597, i64 1 + %tmp14599 = getelementptr inbounds float* %tmp14598, i64 1 + %tmp14600 = getelementptr inbounds float* %tmp14599, i64 1 + %tmp14601 = getelementptr inbounds float* %tmp14600, i64 1 + %tmp14602 = getelementptr inbounds float* %tmp14601, i64 1 + %tmp14603 = getelementptr inbounds float* %tmp14602, i64 1 + %tmp14604 = getelementptr inbounds float* %tmp14603, i64 1 + %tmp14605 = getelementptr inbounds float* %tmp14604, i64 1 + %tmp14606 = getelementptr inbounds float* %tmp14605, i64 1 + %tmp14607 = getelementptr inbounds float* %tmp14606, i64 1 + %tmp14608 = getelementptr inbounds float* %tmp14607, i64 1 + %tmp14609 = getelementptr inbounds float* %tmp14608, i64 1 + %tmp14610 = getelementptr inbounds float* %tmp14609, i64 1 + %tmp14611 = getelementptr inbounds float* %tmp14610, i64 1 + %tmp14612 = getelementptr inbounds float* %tmp14611, i64 1 + %tmp14613 = getelementptr inbounds float* %tmp14612, i64 1 + %tmp14614 = getelementptr inbounds float* %tmp14613, i64 1 + %tmp14615 = getelementptr inbounds float* %tmp14614, i64 1 + %tmp14616 = getelementptr inbounds float* %tmp14615, i64 1 + %tmp14617 = getelementptr inbounds float* %tmp14616, i64 1 + %tmp14618 = getelementptr inbounds float* %tmp14617, i64 1 
+ %tmp14619 = getelementptr inbounds float* %tmp14618, i64 1 + %tmp14620 = getelementptr inbounds float* %tmp14619, i64 1 + %tmp14621 = getelementptr inbounds float* %tmp14620, i64 1 + %tmp14622 = getelementptr inbounds float* %tmp14621, i64 1 + %tmp14623 = getelementptr inbounds float* %tmp14622, i64 1 + %tmp14624 = getelementptr inbounds float* %tmp14623, i64 1 + %tmp14625 = getelementptr inbounds float* %tmp14624, i64 1 + %tmp14626 = getelementptr inbounds float* %tmp14625, i64 1 + %tmp14627 = getelementptr inbounds float* %tmp14626, i64 1 + %tmp14628 = getelementptr inbounds float* %tmp14627, i64 1 + %tmp14629 = getelementptr inbounds float* %tmp14628, i64 1 + %tmp14630 = getelementptr inbounds float* %tmp14629, i64 1 + %tmp14631 = getelementptr inbounds float* %tmp14630, i64 1 + %tmp14632 = getelementptr inbounds float* %tmp14631, i64 1 + %tmp14633 = getelementptr inbounds float* %tmp14632, i64 1 + %tmp14634 = getelementptr inbounds float* %tmp14633, i64 1 + %tmp14635 = getelementptr inbounds float* %tmp14634, i64 1 + %tmp14636 = getelementptr inbounds float* %tmp14635, i64 1 + %tmp14637 = getelementptr inbounds float* %tmp14636, i64 1 + %tmp14638 = getelementptr inbounds float* %tmp14637, i64 1 + %tmp14639 = getelementptr inbounds float* %tmp14638, i64 1 + %tmp14640 = getelementptr inbounds float* %tmp14639, i64 1 + %tmp14641 = getelementptr inbounds float* %tmp14640, i64 1 + %tmp14642 = getelementptr inbounds float* %tmp14641, i64 1 + %tmp14643 = getelementptr inbounds float* %tmp14642, i64 1 + %tmp14644 = getelementptr inbounds float* %tmp14643, i64 1 + %tmp14645 = getelementptr inbounds float* %tmp14644, i64 1 + %tmp14646 = getelementptr inbounds float* %tmp14645, i64 1 + %tmp14647 = getelementptr inbounds float* %tmp14646, i64 1 + %tmp14648 = getelementptr inbounds float* %tmp14647, i64 1 + %tmp14649 = getelementptr inbounds float* %tmp14648, i64 1 + %tmp14650 = getelementptr inbounds float* %tmp14649, i64 1 + %tmp14651 = getelementptr inbounds float* 
%tmp14650, i64 1 + %tmp14652 = getelementptr inbounds float* %tmp14651, i64 1 + %tmp14653 = getelementptr inbounds float* %tmp14652, i64 1 + %tmp14654 = getelementptr inbounds float* %tmp14653, i64 1 + %tmp14655 = getelementptr inbounds float* %tmp14654, i64 1 + %tmp14656 = getelementptr inbounds float* %tmp14655, i64 1 + %tmp14657 = getelementptr inbounds float* %tmp14656, i64 1 + %tmp14658 = getelementptr inbounds float* %tmp14657, i64 1 + %tmp14659 = getelementptr inbounds float* %tmp14658, i64 1 + %tmp14660 = getelementptr inbounds float* %tmp14659, i64 1 + %tmp14661 = getelementptr inbounds float* %tmp14660, i64 1 + %tmp14662 = getelementptr inbounds float* %tmp14661, i64 1 + %tmp14663 = getelementptr inbounds float* %tmp14662, i64 1 + %tmp14664 = getelementptr inbounds float* %tmp14663, i64 1 + %tmp14665 = getelementptr inbounds float* %tmp14664, i64 1 + %tmp14666 = getelementptr inbounds float* %tmp14665, i64 1 + %tmp14667 = getelementptr inbounds float* %tmp14666, i64 1 + %tmp14668 = getelementptr inbounds float* %tmp14667, i64 1 + %tmp14669 = getelementptr inbounds float* %tmp14668, i64 1 + %tmp14670 = getelementptr inbounds float* %tmp14669, i64 1 + %tmp14671 = getelementptr inbounds float* %tmp14670, i64 1 + %tmp14672 = getelementptr inbounds float* %tmp14671, i64 1 + %tmp14673 = getelementptr inbounds float* %tmp14672, i64 1 + %tmp14674 = getelementptr inbounds float* %tmp14673, i64 1 + %tmp14675 = getelementptr inbounds float* %tmp14674, i64 1 + %tmp14676 = getelementptr inbounds float* %tmp14675, i64 1 + %tmp14677 = getelementptr inbounds float* %tmp14676, i64 1 + %tmp14678 = getelementptr inbounds float* %tmp14677, i64 1 + %tmp14679 = getelementptr inbounds float* %tmp14678, i64 1 + %tmp14680 = getelementptr inbounds float* %tmp14679, i64 1 + %tmp14681 = getelementptr inbounds float* %tmp14680, i64 1 + %tmp14682 = getelementptr inbounds float* %tmp14681, i64 1 + %tmp14683 = getelementptr inbounds float* %tmp14682, i64 1 + %tmp14684 = getelementptr 
inbounds float* %tmp14683, i64 1 + %tmp14685 = getelementptr inbounds float* %tmp14684, i64 1 + %tmp14686 = getelementptr inbounds float* %tmp14685, i64 1 + %tmp14687 = getelementptr inbounds float* %tmp14686, i64 1 + %tmp14688 = getelementptr inbounds float* %tmp14687, i64 1 + %tmp14689 = getelementptr inbounds float* %tmp14688, i64 1 + %tmp14690 = getelementptr inbounds float* %tmp14689, i64 1 + %tmp14691 = getelementptr inbounds float* %tmp14690, i64 1 + %tmp14692 = getelementptr inbounds float* %tmp14691, i64 1 + %tmp14693 = getelementptr inbounds float* %tmp14692, i64 1 + %tmp14694 = getelementptr inbounds float* %tmp14693, i64 1 + %tmp14695 = getelementptr inbounds float* %tmp14694, i64 1 + %tmp14696 = getelementptr inbounds float* %tmp14695, i64 1 + %tmp14697 = getelementptr inbounds float* %tmp14696, i64 1 + %tmp14698 = getelementptr inbounds float* %tmp14697, i64 1 + %tmp14699 = getelementptr inbounds float* %tmp14698, i64 1 + %tmp14700 = getelementptr inbounds float* %tmp14699, i64 1 + %tmp14701 = getelementptr inbounds float* %tmp14700, i64 1 + %tmp14702 = getelementptr inbounds float* %tmp14701, i64 1 + %tmp14703 = getelementptr inbounds float* %tmp14702, i64 1 + %tmp14704 = getelementptr inbounds float* %tmp14703, i64 1 + %tmp14705 = getelementptr inbounds float* %tmp14704, i64 1 + %tmp14706 = getelementptr inbounds float* %tmp14705, i64 1 + %tmp14707 = getelementptr inbounds float* %tmp14706, i64 1 + %tmp14708 = getelementptr inbounds float* %tmp14707, i64 1 + %tmp14709 = getelementptr inbounds float* %tmp14708, i64 1 + %tmp14710 = getelementptr inbounds float* %tmp14709, i64 1 + %tmp14711 = getelementptr inbounds float* %tmp14710, i64 1 + %tmp14712 = getelementptr inbounds float* %tmp14711, i64 1 + %tmp14713 = getelementptr inbounds float* %tmp14712, i64 1 + %tmp14714 = getelementptr inbounds float* %tmp14713, i64 1 + %tmp14715 = getelementptr inbounds float* %tmp14714, i64 1 + %tmp14716 = getelementptr inbounds float* %tmp14715, i64 1 + %tmp14717 = 
getelementptr inbounds float* %tmp14716, i64 1 + %tmp14718 = getelementptr inbounds float* %tmp14717, i64 1 + %tmp14719 = getelementptr inbounds float* %tmp14718, i64 1 + %tmp14720 = getelementptr inbounds float* %tmp14719, i64 1 + %tmp14721 = getelementptr inbounds float* %tmp14720, i64 1 + %tmp14722 = getelementptr inbounds float* %tmp14721, i64 1 + %tmp14723 = getelementptr inbounds float* %tmp14722, i64 1 + %tmp14724 = getelementptr inbounds float* %tmp14723, i64 1 + %tmp14725 = getelementptr inbounds float* %tmp14724, i64 1 + %tmp14726 = getelementptr inbounds float* %tmp14725, i64 1 + %tmp14727 = getelementptr inbounds float* %tmp14726, i64 1 + %tmp14728 = getelementptr inbounds float* %tmp14727, i64 1 + %tmp14729 = getelementptr inbounds float* %tmp14728, i64 1 + %tmp14730 = getelementptr inbounds float* %tmp14729, i64 1 + %tmp14731 = getelementptr inbounds float* %tmp14730, i64 1 + %tmp14732 = getelementptr inbounds float* %tmp14731, i64 1 + %tmp14733 = getelementptr inbounds float* %tmp14732, i64 1 + %tmp14734 = getelementptr inbounds float* %tmp14733, i64 1 + %tmp14735 = getelementptr inbounds float* %tmp14734, i64 1 + %tmp14736 = getelementptr inbounds float* %tmp14735, i64 1 + %tmp14737 = getelementptr inbounds float* %tmp14736, i64 1 + %tmp14738 = getelementptr inbounds float* %tmp14737, i64 1 + %tmp14739 = getelementptr inbounds float* %tmp14738, i64 1 + %tmp14740 = getelementptr inbounds float* %tmp14739, i64 1 + %tmp14741 = getelementptr inbounds float* %tmp14740, i64 1 + %tmp14742 = getelementptr inbounds float* %tmp14741, i64 1 + %tmp14743 = getelementptr inbounds float* %tmp14742, i64 1 + %tmp14744 = getelementptr inbounds float* %tmp14743, i64 1 + %tmp14745 = getelementptr inbounds float* %tmp14744, i64 1 + %tmp14746 = getelementptr inbounds float* %tmp14745, i64 1 + %tmp14747 = getelementptr inbounds float* %tmp14746, i64 1 + %tmp14748 = getelementptr inbounds float* %tmp14747, i64 1 + %tmp14749 = getelementptr inbounds float* %tmp14748, i64 1 
+ %tmp14750 = getelementptr inbounds float* %tmp14749, i64 1 + %tmp14751 = getelementptr inbounds float* %tmp14750, i64 1 + %tmp14752 = getelementptr inbounds float* %tmp14751, i64 1 + %tmp14753 = getelementptr inbounds float* %tmp14752, i64 1 + %tmp14754 = getelementptr inbounds float* %tmp14753, i64 1 + %tmp14755 = getelementptr inbounds float* %tmp14754, i64 1 + %tmp14756 = getelementptr inbounds float* %tmp14755, i64 1 + %tmp14757 = getelementptr inbounds float* %tmp14756, i64 1 + %tmp14758 = getelementptr inbounds float* %tmp14757, i64 1 + %tmp14759 = getelementptr inbounds float* %tmp14758, i64 1 + %tmp14760 = getelementptr inbounds float* %tmp14759, i64 1 + %tmp14761 = getelementptr inbounds float* %tmp14760, i64 1 + %tmp14762 = getelementptr inbounds float* %tmp14761, i64 1 + %tmp14763 = getelementptr inbounds float* %tmp14762, i64 1 + %tmp14764 = getelementptr inbounds float* %tmp14763, i64 1 + %tmp14765 = getelementptr inbounds float* %tmp14764, i64 1 + %tmp14766 = getelementptr inbounds float* %tmp14765, i64 1 + %tmp14767 = getelementptr inbounds float* %tmp14766, i64 1 + %tmp14768 = getelementptr inbounds float* %tmp14767, i64 1 + %tmp14769 = getelementptr inbounds float* %tmp14768, i64 1 + %tmp14770 = getelementptr inbounds float* %tmp14769, i64 1 + %tmp14771 = getelementptr inbounds float* %tmp14770, i64 1 + %tmp14772 = getelementptr inbounds float* %tmp14771, i64 1 + %tmp14773 = getelementptr inbounds float* %tmp14772, i64 1 + %tmp14774 = getelementptr inbounds float* %tmp14773, i64 1 + %tmp14775 = getelementptr inbounds float* %tmp14774, i64 1 + %tmp14776 = getelementptr inbounds float* %tmp14775, i64 1 + %tmp14777 = getelementptr inbounds float* %tmp14776, i64 1 + %tmp14778 = getelementptr inbounds float* %tmp14777, i64 1 + %tmp14779 = getelementptr inbounds float* %tmp14778, i64 1 + %tmp14780 = getelementptr inbounds float* %tmp14779, i64 1 + %tmp14781 = getelementptr inbounds float* %tmp14780, i64 1 + %tmp14782 = getelementptr inbounds float* 
%tmp14781, i64 1 + %tmp14783 = getelementptr inbounds float* %tmp14782, i64 1 + %tmp14784 = getelementptr inbounds float* %tmp14783, i64 1 + %tmp14785 = getelementptr inbounds float* %tmp14784, i64 1 + %tmp14786 = getelementptr inbounds float* %tmp14785, i64 1 + %tmp14787 = getelementptr inbounds float* %tmp14786, i64 1 + %tmp14788 = getelementptr inbounds float* %tmp14787, i64 1 + %tmp14789 = getelementptr inbounds float* %tmp14788, i64 1 + %tmp14790 = getelementptr inbounds float* %tmp14789, i64 1 + %tmp14791 = getelementptr inbounds float* %tmp14790, i64 1 + %tmp14792 = getelementptr inbounds float* %tmp14791, i64 1 + %tmp14793 = getelementptr inbounds float* %tmp14792, i64 1 + %tmp14794 = getelementptr inbounds float* %tmp14793, i64 1 + %tmp14795 = getelementptr inbounds float* %tmp14794, i64 1 + %tmp14796 = getelementptr inbounds float* %tmp14795, i64 1 + %tmp14797 = getelementptr inbounds float* %tmp14796, i64 1 + %tmp14798 = getelementptr inbounds float* %tmp14797, i64 1 + %tmp14799 = getelementptr inbounds float* %tmp14798, i64 1 + %tmp14800 = getelementptr inbounds float* %tmp14799, i64 1 + %tmp14801 = getelementptr inbounds float* %tmp14800, i64 1 + %tmp14802 = getelementptr inbounds float* %tmp14801, i64 1 + %tmp14803 = getelementptr inbounds float* %tmp14802, i64 1 + %tmp14804 = getelementptr inbounds float* %tmp14803, i64 1 + %tmp14805 = getelementptr inbounds float* %tmp14804, i64 1 + %tmp14806 = getelementptr inbounds float* %tmp14805, i64 1 + %tmp14807 = getelementptr inbounds float* %tmp14806, i64 1 + %tmp14808 = getelementptr inbounds float* %tmp14807, i64 1 + %tmp14809 = getelementptr inbounds float* %tmp14808, i64 1 + %tmp14810 = getelementptr inbounds float* %tmp14809, i64 1 + %tmp14811 = getelementptr inbounds float* %tmp14810, i64 1 + %tmp14812 = getelementptr inbounds float* %tmp14811, i64 1 + %tmp14813 = getelementptr inbounds float* %tmp14812, i64 1 + %tmp14814 = getelementptr inbounds float* %tmp14813, i64 1 + %tmp14815 = getelementptr 
inbounds float* %tmp14814, i64 1 + %tmp14816 = getelementptr inbounds float* %tmp14815, i64 1 + %tmp14817 = getelementptr inbounds float* %tmp14816, i64 1 + %tmp14818 = getelementptr inbounds float* %tmp14817, i64 1 + %tmp14819 = getelementptr inbounds float* %tmp14818, i64 1 + %tmp14820 = getelementptr inbounds float* %tmp14819, i64 1 + %tmp14821 = getelementptr inbounds float* %tmp14820, i64 1 + %tmp14822 = getelementptr inbounds float* %tmp14821, i64 1 + %tmp14823 = getelementptr inbounds float* %tmp14822, i64 1 + %tmp14824 = getelementptr inbounds float* %tmp14823, i64 1 + %tmp14825 = getelementptr inbounds float* %tmp14824, i64 1 + %tmp14826 = getelementptr inbounds float* %tmp14825, i64 1 + %tmp14827 = getelementptr inbounds float* %tmp14826, i64 1 + %tmp14828 = getelementptr inbounds float* %tmp14827, i64 1 + %tmp14829 = getelementptr inbounds float* %tmp14828, i64 1 + %tmp14830 = getelementptr inbounds float* %tmp14829, i64 1 + %tmp14831 = getelementptr inbounds float* %tmp14830, i64 1 + %tmp14832 = getelementptr inbounds float* %tmp14831, i64 1 + %tmp14833 = getelementptr inbounds float* %tmp14832, i64 1 + %tmp14834 = getelementptr inbounds float* %tmp14833, i64 1 + %tmp14835 = getelementptr inbounds float* %tmp14834, i64 1 + %tmp14836 = getelementptr inbounds float* %tmp14835, i64 1 + %tmp14837 = getelementptr inbounds float* %tmp14836, i64 1 + %tmp14838 = getelementptr inbounds float* %tmp14837, i64 1 + %tmp14839 = getelementptr inbounds float* %tmp14838, i64 1 + %tmp14840 = getelementptr inbounds float* %tmp14839, i64 1 + %tmp14841 = getelementptr inbounds float* %tmp14840, i64 1 + %tmp14842 = getelementptr inbounds float* %tmp14841, i64 1 + %tmp14843 = getelementptr inbounds float* %tmp14842, i64 1 + %tmp14844 = getelementptr inbounds float* %tmp14843, i64 1 + %tmp14845 = getelementptr inbounds float* %tmp14844, i64 1 + %tmp14846 = getelementptr inbounds float* %tmp14845, i64 1 + %tmp14847 = getelementptr inbounds float* %tmp14846, i64 1 + %tmp14848 = 
getelementptr inbounds float* %tmp14847, i64 1 + %tmp14849 = getelementptr inbounds float* %tmp14848, i64 1 + %tmp14850 = getelementptr inbounds float* %tmp14849, i64 1 + %tmp14851 = getelementptr inbounds float* %tmp14850, i64 1 + %tmp14852 = getelementptr inbounds float* %tmp14851, i64 1 + %tmp14853 = getelementptr inbounds float* %tmp14852, i64 1 + %tmp14854 = getelementptr inbounds float* %tmp14853, i64 1 + %tmp14855 = getelementptr inbounds float* %tmp14854, i64 1 + %tmp14856 = getelementptr inbounds float* %tmp14855, i64 1 + %tmp14857 = getelementptr inbounds float* %tmp14856, i64 1 + %tmp14858 = getelementptr inbounds float* %tmp14857, i64 1 + %tmp14859 = getelementptr inbounds float* %tmp14858, i64 1 + %tmp14860 = getelementptr inbounds float* %tmp14859, i64 1 + %tmp14861 = getelementptr inbounds float* %tmp14860, i64 1 + %tmp14862 = getelementptr inbounds float* %tmp14861, i64 1 + %tmp14863 = getelementptr inbounds float* %tmp14862, i64 1 + %tmp14864 = getelementptr inbounds float* %tmp14863, i64 1 + %tmp14865 = getelementptr inbounds float* %tmp14864, i64 1 + %tmp14866 = getelementptr inbounds float* %tmp14865, i64 1 + %tmp14867 = getelementptr inbounds float* %tmp14866, i64 1 + %tmp14868 = getelementptr inbounds float* %tmp14867, i64 1 + %tmp14869 = getelementptr inbounds float* %tmp14868, i64 1 + %tmp14870 = getelementptr inbounds float* %tmp14869, i64 1 + %tmp14871 = getelementptr inbounds float* %tmp14870, i64 1 + %tmp14872 = getelementptr inbounds float* %tmp14871, i64 1 + %tmp14873 = getelementptr inbounds float* %tmp14872, i64 1 + %tmp14874 = getelementptr inbounds float* %tmp14873, i64 1 + %tmp14875 = getelementptr inbounds float* %tmp14874, i64 1 + %tmp14876 = getelementptr inbounds float* %tmp14875, i64 1 + %tmp14877 = getelementptr inbounds float* %tmp14876, i64 1 + %tmp14878 = getelementptr inbounds float* %tmp14877, i64 1 + %tmp14879 = getelementptr inbounds float* %tmp14878, i64 1 + %tmp14880 = getelementptr inbounds float* %tmp14879, i64 1 
+ %tmp14881 = getelementptr inbounds float* %tmp14880, i64 1 + %tmp14882 = getelementptr inbounds float* %tmp14881, i64 1 + %tmp14883 = getelementptr inbounds float* %tmp14882, i64 1 + %tmp14884 = getelementptr inbounds float* %tmp14883, i64 1 + %tmp14885 = getelementptr inbounds float* %tmp14884, i64 1 + %tmp14886 = getelementptr inbounds float* %tmp14885, i64 1 + %tmp14887 = getelementptr inbounds float* %tmp14886, i64 1 + %tmp14888 = getelementptr inbounds float* %tmp14887, i64 1 + %tmp14889 = getelementptr inbounds float* %tmp14888, i64 1 + %tmp14890 = getelementptr inbounds float* %tmp14889, i64 1 + %tmp14891 = getelementptr inbounds float* %tmp14890, i64 1 + %tmp14892 = getelementptr inbounds float* %tmp14891, i64 1 + %tmp14893 = getelementptr inbounds float* %tmp14892, i64 1 + %tmp14894 = getelementptr inbounds float* %tmp14893, i64 1 + %tmp14895 = getelementptr inbounds float* %tmp14894, i64 1 + %tmp14896 = getelementptr inbounds float* %tmp14895, i64 1 + %tmp14897 = getelementptr inbounds float* %tmp14896, i64 1 + %tmp14898 = getelementptr inbounds float* %tmp14897, i64 1 + %tmp14899 = getelementptr inbounds float* %tmp14898, i64 1 + %tmp14900 = getelementptr inbounds float* %tmp14899, i64 1 + %tmp14901 = getelementptr inbounds float* %tmp14900, i64 1 + %tmp14902 = getelementptr inbounds float* %tmp14901, i64 1 + %tmp14903 = getelementptr inbounds float* %tmp14902, i64 1 + %tmp14904 = getelementptr inbounds float* %tmp14903, i64 1 + %tmp14905 = getelementptr inbounds float* %tmp14904, i64 1 + %tmp14906 = getelementptr inbounds float* %tmp14905, i64 1 + %tmp14907 = getelementptr inbounds float* %tmp14906, i64 1 + %tmp14908 = getelementptr inbounds float* %tmp14907, i64 1 + %tmp14909 = getelementptr inbounds float* %tmp14908, i64 1 + %tmp14910 = getelementptr inbounds float* %tmp14909, i64 1 + %tmp14911 = getelementptr inbounds float* %tmp14910, i64 1 + %tmp14912 = getelementptr inbounds float* %tmp14911, i64 1 + %tmp14913 = getelementptr inbounds float* 
%tmp14912, i64 1 + %tmp14914 = getelementptr inbounds float* %tmp14913, i64 1 + %tmp14915 = getelementptr inbounds float* %tmp14914, i64 1 + %tmp14916 = getelementptr inbounds float* %tmp14915, i64 1 + %tmp14917 = getelementptr inbounds float* %tmp14916, i64 1 + %tmp14918 = getelementptr inbounds float* %tmp14917, i64 1 + %tmp14919 = getelementptr inbounds float* %tmp14918, i64 1 + %tmp14920 = getelementptr inbounds float* %tmp14919, i64 1 + %tmp14921 = getelementptr inbounds float* %tmp14920, i64 1 + %tmp14922 = getelementptr inbounds float* %tmp14921, i64 1 + %tmp14923 = getelementptr inbounds float* %tmp14922, i64 1 + %tmp14924 = getelementptr inbounds float* %tmp14923, i64 1 + %tmp14925 = getelementptr inbounds float* %tmp14924, i64 1 + %tmp14926 = getelementptr inbounds float* %tmp14925, i64 1 + %tmp14927 = getelementptr inbounds float* %tmp14926, i64 1 + %tmp14928 = getelementptr inbounds float* %tmp14927, i64 1 + %tmp14929 = getelementptr inbounds float* %tmp14928, i64 1 + %tmp14930 = getelementptr inbounds float* %tmp14929, i64 1 + %tmp14931 = getelementptr inbounds float* %tmp14930, i64 1 + %tmp14932 = getelementptr inbounds float* %tmp14931, i64 1 + %tmp14933 = getelementptr inbounds float* %tmp14932, i64 1 + %tmp14934 = getelementptr inbounds float* %tmp14933, i64 1 + %tmp14935 = getelementptr inbounds float* %tmp14934, i64 1 + %tmp14936 = getelementptr inbounds float* %tmp14935, i64 1 + %tmp14937 = getelementptr inbounds float* %tmp14936, i64 1 + %tmp14938 = getelementptr inbounds float* %tmp14937, i64 1 + %tmp14939 = getelementptr inbounds float* %tmp14938, i64 1 + %tmp14940 = getelementptr inbounds float* %tmp14939, i64 1 + %tmp14941 = getelementptr inbounds float* %tmp14940, i64 1 + %tmp14942 = getelementptr inbounds float* %tmp14941, i64 1 + %tmp14943 = getelementptr inbounds float* %tmp14942, i64 1 + %tmp14944 = getelementptr inbounds float* %tmp14943, i64 1 + %tmp14945 = getelementptr inbounds float* %tmp14944, i64 1 + %tmp14946 = getelementptr 
inbounds float* %tmp14945, i64 1 + %tmp14947 = getelementptr inbounds float* %tmp14946, i64 1 + %tmp14948 = getelementptr inbounds float* %tmp14947, i64 1 + %tmp14949 = getelementptr inbounds float* %tmp14948, i64 1 + %tmp14950 = getelementptr inbounds float* %tmp14949, i64 1 + %tmp14951 = getelementptr inbounds float* %tmp14950, i64 1 + %tmp14952 = getelementptr inbounds float* %tmp14951, i64 1 + %tmp14953 = getelementptr inbounds float* %tmp14952, i64 1 + %tmp14954 = getelementptr inbounds float* %tmp14953, i64 1 + %tmp14955 = getelementptr inbounds float* %tmp14954, i64 1 + %tmp14956 = getelementptr inbounds float* %tmp14955, i64 1 + %tmp14957 = getelementptr inbounds float* %tmp14956, i64 1 + %tmp14958 = getelementptr inbounds float* %tmp14957, i64 1 + %tmp14959 = getelementptr inbounds float* %tmp14958, i64 1 + %tmp14960 = getelementptr inbounds float* %tmp14959, i64 1 + %tmp14961 = getelementptr inbounds float* %tmp14960, i64 1 + %tmp14962 = getelementptr inbounds float* %tmp14961, i64 1 + %tmp14963 = getelementptr inbounds float* %tmp14962, i64 1 + %tmp14964 = getelementptr inbounds float* %tmp14963, i64 1 + %tmp14965 = getelementptr inbounds float* %tmp14964, i64 1 + %tmp14966 = getelementptr inbounds float* %tmp14965, i64 1 + %tmp14967 = getelementptr inbounds float* %tmp14966, i64 1 + %tmp14968 = getelementptr inbounds float* %tmp14967, i64 1 + %tmp14969 = getelementptr inbounds float* %tmp14968, i64 1 + %tmp14970 = getelementptr inbounds float* %tmp14969, i64 1 + %tmp14971 = getelementptr inbounds float* %tmp14970, i64 1 + %tmp14972 = getelementptr inbounds float* %tmp14971, i64 1 + %tmp14973 = getelementptr inbounds float* %tmp14972, i64 1 + %tmp14974 = getelementptr inbounds float* %tmp14973, i64 1 + %tmp14975 = getelementptr inbounds float* %tmp14974, i64 1 + %tmp14976 = getelementptr inbounds float* %tmp14975, i64 1 + %tmp14977 = getelementptr inbounds float* %tmp14976, i64 1 + %tmp14978 = getelementptr inbounds float* %tmp14977, i64 1 + %tmp14979 = 
getelementptr inbounds float* %tmp14978, i64 1 + %tmp14980 = getelementptr inbounds float* %tmp14979, i64 1 + %tmp14981 = getelementptr inbounds float* %tmp14980, i64 1 + %tmp14982 = getelementptr inbounds float* %tmp14981, i64 1 + %tmp14983 = getelementptr inbounds float* %tmp14982, i64 1 + %tmp14984 = getelementptr inbounds float* %tmp14983, i64 1 + %tmp14985 = getelementptr inbounds float* %tmp14984, i64 1 + %tmp14986 = getelementptr inbounds float* %tmp14985, i64 1 + %tmp14987 = getelementptr inbounds float* %tmp14986, i64 1 + %tmp14988 = getelementptr inbounds float* %tmp14987, i64 1 + %tmp14989 = getelementptr inbounds float* %tmp14988, i64 1 + %tmp14990 = getelementptr inbounds float* %tmp14989, i64 1 + %tmp14991 = getelementptr inbounds float* %tmp14990, i64 1 + %tmp14992 = getelementptr inbounds float* %tmp14991, i64 1 + %tmp14993 = getelementptr inbounds float* %tmp14992, i64 1 + %tmp14994 = getelementptr inbounds float* %tmp14993, i64 1 + %tmp14995 = getelementptr inbounds float* %tmp14994, i64 1 + %tmp14996 = getelementptr inbounds float* %tmp14995, i64 1 + %tmp14997 = getelementptr inbounds float* %tmp14996, i64 1 + %tmp14998 = getelementptr inbounds float* %tmp14997, i64 1 + %tmp14999 = getelementptr inbounds float* %tmp14998, i64 1 + %tmp15000 = getelementptr inbounds float* %tmp14999, i64 1 + %tmp15001 = getelementptr inbounds float* %tmp15000, i64 1 + %tmp15002 = getelementptr inbounds float* %tmp15001, i64 1 + %tmp15003 = getelementptr inbounds float* %tmp15002, i64 1 + %tmp15004 = getelementptr inbounds float* %tmp15003, i64 1 + %tmp15005 = getelementptr inbounds float* %tmp15004, i64 1 + %tmp15006 = getelementptr inbounds float* %tmp15005, i64 1 + %tmp15007 = getelementptr inbounds float* %tmp15006, i64 1 + %tmp15008 = getelementptr inbounds float* %tmp15007, i64 1 + %tmp15009 = getelementptr inbounds float* %tmp15008, i64 1 + %tmp15010 = getelementptr inbounds float* %tmp15009, i64 1 + %tmp15011 = getelementptr inbounds float* %tmp15010, i64 1 
+ %tmp15012 = getelementptr inbounds float* %tmp15011, i64 1 + %tmp15013 = getelementptr inbounds float* %tmp15012, i64 1 + %tmp15014 = getelementptr inbounds float* %tmp15013, i64 1 + %tmp15015 = getelementptr inbounds float* %tmp15014, i64 1 + %tmp15016 = getelementptr inbounds float* %tmp15015, i64 1 + %tmp15017 = getelementptr inbounds float* %tmp15016, i64 1 + %tmp15018 = getelementptr inbounds float* %tmp15017, i64 1 + %tmp15019 = getelementptr inbounds float* %tmp15018, i64 1 + %tmp15020 = getelementptr inbounds float* %tmp15019, i64 1 + %tmp15021 = getelementptr inbounds float* %tmp15020, i64 1 + %tmp15022 = getelementptr inbounds float* %tmp15021, i64 1 + %tmp15023 = getelementptr inbounds float* %tmp15022, i64 1 + %tmp15024 = getelementptr inbounds float* %tmp15023, i64 1 + %tmp15025 = getelementptr inbounds float* %tmp15024, i64 1 + %tmp15026 = getelementptr inbounds float* %tmp15025, i64 1 + %tmp15027 = getelementptr inbounds float* %tmp15026, i64 1 + %tmp15028 = getelementptr inbounds float* %tmp15027, i64 1 + %tmp15029 = getelementptr inbounds float* %tmp15028, i64 1 + %tmp15030 = getelementptr inbounds float* %tmp15029, i64 1 + %tmp15031 = getelementptr inbounds float* %tmp15030, i64 1 + %tmp15032 = getelementptr inbounds float* %tmp15031, i64 1 + %tmp15033 = getelementptr inbounds float* %tmp15032, i64 1 + %tmp15034 = getelementptr inbounds float* %tmp15033, i64 1 + %tmp15035 = getelementptr inbounds float* %tmp15034, i64 1 + %tmp15036 = getelementptr inbounds float* %tmp15035, i64 1 + %tmp15037 = getelementptr inbounds float* %tmp15036, i64 1 + %tmp15038 = getelementptr inbounds float* %tmp15037, i64 1 + %tmp15039 = getelementptr inbounds float* %tmp15038, i64 1 + %tmp15040 = getelementptr inbounds float* %tmp15039, i64 1 + %tmp15041 = getelementptr inbounds float* %tmp15040, i64 1 + %tmp15042 = getelementptr inbounds float* %tmp15041, i64 1 + %tmp15043 = getelementptr inbounds float* %tmp15042, i64 1 + %tmp15044 = getelementptr inbounds float* 
%tmp15043, i64 1 + %tmp15045 = getelementptr inbounds float* %tmp15044, i64 1 + %tmp15046 = getelementptr inbounds float* %tmp15045, i64 1 + %tmp15047 = getelementptr inbounds float* %tmp15046, i64 1 + %tmp15048 = getelementptr inbounds float* %tmp15047, i64 1 + %tmp15049 = getelementptr inbounds float* %tmp15048, i64 1 + %tmp15050 = getelementptr inbounds float* %tmp15049, i64 1 + %tmp15051 = getelementptr inbounds float* %tmp15050, i64 1 + %tmp15052 = getelementptr inbounds float* %tmp15051, i64 1 + %tmp15053 = getelementptr inbounds float* %tmp15052, i64 1 + %tmp15054 = getelementptr inbounds float* %tmp15053, i64 1 + %tmp15055 = getelementptr inbounds float* %tmp15054, i64 1 + %tmp15056 = getelementptr inbounds float* %tmp15055, i64 1 + %tmp15057 = getelementptr inbounds float* %tmp15056, i64 1 + %tmp15058 = getelementptr inbounds float* %tmp15057, i64 1 + %tmp15059 = getelementptr inbounds float* %tmp15058, i64 1 + %tmp15060 = getelementptr inbounds float* %tmp15059, i64 1 + %tmp15061 = getelementptr inbounds float* %tmp15060, i64 1 + %tmp15062 = getelementptr inbounds float* %tmp15061, i64 1 + %tmp15063 = getelementptr inbounds float* %tmp15062, i64 1 + %tmp15064 = getelementptr inbounds float* %tmp15063, i64 1 + %tmp15065 = getelementptr inbounds float* %tmp15064, i64 1 + %tmp15066 = getelementptr inbounds float* %tmp15065, i64 1 + %tmp15067 = getelementptr inbounds float* %tmp15066, i64 1 + %tmp15068 = getelementptr inbounds float* %tmp15067, i64 1 + %tmp15069 = getelementptr inbounds float* %tmp15068, i64 1 + %tmp15070 = getelementptr inbounds float* %tmp15069, i64 1 + %tmp15071 = getelementptr inbounds float* %tmp15070, i64 1 + %tmp15072 = getelementptr inbounds float* %tmp15071, i64 1 + %tmp15073 = getelementptr inbounds float* %tmp15072, i64 1 + %tmp15074 = getelementptr inbounds float* %tmp15073, i64 1 + %tmp15075 = getelementptr inbounds float* %tmp15074, i64 1 + %tmp15076 = getelementptr inbounds float* %tmp15075, i64 1 + %tmp15077 = getelementptr 
inbounds float* %tmp15076, i64 1 + %tmp15078 = getelementptr inbounds float* %tmp15077, i64 1 + %tmp15079 = getelementptr inbounds float* %tmp15078, i64 1 + %tmp15080 = getelementptr inbounds float* %tmp15079, i64 1 + %tmp15081 = getelementptr inbounds float* %tmp15080, i64 1 + %tmp15082 = getelementptr inbounds float* %tmp15081, i64 1 + %tmp15083 = getelementptr inbounds float* %tmp15082, i64 1 + %tmp15084 = getelementptr inbounds float* %tmp15083, i64 1 + %tmp15085 = getelementptr inbounds float* %tmp15084, i64 1 + %tmp15086 = getelementptr inbounds float* %tmp15085, i64 1 + %tmp15087 = getelementptr inbounds float* %tmp15086, i64 1 + %tmp15088 = getelementptr inbounds float* %tmp15087, i64 1 + %tmp15089 = getelementptr inbounds float* %tmp15088, i64 1 + %tmp15090 = getelementptr inbounds float* %tmp15089, i64 1 + %tmp15091 = getelementptr inbounds float* %tmp15090, i64 1 + %tmp15092 = getelementptr inbounds float* %tmp15091, i64 1 + %tmp15093 = getelementptr inbounds float* %tmp15092, i64 1 + %tmp15094 = getelementptr inbounds float* %tmp15093, i64 1 + %tmp15095 = getelementptr inbounds float* %tmp15094, i64 1 + %tmp15096 = getelementptr inbounds float* %tmp15095, i64 1 + %tmp15097 = getelementptr inbounds float* %tmp15096, i64 1 + %tmp15098 = getelementptr inbounds float* %tmp15097, i64 1 + %tmp15099 = getelementptr inbounds float* %tmp15098, i64 1 + %tmp15100 = getelementptr inbounds float* %tmp15099, i64 1 + %tmp15101 = getelementptr inbounds float* %tmp15100, i64 1 + %tmp15102 = getelementptr inbounds float* %tmp15101, i64 1 + %tmp15103 = getelementptr inbounds float* %tmp15102, i64 1 + %tmp15104 = getelementptr inbounds float* %tmp15103, i64 1 + %tmp15105 = getelementptr inbounds float* %tmp15104, i64 1 + %tmp15106 = getelementptr inbounds float* %tmp15105, i64 1 + %tmp15107 = getelementptr inbounds float* %tmp15106, i64 1 + %tmp15108 = getelementptr inbounds float* %tmp15107, i64 1 + %tmp15109 = getelementptr inbounds float* %tmp15108, i64 1 + %tmp15110 = 
getelementptr inbounds float* %tmp15109, i64 1 + %tmp15111 = getelementptr inbounds float* %tmp15110, i64 1 + %tmp15112 = getelementptr inbounds float* %tmp15111, i64 1 + %tmp15113 = getelementptr inbounds float* %tmp15112, i64 1 + %tmp15114 = getelementptr inbounds float* %tmp15113, i64 1 + %tmp15115 = getelementptr inbounds float* %tmp15114, i64 1 + %tmp15116 = getelementptr inbounds float* %tmp15115, i64 1 + %tmp15117 = getelementptr inbounds float* %tmp15116, i64 1 + %tmp15118 = getelementptr inbounds float* %tmp15117, i64 1 + %tmp15119 = getelementptr inbounds float* %tmp15118, i64 1 + %tmp15120 = getelementptr inbounds float* %tmp15119, i64 1 + %tmp15121 = getelementptr inbounds float* %tmp15120, i64 1 + %tmp15122 = getelementptr inbounds float* %tmp15121, i64 1 + %tmp15123 = getelementptr inbounds float* %tmp15122, i64 1 + %tmp15124 = getelementptr inbounds float* %tmp15123, i64 1 + %tmp15125 = getelementptr inbounds float* %tmp15124, i64 1 + %tmp15126 = getelementptr inbounds float* %tmp15125, i64 1 + %tmp15127 = getelementptr inbounds float* %tmp15126, i64 1 + %tmp15128 = getelementptr inbounds float* %tmp15127, i64 1 + %tmp15129 = getelementptr inbounds float* %tmp15128, i64 1 + %tmp15130 = getelementptr inbounds float* %tmp15129, i64 1 + %tmp15131 = getelementptr inbounds float* %tmp15130, i64 1 + %tmp15132 = getelementptr inbounds float* %tmp15131, i64 1 + %tmp15133 = getelementptr inbounds float* %tmp15132, i64 1 + %tmp15134 = getelementptr inbounds float* %tmp15133, i64 1 + %tmp15135 = getelementptr inbounds float* %tmp15134, i64 1 + %tmp15136 = getelementptr inbounds float* %tmp15135, i64 1 + %tmp15137 = getelementptr inbounds float* %tmp15136, i64 1 + %tmp15138 = getelementptr inbounds float* %tmp15137, i64 1 + %tmp15139 = getelementptr inbounds float* %tmp15138, i64 1 + %tmp15140 = getelementptr inbounds float* %tmp15139, i64 1 + %tmp15141 = getelementptr inbounds float* %tmp15140, i64 1 + %tmp15142 = getelementptr inbounds float* %tmp15141, i64 1 
+ %tmp15143 = getelementptr inbounds float* %tmp15142, i64 1 + %tmp15144 = getelementptr inbounds float* %tmp15143, i64 1 + %tmp15145 = getelementptr inbounds float* %tmp15144, i64 1 + %tmp15146 = getelementptr inbounds float* %tmp15145, i64 1 + %tmp15147 = getelementptr inbounds float* %tmp15146, i64 1 + %tmp15148 = getelementptr inbounds float* %tmp15147, i64 1 + %tmp15149 = getelementptr inbounds float* %tmp15148, i64 1 + %tmp15150 = getelementptr inbounds float* %tmp15149, i64 1 + %tmp15151 = getelementptr inbounds float* %tmp15150, i64 1 + %tmp15152 = getelementptr inbounds float* %tmp15151, i64 1 + %tmp15153 = getelementptr inbounds float* %tmp15152, i64 1 + %tmp15154 = getelementptr inbounds float* %tmp15153, i64 1 + %tmp15155 = getelementptr inbounds float* %tmp15154, i64 1 + %tmp15156 = getelementptr inbounds float* %tmp15155, i64 1 + %tmp15157 = getelementptr inbounds float* %tmp15156, i64 1 + %tmp15158 = getelementptr inbounds float* %tmp15157, i64 1 + %tmp15159 = getelementptr inbounds float* %tmp15158, i64 1 + %tmp15160 = getelementptr inbounds float* %tmp15159, i64 1 + %tmp15161 = getelementptr inbounds float* %tmp15160, i64 1 + %tmp15162 = getelementptr inbounds float* %tmp15161, i64 1 + %tmp15163 = getelementptr inbounds float* %tmp15162, i64 1 + %tmp15164 = getelementptr inbounds float* %tmp15163, i64 1 + %tmp15165 = getelementptr inbounds float* %tmp15164, i64 1 + %tmp15166 = getelementptr inbounds float* %tmp15165, i64 1 + %tmp15167 = getelementptr inbounds float* %tmp15166, i64 1 + %tmp15168 = getelementptr inbounds float* %tmp15167, i64 1 + %tmp15169 = getelementptr inbounds float* %tmp15168, i64 1 + %tmp15170 = getelementptr inbounds float* %tmp15169, i64 1 + %tmp15171 = getelementptr inbounds float* %tmp15170, i64 1 + %tmp15172 = getelementptr inbounds float* %tmp15171, i64 1 + %tmp15173 = getelementptr inbounds float* %tmp15172, i64 1 + %tmp15174 = getelementptr inbounds float* %tmp15173, i64 1 + %tmp15175 = getelementptr inbounds float* 
%tmp15174, i64 1 + %tmp15176 = getelementptr inbounds float* %tmp15175, i64 1 + %tmp15177 = getelementptr inbounds float* %tmp15176, i64 1 + %tmp15178 = getelementptr inbounds float* %tmp15177, i64 1 + %tmp15179 = getelementptr inbounds float* %tmp15178, i64 1 + %tmp15180 = getelementptr inbounds float* %tmp15179, i64 1 + %tmp15181 = getelementptr inbounds float* %tmp15180, i64 1 + %tmp15182 = getelementptr inbounds float* %tmp15181, i64 1 + %tmp15183 = getelementptr inbounds float* %tmp15182, i64 1 + %tmp15184 = getelementptr inbounds float* %tmp15183, i64 1 + %tmp15185 = getelementptr inbounds float* %tmp15184, i64 1 + %tmp15186 = getelementptr inbounds float* %tmp15185, i64 1 + %tmp15187 = getelementptr inbounds float* %tmp15186, i64 1 + %tmp15188 = getelementptr inbounds float* %tmp15187, i64 1 + %tmp15189 = getelementptr inbounds float* %tmp15188, i64 1 + %tmp15190 = getelementptr inbounds float* %tmp15189, i64 1 + %tmp15191 = getelementptr inbounds float* %tmp15190, i64 1 + %tmp15192 = getelementptr inbounds float* %tmp15191, i64 1 + %tmp15193 = getelementptr inbounds float* %tmp15192, i64 1 + %tmp15194 = getelementptr inbounds float* %tmp15193, i64 1 + %tmp15195 = getelementptr inbounds float* %tmp15194, i64 1 + %tmp15196 = getelementptr inbounds float* %tmp15195, i64 1 + %tmp15197 = getelementptr inbounds float* %tmp15196, i64 1 + %tmp15198 = getelementptr inbounds float* %tmp15197, i64 1 + %tmp15199 = getelementptr inbounds float* %tmp15198, i64 1 + %tmp15200 = getelementptr inbounds float* %tmp15199, i64 1 + %tmp15201 = getelementptr inbounds float* %tmp15200, i64 1 + %tmp15202 = getelementptr inbounds float* %tmp15201, i64 1 + %tmp15203 = getelementptr inbounds float* %tmp15202, i64 1 + %tmp15204 = getelementptr inbounds float* %tmp15203, i64 1 + %tmp15205 = getelementptr inbounds float* %tmp15204, i64 1 + %tmp15206 = getelementptr inbounds float* %tmp15205, i64 1 + %tmp15207 = getelementptr inbounds float* %tmp15206, i64 1 + %tmp15208 = getelementptr 
inbounds float* %tmp15207, i64 1 + %tmp15209 = getelementptr inbounds float* %tmp15208, i64 1 + %tmp15210 = getelementptr inbounds float* %tmp15209, i64 1 + %tmp15211 = getelementptr inbounds float* %tmp15210, i64 1 + %tmp15212 = getelementptr inbounds float* %tmp15211, i64 1 + %tmp15213 = getelementptr inbounds float* %tmp15212, i64 1 + %tmp15214 = getelementptr inbounds float* %tmp15213, i64 1 + %tmp15215 = getelementptr inbounds float* %tmp15214, i64 1 + %tmp15216 = getelementptr inbounds float* %tmp15215, i64 1 + %tmp15217 = getelementptr inbounds float* %tmp15216, i64 1 + %tmp15218 = getelementptr inbounds float* %tmp15217, i64 1 + %tmp15219 = getelementptr inbounds float* %tmp15218, i64 1 + %tmp15220 = getelementptr inbounds float* %tmp15219, i64 1 + %tmp15221 = getelementptr inbounds float* %tmp15220, i64 1 + %tmp15222 = getelementptr inbounds float* %tmp15221, i64 1 + %tmp15223 = getelementptr inbounds float* %tmp15222, i64 1 + %tmp15224 = getelementptr inbounds float* %tmp15223, i64 1 + %tmp15225 = getelementptr inbounds float* %tmp15224, i64 1 + %tmp15226 = getelementptr inbounds float* %tmp15225, i64 1 + %tmp15227 = getelementptr inbounds float* %tmp15226, i64 1 + %tmp15228 = getelementptr inbounds float* %tmp15227, i64 1 + %tmp15229 = getelementptr inbounds float* %tmp15228, i64 1 + %tmp15230 = getelementptr inbounds float* %tmp15229, i64 1 + %tmp15231 = getelementptr inbounds float* %tmp15230, i64 1 + %tmp15232 = getelementptr inbounds float* %tmp15231, i64 1 + %tmp15233 = getelementptr inbounds float* %tmp15232, i64 1 + %tmp15234 = getelementptr inbounds float* %tmp15233, i64 1 + %tmp15235 = getelementptr inbounds float* %tmp15234, i64 1 + %tmp15236 = getelementptr inbounds float* %tmp15235, i64 1 + %tmp15237 = getelementptr inbounds float* %tmp15236, i64 1 + %tmp15238 = getelementptr inbounds float* %tmp15237, i64 1 + %tmp15239 = getelementptr inbounds float* %tmp15238, i64 1 + %tmp15240 = getelementptr inbounds float* %tmp15239, i64 1 + %tmp15241 = 
getelementptr inbounds float* %tmp15240, i64 1 + %tmp15242 = getelementptr inbounds float* %tmp15241, i64 1 + %tmp15243 = getelementptr inbounds float* %tmp15242, i64 1 + %tmp15244 = getelementptr inbounds float* %tmp15243, i64 1 + %tmp15245 = getelementptr inbounds float* %tmp15244, i64 1 + %tmp15246 = getelementptr inbounds float* %tmp15245, i64 1 + %tmp15247 = getelementptr inbounds float* %tmp15246, i64 1 + %tmp15248 = getelementptr inbounds float* %tmp15247, i64 1 + %tmp15249 = getelementptr inbounds float* %tmp15248, i64 1 + %tmp15250 = getelementptr inbounds float* %tmp15249, i64 1 + %tmp15251 = getelementptr inbounds float* %tmp15250, i64 1 + %tmp15252 = getelementptr inbounds float* %tmp15251, i64 1 + %tmp15253 = getelementptr inbounds float* %tmp15252, i64 1 + %tmp15254 = getelementptr inbounds float* %tmp15253, i64 1 + %tmp15255 = getelementptr inbounds float* %tmp15254, i64 1 + %tmp15256 = getelementptr inbounds float* %tmp15255, i64 1 + %tmp15257 = getelementptr inbounds float* %tmp15256, i64 1 + %tmp15258 = getelementptr inbounds float* %tmp15257, i64 1 + %tmp15259 = getelementptr inbounds float* %tmp15258, i64 1 + %tmp15260 = getelementptr inbounds float* %tmp15259, i64 1 + %tmp15261 = getelementptr inbounds float* %tmp15260, i64 1 + %tmp15262 = getelementptr inbounds float* %tmp15261, i64 1 + %tmp15263 = getelementptr inbounds float* %tmp15262, i64 1 + %tmp15264 = getelementptr inbounds float* %tmp15263, i64 1 + %tmp15265 = getelementptr inbounds float* %tmp15264, i64 1 + %tmp15266 = getelementptr inbounds float* %tmp15265, i64 1 + %tmp15267 = getelementptr inbounds float* %tmp15266, i64 1 + %tmp15268 = getelementptr inbounds float* %tmp15267, i64 1 + %tmp15269 = getelementptr inbounds float* %tmp15268, i64 1 + %tmp15270 = getelementptr inbounds float* %tmp15269, i64 1 + %tmp15271 = getelementptr inbounds float* %tmp15270, i64 1 + %tmp15272 = getelementptr inbounds float* %tmp15271, i64 1 + %tmp15273 = getelementptr inbounds float* %tmp15272, i64 1 
+ %tmp15274 = getelementptr inbounds float* %tmp15273, i64 1 + %tmp15275 = getelementptr inbounds float* %tmp15274, i64 1 + %tmp15276 = getelementptr inbounds float* %tmp15275, i64 1 + %tmp15277 = getelementptr inbounds float* %tmp15276, i64 1 + %tmp15278 = getelementptr inbounds float* %tmp15277, i64 1 + %tmp15279 = getelementptr inbounds float* %tmp15278, i64 1 + %tmp15280 = getelementptr inbounds float* %tmp15279, i64 1 + %tmp15281 = getelementptr inbounds float* %tmp15280, i64 1 + %tmp15282 = getelementptr inbounds float* %tmp15281, i64 1 + %tmp15283 = getelementptr inbounds float* %tmp15282, i64 1 + %tmp15284 = getelementptr inbounds float* %tmp15283, i64 1 + %tmp15285 = getelementptr inbounds float* %tmp15284, i64 1 + %tmp15286 = getelementptr inbounds float* %tmp15285, i64 1 + %tmp15287 = getelementptr inbounds float* %tmp15286, i64 1 + %tmp15288 = getelementptr inbounds float* %tmp15287, i64 1 + %tmp15289 = getelementptr inbounds float* %tmp15288, i64 1 + %tmp15290 = getelementptr inbounds float* %tmp15289, i64 1 + %tmp15291 = getelementptr inbounds float* %tmp15290, i64 1 + %tmp15292 = getelementptr inbounds float* %tmp15291, i64 1 + %tmp15293 = getelementptr inbounds float* %tmp15292, i64 1 + %tmp15294 = getelementptr inbounds float* %tmp15293, i64 1 + %tmp15295 = getelementptr inbounds float* %tmp15294, i64 1 + %tmp15296 = getelementptr inbounds float* %tmp15295, i64 1 + %tmp15297 = getelementptr inbounds float* %tmp15296, i64 1 + %tmp15298 = getelementptr inbounds float* %tmp15297, i64 1 + %tmp15299 = getelementptr inbounds float* %tmp15298, i64 1 + %tmp15300 = getelementptr inbounds float* %tmp15299, i64 1 + %tmp15301 = getelementptr inbounds float* %tmp15300, i64 1 + %tmp15302 = getelementptr inbounds float* %tmp15301, i64 1 + %tmp15303 = getelementptr inbounds float* %tmp15302, i64 1 + %tmp15304 = getelementptr inbounds float* %tmp15303, i64 1 + %tmp15305 = getelementptr inbounds float* %tmp15304, i64 1 + %tmp15306 = getelementptr inbounds float* 
%tmp15305, i64 1 + %tmp15307 = getelementptr inbounds float* %tmp15306, i64 1 + %tmp15308 = getelementptr inbounds float* %tmp15307, i64 1 + %tmp15309 = getelementptr inbounds float* %tmp15308, i64 1 + %tmp15310 = getelementptr inbounds float* %tmp15309, i64 1 + %tmp15311 = getelementptr inbounds float* %tmp15310, i64 1 + %tmp15312 = getelementptr inbounds float* %tmp15311, i64 1 + %tmp15313 = getelementptr inbounds float* %tmp15312, i64 1 + %tmp15314 = getelementptr inbounds float* %tmp15313, i64 1 + %tmp15315 = getelementptr inbounds float* %tmp15314, i64 1 + %tmp15316 = getelementptr inbounds float* %tmp15315, i64 1 + %tmp15317 = getelementptr inbounds float* %tmp15316, i64 1 + %tmp15318 = getelementptr inbounds float* %tmp15317, i64 1 + %tmp15319 = getelementptr inbounds float* %tmp15318, i64 1 + %tmp15320 = getelementptr inbounds float* %tmp15319, i64 1 + %tmp15321 = getelementptr inbounds float* %tmp15320, i64 1 + %tmp15322 = getelementptr inbounds float* %tmp15321, i64 1 + %tmp15323 = getelementptr inbounds float* %tmp15322, i64 1 + %tmp15324 = getelementptr inbounds float* %tmp15323, i64 1 + %tmp15325 = getelementptr inbounds float* %tmp15324, i64 1 + %tmp15326 = getelementptr inbounds float* %tmp15325, i64 1 + %tmp15327 = getelementptr inbounds float* %tmp15326, i64 1 + %tmp15328 = getelementptr inbounds float* %tmp15327, i64 1 + %tmp15329 = getelementptr inbounds float* %tmp15328, i64 1 + %tmp15330 = getelementptr inbounds float* %tmp15329, i64 1 + %tmp15331 = getelementptr inbounds float* %tmp15330, i64 1 + %tmp15332 = getelementptr inbounds float* %tmp15331, i64 1 + %tmp15333 = getelementptr inbounds float* %tmp15332, i64 1 + %tmp15334 = getelementptr inbounds float* %tmp15333, i64 1 + %tmp15335 = getelementptr inbounds float* %tmp15334, i64 1 + %tmp15336 = getelementptr inbounds float* %tmp15335, i64 1 + %tmp15337 = getelementptr inbounds float* %tmp15336, i64 1 + %tmp15338 = getelementptr inbounds float* %tmp15337, i64 1 + %tmp15339 = getelementptr 
inbounds float* %tmp15338, i64 1 + %tmp15340 = getelementptr inbounds float* %tmp15339, i64 1 + %tmp15341 = getelementptr inbounds float* %tmp15340, i64 1 + %tmp15342 = getelementptr inbounds float* %tmp15341, i64 1 + %tmp15343 = getelementptr inbounds float* %tmp15342, i64 1 + %tmp15344 = getelementptr inbounds float* %tmp15343, i64 1 + %tmp15345 = getelementptr inbounds float* %tmp15344, i64 1 + %tmp15346 = getelementptr inbounds float* %tmp15345, i64 1 + %tmp15347 = getelementptr inbounds float* %tmp15346, i64 1 + %tmp15348 = getelementptr inbounds float* %tmp15347, i64 1 + %tmp15349 = getelementptr inbounds float* %tmp15348, i64 1 + %tmp15350 = getelementptr inbounds float* %tmp15349, i64 1 + %tmp15351 = getelementptr inbounds float* %tmp15350, i64 1 + %tmp15352 = getelementptr inbounds float* %tmp15351, i64 1 + %tmp15353 = getelementptr inbounds float* %tmp15352, i64 1 + %tmp15354 = getelementptr inbounds float* %tmp15353, i64 1 + %tmp15355 = getelementptr inbounds float* %tmp15354, i64 1 + %tmp15356 = getelementptr inbounds float* %tmp15355, i64 1 + %tmp15357 = getelementptr inbounds float* %tmp15356, i64 1 + %tmp15358 = getelementptr inbounds float* %tmp15357, i64 1 + %tmp15359 = getelementptr inbounds float* %tmp15358, i64 1 + %tmp15360 = getelementptr inbounds float* %tmp15359, i64 1 + %tmp15361 = getelementptr inbounds float* %tmp15360, i64 1 + %tmp15362 = getelementptr inbounds float* %tmp15361, i64 1 + %tmp15363 = getelementptr inbounds float* %tmp15362, i64 1 + %tmp15364 = getelementptr inbounds float* %tmp15363, i64 1 + %tmp15365 = getelementptr inbounds float* %tmp15364, i64 1 + %tmp15366 = getelementptr inbounds float* %tmp15365, i64 1 + %tmp15367 = getelementptr inbounds float* %tmp15366, i64 1 + %tmp15368 = getelementptr inbounds float* %tmp15367, i64 1 + %tmp15369 = getelementptr inbounds float* %tmp15368, i64 1 + %tmp15370 = getelementptr inbounds float* %tmp15369, i64 1 + %tmp15371 = getelementptr inbounds float* %tmp15370, i64 1 + %tmp15372 = 
getelementptr inbounds float* %tmp15371, i64 1 + %tmp15373 = getelementptr inbounds float* %tmp15372, i64 1 + %tmp15374 = getelementptr inbounds float* %tmp15373, i64 1 + %tmp15375 = getelementptr inbounds float* %tmp15374, i64 1 + %tmp15376 = getelementptr inbounds float* %tmp15375, i64 1 + %tmp15377 = getelementptr inbounds float* %tmp15376, i64 1 + %tmp15378 = getelementptr inbounds float* %tmp15377, i64 1 + %tmp15379 = getelementptr inbounds float* %tmp15378, i64 1 + %tmp15380 = getelementptr inbounds float* %tmp15379, i64 1 + %tmp15381 = getelementptr inbounds float* %tmp15380, i64 1 + %tmp15382 = getelementptr inbounds float* %tmp15381, i64 1 + %tmp15383 = getelementptr inbounds float* %tmp15382, i64 1 + %tmp15384 = getelementptr inbounds float* %tmp15383, i64 1 + %tmp15385 = getelementptr inbounds float* %tmp15384, i64 1 + %tmp15386 = getelementptr inbounds float* %tmp15385, i64 1 + %tmp15387 = getelementptr inbounds float* %tmp15386, i64 1 + %tmp15388 = getelementptr inbounds float* %tmp15387, i64 1 + %tmp15389 = getelementptr inbounds float* %tmp15388, i64 1 + %tmp15390 = getelementptr inbounds float* %tmp15389, i64 1 + %tmp15391 = getelementptr inbounds float* %tmp15390, i64 1 + %tmp15392 = getelementptr inbounds float* %tmp15391, i64 1 + %tmp15393 = getelementptr inbounds float* %tmp15392, i64 1 + %tmp15394 = getelementptr inbounds float* %tmp15393, i64 1 + %tmp15395 = getelementptr inbounds float* %tmp15394, i64 1 + %tmp15396 = getelementptr inbounds float* %tmp15395, i64 1 + %tmp15397 = getelementptr inbounds float* %tmp15396, i64 1 + %tmp15398 = getelementptr inbounds float* %tmp15397, i64 1 + %tmp15399 = getelementptr inbounds float* %tmp15398, i64 1 + %tmp15400 = getelementptr inbounds float* %tmp15399, i64 1 + %tmp15401 = getelementptr inbounds float* %tmp15400, i64 1 + %tmp15402 = getelementptr inbounds float* %tmp15401, i64 1 + %tmp15403 = getelementptr inbounds float* %tmp15402, i64 1 + %tmp15404 = getelementptr inbounds float* %tmp15403, i64 1 
+ %tmp15405 = getelementptr inbounds float* %tmp15404, i64 1 + %tmp15406 = getelementptr inbounds float* %tmp15405, i64 1 + %tmp15407 = getelementptr inbounds float* %tmp15406, i64 1 + %tmp15408 = getelementptr inbounds float* %tmp15407, i64 1 + %tmp15409 = getelementptr inbounds float* %tmp15408, i64 1 + %tmp15410 = getelementptr inbounds float* %tmp15409, i64 1 + %tmp15411 = getelementptr inbounds float* %tmp15410, i64 1 + %tmp15412 = getelementptr inbounds float* %tmp15411, i64 1 + %tmp15413 = getelementptr inbounds float* %tmp15412, i64 1 + %tmp15414 = getelementptr inbounds float* %tmp15413, i64 1 + %tmp15415 = getelementptr inbounds float* %tmp15414, i64 1 + %tmp15416 = getelementptr inbounds float* %tmp15415, i64 1 + %tmp15417 = getelementptr inbounds float* %tmp15416, i64 1 + %tmp15418 = getelementptr inbounds float* %tmp15417, i64 1 + %tmp15419 = getelementptr inbounds float* %tmp15418, i64 1 + %tmp15420 = getelementptr inbounds float* %tmp15419, i64 1 + %tmp15421 = getelementptr inbounds float* %tmp15420, i64 1 + %tmp15422 = getelementptr inbounds float* %tmp15421, i64 1 + %tmp15423 = getelementptr inbounds float* %tmp15422, i64 1 + %tmp15424 = getelementptr inbounds float* %tmp15423, i64 1 + %tmp15425 = getelementptr inbounds float* %tmp15424, i64 1 + %tmp15426 = getelementptr inbounds float* %tmp15425, i64 1 + %tmp15427 = getelementptr inbounds float* %tmp15426, i64 1 + %tmp15428 = getelementptr inbounds float* %tmp15427, i64 1 + %tmp15429 = getelementptr inbounds float* %tmp15428, i64 1 + %tmp15430 = getelementptr inbounds float* %tmp15429, i64 1 + %tmp15431 = getelementptr inbounds float* %tmp15430, i64 1 + %tmp15432 = getelementptr inbounds float* %tmp15431, i64 1 + %tmp15433 = getelementptr inbounds float* %tmp15432, i64 1 + %tmp15434 = getelementptr inbounds float* %tmp15433, i64 1 + %tmp15435 = getelementptr inbounds float* %tmp15434, i64 1 + %tmp15436 = getelementptr inbounds float* %tmp15435, i64 1 + %tmp15437 = getelementptr inbounds float* 
%tmp15436, i64 1 + %tmp15438 = getelementptr inbounds float* %tmp15437, i64 1 + %tmp15439 = getelementptr inbounds float* %tmp15438, i64 1 + %tmp15440 = getelementptr inbounds float* %tmp15439, i64 1 + %tmp15441 = getelementptr inbounds float* %tmp15440, i64 1 + %tmp15442 = getelementptr inbounds float* %tmp15441, i64 1 + %tmp15443 = getelementptr inbounds float* %tmp15442, i64 1 + %tmp15444 = getelementptr inbounds float* %tmp15443, i64 1 + %tmp15445 = getelementptr inbounds float* %tmp15444, i64 1 + %tmp15446 = getelementptr inbounds float* %tmp15445, i64 1 + %tmp15447 = getelementptr inbounds float* %tmp15446, i64 1 + %tmp15448 = getelementptr inbounds float* %tmp15447, i64 1 + %tmp15449 = getelementptr inbounds float* %tmp15448, i64 1 + %tmp15450 = getelementptr inbounds float* %tmp15449, i64 1 + %tmp15451 = getelementptr inbounds float* %tmp15450, i64 1 + %tmp15452 = getelementptr inbounds float* %tmp15451, i64 1 + %tmp15453 = getelementptr inbounds float* %tmp15452, i64 1 + %tmp15454 = getelementptr inbounds float* %tmp15453, i64 1 + %tmp15455 = getelementptr inbounds float* %tmp15454, i64 1 + %tmp15456 = getelementptr inbounds float* %tmp15455, i64 1 + %tmp15457 = getelementptr inbounds float* %tmp15456, i64 1 + %tmp15458 = getelementptr inbounds float* %tmp15457, i64 1 + %tmp15459 = getelementptr inbounds float* %tmp15458, i64 1 + %tmp15460 = getelementptr inbounds float* %tmp15459, i64 1 + %tmp15461 = getelementptr inbounds float* %tmp15460, i64 1 + %tmp15462 = getelementptr inbounds float* %tmp15461, i64 1 + %tmp15463 = getelementptr inbounds float* %tmp15462, i64 1 + %tmp15464 = getelementptr inbounds float* %tmp15463, i64 1 + %tmp15465 = getelementptr inbounds float* %tmp15464, i64 1 + %tmp15466 = getelementptr inbounds float* %tmp15465, i64 1 + %tmp15467 = getelementptr inbounds float* %tmp15466, i64 1 + %tmp15468 = getelementptr inbounds float* %tmp15467, i64 1 + %tmp15469 = getelementptr inbounds float* %tmp15468, i64 1 + %tmp15470 = getelementptr 
inbounds float* %tmp15469, i64 1 + %tmp15471 = getelementptr inbounds float* %tmp15470, i64 1 + %tmp15472 = getelementptr inbounds float* %tmp15471, i64 1 + %tmp15473 = getelementptr inbounds float* %tmp15472, i64 1 + %tmp15474 = getelementptr inbounds float* %tmp15473, i64 1 + %tmp15475 = getelementptr inbounds float* %tmp15474, i64 1 + %tmp15476 = getelementptr inbounds float* %tmp15475, i64 1 + %tmp15477 = getelementptr inbounds float* %tmp15476, i64 1 + %tmp15478 = getelementptr inbounds float* %tmp15477, i64 1 + %tmp15479 = getelementptr inbounds float* %tmp15478, i64 1 + %tmp15480 = getelementptr inbounds float* %tmp15479, i64 1 + %tmp15481 = getelementptr inbounds float* %tmp15480, i64 1 + %tmp15482 = getelementptr inbounds float* %tmp15481, i64 1 + %tmp15483 = getelementptr inbounds float* %tmp15482, i64 1 + %tmp15484 = getelementptr inbounds float* %tmp15483, i64 1 + %tmp15485 = getelementptr inbounds float* %tmp15484, i64 1 + %tmp15486 = getelementptr inbounds float* %tmp15485, i64 1 + %tmp15487 = getelementptr inbounds float* %tmp15486, i64 1 + %tmp15488 = getelementptr inbounds float* %tmp15487, i64 1 + %tmp15489 = getelementptr inbounds float* %tmp15488, i64 1 + %tmp15490 = getelementptr inbounds float* %tmp15489, i64 1 + %tmp15491 = getelementptr inbounds float* %tmp15490, i64 1 + %tmp15492 = getelementptr inbounds float* %tmp15491, i64 1 + %tmp15493 = getelementptr inbounds float* %tmp15492, i64 1 + %tmp15494 = getelementptr inbounds float* %tmp15493, i64 1 + %tmp15495 = getelementptr inbounds float* %tmp15494, i64 1 + %tmp15496 = getelementptr inbounds float* %tmp15495, i64 1 + %tmp15497 = getelementptr inbounds float* %tmp15496, i64 1 + %tmp15498 = getelementptr inbounds float* %tmp15497, i64 1 + %tmp15499 = getelementptr inbounds float* %tmp15498, i64 1 + %tmp15500 = getelementptr inbounds float* %tmp15499, i64 1 + %tmp15501 = getelementptr inbounds float* %tmp15500, i64 1 + %tmp15502 = getelementptr inbounds float* %tmp15501, i64 1 + %tmp15503 = 
getelementptr inbounds float* %tmp15502, i64 1 + %tmp15504 = getelementptr inbounds float* %tmp15503, i64 1 + %tmp15505 = getelementptr inbounds float* %tmp15504, i64 1 + %tmp15506 = getelementptr inbounds float* %tmp15505, i64 1 + %tmp15507 = getelementptr inbounds float* %tmp15506, i64 1 + %tmp15508 = getelementptr inbounds float* %tmp15507, i64 1 + %tmp15509 = getelementptr inbounds float* %tmp15508, i64 1 + %tmp15510 = getelementptr inbounds float* %tmp15509, i64 1 + %tmp15511 = getelementptr inbounds float* %tmp15510, i64 1 + %tmp15512 = getelementptr inbounds float* %tmp15511, i64 1 + %tmp15513 = getelementptr inbounds float* %tmp15512, i64 1 + %tmp15514 = getelementptr inbounds float* %tmp15513, i64 1 + %tmp15515 = getelementptr inbounds float* %tmp15514, i64 1 + %tmp15516 = getelementptr inbounds float* %tmp15515, i64 1 + %tmp15517 = getelementptr inbounds float* %tmp15516, i64 1 + %tmp15518 = getelementptr inbounds float* %tmp15517, i64 1 + %tmp15519 = getelementptr inbounds float* %tmp15518, i64 1 + %tmp15520 = getelementptr inbounds float* %tmp15519, i64 1 + %tmp15521 = getelementptr inbounds float* %tmp15520, i64 1 + %tmp15522 = getelementptr inbounds float* %tmp15521, i64 1 + %tmp15523 = getelementptr inbounds float* %tmp15522, i64 1 + %tmp15524 = getelementptr inbounds float* %tmp15523, i64 1 + %tmp15525 = getelementptr inbounds float* %tmp15524, i64 1 + %tmp15526 = getelementptr inbounds float* %tmp15525, i64 1 + %tmp15527 = getelementptr inbounds float* %tmp15526, i64 1 + %tmp15528 = getelementptr inbounds float* %tmp15527, i64 1 + %tmp15529 = getelementptr inbounds float* %tmp15528, i64 1 + %tmp15530 = getelementptr inbounds float* %tmp15529, i64 1 + %tmp15531 = getelementptr inbounds float* %tmp15530, i64 1 + %tmp15532 = getelementptr inbounds float* %tmp15531, i64 1 + %tmp15533 = getelementptr inbounds float* %tmp15532, i64 1 + %tmp15534 = getelementptr inbounds float* %tmp15533, i64 1 + %tmp15535 = getelementptr inbounds float* %tmp15534, i64 1 
+ %tmp15536 = getelementptr inbounds float* %tmp15535, i64 1 + %tmp15537 = getelementptr inbounds float* %tmp15536, i64 1 + %tmp15538 = getelementptr inbounds float* %tmp15537, i64 1 + %tmp15539 = getelementptr inbounds float* %tmp15538, i64 1 + %tmp15540 = getelementptr inbounds float* %tmp15539, i64 1 + %tmp15541 = getelementptr inbounds float* %tmp15540, i64 1 + %tmp15542 = getelementptr inbounds float* %tmp15541, i64 1 + %tmp15543 = getelementptr inbounds float* %tmp15542, i64 1 + %tmp15544 = getelementptr inbounds float* %tmp15543, i64 1 + %tmp15545 = getelementptr inbounds float* %tmp15544, i64 1 + %tmp15546 = getelementptr inbounds float* %tmp15545, i64 1 + %tmp15547 = getelementptr inbounds float* %tmp15546, i64 1 + %tmp15548 = getelementptr inbounds float* %tmp15547, i64 1 + %tmp15549 = getelementptr inbounds float* %tmp15548, i64 1 + %tmp15550 = getelementptr inbounds float* %tmp15549, i64 1 + %tmp15551 = getelementptr inbounds float* %tmp15550, i64 1 + %tmp15552 = getelementptr inbounds float* %tmp15551, i64 1 + %tmp15553 = getelementptr inbounds float* %tmp15552, i64 1 + %tmp15554 = getelementptr inbounds float* %tmp15553, i64 1 + %tmp15555 = getelementptr inbounds float* %tmp15554, i64 1 + %tmp15556 = getelementptr inbounds float* %tmp15555, i64 1 + %tmp15557 = getelementptr inbounds float* %tmp15556, i64 1 + %tmp15558 = getelementptr inbounds float* %tmp15557, i64 1 + %tmp15559 = getelementptr inbounds float* %tmp15558, i64 1 + %tmp15560 = getelementptr inbounds float* %tmp15559, i64 1 + %tmp15561 = getelementptr inbounds float* %tmp15560, i64 1 + %tmp15562 = getelementptr inbounds float* %tmp15561, i64 1 + %tmp15563 = getelementptr inbounds float* %tmp15562, i64 1 + %tmp15564 = getelementptr inbounds float* %tmp15563, i64 1 + %tmp15565 = getelementptr inbounds float* %tmp15564, i64 1 + %tmp15566 = getelementptr inbounds float* %tmp15565, i64 1 + %tmp15567 = getelementptr inbounds float* %tmp15566, i64 1 + %tmp15568 = getelementptr inbounds float* 
%tmp15567, i64 1 + %tmp15569 = getelementptr inbounds float* %tmp15568, i64 1 + %tmp15570 = getelementptr inbounds float* %tmp15569, i64 1 + %tmp15571 = getelementptr inbounds float* %tmp15570, i64 1 + %tmp15572 = getelementptr inbounds float* %tmp15571, i64 1 + %tmp15573 = getelementptr inbounds float* %tmp15572, i64 1 + %tmp15574 = getelementptr inbounds float* %tmp15573, i64 1 + %tmp15575 = getelementptr inbounds float* %tmp15574, i64 1 + %tmp15576 = getelementptr inbounds float* %tmp15575, i64 1 + %tmp15577 = getelementptr inbounds float* %tmp15576, i64 1 + %tmp15578 = getelementptr inbounds float* %tmp15577, i64 1 + %tmp15579 = getelementptr inbounds float* %tmp15578, i64 1 + %tmp15580 = getelementptr inbounds float* %tmp15579, i64 1 + %tmp15581 = getelementptr inbounds float* %tmp15580, i64 1 + %tmp15582 = getelementptr inbounds float* %tmp15581, i64 1 + %tmp15583 = getelementptr inbounds float* %tmp15582, i64 1 + %tmp15584 = getelementptr inbounds float* %tmp15583, i64 1 + %tmp15585 = getelementptr inbounds float* %tmp15584, i64 1 + %tmp15586 = getelementptr inbounds float* %tmp15585, i64 1 + %tmp15587 = getelementptr inbounds float* %tmp15586, i64 1 + %tmp15588 = getelementptr inbounds float* %tmp15587, i64 1 + %tmp15589 = getelementptr inbounds float* %tmp15588, i64 1 + %tmp15590 = getelementptr inbounds float* %tmp15589, i64 1 + %tmp15591 = getelementptr inbounds float* %tmp15590, i64 1 + %tmp15592 = getelementptr inbounds float* %tmp15591, i64 1 + %tmp15593 = getelementptr inbounds float* %tmp15592, i64 1 + %tmp15594 = getelementptr inbounds float* %tmp15593, i64 1 + %tmp15595 = getelementptr inbounds float* %tmp15594, i64 1 + %tmp15596 = getelementptr inbounds float* %tmp15595, i64 1 + %tmp15597 = getelementptr inbounds float* %tmp15596, i64 1 + %tmp15598 = getelementptr inbounds float* %tmp15597, i64 1 + %tmp15599 = getelementptr inbounds float* %tmp15598, i64 1 + %tmp15600 = getelementptr inbounds float* %tmp15599, i64 1 + %tmp15601 = getelementptr 
inbounds float* %tmp15600, i64 1 + %tmp15602 = getelementptr inbounds float* %tmp15601, i64 1 + %tmp15603 = getelementptr inbounds float* %tmp15602, i64 1 + %tmp15604 = getelementptr inbounds float* %tmp15603, i64 1 + %tmp15605 = getelementptr inbounds float* %tmp15604, i64 1 + %tmp15606 = getelementptr inbounds float* %tmp15605, i64 1 + %tmp15607 = getelementptr inbounds float* %tmp15606, i64 1 + %tmp15608 = getelementptr inbounds float* %tmp15607, i64 1 + %tmp15609 = getelementptr inbounds float* %tmp15608, i64 1 + %tmp15610 = getelementptr inbounds float* %tmp15609, i64 1 + %tmp15611 = getelementptr inbounds float* %tmp15610, i64 1 + %tmp15612 = getelementptr inbounds float* %tmp15611, i64 1 + %tmp15613 = getelementptr inbounds float* %tmp15612, i64 1 + %tmp15614 = getelementptr inbounds float* %tmp15613, i64 1 + %tmp15615 = getelementptr inbounds float* %tmp15614, i64 1 + %tmp15616 = getelementptr inbounds float* %tmp15615, i64 1 + %tmp15617 = getelementptr inbounds float* %tmp15616, i64 1 + %tmp15618 = getelementptr inbounds float* %tmp15617, i64 1 + %tmp15619 = getelementptr inbounds float* %tmp15618, i64 1 + %tmp15620 = getelementptr inbounds float* %tmp15619, i64 1 + %tmp15621 = getelementptr inbounds float* %tmp15620, i64 1 + %tmp15622 = getelementptr inbounds float* %tmp15621, i64 1 + %tmp15623 = getelementptr inbounds float* %tmp15622, i64 1 + %tmp15624 = getelementptr inbounds float* %tmp15623, i64 1 + %tmp15625 = getelementptr inbounds float* %tmp15624, i64 1 + %tmp15626 = getelementptr inbounds float* %tmp15625, i64 1 + %tmp15627 = getelementptr inbounds float* %tmp15626, i64 1 + %tmp15628 = getelementptr inbounds float* %tmp15627, i64 1 + %tmp15629 = getelementptr inbounds float* %tmp15628, i64 1 + %tmp15630 = getelementptr inbounds float* %tmp15629, i64 1 + %tmp15631 = getelementptr inbounds float* %tmp15630, i64 1 + %tmp15632 = getelementptr inbounds float* %tmp15631, i64 1 + %tmp15633 = getelementptr inbounds float* %tmp15632, i64 1 + %tmp15634 = 
getelementptr inbounds float* %tmp15633, i64 1 + %tmp15635 = getelementptr inbounds float* %tmp15634, i64 1 + %tmp15636 = getelementptr inbounds float* %tmp15635, i64 1 + %tmp15637 = getelementptr inbounds float* %tmp15636, i64 1 + %tmp15638 = getelementptr inbounds float* %tmp15637, i64 1 + %tmp15639 = getelementptr inbounds float* %tmp15638, i64 1 + %tmp15640 = getelementptr inbounds float* %tmp15639, i64 1 + %tmp15641 = getelementptr inbounds float* %tmp15640, i64 1 + %tmp15642 = getelementptr inbounds float* %tmp15641, i64 1 + %tmp15643 = getelementptr inbounds float* %tmp15642, i64 1 + %tmp15644 = getelementptr inbounds float* %tmp15643, i64 1 + %tmp15645 = getelementptr inbounds float* %tmp15644, i64 1 + %tmp15646 = getelementptr inbounds float* %tmp15645, i64 1 + %tmp15647 = getelementptr inbounds float* %tmp15646, i64 1 + %tmp15648 = getelementptr inbounds float* %tmp15647, i64 1 + %tmp15649 = getelementptr inbounds float* %tmp15648, i64 1 + %tmp15650 = getelementptr inbounds float* %tmp15649, i64 1 + %tmp15651 = getelementptr inbounds float* %tmp15650, i64 1 + %tmp15652 = getelementptr inbounds float* %tmp15651, i64 1 + %tmp15653 = getelementptr inbounds float* %tmp15652, i64 1 + %tmp15654 = getelementptr inbounds float* %tmp15653, i64 1 + %tmp15655 = getelementptr inbounds float* %tmp15654, i64 1 + %tmp15656 = getelementptr inbounds float* %tmp15655, i64 1 + %tmp15657 = getelementptr inbounds float* %tmp15656, i64 1 + %tmp15658 = getelementptr inbounds float* %tmp15657, i64 1 + %tmp15659 = getelementptr inbounds float* %tmp15658, i64 1 + %tmp15660 = getelementptr inbounds float* %tmp15659, i64 1 + %tmp15661 = getelementptr inbounds float* %tmp15660, i64 1 + %tmp15662 = getelementptr inbounds float* %tmp15661, i64 1 + %tmp15663 = getelementptr inbounds float* %tmp15662, i64 1 + %tmp15664 = getelementptr inbounds float* %tmp15663, i64 1 + %tmp15665 = getelementptr inbounds float* %tmp15664, i64 1 + %tmp15666 = getelementptr inbounds float* %tmp15665, i64 1 
+ %tmp15667 = getelementptr inbounds float* %tmp15666, i64 1 + %tmp15668 = getelementptr inbounds float* %tmp15667, i64 1 + %tmp15669 = getelementptr inbounds float* %tmp15668, i64 1 + %tmp15670 = getelementptr inbounds float* %tmp15669, i64 1 + %tmp15671 = getelementptr inbounds float* %tmp15670, i64 1 + %tmp15672 = getelementptr inbounds float* %tmp15671, i64 1 + %tmp15673 = getelementptr inbounds float* %tmp15672, i64 1 + %tmp15674 = getelementptr inbounds float* %tmp15673, i64 1 + %tmp15675 = getelementptr inbounds float* %tmp15674, i64 1 + %tmp15676 = getelementptr inbounds float* %tmp15675, i64 1 + %tmp15677 = getelementptr inbounds float* %tmp15676, i64 1 + %tmp15678 = getelementptr inbounds float* %tmp15677, i64 1 + %tmp15679 = getelementptr inbounds float* %tmp15678, i64 1 + %tmp15680 = getelementptr inbounds float* %tmp15679, i64 1 + %tmp15681 = getelementptr inbounds float* %tmp15680, i64 1 + %tmp15682 = getelementptr inbounds float* %tmp15681, i64 1 + %tmp15683 = getelementptr inbounds float* %tmp15682, i64 1 + %tmp15684 = getelementptr inbounds float* %tmp15683, i64 1 + %tmp15685 = getelementptr inbounds float* %tmp15684, i64 1 + %tmp15686 = getelementptr inbounds float* %tmp15685, i64 1 + %tmp15687 = getelementptr inbounds float* %tmp15686, i64 1 + %tmp15688 = getelementptr inbounds float* %tmp15687, i64 1 + %tmp15689 = getelementptr inbounds float* %tmp15688, i64 1 + %tmp15690 = getelementptr inbounds float* %tmp15689, i64 1 + %tmp15691 = getelementptr inbounds float* %tmp15690, i64 1 + %tmp15692 = getelementptr inbounds float* %tmp15691, i64 1 + %tmp15693 = getelementptr inbounds float* %tmp15692, i64 1 + %tmp15694 = getelementptr inbounds float* %tmp15693, i64 1 + %tmp15695 = getelementptr inbounds float* %tmp15694, i64 1 + %tmp15696 = getelementptr inbounds float* %tmp15695, i64 1 + %tmp15697 = getelementptr inbounds float* %tmp15696, i64 1 + %tmp15698 = getelementptr inbounds float* %tmp15697, i64 1 + %tmp15699 = getelementptr inbounds float* 
%tmp15698, i64 1 + %tmp15700 = getelementptr inbounds float* %tmp15699, i64 1 + %tmp15701 = getelementptr inbounds float* %tmp15700, i64 1 + %tmp15702 = getelementptr inbounds float* %tmp15701, i64 1 + %tmp15703 = getelementptr inbounds float* %tmp15702, i64 1 + %tmp15704 = getelementptr inbounds float* %tmp15703, i64 1 + %tmp15705 = getelementptr inbounds float* %tmp15704, i64 1 + %tmp15706 = getelementptr inbounds float* %tmp15705, i64 1 + %tmp15707 = getelementptr inbounds float* %tmp15706, i64 1 + %tmp15708 = getelementptr inbounds float* %tmp15707, i64 1 + %tmp15709 = getelementptr inbounds float* %tmp15708, i64 1 + %tmp15710 = getelementptr inbounds float* %tmp15709, i64 1 + %tmp15711 = getelementptr inbounds float* %tmp15710, i64 1 + %tmp15712 = getelementptr inbounds float* %tmp15711, i64 1 + %tmp15713 = getelementptr inbounds float* %tmp15712, i64 1 + %tmp15714 = getelementptr inbounds float* %tmp15713, i64 1 + %tmp15715 = getelementptr inbounds float* %tmp15714, i64 1 + %tmp15716 = getelementptr inbounds float* %tmp15715, i64 1 + %tmp15717 = getelementptr inbounds float* %tmp15716, i64 1 + %tmp15718 = getelementptr inbounds float* %tmp15717, i64 1 + %tmp15719 = getelementptr inbounds float* %tmp15718, i64 1 + %tmp15720 = getelementptr inbounds float* %tmp15719, i64 1 + %tmp15721 = getelementptr inbounds float* %tmp15720, i64 1 + %tmp15722 = getelementptr inbounds float* %tmp15721, i64 1 + %tmp15723 = getelementptr inbounds float* %tmp15722, i64 1 + %tmp15724 = getelementptr inbounds float* %tmp15723, i64 1 + %tmp15725 = getelementptr inbounds float* %tmp15724, i64 1 + %tmp15726 = getelementptr inbounds float* %tmp15725, i64 1 + %tmp15727 = getelementptr inbounds float* %tmp15726, i64 1 + %tmp15728 = getelementptr inbounds float* %tmp15727, i64 1 + %tmp15729 = getelementptr inbounds float* %tmp15728, i64 1 + %tmp15730 = getelementptr inbounds float* %tmp15729, i64 1 + %tmp15731 = getelementptr inbounds float* %tmp15730, i64 1 + %tmp15732 = getelementptr 
inbounds float* %tmp15731, i64 1 + %tmp15733 = getelementptr inbounds float* %tmp15732, i64 1 + %tmp15734 = getelementptr inbounds float* %tmp15733, i64 1 + %tmp15735 = getelementptr inbounds float* %tmp15734, i64 1 + %tmp15736 = getelementptr inbounds float* %tmp15735, i64 1 + %tmp15737 = getelementptr inbounds float* %tmp15736, i64 1 + %tmp15738 = getelementptr inbounds float* %tmp15737, i64 1 + %tmp15739 = getelementptr inbounds float* %tmp15738, i64 1 + %tmp15740 = getelementptr inbounds float* %tmp15739, i64 1 + %tmp15741 = getelementptr inbounds float* %tmp15740, i64 1 + %tmp15742 = getelementptr inbounds float* %tmp15741, i64 1 + %tmp15743 = getelementptr inbounds float* %tmp15742, i64 1 + %tmp15744 = getelementptr inbounds float* %tmp15743, i64 1 + %tmp15745 = getelementptr inbounds float* %tmp15744, i64 1 + %tmp15746 = getelementptr inbounds float* %tmp15745, i64 1 + %tmp15747 = getelementptr inbounds float* %tmp15746, i64 1 + %tmp15748 = getelementptr inbounds float* %tmp15747, i64 1 + %tmp15749 = getelementptr inbounds float* %tmp15748, i64 1 + %tmp15750 = getelementptr inbounds float* %tmp15749, i64 1 + %tmp15751 = getelementptr inbounds float* %tmp15750, i64 1 + %tmp15752 = getelementptr inbounds float* %tmp15751, i64 1 + %tmp15753 = getelementptr inbounds float* %tmp15752, i64 1 + %tmp15754 = getelementptr inbounds float* %tmp15753, i64 1 + %tmp15755 = getelementptr inbounds float* %tmp15754, i64 1 + %tmp15756 = getelementptr inbounds float* %tmp15755, i64 1 + %tmp15757 = getelementptr inbounds float* %tmp15756, i64 1 + %tmp15758 = getelementptr inbounds float* %tmp15757, i64 1 + %tmp15759 = getelementptr inbounds float* %tmp15758, i64 1 + %tmp15760 = getelementptr inbounds float* %tmp15759, i64 1 + %tmp15761 = getelementptr inbounds float* %tmp15760, i64 1 + %tmp15762 = getelementptr inbounds float* %tmp15761, i64 1 + %tmp15763 = getelementptr inbounds float* %tmp15762, i64 1 + %tmp15764 = getelementptr inbounds float* %tmp15763, i64 1 + %tmp15765 = 
getelementptr inbounds float* %tmp15764, i64 1 + %tmp15766 = getelementptr inbounds float* %tmp15765, i64 1 + %tmp15767 = getelementptr inbounds float* %tmp15766, i64 1 + %tmp15768 = getelementptr inbounds float* %tmp15767, i64 1 + %tmp15769 = getelementptr inbounds float* %tmp15768, i64 1 + %tmp15770 = getelementptr inbounds float* %tmp15769, i64 1 + %tmp15771 = getelementptr inbounds float* %tmp15770, i64 1 + %tmp15772 = getelementptr inbounds float* %tmp15771, i64 1 + %tmp15773 = getelementptr inbounds float* %tmp15772, i64 1 + %tmp15774 = getelementptr inbounds float* %tmp15773, i64 1 + %tmp15775 = getelementptr inbounds float* %tmp15774, i64 1 + %tmp15776 = getelementptr inbounds float* %tmp15775, i64 1 + %tmp15777 = getelementptr inbounds float* %tmp15776, i64 1 + %tmp15778 = getelementptr inbounds float* %tmp15777, i64 1 + %tmp15779 = getelementptr inbounds float* %tmp15778, i64 1 + %tmp15780 = getelementptr inbounds float* %tmp15779, i64 1 + %tmp15781 = getelementptr inbounds float* %tmp15780, i64 1 + %tmp15782 = getelementptr inbounds float* %tmp15781, i64 1 + %tmp15783 = getelementptr inbounds float* %tmp15782, i64 1 + %tmp15784 = getelementptr inbounds float* %tmp15783, i64 1 + %tmp15785 = getelementptr inbounds float* %tmp15784, i64 1 + %tmp15786 = getelementptr inbounds float* %tmp15785, i64 1 + %tmp15787 = getelementptr inbounds float* %tmp15786, i64 1 + %tmp15788 = getelementptr inbounds float* %tmp15787, i64 1 + %tmp15789 = getelementptr inbounds float* %tmp15788, i64 1 + %tmp15790 = getelementptr inbounds float* %tmp15789, i64 1 + %tmp15791 = getelementptr inbounds float* %tmp15790, i64 1 + %tmp15792 = getelementptr inbounds float* %tmp15791, i64 1 + %tmp15793 = getelementptr inbounds float* %tmp15792, i64 1 + %tmp15794 = getelementptr inbounds float* %tmp15793, i64 1 + %tmp15795 = getelementptr inbounds float* %tmp15794, i64 1 + %tmp15796 = getelementptr inbounds float* %tmp15795, i64 1 + %tmp15797 = getelementptr inbounds float* %tmp15796, i64 1 
+ %tmp15798 = getelementptr inbounds float* %tmp15797, i64 1 + %tmp15799 = getelementptr inbounds float* %tmp15798, i64 1 + %tmp15800 = getelementptr inbounds float* %tmp15799, i64 1 + %tmp15801 = getelementptr inbounds float* %tmp15800, i64 1 + %tmp15802 = getelementptr inbounds float* %tmp15801, i64 1 + %tmp15803 = getelementptr inbounds float* %tmp15802, i64 1 + %tmp15804 = getelementptr inbounds float* %tmp15803, i64 1 + %tmp15805 = getelementptr inbounds float* %tmp15804, i64 1 + %tmp15806 = getelementptr inbounds float* %tmp15805, i64 1 + %tmp15807 = getelementptr inbounds float* %tmp15806, i64 1 + %tmp15808 = getelementptr inbounds float* %tmp15807, i64 1 + %tmp15809 = getelementptr inbounds float* %tmp15808, i64 1 + %tmp15810 = getelementptr inbounds float* %tmp15809, i64 1 + %tmp15811 = getelementptr inbounds float* %tmp15810, i64 1 + %tmp15812 = getelementptr inbounds float* %tmp15811, i64 1 + %tmp15813 = getelementptr inbounds float* %tmp15812, i64 1 + %tmp15814 = getelementptr inbounds float* %tmp15813, i64 1 + %tmp15815 = getelementptr inbounds float* %tmp15814, i64 1 + %tmp15816 = getelementptr inbounds float* %tmp15815, i64 1 + %tmp15817 = getelementptr inbounds float* %tmp15816, i64 1 + %tmp15818 = getelementptr inbounds float* %tmp15817, i64 1 + %tmp15819 = getelementptr inbounds float* %tmp15818, i64 1 + %tmp15820 = getelementptr inbounds float* %tmp15819, i64 1 + %tmp15821 = getelementptr inbounds float* %tmp15820, i64 1 + %tmp15822 = getelementptr inbounds float* %tmp15821, i64 1 + %tmp15823 = getelementptr inbounds float* %tmp15822, i64 1 + %tmp15824 = getelementptr inbounds float* %tmp15823, i64 1 + %tmp15825 = getelementptr inbounds float* %tmp15824, i64 1 + %tmp15826 = getelementptr inbounds float* %tmp15825, i64 1 + %tmp15827 = getelementptr inbounds float* %tmp15826, i64 1 + %tmp15828 = getelementptr inbounds float* %tmp15827, i64 1 + %tmp15829 = getelementptr inbounds float* %tmp15828, i64 1 + %tmp15830 = getelementptr inbounds float* 
%tmp15829, i64 1 + %tmp15831 = getelementptr inbounds float* %tmp15830, i64 1 + %tmp15832 = getelementptr inbounds float* %tmp15831, i64 1 + %tmp15833 = getelementptr inbounds float* %tmp15832, i64 1 + %tmp15834 = getelementptr inbounds float* %tmp15833, i64 1 + %tmp15835 = getelementptr inbounds float* %tmp15834, i64 1 + %tmp15836 = getelementptr inbounds float* %tmp15835, i64 1 + %tmp15837 = getelementptr inbounds float* %tmp15836, i64 1 + %tmp15838 = getelementptr inbounds float* %tmp15837, i64 1 + %tmp15839 = getelementptr inbounds float* %tmp15838, i64 1 + %tmp15840 = getelementptr inbounds float* %tmp15839, i64 1 + %tmp15841 = getelementptr inbounds float* %tmp15840, i64 1 + %tmp15842 = getelementptr inbounds float* %tmp15841, i64 1 + %tmp15843 = getelementptr inbounds float* %tmp15842, i64 1 + %tmp15844 = getelementptr inbounds float* %tmp15843, i64 1 + %tmp15845 = getelementptr inbounds float* %tmp15844, i64 1 + %tmp15846 = getelementptr inbounds float* %tmp15845, i64 1 + %tmp15847 = getelementptr inbounds float* %tmp15846, i64 1 + %tmp15848 = getelementptr inbounds float* %tmp15847, i64 1 + %tmp15849 = getelementptr inbounds float* %tmp15848, i64 1 + %tmp15850 = getelementptr inbounds float* %tmp15849, i64 1 + %tmp15851 = getelementptr inbounds float* %tmp15850, i64 1 + %tmp15852 = getelementptr inbounds float* %tmp15851, i64 1 + %tmp15853 = getelementptr inbounds float* %tmp15852, i64 1 + %tmp15854 = getelementptr inbounds float* %tmp15853, i64 1 + %tmp15855 = getelementptr inbounds float* %tmp15854, i64 1 + %tmp15856 = getelementptr inbounds float* %tmp15855, i64 1 + %tmp15857 = getelementptr inbounds float* %tmp15856, i64 1 + %tmp15858 = getelementptr inbounds float* %tmp15857, i64 1 + %tmp15859 = getelementptr inbounds float* %tmp15858, i64 1 + %tmp15860 = getelementptr inbounds float* %tmp15859, i64 1 + %tmp15861 = getelementptr inbounds float* %tmp15860, i64 1 + %tmp15862 = getelementptr inbounds float* %tmp15861, i64 1 + %tmp15863 = getelementptr 
inbounds float* %tmp15862, i64 1 + %tmp15864 = getelementptr inbounds float* %tmp15863, i64 1 + %tmp15865 = getelementptr inbounds float* %tmp15864, i64 1 + %tmp15866 = getelementptr inbounds float* %tmp15865, i64 1 + %tmp15867 = getelementptr inbounds float* %tmp15866, i64 1 + %tmp15868 = getelementptr inbounds float* %tmp15867, i64 1 + %tmp15869 = getelementptr inbounds float* %tmp15868, i64 1 + %tmp15870 = getelementptr inbounds float* %tmp15869, i64 1 + %tmp15871 = getelementptr inbounds float* %tmp15870, i64 1 + %tmp15872 = getelementptr inbounds float* %tmp15871, i64 1 + %tmp15873 = getelementptr inbounds float* %tmp15872, i64 1 + %tmp15874 = getelementptr inbounds float* %tmp15873, i64 1 + %tmp15875 = getelementptr inbounds float* %tmp15874, i64 1 + %tmp15876 = getelementptr inbounds float* %tmp15875, i64 1 + %tmp15877 = getelementptr inbounds float* %tmp15876, i64 1 + %tmp15878 = getelementptr inbounds float* %tmp15877, i64 1 + %tmp15879 = getelementptr inbounds float* %tmp15878, i64 1 + %tmp15880 = getelementptr inbounds float* %tmp15879, i64 1 + %tmp15881 = getelementptr inbounds float* %tmp15880, i64 1 + %tmp15882 = getelementptr inbounds float* %tmp15881, i64 1 + %tmp15883 = getelementptr inbounds float* %tmp15882, i64 1 + %tmp15884 = getelementptr inbounds float* %tmp15883, i64 1 + %tmp15885 = getelementptr inbounds float* %tmp15884, i64 1 + %tmp15886 = getelementptr inbounds float* %tmp15885, i64 1 + %tmp15887 = getelementptr inbounds float* %tmp15886, i64 1 + %tmp15888 = getelementptr inbounds float* %tmp15887, i64 1 + %tmp15889 = getelementptr inbounds float* %tmp15888, i64 1 + %tmp15890 = getelementptr inbounds float* %tmp15889, i64 1 + %tmp15891 = getelementptr inbounds float* %tmp15890, i64 1 + %tmp15892 = getelementptr inbounds float* %tmp15891, i64 1 + %tmp15893 = getelementptr inbounds float* %tmp15892, i64 1 + %tmp15894 = getelementptr inbounds float* %tmp15893, i64 1 + %tmp15895 = getelementptr inbounds float* %tmp15894, i64 1 + %tmp15896 = 
getelementptr inbounds float* %tmp15895, i64 1 + %tmp15897 = getelementptr inbounds float* %tmp15896, i64 1 + %tmp15898 = getelementptr inbounds float* %tmp15897, i64 1 + %tmp15899 = getelementptr inbounds float* %tmp15898, i64 1 + %tmp15900 = getelementptr inbounds float* %tmp15899, i64 1 + %tmp15901 = getelementptr inbounds float* %tmp15900, i64 1 + %tmp15902 = getelementptr inbounds float* %tmp15901, i64 1 + %tmp15903 = getelementptr inbounds float* %tmp15902, i64 1 + %tmp15904 = getelementptr inbounds float* %tmp15903, i64 1 + %tmp15905 = getelementptr inbounds float* %tmp15904, i64 1 + %tmp15906 = getelementptr inbounds float* %tmp15905, i64 1 + %tmp15907 = getelementptr inbounds float* %tmp15906, i64 1 + %tmp15908 = getelementptr inbounds float* %tmp15907, i64 1 + %tmp15909 = getelementptr inbounds float* %tmp15908, i64 1 + %tmp15910 = getelementptr inbounds float* %tmp15909, i64 1 + %tmp15911 = getelementptr inbounds float* %tmp15910, i64 1 + %tmp15912 = getelementptr inbounds float* %tmp15911, i64 1 + %tmp15913 = getelementptr inbounds float* %tmp15912, i64 1 + %tmp15914 = getelementptr inbounds float* %tmp15913, i64 1 + %tmp15915 = getelementptr inbounds float* %tmp15914, i64 1 + %tmp15916 = getelementptr inbounds float* %tmp15915, i64 1 + %tmp15917 = getelementptr inbounds float* %tmp15916, i64 1 + %tmp15918 = getelementptr inbounds float* %tmp15917, i64 1 + %tmp15919 = getelementptr inbounds float* %tmp15918, i64 1 + %tmp15920 = getelementptr inbounds float* %tmp15919, i64 1 + %tmp15921 = getelementptr inbounds float* %tmp15920, i64 1 + %tmp15922 = getelementptr inbounds float* %tmp15921, i64 1 + %tmp15923 = getelementptr inbounds float* %tmp15922, i64 1 + %tmp15924 = getelementptr inbounds float* %tmp15923, i64 1 + %tmp15925 = getelementptr inbounds float* %tmp15924, i64 1 + %tmp15926 = getelementptr inbounds float* %tmp15925, i64 1 + %tmp15927 = getelementptr inbounds float* %tmp15926, i64 1 + %tmp15928 = getelementptr inbounds float* %tmp15927, i64 1 
+ %tmp15929 = getelementptr inbounds float* %tmp15928, i64 1 + %tmp15930 = getelementptr inbounds float* %tmp15929, i64 1 + %tmp15931 = getelementptr inbounds float* %tmp15930, i64 1 + %tmp15932 = getelementptr inbounds float* %tmp15931, i64 1 + %tmp15933 = getelementptr inbounds float* %tmp15932, i64 1 + %tmp15934 = getelementptr inbounds float* %tmp15933, i64 1 + %tmp15935 = getelementptr inbounds float* %tmp15934, i64 1 + %tmp15936 = getelementptr inbounds float* %tmp15935, i64 1 + %tmp15937 = getelementptr inbounds float* %tmp15936, i64 1 + %tmp15938 = getelementptr inbounds float* %tmp15937, i64 1 + %tmp15939 = getelementptr inbounds float* %tmp15938, i64 1 + %tmp15940 = getelementptr inbounds float* %tmp15939, i64 1 + %tmp15941 = getelementptr inbounds float* %tmp15940, i64 1 + %tmp15942 = getelementptr inbounds float* %tmp15941, i64 1 + %tmp15943 = getelementptr inbounds float* %tmp15942, i64 1 + %tmp15944 = getelementptr inbounds float* %tmp15943, i64 1 + %tmp15945 = getelementptr inbounds float* %tmp15944, i64 1 + %tmp15946 = getelementptr inbounds float* %tmp15945, i64 1 + %tmp15947 = getelementptr inbounds float* %tmp15946, i64 1 + %tmp15948 = getelementptr inbounds float* %tmp15947, i64 1 + %tmp15949 = getelementptr inbounds float* %tmp15948, i64 1 + %tmp15950 = getelementptr inbounds float* %tmp15949, i64 1 + %tmp15951 = getelementptr inbounds float* %tmp15950, i64 1 + %tmp15952 = getelementptr inbounds float* %tmp15951, i64 1 + %tmp15953 = getelementptr inbounds float* %tmp15952, i64 1 + %tmp15954 = getelementptr inbounds float* %tmp15953, i64 1 + %tmp15955 = getelementptr inbounds float* %tmp15954, i64 1 + %tmp15956 = getelementptr inbounds float* %tmp15955, i64 1 + %tmp15957 = getelementptr inbounds float* %tmp15956, i64 1 + %tmp15958 = getelementptr inbounds float* %tmp15957, i64 1 + %tmp15959 = getelementptr inbounds float* %tmp15958, i64 1 + %tmp15960 = getelementptr inbounds float* %tmp15959, i64 1 + %tmp15961 = getelementptr inbounds float* 
%tmp15960, i64 1 + %tmp15962 = getelementptr inbounds float* %tmp15961, i64 1 + %tmp15963 = getelementptr inbounds float* %tmp15962, i64 1 + %tmp15964 = getelementptr inbounds float* %tmp15963, i64 1 + %tmp15965 = getelementptr inbounds float* %tmp15964, i64 1 + %tmp15966 = getelementptr inbounds float* %tmp15965, i64 1 + %tmp15967 = getelementptr inbounds float* %tmp15966, i64 1 + %tmp15968 = getelementptr inbounds float* %tmp15967, i64 1 + %tmp15969 = getelementptr inbounds float* %tmp15968, i64 1 + %tmp15970 = getelementptr inbounds float* %tmp15969, i64 1 + %tmp15971 = getelementptr inbounds float* %tmp15970, i64 1 + %tmp15972 = getelementptr inbounds float* %tmp15971, i64 1 + %tmp15973 = getelementptr inbounds float* %tmp15972, i64 1 + %tmp15974 = getelementptr inbounds float* %tmp15973, i64 1 + %tmp15975 = getelementptr inbounds float* %tmp15974, i64 1 + %tmp15976 = getelementptr inbounds float* %tmp15975, i64 1 + %tmp15977 = getelementptr inbounds float* %tmp15976, i64 1 + %tmp15978 = getelementptr inbounds float* %tmp15977, i64 1 + %tmp15979 = getelementptr inbounds float* %tmp15978, i64 1 + %tmp15980 = getelementptr inbounds float* %tmp15979, i64 1 + %tmp15981 = getelementptr inbounds float* %tmp15980, i64 1 + %tmp15982 = getelementptr inbounds float* %tmp15981, i64 1 + %tmp15983 = getelementptr inbounds float* %tmp15982, i64 1 + %tmp15984 = getelementptr inbounds float* %tmp15983, i64 1 + %tmp15985 = getelementptr inbounds float* %tmp15984, i64 1 + %tmp15986 = getelementptr inbounds float* %tmp15985, i64 1 + %tmp15987 = getelementptr inbounds float* %tmp15986, i64 1 + %tmp15988 = getelementptr inbounds float* %tmp15987, i64 1 + %tmp15989 = getelementptr inbounds float* %tmp15988, i64 1 + %tmp15990 = getelementptr inbounds float* %tmp15989, i64 1 + %tmp15991 = getelementptr inbounds float* %tmp15990, i64 1 + %tmp15992 = getelementptr inbounds float* %tmp15991, i64 1 + %tmp15993 = getelementptr inbounds float* %tmp15992, i64 1 + %tmp15994 = getelementptr 
inbounds float* %tmp15993, i64 1 + %tmp15995 = getelementptr inbounds float* %tmp15994, i64 1 + %tmp15996 = getelementptr inbounds float* %tmp15995, i64 1 + %tmp15997 = getelementptr inbounds float* %tmp15996, i64 1 + %tmp15998 = getelementptr inbounds float* %tmp15997, i64 1 + %tmp15999 = getelementptr inbounds float* %tmp15998, i64 1 + %tmp16000 = getelementptr inbounds float* %tmp15999, i64 1 + %tmp16001 = getelementptr inbounds float* %tmp16000, i64 1 + %tmp16002 = getelementptr inbounds float* %tmp16001, i64 1 + %tmp16003 = getelementptr inbounds float* %tmp16002, i64 1 + %tmp16004 = getelementptr inbounds float* %tmp16003, i64 1 + %tmp16005 = getelementptr inbounds float* %tmp16004, i64 1 + %tmp16006 = getelementptr inbounds float* %tmp16005, i64 1 + %tmp16007 = getelementptr inbounds float* %tmp16006, i64 1 + %tmp16008 = getelementptr inbounds float* %tmp16007, i64 1 + %tmp16009 = getelementptr inbounds float* %tmp16008, i64 1 + %tmp16010 = getelementptr inbounds float* %tmp16009, i64 1 + %tmp16011 = getelementptr inbounds float* %tmp16010, i64 1 + %tmp16012 = getelementptr inbounds float* %tmp16011, i64 1 + %tmp16013 = getelementptr inbounds float* %tmp16012, i64 1 + %tmp16014 = getelementptr inbounds float* %tmp16013, i64 1 + %tmp16015 = getelementptr inbounds float* %tmp16014, i64 1 + %tmp16016 = getelementptr inbounds float* %tmp16015, i64 1 + %tmp16017 = getelementptr inbounds float* %tmp16016, i64 1 + %tmp16018 = getelementptr inbounds float* %tmp16017, i64 1 + %tmp16019 = getelementptr inbounds float* %tmp16018, i64 1 + %tmp16020 = getelementptr inbounds float* %tmp16019, i64 1 + %tmp16021 = getelementptr inbounds float* %tmp16020, i64 1 + %tmp16022 = getelementptr inbounds float* %tmp16021, i64 1 + %tmp16023 = getelementptr inbounds float* %tmp16022, i64 1 + %tmp16024 = getelementptr inbounds float* %tmp16023, i64 1 + %tmp16025 = getelementptr inbounds float* %tmp16024, i64 1 + %tmp16026 = getelementptr inbounds float* %tmp16025, i64 1 + %tmp16027 = 
getelementptr inbounds float* %tmp16026, i64 1 + %tmp16028 = getelementptr inbounds float* %tmp16027, i64 1 + %tmp16029 = getelementptr inbounds float* %tmp16028, i64 1 + %tmp16030 = getelementptr inbounds float* %tmp16029, i64 1 + %tmp16031 = getelementptr inbounds float* %tmp16030, i64 1 + %tmp16032 = getelementptr inbounds float* %tmp16031, i64 1 + %tmp16033 = getelementptr inbounds float* %tmp16032, i64 1 + %tmp16034 = getelementptr inbounds float* %tmp16033, i64 1 + %tmp16035 = getelementptr inbounds float* %tmp16034, i64 1 + %tmp16036 = getelementptr inbounds float* %tmp16035, i64 1 + %tmp16037 = getelementptr inbounds float* %tmp16036, i64 1 + %tmp16038 = getelementptr inbounds float* %tmp16037, i64 1 + %tmp16039 = getelementptr inbounds float* %tmp16038, i64 1 + %tmp16040 = getelementptr inbounds float* %tmp16039, i64 1 + %tmp16041 = getelementptr inbounds float* %tmp16040, i64 1 + %tmp16042 = getelementptr inbounds float* %tmp16041, i64 1 + %tmp16043 = getelementptr inbounds float* %tmp16042, i64 1 + %tmp16044 = getelementptr inbounds float* %tmp16043, i64 1 + %tmp16045 = getelementptr inbounds float* %tmp16044, i64 1 + %tmp16046 = getelementptr inbounds float* %tmp16045, i64 1 + %tmp16047 = getelementptr inbounds float* %tmp16046, i64 1 + %tmp16048 = getelementptr inbounds float* %tmp16047, i64 1 + %tmp16049 = getelementptr inbounds float* %tmp16048, i64 1 + %tmp16050 = getelementptr inbounds float* %tmp16049, i64 1 + %tmp16051 = getelementptr inbounds float* %tmp16050, i64 1 + %tmp16052 = getelementptr inbounds float* %tmp16051, i64 1 + %tmp16053 = getelementptr inbounds float* %tmp16052, i64 1 + %tmp16054 = getelementptr inbounds float* %tmp16053, i64 1 + %tmp16055 = getelementptr inbounds float* %tmp16054, i64 1 + %tmp16056 = getelementptr inbounds float* %tmp16055, i64 1 + %tmp16057 = getelementptr inbounds float* %tmp16056, i64 1 + %tmp16058 = getelementptr inbounds float* %tmp16057, i64 1 + %tmp16059 = getelementptr inbounds float* %tmp16058, i64 1 
+ %tmp16060 = getelementptr inbounds float* %tmp16059, i64 1 + %tmp16061 = getelementptr inbounds float* %tmp16060, i64 1 + %tmp16062 = getelementptr inbounds float* %tmp16061, i64 1 + %tmp16063 = getelementptr inbounds float* %tmp16062, i64 1 + %tmp16064 = getelementptr inbounds float* %tmp16063, i64 1 + %tmp16065 = getelementptr inbounds float* %tmp16064, i64 1 + %tmp16066 = getelementptr inbounds float* %tmp16065, i64 1 + %tmp16067 = getelementptr inbounds float* %tmp16066, i64 1 + %tmp16068 = getelementptr inbounds float* %tmp16067, i64 1 + %tmp16069 = getelementptr inbounds float* %tmp16068, i64 1 + %tmp16070 = getelementptr inbounds float* %tmp16069, i64 1 + %tmp16071 = getelementptr inbounds float* %tmp16070, i64 1 + %tmp16072 = getelementptr inbounds float* %tmp16071, i64 1 + %tmp16073 = getelementptr inbounds float* %tmp16072, i64 1 + %tmp16074 = getelementptr inbounds float* %tmp16073, i64 1 + %tmp16075 = getelementptr inbounds float* %tmp16074, i64 1 + %tmp16076 = getelementptr inbounds float* %tmp16075, i64 1 + %tmp16077 = getelementptr inbounds float* %tmp16076, i64 1 + %tmp16078 = getelementptr inbounds float* %tmp16077, i64 1 + %tmp16079 = getelementptr inbounds float* %tmp16078, i64 1 + %tmp16080 = getelementptr inbounds float* %tmp16079, i64 1 + %tmp16081 = getelementptr inbounds float* %tmp16080, i64 1 + %tmp16082 = getelementptr inbounds float* %tmp16081, i64 1 + %tmp16083 = getelementptr inbounds float* %tmp16082, i64 1 + %tmp16084 = getelementptr inbounds float* %tmp16083, i64 1 + %tmp16085 = getelementptr inbounds float* %tmp16084, i64 1 + %tmp16086 = getelementptr inbounds float* %tmp16085, i64 1 + %tmp16087 = getelementptr inbounds float* %tmp16086, i64 1 + %tmp16088 = getelementptr inbounds float* %tmp16087, i64 1 + %tmp16089 = getelementptr inbounds float* %tmp16088, i64 1 + %tmp16090 = getelementptr inbounds float* %tmp16089, i64 1 + %tmp16091 = getelementptr inbounds float* %tmp16090, i64 1 + %tmp16092 = getelementptr inbounds float* 
%tmp16091, i64 1 + %tmp16093 = getelementptr inbounds float* %tmp16092, i64 1 + %tmp16094 = getelementptr inbounds float* %tmp16093, i64 1 + %tmp16095 = getelementptr inbounds float* %tmp16094, i64 1 + %tmp16096 = getelementptr inbounds float* %tmp16095, i64 1 + %tmp16097 = getelementptr inbounds float* %tmp16096, i64 1 + %tmp16098 = getelementptr inbounds float* %tmp16097, i64 1 + %tmp16099 = getelementptr inbounds float* %tmp16098, i64 1 + %tmp16100 = getelementptr inbounds float* %tmp16099, i64 1 + %tmp16101 = getelementptr inbounds float* %tmp16100, i64 1 + %tmp16102 = getelementptr inbounds float* %tmp16101, i64 1 + %tmp16103 = getelementptr inbounds float* %tmp16102, i64 1 + %tmp16104 = getelementptr inbounds float* %tmp16103, i64 1 + %tmp16105 = getelementptr inbounds float* %tmp16104, i64 1 + %tmp16106 = getelementptr inbounds float* %tmp16105, i64 1 + %tmp16107 = getelementptr inbounds float* %tmp16106, i64 1 + %tmp16108 = getelementptr inbounds float* %tmp16107, i64 1 + %tmp16109 = getelementptr inbounds float* %tmp16108, i64 1 + %tmp16110 = getelementptr inbounds float* %tmp16109, i64 1 + %tmp16111 = getelementptr inbounds float* %tmp16110, i64 1 + %tmp16112 = getelementptr inbounds float* %tmp16111, i64 1 + %tmp16113 = getelementptr inbounds float* %tmp16112, i64 1 + %tmp16114 = getelementptr inbounds float* %tmp16113, i64 1 + %tmp16115 = getelementptr inbounds float* %tmp16114, i64 1 + %tmp16116 = getelementptr inbounds float* %tmp16115, i64 1 + %tmp16117 = getelementptr inbounds float* %tmp16116, i64 1 + %tmp16118 = getelementptr inbounds float* %tmp16117, i64 1 + %tmp16119 = getelementptr inbounds float* %tmp16118, i64 1 + %tmp16120 = getelementptr inbounds float* %tmp16119, i64 1 + %tmp16121 = getelementptr inbounds float* %tmp16120, i64 1 + %tmp16122 = getelementptr inbounds float* %tmp16121, i64 1 + %tmp16123 = getelementptr inbounds float* %tmp16122, i64 1 + %tmp16124 = getelementptr inbounds float* %tmp16123, i64 1 + %tmp16125 = getelementptr 
inbounds float* %tmp16124, i64 1 + %tmp16126 = getelementptr inbounds float* %tmp16125, i64 1 + %tmp16127 = getelementptr inbounds float* %tmp16126, i64 1 + %tmp16128 = getelementptr inbounds float* %tmp16127, i64 1 + %tmp16129 = getelementptr inbounds float* %tmp16128, i64 1 + %tmp16130 = getelementptr inbounds float* %tmp16129, i64 1 + %tmp16131 = getelementptr inbounds float* %tmp16130, i64 1 + %tmp16132 = getelementptr inbounds float* %tmp16131, i64 1 + %tmp16133 = getelementptr inbounds float* %tmp16132, i64 1 + %tmp16134 = getelementptr inbounds float* %tmp16133, i64 1 + %tmp16135 = getelementptr inbounds float* %tmp16134, i64 1 + %tmp16136 = getelementptr inbounds float* %tmp16135, i64 1 + %tmp16137 = getelementptr inbounds float* %tmp16136, i64 1 + %tmp16138 = getelementptr inbounds float* %tmp16137, i64 1 + %tmp16139 = getelementptr inbounds float* %tmp16138, i64 1 + %tmp16140 = getelementptr inbounds float* %tmp16139, i64 1 + %tmp16141 = getelementptr inbounds float* %tmp16140, i64 1 + %tmp16142 = getelementptr inbounds float* %tmp16141, i64 1 + %tmp16143 = getelementptr inbounds float* %tmp16142, i64 1 + %tmp16144 = getelementptr inbounds float* %tmp16143, i64 1 + %tmp16145 = getelementptr inbounds float* %tmp16144, i64 1 + %tmp16146 = getelementptr inbounds float* %tmp16145, i64 1 + %tmp16147 = getelementptr inbounds float* %tmp16146, i64 1 + %tmp16148 = getelementptr inbounds float* %tmp16147, i64 1 + %tmp16149 = getelementptr inbounds float* %tmp16148, i64 1 + %tmp16150 = getelementptr inbounds float* %tmp16149, i64 1 + %tmp16151 = getelementptr inbounds float* %tmp16150, i64 1 + %tmp16152 = getelementptr inbounds float* %tmp16151, i64 1 + %tmp16153 = getelementptr inbounds float* %tmp16152, i64 1 + %tmp16154 = getelementptr inbounds float* %tmp16153, i64 1 + %tmp16155 = getelementptr inbounds float* %tmp16154, i64 1 + %tmp16156 = getelementptr inbounds float* %tmp16155, i64 1 + %tmp16157 = getelementptr inbounds float* %tmp16156, i64 1 + %tmp16158 = 
getelementptr inbounds float* %tmp16157, i64 1 + %tmp16159 = getelementptr inbounds float* %tmp16158, i64 1 + %tmp16160 = getelementptr inbounds float* %tmp16159, i64 1 + %tmp16161 = getelementptr inbounds float* %tmp16160, i64 1 + %tmp16162 = getelementptr inbounds float* %tmp16161, i64 1 + %tmp16163 = getelementptr inbounds float* %tmp16162, i64 1 + %tmp16164 = getelementptr inbounds float* %tmp16163, i64 1 + %tmp16165 = getelementptr inbounds float* %tmp16164, i64 1 + %tmp16166 = getelementptr inbounds float* %tmp16165, i64 1 + %tmp16167 = getelementptr inbounds float* %tmp16166, i64 1 + %tmp16168 = getelementptr inbounds float* %tmp16167, i64 1 + %tmp16169 = getelementptr inbounds float* %tmp16168, i64 1 + %tmp16170 = getelementptr inbounds float* %tmp16169, i64 1 + %tmp16171 = getelementptr inbounds float* %tmp16170, i64 1 + %tmp16172 = getelementptr inbounds float* %tmp16171, i64 1 + %tmp16173 = getelementptr inbounds float* %tmp16172, i64 1 + %tmp16174 = getelementptr inbounds float* %tmp16173, i64 1 + %tmp16175 = getelementptr inbounds float* %tmp16174, i64 1 + %tmp16176 = getelementptr inbounds float* %tmp16175, i64 1 + %tmp16177 = getelementptr inbounds float* %tmp16176, i64 1 + %tmp16178 = getelementptr inbounds float* %tmp16177, i64 1 + %tmp16179 = getelementptr inbounds float* %tmp16178, i64 1 + %tmp16180 = getelementptr inbounds float* %tmp16179, i64 1 + %tmp16181 = getelementptr inbounds float* %tmp16180, i64 1 + %tmp16182 = getelementptr inbounds float* %tmp16181, i64 1 + %tmp16183 = getelementptr inbounds float* %tmp16182, i64 1 + %tmp16184 = getelementptr inbounds float* %tmp16183, i64 1 + %tmp16185 = getelementptr inbounds float* %tmp16184, i64 1 + %tmp16186 = getelementptr inbounds float* %tmp16185, i64 1 + %tmp16187 = getelementptr inbounds float* %tmp16186, i64 1 + %tmp16188 = getelementptr inbounds float* %tmp16187, i64 1 + %tmp16189 = getelementptr inbounds float* %tmp16188, i64 1 + %tmp16190 = getelementptr inbounds float* %tmp16189, i64 1 
+ %tmp16191 = getelementptr inbounds float* %tmp16190, i64 1 + %tmp16192 = getelementptr inbounds float* %tmp16191, i64 1 + %tmp16193 = getelementptr inbounds float* %tmp16192, i64 1 + %tmp16194 = getelementptr inbounds float* %tmp16193, i64 1 + %tmp16195 = getelementptr inbounds float* %tmp16194, i64 1 + %tmp16196 = getelementptr inbounds float* %tmp16195, i64 1 + %tmp16197 = getelementptr inbounds float* %tmp16196, i64 1 + %tmp16198 = getelementptr inbounds float* %tmp16197, i64 1 + %tmp16199 = getelementptr inbounds float* %tmp16198, i64 1 + %tmp16200 = getelementptr inbounds float* %tmp16199, i64 1 + %tmp16201 = getelementptr inbounds float* %tmp16200, i64 1 + %tmp16202 = getelementptr inbounds float* %tmp16201, i64 1 + %tmp16203 = getelementptr inbounds float* %tmp16202, i64 1 + %tmp16204 = getelementptr inbounds float* %tmp16203, i64 1 + %tmp16205 = getelementptr inbounds float* %tmp16204, i64 1 + %tmp16206 = getelementptr inbounds float* %tmp16205, i64 1 + %tmp16207 = getelementptr inbounds float* %tmp16206, i64 1 + %tmp16208 = getelementptr inbounds float* %tmp16207, i64 1 + %tmp16209 = getelementptr inbounds float* %tmp16208, i64 1 + %tmp16210 = getelementptr inbounds float* %tmp16209, i64 1 + %tmp16211 = getelementptr inbounds float* %tmp16210, i64 1 + %tmp16212 = getelementptr inbounds float* %tmp16211, i64 1 + %tmp16213 = getelementptr inbounds float* %tmp16212, i64 1 + %tmp16214 = getelementptr inbounds float* %tmp16213, i64 1 + %tmp16215 = getelementptr inbounds float* %tmp16214, i64 1 + %tmp16216 = getelementptr inbounds float* %tmp16215, i64 1 + %tmp16217 = getelementptr inbounds float* %tmp16216, i64 1 + %tmp16218 = getelementptr inbounds float* %tmp16217, i64 1 + %tmp16219 = getelementptr inbounds float* %tmp16218, i64 1 + %tmp16220 = getelementptr inbounds float* %tmp16219, i64 1 + %tmp16221 = getelementptr inbounds float* %tmp16220, i64 1 + %tmp16222 = getelementptr inbounds float* %tmp16221, i64 1 + %tmp16223 = getelementptr inbounds float* 
%tmp16222, i64 1 + %tmp16224 = getelementptr inbounds float* %tmp16223, i64 1 + %tmp16225 = getelementptr inbounds float* %tmp16224, i64 1 + %tmp16226 = getelementptr inbounds float* %tmp16225, i64 1 + %tmp16227 = getelementptr inbounds float* %tmp16226, i64 1 + %tmp16228 = getelementptr inbounds float* %tmp16227, i64 1 + %tmp16229 = getelementptr inbounds float* %tmp16228, i64 1 + %tmp16230 = getelementptr inbounds float* %tmp16229, i64 1 + %tmp16231 = getelementptr inbounds float* %tmp16230, i64 1 + %tmp16232 = getelementptr inbounds float* %tmp16231, i64 1 + %tmp16233 = getelementptr inbounds float* %tmp16232, i64 1 + %tmp16234 = getelementptr inbounds float* %tmp16233, i64 1 + %tmp16235 = getelementptr inbounds float* %tmp16234, i64 1 + %tmp16236 = getelementptr inbounds float* %tmp16235, i64 1 + %tmp16237 = getelementptr inbounds float* %tmp16236, i64 1 + %tmp16238 = getelementptr inbounds float* %tmp16237, i64 1 + %tmp16239 = getelementptr inbounds float* %tmp16238, i64 1 + %tmp16240 = getelementptr inbounds float* %tmp16239, i64 1 + %tmp16241 = getelementptr inbounds float* %tmp16240, i64 1 + %tmp16242 = getelementptr inbounds float* %tmp16241, i64 1 + %tmp16243 = getelementptr inbounds float* %tmp16242, i64 1 + %tmp16244 = getelementptr inbounds float* %tmp16243, i64 1 + %tmp16245 = getelementptr inbounds float* %tmp16244, i64 1 + %tmp16246 = getelementptr inbounds float* %tmp16245, i64 1 + %tmp16247 = getelementptr inbounds float* %tmp16246, i64 1 + %tmp16248 = getelementptr inbounds float* %tmp16247, i64 1 + %tmp16249 = getelementptr inbounds float* %tmp16248, i64 1 + %tmp16250 = getelementptr inbounds float* %tmp16249, i64 1 + %tmp16251 = getelementptr inbounds float* %tmp16250, i64 1 + %tmp16252 = getelementptr inbounds float* %tmp16251, i64 1 + %tmp16253 = getelementptr inbounds float* %tmp16252, i64 1 + %tmp16254 = getelementptr inbounds float* %tmp16253, i64 1 + %tmp16255 = getelementptr inbounds float* %tmp16254, i64 1 + %tmp16256 = getelementptr 
inbounds float* %tmp16255, i64 1 + %tmp16257 = getelementptr inbounds float* %tmp16256, i64 1 + %tmp16258 = getelementptr inbounds float* %tmp16257, i64 1 + %tmp16259 = getelementptr inbounds float* %tmp16258, i64 1 + %tmp16260 = getelementptr inbounds float* %tmp16259, i64 1 + %tmp16261 = getelementptr inbounds float* %tmp16260, i64 1 + %tmp16262 = getelementptr inbounds float* %tmp16261, i64 1 + %tmp16263 = getelementptr inbounds float* %tmp16262, i64 1 + %tmp16264 = getelementptr inbounds float* %tmp16263, i64 1 + %tmp16265 = getelementptr inbounds float* %tmp16264, i64 1 + %tmp16266 = getelementptr inbounds float* %tmp16265, i64 1 + %tmp16267 = getelementptr inbounds float* %tmp16266, i64 1 + %tmp16268 = getelementptr inbounds float* %tmp16267, i64 1 + %tmp16269 = getelementptr inbounds float* %tmp16268, i64 1 + %tmp16270 = getelementptr inbounds float* %tmp16269, i64 1 + %tmp16271 = getelementptr inbounds float* %tmp16270, i64 1 + %tmp16272 = getelementptr inbounds float* %tmp16271, i64 1 + %tmp16273 = getelementptr inbounds float* %tmp16272, i64 1 + %tmp16274 = getelementptr inbounds float* %tmp16273, i64 1 + %tmp16275 = getelementptr inbounds float* %tmp16274, i64 1 + %tmp16276 = getelementptr inbounds float* %tmp16275, i64 1 + %tmp16277 = getelementptr inbounds float* %tmp16276, i64 1 + %tmp16278 = getelementptr inbounds float* %tmp16277, i64 1 + %tmp16279 = getelementptr inbounds float* %tmp16278, i64 1 + %tmp16280 = getelementptr inbounds float* %tmp16279, i64 1 + %tmp16281 = getelementptr inbounds float* %tmp16280, i64 1 + %tmp16282 = getelementptr inbounds float* %tmp16281, i64 1 + %tmp16283 = getelementptr inbounds float* %tmp16282, i64 1 + %tmp16284 = getelementptr inbounds float* %tmp16283, i64 1 + %tmp16285 = getelementptr inbounds float* %tmp16284, i64 1 + %tmp16286 = getelementptr inbounds float* %tmp16285, i64 1 + %tmp16287 = getelementptr inbounds float* %tmp16286, i64 1 + %tmp16288 = getelementptr inbounds float* %tmp16287, i64 1 + %tmp16289 = 
getelementptr inbounds float* %tmp16288, i64 1 + %tmp16290 = getelementptr inbounds float* %tmp16289, i64 1 + %tmp16291 = getelementptr inbounds float* %tmp16290, i64 1 + %tmp16292 = getelementptr inbounds float* %tmp16291, i64 1 + %tmp16293 = getelementptr inbounds float* %tmp16292, i64 1 + %tmp16294 = getelementptr inbounds float* %tmp16293, i64 1 + %tmp16295 = getelementptr inbounds float* %tmp16294, i64 1 + %tmp16296 = getelementptr inbounds float* %tmp16295, i64 1 + %tmp16297 = getelementptr inbounds float* %tmp16296, i64 1 + %tmp16298 = getelementptr inbounds float* %tmp16297, i64 1 + %tmp16299 = getelementptr inbounds float* %tmp16298, i64 1 + %tmp16300 = getelementptr inbounds float* %tmp16299, i64 1 + %tmp16301 = getelementptr inbounds float* %tmp16300, i64 1 + %tmp16302 = getelementptr inbounds float* %tmp16301, i64 1 + %tmp16303 = getelementptr inbounds float* %tmp16302, i64 1 + %tmp16304 = getelementptr inbounds float* %tmp16303, i64 1 + %tmp16305 = getelementptr inbounds float* %tmp16304, i64 1 + %tmp16306 = getelementptr inbounds float* %tmp16305, i64 1 + %tmp16307 = getelementptr inbounds float* %tmp16306, i64 1 + %tmp16308 = getelementptr inbounds float* %tmp16307, i64 1 + %tmp16309 = getelementptr inbounds float* %tmp16308, i64 1 + %tmp16310 = getelementptr inbounds float* %tmp16309, i64 1 + %tmp16311 = getelementptr inbounds float* %tmp16310, i64 1 + %tmp16312 = getelementptr inbounds float* %tmp16311, i64 1 + %tmp16313 = getelementptr inbounds float* %tmp16312, i64 1 + %tmp16314 = getelementptr inbounds float* %tmp16313, i64 1 + %tmp16315 = getelementptr inbounds float* %tmp16314, i64 1 + %tmp16316 = getelementptr inbounds float* %tmp16315, i64 1 + %tmp16317 = getelementptr inbounds float* %tmp16316, i64 1 + %tmp16318 = getelementptr inbounds float* %tmp16317, i64 1 + %tmp16319 = getelementptr inbounds float* %tmp16318, i64 1 + %tmp16320 = getelementptr inbounds float* %tmp16319, i64 1 + %tmp16321 = getelementptr inbounds float* %tmp16320, i64 1 
+ %tmp16322 = getelementptr inbounds float* %tmp16321, i64 1 + %tmp16323 = getelementptr inbounds float* %tmp16322, i64 1 + %tmp16324 = getelementptr inbounds float* %tmp16323, i64 1 + %tmp16325 = getelementptr inbounds float* %tmp16324, i64 1 + %tmp16326 = getelementptr inbounds float* %tmp16325, i64 1 + %tmp16327 = getelementptr inbounds float* %tmp16326, i64 1 + %tmp16328 = getelementptr inbounds float* %tmp16327, i64 1 + %tmp16329 = getelementptr inbounds float* %tmp16328, i64 1 + %tmp16330 = getelementptr inbounds float* %tmp16329, i64 1 + %tmp16331 = getelementptr inbounds float* %tmp16330, i64 1 + %tmp16332 = getelementptr inbounds float* %tmp16331, i64 1 + %tmp16333 = getelementptr inbounds float* %tmp16332, i64 1 + %tmp16334 = getelementptr inbounds float* %tmp16333, i64 1 + %tmp16335 = getelementptr inbounds float* %tmp16334, i64 1 + %tmp16336 = getelementptr inbounds float* %tmp16335, i64 1 + %tmp16337 = getelementptr inbounds float* %tmp16336, i64 1 + %tmp16338 = getelementptr inbounds float* %tmp16337, i64 1 + %tmp16339 = getelementptr inbounds float* %tmp16338, i64 1 + %tmp16340 = getelementptr inbounds float* %tmp16339, i64 1 + %tmp16341 = getelementptr inbounds float* %tmp16340, i64 1 + %tmp16342 = getelementptr inbounds float* %tmp16341, i64 1 + %tmp16343 = getelementptr inbounds float* %tmp16342, i64 1 + %tmp16344 = getelementptr inbounds float* %tmp16343, i64 1 + %tmp16345 = getelementptr inbounds float* %tmp16344, i64 1 + %tmp16346 = getelementptr inbounds float* %tmp16345, i64 1 + %tmp16347 = getelementptr inbounds float* %tmp16346, i64 1 + %tmp16348 = getelementptr inbounds float* %tmp16347, i64 1 + %tmp16349 = getelementptr inbounds float* %tmp16348, i64 1 + %tmp16350 = getelementptr inbounds float* %tmp16349, i64 1 + %tmp16351 = getelementptr inbounds float* %tmp16350, i64 1 + %tmp16352 = getelementptr inbounds float* %tmp16351, i64 1 + %tmp16353 = getelementptr inbounds float* %tmp16352, i64 1 + %tmp16354 = getelementptr inbounds float* 
%tmp16353, i64 1 + %tmp16355 = getelementptr inbounds float* %tmp16354, i64 1 + %tmp16356 = getelementptr inbounds float* %tmp16355, i64 1 + %tmp16357 = getelementptr inbounds float* %tmp16356, i64 1 + %tmp16358 = getelementptr inbounds float* %tmp16357, i64 1 + %tmp16359 = getelementptr inbounds float* %tmp16358, i64 1 + %tmp16360 = getelementptr inbounds float* %tmp16359, i64 1 + %tmp16361 = getelementptr inbounds float* %tmp16360, i64 1 + %tmp16362 = getelementptr inbounds float* %tmp16361, i64 1 + %tmp16363 = getelementptr inbounds float* %tmp16362, i64 1 + %tmp16364 = getelementptr inbounds float* %tmp16363, i64 1 + %tmp16365 = getelementptr inbounds float* %tmp16364, i64 1 + %tmp16366 = getelementptr inbounds float* %tmp16365, i64 1 + %tmp16367 = getelementptr inbounds float* %tmp16366, i64 1 + %tmp16368 = getelementptr inbounds float* %tmp16367, i64 1 + %tmp16369 = getelementptr inbounds float* %tmp16368, i64 1 + %tmp16370 = getelementptr inbounds float* %tmp16369, i64 1 + %tmp16371 = getelementptr inbounds float* %tmp16370, i64 1 + %tmp16372 = getelementptr inbounds float* %tmp16371, i64 1 + %tmp16373 = getelementptr inbounds float* %tmp16372, i64 1 + %tmp16374 = getelementptr inbounds float* %tmp16373, i64 1 + %tmp16375 = getelementptr inbounds float* %tmp16374, i64 1 + %tmp16376 = getelementptr inbounds float* %tmp16375, i64 1 + %tmp16377 = getelementptr inbounds float* %tmp16376, i64 1 + %tmp16378 = getelementptr inbounds float* %tmp16377, i64 1 + %tmp16379 = getelementptr inbounds float* %tmp16378, i64 1 + %tmp16380 = getelementptr inbounds float* %tmp16379, i64 1 + %tmp16381 = getelementptr inbounds float* %tmp16380, i64 1 + %tmp16382 = getelementptr inbounds float* %tmp16381, i64 1 + %tmp16383 = getelementptr inbounds float* %tmp16382, i64 1 + %tmp16384 = getelementptr inbounds float* %tmp16383, i64 1 + %tmp16385 = getelementptr inbounds float* %tmp16384, i64 1 + %tmp16386 = getelementptr inbounds float* %tmp16385, i64 1 + %tmp16387 = getelementptr 
inbounds float* %tmp16386, i64 1 + %tmp16388 = getelementptr inbounds float* %tmp16387, i64 1 + %tmp16389 = getelementptr inbounds float* %tmp16388, i64 1 + %tmp16390 = getelementptr inbounds float* %tmp16389, i64 1 + %tmp16391 = getelementptr inbounds float* %tmp16390, i64 1 + %tmp16392 = getelementptr inbounds float* %tmp16391, i64 1 + %tmp16393 = getelementptr inbounds float* %tmp16392, i64 1 + %tmp16394 = getelementptr inbounds float* %tmp16393, i64 1 + %tmp16395 = getelementptr inbounds float* %tmp16394, i64 1 + %tmp16396 = getelementptr inbounds float* %tmp16395, i64 1 + %tmp16397 = getelementptr inbounds float* %tmp16396, i64 1 + %tmp16398 = getelementptr inbounds float* %tmp16397, i64 1 + %tmp16399 = getelementptr inbounds float* %tmp16398, i64 1 + %tmp16400 = getelementptr inbounds float* %tmp16399, i64 1 + %tmp16401 = getelementptr inbounds float* %tmp16400, i64 1 + %tmp16402 = getelementptr inbounds float* %tmp16401, i64 1 + %tmp16403 = getelementptr inbounds float* %tmp16402, i64 1 + %tmp16404 = getelementptr inbounds float* %tmp16403, i64 1 + %tmp16405 = getelementptr inbounds float* %tmp16404, i64 1 + %tmp16406 = getelementptr inbounds float* %tmp16405, i64 1 + %tmp16407 = getelementptr inbounds float* %tmp16406, i64 1 + %tmp16408 = getelementptr inbounds float* %tmp16407, i64 1 + %tmp16409 = getelementptr inbounds float* %tmp16408, i64 1 + %tmp16410 = getelementptr inbounds float* %tmp16409, i64 1 + %tmp16411 = getelementptr inbounds float* %tmp16410, i64 1 + %tmp16412 = getelementptr inbounds float* %tmp16411, i64 1 + %tmp16413 = getelementptr inbounds float* %tmp16412, i64 1 + %tmp16414 = getelementptr inbounds float* %tmp16413, i64 1 + %tmp16415 = getelementptr inbounds float* %tmp16414, i64 1 + %tmp16416 = getelementptr inbounds float* %tmp16415, i64 1 + %tmp16417 = getelementptr inbounds float* %tmp16416, i64 1 + %tmp16418 = getelementptr inbounds float* %tmp16417, i64 1 + %tmp16419 = getelementptr inbounds float* %tmp16418, i64 1 + %tmp16420 = 
getelementptr inbounds float* %tmp16419, i64 1 + %tmp16421 = getelementptr inbounds float* %tmp16420, i64 1 + %tmp16422 = getelementptr inbounds float* %tmp16421, i64 1 + %tmp16423 = getelementptr inbounds float* %tmp16422, i64 1 + %tmp16424 = getelementptr inbounds float* %tmp16423, i64 1 + %tmp16425 = getelementptr inbounds float* %tmp16424, i64 1 + %tmp16426 = getelementptr inbounds float* %tmp16425, i64 1 + %tmp16427 = getelementptr inbounds float* %tmp16426, i64 1 + %tmp16428 = getelementptr inbounds float* %tmp16427, i64 1 + %tmp16429 = getelementptr inbounds float* %tmp16428, i64 1 + %tmp16430 = getelementptr inbounds float* %tmp16429, i64 1 + %tmp16431 = getelementptr inbounds float* %tmp16430, i64 1 + %tmp16432 = getelementptr inbounds float* %tmp16431, i64 1 + %tmp16433 = getelementptr inbounds float* %tmp16432, i64 1 + %tmp16434 = getelementptr inbounds float* %tmp16433, i64 1 + %tmp16435 = getelementptr inbounds float* %tmp16434, i64 1 + %tmp16436 = getelementptr inbounds float* %tmp16435, i64 1 + %tmp16437 = getelementptr inbounds float* %tmp16436, i64 1 + %tmp16438 = getelementptr inbounds float* %tmp16437, i64 1 + %tmp16439 = getelementptr inbounds float* %tmp16438, i64 1 + %tmp16440 = getelementptr inbounds float* %tmp16439, i64 1 + %tmp16441 = getelementptr inbounds float* %tmp16440, i64 1 + %tmp16442 = getelementptr inbounds float* %tmp16441, i64 1 + %tmp16443 = getelementptr inbounds float* %tmp16442, i64 1 + %tmp16444 = getelementptr inbounds float* %tmp16443, i64 1 + %tmp16445 = getelementptr inbounds float* %tmp16444, i64 1 + %tmp16446 = getelementptr inbounds float* %tmp16445, i64 1 + %tmp16447 = getelementptr inbounds float* %tmp16446, i64 1 + %tmp16448 = getelementptr inbounds float* %tmp16447, i64 1 + %tmp16449 = getelementptr inbounds float* %tmp16448, i64 1 + %tmp16450 = getelementptr inbounds float* %tmp16449, i64 1 + %tmp16451 = getelementptr inbounds float* %tmp16450, i64 1 + %tmp16452 = getelementptr inbounds float* %tmp16451, i64 1 
+ %tmp16453 = getelementptr inbounds float* %tmp16452, i64 1 + %tmp16454 = getelementptr inbounds float* %tmp16453, i64 1 + %tmp16455 = getelementptr inbounds float* %tmp16454, i64 1 + %tmp16456 = getelementptr inbounds float* %tmp16455, i64 1 + %tmp16457 = getelementptr inbounds float* %tmp16456, i64 1 + %tmp16458 = getelementptr inbounds float* %tmp16457, i64 1 + %tmp16459 = getelementptr inbounds float* %tmp16458, i64 1 + %tmp16460 = getelementptr inbounds float* %tmp16459, i64 1 + %tmp16461 = getelementptr inbounds float* %tmp16460, i64 1 + %tmp16462 = getelementptr inbounds float* %tmp16461, i64 1 + %tmp16463 = getelementptr inbounds float* %tmp16462, i64 1 + %tmp16464 = getelementptr inbounds float* %tmp16463, i64 1 + %tmp16465 = getelementptr inbounds float* %tmp16464, i64 1 + %tmp16466 = getelementptr inbounds float* %tmp16465, i64 1 + %tmp16467 = getelementptr inbounds float* %tmp16466, i64 1 + %tmp16468 = getelementptr inbounds float* %tmp16467, i64 1 + %tmp16469 = getelementptr inbounds float* %tmp16468, i64 1 + %tmp16470 = getelementptr inbounds float* %tmp16469, i64 1 + %tmp16471 = getelementptr inbounds float* %tmp16470, i64 1 + %tmp16472 = getelementptr inbounds float* %tmp16471, i64 1 + %tmp16473 = getelementptr inbounds float* %tmp16472, i64 1 + %tmp16474 = getelementptr inbounds float* %tmp16473, i64 1 + %tmp16475 = getelementptr inbounds float* %tmp16474, i64 1 + %tmp16476 = getelementptr inbounds float* %tmp16475, i64 1 + %tmp16477 = getelementptr inbounds float* %tmp16476, i64 1 + %tmp16478 = getelementptr inbounds float* %tmp16477, i64 1 + %tmp16479 = getelementptr inbounds float* %tmp16478, i64 1 + %tmp16480 = getelementptr inbounds float* %tmp16479, i64 1 + %tmp16481 = getelementptr inbounds float* %tmp16480, i64 1 + %tmp16482 = getelementptr inbounds float* %tmp16481, i64 1 + %tmp16483 = getelementptr inbounds float* %tmp16482, i64 1 + %tmp16484 = getelementptr inbounds float* %tmp16483, i64 1 + %tmp16485 = getelementptr inbounds float* 
%tmp16484, i64 1 + %tmp16486 = getelementptr inbounds float* %tmp16485, i64 1 + %tmp16487 = getelementptr inbounds float* %tmp16486, i64 1 + %tmp16488 = getelementptr inbounds float* %tmp16487, i64 1 + %tmp16489 = getelementptr inbounds float* %tmp16488, i64 1 + %tmp16490 = getelementptr inbounds float* %tmp16489, i64 1 + %tmp16491 = getelementptr inbounds float* %tmp16490, i64 1 + %tmp16492 = getelementptr inbounds float* %tmp16491, i64 1 + %tmp16493 = getelementptr inbounds float* %tmp16492, i64 1 + %tmp16494 = getelementptr inbounds float* %tmp16493, i64 1 + %tmp16495 = getelementptr inbounds float* %tmp16494, i64 1 + %tmp16496 = getelementptr inbounds float* %tmp16495, i64 1 + %tmp16497 = getelementptr inbounds float* %tmp16496, i64 1 + %tmp16498 = getelementptr inbounds float* %tmp16497, i64 1 + %tmp16499 = getelementptr inbounds float* %tmp16498, i64 1 + %tmp16500 = getelementptr inbounds float* %tmp16499, i64 1 + %tmp16501 = getelementptr inbounds float* %tmp16500, i64 1 + %tmp16502 = getelementptr inbounds float* %tmp16501, i64 1 + %tmp16503 = getelementptr inbounds float* %tmp16502, i64 1 + %tmp16504 = getelementptr inbounds float* %tmp16503, i64 1 + %tmp16505 = getelementptr inbounds float* %tmp16504, i64 1 + %tmp16506 = getelementptr inbounds float* %tmp16505, i64 1 + %tmp16507 = getelementptr inbounds float* %tmp16506, i64 1 + %tmp16508 = getelementptr inbounds float* %tmp16507, i64 1 + %tmp16509 = getelementptr inbounds float* %tmp16508, i64 1 + %tmp16510 = getelementptr inbounds float* %tmp16509, i64 1 + %tmp16511 = getelementptr inbounds float* %tmp16510, i64 1 + %tmp16512 = getelementptr inbounds float* %tmp16511, i64 1 + %tmp16513 = getelementptr inbounds float* %tmp16512, i64 1 + %tmp16514 = getelementptr inbounds float* %tmp16513, i64 1 + %tmp16515 = getelementptr inbounds float* %tmp16514, i64 1 + %tmp16516 = getelementptr inbounds float* %tmp16515, i64 1 + %tmp16517 = getelementptr inbounds float* %tmp16516, i64 1 + %tmp16518 = getelementptr 
inbounds float* %tmp16517, i64 1 + %tmp16519 = getelementptr inbounds float* %tmp16518, i64 1 + %tmp16520 = getelementptr inbounds float* %tmp16519, i64 1 + %tmp16521 = getelementptr inbounds float* %tmp16520, i64 1 + %tmp16522 = getelementptr inbounds float* %tmp16521, i64 1 + %tmp16523 = getelementptr inbounds float* %tmp16522, i64 1 + %tmp16524 = getelementptr inbounds float* %tmp16523, i64 1 + %tmp16525 = getelementptr inbounds float* %tmp16524, i64 1 + %tmp16526 = getelementptr inbounds float* %tmp16525, i64 1 + %tmp16527 = getelementptr inbounds float* %tmp16526, i64 1 + %tmp16528 = getelementptr inbounds float* %tmp16527, i64 1 + %tmp16529 = getelementptr inbounds float* %tmp16528, i64 1 + %tmp16530 = getelementptr inbounds float* %tmp16529, i64 1 + %tmp16531 = getelementptr inbounds float* %tmp16530, i64 1 + %tmp16532 = getelementptr inbounds float* %tmp16531, i64 1 + %tmp16533 = getelementptr inbounds float* %tmp16532, i64 1 + %tmp16534 = getelementptr inbounds float* %tmp16533, i64 1 + %tmp16535 = getelementptr inbounds float* %tmp16534, i64 1 + %tmp16536 = getelementptr inbounds float* %tmp16535, i64 1 + %tmp16537 = getelementptr inbounds float* %tmp16536, i64 1 + %tmp16538 = getelementptr inbounds float* %tmp16537, i64 1 + %tmp16539 = getelementptr inbounds float* %tmp16538, i64 1 + %tmp16540 = getelementptr inbounds float* %tmp16539, i64 1 + %tmp16541 = getelementptr inbounds float* %tmp16540, i64 1 + %tmp16542 = getelementptr inbounds float* %tmp16541, i64 1 + %tmp16543 = getelementptr inbounds float* %tmp16542, i64 1 + %tmp16544 = getelementptr inbounds float* %tmp16543, i64 1 + %tmp16545 = getelementptr inbounds float* %tmp16544, i64 1 + %tmp16546 = getelementptr inbounds float* %tmp16545, i64 1 + %tmp16547 = getelementptr inbounds float* %tmp16546, i64 1 + %tmp16548 = getelementptr inbounds float* %tmp16547, i64 1 + %tmp16549 = getelementptr inbounds float* %tmp16548, i64 1 + %tmp16550 = getelementptr inbounds float* %tmp16549, i64 1 + %tmp16551 = 
getelementptr inbounds float* %tmp16550, i64 1 + %tmp16552 = getelementptr inbounds float* %tmp16551, i64 1 + %tmp16553 = getelementptr inbounds float* %tmp16552, i64 1 + %tmp16554 = getelementptr inbounds float* %tmp16553, i64 1 + %tmp16555 = getelementptr inbounds float* %tmp16554, i64 1 + %tmp16556 = getelementptr inbounds float* %tmp16555, i64 1 + %tmp16557 = getelementptr inbounds float* %tmp16556, i64 1 + %tmp16558 = getelementptr inbounds float* %tmp16557, i64 1 + %tmp16559 = getelementptr inbounds float* %tmp16558, i64 1 + %tmp16560 = getelementptr inbounds float* %tmp16559, i64 1 + %tmp16561 = getelementptr inbounds float* %tmp16560, i64 1 + %tmp16562 = getelementptr inbounds float* %tmp16561, i64 1 + %tmp16563 = getelementptr inbounds float* %tmp16562, i64 1 + %tmp16564 = getelementptr inbounds float* %tmp16563, i64 1 + %tmp16565 = getelementptr inbounds float* %tmp16564, i64 1 + %tmp16566 = getelementptr inbounds float* %tmp16565, i64 1 + %tmp16567 = getelementptr inbounds float* %tmp16566, i64 1 + %tmp16568 = getelementptr inbounds float* %tmp16567, i64 1 + %tmp16569 = getelementptr inbounds float* %tmp16568, i64 1 + %tmp16570 = getelementptr inbounds float* %tmp16569, i64 1 + %tmp16571 = getelementptr inbounds float* %tmp16570, i64 1 + %tmp16572 = getelementptr inbounds float* %tmp16571, i64 1 + %tmp16573 = getelementptr inbounds float* %tmp16572, i64 1 + %tmp16574 = getelementptr inbounds float* %tmp16573, i64 1 + %tmp16575 = getelementptr inbounds float* %tmp16574, i64 1 + %tmp16576 = getelementptr inbounds float* %tmp16575, i64 1 + %tmp16577 = getelementptr inbounds float* %tmp16576, i64 1 + %tmp16578 = getelementptr inbounds float* %tmp16577, i64 1 + %tmp16579 = getelementptr inbounds float* %tmp16578, i64 1 + %tmp16580 = getelementptr inbounds float* %tmp16579, i64 1 + %tmp16581 = getelementptr inbounds float* %tmp16580, i64 1 + %tmp16582 = getelementptr inbounds float* %tmp16581, i64 1 + %tmp16583 = getelementptr inbounds float* %tmp16582, i64 1 
+ %tmp16584 = getelementptr inbounds float* %tmp16583, i64 1 + %tmp16585 = getelementptr inbounds float* %tmp16584, i64 1 + %tmp16586 = getelementptr inbounds float* %tmp16585, i64 1 + %tmp16587 = getelementptr inbounds float* %tmp16586, i64 1 + %tmp16588 = getelementptr inbounds float* %tmp16587, i64 1 + %tmp16589 = getelementptr inbounds float* %tmp16588, i64 1 + %tmp16590 = getelementptr inbounds float* %tmp16589, i64 1 + %tmp16591 = getelementptr inbounds float* %tmp16590, i64 1 + %tmp16592 = getelementptr inbounds float* %tmp16591, i64 1 + %tmp16593 = getelementptr inbounds float* %tmp16592, i64 1 + %tmp16594 = getelementptr inbounds float* %tmp16593, i64 1 + %tmp16595 = getelementptr inbounds float* %tmp16594, i64 1 + %tmp16596 = getelementptr inbounds float* %tmp16595, i64 1 + %tmp16597 = getelementptr inbounds float* %tmp16596, i64 1 + %tmp16598 = getelementptr inbounds float* %tmp16597, i64 1 + %tmp16599 = getelementptr inbounds float* %tmp16598, i64 1 + %tmp16600 = getelementptr inbounds float* %tmp16599, i64 1 + %tmp16601 = getelementptr inbounds float* %tmp16600, i64 1 + %tmp16602 = getelementptr inbounds float* %tmp16601, i64 1 + %tmp16603 = getelementptr inbounds float* %tmp16602, i64 1 + %tmp16604 = getelementptr inbounds float* %tmp16603, i64 1 + %tmp16605 = getelementptr inbounds float* %tmp16604, i64 1 + %tmp16606 = getelementptr inbounds float* %tmp16605, i64 1 + %tmp16607 = getelementptr inbounds float* %tmp16606, i64 1 + %tmp16608 = getelementptr inbounds float* %tmp16607, i64 1 + %tmp16609 = getelementptr inbounds float* %tmp16608, i64 1 + %tmp16610 = getelementptr inbounds float* %tmp16609, i64 1 + %tmp16611 = getelementptr inbounds float* %tmp16610, i64 1 + %tmp16612 = getelementptr inbounds float* %tmp16611, i64 1 + %tmp16613 = getelementptr inbounds float* %tmp16612, i64 1 + %tmp16614 = getelementptr inbounds float* %tmp16613, i64 1 + %tmp16615 = getelementptr inbounds float* %tmp16614, i64 1 + %tmp16616 = getelementptr inbounds float* 
%tmp16615, i64 1 + %tmp16617 = getelementptr inbounds float* %tmp16616, i64 1 + %tmp16618 = getelementptr inbounds float* %tmp16617, i64 1 + %tmp16619 = getelementptr inbounds float* %tmp16618, i64 1 + %tmp16620 = getelementptr inbounds float* %tmp16619, i64 1 + %tmp16621 = getelementptr inbounds float* %tmp16620, i64 1 + %tmp16622 = getelementptr inbounds float* %tmp16621, i64 1 + %tmp16623 = getelementptr inbounds float* %tmp16622, i64 1 + %tmp16624 = getelementptr inbounds float* %tmp16623, i64 1 + %tmp16625 = getelementptr inbounds float* %tmp16624, i64 1 + %tmp16626 = getelementptr inbounds float* %tmp16625, i64 1 + %tmp16627 = getelementptr inbounds float* %tmp16626, i64 1 + %tmp16628 = getelementptr inbounds float* %tmp16627, i64 1 + %tmp16629 = getelementptr inbounds float* %tmp16628, i64 1 + %tmp16630 = getelementptr inbounds float* %tmp16629, i64 1 + %tmp16631 = getelementptr inbounds float* %tmp16630, i64 1 + %tmp16632 = getelementptr inbounds float* %tmp16631, i64 1 + %tmp16633 = getelementptr inbounds float* %tmp16632, i64 1 + %tmp16634 = getelementptr inbounds float* %tmp16633, i64 1 + %tmp16635 = getelementptr inbounds float* %tmp16634, i64 1 + %tmp16636 = getelementptr inbounds float* %tmp16635, i64 1 + %tmp16637 = getelementptr inbounds float* %tmp16636, i64 1 + %tmp16638 = getelementptr inbounds float* %tmp16637, i64 1 + %tmp16639 = getelementptr inbounds float* %tmp16638, i64 1 + %tmp16640 = getelementptr inbounds float* %tmp16639, i64 1 + %tmp16641 = getelementptr inbounds float* %tmp16640, i64 1 + %tmp16642 = getelementptr inbounds float* %tmp16641, i64 1 + %tmp16643 = getelementptr inbounds float* %tmp16642, i64 1 + %tmp16644 = getelementptr inbounds float* %tmp16643, i64 1 + %tmp16645 = getelementptr inbounds float* %tmp16644, i64 1 + %tmp16646 = getelementptr inbounds float* %tmp16645, i64 1 + %tmp16647 = getelementptr inbounds float* %tmp16646, i64 1 + %tmp16648 = getelementptr inbounds float* %tmp16647, i64 1 + %tmp16649 = getelementptr 
inbounds float* %tmp16648, i64 1 + %tmp16650 = getelementptr inbounds float* %tmp16649, i64 1 + %tmp16651 = getelementptr inbounds float* %tmp16650, i64 1 + %tmp16652 = getelementptr inbounds float* %tmp16651, i64 1 + %tmp16653 = getelementptr inbounds float* %tmp16652, i64 1 + %tmp16654 = getelementptr inbounds float* %tmp16653, i64 1 + %tmp16655 = getelementptr inbounds float* %tmp16654, i64 1 + %tmp16656 = getelementptr inbounds float* %tmp16655, i64 1 + %tmp16657 = getelementptr inbounds float* %tmp16656, i64 1 + %tmp16658 = getelementptr inbounds float* %tmp16657, i64 1 + %tmp16659 = getelementptr inbounds float* %tmp16658, i64 1 + %tmp16660 = getelementptr inbounds float* %tmp16659, i64 1 + %tmp16661 = getelementptr inbounds float* %tmp16660, i64 1 + %tmp16662 = getelementptr inbounds float* %tmp16661, i64 1 + %tmp16663 = getelementptr inbounds float* %tmp16662, i64 1 + %tmp16664 = getelementptr inbounds float* %tmp16663, i64 1 + %tmp16665 = getelementptr inbounds float* %tmp16664, i64 1 + %tmp16666 = getelementptr inbounds float* %tmp16665, i64 1 + %tmp16667 = getelementptr inbounds float* %tmp16666, i64 1 + %tmp16668 = getelementptr inbounds float* %tmp16667, i64 1 + %tmp16669 = getelementptr inbounds float* %tmp16668, i64 1 + %tmp16670 = getelementptr inbounds float* %tmp16669, i64 1 + %tmp16671 = getelementptr inbounds float* %tmp16670, i64 1 + %tmp16672 = getelementptr inbounds float* %tmp16671, i64 1 + %tmp16673 = getelementptr inbounds float* %tmp16672, i64 1 + %tmp16674 = getelementptr inbounds float* %tmp16673, i64 1 + %tmp16675 = getelementptr inbounds float* %tmp16674, i64 1 + %tmp16676 = getelementptr inbounds float* %tmp16675, i64 1 + %tmp16677 = getelementptr inbounds float* %tmp16676, i64 1 + %tmp16678 = getelementptr inbounds float* %tmp16677, i64 1 + %tmp16679 = getelementptr inbounds float* %tmp16678, i64 1 + %tmp16680 = getelementptr inbounds float* %tmp16679, i64 1 + %tmp16681 = getelementptr inbounds float* %tmp16680, i64 1 + %tmp16682 = 
getelementptr inbounds float* %tmp16681, i64 1 + %tmp16683 = getelementptr inbounds float* %tmp16682, i64 1 + %tmp16684 = getelementptr inbounds float* %tmp16683, i64 1 + %tmp16685 = getelementptr inbounds float* %tmp16684, i64 1 + %tmp16686 = getelementptr inbounds float* %tmp16685, i64 1 + %tmp16687 = getelementptr inbounds float* %tmp16686, i64 1 + %tmp16688 = getelementptr inbounds float* %tmp16687, i64 1 + %tmp16689 = getelementptr inbounds float* %tmp16688, i64 1 + %tmp16690 = getelementptr inbounds float* %tmp16689, i64 1 + %tmp16691 = getelementptr inbounds float* %tmp16690, i64 1 + %tmp16692 = getelementptr inbounds float* %tmp16691, i64 1 + %tmp16693 = getelementptr inbounds float* %tmp16692, i64 1 + %tmp16694 = getelementptr inbounds float* %tmp16693, i64 1 + %tmp16695 = getelementptr inbounds float* %tmp16694, i64 1 + %tmp16696 = getelementptr inbounds float* %tmp16695, i64 1 + %tmp16697 = getelementptr inbounds float* %tmp16696, i64 1 + %tmp16698 = getelementptr inbounds float* %tmp16697, i64 1 + %tmp16699 = getelementptr inbounds float* %tmp16698, i64 1 + %tmp16700 = getelementptr inbounds float* %tmp16699, i64 1 + %tmp16701 = getelementptr inbounds float* %tmp16700, i64 1 + %tmp16702 = getelementptr inbounds float* %tmp16701, i64 1 + %tmp16703 = getelementptr inbounds float* %tmp16702, i64 1 + %tmp16704 = getelementptr inbounds float* %tmp16703, i64 1 + %tmp16705 = getelementptr inbounds float* %tmp16704, i64 1 + %tmp16706 = getelementptr inbounds float* %tmp16705, i64 1 + %tmp16707 = getelementptr inbounds float* %tmp16706, i64 1 + %tmp16708 = getelementptr inbounds float* %tmp16707, i64 1 + %tmp16709 = getelementptr inbounds float* %tmp16708, i64 1 + %tmp16710 = getelementptr inbounds float* %tmp16709, i64 1 + %tmp16711 = getelementptr inbounds float* %tmp16710, i64 1 + %tmp16712 = getelementptr inbounds float* %tmp16711, i64 1 + %tmp16713 = getelementptr inbounds float* %tmp16712, i64 1 + %tmp16714 = getelementptr inbounds float* %tmp16713, i64 1 
+ %tmp16715 = getelementptr inbounds float* %tmp16714, i64 1 + %tmp16716 = getelementptr inbounds float* %tmp16715, i64 1 + %tmp16717 = getelementptr inbounds float* %tmp16716, i64 1 + %tmp16718 = getelementptr inbounds float* %tmp16717, i64 1 + %tmp16719 = getelementptr inbounds float* %tmp16718, i64 1 + %tmp16720 = getelementptr inbounds float* %tmp16719, i64 1 + %tmp16721 = getelementptr inbounds float* %tmp16720, i64 1 + %tmp16722 = getelementptr inbounds float* %tmp16721, i64 1 + %tmp16723 = getelementptr inbounds float* %tmp16722, i64 1 + %tmp16724 = getelementptr inbounds float* %tmp16723, i64 1 + %tmp16725 = getelementptr inbounds float* %tmp16724, i64 1 + %tmp16726 = getelementptr inbounds float* %tmp16725, i64 1 + %tmp16727 = getelementptr inbounds float* %tmp16726, i64 1 + %tmp16728 = getelementptr inbounds float* %tmp16727, i64 1 + %tmp16729 = getelementptr inbounds float* %tmp16728, i64 1 + %tmp16730 = getelementptr inbounds float* %tmp16729, i64 1 + %tmp16731 = getelementptr inbounds float* %tmp16730, i64 1 + %tmp16732 = getelementptr inbounds float* %tmp16731, i64 1 + %tmp16733 = getelementptr inbounds float* %tmp16732, i64 1 + %tmp16734 = getelementptr inbounds float* %tmp16733, i64 1 + %tmp16735 = getelementptr inbounds float* %tmp16734, i64 1 + %tmp16736 = getelementptr inbounds float* %tmp16735, i64 1 + %tmp16737 = getelementptr inbounds float* %tmp16736, i64 1 + %tmp16738 = getelementptr inbounds float* %tmp16737, i64 1 + %tmp16739 = getelementptr inbounds float* %tmp16738, i64 1 + %tmp16740 = getelementptr inbounds float* %tmp16739, i64 1 + %tmp16741 = getelementptr inbounds float* %tmp16740, i64 1 + %tmp16742 = getelementptr inbounds float* %tmp16741, i64 1 + %tmp16743 = getelementptr inbounds float* %tmp16742, i64 1 + %tmp16744 = getelementptr inbounds float* %tmp16743, i64 1 + %tmp16745 = getelementptr inbounds float* %tmp16744, i64 1 + %tmp16746 = getelementptr inbounds float* %tmp16745, i64 1 + %tmp16747 = getelementptr inbounds float* 
%tmp16746, i64 1 + %tmp16748 = getelementptr inbounds float* %tmp16747, i64 1 + %tmp16749 = getelementptr inbounds float* %tmp16748, i64 1 + %tmp16750 = getelementptr inbounds float* %tmp16749, i64 1 + %tmp16751 = getelementptr inbounds float* %tmp16750, i64 1 + %tmp16752 = getelementptr inbounds float* %tmp16751, i64 1 + %tmp16753 = getelementptr inbounds float* %tmp16752, i64 1 + %tmp16754 = getelementptr inbounds float* %tmp16753, i64 1 + %tmp16755 = getelementptr inbounds float* %tmp16754, i64 1 + %tmp16756 = getelementptr inbounds float* %tmp16755, i64 1 + %tmp16757 = getelementptr inbounds float* %tmp16756, i64 1 + %tmp16758 = getelementptr inbounds float* %tmp16757, i64 1 + %tmp16759 = getelementptr inbounds float* %tmp16758, i64 1 + %tmp16760 = getelementptr inbounds float* %tmp16759, i64 1 + %tmp16761 = getelementptr inbounds float* %tmp16760, i64 1 + %tmp16762 = getelementptr inbounds float* %tmp16761, i64 1 + %tmp16763 = getelementptr inbounds float* %tmp16762, i64 1 + %tmp16764 = getelementptr inbounds float* %tmp16763, i64 1 + %tmp16765 = getelementptr inbounds float* %tmp16764, i64 1 + %tmp16766 = getelementptr inbounds float* %tmp16765, i64 1 + %tmp16767 = getelementptr inbounds float* %tmp16766, i64 1 + %tmp16768 = getelementptr inbounds float* %tmp16767, i64 1 + %tmp16769 = getelementptr inbounds float* %tmp16768, i64 1 + %tmp16770 = getelementptr inbounds float* %tmp16769, i64 1 + %tmp16771 = getelementptr inbounds float* %tmp16770, i64 1 + %tmp16772 = getelementptr inbounds float* %tmp16771, i64 1 + %tmp16773 = getelementptr inbounds float* %tmp16772, i64 1 + %tmp16774 = getelementptr inbounds float* %tmp16773, i64 1 + %tmp16775 = getelementptr inbounds float* %tmp16774, i64 1 + %tmp16776 = getelementptr inbounds float* %tmp16775, i64 1 + %tmp16777 = getelementptr inbounds float* %tmp16776, i64 1 + %tmp16778 = getelementptr inbounds float* %tmp16777, i64 1 + %tmp16779 = getelementptr inbounds float* %tmp16778, i64 1 + %tmp16780 = getelementptr 
inbounds float* %tmp16779, i64 1 + %tmp16781 = getelementptr inbounds float* %tmp16780, i64 1 + %tmp16782 = getelementptr inbounds float* %tmp16781, i64 1 + %tmp16783 = getelementptr inbounds float* %tmp16782, i64 1 + %tmp16784 = getelementptr inbounds float* %tmp16783, i64 1 + %tmp16785 = getelementptr inbounds float* %tmp16784, i64 1 + %tmp16786 = getelementptr inbounds float* %tmp16785, i64 1 + %tmp16787 = getelementptr inbounds float* %tmp16786, i64 1 + %tmp16788 = getelementptr inbounds float* %tmp16787, i64 1 + %tmp16789 = getelementptr inbounds float* %tmp16788, i64 1 + %tmp16790 = getelementptr inbounds float* %tmp16789, i64 1 + %tmp16791 = getelementptr inbounds float* %tmp16790, i64 1 + %tmp16792 = getelementptr inbounds float* %tmp16791, i64 1 + %tmp16793 = getelementptr inbounds float* %tmp16792, i64 1 + %tmp16794 = getelementptr inbounds float* %tmp16793, i64 1 + %tmp16795 = getelementptr inbounds float* %tmp16794, i64 1 + %tmp16796 = getelementptr inbounds float* %tmp16795, i64 1 + %tmp16797 = getelementptr inbounds float* %tmp16796, i64 1 + %tmp16798 = getelementptr inbounds float* %tmp16797, i64 1 + %tmp16799 = getelementptr inbounds float* %tmp16798, i64 1 + %tmp16800 = getelementptr inbounds float* %tmp16799, i64 1 + %tmp16801 = getelementptr inbounds float* %tmp16800, i64 1 + %tmp16802 = getelementptr inbounds float* %tmp16801, i64 1 + %tmp16803 = getelementptr inbounds float* %tmp16802, i64 1 + %tmp16804 = getelementptr inbounds float* %tmp16803, i64 1 + %tmp16805 = getelementptr inbounds float* %tmp16804, i64 1 + %tmp16806 = getelementptr inbounds float* %tmp16805, i64 1 + %tmp16807 = getelementptr inbounds float* %tmp16806, i64 1 + %tmp16808 = getelementptr inbounds float* %tmp16807, i64 1 + %tmp16809 = getelementptr inbounds float* %tmp16808, i64 1 + %tmp16810 = getelementptr inbounds float* %tmp16809, i64 1 + %tmp16811 = getelementptr inbounds float* %tmp16810, i64 1 + %tmp16812 = getelementptr inbounds float* %tmp16811, i64 1 + %tmp16813 = 
getelementptr inbounds float* %tmp16812, i64 1 + %tmp16814 = getelementptr inbounds float* %tmp16813, i64 1 + %tmp16815 = getelementptr inbounds float* %tmp16814, i64 1 + %tmp16816 = getelementptr inbounds float* %tmp16815, i64 1 + %tmp16817 = getelementptr inbounds float* %tmp16816, i64 1 + %tmp16818 = getelementptr inbounds float* %tmp16817, i64 1 + %tmp16819 = getelementptr inbounds float* %tmp16818, i64 1 + %tmp16820 = getelementptr inbounds float* %tmp16819, i64 1 + %tmp16821 = getelementptr inbounds float* %tmp16820, i64 1 + %tmp16822 = getelementptr inbounds float* %tmp16821, i64 1 + %tmp16823 = getelementptr inbounds float* %tmp16822, i64 1 + %tmp16824 = getelementptr inbounds float* %tmp16823, i64 1 + %tmp16825 = getelementptr inbounds float* %tmp16824, i64 1 + %tmp16826 = getelementptr inbounds float* %tmp16825, i64 1 + %tmp16827 = getelementptr inbounds float* %tmp16826, i64 1 + %tmp16828 = getelementptr inbounds float* %tmp16827, i64 1 + %tmp16829 = getelementptr inbounds float* %tmp16828, i64 1 + %tmp16830 = getelementptr inbounds float* %tmp16829, i64 1 + %tmp16831 = getelementptr inbounds float* %tmp16830, i64 1 + %tmp16832 = getelementptr inbounds float* %tmp16831, i64 1 + %tmp16833 = getelementptr inbounds float* %tmp16832, i64 1 + %tmp16834 = getelementptr inbounds float* %tmp16833, i64 1 + %tmp16835 = getelementptr inbounds float* %tmp16834, i64 1 + %tmp16836 = getelementptr inbounds float* %tmp16835, i64 1 + %tmp16837 = getelementptr inbounds float* %tmp16836, i64 1 + %tmp16838 = getelementptr inbounds float* %tmp16837, i64 1 + %tmp16839 = getelementptr inbounds float* %tmp16838, i64 1 + %tmp16840 = getelementptr inbounds float* %tmp16839, i64 1 + %tmp16841 = getelementptr inbounds float* %tmp16840, i64 1 + %tmp16842 = getelementptr inbounds float* %tmp16841, i64 1 + %tmp16843 = getelementptr inbounds float* %tmp16842, i64 1 + %tmp16844 = getelementptr inbounds float* %tmp16843, i64 1 + %tmp16845 = getelementptr inbounds float* %tmp16844, i64 1 
+ %tmp16846 = getelementptr inbounds float* %tmp16845, i64 1 + %tmp16847 = getelementptr inbounds float* %tmp16846, i64 1 + %tmp16848 = getelementptr inbounds float* %tmp16847, i64 1 + %tmp16849 = getelementptr inbounds float* %tmp16848, i64 1 + %tmp16850 = getelementptr inbounds float* %tmp16849, i64 1 + %tmp16851 = getelementptr inbounds float* %tmp16850, i64 1 + %tmp16852 = getelementptr inbounds float* %tmp16851, i64 1 + %tmp16853 = getelementptr inbounds float* %tmp16852, i64 1 + %tmp16854 = getelementptr inbounds float* %tmp16853, i64 1 + %tmp16855 = getelementptr inbounds float* %tmp16854, i64 1 + %tmp16856 = getelementptr inbounds float* %tmp16855, i64 1 + %tmp16857 = getelementptr inbounds float* %tmp16856, i64 1 + %tmp16858 = getelementptr inbounds float* %tmp16857, i64 1 + %tmp16859 = getelementptr inbounds float* %tmp16858, i64 1 + %tmp16860 = getelementptr inbounds float* %tmp16859, i64 1 + %tmp16861 = getelementptr inbounds float* %tmp16860, i64 1 + %tmp16862 = getelementptr inbounds float* %tmp16861, i64 1 + %tmp16863 = getelementptr inbounds float* %tmp16862, i64 1 + %tmp16864 = getelementptr inbounds float* %tmp16863, i64 1 + %tmp16865 = getelementptr inbounds float* %tmp16864, i64 1 + %tmp16866 = getelementptr inbounds float* %tmp16865, i64 1 + %tmp16867 = getelementptr inbounds float* %tmp16866, i64 1 + %tmp16868 = getelementptr inbounds float* %tmp16867, i64 1 + %tmp16869 = getelementptr inbounds float* %tmp16868, i64 1 + %tmp16870 = getelementptr inbounds float* %tmp16869, i64 1 + %tmp16871 = getelementptr inbounds float* %tmp16870, i64 1 + %tmp16872 = getelementptr inbounds float* %tmp16871, i64 1 + %tmp16873 = getelementptr inbounds float* %tmp16872, i64 1 + %tmp16874 = getelementptr inbounds float* %tmp16873, i64 1 + %tmp16875 = getelementptr inbounds float* %tmp16874, i64 1 + %tmp16876 = getelementptr inbounds float* %tmp16875, i64 1 + %tmp16877 = getelementptr inbounds float* %tmp16876, i64 1 + %tmp16878 = getelementptr inbounds float* 
%tmp16877, i64 1 + %tmp16879 = getelementptr inbounds float* %tmp16878, i64 1 + %tmp16880 = getelementptr inbounds float* %tmp16879, i64 1 + %tmp16881 = getelementptr inbounds float* %tmp16880, i64 1 + %tmp16882 = getelementptr inbounds float* %tmp16881, i64 1 + %tmp16883 = getelementptr inbounds float* %tmp16882, i64 1 + %tmp16884 = getelementptr inbounds float* %tmp16883, i64 1 + %tmp16885 = getelementptr inbounds float* %tmp16884, i64 1 + %tmp16886 = getelementptr inbounds float* %tmp16885, i64 1 + %tmp16887 = getelementptr inbounds float* %tmp16886, i64 1 + %tmp16888 = getelementptr inbounds float* %tmp16887, i64 1 + %tmp16889 = getelementptr inbounds float* %tmp16888, i64 1 + %tmp16890 = getelementptr inbounds float* %tmp16889, i64 1 + %tmp16891 = getelementptr inbounds float* %tmp16890, i64 1 + %tmp16892 = getelementptr inbounds float* %tmp16891, i64 1 + %tmp16893 = getelementptr inbounds float* %tmp16892, i64 1 + %tmp16894 = getelementptr inbounds float* %tmp16893, i64 1 + %tmp16895 = getelementptr inbounds float* %tmp16894, i64 1 + %tmp16896 = getelementptr inbounds float* %tmp16895, i64 1 + %tmp16897 = getelementptr inbounds float* %tmp16896, i64 1 + %tmp16898 = getelementptr inbounds float* %tmp16897, i64 1 + %tmp16899 = getelementptr inbounds float* %tmp16898, i64 1 + %tmp16900 = getelementptr inbounds float* %tmp16899, i64 1 + %tmp16901 = getelementptr inbounds float* %tmp16900, i64 1 + %tmp16902 = getelementptr inbounds float* %tmp16901, i64 1 + %tmp16903 = getelementptr inbounds float* %tmp16902, i64 1 + %tmp16904 = getelementptr inbounds float* %tmp16903, i64 1 + %tmp16905 = getelementptr inbounds float* %tmp16904, i64 1 + %tmp16906 = getelementptr inbounds float* %tmp16905, i64 1 + %tmp16907 = getelementptr inbounds float* %tmp16906, i64 1 + %tmp16908 = getelementptr inbounds float* %tmp16907, i64 1 + %tmp16909 = getelementptr inbounds float* %tmp16908, i64 1 + %tmp16910 = getelementptr inbounds float* %tmp16909, i64 1 + %tmp16911 = getelementptr 
inbounds float* %tmp16910, i64 1 + %tmp16912 = getelementptr inbounds float* %tmp16911, i64 1 + %tmp16913 = getelementptr inbounds float* %tmp16912, i64 1 + %tmp16914 = getelementptr inbounds float* %tmp16913, i64 1 + %tmp16915 = getelementptr inbounds float* %tmp16914, i64 1 + %tmp16916 = getelementptr inbounds float* %tmp16915, i64 1 + %tmp16917 = getelementptr inbounds float* %tmp16916, i64 1 + %tmp16918 = getelementptr inbounds float* %tmp16917, i64 1 + %tmp16919 = getelementptr inbounds float* %tmp16918, i64 1 + %tmp16920 = getelementptr inbounds float* %tmp16919, i64 1 + %tmp16921 = getelementptr inbounds float* %tmp16920, i64 1 + %tmp16922 = getelementptr inbounds float* %tmp16921, i64 1 + %tmp16923 = getelementptr inbounds float* %tmp16922, i64 1 + %tmp16924 = getelementptr inbounds float* %tmp16923, i64 1 + %tmp16925 = getelementptr inbounds float* %tmp16924, i64 1 + %tmp16926 = getelementptr inbounds float* %tmp16925, i64 1 + %tmp16927 = getelementptr inbounds float* %tmp16926, i64 1 + %tmp16928 = getelementptr inbounds float* %tmp16927, i64 1 + %tmp16929 = getelementptr inbounds float* %tmp16928, i64 1 + %tmp16930 = getelementptr inbounds float* %tmp16929, i64 1 + %tmp16931 = getelementptr inbounds float* %tmp16930, i64 1 + %tmp16932 = getelementptr inbounds float* %tmp16931, i64 1 + %tmp16933 = getelementptr inbounds float* %tmp16932, i64 1 + %tmp16934 = getelementptr inbounds float* %tmp16933, i64 1 + %tmp16935 = getelementptr inbounds float* %tmp16934, i64 1 + %tmp16936 = getelementptr inbounds float* %tmp16935, i64 1 + %tmp16937 = getelementptr inbounds float* %tmp16936, i64 1 + %tmp16938 = getelementptr inbounds float* %tmp16937, i64 1 + %tmp16939 = getelementptr inbounds float* %tmp16938, i64 1 + %tmp16940 = getelementptr inbounds float* %tmp16939, i64 1 + %tmp16941 = getelementptr inbounds float* %tmp16940, i64 1 + %tmp16942 = getelementptr inbounds float* %tmp16941, i64 1 + %tmp16943 = getelementptr inbounds float* %tmp16942, i64 1 + %tmp16944 = 
getelementptr inbounds float* %tmp16943, i64 1 + %tmp16945 = getelementptr inbounds float* %tmp16944, i64 1 + %tmp16946 = getelementptr inbounds float* %tmp16945, i64 1 + %tmp16947 = getelementptr inbounds float* %tmp16946, i64 1 + %tmp16948 = getelementptr inbounds float* %tmp16947, i64 1 + %tmp16949 = getelementptr inbounds float* %tmp16948, i64 1 + %tmp16950 = getelementptr inbounds float* %tmp16949, i64 1 + %tmp16951 = getelementptr inbounds float* %tmp16950, i64 1 + %tmp16952 = getelementptr inbounds float* %tmp16951, i64 1 + %tmp16953 = getelementptr inbounds float* %tmp16952, i64 1 + %tmp16954 = getelementptr inbounds float* %tmp16953, i64 1 + %tmp16955 = getelementptr inbounds float* %tmp16954, i64 1 + %tmp16956 = getelementptr inbounds float* %tmp16955, i64 1 + %tmp16957 = getelementptr inbounds float* %tmp16956, i64 1 + %tmp16958 = getelementptr inbounds float* %tmp16957, i64 1 + %tmp16959 = getelementptr inbounds float* %tmp16958, i64 1 + %tmp16960 = getelementptr inbounds float* %tmp16959, i64 1 + %tmp16961 = getelementptr inbounds float* %tmp16960, i64 1 + %tmp16962 = getelementptr inbounds float* %tmp16961, i64 1 + %tmp16963 = getelementptr inbounds float* %tmp16962, i64 1 + %tmp16964 = getelementptr inbounds float* %tmp16963, i64 1 + %tmp16965 = getelementptr inbounds float* %tmp16964, i64 1 + %tmp16966 = getelementptr inbounds float* %tmp16965, i64 1 + %tmp16967 = getelementptr inbounds float* %tmp16966, i64 1 + %tmp16968 = getelementptr inbounds float* %tmp16967, i64 1 + %tmp16969 = getelementptr inbounds float* %tmp16968, i64 1 + %tmp16970 = getelementptr inbounds float* %tmp16969, i64 1 + %tmp16971 = getelementptr inbounds float* %tmp16970, i64 1 + %tmp16972 = getelementptr inbounds float* %tmp16971, i64 1 + %tmp16973 = getelementptr inbounds float* %tmp16972, i64 1 + %tmp16974 = getelementptr inbounds float* %tmp16973, i64 1 + %tmp16975 = getelementptr inbounds float* %tmp16974, i64 1 + %tmp16976 = getelementptr inbounds float* %tmp16975, i64 1 
+ %tmp16977 = getelementptr inbounds float* %tmp16976, i64 1 + %tmp16978 = getelementptr inbounds float* %tmp16977, i64 1 + %tmp16979 = getelementptr inbounds float* %tmp16978, i64 1 + %tmp16980 = getelementptr inbounds float* %tmp16979, i64 1 + %tmp16981 = getelementptr inbounds float* %tmp16980, i64 1 + %tmp16982 = getelementptr inbounds float* %tmp16981, i64 1 + %tmp16983 = getelementptr inbounds float* %tmp16982, i64 1 + %tmp16984 = getelementptr inbounds float* %tmp16983, i64 1 + %tmp16985 = getelementptr inbounds float* %tmp16984, i64 1 + %tmp16986 = getelementptr inbounds float* %tmp16985, i64 1 + %tmp16987 = getelementptr inbounds float* %tmp16986, i64 1 + %tmp16988 = getelementptr inbounds float* %tmp16987, i64 1 + %tmp16989 = getelementptr inbounds float* %tmp16988, i64 1 + %tmp16990 = getelementptr inbounds float* %tmp16989, i64 1 + %tmp16991 = getelementptr inbounds float* %tmp16990, i64 1 + %tmp16992 = getelementptr inbounds float* %tmp16991, i64 1 + %tmp16993 = getelementptr inbounds float* %tmp16992, i64 1 + %tmp16994 = getelementptr inbounds float* %tmp16993, i64 1 + %tmp16995 = getelementptr inbounds float* %tmp16994, i64 1 + %tmp16996 = getelementptr inbounds float* %tmp16995, i64 1 + %tmp16997 = getelementptr inbounds float* %tmp16996, i64 1 + %tmp16998 = getelementptr inbounds float* %tmp16997, i64 1 + %tmp16999 = getelementptr inbounds float* %tmp16998, i64 1 + %tmp17000 = getelementptr inbounds float* %tmp16999, i64 1 + %tmp17001 = getelementptr inbounds float* %tmp17000, i64 1 + %tmp17002 = getelementptr inbounds float* %tmp17001, i64 1 + %tmp17003 = getelementptr inbounds float* %tmp17002, i64 1 + %tmp17004 = getelementptr inbounds float* %tmp17003, i64 1 + %tmp17005 = getelementptr inbounds float* %tmp17004, i64 1 + %tmp17006 = getelementptr inbounds float* %tmp17005, i64 1 + %tmp17007 = getelementptr inbounds float* %tmp17006, i64 1 + %tmp17008 = getelementptr inbounds float* %tmp17007, i64 1 + %tmp17009 = getelementptr inbounds float* 
%tmp17008, i64 1 + %tmp17010 = getelementptr inbounds float* %tmp17009, i64 1 + %tmp17011 = getelementptr inbounds float* %tmp17010, i64 1 + %tmp17012 = getelementptr inbounds float* %tmp17011, i64 1 + %tmp17013 = getelementptr inbounds float* %tmp17012, i64 1 + %tmp17014 = getelementptr inbounds float* %tmp17013, i64 1 + %tmp17015 = getelementptr inbounds float* %tmp17014, i64 1 + %tmp17016 = getelementptr inbounds float* %tmp17015, i64 1 + %tmp17017 = getelementptr inbounds float* %tmp17016, i64 1 + %tmp17018 = getelementptr inbounds float* %tmp17017, i64 1 + %tmp17019 = getelementptr inbounds float* %tmp17018, i64 1 + %tmp17020 = getelementptr inbounds float* %tmp17019, i64 1 + %tmp17021 = getelementptr inbounds float* %tmp17020, i64 1 + %tmp17022 = getelementptr inbounds float* %tmp17021, i64 1 + %tmp17023 = getelementptr inbounds float* %tmp17022, i64 1 + %tmp17024 = getelementptr inbounds float* %tmp17023, i64 1 + %tmp17025 = getelementptr inbounds float* %tmp17024, i64 1 + %tmp17026 = getelementptr inbounds float* %tmp17025, i64 1 + %tmp17027 = getelementptr inbounds float* %tmp17026, i64 1 + %tmp17028 = getelementptr inbounds float* %tmp17027, i64 1 + %tmp17029 = getelementptr inbounds float* %tmp17028, i64 1 + %tmp17030 = getelementptr inbounds float* %tmp17029, i64 1 + %tmp17031 = getelementptr inbounds float* %tmp17030, i64 1 + %tmp17032 = getelementptr inbounds float* %tmp17031, i64 1 + %tmp17033 = getelementptr inbounds float* %tmp17032, i64 1 + %tmp17034 = getelementptr inbounds float* %tmp17033, i64 1 + %tmp17035 = getelementptr inbounds float* %tmp17034, i64 1 + %tmp17036 = getelementptr inbounds float* %tmp17035, i64 1 + %tmp17037 = getelementptr inbounds float* %tmp17036, i64 1 + %tmp17038 = getelementptr inbounds float* %tmp17037, i64 1 + %tmp17039 = getelementptr inbounds float* %tmp17038, i64 1 + %tmp17040 = getelementptr inbounds float* %tmp17039, i64 1 + %tmp17041 = getelementptr inbounds float* %tmp17040, i64 1 + %tmp17042 = getelementptr 
inbounds float* %tmp17041, i64 1 + %tmp17043 = getelementptr inbounds float* %tmp17042, i64 1 + %tmp17044 = getelementptr inbounds float* %tmp17043, i64 1 + %tmp17045 = getelementptr inbounds float* %tmp17044, i64 1 + %tmp17046 = getelementptr inbounds float* %tmp17045, i64 1 + %tmp17047 = getelementptr inbounds float* %tmp17046, i64 1 + %tmp17048 = getelementptr inbounds float* %tmp17047, i64 1 + %tmp17049 = getelementptr inbounds float* %tmp17048, i64 1 + %tmp17050 = getelementptr inbounds float* %tmp17049, i64 1 + %tmp17051 = getelementptr inbounds float* %tmp17050, i64 1 + %tmp17052 = getelementptr inbounds float* %tmp17051, i64 1 + %tmp17053 = getelementptr inbounds float* %tmp17052, i64 1 + %tmp17054 = getelementptr inbounds float* %tmp17053, i64 1 + %tmp17055 = getelementptr inbounds float* %tmp17054, i64 1 + %tmp17056 = getelementptr inbounds float* %tmp17055, i64 1 + %tmp17057 = getelementptr inbounds float* %tmp17056, i64 1 + %tmp17058 = getelementptr inbounds float* %tmp17057, i64 1 + %tmp17059 = getelementptr inbounds float* %tmp17058, i64 1 + %tmp17060 = getelementptr inbounds float* %tmp17059, i64 1 + %tmp17061 = getelementptr inbounds float* %tmp17060, i64 1 + %tmp17062 = getelementptr inbounds float* %tmp17061, i64 1 + %tmp17063 = getelementptr inbounds float* %tmp17062, i64 1 + %tmp17064 = getelementptr inbounds float* %tmp17063, i64 1 + %tmp17065 = getelementptr inbounds float* %tmp17064, i64 1 + %tmp17066 = getelementptr inbounds float* %tmp17065, i64 1 + %tmp17067 = getelementptr inbounds float* %tmp17066, i64 1 + %tmp17068 = getelementptr inbounds float* %tmp17067, i64 1 + %tmp17069 = getelementptr inbounds float* %tmp17068, i64 1 + %tmp17070 = getelementptr inbounds float* %tmp17069, i64 1 + %tmp17071 = getelementptr inbounds float* %tmp17070, i64 1 + %tmp17072 = getelementptr inbounds float* %tmp17071, i64 1 + %tmp17073 = getelementptr inbounds float* %tmp17072, i64 1 + %tmp17074 = getelementptr inbounds float* %tmp17073, i64 1 + %tmp17075 = 
getelementptr inbounds float* %tmp17074, i64 1 + %tmp17076 = getelementptr inbounds float* %tmp17075, i64 1 + %tmp17077 = getelementptr inbounds float* %tmp17076, i64 1 + %tmp17078 = getelementptr inbounds float* %tmp17077, i64 1 + %tmp17079 = getelementptr inbounds float* %tmp17078, i64 1 + %tmp17080 = getelementptr inbounds float* %tmp17079, i64 1 + %tmp17081 = getelementptr inbounds float* %tmp17080, i64 1 + %tmp17082 = getelementptr inbounds float* %tmp17081, i64 1 + %tmp17083 = getelementptr inbounds float* %tmp17082, i64 1 + %tmp17084 = getelementptr inbounds float* %tmp17083, i64 1 + %tmp17085 = getelementptr inbounds float* %tmp17084, i64 1 + %tmp17086 = getelementptr inbounds float* %tmp17085, i64 1 + %tmp17087 = getelementptr inbounds float* %tmp17086, i64 1 + %tmp17088 = getelementptr inbounds float* %tmp17087, i64 1 + %tmp17089 = getelementptr inbounds float* %tmp17088, i64 1 + %tmp17090 = getelementptr inbounds float* %tmp17089, i64 1 + %tmp17091 = getelementptr inbounds float* %tmp17090, i64 1 + %tmp17092 = getelementptr inbounds float* %tmp17091, i64 1 + %tmp17093 = getelementptr inbounds float* %tmp17092, i64 1 + %tmp17094 = getelementptr inbounds float* %tmp17093, i64 1 + %tmp17095 = getelementptr inbounds float* %tmp17094, i64 1 + %tmp17096 = getelementptr inbounds float* %tmp17095, i64 1 + %tmp17097 = getelementptr inbounds float* %tmp17096, i64 1 + %tmp17098 = getelementptr inbounds float* %tmp17097, i64 1 + %tmp17099 = getelementptr inbounds float* %tmp17098, i64 1 + %tmp17100 = getelementptr inbounds float* %tmp17099, i64 1 + %tmp17101 = getelementptr inbounds float* %tmp17100, i64 1 + %tmp17102 = getelementptr inbounds float* %tmp17101, i64 1 + %tmp17103 = getelementptr inbounds float* %tmp17102, i64 1 + %tmp17104 = getelementptr inbounds float* %tmp17103, i64 1 + %tmp17105 = getelementptr inbounds float* %tmp17104, i64 1 + %tmp17106 = getelementptr inbounds float* %tmp17105, i64 1 + %tmp17107 = getelementptr inbounds float* %tmp17106, i64 1 
+ %tmp17108 = getelementptr inbounds float* %tmp17107, i64 1 + %tmp17109 = getelementptr inbounds float* %tmp17108, i64 1 + %tmp17110 = getelementptr inbounds float* %tmp17109, i64 1 + %tmp17111 = getelementptr inbounds float* %tmp17110, i64 1 + %tmp17112 = getelementptr inbounds float* %tmp17111, i64 1 + %tmp17113 = getelementptr inbounds float* %tmp17112, i64 1 + %tmp17114 = getelementptr inbounds float* %tmp17113, i64 1 + %tmp17115 = getelementptr inbounds float* %tmp17114, i64 1 + %tmp17116 = getelementptr inbounds float* %tmp17115, i64 1 + %tmp17117 = getelementptr inbounds float* %tmp17116, i64 1 + %tmp17118 = getelementptr inbounds float* %tmp17117, i64 1 + %tmp17119 = getelementptr inbounds float* %tmp17118, i64 1 + %tmp17120 = getelementptr inbounds float* %tmp17119, i64 1 + %tmp17121 = getelementptr inbounds float* %tmp17120, i64 1 + %tmp17122 = getelementptr inbounds float* %tmp17121, i64 1 + %tmp17123 = getelementptr inbounds float* %tmp17122, i64 1 + %tmp17124 = getelementptr inbounds float* %tmp17123, i64 1 + %tmp17125 = getelementptr inbounds float* %tmp17124, i64 1 + %tmp17126 = getelementptr inbounds float* %tmp17125, i64 1 + %tmp17127 = getelementptr inbounds float* %tmp17126, i64 1 + %tmp17128 = getelementptr inbounds float* %tmp17127, i64 1 + %tmp17129 = getelementptr inbounds float* %tmp17128, i64 1 + %tmp17130 = getelementptr inbounds float* %tmp17129, i64 1 + %tmp17131 = getelementptr inbounds float* %tmp17130, i64 1 + %tmp17132 = getelementptr inbounds float* %tmp17131, i64 1 + %tmp17133 = getelementptr inbounds float* %tmp17132, i64 1 + %tmp17134 = getelementptr inbounds float* %tmp17133, i64 1 + %tmp17135 = getelementptr inbounds float* %tmp17134, i64 1 + %tmp17136 = getelementptr inbounds float* %tmp17135, i64 1 + %tmp17137 = getelementptr inbounds float* %tmp17136, i64 1 + %tmp17138 = getelementptr inbounds float* %tmp17137, i64 1 + %tmp17139 = getelementptr inbounds float* %tmp17138, i64 1 + %tmp17140 = getelementptr inbounds float* 
%tmp17139, i64 1 + %tmp17141 = getelementptr inbounds float* %tmp17140, i64 1 + %tmp17142 = getelementptr inbounds float* %tmp17141, i64 1 + %tmp17143 = getelementptr inbounds float* %tmp17142, i64 1 + %tmp17144 = getelementptr inbounds float* %tmp17143, i64 1 + %tmp17145 = getelementptr inbounds float* %tmp17144, i64 1 + %tmp17146 = getelementptr inbounds float* %tmp17145, i64 1 + %tmp17147 = getelementptr inbounds float* %tmp17146, i64 1 + %tmp17148 = getelementptr inbounds float* %tmp17147, i64 1 + %tmp17149 = getelementptr inbounds float* %tmp17148, i64 1 + %tmp17150 = getelementptr inbounds float* %tmp17149, i64 1 + %tmp17151 = getelementptr inbounds float* %tmp17150, i64 1 + %tmp17152 = getelementptr inbounds float* %tmp17151, i64 1 + %tmp17153 = getelementptr inbounds float* %tmp17152, i64 1 + %tmp17154 = getelementptr inbounds float* %tmp17153, i64 1 + %tmp17155 = getelementptr inbounds float* %tmp17154, i64 1 + %tmp17156 = getelementptr inbounds float* %tmp17155, i64 1 + %tmp17157 = getelementptr inbounds float* %tmp17156, i64 1 + %tmp17158 = getelementptr inbounds float* %tmp17157, i64 1 + %tmp17159 = getelementptr inbounds float* %tmp17158, i64 1 + %tmp17160 = getelementptr inbounds float* %tmp17159, i64 1 + %tmp17161 = getelementptr inbounds float* %tmp17160, i64 1 + %tmp17162 = getelementptr inbounds float* %tmp17161, i64 1 + %tmp17163 = getelementptr inbounds float* %tmp17162, i64 1 + %tmp17164 = getelementptr inbounds float* %tmp17163, i64 1 + %tmp17165 = getelementptr inbounds float* %tmp17164, i64 1 + %tmp17166 = getelementptr inbounds float* %tmp17165, i64 1 + %tmp17167 = getelementptr inbounds float* %tmp17166, i64 1 + %tmp17168 = getelementptr inbounds float* %tmp17167, i64 1 + %tmp17169 = getelementptr inbounds float* %tmp17168, i64 1 + %tmp17170 = getelementptr inbounds float* %tmp17169, i64 1 + %tmp17171 = getelementptr inbounds float* %tmp17170, i64 1 + %tmp17172 = getelementptr inbounds float* %tmp17171, i64 1 + %tmp17173 = getelementptr 
inbounds float* %tmp17172, i64 1 + %tmp17174 = getelementptr inbounds float* %tmp17173, i64 1 + %tmp17175 = getelementptr inbounds float* %tmp17174, i64 1 + %tmp17176 = getelementptr inbounds float* %tmp17175, i64 1 + %tmp17177 = getelementptr inbounds float* %tmp17176, i64 1 + %tmp17178 = getelementptr inbounds float* %tmp17177, i64 1 + %tmp17179 = getelementptr inbounds float* %tmp17178, i64 1 + %tmp17180 = getelementptr inbounds float* %tmp17179, i64 1 + %tmp17181 = getelementptr inbounds float* %tmp17180, i64 1 + %tmp17182 = getelementptr inbounds float* %tmp17181, i64 1 + %tmp17183 = getelementptr inbounds float* %tmp17182, i64 1 + %tmp17184 = getelementptr inbounds float* %tmp17183, i64 1 + %tmp17185 = getelementptr inbounds float* %tmp17184, i64 1 + %tmp17186 = getelementptr inbounds float* %tmp17185, i64 1 + %tmp17187 = getelementptr inbounds float* %tmp17186, i64 1 + %tmp17188 = getelementptr inbounds float* %tmp17187, i64 1 + %tmp17189 = getelementptr inbounds float* %tmp17188, i64 1 + %tmp17190 = getelementptr inbounds float* %tmp17189, i64 1 + %tmp17191 = getelementptr inbounds float* %tmp17190, i64 1 + %tmp17192 = getelementptr inbounds float* %tmp17191, i64 1 + %tmp17193 = getelementptr inbounds float* %tmp17192, i64 1 + %tmp17194 = getelementptr inbounds float* %tmp17193, i64 1 + %tmp17195 = getelementptr inbounds float* %tmp17194, i64 1 + %tmp17196 = getelementptr inbounds float* %tmp17195, i64 1 + %tmp17197 = getelementptr inbounds float* %tmp17196, i64 1 + %tmp17198 = getelementptr inbounds float* %tmp17197, i64 1 + %tmp17199 = getelementptr inbounds float* %tmp17198, i64 1 + %tmp17200 = getelementptr inbounds float* %tmp17199, i64 1 + %tmp17201 = getelementptr inbounds float* %tmp17200, i64 1 + %tmp17202 = getelementptr inbounds float* %tmp17201, i64 1 + %tmp17203 = getelementptr inbounds float* %tmp17202, i64 1 + %tmp17204 = getelementptr inbounds float* %tmp17203, i64 1 + %tmp17205 = getelementptr inbounds float* %tmp17204, i64 1 + %tmp17206 = 
getelementptr inbounds float* %tmp17205, i64 1 + %tmp17207 = getelementptr inbounds float* %tmp17206, i64 1 + %tmp17208 = getelementptr inbounds float* %tmp17207, i64 1 + %tmp17209 = getelementptr inbounds float* %tmp17208, i64 1 + %tmp17210 = getelementptr inbounds float* %tmp17209, i64 1 + %tmp17211 = getelementptr inbounds float* %tmp17210, i64 1 + %tmp17212 = getelementptr inbounds float* %tmp17211, i64 1 + %tmp17213 = getelementptr inbounds float* %tmp17212, i64 1 + %tmp17214 = getelementptr inbounds float* %tmp17213, i64 1 + %tmp17215 = getelementptr inbounds float* %tmp17214, i64 1 + %tmp17216 = getelementptr inbounds float* %tmp17215, i64 1 + %tmp17217 = getelementptr inbounds float* %tmp17216, i64 1 + %tmp17218 = getelementptr inbounds float* %tmp17217, i64 1 + %tmp17219 = getelementptr inbounds float* %tmp17218, i64 1 + %tmp17220 = getelementptr inbounds float* %tmp17219, i64 1 + %tmp17221 = getelementptr inbounds float* %tmp17220, i64 1 + %tmp17222 = getelementptr inbounds float* %tmp17221, i64 1 + %tmp17223 = getelementptr inbounds float* %tmp17222, i64 1 + %tmp17224 = getelementptr inbounds float* %tmp17223, i64 1 + %tmp17225 = getelementptr inbounds float* %tmp17224, i64 1 + %tmp17226 = getelementptr inbounds float* %tmp17225, i64 1 + %tmp17227 = getelementptr inbounds float* %tmp17226, i64 1 + %tmp17228 = getelementptr inbounds float* %tmp17227, i64 1 + %tmp17229 = getelementptr inbounds float* %tmp17228, i64 1 + %tmp17230 = getelementptr inbounds float* %tmp17229, i64 1 + %tmp17231 = getelementptr inbounds float* %tmp17230, i64 1 + %tmp17232 = getelementptr inbounds float* %tmp17231, i64 1 + %tmp17233 = getelementptr inbounds float* %tmp17232, i64 1 + %tmp17234 = getelementptr inbounds float* %tmp17233, i64 1 + %tmp17235 = getelementptr inbounds float* %tmp17234, i64 1 + %tmp17236 = getelementptr inbounds float* %tmp17235, i64 1 + %tmp17237 = getelementptr inbounds float* %tmp17236, i64 1 + %tmp17238 = getelementptr inbounds float* %tmp17237, i64 1 
+ %tmp17239 = getelementptr inbounds float* %tmp17238, i64 1 + %tmp17240 = getelementptr inbounds float* %tmp17239, i64 1 + %tmp17241 = getelementptr inbounds float* %tmp17240, i64 1 + %tmp17242 = getelementptr inbounds float* %tmp17241, i64 1 + %tmp17243 = getelementptr inbounds float* %tmp17242, i64 1 + %tmp17244 = getelementptr inbounds float* %tmp17243, i64 1 + %tmp17245 = getelementptr inbounds float* %tmp17244, i64 1 + %tmp17246 = getelementptr inbounds float* %tmp17245, i64 1 + %tmp17247 = getelementptr inbounds float* %tmp17246, i64 1 + %tmp17248 = getelementptr inbounds float* %tmp17247, i64 1 + %tmp17249 = getelementptr inbounds float* %tmp17248, i64 1 + %tmp17250 = getelementptr inbounds float* %tmp17249, i64 1 + %tmp17251 = getelementptr inbounds float* %tmp17250, i64 1 + %tmp17252 = getelementptr inbounds float* %tmp17251, i64 1 + %tmp17253 = getelementptr inbounds float* %tmp17252, i64 1 + %tmp17254 = getelementptr inbounds float* %tmp17253, i64 1 + %tmp17255 = getelementptr inbounds float* %tmp17254, i64 1 + %tmp17256 = getelementptr inbounds float* %tmp17255, i64 1 + %tmp17257 = getelementptr inbounds float* %tmp17256, i64 1 + %tmp17258 = getelementptr inbounds float* %tmp17257, i64 1 + %tmp17259 = getelementptr inbounds float* %tmp17258, i64 1 + %tmp17260 = getelementptr inbounds float* %tmp17259, i64 1 + %tmp17261 = getelementptr inbounds float* %tmp17260, i64 1 + %tmp17262 = getelementptr inbounds float* %tmp17261, i64 1 + %tmp17263 = getelementptr inbounds float* %tmp17262, i64 1 + %tmp17264 = getelementptr inbounds float* %tmp17263, i64 1 + %tmp17265 = getelementptr inbounds float* %tmp17264, i64 1 + %tmp17266 = getelementptr inbounds float* %tmp17265, i64 1 + %tmp17267 = getelementptr inbounds float* %tmp17266, i64 1 + %tmp17268 = getelementptr inbounds float* %tmp17267, i64 1 + %tmp17269 = getelementptr inbounds float* %tmp17268, i64 1 + %tmp17270 = getelementptr inbounds float* %tmp17269, i64 1 + %tmp17271 = getelementptr inbounds float* 
%tmp17270, i64 1 + %tmp17272 = getelementptr inbounds float* %tmp17271, i64 1 + %tmp17273 = getelementptr inbounds float* %tmp17272, i64 1 + %tmp17274 = getelementptr inbounds float* %tmp17273, i64 1 + %tmp17275 = getelementptr inbounds float* %tmp17274, i64 1 + %tmp17276 = getelementptr inbounds float* %tmp17275, i64 1 + %tmp17277 = getelementptr inbounds float* %tmp17276, i64 1 + %tmp17278 = getelementptr inbounds float* %tmp17277, i64 1 + %tmp17279 = getelementptr inbounds float* %tmp17278, i64 1 + %tmp17280 = getelementptr inbounds float* %tmp17279, i64 1 + %tmp17281 = getelementptr inbounds float* %tmp17280, i64 1 + %tmp17282 = getelementptr inbounds float* %tmp17281, i64 1 + %tmp17283 = getelementptr inbounds float* %tmp17282, i64 1 + %tmp17284 = getelementptr inbounds float* %tmp17283, i64 1 + %tmp17285 = getelementptr inbounds float* %tmp17284, i64 1 + %tmp17286 = getelementptr inbounds float* %tmp17285, i64 1 + %tmp17287 = getelementptr inbounds float* %tmp17286, i64 1 + %tmp17288 = getelementptr inbounds float* %tmp17287, i64 1 + %tmp17289 = getelementptr inbounds float* %tmp17288, i64 1 + %tmp17290 = getelementptr inbounds float* %tmp17289, i64 1 + %tmp17291 = getelementptr inbounds float* %tmp17290, i64 1 + %tmp17292 = getelementptr inbounds float* %tmp17291, i64 1 + %tmp17293 = getelementptr inbounds float* %tmp17292, i64 1 + %tmp17294 = getelementptr inbounds float* %tmp17293, i64 1 + %tmp17295 = getelementptr inbounds float* %tmp17294, i64 1 + %tmp17296 = getelementptr inbounds float* %tmp17295, i64 1 + %tmp17297 = getelementptr inbounds float* %tmp17296, i64 1 + %tmp17298 = getelementptr inbounds float* %tmp17297, i64 1 + %tmp17299 = getelementptr inbounds float* %tmp17298, i64 1 + %tmp17300 = getelementptr inbounds float* %tmp17299, i64 1 + %tmp17301 = getelementptr inbounds float* %tmp17300, i64 1 + %tmp17302 = getelementptr inbounds float* %tmp17301, i64 1 + %tmp17303 = getelementptr inbounds float* %tmp17302, i64 1 + %tmp17304 = getelementptr 
inbounds float* %tmp17303, i64 1 + %tmp17305 = getelementptr inbounds float* %tmp17304, i64 1 + %tmp17306 = getelementptr inbounds float* %tmp17305, i64 1 + %tmp17307 = getelementptr inbounds float* %tmp17306, i64 1 + %tmp17308 = getelementptr inbounds float* %tmp17307, i64 1 + %tmp17309 = getelementptr inbounds float* %tmp17308, i64 1 + %tmp17310 = getelementptr inbounds float* %tmp17309, i64 1 + %tmp17311 = getelementptr inbounds float* %tmp17310, i64 1 + %tmp17312 = getelementptr inbounds float* %tmp17311, i64 1 + %tmp17313 = getelementptr inbounds float* %tmp17312, i64 1 + %tmp17314 = getelementptr inbounds float* %tmp17313, i64 1 + %tmp17315 = getelementptr inbounds float* %tmp17314, i64 1 + %tmp17316 = getelementptr inbounds float* %tmp17315, i64 1 + %tmp17317 = getelementptr inbounds float* %tmp17316, i64 1 + %tmp17318 = getelementptr inbounds float* %tmp17317, i64 1 + %tmp17319 = getelementptr inbounds float* %tmp17318, i64 1 + %tmp17320 = getelementptr inbounds float* %tmp17319, i64 1 + %tmp17321 = getelementptr inbounds float* %tmp17320, i64 1 + %tmp17322 = getelementptr inbounds float* %tmp17321, i64 1 + %tmp17323 = getelementptr inbounds float* %tmp17322, i64 1 + %tmp17324 = getelementptr inbounds float* %tmp17323, i64 1 + %tmp17325 = getelementptr inbounds float* %tmp17324, i64 1 + %tmp17326 = getelementptr inbounds float* %tmp17325, i64 1 + %tmp17327 = getelementptr inbounds float* %tmp17326, i64 1 + %tmp17328 = getelementptr inbounds float* %tmp17327, i64 1 + %tmp17329 = getelementptr inbounds float* %tmp17328, i64 1 + %tmp17330 = getelementptr inbounds float* %tmp17329, i64 1 + %tmp17331 = getelementptr inbounds float* %tmp17330, i64 1 + %tmp17332 = getelementptr inbounds float* %tmp17331, i64 1 + %tmp17333 = getelementptr inbounds float* %tmp17332, i64 1 + %tmp17334 = getelementptr inbounds float* %tmp17333, i64 1 + %tmp17335 = getelementptr inbounds float* %tmp17334, i64 1 + %tmp17336 = getelementptr inbounds float* %tmp17335, i64 1 + %tmp17337 = 
getelementptr inbounds float* %tmp17336, i64 1 + %tmp17338 = getelementptr inbounds float* %tmp17337, i64 1 + %tmp17339 = getelementptr inbounds float* %tmp17338, i64 1 + %tmp17340 = getelementptr inbounds float* %tmp17339, i64 1 + %tmp17341 = getelementptr inbounds float* %tmp17340, i64 1 + %tmp17342 = getelementptr inbounds float* %tmp17341, i64 1 + %tmp17343 = getelementptr inbounds float* %tmp17342, i64 1 + %tmp17344 = getelementptr inbounds float* %tmp17343, i64 1 + %tmp17345 = getelementptr inbounds float* %tmp17344, i64 1 + %tmp17346 = getelementptr inbounds float* %tmp17345, i64 1 + %tmp17347 = getelementptr inbounds float* %tmp17346, i64 1 + %tmp17348 = getelementptr inbounds float* %tmp17347, i64 1 + %tmp17349 = getelementptr inbounds float* %tmp17348, i64 1 + %tmp17350 = getelementptr inbounds float* %tmp17349, i64 1 + %tmp17351 = getelementptr inbounds float* %tmp17350, i64 1 + %tmp17352 = getelementptr inbounds float* %tmp17351, i64 1 + %tmp17353 = getelementptr inbounds float* %tmp17352, i64 1 + %tmp17354 = getelementptr inbounds float* %tmp17353, i64 1 + %tmp17355 = getelementptr inbounds float* %tmp17354, i64 1 + %tmp17356 = getelementptr inbounds float* %tmp17355, i64 1 + %tmp17357 = getelementptr inbounds float* %tmp17356, i64 1 + %tmp17358 = getelementptr inbounds float* %tmp17357, i64 1 + %tmp17359 = getelementptr inbounds float* %tmp17358, i64 1 + %tmp17360 = getelementptr inbounds float* %tmp17359, i64 1 + %tmp17361 = getelementptr inbounds float* %tmp17360, i64 1 + %tmp17362 = getelementptr inbounds float* %tmp17361, i64 1 + %tmp17363 = getelementptr inbounds float* %tmp17362, i64 1 + %tmp17364 = getelementptr inbounds float* %tmp17363, i64 1 + %tmp17365 = getelementptr inbounds float* %tmp17364, i64 1 + %tmp17366 = getelementptr inbounds float* %tmp17365, i64 1 + %tmp17367 = getelementptr inbounds float* %tmp17366, i64 1 + %tmp17368 = getelementptr inbounds float* %tmp17367, i64 1 + %tmp17369 = getelementptr inbounds float* %tmp17368, i64 1 
+ %tmp17370 = getelementptr inbounds float* %tmp17369, i64 1 + %tmp17371 = getelementptr inbounds float* %tmp17370, i64 1 + %tmp17372 = getelementptr inbounds float* %tmp17371, i64 1 + %tmp17373 = getelementptr inbounds float* %tmp17372, i64 1 + %tmp17374 = getelementptr inbounds float* %tmp17373, i64 1 + %tmp17375 = getelementptr inbounds float* %tmp17374, i64 1 + %tmp17376 = getelementptr inbounds float* %tmp17375, i64 1 + %tmp17377 = getelementptr inbounds float* %tmp17376, i64 1 + %tmp17378 = getelementptr inbounds float* %tmp17377, i64 1 + %tmp17379 = getelementptr inbounds float* %tmp17378, i64 1 + %tmp17380 = getelementptr inbounds float* %tmp17379, i64 1 + %tmp17381 = getelementptr inbounds float* %tmp17380, i64 1 + %tmp17382 = getelementptr inbounds float* %tmp17381, i64 1 + %tmp17383 = getelementptr inbounds float* %tmp17382, i64 1 + %tmp17384 = getelementptr inbounds float* %tmp17383, i64 1 + %tmp17385 = getelementptr inbounds float* %tmp17384, i64 1 + %tmp17386 = getelementptr inbounds float* %tmp17385, i64 1 + %tmp17387 = getelementptr inbounds float* %tmp17386, i64 1 + %tmp17388 = getelementptr inbounds float* %tmp17387, i64 1 + %tmp17389 = getelementptr inbounds float* %tmp17388, i64 1 + %tmp17390 = getelementptr inbounds float* %tmp17389, i64 1 + %tmp17391 = getelementptr inbounds float* %tmp17390, i64 1 + %tmp17392 = getelementptr inbounds float* %tmp17391, i64 1 + %tmp17393 = getelementptr inbounds float* %tmp17392, i64 1 + %tmp17394 = getelementptr inbounds float* %tmp17393, i64 1 + %tmp17395 = getelementptr inbounds float* %tmp17394, i64 1 + %tmp17396 = getelementptr inbounds float* %tmp17395, i64 1 + %tmp17397 = getelementptr inbounds float* %tmp17396, i64 1 + %tmp17398 = getelementptr inbounds float* %tmp17397, i64 1 + %tmp17399 = getelementptr inbounds float* %tmp17398, i64 1 + %tmp17400 = getelementptr inbounds float* %tmp17399, i64 1 + %tmp17401 = getelementptr inbounds float* %tmp17400, i64 1 + %tmp17402 = getelementptr inbounds float* 
%tmp17401, i64 1 + %tmp17403 = getelementptr inbounds float* %tmp17402, i64 1 + %tmp17404 = getelementptr inbounds float* %tmp17403, i64 1 + %tmp17405 = getelementptr inbounds float* %tmp17404, i64 1 + %tmp17406 = getelementptr inbounds float* %tmp17405, i64 1 + %tmp17407 = getelementptr inbounds float* %tmp17406, i64 1 + %tmp17408 = getelementptr inbounds float* %tmp17407, i64 1 + %tmp17409 = getelementptr inbounds float* %tmp17408, i64 1 + %tmp17410 = getelementptr inbounds float* %tmp17409, i64 1 + %tmp17411 = getelementptr inbounds float* %tmp17410, i64 1 + %tmp17412 = getelementptr inbounds float* %tmp17411, i64 1 + %tmp17413 = getelementptr inbounds float* %tmp17412, i64 1 + %tmp17414 = getelementptr inbounds float* %tmp17413, i64 1 + %tmp17415 = getelementptr inbounds float* %tmp17414, i64 1 + %tmp17416 = getelementptr inbounds float* %tmp17415, i64 1 + %tmp17417 = getelementptr inbounds float* %tmp17416, i64 1 + %tmp17418 = getelementptr inbounds float* %tmp17417, i64 1 + %tmp17419 = getelementptr inbounds float* %tmp17418, i64 1 + %tmp17420 = getelementptr inbounds float* %tmp17419, i64 1 + %tmp17421 = getelementptr inbounds float* %tmp17420, i64 1 + %tmp17422 = getelementptr inbounds float* %tmp17421, i64 1 + %tmp17423 = getelementptr inbounds float* %tmp17422, i64 1 + %tmp17424 = getelementptr inbounds float* %tmp17423, i64 1 + %tmp17425 = getelementptr inbounds float* %tmp17424, i64 1 + %tmp17426 = getelementptr inbounds float* %tmp17425, i64 1 + %tmp17427 = getelementptr inbounds float* %tmp17426, i64 1 + %tmp17428 = getelementptr inbounds float* %tmp17427, i64 1 + %tmp17429 = getelementptr inbounds float* %tmp17428, i64 1 + %tmp17430 = getelementptr inbounds float* %tmp17429, i64 1 + %tmp17431 = getelementptr inbounds float* %tmp17430, i64 1 + %tmp17432 = getelementptr inbounds float* %tmp17431, i64 1 + %tmp17433 = getelementptr inbounds float* %tmp17432, i64 1 + %tmp17434 = getelementptr inbounds float* %tmp17433, i64 1 + %tmp17435 = getelementptr 
inbounds float* %tmp17434, i64 1 + %tmp17436 = getelementptr inbounds float* %tmp17435, i64 1 + %tmp17437 = getelementptr inbounds float* %tmp17436, i64 1 + %tmp17438 = getelementptr inbounds float* %tmp17437, i64 1 + %tmp17439 = getelementptr inbounds float* %tmp17438, i64 1 + %tmp17440 = getelementptr inbounds float* %tmp17439, i64 1 + %tmp17441 = getelementptr inbounds float* %tmp17440, i64 1 + %tmp17442 = getelementptr inbounds float* %tmp17441, i64 1 + %tmp17443 = getelementptr inbounds float* %tmp17442, i64 1 + %tmp17444 = getelementptr inbounds float* %tmp17443, i64 1 + %tmp17445 = getelementptr inbounds float* %tmp17444, i64 1 + %tmp17446 = getelementptr inbounds float* %tmp17445, i64 1 + %tmp17447 = getelementptr inbounds float* %tmp17446, i64 1 + %tmp17448 = getelementptr inbounds float* %tmp17447, i64 1 + %tmp17449 = getelementptr inbounds float* %tmp17448, i64 1 + %tmp17450 = getelementptr inbounds float* %tmp17449, i64 1 + %tmp17451 = getelementptr inbounds float* %tmp17450, i64 1 + %tmp17452 = getelementptr inbounds float* %tmp17451, i64 1 + %tmp17453 = getelementptr inbounds float* %tmp17452, i64 1 + %tmp17454 = getelementptr inbounds float* %tmp17453, i64 1 + %tmp17455 = getelementptr inbounds float* %tmp17454, i64 1 + %tmp17456 = getelementptr inbounds float* %tmp17455, i64 1 + %tmp17457 = getelementptr inbounds float* %tmp17456, i64 1 + %tmp17458 = getelementptr inbounds float* %tmp17457, i64 1 + %tmp17459 = getelementptr inbounds float* %tmp17458, i64 1 + %tmp17460 = getelementptr inbounds float* %tmp17459, i64 1 + %tmp17461 = getelementptr inbounds float* %tmp17460, i64 1 + %tmp17462 = getelementptr inbounds float* %tmp17461, i64 1 + %tmp17463 = getelementptr inbounds float* %tmp17462, i64 1 + %tmp17464 = getelementptr inbounds float* %tmp17463, i64 1 + %tmp17465 = getelementptr inbounds float* %tmp17464, i64 1 + %tmp17466 = getelementptr inbounds float* %tmp17465, i64 1 + %tmp17467 = getelementptr inbounds float* %tmp17466, i64 1 + %tmp17468 = 
getelementptr inbounds float* %tmp17467, i64 1 + %tmp17469 = getelementptr inbounds float* %tmp17468, i64 1 + %tmp17470 = getelementptr inbounds float* %tmp17469, i64 1 + %tmp17471 = getelementptr inbounds float* %tmp17470, i64 1 + %tmp17472 = getelementptr inbounds float* %tmp17471, i64 1 + %tmp17473 = getelementptr inbounds float* %tmp17472, i64 1 + %tmp17474 = getelementptr inbounds float* %tmp17473, i64 1 + %tmp17475 = getelementptr inbounds float* %tmp17474, i64 1 + %tmp17476 = getelementptr inbounds float* %tmp17475, i64 1 + %tmp17477 = getelementptr inbounds float* %tmp17476, i64 1 + %tmp17478 = getelementptr inbounds float* %tmp17477, i64 1 + %tmp17479 = getelementptr inbounds float* %tmp17478, i64 1 + %tmp17480 = getelementptr inbounds float* %tmp17479, i64 1 + %tmp17481 = getelementptr inbounds float* %tmp17480, i64 1 + %tmp17482 = getelementptr inbounds float* %tmp17481, i64 1 + %tmp17483 = getelementptr inbounds float* %tmp17482, i64 1 + %tmp17484 = getelementptr inbounds float* %tmp17483, i64 1 + %tmp17485 = getelementptr inbounds float* %tmp17484, i64 1 + %tmp17486 = getelementptr inbounds float* %tmp17485, i64 1 + %tmp17487 = getelementptr inbounds float* %tmp17486, i64 1 + %tmp17488 = getelementptr inbounds float* %tmp17487, i64 1 + %tmp17489 = getelementptr inbounds float* %tmp17488, i64 1 + %tmp17490 = getelementptr inbounds float* %tmp17489, i64 1 + %tmp17491 = getelementptr inbounds float* %tmp17490, i64 1 + %tmp17492 = getelementptr inbounds float* %tmp17491, i64 1 + %tmp17493 = getelementptr inbounds float* %tmp17492, i64 1 + %tmp17494 = getelementptr inbounds float* %tmp17493, i64 1 + %tmp17495 = getelementptr inbounds float* %tmp17494, i64 1 + %tmp17496 = getelementptr inbounds float* %tmp17495, i64 1 + %tmp17497 = getelementptr inbounds float* %tmp17496, i64 1 + %tmp17498 = getelementptr inbounds float* %tmp17497, i64 1 + %tmp17499 = getelementptr inbounds float* %tmp17498, i64 1 + %tmp17500 = getelementptr inbounds float* %tmp17499, i64 1 
+ %tmp17501 = getelementptr inbounds float* %tmp17500, i64 1 + %tmp17502 = getelementptr inbounds float* %tmp17501, i64 1 + %tmp17503 = getelementptr inbounds float* %tmp17502, i64 1 + %tmp17504 = getelementptr inbounds float* %tmp17503, i64 1 + %tmp17505 = getelementptr inbounds float* %tmp17504, i64 1 + %tmp17506 = getelementptr inbounds float* %tmp17505, i64 1 + %tmp17507 = getelementptr inbounds float* %tmp17506, i64 1 + %tmp17508 = getelementptr inbounds float* %tmp17507, i64 1 + %tmp17509 = getelementptr inbounds float* %tmp17508, i64 1 + %tmp17510 = getelementptr inbounds float* %tmp17509, i64 1 + %tmp17511 = getelementptr inbounds float* %tmp17510, i64 1 + %tmp17512 = getelementptr inbounds float* %tmp17511, i64 1 + %tmp17513 = getelementptr inbounds float* %tmp17512, i64 1 + %tmp17514 = getelementptr inbounds float* %tmp17513, i64 1 + %tmp17515 = getelementptr inbounds float* %tmp17514, i64 1 + %tmp17516 = getelementptr inbounds float* %tmp17515, i64 1 + %tmp17517 = getelementptr inbounds float* %tmp17516, i64 1 + %tmp17518 = getelementptr inbounds float* %tmp17517, i64 1 + %tmp17519 = getelementptr inbounds float* %tmp17518, i64 1 + %tmp17520 = getelementptr inbounds float* %tmp17519, i64 1 + %tmp17521 = getelementptr inbounds float* %tmp17520, i64 1 + %tmp17522 = getelementptr inbounds float* %tmp17521, i64 1 + %tmp17523 = getelementptr inbounds float* %tmp17522, i64 1 + %tmp17524 = getelementptr inbounds float* %tmp17523, i64 1 + %tmp17525 = getelementptr inbounds float* %tmp17524, i64 1 + %tmp17526 = getelementptr inbounds float* %tmp17525, i64 1 + %tmp17527 = getelementptr inbounds float* %tmp17526, i64 1 + %tmp17528 = getelementptr inbounds float* %tmp17527, i64 1 + %tmp17529 = getelementptr inbounds float* %tmp17528, i64 1 + %tmp17530 = getelementptr inbounds float* %tmp17529, i64 1 + %tmp17531 = getelementptr inbounds float* %tmp17530, i64 1 + %tmp17532 = getelementptr inbounds float* %tmp17531, i64 1 + %tmp17533 = getelementptr inbounds float* 
%tmp17532, i64 1 + %tmp17534 = getelementptr inbounds float* %tmp17533, i64 1 + %tmp17535 = getelementptr inbounds float* %tmp17534, i64 1 + %tmp17536 = getelementptr inbounds float* %tmp17535, i64 1 + %tmp17537 = getelementptr inbounds float* %tmp17536, i64 1 + %tmp17538 = getelementptr inbounds float* %tmp17537, i64 1 + %tmp17539 = getelementptr inbounds float* %tmp17538, i64 1 + %tmp17540 = getelementptr inbounds float* %tmp17539, i64 1 + %tmp17541 = getelementptr inbounds float* %tmp17540, i64 1 + %tmp17542 = getelementptr inbounds float* %tmp17541, i64 1 + %tmp17543 = getelementptr inbounds float* %tmp17542, i64 1 + %tmp17544 = getelementptr inbounds float* %tmp17543, i64 1 + %tmp17545 = getelementptr inbounds float* %tmp17544, i64 1 + %tmp17546 = getelementptr inbounds float* %tmp17545, i64 1 + %tmp17547 = getelementptr inbounds float* %tmp17546, i64 1 + %tmp17548 = getelementptr inbounds float* %tmp17547, i64 1 + %tmp17549 = getelementptr inbounds float* %tmp17548, i64 1 + %tmp17550 = getelementptr inbounds float* %tmp17549, i64 1 + %tmp17551 = getelementptr inbounds float* %tmp17550, i64 1 + %tmp17552 = getelementptr inbounds float* %tmp17551, i64 1 + %tmp17553 = getelementptr inbounds float* %tmp17552, i64 1 + %tmp17554 = getelementptr inbounds float* %tmp17553, i64 1 + %tmp17555 = getelementptr inbounds float* %tmp17554, i64 1 + %tmp17556 = getelementptr inbounds float* %tmp17555, i64 1 + %tmp17557 = getelementptr inbounds float* %tmp17556, i64 1 + %tmp17558 = getelementptr inbounds float* %tmp17557, i64 1 + %tmp17559 = getelementptr inbounds float* %tmp17558, i64 1 + %tmp17560 = getelementptr inbounds float* %tmp17559, i64 1 + %tmp17561 = getelementptr inbounds float* %tmp17560, i64 1 + %tmp17562 = getelementptr inbounds float* %tmp17561, i64 1 + %tmp17563 = getelementptr inbounds float* %tmp17562, i64 1 + %tmp17564 = getelementptr inbounds float* %tmp17563, i64 1 + %tmp17565 = getelementptr inbounds float* %tmp17564, i64 1 + %tmp17566 = getelementptr 
inbounds float* %tmp17565, i64 1 + %tmp17567 = getelementptr inbounds float* %tmp17566, i64 1 + %tmp17568 = getelementptr inbounds float* %tmp17567, i64 1 + %tmp17569 = getelementptr inbounds float* %tmp17568, i64 1 + %tmp17570 = getelementptr inbounds float* %tmp17569, i64 1 + %tmp17571 = getelementptr inbounds float* %tmp17570, i64 1 + %tmp17572 = getelementptr inbounds float* %tmp17571, i64 1 + %tmp17573 = getelementptr inbounds float* %tmp17572, i64 1 + %tmp17574 = getelementptr inbounds float* %tmp17573, i64 1 + %tmp17575 = getelementptr inbounds float* %tmp17574, i64 1 + %tmp17576 = getelementptr inbounds float* %tmp17575, i64 1 + %tmp17577 = getelementptr inbounds float* %tmp17576, i64 1 + %tmp17578 = getelementptr inbounds float* %tmp17577, i64 1 + %tmp17579 = getelementptr inbounds float* %tmp17578, i64 1 + %tmp17580 = getelementptr inbounds float* %tmp17579, i64 1 + %tmp17581 = getelementptr inbounds float* %tmp17580, i64 1 + %tmp17582 = getelementptr inbounds float* %tmp17581, i64 1 + %tmp17583 = getelementptr inbounds float* %tmp17582, i64 1 + %tmp17584 = getelementptr inbounds float* %tmp17583, i64 1 + %tmp17585 = getelementptr inbounds float* %tmp17584, i64 1 + %tmp17586 = getelementptr inbounds float* %tmp17585, i64 1 + %tmp17587 = getelementptr inbounds float* %tmp17586, i64 1 + %tmp17588 = getelementptr inbounds float* %tmp17587, i64 1 + %tmp17589 = getelementptr inbounds float* %tmp17588, i64 1 + %tmp17590 = getelementptr inbounds float* %tmp17589, i64 1 + %tmp17591 = getelementptr inbounds float* %tmp17590, i64 1 + %tmp17592 = getelementptr inbounds float* %tmp17591, i64 1 + %tmp17593 = getelementptr inbounds float* %tmp17592, i64 1 + %tmp17594 = getelementptr inbounds float* %tmp17593, i64 1 + %tmp17595 = getelementptr inbounds float* %tmp17594, i64 1 + %tmp17596 = getelementptr inbounds float* %tmp17595, i64 1 + %tmp17597 = getelementptr inbounds float* %tmp17596, i64 1 + %tmp17598 = getelementptr inbounds float* %tmp17597, i64 1 + %tmp17599 = 
getelementptr inbounds float* %tmp17598, i64 1 + %tmp17600 = getelementptr inbounds float* %tmp17599, i64 1 + %tmp17601 = getelementptr inbounds float* %tmp17600, i64 1 + %tmp17602 = getelementptr inbounds float* %tmp17601, i64 1 + %tmp17603 = getelementptr inbounds float* %tmp17602, i64 1 + %tmp17604 = getelementptr inbounds float* %tmp17603, i64 1 + %tmp17605 = getelementptr inbounds float* %tmp17604, i64 1 + %tmp17606 = getelementptr inbounds float* %tmp17605, i64 1 + %tmp17607 = getelementptr inbounds float* %tmp17606, i64 1 + %tmp17608 = getelementptr inbounds float* %tmp17607, i64 1 + %tmp17609 = getelementptr inbounds float* %tmp17608, i64 1 + %tmp17610 = getelementptr inbounds float* %tmp17609, i64 1 + %tmp17611 = getelementptr inbounds float* %tmp17610, i64 1 + %tmp17612 = getelementptr inbounds float* %tmp17611, i64 1 + %tmp17613 = getelementptr inbounds float* %tmp17612, i64 1 + %tmp17614 = getelementptr inbounds float* %tmp17613, i64 1 + %tmp17615 = getelementptr inbounds float* %tmp17614, i64 1 + %tmp17616 = getelementptr inbounds float* %tmp17615, i64 1 + %tmp17617 = getelementptr inbounds float* %tmp17616, i64 1 + %tmp17618 = getelementptr inbounds float* %tmp17617, i64 1 + %tmp17619 = getelementptr inbounds float* %tmp17618, i64 1 + %tmp17620 = getelementptr inbounds float* %tmp17619, i64 1 + %tmp17621 = getelementptr inbounds float* %tmp17620, i64 1 + %tmp17622 = getelementptr inbounds float* %tmp17621, i64 1 + %tmp17623 = getelementptr inbounds float* %tmp17622, i64 1 + %tmp17624 = getelementptr inbounds float* %tmp17623, i64 1 + %tmp17625 = getelementptr inbounds float* %tmp17624, i64 1 + %tmp17626 = getelementptr inbounds float* %tmp17625, i64 1 + %tmp17627 = getelementptr inbounds float* %tmp17626, i64 1 + %tmp17628 = getelementptr inbounds float* %tmp17627, i64 1 + %tmp17629 = getelementptr inbounds float* %tmp17628, i64 1 + %tmp17630 = getelementptr inbounds float* %tmp17629, i64 1 + %tmp17631 = getelementptr inbounds float* %tmp17630, i64 1 
+ %tmp17632 = getelementptr inbounds float* %tmp17631, i64 1 + %tmp17633 = getelementptr inbounds float* %tmp17632, i64 1 + %tmp17634 = getelementptr inbounds float* %tmp17633, i64 1 + %tmp17635 = getelementptr inbounds float* %tmp17634, i64 1 + %tmp17636 = getelementptr inbounds float* %tmp17635, i64 1 + %tmp17637 = getelementptr inbounds float* %tmp17636, i64 1 + %tmp17638 = getelementptr inbounds float* %tmp17637, i64 1 + %tmp17639 = getelementptr inbounds float* %tmp17638, i64 1 + %tmp17640 = getelementptr inbounds float* %tmp17639, i64 1 + %tmp17641 = getelementptr inbounds float* %tmp17640, i64 1 + %tmp17642 = getelementptr inbounds float* %tmp17641, i64 1 + %tmp17643 = getelementptr inbounds float* %tmp17642, i64 1 + %tmp17644 = getelementptr inbounds float* %tmp17643, i64 1 + %tmp17645 = getelementptr inbounds float* %tmp17644, i64 1 + %tmp17646 = getelementptr inbounds float* %tmp17645, i64 1 + %tmp17647 = getelementptr inbounds float* %tmp17646, i64 1 + %tmp17648 = getelementptr inbounds float* %tmp17647, i64 1 + %tmp17649 = getelementptr inbounds float* %tmp17648, i64 1 + %tmp17650 = getelementptr inbounds float* %tmp17649, i64 1 + %tmp17651 = getelementptr inbounds float* %tmp17650, i64 1 + %tmp17652 = getelementptr inbounds float* %tmp17651, i64 1 + %tmp17653 = getelementptr inbounds float* %tmp17652, i64 1 + %tmp17654 = getelementptr inbounds float* %tmp17653, i64 1 + %tmp17655 = getelementptr inbounds float* %tmp17654, i64 1 + %tmp17656 = getelementptr inbounds float* %tmp17655, i64 1 + %tmp17657 = getelementptr inbounds float* %tmp17656, i64 1 + %tmp17658 = getelementptr inbounds float* %tmp17657, i64 1 + %tmp17659 = getelementptr inbounds float* %tmp17658, i64 1 + %tmp17660 = getelementptr inbounds float* %tmp17659, i64 1 + %tmp17661 = getelementptr inbounds float* %tmp17660, i64 1 + %tmp17662 = getelementptr inbounds float* %tmp17661, i64 1 + %tmp17663 = getelementptr inbounds float* %tmp17662, i64 1 + %tmp17664 = getelementptr inbounds float* 
%tmp17663, i64 1 + %tmp17665 = getelementptr inbounds float* %tmp17664, i64 1 + %tmp17666 = getelementptr inbounds float* %tmp17665, i64 1 + %tmp17667 = getelementptr inbounds float* %tmp17666, i64 1 + %tmp17668 = getelementptr inbounds float* %tmp17667, i64 1 + %tmp17669 = getelementptr inbounds float* %tmp17668, i64 1 + %tmp17670 = getelementptr inbounds float* %tmp17669, i64 1 + %tmp17671 = getelementptr inbounds float* %tmp17670, i64 1 + %tmp17672 = getelementptr inbounds float* %tmp17671, i64 1 + %tmp17673 = getelementptr inbounds float* %tmp17672, i64 1 + %tmp17674 = getelementptr inbounds float* %tmp17673, i64 1 + %tmp17675 = getelementptr inbounds float* %tmp17674, i64 1 + %tmp17676 = getelementptr inbounds float* %tmp17675, i64 1 + %tmp17677 = getelementptr inbounds float* %tmp17676, i64 1 + %tmp17678 = getelementptr inbounds float* %tmp17677, i64 1 + %tmp17679 = getelementptr inbounds float* %tmp17678, i64 1 + %tmp17680 = getelementptr inbounds float* %tmp17679, i64 1 + %tmp17681 = getelementptr inbounds float* %tmp17680, i64 1 + %tmp17682 = getelementptr inbounds float* %tmp17681, i64 1 + %tmp17683 = getelementptr inbounds float* %tmp17682, i64 1 + %tmp17684 = getelementptr inbounds float* %tmp17683, i64 1 + %tmp17685 = getelementptr inbounds float* %tmp17684, i64 1 + %tmp17686 = getelementptr inbounds float* %tmp17685, i64 1 + %tmp17687 = getelementptr inbounds float* %tmp17686, i64 1 + %tmp17688 = getelementptr inbounds float* %tmp17687, i64 1 + %tmp17689 = getelementptr inbounds float* %tmp17688, i64 1 + %tmp17690 = getelementptr inbounds float* %tmp17689, i64 1 + %tmp17691 = getelementptr inbounds float* %tmp17690, i64 1 + %tmp17692 = getelementptr inbounds float* %tmp17691, i64 1 + %tmp17693 = getelementptr inbounds float* %tmp17692, i64 1 + %tmp17694 = getelementptr inbounds float* %tmp17693, i64 1 + %tmp17695 = getelementptr inbounds float* %tmp17694, i64 1 + %tmp17696 = getelementptr inbounds float* %tmp17695, i64 1 + %tmp17697 = getelementptr 
inbounds float* %tmp17696, i64 1 + %tmp17698 = getelementptr inbounds float* %tmp17697, i64 1 + %tmp17699 = getelementptr inbounds float* %tmp17698, i64 1 + %tmp17700 = getelementptr inbounds float* %tmp17699, i64 1 + %tmp17701 = getelementptr inbounds float* %tmp17700, i64 1 + %tmp17702 = getelementptr inbounds float* %tmp17701, i64 1 + %tmp17703 = getelementptr inbounds float* %tmp17702, i64 1 + %tmp17704 = getelementptr inbounds float* %tmp17703, i64 1 + %tmp17705 = getelementptr inbounds float* %tmp17704, i64 1 + %tmp17706 = getelementptr inbounds float* %tmp17705, i64 1 + %tmp17707 = getelementptr inbounds float* %tmp17706, i64 1 + %tmp17708 = getelementptr inbounds float* %tmp17707, i64 1 + %tmp17709 = getelementptr inbounds float* %tmp17708, i64 1 + %tmp17710 = getelementptr inbounds float* %tmp17709, i64 1 + %tmp17711 = getelementptr inbounds float* %tmp17710, i64 1 + %tmp17712 = getelementptr inbounds float* %tmp17711, i64 1 + %tmp17713 = getelementptr inbounds float* %tmp17712, i64 1 + %tmp17714 = getelementptr inbounds float* %tmp17713, i64 1 + %tmp17715 = getelementptr inbounds float* %tmp17714, i64 1 + %tmp17716 = getelementptr inbounds float* %tmp17715, i64 1 + %tmp17717 = getelementptr inbounds float* %tmp17716, i64 1 + %tmp17718 = getelementptr inbounds float* %tmp17717, i64 1 + %tmp17719 = getelementptr inbounds float* %tmp17718, i64 1 + %tmp17720 = getelementptr inbounds float* %tmp17719, i64 1 + %tmp17721 = getelementptr inbounds float* %tmp17720, i64 1 + %tmp17722 = getelementptr inbounds float* %tmp17721, i64 1 + %tmp17723 = getelementptr inbounds float* %tmp17722, i64 1 + %tmp17724 = getelementptr inbounds float* %tmp17723, i64 1 + %tmp17725 = getelementptr inbounds float* %tmp17724, i64 1 + %tmp17726 = getelementptr inbounds float* %tmp17725, i64 1 + %tmp17727 = getelementptr inbounds float* %tmp17726, i64 1 + %tmp17728 = getelementptr inbounds float* %tmp17727, i64 1 + %tmp17729 = getelementptr inbounds float* %tmp17728, i64 1 + %tmp17730 = 
getelementptr inbounds float* %tmp17729, i64 1 + %tmp17731 = getelementptr inbounds float* %tmp17730, i64 1 + %tmp17732 = getelementptr inbounds float* %tmp17731, i64 1 + %tmp17733 = getelementptr inbounds float* %tmp17732, i64 1 + %tmp17734 = getelementptr inbounds float* %tmp17733, i64 1 + %tmp17735 = getelementptr inbounds float* %tmp17734, i64 1 + %tmp17736 = getelementptr inbounds float* %tmp17735, i64 1 + %tmp17737 = getelementptr inbounds float* %tmp17736, i64 1 + %tmp17738 = getelementptr inbounds float* %tmp17737, i64 1 + %tmp17739 = getelementptr inbounds float* %tmp17738, i64 1 + %tmp17740 = getelementptr inbounds float* %tmp17739, i64 1 + %tmp17741 = getelementptr inbounds float* %tmp17740, i64 1 + %tmp17742 = getelementptr inbounds float* %tmp17741, i64 1 + %tmp17743 = getelementptr inbounds float* %tmp17742, i64 1 + %tmp17744 = getelementptr inbounds float* %tmp17743, i64 1 + %tmp17745 = getelementptr inbounds float* %tmp17744, i64 1 + %tmp17746 = getelementptr inbounds float* %tmp17745, i64 1 + %tmp17747 = getelementptr inbounds float* %tmp17746, i64 1 + %tmp17748 = getelementptr inbounds float* %tmp17747, i64 1 + %tmp17749 = getelementptr inbounds float* %tmp17748, i64 1 + %tmp17750 = getelementptr inbounds float* %tmp17749, i64 1 + %tmp17751 = getelementptr inbounds float* %tmp17750, i64 1 + %tmp17752 = getelementptr inbounds float* %tmp17751, i64 1 + %tmp17753 = getelementptr inbounds float* %tmp17752, i64 1 + %tmp17754 = getelementptr inbounds float* %tmp17753, i64 1 + %tmp17755 = getelementptr inbounds float* %tmp17754, i64 1 + %tmp17756 = getelementptr inbounds float* %tmp17755, i64 1 + %tmp17757 = getelementptr inbounds float* %tmp17756, i64 1 + %tmp17758 = getelementptr inbounds float* %tmp17757, i64 1 + %tmp17759 = getelementptr inbounds float* %tmp17758, i64 1 + %tmp17760 = getelementptr inbounds float* %tmp17759, i64 1 + %tmp17761 = getelementptr inbounds float* %tmp17760, i64 1 + %tmp17762 = getelementptr inbounds float* %tmp17761, i64 1 
+ %tmp17763 = getelementptr inbounds float* %tmp17762, i64 1 + %tmp17764 = getelementptr inbounds float* %tmp17763, i64 1 + %tmp17765 = getelementptr inbounds float* %tmp17764, i64 1 + %tmp17766 = getelementptr inbounds float* %tmp17765, i64 1 + %tmp17767 = getelementptr inbounds float* %tmp17766, i64 1 + %tmp17768 = getelementptr inbounds float* %tmp17767, i64 1 + %tmp17769 = getelementptr inbounds float* %tmp17768, i64 1 + %tmp17770 = getelementptr inbounds float* %tmp17769, i64 1 + %tmp17771 = getelementptr inbounds float* %tmp17770, i64 1 + %tmp17772 = getelementptr inbounds float* %tmp17771, i64 1 + %tmp17773 = getelementptr inbounds float* %tmp17772, i64 1 + %tmp17774 = getelementptr inbounds float* %tmp17773, i64 1 + %tmp17775 = getelementptr inbounds float* %tmp17774, i64 1 + %tmp17776 = getelementptr inbounds float* %tmp17775, i64 1 + %tmp17777 = getelementptr inbounds float* %tmp17776, i64 1 + %tmp17778 = getelementptr inbounds float* %tmp17777, i64 1 + %tmp17779 = getelementptr inbounds float* %tmp17778, i64 1 + %tmp17780 = getelementptr inbounds float* %tmp17779, i64 1 + %tmp17781 = getelementptr inbounds float* %tmp17780, i64 1 + %tmp17782 = getelementptr inbounds float* %tmp17781, i64 1 + %tmp17783 = getelementptr inbounds float* %tmp17782, i64 1 + %tmp17784 = getelementptr inbounds float* %tmp17783, i64 1 + %tmp17785 = getelementptr inbounds float* %tmp17784, i64 1 + %tmp17786 = getelementptr inbounds float* %tmp17785, i64 1 + %tmp17787 = getelementptr inbounds float* %tmp17786, i64 1 + %tmp17788 = getelementptr inbounds float* %tmp17787, i64 1 + %tmp17789 = getelementptr inbounds float* %tmp17788, i64 1 + %tmp17790 = getelementptr inbounds float* %tmp17789, i64 1 + %tmp17791 = getelementptr inbounds float* %tmp17790, i64 1 + %tmp17792 = getelementptr inbounds float* %tmp17791, i64 1 + %tmp17793 = getelementptr inbounds float* %tmp17792, i64 1 + %tmp17794 = getelementptr inbounds float* %tmp17793, i64 1 + %tmp17795 = getelementptr inbounds float* 
%tmp17794, i64 1 + %tmp17796 = getelementptr inbounds float* %tmp17795, i64 1 + %tmp17797 = getelementptr inbounds float* %tmp17796, i64 1 + %tmp17798 = getelementptr inbounds float* %tmp17797, i64 1 + %tmp17799 = getelementptr inbounds float* %tmp17798, i64 1 + %tmp17800 = getelementptr inbounds float* %tmp17799, i64 1 + %tmp17801 = getelementptr inbounds float* %tmp17800, i64 1 + %tmp17802 = getelementptr inbounds float* %tmp17801, i64 1 + %tmp17803 = getelementptr inbounds float* %tmp17802, i64 1 + %tmp17804 = getelementptr inbounds float* %tmp17803, i64 1 + %tmp17805 = getelementptr inbounds float* %tmp17804, i64 1 + %tmp17806 = getelementptr inbounds float* %tmp17805, i64 1 + %tmp17807 = getelementptr inbounds float* %tmp17806, i64 1 + %tmp17808 = getelementptr inbounds float* %tmp17807, i64 1 + %tmp17809 = getelementptr inbounds float* %tmp17808, i64 1 + %tmp17810 = getelementptr inbounds float* %tmp17809, i64 1 + %tmp17811 = getelementptr inbounds float* %tmp17810, i64 1 + %tmp17812 = getelementptr inbounds float* %tmp17811, i64 1 + %tmp17813 = getelementptr inbounds float* %tmp17812, i64 1 + %tmp17814 = getelementptr inbounds float* %tmp17813, i64 1 + %tmp17815 = getelementptr inbounds float* %tmp17814, i64 1 + %tmp17816 = getelementptr inbounds float* %tmp17815, i64 1 + %tmp17817 = getelementptr inbounds float* %tmp17816, i64 1 + %tmp17818 = getelementptr inbounds float* %tmp17817, i64 1 + %tmp17819 = getelementptr inbounds float* %tmp17818, i64 1 + %tmp17820 = getelementptr inbounds float* %tmp17819, i64 1 + %tmp17821 = getelementptr inbounds float* %tmp17820, i64 1 + %tmp17822 = getelementptr inbounds float* %tmp17821, i64 1 + %tmp17823 = getelementptr inbounds float* %tmp17822, i64 1 + %tmp17824 = getelementptr inbounds float* %tmp17823, i64 1 + %tmp17825 = getelementptr inbounds float* %tmp17824, i64 1 + %tmp17826 = getelementptr inbounds float* %tmp17825, i64 1 + %tmp17827 = getelementptr inbounds float* %tmp17826, i64 1 + %tmp17828 = getelementptr 
inbounds float* %tmp17827, i64 1 + %tmp17829 = getelementptr inbounds float* %tmp17828, i64 1 + %tmp17830 = getelementptr inbounds float* %tmp17829, i64 1 + %tmp17831 = getelementptr inbounds float* %tmp17830, i64 1 + %tmp17832 = getelementptr inbounds float* %tmp17831, i64 1 + %tmp17833 = getelementptr inbounds float* %tmp17832, i64 1 + %tmp17834 = getelementptr inbounds float* %tmp17833, i64 1 + %tmp17835 = getelementptr inbounds float* %tmp17834, i64 1 + %tmp17836 = getelementptr inbounds float* %tmp17835, i64 1 + %tmp17837 = getelementptr inbounds float* %tmp17836, i64 1 + %tmp17838 = getelementptr inbounds float* %tmp17837, i64 1 + %tmp17839 = getelementptr inbounds float* %tmp17838, i64 1 + %tmp17840 = getelementptr inbounds float* %tmp17839, i64 1 + %tmp17841 = getelementptr inbounds float* %tmp17840, i64 1 + %tmp17842 = getelementptr inbounds float* %tmp17841, i64 1 + %tmp17843 = getelementptr inbounds float* %tmp17842, i64 1 + %tmp17844 = getelementptr inbounds float* %tmp17843, i64 1 + %tmp17845 = getelementptr inbounds float* %tmp17844, i64 1 + %tmp17846 = getelementptr inbounds float* %tmp17845, i64 1 + %tmp17847 = getelementptr inbounds float* %tmp17846, i64 1 + %tmp17848 = getelementptr inbounds float* %tmp17847, i64 1 + %tmp17849 = getelementptr inbounds float* %tmp17848, i64 1 + %tmp17850 = getelementptr inbounds float* %tmp17849, i64 1 + %tmp17851 = getelementptr inbounds float* %tmp17850, i64 1 + %tmp17852 = getelementptr inbounds float* %tmp17851, i64 1 + %tmp17853 = getelementptr inbounds float* %tmp17852, i64 1 + %tmp17854 = getelementptr inbounds float* %tmp17853, i64 1 + %tmp17855 = getelementptr inbounds float* %tmp17854, i64 1 + %tmp17856 = getelementptr inbounds float* %tmp17855, i64 1 + %tmp17857 = getelementptr inbounds float* %tmp17856, i64 1 + %tmp17858 = getelementptr inbounds float* %tmp17857, i64 1 + %tmp17859 = getelementptr inbounds float* %tmp17858, i64 1 + %tmp17860 = getelementptr inbounds float* %tmp17859, i64 1 + %tmp17861 = 
getelementptr inbounds float* %tmp17860, i64 1 + %tmp17862 = getelementptr inbounds float* %tmp17861, i64 1 + %tmp17863 = getelementptr inbounds float* %tmp17862, i64 1 + %tmp17864 = getelementptr inbounds float* %tmp17863, i64 1 + %tmp17865 = getelementptr inbounds float* %tmp17864, i64 1 + %tmp17866 = getelementptr inbounds float* %tmp17865, i64 1 + %tmp17867 = getelementptr inbounds float* %tmp17866, i64 1 + %tmp17868 = getelementptr inbounds float* %tmp17867, i64 1 + %tmp17869 = getelementptr inbounds float* %tmp17868, i64 1 + %tmp17870 = getelementptr inbounds float* %tmp17869, i64 1 + %tmp17871 = getelementptr inbounds float* %tmp17870, i64 1 + %tmp17872 = getelementptr inbounds float* %tmp17871, i64 1 + %tmp17873 = getelementptr inbounds float* %tmp17872, i64 1 + %tmp17874 = getelementptr inbounds float* %tmp17873, i64 1 + %tmp17875 = getelementptr inbounds float* %tmp17874, i64 1 + %tmp17876 = getelementptr inbounds float* %tmp17875, i64 1 + %tmp17877 = getelementptr inbounds float* %tmp17876, i64 1 + %tmp17878 = getelementptr inbounds float* %tmp17877, i64 1 + %tmp17879 = getelementptr inbounds float* %tmp17878, i64 1 + %tmp17880 = getelementptr inbounds float* %tmp17879, i64 1 + %tmp17881 = getelementptr inbounds float* %tmp17880, i64 1 + %tmp17882 = getelementptr inbounds float* %tmp17881, i64 1 + %tmp17883 = getelementptr inbounds float* %tmp17882, i64 1 + %tmp17884 = getelementptr inbounds float* %tmp17883, i64 1 + %tmp17885 = getelementptr inbounds float* %tmp17884, i64 1 + %tmp17886 = getelementptr inbounds float* %tmp17885, i64 1 + %tmp17887 = getelementptr inbounds float* %tmp17886, i64 1 + %tmp17888 = getelementptr inbounds float* %tmp17887, i64 1 + %tmp17889 = getelementptr inbounds float* %tmp17888, i64 1 + %tmp17890 = getelementptr inbounds float* %tmp17889, i64 1 + %tmp17891 = getelementptr inbounds float* %tmp17890, i64 1 + %tmp17892 = getelementptr inbounds float* %tmp17891, i64 1 + %tmp17893 = getelementptr inbounds float* %tmp17892, i64 1 
+ %tmp17894 = getelementptr inbounds float* %tmp17893, i64 1 + %tmp17895 = getelementptr inbounds float* %tmp17894, i64 1 + %tmp17896 = getelementptr inbounds float* %tmp17895, i64 1 + %tmp17897 = getelementptr inbounds float* %tmp17896, i64 1 + %tmp17898 = getelementptr inbounds float* %tmp17897, i64 1 + %tmp17899 = getelementptr inbounds float* %tmp17898, i64 1 + %tmp17900 = getelementptr inbounds float* %tmp17899, i64 1 + %tmp17901 = getelementptr inbounds float* %tmp17900, i64 1 + %tmp17902 = getelementptr inbounds float* %tmp17901, i64 1 + %tmp17903 = getelementptr inbounds float* %tmp17902, i64 1 + %tmp17904 = getelementptr inbounds float* %tmp17903, i64 1 + %tmp17905 = getelementptr inbounds float* %tmp17904, i64 1 + %tmp17906 = getelementptr inbounds float* %tmp17905, i64 1 + %tmp17907 = getelementptr inbounds float* %tmp17906, i64 1 + %tmp17908 = getelementptr inbounds float* %tmp17907, i64 1 + %tmp17909 = getelementptr inbounds float* %tmp17908, i64 1 + %tmp17910 = getelementptr inbounds float* %tmp17909, i64 1 + %tmp17911 = getelementptr inbounds float* %tmp17910, i64 1 + %tmp17912 = getelementptr inbounds float* %tmp17911, i64 1 + %tmp17913 = getelementptr inbounds float* %tmp17912, i64 1 + %tmp17914 = getelementptr inbounds float* %tmp17913, i64 1 + %tmp17915 = getelementptr inbounds float* %tmp17914, i64 1 + %tmp17916 = getelementptr inbounds float* %tmp17915, i64 1 + %tmp17917 = getelementptr inbounds float* %tmp17916, i64 1 + %tmp17918 = getelementptr inbounds float* %tmp17917, i64 1 + %tmp17919 = getelementptr inbounds float* %tmp17918, i64 1 + %tmp17920 = getelementptr inbounds float* %tmp17919, i64 1 + %tmp17921 = getelementptr inbounds float* %tmp17920, i64 1 + %tmp17922 = getelementptr inbounds float* %tmp17921, i64 1 + %tmp17923 = getelementptr inbounds float* %tmp17922, i64 1 + %tmp17924 = getelementptr inbounds float* %tmp17923, i64 1 + %tmp17925 = getelementptr inbounds float* %tmp17924, i64 1 + %tmp17926 = getelementptr inbounds float* 
%tmp17925, i64 1 + %tmp17927 = getelementptr inbounds float* %tmp17926, i64 1 + %tmp17928 = getelementptr inbounds float* %tmp17927, i64 1 + %tmp17929 = getelementptr inbounds float* %tmp17928, i64 1 + %tmp17930 = getelementptr inbounds float* %tmp17929, i64 1 + %tmp17931 = getelementptr inbounds float* %tmp17930, i64 1 + %tmp17932 = getelementptr inbounds float* %tmp17931, i64 1 + %tmp17933 = getelementptr inbounds float* %tmp17932, i64 1 + %tmp17934 = getelementptr inbounds float* %tmp17933, i64 1 + %tmp17935 = getelementptr inbounds float* %tmp17934, i64 1 + %tmp17936 = getelementptr inbounds float* %tmp17935, i64 1 + %tmp17937 = getelementptr inbounds float* %tmp17936, i64 1 + %tmp17938 = getelementptr inbounds float* %tmp17937, i64 1 + %tmp17939 = getelementptr inbounds float* %tmp17938, i64 1 + %tmp17940 = getelementptr inbounds float* %tmp17939, i64 1 + %tmp17941 = getelementptr inbounds float* %tmp17940, i64 1 + %tmp17942 = getelementptr inbounds float* %tmp17941, i64 1 + %tmp17943 = getelementptr inbounds float* %tmp17942, i64 1 + %tmp17944 = getelementptr inbounds float* %tmp17943, i64 1 + %tmp17945 = getelementptr inbounds float* %tmp17944, i64 1 + %tmp17946 = getelementptr inbounds float* %tmp17945, i64 1 + %tmp17947 = getelementptr inbounds float* %tmp17946, i64 1 + %tmp17948 = getelementptr inbounds float* %tmp17947, i64 1 + %tmp17949 = getelementptr inbounds float* %tmp17948, i64 1 + %tmp17950 = getelementptr inbounds float* %tmp17949, i64 1 + %tmp17951 = getelementptr inbounds float* %tmp17950, i64 1 + %tmp17952 = getelementptr inbounds float* %tmp17951, i64 1 + %tmp17953 = getelementptr inbounds float* %tmp17952, i64 1 + %tmp17954 = getelementptr inbounds float* %tmp17953, i64 1 + %tmp17955 = getelementptr inbounds float* %tmp17954, i64 1 + %tmp17956 = getelementptr inbounds float* %tmp17955, i64 1 + %tmp17957 = getelementptr inbounds float* %tmp17956, i64 1 + %tmp17958 = getelementptr inbounds float* %tmp17957, i64 1 + %tmp17959 = getelementptr 
inbounds float* %tmp17958, i64 1 + %tmp17960 = getelementptr inbounds float* %tmp17959, i64 1 + %tmp17961 = getelementptr inbounds float* %tmp17960, i64 1 + %tmp17962 = getelementptr inbounds float* %tmp17961, i64 1 + %tmp17963 = getelementptr inbounds float* %tmp17962, i64 1 + %tmp17964 = getelementptr inbounds float* %tmp17963, i64 1 + %tmp17965 = getelementptr inbounds float* %tmp17964, i64 1 + %tmp17966 = getelementptr inbounds float* %tmp17965, i64 1 + %tmp17967 = getelementptr inbounds float* %tmp17966, i64 1 + %tmp17968 = getelementptr inbounds float* %tmp17967, i64 1 + %tmp17969 = getelementptr inbounds float* %tmp17968, i64 1 + %tmp17970 = getelementptr inbounds float* %tmp17969, i64 1 + %tmp17971 = getelementptr inbounds float* %tmp17970, i64 1 + %tmp17972 = getelementptr inbounds float* %tmp17971, i64 1 + %tmp17973 = getelementptr inbounds float* %tmp17972, i64 1 + %tmp17974 = getelementptr inbounds float* %tmp17973, i64 1 + %tmp17975 = getelementptr inbounds float* %tmp17974, i64 1 + %tmp17976 = getelementptr inbounds float* %tmp17975, i64 1 + %tmp17977 = getelementptr inbounds float* %tmp17976, i64 1 + %tmp17978 = getelementptr inbounds float* %tmp17977, i64 1 + %tmp17979 = getelementptr inbounds float* %tmp17978, i64 1 + %tmp17980 = getelementptr inbounds float* %tmp17979, i64 1 + %tmp17981 = getelementptr inbounds float* %tmp17980, i64 1 + %tmp17982 = getelementptr inbounds float* %tmp17981, i64 1 + %tmp17983 = getelementptr inbounds float* %tmp17982, i64 1 + %tmp17984 = getelementptr inbounds float* %tmp17983, i64 1 + %tmp17985 = getelementptr inbounds float* %tmp17984, i64 1 + %tmp17986 = getelementptr inbounds float* %tmp17985, i64 1 + %tmp17987 = getelementptr inbounds float* %tmp17986, i64 1 + %tmp17988 = getelementptr inbounds float* %tmp17987, i64 1 + %tmp17989 = getelementptr inbounds float* %tmp17988, i64 1 + %tmp17990 = getelementptr inbounds float* %tmp17989, i64 1 + %tmp17991 = getelementptr inbounds float* %tmp17990, i64 1 + %tmp17992 = 
getelementptr inbounds float* %tmp17991, i64 1 + %tmp17993 = getelementptr inbounds float* %tmp17992, i64 1 + %tmp17994 = getelementptr inbounds float* %tmp17993, i64 1 + %tmp17995 = getelementptr inbounds float* %tmp17994, i64 1 + %tmp17996 = getelementptr inbounds float* %tmp17995, i64 1 + %tmp17997 = getelementptr inbounds float* %tmp17996, i64 1 + %tmp17998 = getelementptr inbounds float* %tmp17997, i64 1 + %tmp17999 = getelementptr inbounds float* %tmp17998, i64 1 + %tmp18000 = getelementptr inbounds float* %tmp17999, i64 1 + %tmp18001 = getelementptr inbounds float* %tmp18000, i64 1 + %tmp18002 = getelementptr inbounds float* %tmp18001, i64 1 + %tmp18003 = getelementptr inbounds float* %tmp18002, i64 1 + %tmp18004 = getelementptr inbounds float* %tmp18003, i64 1 + %tmp18005 = getelementptr inbounds float* %tmp18004, i64 1 + %tmp18006 = getelementptr inbounds float* %tmp18005, i64 1 + %tmp18007 = getelementptr inbounds float* %tmp18006, i64 1 + %tmp18008 = getelementptr inbounds float* %tmp18007, i64 1 + %tmp18009 = getelementptr inbounds float* %tmp18008, i64 1 + %tmp18010 = getelementptr inbounds float* %tmp18009, i64 1 + %tmp18011 = getelementptr inbounds float* %tmp18010, i64 1 + %tmp18012 = getelementptr inbounds float* %tmp18011, i64 1 + %tmp18013 = getelementptr inbounds float* %tmp18012, i64 1 + %tmp18014 = getelementptr inbounds float* %tmp18013, i64 1 + %tmp18015 = getelementptr inbounds float* %tmp18014, i64 1 + %tmp18016 = getelementptr inbounds float* %tmp18015, i64 1 + %tmp18017 = getelementptr inbounds float* %tmp18016, i64 1 + %tmp18018 = getelementptr inbounds float* %tmp18017, i64 1 + %tmp18019 = getelementptr inbounds float* %tmp18018, i64 1 + %tmp18020 = getelementptr inbounds float* %tmp18019, i64 1 + %tmp18021 = getelementptr inbounds float* %tmp18020, i64 1 + %tmp18022 = getelementptr inbounds float* %tmp18021, i64 1 + %tmp18023 = getelementptr inbounds float* %tmp18022, i64 1 + %tmp18024 = getelementptr inbounds float* %tmp18023, i64 1 
+ %tmp18025 = getelementptr inbounds float* %tmp18024, i64 1 + %tmp18026 = getelementptr inbounds float* %tmp18025, i64 1 + %tmp18027 = getelementptr inbounds float* %tmp18026, i64 1 + %tmp18028 = getelementptr inbounds float* %tmp18027, i64 1 + %tmp18029 = getelementptr inbounds float* %tmp18028, i64 1 + %tmp18030 = getelementptr inbounds float* %tmp18029, i64 1 + %tmp18031 = getelementptr inbounds float* %tmp18030, i64 1 + %tmp18032 = getelementptr inbounds float* %tmp18031, i64 1 + %tmp18033 = getelementptr inbounds float* %tmp18032, i64 1 + %tmp18034 = getelementptr inbounds float* %tmp18033, i64 1 + %tmp18035 = getelementptr inbounds float* %tmp18034, i64 1 + %tmp18036 = getelementptr inbounds float* %tmp18035, i64 1 + %tmp18037 = getelementptr inbounds float* %tmp18036, i64 1 + %tmp18038 = getelementptr inbounds float* %tmp18037, i64 1 + %tmp18039 = getelementptr inbounds float* %tmp18038, i64 1 + %tmp18040 = getelementptr inbounds float* %tmp18039, i64 1 + %tmp18041 = getelementptr inbounds float* %tmp18040, i64 1 + %tmp18042 = getelementptr inbounds float* %tmp18041, i64 1 + %tmp18043 = getelementptr inbounds float* %tmp18042, i64 1 + %tmp18044 = getelementptr inbounds float* %tmp18043, i64 1 + %tmp18045 = getelementptr inbounds float* %tmp18044, i64 1 + %tmp18046 = getelementptr inbounds float* %tmp18045, i64 1 + %tmp18047 = getelementptr inbounds float* %tmp18046, i64 1 + %tmp18048 = getelementptr inbounds float* %tmp18047, i64 1 + %tmp18049 = getelementptr inbounds float* %tmp18048, i64 1 + %tmp18050 = getelementptr inbounds float* %tmp18049, i64 1 + %tmp18051 = getelementptr inbounds float* %tmp18050, i64 1 + %tmp18052 = getelementptr inbounds float* %tmp18051, i64 1 + %tmp18053 = getelementptr inbounds float* %tmp18052, i64 1 + %tmp18054 = getelementptr inbounds float* %tmp18053, i64 1 + %tmp18055 = getelementptr inbounds float* %tmp18054, i64 1 + %tmp18056 = getelementptr inbounds float* %tmp18055, i64 1 + %tmp18057 = getelementptr inbounds float* 
%tmp18056, i64 1 + %tmp18058 = getelementptr inbounds float* %tmp18057, i64 1 + %tmp18059 = getelementptr inbounds float* %tmp18058, i64 1 + %tmp18060 = getelementptr inbounds float* %tmp18059, i64 1 + %tmp18061 = getelementptr inbounds float* %tmp18060, i64 1 + %tmp18062 = getelementptr inbounds float* %tmp18061, i64 1 + %tmp18063 = getelementptr inbounds float* %tmp18062, i64 1 + %tmp18064 = getelementptr inbounds float* %tmp18063, i64 1 + %tmp18065 = getelementptr inbounds float* %tmp18064, i64 1 + %tmp18066 = getelementptr inbounds float* %tmp18065, i64 1 + %tmp18067 = getelementptr inbounds float* %tmp18066, i64 1 + %tmp18068 = getelementptr inbounds float* %tmp18067, i64 1 + %tmp18069 = getelementptr inbounds float* %tmp18068, i64 1 + %tmp18070 = getelementptr inbounds float* %tmp18069, i64 1 + %tmp18071 = getelementptr inbounds float* %tmp18070, i64 1 + %tmp18072 = getelementptr inbounds float* %tmp18071, i64 1 + %tmp18073 = getelementptr inbounds float* %tmp18072, i64 1 + %tmp18074 = getelementptr inbounds float* %tmp18073, i64 1 + %tmp18075 = getelementptr inbounds float* %tmp18074, i64 1 + %tmp18076 = getelementptr inbounds float* %tmp18075, i64 1 + %tmp18077 = getelementptr inbounds float* %tmp18076, i64 1 + %tmp18078 = getelementptr inbounds float* %tmp18077, i64 1 + %tmp18079 = getelementptr inbounds float* %tmp18078, i64 1 + %tmp18080 = getelementptr inbounds float* %tmp18079, i64 1 + %tmp18081 = getelementptr inbounds float* %tmp18080, i64 1 + %tmp18082 = getelementptr inbounds float* %tmp18081, i64 1 + %tmp18083 = getelementptr inbounds float* %tmp18082, i64 1 + %tmp18084 = getelementptr inbounds float* %tmp18083, i64 1 + %tmp18085 = getelementptr inbounds float* %tmp18084, i64 1 + %tmp18086 = getelementptr inbounds float* %tmp18085, i64 1 + %tmp18087 = getelementptr inbounds float* %tmp18086, i64 1 + %tmp18088 = getelementptr inbounds float* %tmp18087, i64 1 + %tmp18089 = getelementptr inbounds float* %tmp18088, i64 1 + %tmp18090 = getelementptr 
inbounds float* %tmp18089, i64 1 + %tmp18091 = getelementptr inbounds float* %tmp18090, i64 1 + %tmp18092 = getelementptr inbounds float* %tmp18091, i64 1 + %tmp18093 = getelementptr inbounds float* %tmp18092, i64 1 + %tmp18094 = getelementptr inbounds float* %tmp18093, i64 1 + %tmp18095 = getelementptr inbounds float* %tmp18094, i64 1 + %tmp18096 = getelementptr inbounds float* %tmp18095, i64 1 + %tmp18097 = getelementptr inbounds float* %tmp18096, i64 1 + %tmp18098 = getelementptr inbounds float* %tmp18097, i64 1 + %tmp18099 = getelementptr inbounds float* %tmp18098, i64 1 + %tmp18100 = getelementptr inbounds float* %tmp18099, i64 1 + %tmp18101 = getelementptr inbounds float* %tmp18100, i64 1 + %tmp18102 = getelementptr inbounds float* %tmp18101, i64 1 + %tmp18103 = getelementptr inbounds float* %tmp18102, i64 1 + %tmp18104 = getelementptr inbounds float* %tmp18103, i64 1 + %tmp18105 = getelementptr inbounds float* %tmp18104, i64 1 + %tmp18106 = getelementptr inbounds float* %tmp18105, i64 1 + %tmp18107 = getelementptr inbounds float* %tmp18106, i64 1 + %tmp18108 = getelementptr inbounds float* %tmp18107, i64 1 + %tmp18109 = getelementptr inbounds float* %tmp18108, i64 1 + %tmp18110 = getelementptr inbounds float* %tmp18109, i64 1 + %tmp18111 = getelementptr inbounds float* %tmp18110, i64 1 + %tmp18112 = getelementptr inbounds float* %tmp18111, i64 1 + %tmp18113 = getelementptr inbounds float* %tmp18112, i64 1 + %tmp18114 = getelementptr inbounds float* %tmp18113, i64 1 + %tmp18115 = getelementptr inbounds float* %tmp18114, i64 1 + %tmp18116 = getelementptr inbounds float* %tmp18115, i64 1 + %tmp18117 = getelementptr inbounds float* %tmp18116, i64 1 + %tmp18118 = getelementptr inbounds float* %tmp18117, i64 1 + %tmp18119 = getelementptr inbounds float* %tmp18118, i64 1 + %tmp18120 = getelementptr inbounds float* %tmp18119, i64 1 + %tmp18121 = getelementptr inbounds float* %tmp18120, i64 1 + %tmp18122 = getelementptr inbounds float* %tmp18121, i64 1 + %tmp18123 = 
getelementptr inbounds float* %tmp18122, i64 1 + %tmp18124 = getelementptr inbounds float* %tmp18123, i64 1 + %tmp18125 = getelementptr inbounds float* %tmp18124, i64 1 + %tmp18126 = getelementptr inbounds float* %tmp18125, i64 1 + %tmp18127 = getelementptr inbounds float* %tmp18126, i64 1 + %tmp18128 = getelementptr inbounds float* %tmp18127, i64 1 + %tmp18129 = getelementptr inbounds float* %tmp18128, i64 1 + %tmp18130 = getelementptr inbounds float* %tmp18129, i64 1 + %tmp18131 = getelementptr inbounds float* %tmp18130, i64 1 + %tmp18132 = getelementptr inbounds float* %tmp18131, i64 1 + %tmp18133 = getelementptr inbounds float* %tmp18132, i64 1 + %tmp18134 = getelementptr inbounds float* %tmp18133, i64 1 + %tmp18135 = getelementptr inbounds float* %tmp18134, i64 1 + %tmp18136 = getelementptr inbounds float* %tmp18135, i64 1 + %tmp18137 = getelementptr inbounds float* %tmp18136, i64 1 + %tmp18138 = getelementptr inbounds float* %tmp18137, i64 1 + %tmp18139 = getelementptr inbounds float* %tmp18138, i64 1 + %tmp18140 = getelementptr inbounds float* %tmp18139, i64 1 + %tmp18141 = getelementptr inbounds float* %tmp18140, i64 1 + %tmp18142 = getelementptr inbounds float* %tmp18141, i64 1 + %tmp18143 = getelementptr inbounds float* %tmp18142, i64 1 + %tmp18144 = getelementptr inbounds float* %tmp18143, i64 1 + %tmp18145 = getelementptr inbounds float* %tmp18144, i64 1 + %tmp18146 = getelementptr inbounds float* %tmp18145, i64 1 + %tmp18147 = getelementptr inbounds float* %tmp18146, i64 1 + %tmp18148 = getelementptr inbounds float* %tmp18147, i64 1 + %tmp18149 = getelementptr inbounds float* %tmp18148, i64 1 + %tmp18150 = getelementptr inbounds float* %tmp18149, i64 1 + %tmp18151 = getelementptr inbounds float* %tmp18150, i64 1 + %tmp18152 = getelementptr inbounds float* %tmp18151, i64 1 + %tmp18153 = getelementptr inbounds float* %tmp18152, i64 1 + %tmp18154 = getelementptr inbounds float* %tmp18153, i64 1 + %tmp18155 = getelementptr inbounds float* %tmp18154, i64 1 
+ %tmp18156 = getelementptr inbounds float* %tmp18155, i64 1 + %tmp18157 = getelementptr inbounds float* %tmp18156, i64 1 + %tmp18158 = getelementptr inbounds float* %tmp18157, i64 1 + %tmp18159 = getelementptr inbounds float* %tmp18158, i64 1 + %tmp18160 = getelementptr inbounds float* %tmp18159, i64 1 + %tmp18161 = getelementptr inbounds float* %tmp18160, i64 1 + %tmp18162 = getelementptr inbounds float* %tmp18161, i64 1 + %tmp18163 = getelementptr inbounds float* %tmp18162, i64 1 + %tmp18164 = getelementptr inbounds float* %tmp18163, i64 1 + %tmp18165 = getelementptr inbounds float* %tmp18164, i64 1 + %tmp18166 = getelementptr inbounds float* %tmp18165, i64 1 + %tmp18167 = getelementptr inbounds float* %tmp18166, i64 1 + %tmp18168 = getelementptr inbounds float* %tmp18167, i64 1 + %tmp18169 = getelementptr inbounds float* %tmp18168, i64 1 + %tmp18170 = getelementptr inbounds float* %tmp18169, i64 1 + %tmp18171 = getelementptr inbounds float* %tmp18170, i64 1 + %tmp18172 = getelementptr inbounds float* %tmp18171, i64 1 + %tmp18173 = getelementptr inbounds float* %tmp18172, i64 1 + %tmp18174 = getelementptr inbounds float* %tmp18173, i64 1 + %tmp18175 = getelementptr inbounds float* %tmp18174, i64 1 + %tmp18176 = getelementptr inbounds float* %tmp18175, i64 1 + %tmp18177 = getelementptr inbounds float* %tmp18176, i64 1 + %tmp18178 = getelementptr inbounds float* %tmp18177, i64 1 + %tmp18179 = getelementptr inbounds float* %tmp18178, i64 1 + %tmp18180 = getelementptr inbounds float* %tmp18179, i64 1 + %tmp18181 = getelementptr inbounds float* %tmp18180, i64 1 + %tmp18182 = getelementptr inbounds float* %tmp18181, i64 1 + %tmp18183 = getelementptr inbounds float* %tmp18182, i64 1 + %tmp18184 = getelementptr inbounds float* %tmp18183, i64 1 + %tmp18185 = getelementptr inbounds float* %tmp18184, i64 1 + %tmp18186 = getelementptr inbounds float* %tmp18185, i64 1 + %tmp18187 = getelementptr inbounds float* %tmp18186, i64 1 + %tmp18188 = getelementptr inbounds float* 
%tmp18187, i64 1 + %tmp18189 = getelementptr inbounds float* %tmp18188, i64 1 + %tmp18190 = getelementptr inbounds float* %tmp18189, i64 1 + %tmp18191 = getelementptr inbounds float* %tmp18190, i64 1 + %tmp18192 = getelementptr inbounds float* %tmp18191, i64 1 + %tmp18193 = getelementptr inbounds float* %tmp18192, i64 1 + %tmp18194 = getelementptr inbounds float* %tmp18193, i64 1 + %tmp18195 = getelementptr inbounds float* %tmp18194, i64 1 + %tmp18196 = getelementptr inbounds float* %tmp18195, i64 1 + %tmp18197 = getelementptr inbounds float* %tmp18196, i64 1 + %tmp18198 = getelementptr inbounds float* %tmp18197, i64 1 + %tmp18199 = getelementptr inbounds float* %tmp18198, i64 1 + %tmp18200 = getelementptr inbounds float* %tmp18199, i64 1 + %tmp18201 = getelementptr inbounds float* %tmp18200, i64 1 + %tmp18202 = getelementptr inbounds float* %tmp18201, i64 1 + %tmp18203 = getelementptr inbounds float* %tmp18202, i64 1 + %tmp18204 = getelementptr inbounds float* %tmp18203, i64 1 + %tmp18205 = getelementptr inbounds float* %tmp18204, i64 1 + %tmp18206 = getelementptr inbounds float* %tmp18205, i64 1 + %tmp18207 = getelementptr inbounds float* %tmp18206, i64 1 + %tmp18208 = getelementptr inbounds float* %tmp18207, i64 1 + %tmp18209 = getelementptr inbounds float* %tmp18208, i64 1 + %tmp18210 = getelementptr inbounds float* %tmp18209, i64 1 + %tmp18211 = getelementptr inbounds float* %tmp18210, i64 1 + %tmp18212 = getelementptr inbounds float* %tmp18211, i64 1 + %tmp18213 = getelementptr inbounds float* %tmp18212, i64 1 + %tmp18214 = getelementptr inbounds float* %tmp18213, i64 1 + %tmp18215 = getelementptr inbounds float* %tmp18214, i64 1 + %tmp18216 = getelementptr inbounds float* %tmp18215, i64 1 + %tmp18217 = getelementptr inbounds float* %tmp18216, i64 1 + %tmp18218 = getelementptr inbounds float* %tmp18217, i64 1 + %tmp18219 = getelementptr inbounds float* %tmp18218, i64 1 + %tmp18220 = getelementptr inbounds float* %tmp18219, i64 1 + %tmp18221 = getelementptr 
inbounds float* %tmp18220, i64 1 + %tmp18222 = getelementptr inbounds float* %tmp18221, i64 1 + %tmp18223 = getelementptr inbounds float* %tmp18222, i64 1 + %tmp18224 = getelementptr inbounds float* %tmp18223, i64 1 + %tmp18225 = getelementptr inbounds float* %tmp18224, i64 1 + %tmp18226 = getelementptr inbounds float* %tmp18225, i64 1 + %tmp18227 = getelementptr inbounds float* %tmp18226, i64 1 + %tmp18228 = getelementptr inbounds float* %tmp18227, i64 1 + %tmp18229 = getelementptr inbounds float* %tmp18228, i64 1 + %tmp18230 = getelementptr inbounds float* %tmp18229, i64 1 + %tmp18231 = getelementptr inbounds float* %tmp18230, i64 1 + %tmp18232 = getelementptr inbounds float* %tmp18231, i64 1 + %tmp18233 = getelementptr inbounds float* %tmp18232, i64 1 + %tmp18234 = getelementptr inbounds float* %tmp18233, i64 1 + %tmp18235 = getelementptr inbounds float* %tmp18234, i64 1 + %tmp18236 = getelementptr inbounds float* %tmp18235, i64 1 + %tmp18237 = getelementptr inbounds float* %tmp18236, i64 1 + %tmp18238 = getelementptr inbounds float* %tmp18237, i64 1 + %tmp18239 = getelementptr inbounds float* %tmp18238, i64 1 + %tmp18240 = getelementptr inbounds float* %tmp18239, i64 1 + %tmp18241 = getelementptr inbounds float* %tmp18240, i64 1 + %tmp18242 = getelementptr inbounds float* %tmp18241, i64 1 + %tmp18243 = getelementptr inbounds float* %tmp18242, i64 1 + %tmp18244 = getelementptr inbounds float* %tmp18243, i64 1 + %tmp18245 = getelementptr inbounds float* %tmp18244, i64 1 + %tmp18246 = getelementptr inbounds float* %tmp18245, i64 1 + %tmp18247 = getelementptr inbounds float* %tmp18246, i64 1 + %tmp18248 = getelementptr inbounds float* %tmp18247, i64 1 + %tmp18249 = getelementptr inbounds float* %tmp18248, i64 1 + %tmp18250 = getelementptr inbounds float* %tmp18249, i64 1 + %tmp18251 = getelementptr inbounds float* %tmp18250, i64 1 + %tmp18252 = getelementptr inbounds float* %tmp18251, i64 1 + %tmp18253 = getelementptr inbounds float* %tmp18252, i64 1 + %tmp18254 = 
getelementptr inbounds float* %tmp18253, i64 1 + %tmp18255 = getelementptr inbounds float* %tmp18254, i64 1 + %tmp18256 = getelementptr inbounds float* %tmp18255, i64 1 + %tmp18257 = getelementptr inbounds float* %tmp18256, i64 1 + %tmp18258 = getelementptr inbounds float* %tmp18257, i64 1 + %tmp18259 = getelementptr inbounds float* %tmp18258, i64 1 + %tmp18260 = getelementptr inbounds float* %tmp18259, i64 1 + %tmp18261 = getelementptr inbounds float* %tmp18260, i64 1 + %tmp18262 = getelementptr inbounds float* %tmp18261, i64 1 + %tmp18263 = getelementptr inbounds float* %tmp18262, i64 1 + %tmp18264 = getelementptr inbounds float* %tmp18263, i64 1 + %tmp18265 = getelementptr inbounds float* %tmp18264, i64 1 + %tmp18266 = getelementptr inbounds float* %tmp18265, i64 1 + %tmp18267 = getelementptr inbounds float* %tmp18266, i64 1 + %tmp18268 = getelementptr inbounds float* %tmp18267, i64 1 + %tmp18269 = getelementptr inbounds float* %tmp18268, i64 1 + %tmp18270 = getelementptr inbounds float* %tmp18269, i64 1 + %tmp18271 = getelementptr inbounds float* %tmp18270, i64 1 + %tmp18272 = getelementptr inbounds float* %tmp18271, i64 1 + %tmp18273 = getelementptr inbounds float* %tmp18272, i64 1 + %tmp18274 = getelementptr inbounds float* %tmp18273, i64 1 + %tmp18275 = getelementptr inbounds float* %tmp18274, i64 1 + %tmp18276 = getelementptr inbounds float* %tmp18275, i64 1 + %tmp18277 = getelementptr inbounds float* %tmp18276, i64 1 + %tmp18278 = getelementptr inbounds float* %tmp18277, i64 1 + %tmp18279 = getelementptr inbounds float* %tmp18278, i64 1 + %tmp18280 = getelementptr inbounds float* %tmp18279, i64 1 + %tmp18281 = getelementptr inbounds float* %tmp18280, i64 1 + %tmp18282 = getelementptr inbounds float* %tmp18281, i64 1 + %tmp18283 = getelementptr inbounds float* %tmp18282, i64 1 + %tmp18284 = getelementptr inbounds float* %tmp18283, i64 1 + %tmp18285 = getelementptr inbounds float* %tmp18284, i64 1 + %tmp18286 = getelementptr inbounds float* %tmp18285, i64 1 
+ %tmp18287 = getelementptr inbounds float* %tmp18286, i64 1 + %tmp18288 = getelementptr inbounds float* %tmp18287, i64 1 + %tmp18289 = getelementptr inbounds float* %tmp18288, i64 1 + %tmp18290 = getelementptr inbounds float* %tmp18289, i64 1 + %tmp18291 = getelementptr inbounds float* %tmp18290, i64 1 + %tmp18292 = getelementptr inbounds float* %tmp18291, i64 1 + %tmp18293 = getelementptr inbounds float* %tmp18292, i64 1 + %tmp18294 = getelementptr inbounds float* %tmp18293, i64 1 + %tmp18295 = getelementptr inbounds float* %tmp18294, i64 1 + %tmp18296 = getelementptr inbounds float* %tmp18295, i64 1 + %tmp18297 = getelementptr inbounds float* %tmp18296, i64 1 + %tmp18298 = getelementptr inbounds float* %tmp18297, i64 1 + %tmp18299 = getelementptr inbounds float* %tmp18298, i64 1 + %tmp18300 = getelementptr inbounds float* %tmp18299, i64 1 + %tmp18301 = getelementptr inbounds float* %tmp18300, i64 1 + %tmp18302 = getelementptr inbounds float* %tmp18301, i64 1 + %tmp18303 = getelementptr inbounds float* %tmp18302, i64 1 + %tmp18304 = getelementptr inbounds float* %tmp18303, i64 1 + %tmp18305 = getelementptr inbounds float* %tmp18304, i64 1 + %tmp18306 = getelementptr inbounds float* %tmp18305, i64 1 + %tmp18307 = getelementptr inbounds float* %tmp18306, i64 1 + %tmp18308 = getelementptr inbounds float* %tmp18307, i64 1 + %tmp18309 = getelementptr inbounds float* %tmp18308, i64 1 + %tmp18310 = getelementptr inbounds float* %tmp18309, i64 1 + %tmp18311 = getelementptr inbounds float* %tmp18310, i64 1 + %tmp18312 = getelementptr inbounds float* %tmp18311, i64 1 + %tmp18313 = getelementptr inbounds float* %tmp18312, i64 1 + %tmp18314 = getelementptr inbounds float* %tmp18313, i64 1 + %tmp18315 = getelementptr inbounds float* %tmp18314, i64 1 + %tmp18316 = getelementptr inbounds float* %tmp18315, i64 1 + %tmp18317 = getelementptr inbounds float* %tmp18316, i64 1 + %tmp18318 = getelementptr inbounds float* %tmp18317, i64 1 + %tmp18319 = getelementptr inbounds float* 
%tmp18318, i64 1 + %tmp18320 = getelementptr inbounds float* %tmp18319, i64 1 + %tmp18321 = getelementptr inbounds float* %tmp18320, i64 1 + %tmp18322 = getelementptr inbounds float* %tmp18321, i64 1 + %tmp18323 = getelementptr inbounds float* %tmp18322, i64 1 + %tmp18324 = getelementptr inbounds float* %tmp18323, i64 1 + %tmp18325 = getelementptr inbounds float* %tmp18324, i64 1 + %tmp18326 = getelementptr inbounds float* %tmp18325, i64 1 + %tmp18327 = getelementptr inbounds float* %tmp18326, i64 1 + %tmp18328 = getelementptr inbounds float* %tmp18327, i64 1 + %tmp18329 = getelementptr inbounds float* %tmp18328, i64 1 + %tmp18330 = getelementptr inbounds float* %tmp18329, i64 1 + %tmp18331 = getelementptr inbounds float* %tmp18330, i64 1 + %tmp18332 = getelementptr inbounds float* %tmp18331, i64 1 + %tmp18333 = getelementptr inbounds float* %tmp18332, i64 1 + %tmp18334 = getelementptr inbounds float* %tmp18333, i64 1 + %tmp18335 = getelementptr inbounds float* %tmp18334, i64 1 + %tmp18336 = getelementptr inbounds float* %tmp18335, i64 1 + %tmp18337 = getelementptr inbounds float* %tmp18336, i64 1 + %tmp18338 = getelementptr inbounds float* %tmp18337, i64 1 + %tmp18339 = getelementptr inbounds float* %tmp18338, i64 1 + %tmp18340 = getelementptr inbounds float* %tmp18339, i64 1 + %tmp18341 = getelementptr inbounds float* %tmp18340, i64 1 + %tmp18342 = getelementptr inbounds float* %tmp18341, i64 1 + %tmp18343 = getelementptr inbounds float* %tmp18342, i64 1 + %tmp18344 = getelementptr inbounds float* %tmp18343, i64 1 + %tmp18345 = getelementptr inbounds float* %tmp18344, i64 1 + %tmp18346 = getelementptr inbounds float* %tmp18345, i64 1 + %tmp18347 = getelementptr inbounds float* %tmp18346, i64 1 + %tmp18348 = getelementptr inbounds float* %tmp18347, i64 1 + %tmp18349 = getelementptr inbounds float* %tmp18348, i64 1 + %tmp18350 = getelementptr inbounds float* %tmp18349, i64 1 + %tmp18351 = getelementptr inbounds float* %tmp18350, i64 1 + %tmp18352 = getelementptr 
inbounds float* %tmp18351, i64 1 + %tmp18353 = getelementptr inbounds float* %tmp18352, i64 1 + %tmp18354 = getelementptr inbounds float* %tmp18353, i64 1 + %tmp18355 = getelementptr inbounds float* %tmp18354, i64 1 + %tmp18356 = getelementptr inbounds float* %tmp18355, i64 1 + %tmp18357 = getelementptr inbounds float* %tmp18356, i64 1 + %tmp18358 = getelementptr inbounds float* %tmp18357, i64 1 + %tmp18359 = getelementptr inbounds float* %tmp18358, i64 1 + %tmp18360 = getelementptr inbounds float* %tmp18359, i64 1 + %tmp18361 = getelementptr inbounds float* %tmp18360, i64 1 + %tmp18362 = getelementptr inbounds float* %tmp18361, i64 1 + %tmp18363 = getelementptr inbounds float* %tmp18362, i64 1 + %tmp18364 = getelementptr inbounds float* %tmp18363, i64 1 + %tmp18365 = getelementptr inbounds float* %tmp18364, i64 1 + %tmp18366 = getelementptr inbounds float* %tmp18365, i64 1 + %tmp18367 = getelementptr inbounds float* %tmp18366, i64 1 + %tmp18368 = getelementptr inbounds float* %tmp18367, i64 1 + %tmp18369 = getelementptr inbounds float* %tmp18368, i64 1 + %tmp18370 = getelementptr inbounds float* %tmp18369, i64 1 + %tmp18371 = getelementptr inbounds float* %tmp18370, i64 1 + %tmp18372 = getelementptr inbounds float* %tmp18371, i64 1 + %tmp18373 = getelementptr inbounds float* %tmp18372, i64 1 + %tmp18374 = getelementptr inbounds float* %tmp18373, i64 1 + %tmp18375 = getelementptr inbounds float* %tmp18374, i64 1 + %tmp18376 = getelementptr inbounds float* %tmp18375, i64 1 + %tmp18377 = getelementptr inbounds float* %tmp18376, i64 1 + %tmp18378 = getelementptr inbounds float* %tmp18377, i64 1 + %tmp18379 = getelementptr inbounds float* %tmp18378, i64 1 + %tmp18380 = getelementptr inbounds float* %tmp18379, i64 1 + %tmp18381 = getelementptr inbounds float* %tmp18380, i64 1 + %tmp18382 = getelementptr inbounds float* %tmp18381, i64 1 + %tmp18383 = getelementptr inbounds float* %tmp18382, i64 1 + %tmp18384 = getelementptr inbounds float* %tmp18383, i64 1 + %tmp18385 = 
getelementptr inbounds float* %tmp18384, i64 1 + %tmp18386 = getelementptr inbounds float* %tmp18385, i64 1 + %tmp18387 = getelementptr inbounds float* %tmp18386, i64 1 + %tmp18388 = getelementptr inbounds float* %tmp18387, i64 1 + %tmp18389 = getelementptr inbounds float* %tmp18388, i64 1 + %tmp18390 = getelementptr inbounds float* %tmp18389, i64 1 + %tmp18391 = getelementptr inbounds float* %tmp18390, i64 1 + %tmp18392 = getelementptr inbounds float* %tmp18391, i64 1 + %tmp18393 = getelementptr inbounds float* %tmp18392, i64 1 + %tmp18394 = getelementptr inbounds float* %tmp18393, i64 1 + %tmp18395 = getelementptr inbounds float* %tmp18394, i64 1 + %tmp18396 = getelementptr inbounds float* %tmp18395, i64 1 + %tmp18397 = getelementptr inbounds float* %tmp18396, i64 1 + %tmp18398 = getelementptr inbounds float* %tmp18397, i64 1 + %tmp18399 = getelementptr inbounds float* %tmp18398, i64 1 + %tmp18400 = getelementptr inbounds float* %tmp18399, i64 1 + %tmp18401 = getelementptr inbounds float* %tmp18400, i64 1 + %tmp18402 = getelementptr inbounds float* %tmp18401, i64 1 + %tmp18403 = getelementptr inbounds float* %tmp18402, i64 1 + %tmp18404 = getelementptr inbounds float* %tmp18403, i64 1 + %tmp18405 = getelementptr inbounds float* %tmp18404, i64 1 + %tmp18406 = getelementptr inbounds float* %tmp18405, i64 1 + %tmp18407 = getelementptr inbounds float* %tmp18406, i64 1 + %tmp18408 = getelementptr inbounds float* %tmp18407, i64 1 + %tmp18409 = getelementptr inbounds float* %tmp18408, i64 1 + %tmp18410 = getelementptr inbounds float* %tmp18409, i64 1 + %tmp18411 = getelementptr inbounds float* %tmp18410, i64 1 + %tmp18412 = getelementptr inbounds float* %tmp18411, i64 1 + %tmp18413 = getelementptr inbounds float* %tmp18412, i64 1 + %tmp18414 = getelementptr inbounds float* %tmp18413, i64 1 + %tmp18415 = getelementptr inbounds float* %tmp18414, i64 1 + %tmp18416 = getelementptr inbounds float* %tmp18415, i64 1 + %tmp18417 = getelementptr inbounds float* %tmp18416, i64 1 
+ %tmp18418 = getelementptr inbounds float* %tmp18417, i64 1 + %tmp18419 = getelementptr inbounds float* %tmp18418, i64 1 + %tmp18420 = getelementptr inbounds float* %tmp18419, i64 1 + %tmp18421 = getelementptr inbounds float* %tmp18420, i64 1 + %tmp18422 = getelementptr inbounds float* %tmp18421, i64 1 + %tmp18423 = getelementptr inbounds float* %tmp18422, i64 1 + %tmp18424 = getelementptr inbounds float* %tmp18423, i64 1 + %tmp18425 = getelementptr inbounds float* %tmp18424, i64 1 + %tmp18426 = getelementptr inbounds float* %tmp18425, i64 1 + %tmp18427 = getelementptr inbounds float* %tmp18426, i64 1 + %tmp18428 = getelementptr inbounds float* %tmp18427, i64 1 + %tmp18429 = getelementptr inbounds float* %tmp18428, i64 1 + %tmp18430 = getelementptr inbounds float* %tmp18429, i64 1 + %tmp18431 = getelementptr inbounds float* %tmp18430, i64 1 + %tmp18432 = getelementptr inbounds float* %tmp18431, i64 1 + %tmp18433 = getelementptr inbounds float* %tmp18432, i64 1 + %tmp18434 = getelementptr inbounds float* %tmp18433, i64 1 + %tmp18435 = getelementptr inbounds float* %tmp18434, i64 1 + %tmp18436 = getelementptr inbounds float* %tmp18435, i64 1 + %tmp18437 = getelementptr inbounds float* %tmp18436, i64 1 + %tmp18438 = getelementptr inbounds float* %tmp18437, i64 1 + %tmp18439 = getelementptr inbounds float* %tmp18438, i64 1 + %tmp18440 = getelementptr inbounds float* %tmp18439, i64 1 + %tmp18441 = getelementptr inbounds float* %tmp18440, i64 1 + %tmp18442 = getelementptr inbounds float* %tmp18441, i64 1 + %tmp18443 = getelementptr inbounds float* %tmp18442, i64 1 + %tmp18444 = getelementptr inbounds float* %tmp18443, i64 1 + %tmp18445 = getelementptr inbounds float* %tmp18444, i64 1 + %tmp18446 = getelementptr inbounds float* %tmp18445, i64 1 + %tmp18447 = getelementptr inbounds float* %tmp18446, i64 1 + %tmp18448 = getelementptr inbounds float* %tmp18447, i64 1 + %tmp18449 = getelementptr inbounds float* %tmp18448, i64 1 + %tmp18450 = getelementptr inbounds float* 
%tmp18449, i64 1 + %tmp18451 = getelementptr inbounds float* %tmp18450, i64 1 + %tmp18452 = getelementptr inbounds float* %tmp18451, i64 1 + %tmp18453 = getelementptr inbounds float* %tmp18452, i64 1 + %tmp18454 = getelementptr inbounds float* %tmp18453, i64 1 + %tmp18455 = getelementptr inbounds float* %tmp18454, i64 1 + %tmp18456 = getelementptr inbounds float* %tmp18455, i64 1 + %tmp18457 = getelementptr inbounds float* %tmp18456, i64 1 + %tmp18458 = getelementptr inbounds float* %tmp18457, i64 1 + %tmp18459 = getelementptr inbounds float* %tmp18458, i64 1 + %tmp18460 = getelementptr inbounds float* %tmp18459, i64 1 + %tmp18461 = getelementptr inbounds float* %tmp18460, i64 1 + %tmp18462 = getelementptr inbounds float* %tmp18461, i64 1 + %tmp18463 = getelementptr inbounds float* %tmp18462, i64 1 + %tmp18464 = getelementptr inbounds float* %tmp18463, i64 1 + %tmp18465 = getelementptr inbounds float* %tmp18464, i64 1 + %tmp18466 = getelementptr inbounds float* %tmp18465, i64 1 + %tmp18467 = getelementptr inbounds float* %tmp18466, i64 1 + %tmp18468 = getelementptr inbounds float* %tmp18467, i64 1 + %tmp18469 = getelementptr inbounds float* %tmp18468, i64 1 + %tmp18470 = getelementptr inbounds float* %tmp18469, i64 1 + %tmp18471 = getelementptr inbounds float* %tmp18470, i64 1 + %tmp18472 = getelementptr inbounds float* %tmp18471, i64 1 + %tmp18473 = getelementptr inbounds float* %tmp18472, i64 1 + %tmp18474 = getelementptr inbounds float* %tmp18473, i64 1 + %tmp18475 = getelementptr inbounds float* %tmp18474, i64 1 + %tmp18476 = getelementptr inbounds float* %tmp18475, i64 1 + %tmp18477 = getelementptr inbounds float* %tmp18476, i64 1 + %tmp18478 = getelementptr inbounds float* %tmp18477, i64 1 + %tmp18479 = getelementptr inbounds float* %tmp18478, i64 1 + %tmp18480 = getelementptr inbounds float* %tmp18479, i64 1 + %tmp18481 = getelementptr inbounds float* %tmp18480, i64 1 + %tmp18482 = getelementptr inbounds float* %tmp18481, i64 1 + %tmp18483 = getelementptr 
inbounds float* %tmp18482, i64 1 + %tmp18484 = getelementptr inbounds float* %tmp18483, i64 1 + %tmp18485 = getelementptr inbounds float* %tmp18484, i64 1 + %tmp18486 = getelementptr inbounds float* %tmp18485, i64 1 + %tmp18487 = getelementptr inbounds float* %tmp18486, i64 1 + %tmp18488 = getelementptr inbounds float* %tmp18487, i64 1 + %tmp18489 = getelementptr inbounds float* %tmp18488, i64 1 + %tmp18490 = getelementptr inbounds float* %tmp18489, i64 1 + %tmp18491 = getelementptr inbounds float* %tmp18490, i64 1 + %tmp18492 = getelementptr inbounds float* %tmp18491, i64 1 + %tmp18493 = getelementptr inbounds float* %tmp18492, i64 1 + %tmp18494 = getelementptr inbounds float* %tmp18493, i64 1 + %tmp18495 = getelementptr inbounds float* %tmp18494, i64 1 + %tmp18496 = getelementptr inbounds float* %tmp18495, i64 1 + %tmp18497 = getelementptr inbounds float* %tmp18496, i64 1 + %tmp18498 = getelementptr inbounds float* %tmp18497, i64 1 + %tmp18499 = getelementptr inbounds float* %tmp18498, i64 1 + %tmp18500 = getelementptr inbounds float* %tmp18499, i64 1 + %tmp18501 = getelementptr inbounds float* %tmp18500, i64 1 + %tmp18502 = getelementptr inbounds float* %tmp18501, i64 1 + %tmp18503 = getelementptr inbounds float* %tmp18502, i64 1 + %tmp18504 = getelementptr inbounds float* %tmp18503, i64 1 + %tmp18505 = getelementptr inbounds float* %tmp18504, i64 1 + %tmp18506 = getelementptr inbounds float* %tmp18505, i64 1 + %tmp18507 = getelementptr inbounds float* %tmp18506, i64 1 + %tmp18508 = getelementptr inbounds float* %tmp18507, i64 1 + %tmp18509 = getelementptr inbounds float* %tmp18508, i64 1 + %tmp18510 = getelementptr inbounds float* %tmp18509, i64 1 + %tmp18511 = getelementptr inbounds float* %tmp18510, i64 1 + %tmp18512 = getelementptr inbounds float* %tmp18511, i64 1 + %tmp18513 = getelementptr inbounds float* %tmp18512, i64 1 + %tmp18514 = getelementptr inbounds float* %tmp18513, i64 1 + %tmp18515 = getelementptr inbounds float* %tmp18514, i64 1 + %tmp18516 = 
getelementptr inbounds float* %tmp18515, i64 1 + %tmp18517 = getelementptr inbounds float* %tmp18516, i64 1 + %tmp18518 = getelementptr inbounds float* %tmp18517, i64 1 + %tmp18519 = getelementptr inbounds float* %tmp18518, i64 1 + %tmp18520 = getelementptr inbounds float* %tmp18519, i64 1 + %tmp18521 = getelementptr inbounds float* %tmp18520, i64 1 + %tmp18522 = getelementptr inbounds float* %tmp18521, i64 1 + %tmp18523 = getelementptr inbounds float* %tmp18522, i64 1 + %tmp18524 = getelementptr inbounds float* %tmp18523, i64 1 + %tmp18525 = getelementptr inbounds float* %tmp18524, i64 1 + %tmp18526 = getelementptr inbounds float* %tmp18525, i64 1 + %tmp18527 = getelementptr inbounds float* %tmp18526, i64 1 + %tmp18528 = getelementptr inbounds float* %tmp18527, i64 1 + %tmp18529 = getelementptr inbounds float* %tmp18528, i64 1 + %tmp18530 = getelementptr inbounds float* %tmp18529, i64 1 + %tmp18531 = getelementptr inbounds float* %tmp18530, i64 1 + %tmp18532 = getelementptr inbounds float* %tmp18531, i64 1 + %tmp18533 = getelementptr inbounds float* %tmp18532, i64 1 + %tmp18534 = getelementptr inbounds float* %tmp18533, i64 1 + %tmp18535 = getelementptr inbounds float* %tmp18534, i64 1 + %tmp18536 = getelementptr inbounds float* %tmp18535, i64 1 + %tmp18537 = getelementptr inbounds float* %tmp18536, i64 1 + %tmp18538 = getelementptr inbounds float* %tmp18537, i64 1 + %tmp18539 = getelementptr inbounds float* %tmp18538, i64 1 + %tmp18540 = getelementptr inbounds float* %tmp18539, i64 1 + %tmp18541 = getelementptr inbounds float* %tmp18540, i64 1 + %tmp18542 = getelementptr inbounds float* %tmp18541, i64 1 + %tmp18543 = getelementptr inbounds float* %tmp18542, i64 1 + %tmp18544 = getelementptr inbounds float* %tmp18543, i64 1 + %tmp18545 = getelementptr inbounds float* %tmp18544, i64 1 + %tmp18546 = getelementptr inbounds float* %tmp18545, i64 1 + %tmp18547 = getelementptr inbounds float* %tmp18546, i64 1 + %tmp18548 = getelementptr inbounds float* %tmp18547, i64 1 
+ %tmp18549 = getelementptr inbounds float* %tmp18548, i64 1 + %tmp18550 = getelementptr inbounds float* %tmp18549, i64 1 + %tmp18551 = getelementptr inbounds float* %tmp18550, i64 1 + %tmp18552 = getelementptr inbounds float* %tmp18551, i64 1 + %tmp18553 = getelementptr inbounds float* %tmp18552, i64 1 + %tmp18554 = getelementptr inbounds float* %tmp18553, i64 1 + %tmp18555 = getelementptr inbounds float* %tmp18554, i64 1 + %tmp18556 = getelementptr inbounds float* %tmp18555, i64 1 + %tmp18557 = getelementptr inbounds float* %tmp18556, i64 1 + %tmp18558 = getelementptr inbounds float* %tmp18557, i64 1 + %tmp18559 = getelementptr inbounds float* %tmp18558, i64 1 + %tmp18560 = getelementptr inbounds float* %tmp18559, i64 1 + %tmp18561 = getelementptr inbounds float* %tmp18560, i64 1 + %tmp18562 = getelementptr inbounds float* %tmp18561, i64 1 + %tmp18563 = getelementptr inbounds float* %tmp18562, i64 1 + %tmp18564 = getelementptr inbounds float* %tmp18563, i64 1 + %tmp18565 = getelementptr inbounds float* %tmp18564, i64 1 + %tmp18566 = getelementptr inbounds float* %tmp18565, i64 1 + %tmp18567 = getelementptr inbounds float* %tmp18566, i64 1 + %tmp18568 = getelementptr inbounds float* %tmp18567, i64 1 + %tmp18569 = getelementptr inbounds float* %tmp18568, i64 1 + %tmp18570 = getelementptr inbounds float* %tmp18569, i64 1 + %tmp18571 = getelementptr inbounds float* %tmp18570, i64 1 + %tmp18572 = getelementptr inbounds float* %tmp18571, i64 1 + %tmp18573 = getelementptr inbounds float* %tmp18572, i64 1 + %tmp18574 = getelementptr inbounds float* %tmp18573, i64 1 + %tmp18575 = getelementptr inbounds float* %tmp18574, i64 1 + %tmp18576 = getelementptr inbounds float* %tmp18575, i64 1 + %tmp18577 = getelementptr inbounds float* %tmp18576, i64 1 + %tmp18578 = getelementptr inbounds float* %tmp18577, i64 1 + %tmp18579 = getelementptr inbounds float* %tmp18578, i64 1 + %tmp18580 = getelementptr inbounds float* %tmp18579, i64 1 + %tmp18581 = getelementptr inbounds float* 
%tmp18580, i64 1 + %tmp18582 = getelementptr inbounds float* %tmp18581, i64 1 + %tmp18583 = getelementptr inbounds float* %tmp18582, i64 1 + %tmp18584 = getelementptr inbounds float* %tmp18583, i64 1 + %tmp18585 = getelementptr inbounds float* %tmp18584, i64 1 + %tmp18586 = getelementptr inbounds float* %tmp18585, i64 1 + %tmp18587 = getelementptr inbounds float* %tmp18586, i64 1 + %tmp18588 = getelementptr inbounds float* %tmp18587, i64 1 + %tmp18589 = getelementptr inbounds float* %tmp18588, i64 1 + %tmp18590 = getelementptr inbounds float* %tmp18589, i64 1 + %tmp18591 = getelementptr inbounds float* %tmp18590, i64 1 + %tmp18592 = getelementptr inbounds float* %tmp18591, i64 1 + %tmp18593 = getelementptr inbounds float* %tmp18592, i64 1 + %tmp18594 = getelementptr inbounds float* %tmp18593, i64 1 + %tmp18595 = getelementptr inbounds float* %tmp18594, i64 1 + %tmp18596 = getelementptr inbounds float* %tmp18595, i64 1 + %tmp18597 = getelementptr inbounds float* %tmp18596, i64 1 + %tmp18598 = getelementptr inbounds float* %tmp18597, i64 1 + %tmp18599 = getelementptr inbounds float* %tmp18598, i64 1 + %tmp18600 = getelementptr inbounds float* %tmp18599, i64 1 + %tmp18601 = getelementptr inbounds float* %tmp18600, i64 1 + %tmp18602 = getelementptr inbounds float* %tmp18601, i64 1 + %tmp18603 = getelementptr inbounds float* %tmp18602, i64 1 + %tmp18604 = getelementptr inbounds float* %tmp18603, i64 1 + %tmp18605 = getelementptr inbounds float* %tmp18604, i64 1 + %tmp18606 = getelementptr inbounds float* %tmp18605, i64 1 + %tmp18607 = getelementptr inbounds float* %tmp18606, i64 1 + %tmp18608 = getelementptr inbounds float* %tmp18607, i64 1 + %tmp18609 = getelementptr inbounds float* %tmp18608, i64 1 + %tmp18610 = getelementptr inbounds float* %tmp18609, i64 1 + %tmp18611 = getelementptr inbounds float* %tmp18610, i64 1 + %tmp18612 = getelementptr inbounds float* %tmp18611, i64 1 + %tmp18613 = getelementptr inbounds float* %tmp18612, i64 1 + %tmp18614 = getelementptr 
inbounds float* %tmp18613, i64 1 + %tmp18615 = getelementptr inbounds float* %tmp18614, i64 1 + %tmp18616 = getelementptr inbounds float* %tmp18615, i64 1 + %tmp18617 = getelementptr inbounds float* %tmp18616, i64 1 + %tmp18618 = getelementptr inbounds float* %tmp18617, i64 1 + %tmp18619 = getelementptr inbounds float* %tmp18618, i64 1 + %tmp18620 = getelementptr inbounds float* %tmp18619, i64 1 + %tmp18621 = getelementptr inbounds float* %tmp18620, i64 1 + %tmp18622 = getelementptr inbounds float* %tmp18621, i64 1 + %tmp18623 = getelementptr inbounds float* %tmp18622, i64 1 + %tmp18624 = getelementptr inbounds float* %tmp18623, i64 1 + %tmp18625 = getelementptr inbounds float* %tmp18624, i64 1 + %tmp18626 = getelementptr inbounds float* %tmp18625, i64 1 + %tmp18627 = getelementptr inbounds float* %tmp18626, i64 1 + %tmp18628 = getelementptr inbounds float* %tmp18627, i64 1 + %tmp18629 = getelementptr inbounds float* %tmp18628, i64 1 + %tmp18630 = getelementptr inbounds float* %tmp18629, i64 1 + %tmp18631 = getelementptr inbounds float* %tmp18630, i64 1 + %tmp18632 = getelementptr inbounds float* %tmp18631, i64 1 + %tmp18633 = getelementptr inbounds float* %tmp18632, i64 1 + %tmp18634 = getelementptr inbounds float* %tmp18633, i64 1 + %tmp18635 = getelementptr inbounds float* %tmp18634, i64 1 + %tmp18636 = getelementptr inbounds float* %tmp18635, i64 1 + %tmp18637 = getelementptr inbounds float* %tmp18636, i64 1 + %tmp18638 = getelementptr inbounds float* %tmp18637, i64 1 + %tmp18639 = getelementptr inbounds float* %tmp18638, i64 1 + %tmp18640 = getelementptr inbounds float* %tmp18639, i64 1 + %tmp18641 = getelementptr inbounds float* %tmp18640, i64 1 + %tmp18642 = getelementptr inbounds float* %tmp18641, i64 1 + %tmp18643 = getelementptr inbounds float* %tmp18642, i64 1 + %tmp18644 = getelementptr inbounds float* %tmp18643, i64 1 + %tmp18645 = getelementptr inbounds float* %tmp18644, i64 1 + %tmp18646 = getelementptr inbounds float* %tmp18645, i64 1 + %tmp18647 = 
getelementptr inbounds float* %tmp18646, i64 1 + %tmp18648 = getelementptr inbounds float* %tmp18647, i64 1 + %tmp18649 = getelementptr inbounds float* %tmp18648, i64 1 + %tmp18650 = getelementptr inbounds float* %tmp18649, i64 1 + %tmp18651 = getelementptr inbounds float* %tmp18650, i64 1 + %tmp18652 = getelementptr inbounds float* %tmp18651, i64 1 + %tmp18653 = getelementptr inbounds float* %tmp18652, i64 1 + %tmp18654 = getelementptr inbounds float* %tmp18653, i64 1 + %tmp18655 = getelementptr inbounds float* %tmp18654, i64 1 + %tmp18656 = getelementptr inbounds float* %tmp18655, i64 1 + %tmp18657 = getelementptr inbounds float* %tmp18656, i64 1 + %tmp18658 = getelementptr inbounds float* %tmp18657, i64 1 + %tmp18659 = getelementptr inbounds float* %tmp18658, i64 1 + %tmp18660 = getelementptr inbounds float* %tmp18659, i64 1 + %tmp18661 = getelementptr inbounds float* %tmp18660, i64 1 + %tmp18662 = getelementptr inbounds float* %tmp18661, i64 1 + %tmp18663 = getelementptr inbounds float* %tmp18662, i64 1 + %tmp18664 = getelementptr inbounds float* %tmp18663, i64 1 + %tmp18665 = getelementptr inbounds float* %tmp18664, i64 1 + %tmp18666 = getelementptr inbounds float* %tmp18665, i64 1 + %tmp18667 = getelementptr inbounds float* %tmp18666, i64 1 + %tmp18668 = getelementptr inbounds float* %tmp18667, i64 1 + %tmp18669 = getelementptr inbounds float* %tmp18668, i64 1 + %tmp18670 = getelementptr inbounds float* %tmp18669, i64 1 + %tmp18671 = getelementptr inbounds float* %tmp18670, i64 1 + %tmp18672 = getelementptr inbounds float* %tmp18671, i64 1 + %tmp18673 = getelementptr inbounds float* %tmp18672, i64 1 + %tmp18674 = getelementptr inbounds float* %tmp18673, i64 1 + %tmp18675 = getelementptr inbounds float* %tmp18674, i64 1 + %tmp18676 = getelementptr inbounds float* %tmp18675, i64 1 + %tmp18677 = getelementptr inbounds float* %tmp18676, i64 1 + %tmp18678 = getelementptr inbounds float* %tmp18677, i64 1 + %tmp18679 = getelementptr inbounds float* %tmp18678, i64 1 
+ %tmp18680 = getelementptr inbounds float* %tmp18679, i64 1 + %tmp18681 = getelementptr inbounds float* %tmp18680, i64 1 + %tmp18682 = getelementptr inbounds float* %tmp18681, i64 1 + %tmp18683 = getelementptr inbounds float* %tmp18682, i64 1 + %tmp18684 = getelementptr inbounds float* %tmp18683, i64 1 + %tmp18685 = getelementptr inbounds float* %tmp18684, i64 1 + %tmp18686 = getelementptr inbounds float* %tmp18685, i64 1 + %tmp18687 = getelementptr inbounds float* %tmp18686, i64 1 + %tmp18688 = getelementptr inbounds float* %tmp18687, i64 1 + %tmp18689 = getelementptr inbounds float* %tmp18688, i64 1 + %tmp18690 = getelementptr inbounds float* %tmp18689, i64 1 + %tmp18691 = getelementptr inbounds float* %tmp18690, i64 1 + %tmp18692 = getelementptr inbounds float* %tmp18691, i64 1 + %tmp18693 = getelementptr inbounds float* %tmp18692, i64 1 + %tmp18694 = getelementptr inbounds float* %tmp18693, i64 1 + %tmp18695 = getelementptr inbounds float* %tmp18694, i64 1 + %tmp18696 = getelementptr inbounds float* %tmp18695, i64 1 + %tmp18697 = getelementptr inbounds float* %tmp18696, i64 1 + %tmp18698 = getelementptr inbounds float* %tmp18697, i64 1 + %tmp18699 = getelementptr inbounds float* %tmp18698, i64 1 + %tmp18700 = getelementptr inbounds float* %tmp18699, i64 1 + %tmp18701 = getelementptr inbounds float* %tmp18700, i64 1 + %tmp18702 = getelementptr inbounds float* %tmp18701, i64 1 + %tmp18703 = getelementptr inbounds float* %tmp18702, i64 1 + %tmp18704 = getelementptr inbounds float* %tmp18703, i64 1 + %tmp18705 = getelementptr inbounds float* %tmp18704, i64 1 + %tmp18706 = getelementptr inbounds float* %tmp18705, i64 1 + %tmp18707 = getelementptr inbounds float* %tmp18706, i64 1 + %tmp18708 = getelementptr inbounds float* %tmp18707, i64 1 + %tmp18709 = getelementptr inbounds float* %tmp18708, i64 1 + %tmp18710 = getelementptr inbounds float* %tmp18709, i64 1 + %tmp18711 = getelementptr inbounds float* %tmp18710, i64 1 + %tmp18712 = getelementptr inbounds float* 
%tmp18711, i64 1 + %tmp18713 = getelementptr inbounds float* %tmp18712, i64 1 + %tmp18714 = getelementptr inbounds float* %tmp18713, i64 1 + %tmp18715 = getelementptr inbounds float* %tmp18714, i64 1 + %tmp18716 = getelementptr inbounds float* %tmp18715, i64 1 + %tmp18717 = getelementptr inbounds float* %tmp18716, i64 1 + %tmp18718 = getelementptr inbounds float* %tmp18717, i64 1 + %tmp18719 = getelementptr inbounds float* %tmp18718, i64 1 + %tmp18720 = getelementptr inbounds float* %tmp18719, i64 1 + %tmp18721 = getelementptr inbounds float* %tmp18720, i64 1 + %tmp18722 = getelementptr inbounds float* %tmp18721, i64 1 + %tmp18723 = getelementptr inbounds float* %tmp18722, i64 1 + %tmp18724 = getelementptr inbounds float* %tmp18723, i64 1 + %tmp18725 = getelementptr inbounds float* %tmp18724, i64 1 + %tmp18726 = getelementptr inbounds float* %tmp18725, i64 1 + %tmp18727 = getelementptr inbounds float* %tmp18726, i64 1 + %tmp18728 = getelementptr inbounds float* %tmp18727, i64 1 + %tmp18729 = getelementptr inbounds float* %tmp18728, i64 1 + %tmp18730 = getelementptr inbounds float* %tmp18729, i64 1 + %tmp18731 = getelementptr inbounds float* %tmp18730, i64 1 + %tmp18732 = getelementptr inbounds float* %tmp18731, i64 1 + %tmp18733 = getelementptr inbounds float* %tmp18732, i64 1 + %tmp18734 = getelementptr inbounds float* %tmp18733, i64 1 + %tmp18735 = getelementptr inbounds float* %tmp18734, i64 1 + %tmp18736 = getelementptr inbounds float* %tmp18735, i64 1 + %tmp18737 = getelementptr inbounds float* %tmp18736, i64 1 + %tmp18738 = getelementptr inbounds float* %tmp18737, i64 1 + %tmp18739 = getelementptr inbounds float* %tmp18738, i64 1 + %tmp18740 = getelementptr inbounds float* %tmp18739, i64 1 + %tmp18741 = getelementptr inbounds float* %tmp18740, i64 1 + %tmp18742 = getelementptr inbounds float* %tmp18741, i64 1 + %tmp18743 = getelementptr inbounds float* %tmp18742, i64 1 + %tmp18744 = getelementptr inbounds float* %tmp18743, i64 1 + %tmp18745 = getelementptr 
inbounds float* %tmp18744, i64 1 + %tmp18746 = getelementptr inbounds float* %tmp18745, i64 1 + %tmp18747 = getelementptr inbounds float* %tmp18746, i64 1 + %tmp18748 = getelementptr inbounds float* %tmp18747, i64 1 + %tmp18749 = getelementptr inbounds float* %tmp18748, i64 1 + %tmp18750 = getelementptr inbounds float* %tmp18749, i64 1 + %tmp18751 = getelementptr inbounds float* %tmp18750, i64 1 + %tmp18752 = getelementptr inbounds float* %tmp18751, i64 1 + %tmp18753 = getelementptr inbounds float* %tmp18752, i64 1 + %tmp18754 = getelementptr inbounds float* %tmp18753, i64 1 + %tmp18755 = getelementptr inbounds float* %tmp18754, i64 1 + %tmp18756 = getelementptr inbounds float* %tmp18755, i64 1 + %tmp18757 = getelementptr inbounds float* %tmp18756, i64 1 + %tmp18758 = getelementptr inbounds float* %tmp18757, i64 1 + %tmp18759 = getelementptr inbounds float* %tmp18758, i64 1 + %tmp18760 = getelementptr inbounds float* %tmp18759, i64 1 + %tmp18761 = getelementptr inbounds float* %tmp18760, i64 1 + %tmp18762 = getelementptr inbounds float* %tmp18761, i64 1 + %tmp18763 = getelementptr inbounds float* %tmp18762, i64 1 + %tmp18764 = getelementptr inbounds float* %tmp18763, i64 1 + %tmp18765 = getelementptr inbounds float* %tmp18764, i64 1 + %tmp18766 = getelementptr inbounds float* %tmp18765, i64 1 + %tmp18767 = getelementptr inbounds float* %tmp18766, i64 1 + %tmp18768 = getelementptr inbounds float* %tmp18767, i64 1 + %tmp18769 = getelementptr inbounds float* %tmp18768, i64 1 + %tmp18770 = getelementptr inbounds float* %tmp18769, i64 1 + %tmp18771 = getelementptr inbounds float* %tmp18770, i64 1 + %tmp18772 = getelementptr inbounds float* %tmp18771, i64 1 + %tmp18773 = getelementptr inbounds float* %tmp18772, i64 1 + %tmp18774 = getelementptr inbounds float* %tmp18773, i64 1 + %tmp18775 = getelementptr inbounds float* %tmp18774, i64 1 + %tmp18776 = getelementptr inbounds float* %tmp18775, i64 1 + %tmp18777 = getelementptr inbounds float* %tmp18776, i64 1 + %tmp18778 = 
getelementptr inbounds float* %tmp18777, i64 1 + %tmp18779 = getelementptr inbounds float* %tmp18778, i64 1 + %tmp18780 = getelementptr inbounds float* %tmp18779, i64 1 + %tmp18781 = getelementptr inbounds float* %tmp18780, i64 1 + %tmp18782 = getelementptr inbounds float* %tmp18781, i64 1 + %tmp18783 = getelementptr inbounds float* %tmp18782, i64 1 + %tmp18784 = getelementptr inbounds float* %tmp18783, i64 1 + %tmp18785 = getelementptr inbounds float* %tmp18784, i64 1 + %tmp18786 = getelementptr inbounds float* %tmp18785, i64 1 + %tmp18787 = getelementptr inbounds float* %tmp18786, i64 1 + %tmp18788 = getelementptr inbounds float* %tmp18787, i64 1 + %tmp18789 = getelementptr inbounds float* %tmp18788, i64 1 + %tmp18790 = getelementptr inbounds float* %tmp18789, i64 1 + %tmp18791 = getelementptr inbounds float* %tmp18790, i64 1 + %tmp18792 = getelementptr inbounds float* %tmp18791, i64 1 + %tmp18793 = getelementptr inbounds float* %tmp18792, i64 1 + %tmp18794 = getelementptr inbounds float* %tmp18793, i64 1 + %tmp18795 = getelementptr inbounds float* %tmp18794, i64 1 + %tmp18796 = getelementptr inbounds float* %tmp18795, i64 1 + %tmp18797 = getelementptr inbounds float* %tmp18796, i64 1 + %tmp18798 = getelementptr inbounds float* %tmp18797, i64 1 + %tmp18799 = getelementptr inbounds float* %tmp18798, i64 1 + %tmp18800 = getelementptr inbounds float* %tmp18799, i64 1 + %tmp18801 = getelementptr inbounds float* %tmp18800, i64 1 + %tmp18802 = getelementptr inbounds float* %tmp18801, i64 1 + %tmp18803 = getelementptr inbounds float* %tmp18802, i64 1 + %tmp18804 = getelementptr inbounds float* %tmp18803, i64 1 + %tmp18805 = getelementptr inbounds float* %tmp18804, i64 1 + %tmp18806 = getelementptr inbounds float* %tmp18805, i64 1 + %tmp18807 = getelementptr inbounds float* %tmp18806, i64 1 + %tmp18808 = getelementptr inbounds float* %tmp18807, i64 1 + %tmp18809 = getelementptr inbounds float* %tmp18808, i64 1 + %tmp18810 = getelementptr inbounds float* %tmp18809, i64 1 
+ %tmp18811 = getelementptr inbounds float* %tmp18810, i64 1 + %tmp18812 = getelementptr inbounds float* %tmp18811, i64 1 + %tmp18813 = getelementptr inbounds float* %tmp18812, i64 1 + %tmp18814 = getelementptr inbounds float* %tmp18813, i64 1 + %tmp18815 = getelementptr inbounds float* %tmp18814, i64 1 + %tmp18816 = getelementptr inbounds float* %tmp18815, i64 1 + %tmp18817 = getelementptr inbounds float* %tmp18816, i64 1 + %tmp18818 = getelementptr inbounds float* %tmp18817, i64 1 + %tmp18819 = getelementptr inbounds float* %tmp18818, i64 1 + %tmp18820 = getelementptr inbounds float* %tmp18819, i64 1 + %tmp18821 = getelementptr inbounds float* %tmp18820, i64 1 + %tmp18822 = getelementptr inbounds float* %tmp18821, i64 1 + %tmp18823 = getelementptr inbounds float* %tmp18822, i64 1 + %tmp18824 = getelementptr inbounds float* %tmp18823, i64 1 + %tmp18825 = getelementptr inbounds float* %tmp18824, i64 1 + %tmp18826 = getelementptr inbounds float* %tmp18825, i64 1 + %tmp18827 = getelementptr inbounds float* %tmp18826, i64 1 + %tmp18828 = getelementptr inbounds float* %tmp18827, i64 1 + %tmp18829 = getelementptr inbounds float* %tmp18828, i64 1 + %tmp18830 = getelementptr inbounds float* %tmp18829, i64 1 + %tmp18831 = getelementptr inbounds float* %tmp18830, i64 1 + %tmp18832 = getelementptr inbounds float* %tmp18831, i64 1 + %tmp18833 = getelementptr inbounds float* %tmp18832, i64 1 + %tmp18834 = getelementptr inbounds float* %tmp18833, i64 1 + %tmp18835 = getelementptr inbounds float* %tmp18834, i64 1 + %tmp18836 = getelementptr inbounds float* %tmp18835, i64 1 + %tmp18837 = getelementptr inbounds float* %tmp18836, i64 1 + %tmp18838 = getelementptr inbounds float* %tmp18837, i64 1 + %tmp18839 = getelementptr inbounds float* %tmp18838, i64 1 + %tmp18840 = getelementptr inbounds float* %tmp18839, i64 1 + %tmp18841 = getelementptr inbounds float* %tmp18840, i64 1 + %tmp18842 = getelementptr inbounds float* %tmp18841, i64 1 + %tmp18843 = getelementptr inbounds float* 
%tmp18842, i64 1 + %tmp18844 = getelementptr inbounds float* %tmp18843, i64 1 + %tmp18845 = getelementptr inbounds float* %tmp18844, i64 1 + %tmp18846 = getelementptr inbounds float* %tmp18845, i64 1 + %tmp18847 = getelementptr inbounds float* %tmp18846, i64 1 + %tmp18848 = getelementptr inbounds float* %tmp18847, i64 1 + %tmp18849 = getelementptr inbounds float* %tmp18848, i64 1 + %tmp18850 = getelementptr inbounds float* %tmp18849, i64 1 + %tmp18851 = getelementptr inbounds float* %tmp18850, i64 1 + %tmp18852 = getelementptr inbounds float* %tmp18851, i64 1 + %tmp18853 = getelementptr inbounds float* %tmp18852, i64 1 + %tmp18854 = getelementptr inbounds float* %tmp18853, i64 1 + %tmp18855 = getelementptr inbounds float* %tmp18854, i64 1 + %tmp18856 = getelementptr inbounds float* %tmp18855, i64 1 + %tmp18857 = getelementptr inbounds float* %tmp18856, i64 1 + %tmp18858 = getelementptr inbounds float* %tmp18857, i64 1 + %tmp18859 = getelementptr inbounds float* %tmp18858, i64 1 + %tmp18860 = getelementptr inbounds float* %tmp18859, i64 1 + %tmp18861 = getelementptr inbounds float* %tmp18860, i64 1 + %tmp18862 = getelementptr inbounds float* %tmp18861, i64 1 + %tmp18863 = getelementptr inbounds float* %tmp18862, i64 1 + %tmp18864 = getelementptr inbounds float* %tmp18863, i64 1 + %tmp18865 = getelementptr inbounds float* %tmp18864, i64 1 + %tmp18866 = getelementptr inbounds float* %tmp18865, i64 1 + %tmp18867 = getelementptr inbounds float* %tmp18866, i64 1 + %tmp18868 = getelementptr inbounds float* %tmp18867, i64 1 + %tmp18869 = getelementptr inbounds float* %tmp18868, i64 1 + %tmp18870 = getelementptr inbounds float* %tmp18869, i64 1 + %tmp18871 = getelementptr inbounds float* %tmp18870, i64 1 + %tmp18872 = getelementptr inbounds float* %tmp18871, i64 1 + %tmp18873 = getelementptr inbounds float* %tmp18872, i64 1 + %tmp18874 = getelementptr inbounds float* %tmp18873, i64 1 + %tmp18875 = getelementptr inbounds float* %tmp18874, i64 1 + %tmp18876 = getelementptr 
inbounds float* %tmp18875, i64 1 + %tmp18877 = getelementptr inbounds float* %tmp18876, i64 1 + %tmp18878 = getelementptr inbounds float* %tmp18877, i64 1 + %tmp18879 = getelementptr inbounds float* %tmp18878, i64 1 + %tmp18880 = getelementptr inbounds float* %tmp18879, i64 1 + %tmp18881 = getelementptr inbounds float* %tmp18880, i64 1 + %tmp18882 = getelementptr inbounds float* %tmp18881, i64 1 + %tmp18883 = getelementptr inbounds float* %tmp18882, i64 1 + %tmp18884 = getelementptr inbounds float* %tmp18883, i64 1 + %tmp18885 = getelementptr inbounds float* %tmp18884, i64 1 + %tmp18886 = getelementptr inbounds float* %tmp18885, i64 1 + %tmp18887 = getelementptr inbounds float* %tmp18886, i64 1 + %tmp18888 = getelementptr inbounds float* %tmp18887, i64 1 + %tmp18889 = getelementptr inbounds float* %tmp18888, i64 1 + %tmp18890 = getelementptr inbounds float* %tmp18889, i64 1 + %tmp18891 = getelementptr inbounds float* %tmp18890, i64 1 + %tmp18892 = getelementptr inbounds float* %tmp18891, i64 1 + %tmp18893 = getelementptr inbounds float* %tmp18892, i64 1 + %tmp18894 = getelementptr inbounds float* %tmp18893, i64 1 + %tmp18895 = getelementptr inbounds float* %tmp18894, i64 1 + %tmp18896 = getelementptr inbounds float* %tmp18895, i64 1 + %tmp18897 = getelementptr inbounds float* %tmp18896, i64 1 + %tmp18898 = getelementptr inbounds float* %tmp18897, i64 1 + %tmp18899 = getelementptr inbounds float* %tmp18898, i64 1 + %tmp18900 = getelementptr inbounds float* %tmp18899, i64 1 + %tmp18901 = getelementptr inbounds float* %tmp18900, i64 1 + %tmp18902 = getelementptr inbounds float* %tmp18901, i64 1 + %tmp18903 = getelementptr inbounds float* %tmp18902, i64 1 + %tmp18904 = getelementptr inbounds float* %tmp18903, i64 1 + %tmp18905 = getelementptr inbounds float* %tmp18904, i64 1 + %tmp18906 = getelementptr inbounds float* %tmp18905, i64 1 + %tmp18907 = getelementptr inbounds float* %tmp18906, i64 1 + %tmp18908 = getelementptr inbounds float* %tmp18907, i64 1 + %tmp18909 = 
getelementptr inbounds float* %tmp18908, i64 1 + %tmp18910 = getelementptr inbounds float* %tmp18909, i64 1 + %tmp18911 = getelementptr inbounds float* %tmp18910, i64 1 + %tmp18912 = getelementptr inbounds float* %tmp18911, i64 1 + %tmp18913 = getelementptr inbounds float* %tmp18912, i64 1 + %tmp18914 = getelementptr inbounds float* %tmp18913, i64 1 + %tmp18915 = getelementptr inbounds float* %tmp18914, i64 1 + %tmp18916 = getelementptr inbounds float* %tmp18915, i64 1 + %tmp18917 = getelementptr inbounds float* %tmp18916, i64 1 + %tmp18918 = getelementptr inbounds float* %tmp18917, i64 1 + %tmp18919 = getelementptr inbounds float* %tmp18918, i64 1 + %tmp18920 = getelementptr inbounds float* %tmp18919, i64 1 + %tmp18921 = getelementptr inbounds float* %tmp18920, i64 1 + %tmp18922 = getelementptr inbounds float* %tmp18921, i64 1 + %tmp18923 = getelementptr inbounds float* %tmp18922, i64 1 + %tmp18924 = getelementptr inbounds float* %tmp18923, i64 1 + %tmp18925 = getelementptr inbounds float* %tmp18924, i64 1 + %tmp18926 = getelementptr inbounds float* %tmp18925, i64 1 + %tmp18927 = getelementptr inbounds float* %tmp18926, i64 1 + %tmp18928 = getelementptr inbounds float* %tmp18927, i64 1 + %tmp18929 = getelementptr inbounds float* %tmp18928, i64 1 + %tmp18930 = getelementptr inbounds float* %tmp18929, i64 1 + %tmp18931 = getelementptr inbounds float* %tmp18930, i64 1 + %tmp18932 = getelementptr inbounds float* %tmp18931, i64 1 + %tmp18933 = getelementptr inbounds float* %tmp18932, i64 1 + %tmp18934 = getelementptr inbounds float* %tmp18933, i64 1 + %tmp18935 = getelementptr inbounds float* %tmp18934, i64 1 + %tmp18936 = getelementptr inbounds float* %tmp18935, i64 1 + %tmp18937 = getelementptr inbounds float* %tmp18936, i64 1 + %tmp18938 = getelementptr inbounds float* %tmp18937, i64 1 + %tmp18939 = getelementptr inbounds float* %tmp18938, i64 1 + %tmp18940 = getelementptr inbounds float* %tmp18939, i64 1 + %tmp18941 = getelementptr inbounds float* %tmp18940, i64 1 
+ %tmp18942 = getelementptr inbounds float* %tmp18941, i64 1 + %tmp18943 = getelementptr inbounds float* %tmp18942, i64 1 + %tmp18944 = getelementptr inbounds float* %tmp18943, i64 1 + %tmp18945 = getelementptr inbounds float* %tmp18944, i64 1 + %tmp18946 = getelementptr inbounds float* %tmp18945, i64 1 + %tmp18947 = getelementptr inbounds float* %tmp18946, i64 1 + %tmp18948 = getelementptr inbounds float* %tmp18947, i64 1 + %tmp18949 = getelementptr inbounds float* %tmp18948, i64 1 + %tmp18950 = getelementptr inbounds float* %tmp18949, i64 1 + %tmp18951 = getelementptr inbounds float* %tmp18950, i64 1 + %tmp18952 = getelementptr inbounds float* %tmp18951, i64 1 + %tmp18953 = getelementptr inbounds float* %tmp18952, i64 1 + %tmp18954 = getelementptr inbounds float* %tmp18953, i64 1 + %tmp18955 = getelementptr inbounds float* %tmp18954, i64 1 + %tmp18956 = getelementptr inbounds float* %tmp18955, i64 1 + %tmp18957 = getelementptr inbounds float* %tmp18956, i64 1 + %tmp18958 = getelementptr inbounds float* %tmp18957, i64 1 + %tmp18959 = getelementptr inbounds float* %tmp18958, i64 1 + %tmp18960 = getelementptr inbounds float* %tmp18959, i64 1 + %tmp18961 = getelementptr inbounds float* %tmp18960, i64 1 + %tmp18962 = getelementptr inbounds float* %tmp18961, i64 1 + %tmp18963 = getelementptr inbounds float* %tmp18962, i64 1 + %tmp18964 = getelementptr inbounds float* %tmp18963, i64 1 + %tmp18965 = getelementptr inbounds float* %tmp18964, i64 1 + %tmp18966 = getelementptr inbounds float* %tmp18965, i64 1 + %tmp18967 = getelementptr inbounds float* %tmp18966, i64 1 + %tmp18968 = getelementptr inbounds float* %tmp18967, i64 1 + %tmp18969 = getelementptr inbounds float* %tmp18968, i64 1 + %tmp18970 = getelementptr inbounds float* %tmp18969, i64 1 + %tmp18971 = getelementptr inbounds float* %tmp18970, i64 1 + %tmp18972 = getelementptr inbounds float* %tmp18971, i64 1 + %tmp18973 = getelementptr inbounds float* %tmp18972, i64 1 + %tmp18974 = getelementptr inbounds float* 
%tmp18973, i64 1 + %tmp18975 = getelementptr inbounds float* %tmp18974, i64 1 + %tmp18976 = getelementptr inbounds float* %tmp18975, i64 1 + %tmp18977 = getelementptr inbounds float* %tmp18976, i64 1 + %tmp18978 = getelementptr inbounds float* %tmp18977, i64 1 + %tmp18979 = getelementptr inbounds float* %tmp18978, i64 1 + %tmp18980 = getelementptr inbounds float* %tmp18979, i64 1 + %tmp18981 = getelementptr inbounds float* %tmp18980, i64 1 + %tmp18982 = getelementptr inbounds float* %tmp18981, i64 1 + %tmp18983 = getelementptr inbounds float* %tmp18982, i64 1 + %tmp18984 = getelementptr inbounds float* %tmp18983, i64 1 + %tmp18985 = getelementptr inbounds float* %tmp18984, i64 1 + %tmp18986 = getelementptr inbounds float* %tmp18985, i64 1 + %tmp18987 = getelementptr inbounds float* %tmp18986, i64 1 + %tmp18988 = getelementptr inbounds float* %tmp18987, i64 1 + %tmp18989 = getelementptr inbounds float* %tmp18988, i64 1 + %tmp18990 = getelementptr inbounds float* %tmp18989, i64 1 + %tmp18991 = getelementptr inbounds float* %tmp18990, i64 1 + %tmp18992 = getelementptr inbounds float* %tmp18991, i64 1 + %tmp18993 = getelementptr inbounds float* %tmp18992, i64 1 + %tmp18994 = getelementptr inbounds float* %tmp18993, i64 1 + %tmp18995 = getelementptr inbounds float* %tmp18994, i64 1 + %tmp18996 = getelementptr inbounds float* %tmp18995, i64 1 + %tmp18997 = getelementptr inbounds float* %tmp18996, i64 1 + %tmp18998 = getelementptr inbounds float* %tmp18997, i64 1 + %tmp18999 = getelementptr inbounds float* %tmp18998, i64 1 + %tmp19000 = getelementptr inbounds float* %tmp18999, i64 1 + %tmp19001 = getelementptr inbounds float* %tmp19000, i64 1 + %tmp19002 = getelementptr inbounds float* %tmp19001, i64 1 + %tmp19003 = getelementptr inbounds float* %tmp19002, i64 1 + %tmp19004 = getelementptr inbounds float* %tmp19003, i64 1 + %tmp19005 = getelementptr inbounds float* %tmp19004, i64 1 + %tmp19006 = getelementptr inbounds float* %tmp19005, i64 1 + %tmp19007 = getelementptr 
inbounds float* %tmp19006, i64 1 + %tmp19008 = getelementptr inbounds float* %tmp19007, i64 1 + %tmp19009 = getelementptr inbounds float* %tmp19008, i64 1 + %tmp19010 = getelementptr inbounds float* %tmp19009, i64 1 + %tmp19011 = getelementptr inbounds float* %tmp19010, i64 1 + %tmp19012 = getelementptr inbounds float* %tmp19011, i64 1 + %tmp19013 = getelementptr inbounds float* %tmp19012, i64 1 + %tmp19014 = getelementptr inbounds float* %tmp19013, i64 1 + %tmp19015 = getelementptr inbounds float* %tmp19014, i64 1 + %tmp19016 = getelementptr inbounds float* %tmp19015, i64 1 + %tmp19017 = getelementptr inbounds float* %tmp19016, i64 1 + %tmp19018 = getelementptr inbounds float* %tmp19017, i64 1 + %tmp19019 = getelementptr inbounds float* %tmp19018, i64 1 + %tmp19020 = getelementptr inbounds float* %tmp19019, i64 1 + %tmp19021 = getelementptr inbounds float* %tmp19020, i64 1 + %tmp19022 = getelementptr inbounds float* %tmp19021, i64 1 + %tmp19023 = getelementptr inbounds float* %tmp19022, i64 1 + %tmp19024 = getelementptr inbounds float* %tmp19023, i64 1 + %tmp19025 = getelementptr inbounds float* %tmp19024, i64 1 + %tmp19026 = getelementptr inbounds float* %tmp19025, i64 1 + %tmp19027 = getelementptr inbounds float* %tmp19026, i64 1 + %tmp19028 = getelementptr inbounds float* %tmp19027, i64 1 + %tmp19029 = getelementptr inbounds float* %tmp19028, i64 1 + %tmp19030 = getelementptr inbounds float* %tmp19029, i64 1 + %tmp19031 = getelementptr inbounds float* %tmp19030, i64 1 + %tmp19032 = getelementptr inbounds float* %tmp19031, i64 1 + %tmp19033 = getelementptr inbounds float* %tmp19032, i64 1 + %tmp19034 = getelementptr inbounds float* %tmp19033, i64 1 + %tmp19035 = getelementptr inbounds float* %tmp19034, i64 1 + %tmp19036 = getelementptr inbounds float* %tmp19035, i64 1 + %tmp19037 = getelementptr inbounds float* %tmp19036, i64 1 + %tmp19038 = getelementptr inbounds float* %tmp19037, i64 1 + %tmp19039 = getelementptr inbounds float* %tmp19038, i64 1 + %tmp19040 = 
getelementptr inbounds float* %tmp19039, i64 1 + %tmp19041 = getelementptr inbounds float* %tmp19040, i64 1 + %tmp19042 = getelementptr inbounds float* %tmp19041, i64 1 + %tmp19043 = getelementptr inbounds float* %tmp19042, i64 1 + %tmp19044 = getelementptr inbounds float* %tmp19043, i64 1 + %tmp19045 = getelementptr inbounds float* %tmp19044, i64 1 + %tmp19046 = getelementptr inbounds float* %tmp19045, i64 1 + %tmp19047 = getelementptr inbounds float* %tmp19046, i64 1 + %tmp19048 = getelementptr inbounds float* %tmp19047, i64 1 + %tmp19049 = getelementptr inbounds float* %tmp19048, i64 1 + %tmp19050 = getelementptr inbounds float* %tmp19049, i64 1 + %tmp19051 = getelementptr inbounds float* %tmp19050, i64 1 + %tmp19052 = getelementptr inbounds float* %tmp19051, i64 1 + %tmp19053 = getelementptr inbounds float* %tmp19052, i64 1 + %tmp19054 = getelementptr inbounds float* %tmp19053, i64 1 + %tmp19055 = getelementptr inbounds float* %tmp19054, i64 1 + %tmp19056 = getelementptr inbounds float* %tmp19055, i64 1 + %tmp19057 = getelementptr inbounds float* %tmp19056, i64 1 + %tmp19058 = getelementptr inbounds float* %tmp19057, i64 1 + %tmp19059 = getelementptr inbounds float* %tmp19058, i64 1 + %tmp19060 = getelementptr inbounds float* %tmp19059, i64 1 + %tmp19061 = getelementptr inbounds float* %tmp19060, i64 1 + %tmp19062 = getelementptr inbounds float* %tmp19061, i64 1 + %tmp19063 = getelementptr inbounds float* %tmp19062, i64 1 + %tmp19064 = getelementptr inbounds float* %tmp19063, i64 1 + %tmp19065 = getelementptr inbounds float* %tmp19064, i64 1 + %tmp19066 = getelementptr inbounds float* %tmp19065, i64 1 + %tmp19067 = getelementptr inbounds float* %tmp19066, i64 1 + %tmp19068 = getelementptr inbounds float* %tmp19067, i64 1 + %tmp19069 = getelementptr inbounds float* %tmp19068, i64 1 + %tmp19070 = getelementptr inbounds float* %tmp19069, i64 1 + %tmp19071 = getelementptr inbounds float* %tmp19070, i64 1 + %tmp19072 = getelementptr inbounds float* %tmp19071, i64 1 
+ %tmp19073 = getelementptr inbounds float* %tmp19072, i64 1 + %tmp19074 = getelementptr inbounds float* %tmp19073, i64 1 + %tmp19075 = getelementptr inbounds float* %tmp19074, i64 1 + %tmp19076 = getelementptr inbounds float* %tmp19075, i64 1 + %tmp19077 = getelementptr inbounds float* %tmp19076, i64 1 + %tmp19078 = getelementptr inbounds float* %tmp19077, i64 1 + %tmp19079 = getelementptr inbounds float* %tmp19078, i64 1 + %tmp19080 = getelementptr inbounds float* %tmp19079, i64 1 + %tmp19081 = getelementptr inbounds float* %tmp19080, i64 1 + %tmp19082 = getelementptr inbounds float* %tmp19081, i64 1 + %tmp19083 = getelementptr inbounds float* %tmp19082, i64 1 + %tmp19084 = getelementptr inbounds float* %tmp19083, i64 1 + %tmp19085 = getelementptr inbounds float* %tmp19084, i64 1 + %tmp19086 = getelementptr inbounds float* %tmp19085, i64 1 + %tmp19087 = getelementptr inbounds float* %tmp19086, i64 1 + %tmp19088 = getelementptr inbounds float* %tmp19087, i64 1 + %tmp19089 = getelementptr inbounds float* %tmp19088, i64 1 + %tmp19090 = getelementptr inbounds float* %tmp19089, i64 1 + %tmp19091 = getelementptr inbounds float* %tmp19090, i64 1 + %tmp19092 = getelementptr inbounds float* %tmp19091, i64 1 + %tmp19093 = getelementptr inbounds float* %tmp19092, i64 1 + %tmp19094 = getelementptr inbounds float* %tmp19093, i64 1 + %tmp19095 = getelementptr inbounds float* %tmp19094, i64 1 + %tmp19096 = getelementptr inbounds float* %tmp19095, i64 1 + %tmp19097 = getelementptr inbounds float* %tmp19096, i64 1 + %tmp19098 = getelementptr inbounds float* %tmp19097, i64 1 + %tmp19099 = getelementptr inbounds float* %tmp19098, i64 1 + %tmp19100 = getelementptr inbounds float* %tmp19099, i64 1 + %tmp19101 = getelementptr inbounds float* %tmp19100, i64 1 + %tmp19102 = getelementptr inbounds float* %tmp19101, i64 1 + %tmp19103 = getelementptr inbounds float* %tmp19102, i64 1 + %tmp19104 = getelementptr inbounds float* %tmp19103, i64 1 + %tmp19105 = getelementptr inbounds float* 
%tmp19104, i64 1 + %tmp19106 = getelementptr inbounds float* %tmp19105, i64 1 + %tmp19107 = getelementptr inbounds float* %tmp19106, i64 1 + %tmp19108 = getelementptr inbounds float* %tmp19107, i64 1 + %tmp19109 = getelementptr inbounds float* %tmp19108, i64 1 + %tmp19110 = getelementptr inbounds float* %tmp19109, i64 1 + %tmp19111 = getelementptr inbounds float* %tmp19110, i64 1 + %tmp19112 = getelementptr inbounds float* %tmp19111, i64 1 + %tmp19113 = getelementptr inbounds float* %tmp19112, i64 1 + %tmp19114 = getelementptr inbounds float* %tmp19113, i64 1 + %tmp19115 = getelementptr inbounds float* %tmp19114, i64 1 + %tmp19116 = getelementptr inbounds float* %tmp19115, i64 1 + %tmp19117 = getelementptr inbounds float* %tmp19116, i64 1 + %tmp19118 = getelementptr inbounds float* %tmp19117, i64 1 + %tmp19119 = getelementptr inbounds float* %tmp19118, i64 1 + %tmp19120 = getelementptr inbounds float* %tmp19119, i64 1 + %tmp19121 = getelementptr inbounds float* %tmp19120, i64 1 + %tmp19122 = getelementptr inbounds float* %tmp19121, i64 1 + %tmp19123 = getelementptr inbounds float* %tmp19122, i64 1 + %tmp19124 = getelementptr inbounds float* %tmp19123, i64 1 + %tmp19125 = getelementptr inbounds float* %tmp19124, i64 1 + %tmp19126 = getelementptr inbounds float* %tmp19125, i64 1 + %tmp19127 = getelementptr inbounds float* %tmp19126, i64 1 + %tmp19128 = getelementptr inbounds float* %tmp19127, i64 1 + %tmp19129 = getelementptr inbounds float* %tmp19128, i64 1 + %tmp19130 = getelementptr inbounds float* %tmp19129, i64 1 + %tmp19131 = getelementptr inbounds float* %tmp19130, i64 1 + %tmp19132 = getelementptr inbounds float* %tmp19131, i64 1 + %tmp19133 = getelementptr inbounds float* %tmp19132, i64 1 + %tmp19134 = getelementptr inbounds float* %tmp19133, i64 1 + %tmp19135 = getelementptr inbounds float* %tmp19134, i64 1 + %tmp19136 = getelementptr inbounds float* %tmp19135, i64 1 + %tmp19137 = getelementptr inbounds float* %tmp19136, i64 1 + %tmp19138 = getelementptr 
inbounds float* %tmp19137, i64 1 + %tmp19139 = getelementptr inbounds float* %tmp19138, i64 1 + %tmp19140 = getelementptr inbounds float* %tmp19139, i64 1 + %tmp19141 = getelementptr inbounds float* %tmp19140, i64 1 + %tmp19142 = getelementptr inbounds float* %tmp19141, i64 1 + %tmp19143 = getelementptr inbounds float* %tmp19142, i64 1 + %tmp19144 = getelementptr inbounds float* %tmp19143, i64 1 + %tmp19145 = getelementptr inbounds float* %tmp19144, i64 1 + %tmp19146 = getelementptr inbounds float* %tmp19145, i64 1 + %tmp19147 = getelementptr inbounds float* %tmp19146, i64 1 + %tmp19148 = getelementptr inbounds float* %tmp19147, i64 1 + %tmp19149 = getelementptr inbounds float* %tmp19148, i64 1 + %tmp19150 = getelementptr inbounds float* %tmp19149, i64 1 + %tmp19151 = getelementptr inbounds float* %tmp19150, i64 1 + %tmp19152 = getelementptr inbounds float* %tmp19151, i64 1 + %tmp19153 = getelementptr inbounds float* %tmp19152, i64 1 + %tmp19154 = getelementptr inbounds float* %tmp19153, i64 1 + %tmp19155 = getelementptr inbounds float* %tmp19154, i64 1 + %tmp19156 = getelementptr inbounds float* %tmp19155, i64 1 + %tmp19157 = getelementptr inbounds float* %tmp19156, i64 1 + %tmp19158 = getelementptr inbounds float* %tmp19157, i64 1 + %tmp19159 = getelementptr inbounds float* %tmp19158, i64 1 + %tmp19160 = getelementptr inbounds float* %tmp19159, i64 1 + %tmp19161 = getelementptr inbounds float* %tmp19160, i64 1 + %tmp19162 = getelementptr inbounds float* %tmp19161, i64 1 + %tmp19163 = getelementptr inbounds float* %tmp19162, i64 1 + %tmp19164 = getelementptr inbounds float* %tmp19163, i64 1 + %tmp19165 = getelementptr inbounds float* %tmp19164, i64 1 + %tmp19166 = getelementptr inbounds float* %tmp19165, i64 1 + %tmp19167 = getelementptr inbounds float* %tmp19166, i64 1 + %tmp19168 = getelementptr inbounds float* %tmp19167, i64 1 + %tmp19169 = getelementptr inbounds float* %tmp19168, i64 1 + %tmp19170 = getelementptr inbounds float* %tmp19169, i64 1 + %tmp19171 = 
getelementptr inbounds float* %tmp19170, i64 1 + %tmp19172 = getelementptr inbounds float* %tmp19171, i64 1 + %tmp19173 = getelementptr inbounds float* %tmp19172, i64 1 + %tmp19174 = getelementptr inbounds float* %tmp19173, i64 1 + %tmp19175 = getelementptr inbounds float* %tmp19174, i64 1 + %tmp19176 = getelementptr inbounds float* %tmp19175, i64 1 + %tmp19177 = getelementptr inbounds float* %tmp19176, i64 1 + %tmp19178 = getelementptr inbounds float* %tmp19177, i64 1 + %tmp19179 = getelementptr inbounds float* %tmp19178, i64 1 + %tmp19180 = getelementptr inbounds float* %tmp19179, i64 1 + %tmp19181 = getelementptr inbounds float* %tmp19180, i64 1 + %tmp19182 = getelementptr inbounds float* %tmp19181, i64 1 + %tmp19183 = getelementptr inbounds float* %tmp19182, i64 1 + %tmp19184 = getelementptr inbounds float* %tmp19183, i64 1 + %tmp19185 = getelementptr inbounds float* %tmp19184, i64 1 + %tmp19186 = getelementptr inbounds float* %tmp19185, i64 1 + %tmp19187 = getelementptr inbounds float* %tmp19186, i64 1 + %tmp19188 = getelementptr inbounds float* %tmp19187, i64 1 + %tmp19189 = getelementptr inbounds float* %tmp19188, i64 1 + %tmp19190 = getelementptr inbounds float* %tmp19189, i64 1 + %tmp19191 = getelementptr inbounds float* %tmp19190, i64 1 + %tmp19192 = getelementptr inbounds float* %tmp19191, i64 1 + %tmp19193 = getelementptr inbounds float* %tmp19192, i64 1 + %tmp19194 = getelementptr inbounds float* %tmp19193, i64 1 + %tmp19195 = getelementptr inbounds float* %tmp19194, i64 1 + %tmp19196 = getelementptr inbounds float* %tmp19195, i64 1 + %tmp19197 = getelementptr inbounds float* %tmp19196, i64 1 + %tmp19198 = getelementptr inbounds float* %tmp19197, i64 1 + %tmp19199 = getelementptr inbounds float* %tmp19198, i64 1 + %tmp19200 = getelementptr inbounds float* %tmp19199, i64 1 + %tmp19201 = getelementptr inbounds float* %tmp19200, i64 1 + %tmp19202 = getelementptr inbounds float* %tmp19201, i64 1 + %tmp19203 = getelementptr inbounds float* %tmp19202, i64 1 
+ %tmp19204 = getelementptr inbounds float* %tmp19203, i64 1 + %tmp19205 = getelementptr inbounds float* %tmp19204, i64 1 + %tmp19206 = getelementptr inbounds float* %tmp19205, i64 1 + %tmp19207 = getelementptr inbounds float* %tmp19206, i64 1 + %tmp19208 = getelementptr inbounds float* %tmp19207, i64 1 + %tmp19209 = getelementptr inbounds float* %tmp19208, i64 1 + %tmp19210 = getelementptr inbounds float* %tmp19209, i64 1 + %tmp19211 = getelementptr inbounds float* %tmp19210, i64 1 + %tmp19212 = getelementptr inbounds float* %tmp19211, i64 1 + %tmp19213 = getelementptr inbounds float* %tmp19212, i64 1 + %tmp19214 = getelementptr inbounds float* %tmp19213, i64 1 + %tmp19215 = getelementptr inbounds float* %tmp19214, i64 1 + %tmp19216 = getelementptr inbounds float* %tmp19215, i64 1 + %tmp19217 = getelementptr inbounds float* %tmp19216, i64 1 + %tmp19218 = getelementptr inbounds float* %tmp19217, i64 1 + %tmp19219 = getelementptr inbounds float* %tmp19218, i64 1 + %tmp19220 = getelementptr inbounds float* %tmp19219, i64 1 + %tmp19221 = getelementptr inbounds float* %tmp19220, i64 1 + %tmp19222 = getelementptr inbounds float* %tmp19221, i64 1 + %tmp19223 = getelementptr inbounds float* %tmp19222, i64 1 + %tmp19224 = getelementptr inbounds float* %tmp19223, i64 1 + %tmp19225 = getelementptr inbounds float* %tmp19224, i64 1 + %tmp19226 = getelementptr inbounds float* %tmp19225, i64 1 + %tmp19227 = getelementptr inbounds float* %tmp19226, i64 1 + %tmp19228 = getelementptr inbounds float* %tmp19227, i64 1 + %tmp19229 = getelementptr inbounds float* %tmp19228, i64 1 + %tmp19230 = getelementptr inbounds float* %tmp19229, i64 1 + %tmp19231 = getelementptr inbounds float* %tmp19230, i64 1 + %tmp19232 = getelementptr inbounds float* %tmp19231, i64 1 + %tmp19233 = getelementptr inbounds float* %tmp19232, i64 1 + %tmp19234 = getelementptr inbounds float* %tmp19233, i64 1 + %tmp19235 = getelementptr inbounds float* %tmp19234, i64 1 + %tmp19236 = getelementptr inbounds float* 
%tmp19235, i64 1 + %tmp19237 = getelementptr inbounds float* %tmp19236, i64 1 + %tmp19238 = getelementptr inbounds float* %tmp19237, i64 1 + %tmp19239 = getelementptr inbounds float* %tmp19238, i64 1 + %tmp19240 = getelementptr inbounds float* %tmp19239, i64 1 + %tmp19241 = getelementptr inbounds float* %tmp19240, i64 1 + %tmp19242 = getelementptr inbounds float* %tmp19241, i64 1 + %tmp19243 = getelementptr inbounds float* %tmp19242, i64 1 + %tmp19244 = getelementptr inbounds float* %tmp19243, i64 1 + %tmp19245 = getelementptr inbounds float* %tmp19244, i64 1 + %tmp19246 = getelementptr inbounds float* %tmp19245, i64 1 + %tmp19247 = getelementptr inbounds float* %tmp19246, i64 1 + %tmp19248 = getelementptr inbounds float* %tmp19247, i64 1 + %tmp19249 = getelementptr inbounds float* %tmp19248, i64 1 + %tmp19250 = getelementptr inbounds float* %tmp19249, i64 1 + %tmp19251 = getelementptr inbounds float* %tmp19250, i64 1 + %tmp19252 = getelementptr inbounds float* %tmp19251, i64 1 + %tmp19253 = getelementptr inbounds float* %tmp19252, i64 1 + %tmp19254 = getelementptr inbounds float* %tmp19253, i64 1 + %tmp19255 = getelementptr inbounds float* %tmp19254, i64 1 + %tmp19256 = getelementptr inbounds float* %tmp19255, i64 1 + %tmp19257 = getelementptr inbounds float* %tmp19256, i64 1 + %tmp19258 = getelementptr inbounds float* %tmp19257, i64 1 + %tmp19259 = getelementptr inbounds float* %tmp19258, i64 1 + %tmp19260 = getelementptr inbounds float* %tmp19259, i64 1 + %tmp19261 = getelementptr inbounds float* %tmp19260, i64 1 + %tmp19262 = getelementptr inbounds float* %tmp19261, i64 1 + %tmp19263 = getelementptr inbounds float* %tmp19262, i64 1 + %tmp19264 = getelementptr inbounds float* %tmp19263, i64 1 + %tmp19265 = getelementptr inbounds float* %tmp19264, i64 1 + %tmp19266 = getelementptr inbounds float* %tmp19265, i64 1 + %tmp19267 = getelementptr inbounds float* %tmp19266, i64 1 + %tmp19268 = getelementptr inbounds float* %tmp19267, i64 1 + %tmp19269 = getelementptr 
inbounds float* %tmp19268, i64 1 + %tmp19270 = getelementptr inbounds float* %tmp19269, i64 1 + %tmp19271 = getelementptr inbounds float* %tmp19270, i64 1 + %tmp19272 = getelementptr inbounds float* %tmp19271, i64 1 + %tmp19273 = getelementptr inbounds float* %tmp19272, i64 1 + %tmp19274 = getelementptr inbounds float* %tmp19273, i64 1 + %tmp19275 = getelementptr inbounds float* %tmp19274, i64 1 + %tmp19276 = getelementptr inbounds float* %tmp19275, i64 1 + %tmp19277 = getelementptr inbounds float* %tmp19276, i64 1 + %tmp19278 = getelementptr inbounds float* %tmp19277, i64 1 + %tmp19279 = getelementptr inbounds float* %tmp19278, i64 1 + %tmp19280 = getelementptr inbounds float* %tmp19279, i64 1 + %tmp19281 = getelementptr inbounds float* %tmp19280, i64 1 + %tmp19282 = getelementptr inbounds float* %tmp19281, i64 1 + %tmp19283 = getelementptr inbounds float* %tmp19282, i64 1 + %tmp19284 = getelementptr inbounds float* %tmp19283, i64 1 + %tmp19285 = getelementptr inbounds float* %tmp19284, i64 1 + %tmp19286 = getelementptr inbounds float* %tmp19285, i64 1 + %tmp19287 = getelementptr inbounds float* %tmp19286, i64 1 + %tmp19288 = getelementptr inbounds float* %tmp19287, i64 1 + %tmp19289 = getelementptr inbounds float* %tmp19288, i64 1 + %tmp19290 = getelementptr inbounds float* %tmp19289, i64 1 + %tmp19291 = getelementptr inbounds float* %tmp19290, i64 1 + %tmp19292 = getelementptr inbounds float* %tmp19291, i64 1 + %tmp19293 = getelementptr inbounds float* %tmp19292, i64 1 + %tmp19294 = getelementptr inbounds float* %tmp19293, i64 1 + %tmp19295 = getelementptr inbounds float* %tmp19294, i64 1 + %tmp19296 = getelementptr inbounds float* %tmp19295, i64 1 + %tmp19297 = getelementptr inbounds float* %tmp19296, i64 1 + %tmp19298 = getelementptr inbounds float* %tmp19297, i64 1 + %tmp19299 = getelementptr inbounds float* %tmp19298, i64 1 + %tmp19300 = getelementptr inbounds float* %tmp19299, i64 1 + %tmp19301 = getelementptr inbounds float* %tmp19300, i64 1 + %tmp19302 = 
getelementptr inbounds float* %tmp19301, i64 1 + %tmp19303 = getelementptr inbounds float* %tmp19302, i64 1 + %tmp19304 = getelementptr inbounds float* %tmp19303, i64 1 + %tmp19305 = getelementptr inbounds float* %tmp19304, i64 1 + %tmp19306 = getelementptr inbounds float* %tmp19305, i64 1 + %tmp19307 = getelementptr inbounds float* %tmp19306, i64 1 + %tmp19308 = getelementptr inbounds float* %tmp19307, i64 1 + %tmp19309 = getelementptr inbounds float* %tmp19308, i64 1 + %tmp19310 = getelementptr inbounds float* %tmp19309, i64 1 + %tmp19311 = getelementptr inbounds float* %tmp19310, i64 1 + %tmp19312 = getelementptr inbounds float* %tmp19311, i64 1 + %tmp19313 = getelementptr inbounds float* %tmp19312, i64 1 + %tmp19314 = getelementptr inbounds float* %tmp19313, i64 1 + %tmp19315 = getelementptr inbounds float* %tmp19314, i64 1 + %tmp19316 = getelementptr inbounds float* %tmp19315, i64 1 + %tmp19317 = getelementptr inbounds float* %tmp19316, i64 1 + %tmp19318 = getelementptr inbounds float* %tmp19317, i64 1 + %tmp19319 = getelementptr inbounds float* %tmp19318, i64 1 + %tmp19320 = getelementptr inbounds float* %tmp19319, i64 1 + %tmp19321 = getelementptr inbounds float* %tmp19320, i64 1 + %tmp19322 = getelementptr inbounds float* %tmp19321, i64 1 + %tmp19323 = getelementptr inbounds float* %tmp19322, i64 1 + %tmp19324 = getelementptr inbounds float* %tmp19323, i64 1 + %tmp19325 = getelementptr inbounds float* %tmp19324, i64 1 + %tmp19326 = getelementptr inbounds float* %tmp19325, i64 1 + %tmp19327 = getelementptr inbounds float* %tmp19326, i64 1 + %tmp19328 = getelementptr inbounds float* %tmp19327, i64 1 + %tmp19329 = getelementptr inbounds float* %tmp19328, i64 1 + %tmp19330 = getelementptr inbounds float* %tmp19329, i64 1 + %tmp19331 = getelementptr inbounds float* %tmp19330, i64 1 + %tmp19332 = getelementptr inbounds float* %tmp19331, i64 1 + %tmp19333 = getelementptr inbounds float* %tmp19332, i64 1 + %tmp19334 = getelementptr inbounds float* %tmp19333, i64 1 
+ %tmp19335 = getelementptr inbounds float* %tmp19334, i64 1 + %tmp19336 = getelementptr inbounds float* %tmp19335, i64 1 + %tmp19337 = getelementptr inbounds float* %tmp19336, i64 1 + %tmp19338 = getelementptr inbounds float* %tmp19337, i64 1 + %tmp19339 = getelementptr inbounds float* %tmp19338, i64 1 + %tmp19340 = getelementptr inbounds float* %tmp19339, i64 1 + %tmp19341 = getelementptr inbounds float* %tmp19340, i64 1 + %tmp19342 = getelementptr inbounds float* %tmp19341, i64 1 + %tmp19343 = getelementptr inbounds float* %tmp19342, i64 1 + %tmp19344 = getelementptr inbounds float* %tmp19343, i64 1 + %tmp19345 = getelementptr inbounds float* %tmp19344, i64 1 + %tmp19346 = getelementptr inbounds float* %tmp19345, i64 1 + %tmp19347 = getelementptr inbounds float* %tmp19346, i64 1 + %tmp19348 = getelementptr inbounds float* %tmp19347, i64 1 + %tmp19349 = getelementptr inbounds float* %tmp19348, i64 1 + %tmp19350 = getelementptr inbounds float* %tmp19349, i64 1 + %tmp19351 = getelementptr inbounds float* %tmp19350, i64 1 + %tmp19352 = getelementptr inbounds float* %tmp19351, i64 1 + %tmp19353 = getelementptr inbounds float* %tmp19352, i64 1 + %tmp19354 = getelementptr inbounds float* %tmp19353, i64 1 + %tmp19355 = getelementptr inbounds float* %tmp19354, i64 1 + %tmp19356 = getelementptr inbounds float* %tmp19355, i64 1 + %tmp19357 = getelementptr inbounds float* %tmp19356, i64 1 + %tmp19358 = getelementptr inbounds float* %tmp19357, i64 1 + %tmp19359 = getelementptr inbounds float* %tmp19358, i64 1 + %tmp19360 = getelementptr inbounds float* %tmp19359, i64 1 + %tmp19361 = getelementptr inbounds float* %tmp19360, i64 1 + %tmp19362 = getelementptr inbounds float* %tmp19361, i64 1 + %tmp19363 = getelementptr inbounds float* %tmp19362, i64 1 + %tmp19364 = getelementptr inbounds float* %tmp19363, i64 1 + %tmp19365 = getelementptr inbounds float* %tmp19364, i64 1 + %tmp19366 = getelementptr inbounds float* %tmp19365, i64 1 + %tmp19367 = getelementptr inbounds float* 
%tmp19366, i64 1 + %tmp19368 = getelementptr inbounds float* %tmp19367, i64 1 + %tmp19369 = getelementptr inbounds float* %tmp19368, i64 1 + %tmp19370 = getelementptr inbounds float* %tmp19369, i64 1 + %tmp19371 = getelementptr inbounds float* %tmp19370, i64 1 + %tmp19372 = getelementptr inbounds float* %tmp19371, i64 1 + %tmp19373 = getelementptr inbounds float* %tmp19372, i64 1 + %tmp19374 = getelementptr inbounds float* %tmp19373, i64 1 + %tmp19375 = getelementptr inbounds float* %tmp19374, i64 1 + %tmp19376 = getelementptr inbounds float* %tmp19375, i64 1 + %tmp19377 = getelementptr inbounds float* %tmp19376, i64 1 + %tmp19378 = getelementptr inbounds float* %tmp19377, i64 1 + %tmp19379 = getelementptr inbounds float* %tmp19378, i64 1 + %tmp19380 = getelementptr inbounds float* %tmp19379, i64 1 + %tmp19381 = getelementptr inbounds float* %tmp19380, i64 1 + %tmp19382 = getelementptr inbounds float* %tmp19381, i64 1 + %tmp19383 = getelementptr inbounds float* %tmp19382, i64 1 + %tmp19384 = getelementptr inbounds float* %tmp19383, i64 1 + %tmp19385 = getelementptr inbounds float* %tmp19384, i64 1 + %tmp19386 = getelementptr inbounds float* %tmp19385, i64 1 + %tmp19387 = getelementptr inbounds float* %tmp19386, i64 1 + %tmp19388 = getelementptr inbounds float* %tmp19387, i64 1 + %tmp19389 = getelementptr inbounds float* %tmp19388, i64 1 + %tmp19390 = getelementptr inbounds float* %tmp19389, i64 1 + %tmp19391 = getelementptr inbounds float* %tmp19390, i64 1 + %tmp19392 = getelementptr inbounds float* %tmp19391, i64 1 + %tmp19393 = getelementptr inbounds float* %tmp19392, i64 1 + %tmp19394 = getelementptr inbounds float* %tmp19393, i64 1 + %tmp19395 = getelementptr inbounds float* %tmp19394, i64 1 + %tmp19396 = getelementptr inbounds float* %tmp19395, i64 1 + %tmp19397 = getelementptr inbounds float* %tmp19396, i64 1 + %tmp19398 = getelementptr inbounds float* %tmp19397, i64 1 + %tmp19399 = getelementptr inbounds float* %tmp19398, i64 1 + %tmp19400 = getelementptr 
inbounds float* %tmp19399, i64 1 + %tmp19401 = getelementptr inbounds float* %tmp19400, i64 1 + %tmp19402 = getelementptr inbounds float* %tmp19401, i64 1 + %tmp19403 = getelementptr inbounds float* %tmp19402, i64 1 + %tmp19404 = getelementptr inbounds float* %tmp19403, i64 1 + %tmp19405 = getelementptr inbounds float* %tmp19404, i64 1 + %tmp19406 = getelementptr inbounds float* %tmp19405, i64 1 + %tmp19407 = getelementptr inbounds float* %tmp19406, i64 1 + %tmp19408 = getelementptr inbounds float* %tmp19407, i64 1 + %tmp19409 = getelementptr inbounds float* %tmp19408, i64 1 + %tmp19410 = getelementptr inbounds float* %tmp19409, i64 1 + %tmp19411 = getelementptr inbounds float* %tmp19410, i64 1 + %tmp19412 = getelementptr inbounds float* %tmp19411, i64 1 + %tmp19413 = getelementptr inbounds float* %tmp19412, i64 1 + %tmp19414 = getelementptr inbounds float* %tmp19413, i64 1 + %tmp19415 = getelementptr inbounds float* %tmp19414, i64 1 + %tmp19416 = getelementptr inbounds float* %tmp19415, i64 1 + %tmp19417 = getelementptr inbounds float* %tmp19416, i64 1 + %tmp19418 = getelementptr inbounds float* %tmp19417, i64 1 + %tmp19419 = getelementptr inbounds float* %tmp19418, i64 1 + %tmp19420 = getelementptr inbounds float* %tmp19419, i64 1 + %tmp19421 = getelementptr inbounds float* %tmp19420, i64 1 + %tmp19422 = getelementptr inbounds float* %tmp19421, i64 1 + %tmp19423 = getelementptr inbounds float* %tmp19422, i64 1 + %tmp19424 = getelementptr inbounds float* %tmp19423, i64 1 + %tmp19425 = getelementptr inbounds float* %tmp19424, i64 1 + %tmp19426 = getelementptr inbounds float* %tmp19425, i64 1 + %tmp19427 = getelementptr inbounds float* %tmp19426, i64 1 + %tmp19428 = getelementptr inbounds float* %tmp19427, i64 1 + %tmp19429 = getelementptr inbounds float* %tmp19428, i64 1 + %tmp19430 = getelementptr inbounds float* %tmp19429, i64 1 + %tmp19431 = getelementptr inbounds float* %tmp19430, i64 1 + %tmp19432 = getelementptr inbounds float* %tmp19431, i64 1 + %tmp19433 = 
getelementptr inbounds float* %tmp19432, i64 1 + %tmp19434 = getelementptr inbounds float* %tmp19433, i64 1 + %tmp19435 = getelementptr inbounds float* %tmp19434, i64 1 + %tmp19436 = getelementptr inbounds float* %tmp19435, i64 1 + %tmp19437 = getelementptr inbounds float* %tmp19436, i64 1 + %tmp19438 = getelementptr inbounds float* %tmp19437, i64 1 + %tmp19439 = getelementptr inbounds float* %tmp19438, i64 1 + %tmp19440 = getelementptr inbounds float* %tmp19439, i64 1 + %tmp19441 = getelementptr inbounds float* %tmp19440, i64 1 + %tmp19442 = getelementptr inbounds float* %tmp19441, i64 1 + %tmp19443 = getelementptr inbounds float* %tmp19442, i64 1 + %tmp19444 = getelementptr inbounds float* %tmp19443, i64 1 + %tmp19445 = getelementptr inbounds float* %tmp19444, i64 1 + %tmp19446 = getelementptr inbounds float* %tmp19445, i64 1 + %tmp19447 = getelementptr inbounds float* %tmp19446, i64 1 + %tmp19448 = getelementptr inbounds float* %tmp19447, i64 1 + %tmp19449 = getelementptr inbounds float* %tmp19448, i64 1 + %tmp19450 = getelementptr inbounds float* %tmp19449, i64 1 + %tmp19451 = getelementptr inbounds float* %tmp19450, i64 1 + %tmp19452 = getelementptr inbounds float* %tmp19451, i64 1 + %tmp19453 = getelementptr inbounds float* %tmp19452, i64 1 + %tmp19454 = getelementptr inbounds float* %tmp19453, i64 1 + %tmp19455 = getelementptr inbounds float* %tmp19454, i64 1 + %tmp19456 = getelementptr inbounds float* %tmp19455, i64 1 + %tmp19457 = getelementptr inbounds float* %tmp19456, i64 1 + %tmp19458 = getelementptr inbounds float* %tmp19457, i64 1 + %tmp19459 = getelementptr inbounds float* %tmp19458, i64 1 + %tmp19460 = getelementptr inbounds float* %tmp19459, i64 1 + %tmp19461 = getelementptr inbounds float* %tmp19460, i64 1 + %tmp19462 = getelementptr inbounds float* %tmp19461, i64 1 + %tmp19463 = getelementptr inbounds float* %tmp19462, i64 1 + %tmp19464 = getelementptr inbounds float* %tmp19463, i64 1 + %tmp19465 = getelementptr inbounds float* %tmp19464, i64 1 
+ %tmp19466 = getelementptr inbounds float* %tmp19465, i64 1 + %tmp19467 = getelementptr inbounds float* %tmp19466, i64 1 + %tmp19468 = getelementptr inbounds float* %tmp19467, i64 1 + %tmp19469 = getelementptr inbounds float* %tmp19468, i64 1 + %tmp19470 = getelementptr inbounds float* %tmp19469, i64 1 + %tmp19471 = getelementptr inbounds float* %tmp19470, i64 1 + %tmp19472 = getelementptr inbounds float* %tmp19471, i64 1 + %tmp19473 = getelementptr inbounds float* %tmp19472, i64 1 + %tmp19474 = getelementptr inbounds float* %tmp19473, i64 1 + %tmp19475 = getelementptr inbounds float* %tmp19474, i64 1 + %tmp19476 = getelementptr inbounds float* %tmp19475, i64 1 + %tmp19477 = getelementptr inbounds float* %tmp19476, i64 1 + %tmp19478 = getelementptr inbounds float* %tmp19477, i64 1 + %tmp19479 = getelementptr inbounds float* %tmp19478, i64 1 + %tmp19480 = getelementptr inbounds float* %tmp19479, i64 1 + %tmp19481 = getelementptr inbounds float* %tmp19480, i64 1 + %tmp19482 = getelementptr inbounds float* %tmp19481, i64 1 + %tmp19483 = getelementptr inbounds float* %tmp19482, i64 1 + %tmp19484 = getelementptr inbounds float* %tmp19483, i64 1 + %tmp19485 = getelementptr inbounds float* %tmp19484, i64 1 + %tmp19486 = getelementptr inbounds float* %tmp19485, i64 1 + %tmp19487 = getelementptr inbounds float* %tmp19486, i64 1 + %tmp19488 = getelementptr inbounds float* %tmp19487, i64 1 + %tmp19489 = getelementptr inbounds float* %tmp19488, i64 1 + %tmp19490 = getelementptr inbounds float* %tmp19489, i64 1 + %tmp19491 = getelementptr inbounds float* %tmp19490, i64 1 + %tmp19492 = getelementptr inbounds float* %tmp19491, i64 1 + %tmp19493 = getelementptr inbounds float* %tmp19492, i64 1 + %tmp19494 = getelementptr inbounds float* %tmp19493, i64 1 + %tmp19495 = getelementptr inbounds float* %tmp19494, i64 1 + %tmp19496 = getelementptr inbounds float* %tmp19495, i64 1 + %tmp19497 = getelementptr inbounds float* %tmp19496, i64 1 + %tmp19498 = getelementptr inbounds float* 
%tmp19497, i64 1 + %tmp19499 = getelementptr inbounds float* %tmp19498, i64 1 + %tmp19500 = getelementptr inbounds float* %tmp19499, i64 1 + %tmp19501 = getelementptr inbounds float* %tmp19500, i64 1 + %tmp19502 = getelementptr inbounds float* %tmp19501, i64 1 + %tmp19503 = getelementptr inbounds float* %tmp19502, i64 1 + %tmp19504 = getelementptr inbounds float* %tmp19503, i64 1 + %tmp19505 = getelementptr inbounds float* %tmp19504, i64 1 + %tmp19506 = getelementptr inbounds float* %tmp19505, i64 1 + %tmp19507 = getelementptr inbounds float* %tmp19506, i64 1 + %tmp19508 = getelementptr inbounds float* %tmp19507, i64 1 + %tmp19509 = getelementptr inbounds float* %tmp19508, i64 1 + %tmp19510 = getelementptr inbounds float* %tmp19509, i64 1 + %tmp19511 = getelementptr inbounds float* %tmp19510, i64 1 + %tmp19512 = getelementptr inbounds float* %tmp19511, i64 1 + %tmp19513 = getelementptr inbounds float* %tmp19512, i64 1 + %tmp19514 = getelementptr inbounds float* %tmp19513, i64 1 + %tmp19515 = getelementptr inbounds float* %tmp19514, i64 1 + %tmp19516 = getelementptr inbounds float* %tmp19515, i64 1 + %tmp19517 = getelementptr inbounds float* %tmp19516, i64 1 + %tmp19518 = getelementptr inbounds float* %tmp19517, i64 1 + %tmp19519 = getelementptr inbounds float* %tmp19518, i64 1 + %tmp19520 = getelementptr inbounds float* %tmp19519, i64 1 + %tmp19521 = getelementptr inbounds float* %tmp19520, i64 1 + %tmp19522 = getelementptr inbounds float* %tmp19521, i64 1 + %tmp19523 = getelementptr inbounds float* %tmp19522, i64 1 + %tmp19524 = getelementptr inbounds float* %tmp19523, i64 1 + %tmp19525 = getelementptr inbounds float* %tmp19524, i64 1 + %tmp19526 = getelementptr inbounds float* %tmp19525, i64 1 + %tmp19527 = getelementptr inbounds float* %tmp19526, i64 1 + %tmp19528 = getelementptr inbounds float* %tmp19527, i64 1 + %tmp19529 = getelementptr inbounds float* %tmp19528, i64 1 + %tmp19530 = getelementptr inbounds float* %tmp19529, i64 1 + %tmp19531 = getelementptr 
inbounds float* %tmp19530, i64 1 + %tmp19532 = getelementptr inbounds float* %tmp19531, i64 1 + %tmp19533 = getelementptr inbounds float* %tmp19532, i64 1 + %tmp19534 = getelementptr inbounds float* %tmp19533, i64 1 + %tmp19535 = getelementptr inbounds float* %tmp19534, i64 1 + %tmp19536 = getelementptr inbounds float* %tmp19535, i64 1 + %tmp19537 = getelementptr inbounds float* %tmp19536, i64 1 + %tmp19538 = getelementptr inbounds float* %tmp19537, i64 1 + %tmp19539 = getelementptr inbounds float* %tmp19538, i64 1 + %tmp19540 = getelementptr inbounds float* %tmp19539, i64 1 + %tmp19541 = getelementptr inbounds float* %tmp19540, i64 1 + %tmp19542 = getelementptr inbounds float* %tmp19541, i64 1 + %tmp19543 = getelementptr inbounds float* %tmp19542, i64 1 + %tmp19544 = getelementptr inbounds float* %tmp19543, i64 1 + %tmp19545 = getelementptr inbounds float* %tmp19544, i64 1 + %tmp19546 = getelementptr inbounds float* %tmp19545, i64 1 + %tmp19547 = getelementptr inbounds float* %tmp19546, i64 1 + %tmp19548 = getelementptr inbounds float* %tmp19547, i64 1 + %tmp19549 = getelementptr inbounds float* %tmp19548, i64 1 + %tmp19550 = getelementptr inbounds float* %tmp19549, i64 1 + %tmp19551 = getelementptr inbounds float* %tmp19550, i64 1 + %tmp19552 = getelementptr inbounds float* %tmp19551, i64 1 + %tmp19553 = getelementptr inbounds float* %tmp19552, i64 1 + %tmp19554 = getelementptr inbounds float* %tmp19553, i64 1 + %tmp19555 = getelementptr inbounds float* %tmp19554, i64 1 + %tmp19556 = getelementptr inbounds float* %tmp19555, i64 1 + %tmp19557 = getelementptr inbounds float* %tmp19556, i64 1 + %tmp19558 = getelementptr inbounds float* %tmp19557, i64 1 + %tmp19559 = getelementptr inbounds float* %tmp19558, i64 1 + %tmp19560 = getelementptr inbounds float* %tmp19559, i64 1 + %tmp19561 = getelementptr inbounds float* %tmp19560, i64 1 + %tmp19562 = getelementptr inbounds float* %tmp19561, i64 1 + %tmp19563 = getelementptr inbounds float* %tmp19562, i64 1 + %tmp19564 = 
getelementptr inbounds float* %tmp19563, i64 1 + %tmp19565 = getelementptr inbounds float* %tmp19564, i64 1 + %tmp19566 = getelementptr inbounds float* %tmp19565, i64 1 + %tmp19567 = getelementptr inbounds float* %tmp19566, i64 1 + %tmp19568 = getelementptr inbounds float* %tmp19567, i64 1 + %tmp19569 = getelementptr inbounds float* %tmp19568, i64 1 + %tmp19570 = getelementptr inbounds float* %tmp19569, i64 1 + %tmp19571 = getelementptr inbounds float* %tmp19570, i64 1 + %tmp19572 = getelementptr inbounds float* %tmp19571, i64 1 + %tmp19573 = getelementptr inbounds float* %tmp19572, i64 1 + %tmp19574 = getelementptr inbounds float* %tmp19573, i64 1 + %tmp19575 = getelementptr inbounds float* %tmp19574, i64 1 + %tmp19576 = getelementptr inbounds float* %tmp19575, i64 1 + %tmp19577 = getelementptr inbounds float* %tmp19576, i64 1 + %tmp19578 = getelementptr inbounds float* %tmp19577, i64 1 + %tmp19579 = getelementptr inbounds float* %tmp19578, i64 1 + %tmp19580 = getelementptr inbounds float* %tmp19579, i64 1 + %tmp19581 = getelementptr inbounds float* %tmp19580, i64 1 + %tmp19582 = getelementptr inbounds float* %tmp19581, i64 1 + %tmp19583 = getelementptr inbounds float* %tmp19582, i64 1 + %tmp19584 = getelementptr inbounds float* %tmp19583, i64 1 + %tmp19585 = getelementptr inbounds float* %tmp19584, i64 1 + %tmp19586 = getelementptr inbounds float* %tmp19585, i64 1 + %tmp19587 = getelementptr inbounds float* %tmp19586, i64 1 + %tmp19588 = getelementptr inbounds float* %tmp19587, i64 1 + %tmp19589 = getelementptr inbounds float* %tmp19588, i64 1 + %tmp19590 = getelementptr inbounds float* %tmp19589, i64 1 + %tmp19591 = getelementptr inbounds float* %tmp19590, i64 1 + %tmp19592 = getelementptr inbounds float* %tmp19591, i64 1 + %tmp19593 = getelementptr inbounds float* %tmp19592, i64 1 + %tmp19594 = getelementptr inbounds float* %tmp19593, i64 1 + %tmp19595 = getelementptr inbounds float* %tmp19594, i64 1 + %tmp19596 = getelementptr inbounds float* %tmp19595, i64 1 
+ %tmp19597 = getelementptr inbounds float* %tmp19596, i64 1 + %tmp19598 = getelementptr inbounds float* %tmp19597, i64 1 + %tmp19599 = getelementptr inbounds float* %tmp19598, i64 1 + %tmp19600 = getelementptr inbounds float* %tmp19599, i64 1 + %tmp19601 = getelementptr inbounds float* %tmp19600, i64 1 + %tmp19602 = getelementptr inbounds float* %tmp19601, i64 1 + %tmp19603 = getelementptr inbounds float* %tmp19602, i64 1 + %tmp19604 = getelementptr inbounds float* %tmp19603, i64 1 + %tmp19605 = getelementptr inbounds float* %tmp19604, i64 1 + %tmp19606 = getelementptr inbounds float* %tmp19605, i64 1 + %tmp19607 = getelementptr inbounds float* %tmp19606, i64 1 + %tmp19608 = getelementptr inbounds float* %tmp19607, i64 1 + %tmp19609 = getelementptr inbounds float* %tmp19608, i64 1 + %tmp19610 = getelementptr inbounds float* %tmp19609, i64 1 + %tmp19611 = getelementptr inbounds float* %tmp19610, i64 1 + %tmp19612 = getelementptr inbounds float* %tmp19611, i64 1 + %tmp19613 = getelementptr inbounds float* %tmp19612, i64 1 + %tmp19614 = getelementptr inbounds float* %tmp19613, i64 1 + %tmp19615 = getelementptr inbounds float* %tmp19614, i64 1 + %tmp19616 = getelementptr inbounds float* %tmp19615, i64 1 + %tmp19617 = getelementptr inbounds float* %tmp19616, i64 1 + %tmp19618 = getelementptr inbounds float* %tmp19617, i64 1 + %tmp19619 = getelementptr inbounds float* %tmp19618, i64 1 + %tmp19620 = getelementptr inbounds float* %tmp19619, i64 1 + %tmp19621 = getelementptr inbounds float* %tmp19620, i64 1 + %tmp19622 = getelementptr inbounds float* %tmp19621, i64 1 + %tmp19623 = getelementptr inbounds float* %tmp19622, i64 1 + %tmp19624 = getelementptr inbounds float* %tmp19623, i64 1 + %tmp19625 = getelementptr inbounds float* %tmp19624, i64 1 + %tmp19626 = getelementptr inbounds float* %tmp19625, i64 1 + %tmp19627 = getelementptr inbounds float* %tmp19626, i64 1 + %tmp19628 = getelementptr inbounds float* %tmp19627, i64 1 + %tmp19629 = getelementptr inbounds float* 
%tmp19628, i64 1 + %tmp19630 = getelementptr inbounds float* %tmp19629, i64 1 + %tmp19631 = getelementptr inbounds float* %tmp19630, i64 1 + %tmp19632 = getelementptr inbounds float* %tmp19631, i64 1 + %tmp19633 = getelementptr inbounds float* %tmp19632, i64 1 + %tmp19634 = getelementptr inbounds float* %tmp19633, i64 1 + %tmp19635 = getelementptr inbounds float* %tmp19634, i64 1 + %tmp19636 = getelementptr inbounds float* %tmp19635, i64 1 + %tmp19637 = getelementptr inbounds float* %tmp19636, i64 1 + %tmp19638 = getelementptr inbounds float* %tmp19637, i64 1 + %tmp19639 = getelementptr inbounds float* %tmp19638, i64 1 + %tmp19640 = getelementptr inbounds float* %tmp19639, i64 1 + %tmp19641 = getelementptr inbounds float* %tmp19640, i64 1 + %tmp19642 = getelementptr inbounds float* %tmp19641, i64 1 + %tmp19643 = getelementptr inbounds float* %tmp19642, i64 1 + %tmp19644 = getelementptr inbounds float* %tmp19643, i64 1 + %tmp19645 = getelementptr inbounds float* %tmp19644, i64 1 + %tmp19646 = getelementptr inbounds float* %tmp19645, i64 1 + %tmp19647 = getelementptr inbounds float* %tmp19646, i64 1 + %tmp19648 = getelementptr inbounds float* %tmp19647, i64 1 + %tmp19649 = getelementptr inbounds float* %tmp19648, i64 1 + %tmp19650 = getelementptr inbounds float* %tmp19649, i64 1 + %tmp19651 = getelementptr inbounds float* %tmp19650, i64 1 + %tmp19652 = getelementptr inbounds float* %tmp19651, i64 1 + %tmp19653 = getelementptr inbounds float* %tmp19652, i64 1 + %tmp19654 = getelementptr inbounds float* %tmp19653, i64 1 + %tmp19655 = getelementptr inbounds float* %tmp19654, i64 1 + %tmp19656 = getelementptr inbounds float* %tmp19655, i64 1 + %tmp19657 = getelementptr inbounds float* %tmp19656, i64 1 + %tmp19658 = getelementptr inbounds float* %tmp19657, i64 1 + %tmp19659 = getelementptr inbounds float* %tmp19658, i64 1 + %tmp19660 = getelementptr inbounds float* %tmp19659, i64 1 + %tmp19661 = getelementptr inbounds float* %tmp19660, i64 1 + %tmp19662 = getelementptr 
inbounds float* %tmp19661, i64 1 + %tmp19663 = getelementptr inbounds float* %tmp19662, i64 1 + %tmp19664 = getelementptr inbounds float* %tmp19663, i64 1 + %tmp19665 = getelementptr inbounds float* %tmp19664, i64 1 + %tmp19666 = getelementptr inbounds float* %tmp19665, i64 1 + %tmp19667 = getelementptr inbounds float* %tmp19666, i64 1 + %tmp19668 = getelementptr inbounds float* %tmp19667, i64 1 + %tmp19669 = getelementptr inbounds float* %tmp19668, i64 1 + %tmp19670 = getelementptr inbounds float* %tmp19669, i64 1 + %tmp19671 = getelementptr inbounds float* %tmp19670, i64 1 + %tmp19672 = getelementptr inbounds float* %tmp19671, i64 1 + %tmp19673 = getelementptr inbounds float* %tmp19672, i64 1 + %tmp19674 = getelementptr inbounds float* %tmp19673, i64 1 + %tmp19675 = getelementptr inbounds float* %tmp19674, i64 1 + %tmp19676 = getelementptr inbounds float* %tmp19675, i64 1 + %tmp19677 = getelementptr inbounds float* %tmp19676, i64 1 + %tmp19678 = getelementptr inbounds float* %tmp19677, i64 1 + %tmp19679 = getelementptr inbounds float* %tmp19678, i64 1 + %tmp19680 = getelementptr inbounds float* %tmp19679, i64 1 + %tmp19681 = getelementptr inbounds float* %tmp19680, i64 1 + %tmp19682 = getelementptr inbounds float* %tmp19681, i64 1 + %tmp19683 = getelementptr inbounds float* %tmp19682, i64 1 + %tmp19684 = getelementptr inbounds float* %tmp19683, i64 1 + %tmp19685 = getelementptr inbounds float* %tmp19684, i64 1 + %tmp19686 = getelementptr inbounds float* %tmp19685, i64 1 + %tmp19687 = getelementptr inbounds float* %tmp19686, i64 1 + %tmp19688 = getelementptr inbounds float* %tmp19687, i64 1 + %tmp19689 = getelementptr inbounds float* %tmp19688, i64 1 + %tmp19690 = getelementptr inbounds float* %tmp19689, i64 1 + %tmp19691 = getelementptr inbounds float* %tmp19690, i64 1 + %tmp19692 = getelementptr inbounds float* %tmp19691, i64 1 + %tmp19693 = getelementptr inbounds float* %tmp19692, i64 1 + %tmp19694 = getelementptr inbounds float* %tmp19693, i64 1 + %tmp19695 = 
getelementptr inbounds float* %tmp19694, i64 1 + %tmp19696 = getelementptr inbounds float* %tmp19695, i64 1 + %tmp19697 = getelementptr inbounds float* %tmp19696, i64 1 + %tmp19698 = getelementptr inbounds float* %tmp19697, i64 1 + %tmp19699 = getelementptr inbounds float* %tmp19698, i64 1 + %tmp19700 = getelementptr inbounds float* %tmp19699, i64 1 + %tmp19701 = getelementptr inbounds float* %tmp19700, i64 1 + %tmp19702 = getelementptr inbounds float* %tmp19701, i64 1 + %tmp19703 = getelementptr inbounds float* %tmp19702, i64 1 + %tmp19704 = getelementptr inbounds float* %tmp19703, i64 1 + %tmp19705 = getelementptr inbounds float* %tmp19704, i64 1 + %tmp19706 = getelementptr inbounds float* %tmp19705, i64 1 + %tmp19707 = getelementptr inbounds float* %tmp19706, i64 1 + %tmp19708 = getelementptr inbounds float* %tmp19707, i64 1 + %tmp19709 = getelementptr inbounds float* %tmp19708, i64 1 + %tmp19710 = getelementptr inbounds float* %tmp19709, i64 1 + %tmp19711 = getelementptr inbounds float* %tmp19710, i64 1 + %tmp19712 = getelementptr inbounds float* %tmp19711, i64 1 + %tmp19713 = getelementptr inbounds float* %tmp19712, i64 1 + %tmp19714 = getelementptr inbounds float* %tmp19713, i64 1 + %tmp19715 = getelementptr inbounds float* %tmp19714, i64 1 + %tmp19716 = getelementptr inbounds float* %tmp19715, i64 1 + %tmp19717 = getelementptr inbounds float* %tmp19716, i64 1 + %tmp19718 = getelementptr inbounds float* %tmp19717, i64 1 + %tmp19719 = getelementptr inbounds float* %tmp19718, i64 1 + %tmp19720 = getelementptr inbounds float* %tmp19719, i64 1 + %tmp19721 = getelementptr inbounds float* %tmp19720, i64 1 + %tmp19722 = getelementptr inbounds float* %tmp19721, i64 1 + %tmp19723 = getelementptr inbounds float* %tmp19722, i64 1 + %tmp19724 = getelementptr inbounds float* %tmp19723, i64 1 + %tmp19725 = getelementptr inbounds float* %tmp19724, i64 1 + %tmp19726 = getelementptr inbounds float* %tmp19725, i64 1 + %tmp19727 = getelementptr inbounds float* %tmp19726, i64 1 
+ %tmp19728 = getelementptr inbounds float* %tmp19727, i64 1 + %tmp19729 = getelementptr inbounds float* %tmp19728, i64 1 + %tmp19730 = getelementptr inbounds float* %tmp19729, i64 1 + %tmp19731 = getelementptr inbounds float* %tmp19730, i64 1 + %tmp19732 = getelementptr inbounds float* %tmp19731, i64 1 + %tmp19733 = getelementptr inbounds float* %tmp19732, i64 1 + %tmp19734 = getelementptr inbounds float* %tmp19733, i64 1 + %tmp19735 = getelementptr inbounds float* %tmp19734, i64 1 + %tmp19736 = getelementptr inbounds float* %tmp19735, i64 1 + %tmp19737 = getelementptr inbounds float* %tmp19736, i64 1 + %tmp19738 = getelementptr inbounds float* %tmp19737, i64 1 + %tmp19739 = getelementptr inbounds float* %tmp19738, i64 1 + %tmp19740 = getelementptr inbounds float* %tmp19739, i64 1 + %tmp19741 = getelementptr inbounds float* %tmp19740, i64 1 + %tmp19742 = getelementptr inbounds float* %tmp19741, i64 1 + %tmp19743 = getelementptr inbounds float* %tmp19742, i64 1 + %tmp19744 = getelementptr inbounds float* %tmp19743, i64 1 + %tmp19745 = getelementptr inbounds float* %tmp19744, i64 1 + %tmp19746 = getelementptr inbounds float* %tmp19745, i64 1 + %tmp19747 = getelementptr inbounds float* %tmp19746, i64 1 + %tmp19748 = getelementptr inbounds float* %tmp19747, i64 1 + %tmp19749 = getelementptr inbounds float* %tmp19748, i64 1 + %tmp19750 = getelementptr inbounds float* %tmp19749, i64 1 + %tmp19751 = getelementptr inbounds float* %tmp19750, i64 1 + %tmp19752 = getelementptr inbounds float* %tmp19751, i64 1 + %tmp19753 = getelementptr inbounds float* %tmp19752, i64 1 + %tmp19754 = getelementptr inbounds float* %tmp19753, i64 1 + %tmp19755 = getelementptr inbounds float* %tmp19754, i64 1 + %tmp19756 = getelementptr inbounds float* %tmp19755, i64 1 + %tmp19757 = getelementptr inbounds float* %tmp19756, i64 1 + %tmp19758 = getelementptr inbounds float* %tmp19757, i64 1 + %tmp19759 = getelementptr inbounds float* %tmp19758, i64 1 + %tmp19760 = getelementptr inbounds float* 
%tmp19759, i64 1 + %tmp19761 = getelementptr inbounds float* %tmp19760, i64 1 + %tmp19762 = getelementptr inbounds float* %tmp19761, i64 1 + %tmp19763 = getelementptr inbounds float* %tmp19762, i64 1 + %tmp19764 = getelementptr inbounds float* %tmp19763, i64 1 + %tmp19765 = getelementptr inbounds float* %tmp19764, i64 1 + %tmp19766 = getelementptr inbounds float* %tmp19765, i64 1 + %tmp19767 = getelementptr inbounds float* %tmp19766, i64 1 + %tmp19768 = getelementptr inbounds float* %tmp19767, i64 1 + %tmp19769 = getelementptr inbounds float* %tmp19768, i64 1 + %tmp19770 = getelementptr inbounds float* %tmp19769, i64 1 + %tmp19771 = getelementptr inbounds float* %tmp19770, i64 1 + %tmp19772 = getelementptr inbounds float* %tmp19771, i64 1 + %tmp19773 = getelementptr inbounds float* %tmp19772, i64 1 + %tmp19774 = getelementptr inbounds float* %tmp19773, i64 1 + %tmp19775 = getelementptr inbounds float* %tmp19774, i64 1 + %tmp19776 = getelementptr inbounds float* %tmp19775, i64 1 + %tmp19777 = getelementptr inbounds float* %tmp19776, i64 1 + %tmp19778 = getelementptr inbounds float* %tmp19777, i64 1 + %tmp19779 = getelementptr inbounds float* %tmp19778, i64 1 + %tmp19780 = getelementptr inbounds float* %tmp19779, i64 1 + %tmp19781 = getelementptr inbounds float* %tmp19780, i64 1 + %tmp19782 = getelementptr inbounds float* %tmp19781, i64 1 + %tmp19783 = getelementptr inbounds float* %tmp19782, i64 1 + %tmp19784 = getelementptr inbounds float* %tmp19783, i64 1 + %tmp19785 = getelementptr inbounds float* %tmp19784, i64 1 + %tmp19786 = getelementptr inbounds float* %tmp19785, i64 1 + %tmp19787 = getelementptr inbounds float* %tmp19786, i64 1 + %tmp19788 = getelementptr inbounds float* %tmp19787, i64 1 + %tmp19789 = getelementptr inbounds float* %tmp19788, i64 1 + %tmp19790 = getelementptr inbounds float* %tmp19789, i64 1 + %tmp19791 = getelementptr inbounds float* %tmp19790, i64 1 + %tmp19792 = getelementptr inbounds float* %tmp19791, i64 1 + %tmp19793 = getelementptr 
inbounds float* %tmp19792, i64 1 + %tmp19794 = getelementptr inbounds float* %tmp19793, i64 1 + %tmp19795 = getelementptr inbounds float* %tmp19794, i64 1 + %tmp19796 = getelementptr inbounds float* %tmp19795, i64 1 + %tmp19797 = getelementptr inbounds float* %tmp19796, i64 1 + %tmp19798 = getelementptr inbounds float* %tmp19797, i64 1 + %tmp19799 = getelementptr inbounds float* %tmp19798, i64 1 + %tmp19800 = getelementptr inbounds float* %tmp19799, i64 1 + %tmp19801 = getelementptr inbounds float* %tmp19800, i64 1 + %tmp19802 = getelementptr inbounds float* %tmp19801, i64 1 + %tmp19803 = getelementptr inbounds float* %tmp19802, i64 1 + %tmp19804 = getelementptr inbounds float* %tmp19803, i64 1 + %tmp19805 = getelementptr inbounds float* %tmp19804, i64 1 + %tmp19806 = getelementptr inbounds float* %tmp19805, i64 1 + %tmp19807 = getelementptr inbounds float* %tmp19806, i64 1 + %tmp19808 = getelementptr inbounds float* %tmp19807, i64 1 + %tmp19809 = getelementptr inbounds float* %tmp19808, i64 1 + %tmp19810 = getelementptr inbounds float* %tmp19809, i64 1 + %tmp19811 = getelementptr inbounds float* %tmp19810, i64 1 + %tmp19812 = getelementptr inbounds float* %tmp19811, i64 1 + %tmp19813 = getelementptr inbounds float* %tmp19812, i64 1 + %tmp19814 = getelementptr inbounds float* %tmp19813, i64 1 + %tmp19815 = getelementptr inbounds float* %tmp19814, i64 1 + %tmp19816 = getelementptr inbounds float* %tmp19815, i64 1 + %tmp19817 = getelementptr inbounds float* %tmp19816, i64 1 + %tmp19818 = getelementptr inbounds float* %tmp19817, i64 1 + %tmp19819 = getelementptr inbounds float* %tmp19818, i64 1 + %tmp19820 = getelementptr inbounds float* %tmp19819, i64 1 + %tmp19821 = getelementptr inbounds float* %tmp19820, i64 1 + %tmp19822 = getelementptr inbounds float* %tmp19821, i64 1 + %tmp19823 = getelementptr inbounds float* %tmp19822, i64 1 + %tmp19824 = getelementptr inbounds float* %tmp19823, i64 1 + %tmp19825 = getelementptr inbounds float* %tmp19824, i64 1 + %tmp19826 = 
getelementptr inbounds float* %tmp19825, i64 1 + %tmp19827 = getelementptr inbounds float* %tmp19826, i64 1 + %tmp19828 = getelementptr inbounds float* %tmp19827, i64 1 + %tmp19829 = getelementptr inbounds float* %tmp19828, i64 1 + %tmp19830 = getelementptr inbounds float* %tmp19829, i64 1 + %tmp19831 = getelementptr inbounds float* %tmp19830, i64 1 + %tmp19832 = getelementptr inbounds float* %tmp19831, i64 1 + %tmp19833 = getelementptr inbounds float* %tmp19832, i64 1 + %tmp19834 = getelementptr inbounds float* %tmp19833, i64 1 + %tmp19835 = getelementptr inbounds float* %tmp19834, i64 1 + %tmp19836 = getelementptr inbounds float* %tmp19835, i64 1 + %tmp19837 = getelementptr inbounds float* %tmp19836, i64 1 + %tmp19838 = getelementptr inbounds float* %tmp19837, i64 1 + %tmp19839 = getelementptr inbounds float* %tmp19838, i64 1 + %tmp19840 = getelementptr inbounds float* %tmp19839, i64 1 + %tmp19841 = getelementptr inbounds float* %tmp19840, i64 1 + %tmp19842 = getelementptr inbounds float* %tmp19841, i64 1 + %tmp19843 = getelementptr inbounds float* %tmp19842, i64 1 + %tmp19844 = getelementptr inbounds float* %tmp19843, i64 1 + %tmp19845 = getelementptr inbounds float* %tmp19844, i64 1 + %tmp19846 = getelementptr inbounds float* %tmp19845, i64 1 + %tmp19847 = getelementptr inbounds float* %tmp19846, i64 1 + %tmp19848 = getelementptr inbounds float* %tmp19847, i64 1 + %tmp19849 = getelementptr inbounds float* %tmp19848, i64 1 + %tmp19850 = getelementptr inbounds float* %tmp19849, i64 1 + %tmp19851 = getelementptr inbounds float* %tmp19850, i64 1 + %tmp19852 = getelementptr inbounds float* %tmp19851, i64 1 + %tmp19853 = getelementptr inbounds float* %tmp19852, i64 1 + %tmp19854 = getelementptr inbounds float* %tmp19853, i64 1 + %tmp19855 = getelementptr inbounds float* %tmp19854, i64 1 + %tmp19856 = getelementptr inbounds float* %tmp19855, i64 1 + %tmp19857 = getelementptr inbounds float* %tmp19856, i64 1 + %tmp19858 = getelementptr inbounds float* %tmp19857, i64 1 
+ %tmp19859 = getelementptr inbounds float* %tmp19858, i64 1 + %tmp19860 = getelementptr inbounds float* %tmp19859, i64 1 + %tmp19861 = getelementptr inbounds float* %tmp19860, i64 1 + %tmp19862 = getelementptr inbounds float* %tmp19861, i64 1 + %tmp19863 = getelementptr inbounds float* %tmp19862, i64 1 + %tmp19864 = getelementptr inbounds float* %tmp19863, i64 1 + %tmp19865 = getelementptr inbounds float* %tmp19864, i64 1 + %tmp19866 = getelementptr inbounds float* %tmp19865, i64 1 + %tmp19867 = getelementptr inbounds float* %tmp19866, i64 1 + %tmp19868 = getelementptr inbounds float* %tmp19867, i64 1 + %tmp19869 = getelementptr inbounds float* %tmp19868, i64 1 + %tmp19870 = getelementptr inbounds float* %tmp19869, i64 1 + %tmp19871 = getelementptr inbounds float* %tmp19870, i64 1 + %tmp19872 = getelementptr inbounds float* %tmp19871, i64 1 + %tmp19873 = getelementptr inbounds float* %tmp19872, i64 1 + %tmp19874 = getelementptr inbounds float* %tmp19873, i64 1 + %tmp19875 = getelementptr inbounds float* %tmp19874, i64 1 + %tmp19876 = getelementptr inbounds float* %tmp19875, i64 1 + %tmp19877 = getelementptr inbounds float* %tmp19876, i64 1 + %tmp19878 = getelementptr inbounds float* %tmp19877, i64 1 + %tmp19879 = getelementptr inbounds float* %tmp19878, i64 1 + %tmp19880 = getelementptr inbounds float* %tmp19879, i64 1 + %tmp19881 = getelementptr inbounds float* %tmp19880, i64 1 + %tmp19882 = getelementptr inbounds float* %tmp19881, i64 1 + %tmp19883 = getelementptr inbounds float* %tmp19882, i64 1 + %tmp19884 = getelementptr inbounds float* %tmp19883, i64 1 + %tmp19885 = getelementptr inbounds float* %tmp19884, i64 1 + %tmp19886 = getelementptr inbounds float* %tmp19885, i64 1 + %tmp19887 = getelementptr inbounds float* %tmp19886, i64 1 + %tmp19888 = getelementptr inbounds float* %tmp19887, i64 1 + %tmp19889 = getelementptr inbounds float* %tmp19888, i64 1 + %tmp19890 = getelementptr inbounds float* %tmp19889, i64 1 + %tmp19891 = getelementptr inbounds float* 
%tmp19890, i64 1 + %tmp19892 = getelementptr inbounds float* %tmp19891, i64 1 + %tmp19893 = getelementptr inbounds float* %tmp19892, i64 1 + %tmp19894 = getelementptr inbounds float* %tmp19893, i64 1 + %tmp19895 = getelementptr inbounds float* %tmp19894, i64 1 + %tmp19896 = getelementptr inbounds float* %tmp19895, i64 1 + %tmp19897 = getelementptr inbounds float* %tmp19896, i64 1 + %tmp19898 = getelementptr inbounds float* %tmp19897, i64 1 + %tmp19899 = getelementptr inbounds float* %tmp19898, i64 1 + %tmp19900 = getelementptr inbounds float* %tmp19899, i64 1 + %tmp19901 = getelementptr inbounds float* %tmp19900, i64 1 + %tmp19902 = getelementptr inbounds float* %tmp19901, i64 1 + %tmp19903 = getelementptr inbounds float* %tmp19902, i64 1 + %tmp19904 = getelementptr inbounds float* %tmp19903, i64 1 + %tmp19905 = getelementptr inbounds float* %tmp19904, i64 1 + %tmp19906 = getelementptr inbounds float* %tmp19905, i64 1 + %tmp19907 = getelementptr inbounds float* %tmp19906, i64 1 + %tmp19908 = getelementptr inbounds float* %tmp19907, i64 1 + %tmp19909 = getelementptr inbounds float* %tmp19908, i64 1 + %tmp19910 = getelementptr inbounds float* %tmp19909, i64 1 + %tmp19911 = getelementptr inbounds float* %tmp19910, i64 1 + %tmp19912 = getelementptr inbounds float* %tmp19911, i64 1 + %tmp19913 = getelementptr inbounds float* %tmp19912, i64 1 + %tmp19914 = getelementptr inbounds float* %tmp19913, i64 1 + %tmp19915 = getelementptr inbounds float* %tmp19914, i64 1 + %tmp19916 = getelementptr inbounds float* %tmp19915, i64 1 + %tmp19917 = getelementptr inbounds float* %tmp19916, i64 1 + %tmp19918 = getelementptr inbounds float* %tmp19917, i64 1 + %tmp19919 = getelementptr inbounds float* %tmp19918, i64 1 + %tmp19920 = getelementptr inbounds float* %tmp19919, i64 1 + %tmp19921 = getelementptr inbounds float* %tmp19920, i64 1 + %tmp19922 = getelementptr inbounds float* %tmp19921, i64 1 + %tmp19923 = getelementptr inbounds float* %tmp19922, i64 1 + %tmp19924 = getelementptr 
inbounds float* %tmp19923, i64 1 + %tmp19925 = getelementptr inbounds float* %tmp19924, i64 1 + %tmp19926 = getelementptr inbounds float* %tmp19925, i64 1 + %tmp19927 = getelementptr inbounds float* %tmp19926, i64 1 + %tmp19928 = getelementptr inbounds float* %tmp19927, i64 1 + %tmp19929 = getelementptr inbounds float* %tmp19928, i64 1 + %tmp19930 = getelementptr inbounds float* %tmp19929, i64 1 + %tmp19931 = getelementptr inbounds float* %tmp19930, i64 1 + %tmp19932 = getelementptr inbounds float* %tmp19931, i64 1 + %tmp19933 = getelementptr inbounds float* %tmp19932, i64 1 + %tmp19934 = getelementptr inbounds float* %tmp19933, i64 1 + %tmp19935 = getelementptr inbounds float* %tmp19934, i64 1 + %tmp19936 = getelementptr inbounds float* %tmp19935, i64 1 + %tmp19937 = getelementptr inbounds float* %tmp19936, i64 1 + %tmp19938 = getelementptr inbounds float* %tmp19937, i64 1 + %tmp19939 = getelementptr inbounds float* %tmp19938, i64 1 + %tmp19940 = getelementptr inbounds float* %tmp19939, i64 1 + %tmp19941 = getelementptr inbounds float* %tmp19940, i64 1 + %tmp19942 = getelementptr inbounds float* %tmp19941, i64 1 + %tmp19943 = getelementptr inbounds float* %tmp19942, i64 1 + %tmp19944 = getelementptr inbounds float* %tmp19943, i64 1 + %tmp19945 = getelementptr inbounds float* %tmp19944, i64 1 + %tmp19946 = getelementptr inbounds float* %tmp19945, i64 1 + %tmp19947 = getelementptr inbounds float* %tmp19946, i64 1 + %tmp19948 = getelementptr inbounds float* %tmp19947, i64 1 + %tmp19949 = getelementptr inbounds float* %tmp19948, i64 1 + %tmp19950 = getelementptr inbounds float* %tmp19949, i64 1 + %tmp19951 = getelementptr inbounds float* %tmp19950, i64 1 + %tmp19952 = getelementptr inbounds float* %tmp19951, i64 1 + %tmp19953 = getelementptr inbounds float* %tmp19952, i64 1 + %tmp19954 = getelementptr inbounds float* %tmp19953, i64 1 + %tmp19955 = getelementptr inbounds float* %tmp19954, i64 1 + %tmp19956 = getelementptr inbounds float* %tmp19955, i64 1 + %tmp19957 = 
getelementptr inbounds float* %tmp19956, i64 1 + %tmp19958 = getelementptr inbounds float* %tmp19957, i64 1 + %tmp19959 = getelementptr inbounds float* %tmp19958, i64 1 + %tmp19960 = getelementptr inbounds float* %tmp19959, i64 1 + %tmp19961 = getelementptr inbounds float* %tmp19960, i64 1 + %tmp19962 = getelementptr inbounds float* %tmp19961, i64 1 + %tmp19963 = getelementptr inbounds float* %tmp19962, i64 1 + %tmp19964 = getelementptr inbounds float* %tmp19963, i64 1 + %tmp19965 = getelementptr inbounds float* %tmp19964, i64 1 + %tmp19966 = getelementptr inbounds float* %tmp19965, i64 1 + %tmp19967 = getelementptr inbounds float* %tmp19966, i64 1 + %tmp19968 = getelementptr inbounds float* %tmp19967, i64 1 + %tmp19969 = getelementptr inbounds float* %tmp19968, i64 1 + %tmp19970 = getelementptr inbounds float* %tmp19969, i64 1 + %tmp19971 = getelementptr inbounds float* %tmp19970, i64 1 + %tmp19972 = getelementptr inbounds float* %tmp19971, i64 1 + %tmp19973 = getelementptr inbounds float* %tmp19972, i64 1 + %tmp19974 = getelementptr inbounds float* %tmp19973, i64 1 + %tmp19975 = getelementptr inbounds float* %tmp19974, i64 1 + %tmp19976 = getelementptr inbounds float* %tmp19975, i64 1 + %tmp19977 = getelementptr inbounds float* %tmp19976, i64 1 + %tmp19978 = getelementptr inbounds float* %tmp19977, i64 1 + %tmp19979 = getelementptr inbounds float* %tmp19978, i64 1 + %tmp19980 = getelementptr inbounds float* %tmp19979, i64 1 + %tmp19981 = getelementptr inbounds float* %tmp19980, i64 1 + %tmp19982 = getelementptr inbounds float* %tmp19981, i64 1 + %tmp19983 = getelementptr inbounds float* %tmp19982, i64 1 + %tmp19984 = getelementptr inbounds float* %tmp19983, i64 1 + %tmp19985 = getelementptr inbounds float* %tmp19984, i64 1 + %tmp19986 = getelementptr inbounds float* %tmp19985, i64 1 + %tmp19987 = getelementptr inbounds float* %tmp19986, i64 1 + %tmp19988 = getelementptr inbounds float* %tmp19987, i64 1 + %tmp19989 = getelementptr inbounds float* %tmp19988, i64 1 
+ %tmp19990 = getelementptr inbounds float* %tmp19989, i64 1 + %tmp19991 = getelementptr inbounds float* %tmp19990, i64 1 + %tmp19992 = getelementptr inbounds float* %tmp19991, i64 1 + %tmp19993 = getelementptr inbounds float* %tmp19992, i64 1 + %tmp19994 = getelementptr inbounds float* %tmp19993, i64 1 + %tmp19995 = getelementptr inbounds float* %tmp19994, i64 1 + %tmp19996 = getelementptr inbounds float* %tmp19995, i64 1 + %tmp19997 = getelementptr inbounds float* %tmp19996, i64 1 + %tmp19998 = getelementptr inbounds float* %tmp19997, i64 1 + %tmp19999 = getelementptr inbounds float* %tmp19998, i64 1 + %tmp20000 = getelementptr inbounds float* %tmp19999, i64 1 + %tmp20001 = getelementptr inbounds float* %tmp20000, i64 1 + %tmp20002 = getelementptr inbounds float* %tmp20001, i64 1 + %tmp20003 = getelementptr inbounds float* %tmp20002, i64 1 + %tmp20004 = getelementptr inbounds float* %tmp20003, i64 1 + %tmp20005 = getelementptr inbounds float* %tmp20004, i64 1 + %tmp20006 = getelementptr inbounds float* %tmp20005, i64 1 + %tmp20007 = getelementptr inbounds float* %tmp20006, i64 1 + %tmp20008 = getelementptr inbounds float* %tmp20007, i64 1 + %tmp20009 = getelementptr inbounds float* %tmp20008, i64 1 + %tmp20010 = getelementptr inbounds float* %tmp20009, i64 1 + %tmp20011 = getelementptr inbounds float* %tmp20010, i64 1 + %tmp20012 = getelementptr inbounds float* %tmp20011, i64 1 + %tmp20013 = getelementptr inbounds float* %tmp20012, i64 1 + %tmp20014 = getelementptr inbounds float* %tmp20013, i64 1 + %tmp20015 = getelementptr inbounds float* %tmp20014, i64 1 + %tmp20016 = getelementptr inbounds float* %tmp20015, i64 1 + %tmp20017 = getelementptr inbounds float* %tmp20016, i64 1 + %tmp20018 = getelementptr inbounds float* %tmp20017, i64 1 + %tmp20019 = getelementptr inbounds float* %tmp20018, i64 1 + %tmp20020 = getelementptr inbounds float* %tmp20019, i64 1 + %tmp20021 = getelementptr inbounds float* %tmp20020, i64 1 + %tmp20022 = getelementptr inbounds float* 
%tmp20021, i64 1 + %tmp20023 = getelementptr inbounds float* %tmp20022, i64 1 + %tmp20024 = getelementptr inbounds float* %tmp20023, i64 1 + %tmp20025 = getelementptr inbounds float* %tmp20024, i64 1 + %tmp20026 = getelementptr inbounds float* %tmp20025, i64 1 + %tmp20027 = getelementptr inbounds float* %tmp20026, i64 1 + %tmp20028 = getelementptr inbounds float* %tmp20027, i64 1 + %tmp20029 = getelementptr inbounds float* %tmp20028, i64 1 + %tmp20030 = getelementptr inbounds float* %tmp20029, i64 1 + %tmp20031 = getelementptr inbounds float* %tmp20030, i64 1 + %tmp20032 = getelementptr inbounds float* %tmp20031, i64 1 + %tmp20033 = getelementptr inbounds float* %tmp20032, i64 1 + %tmp20034 = getelementptr inbounds float* %tmp20033, i64 1 + %tmp20035 = getelementptr inbounds float* %tmp20034, i64 1 + %tmp20036 = getelementptr inbounds float* %tmp20035, i64 1 + %tmp20037 = getelementptr inbounds float* %tmp20036, i64 1 + %tmp20038 = getelementptr inbounds float* %tmp20037, i64 1 + %tmp20039 = getelementptr inbounds float* %tmp20038, i64 1 + %tmp20040 = getelementptr inbounds float* %tmp20039, i64 1 + %tmp20041 = getelementptr inbounds float* %tmp20040, i64 1 + %tmp20042 = getelementptr inbounds float* %tmp20041, i64 1 + %tmp20043 = getelementptr inbounds float* %tmp20042, i64 1 + %tmp20044 = getelementptr inbounds float* %tmp20043, i64 1 + %tmp20045 = getelementptr inbounds float* %tmp20044, i64 1 + %tmp20046 = getelementptr inbounds float* %tmp20045, i64 1 + %tmp20047 = getelementptr inbounds float* %tmp20046, i64 1 + %tmp20048 = getelementptr inbounds float* %tmp20047, i64 1 + %tmp20049 = getelementptr inbounds float* %tmp20048, i64 1 + %tmp20050 = getelementptr inbounds float* %tmp20049, i64 1 + %tmp20051 = getelementptr inbounds float* %tmp20050, i64 1 + %tmp20052 = getelementptr inbounds float* %tmp20051, i64 1 + %tmp20053 = getelementptr inbounds float* %tmp20052, i64 1 + %tmp20054 = getelementptr inbounds float* %tmp20053, i64 1 + %tmp20055 = getelementptr 
inbounds float* %tmp20054, i64 1 + %tmp20056 = getelementptr inbounds float* %tmp20055, i64 1 + %tmp20057 = getelementptr inbounds float* %tmp20056, i64 1 + %tmp20058 = getelementptr inbounds float* %tmp20057, i64 1 + %tmp20059 = getelementptr inbounds float* %tmp20058, i64 1 + %tmp20060 = getelementptr inbounds float* %tmp20059, i64 1 + %tmp20061 = getelementptr inbounds float* %tmp20060, i64 1 + %tmp20062 = getelementptr inbounds float* %tmp20061, i64 1 + %tmp20063 = getelementptr inbounds float* %tmp20062, i64 1 + %tmp20064 = getelementptr inbounds float* %tmp20063, i64 1 + %tmp20065 = getelementptr inbounds float* %tmp20064, i64 1 + %tmp20066 = getelementptr inbounds float* %tmp20065, i64 1 + %tmp20067 = getelementptr inbounds float* %tmp20066, i64 1 + %tmp20068 = getelementptr inbounds float* %tmp20067, i64 1 + %tmp20069 = getelementptr inbounds float* %tmp20068, i64 1 + %tmp20070 = getelementptr inbounds float* %tmp20069, i64 1 + %tmp20071 = getelementptr inbounds float* %tmp20070, i64 1 + %tmp20072 = getelementptr inbounds float* %tmp20071, i64 1 + %tmp20073 = getelementptr inbounds float* %tmp20072, i64 1 + %tmp20074 = getelementptr inbounds float* %tmp20073, i64 1 + %tmp20075 = getelementptr inbounds float* %tmp20074, i64 1 + %tmp20076 = getelementptr inbounds float* %tmp20075, i64 1 + %tmp20077 = getelementptr inbounds float* %tmp20076, i64 1 + %tmp20078 = getelementptr inbounds float* %tmp20077, i64 1 + %tmp20079 = getelementptr inbounds float* %tmp20078, i64 1 + %tmp20080 = getelementptr inbounds float* %tmp20079, i64 1 + %tmp20081 = getelementptr inbounds float* %tmp20080, i64 1 + %tmp20082 = getelementptr inbounds float* %tmp20081, i64 1 + %tmp20083 = getelementptr inbounds float* %tmp20082, i64 1 + %tmp20084 = getelementptr inbounds float* %tmp20083, i64 1 + %tmp20085 = getelementptr inbounds float* %tmp20084, i64 1 + %tmp20086 = getelementptr inbounds float* %tmp20085, i64 1 + %tmp20087 = getelementptr inbounds float* %tmp20086, i64 1 + %tmp20088 = 
getelementptr inbounds float* %tmp20087, i64 1 + %tmp20089 = getelementptr inbounds float* %tmp20088, i64 1 + %tmp20090 = getelementptr inbounds float* %tmp20089, i64 1 + %tmp20091 = getelementptr inbounds float* %tmp20090, i64 1 + %tmp20092 = getelementptr inbounds float* %tmp20091, i64 1 + %tmp20093 = getelementptr inbounds float* %tmp20092, i64 1 + %tmp20094 = getelementptr inbounds float* %tmp20093, i64 1 + %tmp20095 = getelementptr inbounds float* %tmp20094, i64 1 + %tmp20096 = getelementptr inbounds float* %tmp20095, i64 1 + %tmp20097 = getelementptr inbounds float* %tmp20096, i64 1 + %tmp20098 = getelementptr inbounds float* %tmp20097, i64 1 + %tmp20099 = getelementptr inbounds float* %tmp20098, i64 1 + %tmp20100 = getelementptr inbounds float* %tmp20099, i64 1 + %tmp20101 = getelementptr inbounds float* %tmp20100, i64 1 + %tmp20102 = getelementptr inbounds float* %tmp20101, i64 1 + %tmp20103 = getelementptr inbounds float* %tmp20102, i64 1 + %tmp20104 = getelementptr inbounds float* %tmp20103, i64 1 + %tmp20105 = getelementptr inbounds float* %tmp20104, i64 1 + %tmp20106 = getelementptr inbounds float* %tmp20105, i64 1 + %tmp20107 = getelementptr inbounds float* %tmp20106, i64 1 + %tmp20108 = getelementptr inbounds float* %tmp20107, i64 1 + %tmp20109 = getelementptr inbounds float* %tmp20108, i64 1 + %tmp20110 = getelementptr inbounds float* %tmp20109, i64 1 + %tmp20111 = getelementptr inbounds float* %tmp20110, i64 1 + %tmp20112 = getelementptr inbounds float* %tmp20111, i64 1 + %tmp20113 = getelementptr inbounds float* %tmp20112, i64 1 + %tmp20114 = getelementptr inbounds float* %tmp20113, i64 1 + %tmp20115 = getelementptr inbounds float* %tmp20114, i64 1 + %tmp20116 = getelementptr inbounds float* %tmp20115, i64 1 + %tmp20117 = getelementptr inbounds float* %tmp20116, i64 1 + %tmp20118 = getelementptr inbounds float* %tmp20117, i64 1 + %tmp20119 = getelementptr inbounds float* %tmp20118, i64 1 + %tmp20120 = getelementptr inbounds float* %tmp20119, i64 1 
+ %tmp20121 = getelementptr inbounds float* %tmp20120, i64 1 + %tmp20122 = getelementptr inbounds float* %tmp20121, i64 1 + %tmp20123 = getelementptr inbounds float* %tmp20122, i64 1 + %tmp20124 = getelementptr inbounds float* %tmp20123, i64 1 + %tmp20125 = getelementptr inbounds float* %tmp20124, i64 1 + %tmp20126 = getelementptr inbounds float* %tmp20125, i64 1 + %tmp20127 = getelementptr inbounds float* %tmp20126, i64 1 + %tmp20128 = getelementptr inbounds float* %tmp20127, i64 1 + %tmp20129 = getelementptr inbounds float* %tmp20128, i64 1 + %tmp20130 = getelementptr inbounds float* %tmp20129, i64 1 + %tmp20131 = getelementptr inbounds float* %tmp20130, i64 1 + %tmp20132 = getelementptr inbounds float* %tmp20131, i64 1 + %tmp20133 = getelementptr inbounds float* %tmp20132, i64 1 + %tmp20134 = getelementptr inbounds float* %tmp20133, i64 1 + %tmp20135 = getelementptr inbounds float* %tmp20134, i64 1 + %tmp20136 = getelementptr inbounds float* %tmp20135, i64 1 + %tmp20137 = getelementptr inbounds float* %tmp20136, i64 1 + %tmp20138 = getelementptr inbounds float* %tmp20137, i64 1 + %tmp20139 = getelementptr inbounds float* %tmp20138, i64 1 + %tmp20140 = getelementptr inbounds float* %tmp20139, i64 1 + %tmp20141 = getelementptr inbounds float* %tmp20140, i64 1 + %tmp20142 = getelementptr inbounds float* %tmp20141, i64 1 + %tmp20143 = getelementptr inbounds float* %tmp20142, i64 1 + %tmp20144 = getelementptr inbounds float* %tmp20143, i64 1 + %tmp20145 = getelementptr inbounds float* %tmp20144, i64 1 + %tmp20146 = getelementptr inbounds float* %tmp20145, i64 1 + %tmp20147 = getelementptr inbounds float* %tmp20146, i64 1 + %tmp20148 = getelementptr inbounds float* %tmp20147, i64 1 + %tmp20149 = getelementptr inbounds float* %tmp20148, i64 1 + %tmp20150 = getelementptr inbounds float* %tmp20149, i64 1 + %tmp20151 = getelementptr inbounds float* %tmp20150, i64 1 + %tmp20152 = getelementptr inbounds float* %tmp20151, i64 1 + %tmp20153 = getelementptr inbounds float* 
%tmp20152, i64 1 + %tmp20154 = getelementptr inbounds float* %tmp20153, i64 1 + %tmp20155 = getelementptr inbounds float* %tmp20154, i64 1 + %tmp20156 = getelementptr inbounds float* %tmp20155, i64 1 + %tmp20157 = getelementptr inbounds float* %tmp20156, i64 1 + %tmp20158 = getelementptr inbounds float* %tmp20157, i64 1 + %tmp20159 = getelementptr inbounds float* %tmp20158, i64 1 + %tmp20160 = getelementptr inbounds float* %tmp20159, i64 1 + %tmp20161 = getelementptr inbounds float* %tmp20160, i64 1 + %tmp20162 = getelementptr inbounds float* %tmp20161, i64 1 + %tmp20163 = getelementptr inbounds float* %tmp20162, i64 1 + %tmp20164 = getelementptr inbounds float* %tmp20163, i64 1 + %tmp20165 = getelementptr inbounds float* %tmp20164, i64 1 + %tmp20166 = getelementptr inbounds float* %tmp20165, i64 1 + %tmp20167 = getelementptr inbounds float* %tmp20166, i64 1 + %tmp20168 = getelementptr inbounds float* %tmp20167, i64 1 + %tmp20169 = getelementptr inbounds float* %tmp20168, i64 1 + %tmp20170 = getelementptr inbounds float* %tmp20169, i64 1 + %tmp20171 = getelementptr inbounds float* %tmp20170, i64 1 + %tmp20172 = getelementptr inbounds float* %tmp20171, i64 1 + %tmp20173 = getelementptr inbounds float* %tmp20172, i64 1 + %tmp20174 = getelementptr inbounds float* %tmp20173, i64 1 + %tmp20175 = getelementptr inbounds float* %tmp20174, i64 1 + %tmp20176 = getelementptr inbounds float* %tmp20175, i64 1 + %tmp20177 = getelementptr inbounds float* %tmp20176, i64 1 + %tmp20178 = getelementptr inbounds float* %tmp20177, i64 1 + %tmp20179 = getelementptr inbounds float* %tmp20178, i64 1 + %tmp20180 = getelementptr inbounds float* %tmp20179, i64 1 + %tmp20181 = getelementptr inbounds float* %tmp20180, i64 1 + %tmp20182 = getelementptr inbounds float* %tmp20181, i64 1 + %tmp20183 = getelementptr inbounds float* %tmp20182, i64 1 + %tmp20184 = getelementptr inbounds float* %tmp20183, i64 1 + %tmp20185 = getelementptr inbounds float* %tmp20184, i64 1 + %tmp20186 = getelementptr 
inbounds float* %tmp20185, i64 1 + %tmp20187 = getelementptr inbounds float* %tmp20186, i64 1 + %tmp20188 = getelementptr inbounds float* %tmp20187, i64 1 + %tmp20189 = getelementptr inbounds float* %tmp20188, i64 1 + %tmp20190 = getelementptr inbounds float* %tmp20189, i64 1 + %tmp20191 = getelementptr inbounds float* %tmp20190, i64 1 + %tmp20192 = getelementptr inbounds float* %tmp20191, i64 1 + %tmp20193 = getelementptr inbounds float* %tmp20192, i64 1 + %tmp20194 = getelementptr inbounds float* %tmp20193, i64 1 + %tmp20195 = getelementptr inbounds float* %tmp20194, i64 1 + %tmp20196 = getelementptr inbounds float* %tmp20195, i64 1 + %tmp20197 = getelementptr inbounds float* %tmp20196, i64 1 + %tmp20198 = getelementptr inbounds float* %tmp20197, i64 1 + %tmp20199 = getelementptr inbounds float* %tmp20198, i64 1 + %tmp20200 = getelementptr inbounds float* %tmp20199, i64 1 + %tmp20201 = getelementptr inbounds float* %tmp20200, i64 1 + %tmp20202 = getelementptr inbounds float* %tmp20201, i64 1 + %tmp20203 = getelementptr inbounds float* %tmp20202, i64 1 + %tmp20204 = getelementptr inbounds float* %tmp20203, i64 1 + %tmp20205 = getelementptr inbounds float* %tmp20204, i64 1 + %tmp20206 = getelementptr inbounds float* %tmp20205, i64 1 + %tmp20207 = getelementptr inbounds float* %tmp20206, i64 1 + %tmp20208 = getelementptr inbounds float* %tmp20207, i64 1 + %tmp20209 = getelementptr inbounds float* %tmp20208, i64 1 + %tmp20210 = getelementptr inbounds float* %tmp20209, i64 1 + %tmp20211 = getelementptr inbounds float* %tmp20210, i64 1 + %tmp20212 = getelementptr inbounds float* %tmp20211, i64 1 + %tmp20213 = getelementptr inbounds float* %tmp20212, i64 1 + %tmp20214 = getelementptr inbounds float* %tmp20213, i64 1 + %tmp20215 = getelementptr inbounds float* %tmp20214, i64 1 + %tmp20216 = getelementptr inbounds float* %tmp20215, i64 1 + %tmp20217 = getelementptr inbounds float* %tmp20216, i64 1 + %tmp20218 = getelementptr inbounds float* %tmp20217, i64 1 + %tmp20219 = 
getelementptr inbounds float* %tmp20218, i64 1 + %tmp20220 = getelementptr inbounds float* %tmp20219, i64 1 + %tmp20221 = getelementptr inbounds float* %tmp20220, i64 1 + %tmp20222 = getelementptr inbounds float* %tmp20221, i64 1 + %tmp20223 = getelementptr inbounds float* %tmp20222, i64 1 + %tmp20224 = getelementptr inbounds float* %tmp20223, i64 1 + %tmp20225 = getelementptr inbounds float* %tmp20224, i64 1 + %tmp20226 = getelementptr inbounds float* %tmp20225, i64 1 + %tmp20227 = getelementptr inbounds float* %tmp20226, i64 1 + %tmp20228 = getelementptr inbounds float* %tmp20227, i64 1 + %tmp20229 = getelementptr inbounds float* %tmp20228, i64 1 + %tmp20230 = getelementptr inbounds float* %tmp20229, i64 1 + %tmp20231 = getelementptr inbounds float* %tmp20230, i64 1 + %tmp20232 = getelementptr inbounds float* %tmp20231, i64 1 + %tmp20233 = getelementptr inbounds float* %tmp20232, i64 1 + %tmp20234 = getelementptr inbounds float* %tmp20233, i64 1 + %tmp20235 = getelementptr inbounds float* %tmp20234, i64 1 + %tmp20236 = getelementptr inbounds float* %tmp20235, i64 1 + %tmp20237 = getelementptr inbounds float* %tmp20236, i64 1 + %tmp20238 = getelementptr inbounds float* %tmp20237, i64 1 + %tmp20239 = getelementptr inbounds float* %tmp20238, i64 1 + %tmp20240 = getelementptr inbounds float* %tmp20239, i64 1 + %tmp20241 = getelementptr inbounds float* %tmp20240, i64 1 + %tmp20242 = getelementptr inbounds float* %tmp20241, i64 1 + %tmp20243 = getelementptr inbounds float* %tmp20242, i64 1 + %tmp20244 = getelementptr inbounds float* %tmp20243, i64 1 + %tmp20245 = getelementptr inbounds float* %tmp20244, i64 1 + %tmp20246 = getelementptr inbounds float* %tmp20245, i64 1 + %tmp20247 = getelementptr inbounds float* %tmp20246, i64 1 + %tmp20248 = getelementptr inbounds float* %tmp20247, i64 1 + %tmp20249 = getelementptr inbounds float* %tmp20248, i64 1 + %tmp20250 = getelementptr inbounds float* %tmp20249, i64 1 + %tmp20251 = getelementptr inbounds float* %tmp20250, i64 1 
+ %tmp20252 = getelementptr inbounds float* %tmp20251, i64 1 + %tmp20253 = getelementptr inbounds float* %tmp20252, i64 1 + %tmp20254 = getelementptr inbounds float* %tmp20253, i64 1 + %tmp20255 = getelementptr inbounds float* %tmp20254, i64 1 + %tmp20256 = getelementptr inbounds float* %tmp20255, i64 1 + %tmp20257 = getelementptr inbounds float* %tmp20256, i64 1 + %tmp20258 = getelementptr inbounds float* %tmp20257, i64 1 + %tmp20259 = getelementptr inbounds float* %tmp20258, i64 1 + %tmp20260 = getelementptr inbounds float* %tmp20259, i64 1 + %tmp20261 = getelementptr inbounds float* %tmp20260, i64 1 + %tmp20262 = getelementptr inbounds float* %tmp20261, i64 1 + %tmp20263 = getelementptr inbounds float* %tmp20262, i64 1 + %tmp20264 = getelementptr inbounds float* %tmp20263, i64 1 + %tmp20265 = getelementptr inbounds float* %tmp20264, i64 1 + %tmp20266 = getelementptr inbounds float* %tmp20265, i64 1 + %tmp20267 = getelementptr inbounds float* %tmp20266, i64 1 + %tmp20268 = getelementptr inbounds float* %tmp20267, i64 1 + %tmp20269 = getelementptr inbounds float* %tmp20268, i64 1 + %tmp20270 = getelementptr inbounds float* %tmp20269, i64 1 + %tmp20271 = getelementptr inbounds float* %tmp20270, i64 1 + %tmp20272 = getelementptr inbounds float* %tmp20271, i64 1 + %tmp20273 = getelementptr inbounds float* %tmp20272, i64 1 + %tmp20274 = getelementptr inbounds float* %tmp20273, i64 1 + %tmp20275 = getelementptr inbounds float* %tmp20274, i64 1 + %tmp20276 = getelementptr inbounds float* %tmp20275, i64 1 + %tmp20277 = getelementptr inbounds float* %tmp20276, i64 1 + %tmp20278 = getelementptr inbounds float* %tmp20277, i64 1 + %tmp20279 = getelementptr inbounds float* %tmp20278, i64 1 + %tmp20280 = getelementptr inbounds float* %tmp20279, i64 1 + %tmp20281 = getelementptr inbounds float* %tmp20280, i64 1 + %tmp20282 = getelementptr inbounds float* %tmp20281, i64 1 + %tmp20283 = getelementptr inbounds float* %tmp20282, i64 1 + %tmp20284 = getelementptr inbounds float* 
%tmp20283, i64 1 + %tmp20285 = getelementptr inbounds float* %tmp20284, i64 1 + %tmp20286 = getelementptr inbounds float* %tmp20285, i64 1 + %tmp20287 = getelementptr inbounds float* %tmp20286, i64 1 + %tmp20288 = getelementptr inbounds float* %tmp20287, i64 1 + %tmp20289 = getelementptr inbounds float* %tmp20288, i64 1 + %tmp20290 = getelementptr inbounds float* %tmp20289, i64 1 + %tmp20291 = getelementptr inbounds float* %tmp20290, i64 1 + %tmp20292 = getelementptr inbounds float* %tmp20291, i64 1 + %tmp20293 = getelementptr inbounds float* %tmp20292, i64 1 + %tmp20294 = getelementptr inbounds float* %tmp20293, i64 1 + %tmp20295 = getelementptr inbounds float* %tmp20294, i64 1 + %tmp20296 = getelementptr inbounds float* %tmp20295, i64 1 + %tmp20297 = getelementptr inbounds float* %tmp20296, i64 1 + %tmp20298 = getelementptr inbounds float* %tmp20297, i64 1 + %tmp20299 = getelementptr inbounds float* %tmp20298, i64 1 + %tmp20300 = getelementptr inbounds float* %tmp20299, i64 1 + %tmp20301 = getelementptr inbounds float* %tmp20300, i64 1 + %tmp20302 = getelementptr inbounds float* %tmp20301, i64 1 + %tmp20303 = getelementptr inbounds float* %tmp20302, i64 1 + %tmp20304 = getelementptr inbounds float* %tmp20303, i64 1 + %tmp20305 = getelementptr inbounds float* %tmp20304, i64 1 + %tmp20306 = getelementptr inbounds float* %tmp20305, i64 1 + %tmp20307 = getelementptr inbounds float* %tmp20306, i64 1 + %tmp20308 = getelementptr inbounds float* %tmp20307, i64 1 + %tmp20309 = getelementptr inbounds float* %tmp20308, i64 1 + %tmp20310 = getelementptr inbounds float* %tmp20309, i64 1 + %tmp20311 = getelementptr inbounds float* %tmp20310, i64 1 + %tmp20312 = getelementptr inbounds float* %tmp20311, i64 1 + %tmp20313 = getelementptr inbounds float* %tmp20312, i64 1 + %tmp20314 = getelementptr inbounds float* %tmp20313, i64 1 + %tmp20315 = getelementptr inbounds float* %tmp20314, i64 1 + %tmp20316 = getelementptr inbounds float* %tmp20315, i64 1 + %tmp20317 = getelementptr 
inbounds float* %tmp20316, i64 1 + %tmp20318 = getelementptr inbounds float* %tmp20317, i64 1 + %tmp20319 = getelementptr inbounds float* %tmp20318, i64 1 + %tmp20320 = getelementptr inbounds float* %tmp20319, i64 1 + %tmp20321 = getelementptr inbounds float* %tmp20320, i64 1 + %tmp20322 = getelementptr inbounds float* %tmp20321, i64 1 + %tmp20323 = getelementptr inbounds float* %tmp20322, i64 1 + %tmp20324 = getelementptr inbounds float* %tmp20323, i64 1 + %tmp20325 = getelementptr inbounds float* %tmp20324, i64 1 + %tmp20326 = getelementptr inbounds float* %tmp20325, i64 1 + %tmp20327 = getelementptr inbounds float* %tmp20326, i64 1 + %tmp20328 = getelementptr inbounds float* %tmp20327, i64 1 + %tmp20329 = getelementptr inbounds float* %tmp20328, i64 1 + %tmp20330 = getelementptr inbounds float* %tmp20329, i64 1 + %tmp20331 = getelementptr inbounds float* %tmp20330, i64 1 + %tmp20332 = getelementptr inbounds float* %tmp20331, i64 1 + %tmp20333 = getelementptr inbounds float* %tmp20332, i64 1 + %tmp20334 = getelementptr inbounds float* %tmp20333, i64 1 + %tmp20335 = getelementptr inbounds float* %tmp20334, i64 1 + %tmp20336 = getelementptr inbounds float* %tmp20335, i64 1 + %tmp20337 = getelementptr inbounds float* %tmp20336, i64 1 + %tmp20338 = getelementptr inbounds float* %tmp20337, i64 1 + %tmp20339 = getelementptr inbounds float* %tmp20338, i64 1 + %tmp20340 = getelementptr inbounds float* %tmp20339, i64 1 + %tmp20341 = getelementptr inbounds float* %tmp20340, i64 1 + %tmp20342 = getelementptr inbounds float* %tmp20341, i64 1 + %tmp20343 = getelementptr inbounds float* %tmp20342, i64 1 + %tmp20344 = getelementptr inbounds float* %tmp20343, i64 1 + %tmp20345 = getelementptr inbounds float* %tmp20344, i64 1 + %tmp20346 = getelementptr inbounds float* %tmp20345, i64 1 + %tmp20347 = getelementptr inbounds float* %tmp20346, i64 1 + %tmp20348 = getelementptr inbounds float* %tmp20347, i64 1 + %tmp20349 = getelementptr inbounds float* %tmp20348, i64 1 + %tmp20350 = 
getelementptr inbounds float* %tmp20349, i64 1 + %tmp20351 = getelementptr inbounds float* %tmp20350, i64 1 + %tmp20352 = getelementptr inbounds float* %tmp20351, i64 1 + %tmp20353 = getelementptr inbounds float* %tmp20352, i64 1 + %tmp20354 = getelementptr inbounds float* %tmp20353, i64 1 + %tmp20355 = getelementptr inbounds float* %tmp20354, i64 1 + %tmp20356 = getelementptr inbounds float* %tmp20355, i64 1 + %tmp20357 = getelementptr inbounds float* %tmp20356, i64 1 + %tmp20358 = getelementptr inbounds float* %tmp20357, i64 1 + %tmp20359 = getelementptr inbounds float* %tmp20358, i64 1 + %tmp20360 = getelementptr inbounds float* %tmp20359, i64 1 + %tmp20361 = getelementptr inbounds float* %tmp20360, i64 1 + %tmp20362 = getelementptr inbounds float* %tmp20361, i64 1 + %tmp20363 = getelementptr inbounds float* %tmp20362, i64 1 + %tmp20364 = getelementptr inbounds float* %tmp20363, i64 1 + %tmp20365 = getelementptr inbounds float* %tmp20364, i64 1 + %tmp20366 = getelementptr inbounds float* %tmp20365, i64 1 + %tmp20367 = getelementptr inbounds float* %tmp20366, i64 1 + %tmp20368 = getelementptr inbounds float* %tmp20367, i64 1 + %tmp20369 = getelementptr inbounds float* %tmp20368, i64 1 + %tmp20370 = getelementptr inbounds float* %tmp20369, i64 1 + %tmp20371 = getelementptr inbounds float* %tmp20370, i64 1 + %tmp20372 = getelementptr inbounds float* %tmp20371, i64 1 + %tmp20373 = getelementptr inbounds float* %tmp20372, i64 1 + %tmp20374 = getelementptr inbounds float* %tmp20373, i64 1 + %tmp20375 = getelementptr inbounds float* %tmp20374, i64 1 + %tmp20376 = getelementptr inbounds float* %tmp20375, i64 1 + %tmp20377 = getelementptr inbounds float* %tmp20376, i64 1 + %tmp20378 = getelementptr inbounds float* %tmp20377, i64 1 + %tmp20379 = getelementptr inbounds float* %tmp20378, i64 1 + %tmp20380 = getelementptr inbounds float* %tmp20379, i64 1 + %tmp20381 = getelementptr inbounds float* %tmp20380, i64 1 + %tmp20382 = getelementptr inbounds float* %tmp20381, i64 1 
+ %tmp20383 = getelementptr inbounds float* %tmp20382, i64 1 + %tmp20384 = getelementptr inbounds float* %tmp20383, i64 1 + %tmp20385 = getelementptr inbounds float* %tmp20384, i64 1 + %tmp20386 = getelementptr inbounds float* %tmp20385, i64 1 + %tmp20387 = getelementptr inbounds float* %tmp20386, i64 1 + %tmp20388 = getelementptr inbounds float* %tmp20387, i64 1 + %tmp20389 = getelementptr inbounds float* %tmp20388, i64 1 + %tmp20390 = getelementptr inbounds float* %tmp20389, i64 1 + %tmp20391 = getelementptr inbounds float* %tmp20390, i64 1 + %tmp20392 = getelementptr inbounds float* %tmp20391, i64 1 + %tmp20393 = getelementptr inbounds float* %tmp20392, i64 1 + %tmp20394 = getelementptr inbounds float* %tmp20393, i64 1 + %tmp20395 = getelementptr inbounds float* %tmp20394, i64 1 + %tmp20396 = getelementptr inbounds float* %tmp20395, i64 1 + %tmp20397 = getelementptr inbounds float* %tmp20396, i64 1 + %tmp20398 = getelementptr inbounds float* %tmp20397, i64 1 + %tmp20399 = getelementptr inbounds float* %tmp20398, i64 1 + %tmp20400 = getelementptr inbounds float* %tmp20399, i64 1 + %tmp20401 = getelementptr inbounds float* %tmp20400, i64 1 + %tmp20402 = getelementptr inbounds float* %tmp20401, i64 1 + %tmp20403 = getelementptr inbounds float* %tmp20402, i64 1 + %tmp20404 = getelementptr inbounds float* %tmp20403, i64 1 + %tmp20405 = getelementptr inbounds float* %tmp20404, i64 1 + %tmp20406 = getelementptr inbounds float* %tmp20405, i64 1 + %tmp20407 = getelementptr inbounds float* %tmp20406, i64 1 + %tmp20408 = getelementptr inbounds float* %tmp20407, i64 1 + %tmp20409 = getelementptr inbounds float* %tmp20408, i64 1 + %tmp20410 = getelementptr inbounds float* %tmp20409, i64 1 + %tmp20411 = getelementptr inbounds float* %tmp20410, i64 1 + %tmp20412 = getelementptr inbounds float* %tmp20411, i64 1 + %tmp20413 = getelementptr inbounds float* %tmp20412, i64 1 + %tmp20414 = getelementptr inbounds float* %tmp20413, i64 1 + %tmp20415 = getelementptr inbounds float* 
%tmp20414, i64 1 + %tmp20416 = getelementptr inbounds float* %tmp20415, i64 1 + %tmp20417 = getelementptr inbounds float* %tmp20416, i64 1 + %tmp20418 = getelementptr inbounds float* %tmp20417, i64 1 + %tmp20419 = getelementptr inbounds float* %tmp20418, i64 1 + %tmp20420 = getelementptr inbounds float* %tmp20419, i64 1 + %tmp20421 = getelementptr inbounds float* %tmp20420, i64 1 + %tmp20422 = getelementptr inbounds float* %tmp20421, i64 1 + %tmp20423 = getelementptr inbounds float* %tmp20422, i64 1 + %tmp20424 = getelementptr inbounds float* %tmp20423, i64 1 + %tmp20425 = getelementptr inbounds float* %tmp20424, i64 1 + %tmp20426 = getelementptr inbounds float* %tmp20425, i64 1 + %tmp20427 = getelementptr inbounds float* %tmp20426, i64 1 + %tmp20428 = getelementptr inbounds float* %tmp20427, i64 1 + %tmp20429 = getelementptr inbounds float* %tmp20428, i64 1 + %tmp20430 = getelementptr inbounds float* %tmp20429, i64 1 + %tmp20431 = getelementptr inbounds float* %tmp20430, i64 1 + %tmp20432 = getelementptr inbounds float* %tmp20431, i64 1 + %tmp20433 = getelementptr inbounds float* %tmp20432, i64 1 + %tmp20434 = getelementptr inbounds float* %tmp20433, i64 1 + %tmp20435 = getelementptr inbounds float* %tmp20434, i64 1 + %tmp20436 = getelementptr inbounds float* %tmp20435, i64 1 + %tmp20437 = getelementptr inbounds float* %tmp20436, i64 1 + %tmp20438 = getelementptr inbounds float* %tmp20437, i64 1 + %tmp20439 = getelementptr inbounds float* %tmp20438, i64 1 + %tmp20440 = getelementptr inbounds float* %tmp20439, i64 1 + %tmp20441 = getelementptr inbounds float* %tmp20440, i64 1 + %tmp20442 = getelementptr inbounds float* %tmp20441, i64 1 + %tmp20443 = getelementptr inbounds float* %tmp20442, i64 1 + %tmp20444 = getelementptr inbounds float* %tmp20443, i64 1 + %tmp20445 = getelementptr inbounds float* %tmp20444, i64 1 + %tmp20446 = getelementptr inbounds float* %tmp20445, i64 1 + %tmp20447 = getelementptr inbounds float* %tmp20446, i64 1 + %tmp20448 = getelementptr 
inbounds float* %tmp20447, i64 1 + %tmp20449 = getelementptr inbounds float* %tmp20448, i64 1 + %tmp20450 = getelementptr inbounds float* %tmp20449, i64 1 + %tmp20451 = getelementptr inbounds float* %tmp20450, i64 1 + %tmp20452 = getelementptr inbounds float* %tmp20451, i64 1 + %tmp20453 = getelementptr inbounds float* %tmp20452, i64 1 + %tmp20454 = getelementptr inbounds float* %tmp20453, i64 1 + %tmp20455 = getelementptr inbounds float* %tmp20454, i64 1 + %tmp20456 = getelementptr inbounds float* %tmp20455, i64 1 + %tmp20457 = getelementptr inbounds float* %tmp20456, i64 1 + %tmp20458 = getelementptr inbounds float* %tmp20457, i64 1 + %tmp20459 = getelementptr inbounds float* %tmp20458, i64 1 + %tmp20460 = getelementptr inbounds float* %tmp20459, i64 1 + %tmp20461 = getelementptr inbounds float* %tmp20460, i64 1 + %tmp20462 = getelementptr inbounds float* %tmp20461, i64 1 + %tmp20463 = getelementptr inbounds float* %tmp20462, i64 1 + %tmp20464 = getelementptr inbounds float* %tmp20463, i64 1 + %tmp20465 = getelementptr inbounds float* %tmp20464, i64 1 + %tmp20466 = getelementptr inbounds float* %tmp20465, i64 1 + %tmp20467 = getelementptr inbounds float* %tmp20466, i64 1 + %tmp20468 = getelementptr inbounds float* %tmp20467, i64 1 + %tmp20469 = getelementptr inbounds float* %tmp20468, i64 1 + %tmp20470 = getelementptr inbounds float* %tmp20469, i64 1 + %tmp20471 = getelementptr inbounds float* %tmp20470, i64 1 + %tmp20472 = getelementptr inbounds float* %tmp20471, i64 1 + %tmp20473 = getelementptr inbounds float* %tmp20472, i64 1 + %tmp20474 = getelementptr inbounds float* %tmp20473, i64 1 + %tmp20475 = getelementptr inbounds float* %tmp20474, i64 1 + %tmp20476 = getelementptr inbounds float* %tmp20475, i64 1 + %tmp20477 = getelementptr inbounds float* %tmp20476, i64 1 + %tmp20478 = getelementptr inbounds float* %tmp20477, i64 1 + %tmp20479 = getelementptr inbounds float* %tmp20478, i64 1 + %tmp20480 = getelementptr inbounds float* %tmp20479, i64 1 + %tmp20481 = 
getelementptr inbounds float* %tmp20480, i64 1 + %tmp20482 = getelementptr inbounds float* %tmp20481, i64 1 + %tmp20483 = getelementptr inbounds float* %tmp20482, i64 1 + %tmp20484 = getelementptr inbounds float* %tmp20483, i64 1 + %tmp20485 = getelementptr inbounds float* %tmp20484, i64 1 + %tmp20486 = getelementptr inbounds float* %tmp20485, i64 1 + %tmp20487 = getelementptr inbounds float* %tmp20486, i64 1 + %tmp20488 = getelementptr inbounds float* %tmp20487, i64 1 + %tmp20489 = getelementptr inbounds float* %tmp20488, i64 1 + %tmp20490 = getelementptr inbounds float* %tmp20489, i64 1 + %tmp20491 = getelementptr inbounds float* %tmp20490, i64 1 + %tmp20492 = getelementptr inbounds float* %tmp20491, i64 1 + %tmp20493 = getelementptr inbounds float* %tmp20492, i64 1 + %tmp20494 = getelementptr inbounds float* %tmp20493, i64 1 + %tmp20495 = getelementptr inbounds float* %tmp20494, i64 1 + %tmp20496 = getelementptr inbounds float* %tmp20495, i64 1 + %tmp20497 = getelementptr inbounds float* %tmp20496, i64 1 + %tmp20498 = getelementptr inbounds float* %tmp20497, i64 1 + %tmp20499 = getelementptr inbounds float* %tmp20498, i64 1 + %tmp20500 = getelementptr inbounds float* %tmp20499, i64 1 + %tmp20501 = getelementptr inbounds float* %tmp20500, i64 1 + %tmp20502 = getelementptr inbounds float* %tmp20501, i64 1 + %tmp20503 = getelementptr inbounds float* %tmp20502, i64 1 + %tmp20504 = getelementptr inbounds float* %tmp20503, i64 1 + %tmp20505 = getelementptr inbounds float* %tmp20504, i64 1 + %tmp20506 = getelementptr inbounds float* %tmp20505, i64 1 + %tmp20507 = getelementptr inbounds float* %tmp20506, i64 1 + %tmp20508 = getelementptr inbounds float* %tmp20507, i64 1 + %tmp20509 = getelementptr inbounds float* %tmp20508, i64 1 + %tmp20510 = getelementptr inbounds float* %tmp20509, i64 1 + %tmp20511 = getelementptr inbounds float* %tmp20510, i64 1 + %tmp20512 = getelementptr inbounds float* %tmp20511, i64 1 + %tmp20513 = getelementptr inbounds float* %tmp20512, i64 1 
+ %tmp20514 = getelementptr inbounds float* %tmp20513, i64 1 + %tmp20515 = getelementptr inbounds float* %tmp20514, i64 1 + %tmp20516 = getelementptr inbounds float* %tmp20515, i64 1 + %tmp20517 = getelementptr inbounds float* %tmp20516, i64 1 + %tmp20518 = getelementptr inbounds float* %tmp20517, i64 1 + %tmp20519 = getelementptr inbounds float* %tmp20518, i64 1 + %tmp20520 = getelementptr inbounds float* %tmp20519, i64 1 + %tmp20521 = getelementptr inbounds float* %tmp20520, i64 1 + %tmp20522 = getelementptr inbounds float* %tmp20521, i64 1 + %tmp20523 = getelementptr inbounds float* %tmp20522, i64 1 + %tmp20524 = getelementptr inbounds float* %tmp20523, i64 1 + %tmp20525 = getelementptr inbounds float* %tmp20524, i64 1 + %tmp20526 = getelementptr inbounds float* %tmp20525, i64 1 + %tmp20527 = getelementptr inbounds float* %tmp20526, i64 1 + %tmp20528 = getelementptr inbounds float* %tmp20527, i64 1 + %tmp20529 = getelementptr inbounds float* %tmp20528, i64 1 + %tmp20530 = getelementptr inbounds float* %tmp20529, i64 1 + %tmp20531 = getelementptr inbounds float* %tmp20530, i64 1 + %tmp20532 = getelementptr inbounds float* %tmp20531, i64 1 + %tmp20533 = getelementptr inbounds float* %tmp20532, i64 1 + %tmp20534 = getelementptr inbounds float* %tmp20533, i64 1 + %tmp20535 = getelementptr inbounds float* %tmp20534, i64 1 + %tmp20536 = getelementptr inbounds float* %tmp20535, i64 1 + %tmp20537 = getelementptr inbounds float* %tmp20536, i64 1 + %tmp20538 = getelementptr inbounds float* %tmp20537, i64 1 + %tmp20539 = getelementptr inbounds float* %tmp20538, i64 1 + %tmp20540 = getelementptr inbounds float* %tmp20539, i64 1 + %tmp20541 = getelementptr inbounds float* %tmp20540, i64 1 + %tmp20542 = getelementptr inbounds float* %tmp20541, i64 1 + %tmp20543 = getelementptr inbounds float* %tmp20542, i64 1 + %tmp20544 = getelementptr inbounds float* %tmp20543, i64 1 + %tmp20545 = getelementptr inbounds float* %tmp20544, i64 1 + %tmp20546 = getelementptr inbounds float* 
%tmp20545, i64 1 + %tmp20547 = getelementptr inbounds float* %tmp20546, i64 1 + %tmp20548 = getelementptr inbounds float* %tmp20547, i64 1 + %tmp20549 = getelementptr inbounds float* %tmp20548, i64 1 + %tmp20550 = getelementptr inbounds float* %tmp20549, i64 1 + %tmp20551 = getelementptr inbounds float* %tmp20550, i64 1 + %tmp20552 = getelementptr inbounds float* %tmp20551, i64 1 + %tmp20553 = getelementptr inbounds float* %tmp20552, i64 1 + %tmp20554 = getelementptr inbounds float* %tmp20553, i64 1 + %tmp20555 = getelementptr inbounds float* %tmp20554, i64 1 + %tmp20556 = getelementptr inbounds float* %tmp20555, i64 1 + %tmp20557 = getelementptr inbounds float* %tmp20556, i64 1 + %tmp20558 = getelementptr inbounds float* %tmp20557, i64 1 + %tmp20559 = getelementptr inbounds float* %tmp20558, i64 1 + %tmp20560 = getelementptr inbounds float* %tmp20559, i64 1 + %tmp20561 = getelementptr inbounds float* %tmp20560, i64 1 + %tmp20562 = getelementptr inbounds float* %tmp20561, i64 1 + %tmp20563 = getelementptr inbounds float* %tmp20562, i64 1 + %tmp20564 = getelementptr inbounds float* %tmp20563, i64 1 + %tmp20565 = getelementptr inbounds float* %tmp20564, i64 1 + %tmp20566 = getelementptr inbounds float* %tmp20565, i64 1 + %tmp20567 = getelementptr inbounds float* %tmp20566, i64 1 + %tmp20568 = getelementptr inbounds float* %tmp20567, i64 1 + %tmp20569 = getelementptr inbounds float* %tmp20568, i64 1 + %tmp20570 = getelementptr inbounds float* %tmp20569, i64 1 + %tmp20571 = getelementptr inbounds float* %tmp20570, i64 1 + %tmp20572 = getelementptr inbounds float* %tmp20571, i64 1 + %tmp20573 = getelementptr inbounds float* %tmp20572, i64 1 + %tmp20574 = getelementptr inbounds float* %tmp20573, i64 1 + %tmp20575 = getelementptr inbounds float* %tmp20574, i64 1 + %tmp20576 = getelementptr inbounds float* %tmp20575, i64 1 + %tmp20577 = getelementptr inbounds float* %tmp20576, i64 1 + %tmp20578 = getelementptr inbounds float* %tmp20577, i64 1 + %tmp20579 = getelementptr 
inbounds float* %tmp20578, i64 1 + %tmp20580 = getelementptr inbounds float* %tmp20579, i64 1 + %tmp20581 = getelementptr inbounds float* %tmp20580, i64 1 + %tmp20582 = getelementptr inbounds float* %tmp20581, i64 1 + %tmp20583 = getelementptr inbounds float* %tmp20582, i64 1 + %tmp20584 = getelementptr inbounds float* %tmp20583, i64 1 + %tmp20585 = getelementptr inbounds float* %tmp20584, i64 1 + %tmp20586 = getelementptr inbounds float* %tmp20585, i64 1 + %tmp20587 = getelementptr inbounds float* %tmp20586, i64 1 + %tmp20588 = getelementptr inbounds float* %tmp20587, i64 1 + %tmp20589 = getelementptr inbounds float* %tmp20588, i64 1 + %tmp20590 = getelementptr inbounds float* %tmp20589, i64 1 + %tmp20591 = getelementptr inbounds float* %tmp20590, i64 1 + %tmp20592 = getelementptr inbounds float* %tmp20591, i64 1 + %tmp20593 = getelementptr inbounds float* %tmp20592, i64 1 + %tmp20594 = getelementptr inbounds float* %tmp20593, i64 1 + %tmp20595 = getelementptr inbounds float* %tmp20594, i64 1 + %tmp20596 = getelementptr inbounds float* %tmp20595, i64 1 + %tmp20597 = getelementptr inbounds float* %tmp20596, i64 1 + %tmp20598 = getelementptr inbounds float* %tmp20597, i64 1 + %tmp20599 = getelementptr inbounds float* %tmp20598, i64 1 + %tmp20600 = getelementptr inbounds float* %tmp20599, i64 1 + %tmp20601 = getelementptr inbounds float* %tmp20600, i64 1 + %tmp20602 = getelementptr inbounds float* %tmp20601, i64 1 + %tmp20603 = getelementptr inbounds float* %tmp20602, i64 1 + %tmp20604 = getelementptr inbounds float* %tmp20603, i64 1 + %tmp20605 = getelementptr inbounds float* %tmp20604, i64 1 + %tmp20606 = getelementptr inbounds float* %tmp20605, i64 1 + %tmp20607 = getelementptr inbounds float* %tmp20606, i64 1 + %tmp20608 = getelementptr inbounds float* %tmp20607, i64 1 + %tmp20609 = getelementptr inbounds float* %tmp20608, i64 1 + %tmp20610 = getelementptr inbounds float* %tmp20609, i64 1 + %tmp20611 = getelementptr inbounds float* %tmp20610, i64 1 + %tmp20612 = 
getelementptr inbounds float* %tmp20611, i64 1 + %tmp20613 = getelementptr inbounds float* %tmp20612, i64 1 + %tmp20614 = getelementptr inbounds float* %tmp20613, i64 1 + %tmp20615 = getelementptr inbounds float* %tmp20614, i64 1 + %tmp20616 = getelementptr inbounds float* %tmp20615, i64 1 + %tmp20617 = getelementptr inbounds float* %tmp20616, i64 1 + %tmp20618 = getelementptr inbounds float* %tmp20617, i64 1 + %tmp20619 = getelementptr inbounds float* %tmp20618, i64 1 + %tmp20620 = getelementptr inbounds float* %tmp20619, i64 1 + %tmp20621 = getelementptr inbounds float* %tmp20620, i64 1 + %tmp20622 = getelementptr inbounds float* %tmp20621, i64 1 + %tmp20623 = getelementptr inbounds float* %tmp20622, i64 1 + %tmp20624 = getelementptr inbounds float* %tmp20623, i64 1 + %tmp20625 = getelementptr inbounds float* %tmp20624, i64 1 + %tmp20626 = getelementptr inbounds float* %tmp20625, i64 1 + %tmp20627 = getelementptr inbounds float* %tmp20626, i64 1 + %tmp20628 = getelementptr inbounds float* %tmp20627, i64 1 + %tmp20629 = getelementptr inbounds float* %tmp20628, i64 1 + %tmp20630 = getelementptr inbounds float* %tmp20629, i64 1 + %tmp20631 = getelementptr inbounds float* %tmp20630, i64 1 + %tmp20632 = getelementptr inbounds float* %tmp20631, i64 1 + %tmp20633 = getelementptr inbounds float* %tmp20632, i64 1 + %tmp20634 = getelementptr inbounds float* %tmp20633, i64 1 + %tmp20635 = getelementptr inbounds float* %tmp20634, i64 1 + %tmp20636 = getelementptr inbounds float* %tmp20635, i64 1 + %tmp20637 = getelementptr inbounds float* %tmp20636, i64 1 + %tmp20638 = getelementptr inbounds float* %tmp20637, i64 1 + %tmp20639 = getelementptr inbounds float* %tmp20638, i64 1 + %tmp20640 = getelementptr inbounds float* %tmp20639, i64 1 + %tmp20641 = getelementptr inbounds float* %tmp20640, i64 1 + %tmp20642 = getelementptr inbounds float* %tmp20641, i64 1 + %tmp20643 = getelementptr inbounds float* %tmp20642, i64 1 + %tmp20644 = getelementptr inbounds float* %tmp20643, i64 1 
+ %tmp20645 = getelementptr inbounds float* %tmp20644, i64 1 + %tmp20646 = getelementptr inbounds float* %tmp20645, i64 1 + %tmp20647 = getelementptr inbounds float* %tmp20646, i64 1 + %tmp20648 = getelementptr inbounds float* %tmp20647, i64 1 + %tmp20649 = getelementptr inbounds float* %tmp20648, i64 1 + %tmp20650 = getelementptr inbounds float* %tmp20649, i64 1 + %tmp20651 = getelementptr inbounds float* %tmp20650, i64 1 + %tmp20652 = getelementptr inbounds float* %tmp20651, i64 1 + %tmp20653 = getelementptr inbounds float* %tmp20652, i64 1 + %tmp20654 = getelementptr inbounds float* %tmp20653, i64 1 + %tmp20655 = getelementptr inbounds float* %tmp20654, i64 1 + %tmp20656 = getelementptr inbounds float* %tmp20655, i64 1 + %tmp20657 = getelementptr inbounds float* %tmp20656, i64 1 + %tmp20658 = getelementptr inbounds float* %tmp20657, i64 1 + %tmp20659 = getelementptr inbounds float* %tmp20658, i64 1 + %tmp20660 = getelementptr inbounds float* %tmp20659, i64 1 + %tmp20661 = getelementptr inbounds float* %tmp20660, i64 1 + %tmp20662 = getelementptr inbounds float* %tmp20661, i64 1 + %tmp20663 = getelementptr inbounds float* %tmp20662, i64 1 + %tmp20664 = getelementptr inbounds float* %tmp20663, i64 1 + %tmp20665 = getelementptr inbounds float* %tmp20664, i64 1 + %tmp20666 = getelementptr inbounds float* %tmp20665, i64 1 + %tmp20667 = getelementptr inbounds float* %tmp20666, i64 1 + %tmp20668 = getelementptr inbounds float* %tmp20667, i64 1 + %tmp20669 = getelementptr inbounds float* %tmp20668, i64 1 + %tmp20670 = getelementptr inbounds float* %tmp20669, i64 1 + %tmp20671 = getelementptr inbounds float* %tmp20670, i64 1 + %tmp20672 = getelementptr inbounds float* %tmp20671, i64 1 + %tmp20673 = getelementptr inbounds float* %tmp20672, i64 1 + %tmp20674 = getelementptr inbounds float* %tmp20673, i64 1 + %tmp20675 = getelementptr inbounds float* %tmp20674, i64 1 + %tmp20676 = getelementptr inbounds float* %tmp20675, i64 1 + %tmp20677 = getelementptr inbounds float* 
%tmp20676, i64 1 + %tmp20678 = getelementptr inbounds float* %tmp20677, i64 1 + %tmp20679 = getelementptr inbounds float* %tmp20678, i64 1 + %tmp20680 = getelementptr inbounds float* %tmp20679, i64 1 + %tmp20681 = getelementptr inbounds float* %tmp20680, i64 1 + %tmp20682 = getelementptr inbounds float* %tmp20681, i64 1 + %tmp20683 = getelementptr inbounds float* %tmp20682, i64 1 + %tmp20684 = getelementptr inbounds float* %tmp20683, i64 1 + %tmp20685 = getelementptr inbounds float* %tmp20684, i64 1 + %tmp20686 = getelementptr inbounds float* %tmp20685, i64 1 + %tmp20687 = getelementptr inbounds float* %tmp20686, i64 1 + %tmp20688 = getelementptr inbounds float* %tmp20687, i64 1 + %tmp20689 = getelementptr inbounds float* %tmp20688, i64 1 + %tmp20690 = getelementptr inbounds float* %tmp20689, i64 1 + %tmp20691 = getelementptr inbounds float* %tmp20690, i64 1 + %tmp20692 = getelementptr inbounds float* %tmp20691, i64 1 + %tmp20693 = getelementptr inbounds float* %tmp20692, i64 1 + %tmp20694 = getelementptr inbounds float* %tmp20693, i64 1 + %tmp20695 = getelementptr inbounds float* %tmp20694, i64 1 + %tmp20696 = getelementptr inbounds float* %tmp20695, i64 1 + %tmp20697 = getelementptr inbounds float* %tmp20696, i64 1 + %tmp20698 = getelementptr inbounds float* %tmp20697, i64 1 + %tmp20699 = getelementptr inbounds float* %tmp20698, i64 1 + %tmp20700 = getelementptr inbounds float* %tmp20699, i64 1 + %tmp20701 = getelementptr inbounds float* %tmp20700, i64 1 + %tmp20702 = getelementptr inbounds float* %tmp20701, i64 1 + %tmp20703 = getelementptr inbounds float* %tmp20702, i64 1 + %tmp20704 = getelementptr inbounds float* %tmp20703, i64 1 + %tmp20705 = getelementptr inbounds float* %tmp20704, i64 1 + %tmp20706 = getelementptr inbounds float* %tmp20705, i64 1 + %tmp20707 = getelementptr inbounds float* %tmp20706, i64 1 + %tmp20708 = getelementptr inbounds float* %tmp20707, i64 1 + %tmp20709 = getelementptr inbounds float* %tmp20708, i64 1 + %tmp20710 = getelementptr 
inbounds float* %tmp20709, i64 1 + %tmp20711 = getelementptr inbounds float* %tmp20710, i64 1 + %tmp20712 = getelementptr inbounds float* %tmp20711, i64 1 + %tmp20713 = getelementptr inbounds float* %tmp20712, i64 1 + %tmp20714 = getelementptr inbounds float* %tmp20713, i64 1 + %tmp20715 = getelementptr inbounds float* %tmp20714, i64 1 + %tmp20716 = getelementptr inbounds float* %tmp20715, i64 1 + %tmp20717 = getelementptr inbounds float* %tmp20716, i64 1 + %tmp20718 = getelementptr inbounds float* %tmp20717, i64 1 + %tmp20719 = getelementptr inbounds float* %tmp20718, i64 1 + %tmp20720 = getelementptr inbounds float* %tmp20719, i64 1 + %tmp20721 = getelementptr inbounds float* %tmp20720, i64 1 + %tmp20722 = getelementptr inbounds float* %tmp20721, i64 1 + %tmp20723 = getelementptr inbounds float* %tmp20722, i64 1 + %tmp20724 = getelementptr inbounds float* %tmp20723, i64 1 + %tmp20725 = getelementptr inbounds float* %tmp20724, i64 1 + %tmp20726 = getelementptr inbounds float* %tmp20725, i64 1 + %tmp20727 = getelementptr inbounds float* %tmp20726, i64 1 + %tmp20728 = getelementptr inbounds float* %tmp20727, i64 1 + %tmp20729 = getelementptr inbounds float* %tmp20728, i64 1 + %tmp20730 = getelementptr inbounds float* %tmp20729, i64 1 + %tmp20731 = getelementptr inbounds float* %tmp20730, i64 1 + %tmp20732 = getelementptr inbounds float* %tmp20731, i64 1 + %tmp20733 = getelementptr inbounds float* %tmp20732, i64 1 + %tmp20734 = getelementptr inbounds float* %tmp20733, i64 1 + %tmp20735 = getelementptr inbounds float* %tmp20734, i64 1 + %tmp20736 = getelementptr inbounds float* %tmp20735, i64 1 + %tmp20737 = getelementptr inbounds float* %tmp20736, i64 1 + %tmp20738 = getelementptr inbounds float* %tmp20737, i64 1 + %tmp20739 = getelementptr inbounds float* %tmp20738, i64 1 + %tmp20740 = getelementptr inbounds float* %tmp20739, i64 1 + %tmp20741 = getelementptr inbounds float* %tmp20740, i64 1 + %tmp20742 = getelementptr inbounds float* %tmp20741, i64 1 + %tmp20743 = 
getelementptr inbounds float* %tmp20742, i64 1 + %tmp20744 = getelementptr inbounds float* %tmp20743, i64 1 + %tmp20745 = getelementptr inbounds float* %tmp20744, i64 1 + %tmp20746 = getelementptr inbounds float* %tmp20745, i64 1 + %tmp20747 = getelementptr inbounds float* %tmp20746, i64 1 + %tmp20748 = getelementptr inbounds float* %tmp20747, i64 1 + %tmp20749 = getelementptr inbounds float* %tmp20748, i64 1 + %tmp20750 = getelementptr inbounds float* %tmp20749, i64 1 + %tmp20751 = getelementptr inbounds float* %tmp20750, i64 1 + %tmp20752 = getelementptr inbounds float* %tmp20751, i64 1 + %tmp20753 = getelementptr inbounds float* %tmp20752, i64 1 + %tmp20754 = getelementptr inbounds float* %tmp20753, i64 1 + %tmp20755 = getelementptr inbounds float* %tmp20754, i64 1 + %tmp20756 = getelementptr inbounds float* %tmp20755, i64 1 + %tmp20757 = getelementptr inbounds float* %tmp20756, i64 1 + %tmp20758 = getelementptr inbounds float* %tmp20757, i64 1 + %tmp20759 = getelementptr inbounds float* %tmp20758, i64 1 + %tmp20760 = getelementptr inbounds float* %tmp20759, i64 1 + %tmp20761 = getelementptr inbounds float* %tmp20760, i64 1 + %tmp20762 = getelementptr inbounds float* %tmp20761, i64 1 + %tmp20763 = getelementptr inbounds float* %tmp20762, i64 1 + %tmp20764 = getelementptr inbounds float* %tmp20763, i64 1 + %tmp20765 = getelementptr inbounds float* %tmp20764, i64 1 + %tmp20766 = getelementptr inbounds float* %tmp20765, i64 1 + %tmp20767 = getelementptr inbounds float* %tmp20766, i64 1 + %tmp20768 = getelementptr inbounds float* %tmp20767, i64 1 + %tmp20769 = getelementptr inbounds float* %tmp20768, i64 1 + %tmp20770 = getelementptr inbounds float* %tmp20769, i64 1 + %tmp20771 = getelementptr inbounds float* %tmp20770, i64 1 + %tmp20772 = getelementptr inbounds float* %tmp20771, i64 1 + %tmp20773 = getelementptr inbounds float* %tmp20772, i64 1 + %tmp20774 = getelementptr inbounds float* %tmp20773, i64 1 + %tmp20775 = getelementptr inbounds float* %tmp20774, i64 1 
+ %tmp20776 = getelementptr inbounds float* %tmp20775, i64 1 + %tmp20777 = getelementptr inbounds float* %tmp20776, i64 1 + %tmp20778 = getelementptr inbounds float* %tmp20777, i64 1 + %tmp20779 = getelementptr inbounds float* %tmp20778, i64 1 + %tmp20780 = getelementptr inbounds float* %tmp20779, i64 1 + %tmp20781 = getelementptr inbounds float* %tmp20780, i64 1 + %tmp20782 = getelementptr inbounds float* %tmp20781, i64 1 + %tmp20783 = getelementptr inbounds float* %tmp20782, i64 1 + %tmp20784 = getelementptr inbounds float* %tmp20783, i64 1 + %tmp20785 = getelementptr inbounds float* %tmp20784, i64 1 + %tmp20786 = getelementptr inbounds float* %tmp20785, i64 1 + %tmp20787 = getelementptr inbounds float* %tmp20786, i64 1 + %tmp20788 = getelementptr inbounds float* %tmp20787, i64 1 + %tmp20789 = getelementptr inbounds float* %tmp20788, i64 1 + %tmp20790 = getelementptr inbounds float* %tmp20789, i64 1 + %tmp20791 = getelementptr inbounds float* %tmp20790, i64 1 + %tmp20792 = getelementptr inbounds float* %tmp20791, i64 1 + %tmp20793 = getelementptr inbounds float* %tmp20792, i64 1 + %tmp20794 = getelementptr inbounds float* %tmp20793, i64 1 + %tmp20795 = getelementptr inbounds float* %tmp20794, i64 1 + %tmp20796 = getelementptr inbounds float* %tmp20795, i64 1 + %tmp20797 = getelementptr inbounds float* %tmp20796, i64 1 + %tmp20798 = getelementptr inbounds float* %tmp20797, i64 1 + %tmp20799 = getelementptr inbounds float* %tmp20798, i64 1 + %tmp20800 = getelementptr inbounds float* %tmp20799, i64 1 + %tmp20801 = getelementptr inbounds float* %tmp20800, i64 1 + %tmp20802 = getelementptr inbounds float* %tmp20801, i64 1 + %tmp20803 = getelementptr inbounds float* %tmp20802, i64 1 + %tmp20804 = getelementptr inbounds float* %tmp20803, i64 1 + %tmp20805 = getelementptr inbounds float* %tmp20804, i64 1 + %tmp20806 = getelementptr inbounds float* %tmp20805, i64 1 + %tmp20807 = getelementptr inbounds float* %tmp20806, i64 1 + %tmp20808 = getelementptr inbounds float* 
%tmp20807, i64 1 + %tmp20809 = getelementptr inbounds float* %tmp20808, i64 1 + %tmp20810 = getelementptr inbounds float* %tmp20809, i64 1 + %tmp20811 = getelementptr inbounds float* %tmp20810, i64 1 + %tmp20812 = getelementptr inbounds float* %tmp20811, i64 1 + %tmp20813 = getelementptr inbounds float* %tmp20812, i64 1 + %tmp20814 = getelementptr inbounds float* %tmp20813, i64 1 + %tmp20815 = getelementptr inbounds float* %tmp20814, i64 1 + %tmp20816 = getelementptr inbounds float* %tmp20815, i64 1 + %tmp20817 = getelementptr inbounds float* %tmp20816, i64 1 + %tmp20818 = getelementptr inbounds float* %tmp20817, i64 1 + %tmp20819 = getelementptr inbounds float* %tmp20818, i64 1 + %tmp20820 = getelementptr inbounds float* %tmp20819, i64 1 + %tmp20821 = getelementptr inbounds float* %tmp20820, i64 1 + %tmp20822 = getelementptr inbounds float* %tmp20821, i64 1 + %tmp20823 = getelementptr inbounds float* %tmp20822, i64 1 + %tmp20824 = getelementptr inbounds float* %tmp20823, i64 1 + %tmp20825 = getelementptr inbounds float* %tmp20824, i64 1 + %tmp20826 = getelementptr inbounds float* %tmp20825, i64 1 + %tmp20827 = getelementptr inbounds float* %tmp20826, i64 1 + %tmp20828 = getelementptr inbounds float* %tmp20827, i64 1 + %tmp20829 = getelementptr inbounds float* %tmp20828, i64 1 + %tmp20830 = getelementptr inbounds float* %tmp20829, i64 1 + %tmp20831 = getelementptr inbounds float* %tmp20830, i64 1 + %tmp20832 = getelementptr inbounds float* %tmp20831, i64 1 + %tmp20833 = getelementptr inbounds float* %tmp20832, i64 1 + %tmp20834 = getelementptr inbounds float* %tmp20833, i64 1 + %tmp20835 = getelementptr inbounds float* %tmp20834, i64 1 + %tmp20836 = getelementptr inbounds float* %tmp20835, i64 1 + %tmp20837 = getelementptr inbounds float* %tmp20836, i64 1 + %tmp20838 = getelementptr inbounds float* %tmp20837, i64 1 + %tmp20839 = getelementptr inbounds float* %tmp20838, i64 1 + %tmp20840 = getelementptr inbounds float* %tmp20839, i64 1 + %tmp20841 = getelementptr 
inbounds float* %tmp20840, i64 1 + %tmp20842 = getelementptr inbounds float* %tmp20841, i64 1 + %tmp20843 = getelementptr inbounds float* %tmp20842, i64 1 + %tmp20844 = getelementptr inbounds float* %tmp20843, i64 1 + %tmp20845 = getelementptr inbounds float* %tmp20844, i64 1 + %tmp20846 = getelementptr inbounds float* %tmp20845, i64 1 + %tmp20847 = getelementptr inbounds float* %tmp20846, i64 1 + %tmp20848 = getelementptr inbounds float* %tmp20847, i64 1 + %tmp20849 = getelementptr inbounds float* %tmp20848, i64 1 + %tmp20850 = getelementptr inbounds float* %tmp20849, i64 1 + %tmp20851 = getelementptr inbounds float* %tmp20850, i64 1 + %tmp20852 = getelementptr inbounds float* %tmp20851, i64 1 + %tmp20853 = getelementptr inbounds float* %tmp20852, i64 1 + %tmp20854 = getelementptr inbounds float* %tmp20853, i64 1 + %tmp20855 = getelementptr inbounds float* %tmp20854, i64 1 + %tmp20856 = getelementptr inbounds float* %tmp20855, i64 1 + %tmp20857 = getelementptr inbounds float* %tmp20856, i64 1 + %tmp20858 = getelementptr inbounds float* %tmp20857, i64 1 + %tmp20859 = getelementptr inbounds float* %tmp20858, i64 1 + %tmp20860 = getelementptr inbounds float* %tmp20859, i64 1 + %tmp20861 = getelementptr inbounds float* %tmp20860, i64 1 + %tmp20862 = getelementptr inbounds float* %tmp20861, i64 1 + %tmp20863 = getelementptr inbounds float* %tmp20862, i64 1 + %tmp20864 = getelementptr inbounds float* %tmp20863, i64 1 + %tmp20865 = getelementptr inbounds float* %tmp20864, i64 1 + %tmp20866 = getelementptr inbounds float* %tmp20865, i64 1 + %tmp20867 = getelementptr inbounds float* %tmp20866, i64 1 + %tmp20868 = getelementptr inbounds float* %tmp20867, i64 1 + %tmp20869 = getelementptr inbounds float* %tmp20868, i64 1 + %tmp20870 = getelementptr inbounds float* %tmp20869, i64 1 + %tmp20871 = getelementptr inbounds float* %tmp20870, i64 1 + %tmp20872 = getelementptr inbounds float* %tmp20871, i64 1 + %tmp20873 = getelementptr inbounds float* %tmp20872, i64 1 + %tmp20874 = 
getelementptr inbounds float* %tmp20873, i64 1 + %tmp20875 = getelementptr inbounds float* %tmp20874, i64 1 + %tmp20876 = getelementptr inbounds float* %tmp20875, i64 1 + %tmp20877 = getelementptr inbounds float* %tmp20876, i64 1 + %tmp20878 = getelementptr inbounds float* %tmp20877, i64 1 + %tmp20879 = getelementptr inbounds float* %tmp20878, i64 1 + %tmp20880 = getelementptr inbounds float* %tmp20879, i64 1 + %tmp20881 = getelementptr inbounds float* %tmp20880, i64 1 + %tmp20882 = getelementptr inbounds float* %tmp20881, i64 1 + %tmp20883 = getelementptr inbounds float* %tmp20882, i64 1 + %tmp20884 = getelementptr inbounds float* %tmp20883, i64 1 + %tmp20885 = getelementptr inbounds float* %tmp20884, i64 1 + %tmp20886 = getelementptr inbounds float* %tmp20885, i64 1 + %tmp20887 = getelementptr inbounds float* %tmp20886, i64 1 + %tmp20888 = getelementptr inbounds float* %tmp20887, i64 1 + %tmp20889 = getelementptr inbounds float* %tmp20888, i64 1 + %tmp20890 = getelementptr inbounds float* %tmp20889, i64 1 + %tmp20891 = getelementptr inbounds float* %tmp20890, i64 1 + %tmp20892 = getelementptr inbounds float* %tmp20891, i64 1 + %tmp20893 = getelementptr inbounds float* %tmp20892, i64 1 + %tmp20894 = getelementptr inbounds float* %tmp20893, i64 1 + %tmp20895 = getelementptr inbounds float* %tmp20894, i64 1 + %tmp20896 = getelementptr inbounds float* %tmp20895, i64 1 + %tmp20897 = getelementptr inbounds float* %tmp20896, i64 1 + %tmp20898 = getelementptr inbounds float* %tmp20897, i64 1 + %tmp20899 = getelementptr inbounds float* %tmp20898, i64 1 + %tmp20900 = getelementptr inbounds float* %tmp20899, i64 1 + %tmp20901 = getelementptr inbounds float* %tmp20900, i64 1 + %tmp20902 = getelementptr inbounds float* %tmp20901, i64 1 + %tmp20903 = getelementptr inbounds float* %tmp20902, i64 1 + %tmp20904 = getelementptr inbounds float* %tmp20903, i64 1 + %tmp20905 = getelementptr inbounds float* %tmp20904, i64 1 + %tmp20906 = getelementptr inbounds float* %tmp20905, i64 1 
+ %tmp20907 = getelementptr inbounds float* %tmp20906, i64 1 + %tmp20908 = getelementptr inbounds float* %tmp20907, i64 1 + %tmp20909 = getelementptr inbounds float* %tmp20908, i64 1 + %tmp20910 = getelementptr inbounds float* %tmp20909, i64 1 + %tmp20911 = getelementptr inbounds float* %tmp20910, i64 1 + %tmp20912 = getelementptr inbounds float* %tmp20911, i64 1 + %tmp20913 = getelementptr inbounds float* %tmp20912, i64 1 + %tmp20914 = getelementptr inbounds float* %tmp20913, i64 1 + %tmp20915 = getelementptr inbounds float* %tmp20914, i64 1 + %tmp20916 = getelementptr inbounds float* %tmp20915, i64 1 + %tmp20917 = getelementptr inbounds float* %tmp20916, i64 1 + %tmp20918 = getelementptr inbounds float* %tmp20917, i64 1 + %tmp20919 = getelementptr inbounds float* %tmp20918, i64 1 + %tmp20920 = getelementptr inbounds float* %tmp20919, i64 1 + %tmp20921 = getelementptr inbounds float* %tmp20920, i64 1 + %tmp20922 = getelementptr inbounds float* %tmp20921, i64 1 + %tmp20923 = getelementptr inbounds float* %tmp20922, i64 1 + %tmp20924 = getelementptr inbounds float* %tmp20923, i64 1 + %tmp20925 = getelementptr inbounds float* %tmp20924, i64 1 + %tmp20926 = getelementptr inbounds float* %tmp20925, i64 1 + %tmp20927 = getelementptr inbounds float* %tmp20926, i64 1 + %tmp20928 = getelementptr inbounds float* %tmp20927, i64 1 + %tmp20929 = getelementptr inbounds float* %tmp20928, i64 1 + %tmp20930 = getelementptr inbounds float* %tmp20929, i64 1 + %tmp20931 = getelementptr inbounds float* %tmp20930, i64 1 + %tmp20932 = getelementptr inbounds float* %tmp20931, i64 1 + %tmp20933 = getelementptr inbounds float* %tmp20932, i64 1 + %tmp20934 = getelementptr inbounds float* %tmp20933, i64 1 + %tmp20935 = getelementptr inbounds float* %tmp20934, i64 1 + %tmp20936 = getelementptr inbounds float* %tmp20935, i64 1 + %tmp20937 = getelementptr inbounds float* %tmp20936, i64 1 + %tmp20938 = getelementptr inbounds float* %tmp20937, i64 1 + %tmp20939 = getelementptr inbounds float* 
%tmp20938, i64 1 + %tmp20940 = getelementptr inbounds float* %tmp20939, i64 1 + %tmp20941 = getelementptr inbounds float* %tmp20940, i64 1 + %tmp20942 = getelementptr inbounds float* %tmp20941, i64 1 + %tmp20943 = getelementptr inbounds float* %tmp20942, i64 1 + %tmp20944 = getelementptr inbounds float* %tmp20943, i64 1 + %tmp20945 = getelementptr inbounds float* %tmp20944, i64 1 + %tmp20946 = getelementptr inbounds float* %tmp20945, i64 1 + %tmp20947 = getelementptr inbounds float* %tmp20946, i64 1 + %tmp20948 = getelementptr inbounds float* %tmp20947, i64 1 + %tmp20949 = getelementptr inbounds float* %tmp20948, i64 1 + %tmp20950 = getelementptr inbounds float* %tmp20949, i64 1 + %tmp20951 = getelementptr inbounds float* %tmp20950, i64 1 + %tmp20952 = getelementptr inbounds float* %tmp20951, i64 1 + %tmp20953 = getelementptr inbounds float* %tmp20952, i64 1 + %tmp20954 = getelementptr inbounds float* %tmp20953, i64 1 + %tmp20955 = getelementptr inbounds float* %tmp20954, i64 1 + %tmp20956 = getelementptr inbounds float* %tmp20955, i64 1 + %tmp20957 = getelementptr inbounds float* %tmp20956, i64 1 + %tmp20958 = getelementptr inbounds float* %tmp20957, i64 1 + %tmp20959 = getelementptr inbounds float* %tmp20958, i64 1 + %tmp20960 = getelementptr inbounds float* %tmp20959, i64 1 + %tmp20961 = getelementptr inbounds float* %tmp20960, i64 1 + %tmp20962 = getelementptr inbounds float* %tmp20961, i64 1 + %tmp20963 = getelementptr inbounds float* %tmp20962, i64 1 + %tmp20964 = getelementptr inbounds float* %tmp20963, i64 1 + %tmp20965 = getelementptr inbounds float* %tmp20964, i64 1 + %tmp20966 = getelementptr inbounds float* %tmp20965, i64 1 + %tmp20967 = getelementptr inbounds float* %tmp20966, i64 1 + %tmp20968 = getelementptr inbounds float* %tmp20967, i64 1 + %tmp20969 = getelementptr inbounds float* %tmp20968, i64 1 + %tmp20970 = getelementptr inbounds float* %tmp20969, i64 1 + %tmp20971 = getelementptr inbounds float* %tmp20970, i64 1 + %tmp20972 = getelementptr 
inbounds float* %tmp20971, i64 1 + %tmp20973 = getelementptr inbounds float* %tmp20972, i64 1 + %tmp20974 = getelementptr inbounds float* %tmp20973, i64 1 + %tmp20975 = getelementptr inbounds float* %tmp20974, i64 1 + %tmp20976 = getelementptr inbounds float* %tmp20975, i64 1 + %tmp20977 = getelementptr inbounds float* %tmp20976, i64 1 + %tmp20978 = getelementptr inbounds float* %tmp20977, i64 1 + %tmp20979 = getelementptr inbounds float* %tmp20978, i64 1 + %tmp20980 = getelementptr inbounds float* %tmp20979, i64 1 + %tmp20981 = getelementptr inbounds float* %tmp20980, i64 1 + %tmp20982 = getelementptr inbounds float* %tmp20981, i64 1 + %tmp20983 = getelementptr inbounds float* %tmp20982, i64 1 + %tmp20984 = getelementptr inbounds float* %tmp20983, i64 1 + %tmp20985 = getelementptr inbounds float* %tmp20984, i64 1 + %tmp20986 = getelementptr inbounds float* %tmp20985, i64 1 + %tmp20987 = getelementptr inbounds float* %tmp20986, i64 1 + %tmp20988 = getelementptr inbounds float* %tmp20987, i64 1 + %tmp20989 = getelementptr inbounds float* %tmp20988, i64 1 + %tmp20990 = getelementptr inbounds float* %tmp20989, i64 1 + %tmp20991 = getelementptr inbounds float* %tmp20990, i64 1 + %tmp20992 = getelementptr inbounds float* %tmp20991, i64 1 + %tmp20993 = getelementptr inbounds float* %tmp20992, i64 1 + %tmp20994 = getelementptr inbounds float* %tmp20993, i64 1 + %tmp20995 = getelementptr inbounds float* %tmp20994, i64 1 + %tmp20996 = getelementptr inbounds float* %tmp20995, i64 1 + %tmp20997 = getelementptr inbounds float* %tmp20996, i64 1 + %tmp20998 = getelementptr inbounds float* %tmp20997, i64 1 + %tmp20999 = getelementptr inbounds float* %tmp20998, i64 1 + %tmp21000 = getelementptr inbounds float* %tmp20999, i64 1 + %tmp21001 = getelementptr inbounds float* %tmp21000, i64 1 + %tmp21002 = getelementptr inbounds float* %tmp21001, i64 1 + %tmp21003 = getelementptr inbounds float* %tmp21002, i64 1 + %tmp21004 = getelementptr inbounds float* %tmp21003, i64 1 + %tmp21005 = 
getelementptr inbounds float* %tmp21004, i64 1 + %tmp21006 = getelementptr inbounds float* %tmp21005, i64 1 + %tmp21007 = getelementptr inbounds float* %tmp21006, i64 1 + %tmp21008 = getelementptr inbounds float* %tmp21007, i64 1 + %tmp21009 = getelementptr inbounds float* %tmp21008, i64 1 + %tmp21010 = getelementptr inbounds float* %tmp21009, i64 1 + %tmp21011 = getelementptr inbounds float* %tmp21010, i64 1 + %tmp21012 = getelementptr inbounds float* %tmp21011, i64 1 + %tmp21013 = getelementptr inbounds float* %tmp21012, i64 1 + %tmp21014 = getelementptr inbounds float* %tmp21013, i64 1 + %tmp21015 = getelementptr inbounds float* %tmp21014, i64 1 + %tmp21016 = getelementptr inbounds float* %tmp21015, i64 1 + %tmp21017 = getelementptr inbounds float* %tmp21016, i64 1 + %tmp21018 = getelementptr inbounds float* %tmp21017, i64 1 + %tmp21019 = getelementptr inbounds float* %tmp21018, i64 1 + %tmp21020 = getelementptr inbounds float* %tmp21019, i64 1 + %tmp21021 = getelementptr inbounds float* %tmp21020, i64 1 + %tmp21022 = getelementptr inbounds float* %tmp21021, i64 1 + %tmp21023 = getelementptr inbounds float* %tmp21022, i64 1 + %tmp21024 = getelementptr inbounds float* %tmp21023, i64 1 + %tmp21025 = getelementptr inbounds float* %tmp21024, i64 1 + %tmp21026 = getelementptr inbounds float* %tmp21025, i64 1 + %tmp21027 = getelementptr inbounds float* %tmp21026, i64 1 + %tmp21028 = getelementptr inbounds float* %tmp21027, i64 1 + %tmp21029 = getelementptr inbounds float* %tmp21028, i64 1 + %tmp21030 = getelementptr inbounds float* %tmp21029, i64 1 + %tmp21031 = getelementptr inbounds float* %tmp21030, i64 1 + %tmp21032 = getelementptr inbounds float* %tmp21031, i64 1 + %tmp21033 = getelementptr inbounds float* %tmp21032, i64 1 + %tmp21034 = getelementptr inbounds float* %tmp21033, i64 1 + %tmp21035 = getelementptr inbounds float* %tmp21034, i64 1 + %tmp21036 = getelementptr inbounds float* %tmp21035, i64 1 + %tmp21037 = getelementptr inbounds float* %tmp21036, i64 1 
+ %tmp21038 = getelementptr inbounds float* %tmp21037, i64 1 + %tmp21039 = getelementptr inbounds float* %tmp21038, i64 1 + %tmp21040 = getelementptr inbounds float* %tmp21039, i64 1 + %tmp21041 = getelementptr inbounds float* %tmp21040, i64 1 + %tmp21042 = getelementptr inbounds float* %tmp21041, i64 1 + %tmp21043 = getelementptr inbounds float* %tmp21042, i64 1 + %tmp21044 = getelementptr inbounds float* %tmp21043, i64 1 + %tmp21045 = getelementptr inbounds float* %tmp21044, i64 1 + %tmp21046 = getelementptr inbounds float* %tmp21045, i64 1 + %tmp21047 = getelementptr inbounds float* %tmp21046, i64 1 + %tmp21048 = getelementptr inbounds float* %tmp21047, i64 1 + %tmp21049 = getelementptr inbounds float* %tmp21048, i64 1 + %tmp21050 = getelementptr inbounds float* %tmp21049, i64 1 + %tmp21051 = getelementptr inbounds float* %tmp21050, i64 1 + %tmp21052 = getelementptr inbounds float* %tmp21051, i64 1 + %tmp21053 = getelementptr inbounds float* %tmp21052, i64 1 + %tmp21054 = getelementptr inbounds float* %tmp21053, i64 1 + %tmp21055 = getelementptr inbounds float* %tmp21054, i64 1 + %tmp21056 = getelementptr inbounds float* %tmp21055, i64 1 + %tmp21057 = getelementptr inbounds float* %tmp21056, i64 1 + %tmp21058 = getelementptr inbounds float* %tmp21057, i64 1 + %tmp21059 = getelementptr inbounds float* %tmp21058, i64 1 + %tmp21060 = getelementptr inbounds float* %tmp21059, i64 1 + %tmp21061 = getelementptr inbounds float* %tmp21060, i64 1 + %tmp21062 = getelementptr inbounds float* %tmp21061, i64 1 + %tmp21063 = getelementptr inbounds float* %tmp21062, i64 1 + %tmp21064 = getelementptr inbounds float* %tmp21063, i64 1 + %tmp21065 = getelementptr inbounds float* %tmp21064, i64 1 + %tmp21066 = getelementptr inbounds float* %tmp21065, i64 1 + %tmp21067 = getelementptr inbounds float* %tmp21066, i64 1 + %tmp21068 = getelementptr inbounds float* %tmp21067, i64 1 + %tmp21069 = getelementptr inbounds float* %tmp21068, i64 1 + %tmp21070 = getelementptr inbounds float* 
%tmp21069, i64 1 + %tmp21071 = getelementptr inbounds float* %tmp21070, i64 1 + %tmp21072 = getelementptr inbounds float* %tmp21071, i64 1 + %tmp21073 = getelementptr inbounds float* %tmp21072, i64 1 + %tmp21074 = getelementptr inbounds float* %tmp21073, i64 1 + %tmp21075 = getelementptr inbounds float* %tmp21074, i64 1 + %tmp21076 = getelementptr inbounds float* %tmp21075, i64 1 + %tmp21077 = getelementptr inbounds float* %tmp21076, i64 1 + %tmp21078 = getelementptr inbounds float* %tmp21077, i64 1 + %tmp21079 = getelementptr inbounds float* %tmp21078, i64 1 + %tmp21080 = getelementptr inbounds float* %tmp21079, i64 1 + %tmp21081 = getelementptr inbounds float* %tmp21080, i64 1 + %tmp21082 = getelementptr inbounds float* %tmp21081, i64 1 + %tmp21083 = getelementptr inbounds float* %tmp21082, i64 1 + %tmp21084 = getelementptr inbounds float* %tmp21083, i64 1 + %tmp21085 = getelementptr inbounds float* %tmp21084, i64 1 + %tmp21086 = getelementptr inbounds float* %tmp21085, i64 1 + %tmp21087 = getelementptr inbounds float* %tmp21086, i64 1 + %tmp21088 = getelementptr inbounds float* %tmp21087, i64 1 + %tmp21089 = getelementptr inbounds float* %tmp21088, i64 1 + %tmp21090 = getelementptr inbounds float* %tmp21089, i64 1 + %tmp21091 = getelementptr inbounds float* %tmp21090, i64 1 + %tmp21092 = getelementptr inbounds float* %tmp21091, i64 1 + %tmp21093 = getelementptr inbounds float* %tmp21092, i64 1 + %tmp21094 = getelementptr inbounds float* %tmp21093, i64 1 + %tmp21095 = getelementptr inbounds float* %tmp21094, i64 1 + %tmp21096 = getelementptr inbounds float* %tmp21095, i64 1 + %tmp21097 = getelementptr inbounds float* %tmp21096, i64 1 + %tmp21098 = getelementptr inbounds float* %tmp21097, i64 1 + %tmp21099 = getelementptr inbounds float* %tmp21098, i64 1 + %tmp21100 = getelementptr inbounds float* %tmp21099, i64 1 + %tmp21101 = getelementptr inbounds float* %tmp21100, i64 1 + %tmp21102 = getelementptr inbounds float* %tmp21101, i64 1 + %tmp21103 = getelementptr 
inbounds float* %tmp21102, i64 1 + %tmp21104 = getelementptr inbounds float* %tmp21103, i64 1 + %tmp21105 = getelementptr inbounds float* %tmp21104, i64 1 + %tmp21106 = getelementptr inbounds float* %tmp21105, i64 1 + %tmp21107 = getelementptr inbounds float* %tmp21106, i64 1 + %tmp21108 = getelementptr inbounds float* %tmp21107, i64 1 + %tmp21109 = getelementptr inbounds float* %tmp21108, i64 1 + %tmp21110 = getelementptr inbounds float* %tmp21109, i64 1 + %tmp21111 = getelementptr inbounds float* %tmp21110, i64 1 + %tmp21112 = getelementptr inbounds float* %tmp21111, i64 1 + %tmp21113 = getelementptr inbounds float* %tmp21112, i64 1 + %tmp21114 = getelementptr inbounds float* %tmp21113, i64 1 + %tmp21115 = getelementptr inbounds float* %tmp21114, i64 1 + %tmp21116 = getelementptr inbounds float* %tmp21115, i64 1 + %tmp21117 = getelementptr inbounds float* %tmp21116, i64 1 + %tmp21118 = getelementptr inbounds float* %tmp21117, i64 1 + %tmp21119 = getelementptr inbounds float* %tmp21118, i64 1 + %tmp21120 = getelementptr inbounds float* %tmp21119, i64 1 + %tmp21121 = getelementptr inbounds float* %tmp21120, i64 1 + %tmp21122 = getelementptr inbounds float* %tmp21121, i64 1 + %tmp21123 = getelementptr inbounds float* %tmp21122, i64 1 + %tmp21124 = getelementptr inbounds float* %tmp21123, i64 1 + %tmp21125 = getelementptr inbounds float* %tmp21124, i64 1 + %tmp21126 = getelementptr inbounds float* %tmp21125, i64 1 + %tmp21127 = getelementptr inbounds float* %tmp21126, i64 1 + %tmp21128 = getelementptr inbounds float* %tmp21127, i64 1 + %tmp21129 = getelementptr inbounds float* %tmp21128, i64 1 + %tmp21130 = getelementptr inbounds float* %tmp21129, i64 1 + %tmp21131 = getelementptr inbounds float* %tmp21130, i64 1 + %tmp21132 = getelementptr inbounds float* %tmp21131, i64 1 + %tmp21133 = getelementptr inbounds float* %tmp21132, i64 1 + %tmp21134 = getelementptr inbounds float* %tmp21133, i64 1 + %tmp21135 = getelementptr inbounds float* %tmp21134, i64 1 + %tmp21136 = 
getelementptr inbounds float* %tmp21135, i64 1 + %tmp21137 = getelementptr inbounds float* %tmp21136, i64 1 + %tmp21138 = getelementptr inbounds float* %tmp21137, i64 1 + %tmp21139 = getelementptr inbounds float* %tmp21138, i64 1 + %tmp21140 = getelementptr inbounds float* %tmp21139, i64 1 + %tmp21141 = getelementptr inbounds float* %tmp21140, i64 1 + %tmp21142 = getelementptr inbounds float* %tmp21141, i64 1 + %tmp21143 = getelementptr inbounds float* %tmp21142, i64 1 + %tmp21144 = getelementptr inbounds float* %tmp21143, i64 1 + %tmp21145 = getelementptr inbounds float* %tmp21144, i64 1 + %tmp21146 = getelementptr inbounds float* %tmp21145, i64 1 + %tmp21147 = getelementptr inbounds float* %tmp21146, i64 1 + %tmp21148 = getelementptr inbounds float* %tmp21147, i64 1 + %tmp21149 = getelementptr inbounds float* %tmp21148, i64 1 + %tmp21150 = getelementptr inbounds float* %tmp21149, i64 1 + %tmp21151 = getelementptr inbounds float* %tmp21150, i64 1 + %tmp21152 = getelementptr inbounds float* %tmp21151, i64 1 + %tmp21153 = getelementptr inbounds float* %tmp21152, i64 1 + %tmp21154 = getelementptr inbounds float* %tmp21153, i64 1 + %tmp21155 = getelementptr inbounds float* %tmp21154, i64 1 + %tmp21156 = getelementptr inbounds float* %tmp21155, i64 1 + %tmp21157 = getelementptr inbounds float* %tmp21156, i64 1 + %tmp21158 = getelementptr inbounds float* %tmp21157, i64 1 + %tmp21159 = getelementptr inbounds float* %tmp21158, i64 1 + %tmp21160 = getelementptr inbounds float* %tmp21159, i64 1 + %tmp21161 = getelementptr inbounds float* %tmp21160, i64 1 + %tmp21162 = getelementptr inbounds float* %tmp21161, i64 1 + %tmp21163 = getelementptr inbounds float* %tmp21162, i64 1 + %tmp21164 = getelementptr inbounds float* %tmp21163, i64 1 + %tmp21165 = getelementptr inbounds float* %tmp21164, i64 1 + %tmp21166 = getelementptr inbounds float* %tmp21165, i64 1 + %tmp21167 = getelementptr inbounds float* %tmp21166, i64 1 + %tmp21168 = getelementptr inbounds float* %tmp21167, i64 1 
+ %tmp21169 = getelementptr inbounds float* %tmp21168, i64 1 + %tmp21170 = getelementptr inbounds float* %tmp21169, i64 1 + %tmp21171 = getelementptr inbounds float* %tmp21170, i64 1 + %tmp21172 = getelementptr inbounds float* %tmp21171, i64 1 + %tmp21173 = getelementptr inbounds float* %tmp21172, i64 1 + %tmp21174 = getelementptr inbounds float* %tmp21173, i64 1 + %tmp21175 = getelementptr inbounds float* %tmp21174, i64 1 + %tmp21176 = getelementptr inbounds float* %tmp21175, i64 1 + %tmp21177 = getelementptr inbounds float* %tmp21176, i64 1 + %tmp21178 = getelementptr inbounds float* %tmp21177, i64 1 + %tmp21179 = getelementptr inbounds float* %tmp21178, i64 1 + %tmp21180 = getelementptr inbounds float* %tmp21179, i64 1 + %tmp21181 = getelementptr inbounds float* %tmp21180, i64 1 + %tmp21182 = getelementptr inbounds float* %tmp21181, i64 1 + %tmp21183 = getelementptr inbounds float* %tmp21182, i64 1 + %tmp21184 = getelementptr inbounds float* %tmp21183, i64 1 + %tmp21185 = getelementptr inbounds float* %tmp21184, i64 1 + %tmp21186 = getelementptr inbounds float* %tmp21185, i64 1 + %tmp21187 = getelementptr inbounds float* %tmp21186, i64 1 + %tmp21188 = getelementptr inbounds float* %tmp21187, i64 1 + %tmp21189 = getelementptr inbounds float* %tmp21188, i64 1 + %tmp21190 = getelementptr inbounds float* %tmp21189, i64 1 + %tmp21191 = getelementptr inbounds float* %tmp21190, i64 1 + %tmp21192 = getelementptr inbounds float* %tmp21191, i64 1 + %tmp21193 = getelementptr inbounds float* %tmp21192, i64 1 + %tmp21194 = getelementptr inbounds float* %tmp21193, i64 1 + %tmp21195 = getelementptr inbounds float* %tmp21194, i64 1 + %tmp21196 = getelementptr inbounds float* %tmp21195, i64 1 + %tmp21197 = getelementptr inbounds float* %tmp21196, i64 1 + %tmp21198 = getelementptr inbounds float* %tmp21197, i64 1 + %tmp21199 = getelementptr inbounds float* %tmp21198, i64 1 + %tmp21200 = getelementptr inbounds float* %tmp21199, i64 1 + %tmp21201 = getelementptr inbounds float* 
%tmp21200, i64 1 + %tmp21202 = getelementptr inbounds float* %tmp21201, i64 1 + %tmp21203 = getelementptr inbounds float* %tmp21202, i64 1 + %tmp21204 = getelementptr inbounds float* %tmp21203, i64 1 + %tmp21205 = getelementptr inbounds float* %tmp21204, i64 1 + %tmp21206 = getelementptr inbounds float* %tmp21205, i64 1 + %tmp21207 = getelementptr inbounds float* %tmp21206, i64 1 + %tmp21208 = getelementptr inbounds float* %tmp21207, i64 1 + %tmp21209 = getelementptr inbounds float* %tmp21208, i64 1 + %tmp21210 = getelementptr inbounds float* %tmp21209, i64 1 + %tmp21211 = getelementptr inbounds float* %tmp21210, i64 1 + %tmp21212 = getelementptr inbounds float* %tmp21211, i64 1 + %tmp21213 = getelementptr inbounds float* %tmp21212, i64 1 + %tmp21214 = getelementptr inbounds float* %tmp21213, i64 1 + %tmp21215 = getelementptr inbounds float* %tmp21214, i64 1 + %tmp21216 = getelementptr inbounds float* %tmp21215, i64 1 + %tmp21217 = getelementptr inbounds float* %tmp21216, i64 1 + %tmp21218 = getelementptr inbounds float* %tmp21217, i64 1 + %tmp21219 = getelementptr inbounds float* %tmp21218, i64 1 + %tmp21220 = getelementptr inbounds float* %tmp21219, i64 1 + %tmp21221 = getelementptr inbounds float* %tmp21220, i64 1 + %tmp21222 = getelementptr inbounds float* %tmp21221, i64 1 + %tmp21223 = getelementptr inbounds float* %tmp21222, i64 1 + %tmp21224 = getelementptr inbounds float* %tmp21223, i64 1 + %tmp21225 = getelementptr inbounds float* %tmp21224, i64 1 + %tmp21226 = getelementptr inbounds float* %tmp21225, i64 1 + %tmp21227 = getelementptr inbounds float* %tmp21226, i64 1 + %tmp21228 = getelementptr inbounds float* %tmp21227, i64 1 + %tmp21229 = getelementptr inbounds float* %tmp21228, i64 1 + %tmp21230 = getelementptr inbounds float* %tmp21229, i64 1 + %tmp21231 = getelementptr inbounds float* %tmp21230, i64 1 + %tmp21232 = getelementptr inbounds float* %tmp21231, i64 1 + %tmp21233 = getelementptr inbounds float* %tmp21232, i64 1 + %tmp21234 = getelementptr 
inbounds float* %tmp21233, i64 1 + %tmp21235 = getelementptr inbounds float* %tmp21234, i64 1 + %tmp21236 = getelementptr inbounds float* %tmp21235, i64 1 + %tmp21237 = getelementptr inbounds float* %tmp21236, i64 1 + %tmp21238 = getelementptr inbounds float* %tmp21237, i64 1 + %tmp21239 = getelementptr inbounds float* %tmp21238, i64 1 + %tmp21240 = getelementptr inbounds float* %tmp21239, i64 1 + %tmp21241 = getelementptr inbounds float* %tmp21240, i64 1 + %tmp21242 = getelementptr inbounds float* %tmp21241, i64 1 + %tmp21243 = getelementptr inbounds float* %tmp21242, i64 1 + %tmp21244 = getelementptr inbounds float* %tmp21243, i64 1 + %tmp21245 = getelementptr inbounds float* %tmp21244, i64 1 + %tmp21246 = getelementptr inbounds float* %tmp21245, i64 1 + %tmp21247 = getelementptr inbounds float* %tmp21246, i64 1 + %tmp21248 = getelementptr inbounds float* %tmp21247, i64 1 + %tmp21249 = getelementptr inbounds float* %tmp21248, i64 1 + %tmp21250 = getelementptr inbounds float* %tmp21249, i64 1 + %tmp21251 = getelementptr inbounds float* %tmp21250, i64 1 + %tmp21252 = getelementptr inbounds float* %tmp21251, i64 1 + %tmp21253 = getelementptr inbounds float* %tmp21252, i64 1 + %tmp21254 = getelementptr inbounds float* %tmp21253, i64 1 + %tmp21255 = getelementptr inbounds float* %tmp21254, i64 1 + %tmp21256 = getelementptr inbounds float* %tmp21255, i64 1 + %tmp21257 = getelementptr inbounds float* %tmp21256, i64 1 + %tmp21258 = getelementptr inbounds float* %tmp21257, i64 1 + %tmp21259 = getelementptr inbounds float* %tmp21258, i64 1 + %tmp21260 = getelementptr inbounds float* %tmp21259, i64 1 + %tmp21261 = getelementptr inbounds float* %tmp21260, i64 1 + %tmp21262 = getelementptr inbounds float* %tmp21261, i64 1 + %tmp21263 = getelementptr inbounds float* %tmp21262, i64 1 + %tmp21264 = getelementptr inbounds float* %tmp21263, i64 1 + %tmp21265 = getelementptr inbounds float* %tmp21264, i64 1 + %tmp21266 = getelementptr inbounds float* %tmp21265, i64 1 + %tmp21267 = 
getelementptr inbounds float* %tmp21266, i64 1 + %tmp21268 = getelementptr inbounds float* %tmp21267, i64 1 + %tmp21269 = getelementptr inbounds float* %tmp21268, i64 1 + %tmp21270 = getelementptr inbounds float* %tmp21269, i64 1 + %tmp21271 = getelementptr inbounds float* %tmp21270, i64 1 + %tmp21272 = getelementptr inbounds float* %tmp21271, i64 1 + %tmp21273 = getelementptr inbounds float* %tmp21272, i64 1 + %tmp21274 = getelementptr inbounds float* %tmp21273, i64 1 + %tmp21275 = getelementptr inbounds float* %tmp21274, i64 1 + %tmp21276 = getelementptr inbounds float* %tmp21275, i64 1 + %tmp21277 = getelementptr inbounds float* %tmp21276, i64 1 + %tmp21278 = getelementptr inbounds float* %tmp21277, i64 1 + %tmp21279 = getelementptr inbounds float* %tmp21278, i64 1 + %tmp21280 = getelementptr inbounds float* %tmp21279, i64 1 + %tmp21281 = getelementptr inbounds float* %tmp21280, i64 1 + %tmp21282 = getelementptr inbounds float* %tmp21281, i64 1 + %tmp21283 = getelementptr inbounds float* %tmp21282, i64 1 + %tmp21284 = getelementptr inbounds float* %tmp21283, i64 1 + %tmp21285 = getelementptr inbounds float* %tmp21284, i64 1 + %tmp21286 = getelementptr inbounds float* %tmp21285, i64 1 + %tmp21287 = getelementptr inbounds float* %tmp21286, i64 1 + %tmp21288 = getelementptr inbounds float* %tmp21287, i64 1 + %tmp21289 = getelementptr inbounds float* %tmp21288, i64 1 + %tmp21290 = getelementptr inbounds float* %tmp21289, i64 1 + %tmp21291 = getelementptr inbounds float* %tmp21290, i64 1 + %tmp21292 = getelementptr inbounds float* %tmp21291, i64 1 + %tmp21293 = getelementptr inbounds float* %tmp21292, i64 1 + %tmp21294 = getelementptr inbounds float* %tmp21293, i64 1 + %tmp21295 = getelementptr inbounds float* %tmp21294, i64 1 + %tmp21296 = getelementptr inbounds float* %tmp21295, i64 1 + %tmp21297 = getelementptr inbounds float* %tmp21296, i64 1 + %tmp21298 = getelementptr inbounds float* %tmp21297, i64 1 + %tmp21299 = getelementptr inbounds float* %tmp21298, i64 1 
+ %tmp21300 = getelementptr inbounds float* %tmp21299, i64 1 + %tmp21301 = getelementptr inbounds float* %tmp21300, i64 1 + %tmp21302 = getelementptr inbounds float* %tmp21301, i64 1 + %tmp21303 = getelementptr inbounds float* %tmp21302, i64 1 + %tmp21304 = getelementptr inbounds float* %tmp21303, i64 1 + %tmp21305 = getelementptr inbounds float* %tmp21304, i64 1 + %tmp21306 = getelementptr inbounds float* %tmp21305, i64 1 + %tmp21307 = getelementptr inbounds float* %tmp21306, i64 1 + %tmp21308 = getelementptr inbounds float* %tmp21307, i64 1 + %tmp21309 = getelementptr inbounds float* %tmp21308, i64 1 + %tmp21310 = getelementptr inbounds float* %tmp21309, i64 1 + %tmp21311 = getelementptr inbounds float* %tmp21310, i64 1 + %tmp21312 = getelementptr inbounds float* %tmp21311, i64 1 + %tmp21313 = getelementptr inbounds float* %tmp21312, i64 1 + %tmp21314 = getelementptr inbounds float* %tmp21313, i64 1 + %tmp21315 = getelementptr inbounds float* %tmp21314, i64 1 + %tmp21316 = getelementptr inbounds float* %tmp21315, i64 1 + %tmp21317 = getelementptr inbounds float* %tmp21316, i64 1 + %tmp21318 = getelementptr inbounds float* %tmp21317, i64 1 + %tmp21319 = getelementptr inbounds float* %tmp21318, i64 1 + %tmp21320 = getelementptr inbounds float* %tmp21319, i64 1 + %tmp21321 = getelementptr inbounds float* %tmp21320, i64 1 + %tmp21322 = getelementptr inbounds float* %tmp21321, i64 1 + %tmp21323 = getelementptr inbounds float* %tmp21322, i64 1 + %tmp21324 = getelementptr inbounds float* %tmp21323, i64 1 + %tmp21325 = getelementptr inbounds float* %tmp21324, i64 1 + %tmp21326 = getelementptr inbounds float* %tmp21325, i64 1 + %tmp21327 = getelementptr inbounds float* %tmp21326, i64 1 + %tmp21328 = getelementptr inbounds float* %tmp21327, i64 1 + %tmp21329 = getelementptr inbounds float* %tmp21328, i64 1 + %tmp21330 = getelementptr inbounds float* %tmp21329, i64 1 + %tmp21331 = getelementptr inbounds float* %tmp21330, i64 1 + %tmp21332 = getelementptr inbounds float* 
%tmp21331, i64 1 + %tmp21333 = getelementptr inbounds float* %tmp21332, i64 1 + %tmp21334 = getelementptr inbounds float* %tmp21333, i64 1 + %tmp21335 = getelementptr inbounds float* %tmp21334, i64 1 + %tmp21336 = getelementptr inbounds float* %tmp21335, i64 1 + %tmp21337 = getelementptr inbounds float* %tmp21336, i64 1 + %tmp21338 = getelementptr inbounds float* %tmp21337, i64 1 + %tmp21339 = getelementptr inbounds float* %tmp21338, i64 1 + %tmp21340 = getelementptr inbounds float* %tmp21339, i64 1 + %tmp21341 = getelementptr inbounds float* %tmp21340, i64 1 + %tmp21342 = getelementptr inbounds float* %tmp21341, i64 1 + %tmp21343 = getelementptr inbounds float* %tmp21342, i64 1 + %tmp21344 = getelementptr inbounds float* %tmp21343, i64 1 + %tmp21345 = getelementptr inbounds float* %tmp21344, i64 1 + %tmp21346 = getelementptr inbounds float* %tmp21345, i64 1 + %tmp21347 = getelementptr inbounds float* %tmp21346, i64 1 + %tmp21348 = getelementptr inbounds float* %tmp21347, i64 1 + %tmp21349 = getelementptr inbounds float* %tmp21348, i64 1 + %tmp21350 = getelementptr inbounds float* %tmp21349, i64 1 + %tmp21351 = getelementptr inbounds float* %tmp21350, i64 1 + %tmp21352 = getelementptr inbounds float* %tmp21351, i64 1 + %tmp21353 = getelementptr inbounds float* %tmp21352, i64 1 + %tmp21354 = getelementptr inbounds float* %tmp21353, i64 1 + %tmp21355 = getelementptr inbounds float* %tmp21354, i64 1 + %tmp21356 = getelementptr inbounds float* %tmp21355, i64 1 + %tmp21357 = getelementptr inbounds float* %tmp21356, i64 1 + %tmp21358 = getelementptr inbounds float* %tmp21357, i64 1 + %tmp21359 = getelementptr inbounds float* %tmp21358, i64 1 + %tmp21360 = getelementptr inbounds float* %tmp21359, i64 1 + %tmp21361 = getelementptr inbounds float* %tmp21360, i64 1 + %tmp21362 = getelementptr inbounds float* %tmp21361, i64 1 + %tmp21363 = getelementptr inbounds float* %tmp21362, i64 1 + %tmp21364 = getelementptr inbounds float* %tmp21363, i64 1 + %tmp21365 = getelementptr 
inbounds float* %tmp21364, i64 1 + %tmp21366 = getelementptr inbounds float* %tmp21365, i64 1 + %tmp21367 = getelementptr inbounds float* %tmp21366, i64 1 + %tmp21368 = getelementptr inbounds float* %tmp21367, i64 1 + %tmp21369 = getelementptr inbounds float* %tmp21368, i64 1 + %tmp21370 = getelementptr inbounds float* %tmp21369, i64 1 + %tmp21371 = getelementptr inbounds float* %tmp21370, i64 1 + %tmp21372 = getelementptr inbounds float* %tmp21371, i64 1 + %tmp21373 = getelementptr inbounds float* %tmp21372, i64 1 + %tmp21374 = getelementptr inbounds float* %tmp21373, i64 1 + %tmp21375 = getelementptr inbounds float* %tmp21374, i64 1 + %tmp21376 = getelementptr inbounds float* %tmp21375, i64 1 + %tmp21377 = getelementptr inbounds float* %tmp21376, i64 1 + %tmp21378 = getelementptr inbounds float* %tmp21377, i64 1 + %tmp21379 = getelementptr inbounds float* %tmp21378, i64 1 + %tmp21380 = getelementptr inbounds float* %tmp21379, i64 1 + %tmp21381 = getelementptr inbounds float* %tmp21380, i64 1 + %tmp21382 = getelementptr inbounds float* %tmp21381, i64 1 + %tmp21383 = getelementptr inbounds float* %tmp21382, i64 1 + %tmp21384 = getelementptr inbounds float* %tmp21383, i64 1 + %tmp21385 = getelementptr inbounds float* %tmp21384, i64 1 + %tmp21386 = getelementptr inbounds float* %tmp21385, i64 1 + %tmp21387 = getelementptr inbounds float* %tmp21386, i64 1 + %tmp21388 = getelementptr inbounds float* %tmp21387, i64 1 + %tmp21389 = getelementptr inbounds float* %tmp21388, i64 1 + %tmp21390 = getelementptr inbounds float* %tmp21389, i64 1 + %tmp21391 = getelementptr inbounds float* %tmp21390, i64 1 + %tmp21392 = getelementptr inbounds float* %tmp21391, i64 1 + %tmp21393 = getelementptr inbounds float* %tmp21392, i64 1 + %tmp21394 = getelementptr inbounds float* %tmp21393, i64 1 + %tmp21395 = getelementptr inbounds float* %tmp21394, i64 1 + %tmp21396 = getelementptr inbounds float* %tmp21395, i64 1 + %tmp21397 = getelementptr inbounds float* %tmp21396, i64 1 + %tmp21398 = 
getelementptr inbounds float* %tmp21397, i64 1 + %tmp21399 = getelementptr inbounds float* %tmp21398, i64 1 + %tmp21400 = getelementptr inbounds float* %tmp21399, i64 1 + %tmp21401 = getelementptr inbounds float* %tmp21400, i64 1 + %tmp21402 = getelementptr inbounds float* %tmp21401, i64 1 + %tmp21403 = getelementptr inbounds float* %tmp21402, i64 1 + %tmp21404 = getelementptr inbounds float* %tmp21403, i64 1 + %tmp21405 = getelementptr inbounds float* %tmp21404, i64 1 + %tmp21406 = getelementptr inbounds float* %tmp21405, i64 1 + %tmp21407 = getelementptr inbounds float* %tmp21406, i64 1 + %tmp21408 = getelementptr inbounds float* %tmp21407, i64 1 + %tmp21409 = getelementptr inbounds float* %tmp21408, i64 1 + %tmp21410 = getelementptr inbounds float* %tmp21409, i64 1 + %tmp21411 = getelementptr inbounds float* %tmp21410, i64 1 + %tmp21412 = getelementptr inbounds float* %tmp21411, i64 1 + %tmp21413 = getelementptr inbounds float* %tmp21412, i64 1 + %tmp21414 = getelementptr inbounds float* %tmp21413, i64 1 + %tmp21415 = getelementptr inbounds float* %tmp21414, i64 1 + %tmp21416 = getelementptr inbounds float* %tmp21415, i64 1 + %tmp21417 = getelementptr inbounds float* %tmp21416, i64 1 + %tmp21418 = getelementptr inbounds float* %tmp21417, i64 1 + %tmp21419 = getelementptr inbounds float* %tmp21418, i64 1 + %tmp21420 = getelementptr inbounds float* %tmp21419, i64 1 + %tmp21421 = getelementptr inbounds float* %tmp21420, i64 1 + %tmp21422 = getelementptr inbounds float* %tmp21421, i64 1 + %tmp21423 = getelementptr inbounds float* %tmp21422, i64 1 + %tmp21424 = getelementptr inbounds float* %tmp21423, i64 1 + %tmp21425 = getelementptr inbounds float* %tmp21424, i64 1 + %tmp21426 = getelementptr inbounds float* %tmp21425, i64 1 + %tmp21427 = getelementptr inbounds float* %tmp21426, i64 1 + %tmp21428 = getelementptr inbounds float* %tmp21427, i64 1 + %tmp21429 = getelementptr inbounds float* %tmp21428, i64 1 + %tmp21430 = getelementptr inbounds float* %tmp21429, i64 1 
+ %tmp21431 = getelementptr inbounds float* %tmp21430, i64 1 + %tmp21432 = getelementptr inbounds float* %tmp21431, i64 1 + %tmp21433 = getelementptr inbounds float* %tmp21432, i64 1 + %tmp21434 = getelementptr inbounds float* %tmp21433, i64 1 + %tmp21435 = getelementptr inbounds float* %tmp21434, i64 1 + %tmp21436 = getelementptr inbounds float* %tmp21435, i64 1 + %tmp21437 = getelementptr inbounds float* %tmp21436, i64 1 + %tmp21438 = getelementptr inbounds float* %tmp21437, i64 1 + %tmp21439 = getelementptr inbounds float* %tmp21438, i64 1 + %tmp21440 = getelementptr inbounds float* %tmp21439, i64 1 + %tmp21441 = getelementptr inbounds float* %tmp21440, i64 1 + %tmp21442 = getelementptr inbounds float* %tmp21441, i64 1 + %tmp21443 = getelementptr inbounds float* %tmp21442, i64 1 + %tmp21444 = getelementptr inbounds float* %tmp21443, i64 1 + %tmp21445 = getelementptr inbounds float* %tmp21444, i64 1 + %tmp21446 = getelementptr inbounds float* %tmp21445, i64 1 + %tmp21447 = getelementptr inbounds float* %tmp21446, i64 1 + %tmp21448 = getelementptr inbounds float* %tmp21447, i64 1 + %tmp21449 = getelementptr inbounds float* %tmp21448, i64 1 + %tmp21450 = getelementptr inbounds float* %tmp21449, i64 1 + %tmp21451 = getelementptr inbounds float* %tmp21450, i64 1 + %tmp21452 = getelementptr inbounds float* %tmp21451, i64 1 + %tmp21453 = getelementptr inbounds float* %tmp21452, i64 1 + %tmp21454 = getelementptr inbounds float* %tmp21453, i64 1 + %tmp21455 = getelementptr inbounds float* %tmp21454, i64 1 + %tmp21456 = getelementptr inbounds float* %tmp21455, i64 1 + %tmp21457 = getelementptr inbounds float* %tmp21456, i64 1 + %tmp21458 = getelementptr inbounds float* %tmp21457, i64 1 + %tmp21459 = getelementptr inbounds float* %tmp21458, i64 1 + %tmp21460 = getelementptr inbounds float* %tmp21459, i64 1 + %tmp21461 = getelementptr inbounds float* %tmp21460, i64 1 + %tmp21462 = getelementptr inbounds float* %tmp21461, i64 1 + %tmp21463 = getelementptr inbounds float* 
%tmp21462, i64 1 + %tmp21464 = getelementptr inbounds float* %tmp21463, i64 1 + %tmp21465 = getelementptr inbounds float* %tmp21464, i64 1 + %tmp21466 = getelementptr inbounds float* %tmp21465, i64 1 + %tmp21467 = getelementptr inbounds float* %tmp21466, i64 1 + %tmp21468 = getelementptr inbounds float* %tmp21467, i64 1 + %tmp21469 = getelementptr inbounds float* %tmp21468, i64 1 + %tmp21470 = getelementptr inbounds float* %tmp21469, i64 1 + %tmp21471 = getelementptr inbounds float* %tmp21470, i64 1 + %tmp21472 = getelementptr inbounds float* %tmp21471, i64 1 + %tmp21473 = getelementptr inbounds float* %tmp21472, i64 1 + %tmp21474 = getelementptr inbounds float* %tmp21473, i64 1 + %tmp21475 = getelementptr inbounds float* %tmp21474, i64 1 + %tmp21476 = getelementptr inbounds float* %tmp21475, i64 1 + %tmp21477 = getelementptr inbounds float* %tmp21476, i64 1 + %tmp21478 = getelementptr inbounds float* %tmp21477, i64 1 + %tmp21479 = getelementptr inbounds float* %tmp21478, i64 1 + %tmp21480 = getelementptr inbounds float* %tmp21479, i64 1 + %tmp21481 = getelementptr inbounds float* %tmp21480, i64 1 + %tmp21482 = getelementptr inbounds float* %tmp21481, i64 1 + %tmp21483 = getelementptr inbounds float* %tmp21482, i64 1 + %tmp21484 = getelementptr inbounds float* %tmp21483, i64 1 + %tmp21485 = getelementptr inbounds float* %tmp21484, i64 1 + %tmp21486 = getelementptr inbounds float* %tmp21485, i64 1 + %tmp21487 = getelementptr inbounds float* %tmp21486, i64 1 + %tmp21488 = getelementptr inbounds float* %tmp21487, i64 1 + %tmp21489 = getelementptr inbounds float* %tmp21488, i64 1 + %tmp21490 = getelementptr inbounds float* %tmp21489, i64 1 + %tmp21491 = getelementptr inbounds float* %tmp21490, i64 1 + %tmp21492 = getelementptr inbounds float* %tmp21491, i64 1 + %tmp21493 = getelementptr inbounds float* %tmp21492, i64 1 + %tmp21494 = getelementptr inbounds float* %tmp21493, i64 1 + %tmp21495 = getelementptr inbounds float* %tmp21494, i64 1 + %tmp21496 = getelementptr 
inbounds float* %tmp21495, i64 1 + %tmp21497 = getelementptr inbounds float* %tmp21496, i64 1 + %tmp21498 = getelementptr inbounds float* %tmp21497, i64 1 + %tmp21499 = getelementptr inbounds float* %tmp21498, i64 1 + %tmp21500 = getelementptr inbounds float* %tmp21499, i64 1 + %tmp21501 = getelementptr inbounds float* %tmp21500, i64 1 + %tmp21502 = getelementptr inbounds float* %tmp21501, i64 1 + %tmp21503 = getelementptr inbounds float* %tmp21502, i64 1 + %tmp21504 = getelementptr inbounds float* %tmp21503, i64 1 + %tmp21505 = getelementptr inbounds float* %tmp21504, i64 1 + %tmp21506 = getelementptr inbounds float* %tmp21505, i64 1 + %tmp21507 = getelementptr inbounds float* %tmp21506, i64 1 + %tmp21508 = getelementptr inbounds float* %tmp21507, i64 1 + %tmp21509 = getelementptr inbounds float* %tmp21508, i64 1 + %tmp21510 = getelementptr inbounds float* %tmp21509, i64 1 + %tmp21511 = getelementptr inbounds float* %tmp21510, i64 1 + %tmp21512 = getelementptr inbounds float* %tmp21511, i64 1 + %tmp21513 = getelementptr inbounds float* %tmp21512, i64 1 + %tmp21514 = getelementptr inbounds float* %tmp21513, i64 1 + %tmp21515 = getelementptr inbounds float* %tmp21514, i64 1 + %tmp21516 = getelementptr inbounds float* %tmp21515, i64 1 + %tmp21517 = getelementptr inbounds float* %tmp21516, i64 1 + %tmp21518 = getelementptr inbounds float* %tmp21517, i64 1 + %tmp21519 = getelementptr inbounds float* %tmp21518, i64 1 + %tmp21520 = getelementptr inbounds float* %tmp21519, i64 1 + %tmp21521 = getelementptr inbounds float* %tmp21520, i64 1 + %tmp21522 = getelementptr inbounds float* %tmp21521, i64 1 + %tmp21523 = getelementptr inbounds float* %tmp21522, i64 1 + %tmp21524 = getelementptr inbounds float* %tmp21523, i64 1 + %tmp21525 = getelementptr inbounds float* %tmp21524, i64 1 + %tmp21526 = getelementptr inbounds float* %tmp21525, i64 1 + %tmp21527 = getelementptr inbounds float* %tmp21526, i64 1 + %tmp21528 = getelementptr inbounds float* %tmp21527, i64 1 + %tmp21529 = 
getelementptr inbounds float* %tmp21528, i64 1 + %tmp21530 = getelementptr inbounds float* %tmp21529, i64 1 + %tmp21531 = getelementptr inbounds float* %tmp21530, i64 1 + %tmp21532 = getelementptr inbounds float* %tmp21531, i64 1 + %tmp21533 = getelementptr inbounds float* %tmp21532, i64 1 + %tmp21534 = getelementptr inbounds float* %tmp21533, i64 1 + %tmp21535 = getelementptr inbounds float* %tmp21534, i64 1 + %tmp21536 = getelementptr inbounds float* %tmp21535, i64 1 + %tmp21537 = getelementptr inbounds float* %tmp21536, i64 1 + %tmp21538 = getelementptr inbounds float* %tmp21537, i64 1 + %tmp21539 = getelementptr inbounds float* %tmp21538, i64 1 + %tmp21540 = getelementptr inbounds float* %tmp21539, i64 1 + %tmp21541 = getelementptr inbounds float* %tmp21540, i64 1 + %tmp21542 = getelementptr inbounds float* %tmp21541, i64 1 + %tmp21543 = getelementptr inbounds float* %tmp21542, i64 1 + %tmp21544 = getelementptr inbounds float* %tmp21543, i64 1 + %tmp21545 = getelementptr inbounds float* %tmp21544, i64 1 + %tmp21546 = getelementptr inbounds float* %tmp21545, i64 1 + %tmp21547 = getelementptr inbounds float* %tmp21546, i64 1 + %tmp21548 = getelementptr inbounds float* %tmp21547, i64 1 + %tmp21549 = getelementptr inbounds float* %tmp21548, i64 1 + %tmp21550 = getelementptr inbounds float* %tmp21549, i64 1 + %tmp21551 = getelementptr inbounds float* %tmp21550, i64 1 + %tmp21552 = getelementptr inbounds float* %tmp21551, i64 1 + %tmp21553 = getelementptr inbounds float* %tmp21552, i64 1 + %tmp21554 = getelementptr inbounds float* %tmp21553, i64 1 + %tmp21555 = getelementptr inbounds float* %tmp21554, i64 1 + %tmp21556 = getelementptr inbounds float* %tmp21555, i64 1 + %tmp21557 = getelementptr inbounds float* %tmp21556, i64 1 + %tmp21558 = getelementptr inbounds float* %tmp21557, i64 1 + %tmp21559 = getelementptr inbounds float* %tmp21558, i64 1 + %tmp21560 = getelementptr inbounds float* %tmp21559, i64 1 + %tmp21561 = getelementptr inbounds float* %tmp21560, i64 1 
+ %tmp21562 = getelementptr inbounds float* %tmp21561, i64 1 + %tmp21563 = getelementptr inbounds float* %tmp21562, i64 1 + %tmp21564 = getelementptr inbounds float* %tmp21563, i64 1 + %tmp21565 = getelementptr inbounds float* %tmp21564, i64 1 + %tmp21566 = getelementptr inbounds float* %tmp21565, i64 1 + %tmp21567 = getelementptr inbounds float* %tmp21566, i64 1 + %tmp21568 = getelementptr inbounds float* %tmp21567, i64 1 + %tmp21569 = getelementptr inbounds float* %tmp21568, i64 1 + %tmp21570 = getelementptr inbounds float* %tmp21569, i64 1 + %tmp21571 = getelementptr inbounds float* %tmp21570, i64 1 + %tmp21572 = getelementptr inbounds float* %tmp21571, i64 1 + %tmp21573 = getelementptr inbounds float* %tmp21572, i64 1 + %tmp21574 = getelementptr inbounds float* %tmp21573, i64 1 + %tmp21575 = getelementptr inbounds float* %tmp21574, i64 1 + %tmp21576 = getelementptr inbounds float* %tmp21575, i64 1 + %tmp21577 = getelementptr inbounds float* %tmp21576, i64 1 + %tmp21578 = getelementptr inbounds float* %tmp21577, i64 1 + %tmp21579 = getelementptr inbounds float* %tmp21578, i64 1 + %tmp21580 = getelementptr inbounds float* %tmp21579, i64 1 + %tmp21581 = getelementptr inbounds float* %tmp21580, i64 1 + %tmp21582 = getelementptr inbounds float* %tmp21581, i64 1 + %tmp21583 = getelementptr inbounds float* %tmp21582, i64 1 + %tmp21584 = getelementptr inbounds float* %tmp21583, i64 1 + %tmp21585 = getelementptr inbounds float* %tmp21584, i64 1 + %tmp21586 = getelementptr inbounds float* %tmp21585, i64 1 + %tmp21587 = getelementptr inbounds float* %tmp21586, i64 1 + %tmp21588 = getelementptr inbounds float* %tmp21587, i64 1 + %tmp21589 = getelementptr inbounds float* %tmp21588, i64 1 + %tmp21590 = getelementptr inbounds float* %tmp21589, i64 1 + %tmp21591 = getelementptr inbounds float* %tmp21590, i64 1 + %tmp21592 = getelementptr inbounds float* %tmp21591, i64 1 + %tmp21593 = getelementptr inbounds float* %tmp21592, i64 1 + %tmp21594 = getelementptr inbounds float* 
%tmp21593, i64 1 + %tmp21595 = getelementptr inbounds float* %tmp21594, i64 1 + %tmp21596 = getelementptr inbounds float* %tmp21595, i64 1 + %tmp21597 = getelementptr inbounds float* %tmp21596, i64 1 + %tmp21598 = getelementptr inbounds float* %tmp21597, i64 1 + %tmp21599 = getelementptr inbounds float* %tmp21598, i64 1 + %tmp21600 = getelementptr inbounds float* %tmp21599, i64 1 + %tmp21601 = getelementptr inbounds float* %tmp21600, i64 1 + %tmp21602 = getelementptr inbounds float* %tmp21601, i64 1 + %tmp21603 = getelementptr inbounds float* %tmp21602, i64 1 + %tmp21604 = getelementptr inbounds float* %tmp21603, i64 1 + %tmp21605 = getelementptr inbounds float* %tmp21604, i64 1 + %tmp21606 = getelementptr inbounds float* %tmp21605, i64 1 + %tmp21607 = getelementptr inbounds float* %tmp21606, i64 1 + %tmp21608 = getelementptr inbounds float* %tmp21607, i64 1 + %tmp21609 = getelementptr inbounds float* %tmp21608, i64 1 + %tmp21610 = getelementptr inbounds float* %tmp21609, i64 1 + %tmp21611 = getelementptr inbounds float* %tmp21610, i64 1 + %tmp21612 = getelementptr inbounds float* %tmp21611, i64 1 + %tmp21613 = getelementptr inbounds float* %tmp21612, i64 1 + %tmp21614 = getelementptr inbounds float* %tmp21613, i64 1 + %tmp21615 = getelementptr inbounds float* %tmp21614, i64 1 + %tmp21616 = getelementptr inbounds float* %tmp21615, i64 1 + %tmp21617 = getelementptr inbounds float* %tmp21616, i64 1 + %tmp21618 = getelementptr inbounds float* %tmp21617, i64 1 + %tmp21619 = getelementptr inbounds float* %tmp21618, i64 1 + %tmp21620 = getelementptr inbounds float* %tmp21619, i64 1 + %tmp21621 = getelementptr inbounds float* %tmp21620, i64 1 + %tmp21622 = getelementptr inbounds float* %tmp21621, i64 1 + %tmp21623 = getelementptr inbounds float* %tmp21622, i64 1 + %tmp21624 = getelementptr inbounds float* %tmp21623, i64 1 + %tmp21625 = getelementptr inbounds float* %tmp21624, i64 1 + %tmp21626 = getelementptr inbounds float* %tmp21625, i64 1 + %tmp21627 = getelementptr 
inbounds float* %tmp21626, i64 1 + %tmp21628 = getelementptr inbounds float* %tmp21627, i64 1 + %tmp21629 = getelementptr inbounds float* %tmp21628, i64 1 + %tmp21630 = getelementptr inbounds float* %tmp21629, i64 1 + %tmp21631 = getelementptr inbounds float* %tmp21630, i64 1 + %tmp21632 = getelementptr inbounds float* %tmp21631, i64 1 + %tmp21633 = getelementptr inbounds float* %tmp21632, i64 1 + %tmp21634 = getelementptr inbounds float* %tmp21633, i64 1 + %tmp21635 = getelementptr inbounds float* %tmp21634, i64 1 + %tmp21636 = getelementptr inbounds float* %tmp21635, i64 1 + %tmp21637 = getelementptr inbounds float* %tmp21636, i64 1 + %tmp21638 = getelementptr inbounds float* %tmp21637, i64 1 + %tmp21639 = getelementptr inbounds float* %tmp21638, i64 1 + %tmp21640 = getelementptr inbounds float* %tmp21639, i64 1 + %tmp21641 = getelementptr inbounds float* %tmp21640, i64 1 + %tmp21642 = getelementptr inbounds float* %tmp21641, i64 1 + %tmp21643 = getelementptr inbounds float* %tmp21642, i64 1 + %tmp21644 = getelementptr inbounds float* %tmp21643, i64 1 + %tmp21645 = getelementptr inbounds float* %tmp21644, i64 1 + %tmp21646 = getelementptr inbounds float* %tmp21645, i64 1 + %tmp21647 = getelementptr inbounds float* %tmp21646, i64 1 + %tmp21648 = getelementptr inbounds float* %tmp21647, i64 1 + %tmp21649 = getelementptr inbounds float* %tmp21648, i64 1 + %tmp21650 = getelementptr inbounds float* %tmp21649, i64 1 + %tmp21651 = getelementptr inbounds float* %tmp21650, i64 1 + %tmp21652 = getelementptr inbounds float* %tmp21651, i64 1 + %tmp21653 = getelementptr inbounds float* %tmp21652, i64 1 + %tmp21654 = getelementptr inbounds float* %tmp21653, i64 1 + %tmp21655 = getelementptr inbounds float* %tmp21654, i64 1 + %tmp21656 = getelementptr inbounds float* %tmp21655, i64 1 + %tmp21657 = getelementptr inbounds float* %tmp21656, i64 1 + %tmp21658 = getelementptr inbounds float* %tmp21657, i64 1 + %tmp21659 = getelementptr inbounds float* %tmp21658, i64 1 + %tmp21660 = 
getelementptr inbounds float* %tmp21659, i64 1 + %tmp21661 = getelementptr inbounds float* %tmp21660, i64 1 + %tmp21662 = getelementptr inbounds float* %tmp21661, i64 1 + %tmp21663 = getelementptr inbounds float* %tmp21662, i64 1 + %tmp21664 = getelementptr inbounds float* %tmp21663, i64 1 + %tmp21665 = getelementptr inbounds float* %tmp21664, i64 1 + %tmp21666 = getelementptr inbounds float* %tmp21665, i64 1 + %tmp21667 = getelementptr inbounds float* %tmp21666, i64 1 + %tmp21668 = getelementptr inbounds float* %tmp21667, i64 1 + %tmp21669 = getelementptr inbounds float* %tmp21668, i64 1 + %tmp21670 = getelementptr inbounds float* %tmp21669, i64 1 + %tmp21671 = getelementptr inbounds float* %tmp21670, i64 1 + %tmp21672 = getelementptr inbounds float* %tmp21671, i64 1 + %tmp21673 = getelementptr inbounds float* %tmp21672, i64 1 + %tmp21674 = getelementptr inbounds float* %tmp21673, i64 1 + %tmp21675 = getelementptr inbounds float* %tmp21674, i64 1 + %tmp21676 = getelementptr inbounds float* %tmp21675, i64 1 + %tmp21677 = getelementptr inbounds float* %tmp21676, i64 1 + %tmp21678 = getelementptr inbounds float* %tmp21677, i64 1 + %tmp21679 = getelementptr inbounds float* %tmp21678, i64 1 + %tmp21680 = getelementptr inbounds float* %tmp21679, i64 1 + %tmp21681 = getelementptr inbounds float* %tmp21680, i64 1 + %tmp21682 = getelementptr inbounds float* %tmp21681, i64 1 + %tmp21683 = getelementptr inbounds float* %tmp21682, i64 1 + %tmp21684 = getelementptr inbounds float* %tmp21683, i64 1 + %tmp21685 = getelementptr inbounds float* %tmp21684, i64 1 + %tmp21686 = getelementptr inbounds float* %tmp21685, i64 1 + %tmp21687 = getelementptr inbounds float* %tmp21686, i64 1 + %tmp21688 = getelementptr inbounds float* %tmp21687, i64 1 + %tmp21689 = getelementptr inbounds float* %tmp21688, i64 1 + %tmp21690 = getelementptr inbounds float* %tmp21689, i64 1 + %tmp21691 = getelementptr inbounds float* %tmp21690, i64 1 + %tmp21692 = getelementptr inbounds float* %tmp21691, i64 1 
+ %tmp21693 = getelementptr inbounds float* %tmp21692, i64 1 + %tmp21694 = getelementptr inbounds float* %tmp21693, i64 1 + %tmp21695 = getelementptr inbounds float* %tmp21694, i64 1 + %tmp21696 = getelementptr inbounds float* %tmp21695, i64 1 + %tmp21697 = getelementptr inbounds float* %tmp21696, i64 1 + %tmp21698 = getelementptr inbounds float* %tmp21697, i64 1 + %tmp21699 = getelementptr inbounds float* %tmp21698, i64 1 + %tmp21700 = getelementptr inbounds float* %tmp21699, i64 1 + %tmp21701 = getelementptr inbounds float* %tmp21700, i64 1 + %tmp21702 = getelementptr inbounds float* %tmp21701, i64 1 + %tmp21703 = getelementptr inbounds float* %tmp21702, i64 1 + %tmp21704 = getelementptr inbounds float* %tmp21703, i64 1 + %tmp21705 = getelementptr inbounds float* %tmp21704, i64 1 + %tmp21706 = getelementptr inbounds float* %tmp21705, i64 1 + %tmp21707 = getelementptr inbounds float* %tmp21706, i64 1 + %tmp21708 = getelementptr inbounds float* %tmp21707, i64 1 + %tmp21709 = getelementptr inbounds float* %tmp21708, i64 1 + %tmp21710 = getelementptr inbounds float* %tmp21709, i64 1 + %tmp21711 = getelementptr inbounds float* %tmp21710, i64 1 + %tmp21712 = getelementptr inbounds float* %tmp21711, i64 1 + %tmp21713 = getelementptr inbounds float* %tmp21712, i64 1 + %tmp21714 = getelementptr inbounds float* %tmp21713, i64 1 + %tmp21715 = getelementptr inbounds float* %tmp21714, i64 1 + %tmp21716 = getelementptr inbounds float* %tmp21715, i64 1 + %tmp21717 = getelementptr inbounds float* %tmp21716, i64 1 + %tmp21718 = getelementptr inbounds float* %tmp21717, i64 1 + %tmp21719 = getelementptr inbounds float* %tmp21718, i64 1 + %tmp21720 = getelementptr inbounds float* %tmp21719, i64 1 + %tmp21721 = getelementptr inbounds float* %tmp21720, i64 1 + %tmp21722 = getelementptr inbounds float* %tmp21721, i64 1 + %tmp21723 = getelementptr inbounds float* %tmp21722, i64 1 + %tmp21724 = getelementptr inbounds float* %tmp21723, i64 1 + %tmp21725 = getelementptr inbounds float* 
%tmp21724, i64 1 + %tmp21726 = getelementptr inbounds float* %tmp21725, i64 1 + %tmp21727 = getelementptr inbounds float* %tmp21726, i64 1 + %tmp21728 = getelementptr inbounds float* %tmp21727, i64 1 + %tmp21729 = getelementptr inbounds float* %tmp21728, i64 1 + %tmp21730 = getelementptr inbounds float* %tmp21729, i64 1 + %tmp21731 = getelementptr inbounds float* %tmp21730, i64 1 + %tmp21732 = getelementptr inbounds float* %tmp21731, i64 1 + %tmp21733 = getelementptr inbounds float* %tmp21732, i64 1 + %tmp21734 = getelementptr inbounds float* %tmp21733, i64 1 + %tmp21735 = getelementptr inbounds float* %tmp21734, i64 1 + %tmp21736 = getelementptr inbounds float* %tmp21735, i64 1 + %tmp21737 = getelementptr inbounds float* %tmp21736, i64 1 + %tmp21738 = getelementptr inbounds float* %tmp21737, i64 1 + %tmp21739 = getelementptr inbounds float* %tmp21738, i64 1 + %tmp21740 = getelementptr inbounds float* %tmp21739, i64 1 + %tmp21741 = getelementptr inbounds float* %tmp21740, i64 1 + %tmp21742 = getelementptr inbounds float* %tmp21741, i64 1 + %tmp21743 = getelementptr inbounds float* %tmp21742, i64 1 + %tmp21744 = getelementptr inbounds float* %tmp21743, i64 1 + %tmp21745 = getelementptr inbounds float* %tmp21744, i64 1 + %tmp21746 = getelementptr inbounds float* %tmp21745, i64 1 + %tmp21747 = getelementptr inbounds float* %tmp21746, i64 1 + %tmp21748 = getelementptr inbounds float* %tmp21747, i64 1 + %tmp21749 = getelementptr inbounds float* %tmp21748, i64 1 + %tmp21750 = getelementptr inbounds float* %tmp21749, i64 1 + %tmp21751 = getelementptr inbounds float* %tmp21750, i64 1 + %tmp21752 = getelementptr inbounds float* %tmp21751, i64 1 + %tmp21753 = getelementptr inbounds float* %tmp21752, i64 1 + %tmp21754 = getelementptr inbounds float* %tmp21753, i64 1 + %tmp21755 = getelementptr inbounds float* %tmp21754, i64 1 + %tmp21756 = getelementptr inbounds float* %tmp21755, i64 1 + %tmp21757 = getelementptr inbounds float* %tmp21756, i64 1 + %tmp21758 = getelementptr 
inbounds float* %tmp21757, i64 1 + %tmp21759 = getelementptr inbounds float* %tmp21758, i64 1 + %tmp21760 = getelementptr inbounds float* %tmp21759, i64 1 + %tmp21761 = getelementptr inbounds float* %tmp21760, i64 1 + %tmp21762 = getelementptr inbounds float* %tmp21761, i64 1 + %tmp21763 = getelementptr inbounds float* %tmp21762, i64 1 + %tmp21764 = getelementptr inbounds float* %tmp21763, i64 1 + %tmp21765 = getelementptr inbounds float* %tmp21764, i64 1 + %tmp21766 = getelementptr inbounds float* %tmp21765, i64 1 + %tmp21767 = getelementptr inbounds float* %tmp21766, i64 1 + %tmp21768 = getelementptr inbounds float* %tmp21767, i64 1 + %tmp21769 = getelementptr inbounds float* %tmp21768, i64 1 + %tmp21770 = getelementptr inbounds float* %tmp21769, i64 1 + %tmp21771 = getelementptr inbounds float* %tmp21770, i64 1 + %tmp21772 = getelementptr inbounds float* %tmp21771, i64 1 + %tmp21773 = getelementptr inbounds float* %tmp21772, i64 1 + %tmp21774 = getelementptr inbounds float* %tmp21773, i64 1 + %tmp21775 = getelementptr inbounds float* %tmp21774, i64 1 + %tmp21776 = getelementptr inbounds float* %tmp21775, i64 1 + %tmp21777 = getelementptr inbounds float* %tmp21776, i64 1 + %tmp21778 = getelementptr inbounds float* %tmp21777, i64 1 + %tmp21779 = getelementptr inbounds float* %tmp21778, i64 1 + %tmp21780 = getelementptr inbounds float* %tmp21779, i64 1 + %tmp21781 = getelementptr inbounds float* %tmp21780, i64 1 + %tmp21782 = getelementptr inbounds float* %tmp21781, i64 1 + %tmp21783 = getelementptr inbounds float* %tmp21782, i64 1 + %tmp21784 = getelementptr inbounds float* %tmp21783, i64 1 + %tmp21785 = getelementptr inbounds float* %tmp21784, i64 1 + %tmp21786 = getelementptr inbounds float* %tmp21785, i64 1 + %tmp21787 = getelementptr inbounds float* %tmp21786, i64 1 + %tmp21788 = getelementptr inbounds float* %tmp21787, i64 1 + %tmp21789 = getelementptr inbounds float* %tmp21788, i64 1 + %tmp21790 = getelementptr inbounds float* %tmp21789, i64 1 + %tmp21791 = 
getelementptr inbounds float* %tmp21790, i64 1 + %tmp21792 = getelementptr inbounds float* %tmp21791, i64 1 + %tmp21793 = getelementptr inbounds float* %tmp21792, i64 1 + %tmp21794 = getelementptr inbounds float* %tmp21793, i64 1 + %tmp21795 = getelementptr inbounds float* %tmp21794, i64 1 + %tmp21796 = getelementptr inbounds float* %tmp21795, i64 1 + %tmp21797 = getelementptr inbounds float* %tmp21796, i64 1 + %tmp21798 = getelementptr inbounds float* %tmp21797, i64 1 + %tmp21799 = getelementptr inbounds float* %tmp21798, i64 1 + %tmp21800 = getelementptr inbounds float* %tmp21799, i64 1 + %tmp21801 = getelementptr inbounds float* %tmp21800, i64 1 + %tmp21802 = getelementptr inbounds float* %tmp21801, i64 1 + %tmp21803 = getelementptr inbounds float* %tmp21802, i64 1 + %tmp21804 = getelementptr inbounds float* %tmp21803, i64 1 + %tmp21805 = getelementptr inbounds float* %tmp21804, i64 1 + %tmp21806 = getelementptr inbounds float* %tmp21805, i64 1 + %tmp21807 = getelementptr inbounds float* %tmp21806, i64 1 + %tmp21808 = getelementptr inbounds float* %tmp21807, i64 1 + %tmp21809 = getelementptr inbounds float* %tmp21808, i64 1 + %tmp21810 = getelementptr inbounds float* %tmp21809, i64 1 + %tmp21811 = getelementptr inbounds float* %tmp21810, i64 1 + %tmp21812 = getelementptr inbounds float* %tmp21811, i64 1 + %tmp21813 = getelementptr inbounds float* %tmp21812, i64 1 + %tmp21814 = getelementptr inbounds float* %tmp21813, i64 1 + %tmp21815 = getelementptr inbounds float* %tmp21814, i64 1 + %tmp21816 = getelementptr inbounds float* %tmp21815, i64 1 + %tmp21817 = getelementptr inbounds float* %tmp21816, i64 1 + %tmp21818 = getelementptr inbounds float* %tmp21817, i64 1 + %tmp21819 = getelementptr inbounds float* %tmp21818, i64 1 + %tmp21820 = getelementptr inbounds float* %tmp21819, i64 1 + %tmp21821 = getelementptr inbounds float* %tmp21820, i64 1 + %tmp21822 = getelementptr inbounds float* %tmp21821, i64 1 + %tmp21823 = getelementptr inbounds float* %tmp21822, i64 1 
+ %tmp21824 = getelementptr inbounds float* %tmp21823, i64 1 + %tmp21825 = getelementptr inbounds float* %tmp21824, i64 1 + %tmp21826 = getelementptr inbounds float* %tmp21825, i64 1 + %tmp21827 = getelementptr inbounds float* %tmp21826, i64 1 + %tmp21828 = getelementptr inbounds float* %tmp21827, i64 1 + %tmp21829 = getelementptr inbounds float* %tmp21828, i64 1 + %tmp21830 = getelementptr inbounds float* %tmp21829, i64 1 + %tmp21831 = getelementptr inbounds float* %tmp21830, i64 1 + %tmp21832 = getelementptr inbounds float* %tmp21831, i64 1 + %tmp21833 = getelementptr inbounds float* %tmp21832, i64 1 + %tmp21834 = getelementptr inbounds float* %tmp21833, i64 1 + %tmp21835 = getelementptr inbounds float* %tmp21834, i64 1 + %tmp21836 = getelementptr inbounds float* %tmp21835, i64 1 + %tmp21837 = getelementptr inbounds float* %tmp21836, i64 1 + %tmp21838 = getelementptr inbounds float* %tmp21837, i64 1 + %tmp21839 = getelementptr inbounds float* %tmp21838, i64 1 + %tmp21840 = getelementptr inbounds float* %tmp21839, i64 1 + %tmp21841 = getelementptr inbounds float* %tmp21840, i64 1 + %tmp21842 = getelementptr inbounds float* %tmp21841, i64 1 + %tmp21843 = getelementptr inbounds float* %tmp21842, i64 1 + %tmp21844 = getelementptr inbounds float* %tmp21843, i64 1 + %tmp21845 = getelementptr inbounds float* %tmp21844, i64 1 + %tmp21846 = getelementptr inbounds float* %tmp21845, i64 1 + %tmp21847 = getelementptr inbounds float* %tmp21846, i64 1 + %tmp21848 = getelementptr inbounds float* %tmp21847, i64 1 + %tmp21849 = getelementptr inbounds float* %tmp21848, i64 1 + %tmp21850 = getelementptr inbounds float* %tmp21849, i64 1 + %tmp21851 = getelementptr inbounds float* %tmp21850, i64 1 + %tmp21852 = getelementptr inbounds float* %tmp21851, i64 1 + %tmp21853 = getelementptr inbounds float* %tmp21852, i64 1 + %tmp21854 = getelementptr inbounds float* %tmp21853, i64 1 + %tmp21855 = getelementptr inbounds float* %tmp21854, i64 1 + %tmp21856 = getelementptr inbounds float* 
%tmp21855, i64 1 + %tmp21857 = getelementptr inbounds float* %tmp21856, i64 1 + %tmp21858 = getelementptr inbounds float* %tmp21857, i64 1 + %tmp21859 = getelementptr inbounds float* %tmp21858, i64 1 + %tmp21860 = getelementptr inbounds float* %tmp21859, i64 1 + %tmp21861 = getelementptr inbounds float* %tmp21860, i64 1 + %tmp21862 = getelementptr inbounds float* %tmp21861, i64 1 + %tmp21863 = getelementptr inbounds float* %tmp21862, i64 1 + %tmp21864 = getelementptr inbounds float* %tmp21863, i64 1 + %tmp21865 = getelementptr inbounds float* %tmp21864, i64 1 + %tmp21866 = getelementptr inbounds float* %tmp21865, i64 1 + %tmp21867 = getelementptr inbounds float* %tmp21866, i64 1 + %tmp21868 = getelementptr inbounds float* %tmp21867, i64 1 + %tmp21869 = getelementptr inbounds float* %tmp21868, i64 1 + %tmp21870 = getelementptr inbounds float* %tmp21869, i64 1 + %tmp21871 = getelementptr inbounds float* %tmp21870, i64 1 + %tmp21872 = getelementptr inbounds float* %tmp21871, i64 1 + %tmp21873 = getelementptr inbounds float* %tmp21872, i64 1 + %tmp21874 = getelementptr inbounds float* %tmp21873, i64 1 + %tmp21875 = getelementptr inbounds float* %tmp21874, i64 1 + %tmp21876 = getelementptr inbounds float* %tmp21875, i64 1 + %tmp21877 = getelementptr inbounds float* %tmp21876, i64 1 + %tmp21878 = getelementptr inbounds float* %tmp21877, i64 1 + %tmp21879 = getelementptr inbounds float* %tmp21878, i64 1 + %tmp21880 = getelementptr inbounds float* %tmp21879, i64 1 + %tmp21881 = getelementptr inbounds float* %tmp21880, i64 1 + %tmp21882 = getelementptr inbounds float* %tmp21881, i64 1 + %tmp21883 = getelementptr inbounds float* %tmp21882, i64 1 + %tmp21884 = getelementptr inbounds float* %tmp21883, i64 1 + %tmp21885 = getelementptr inbounds float* %tmp21884, i64 1 + %tmp21886 = getelementptr inbounds float* %tmp21885, i64 1 + %tmp21887 = getelementptr inbounds float* %tmp21886, i64 1 + %tmp21888 = getelementptr inbounds float* %tmp21887, i64 1 + %tmp21889 = getelementptr 
inbounds float* %tmp21888, i64 1 + %tmp21890 = getelementptr inbounds float* %tmp21889, i64 1 + %tmp21891 = getelementptr inbounds float* %tmp21890, i64 1 + %tmp21892 = getelementptr inbounds float* %tmp21891, i64 1 + %tmp21893 = getelementptr inbounds float* %tmp21892, i64 1 + %tmp21894 = getelementptr inbounds float* %tmp21893, i64 1 + %tmp21895 = getelementptr inbounds float* %tmp21894, i64 1 + %tmp21896 = getelementptr inbounds float* %tmp21895, i64 1 + %tmp21897 = getelementptr inbounds float* %tmp21896, i64 1 + %tmp21898 = getelementptr inbounds float* %tmp21897, i64 1 + %tmp21899 = getelementptr inbounds float* %tmp21898, i64 1 + %tmp21900 = getelementptr inbounds float* %tmp21899, i64 1 + %tmp21901 = getelementptr inbounds float* %tmp21900, i64 1 + %tmp21902 = getelementptr inbounds float* %tmp21901, i64 1 + %tmp21903 = getelementptr inbounds float* %tmp21902, i64 1 + %tmp21904 = getelementptr inbounds float* %tmp21903, i64 1 + %tmp21905 = getelementptr inbounds float* %tmp21904, i64 1 + %tmp21906 = getelementptr inbounds float* %tmp21905, i64 1 + %tmp21907 = getelementptr inbounds float* %tmp21906, i64 1 + %tmp21908 = getelementptr inbounds float* %tmp21907, i64 1 + %tmp21909 = getelementptr inbounds float* %tmp21908, i64 1 + %tmp21910 = getelementptr inbounds float* %tmp21909, i64 1 + %tmp21911 = getelementptr inbounds float* %tmp21910, i64 1 + %tmp21912 = getelementptr inbounds float* %tmp21911, i64 1 + %tmp21913 = getelementptr inbounds float* %tmp21912, i64 1 + %tmp21914 = getelementptr inbounds float* %tmp21913, i64 1 + %tmp21915 = getelementptr inbounds float* %tmp21914, i64 1 + %tmp21916 = getelementptr inbounds float* %tmp21915, i64 1 + %tmp21917 = getelementptr inbounds float* %tmp21916, i64 1 + %tmp21918 = getelementptr inbounds float* %tmp21917, i64 1 + %tmp21919 = getelementptr inbounds float* %tmp21918, i64 1 + %tmp21920 = getelementptr inbounds float* %tmp21919, i64 1 + %tmp21921 = getelementptr inbounds float* %tmp21920, i64 1 + %tmp21922 = 
getelementptr inbounds float* %tmp21921, i64 1 + %tmp21923 = getelementptr inbounds float* %tmp21922, i64 1 + %tmp21924 = getelementptr inbounds float* %tmp21923, i64 1 + %tmp21925 = getelementptr inbounds float* %tmp21924, i64 1 + %tmp21926 = getelementptr inbounds float* %tmp21925, i64 1 + %tmp21927 = getelementptr inbounds float* %tmp21926, i64 1 + %tmp21928 = getelementptr inbounds float* %tmp21927, i64 1 + %tmp21929 = getelementptr inbounds float* %tmp21928, i64 1 + %tmp21930 = getelementptr inbounds float* %tmp21929, i64 1 + %tmp21931 = getelementptr inbounds float* %tmp21930, i64 1 + %tmp21932 = getelementptr inbounds float* %tmp21931, i64 1 + %tmp21933 = getelementptr inbounds float* %tmp21932, i64 1 + %tmp21934 = getelementptr inbounds float* %tmp21933, i64 1 + %tmp21935 = getelementptr inbounds float* %tmp21934, i64 1 + %tmp21936 = getelementptr inbounds float* %tmp21935, i64 1 + %tmp21937 = getelementptr inbounds float* %tmp21936, i64 1 + %tmp21938 = getelementptr inbounds float* %tmp21937, i64 1 + %tmp21939 = getelementptr inbounds float* %tmp21938, i64 1 + %tmp21940 = getelementptr inbounds float* %tmp21939, i64 1 + %tmp21941 = getelementptr inbounds float* %tmp21940, i64 1 + %tmp21942 = getelementptr inbounds float* %tmp21941, i64 1 + %tmp21943 = getelementptr inbounds float* %tmp21942, i64 1 + %tmp21944 = getelementptr inbounds float* %tmp21943, i64 1 + %tmp21945 = getelementptr inbounds float* %tmp21944, i64 1 + %tmp21946 = getelementptr inbounds float* %tmp21945, i64 1 + %tmp21947 = getelementptr inbounds float* %tmp21946, i64 1 + %tmp21948 = getelementptr inbounds float* %tmp21947, i64 1 + %tmp21949 = getelementptr inbounds float* %tmp21948, i64 1 + %tmp21950 = getelementptr inbounds float* %tmp21949, i64 1 + %tmp21951 = getelementptr inbounds float* %tmp21950, i64 1 + %tmp21952 = getelementptr inbounds float* %tmp21951, i64 1 + %tmp21953 = getelementptr inbounds float* %tmp21952, i64 1 + %tmp21954 = getelementptr inbounds float* %tmp21953, i64 1 
+ %tmp21955 = getelementptr inbounds float* %tmp21954, i64 1 + %tmp21956 = getelementptr inbounds float* %tmp21955, i64 1 + %tmp21957 = getelementptr inbounds float* %tmp21956, i64 1 + %tmp21958 = getelementptr inbounds float* %tmp21957, i64 1 + %tmp21959 = getelementptr inbounds float* %tmp21958, i64 1 + %tmp21960 = getelementptr inbounds float* %tmp21959, i64 1 + %tmp21961 = getelementptr inbounds float* %tmp21960, i64 1 + %tmp21962 = getelementptr inbounds float* %tmp21961, i64 1 + %tmp21963 = getelementptr inbounds float* %tmp21962, i64 1 + %tmp21964 = getelementptr inbounds float* %tmp21963, i64 1 + %tmp21965 = getelementptr inbounds float* %tmp21964, i64 1 + %tmp21966 = getelementptr inbounds float* %tmp21965, i64 1 + %tmp21967 = getelementptr inbounds float* %tmp21966, i64 1 + %tmp21968 = getelementptr inbounds float* %tmp21967, i64 1 + %tmp21969 = getelementptr inbounds float* %tmp21968, i64 1 + %tmp21970 = getelementptr inbounds float* %tmp21969, i64 1 + %tmp21971 = getelementptr inbounds float* %tmp21970, i64 1 + %tmp21972 = getelementptr inbounds float* %tmp21971, i64 1 + %tmp21973 = getelementptr inbounds float* %tmp21972, i64 1 + %tmp21974 = getelementptr inbounds float* %tmp21973, i64 1 + %tmp21975 = getelementptr inbounds float* %tmp21974, i64 1 + %tmp21976 = getelementptr inbounds float* %tmp21975, i64 1 + %tmp21977 = getelementptr inbounds float* %tmp21976, i64 1 + %tmp21978 = getelementptr inbounds float* %tmp21977, i64 1 + %tmp21979 = getelementptr inbounds float* %tmp21978, i64 1 + %tmp21980 = getelementptr inbounds float* %tmp21979, i64 1 + %tmp21981 = getelementptr inbounds float* %tmp21980, i64 1 + %tmp21982 = getelementptr inbounds float* %tmp21981, i64 1 + %tmp21983 = getelementptr inbounds float* %tmp21982, i64 1 + %tmp21984 = getelementptr inbounds float* %tmp21983, i64 1 + %tmp21985 = getelementptr inbounds float* %tmp21984, i64 1 + %tmp21986 = getelementptr inbounds float* %tmp21985, i64 1 + %tmp21987 = getelementptr inbounds float* 
%tmp21986, i64 1 + %tmp21988 = getelementptr inbounds float* %tmp21987, i64 1 + %tmp21989 = getelementptr inbounds float* %tmp21988, i64 1 + %tmp21990 = getelementptr inbounds float* %tmp21989, i64 1 + %tmp21991 = getelementptr inbounds float* %tmp21990, i64 1 + %tmp21992 = getelementptr inbounds float* %tmp21991, i64 1 + %tmp21993 = getelementptr inbounds float* %tmp21992, i64 1 + %tmp21994 = getelementptr inbounds float* %tmp21993, i64 1 + %tmp21995 = getelementptr inbounds float* %tmp21994, i64 1 + %tmp21996 = getelementptr inbounds float* %tmp21995, i64 1 + %tmp21997 = getelementptr inbounds float* %tmp21996, i64 1 + %tmp21998 = getelementptr inbounds float* %tmp21997, i64 1 + %tmp21999 = getelementptr inbounds float* %tmp21998, i64 1 + %tmp22000 = getelementptr inbounds float* %tmp21999, i64 1 + %tmp22001 = getelementptr inbounds float* %tmp22000, i64 1 + %tmp22002 = getelementptr inbounds float* %tmp22001, i64 1 + %tmp22003 = getelementptr inbounds float* %tmp22002, i64 1 + %tmp22004 = getelementptr inbounds float* %tmp22003, i64 1 + %tmp22005 = getelementptr inbounds float* %tmp22004, i64 1 + %tmp22006 = getelementptr inbounds float* %tmp22005, i64 1 + %tmp22007 = getelementptr inbounds float* %tmp22006, i64 1 + %tmp22008 = getelementptr inbounds float* %tmp22007, i64 1 + %tmp22009 = getelementptr inbounds float* %tmp22008, i64 1 + %tmp22010 = getelementptr inbounds float* %tmp22009, i64 1 + %tmp22011 = getelementptr inbounds float* %tmp22010, i64 1 + %tmp22012 = getelementptr inbounds float* %tmp22011, i64 1 + %tmp22013 = getelementptr inbounds float* %tmp22012, i64 1 + %tmp22014 = getelementptr inbounds float* %tmp22013, i64 1 + %tmp22015 = getelementptr inbounds float* %tmp22014, i64 1 + %tmp22016 = getelementptr inbounds float* %tmp22015, i64 1 + %tmp22017 = getelementptr inbounds float* %tmp22016, i64 1 + %tmp22018 = getelementptr inbounds float* %tmp22017, i64 1 + %tmp22019 = getelementptr inbounds float* %tmp22018, i64 1 + %tmp22020 = getelementptr 
inbounds float* %tmp22019, i64 1 + %tmp22021 = getelementptr inbounds float* %tmp22020, i64 1 + %tmp22022 = getelementptr inbounds float* %tmp22021, i64 1 + %tmp22023 = getelementptr inbounds float* %tmp22022, i64 1 + %tmp22024 = getelementptr inbounds float* %tmp22023, i64 1 + %tmp22025 = getelementptr inbounds float* %tmp22024, i64 1 + %tmp22026 = getelementptr inbounds float* %tmp22025, i64 1 + %tmp22027 = getelementptr inbounds float* %tmp22026, i64 1 + %tmp22028 = getelementptr inbounds float* %tmp22027, i64 1 + %tmp22029 = getelementptr inbounds float* %tmp22028, i64 1 + %tmp22030 = getelementptr inbounds float* %tmp22029, i64 1 + %tmp22031 = getelementptr inbounds float* %tmp22030, i64 1 + %tmp22032 = getelementptr inbounds float* %tmp22031, i64 1 + %tmp22033 = getelementptr inbounds float* %tmp22032, i64 1 + %tmp22034 = getelementptr inbounds float* %tmp22033, i64 1 + %tmp22035 = getelementptr inbounds float* %tmp22034, i64 1 + %tmp22036 = getelementptr inbounds float* %tmp22035, i64 1 + %tmp22037 = getelementptr inbounds float* %tmp22036, i64 1 + %tmp22038 = getelementptr inbounds float* %tmp22037, i64 1 + %tmp22039 = getelementptr inbounds float* %tmp22038, i64 1 + %tmp22040 = getelementptr inbounds float* %tmp22039, i64 1 + %tmp22041 = getelementptr inbounds float* %tmp22040, i64 1 + %tmp22042 = getelementptr inbounds float* %tmp22041, i64 1 + %tmp22043 = getelementptr inbounds float* %tmp22042, i64 1 + %tmp22044 = getelementptr inbounds float* %tmp22043, i64 1 + %tmp22045 = getelementptr inbounds float* %tmp22044, i64 1 + %tmp22046 = getelementptr inbounds float* %tmp22045, i64 1 + %tmp22047 = getelementptr inbounds float* %tmp22046, i64 1 + %tmp22048 = getelementptr inbounds float* %tmp22047, i64 1 + %tmp22049 = getelementptr inbounds float* %tmp22048, i64 1 + %tmp22050 = getelementptr inbounds float* %tmp22049, i64 1 + %tmp22051 = getelementptr inbounds float* %tmp22050, i64 1 + %tmp22052 = getelementptr inbounds float* %tmp22051, i64 1 + %tmp22053 = 
getelementptr inbounds float* %tmp22052, i64 1 + %tmp22054 = getelementptr inbounds float* %tmp22053, i64 1 + %tmp22055 = getelementptr inbounds float* %tmp22054, i64 1 + %tmp22056 = getelementptr inbounds float* %tmp22055, i64 1 + %tmp22057 = getelementptr inbounds float* %tmp22056, i64 1 + %tmp22058 = getelementptr inbounds float* %tmp22057, i64 1 + %tmp22059 = getelementptr inbounds float* %tmp22058, i64 1 + %tmp22060 = getelementptr inbounds float* %tmp22059, i64 1 + %tmp22061 = getelementptr inbounds float* %tmp22060, i64 1 + %tmp22062 = getelementptr inbounds float* %tmp22061, i64 1 + %tmp22063 = getelementptr inbounds float* %tmp22062, i64 1 + %tmp22064 = getelementptr inbounds float* %tmp22063, i64 1 + %tmp22065 = getelementptr inbounds float* %tmp22064, i64 1 + %tmp22066 = getelementptr inbounds float* %tmp22065, i64 1 + %tmp22067 = getelementptr inbounds float* %tmp22066, i64 1 + %tmp22068 = getelementptr inbounds float* %tmp22067, i64 1 + %tmp22069 = getelementptr inbounds float* %tmp22068, i64 1 + %tmp22070 = getelementptr inbounds float* %tmp22069, i64 1 + %tmp22071 = getelementptr inbounds float* %tmp22070, i64 1 + %tmp22072 = getelementptr inbounds float* %tmp22071, i64 1 + %tmp22073 = getelementptr inbounds float* %tmp22072, i64 1 + %tmp22074 = getelementptr inbounds float* %tmp22073, i64 1 + %tmp22075 = getelementptr inbounds float* %tmp22074, i64 1 + %tmp22076 = getelementptr inbounds float* %tmp22075, i64 1 + %tmp22077 = getelementptr inbounds float* %tmp22076, i64 1 + %tmp22078 = getelementptr inbounds float* %tmp22077, i64 1 + %tmp22079 = getelementptr inbounds float* %tmp22078, i64 1 + %tmp22080 = getelementptr inbounds float* %tmp22079, i64 1 + %tmp22081 = getelementptr inbounds float* %tmp22080, i64 1 + %tmp22082 = getelementptr inbounds float* %tmp22081, i64 1 + %tmp22083 = getelementptr inbounds float* %tmp22082, i64 1 + %tmp22084 = getelementptr inbounds float* %tmp22083, i64 1 + %tmp22085 = getelementptr inbounds float* %tmp22084, i64 1 
+ %tmp22086 = getelementptr inbounds float* %tmp22085, i64 1 + %tmp22087 = getelementptr inbounds float* %tmp22086, i64 1 + %tmp22088 = getelementptr inbounds float* %tmp22087, i64 1 + %tmp22089 = getelementptr inbounds float* %tmp22088, i64 1 + %tmp22090 = getelementptr inbounds float* %tmp22089, i64 1 + %tmp22091 = getelementptr inbounds float* %tmp22090, i64 1 + %tmp22092 = getelementptr inbounds float* %tmp22091, i64 1 + %tmp22093 = getelementptr inbounds float* %tmp22092, i64 1 + %tmp22094 = getelementptr inbounds float* %tmp22093, i64 1 + %tmp22095 = getelementptr inbounds float* %tmp22094, i64 1 + %tmp22096 = getelementptr inbounds float* %tmp22095, i64 1 + %tmp22097 = getelementptr inbounds float* %tmp22096, i64 1 + %tmp22098 = getelementptr inbounds float* %tmp22097, i64 1 + %tmp22099 = getelementptr inbounds float* %tmp22098, i64 1 + %tmp22100 = getelementptr inbounds float* %tmp22099, i64 1 + %tmp22101 = getelementptr inbounds float* %tmp22100, i64 1 + %tmp22102 = getelementptr inbounds float* %tmp22101, i64 1 + %tmp22103 = getelementptr inbounds float* %tmp22102, i64 1 + %tmp22104 = getelementptr inbounds float* %tmp22103, i64 1 + %tmp22105 = getelementptr inbounds float* %tmp22104, i64 1 + %tmp22106 = getelementptr inbounds float* %tmp22105, i64 1 + %tmp22107 = getelementptr inbounds float* %tmp22106, i64 1 + %tmp22108 = getelementptr inbounds float* %tmp22107, i64 1 + %tmp22109 = getelementptr inbounds float* %tmp22108, i64 1 + %tmp22110 = getelementptr inbounds float* %tmp22109, i64 1 + %tmp22111 = getelementptr inbounds float* %tmp22110, i64 1 + %tmp22112 = getelementptr inbounds float* %tmp22111, i64 1 + %tmp22113 = getelementptr inbounds float* %tmp22112, i64 1 + %tmp22114 = getelementptr inbounds float* %tmp22113, i64 1 + %tmp22115 = getelementptr inbounds float* %tmp22114, i64 1 + %tmp22116 = getelementptr inbounds float* %tmp22115, i64 1 + %tmp22117 = getelementptr inbounds float* %tmp22116, i64 1 + %tmp22118 = getelementptr inbounds float* 
%tmp22117, i64 1 + %tmp22119 = getelementptr inbounds float* %tmp22118, i64 1 + %tmp22120 = getelementptr inbounds float* %tmp22119, i64 1 + %tmp22121 = getelementptr inbounds float* %tmp22120, i64 1 + %tmp22122 = getelementptr inbounds float* %tmp22121, i64 1 + %tmp22123 = getelementptr inbounds float* %tmp22122, i64 1 + %tmp22124 = getelementptr inbounds float* %tmp22123, i64 1 + %tmp22125 = getelementptr inbounds float* %tmp22124, i64 1 + %tmp22126 = getelementptr inbounds float* %tmp22125, i64 1 + %tmp22127 = getelementptr inbounds float* %tmp22126, i64 1 + %tmp22128 = getelementptr inbounds float* %tmp22127, i64 1 + %tmp22129 = getelementptr inbounds float* %tmp22128, i64 1 + %tmp22130 = getelementptr inbounds float* %tmp22129, i64 1 + %tmp22131 = getelementptr inbounds float* %tmp22130, i64 1 + %tmp22132 = getelementptr inbounds float* %tmp22131, i64 1 + %tmp22133 = getelementptr inbounds float* %tmp22132, i64 1 + %tmp22134 = getelementptr inbounds float* %tmp22133, i64 1 + %tmp22135 = getelementptr inbounds float* %tmp22134, i64 1 + %tmp22136 = getelementptr inbounds float* %tmp22135, i64 1 + %tmp22137 = getelementptr inbounds float* %tmp22136, i64 1 + %tmp22138 = getelementptr inbounds float* %tmp22137, i64 1 + %tmp22139 = getelementptr inbounds float* %tmp22138, i64 1 + %tmp22140 = getelementptr inbounds float* %tmp22139, i64 1 + %tmp22141 = getelementptr inbounds float* %tmp22140, i64 1 + %tmp22142 = getelementptr inbounds float* %tmp22141, i64 1 + %tmp22143 = getelementptr inbounds float* %tmp22142, i64 1 + %tmp22144 = getelementptr inbounds float* %tmp22143, i64 1 + %tmp22145 = getelementptr inbounds float* %tmp22144, i64 1 + %tmp22146 = getelementptr inbounds float* %tmp22145, i64 1 + %tmp22147 = getelementptr inbounds float* %tmp22146, i64 1 + %tmp22148 = getelementptr inbounds float* %tmp22147, i64 1 + %tmp22149 = getelementptr inbounds float* %tmp22148, i64 1 + %tmp22150 = getelementptr inbounds float* %tmp22149, i64 1 + %tmp22151 = getelementptr 
inbounds float* %tmp22150, i64 1 + %tmp22152 = getelementptr inbounds float* %tmp22151, i64 1 + %tmp22153 = getelementptr inbounds float* %tmp22152, i64 1 + %tmp22154 = getelementptr inbounds float* %tmp22153, i64 1 + %tmp22155 = getelementptr inbounds float* %tmp22154, i64 1 + %tmp22156 = getelementptr inbounds float* %tmp22155, i64 1 + %tmp22157 = getelementptr inbounds float* %tmp22156, i64 1 + %tmp22158 = getelementptr inbounds float* %tmp22157, i64 1 + %tmp22159 = getelementptr inbounds float* %tmp22158, i64 1 + %tmp22160 = getelementptr inbounds float* %tmp22159, i64 1 + %tmp22161 = getelementptr inbounds float* %tmp22160, i64 1 + %tmp22162 = getelementptr inbounds float* %tmp22161, i64 1 + %tmp22163 = getelementptr inbounds float* %tmp22162, i64 1 + %tmp22164 = getelementptr inbounds float* %tmp22163, i64 1 + %tmp22165 = getelementptr inbounds float* %tmp22164, i64 1 + %tmp22166 = getelementptr inbounds float* %tmp22165, i64 1 + %tmp22167 = getelementptr inbounds float* %tmp22166, i64 1 + %tmp22168 = getelementptr inbounds float* %tmp22167, i64 1 + %tmp22169 = getelementptr inbounds float* %tmp22168, i64 1 + %tmp22170 = getelementptr inbounds float* %tmp22169, i64 1 + %tmp22171 = getelementptr inbounds float* %tmp22170, i64 1 + %tmp22172 = getelementptr inbounds float* %tmp22171, i64 1 + %tmp22173 = getelementptr inbounds float* %tmp22172, i64 1 + %tmp22174 = getelementptr inbounds float* %tmp22173, i64 1 + %tmp22175 = getelementptr inbounds float* %tmp22174, i64 1 + %tmp22176 = getelementptr inbounds float* %tmp22175, i64 1 + %tmp22177 = getelementptr inbounds float* %tmp22176, i64 1 + %tmp22178 = getelementptr inbounds float* %tmp22177, i64 1 + %tmp22179 = getelementptr inbounds float* %tmp22178, i64 1 + %tmp22180 = getelementptr inbounds float* %tmp22179, i64 1 + %tmp22181 = getelementptr inbounds float* %tmp22180, i64 1 + %tmp22182 = getelementptr inbounds float* %tmp22181, i64 1 + %tmp22183 = getelementptr inbounds float* %tmp22182, i64 1 + %tmp22184 = 
getelementptr inbounds float* %tmp22183, i64 1 + %tmp22185 = getelementptr inbounds float* %tmp22184, i64 1 + %tmp22186 = getelementptr inbounds float* %tmp22185, i64 1 + %tmp22187 = getelementptr inbounds float* %tmp22186, i64 1 + %tmp22188 = getelementptr inbounds float* %tmp22187, i64 1 + %tmp22189 = getelementptr inbounds float* %tmp22188, i64 1 + %tmp22190 = getelementptr inbounds float* %tmp22189, i64 1 + %tmp22191 = getelementptr inbounds float* %tmp22190, i64 1 + %tmp22192 = getelementptr inbounds float* %tmp22191, i64 1 + %tmp22193 = getelementptr inbounds float* %tmp22192, i64 1 + %tmp22194 = getelementptr inbounds float* %tmp22193, i64 1 + %tmp22195 = getelementptr inbounds float* %tmp22194, i64 1 + %tmp22196 = getelementptr inbounds float* %tmp22195, i64 1 + %tmp22197 = getelementptr inbounds float* %tmp22196, i64 1 + %tmp22198 = getelementptr inbounds float* %tmp22197, i64 1 + %tmp22199 = getelementptr inbounds float* %tmp22198, i64 1 + %tmp22200 = getelementptr inbounds float* %tmp22199, i64 1 + %tmp22201 = getelementptr inbounds float* %tmp22200, i64 1 + %tmp22202 = getelementptr inbounds float* %tmp22201, i64 1 + %tmp22203 = getelementptr inbounds float* %tmp22202, i64 1 + %tmp22204 = getelementptr inbounds float* %tmp22203, i64 1 + %tmp22205 = getelementptr inbounds float* %tmp22204, i64 1 + %tmp22206 = getelementptr inbounds float* %tmp22205, i64 1 + %tmp22207 = getelementptr inbounds float* %tmp22206, i64 1 + %tmp22208 = getelementptr inbounds float* %tmp22207, i64 1 + %tmp22209 = getelementptr inbounds float* %tmp22208, i64 1 + %tmp22210 = getelementptr inbounds float* %tmp22209, i64 1 + %tmp22211 = getelementptr inbounds float* %tmp22210, i64 1 + %tmp22212 = getelementptr inbounds float* %tmp22211, i64 1 + %tmp22213 = getelementptr inbounds float* %tmp22212, i64 1 + %tmp22214 = getelementptr inbounds float* %tmp22213, i64 1 + %tmp22215 = getelementptr inbounds float* %tmp22214, i64 1 + %tmp22216 = getelementptr inbounds float* %tmp22215, i64 1 
+ %tmp22217 = getelementptr inbounds float* %tmp22216, i64 1 + %tmp22218 = getelementptr inbounds float* %tmp22217, i64 1 + %tmp22219 = getelementptr inbounds float* %tmp22218, i64 1 + %tmp22220 = getelementptr inbounds float* %tmp22219, i64 1 + %tmp22221 = getelementptr inbounds float* %tmp22220, i64 1 + %tmp22222 = getelementptr inbounds float* %tmp22221, i64 1 + %tmp22223 = getelementptr inbounds float* %tmp22222, i64 1 + %tmp22224 = getelementptr inbounds float* %tmp22223, i64 1 + %tmp22225 = getelementptr inbounds float* %tmp22224, i64 1 + %tmp22226 = getelementptr inbounds float* %tmp22225, i64 1 + %tmp22227 = getelementptr inbounds float* %tmp22226, i64 1 + %tmp22228 = getelementptr inbounds float* %tmp22227, i64 1 + %tmp22229 = getelementptr inbounds float* %tmp22228, i64 1 + %tmp22230 = getelementptr inbounds float* %tmp22229, i64 1 + %tmp22231 = getelementptr inbounds float* %tmp22230, i64 1 + %tmp22232 = getelementptr inbounds float* %tmp22231, i64 1 + %tmp22233 = getelementptr inbounds float* %tmp22232, i64 1 + %tmp22234 = getelementptr inbounds float* %tmp22233, i64 1 + %tmp22235 = getelementptr inbounds float* %tmp22234, i64 1 + %tmp22236 = getelementptr inbounds float* %tmp22235, i64 1 + %tmp22237 = getelementptr inbounds float* %tmp22236, i64 1 + %tmp22238 = getelementptr inbounds float* %tmp22237, i64 1 + %tmp22239 = getelementptr inbounds float* %tmp22238, i64 1 + %tmp22240 = getelementptr inbounds float* %tmp22239, i64 1 + %tmp22241 = getelementptr inbounds float* %tmp22240, i64 1 + %tmp22242 = getelementptr inbounds float* %tmp22241, i64 1 + %tmp22243 = getelementptr inbounds float* %tmp22242, i64 1 + %tmp22244 = getelementptr inbounds float* %tmp22243, i64 1 + %tmp22245 = getelementptr inbounds float* %tmp22244, i64 1 + %tmp22246 = getelementptr inbounds float* %tmp22245, i64 1 + %tmp22247 = getelementptr inbounds float* %tmp22246, i64 1 + %tmp22248 = getelementptr inbounds float* %tmp22247, i64 1 + %tmp22249 = getelementptr inbounds float* 
%tmp22248, i64 1 + %tmp22250 = getelementptr inbounds float* %tmp22249, i64 1 + %tmp22251 = getelementptr inbounds float* %tmp22250, i64 1 + %tmp22252 = getelementptr inbounds float* %tmp22251, i64 1 + %tmp22253 = getelementptr inbounds float* %tmp22252, i64 1 + %tmp22254 = getelementptr inbounds float* %tmp22253, i64 1 + %tmp22255 = getelementptr inbounds float* %tmp22254, i64 1 + %tmp22256 = getelementptr inbounds float* %tmp22255, i64 1 + %tmp22257 = getelementptr inbounds float* %tmp22256, i64 1 + %tmp22258 = getelementptr inbounds float* %tmp22257, i64 1 + %tmp22259 = getelementptr inbounds float* %tmp22258, i64 1 + %tmp22260 = getelementptr inbounds float* %tmp22259, i64 1 + %tmp22261 = getelementptr inbounds float* %tmp22260, i64 1 + %tmp22262 = getelementptr inbounds float* %tmp22261, i64 1 + %tmp22263 = getelementptr inbounds float* %tmp22262, i64 1 + %tmp22264 = getelementptr inbounds float* %tmp22263, i64 1 + %tmp22265 = getelementptr inbounds float* %tmp22264, i64 1 + %tmp22266 = getelementptr inbounds float* %tmp22265, i64 1 + %tmp22267 = getelementptr inbounds float* %tmp22266, i64 1 + %tmp22268 = getelementptr inbounds float* %tmp22267, i64 1 + %tmp22269 = getelementptr inbounds float* %tmp22268, i64 1 + %tmp22270 = getelementptr inbounds float* %tmp22269, i64 1 + %tmp22271 = getelementptr inbounds float* %tmp22270, i64 1 + %tmp22272 = getelementptr inbounds float* %tmp22271, i64 1 + %tmp22273 = getelementptr inbounds float* %tmp22272, i64 1 + %tmp22274 = getelementptr inbounds float* %tmp22273, i64 1 + %tmp22275 = getelementptr inbounds float* %tmp22274, i64 1 + %tmp22276 = getelementptr inbounds float* %tmp22275, i64 1 + %tmp22277 = getelementptr inbounds float* %tmp22276, i64 1 + %tmp22278 = getelementptr inbounds float* %tmp22277, i64 1 + %tmp22279 = getelementptr inbounds float* %tmp22278, i64 1 + %tmp22280 = getelementptr inbounds float* %tmp22279, i64 1 + %tmp22281 = getelementptr inbounds float* %tmp22280, i64 1 + %tmp22282 = getelementptr 
inbounds float* %tmp22281, i64 1 + %tmp22283 = getelementptr inbounds float* %tmp22282, i64 1 + %tmp22284 = getelementptr inbounds float* %tmp22283, i64 1 + %tmp22285 = getelementptr inbounds float* %tmp22284, i64 1 + %tmp22286 = getelementptr inbounds float* %tmp22285, i64 1 + %tmp22287 = getelementptr inbounds float* %tmp22286, i64 1 + %tmp22288 = getelementptr inbounds float* %tmp22287, i64 1 + %tmp22289 = getelementptr inbounds float* %tmp22288, i64 1 + %tmp22290 = getelementptr inbounds float* %tmp22289, i64 1 + %tmp22291 = getelementptr inbounds float* %tmp22290, i64 1 + %tmp22292 = getelementptr inbounds float* %tmp22291, i64 1 + %tmp22293 = getelementptr inbounds float* %tmp22292, i64 1 + %tmp22294 = getelementptr inbounds float* %tmp22293, i64 1 + %tmp22295 = getelementptr inbounds float* %tmp22294, i64 1 + %tmp22296 = getelementptr inbounds float* %tmp22295, i64 1 + %tmp22297 = getelementptr inbounds float* %tmp22296, i64 1 + %tmp22298 = getelementptr inbounds float* %tmp22297, i64 1 + %tmp22299 = getelementptr inbounds float* %tmp22298, i64 1 + %tmp22300 = getelementptr inbounds float* %tmp22299, i64 1 + %tmp22301 = getelementptr inbounds float* %tmp22300, i64 1 + %tmp22302 = getelementptr inbounds float* %tmp22301, i64 1 + %tmp22303 = getelementptr inbounds float* %tmp22302, i64 1 + %tmp22304 = getelementptr inbounds float* %tmp22303, i64 1 + %tmp22305 = getelementptr inbounds float* %tmp22304, i64 1 + %tmp22306 = getelementptr inbounds float* %tmp22305, i64 1 + %tmp22307 = getelementptr inbounds float* %tmp22306, i64 1 + %tmp22308 = getelementptr inbounds float* %tmp22307, i64 1 + %tmp22309 = getelementptr inbounds float* %tmp22308, i64 1 + %tmp22310 = getelementptr inbounds float* %tmp22309, i64 1 + %tmp22311 = getelementptr inbounds float* %tmp22310, i64 1 + %tmp22312 = getelementptr inbounds float* %tmp22311, i64 1 + %tmp22313 = getelementptr inbounds float* %tmp22312, i64 1 + %tmp22314 = getelementptr inbounds float* %tmp22313, i64 1 + %tmp22315 = 
getelementptr inbounds float* %tmp22314, i64 1 + %tmp22316 = getelementptr inbounds float* %tmp22315, i64 1 + %tmp22317 = getelementptr inbounds float* %tmp22316, i64 1 + %tmp22318 = getelementptr inbounds float* %tmp22317, i64 1 + %tmp22319 = getelementptr inbounds float* %tmp22318, i64 1 + %tmp22320 = getelementptr inbounds float* %tmp22319, i64 1 + %tmp22321 = getelementptr inbounds float* %tmp22320, i64 1 + %tmp22322 = getelementptr inbounds float* %tmp22321, i64 1 + %tmp22323 = getelementptr inbounds float* %tmp22322, i64 1 + %tmp22324 = getelementptr inbounds float* %tmp22323, i64 1 + %tmp22325 = getelementptr inbounds float* %tmp22324, i64 1 + %tmp22326 = getelementptr inbounds float* %tmp22325, i64 1 + %tmp22327 = getelementptr inbounds float* %tmp22326, i64 1 + %tmp22328 = getelementptr inbounds float* %tmp22327, i64 1 + %tmp22329 = getelementptr inbounds float* %tmp22328, i64 1 + %tmp22330 = getelementptr inbounds float* %tmp22329, i64 1 + %tmp22331 = getelementptr inbounds float* %tmp22330, i64 1 + %tmp22332 = getelementptr inbounds float* %tmp22331, i64 1 + %tmp22333 = getelementptr inbounds float* %tmp22332, i64 1 + %tmp22334 = getelementptr inbounds float* %tmp22333, i64 1 + %tmp22335 = getelementptr inbounds float* %tmp22334, i64 1 + %tmp22336 = getelementptr inbounds float* %tmp22335, i64 1 + %tmp22337 = getelementptr inbounds float* %tmp22336, i64 1 + %tmp22338 = getelementptr inbounds float* %tmp22337, i64 1 + %tmp22339 = getelementptr inbounds float* %tmp22338, i64 1 + %tmp22340 = getelementptr inbounds float* %tmp22339, i64 1 + %tmp22341 = getelementptr inbounds float* %tmp22340, i64 1 + %tmp22342 = getelementptr inbounds float* %tmp22341, i64 1 + %tmp22343 = getelementptr inbounds float* %tmp22342, i64 1 + %tmp22344 = getelementptr inbounds float* %tmp22343, i64 1 + %tmp22345 = getelementptr inbounds float* %tmp22344, i64 1 + %tmp22346 = getelementptr inbounds float* %tmp22345, i64 1 + %tmp22347 = getelementptr inbounds float* %tmp22346, i64 1 
+ %tmp22348 = getelementptr inbounds float* %tmp22347, i64 1 + %tmp22349 = getelementptr inbounds float* %tmp22348, i64 1 + %tmp22350 = getelementptr inbounds float* %tmp22349, i64 1 + %tmp22351 = getelementptr inbounds float* %tmp22350, i64 1 + %tmp22352 = getelementptr inbounds float* %tmp22351, i64 1 + %tmp22353 = getelementptr inbounds float* %tmp22352, i64 1 + %tmp22354 = getelementptr inbounds float* %tmp22353, i64 1 + %tmp22355 = getelementptr inbounds float* %tmp22354, i64 1 + %tmp22356 = getelementptr inbounds float* %tmp22355, i64 1 + %tmp22357 = getelementptr inbounds float* %tmp22356, i64 1 + %tmp22358 = getelementptr inbounds float* %tmp22357, i64 1 + %tmp22359 = getelementptr inbounds float* %tmp22358, i64 1 + %tmp22360 = getelementptr inbounds float* %tmp22359, i64 1 + %tmp22361 = getelementptr inbounds float* %tmp22360, i64 1 + %tmp22362 = getelementptr inbounds float* %tmp22361, i64 1 + %tmp22363 = getelementptr inbounds float* %tmp22362, i64 1 + %tmp22364 = getelementptr inbounds float* %tmp22363, i64 1 + %tmp22365 = getelementptr inbounds float* %tmp22364, i64 1 + %tmp22366 = getelementptr inbounds float* %tmp22365, i64 1 + %tmp22367 = getelementptr inbounds float* %tmp22366, i64 1 + %tmp22368 = getelementptr inbounds float* %tmp22367, i64 1 + %tmp22369 = getelementptr inbounds float* %tmp22368, i64 1 + %tmp22370 = getelementptr inbounds float* %tmp22369, i64 1 + %tmp22371 = getelementptr inbounds float* %tmp22370, i64 1 + %tmp22372 = getelementptr inbounds float* %tmp22371, i64 1 + %tmp22373 = getelementptr inbounds float* %tmp22372, i64 1 + %tmp22374 = getelementptr inbounds float* %tmp22373, i64 1 + %tmp22375 = getelementptr inbounds float* %tmp22374, i64 1 + %tmp22376 = getelementptr inbounds float* %tmp22375, i64 1 + %tmp22377 = getelementptr inbounds float* %tmp22376, i64 1 + %tmp22378 = getelementptr inbounds float* %tmp22377, i64 1 + %tmp22379 = getelementptr inbounds float* %tmp22378, i64 1 + %tmp22380 = getelementptr inbounds float* 
%tmp22379, i64 1 + %tmp22381 = getelementptr inbounds float* %tmp22380, i64 1 + %tmp22382 = getelementptr inbounds float* %tmp22381, i64 1 + %tmp22383 = getelementptr inbounds float* %tmp22382, i64 1 + %tmp22384 = getelementptr inbounds float* %tmp22383, i64 1 + %tmp22385 = getelementptr inbounds float* %tmp22384, i64 1 + %tmp22386 = getelementptr inbounds float* %tmp22385, i64 1 + %tmp22387 = getelementptr inbounds float* %tmp22386, i64 1 + %tmp22388 = getelementptr inbounds float* %tmp22387, i64 1 + %tmp22389 = getelementptr inbounds float* %tmp22388, i64 1 + %tmp22390 = getelementptr inbounds float* %tmp22389, i64 1 + %tmp22391 = getelementptr inbounds float* %tmp22390, i64 1 + %tmp22392 = getelementptr inbounds float* %tmp22391, i64 1 + %tmp22393 = getelementptr inbounds float* %tmp22392, i64 1 + %tmp22394 = getelementptr inbounds float* %tmp22393, i64 1 + %tmp22395 = getelementptr inbounds float* %tmp22394, i64 1 + %tmp22396 = getelementptr inbounds float* %tmp22395, i64 1 + %tmp22397 = getelementptr inbounds float* %tmp22396, i64 1 + %tmp22398 = getelementptr inbounds float* %tmp22397, i64 1 + %tmp22399 = getelementptr inbounds float* %tmp22398, i64 1 + %tmp22400 = getelementptr inbounds float* %tmp22399, i64 1 + %tmp22401 = getelementptr inbounds float* %tmp22400, i64 1 + %tmp22402 = getelementptr inbounds float* %tmp22401, i64 1 + %tmp22403 = getelementptr inbounds float* %tmp22402, i64 1 + %tmp22404 = getelementptr inbounds float* %tmp22403, i64 1 + %tmp22405 = getelementptr inbounds float* %tmp22404, i64 1 + %tmp22406 = getelementptr inbounds float* %tmp22405, i64 1 + %tmp22407 = getelementptr inbounds float* %tmp22406, i64 1 + %tmp22408 = getelementptr inbounds float* %tmp22407, i64 1 + %tmp22409 = getelementptr inbounds float* %tmp22408, i64 1 + %tmp22410 = getelementptr inbounds float* %tmp22409, i64 1 + %tmp22411 = getelementptr inbounds float* %tmp22410, i64 1 + %tmp22412 = getelementptr inbounds float* %tmp22411, i64 1 + %tmp22413 = getelementptr 
inbounds float* %tmp22412, i64 1 + %tmp22414 = getelementptr inbounds float* %tmp22413, i64 1 + %tmp22415 = getelementptr inbounds float* %tmp22414, i64 1 + %tmp22416 = getelementptr inbounds float* %tmp22415, i64 1 + %tmp22417 = getelementptr inbounds float* %tmp22416, i64 1 + %tmp22418 = getelementptr inbounds float* %tmp22417, i64 1 + %tmp22419 = getelementptr inbounds float* %tmp22418, i64 1 + %tmp22420 = getelementptr inbounds float* %tmp22419, i64 1 + %tmp22421 = getelementptr inbounds float* %tmp22420, i64 1 + %tmp22422 = getelementptr inbounds float* %tmp22421, i64 1 + %tmp22423 = getelementptr inbounds float* %tmp22422, i64 1 + %tmp22424 = getelementptr inbounds float* %tmp22423, i64 1 + %tmp22425 = getelementptr inbounds float* %tmp22424, i64 1 + %tmp22426 = getelementptr inbounds float* %tmp22425, i64 1 + %tmp22427 = getelementptr inbounds float* %tmp22426, i64 1 + %tmp22428 = getelementptr inbounds float* %tmp22427, i64 1 + %tmp22429 = getelementptr inbounds float* %tmp22428, i64 1 + %tmp22430 = getelementptr inbounds float* %tmp22429, i64 1 + %tmp22431 = getelementptr inbounds float* %tmp22430, i64 1 + %tmp22432 = getelementptr inbounds float* %tmp22431, i64 1 + %tmp22433 = getelementptr inbounds float* %tmp22432, i64 1 + %tmp22434 = getelementptr inbounds float* %tmp22433, i64 1 + %tmp22435 = getelementptr inbounds float* %tmp22434, i64 1 + %tmp22436 = getelementptr inbounds float* %tmp22435, i64 1 + %tmp22437 = getelementptr inbounds float* %tmp22436, i64 1 + %tmp22438 = getelementptr inbounds float* %tmp22437, i64 1 + %tmp22439 = getelementptr inbounds float* %tmp22438, i64 1 + %tmp22440 = getelementptr inbounds float* %tmp22439, i64 1 + %tmp22441 = getelementptr inbounds float* %tmp22440, i64 1 + %tmp22442 = getelementptr inbounds float* %tmp22441, i64 1 + %tmp22443 = getelementptr inbounds float* %tmp22442, i64 1 + %tmp22444 = getelementptr inbounds float* %tmp22443, i64 1 + %tmp22445 = getelementptr inbounds float* %tmp22444, i64 1 + %tmp22446 = 
getelementptr inbounds float* %tmp22445, i64 1 + %tmp22447 = getelementptr inbounds float* %tmp22446, i64 1 + %tmp22448 = getelementptr inbounds float* %tmp22447, i64 1 + %tmp22449 = getelementptr inbounds float* %tmp22448, i64 1 + %tmp22450 = getelementptr inbounds float* %tmp22449, i64 1 + %tmp22451 = getelementptr inbounds float* %tmp22450, i64 1 + %tmp22452 = getelementptr inbounds float* %tmp22451, i64 1 + %tmp22453 = getelementptr inbounds float* %tmp22452, i64 1 + %tmp22454 = getelementptr inbounds float* %tmp22453, i64 1 + %tmp22455 = getelementptr inbounds float* %tmp22454, i64 1 + %tmp22456 = getelementptr inbounds float* %tmp22455, i64 1 + %tmp22457 = getelementptr inbounds float* %tmp22456, i64 1 + %tmp22458 = getelementptr inbounds float* %tmp22457, i64 1 + %tmp22459 = getelementptr inbounds float* %tmp22458, i64 1 + %tmp22460 = getelementptr inbounds float* %tmp22459, i64 1 + %tmp22461 = getelementptr inbounds float* %tmp22460, i64 1 + %tmp22462 = getelementptr inbounds float* %tmp22461, i64 1 + %tmp22463 = getelementptr inbounds float* %tmp22462, i64 1 + %tmp22464 = getelementptr inbounds float* %tmp22463, i64 1 + %tmp22465 = getelementptr inbounds float* %tmp22464, i64 1 + %tmp22466 = getelementptr inbounds float* %tmp22465, i64 1 + %tmp22467 = getelementptr inbounds float* %tmp22466, i64 1 + %tmp22468 = getelementptr inbounds float* %tmp22467, i64 1 + %tmp22469 = getelementptr inbounds float* %tmp22468, i64 1 + %tmp22470 = getelementptr inbounds float* %tmp22469, i64 1 + %tmp22471 = getelementptr inbounds float* %tmp22470, i64 1 + %tmp22472 = getelementptr inbounds float* %tmp22471, i64 1 + %tmp22473 = getelementptr inbounds float* %tmp22472, i64 1 + %tmp22474 = getelementptr inbounds float* %tmp22473, i64 1 + %tmp22475 = getelementptr inbounds float* %tmp22474, i64 1 + %tmp22476 = getelementptr inbounds float* %tmp22475, i64 1 + %tmp22477 = getelementptr inbounds float* %tmp22476, i64 1 + %tmp22478 = getelementptr inbounds float* %tmp22477, i64 1 
+ %tmp22479 = getelementptr inbounds float* %tmp22478, i64 1 + %tmp22480 = getelementptr inbounds float* %tmp22479, i64 1 + %tmp22481 = getelementptr inbounds float* %tmp22480, i64 1 + %tmp22482 = getelementptr inbounds float* %tmp22481, i64 1 + %tmp22483 = getelementptr inbounds float* %tmp22482, i64 1 + %tmp22484 = getelementptr inbounds float* %tmp22483, i64 1 + %tmp22485 = getelementptr inbounds float* %tmp22484, i64 1 + %tmp22486 = getelementptr inbounds float* %tmp22485, i64 1 + %tmp22487 = getelementptr inbounds float* %tmp22486, i64 1 + %tmp22488 = getelementptr inbounds float* %tmp22487, i64 1 + %tmp22489 = getelementptr inbounds float* %tmp22488, i64 1 + %tmp22490 = getelementptr inbounds float* %tmp22489, i64 1 + %tmp22491 = getelementptr inbounds float* %tmp22490, i64 1 + %tmp22492 = getelementptr inbounds float* %tmp22491, i64 1 + %tmp22493 = getelementptr inbounds float* %tmp22492, i64 1 + %tmp22494 = getelementptr inbounds float* %tmp22493, i64 1 + %tmp22495 = getelementptr inbounds float* %tmp22494, i64 1 + %tmp22496 = getelementptr inbounds float* %tmp22495, i64 1 + %tmp22497 = getelementptr inbounds float* %tmp22496, i64 1 + %tmp22498 = getelementptr inbounds float* %tmp22497, i64 1 + %tmp22499 = getelementptr inbounds float* %tmp22498, i64 1 + %tmp22500 = getelementptr inbounds float* %tmp22499, i64 1 + %tmp22501 = getelementptr inbounds float* %tmp22500, i64 1 + %tmp22502 = getelementptr inbounds float* %tmp22501, i64 1 + %tmp22503 = getelementptr inbounds float* %tmp22502, i64 1 + %tmp22504 = getelementptr inbounds float* %tmp22503, i64 1 + %tmp22505 = getelementptr inbounds float* %tmp22504, i64 1 + %tmp22506 = getelementptr inbounds float* %tmp22505, i64 1 + %tmp22507 = getelementptr inbounds float* %tmp22506, i64 1 + %tmp22508 = getelementptr inbounds float* %tmp22507, i64 1 + %tmp22509 = getelementptr inbounds float* %tmp22508, i64 1 + %tmp22510 = getelementptr inbounds float* %tmp22509, i64 1 + %tmp22511 = getelementptr inbounds float* 
%tmp22510, i64 1 + %tmp22512 = getelementptr inbounds float* %tmp22511, i64 1 + %tmp22513 = getelementptr inbounds float* %tmp22512, i64 1 + %tmp22514 = getelementptr inbounds float* %tmp22513, i64 1 + %tmp22515 = getelementptr inbounds float* %tmp22514, i64 1 + %tmp22516 = getelementptr inbounds float* %tmp22515, i64 1 + %tmp22517 = getelementptr inbounds float* %tmp22516, i64 1 + %tmp22518 = getelementptr inbounds float* %tmp22517, i64 1 + %tmp22519 = getelementptr inbounds float* %tmp22518, i64 1 + %tmp22520 = getelementptr inbounds float* %tmp22519, i64 1 + %tmp22521 = getelementptr inbounds float* %tmp22520, i64 1 + %tmp22522 = getelementptr inbounds float* %tmp22521, i64 1 + %tmp22523 = getelementptr inbounds float* %tmp22522, i64 1 + %tmp22524 = getelementptr inbounds float* %tmp22523, i64 1 + %tmp22525 = getelementptr inbounds float* %tmp22524, i64 1 + %tmp22526 = getelementptr inbounds float* %tmp22525, i64 1 + %tmp22527 = getelementptr inbounds float* %tmp22526, i64 1 + %tmp22528 = getelementptr inbounds float* %tmp22527, i64 1 + %tmp22529 = getelementptr inbounds float* %tmp22528, i64 1 + %tmp22530 = getelementptr inbounds float* %tmp22529, i64 1 + %tmp22531 = getelementptr inbounds float* %tmp22530, i64 1 + %tmp22532 = getelementptr inbounds float* %tmp22531, i64 1 + %tmp22533 = getelementptr inbounds float* %tmp22532, i64 1 + %tmp22534 = getelementptr inbounds float* %tmp22533, i64 1 + %tmp22535 = getelementptr inbounds float* %tmp22534, i64 1 + %tmp22536 = getelementptr inbounds float* %tmp22535, i64 1 + %tmp22537 = getelementptr inbounds float* %tmp22536, i64 1 + %tmp22538 = getelementptr inbounds float* %tmp22537, i64 1 + %tmp22539 = getelementptr inbounds float* %tmp22538, i64 1 + %tmp22540 = getelementptr inbounds float* %tmp22539, i64 1 + %tmp22541 = getelementptr inbounds float* %tmp22540, i64 1 + %tmp22542 = getelementptr inbounds float* %tmp22541, i64 1 + %tmp22543 = getelementptr inbounds float* %tmp22542, i64 1 + %tmp22544 = getelementptr 
inbounds float* %tmp22543, i64 1 + %tmp22545 = getelementptr inbounds float* %tmp22544, i64 1 + %tmp22546 = getelementptr inbounds float* %tmp22545, i64 1 + %tmp22547 = getelementptr inbounds float* %tmp22546, i64 1 + %tmp22548 = getelementptr inbounds float* %tmp22547, i64 1 + %tmp22549 = getelementptr inbounds float* %tmp22548, i64 1 + %tmp22550 = getelementptr inbounds float* %tmp22549, i64 1 + %tmp22551 = getelementptr inbounds float* %tmp22550, i64 1 + %tmp22552 = getelementptr inbounds float* %tmp22551, i64 1 + %tmp22553 = getelementptr inbounds float* %tmp22552, i64 1 + %tmp22554 = getelementptr inbounds float* %tmp22553, i64 1 + %tmp22555 = getelementptr inbounds float* %tmp22554, i64 1 + %tmp22556 = getelementptr inbounds float* %tmp22555, i64 1 + %tmp22557 = getelementptr inbounds float* %tmp22556, i64 1 + %tmp22558 = getelementptr inbounds float* %tmp22557, i64 1 + %tmp22559 = getelementptr inbounds float* %tmp22558, i64 1 + %tmp22560 = getelementptr inbounds float* %tmp22559, i64 1 + %tmp22561 = getelementptr inbounds float* %tmp22560, i64 1 + %tmp22562 = getelementptr inbounds float* %tmp22561, i64 1 + %tmp22563 = getelementptr inbounds float* %tmp22562, i64 1 + %tmp22564 = getelementptr inbounds float* %tmp22563, i64 1 + %tmp22565 = getelementptr inbounds float* %tmp22564, i64 1 + %tmp22566 = getelementptr inbounds float* %tmp22565, i64 1 + %tmp22567 = getelementptr inbounds float* %tmp22566, i64 1 + %tmp22568 = getelementptr inbounds float* %tmp22567, i64 1 + %tmp22569 = getelementptr inbounds float* %tmp22568, i64 1 + %tmp22570 = getelementptr inbounds float* %tmp22569, i64 1 + %tmp22571 = getelementptr inbounds float* %tmp22570, i64 1 + %tmp22572 = getelementptr inbounds float* %tmp22571, i64 1 + %tmp22573 = getelementptr inbounds float* %tmp22572, i64 1 + %tmp22574 = getelementptr inbounds float* %tmp22573, i64 1 + %tmp22575 = getelementptr inbounds float* %tmp22574, i64 1 + %tmp22576 = getelementptr inbounds float* %tmp22575, i64 1 + %tmp22577 = 
getelementptr inbounds float* %tmp22576, i64 1 + %tmp22578 = getelementptr inbounds float* %tmp22577, i64 1 + %tmp22579 = getelementptr inbounds float* %tmp22578, i64 1 + %tmp22580 = getelementptr inbounds float* %tmp22579, i64 1 + %tmp22581 = getelementptr inbounds float* %tmp22580, i64 1 + %tmp22582 = getelementptr inbounds float* %tmp22581, i64 1 + %tmp22583 = getelementptr inbounds float* %tmp22582, i64 1 + %tmp22584 = getelementptr inbounds float* %tmp22583, i64 1 + %tmp22585 = getelementptr inbounds float* %tmp22584, i64 1 + %tmp22586 = getelementptr inbounds float* %tmp22585, i64 1 + %tmp22587 = getelementptr inbounds float* %tmp22586, i64 1 + %tmp22588 = getelementptr inbounds float* %tmp22587, i64 1 + %tmp22589 = getelementptr inbounds float* %tmp22588, i64 1 + %tmp22590 = getelementptr inbounds float* %tmp22589, i64 1 + %tmp22591 = getelementptr inbounds float* %tmp22590, i64 1 + %tmp22592 = getelementptr inbounds float* %tmp22591, i64 1 + %tmp22593 = getelementptr inbounds float* %tmp22592, i64 1 + %tmp22594 = getelementptr inbounds float* %tmp22593, i64 1 + %tmp22595 = getelementptr inbounds float* %tmp22594, i64 1 + %tmp22596 = getelementptr inbounds float* %tmp22595, i64 1 + %tmp22597 = getelementptr inbounds float* %tmp22596, i64 1 + %tmp22598 = getelementptr inbounds float* %tmp22597, i64 1 + %tmp22599 = getelementptr inbounds float* %tmp22598, i64 1 + %tmp22600 = getelementptr inbounds float* %tmp22599, i64 1 + %tmp22601 = getelementptr inbounds float* %tmp22600, i64 1 + %tmp22602 = getelementptr inbounds float* %tmp22601, i64 1 + %tmp22603 = getelementptr inbounds float* %tmp22602, i64 1 + %tmp22604 = getelementptr inbounds float* %tmp22603, i64 1 + %tmp22605 = getelementptr inbounds float* %tmp22604, i64 1 + %tmp22606 = getelementptr inbounds float* %tmp22605, i64 1 + %tmp22607 = getelementptr inbounds float* %tmp22606, i64 1 + %tmp22608 = getelementptr inbounds float* %tmp22607, i64 1 + %tmp22609 = getelementptr inbounds float* %tmp22608, i64 1 
+ %tmp22610 = getelementptr inbounds float* %tmp22609, i64 1 + %tmp22611 = getelementptr inbounds float* %tmp22610, i64 1 + %tmp22612 = getelementptr inbounds float* %tmp22611, i64 1 + %tmp22613 = getelementptr inbounds float* %tmp22612, i64 1 + %tmp22614 = getelementptr inbounds float* %tmp22613, i64 1 + %tmp22615 = getelementptr inbounds float* %tmp22614, i64 1 + %tmp22616 = getelementptr inbounds float* %tmp22615, i64 1 + %tmp22617 = getelementptr inbounds float* %tmp22616, i64 1 + %tmp22618 = getelementptr inbounds float* %tmp22617, i64 1 + %tmp22619 = getelementptr inbounds float* %tmp22618, i64 1 + %tmp22620 = getelementptr inbounds float* %tmp22619, i64 1 + %tmp22621 = getelementptr inbounds float* %tmp22620, i64 1 + %tmp22622 = getelementptr inbounds float* %tmp22621, i64 1 + %tmp22623 = getelementptr inbounds float* %tmp22622, i64 1 + %tmp22624 = getelementptr inbounds float* %tmp22623, i64 1 + %tmp22625 = getelementptr inbounds float* %tmp22624, i64 1 + %tmp22626 = getelementptr inbounds float* %tmp22625, i64 1 + %tmp22627 = getelementptr inbounds float* %tmp22626, i64 1 + %tmp22628 = getelementptr inbounds float* %tmp22627, i64 1 + %tmp22629 = getelementptr inbounds float* %tmp22628, i64 1 + %tmp22630 = getelementptr inbounds float* %tmp22629, i64 1 + %tmp22631 = getelementptr inbounds float* %tmp22630, i64 1 + %tmp22632 = getelementptr inbounds float* %tmp22631, i64 1 + %tmp22633 = getelementptr inbounds float* %tmp22632, i64 1 + %tmp22634 = getelementptr inbounds float* %tmp22633, i64 1 + %tmp22635 = getelementptr inbounds float* %tmp22634, i64 1 + %tmp22636 = getelementptr inbounds float* %tmp22635, i64 1 + %tmp22637 = getelementptr inbounds float* %tmp22636, i64 1 + %tmp22638 = getelementptr inbounds float* %tmp22637, i64 1 + %tmp22639 = getelementptr inbounds float* %tmp22638, i64 1 + %tmp22640 = getelementptr inbounds float* %tmp22639, i64 1 + %tmp22641 = getelementptr inbounds float* %tmp22640, i64 1 + %tmp22642 = getelementptr inbounds float* 
%tmp22641, i64 1 + %tmp22643 = getelementptr inbounds float* %tmp22642, i64 1 + %tmp22644 = getelementptr inbounds float* %tmp22643, i64 1 + %tmp22645 = getelementptr inbounds float* %tmp22644, i64 1 + %tmp22646 = getelementptr inbounds float* %tmp22645, i64 1 + %tmp22647 = getelementptr inbounds float* %tmp22646, i64 1 + %tmp22648 = getelementptr inbounds float* %tmp22647, i64 1 + %tmp22649 = getelementptr inbounds float* %tmp22648, i64 1 + %tmp22650 = getelementptr inbounds float* %tmp22649, i64 1 + %tmp22651 = getelementptr inbounds float* %tmp22650, i64 1 + %tmp22652 = getelementptr inbounds float* %tmp22651, i64 1 + %tmp22653 = getelementptr inbounds float* %tmp22652, i64 1 + %tmp22654 = getelementptr inbounds float* %tmp22653, i64 1 + %tmp22655 = getelementptr inbounds float* %tmp22654, i64 1 + %tmp22656 = getelementptr inbounds float* %tmp22655, i64 1 + %tmp22657 = getelementptr inbounds float* %tmp22656, i64 1 + %tmp22658 = getelementptr inbounds float* %tmp22657, i64 1 + %tmp22659 = getelementptr inbounds float* %tmp22658, i64 1 + %tmp22660 = getelementptr inbounds float* %tmp22659, i64 1 + %tmp22661 = getelementptr inbounds float* %tmp22660, i64 1 + %tmp22662 = getelementptr inbounds float* %tmp22661, i64 1 + %tmp22663 = getelementptr inbounds float* %tmp22662, i64 1 + %tmp22664 = getelementptr inbounds float* %tmp22663, i64 1 + %tmp22665 = getelementptr inbounds float* %tmp22664, i64 1 + %tmp22666 = getelementptr inbounds float* %tmp22665, i64 1 + %tmp22667 = getelementptr inbounds float* %tmp22666, i64 1 + %tmp22668 = getelementptr inbounds float* %tmp22667, i64 1 + %tmp22669 = getelementptr inbounds float* %tmp22668, i64 1 + %tmp22670 = getelementptr inbounds float* %tmp22669, i64 1 + %tmp22671 = getelementptr inbounds float* %tmp22670, i64 1 + %tmp22672 = getelementptr inbounds float* %tmp22671, i64 1 + %tmp22673 = getelementptr inbounds float* %tmp22672, i64 1 + %tmp22674 = getelementptr inbounds float* %tmp22673, i64 1 + %tmp22675 = getelementptr 
inbounds float* %tmp22674, i64 1 + %tmp22676 = getelementptr inbounds float* %tmp22675, i64 1 + %tmp22677 = getelementptr inbounds float* %tmp22676, i64 1 + %tmp22678 = getelementptr inbounds float* %tmp22677, i64 1 + %tmp22679 = getelementptr inbounds float* %tmp22678, i64 1 + %tmp22680 = getelementptr inbounds float* %tmp22679, i64 1 + %tmp22681 = getelementptr inbounds float* %tmp22680, i64 1 + %tmp22682 = getelementptr inbounds float* %tmp22681, i64 1 + %tmp22683 = getelementptr inbounds float* %tmp22682, i64 1 + %tmp22684 = getelementptr inbounds float* %tmp22683, i64 1 + %tmp22685 = getelementptr inbounds float* %tmp22684, i64 1 + %tmp22686 = getelementptr inbounds float* %tmp22685, i64 1 + %tmp22687 = getelementptr inbounds float* %tmp22686, i64 1 + %tmp22688 = getelementptr inbounds float* %tmp22687, i64 1 + %tmp22689 = getelementptr inbounds float* %tmp22688, i64 1 + %tmp22690 = getelementptr inbounds float* %tmp22689, i64 1 + %tmp22691 = getelementptr inbounds float* %tmp22690, i64 1 + %tmp22692 = getelementptr inbounds float* %tmp22691, i64 1 + %tmp22693 = getelementptr inbounds float* %tmp22692, i64 1 + %tmp22694 = getelementptr inbounds float* %tmp22693, i64 1 + %tmp22695 = getelementptr inbounds float* %tmp22694, i64 1 + %tmp22696 = getelementptr inbounds float* %tmp22695, i64 1 + %tmp22697 = getelementptr inbounds float* %tmp22696, i64 1 + %tmp22698 = getelementptr inbounds float* %tmp22697, i64 1 + %tmp22699 = getelementptr inbounds float* %tmp22698, i64 1 + %tmp22700 = getelementptr inbounds float* %tmp22699, i64 1 + %tmp22701 = getelementptr inbounds float* %tmp22700, i64 1 + %tmp22702 = getelementptr inbounds float* %tmp22701, i64 1 + %tmp22703 = getelementptr inbounds float* %tmp22702, i64 1 + %tmp22704 = getelementptr inbounds float* %tmp22703, i64 1 + %tmp22705 = getelementptr inbounds float* %tmp22704, i64 1 + %tmp22706 = getelementptr inbounds float* %tmp22705, i64 1 + %tmp22707 = getelementptr inbounds float* %tmp22706, i64 1 + %tmp22708 = 
getelementptr inbounds float* %tmp22707, i64 1 + %tmp22709 = getelementptr inbounds float* %tmp22708, i64 1 + %tmp22710 = getelementptr inbounds float* %tmp22709, i64 1 + %tmp22711 = getelementptr inbounds float* %tmp22710, i64 1 + %tmp22712 = getelementptr inbounds float* %tmp22711, i64 1 + %tmp22713 = getelementptr inbounds float* %tmp22712, i64 1 + %tmp22714 = getelementptr inbounds float* %tmp22713, i64 1 + %tmp22715 = getelementptr inbounds float* %tmp22714, i64 1 + %tmp22716 = getelementptr inbounds float* %tmp22715, i64 1 + %tmp22717 = getelementptr inbounds float* %tmp22716, i64 1 + %tmp22718 = getelementptr inbounds float* %tmp22717, i64 1 + %tmp22719 = getelementptr inbounds float* %tmp22718, i64 1 + %tmp22720 = getelementptr inbounds float* %tmp22719, i64 1 + %tmp22721 = getelementptr inbounds float* %tmp22720, i64 1 + %tmp22722 = getelementptr inbounds float* %tmp22721, i64 1 + %tmp22723 = getelementptr inbounds float* %tmp22722, i64 1 + %tmp22724 = getelementptr inbounds float* %tmp22723, i64 1 + %tmp22725 = getelementptr inbounds float* %tmp22724, i64 1 + %tmp22726 = getelementptr inbounds float* %tmp22725, i64 1 + %tmp22727 = getelementptr inbounds float* %tmp22726, i64 1 + %tmp22728 = getelementptr inbounds float* %tmp22727, i64 1 + %tmp22729 = getelementptr inbounds float* %tmp22728, i64 1 + %tmp22730 = getelementptr inbounds float* %tmp22729, i64 1 + %tmp22731 = getelementptr inbounds float* %tmp22730, i64 1 + %tmp22732 = getelementptr inbounds float* %tmp22731, i64 1 + %tmp22733 = getelementptr inbounds float* %tmp22732, i64 1 + %tmp22734 = getelementptr inbounds float* %tmp22733, i64 1 + %tmp22735 = getelementptr inbounds float* %tmp22734, i64 1 + %tmp22736 = getelementptr inbounds float* %tmp22735, i64 1 + %tmp22737 = getelementptr inbounds float* %tmp22736, i64 1 + %tmp22738 = getelementptr inbounds float* %tmp22737, i64 1 + %tmp22739 = getelementptr inbounds float* %tmp22738, i64 1 + %tmp22740 = getelementptr inbounds float* %tmp22739, i64 1 
+ %tmp22741 = getelementptr inbounds float* %tmp22740, i64 1 + %tmp22742 = getelementptr inbounds float* %tmp22741, i64 1 + %tmp22743 = getelementptr inbounds float* %tmp22742, i64 1 + %tmp22744 = getelementptr inbounds float* %tmp22743, i64 1 + %tmp22745 = getelementptr inbounds float* %tmp22744, i64 1 + %tmp22746 = getelementptr inbounds float* %tmp22745, i64 1 + %tmp22747 = getelementptr inbounds float* %tmp22746, i64 1 + %tmp22748 = getelementptr inbounds float* %tmp22747, i64 1 + %tmp22749 = getelementptr inbounds float* %tmp22748, i64 1 + %tmp22750 = getelementptr inbounds float* %tmp22749, i64 1 + %tmp22751 = getelementptr inbounds float* %tmp22750, i64 1 + %tmp22752 = getelementptr inbounds float* %tmp22751, i64 1 + %tmp22753 = getelementptr inbounds float* %tmp22752, i64 1 + %tmp22754 = getelementptr inbounds float* %tmp22753, i64 1 + %tmp22755 = getelementptr inbounds float* %tmp22754, i64 1 + %tmp22756 = getelementptr inbounds float* %tmp22755, i64 1 + %tmp22757 = getelementptr inbounds float* %tmp22756, i64 1 + %tmp22758 = getelementptr inbounds float* %tmp22757, i64 1 + %tmp22759 = getelementptr inbounds float* %tmp22758, i64 1 + %tmp22760 = getelementptr inbounds float* %tmp22759, i64 1 + %tmp22761 = getelementptr inbounds float* %tmp22760, i64 1 + %tmp22762 = getelementptr inbounds float* %tmp22761, i64 1 + %tmp22763 = getelementptr inbounds float* %tmp22762, i64 1 + %tmp22764 = getelementptr inbounds float* %tmp22763, i64 1 + %tmp22765 = getelementptr inbounds float* %tmp22764, i64 1 + %tmp22766 = getelementptr inbounds float* %tmp22765, i64 1 + %tmp22767 = getelementptr inbounds float* %tmp22766, i64 1 + %tmp22768 = getelementptr inbounds float* %tmp22767, i64 1 + %tmp22769 = getelementptr inbounds float* %tmp22768, i64 1 + %tmp22770 = getelementptr inbounds float* %tmp22769, i64 1 + %tmp22771 = getelementptr inbounds float* %tmp22770, i64 1 + %tmp22772 = getelementptr inbounds float* %tmp22771, i64 1 + %tmp22773 = getelementptr inbounds float* 
%tmp22772, i64 1 + %tmp22774 = getelementptr inbounds float* %tmp22773, i64 1 + %tmp22775 = getelementptr inbounds float* %tmp22774, i64 1 + %tmp22776 = getelementptr inbounds float* %tmp22775, i64 1 + %tmp22777 = getelementptr inbounds float* %tmp22776, i64 1 + %tmp22778 = getelementptr inbounds float* %tmp22777, i64 1 + %tmp22779 = getelementptr inbounds float* %tmp22778, i64 1 + %tmp22780 = getelementptr inbounds float* %tmp22779, i64 1 + %tmp22781 = getelementptr inbounds float* %tmp22780, i64 1 + %tmp22782 = getelementptr inbounds float* %tmp22781, i64 1 + %tmp22783 = getelementptr inbounds float* %tmp22782, i64 1 + %tmp22784 = getelementptr inbounds float* %tmp22783, i64 1 + %tmp22785 = getelementptr inbounds float* %tmp22784, i64 1 + %tmp22786 = getelementptr inbounds float* %tmp22785, i64 1 + %tmp22787 = getelementptr inbounds float* %tmp22786, i64 1 + %tmp22788 = getelementptr inbounds float* %tmp22787, i64 1 + %tmp22789 = getelementptr inbounds float* %tmp22788, i64 1 + %tmp22790 = getelementptr inbounds float* %tmp22789, i64 1 + %tmp22791 = getelementptr inbounds float* %tmp22790, i64 1 + %tmp22792 = getelementptr inbounds float* %tmp22791, i64 1 + %tmp22793 = getelementptr inbounds float* %tmp22792, i64 1 + %tmp22794 = getelementptr inbounds float* %tmp22793, i64 1 + %tmp22795 = getelementptr inbounds float* %tmp22794, i64 1 + %tmp22796 = getelementptr inbounds float* %tmp22795, i64 1 + %tmp22797 = getelementptr inbounds float* %tmp22796, i64 1 + %tmp22798 = getelementptr inbounds float* %tmp22797, i64 1 + %tmp22799 = getelementptr inbounds float* %tmp22798, i64 1 + %tmp22800 = getelementptr inbounds float* %tmp22799, i64 1 + %tmp22801 = getelementptr inbounds float* %tmp22800, i64 1 + %tmp22802 = getelementptr inbounds float* %tmp22801, i64 1 + %tmp22803 = getelementptr inbounds float* %tmp22802, i64 1 + %tmp22804 = getelementptr inbounds float* %tmp22803, i64 1 + %tmp22805 = getelementptr inbounds float* %tmp22804, i64 1 + %tmp22806 = getelementptr 
inbounds float* %tmp22805, i64 1 + %tmp22807 = getelementptr inbounds float* %tmp22806, i64 1 + %tmp22808 = getelementptr inbounds float* %tmp22807, i64 1 + %tmp22809 = getelementptr inbounds float* %tmp22808, i64 1 + %tmp22810 = getelementptr inbounds float* %tmp22809, i64 1 + %tmp22811 = getelementptr inbounds float* %tmp22810, i64 1 + %tmp22812 = getelementptr inbounds float* %tmp22811, i64 1 + %tmp22813 = getelementptr inbounds float* %tmp22812, i64 1 + %tmp22814 = getelementptr inbounds float* %tmp22813, i64 1 + %tmp22815 = getelementptr inbounds float* %tmp22814, i64 1 + %tmp22816 = getelementptr inbounds float* %tmp22815, i64 1 + %tmp22817 = getelementptr inbounds float* %tmp22816, i64 1 + %tmp22818 = getelementptr inbounds float* %tmp22817, i64 1 + %tmp22819 = getelementptr inbounds float* %tmp22818, i64 1 + %tmp22820 = getelementptr inbounds float* %tmp22819, i64 1 + %tmp22821 = getelementptr inbounds float* %tmp22820, i64 1 + %tmp22822 = getelementptr inbounds float* %tmp22821, i64 1 + %tmp22823 = getelementptr inbounds float* %tmp22822, i64 1 + %tmp22824 = getelementptr inbounds float* %tmp22823, i64 1 + %tmp22825 = getelementptr inbounds float* %tmp22824, i64 1 + %tmp22826 = getelementptr inbounds float* %tmp22825, i64 1 + %tmp22827 = getelementptr inbounds float* %tmp22826, i64 1 + %tmp22828 = getelementptr inbounds float* %tmp22827, i64 1 + %tmp22829 = getelementptr inbounds float* %tmp22828, i64 1 + %tmp22830 = getelementptr inbounds float* %tmp22829, i64 1 + %tmp22831 = getelementptr inbounds float* %tmp22830, i64 1 + %tmp22832 = getelementptr inbounds float* %tmp22831, i64 1 + %tmp22833 = getelementptr inbounds float* %tmp22832, i64 1 + %tmp22834 = getelementptr inbounds float* %tmp22833, i64 1 + %tmp22835 = getelementptr inbounds float* %tmp22834, i64 1 + %tmp22836 = getelementptr inbounds float* %tmp22835, i64 1 + %tmp22837 = getelementptr inbounds float* %tmp22836, i64 1 + %tmp22838 = getelementptr inbounds float* %tmp22837, i64 1 + %tmp22839 = 
getelementptr inbounds float* %tmp22838, i64 1 + %tmp22840 = getelementptr inbounds float* %tmp22839, i64 1 + %tmp22841 = getelementptr inbounds float* %tmp22840, i64 1 + %tmp22842 = getelementptr inbounds float* %tmp22841, i64 1 + %tmp22843 = getelementptr inbounds float* %tmp22842, i64 1 + %tmp22844 = getelementptr inbounds float* %tmp22843, i64 1 + %tmp22845 = getelementptr inbounds float* %tmp22844, i64 1 + %tmp22846 = getelementptr inbounds float* %tmp22845, i64 1 + %tmp22847 = getelementptr inbounds float* %tmp22846, i64 1 + %tmp22848 = getelementptr inbounds float* %tmp22847, i64 1 + %tmp22849 = getelementptr inbounds float* %tmp22848, i64 1 + %tmp22850 = getelementptr inbounds float* %tmp22849, i64 1 + %tmp22851 = getelementptr inbounds float* %tmp22850, i64 1 + %tmp22852 = getelementptr inbounds float* %tmp22851, i64 1 + %tmp22853 = getelementptr inbounds float* %tmp22852, i64 1 + %tmp22854 = getelementptr inbounds float* %tmp22853, i64 1 + %tmp22855 = getelementptr inbounds float* %tmp22854, i64 1 + %tmp22856 = getelementptr inbounds float* %tmp22855, i64 1 + %tmp22857 = getelementptr inbounds float* %tmp22856, i64 1 + %tmp22858 = getelementptr inbounds float* %tmp22857, i64 1 + %tmp22859 = getelementptr inbounds float* %tmp22858, i64 1 + %tmp22860 = getelementptr inbounds float* %tmp22859, i64 1 + %tmp22861 = getelementptr inbounds float* %tmp22860, i64 1 + %tmp22862 = getelementptr inbounds float* %tmp22861, i64 1 + %tmp22863 = getelementptr inbounds float* %tmp22862, i64 1 + %tmp22864 = getelementptr inbounds float* %tmp22863, i64 1 + %tmp22865 = getelementptr inbounds float* %tmp22864, i64 1 + %tmp22866 = getelementptr inbounds float* %tmp22865, i64 1 + %tmp22867 = getelementptr inbounds float* %tmp22866, i64 1 + %tmp22868 = getelementptr inbounds float* %tmp22867, i64 1 + %tmp22869 = getelementptr inbounds float* %tmp22868, i64 1 + %tmp22870 = getelementptr inbounds float* %tmp22869, i64 1 + %tmp22871 = getelementptr inbounds float* %tmp22870, i64 1 
+ %tmp22872 = getelementptr inbounds float* %tmp22871, i64 1 + %tmp22873 = getelementptr inbounds float* %tmp22872, i64 1 + %tmp22874 = getelementptr inbounds float* %tmp22873, i64 1 + %tmp22875 = getelementptr inbounds float* %tmp22874, i64 1 + %tmp22876 = getelementptr inbounds float* %tmp22875, i64 1 + %tmp22877 = getelementptr inbounds float* %tmp22876, i64 1 + %tmp22878 = getelementptr inbounds float* %tmp22877, i64 1 + %tmp22879 = getelementptr inbounds float* %tmp22878, i64 1 + %tmp22880 = getelementptr inbounds float* %tmp22879, i64 1 + %tmp22881 = getelementptr inbounds float* %tmp22880, i64 1 + %tmp22882 = getelementptr inbounds float* %tmp22881, i64 1 + %tmp22883 = getelementptr inbounds float* %tmp22882, i64 1 + %tmp22884 = getelementptr inbounds float* %tmp22883, i64 1 + %tmp22885 = getelementptr inbounds float* %tmp22884, i64 1 + %tmp22886 = getelementptr inbounds float* %tmp22885, i64 1 + %tmp22887 = getelementptr inbounds float* %tmp22886, i64 1 + %tmp22888 = getelementptr inbounds float* %tmp22887, i64 1 + %tmp22889 = getelementptr inbounds float* %tmp22888, i64 1 + %tmp22890 = getelementptr inbounds float* %tmp22889, i64 1 + %tmp22891 = getelementptr inbounds float* %tmp22890, i64 1 + %tmp22892 = getelementptr inbounds float* %tmp22891, i64 1 + %tmp22893 = getelementptr inbounds float* %tmp22892, i64 1 + %tmp22894 = getelementptr inbounds float* %tmp22893, i64 1 + %tmp22895 = getelementptr inbounds float* %tmp22894, i64 1 + %tmp22896 = getelementptr inbounds float* %tmp22895, i64 1 + %tmp22897 = getelementptr inbounds float* %tmp22896, i64 1 + %tmp22898 = getelementptr inbounds float* %tmp22897, i64 1 + %tmp22899 = getelementptr inbounds float* %tmp22898, i64 1 + %tmp22900 = getelementptr inbounds float* %tmp22899, i64 1 + %tmp22901 = getelementptr inbounds float* %tmp22900, i64 1 + %tmp22902 = getelementptr inbounds float* %tmp22901, i64 1 + %tmp22903 = getelementptr inbounds float* %tmp22902, i64 1 + %tmp22904 = getelementptr inbounds float* 
%tmp22903, i64 1 + %tmp22905 = getelementptr inbounds float* %tmp22904, i64 1 + %tmp22906 = getelementptr inbounds float* %tmp22905, i64 1 + %tmp22907 = getelementptr inbounds float* %tmp22906, i64 1 + %tmp22908 = getelementptr inbounds float* %tmp22907, i64 1 + %tmp22909 = getelementptr inbounds float* %tmp22908, i64 1 + %tmp22910 = getelementptr inbounds float* %tmp22909, i64 1 + %tmp22911 = getelementptr inbounds float* %tmp22910, i64 1 + %tmp22912 = getelementptr inbounds float* %tmp22911, i64 1 + %tmp22913 = getelementptr inbounds float* %tmp22912, i64 1 + %tmp22914 = getelementptr inbounds float* %tmp22913, i64 1 + %tmp22915 = getelementptr inbounds float* %tmp22914, i64 1 + %tmp22916 = getelementptr inbounds float* %tmp22915, i64 1 + %tmp22917 = getelementptr inbounds float* %tmp22916, i64 1 + %tmp22918 = getelementptr inbounds float* %tmp22917, i64 1 + %tmp22919 = getelementptr inbounds float* %tmp22918, i64 1 + %tmp22920 = getelementptr inbounds float* %tmp22919, i64 1 + %tmp22921 = getelementptr inbounds float* %tmp22920, i64 1 + %tmp22922 = getelementptr inbounds float* %tmp22921, i64 1 + %tmp22923 = getelementptr inbounds float* %tmp22922, i64 1 + %tmp22924 = getelementptr inbounds float* %tmp22923, i64 1 + %tmp22925 = getelementptr inbounds float* %tmp22924, i64 1 + %tmp22926 = getelementptr inbounds float* %tmp22925, i64 1 + %tmp22927 = getelementptr inbounds float* %tmp22926, i64 1 + %tmp22928 = getelementptr inbounds float* %tmp22927, i64 1 + %tmp22929 = getelementptr inbounds float* %tmp22928, i64 1 + %tmp22930 = getelementptr inbounds float* %tmp22929, i64 1 + %tmp22931 = getelementptr inbounds float* %tmp22930, i64 1 + %tmp22932 = getelementptr inbounds float* %tmp22931, i64 1 + %tmp22933 = getelementptr inbounds float* %tmp22932, i64 1 + %tmp22934 = getelementptr inbounds float* %tmp22933, i64 1 + %tmp22935 = getelementptr inbounds float* %tmp22934, i64 1 + %tmp22936 = getelementptr inbounds float* %tmp22935, i64 1 + %tmp22937 = getelementptr 
inbounds float* %tmp22936, i64 1 + %tmp22938 = getelementptr inbounds float* %tmp22937, i64 1 + %tmp22939 = getelementptr inbounds float* %tmp22938, i64 1 + %tmp22940 = getelementptr inbounds float* %tmp22939, i64 1 + %tmp22941 = getelementptr inbounds float* %tmp22940, i64 1 + %tmp22942 = getelementptr inbounds float* %tmp22941, i64 1 + %tmp22943 = getelementptr inbounds float* %tmp22942, i64 1 + %tmp22944 = getelementptr inbounds float* %tmp22943, i64 1 + %tmp22945 = getelementptr inbounds float* %tmp22944, i64 1 + %tmp22946 = getelementptr inbounds float* %tmp22945, i64 1 + %tmp22947 = getelementptr inbounds float* %tmp22946, i64 1 + %tmp22948 = getelementptr inbounds float* %tmp22947, i64 1 + %tmp22949 = getelementptr inbounds float* %tmp22948, i64 1 + %tmp22950 = getelementptr inbounds float* %tmp22949, i64 1 + %tmp22951 = getelementptr inbounds float* %tmp22950, i64 1 + %tmp22952 = getelementptr inbounds float* %tmp22951, i64 1 + %tmp22953 = getelementptr inbounds float* %tmp22952, i64 1 + %tmp22954 = getelementptr inbounds float* %tmp22953, i64 1 + %tmp22955 = getelementptr inbounds float* %tmp22954, i64 1 + %tmp22956 = getelementptr inbounds float* %tmp22955, i64 1 + %tmp22957 = getelementptr inbounds float* %tmp22956, i64 1 + %tmp22958 = getelementptr inbounds float* %tmp22957, i64 1 + %tmp22959 = getelementptr inbounds float* %tmp22958, i64 1 + %tmp22960 = getelementptr inbounds float* %tmp22959, i64 1 + %tmp22961 = getelementptr inbounds float* %tmp22960, i64 1 + %tmp22962 = getelementptr inbounds float* %tmp22961, i64 1 + %tmp22963 = getelementptr inbounds float* %tmp22962, i64 1 + %tmp22964 = getelementptr inbounds float* %tmp22963, i64 1 + %tmp22965 = getelementptr inbounds float* %tmp22964, i64 1 + %tmp22966 = getelementptr inbounds float* %tmp22965, i64 1 + %tmp22967 = getelementptr inbounds float* %tmp22966, i64 1 + %tmp22968 = getelementptr inbounds float* %tmp22967, i64 1 + %tmp22969 = getelementptr inbounds float* %tmp22968, i64 1 + %tmp22970 = 
getelementptr inbounds float* %tmp22969, i64 1 + %tmp22971 = getelementptr inbounds float* %tmp22970, i64 1 + %tmp22972 = getelementptr inbounds float* %tmp22971, i64 1 + %tmp22973 = getelementptr inbounds float* %tmp22972, i64 1 + %tmp22974 = getelementptr inbounds float* %tmp22973, i64 1 + %tmp22975 = getelementptr inbounds float* %tmp22974, i64 1 + %tmp22976 = getelementptr inbounds float* %tmp22975, i64 1 + %tmp22977 = getelementptr inbounds float* %tmp22976, i64 1 + %tmp22978 = getelementptr inbounds float* %tmp22977, i64 1 + %tmp22979 = getelementptr inbounds float* %tmp22978, i64 1 + %tmp22980 = getelementptr inbounds float* %tmp22979, i64 1 + %tmp22981 = getelementptr inbounds float* %tmp22980, i64 1 + %tmp22982 = getelementptr inbounds float* %tmp22981, i64 1 + %tmp22983 = getelementptr inbounds float* %tmp22982, i64 1 + %tmp22984 = getelementptr inbounds float* %tmp22983, i64 1 + %tmp22985 = getelementptr inbounds float* %tmp22984, i64 1 + %tmp22986 = getelementptr inbounds float* %tmp22985, i64 1 + %tmp22987 = getelementptr inbounds float* %tmp22986, i64 1 + %tmp22988 = getelementptr inbounds float* %tmp22987, i64 1 + %tmp22989 = getelementptr inbounds float* %tmp22988, i64 1 + %tmp22990 = getelementptr inbounds float* %tmp22989, i64 1 + %tmp22991 = getelementptr inbounds float* %tmp22990, i64 1 + %tmp22992 = getelementptr inbounds float* %tmp22991, i64 1 + %tmp22993 = getelementptr inbounds float* %tmp22992, i64 1 + %tmp22994 = getelementptr inbounds float* %tmp22993, i64 1 + %tmp22995 = getelementptr inbounds float* %tmp22994, i64 1 + %tmp22996 = getelementptr inbounds float* %tmp22995, i64 1 + %tmp22997 = getelementptr inbounds float* %tmp22996, i64 1 + %tmp22998 = getelementptr inbounds float* %tmp22997, i64 1 + %tmp22999 = getelementptr inbounds float* %tmp22998, i64 1 + %tmp23000 = getelementptr inbounds float* %tmp22999, i64 1 + %tmp23001 = getelementptr inbounds float* %tmp23000, i64 1 + %tmp23002 = getelementptr inbounds float* %tmp23001, i64 1 
+ %tmp23003 = getelementptr inbounds float* %tmp23002, i64 1 + %tmp23004 = getelementptr inbounds float* %tmp23003, i64 1 + %tmp23005 = getelementptr inbounds float* %tmp23004, i64 1 + %tmp23006 = getelementptr inbounds float* %tmp23005, i64 1 + %tmp23007 = getelementptr inbounds float* %tmp23006, i64 1 + %tmp23008 = getelementptr inbounds float* %tmp23007, i64 1 + %tmp23009 = getelementptr inbounds float* %tmp23008, i64 1 + %tmp23010 = getelementptr inbounds float* %tmp23009, i64 1 + %tmp23011 = getelementptr inbounds float* %tmp23010, i64 1 + %tmp23012 = getelementptr inbounds float* %tmp23011, i64 1 + %tmp23013 = getelementptr inbounds float* %tmp23012, i64 1 + %tmp23014 = getelementptr inbounds float* %tmp23013, i64 1 + %tmp23015 = getelementptr inbounds float* %tmp23014, i64 1 + %tmp23016 = getelementptr inbounds float* %tmp23015, i64 1 + %tmp23017 = getelementptr inbounds float* %tmp23016, i64 1 + %tmp23018 = getelementptr inbounds float* %tmp23017, i64 1 + %tmp23019 = getelementptr inbounds float* %tmp23018, i64 1 + %tmp23020 = getelementptr inbounds float* %tmp23019, i64 1 + %tmp23021 = getelementptr inbounds float* %tmp23020, i64 1 + %tmp23022 = getelementptr inbounds float* %tmp23021, i64 1 + %tmp23023 = getelementptr inbounds float* %tmp23022, i64 1 + %tmp23024 = getelementptr inbounds float* %tmp23023, i64 1 + %tmp23025 = getelementptr inbounds float* %tmp23024, i64 1 + %tmp23026 = getelementptr inbounds float* %tmp23025, i64 1 + %tmp23027 = getelementptr inbounds float* %tmp23026, i64 1 + %tmp23028 = getelementptr inbounds float* %tmp23027, i64 1 + %tmp23029 = getelementptr inbounds float* %tmp23028, i64 1 + %tmp23030 = getelementptr inbounds float* %tmp23029, i64 1 + %tmp23031 = getelementptr inbounds float* %tmp23030, i64 1 + %tmp23032 = getelementptr inbounds float* %tmp23031, i64 1 + %tmp23033 = getelementptr inbounds float* %tmp23032, i64 1 + %tmp23034 = getelementptr inbounds float* %tmp23033, i64 1 + %tmp23035 = getelementptr inbounds float* 
%tmp23034, i64 1 + %tmp23036 = getelementptr inbounds float* %tmp23035, i64 1 + %tmp23037 = getelementptr inbounds float* %tmp23036, i64 1 + %tmp23038 = getelementptr inbounds float* %tmp23037, i64 1 + %tmp23039 = getelementptr inbounds float* %tmp23038, i64 1 + %tmp23040 = getelementptr inbounds float* %tmp23039, i64 1 + %tmp23041 = getelementptr inbounds float* %tmp23040, i64 1 + %tmp23042 = getelementptr inbounds float* %tmp23041, i64 1 + %tmp23043 = getelementptr inbounds float* %tmp23042, i64 1 + %tmp23044 = getelementptr inbounds float* %tmp23043, i64 1 + %tmp23045 = getelementptr inbounds float* %tmp23044, i64 1 + %tmp23046 = getelementptr inbounds float* %tmp23045, i64 1 + %tmp23047 = getelementptr inbounds float* %tmp23046, i64 1 + %tmp23048 = getelementptr inbounds float* %tmp23047, i64 1 + %tmp23049 = getelementptr inbounds float* %tmp23048, i64 1 + %tmp23050 = getelementptr inbounds float* %tmp23049, i64 1 + %tmp23051 = getelementptr inbounds float* %tmp23050, i64 1 + %tmp23052 = getelementptr inbounds float* %tmp23051, i64 1 + %tmp23053 = getelementptr inbounds float* %tmp23052, i64 1 + %tmp23054 = getelementptr inbounds float* %tmp23053, i64 1 + %tmp23055 = getelementptr inbounds float* %tmp23054, i64 1 + %tmp23056 = getelementptr inbounds float* %tmp23055, i64 1 + %tmp23057 = getelementptr inbounds float* %tmp23056, i64 1 + %tmp23058 = getelementptr inbounds float* %tmp23057, i64 1 + %tmp23059 = getelementptr inbounds float* %tmp23058, i64 1 + %tmp23060 = getelementptr inbounds float* %tmp23059, i64 1 + %tmp23061 = getelementptr inbounds float* %tmp23060, i64 1 + %tmp23062 = getelementptr inbounds float* %tmp23061, i64 1 + %tmp23063 = getelementptr inbounds float* %tmp23062, i64 1 + %tmp23064 = getelementptr inbounds float* %tmp23063, i64 1 + %tmp23065 = getelementptr inbounds float* %tmp23064, i64 1 + %tmp23066 = getelementptr inbounds float* %tmp23065, i64 1 + %tmp23067 = getelementptr inbounds float* %tmp23066, i64 1 + %tmp23068 = getelementptr 
inbounds float* %tmp23067, i64 1 + %tmp23069 = getelementptr inbounds float* %tmp23068, i64 1 + %tmp23070 = getelementptr inbounds float* %tmp23069, i64 1 + %tmp23071 = getelementptr inbounds float* %tmp23070, i64 1 + %tmp23072 = getelementptr inbounds float* %tmp23071, i64 1 + %tmp23073 = getelementptr inbounds float* %tmp23072, i64 1 + %tmp23074 = getelementptr inbounds float* %tmp23073, i64 1 + %tmp23075 = getelementptr inbounds float* %tmp23074, i64 1 + %tmp23076 = getelementptr inbounds float* %tmp23075, i64 1 + %tmp23077 = getelementptr inbounds float* %tmp23076, i64 1 + %tmp23078 = getelementptr inbounds float* %tmp23077, i64 1 + %tmp23079 = getelementptr inbounds float* %tmp23078, i64 1 + %tmp23080 = getelementptr inbounds float* %tmp23079, i64 1 + %tmp23081 = getelementptr inbounds float* %tmp23080, i64 1 + %tmp23082 = getelementptr inbounds float* %tmp23081, i64 1 + %tmp23083 = getelementptr inbounds float* %tmp23082, i64 1 + %tmp23084 = getelementptr inbounds float* %tmp23083, i64 1 + %tmp23085 = getelementptr inbounds float* %tmp23084, i64 1 + %tmp23086 = getelementptr inbounds float* %tmp23085, i64 1 + %tmp23087 = getelementptr inbounds float* %tmp23086, i64 1 + %tmp23088 = getelementptr inbounds float* %tmp23087, i64 1 + %tmp23089 = getelementptr inbounds float* %tmp23088, i64 1 + %tmp23090 = getelementptr inbounds float* %tmp23089, i64 1 + %tmp23091 = getelementptr inbounds float* %tmp23090, i64 1 + %tmp23092 = getelementptr inbounds float* %tmp23091, i64 1 + %tmp23093 = getelementptr inbounds float* %tmp23092, i64 1 + %tmp23094 = getelementptr inbounds float* %tmp23093, i64 1 + %tmp23095 = getelementptr inbounds float* %tmp23094, i64 1 + %tmp23096 = getelementptr inbounds float* %tmp23095, i64 1 + %tmp23097 = getelementptr inbounds float* %tmp23096, i64 1 + %tmp23098 = getelementptr inbounds float* %tmp23097, i64 1 + %tmp23099 = getelementptr inbounds float* %tmp23098, i64 1 + %tmp23100 = getelementptr inbounds float* %tmp23099, i64 1 + %tmp23101 = 
getelementptr inbounds float* %tmp23100, i64 1 + %tmp23102 = getelementptr inbounds float* %tmp23101, i64 1 + %tmp23103 = getelementptr inbounds float* %tmp23102, i64 1 + %tmp23104 = getelementptr inbounds float* %tmp23103, i64 1 + %tmp23105 = getelementptr inbounds float* %tmp23104, i64 1 + %tmp23106 = getelementptr inbounds float* %tmp23105, i64 1 + %tmp23107 = getelementptr inbounds float* %tmp23106, i64 1 + %tmp23108 = getelementptr inbounds float* %tmp23107, i64 1 + %tmp23109 = getelementptr inbounds float* %tmp23108, i64 1 + %tmp23110 = getelementptr inbounds float* %tmp23109, i64 1 + %tmp23111 = getelementptr inbounds float* %tmp23110, i64 1 + %tmp23112 = getelementptr inbounds float* %tmp23111, i64 1 + %tmp23113 = getelementptr inbounds float* %tmp23112, i64 1 + %tmp23114 = getelementptr inbounds float* %tmp23113, i64 1 + %tmp23115 = getelementptr inbounds float* %tmp23114, i64 1 + %tmp23116 = getelementptr inbounds float* %tmp23115, i64 1 + %tmp23117 = getelementptr inbounds float* %tmp23116, i64 1 + %tmp23118 = getelementptr inbounds float* %tmp23117, i64 1 + %tmp23119 = getelementptr inbounds float* %tmp23118, i64 1 + %tmp23120 = getelementptr inbounds float* %tmp23119, i64 1 + %tmp23121 = getelementptr inbounds float* %tmp23120, i64 1 + %tmp23122 = getelementptr inbounds float* %tmp23121, i64 1 + %tmp23123 = getelementptr inbounds float* %tmp23122, i64 1 + %tmp23124 = getelementptr inbounds float* %tmp23123, i64 1 + %tmp23125 = getelementptr inbounds float* %tmp23124, i64 1 + %tmp23126 = getelementptr inbounds float* %tmp23125, i64 1 + %tmp23127 = getelementptr inbounds float* %tmp23126, i64 1 + %tmp23128 = getelementptr inbounds float* %tmp23127, i64 1 + %tmp23129 = getelementptr inbounds float* %tmp23128, i64 1 + %tmp23130 = getelementptr inbounds float* %tmp23129, i64 1 + %tmp23131 = getelementptr inbounds float* %tmp23130, i64 1 + %tmp23132 = getelementptr inbounds float* %tmp23131, i64 1 + %tmp23133 = getelementptr inbounds float* %tmp23132, i64 1 
+ %tmp23134 = getelementptr inbounds float* %tmp23133, i64 1 + %tmp23135 = getelementptr inbounds float* %tmp23134, i64 1 + %tmp23136 = getelementptr inbounds float* %tmp23135, i64 1 + %tmp23137 = getelementptr inbounds float* %tmp23136, i64 1 + %tmp23138 = getelementptr inbounds float* %tmp23137, i64 1 + %tmp23139 = getelementptr inbounds float* %tmp23138, i64 1 + %tmp23140 = getelementptr inbounds float* %tmp23139, i64 1 + %tmp23141 = getelementptr inbounds float* %tmp23140, i64 1 + %tmp23142 = getelementptr inbounds float* %tmp23141, i64 1 + %tmp23143 = getelementptr inbounds float* %tmp23142, i64 1 + %tmp23144 = getelementptr inbounds float* %tmp23143, i64 1 + %tmp23145 = getelementptr inbounds float* %tmp23144, i64 1 + %tmp23146 = getelementptr inbounds float* %tmp23145, i64 1 + %tmp23147 = getelementptr inbounds float* %tmp23146, i64 1 + %tmp23148 = getelementptr inbounds float* %tmp23147, i64 1 + %tmp23149 = getelementptr inbounds float* %tmp23148, i64 1 + %tmp23150 = getelementptr inbounds float* %tmp23149, i64 1 + %tmp23151 = getelementptr inbounds float* %tmp23150, i64 1 + %tmp23152 = getelementptr inbounds float* %tmp23151, i64 1 + %tmp23153 = getelementptr inbounds float* %tmp23152, i64 1 + %tmp23154 = getelementptr inbounds float* %tmp23153, i64 1 + %tmp23155 = getelementptr inbounds float* %tmp23154, i64 1 + %tmp23156 = getelementptr inbounds float* %tmp23155, i64 1 + %tmp23157 = getelementptr inbounds float* %tmp23156, i64 1 + %tmp23158 = getelementptr inbounds float* %tmp23157, i64 1 + %tmp23159 = getelementptr inbounds float* %tmp23158, i64 1 + %tmp23160 = getelementptr inbounds float* %tmp23159, i64 1 + %tmp23161 = getelementptr inbounds float* %tmp23160, i64 1 + %tmp23162 = getelementptr inbounds float* %tmp23161, i64 1 + %tmp23163 = getelementptr inbounds float* %tmp23162, i64 1 + %tmp23164 = getelementptr inbounds float* %tmp23163, i64 1 + %tmp23165 = getelementptr inbounds float* %tmp23164, i64 1 + %tmp23166 = getelementptr inbounds float* 
%tmp23165, i64 1 + %tmp23167 = getelementptr inbounds float* %tmp23166, i64 1 + %tmp23168 = getelementptr inbounds float* %tmp23167, i64 1 + %tmp23169 = getelementptr inbounds float* %tmp23168, i64 1 + %tmp23170 = getelementptr inbounds float* %tmp23169, i64 1 + %tmp23171 = getelementptr inbounds float* %tmp23170, i64 1 + %tmp23172 = getelementptr inbounds float* %tmp23171, i64 1 + %tmp23173 = getelementptr inbounds float* %tmp23172, i64 1 + %tmp23174 = getelementptr inbounds float* %tmp23173, i64 1 + %tmp23175 = getelementptr inbounds float* %tmp23174, i64 1 + %tmp23176 = getelementptr inbounds float* %tmp23175, i64 1 + %tmp23177 = getelementptr inbounds float* %tmp23176, i64 1 + %tmp23178 = getelementptr inbounds float* %tmp23177, i64 1 + %tmp23179 = getelementptr inbounds float* %tmp23178, i64 1 + %tmp23180 = getelementptr inbounds float* %tmp23179, i64 1 + %tmp23181 = getelementptr inbounds float* %tmp23180, i64 1 + %tmp23182 = getelementptr inbounds float* %tmp23181, i64 1 + %tmp23183 = getelementptr inbounds float* %tmp23182, i64 1 + %tmp23184 = getelementptr inbounds float* %tmp23183, i64 1 + %tmp23185 = getelementptr inbounds float* %tmp23184, i64 1 + %tmp23186 = getelementptr inbounds float* %tmp23185, i64 1 + %tmp23187 = getelementptr inbounds float* %tmp23186, i64 1 + %tmp23188 = getelementptr inbounds float* %tmp23187, i64 1 + %tmp23189 = getelementptr inbounds float* %tmp23188, i64 1 + %tmp23190 = getelementptr inbounds float* %tmp23189, i64 1 + %tmp23191 = getelementptr inbounds float* %tmp23190, i64 1 + %tmp23192 = getelementptr inbounds float* %tmp23191, i64 1 + %tmp23193 = getelementptr inbounds float* %tmp23192, i64 1 + %tmp23194 = getelementptr inbounds float* %tmp23193, i64 1 + %tmp23195 = getelementptr inbounds float* %tmp23194, i64 1 + %tmp23196 = getelementptr inbounds float* %tmp23195, i64 1 + %tmp23197 = getelementptr inbounds float* %tmp23196, i64 1 + %tmp23198 = getelementptr inbounds float* %tmp23197, i64 1 + %tmp23199 = getelementptr 
inbounds float* %tmp23198, i64 1 + %tmp23200 = getelementptr inbounds float* %tmp23199, i64 1 + %tmp23201 = getelementptr inbounds float* %tmp23200, i64 1 + %tmp23202 = getelementptr inbounds float* %tmp23201, i64 1 + %tmp23203 = getelementptr inbounds float* %tmp23202, i64 1 + %tmp23204 = getelementptr inbounds float* %tmp23203, i64 1 + %tmp23205 = getelementptr inbounds float* %tmp23204, i64 1 + %tmp23206 = getelementptr inbounds float* %tmp23205, i64 1 + %tmp23207 = getelementptr inbounds float* %tmp23206, i64 1 + %tmp23208 = getelementptr inbounds float* %tmp23207, i64 1 + %tmp23209 = getelementptr inbounds float* %tmp23208, i64 1 + %tmp23210 = getelementptr inbounds float* %tmp23209, i64 1 + %tmp23211 = getelementptr inbounds float* %tmp23210, i64 1 + %tmp23212 = getelementptr inbounds float* %tmp23211, i64 1 + %tmp23213 = getelementptr inbounds float* %tmp23212, i64 1 + %tmp23214 = getelementptr inbounds float* %tmp23213, i64 1 + %tmp23215 = getelementptr inbounds float* %tmp23214, i64 1 + %tmp23216 = getelementptr inbounds float* %tmp23215, i64 1 + %tmp23217 = getelementptr inbounds float* %tmp23216, i64 1 + %tmp23218 = getelementptr inbounds float* %tmp23217, i64 1 + %tmp23219 = getelementptr inbounds float* %tmp23218, i64 1 + %tmp23220 = getelementptr inbounds float* %tmp23219, i64 1 + %tmp23221 = getelementptr inbounds float* %tmp23220, i64 1 + %tmp23222 = getelementptr inbounds float* %tmp23221, i64 1 + %tmp23223 = getelementptr inbounds float* %tmp23222, i64 1 + %tmp23224 = getelementptr inbounds float* %tmp23223, i64 1 + %tmp23225 = getelementptr inbounds float* %tmp23224, i64 1 + %tmp23226 = getelementptr inbounds float* %tmp23225, i64 1 + %tmp23227 = getelementptr inbounds float* %tmp23226, i64 1 + %tmp23228 = getelementptr inbounds float* %tmp23227, i64 1 + %tmp23229 = getelementptr inbounds float* %tmp23228, i64 1 + %tmp23230 = getelementptr inbounds float* %tmp23229, i64 1 + %tmp23231 = getelementptr inbounds float* %tmp23230, i64 1 + %tmp23232 = 
getelementptr inbounds float* %tmp23231, i64 1 + %tmp23233 = getelementptr inbounds float* %tmp23232, i64 1 + %tmp23234 = getelementptr inbounds float* %tmp23233, i64 1 + %tmp23235 = getelementptr inbounds float* %tmp23234, i64 1 + %tmp23236 = getelementptr inbounds float* %tmp23235, i64 1 + %tmp23237 = getelementptr inbounds float* %tmp23236, i64 1 + %tmp23238 = getelementptr inbounds float* %tmp23237, i64 1 + %tmp23239 = getelementptr inbounds float* %tmp23238, i64 1 + %tmp23240 = getelementptr inbounds float* %tmp23239, i64 1 + %tmp23241 = getelementptr inbounds float* %tmp23240, i64 1 + %tmp23242 = getelementptr inbounds float* %tmp23241, i64 1 + %tmp23243 = getelementptr inbounds float* %tmp23242, i64 1 + %tmp23244 = getelementptr inbounds float* %tmp23243, i64 1 + %tmp23245 = getelementptr inbounds float* %tmp23244, i64 1 + %tmp23246 = getelementptr inbounds float* %tmp23245, i64 1 + %tmp23247 = getelementptr inbounds float* %tmp23246, i64 1 + %tmp23248 = getelementptr inbounds float* %tmp23247, i64 1 + %tmp23249 = getelementptr inbounds float* %tmp23248, i64 1 + %tmp23250 = getelementptr inbounds float* %tmp23249, i64 1 + %tmp23251 = getelementptr inbounds float* %tmp23250, i64 1 + %tmp23252 = getelementptr inbounds float* %tmp23251, i64 1 + %tmp23253 = getelementptr inbounds float* %tmp23252, i64 1 + %tmp23254 = getelementptr inbounds float* %tmp23253, i64 1 + %tmp23255 = getelementptr inbounds float* %tmp23254, i64 1 + %tmp23256 = getelementptr inbounds float* %tmp23255, i64 1 + %tmp23257 = getelementptr inbounds float* %tmp23256, i64 1 + %tmp23258 = getelementptr inbounds float* %tmp23257, i64 1 + %tmp23259 = getelementptr inbounds float* %tmp23258, i64 1 + %tmp23260 = getelementptr inbounds float* %tmp23259, i64 1 + %tmp23261 = getelementptr inbounds float* %tmp23260, i64 1 + %tmp23262 = getelementptr inbounds float* %tmp23261, i64 1 + %tmp23263 = getelementptr inbounds float* %tmp23262, i64 1 + %tmp23264 = getelementptr inbounds float* %tmp23263, i64 1 
+ %tmp23265 = getelementptr inbounds float* %tmp23264, i64 1 + %tmp23266 = getelementptr inbounds float* %tmp23265, i64 1 + %tmp23267 = getelementptr inbounds float* %tmp23266, i64 1 + %tmp23268 = getelementptr inbounds float* %tmp23267, i64 1 + %tmp23269 = getelementptr inbounds float* %tmp23268, i64 1 + %tmp23270 = getelementptr inbounds float* %tmp23269, i64 1 + %tmp23271 = getelementptr inbounds float* %tmp23270, i64 1 + %tmp23272 = getelementptr inbounds float* %tmp23271, i64 1 + %tmp23273 = getelementptr inbounds float* %tmp23272, i64 1 + %tmp23274 = getelementptr inbounds float* %tmp23273, i64 1 + %tmp23275 = getelementptr inbounds float* %tmp23274, i64 1 + %tmp23276 = getelementptr inbounds float* %tmp23275, i64 1 + %tmp23277 = getelementptr inbounds float* %tmp23276, i64 1 + %tmp23278 = getelementptr inbounds float* %tmp23277, i64 1 + %tmp23279 = getelementptr inbounds float* %tmp23278, i64 1 + %tmp23280 = getelementptr inbounds float* %tmp23279, i64 1 + %tmp23281 = getelementptr inbounds float* %tmp23280, i64 1 + %tmp23282 = getelementptr inbounds float* %tmp23281, i64 1 + %tmp23283 = getelementptr inbounds float* %tmp23282, i64 1 + %tmp23284 = getelementptr inbounds float* %tmp23283, i64 1 + %tmp23285 = getelementptr inbounds float* %tmp23284, i64 1 + %tmp23286 = getelementptr inbounds float* %tmp23285, i64 1 + %tmp23287 = getelementptr inbounds float* %tmp23286, i64 1 + %tmp23288 = getelementptr inbounds float* %tmp23287, i64 1 + %tmp23289 = getelementptr inbounds float* %tmp23288, i64 1 + %tmp23290 = getelementptr inbounds float* %tmp23289, i64 1 + %tmp23291 = getelementptr inbounds float* %tmp23290, i64 1 + %tmp23292 = getelementptr inbounds float* %tmp23291, i64 1 + %tmp23293 = getelementptr inbounds float* %tmp23292, i64 1 + %tmp23294 = getelementptr inbounds float* %tmp23293, i64 1 + %tmp23295 = getelementptr inbounds float* %tmp23294, i64 1 + %tmp23296 = getelementptr inbounds float* %tmp23295, i64 1 + %tmp23297 = getelementptr inbounds float* 
%tmp23296, i64 1 + %tmp23298 = getelementptr inbounds float* %tmp23297, i64 1 + %tmp23299 = getelementptr inbounds float* %tmp23298, i64 1 + %tmp23300 = getelementptr inbounds float* %tmp23299, i64 1 + %tmp23301 = getelementptr inbounds float* %tmp23300, i64 1 + %tmp23302 = getelementptr inbounds float* %tmp23301, i64 1 + %tmp23303 = getelementptr inbounds float* %tmp23302, i64 1 + %tmp23304 = getelementptr inbounds float* %tmp23303, i64 1 + %tmp23305 = getelementptr inbounds float* %tmp23304, i64 1 + %tmp23306 = getelementptr inbounds float* %tmp23305, i64 1 + %tmp23307 = getelementptr inbounds float* %tmp23306, i64 1 + %tmp23308 = getelementptr inbounds float* %tmp23307, i64 1 + %tmp23309 = getelementptr inbounds float* %tmp23308, i64 1 + %tmp23310 = getelementptr inbounds float* %tmp23309, i64 1 + %tmp23311 = getelementptr inbounds float* %tmp23310, i64 1 + %tmp23312 = getelementptr inbounds float* %tmp23311, i64 1 + %tmp23313 = getelementptr inbounds float* %tmp23312, i64 1 + %tmp23314 = getelementptr inbounds float* %tmp23313, i64 1 + %tmp23315 = getelementptr inbounds float* %tmp23314, i64 1 + %tmp23316 = getelementptr inbounds float* %tmp23315, i64 1 + %tmp23317 = getelementptr inbounds float* %tmp23316, i64 1 + %tmp23318 = getelementptr inbounds float* %tmp23317, i64 1 + %tmp23319 = getelementptr inbounds float* %tmp23318, i64 1 + %tmp23320 = getelementptr inbounds float* %tmp23319, i64 1 + %tmp23321 = getelementptr inbounds float* %tmp23320, i64 1 + %tmp23322 = getelementptr inbounds float* %tmp23321, i64 1 + %tmp23323 = getelementptr inbounds float* %tmp23322, i64 1 + %tmp23324 = getelementptr inbounds float* %tmp23323, i64 1 + %tmp23325 = getelementptr inbounds float* %tmp23324, i64 1 + %tmp23326 = getelementptr inbounds float* %tmp23325, i64 1 + %tmp23327 = getelementptr inbounds float* %tmp23326, i64 1 + %tmp23328 = getelementptr inbounds float* %tmp23327, i64 1 + %tmp23329 = getelementptr inbounds float* %tmp23328, i64 1 + %tmp23330 = getelementptr 
inbounds float* %tmp23329, i64 1 + %tmp23331 = getelementptr inbounds float* %tmp23330, i64 1 + %tmp23332 = getelementptr inbounds float* %tmp23331, i64 1 + %tmp23333 = getelementptr inbounds float* %tmp23332, i64 1 + %tmp23334 = getelementptr inbounds float* %tmp23333, i64 1 + %tmp23335 = getelementptr inbounds float* %tmp23334, i64 1 + %tmp23336 = getelementptr inbounds float* %tmp23335, i64 1 + %tmp23337 = getelementptr inbounds float* %tmp23336, i64 1 + %tmp23338 = getelementptr inbounds float* %tmp23337, i64 1 + %tmp23339 = getelementptr inbounds float* %tmp23338, i64 1 + %tmp23340 = getelementptr inbounds float* %tmp23339, i64 1 + %tmp23341 = getelementptr inbounds float* %tmp23340, i64 1 + %tmp23342 = getelementptr inbounds float* %tmp23341, i64 1 + %tmp23343 = getelementptr inbounds float* %tmp23342, i64 1 + %tmp23344 = getelementptr inbounds float* %tmp23343, i64 1 + %tmp23345 = getelementptr inbounds float* %tmp23344, i64 1 + %tmp23346 = getelementptr inbounds float* %tmp23345, i64 1 + %tmp23347 = getelementptr inbounds float* %tmp23346, i64 1 + %tmp23348 = getelementptr inbounds float* %tmp23347, i64 1 + %tmp23349 = getelementptr inbounds float* %tmp23348, i64 1 + %tmp23350 = getelementptr inbounds float* %tmp23349, i64 1 + %tmp23351 = getelementptr inbounds float* %tmp23350, i64 1 + %tmp23352 = getelementptr inbounds float* %tmp23351, i64 1 + %tmp23353 = getelementptr inbounds float* %tmp23352, i64 1 + %tmp23354 = getelementptr inbounds float* %tmp23353, i64 1 + %tmp23355 = getelementptr inbounds float* %tmp23354, i64 1 + %tmp23356 = getelementptr inbounds float* %tmp23355, i64 1 + %tmp23357 = getelementptr inbounds float* %tmp23356, i64 1 + %tmp23358 = getelementptr inbounds float* %tmp23357, i64 1 + %tmp23359 = getelementptr inbounds float* %tmp23358, i64 1 + %tmp23360 = getelementptr inbounds float* %tmp23359, i64 1 + %tmp23361 = getelementptr inbounds float* %tmp23360, i64 1 + %tmp23362 = getelementptr inbounds float* %tmp23361, i64 1 + %tmp23363 = 
getelementptr inbounds float* %tmp23362, i64 1 + %tmp23364 = getelementptr inbounds float* %tmp23363, i64 1 + %tmp23365 = getelementptr inbounds float* %tmp23364, i64 1 + %tmp23366 = getelementptr inbounds float* %tmp23365, i64 1 + %tmp23367 = getelementptr inbounds float* %tmp23366, i64 1 + %tmp23368 = getelementptr inbounds float* %tmp23367, i64 1 + %tmp23369 = getelementptr inbounds float* %tmp23368, i64 1 + %tmp23370 = getelementptr inbounds float* %tmp23369, i64 1 + %tmp23371 = getelementptr inbounds float* %tmp23370, i64 1 + %tmp23372 = getelementptr inbounds float* %tmp23371, i64 1 + %tmp23373 = getelementptr inbounds float* %tmp23372, i64 1 + %tmp23374 = getelementptr inbounds float* %tmp23373, i64 1 + %tmp23375 = getelementptr inbounds float* %tmp23374, i64 1 + %tmp23376 = getelementptr inbounds float* %tmp23375, i64 1 + %tmp23377 = getelementptr inbounds float* %tmp23376, i64 1 + %tmp23378 = getelementptr inbounds float* %tmp23377, i64 1 + %tmp23379 = getelementptr inbounds float* %tmp23378, i64 1 + %tmp23380 = getelementptr inbounds float* %tmp23379, i64 1 + %tmp23381 = getelementptr inbounds float* %tmp23380, i64 1 + %tmp23382 = getelementptr inbounds float* %tmp23381, i64 1 + %tmp23383 = getelementptr inbounds float* %tmp23382, i64 1 + %tmp23384 = getelementptr inbounds float* %tmp23383, i64 1 + %tmp23385 = getelementptr inbounds float* %tmp23384, i64 1 + %tmp23386 = getelementptr inbounds float* %tmp23385, i64 1 + %tmp23387 = getelementptr inbounds float* %tmp23386, i64 1 + %tmp23388 = getelementptr inbounds float* %tmp23387, i64 1 + %tmp23389 = getelementptr inbounds float* %tmp23388, i64 1 + %tmp23390 = getelementptr inbounds float* %tmp23389, i64 1 + %tmp23391 = getelementptr inbounds float* %tmp23390, i64 1 + %tmp23392 = getelementptr inbounds float* %tmp23391, i64 1 + %tmp23393 = getelementptr inbounds float* %tmp23392, i64 1 + %tmp23394 = getelementptr inbounds float* %tmp23393, i64 1 + %tmp23395 = getelementptr inbounds float* %tmp23394, i64 1 
+ %tmp23396 = getelementptr inbounds float* %tmp23395, i64 1 + %tmp23397 = getelementptr inbounds float* %tmp23396, i64 1 + %tmp23398 = getelementptr inbounds float* %tmp23397, i64 1 + %tmp23399 = getelementptr inbounds float* %tmp23398, i64 1 + %tmp23400 = getelementptr inbounds float* %tmp23399, i64 1 + %tmp23401 = getelementptr inbounds float* %tmp23400, i64 1 + %tmp23402 = getelementptr inbounds float* %tmp23401, i64 1 + %tmp23403 = getelementptr inbounds float* %tmp23402, i64 1 + %tmp23404 = getelementptr inbounds float* %tmp23403, i64 1 + %tmp23405 = getelementptr inbounds float* %tmp23404, i64 1 + %tmp23406 = getelementptr inbounds float* %tmp23405, i64 1 + %tmp23407 = getelementptr inbounds float* %tmp23406, i64 1 + %tmp23408 = getelementptr inbounds float* %tmp23407, i64 1 + %tmp23409 = getelementptr inbounds float* %tmp23408, i64 1 + %tmp23410 = getelementptr inbounds float* %tmp23409, i64 1 + %tmp23411 = getelementptr inbounds float* %tmp23410, i64 1 + %tmp23412 = getelementptr inbounds float* %tmp23411, i64 1 + %tmp23413 = getelementptr inbounds float* %tmp23412, i64 1 + %tmp23414 = getelementptr inbounds float* %tmp23413, i64 1 + %tmp23415 = getelementptr inbounds float* %tmp23414, i64 1 + %tmp23416 = getelementptr inbounds float* %tmp23415, i64 1 + %tmp23417 = getelementptr inbounds float* %tmp23416, i64 1 + %tmp23418 = getelementptr inbounds float* %tmp23417, i64 1 + %tmp23419 = getelementptr inbounds float* %tmp23418, i64 1 + %tmp23420 = getelementptr inbounds float* %tmp23419, i64 1 + %tmp23421 = getelementptr inbounds float* %tmp23420, i64 1 + %tmp23422 = getelementptr inbounds float* %tmp23421, i64 1 + %tmp23423 = getelementptr inbounds float* %tmp23422, i64 1 + %tmp23424 = getelementptr inbounds float* %tmp23423, i64 1 + %tmp23425 = getelementptr inbounds float* %tmp23424, i64 1 + %tmp23426 = getelementptr inbounds float* %tmp23425, i64 1 + %tmp23427 = getelementptr inbounds float* %tmp23426, i64 1 + %tmp23428 = getelementptr inbounds float* 
%tmp23427, i64 1 + %tmp23429 = getelementptr inbounds float* %tmp23428, i64 1 + %tmp23430 = getelementptr inbounds float* %tmp23429, i64 1 + %tmp23431 = getelementptr inbounds float* %tmp23430, i64 1 + %tmp23432 = getelementptr inbounds float* %tmp23431, i64 1 + %tmp23433 = getelementptr inbounds float* %tmp23432, i64 1 + %tmp23434 = getelementptr inbounds float* %tmp23433, i64 1 + %tmp23435 = getelementptr inbounds float* %tmp23434, i64 1 + %tmp23436 = getelementptr inbounds float* %tmp23435, i64 1 + %tmp23437 = getelementptr inbounds float* %tmp23436, i64 1 + %tmp23438 = getelementptr inbounds float* %tmp23437, i64 1 + %tmp23439 = getelementptr inbounds float* %tmp23438, i64 1 + %tmp23440 = getelementptr inbounds float* %tmp23439, i64 1 + %tmp23441 = getelementptr inbounds float* %tmp23440, i64 1 + %tmp23442 = getelementptr inbounds float* %tmp23441, i64 1 + %tmp23443 = getelementptr inbounds float* %tmp23442, i64 1 + %tmp23444 = getelementptr inbounds float* %tmp23443, i64 1 + %tmp23445 = getelementptr inbounds float* %tmp23444, i64 1 + %tmp23446 = getelementptr inbounds float* %tmp23445, i64 1 + %tmp23447 = getelementptr inbounds float* %tmp23446, i64 1 + %tmp23448 = getelementptr inbounds float* %tmp23447, i64 1 + %tmp23449 = getelementptr inbounds float* %tmp23448, i64 1 + %tmp23450 = getelementptr inbounds float* %tmp23449, i64 1 + %tmp23451 = getelementptr inbounds float* %tmp23450, i64 1 + %tmp23452 = getelementptr inbounds float* %tmp23451, i64 1 + %tmp23453 = getelementptr inbounds float* %tmp23452, i64 1 + %tmp23454 = getelementptr inbounds float* %tmp23453, i64 1 + %tmp23455 = getelementptr inbounds float* %tmp23454, i64 1 + %tmp23456 = getelementptr inbounds float* %tmp23455, i64 1 + %tmp23457 = getelementptr inbounds float* %tmp23456, i64 1 + %tmp23458 = getelementptr inbounds float* %tmp23457, i64 1 + %tmp23459 = getelementptr inbounds float* %tmp23458, i64 1 + %tmp23460 = getelementptr inbounds float* %tmp23459, i64 1 + %tmp23461 = getelementptr 
inbounds float* %tmp23460, i64 1 + %tmp23462 = getelementptr inbounds float* %tmp23461, i64 1 + %tmp23463 = getelementptr inbounds float* %tmp23462, i64 1 + %tmp23464 = getelementptr inbounds float* %tmp23463, i64 1 + %tmp23465 = getelementptr inbounds float* %tmp23464, i64 1 + %tmp23466 = getelementptr inbounds float* %tmp23465, i64 1 + %tmp23467 = getelementptr inbounds float* %tmp23466, i64 1 + %tmp23468 = getelementptr inbounds float* %tmp23467, i64 1 + %tmp23469 = getelementptr inbounds float* %tmp23468, i64 1 + %tmp23470 = getelementptr inbounds float* %tmp23469, i64 1 + %tmp23471 = getelementptr inbounds float* %tmp23470, i64 1 + %tmp23472 = getelementptr inbounds float* %tmp23471, i64 1 + %tmp23473 = getelementptr inbounds float* %tmp23472, i64 1 + %tmp23474 = getelementptr inbounds float* %tmp23473, i64 1 + %tmp23475 = getelementptr inbounds float* %tmp23474, i64 1 + %tmp23476 = getelementptr inbounds float* %tmp23475, i64 1 + %tmp23477 = getelementptr inbounds float* %tmp23476, i64 1 + %tmp23478 = getelementptr inbounds float* %tmp23477, i64 1 + %tmp23479 = getelementptr inbounds float* %tmp23478, i64 1 + %tmp23480 = getelementptr inbounds float* %tmp23479, i64 1 + %tmp23481 = getelementptr inbounds float* %tmp23480, i64 1 + %tmp23482 = getelementptr inbounds float* %tmp23481, i64 1 + %tmp23483 = getelementptr inbounds float* %tmp23482, i64 1 + %tmp23484 = getelementptr inbounds float* %tmp23483, i64 1 + %tmp23485 = getelementptr inbounds float* %tmp23484, i64 1 + %tmp23486 = getelementptr inbounds float* %tmp23485, i64 1 + %tmp23487 = getelementptr inbounds float* %tmp23486, i64 1 + %tmp23488 = getelementptr inbounds float* %tmp23487, i64 1 + %tmp23489 = getelementptr inbounds float* %tmp23488, i64 1 + %tmp23490 = getelementptr inbounds float* %tmp23489, i64 1 + %tmp23491 = getelementptr inbounds float* %tmp23490, i64 1 + %tmp23492 = getelementptr inbounds float* %tmp23491, i64 1 + %tmp23493 = getelementptr inbounds float* %tmp23492, i64 1 + %tmp23494 = 
getelementptr inbounds float* %tmp23493, i64 1 + %tmp23495 = getelementptr inbounds float* %tmp23494, i64 1 + %tmp23496 = getelementptr inbounds float* %tmp23495, i64 1 + %tmp23497 = getelementptr inbounds float* %tmp23496, i64 1 + %tmp23498 = getelementptr inbounds float* %tmp23497, i64 1 + %tmp23499 = getelementptr inbounds float* %tmp23498, i64 1 + %tmp23500 = getelementptr inbounds float* %tmp23499, i64 1 + %tmp23501 = getelementptr inbounds float* %tmp23500, i64 1 + %tmp23502 = getelementptr inbounds float* %tmp23501, i64 1 + %tmp23503 = getelementptr inbounds float* %tmp23502, i64 1 + %tmp23504 = getelementptr inbounds float* %tmp23503, i64 1 + %tmp23505 = getelementptr inbounds float* %tmp23504, i64 1 + %tmp23506 = getelementptr inbounds float* %tmp23505, i64 1 + %tmp23507 = getelementptr inbounds float* %tmp23506, i64 1 + %tmp23508 = getelementptr inbounds float* %tmp23507, i64 1 + %tmp23509 = getelementptr inbounds float* %tmp23508, i64 1 + %tmp23510 = getelementptr inbounds float* %tmp23509, i64 1 + %tmp23511 = getelementptr inbounds float* %tmp23510, i64 1 + %tmp23512 = getelementptr inbounds float* %tmp23511, i64 1 + %tmp23513 = getelementptr inbounds float* %tmp23512, i64 1 + %tmp23514 = getelementptr inbounds float* %tmp23513, i64 1 + %tmp23515 = getelementptr inbounds float* %tmp23514, i64 1 + %tmp23516 = getelementptr inbounds float* %tmp23515, i64 1 + %tmp23517 = getelementptr inbounds float* %tmp23516, i64 1 + %tmp23518 = getelementptr inbounds float* %tmp23517, i64 1 + %tmp23519 = getelementptr inbounds float* %tmp23518, i64 1 + %tmp23520 = getelementptr inbounds float* %tmp23519, i64 1 + %tmp23521 = getelementptr inbounds float* %tmp23520, i64 1 + %tmp23522 = getelementptr inbounds float* %tmp23521, i64 1 + %tmp23523 = getelementptr inbounds float* %tmp23522, i64 1 + %tmp23524 = getelementptr inbounds float* %tmp23523, i64 1 + %tmp23525 = getelementptr inbounds float* %tmp23524, i64 1 + %tmp23526 = getelementptr inbounds float* %tmp23525, i64 1 
+ %tmp23527 = getelementptr inbounds float* %tmp23526, i64 1 + %tmp23528 = getelementptr inbounds float* %tmp23527, i64 1 + %tmp23529 = getelementptr inbounds float* %tmp23528, i64 1 + %tmp23530 = getelementptr inbounds float* %tmp23529, i64 1 + %tmp23531 = getelementptr inbounds float* %tmp23530, i64 1 + %tmp23532 = getelementptr inbounds float* %tmp23531, i64 1 + %tmp23533 = getelementptr inbounds float* %tmp23532, i64 1 + %tmp23534 = getelementptr inbounds float* %tmp23533, i64 1 + %tmp23535 = getelementptr inbounds float* %tmp23534, i64 1 + %tmp23536 = getelementptr inbounds float* %tmp23535, i64 1 + %tmp23537 = getelementptr inbounds float* %tmp23536, i64 1 + %tmp23538 = getelementptr inbounds float* %tmp23537, i64 1 + %tmp23539 = getelementptr inbounds float* %tmp23538, i64 1 + %tmp23540 = getelementptr inbounds float* %tmp23539, i64 1 + %tmp23541 = getelementptr inbounds float* %tmp23540, i64 1 + %tmp23542 = getelementptr inbounds float* %tmp23541, i64 1 + %tmp23543 = getelementptr inbounds float* %tmp23542, i64 1 + %tmp23544 = getelementptr inbounds float* %tmp23543, i64 1 + %tmp23545 = getelementptr inbounds float* %tmp23544, i64 1 + %tmp23546 = getelementptr inbounds float* %tmp23545, i64 1 + %tmp23547 = getelementptr inbounds float* %tmp23546, i64 1 + %tmp23548 = getelementptr inbounds float* %tmp23547, i64 1 + %tmp23549 = getelementptr inbounds float* %tmp23548, i64 1 + %tmp23550 = getelementptr inbounds float* %tmp23549, i64 1 + %tmp23551 = getelementptr inbounds float* %tmp23550, i64 1 + %tmp23552 = getelementptr inbounds float* %tmp23551, i64 1 + %tmp23553 = getelementptr inbounds float* %tmp23552, i64 1 + %tmp23554 = getelementptr inbounds float* %tmp23553, i64 1 + %tmp23555 = getelementptr inbounds float* %tmp23554, i64 1 + %tmp23556 = getelementptr inbounds float* %tmp23555, i64 1 + %tmp23557 = getelementptr inbounds float* %tmp23556, i64 1 + %tmp23558 = getelementptr inbounds float* %tmp23557, i64 1 + %tmp23559 = getelementptr inbounds float* 
%tmp23558, i64 1 + %tmp23560 = getelementptr inbounds float* %tmp23559, i64 1 + %tmp23561 = getelementptr inbounds float* %tmp23560, i64 1 + %tmp23562 = getelementptr inbounds float* %tmp23561, i64 1 + %tmp23563 = getelementptr inbounds float* %tmp23562, i64 1 + %tmp23564 = getelementptr inbounds float* %tmp23563, i64 1 + %tmp23565 = getelementptr inbounds float* %tmp23564, i64 1 + %tmp23566 = getelementptr inbounds float* %tmp23565, i64 1 + %tmp23567 = getelementptr inbounds float* %tmp23566, i64 1 + %tmp23568 = getelementptr inbounds float* %tmp23567, i64 1 + %tmp23569 = getelementptr inbounds float* %tmp23568, i64 1 + %tmp23570 = getelementptr inbounds float* %tmp23569, i64 1 + %tmp23571 = getelementptr inbounds float* %tmp23570, i64 1 + %tmp23572 = getelementptr inbounds float* %tmp23571, i64 1 + %tmp23573 = getelementptr inbounds float* %tmp23572, i64 1 + %tmp23574 = getelementptr inbounds float* %tmp23573, i64 1 + %tmp23575 = getelementptr inbounds float* %tmp23574, i64 1 + %tmp23576 = getelementptr inbounds float* %tmp23575, i64 1 + %tmp23577 = getelementptr inbounds float* %tmp23576, i64 1 + %tmp23578 = getelementptr inbounds float* %tmp23577, i64 1 + %tmp23579 = getelementptr inbounds float* %tmp23578, i64 1 + %tmp23580 = getelementptr inbounds float* %tmp23579, i64 1 + %tmp23581 = getelementptr inbounds float* %tmp23580, i64 1 + %tmp23582 = getelementptr inbounds float* %tmp23581, i64 1 + %tmp23583 = getelementptr inbounds float* %tmp23582, i64 1 + %tmp23584 = getelementptr inbounds float* %tmp23583, i64 1 + %tmp23585 = getelementptr inbounds float* %tmp23584, i64 1 + %tmp23586 = getelementptr inbounds float* %tmp23585, i64 1 + %tmp23587 = getelementptr inbounds float* %tmp23586, i64 1 + %tmp23588 = getelementptr inbounds float* %tmp23587, i64 1 + %tmp23589 = getelementptr inbounds float* %tmp23588, i64 1 + %tmp23590 = getelementptr inbounds float* %tmp23589, i64 1 + %tmp23591 = getelementptr inbounds float* %tmp23590, i64 1 + %tmp23592 = getelementptr 
inbounds float* %tmp23591, i64 1 + %tmp23593 = getelementptr inbounds float* %tmp23592, i64 1 + %tmp23594 = getelementptr inbounds float* %tmp23593, i64 1 + %tmp23595 = getelementptr inbounds float* %tmp23594, i64 1 + %tmp23596 = getelementptr inbounds float* %tmp23595, i64 1 + %tmp23597 = getelementptr inbounds float* %tmp23596, i64 1 + %tmp23598 = getelementptr inbounds float* %tmp23597, i64 1 + %tmp23599 = getelementptr inbounds float* %tmp23598, i64 1 + %tmp23600 = getelementptr inbounds float* %tmp23599, i64 1 + %tmp23601 = getelementptr inbounds float* %tmp23600, i64 1 + %tmp23602 = getelementptr inbounds float* %tmp23601, i64 1 + %tmp23603 = getelementptr inbounds float* %tmp23602, i64 1 + %tmp23604 = getelementptr inbounds float* %tmp23603, i64 1 + %tmp23605 = getelementptr inbounds float* %tmp23604, i64 1 + %tmp23606 = getelementptr inbounds float* %tmp23605, i64 1 + %tmp23607 = getelementptr inbounds float* %tmp23606, i64 1 + %tmp23608 = getelementptr inbounds float* %tmp23607, i64 1 + %tmp23609 = getelementptr inbounds float* %tmp23608, i64 1 + %tmp23610 = getelementptr inbounds float* %tmp23609, i64 1 + %tmp23611 = getelementptr inbounds float* %tmp23610, i64 1 + %tmp23612 = getelementptr inbounds float* %tmp23611, i64 1 + %tmp23613 = getelementptr inbounds float* %tmp23612, i64 1 + %tmp23614 = getelementptr inbounds float* %tmp23613, i64 1 + %tmp23615 = getelementptr inbounds float* %tmp23614, i64 1 + %tmp23616 = getelementptr inbounds float* %tmp23615, i64 1 + %tmp23617 = getelementptr inbounds float* %tmp23616, i64 1 + %tmp23618 = getelementptr inbounds float* %tmp23617, i64 1 + %tmp23619 = getelementptr inbounds float* %tmp23618, i64 1 + %tmp23620 = getelementptr inbounds float* %tmp23619, i64 1 + %tmp23621 = getelementptr inbounds float* %tmp23620, i64 1 + %tmp23622 = getelementptr inbounds float* %tmp23621, i64 1 + %tmp23623 = getelementptr inbounds float* %tmp23622, i64 1 + %tmp23624 = getelementptr inbounds float* %tmp23623, i64 1 + %tmp23625 = 
getelementptr inbounds float* %tmp23624, i64 1 + %tmp23626 = getelementptr inbounds float* %tmp23625, i64 1 + %tmp23627 = getelementptr inbounds float* %tmp23626, i64 1 + %tmp23628 = getelementptr inbounds float* %tmp23627, i64 1 + %tmp23629 = getelementptr inbounds float* %tmp23628, i64 1 + %tmp23630 = getelementptr inbounds float* %tmp23629, i64 1 + %tmp23631 = getelementptr inbounds float* %tmp23630, i64 1 + %tmp23632 = getelementptr inbounds float* %tmp23631, i64 1 + %tmp23633 = getelementptr inbounds float* %tmp23632, i64 1 + %tmp23634 = getelementptr inbounds float* %tmp23633, i64 1 + %tmp23635 = getelementptr inbounds float* %tmp23634, i64 1 + %tmp23636 = getelementptr inbounds float* %tmp23635, i64 1 + %tmp23637 = getelementptr inbounds float* %tmp23636, i64 1 + %tmp23638 = getelementptr inbounds float* %tmp23637, i64 1 + %tmp23639 = getelementptr inbounds float* %tmp23638, i64 1 + %tmp23640 = getelementptr inbounds float* %tmp23639, i64 1 + %tmp23641 = getelementptr inbounds float* %tmp23640, i64 1 + %tmp23642 = getelementptr inbounds float* %tmp23641, i64 1 + %tmp23643 = getelementptr inbounds float* %tmp23642, i64 1 + %tmp23644 = getelementptr inbounds float* %tmp23643, i64 1 + %tmp23645 = getelementptr inbounds float* %tmp23644, i64 1 + %tmp23646 = getelementptr inbounds float* %tmp23645, i64 1 + %tmp23647 = getelementptr inbounds float* %tmp23646, i64 1 + %tmp23648 = getelementptr inbounds float* %tmp23647, i64 1 + %tmp23649 = getelementptr inbounds float* %tmp23648, i64 1 + %tmp23650 = getelementptr inbounds float* %tmp23649, i64 1 + %tmp23651 = getelementptr inbounds float* %tmp23650, i64 1 + %tmp23652 = getelementptr inbounds float* %tmp23651, i64 1 + %tmp23653 = getelementptr inbounds float* %tmp23652, i64 1 + %tmp23654 = getelementptr inbounds float* %tmp23653, i64 1 + %tmp23655 = getelementptr inbounds float* %tmp23654, i64 1 + %tmp23656 = getelementptr inbounds float* %tmp23655, i64 1 + %tmp23657 = getelementptr inbounds float* %tmp23656, i64 1 
+ %tmp23658 = getelementptr inbounds float* %tmp23657, i64 1 + %tmp23659 = getelementptr inbounds float* %tmp23658, i64 1 + %tmp23660 = getelementptr inbounds float* %tmp23659, i64 1 + %tmp23661 = getelementptr inbounds float* %tmp23660, i64 1 + %tmp23662 = getelementptr inbounds float* %tmp23661, i64 1 + %tmp23663 = getelementptr inbounds float* %tmp23662, i64 1 + %tmp23664 = getelementptr inbounds float* %tmp23663, i64 1 + %tmp23665 = getelementptr inbounds float* %tmp23664, i64 1 + %tmp23666 = getelementptr inbounds float* %tmp23665, i64 1 + %tmp23667 = getelementptr inbounds float* %tmp23666, i64 1 + %tmp23668 = getelementptr inbounds float* %tmp23667, i64 1 + %tmp23669 = getelementptr inbounds float* %tmp23668, i64 1 + %tmp23670 = getelementptr inbounds float* %tmp23669, i64 1 + %tmp23671 = getelementptr inbounds float* %tmp23670, i64 1 + %tmp23672 = getelementptr inbounds float* %tmp23671, i64 1 + %tmp23673 = getelementptr inbounds float* %tmp23672, i64 1 + %tmp23674 = getelementptr inbounds float* %tmp23673, i64 1 + %tmp23675 = getelementptr inbounds float* %tmp23674, i64 1 + %tmp23676 = getelementptr inbounds float* %tmp23675, i64 1 + %tmp23677 = getelementptr inbounds float* %tmp23676, i64 1 + %tmp23678 = getelementptr inbounds float* %tmp23677, i64 1 + %tmp23679 = getelementptr inbounds float* %tmp23678, i64 1 + %tmp23680 = getelementptr inbounds float* %tmp23679, i64 1 + %tmp23681 = getelementptr inbounds float* %tmp23680, i64 1 + %tmp23682 = getelementptr inbounds float* %tmp23681, i64 1 + %tmp23683 = getelementptr inbounds float* %tmp23682, i64 1 + %tmp23684 = getelementptr inbounds float* %tmp23683, i64 1 + %tmp23685 = getelementptr inbounds float* %tmp23684, i64 1 + %tmp23686 = getelementptr inbounds float* %tmp23685, i64 1 + %tmp23687 = getelementptr inbounds float* %tmp23686, i64 1 + %tmp23688 = getelementptr inbounds float* %tmp23687, i64 1 + %tmp23689 = getelementptr inbounds float* %tmp23688, i64 1 + %tmp23690 = getelementptr inbounds float* 
%tmp23689, i64 1 + %tmp23691 = getelementptr inbounds float* %tmp23690, i64 1 + %tmp23692 = getelementptr inbounds float* %tmp23691, i64 1 + %tmp23693 = getelementptr inbounds float* %tmp23692, i64 1 + %tmp23694 = getelementptr inbounds float* %tmp23693, i64 1 + %tmp23695 = getelementptr inbounds float* %tmp23694, i64 1 + %tmp23696 = getelementptr inbounds float* %tmp23695, i64 1 + %tmp23697 = getelementptr inbounds float* %tmp23696, i64 1 + %tmp23698 = getelementptr inbounds float* %tmp23697, i64 1 + %tmp23699 = getelementptr inbounds float* %tmp23698, i64 1 + %tmp23700 = getelementptr inbounds float* %tmp23699, i64 1 + %tmp23701 = getelementptr inbounds float* %tmp23700, i64 1 + %tmp23702 = getelementptr inbounds float* %tmp23701, i64 1 + %tmp23703 = getelementptr inbounds float* %tmp23702, i64 1 + %tmp23704 = getelementptr inbounds float* %tmp23703, i64 1 + %tmp23705 = getelementptr inbounds float* %tmp23704, i64 1 + %tmp23706 = getelementptr inbounds float* %tmp23705, i64 1 + %tmp23707 = getelementptr inbounds float* %tmp23706, i64 1 + %tmp23708 = getelementptr inbounds float* %tmp23707, i64 1 + %tmp23709 = getelementptr inbounds float* %tmp23708, i64 1 + %tmp23710 = getelementptr inbounds float* %tmp23709, i64 1 + %tmp23711 = getelementptr inbounds float* %tmp23710, i64 1 + %tmp23712 = getelementptr inbounds float* %tmp23711, i64 1 + %tmp23713 = getelementptr inbounds float* %tmp23712, i64 1 + %tmp23714 = getelementptr inbounds float* %tmp23713, i64 1 + %tmp23715 = getelementptr inbounds float* %tmp23714, i64 1 + %tmp23716 = getelementptr inbounds float* %tmp23715, i64 1 + %tmp23717 = getelementptr inbounds float* %tmp23716, i64 1 + %tmp23718 = getelementptr inbounds float* %tmp23717, i64 1 + %tmp23719 = getelementptr inbounds float* %tmp23718, i64 1 + %tmp23720 = getelementptr inbounds float* %tmp23719, i64 1 + %tmp23721 = getelementptr inbounds float* %tmp23720, i64 1 + %tmp23722 = getelementptr inbounds float* %tmp23721, i64 1 + %tmp23723 = getelementptr 
inbounds float* %tmp23722, i64 1 + %tmp23724 = getelementptr inbounds float* %tmp23723, i64 1 + %tmp23725 = getelementptr inbounds float* %tmp23724, i64 1 + %tmp23726 = getelementptr inbounds float* %tmp23725, i64 1 + %tmp23727 = getelementptr inbounds float* %tmp23726, i64 1 + %tmp23728 = getelementptr inbounds float* %tmp23727, i64 1 + %tmp23729 = getelementptr inbounds float* %tmp23728, i64 1 + %tmp23730 = getelementptr inbounds float* %tmp23729, i64 1 + %tmp23731 = getelementptr inbounds float* %tmp23730, i64 1 + %tmp23732 = getelementptr inbounds float* %tmp23731, i64 1 + %tmp23733 = getelementptr inbounds float* %tmp23732, i64 1 + %tmp23734 = getelementptr inbounds float* %tmp23733, i64 1 + %tmp23735 = getelementptr inbounds float* %tmp23734, i64 1 + %tmp23736 = getelementptr inbounds float* %tmp23735, i64 1 + %tmp23737 = getelementptr inbounds float* %tmp23736, i64 1 + %tmp23738 = getelementptr inbounds float* %tmp23737, i64 1 + %tmp23739 = getelementptr inbounds float* %tmp23738, i64 1 + %tmp23740 = getelementptr inbounds float* %tmp23739, i64 1 + %tmp23741 = getelementptr inbounds float* %tmp23740, i64 1 + %tmp23742 = getelementptr inbounds float* %tmp23741, i64 1 + %tmp23743 = getelementptr inbounds float* %tmp23742, i64 1 + %tmp23744 = getelementptr inbounds float* %tmp23743, i64 1 + %tmp23745 = getelementptr inbounds float* %tmp23744, i64 1 + %tmp23746 = getelementptr inbounds float* %tmp23745, i64 1 + %tmp23747 = getelementptr inbounds float* %tmp23746, i64 1 + %tmp23748 = getelementptr inbounds float* %tmp23747, i64 1 + %tmp23749 = getelementptr inbounds float* %tmp23748, i64 1 + %tmp23750 = getelementptr inbounds float* %tmp23749, i64 1 + %tmp23751 = getelementptr inbounds float* %tmp23750, i64 1 + %tmp23752 = getelementptr inbounds float* %tmp23751, i64 1 + %tmp23753 = getelementptr inbounds float* %tmp23752, i64 1 + %tmp23754 = getelementptr inbounds float* %tmp23753, i64 1 + %tmp23755 = getelementptr inbounds float* %tmp23754, i64 1 + %tmp23756 = 
getelementptr inbounds float* %tmp23755, i64 1 + %tmp23757 = getelementptr inbounds float* %tmp23756, i64 1 + %tmp23758 = getelementptr inbounds float* %tmp23757, i64 1 + %tmp23759 = getelementptr inbounds float* %tmp23758, i64 1 + %tmp23760 = getelementptr inbounds float* %tmp23759, i64 1 + %tmp23761 = getelementptr inbounds float* %tmp23760, i64 1 + %tmp23762 = getelementptr inbounds float* %tmp23761, i64 1 + %tmp23763 = getelementptr inbounds float* %tmp23762, i64 1 + %tmp23764 = getelementptr inbounds float* %tmp23763, i64 1 + %tmp23765 = getelementptr inbounds float* %tmp23764, i64 1 + %tmp23766 = getelementptr inbounds float* %tmp23765, i64 1 + %tmp23767 = getelementptr inbounds float* %tmp23766, i64 1 + %tmp23768 = getelementptr inbounds float* %tmp23767, i64 1 + %tmp23769 = getelementptr inbounds float* %tmp23768, i64 1 + %tmp23770 = getelementptr inbounds float* %tmp23769, i64 1 + %tmp23771 = getelementptr inbounds float* %tmp23770, i64 1 + %tmp23772 = getelementptr inbounds float* %tmp23771, i64 1 + %tmp23773 = getelementptr inbounds float* %tmp23772, i64 1 + %tmp23774 = getelementptr inbounds float* %tmp23773, i64 1 + %tmp23775 = getelementptr inbounds float* %tmp23774, i64 1 + %tmp23776 = getelementptr inbounds float* %tmp23775, i64 1 + %tmp23777 = getelementptr inbounds float* %tmp23776, i64 1 + %tmp23778 = getelementptr inbounds float* %tmp23777, i64 1 + %tmp23779 = getelementptr inbounds float* %tmp23778, i64 1 + %tmp23780 = getelementptr inbounds float* %tmp23779, i64 1 + %tmp23781 = getelementptr inbounds float* %tmp23780, i64 1 + %tmp23782 = getelementptr inbounds float* %tmp23781, i64 1 + %tmp23783 = getelementptr inbounds float* %tmp23782, i64 1 + %tmp23784 = getelementptr inbounds float* %tmp23783, i64 1 + %tmp23785 = getelementptr inbounds float* %tmp23784, i64 1 + %tmp23786 = getelementptr inbounds float* %tmp23785, i64 1 + %tmp23787 = getelementptr inbounds float* %tmp23786, i64 1 + %tmp23788 = getelementptr inbounds float* %tmp23787, i64 1 
+ %tmp23789 = getelementptr inbounds float* %tmp23788, i64 1 + %tmp23790 = getelementptr inbounds float* %tmp23789, i64 1 + %tmp23791 = getelementptr inbounds float* %tmp23790, i64 1 + %tmp23792 = getelementptr inbounds float* %tmp23791, i64 1 + %tmp23793 = getelementptr inbounds float* %tmp23792, i64 1 + %tmp23794 = getelementptr inbounds float* %tmp23793, i64 1 + %tmp23795 = getelementptr inbounds float* %tmp23794, i64 1 + %tmp23796 = getelementptr inbounds float* %tmp23795, i64 1 + %tmp23797 = getelementptr inbounds float* %tmp23796, i64 1 + %tmp23798 = getelementptr inbounds float* %tmp23797, i64 1 + %tmp23799 = getelementptr inbounds float* %tmp23798, i64 1 + %tmp23800 = getelementptr inbounds float* %tmp23799, i64 1 + %tmp23801 = getelementptr inbounds float* %tmp23800, i64 1 + %tmp23802 = getelementptr inbounds float* %tmp23801, i64 1 + %tmp23803 = getelementptr inbounds float* %tmp23802, i64 1 + %tmp23804 = getelementptr inbounds float* %tmp23803, i64 1 + %tmp23805 = getelementptr inbounds float* %tmp23804, i64 1 + %tmp23806 = getelementptr inbounds float* %tmp23805, i64 1 + %tmp23807 = getelementptr inbounds float* %tmp23806, i64 1 + %tmp23808 = getelementptr inbounds float* %tmp23807, i64 1 + %tmp23809 = getelementptr inbounds float* %tmp23808, i64 1 + %tmp23810 = getelementptr inbounds float* %tmp23809, i64 1 + %tmp23811 = getelementptr inbounds float* %tmp23810, i64 1 + %tmp23812 = getelementptr inbounds float* %tmp23811, i64 1 + %tmp23813 = getelementptr inbounds float* %tmp23812, i64 1 + %tmp23814 = getelementptr inbounds float* %tmp23813, i64 1 + %tmp23815 = getelementptr inbounds float* %tmp23814, i64 1 + %tmp23816 = getelementptr inbounds float* %tmp23815, i64 1 + %tmp23817 = getelementptr inbounds float* %tmp23816, i64 1 + %tmp23818 = getelementptr inbounds float* %tmp23817, i64 1 + %tmp23819 = getelementptr inbounds float* %tmp23818, i64 1 + %tmp23820 = getelementptr inbounds float* %tmp23819, i64 1 + %tmp23821 = getelementptr inbounds float* 
%tmp23820, i64 1 + %tmp23822 = getelementptr inbounds float* %tmp23821, i64 1 + %tmp23823 = getelementptr inbounds float* %tmp23822, i64 1 + %tmp23824 = getelementptr inbounds float* %tmp23823, i64 1 + %tmp23825 = getelementptr inbounds float* %tmp23824, i64 1 + %tmp23826 = getelementptr inbounds float* %tmp23825, i64 1 + %tmp23827 = getelementptr inbounds float* %tmp23826, i64 1 + %tmp23828 = getelementptr inbounds float* %tmp23827, i64 1 + %tmp23829 = getelementptr inbounds float* %tmp23828, i64 1 + %tmp23830 = getelementptr inbounds float* %tmp23829, i64 1 + %tmp23831 = getelementptr inbounds float* %tmp23830, i64 1 + %tmp23832 = getelementptr inbounds float* %tmp23831, i64 1 + %tmp23833 = getelementptr inbounds float* %tmp23832, i64 1 + %tmp23834 = getelementptr inbounds float* %tmp23833, i64 1 + %tmp23835 = getelementptr inbounds float* %tmp23834, i64 1 + %tmp23836 = getelementptr inbounds float* %tmp23835, i64 1 + %tmp23837 = getelementptr inbounds float* %tmp23836, i64 1 + %tmp23838 = getelementptr inbounds float* %tmp23837, i64 1 + %tmp23839 = getelementptr inbounds float* %tmp23838, i64 1 + %tmp23840 = getelementptr inbounds float* %tmp23839, i64 1 + %tmp23841 = getelementptr inbounds float* %tmp23840, i64 1 + %tmp23842 = getelementptr inbounds float* %tmp23841, i64 1 + %tmp23843 = getelementptr inbounds float* %tmp23842, i64 1 + %tmp23844 = getelementptr inbounds float* %tmp23843, i64 1 + %tmp23845 = getelementptr inbounds float* %tmp23844, i64 1 + %tmp23846 = getelementptr inbounds float* %tmp23845, i64 1 + %tmp23847 = getelementptr inbounds float* %tmp23846, i64 1 + %tmp23848 = getelementptr inbounds float* %tmp23847, i64 1 + %tmp23849 = getelementptr inbounds float* %tmp23848, i64 1 + %tmp23850 = getelementptr inbounds float* %tmp23849, i64 1 + %tmp23851 = getelementptr inbounds float* %tmp23850, i64 1 + %tmp23852 = getelementptr inbounds float* %tmp23851, i64 1 + %tmp23853 = getelementptr inbounds float* %tmp23852, i64 1 + %tmp23854 = getelementptr 
inbounds float* %tmp23853, i64 1 + %tmp23855 = getelementptr inbounds float* %tmp23854, i64 1 + %tmp23856 = getelementptr inbounds float* %tmp23855, i64 1 + %tmp23857 = getelementptr inbounds float* %tmp23856, i64 1 + %tmp23858 = getelementptr inbounds float* %tmp23857, i64 1 + %tmp23859 = getelementptr inbounds float* %tmp23858, i64 1 + %tmp23860 = getelementptr inbounds float* %tmp23859, i64 1 + %tmp23861 = getelementptr inbounds float* %tmp23860, i64 1 + %tmp23862 = getelementptr inbounds float* %tmp23861, i64 1 + %tmp23863 = getelementptr inbounds float* %tmp23862, i64 1 + %tmp23864 = getelementptr inbounds float* %tmp23863, i64 1 + %tmp23865 = getelementptr inbounds float* %tmp23864, i64 1 + %tmp23866 = getelementptr inbounds float* %tmp23865, i64 1 + %tmp23867 = getelementptr inbounds float* %tmp23866, i64 1 + %tmp23868 = getelementptr inbounds float* %tmp23867, i64 1 + %tmp23869 = getelementptr inbounds float* %tmp23868, i64 1 + %tmp23870 = getelementptr inbounds float* %tmp23869, i64 1 + %tmp23871 = getelementptr inbounds float* %tmp23870, i64 1 + %tmp23872 = getelementptr inbounds float* %tmp23871, i64 1 + %tmp23873 = getelementptr inbounds float* %tmp23872, i64 1 + %tmp23874 = getelementptr inbounds float* %tmp23873, i64 1 + %tmp23875 = getelementptr inbounds float* %tmp23874, i64 1 + %tmp23876 = getelementptr inbounds float* %tmp23875, i64 1 + %tmp23877 = getelementptr inbounds float* %tmp23876, i64 1 + %tmp23878 = getelementptr inbounds float* %tmp23877, i64 1 + %tmp23879 = getelementptr inbounds float* %tmp23878, i64 1 + %tmp23880 = getelementptr inbounds float* %tmp23879, i64 1 + %tmp23881 = getelementptr inbounds float* %tmp23880, i64 1 + %tmp23882 = getelementptr inbounds float* %tmp23881, i64 1 + %tmp23883 = getelementptr inbounds float* %tmp23882, i64 1 + %tmp23884 = getelementptr inbounds float* %tmp23883, i64 1 + %tmp23885 = getelementptr inbounds float* %tmp23884, i64 1 + %tmp23886 = getelementptr inbounds float* %tmp23885, i64 1 + %tmp23887 = 
getelementptr inbounds float* %tmp23886, i64 1 + %tmp23888 = getelementptr inbounds float* %tmp23887, i64 1 + %tmp23889 = getelementptr inbounds float* %tmp23888, i64 1 + %tmp23890 = getelementptr inbounds float* %tmp23889, i64 1 + %tmp23891 = getelementptr inbounds float* %tmp23890, i64 1 + %tmp23892 = getelementptr inbounds float* %tmp23891, i64 1 + %tmp23893 = getelementptr inbounds float* %tmp23892, i64 1 + %tmp23894 = getelementptr inbounds float* %tmp23893, i64 1 + %tmp23895 = getelementptr inbounds float* %tmp23894, i64 1 + %tmp23896 = getelementptr inbounds float* %tmp23895, i64 1 + %tmp23897 = getelementptr inbounds float* %tmp23896, i64 1 + %tmp23898 = getelementptr inbounds float* %tmp23897, i64 1 + %tmp23899 = getelementptr inbounds float* %tmp23898, i64 1 + %tmp23900 = getelementptr inbounds float* %tmp23899, i64 1 + %tmp23901 = getelementptr inbounds float* %tmp23900, i64 1 + %tmp23902 = getelementptr inbounds float* %tmp23901, i64 1 + %tmp23903 = getelementptr inbounds float* %tmp23902, i64 1 + %tmp23904 = getelementptr inbounds float* %tmp23903, i64 1 + %tmp23905 = getelementptr inbounds float* %tmp23904, i64 1 + %tmp23906 = getelementptr inbounds float* %tmp23905, i64 1 + %tmp23907 = getelementptr inbounds float* %tmp23906, i64 1 + %tmp23908 = getelementptr inbounds float* %tmp23907, i64 1 + %tmp23909 = getelementptr inbounds float* %tmp23908, i64 1 + %tmp23910 = getelementptr inbounds float* %tmp23909, i64 1 + %tmp23911 = getelementptr inbounds float* %tmp23910, i64 1 + %tmp23912 = getelementptr inbounds float* %tmp23911, i64 1 + %tmp23913 = getelementptr inbounds float* %tmp23912, i64 1 + %tmp23914 = getelementptr inbounds float* %tmp23913, i64 1 + %tmp23915 = getelementptr inbounds float* %tmp23914, i64 1 + %tmp23916 = getelementptr inbounds float* %tmp23915, i64 1 + %tmp23917 = getelementptr inbounds float* %tmp23916, i64 1 + %tmp23918 = getelementptr inbounds float* %tmp23917, i64 1 + %tmp23919 = getelementptr inbounds float* %tmp23918, i64 1 
+ %tmp23920 = getelementptr inbounds float* %tmp23919, i64 1 + %tmp23921 = getelementptr inbounds float* %tmp23920, i64 1 + %tmp23922 = getelementptr inbounds float* %tmp23921, i64 1 + %tmp23923 = getelementptr inbounds float* %tmp23922, i64 1 + %tmp23924 = getelementptr inbounds float* %tmp23923, i64 1 + %tmp23925 = getelementptr inbounds float* %tmp23924, i64 1 + %tmp23926 = getelementptr inbounds float* %tmp23925, i64 1 + %tmp23927 = getelementptr inbounds float* %tmp23926, i64 1 + %tmp23928 = getelementptr inbounds float* %tmp23927, i64 1 + %tmp23929 = getelementptr inbounds float* %tmp23928, i64 1 + %tmp23930 = getelementptr inbounds float* %tmp23929, i64 1 + %tmp23931 = getelementptr inbounds float* %tmp23930, i64 1 + %tmp23932 = getelementptr inbounds float* %tmp23931, i64 1 + %tmp23933 = getelementptr inbounds float* %tmp23932, i64 1 + %tmp23934 = getelementptr inbounds float* %tmp23933, i64 1 + %tmp23935 = getelementptr inbounds float* %tmp23934, i64 1 + %tmp23936 = getelementptr inbounds float* %tmp23935, i64 1 + %tmp23937 = getelementptr inbounds float* %tmp23936, i64 1 + %tmp23938 = getelementptr inbounds float* %tmp23937, i64 1 + %tmp23939 = getelementptr inbounds float* %tmp23938, i64 1 + %tmp23940 = getelementptr inbounds float* %tmp23939, i64 1 + %tmp23941 = getelementptr inbounds float* %tmp23940, i64 1 + %tmp23942 = getelementptr inbounds float* %tmp23941, i64 1 + %tmp23943 = getelementptr inbounds float* %tmp23942, i64 1 + %tmp23944 = getelementptr inbounds float* %tmp23943, i64 1 + %tmp23945 = getelementptr inbounds float* %tmp23944, i64 1 + %tmp23946 = getelementptr inbounds float* %tmp23945, i64 1 + %tmp23947 = getelementptr inbounds float* %tmp23946, i64 1 + %tmp23948 = getelementptr inbounds float* %tmp23947, i64 1 + %tmp23949 = getelementptr inbounds float* %tmp23948, i64 1 + %tmp23950 = getelementptr inbounds float* %tmp23949, i64 1 + %tmp23951 = getelementptr inbounds float* %tmp23950, i64 1 + %tmp23952 = getelementptr inbounds float* 
%tmp23951, i64 1 + %tmp23953 = getelementptr inbounds float* %tmp23952, i64 1 + %tmp23954 = getelementptr inbounds float* %tmp23953, i64 1 + %tmp23955 = getelementptr inbounds float* %tmp23954, i64 1 + %tmp23956 = getelementptr inbounds float* %tmp23955, i64 1 + %tmp23957 = getelementptr inbounds float* %tmp23956, i64 1 + %tmp23958 = getelementptr inbounds float* %tmp23957, i64 1 + %tmp23959 = getelementptr inbounds float* %tmp23958, i64 1 + %tmp23960 = getelementptr inbounds float* %tmp23959, i64 1 + %tmp23961 = getelementptr inbounds float* %tmp23960, i64 1 + %tmp23962 = getelementptr inbounds float* %tmp23961, i64 1 + %tmp23963 = getelementptr inbounds float* %tmp23962, i64 1 + %tmp23964 = getelementptr inbounds float* %tmp23963, i64 1 + %tmp23965 = getelementptr inbounds float* %tmp23964, i64 1 + %tmp23966 = getelementptr inbounds float* %tmp23965, i64 1 + %tmp23967 = getelementptr inbounds float* %tmp23966, i64 1 + %tmp23968 = getelementptr inbounds float* %tmp23967, i64 1 + %tmp23969 = getelementptr inbounds float* %tmp23968, i64 1 + %tmp23970 = getelementptr inbounds float* %tmp23969, i64 1 + %tmp23971 = getelementptr inbounds float* %tmp23970, i64 1 + %tmp23972 = getelementptr inbounds float* %tmp23971, i64 1 + %tmp23973 = getelementptr inbounds float* %tmp23972, i64 1 + %tmp23974 = getelementptr inbounds float* %tmp23973, i64 1 + %tmp23975 = getelementptr inbounds float* %tmp23974, i64 1 + %tmp23976 = getelementptr inbounds float* %tmp23975, i64 1 + %tmp23977 = getelementptr inbounds float* %tmp23976, i64 1 + %tmp23978 = getelementptr inbounds float* %tmp23977, i64 1 + %tmp23979 = getelementptr inbounds float* %tmp23978, i64 1 + %tmp23980 = getelementptr inbounds float* %tmp23979, i64 1 + %tmp23981 = getelementptr inbounds float* %tmp23980, i64 1 + %tmp23982 = getelementptr inbounds float* %tmp23981, i64 1 + %tmp23983 = getelementptr inbounds float* %tmp23982, i64 1 + %tmp23984 = getelementptr inbounds float* %tmp23983, i64 1 + %tmp23985 = getelementptr 
inbounds float* %tmp23984, i64 1 + %tmp23986 = getelementptr inbounds float* %tmp23985, i64 1 + %tmp23987 = getelementptr inbounds float* %tmp23986, i64 1 + %tmp23988 = getelementptr inbounds float* %tmp23987, i64 1 + %tmp23989 = getelementptr inbounds float* %tmp23988, i64 1 + %tmp23990 = getelementptr inbounds float* %tmp23989, i64 1 + %tmp23991 = getelementptr inbounds float* %tmp23990, i64 1 + %tmp23992 = getelementptr inbounds float* %tmp23991, i64 1 + %tmp23993 = getelementptr inbounds float* %tmp23992, i64 1 + %tmp23994 = getelementptr inbounds float* %tmp23993, i64 1 + %tmp23995 = getelementptr inbounds float* %tmp23994, i64 1 + %tmp23996 = getelementptr inbounds float* %tmp23995, i64 1 + %tmp23997 = getelementptr inbounds float* %tmp23996, i64 1 + %tmp23998 = getelementptr inbounds float* %tmp23997, i64 1 + %tmp23999 = getelementptr inbounds float* %tmp23998, i64 1 + %tmp24000 = getelementptr inbounds float* %tmp23999, i64 1 + %tmp24001 = getelementptr inbounds float* %tmp24000, i64 1 + %tmp24002 = getelementptr inbounds float* %tmp24001, i64 1 + %tmp24003 = getelementptr inbounds float* %tmp24002, i64 1 + %tmp24004 = getelementptr inbounds float* %tmp24003, i64 1 + %tmp24005 = getelementptr inbounds float* %tmp24004, i64 1 + %tmp24006 = getelementptr inbounds float* %tmp24005, i64 1 + %tmp24007 = getelementptr inbounds float* %tmp24006, i64 1 + %tmp24008 = getelementptr inbounds float* %tmp24007, i64 1 + %tmp24009 = getelementptr inbounds float* %tmp24008, i64 1 + %tmp24010 = getelementptr inbounds float* %tmp24009, i64 1 + %tmp24011 = getelementptr inbounds float* %tmp24010, i64 1 + %tmp24012 = getelementptr inbounds float* %tmp24011, i64 1 + %tmp24013 = getelementptr inbounds float* %tmp24012, i64 1 + %tmp24014 = getelementptr inbounds float* %tmp24013, i64 1 + %tmp24015 = getelementptr inbounds float* %tmp24014, i64 1 + %tmp24016 = getelementptr inbounds float* %tmp24015, i64 1 + %tmp24017 = getelementptr inbounds float* %tmp24016, i64 1 + %tmp24018 = 
getelementptr inbounds float* %tmp24017, i64 1 + %tmp24019 = getelementptr inbounds float* %tmp24018, i64 1 + %tmp24020 = getelementptr inbounds float* %tmp24019, i64 1 + %tmp24021 = getelementptr inbounds float* %tmp24020, i64 1 + %tmp24022 = getelementptr inbounds float* %tmp24021, i64 1 + %tmp24023 = getelementptr inbounds float* %tmp24022, i64 1 + %tmp24024 = getelementptr inbounds float* %tmp24023, i64 1 + %tmp24025 = getelementptr inbounds float* %tmp24024, i64 1 + %tmp24026 = getelementptr inbounds float* %tmp24025, i64 1 + %tmp24027 = getelementptr inbounds float* %tmp24026, i64 1 + %tmp24028 = getelementptr inbounds float* %tmp24027, i64 1 + %tmp24029 = getelementptr inbounds float* %tmp24028, i64 1 + %tmp24030 = getelementptr inbounds float* %tmp24029, i64 1 + %tmp24031 = getelementptr inbounds float* %tmp24030, i64 1 + %tmp24032 = getelementptr inbounds float* %tmp24031, i64 1 + %tmp24033 = getelementptr inbounds float* %tmp24032, i64 1 + %tmp24034 = getelementptr inbounds float* %tmp24033, i64 1 + %tmp24035 = getelementptr inbounds float* %tmp24034, i64 1 + %tmp24036 = getelementptr inbounds float* %tmp24035, i64 1 + %tmp24037 = getelementptr inbounds float* %tmp24036, i64 1 + %tmp24038 = getelementptr inbounds float* %tmp24037, i64 1 + %tmp24039 = getelementptr inbounds float* %tmp24038, i64 1 + %tmp24040 = getelementptr inbounds float* %tmp24039, i64 1 + %tmp24041 = getelementptr inbounds float* %tmp24040, i64 1 + %tmp24042 = getelementptr inbounds float* %tmp24041, i64 1 + %tmp24043 = getelementptr inbounds float* %tmp24042, i64 1 + %tmp24044 = getelementptr inbounds float* %tmp24043, i64 1 + %tmp24045 = getelementptr inbounds float* %tmp24044, i64 1 + %tmp24046 = getelementptr inbounds float* %tmp24045, i64 1 + %tmp24047 = getelementptr inbounds float* %tmp24046, i64 1 + %tmp24048 = getelementptr inbounds float* %tmp24047, i64 1 + %tmp24049 = getelementptr inbounds float* %tmp24048, i64 1 + %tmp24050 = getelementptr inbounds float* %tmp24049, i64 1 
+ %tmp24051 = getelementptr inbounds float* %tmp24050, i64 1 + %tmp24052 = getelementptr inbounds float* %tmp24051, i64 1 + %tmp24053 = getelementptr inbounds float* %tmp24052, i64 1 + %tmp24054 = getelementptr inbounds float* %tmp24053, i64 1 + %tmp24055 = getelementptr inbounds float* %tmp24054, i64 1 + %tmp24056 = getelementptr inbounds float* %tmp24055, i64 1 + %tmp24057 = getelementptr inbounds float* %tmp24056, i64 1 + %tmp24058 = getelementptr inbounds float* %tmp24057, i64 1 + %tmp24059 = getelementptr inbounds float* %tmp24058, i64 1 + %tmp24060 = getelementptr inbounds float* %tmp24059, i64 1 + %tmp24061 = getelementptr inbounds float* %tmp24060, i64 1 + %tmp24062 = getelementptr inbounds float* %tmp24061, i64 1 + %tmp24063 = getelementptr inbounds float* %tmp24062, i64 1 + %tmp24064 = getelementptr inbounds float* %tmp24063, i64 1 + %tmp24065 = getelementptr inbounds float* %tmp24064, i64 1 + %tmp24066 = getelementptr inbounds float* %tmp24065, i64 1 + %tmp24067 = getelementptr inbounds float* %tmp24066, i64 1 + %tmp24068 = getelementptr inbounds float* %tmp24067, i64 1 + %tmp24069 = getelementptr inbounds float* %tmp24068, i64 1 + %tmp24070 = getelementptr inbounds float* %tmp24069, i64 1 + %tmp24071 = getelementptr inbounds float* %tmp24070, i64 1 + %tmp24072 = getelementptr inbounds float* %tmp24071, i64 1 + %tmp24073 = getelementptr inbounds float* %tmp24072, i64 1 + %tmp24074 = getelementptr inbounds float* %tmp24073, i64 1 + %tmp24075 = getelementptr inbounds float* %tmp24074, i64 1 + %tmp24076 = getelementptr inbounds float* %tmp24075, i64 1 + %tmp24077 = getelementptr inbounds float* %tmp24076, i64 1 + %tmp24078 = getelementptr inbounds float* %tmp24077, i64 1 + %tmp24079 = getelementptr inbounds float* %tmp24078, i64 1 + %tmp24080 = getelementptr inbounds float* %tmp24079, i64 1 + %tmp24081 = getelementptr inbounds float* %tmp24080, i64 1 + %tmp24082 = getelementptr inbounds float* %tmp24081, i64 1 + %tmp24083 = getelementptr inbounds float* 
%tmp24082, i64 1 + %tmp24084 = getelementptr inbounds float* %tmp24083, i64 1 + %tmp24085 = getelementptr inbounds float* %tmp24084, i64 1 + %tmp24086 = getelementptr inbounds float* %tmp24085, i64 1 + %tmp24087 = getelementptr inbounds float* %tmp24086, i64 1 + %tmp24088 = getelementptr inbounds float* %tmp24087, i64 1 + %tmp24089 = getelementptr inbounds float* %tmp24088, i64 1 + %tmp24090 = getelementptr inbounds float* %tmp24089, i64 1 + %tmp24091 = getelementptr inbounds float* %tmp24090, i64 1 + %tmp24092 = getelementptr inbounds float* %tmp24091, i64 1 + %tmp24093 = getelementptr inbounds float* %tmp24092, i64 1 + %tmp24094 = getelementptr inbounds float* %tmp24093, i64 1 + %tmp24095 = getelementptr inbounds float* %tmp24094, i64 1 + %tmp24096 = getelementptr inbounds float* %tmp24095, i64 1 + %tmp24097 = getelementptr inbounds float* %tmp24096, i64 1 + %tmp24098 = getelementptr inbounds float* %tmp24097, i64 1 + %tmp24099 = getelementptr inbounds float* %tmp24098, i64 1 + %tmp24100 = getelementptr inbounds float* %tmp24099, i64 1 + %tmp24101 = getelementptr inbounds float* %tmp24100, i64 1 + %tmp24102 = getelementptr inbounds float* %tmp24101, i64 1 + %tmp24103 = getelementptr inbounds float* %tmp24102, i64 1 + %tmp24104 = getelementptr inbounds float* %tmp24103, i64 1 + %tmp24105 = getelementptr inbounds float* %tmp24104, i64 1 + %tmp24106 = getelementptr inbounds float* %tmp24105, i64 1 + %tmp24107 = getelementptr inbounds float* %tmp24106, i64 1 + %tmp24108 = getelementptr inbounds float* %tmp24107, i64 1 + %tmp24109 = getelementptr inbounds float* %tmp24108, i64 1 + %tmp24110 = getelementptr inbounds float* %tmp24109, i64 1 + %tmp24111 = getelementptr inbounds float* %tmp24110, i64 1 + %tmp24112 = getelementptr inbounds float* %tmp24111, i64 1 + %tmp24113 = getelementptr inbounds float* %tmp24112, i64 1 + %tmp24114 = getelementptr inbounds float* %tmp24113, i64 1 + %tmp24115 = getelementptr inbounds float* %tmp24114, i64 1 + %tmp24116 = getelementptr 
inbounds float* %tmp24115, i64 1 + %tmp24117 = getelementptr inbounds float* %tmp24116, i64 1 + %tmp24118 = getelementptr inbounds float* %tmp24117, i64 1 + %tmp24119 = getelementptr inbounds float* %tmp24118, i64 1 + %tmp24120 = getelementptr inbounds float* %tmp24119, i64 1 + %tmp24121 = getelementptr inbounds float* %tmp24120, i64 1 + %tmp24122 = getelementptr inbounds float* %tmp24121, i64 1 + %tmp24123 = getelementptr inbounds float* %tmp24122, i64 1 + %tmp24124 = getelementptr inbounds float* %tmp24123, i64 1 + %tmp24125 = getelementptr inbounds float* %tmp24124, i64 1 + %tmp24126 = getelementptr inbounds float* %tmp24125, i64 1 + %tmp24127 = getelementptr inbounds float* %tmp24126, i64 1 + %tmp24128 = getelementptr inbounds float* %tmp24127, i64 1 + %tmp24129 = getelementptr inbounds float* %tmp24128, i64 1 + %tmp24130 = getelementptr inbounds float* %tmp24129, i64 1 + %tmp24131 = getelementptr inbounds float* %tmp24130, i64 1 + %tmp24132 = getelementptr inbounds float* %tmp24131, i64 1 + %tmp24133 = getelementptr inbounds float* %tmp24132, i64 1 + %tmp24134 = getelementptr inbounds float* %tmp24133, i64 1 + %tmp24135 = getelementptr inbounds float* %tmp24134, i64 1 + %tmp24136 = getelementptr inbounds float* %tmp24135, i64 1 + %tmp24137 = getelementptr inbounds float* %tmp24136, i64 1 + %tmp24138 = getelementptr inbounds float* %tmp24137, i64 1 + %tmp24139 = getelementptr inbounds float* %tmp24138, i64 1 + %tmp24140 = getelementptr inbounds float* %tmp24139, i64 1 + %tmp24141 = getelementptr inbounds float* %tmp24140, i64 1 + %tmp24142 = getelementptr inbounds float* %tmp24141, i64 1 + %tmp24143 = getelementptr inbounds float* %tmp24142, i64 1 + %tmp24144 = getelementptr inbounds float* %tmp24143, i64 1 + %tmp24145 = getelementptr inbounds float* %tmp24144, i64 1 + %tmp24146 = getelementptr inbounds float* %tmp24145, i64 1 + %tmp24147 = getelementptr inbounds float* %tmp24146, i64 1 + %tmp24148 = getelementptr inbounds float* %tmp24147, i64 1 + %tmp24149 = 
getelementptr inbounds float* %tmp24148, i64 1 + %tmp24150 = getelementptr inbounds float* %tmp24149, i64 1 + %tmp24151 = getelementptr inbounds float* %tmp24150, i64 1 + %tmp24152 = getelementptr inbounds float* %tmp24151, i64 1 + %tmp24153 = getelementptr inbounds float* %tmp24152, i64 1 + %tmp24154 = getelementptr inbounds float* %tmp24153, i64 1 + %tmp24155 = getelementptr inbounds float* %tmp24154, i64 1 + %tmp24156 = getelementptr inbounds float* %tmp24155, i64 1 + %tmp24157 = getelementptr inbounds float* %tmp24156, i64 1 + %tmp24158 = getelementptr inbounds float* %tmp24157, i64 1 + %tmp24159 = getelementptr inbounds float* %tmp24158, i64 1 + %tmp24160 = getelementptr inbounds float* %tmp24159, i64 1 + %tmp24161 = getelementptr inbounds float* %tmp24160, i64 1 + %tmp24162 = getelementptr inbounds float* %tmp24161, i64 1 + %tmp24163 = getelementptr inbounds float* %tmp24162, i64 1 + %tmp24164 = getelementptr inbounds float* %tmp24163, i64 1 + %tmp24165 = getelementptr inbounds float* %tmp24164, i64 1 + %tmp24166 = getelementptr inbounds float* %tmp24165, i64 1 + %tmp24167 = getelementptr inbounds float* %tmp24166, i64 1 + %tmp24168 = getelementptr inbounds float* %tmp24167, i64 1 + %tmp24169 = getelementptr inbounds float* %tmp24168, i64 1 + %tmp24170 = getelementptr inbounds float* %tmp24169, i64 1 + %tmp24171 = getelementptr inbounds float* %tmp24170, i64 1 + %tmp24172 = getelementptr inbounds float* %tmp24171, i64 1 + %tmp24173 = getelementptr inbounds float* %tmp24172, i64 1 + %tmp24174 = getelementptr inbounds float* %tmp24173, i64 1 + %tmp24175 = getelementptr inbounds float* %tmp24174, i64 1 + %tmp24176 = getelementptr inbounds float* %tmp24175, i64 1 + %tmp24177 = getelementptr inbounds float* %tmp24176, i64 1 + %tmp24178 = getelementptr inbounds float* %tmp24177, i64 1 + %tmp24179 = getelementptr inbounds float* %tmp24178, i64 1 + %tmp24180 = getelementptr inbounds float* %tmp24179, i64 1 + %tmp24181 = getelementptr inbounds float* %tmp24180, i64 1 
+ %tmp24182 = getelementptr inbounds float* %tmp24181, i64 1 + %tmp24183 = getelementptr inbounds float* %tmp24182, i64 1 + %tmp24184 = getelementptr inbounds float* %tmp24183, i64 1 + %tmp24185 = getelementptr inbounds float* %tmp24184, i64 1 + %tmp24186 = getelementptr inbounds float* %tmp24185, i64 1 + %tmp24187 = getelementptr inbounds float* %tmp24186, i64 1 + %tmp24188 = getelementptr inbounds float* %tmp24187, i64 1 + %tmp24189 = getelementptr inbounds float* %tmp24188, i64 1 + %tmp24190 = getelementptr inbounds float* %tmp24189, i64 1 + %tmp24191 = getelementptr inbounds float* %tmp24190, i64 1 + %tmp24192 = getelementptr inbounds float* %tmp24191, i64 1 + %tmp24193 = getelementptr inbounds float* %tmp24192, i64 1 + %tmp24194 = getelementptr inbounds float* %tmp24193, i64 1 + %tmp24195 = getelementptr inbounds float* %tmp24194, i64 1 + %tmp24196 = getelementptr inbounds float* %tmp24195, i64 1 + %tmp24197 = getelementptr inbounds float* %tmp24196, i64 1 + %tmp24198 = getelementptr inbounds float* %tmp24197, i64 1 + %tmp24199 = getelementptr inbounds float* %tmp24198, i64 1 + %tmp24200 = getelementptr inbounds float* %tmp24199, i64 1 + %tmp24201 = getelementptr inbounds float* %tmp24200, i64 1 + %tmp24202 = getelementptr inbounds float* %tmp24201, i64 1 + %tmp24203 = getelementptr inbounds float* %tmp24202, i64 1 + %tmp24204 = getelementptr inbounds float* %tmp24203, i64 1 + %tmp24205 = getelementptr inbounds float* %tmp24204, i64 1 + %tmp24206 = getelementptr inbounds float* %tmp24205, i64 1 + %tmp24207 = getelementptr inbounds float* %tmp24206, i64 1 + %tmp24208 = getelementptr inbounds float* %tmp24207, i64 1 + %tmp24209 = getelementptr inbounds float* %tmp24208, i64 1 + %tmp24210 = getelementptr inbounds float* %tmp24209, i64 1 + %tmp24211 = getelementptr inbounds float* %tmp24210, i64 1 + %tmp24212 = getelementptr inbounds float* %tmp24211, i64 1 + %tmp24213 = getelementptr inbounds float* %tmp24212, i64 1 + %tmp24214 = getelementptr inbounds float* 
%tmp24213, i64 1 + %tmp24215 = getelementptr inbounds float* %tmp24214, i64 1 + %tmp24216 = getelementptr inbounds float* %tmp24215, i64 1 + %tmp24217 = getelementptr inbounds float* %tmp24216, i64 1 + %tmp24218 = getelementptr inbounds float* %tmp24217, i64 1 + %tmp24219 = getelementptr inbounds float* %tmp24218, i64 1 + %tmp24220 = getelementptr inbounds float* %tmp24219, i64 1 + %tmp24221 = getelementptr inbounds float* %tmp24220, i64 1 + %tmp24222 = getelementptr inbounds float* %tmp24221, i64 1 + %tmp24223 = getelementptr inbounds float* %tmp24222, i64 1 + %tmp24224 = getelementptr inbounds float* %tmp24223, i64 1 + %tmp24225 = getelementptr inbounds float* %tmp24224, i64 1 + %tmp24226 = getelementptr inbounds float* %tmp24225, i64 1 + %tmp24227 = getelementptr inbounds float* %tmp24226, i64 1 + %tmp24228 = getelementptr inbounds float* %tmp24227, i64 1 + %tmp24229 = getelementptr inbounds float* %tmp24228, i64 1 + %tmp24230 = getelementptr inbounds float* %tmp24229, i64 1 + %tmp24231 = getelementptr inbounds float* %tmp24230, i64 1 + %tmp24232 = getelementptr inbounds float* %tmp24231, i64 1 + %tmp24233 = getelementptr inbounds float* %tmp24232, i64 1 + %tmp24234 = getelementptr inbounds float* %tmp24233, i64 1 + %tmp24235 = getelementptr inbounds float* %tmp24234, i64 1 + %tmp24236 = getelementptr inbounds float* %tmp24235, i64 1 + %tmp24237 = getelementptr inbounds float* %tmp24236, i64 1 + %tmp24238 = getelementptr inbounds float* %tmp24237, i64 1 + %tmp24239 = getelementptr inbounds float* %tmp24238, i64 1 + %tmp24240 = getelementptr inbounds float* %tmp24239, i64 1 + %tmp24241 = getelementptr inbounds float* %tmp24240, i64 1 + %tmp24242 = getelementptr inbounds float* %tmp24241, i64 1 + %tmp24243 = getelementptr inbounds float* %tmp24242, i64 1 + %tmp24244 = getelementptr inbounds float* %tmp24243, i64 1 + %tmp24245 = getelementptr inbounds float* %tmp24244, i64 1 + %tmp24246 = getelementptr inbounds float* %tmp24245, i64 1 + %tmp24247 = getelementptr 
inbounds float* %tmp24246, i64 1 + %tmp24248 = getelementptr inbounds float* %tmp24247, i64 1 + %tmp24249 = getelementptr inbounds float* %tmp24248, i64 1 + %tmp24250 = getelementptr inbounds float* %tmp24249, i64 1 + %tmp24251 = getelementptr inbounds float* %tmp24250, i64 1 + %tmp24252 = getelementptr inbounds float* %tmp24251, i64 1 + %tmp24253 = getelementptr inbounds float* %tmp24252, i64 1 + %tmp24254 = getelementptr inbounds float* %tmp24253, i64 1 + %tmp24255 = getelementptr inbounds float* %tmp24254, i64 1 + %tmp24256 = getelementptr inbounds float* %tmp24255, i64 1 + %tmp24257 = getelementptr inbounds float* %tmp24256, i64 1 + %tmp24258 = getelementptr inbounds float* %tmp24257, i64 1 + %tmp24259 = getelementptr inbounds float* %tmp24258, i64 1 + %tmp24260 = getelementptr inbounds float* %tmp24259, i64 1 + %tmp24261 = getelementptr inbounds float* %tmp24260, i64 1 + %tmp24262 = getelementptr inbounds float* %tmp24261, i64 1 + %tmp24263 = getelementptr inbounds float* %tmp24262, i64 1 + %tmp24264 = getelementptr inbounds float* %tmp24263, i64 1 + %tmp24265 = getelementptr inbounds float* %tmp24264, i64 1 + %tmp24266 = getelementptr inbounds float* %tmp24265, i64 1 + %tmp24267 = getelementptr inbounds float* %tmp24266, i64 1 + %tmp24268 = getelementptr inbounds float* %tmp24267, i64 1 + %tmp24269 = getelementptr inbounds float* %tmp24268, i64 1 + %tmp24270 = getelementptr inbounds float* %tmp24269, i64 1 + %tmp24271 = getelementptr inbounds float* %tmp24270, i64 1 + %tmp24272 = getelementptr inbounds float* %tmp24271, i64 1 + %tmp24273 = getelementptr inbounds float* %tmp24272, i64 1 + %tmp24274 = getelementptr inbounds float* %tmp24273, i64 1 + %tmp24275 = getelementptr inbounds float* %tmp24274, i64 1 + %tmp24276 = getelementptr inbounds float* %tmp24275, i64 1 + %tmp24277 = getelementptr inbounds float* %tmp24276, i64 1 + %tmp24278 = getelementptr inbounds float* %tmp24277, i64 1 + %tmp24279 = getelementptr inbounds float* %tmp24278, i64 1 + %tmp24280 = 
getelementptr inbounds float* %tmp24279, i64 1 + %tmp24281 = getelementptr inbounds float* %tmp24280, i64 1 + %tmp24282 = getelementptr inbounds float* %tmp24281, i64 1 + %tmp24283 = getelementptr inbounds float* %tmp24282, i64 1 + %tmp24284 = getelementptr inbounds float* %tmp24283, i64 1 + %tmp24285 = getelementptr inbounds float* %tmp24284, i64 1 + %tmp24286 = getelementptr inbounds float* %tmp24285, i64 1 + %tmp24287 = getelementptr inbounds float* %tmp24286, i64 1 + %tmp24288 = getelementptr inbounds float* %tmp24287, i64 1 + %tmp24289 = getelementptr inbounds float* %tmp24288, i64 1 + %tmp24290 = getelementptr inbounds float* %tmp24289, i64 1 + %tmp24291 = getelementptr inbounds float* %tmp24290, i64 1 + %tmp24292 = getelementptr inbounds float* %tmp24291, i64 1 + %tmp24293 = getelementptr inbounds float* %tmp24292, i64 1 + %tmp24294 = getelementptr inbounds float* %tmp24293, i64 1 + %tmp24295 = getelementptr inbounds float* %tmp24294, i64 1 + %tmp24296 = getelementptr inbounds float* %tmp24295, i64 1 + %tmp24297 = getelementptr inbounds float* %tmp24296, i64 1 + %tmp24298 = getelementptr inbounds float* %tmp24297, i64 1 + %tmp24299 = getelementptr inbounds float* %tmp24298, i64 1 + %tmp24300 = getelementptr inbounds float* %tmp24299, i64 1 + %tmp24301 = getelementptr inbounds float* %tmp24300, i64 1 + %tmp24302 = getelementptr inbounds float* %tmp24301, i64 1 + %tmp24303 = getelementptr inbounds float* %tmp24302, i64 1 + %tmp24304 = getelementptr inbounds float* %tmp24303, i64 1 + %tmp24305 = getelementptr inbounds float* %tmp24304, i64 1 + %tmp24306 = getelementptr inbounds float* %tmp24305, i64 1 + %tmp24307 = getelementptr inbounds float* %tmp24306, i64 1 + %tmp24308 = getelementptr inbounds float* %tmp24307, i64 1 + %tmp24309 = getelementptr inbounds float* %tmp24308, i64 1 + %tmp24310 = getelementptr inbounds float* %tmp24309, i64 1 + %tmp24311 = getelementptr inbounds float* %tmp24310, i64 1 + %tmp24312 = getelementptr inbounds float* %tmp24311, i64 1 
+ %tmp24313 = getelementptr inbounds float* %tmp24312, i64 1 + %tmp24314 = getelementptr inbounds float* %tmp24313, i64 1 + %tmp24315 = getelementptr inbounds float* %tmp24314, i64 1 + %tmp24316 = getelementptr inbounds float* %tmp24315, i64 1 + %tmp24317 = getelementptr inbounds float* %tmp24316, i64 1 + %tmp24318 = getelementptr inbounds float* %tmp24317, i64 1 + %tmp24319 = getelementptr inbounds float* %tmp24318, i64 1 + %tmp24320 = getelementptr inbounds float* %tmp24319, i64 1 + %tmp24321 = getelementptr inbounds float* %tmp24320, i64 1 + %tmp24322 = getelementptr inbounds float* %tmp24321, i64 1 + %tmp24323 = getelementptr inbounds float* %tmp24322, i64 1 + %tmp24324 = getelementptr inbounds float* %tmp24323, i64 1 + %tmp24325 = getelementptr inbounds float* %tmp24324, i64 1 + %tmp24326 = getelementptr inbounds float* %tmp24325, i64 1 + %tmp24327 = getelementptr inbounds float* %tmp24326, i64 1 + %tmp24328 = getelementptr inbounds float* %tmp24327, i64 1 + %tmp24329 = getelementptr inbounds float* %tmp24328, i64 1 + %tmp24330 = getelementptr inbounds float* %tmp24329, i64 1 + %tmp24331 = getelementptr inbounds float* %tmp24330, i64 1 + %tmp24332 = getelementptr inbounds float* %tmp24331, i64 1 + %tmp24333 = getelementptr inbounds float* %tmp24332, i64 1 + %tmp24334 = getelementptr inbounds float* %tmp24333, i64 1 + %tmp24335 = getelementptr inbounds float* %tmp24334, i64 1 + %tmp24336 = getelementptr inbounds float* %tmp24335, i64 1 + %tmp24337 = getelementptr inbounds float* %tmp24336, i64 1 + %tmp24338 = getelementptr inbounds float* %tmp24337, i64 1 + %tmp24339 = getelementptr inbounds float* %tmp24338, i64 1 + %tmp24340 = getelementptr inbounds float* %tmp24339, i64 1 + %tmp24341 = getelementptr inbounds float* %tmp24340, i64 1 + %tmp24342 = getelementptr inbounds float* %tmp24341, i64 1 + %tmp24343 = getelementptr inbounds float* %tmp24342, i64 1 + %tmp24344 = getelementptr inbounds float* %tmp24343, i64 1 + %tmp24345 = getelementptr inbounds float* 
%tmp24344, i64 1 + %tmp24346 = getelementptr inbounds float* %tmp24345, i64 1 + %tmp24347 = getelementptr inbounds float* %tmp24346, i64 1 + %tmp24348 = getelementptr inbounds float* %tmp24347, i64 1 + %tmp24349 = getelementptr inbounds float* %tmp24348, i64 1 + %tmp24350 = getelementptr inbounds float* %tmp24349, i64 1 + %tmp24351 = getelementptr inbounds float* %tmp24350, i64 1 + %tmp24352 = getelementptr inbounds float* %tmp24351, i64 1 + %tmp24353 = getelementptr inbounds float* %tmp24352, i64 1 + %tmp24354 = getelementptr inbounds float* %tmp24353, i64 1 + %tmp24355 = getelementptr inbounds float* %tmp24354, i64 1 + %tmp24356 = getelementptr inbounds float* %tmp24355, i64 1 + %tmp24357 = getelementptr inbounds float* %tmp24356, i64 1 + %tmp24358 = getelementptr inbounds float* %tmp24357, i64 1 + %tmp24359 = getelementptr inbounds float* %tmp24358, i64 1 + %tmp24360 = getelementptr inbounds float* %tmp24359, i64 1 + %tmp24361 = getelementptr inbounds float* %tmp24360, i64 1 + %tmp24362 = getelementptr inbounds float* %tmp24361, i64 1 + %tmp24363 = getelementptr inbounds float* %tmp24362, i64 1 + %tmp24364 = getelementptr inbounds float* %tmp24363, i64 1 + %tmp24365 = getelementptr inbounds float* %tmp24364, i64 1 + %tmp24366 = getelementptr inbounds float* %tmp24365, i64 1 + %tmp24367 = getelementptr inbounds float* %tmp24366, i64 1 + %tmp24368 = getelementptr inbounds float* %tmp24367, i64 1 + %tmp24369 = getelementptr inbounds float* %tmp24368, i64 1 + %tmp24370 = getelementptr inbounds float* %tmp24369, i64 1 + %tmp24371 = getelementptr inbounds float* %tmp24370, i64 1 + %tmp24372 = getelementptr inbounds float* %tmp24371, i64 1 + %tmp24373 = getelementptr inbounds float* %tmp24372, i64 1 + %tmp24374 = getelementptr inbounds float* %tmp24373, i64 1 + %tmp24375 = getelementptr inbounds float* %tmp24374, i64 1 + %tmp24376 = getelementptr inbounds float* %tmp24375, i64 1 + %tmp24377 = getelementptr inbounds float* %tmp24376, i64 1 + %tmp24378 = getelementptr 
inbounds float* %tmp24377, i64 1 + %tmp24379 = getelementptr inbounds float* %tmp24378, i64 1 + %tmp24380 = getelementptr inbounds float* %tmp24379, i64 1 + %tmp24381 = getelementptr inbounds float* %tmp24380, i64 1 + %tmp24382 = getelementptr inbounds float* %tmp24381, i64 1 + %tmp24383 = getelementptr inbounds float* %tmp24382, i64 1 + %tmp24384 = getelementptr inbounds float* %tmp24383, i64 1 + %tmp24385 = getelementptr inbounds float* %tmp24384, i64 1 + %tmp24386 = getelementptr inbounds float* %tmp24385, i64 1 + %tmp24387 = getelementptr inbounds float* %tmp24386, i64 1 + %tmp24388 = getelementptr inbounds float* %tmp24387, i64 1 + %tmp24389 = getelementptr inbounds float* %tmp24388, i64 1 + %tmp24390 = getelementptr inbounds float* %tmp24389, i64 1 + %tmp24391 = getelementptr inbounds float* %tmp24390, i64 1 + %tmp24392 = getelementptr inbounds float* %tmp24391, i64 1 + %tmp24393 = getelementptr inbounds float* %tmp24392, i64 1 + %tmp24394 = getelementptr inbounds float* %tmp24393, i64 1 + %tmp24395 = getelementptr inbounds float* %tmp24394, i64 1 + %tmp24396 = getelementptr inbounds float* %tmp24395, i64 1 + %tmp24397 = getelementptr inbounds float* %tmp24396, i64 1 + %tmp24398 = getelementptr inbounds float* %tmp24397, i64 1 + %tmp24399 = getelementptr inbounds float* %tmp24398, i64 1 + %tmp24400 = getelementptr inbounds float* %tmp24399, i64 1 + %tmp24401 = getelementptr inbounds float* %tmp24400, i64 1 + %tmp24402 = getelementptr inbounds float* %tmp24401, i64 1 + %tmp24403 = getelementptr inbounds float* %tmp24402, i64 1 + %tmp24404 = getelementptr inbounds float* %tmp24403, i64 1 + %tmp24405 = getelementptr inbounds float* %tmp24404, i64 1 + %tmp24406 = getelementptr inbounds float* %tmp24405, i64 1 + %tmp24407 = getelementptr inbounds float* %tmp24406, i64 1 + %tmp24408 = getelementptr inbounds float* %tmp24407, i64 1 + %tmp24409 = getelementptr inbounds float* %tmp24408, i64 1 + %tmp24410 = getelementptr inbounds float* %tmp24409, i64 1 + %tmp24411 = 
getelementptr inbounds float* %tmp24410, i64 1 + %tmp24412 = getelementptr inbounds float* %tmp24411, i64 1 + %tmp24413 = getelementptr inbounds float* %tmp24412, i64 1 + %tmp24414 = getelementptr inbounds float* %tmp24413, i64 1 + %tmp24415 = getelementptr inbounds float* %tmp24414, i64 1 + %tmp24416 = getelementptr inbounds float* %tmp24415, i64 1 + %tmp24417 = getelementptr inbounds float* %tmp24416, i64 1 + %tmp24418 = getelementptr inbounds float* %tmp24417, i64 1 + %tmp24419 = getelementptr inbounds float* %tmp24418, i64 1 + %tmp24420 = getelementptr inbounds float* %tmp24419, i64 1 + %tmp24421 = getelementptr inbounds float* %tmp24420, i64 1 + %tmp24422 = getelementptr inbounds float* %tmp24421, i64 1 + %tmp24423 = getelementptr inbounds float* %tmp24422, i64 1 + %tmp24424 = getelementptr inbounds float* %tmp24423, i64 1 + %tmp24425 = getelementptr inbounds float* %tmp24424, i64 1 + %tmp24426 = getelementptr inbounds float* %tmp24425, i64 1 + %tmp24427 = getelementptr inbounds float* %tmp24426, i64 1 + %tmp24428 = getelementptr inbounds float* %tmp24427, i64 1 + %tmp24429 = getelementptr inbounds float* %tmp24428, i64 1 + %tmp24430 = getelementptr inbounds float* %tmp24429, i64 1 + %tmp24431 = getelementptr inbounds float* %tmp24430, i64 1 + %tmp24432 = getelementptr inbounds float* %tmp24431, i64 1 + %tmp24433 = getelementptr inbounds float* %tmp24432, i64 1 + %tmp24434 = getelementptr inbounds float* %tmp24433, i64 1 + %tmp24435 = getelementptr inbounds float* %tmp24434, i64 1 + %tmp24436 = getelementptr inbounds float* %tmp24435, i64 1 + %tmp24437 = getelementptr inbounds float* %tmp24436, i64 1 + %tmp24438 = getelementptr inbounds float* %tmp24437, i64 1 + %tmp24439 = getelementptr inbounds float* %tmp24438, i64 1 + %tmp24440 = getelementptr inbounds float* %tmp24439, i64 1 + %tmp24441 = getelementptr inbounds float* %tmp24440, i64 1 + %tmp24442 = getelementptr inbounds float* %tmp24441, i64 1 + %tmp24443 = getelementptr inbounds float* %tmp24442, i64 1 
+ %tmp24444 = getelementptr inbounds float* %tmp24443, i64 1 + %tmp24445 = getelementptr inbounds float* %tmp24444, i64 1 + %tmp24446 = getelementptr inbounds float* %tmp24445, i64 1 + %tmp24447 = getelementptr inbounds float* %tmp24446, i64 1 + %tmp24448 = getelementptr inbounds float* %tmp24447, i64 1 + %tmp24449 = getelementptr inbounds float* %tmp24448, i64 1 + %tmp24450 = getelementptr inbounds float* %tmp24449, i64 1 + %tmp24451 = getelementptr inbounds float* %tmp24450, i64 1 + %tmp24452 = getelementptr inbounds float* %tmp24451, i64 1 + %tmp24453 = getelementptr inbounds float* %tmp24452, i64 1 + %tmp24454 = getelementptr inbounds float* %tmp24453, i64 1 + %tmp24455 = getelementptr inbounds float* %tmp24454, i64 1 + %tmp24456 = getelementptr inbounds float* %tmp24455, i64 1 + %tmp24457 = getelementptr inbounds float* %tmp24456, i64 1 + %tmp24458 = getelementptr inbounds float* %tmp24457, i64 1 + %tmp24459 = getelementptr inbounds float* %tmp24458, i64 1 + %tmp24460 = getelementptr inbounds float* %tmp24459, i64 1 + %tmp24461 = getelementptr inbounds float* %tmp24460, i64 1 + %tmp24462 = getelementptr inbounds float* %tmp24461, i64 1 + %tmp24463 = getelementptr inbounds float* %tmp24462, i64 1 + %tmp24464 = getelementptr inbounds float* %tmp24463, i64 1 + %tmp24465 = getelementptr inbounds float* %tmp24464, i64 1 + %tmp24466 = getelementptr inbounds float* %tmp24465, i64 1 + %tmp24467 = getelementptr inbounds float* %tmp24466, i64 1 + %tmp24468 = getelementptr inbounds float* %tmp24467, i64 1 + %tmp24469 = getelementptr inbounds float* %tmp24468, i64 1 + %tmp24470 = getelementptr inbounds float* %tmp24469, i64 1 + %tmp24471 = getelementptr inbounds float* %tmp24470, i64 1 + %tmp24472 = getelementptr inbounds float* %tmp24471, i64 1 + %tmp24473 = getelementptr inbounds float* %tmp24472, i64 1 + %tmp24474 = getelementptr inbounds float* %tmp24473, i64 1 + %tmp24475 = getelementptr inbounds float* %tmp24474, i64 1 + %tmp24476 = getelementptr inbounds float* 
%tmp24475, i64 1 + %tmp24477 = getelementptr inbounds float* %tmp24476, i64 1 + %tmp24478 = getelementptr inbounds float* %tmp24477, i64 1 + %tmp24479 = getelementptr inbounds float* %tmp24478, i64 1 + %tmp24480 = getelementptr inbounds float* %tmp24479, i64 1 + %tmp24481 = getelementptr inbounds float* %tmp24480, i64 1 + %tmp24482 = getelementptr inbounds float* %tmp24481, i64 1 + %tmp24483 = getelementptr inbounds float* %tmp24482, i64 1 + %tmp24484 = getelementptr inbounds float* %tmp24483, i64 1 + %tmp24485 = getelementptr inbounds float* %tmp24484, i64 1 + %tmp24486 = getelementptr inbounds float* %tmp24485, i64 1 + %tmp24487 = getelementptr inbounds float* %tmp24486, i64 1 + %tmp24488 = getelementptr inbounds float* %tmp24487, i64 1 + %tmp24489 = getelementptr inbounds float* %tmp24488, i64 1 + %tmp24490 = getelementptr inbounds float* %tmp24489, i64 1 + %tmp24491 = getelementptr inbounds float* %tmp24490, i64 1 + %tmp24492 = getelementptr inbounds float* %tmp24491, i64 1 + %tmp24493 = getelementptr inbounds float* %tmp24492, i64 1 + %tmp24494 = getelementptr inbounds float* %tmp24493, i64 1 + %tmp24495 = getelementptr inbounds float* %tmp24494, i64 1 + %tmp24496 = getelementptr inbounds float* %tmp24495, i64 1 + %tmp24497 = getelementptr inbounds float* %tmp24496, i64 1 + %tmp24498 = getelementptr inbounds float* %tmp24497, i64 1 + %tmp24499 = getelementptr inbounds float* %tmp24498, i64 1 + %tmp24500 = getelementptr inbounds float* %tmp24499, i64 1 + %tmp24501 = getelementptr inbounds float* %tmp24500, i64 1 + %tmp24502 = getelementptr inbounds float* %tmp24501, i64 1 + %tmp24503 = getelementptr inbounds float* %tmp24502, i64 1 + %tmp24504 = getelementptr inbounds float* %tmp24503, i64 1 + %tmp24505 = getelementptr inbounds float* %tmp24504, i64 1 + %tmp24506 = getelementptr inbounds float* %tmp24505, i64 1 + %tmp24507 = getelementptr inbounds float* %tmp24506, i64 1 + %tmp24508 = getelementptr inbounds float* %tmp24507, i64 1 + %tmp24509 = getelementptr 
inbounds float* %tmp24508, i64 1 + %tmp24510 = getelementptr inbounds float* %tmp24509, i64 1 + %tmp24511 = getelementptr inbounds float* %tmp24510, i64 1 + %tmp24512 = getelementptr inbounds float* %tmp24511, i64 1 + %tmp24513 = getelementptr inbounds float* %tmp24512, i64 1 + %tmp24514 = getelementptr inbounds float* %tmp24513, i64 1 + %tmp24515 = getelementptr inbounds float* %tmp24514, i64 1 + %tmp24516 = getelementptr inbounds float* %tmp24515, i64 1 + %tmp24517 = getelementptr inbounds float* %tmp24516, i64 1 + %tmp24518 = getelementptr inbounds float* %tmp24517, i64 1 + %tmp24519 = getelementptr inbounds float* %tmp24518, i64 1 + %tmp24520 = getelementptr inbounds float* %tmp24519, i64 1 + %tmp24521 = getelementptr inbounds float* %tmp24520, i64 1 + %tmp24522 = getelementptr inbounds float* %tmp24521, i64 1 + %tmp24523 = getelementptr inbounds float* %tmp24522, i64 1 + %tmp24524 = getelementptr inbounds float* %tmp24523, i64 1 + %tmp24525 = getelementptr inbounds float* %tmp24524, i64 1 + %tmp24526 = getelementptr inbounds float* %tmp24525, i64 1 + %tmp24527 = getelementptr inbounds float* %tmp24526, i64 1 + %tmp24528 = getelementptr inbounds float* %tmp24527, i64 1 + %tmp24529 = getelementptr inbounds float* %tmp24528, i64 1 + %tmp24530 = getelementptr inbounds float* %tmp24529, i64 1 + %tmp24531 = getelementptr inbounds float* %tmp24530, i64 1 + %tmp24532 = getelementptr inbounds float* %tmp24531, i64 1 + %tmp24533 = getelementptr inbounds float* %tmp24532, i64 1 + %tmp24534 = getelementptr inbounds float* %tmp24533, i64 1 + %tmp24535 = getelementptr inbounds float* %tmp24534, i64 1 + %tmp24536 = getelementptr inbounds float* %tmp24535, i64 1 + %tmp24537 = getelementptr inbounds float* %tmp24536, i64 1 + %tmp24538 = getelementptr inbounds float* %tmp24537, i64 1 + %tmp24539 = getelementptr inbounds float* %tmp24538, i64 1 + %tmp24540 = getelementptr inbounds float* %tmp24539, i64 1 + %tmp24541 = getelementptr inbounds float* %tmp24540, i64 1 + %tmp24542 = 
getelementptr inbounds float* %tmp24541, i64 1 + %tmp24543 = getelementptr inbounds float* %tmp24542, i64 1 + %tmp24544 = getelementptr inbounds float* %tmp24543, i64 1 + %tmp24545 = getelementptr inbounds float* %tmp24544, i64 1 + %tmp24546 = getelementptr inbounds float* %tmp24545, i64 1 + %tmp24547 = getelementptr inbounds float* %tmp24546, i64 1 + %tmp24548 = getelementptr inbounds float* %tmp24547, i64 1 + %tmp24549 = getelementptr inbounds float* %tmp24548, i64 1 + %tmp24550 = getelementptr inbounds float* %tmp24549, i64 1 + %tmp24551 = getelementptr inbounds float* %tmp24550, i64 1 + %tmp24552 = getelementptr inbounds float* %tmp24551, i64 1 + %tmp24553 = getelementptr inbounds float* %tmp24552, i64 1 + %tmp24554 = getelementptr inbounds float* %tmp24553, i64 1 + %tmp24555 = getelementptr inbounds float* %tmp24554, i64 1 + %tmp24556 = getelementptr inbounds float* %tmp24555, i64 1 + %tmp24557 = getelementptr inbounds float* %tmp24556, i64 1 + %tmp24558 = getelementptr inbounds float* %tmp24557, i64 1 + %tmp24559 = getelementptr inbounds float* %tmp24558, i64 1 + %tmp24560 = getelementptr inbounds float* %tmp24559, i64 1 + %tmp24561 = getelementptr inbounds float* %tmp24560, i64 1 + %tmp24562 = getelementptr inbounds float* %tmp24561, i64 1 + %tmp24563 = getelementptr inbounds float* %tmp24562, i64 1 + %tmp24564 = getelementptr inbounds float* %tmp24563, i64 1 + %tmp24565 = getelementptr inbounds float* %tmp24564, i64 1 + %tmp24566 = getelementptr inbounds float* %tmp24565, i64 1 + %tmp24567 = getelementptr inbounds float* %tmp24566, i64 1 + %tmp24568 = getelementptr inbounds float* %tmp24567, i64 1 + %tmp24569 = getelementptr inbounds float* %tmp24568, i64 1 + %tmp24570 = getelementptr inbounds float* %tmp24569, i64 1 + %tmp24571 = getelementptr inbounds float* %tmp24570, i64 1 + %tmp24572 = getelementptr inbounds float* %tmp24571, i64 1 + %tmp24573 = getelementptr inbounds float* %tmp24572, i64 1 + %tmp24574 = getelementptr inbounds float* %tmp24573, i64 1 
+ %tmp24575 = getelementptr inbounds float* %tmp24574, i64 1 + %tmp24576 = getelementptr inbounds float* %tmp24575, i64 1 + %tmp24577 = getelementptr inbounds float* %tmp24576, i64 1 + %tmp24578 = getelementptr inbounds float* %tmp24577, i64 1 + %tmp24579 = getelementptr inbounds float* %tmp24578, i64 1 + %tmp24580 = getelementptr inbounds float* %tmp24579, i64 1 + %tmp24581 = getelementptr inbounds float* %tmp24580, i64 1 + %tmp24582 = getelementptr inbounds float* %tmp24581, i64 1 + %tmp24583 = getelementptr inbounds float* %tmp24582, i64 1 + %tmp24584 = getelementptr inbounds float* %tmp24583, i64 1 + %tmp24585 = getelementptr inbounds float* %tmp24584, i64 1 + %tmp24586 = getelementptr inbounds float* %tmp24585, i64 1 + %tmp24587 = getelementptr inbounds float* %tmp24586, i64 1 + %tmp24588 = getelementptr inbounds float* %tmp24587, i64 1 + %tmp24589 = getelementptr inbounds float* %tmp24588, i64 1 + %tmp24590 = getelementptr inbounds float* %tmp24589, i64 1 + %tmp24591 = getelementptr inbounds float* %tmp24590, i64 1 + %tmp24592 = getelementptr inbounds float* %tmp24591, i64 1 + %tmp24593 = getelementptr inbounds float* %tmp24592, i64 1 + %tmp24594 = getelementptr inbounds float* %tmp24593, i64 1 + %tmp24595 = getelementptr inbounds float* %tmp24594, i64 1 + %tmp24596 = getelementptr inbounds float* %tmp24595, i64 1 + %tmp24597 = getelementptr inbounds float* %tmp24596, i64 1 + %tmp24598 = getelementptr inbounds float* %tmp24597, i64 1 + %tmp24599 = getelementptr inbounds float* %tmp24598, i64 1 + %tmp24600 = getelementptr inbounds float* %tmp24599, i64 1 + %tmp24601 = getelementptr inbounds float* %tmp24600, i64 1 + %tmp24602 = getelementptr inbounds float* %tmp24601, i64 1 + %tmp24603 = getelementptr inbounds float* %tmp24602, i64 1 + %tmp24604 = getelementptr inbounds float* %tmp24603, i64 1 + %tmp24605 = getelementptr inbounds float* %tmp24604, i64 1 + %tmp24606 = getelementptr inbounds float* %tmp24605, i64 1 + %tmp24607 = getelementptr inbounds float* 
%tmp24606, i64 1 + %tmp24608 = getelementptr inbounds float* %tmp24607, i64 1 + %tmp24609 = getelementptr inbounds float* %tmp24608, i64 1 + %tmp24610 = getelementptr inbounds float* %tmp24609, i64 1 + %tmp24611 = getelementptr inbounds float* %tmp24610, i64 1 + %tmp24612 = getelementptr inbounds float* %tmp24611, i64 1 + %tmp24613 = getelementptr inbounds float* %tmp24612, i64 1 + %tmp24614 = getelementptr inbounds float* %tmp24613, i64 1 + %tmp24615 = getelementptr inbounds float* %tmp24614, i64 1 + %tmp24616 = getelementptr inbounds float* %tmp24615, i64 1 + %tmp24617 = getelementptr inbounds float* %tmp24616, i64 1 + %tmp24618 = getelementptr inbounds float* %tmp24617, i64 1 + %tmp24619 = getelementptr inbounds float* %tmp24618, i64 1 + %tmp24620 = getelementptr inbounds float* %tmp24619, i64 1 + %tmp24621 = getelementptr inbounds float* %tmp24620, i64 1 + %tmp24622 = getelementptr inbounds float* %tmp24621, i64 1 + %tmp24623 = getelementptr inbounds float* %tmp24622, i64 1 + %tmp24624 = getelementptr inbounds float* %tmp24623, i64 1 + %tmp24625 = getelementptr inbounds float* %tmp24624, i64 1 + %tmp24626 = getelementptr inbounds float* %tmp24625, i64 1 + %tmp24627 = getelementptr inbounds float* %tmp24626, i64 1 + %tmp24628 = getelementptr inbounds float* %tmp24627, i64 1 + %tmp24629 = getelementptr inbounds float* %tmp24628, i64 1 + %tmp24630 = getelementptr inbounds float* %tmp24629, i64 1 + %tmp24631 = getelementptr inbounds float* %tmp24630, i64 1 + %tmp24632 = getelementptr inbounds float* %tmp24631, i64 1 + %tmp24633 = getelementptr inbounds float* %tmp24632, i64 1 + %tmp24634 = getelementptr inbounds float* %tmp24633, i64 1 + %tmp24635 = getelementptr inbounds float* %tmp24634, i64 1 + %tmp24636 = getelementptr inbounds float* %tmp24635, i64 1 + %tmp24637 = getelementptr inbounds float* %tmp24636, i64 1 + %tmp24638 = getelementptr inbounds float* %tmp24637, i64 1 + %tmp24639 = getelementptr inbounds float* %tmp24638, i64 1 + %tmp24640 = getelementptr 
inbounds float* %tmp24639, i64 1 + %tmp24641 = getelementptr inbounds float* %tmp24640, i64 1 + %tmp24642 = getelementptr inbounds float* %tmp24641, i64 1 + %tmp24643 = getelementptr inbounds float* %tmp24642, i64 1 + %tmp24644 = getelementptr inbounds float* %tmp24643, i64 1 + %tmp24645 = getelementptr inbounds float* %tmp24644, i64 1 + %tmp24646 = getelementptr inbounds float* %tmp24645, i64 1 + %tmp24647 = getelementptr inbounds float* %tmp24646, i64 1 + %tmp24648 = getelementptr inbounds float* %tmp24647, i64 1 + %tmp24649 = getelementptr inbounds float* %tmp24648, i64 1 + %tmp24650 = getelementptr inbounds float* %tmp24649, i64 1 + %tmp24651 = getelementptr inbounds float* %tmp24650, i64 1 + %tmp24652 = getelementptr inbounds float* %tmp24651, i64 1 + %tmp24653 = getelementptr inbounds float* %tmp24652, i64 1 + %tmp24654 = getelementptr inbounds float* %tmp24653, i64 1 + %tmp24655 = getelementptr inbounds float* %tmp24654, i64 1 + %tmp24656 = getelementptr inbounds float* %tmp24655, i64 1 + %tmp24657 = getelementptr inbounds float* %tmp24656, i64 1 + %tmp24658 = getelementptr inbounds float* %tmp24657, i64 1 + %tmp24659 = getelementptr inbounds float* %tmp24658, i64 1 + %tmp24660 = getelementptr inbounds float* %tmp24659, i64 1 + %tmp24661 = getelementptr inbounds float* %tmp24660, i64 1 + %tmp24662 = getelementptr inbounds float* %tmp24661, i64 1 + %tmp24663 = getelementptr inbounds float* %tmp24662, i64 1 + %tmp24664 = getelementptr inbounds float* %tmp24663, i64 1 + %tmp24665 = getelementptr inbounds float* %tmp24664, i64 1 + %tmp24666 = getelementptr inbounds float* %tmp24665, i64 1 + %tmp24667 = getelementptr inbounds float* %tmp24666, i64 1 + %tmp24668 = getelementptr inbounds float* %tmp24667, i64 1 + %tmp24669 = getelementptr inbounds float* %tmp24668, i64 1 + %tmp24670 = getelementptr inbounds float* %tmp24669, i64 1 + %tmp24671 = getelementptr inbounds float* %tmp24670, i64 1 + %tmp24672 = getelementptr inbounds float* %tmp24671, i64 1 + %tmp24673 = 
getelementptr inbounds float* %tmp24672, i64 1 + %tmp24674 = getelementptr inbounds float* %tmp24673, i64 1 + %tmp24675 = getelementptr inbounds float* %tmp24674, i64 1 + %tmp24676 = getelementptr inbounds float* %tmp24675, i64 1 + %tmp24677 = getelementptr inbounds float* %tmp24676, i64 1 + %tmp24678 = getelementptr inbounds float* %tmp24677, i64 1 + %tmp24679 = getelementptr inbounds float* %tmp24678, i64 1 + %tmp24680 = getelementptr inbounds float* %tmp24679, i64 1 + %tmp24681 = getelementptr inbounds float* %tmp24680, i64 1 + %tmp24682 = getelementptr inbounds float* %tmp24681, i64 1 + %tmp24683 = getelementptr inbounds float* %tmp24682, i64 1 + %tmp24684 = getelementptr inbounds float* %tmp24683, i64 1 + %tmp24685 = getelementptr inbounds float* %tmp24684, i64 1 + %tmp24686 = getelementptr inbounds float* %tmp24685, i64 1 + %tmp24687 = getelementptr inbounds float* %tmp24686, i64 1 + %tmp24688 = getelementptr inbounds float* %tmp24687, i64 1 + %tmp24689 = getelementptr inbounds float* %tmp24688, i64 1 + %tmp24690 = getelementptr inbounds float* %tmp24689, i64 1 + %tmp24691 = getelementptr inbounds float* %tmp24690, i64 1 + %tmp24692 = getelementptr inbounds float* %tmp24691, i64 1 + %tmp24693 = getelementptr inbounds float* %tmp24692, i64 1 + %tmp24694 = getelementptr inbounds float* %tmp24693, i64 1 + %tmp24695 = getelementptr inbounds float* %tmp24694, i64 1 + %tmp24696 = getelementptr inbounds float* %tmp24695, i64 1 + %tmp24697 = getelementptr inbounds float* %tmp24696, i64 1 + %tmp24698 = getelementptr inbounds float* %tmp24697, i64 1 + %tmp24699 = getelementptr inbounds float* %tmp24698, i64 1 + %tmp24700 = getelementptr inbounds float* %tmp24699, i64 1 + %tmp24701 = getelementptr inbounds float* %tmp24700, i64 1 + %tmp24702 = getelementptr inbounds float* %tmp24701, i64 1 + %tmp24703 = getelementptr inbounds float* %tmp24702, i64 1 + %tmp24704 = getelementptr inbounds float* %tmp24703, i64 1 + %tmp24705 = getelementptr inbounds float* %tmp24704, i64 1 
+ %tmp24706 = getelementptr inbounds float* %tmp24705, i64 1 + %tmp24707 = getelementptr inbounds float* %tmp24706, i64 1 + %tmp24708 = getelementptr inbounds float* %tmp24707, i64 1 + %tmp24709 = getelementptr inbounds float* %tmp24708, i64 1 + %tmp24710 = getelementptr inbounds float* %tmp24709, i64 1 + %tmp24711 = getelementptr inbounds float* %tmp24710, i64 1 + %tmp24712 = getelementptr inbounds float* %tmp24711, i64 1 + %tmp24713 = getelementptr inbounds float* %tmp24712, i64 1 + %tmp24714 = getelementptr inbounds float* %tmp24713, i64 1 + %tmp24715 = getelementptr inbounds float* %tmp24714, i64 1 + %tmp24716 = getelementptr inbounds float* %tmp24715, i64 1 + %tmp24717 = getelementptr inbounds float* %tmp24716, i64 1 + %tmp24718 = getelementptr inbounds float* %tmp24717, i64 1 + %tmp24719 = getelementptr inbounds float* %tmp24718, i64 1 + %tmp24720 = getelementptr inbounds float* %tmp24719, i64 1 + %tmp24721 = getelementptr inbounds float* %tmp24720, i64 1 + %tmp24722 = getelementptr inbounds float* %tmp24721, i64 1 + %tmp24723 = getelementptr inbounds float* %tmp24722, i64 1 + %tmp24724 = getelementptr inbounds float* %tmp24723, i64 1 + %tmp24725 = getelementptr inbounds float* %tmp24724, i64 1 + %tmp24726 = getelementptr inbounds float* %tmp24725, i64 1 + %tmp24727 = getelementptr inbounds float* %tmp24726, i64 1 + %tmp24728 = getelementptr inbounds float* %tmp24727, i64 1 + %tmp24729 = getelementptr inbounds float* %tmp24728, i64 1 + %tmp24730 = getelementptr inbounds float* %tmp24729, i64 1 + %tmp24731 = getelementptr inbounds float* %tmp24730, i64 1 + %tmp24732 = getelementptr inbounds float* %tmp24731, i64 1 + %tmp24733 = getelementptr inbounds float* %tmp24732, i64 1 + %tmp24734 = getelementptr inbounds float* %tmp24733, i64 1 + %tmp24735 = getelementptr inbounds float* %tmp24734, i64 1 + %tmp24736 = getelementptr inbounds float* %tmp24735, i64 1 + %tmp24737 = getelementptr inbounds float* %tmp24736, i64 1 + %tmp24738 = getelementptr inbounds float* 
%tmp24737, i64 1 + %tmp24739 = getelementptr inbounds float* %tmp24738, i64 1 + %tmp24740 = getelementptr inbounds float* %tmp24739, i64 1 + %tmp24741 = getelementptr inbounds float* %tmp24740, i64 1 + %tmp24742 = getelementptr inbounds float* %tmp24741, i64 1 + %tmp24743 = getelementptr inbounds float* %tmp24742, i64 1 + %tmp24744 = getelementptr inbounds float* %tmp24743, i64 1 + %tmp24745 = getelementptr inbounds float* %tmp24744, i64 1 + %tmp24746 = getelementptr inbounds float* %tmp24745, i64 1 + %tmp24747 = getelementptr inbounds float* %tmp24746, i64 1 + %tmp24748 = getelementptr inbounds float* %tmp24747, i64 1 + %tmp24749 = getelementptr inbounds float* %tmp24748, i64 1 + %tmp24750 = getelementptr inbounds float* %tmp24749, i64 1 + %tmp24751 = getelementptr inbounds float* %tmp24750, i64 1 + %tmp24752 = getelementptr inbounds float* %tmp24751, i64 1 + %tmp24753 = getelementptr inbounds float* %tmp24752, i64 1 + %tmp24754 = getelementptr inbounds float* %tmp24753, i64 1 + %tmp24755 = getelementptr inbounds float* %tmp24754, i64 1 + %tmp24756 = getelementptr inbounds float* %tmp24755, i64 1 + %tmp24757 = getelementptr inbounds float* %tmp24756, i64 1 + %tmp24758 = getelementptr inbounds float* %tmp24757, i64 1 + %tmp24759 = getelementptr inbounds float* %tmp24758, i64 1 + %tmp24760 = getelementptr inbounds float* %tmp24759, i64 1 + %tmp24761 = getelementptr inbounds float* %tmp24760, i64 1 + %tmp24762 = getelementptr inbounds float* %tmp24761, i64 1 + %tmp24763 = getelementptr inbounds float* %tmp24762, i64 1 + %tmp24764 = getelementptr inbounds float* %tmp24763, i64 1 + %tmp24765 = getelementptr inbounds float* %tmp24764, i64 1 + %tmp24766 = getelementptr inbounds float* %tmp24765, i64 1 + %tmp24767 = getelementptr inbounds float* %tmp24766, i64 1 + %tmp24768 = getelementptr inbounds float* %tmp24767, i64 1 + %tmp24769 = getelementptr inbounds float* %tmp24768, i64 1 + %tmp24770 = getelementptr inbounds float* %tmp24769, i64 1 + %tmp24771 = getelementptr 
inbounds float* %tmp24770, i64 1 + %tmp24772 = getelementptr inbounds float* %tmp24771, i64 1 + %tmp24773 = getelementptr inbounds float* %tmp24772, i64 1 + %tmp24774 = getelementptr inbounds float* %tmp24773, i64 1 + %tmp24775 = getelementptr inbounds float* %tmp24774, i64 1 + %tmp24776 = getelementptr inbounds float* %tmp24775, i64 1 + %tmp24777 = getelementptr inbounds float* %tmp24776, i64 1 + %tmp24778 = getelementptr inbounds float* %tmp24777, i64 1 + %tmp24779 = getelementptr inbounds float* %tmp24778, i64 1 + %tmp24780 = getelementptr inbounds float* %tmp24779, i64 1 + %tmp24781 = getelementptr inbounds float* %tmp24780, i64 1 + %tmp24782 = getelementptr inbounds float* %tmp24781, i64 1 + %tmp24783 = getelementptr inbounds float* %tmp24782, i64 1 + %tmp24784 = getelementptr inbounds float* %tmp24783, i64 1 + %tmp24785 = getelementptr inbounds float* %tmp24784, i64 1 + %tmp24786 = getelementptr inbounds float* %tmp24785, i64 1 + %tmp24787 = getelementptr inbounds float* %tmp24786, i64 1 + %tmp24788 = getelementptr inbounds float* %tmp24787, i64 1 + %tmp24789 = getelementptr inbounds float* %tmp24788, i64 1 + %tmp24790 = getelementptr inbounds float* %tmp24789, i64 1 + %tmp24791 = getelementptr inbounds float* %tmp24790, i64 1 + %tmp24792 = getelementptr inbounds float* %tmp24791, i64 1 + %tmp24793 = getelementptr inbounds float* %tmp24792, i64 1 + %tmp24794 = getelementptr inbounds float* %tmp24793, i64 1 + %tmp24795 = getelementptr inbounds float* %tmp24794, i64 1 + %tmp24796 = getelementptr inbounds float* %tmp24795, i64 1 + %tmp24797 = getelementptr inbounds float* %tmp24796, i64 1 + %tmp24798 = getelementptr inbounds float* %tmp24797, i64 1 + %tmp24799 = getelementptr inbounds float* %tmp24798, i64 1 + %tmp24800 = getelementptr inbounds float* %tmp24799, i64 1 + %tmp24801 = getelementptr inbounds float* %tmp24800, i64 1 + %tmp24802 = getelementptr inbounds float* %tmp24801, i64 1 + %tmp24803 = getelementptr inbounds float* %tmp24802, i64 1 + %tmp24804 = 
getelementptr inbounds float* %tmp24803, i64 1 + %tmp24805 = getelementptr inbounds float* %tmp24804, i64 1 + %tmp24806 = getelementptr inbounds float* %tmp24805, i64 1 + %tmp24807 = getelementptr inbounds float* %tmp24806, i64 1 + %tmp24808 = getelementptr inbounds float* %tmp24807, i64 1 + %tmp24809 = getelementptr inbounds float* %tmp24808, i64 1 + %tmp24810 = getelementptr inbounds float* %tmp24809, i64 1 + %tmp24811 = getelementptr inbounds float* %tmp24810, i64 1 + %tmp24812 = getelementptr inbounds float* %tmp24811, i64 1 + %tmp24813 = getelementptr inbounds float* %tmp24812, i64 1 + %tmp24814 = getelementptr inbounds float* %tmp24813, i64 1 + %tmp24815 = getelementptr inbounds float* %tmp24814, i64 1 + %tmp24816 = getelementptr inbounds float* %tmp24815, i64 1 + %tmp24817 = getelementptr inbounds float* %tmp24816, i64 1 + %tmp24818 = getelementptr inbounds float* %tmp24817, i64 1 + %tmp24819 = getelementptr inbounds float* %tmp24818, i64 1 + %tmp24820 = getelementptr inbounds float* %tmp24819, i64 1 + %tmp24821 = getelementptr inbounds float* %tmp24820, i64 1 + %tmp24822 = getelementptr inbounds float* %tmp24821, i64 1 + %tmp24823 = getelementptr inbounds float* %tmp24822, i64 1 + %tmp24824 = getelementptr inbounds float* %tmp24823, i64 1 + %tmp24825 = getelementptr inbounds float* %tmp24824, i64 1 + %tmp24826 = getelementptr inbounds float* %tmp24825, i64 1 + %tmp24827 = getelementptr inbounds float* %tmp24826, i64 1 + %tmp24828 = getelementptr inbounds float* %tmp24827, i64 1 + %tmp24829 = getelementptr inbounds float* %tmp24828, i64 1 + %tmp24830 = getelementptr inbounds float* %tmp24829, i64 1 + %tmp24831 = getelementptr inbounds float* %tmp24830, i64 1 + %tmp24832 = getelementptr inbounds float* %tmp24831, i64 1 + %tmp24833 = getelementptr inbounds float* %tmp24832, i64 1 + %tmp24834 = getelementptr inbounds float* %tmp24833, i64 1 + %tmp24835 = getelementptr inbounds float* %tmp24834, i64 1 + %tmp24836 = getelementptr inbounds float* %tmp24835, i64 1 
+ %tmp24837 = getelementptr inbounds float* %tmp24836, i64 1 + %tmp24838 = getelementptr inbounds float* %tmp24837, i64 1 + %tmp24839 = getelementptr inbounds float* %tmp24838, i64 1 + %tmp24840 = getelementptr inbounds float* %tmp24839, i64 1 + %tmp24841 = getelementptr inbounds float* %tmp24840, i64 1 + %tmp24842 = getelementptr inbounds float* %tmp24841, i64 1 + %tmp24843 = getelementptr inbounds float* %tmp24842, i64 1 + %tmp24844 = getelementptr inbounds float* %tmp24843, i64 1 + %tmp24845 = getelementptr inbounds float* %tmp24844, i64 1 + %tmp24846 = getelementptr inbounds float* %tmp24845, i64 1 + %tmp24847 = getelementptr inbounds float* %tmp24846, i64 1 + %tmp24848 = getelementptr inbounds float* %tmp24847, i64 1 + %tmp24849 = getelementptr inbounds float* %tmp24848, i64 1 + %tmp24850 = getelementptr inbounds float* %tmp24849, i64 1 + %tmp24851 = getelementptr inbounds float* %tmp24850, i64 1 + %tmp24852 = getelementptr inbounds float* %tmp24851, i64 1 + %tmp24853 = getelementptr inbounds float* %tmp24852, i64 1 + %tmp24854 = getelementptr inbounds float* %tmp24853, i64 1 + %tmp24855 = getelementptr inbounds float* %tmp24854, i64 1 + %tmp24856 = getelementptr inbounds float* %tmp24855, i64 1 + %tmp24857 = getelementptr inbounds float* %tmp24856, i64 1 + %tmp24858 = getelementptr inbounds float* %tmp24857, i64 1 + %tmp24859 = getelementptr inbounds float* %tmp24858, i64 1 + %tmp24860 = getelementptr inbounds float* %tmp24859, i64 1 + %tmp24861 = getelementptr inbounds float* %tmp24860, i64 1 + %tmp24862 = getelementptr inbounds float* %tmp24861, i64 1 + %tmp24863 = getelementptr inbounds float* %tmp24862, i64 1 + %tmp24864 = getelementptr inbounds float* %tmp24863, i64 1 + %tmp24865 = getelementptr inbounds float* %tmp24864, i64 1 + %tmp24866 = getelementptr inbounds float* %tmp24865, i64 1 + %tmp24867 = getelementptr inbounds float* %tmp24866, i64 1 + %tmp24868 = getelementptr inbounds float* %tmp24867, i64 1 + %tmp24869 = getelementptr inbounds float* 
%tmp24868, i64 1 + %tmp24870 = getelementptr inbounds float* %tmp24869, i64 1 + %tmp24871 = getelementptr inbounds float* %tmp24870, i64 1 + %tmp24872 = getelementptr inbounds float* %tmp24871, i64 1 + %tmp24873 = getelementptr inbounds float* %tmp24872, i64 1 + %tmp24874 = getelementptr inbounds float* %tmp24873, i64 1 + %tmp24875 = getelementptr inbounds float* %tmp24874, i64 1 + %tmp24876 = getelementptr inbounds float* %tmp24875, i64 1 + %tmp24877 = getelementptr inbounds float* %tmp24876, i64 1 + %tmp24878 = getelementptr inbounds float* %tmp24877, i64 1 + %tmp24879 = getelementptr inbounds float* %tmp24878, i64 1 + %tmp24880 = getelementptr inbounds float* %tmp24879, i64 1 + %tmp24881 = getelementptr inbounds float* %tmp24880, i64 1 + %tmp24882 = getelementptr inbounds float* %tmp24881, i64 1 + %tmp24883 = getelementptr inbounds float* %tmp24882, i64 1 + %tmp24884 = getelementptr inbounds float* %tmp24883, i64 1 + %tmp24885 = getelementptr inbounds float* %tmp24884, i64 1 + %tmp24886 = getelementptr inbounds float* %tmp24885, i64 1 + %tmp24887 = getelementptr inbounds float* %tmp24886, i64 1 + %tmp24888 = getelementptr inbounds float* %tmp24887, i64 1 + %tmp24889 = getelementptr inbounds float* %tmp24888, i64 1 + %tmp24890 = getelementptr inbounds float* %tmp24889, i64 1 + %tmp24891 = getelementptr inbounds float* %tmp24890, i64 1 + %tmp24892 = getelementptr inbounds float* %tmp24891, i64 1 + %tmp24893 = getelementptr inbounds float* %tmp24892, i64 1 + %tmp24894 = getelementptr inbounds float* %tmp24893, i64 1 + %tmp24895 = getelementptr inbounds float* %tmp24894, i64 1 + %tmp24896 = getelementptr inbounds float* %tmp24895, i64 1 + %tmp24897 = getelementptr inbounds float* %tmp24896, i64 1 + %tmp24898 = getelementptr inbounds float* %tmp24897, i64 1 + %tmp24899 = getelementptr inbounds float* %tmp24898, i64 1 + %tmp24900 = getelementptr inbounds float* %tmp24899, i64 1 + %tmp24901 = getelementptr inbounds float* %tmp24900, i64 1 + %tmp24902 = getelementptr 
inbounds float* %tmp24901, i64 1 + %tmp24903 = getelementptr inbounds float* %tmp24902, i64 1 + %tmp24904 = getelementptr inbounds float* %tmp24903, i64 1 + %tmp24905 = getelementptr inbounds float* %tmp24904, i64 1 + %tmp24906 = getelementptr inbounds float* %tmp24905, i64 1 + %tmp24907 = getelementptr inbounds float* %tmp24906, i64 1 + %tmp24908 = getelementptr inbounds float* %tmp24907, i64 1 + %tmp24909 = getelementptr inbounds float* %tmp24908, i64 1 + %tmp24910 = getelementptr inbounds float* %tmp24909, i64 1 + %tmp24911 = getelementptr inbounds float* %tmp24910, i64 1 + %tmp24912 = getelementptr inbounds float* %tmp24911, i64 1 + %tmp24913 = getelementptr inbounds float* %tmp24912, i64 1 + %tmp24914 = getelementptr inbounds float* %tmp24913, i64 1 + %tmp24915 = getelementptr inbounds float* %tmp24914, i64 1 + %tmp24916 = getelementptr inbounds float* %tmp24915, i64 1 + %tmp24917 = getelementptr inbounds float* %tmp24916, i64 1 + %tmp24918 = getelementptr inbounds float* %tmp24917, i64 1 + %tmp24919 = getelementptr inbounds float* %tmp24918, i64 1 + %tmp24920 = getelementptr inbounds float* %tmp24919, i64 1 + %tmp24921 = getelementptr inbounds float* %tmp24920, i64 1 + %tmp24922 = getelementptr inbounds float* %tmp24921, i64 1 + %tmp24923 = getelementptr inbounds float* %tmp24922, i64 1 + %tmp24924 = getelementptr inbounds float* %tmp24923, i64 1 + %tmp24925 = getelementptr inbounds float* %tmp24924, i64 1 + %tmp24926 = getelementptr inbounds float* %tmp24925, i64 1 + %tmp24927 = getelementptr inbounds float* %tmp24926, i64 1 + %tmp24928 = getelementptr inbounds float* %tmp24927, i64 1 + %tmp24929 = getelementptr inbounds float* %tmp24928, i64 1 + %tmp24930 = getelementptr inbounds float* %tmp24929, i64 1 + %tmp24931 = getelementptr inbounds float* %tmp24930, i64 1 + %tmp24932 = getelementptr inbounds float* %tmp24931, i64 1 + %tmp24933 = getelementptr inbounds float* %tmp24932, i64 1 + %tmp24934 = getelementptr inbounds float* %tmp24933, i64 1 + %tmp24935 = 
getelementptr inbounds float* %tmp24934, i64 1 + %tmp24936 = getelementptr inbounds float* %tmp24935, i64 1 + %tmp24937 = getelementptr inbounds float* %tmp24936, i64 1 + %tmp24938 = getelementptr inbounds float* %tmp24937, i64 1 + %tmp24939 = getelementptr inbounds float* %tmp24938, i64 1 + %tmp24940 = getelementptr inbounds float* %tmp24939, i64 1 + %tmp24941 = getelementptr inbounds float* %tmp24940, i64 1 + %tmp24942 = getelementptr inbounds float* %tmp24941, i64 1 + %tmp24943 = getelementptr inbounds float* %tmp24942, i64 1 + %tmp24944 = getelementptr inbounds float* %tmp24943, i64 1 + %tmp24945 = getelementptr inbounds float* %tmp24944, i64 1 + %tmp24946 = getelementptr inbounds float* %tmp24945, i64 1 + store float 0x3F43FD0D00000000, float* %tmp24946 + %tmp24947 = getelementptr inbounds float* undef, i64 1 + %tmp24948 = getelementptr inbounds float* undef, i64 1 + %tmp24949 = getelementptr inbounds float* undef, i64 1 + %tmp24950 = getelementptr inbounds float* undef, i64 1 + %tmp24951 = getelementptr inbounds float* %tmp24950, i64 1 + %tmp24952 = getelementptr inbounds float* undef, i64 1 + %tmp24953 = getelementptr inbounds float* undef, i64 1 + %tmp24954 = getelementptr inbounds float* undef, i64 1 + %tmp24955 = getelementptr inbounds float* undef, i64 1 + %tmp24956 = getelementptr inbounds float* undef, i64 1 + %tmp24957 = getelementptr inbounds float* undef, i64 1 + %tmp24958 = getelementptr inbounds float* %tmp24957, i64 1 + %tmp24959 = getelementptr inbounds float* undef, i64 1 + %tmp24960 = getelementptr inbounds float* undef, i64 1 + %tmp24961 = getelementptr inbounds float* undef, i64 1 + %tmp24962 = getelementptr inbounds float* undef, i64 1 + %tmp24963 = getelementptr inbounds float* undef, i64 1 + %tmp24964 = getelementptr inbounds float* undef, i64 1 + %tmp24965 = getelementptr inbounds float* undef, i64 1 + %tmp24966 = getelementptr inbounds float* %tmp24965, i64 1 + %tmp24967 = getelementptr inbounds float* undef, i64 1 + %tmp24968 = 
getelementptr inbounds float* undef, i64 1 + %tmp24969 = getelementptr inbounds float* undef, i64 1 + %tmp24970 = getelementptr inbounds float* undef, i64 1 + %tmp24971 = getelementptr inbounds float* %tmp24970, i64 1 + %tmp24972 = getelementptr inbounds float* %tmp24971, i64 1 + %tmp24973 = getelementptr inbounds float* %tmp24972, i64 1 + %tmp24974 = getelementptr inbounds float* undef, i64 1 + %tmp24975 = getelementptr inbounds float* undef, i64 1 + %tmp24976 = getelementptr inbounds float* %tmp24975, i64 1 + %tmp24977 = getelementptr inbounds float* undef, i64 1 + %tmp24978 = getelementptr inbounds float* undef, i64 1 + %tmp24979 = getelementptr inbounds float* undef, i64 1 + %tmp24980 = getelementptr inbounds float* undef, i64 1 + %tmp24981 = getelementptr inbounds float* undef, i64 1 + %tmp24982 = getelementptr inbounds float* undef, i64 1 + %tmp24983 = getelementptr inbounds float* %tmp24982, i64 1 + %tmp24984 = getelementptr inbounds float* undef, i64 1 + %tmp24985 = getelementptr inbounds float* %tmp24984, i64 1 + %tmp24986 = getelementptr inbounds float* undef, i64 1 + %tmp24987 = getelementptr inbounds float* %tmp24986, i64 1 + %tmp24988 = getelementptr inbounds float* %tmp24987, i64 1 + %tmp24989 = getelementptr inbounds float* undef, i64 1 + %tmp24990 = getelementptr inbounds float* undef, i64 1 + %tmp24991 = getelementptr inbounds float* %tmp24990, i64 1 + %tmp24992 = getelementptr inbounds float* undef, i64 1 + %tmp24993 = getelementptr inbounds float* %tmp24992, i64 1 + %tmp24994 = getelementptr inbounds float* %tmp24993, i64 1 + %tmp24995 = getelementptr inbounds float* undef, i64 1 + %tmp24996 = getelementptr inbounds float* undef, i64 1 + %tmp24997 = getelementptr inbounds float* undef, i64 1 + %tmp24998 = getelementptr inbounds float* undef, i64 1 + %tmp24999 = getelementptr inbounds float* undef, i64 1 + %tmp25000 = getelementptr inbounds float* undef, i64 1 + %tmp25001 = getelementptr inbounds float* undef, i64 1 + %tmp25002 = getelementptr 
inbounds float* undef, i64 1 + %tmp25003 = getelementptr inbounds float* undef, i64 1 + %tmp25004 = getelementptr inbounds float* undef, i64 1 + %tmp25005 = getelementptr inbounds float* undef, i64 1 + %tmp25006 = getelementptr inbounds float* undef, i64 1 + %tmp25007 = getelementptr inbounds float* undef, i64 1 + %tmp25008 = getelementptr inbounds float* undef, i64 1 + %tmp25009 = getelementptr inbounds float* undef, i64 1 + %tmp25010 = getelementptr inbounds float* undef, i64 1 + %tmp25011 = getelementptr inbounds float* undef, i64 1 + %tmp25012 = getelementptr inbounds float* %tmp25011, i64 1 + %tmp25013 = getelementptr inbounds float* undef, i64 1 + %tmp25014 = getelementptr inbounds float* undef, i64 1 + %tmp25015 = getelementptr inbounds float* undef, i64 1 + %tmp25016 = getelementptr inbounds float* undef, i64 1 + %tmp25017 = getelementptr inbounds float* %tmp25016, i64 1 + %tmp25018 = getelementptr inbounds float* undef, i64 1 + %tmp25019 = getelementptr inbounds float* undef, i64 1 + %tmp25020 = getelementptr inbounds float* undef, i64 1 + %tmp25021 = getelementptr inbounds float* undef, i64 1 + %tmp25022 = getelementptr inbounds float* undef, i64 1 + %tmp25023 = getelementptr inbounds float* %tmp25022, i64 1 + %tmp25024 = getelementptr inbounds float* %tmp25023, i64 1 + %tmp25025 = getelementptr inbounds float* undef, i64 1 + %tmp25026 = getelementptr inbounds float* undef, i64 1 + %tmp25027 = getelementptr inbounds float* undef, i64 1 + %tmp25028 = getelementptr inbounds float* undef, i64 1 + %tmp25029 = getelementptr inbounds float* undef, i64 1 + %tmp25030 = getelementptr inbounds float* undef, i64 1 + %tmp25031 = getelementptr inbounds float* undef, i64 1 + %tmp25032 = getelementptr inbounds float* undef, i64 1 + %tmp25033 = getelementptr inbounds float* undef, i64 1 + %tmp25034 = getelementptr inbounds float* undef, i64 1 + %tmp25035 = getelementptr inbounds float* %tmp25034, i64 1 + %tmp25036 = getelementptr inbounds float* undef, i64 1 + %tmp25037 
= getelementptr inbounds float* undef, i64 1 + %tmp25038 = getelementptr inbounds float* %tmp25037, i64 1 + %tmp25039 = getelementptr inbounds float* undef, i64 1 + %tmp25040 = getelementptr inbounds float* undef, i64 1 + %tmp25041 = getelementptr inbounds float* undef, i64 1 + %tmp25042 = getelementptr inbounds float* undef, i64 1 + %tmp25043 = getelementptr inbounds float* undef, i64 1 + %tmp25044 = getelementptr inbounds float* undef, i64 1 + %tmp25045 = getelementptr inbounds float* %tmp25044, i64 1 + %tmp25046 = getelementptr inbounds float* undef, i64 1 + %tmp25047 = getelementptr inbounds float* %tmp25046, i64 1 + %tmp25048 = getelementptr inbounds float* undef, i64 1 + %tmp25049 = getelementptr inbounds float* %tmp25048, i64 1 + %tmp25050 = getelementptr inbounds float* %tmp25049, i64 1 + %tmp25051 = getelementptr inbounds float* undef, i64 1 + %tmp25052 = getelementptr inbounds float* undef, i64 1 + %tmp25053 = getelementptr inbounds float* undef, i64 1 + %tmp25054 = getelementptr inbounds float* undef, i64 1 + %tmp25055 = getelementptr inbounds float* undef, i64 1 + %tmp25056 = getelementptr inbounds float* undef, i64 1 + %tmp25057 = getelementptr inbounds float* undef, i64 1 + %tmp25058 = getelementptr inbounds float* undef, i64 1 + %tmp25059 = getelementptr inbounds float* undef, i64 1 + %tmp25060 = getelementptr inbounds float* undef, i64 1 + %tmp25061 = getelementptr inbounds float* undef, i64 1 + %tmp25062 = getelementptr inbounds float* undef, i64 1 + %tmp25063 = getelementptr inbounds float* undef, i64 1 + %tmp25064 = getelementptr inbounds float* undef, i64 1 + %tmp25065 = getelementptr inbounds float* undef, i64 1 + %tmp25066 = getelementptr inbounds float* undef, i64 1 + %tmp25067 = getelementptr inbounds float* %tmp25066, i64 1 + %tmp25068 = getelementptr inbounds float* undef, i64 1 + %tmp25069 = getelementptr inbounds float* %tmp25068, i64 1 + %tmp25070 = getelementptr inbounds float* undef, i64 1 + %tmp25071 = getelementptr inbounds float* 
undef, i64 1 + %tmp25072 = getelementptr inbounds float* undef, i64 1 + %tmp25073 = getelementptr inbounds float* undef, i64 1 + %tmp25074 = getelementptr inbounds float* undef, i64 1 + %tmp25075 = getelementptr inbounds float* %tmp25074, i64 1 + %tmp25076 = getelementptr inbounds float* undef, i64 1 + %tmp25077 = getelementptr inbounds float* undef, i64 1 + %tmp25078 = getelementptr inbounds float* undef, i64 1 + %tmp25079 = getelementptr inbounds float* undef, i64 1 + %tmp25080 = getelementptr inbounds float* undef, i64 1 + %tmp25081 = getelementptr inbounds float* undef, i64 1 + %tmp25082 = getelementptr inbounds float* undef, i64 1 + %tmp25083 = getelementptr inbounds float* undef, i64 1 + %tmp25084 = getelementptr inbounds float* undef, i64 1 + %tmp25085 = getelementptr inbounds float* undef, i64 1 + %tmp25086 = getelementptr inbounds float* undef, i64 1 + %tmp25087 = getelementptr inbounds float* undef, i64 1 + %tmp25088 = getelementptr inbounds float* undef, i64 1 + %tmp25089 = getelementptr inbounds float* undef, i64 1 + %tmp25090 = getelementptr inbounds float* undef, i64 1 + %tmp25091 = getelementptr inbounds float* undef, i64 1 + %tmp25092 = getelementptr inbounds float* undef, i64 1 + %tmp25093 = getelementptr inbounds float* undef, i64 1 + %tmp25094 = getelementptr inbounds float* undef, i64 1 + %tmp25095 = getelementptr inbounds float* %tmp25094, i64 1 + %tmp25096 = getelementptr inbounds float* undef, i64 1 + %tmp25097 = getelementptr inbounds float* %tmp25096, i64 1 + %tmp25098 = getelementptr inbounds float* %tmp25097, i64 1 + %tmp25099 = getelementptr inbounds float* undef, i64 1 + %tmp25100 = getelementptr inbounds float* undef, i64 1 + %tmp25101 = getelementptr inbounds float* undef, i64 1 + %tmp25102 = getelementptr inbounds float* undef, i64 1 + %tmp25103 = getelementptr inbounds float* undef, i64 1 + %tmp25104 = getelementptr inbounds float* undef, i64 1 + %tmp25105 = getelementptr inbounds float* undef, i64 1 + %tmp25106 = getelementptr 
inbounds float* undef, i64 1 + %tmp25107 = getelementptr inbounds float* %tmp25106, i64 1 + %tmp25108 = getelementptr inbounds float* undef, i64 1 + %tmp25109 = getelementptr inbounds float* undef, i64 1 + %tmp25110 = getelementptr inbounds float* undef, i64 1 + %tmp25111 = getelementptr inbounds float* undef, i64 1 + %tmp25112 = getelementptr inbounds float* undef, i64 1 + %tmp25113 = getelementptr inbounds float* undef, i64 1 + %tmp25114 = getelementptr inbounds float* undef, i64 1 + %tmp25115 = getelementptr inbounds float* undef, i64 1 + %tmp25116 = getelementptr inbounds float* undef, i64 1 + %tmp25117 = getelementptr inbounds float* undef, i64 1 + %tmp25118 = getelementptr inbounds float* undef, i64 1 + %tmp25119 = getelementptr inbounds float* undef, i64 1 + %tmp25120 = getelementptr inbounds float* undef, i64 1 + %tmp25121 = getelementptr inbounds float* undef, i64 1 + %tmp25122 = getelementptr inbounds float* %tmp25121, i64 1 + %tmp25123 = getelementptr inbounds float* undef, i64 1 + %tmp25124 = getelementptr inbounds float* undef, i64 1 + %tmp25125 = getelementptr inbounds float* undef, i64 1 + %tmp25126 = getelementptr inbounds float* undef, i64 1 + %tmp25127 = getelementptr inbounds float* undef, i64 1 + %tmp25128 = getelementptr inbounds float* undef, i64 1 + %tmp25129 = getelementptr inbounds float* undef, i64 1 + %tmp25130 = getelementptr inbounds float* undef, i64 1 + %tmp25131 = getelementptr inbounds float* undef, i64 1 + %tmp25132 = getelementptr inbounds float* undef, i64 1 + %tmp25133 = getelementptr inbounds float* undef, i64 1 + %tmp25134 = getelementptr inbounds float* undef, i64 1 + %tmp25135 = getelementptr inbounds float* undef, i64 1 + %tmp25136 = getelementptr inbounds float* undef, i64 1 + %tmp25137 = getelementptr inbounds float* undef, i64 1 + %tmp25138 = getelementptr inbounds float* undef, i64 1 + %tmp25139 = getelementptr inbounds float* undef, i64 1 + %tmp25140 = getelementptr inbounds float* undef, i64 1 + %tmp25141 = 
getelementptr inbounds float* undef, i64 1 + %tmp25142 = getelementptr inbounds float* undef, i64 1 + %tmp25143 = getelementptr inbounds float* undef, i64 1 + %tmp25144 = getelementptr inbounds float* undef, i64 1 + %tmp25145 = getelementptr inbounds float* undef, i64 1 + %tmp25146 = getelementptr inbounds float* %tmp25145, i64 1 + %tmp25147 = getelementptr inbounds float* undef, i64 1 + %tmp25148 = getelementptr inbounds float* %tmp25147, i64 1 + %tmp25149 = getelementptr inbounds float* undef, i64 1 + %tmp25150 = getelementptr inbounds float* undef, i64 1 + %tmp25151 = getelementptr inbounds float* undef, i64 1 + %tmp25152 = getelementptr inbounds float* undef, i64 1 + %tmp25153 = getelementptr inbounds float* %tmp25152, i64 1 + %tmp25154 = getelementptr inbounds float* undef, i64 1 + %tmp25155 = getelementptr inbounds float* undef, i64 1 + %tmp25156 = getelementptr inbounds float* undef, i64 1 + %tmp25157 = getelementptr inbounds float* undef, i64 1 + %tmp25158 = getelementptr inbounds float* undef, i64 1 + %tmp25159 = getelementptr inbounds float* undef, i64 1 + %tmp25160 = getelementptr inbounds float* undef, i64 1 + %tmp25161 = getelementptr inbounds float* undef, i64 1 + %tmp25162 = getelementptr inbounds float* %tmp25161, i64 1 + %tmp25163 = getelementptr inbounds float* undef, i64 1 + %tmp25164 = getelementptr inbounds float* undef, i64 1 + %tmp25165 = getelementptr inbounds float* undef, i64 1 + %tmp25166 = getelementptr inbounds float* undef, i64 1 + %tmp25167 = getelementptr inbounds float* undef, i64 1 + %tmp25168 = getelementptr inbounds float* undef, i64 1 + %tmp25169 = getelementptr inbounds float* undef, i64 1 + %tmp25170 = getelementptr inbounds float* %tmp25169, i64 1 + %tmp25171 = getelementptr inbounds float* undef, i64 1 + %tmp25172 = getelementptr inbounds float* undef, i64 1 + %tmp25173 = getelementptr inbounds float* undef, i64 1 + %tmp25174 = getelementptr inbounds float* undef, i64 1 + %tmp25175 = getelementptr inbounds float* %tmp25174, 
i64 1 + %tmp25176 = getelementptr inbounds float* undef, i64 1 + %tmp25177 = getelementptr inbounds float* undef, i64 1 + %tmp25178 = getelementptr inbounds float* %tmp25177, i64 1 + %tmp25179 = getelementptr inbounds float* undef, i64 1 + %tmp25180 = getelementptr inbounds float* undef, i64 1 + %tmp25181 = getelementptr inbounds float* undef, i64 1 + %tmp25182 = getelementptr inbounds float* undef, i64 1 + %tmp25183 = getelementptr inbounds float* undef, i64 1 + %tmp25184 = getelementptr inbounds float* undef, i64 1 + %tmp25185 = getelementptr inbounds float* undef, i64 1 + %tmp25186 = getelementptr inbounds float* undef, i64 1 + %tmp25187 = getelementptr inbounds float* %tmp25186, i64 1 + %tmp25188 = getelementptr inbounds float* %tmp25187, i64 1 + %tmp25189 = getelementptr inbounds float* undef, i64 1 + %tmp25190 = getelementptr inbounds float* undef, i64 1 + %tmp25191 = getelementptr inbounds float* undef, i64 1 + %tmp25192 = getelementptr inbounds float* %tmp25191, i64 1 + %tmp25193 = getelementptr inbounds float* undef, i64 1 + %tmp25194 = getelementptr inbounds float* undef, i64 1 + %tmp25195 = getelementptr inbounds float* undef, i64 1 + %tmp25196 = getelementptr inbounds float* undef, i64 1 + %tmp25197 = getelementptr inbounds float* undef, i64 1 + %tmp25198 = getelementptr inbounds float* undef, i64 1 + %tmp25199 = getelementptr inbounds float* undef, i64 1 + %tmp25200 = getelementptr inbounds float* undef, i64 1 + %tmp25201 = getelementptr inbounds float* %tmp25200, i64 1 + %tmp25202 = getelementptr inbounds float* undef, i64 1 + %tmp25203 = getelementptr inbounds float* undef, i64 1 + %tmp25204 = getelementptr inbounds float* undef, i64 1 + %tmp25205 = getelementptr inbounds float* undef, i64 1 + %tmp25206 = getelementptr inbounds float* undef, i64 1 + %tmp25207 = getelementptr inbounds float* undef, i64 1 + %tmp25208 = getelementptr inbounds float* undef, i64 1 + %tmp25209 = getelementptr inbounds float* undef, i64 1 + %tmp25210 = getelementptr 
inbounds float* undef, i64 1 + %tmp25211 = getelementptr inbounds float* undef, i64 1 + %tmp25212 = getelementptr inbounds float* undef, i64 1 + %tmp25213 = getelementptr inbounds float* undef, i64 1 + %tmp25214 = getelementptr inbounds float* undef, i64 1 + %tmp25215 = getelementptr inbounds float* undef, i64 1 + %tmp25216 = getelementptr inbounds float* undef, i64 1 + %tmp25217 = getelementptr inbounds float* undef, i64 1 + %tmp25218 = getelementptr inbounds float* undef, i64 1 + %tmp25219 = getelementptr inbounds float* undef, i64 1 + %tmp25220 = getelementptr inbounds float* undef, i64 1 + %tmp25221 = getelementptr inbounds float* undef, i64 1 + %tmp25222 = getelementptr inbounds float* undef, i64 1 + %tmp25223 = getelementptr inbounds float* undef, i64 1 + %tmp25224 = getelementptr inbounds float* undef, i64 1 + %tmp25225 = getelementptr inbounds float* undef, i64 1 + %tmp25226 = getelementptr inbounds float* undef, i64 1 + %tmp25227 = getelementptr inbounds float* undef, i64 1 + %tmp25228 = getelementptr inbounds float* undef, i64 1 + %tmp25229 = getelementptr inbounds float* undef, i64 1 + %tmp25230 = getelementptr inbounds float* %tmp25229, i64 1 + %tmp25231 = getelementptr inbounds float* undef, i64 1 + %tmp25232 = getelementptr inbounds float* undef, i64 1 + %tmp25233 = getelementptr inbounds float* undef, i64 1 + %tmp25234 = getelementptr inbounds float* undef, i64 1 + %tmp25235 = getelementptr inbounds float* %tmp25234, i64 1 + %tmp25236 = getelementptr inbounds float* undef, i64 1 + %tmp25237 = getelementptr inbounds float* %tmp25236, i64 1 + %tmp25238 = getelementptr inbounds float* undef, i64 1 + %tmp25239 = getelementptr inbounds float* undef, i64 1 + %tmp25240 = getelementptr inbounds float* undef, i64 1 + %tmp25241 = getelementptr inbounds float* undef, i64 1 + %tmp25242 = getelementptr inbounds float* undef, i64 1 + %tmp25243 = getelementptr inbounds float* undef, i64 1 + %tmp25244 = getelementptr inbounds float* undef, i64 1 + %tmp25245 = 
getelementptr inbounds float* undef, i64 1 + %tmp25246 = getelementptr inbounds float* undef, i64 1 + %tmp25247 = getelementptr inbounds float* undef, i64 1 + %tmp25248 = getelementptr inbounds float* %tmp25247, i64 1 + %tmp25249 = getelementptr inbounds float* undef, i64 1 + %tmp25250 = getelementptr inbounds float* undef, i64 1 + %tmp25251 = getelementptr inbounds float* undef, i64 1 + %tmp25252 = getelementptr inbounds float* undef, i64 1 + %tmp25253 = getelementptr inbounds float* undef, i64 1 + %tmp25254 = getelementptr inbounds float* undef, i64 1 + %tmp25255 = getelementptr inbounds float* undef, i64 1 + %tmp25256 = getelementptr inbounds float* undef, i64 1 + %tmp25257 = getelementptr inbounds float* undef, i64 1 + %tmp25258 = getelementptr inbounds float* undef, i64 1 + %tmp25259 = getelementptr inbounds float* undef, i64 1 + %tmp25260 = getelementptr inbounds float* undef, i64 1 + %tmp25261 = getelementptr inbounds float* undef, i64 1 + %tmp25262 = getelementptr inbounds float* undef, i64 1 + %tmp25263 = getelementptr inbounds float* undef, i64 1 + %tmp25264 = getelementptr inbounds float* undef, i64 1 + %tmp25265 = getelementptr inbounds float* undef, i64 1 + %tmp25266 = getelementptr inbounds float* undef, i64 1 + %tmp25267 = getelementptr inbounds float* undef, i64 1 + %tmp25268 = getelementptr inbounds float* undef, i64 1 + %tmp25269 = getelementptr inbounds float* undef, i64 1 + br i1 undef, label %bb25270, label %bb25271 + +bb25270: ; preds = %bb2 + br label %bb25362 + +bb25271: ; preds = %bb2 + br label %bb25272 + +bb25272: ; preds = %bb25275, %bb25271 + br i1 false, label %bb25273, label %bb25278 + +bb25273: ; preds = %bb25272 + invoke void @foo() + to label %bb25274 unwind label %bb25276 + +bb25274: ; preds = %bb25273 + invoke void @bar() + to label %bb25275 unwind label %bb25276 + +bb25275: ; preds = %bb25274 + br label %bb25272 + +bb25276: ; preds = %bb25283, %bb25274, %bb25273 + %tmp25277 = landingpad { i8*, i32 } personality i8* bitcast (i32 
(...)* @__gxx_personality_v0 to i8*) + cleanup + br label %bb25361 + +bb25278: ; preds = %bb25272 + br label %bb25279 + +bb25279: ; preds = %bb25284, %bb25278 + br i1 undef, label %bb25280, label %bb25285 + +bb25280: ; preds = %bb25279 + br label %bb25281 + +bb25281: ; preds = %bb25282, %bb25280 + br i1 undef, label %bb25282, label %bb25283 + +bb25282: ; preds = %bb25281 + br label %bb25281 + +bb25283: ; preds = %bb25281 + invoke void @bar() + to label %bb25284 unwind label %bb25276 + +bb25284: ; preds = %bb25283 + br label %bb25279 + +bb25285: ; preds = %bb25279 + br label %bb25286 + +bb25286: ; preds = %bb25303, %bb25285 + br i1 undef, label %bb25287, label %bb25304 + +bb25287: ; preds = %bb25286 + invoke void @bar() + to label %bb25288 unwind label %bb25298 + +bb25288: ; preds = %bb25287 + br i1 undef, label %bb25289, label %bb25300 + +bb25289: ; preds = %bb25288 + br i1 undef, label %bb25290, label %bb25300 + +bb25290: ; preds = %bb25289 + invoke void @bar() + to label %bb25291 unwind label %bb25298 + +bb25291: ; preds = %bb25290 + br i1 undef, label %bb25292, label %bb25295 + +bb25292: ; preds = %bb25291 + br i1 undef, label %bb25294, label %bb25293 + +bb25293: ; preds = %bb25292 + br label %bb25294 + +bb25294: ; preds = %bb25293, %bb25292 + br label %bb25296 + +bb25295: ; preds = %bb25291 + invoke void @quuuux() + to label %bb25296 unwind label %bb25298 + +bb25296: ; preds = %bb25295, %bb25294 + invoke void @baz() + to label %bb25297 unwind label %bb25298 + +bb25297: ; preds = %bb25296 + br label %bb25300 + +bb25298: ; preds = %bb25296, %bb25295, %bb25290, %bb25287 + %tmp25299 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + cleanup + br label %bb25360 + +bb25300: ; preds = %bb25297, %bb25289, %bb25288 + br i1 undef, label %bb25301, label %bb25302 + +bb25301: ; preds = %bb25300 + br label %bb25303 + +bb25302: ; preds = %bb25300 + br label %bb25303 + +bb25303: ; preds = %bb25302, %bb25301 + br label %bb25286 + 
+bb25304: ; preds = %bb25286 + br label %bb25305 + +bb25305: ; preds = %bb25331, %bb25304 + br i1 undef, label %bb25306, label %bb25332 + +bb25306: ; preds = %bb25305 + invoke void @quuux() + to label %bb25307 unwind label %bb25324 + +bb25307: ; preds = %bb25306 + invoke void @quux() + to label %bb25308 unwind label %bb25324 + +bb25308: ; preds = %bb25307 + br i1 undef, label %bb25309, label %bb25330 + +bb25309: ; preds = %bb25308 + br i1 undef, label %bb25310, label %bb25330 + +bb25310: ; preds = %bb25309 + br i1 undef, label %bb25311, label %bb25317 + +bb25311: ; preds = %bb25310 + br label %bb25312 + +bb25312: ; preds = %bb25316, %bb25315, %bb25311 + br i1 undef, label %bb25313, label %bb25317 + +bb25313: ; preds = %bb25312 + %tmp25314 = invoke zeroext i1 undef(%0* undef, %0* undef) + to label %bb25315 unwind label %bb25324 + +bb25315: ; preds = %bb25313 + br i1 %tmp25314, label %bb25316, label %bb25312 + +bb25316: ; preds = %bb25315 + br label %bb25312 + +bb25317: ; preds = %bb25312, %bb25310 + br i1 undef, label %bb25318, label %bb25326 + +bb25318: ; preds = %bb25317 + br i1 undef, label %bb25319, label %bb25326 + +bb25319: ; preds = %bb25318 + br i1 undef, label %bb25320, label %bb25323 + +bb25320: ; preds = %bb25319 + br i1 undef, label %bb25322, label %bb25321 + +bb25321: ; preds = %bb25320 + br label %bb25322 + +bb25322: ; preds = %bb25321, %bb25320 + br label %bb25326 + +bb25323: ; preds = %bb25319 + invoke void @qux() + to label %bb25326 unwind label %bb25324 + +bb25324: ; preds = %bb25357, %bb25344, %bb25343, %bb25342, %bb25337, %bb25334, %bb25333, %bb25323, %bb25313, %bb25307, %bb25306 + %tmp25325 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + cleanup + br label %bb25359 + +bb25326: ; preds = %bb25323, %bb25322, %bb25318, %bb25317 + br label %bb25327 + +bb25327: ; preds = %bb25328, %bb25326 + br i1 undef, label %bb25328, label %bb25329 + +bb25328: ; preds = %bb25327 + br label %bb25327 + +bb25329: ; preds 
= %bb25327 + br label %bb25330 + +bb25330: ; preds = %bb25329, %bb25309, %bb25308 + br i1 undef, label %bb25332, label %bb25331 + +bb25331: ; preds = %bb25330 + br label %bb25305 + +bb25332: ; preds = %bb25330, %bb25305 + br i1 undef, label %bb25333, label %bb25357 + +bb25333: ; preds = %bb25332 + invoke void (...)* @printf() + to label %bb25334 unwind label %bb25324 + +bb25334: ; preds = %bb25333 + invoke void (...)* @printf(i32 undef) + to label %bb25335 unwind label %bb25324 + +bb25335: ; preds = %bb25334 + br label %bb25336 + +bb25336: ; preds = %bb25338, %bb25335 + br i1 undef, label %bb25337, label %bb25339 + +bb25337: ; preds = %bb25336 + invoke void (...)* @printf(i32 undef, double undef) + to label %bb25338 unwind label %bb25324 + +bb25338: ; preds = %bb25337 + br label %bb25336 + +bb25339: ; preds = %bb25336 + br label %bb25340 + +bb25340: ; preds = %bb25341, %bb25339 + br i1 undef, label %bb25341, label %bb25342 + +bb25341: ; preds = %bb25340 + br label %bb25340 + +bb25342: ; preds = %bb25340 + invoke void (...)* @printf() + to label %bb25343 unwind label %bb25324 + +bb25343: ; preds = %bb25342 + invoke void (...)* @printf(double undef, double undef) + to label %bb25344 unwind label %bb25324 + +bb25344: ; preds = %bb25343 + invoke void @mux() + to label %bb25345 unwind label %bb25324 + +bb25345: ; preds = %bb25344 + br label %bb25346 + +bb25346: ; preds = %bb25347, %bb25345 + br i1 undef, label %bb25347, label %bb25348 + +bb25347: ; preds = %bb25346 + br label %bb25346 + +bb25348: ; preds = %bb25346 + br label %bb25349 + +bb25349: ; preds = %bb25350, %bb25348 + br i1 undef, label %bb25350, label %bb25351 + +bb25350: ; preds = %bb25349 + br label %bb25349 + +bb25351: ; preds = %bb25349 + invoke void (...)* @printf() + to label %bb25352 unwind label %bb25355 + +bb25352: ; preds = %bb25351 + invoke void (...)* @printf(double undef) + to label %bb25353 unwind label %bb25355 + +bb25353: ; preds = %bb25352 + invoke void (...)* @printf() + to label %bb25354 
unwind label %bb25355 + +bb25354: ; preds = %bb25353 + br label %bb25358 + +bb25355: ; preds = %bb25353, %bb25352, %bb25351 + %tmp25356 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + cleanup + br label %bb25359 + +bb25357: ; preds = %bb25332 + invoke void (...)* @printf() + to label %bb25358 unwind label %bb25324 + +bb25358: ; preds = %bb25357, %bb25354 + br label %bb25362 + +bb25359: ; preds = %bb25355, %bb25324 + br label %bb25360 + +bb25360: ; preds = %bb25359, %bb25298 + br label %bb25361 + +bb25361: ; preds = %bb25360, %bb25276 + resume { i8*, i32 } undef + +bb25362: ; preds = %bb25358, %bb25270, %bb1 + ret void +} + +declare void @foo() + +declare i32 @__gxx_personality_v0(...) + +declare void @bar() uwtable ssp align 2 + +declare hidden void @baz() uwtable ssp align 2 + +declare void @printf(...) + +declare void @mux() unnamed_addr uwtable ssp align 2 + +declare hidden void @qux() uwtable ssp align 2 + +declare void @quux() uwtable ssp + +declare void @quuux() uwtable ssp + +declare hidden void @quuuux() uwtable ssp align 2 diff --git a/test/CodeGen/X86/lea-recursion.ll b/test/CodeGen/X86/lea-recursion.ll index 3f32fd2..9480600 100644 --- a/test/CodeGen/X86/lea-recursion.ll +++ b/test/CodeGen/X86/lea-recursion.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 | grep lea | count 12 +; RUN: llc < %s -march=x86-64 | grep lea | count 13 ; This testcase was written to demonstrate an instruction-selection problem, ; however it also happens to expose a limitation in the DAGCombiner's @@ -44,4 +44,3 @@ entry: store i32 %tmp10.6, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 7) ret void } - diff --git a/test/CodeGen/X86/lea.ll b/test/CodeGen/X86/lea.ll index affd6bf..93cfe46 100644 --- a/test/CodeGen/X86/lea.ll +++ b/test/CodeGen/X86/lea.ll @@ -28,8 +28,7 @@ bb.nph: bb2: ret i32 %x_offs ; CHECK-LABEL: test2: -; CHECK: movl %e[[A0]], %eax -; CHECK: addl $-5, %eax +; CHECK: leal -5(%r[[A0:..]]), %eax ; CHECK: andl $-4, 
%eax ; CHECK: negl %eax ; CHECK: leal -4(%r[[A0]],%rax), %eax diff --git a/test/CodeGen/X86/leaf-fp-elim.ll b/test/CodeGen/X86/leaf-fp-elim.ll index 7eebf8d..1bb3c75 100644 --- a/test/CodeGen/X86/leaf-fp-elim.ll +++ b/test/CodeGen/X86/leaf-fp-elim.ll @@ -6,7 +6,7 @@ target triple = "x86_64-apple-darwin11.0" @msg = internal global i8* null ; <i8**> [#uses=1] @.str = private constant [2 x i8] c"x\00", align 1 ; <[2 x i8]*> [#uses=1] -define void @test(i8* %p) "no-frame-pointer-elim-non-leaf"="true" nounwind optsize ssp { +define void @test(i8* %p) "no-frame-pointer-elim-non-leaf" nounwind optsize ssp { ; No stack frame, please. ; CHECK: _test diff --git a/test/CodeGen/X86/legalize-shift-64.ll b/test/CodeGen/X86/legalize-shift-64.ll index 7736468..64460bb 100644 --- a/test/CodeGen/X86/legalize-shift-64.ll +++ b/test/CodeGen/X86/legalize-shift-64.ll @@ -64,3 +64,31 @@ define <2 x i64> @test5(<2 x i64> %A, <2 x i64> %B) { ; CHECK: shl ; CHECK: shldl } + +; PR16108 +define i32 @test6() { + %x = alloca i32, align 4 + %t = alloca i64, align 8 + store i32 1, i32* %x, align 4 + store i64 1, i64* %t, align 8 ;; DEAD + %load = load i32* %x, align 4 + %shl = shl i32 %load, 8 + %add = add i32 %shl, -224 + %sh_prom = zext i32 %add to i64 + %shl1 = shl i64 1, %sh_prom + %cmp = icmp ne i64 %shl1, 4294967296 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + ret i32 1 + +if.end: ; preds = %entry + ret i32 0 + +; CHECK-LABEL: test6: +; CHECK-NOT: andb $31 +; CHECK: sete +; CHECK: movzbl +; CHECK: xorl $1 +; CHECK: orl +} diff --git a/test/CodeGen/X86/lit.local.cfg b/test/CodeGen/X86/lit.local.cfg index 9d285bf..1637fa4 100644 --- a/test/CodeGen/X86/lit.local.cfg +++ b/test/CodeGen/X86/lit.local.cfg @@ -1,4 +1,10 @@ -config.suffixes = ['.ll', '.c', '.cpp', '.test'] +# FIXME: For now, override suffixes to exclude any .s tests, because some of the +# buildbots have a stray misched-copy.s output file lying around that causes +# failures. 
See misched-copy.s where we try and clean up that file. +# +# It should be possible to remove this override once all the bots have cycled +# cleanly. +config.suffixes = ['.ll', '.c', '.cpp', '.test', '.txt'] targets = set(config.root.targets_to_build.split()) if not 'X86' in targets: diff --git a/test/CodeGen/X86/load-slice.ll b/test/CodeGen/X86/load-slice.ll new file mode 100644 index 0000000..85fd7f0 --- /dev/null +++ b/test/CodeGen/X86/load-slice.ll @@ -0,0 +1,139 @@ +; RUN: llc -mtriple x86_64-apple-macosx -mcpu=corei7-avx -combiner-stress-load-slicing < %s -o - | FileCheck %s --check-prefix=STRESS +; RUN: llc -mtriple x86_64-apple-macosx -mcpu=corei7-avx < %s -o - | FileCheck %s --check-prefix=REGULAR +; +; <rdar://problem/14477220> + +%class.Complex = type { float, float } + + +; Check that independant slices leads to independant loads then the slices leads to +; different register file. +; +; The layout is: +; LSB 0 1 2 3 | 4 5 6 7 MSB +; Low High +; The base address points to 0 and is 8-bytes aligned. +; Low slice starts at 0 (base) and is 8-bytes aligned. +; High slice starts at 4 (base + 4-bytes) and is 4-bytes aligned. +; +; STRESS-LABEL: t1: +; Load out[out_start + 8].real, this is base + 8 * 8 + 0. +; STRESS: vmovss 64([[BASE:[^(]+]]), [[OUT_Real:%xmm[0-9]+]] +; Add low slice: out[out_start].real, this is base + 0. +; STRESS-NEXT: vaddss ([[BASE]]), [[OUT_Real]], [[RES_Real:%xmm[0-9]+]] +; Load out[out_start + 8].imm, this is base + 8 * 8 + 4. +; STRESS-NEXT: vmovss 68([[BASE]]), [[OUT_Imm:%xmm[0-9]+]] +; Add high slice: out[out_start].imm, this is base + 4. +; STRESS-NEXT: vaddss 4([[BASE]]), [[OUT_Imm]], [[RES_Imm:%xmm[0-9]+]] +; Swap Imm and Real. +; STRESS-NEXT: vinsertps $16, [[RES_Imm]], [[RES_Real]], [[RES_Vec:%xmm[0-9]+]] +; Put the results back into out[out_start]. +; STRESS-NEXT: vmovq [[RES_Vec]], ([[BASE]]) +; +; Same for REGULAR, we eliminate register bank copy with each slices. 
+; REGULAR-LABEL: t1: +; Load out[out_start + 8].real, this is base + 8 * 8 + 0. +; REGULAR: vmovss 64([[BASE:[^)]+]]), [[OUT_Real:%xmm[0-9]+]] +; Add low slice: out[out_start].real, this is base + 0. +; REGULAR-NEXT: vaddss ([[BASE]]), [[OUT_Real]], [[RES_Real:%xmm[0-9]+]] +; Load out[out_start + 8].imm, this is base + 8 * 8 + 4. +; REGULAR-NEXT: vmovss 68([[BASE]]), [[OUT_Imm:%xmm[0-9]+]] +; Add high slice: out[out_start].imm, this is base + 4. +; REGULAR-NEXT: vaddss 4([[BASE]]), [[OUT_Imm]], [[RES_Imm:%xmm[0-9]+]] +; Swap Imm and Real. +; REGULAR-NEXT: vinsertps $16, [[RES_Imm]], [[RES_Real]], [[RES_Vec:%xmm[0-9]+]] +; Put the results back into out[out_start]. +; REGULAR-NEXT: vmovq [[RES_Vec]], ([[BASE]]) +define void @t1(%class.Complex* nocapture %out, i64 %out_start) { +entry: + %arrayidx = getelementptr inbounds %class.Complex* %out, i64 %out_start + %tmp = bitcast %class.Complex* %arrayidx to i64* + %tmp1 = load i64* %tmp, align 8 + %t0.sroa.0.0.extract.trunc = trunc i64 %tmp1 to i32 + %tmp2 = bitcast i32 %t0.sroa.0.0.extract.trunc to float + %t0.sroa.2.0.extract.shift = lshr i64 %tmp1, 32 + %t0.sroa.2.0.extract.trunc = trunc i64 %t0.sroa.2.0.extract.shift to i32 + %tmp3 = bitcast i32 %t0.sroa.2.0.extract.trunc to float + %add = add i64 %out_start, 8 + %arrayidx2 = getelementptr inbounds %class.Complex* %out, i64 %add + %i.i = getelementptr inbounds %class.Complex* %arrayidx2, i64 0, i32 0 + %tmp4 = load float* %i.i, align 4 + %add.i = fadd float %tmp4, %tmp2 + %retval.sroa.0.0.vec.insert.i = insertelement <2 x float> undef, float %add.i, i32 0 + %r.i = getelementptr inbounds %class.Complex* %arrayidx2, i64 0, i32 1 + %tmp5 = load float* %r.i, align 4 + %add5.i = fadd float %tmp5, %tmp3 + %retval.sroa.0.4.vec.insert.i = insertelement <2 x float> %retval.sroa.0.0.vec.insert.i, float %add5.i, i32 1 + %ref.tmp.sroa.0.0.cast = bitcast %class.Complex* %arrayidx to <2 x float>* + store <2 x float> %retval.sroa.0.4.vec.insert.i, <2 x float>* 
%ref.tmp.sroa.0.0.cast, align 4 + ret void +} + +; Function Attrs: nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1 + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) + +; Check that we do not read outside of the chunk of bits of the original loads. +; +; The 64-bits should have been split in one 32-bits and one 16-bits slices. +; The 16-bits should be zero extended to match the final type. +; +; The memory layout is: +; LSB 0 1 2 3 | 4 5 | 6 7 MSB +; Low High +; The base address points to 0 and is 8-bytes aligned. +; Low slice starts at 0 (base) and is 8-bytes aligned. +; High slice starts at 6 (base + 6-bytes) and is 2-bytes aligned. +; +; STRESS-LABEL: t2: +; STRESS: movzwl 6([[BASE:[^)]+]]), %eax +; STRESS-NEXT: addl ([[BASE]]), %eax +; STRESS-NEXT: ret +; +; For the REGULAR heuristic, this is not profitable to slice things that are not +; next to each other in memory. Here we have a hole with bytes #4-5. +; REGULAR-LABEL: t2: +; REGULAR: shrq $48 +define i32 @t2(%class.Complex* nocapture %out, i64 %out_start) { + %arrayidx = getelementptr inbounds %class.Complex* %out, i64 %out_start + %bitcast = bitcast %class.Complex* %arrayidx to i64* + %chunk64 = load i64* %bitcast, align 8 + %slice32_low = trunc i64 %chunk64 to i32 + %shift48 = lshr i64 %chunk64, 48 + %slice32_high = trunc i64 %shift48 to i32 + %res = add i32 %slice32_high, %slice32_low + ret i32 %res +} + +; Check that we do not optimize overlapping slices. +; +; The 64-bits should NOT have been split in as slices are overlapping. +; First slice uses bytes numbered 0 to 3. +; Second slice uses bytes numbered 6 and 7. +; Third slice uses bytes numbered 4 to 7. 
+; +; STRESS-LABEL: t3: +; STRESS: shrq $48 +; STRESS: shrq $32 +; +; REGULAR-LABEL: t3: +; REGULAR: shrq $48 +; REGULAR: shrq $32 +define i32 @t3(%class.Complex* nocapture %out, i64 %out_start) { + %arrayidx = getelementptr inbounds %class.Complex* %out, i64 %out_start + %bitcast = bitcast %class.Complex* %arrayidx to i64* + %chunk64 = load i64* %bitcast, align 8 + %slice32_low = trunc i64 %chunk64 to i32 + %shift48 = lshr i64 %chunk64, 48 + %slice32_high = trunc i64 %shift48 to i32 + %shift32 = lshr i64 %chunk64, 32 + %slice32_lowhigh = trunc i64 %shift32 to i32 + %tmpres = add i32 %slice32_high, %slice32_low + %res = add i32 %slice32_lowhigh, %tmpres + ret i32 %res +} diff --git a/test/CodeGen/X86/long-extend.ll b/test/CodeGen/X86/long-extend.ll new file mode 100644 index 0000000..5bbd41d --- /dev/null +++ b/test/CodeGen/X86/long-extend.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -mcpu=core-avx-i -mtriple=x86_64-linux -asm-verbose=0| FileCheck %s +define void @test_long_extend(<16 x i8> %a, <16 x i32>* %p) nounwind { +; CHECK-LABEL: test_long_extend +; CHECK: vpunpcklbw %xmm1, %xmm0, [[REG1:%xmm[0-9]+]] +; CHECK: vpunpckhwd %xmm1, [[REG1]], [[REG2:%xmm[0-9]+]] +; CHECK: vpunpcklwd %xmm1, [[REG1]], %x[[REG3:mm[0-9]+]] +; CHECK: vinsertf128 $1, [[REG2]], %y[[REG3]], [[REG_result0:%ymm[0-9]+]] +; CHECK: vpunpckhbw %xmm1, %xmm0, [[REG4:%xmm[0-9]+]] +; CHECK: vpunpckhwd %xmm1, [[REG4]], [[REG5:%xmm[0-9]+]] +; CHECK: vpunpcklwd %xmm1, [[REG4]], %x[[REG6:mm[0-9]+]] +; CHECK: vinsertf128 $1, [[REG5]], %y[[REG6]], [[REG_result1:%ymm[0-9]+]] +; CHECK: vmovaps [[REG_result1]], 32(%rdi) +; CHECK: vmovaps [[REG_result0]], (%rdi) + + %tmp = zext <16 x i8> %a to <16 x i32> + store <16 x i32> %tmp, <16 x i32>*%p + ret void +} diff --git a/test/CodeGen/X86/lsr-loop-exit-cond.ll b/test/CodeGen/X86/lsr-loop-exit-cond.ll index c7a3186..e7d74a9 100644 --- a/test/CodeGen/X86/lsr-loop-exit-cond.ll +++ b/test/CodeGen/X86/lsr-loop-exit-cond.ll @@ -2,12 +2,12 @@ ; RUN: llc 
-mtriple=x86_64-darwin -mcpu=atom < %s | FileCheck -check-prefix=ATOM %s ; CHECK-LABEL: t: -; CHECK: decq -; CHECK-NEXT: movl (%r9,%rax,4), %eax +; CHECK: movl (%r9,%rax,4), %e{{..}} +; CHECK-NEXT: decq ; CHECK-NEXT: jne ; ATOM-LABEL: t: -; ATOM: movl (%r9,%r{{.+}},4), %eax +; ATOM: movl (%r9,%r{{.+}},4), %e{{..}} ; ATOM-NEXT: decq ; ATOM-NEXT: jne diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll index c33cac2..4a4d178 100644 --- a/test/CodeGen/X86/masked-iv-safe.ll +++ b/test/CodeGen/X86/masked-iv-safe.ll @@ -1,15 +1,13 @@ -; RUN: llc < %s -mcpu=generic -march=x86-64 > %t -; RUN: not grep and %t -; RUN: not grep movz %t -; RUN: not grep sar %t -; RUN: not grep shl %t -; RUN: grep add %t | count 5 -; RUN: grep inc %t | count 2 -; RUN: grep lea %t | count 3 +; RUN: llc < %s -mcpu=generic -march=x86-64 | FileCheck %s ; Optimize away zext-inreg and sext-inreg on the loop induction ; variable using trip-count information. +; CHECK-LABEL: count_up +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: inc +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: jne define void @count_up(double* %d, i64 %n) nounwind { entry: br label %loop @@ -38,6 +36,11 @@ return: ret void } +; CHECK-LABEL: count_down +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: addq +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: jne define void @count_down(double* %d, i64 %n) nounwind { entry: br label %loop @@ -66,6 +69,11 @@ return: ret void } +; CHECK-LABEL: count_up_signed +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: inc +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: jne define void @count_up_signed(double* %d, i64 %n) nounwind { entry: br label %loop @@ -96,6 +104,11 @@ return: ret void } +; CHECK-LABEL: count_down_signed +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: addq +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: jne define void @count_down_signed(double* %d, i64 %n) nounwind { entry: br label %loop @@ -126,6 +139,11 @@ return: ret void } +; CHECK-LABEL: another_count_up +; CHECK-NOT: 
{{and|movz|sar|shl}} +; CHECK: addq +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: jne define void @another_count_up(double* %d, i64 %n) nounwind { entry: br label %loop @@ -154,6 +172,11 @@ return: ret void } +; CHECK-LABEL: another_count_down +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: decq +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: jne define void @another_count_down(double* %d, i64 %n) nounwind { entry: br label %loop @@ -182,6 +205,11 @@ return: ret void } +; CHECK-LABEL: another_count_up_signed +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: addq +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: jne define void @another_count_up_signed(double* %d, i64 %n) nounwind { entry: br label %loop @@ -212,6 +240,11 @@ return: ret void } +; CHECK-LABEL: another_count_down_signed +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: decq +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: jne define void @another_count_down_signed(double* %d, i64 %n) nounwind { entry: br label %loop diff --git a/test/CodeGen/X86/maskmovdqu.ll b/test/CodeGen/X86/maskmovdqu.ll index 7796f0e..0b3334d 100644 --- a/test/CodeGen/X86/maskmovdqu.ll +++ b/test/CodeGen/X86/maskmovdqu.ll @@ -1,5 +1,7 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 | grep -i EDI -; RUN: llc < %s -march=x86-64 -mattr=+sse2 | grep -i RDI +; RUN: llc < %s -march=x86 -mattr=+sse2,-avx | grep -i EDI +; RUN: llc < %s -march=x86-64 -mattr=+sse2,-avx | grep -i RDI +; RUN: llc < %s -march=x86 -mattr=+avx | grep -i EDI +; RUN: llc < %s -march=x86-64 -mattr=+avx | grep -i RDI ; rdar://6573467 define void @test(<16 x i8> %a, <16 x i8> %b, i32 %dummy, i8* %c) nounwind { diff --git a/test/CodeGen/X86/mcinst-avx-lowering.ll b/test/CodeGen/X86/mcinst-avx-lowering.ll index 41f96e8..db72e08 100644 --- a/test/CodeGen/X86/mcinst-avx-lowering.ll +++ b/test/CodeGen/X86/mcinst-avx-lowering.ll @@ -4,7 +4,7 @@ define i64 @t1(double %d_ivar) nounwind uwtable ssp { entry: ; CHECK: t1 %0 = bitcast double %d_ivar to i64 -; CHECK: vmovd +; CHECK: vmovq ; CHECK: encoding: 
[0xc4,0xe1,0xf9,0x7e,0xc0] ret i64 %0 } @@ -13,7 +13,7 @@ define double @t2(i64 %d_ivar) nounwind uwtable ssp { entry: ; CHECK: t2 %0 = bitcast i64 %d_ivar to double -; CHECK: vmovd +; CHECK: vmovq ; CHECK: encoding: [0xc4,0xe1,0xf9,0x6e,0xc7] ret double %0 } diff --git a/test/CodeGen/X86/memcpy-2.ll b/test/CodeGen/X86/memcpy-2.ll index c17cc7f..6ae7807 100644 --- a/test/CodeGen/X86/memcpy-2.ll +++ b/test/CodeGen/X86/memcpy-2.ll @@ -56,15 +56,15 @@ entry: define void @t2(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp { entry: ; SSE2-Darwin-LABEL: t2: -; SSE2-Darwin: movaps (%eax), %xmm0 +; SSE2-Darwin: movaps (%ecx), %xmm0 ; SSE2-Darwin: movaps %xmm0, (%eax) ; SSE2-Mingw32-LABEL: t2: -; SSE2-Mingw32: movaps (%eax), %xmm0 +; SSE2-Mingw32: movaps (%ecx), %xmm0 ; SSE2-Mingw32: movaps %xmm0, (%eax) ; SSE1-LABEL: t2: -; SSE1: movaps (%eax), %xmm0 +; SSE1: movaps (%ecx), %xmm0 ; SSE1: movaps %xmm0, (%eax) ; NOSSE-LABEL: t2: @@ -91,14 +91,14 @@ entry: define void @t3(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp { entry: ; SSE2-Darwin-LABEL: t3: -; SSE2-Darwin: movsd (%eax), %xmm0 -; SSE2-Darwin: movsd 8(%eax), %xmm1 +; SSE2-Darwin: movsd (%ecx), %xmm0 +; SSE2-Darwin: movsd 8(%ecx), %xmm1 ; SSE2-Darwin: movsd %xmm1, 8(%eax) ; SSE2-Darwin: movsd %xmm0, (%eax) ; SSE2-Mingw32-LABEL: t3: -; SSE2-Mingw32: movsd (%eax), %xmm0 -; SSE2-Mingw32: movsd 8(%eax), %xmm1 +; SSE2-Mingw32: movsd (%ecx), %xmm0 +; SSE2-Mingw32: movsd 8(%ecx), %xmm1 ; SSE2-Mingw32: movsd %xmm1, 8(%eax) ; SSE2-Mingw32: movsd %xmm0, (%eax) diff --git a/test/CodeGen/X86/merge_store.ll b/test/CodeGen/X86/merge_store.ll new file mode 100644 index 0000000..940688c --- /dev/null +++ b/test/CodeGen/X86/merge_store.ll @@ -0,0 +1,30 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s + +define void @merge_store(i32* nocapture %a) { +; CHECK-LABEL: merge_store: +; CHECK: movq +; CHECK: movq +entry: + br label %for.body + + for.body: + %indvars.iv = phi 
i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + store i32 1, i32* %arrayidx, align 4 + %0 = or i64 %indvars.iv, 1 + %arrayidx2 = getelementptr inbounds i32* %a, i64 %0 + store i32 1, i32* %arrayidx2, align 4 + %1 = or i64 %indvars.iv, 2 + %arrayidx5 = getelementptr inbounds i32* %a, i64 %1 + store i32 1, i32* %arrayidx5, align 4 + %2 = or i64 %indvars.iv, 3 + %arrayidx8 = getelementptr inbounds i32* %a, i64 %2 + store i32 1, i32* %arrayidx8, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4 + %3 = trunc i64 %indvars.iv.next to i32 + %cmp = icmp slt i32 %3, 1000 + br i1 %cmp, label %for.body, label %for.end + + for.end: + ret void +} diff --git a/test/CodeGen/X86/mingw-alloca.ll b/test/CodeGen/X86/mingw-alloca.ll index ded4b73..72b6940 100644 --- a/test/CodeGen/X86/mingw-alloca.ll +++ b/test/CodeGen/X86/mingw-alloca.ll @@ -1,12 +1,14 @@ -; RUN: llc < %s | FileCheck %s +; RUN: llc < %s -mtriple=i386-pc-mingw32 | FileCheck %s -check-prefix=COFF +; RUN: llc < %s -mtriple=i386-pc-mingw32-elf | FileCheck %s -check-prefix=ELF target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "i386-pc-mingw32" define void @foo1(i32 %N) nounwind { entry: -; CHECK: _foo1: -; CHECK: calll __alloca +; COFF: _foo1: +; COFF: calll __alloca +; ELF: foo1: +; ELF: calll _alloca %tmp14 = alloca i32, i32 %N ; <i32*> [#uses=1] call void @bar1( i32* %tmp14 ) ret void @@ -16,11 +18,16 @@ declare void @bar1(i32*) define void @foo2(i32 inreg %N) nounwind { entry: -; CHECK: _foo2: -; CHECK: andl $-16, %esp -; CHECK: pushl %eax -; CHECK: calll __alloca -; CHECK: movl 8028(%esp), %eax +; COFF: _foo2: +; COFF: andl $-16, %esp +; COFF: pushl %eax +; COFF: calll __alloca +; COFF: movl 8028(%esp), %eax +; ELF: foo2: +; ELF: andl $-16, %esp +; ELF: pushl %eax +; ELF: calll _alloca +; ELF: movl 8028(%esp), %eax %A2 = alloca [2000 x i32], align 
16 ; <[2000 x i32]*> [#uses=1] %A2.sub = getelementptr [2000 x i32]* %A2, i32 0, i32 0 ; <i32*> [#uses=1] call void @bar2( i32* %A2.sub, i32 %N ) diff --git a/test/CodeGen/X86/misched-balance.ll b/test/CodeGen/X86/misched-balance.ll index 5f6c501..1900802 100644 --- a/test/CodeGen/X86/misched-balance.ll +++ b/test/CodeGen/X86/misched-balance.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 -pre-RA-sched=source -enable-misched -verify-machineinstrs | FileCheck %s ; ; Verify that misched resource/latency balancy heuristics are sane. @@ -15,7 +15,7 @@ entry: ; Since mmult1 IR is already in good order, this effectively ensure ; the scheduler maintains source order. ; -; CHECK: %for.body +; CHECK-LABEL: %for.body ; CHECK-NOT: %rsp ; CHECK: imull 4 ; CHECK-NOT: {{imull|rsp}} @@ -45,7 +45,7 @@ entry: ; CHECK-NOT: {{imull|rsp}} ; CHECK: addl ; CHECK-NOT: {{imull|rsp}} -; CHECK: %end +; CHECK-LABEL: %end for.body: %indvars.iv42.i = phi i64 [ %indvars.iv.next43.i, %for.body ], [ 0, %entry ] %tmp57 = load i32* %tmp56, align 4 @@ -120,7 +120,7 @@ end: ; Unlike the above loop, this IR starts out bad and must be ; rescheduled. ; -; CHECK: %for.body +; CHECK-LABEL: %for.body ; CHECK-NOT: %rsp ; CHECK: imull 4 ; CHECK-NOT: {{imull|rsp}} @@ -150,7 +150,7 @@ end: ; CHECK-NOT: {{imull|rsp}} ; CHECK: addl ; CHECK-NOT: {{imull|rsp}} -; CHECK: %end +; CHECK-LABEL: %end define void @unrolled_mmult2(i32* %tmp55, i32* %tmp56, i32* %pre, i32* %pre94, i32* %pre95, i32* %pre96, i32* %pre97, i32* %pre98, i32* %pre99, i32* %pre100, i32* %pre101, i32* %pre102, i32* %pre103, i32* %pre104) @@ -232,8 +232,8 @@ end: ; balanced heuristics are interesting here because we have resource, ; latency, and register limits all at once. For now, simply check that ; we don't use any callee-saves. 
-; CHECK: @encpc1 -; CHECK: %entry +; CHECK-LABEL: @encpc1 +; CHECK-LABEL: %entry ; CHECK-NOT: push ; CHECK-NOT: pop ; CHECK: ret diff --git a/test/CodeGen/X86/misched-copy.ll b/test/CodeGen/X86/misched-copy.ll index 0450cfb..4485b8a 100644 --- a/test/CodeGen/X86/misched-copy.ll +++ b/test/CodeGen/X86/misched-copy.ll @@ -8,11 +8,11 @@ ; MUL_HiLo PhysReg use copies should be just above the mul. ; MUL_HiLo PhysReg def copies should be just below the mul. ; -; CHECK: *** Final schedule for BB#1 *** -; CHECK-NEXT: %EAX<def> = COPY -; CHECK: MUL32r %vreg{{[0-9]+}}, %EAX<imp-def>, %EDX<imp-def>, %EFLAGS<imp-def,dead>, %EAX<imp-use>; -; CHECK-NEXT: COPY %E{{[AD]}}X; -; CHECK-NEXT: COPY %E{{[AD]}}X; +; CHECK: *** Final schedule for BB#1 *** +; CHECK: %EAX<def> = COPY +; CHECK-NEXT: MUL32r %vreg{{[0-9]+}}, %EAX<imp-def>, %EDX<imp-def>, %EFLAGS<imp-def,dead>, %EAX<imp-use>; +; CHECK-NEXT: COPY %E{{[AD]}}X +; CHECK-NEXT: COPY %E{{[AD]}}X ; CHECK: DIVSSrm define i64 @mulhoist(i32 %a, i32 %b) #0 { entry: @@ -42,7 +42,7 @@ end: ret i64 %add } -attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } !0 = metadata !{metadata !"float", metadata !1} !1 = metadata !{metadata !"omnipotent char", metadata !2} diff --git a/test/CodeGen/X86/misched-matmul.ll b/test/CodeGen/X86/misched-matmul.ll index 6b67607..5454b7c 100644 --- a/test/CodeGen/X86/misched-matmul.ll +++ b/test/CodeGen/X86/misched-matmul.ll @@ -3,11 +3,14 @@ ; ; Verify that register pressure heuristics are working in MachineScheduler. 
; -; When we enable subtree scheduling heuristics on X86, we may need a -; flag to disable it for this test case. +; We can further reduce spills in this case with a global register +; pressure heuristic, like sethi-ullman numbers or biasing toward +; scheduled subtrees. However, these heuristics are marginally +; beneficial on x86_64 and exacerbate register pressure in other +; more complex cases. ; ; CHECK: @wrap_mul4 -; CHECK: 22 regalloc - Number of spills inserted +; CHECK: 23 regalloc - Number of spills inserted define void @wrap_mul4(double* nocapture %Out, [4 x double]* nocapture %A, [4 x double]* nocapture %B) #0 { entry: @@ -221,4 +224,4 @@ entry: ret void } -attributes #0 = { noinline nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { noinline nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/X86/misched-matrix.ll b/test/CodeGen/X86/misched-matrix.ll index 4dc95c5..23b561f 100644 --- a/test/CodeGen/X86/misched-matrix.ll +++ b/test/CodeGen/X86/misched-matrix.ll @@ -15,19 +15,19 @@ ; been reordered with the stores. This tests the scheduler's cheap ; alias analysis ability (that doesn't require any AliasAnalysis pass). ; -; TOPDOWN: %for.body +; TOPDOWN-LABEL: %for.body ; TOPDOWN: movl %{{.*}}, ( ; TOPDOWN: imull {{[0-9]*}}( ; TOPDOWN: movl %{{.*}}, 4( ; TOPDOWN: imull {{[0-9]*}}( ; TOPDOWN: movl %{{.*}}, 8( ; TOPDOWN: movl %{{.*}}, 12( -; TOPDOWN: %for.end +; TOPDOWN-LABEL: %for.end ; ; For -misched=ilpmin, verify that each expression subtree is ; scheduled independently, and that the imull/adds are interleaved. 
; -; ILPMIN: %for.body +; ILPMIN-LABEL: %for.body ; ILPMIN: movl %{{.*}}, ( ; ILPMIN: imull ; ILPMIN: imull @@ -53,12 +53,12 @@ ; ILPMIN: imull ; ILPMIN: addl ; ILPMIN: movl %{{.*}}, 12( -; ILPMIN: %for.end +; ILPMIN-LABEL: %for.end ; ; For -misched=ilpmax, verify that each expression subtree is ; scheduled independently, and that the imull/adds are clustered. ; -; ILPMAX: %for.body +; ILPMAX-LABEL: %for.body ; ILPMAX: movl %{{.*}}, ( ; ILPMAX: imull ; ILPMAX: imull @@ -84,7 +84,7 @@ ; ILPMAX: addl ; ILPMAX: addl ; ILPMAX: movl %{{.*}}, 12( -; ILPMAX: %for.end +; ILPMAX-LABEL: %for.end define void @mmult([4 x i32]* noalias nocapture %m1, [4 x i32]* noalias nocapture %m2, [4 x i32]* noalias nocapture %m3) nounwind uwtable ssp { diff --git a/test/CodeGen/X86/mmx-builtins.ll b/test/CodeGen/X86/mmx-builtins.ll index f5b3f76..aabdd53 100644 --- a/test/CodeGen/X86/mmx-builtins.ll +++ b/test/CodeGen/X86/mmx-builtins.ll @@ -1,5 +1,7 @@ -; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3 | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3,-avx | FileCheck %s ; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s +; RUN: llc < %s -march=x86-64 -mattr=+mmx,+ssse3,-avx | FileCheck %s +; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone diff --git a/test/CodeGen/X86/mmx-punpckhdq.ll b/test/CodeGen/X86/mmx-punpckhdq.ll index 206cb33..9e8f5bf 100644 --- a/test/CodeGen/X86/mmx-punpckhdq.ll +++ b/test/CodeGen/X86/mmx-punpckhdq.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+mmx,+sse42 -mtriple=x86_64-apple-darwin10 | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=+mmx,+sse4.2 -mtriple=x86_64-apple-darwin10 | FileCheck %s ; There are no MMX operations in bork; promoted to XMM. 
define void @bork(<1 x i64>* %x) { diff --git a/test/CodeGen/X86/movbe.ll b/test/CodeGen/X86/movbe.ll index aa58c10..3f459be 100644 --- a/test/CodeGen/X86/movbe.ll +++ b/test/CodeGen/X86/movbe.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=x86_64-linux -mcpu=atom < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-linux -mcpu=slm < %s | FileCheck %s -check-prefix=SLM declare i32 @llvm.bswap.i32(i32) nounwind readnone declare i64 @llvm.bswap.i64(i64) nounwind readnone @@ -9,6 +10,8 @@ define void @test1(i32* nocapture %x, i32 %y) nounwind { ret void ; CHECK-LABEL: test1: ; CHECK: movbel %esi, (%rdi) +; SLM-LABEL: test1: +; SLM: movbel %esi, (%rdi) } define i32 @test2(i32* %x) nounwind { @@ -17,6 +20,8 @@ define i32 @test2(i32* %x) nounwind { ret i32 %bswap ; CHECK-LABEL: test2: ; CHECK: movbel (%rdi), %eax +; SLM-LABEL: test2: +; SLM: movbel (%rdi), %eax } define void @test3(i64* %x, i64 %y) nounwind { @@ -25,6 +30,8 @@ define void @test3(i64* %x, i64 %y) nounwind { ret void ; CHECK-LABEL: test3: ; CHECK: movbeq %rsi, (%rdi) +; SLM-LABEL: test3: +; SLM: movbeq %rsi, (%rdi) } define i64 @test4(i64* %x) nounwind { @@ -33,4 +40,6 @@ define i64 @test4(i64* %x) nounwind { ret i64 %bswap ; CHECK-LABEL: test4: ; CHECK: movbeq (%rdi), %rax +; SLM-LABEL: test4: +; SLM: movbeq (%rdi), %rax } diff --git a/test/CodeGen/X86/movgs.ll b/test/CodeGen/X86/movgs.ll index d3930fa..71b0723 100644 --- a/test/CodeGen/X86/movgs.ll +++ b/test/CodeGen/X86/movgs.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=sse41 | FileCheck %s --check-prefix=X32 -; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn -mattr=sse41 | FileCheck %s --check-prefix=X64 -; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=penryn -mattr=sse41 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=sse4.1 | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn -mattr=sse4.1 | FileCheck %s --check-prefix=X64 +; RUN: 
llc < %s -mtriple=x86_64-win32 -mcpu=penryn -mattr=sse4.1 | FileCheck %s --check-prefix=X64 define i32 @test1() nounwind readonly { entry: diff --git a/test/CodeGen/X86/neg_fp.ll b/test/CodeGen/X86/neg_fp.ll index 57164f2..efb02f8 100644 --- a/test/CodeGen/X86/neg_fp.ll +++ b/test/CodeGen/X86/neg_fp.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse41 -o %t +; RUN: llc < %s -march=x86 -mattr=+sse4.1 -o %t ; RUN: grep xorps %t | count 1 ; Test that when we don't -enable-unsafe-fp-math, we don't do the optimization diff --git a/test/CodeGen/X86/newline-and-quote.ll b/test/CodeGen/X86/newline-and-quote.ll new file mode 100644 index 0000000..9206e9f --- /dev/null +++ b/test/CodeGen/X86/newline-and-quote.ll @@ -0,0 +1,6 @@ +; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu | FileCheck %s +@"foo\22bar" = global i32 42 +; CHECK: .globl "foo\"bar" + +@"foo\0abar" = global i32 42 +; CHECK: .globl "foo\nbar" diff --git a/test/CodeGen/X86/no-compact-unwind.ll b/test/CodeGen/X86/no-compact-unwind.ll index 627f7da..991cd4e 100644 --- a/test/CodeGen/X86/no-compact-unwind.ll +++ b/test/CodeGen/X86/no-compact-unwind.ll @@ -1,4 +1,10 @@ -; RUN: llc < %s -mtriple x86_64-apple-macosx10.8.0 -disable-cfi | FileCheck %s +; RUN: llc < %s -mtriple x86_64-apple-macosx10.8.0 -mcpu corei7 -filetype=obj -o - \ +; RUN: | llvm-objdump -triple x86_64-apple-macosx10.8.0 -s - \ +; RUN: | FileCheck -check-prefix=CU %s +; RUN: llc < %s -mtriple x86_64-apple-darwin11 -mcpu corei7 \ +; RUN: | llvm-mc -triple x86_64-apple-darwin11 -filetype=obj -o - \ +; RUN: | llvm-objdump -triple x86_64-apple-darwin11 -s - \ +; RUN: | FileCheck -check-prefix=FROM-ASM %s %"struct.dyld::MappedRanges" = type { [400 x %struct.anon], %"struct.dyld::MappedRanges"* } %struct.anon = type { %class.ImageLoader*, i64, i64 } @@ -12,13 +18,15 @@ declare void @OSMemoryBarrier() optsize ; This compact unwind encoding indicates that we could not generate correct ; compact unwind encodings for this function. 
This then defaults to using the ; DWARF EH frame. -; -; CHECK: .section __LD,__compact_unwind,regular,debug -; CHECK: .quad _func -; CHECK: .long 67108864 ## Compact Unwind Encoding: 0x4000000 -; CHECK: .quad 0 ## Personality Function -; CHECK: .quad 0 ## LSDA -; + +; CU: Contents of section __compact_unwind: +; CU-NEXT: 0048 00000000 00000000 42000000 00000004 +; CU-NEXT: 0058 00000000 00000000 00000000 00000000 + +; FROM-ASM: Contents of section __compact_unwind: +; FROM-ASM-NEXT: 0048 00000000 00000000 42000000 00000004 +; FROM-ASM-NEXT: 0058 00000000 00000000 00000000 00000000 + define void @func(%class.ImageLoader* %image) optsize ssp uwtable { entry: br label %for.cond1.preheader diff --git a/test/CodeGen/X86/no-elf-compact-unwind.ll b/test/CodeGen/X86/no-elf-compact-unwind.ll new file mode 100644 index 0000000..8a15817 --- /dev/null +++ b/test/CodeGen/X86/no-elf-compact-unwind.ll @@ -0,0 +1,48 @@ +; RUN: llc < %s -mtriple x86_64-apple-macosx10.8.0 -disable-cfi | FileCheck -check-prefix=MACHO %s +; RUN: llc < %s -mtriple x86_64-unknown-linux -disable-cfi | FileCheck -check-prefix=ELF %s + +; Make sure we don't generate a compact unwind for ELF. 
+ +; MACHO-LABEL: _Z3barv: +; MACHO: __compact_unwind + +; ELF-LABEL: _Z3barv: +; ELF-NOT: __compact_unwind + +@_ZTIi = external constant i8* + +define void @_Z3barv() uwtable { +entry: + invoke void @_Z3foov() + to label %try.cont unwind label %lpad + +lpad: ; preds = %entry + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* bitcast (i8** @_ZTIi to i8*) + %1 = extractvalue { i8*, i32 } %0, 1 + %2 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) + %matches = icmp eq i32 %1, %2 + br i1 %matches, label %catch, label %eh.resume + +catch: ; preds = %lpad + %3 = extractvalue { i8*, i32 } %0, 0 + %4 = tail call i8* @__cxa_begin_catch(i8* %3) + tail call void @__cxa_end_catch() + br label %try.cont + +try.cont: ; preds = %entry, %catch + ret void + +eh.resume: ; preds = %lpad + resume { i8*, i32 } %0 +} + +declare void @_Z3foov() + +declare i32 @__gxx_personality_v0(...) + +declare i32 @llvm.eh.typeid.for(i8*) + +declare i8* @__cxa_begin_catch(i8*) + +declare void @__cxa_end_catch() diff --git a/test/CodeGen/X86/nocx16.ll b/test/CodeGen/X86/nocx16.ll new file mode 100644 index 0000000..cceaac4 --- /dev/null +++ b/test/CodeGen/X86/nocx16.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=-cx16 | FileCheck %s +define void @test(i128* %a) nounwind { +entry: +; CHECK: __sync_val_compare_and_swap_16 + %0 = cmpxchg i128* %a, i128 1, i128 1 seq_cst +; CHECK: __sync_lock_test_and_set_16 + %1 = atomicrmw xchg i128* %a, i128 1 seq_cst +; CHECK: __sync_fetch_and_add_16 + %2 = atomicrmw add i128* %a, i128 1 seq_cst +; CHECK: __sync_fetch_and_sub_16 + %3 = atomicrmw sub i128* %a, i128 1 seq_cst +; CHECK: __sync_fetch_and_and_16 + %4 = atomicrmw and i128* %a, i128 1 seq_cst +; CHECK: __sync_fetch_and_nand_16 + %5 = atomicrmw nand i128* %a, i128 1 seq_cst +; CHECK: __sync_fetch_and_or_16 + %6 = atomicrmw or i128* %a, i128 1 seq_cst +; CHECK: __sync_fetch_and_xor_16 + %7 = atomicrmw xor 
i128* %a, i128 1 seq_cst + ret void +} diff --git a/test/CodeGen/X86/object-size.ll b/test/CodeGen/X86/object-size.ll index 8f1eabd..ec35d29 100644 --- a/test/CodeGen/X86/object-size.ll +++ b/test/CodeGen/X86/object-size.ll @@ -10,7 +10,7 @@ target triple = "x86_64-apple-darwin10.0" define void @bar() nounwind ssp { entry: %tmp = load i8** @p ; <i8*> [#uses=1] - %0 = call i64 @llvm.objectsize.i64(i8* %tmp, i1 0) ; <i64> [#uses=1] + %0 = call i64 @llvm.objectsize.i64.p0i8(i8* %tmp, i1 0) ; <i64> [#uses=1] %cmp = icmp ne i64 %0, -1 ; <i1> [#uses=1] ; X64: movabsq $-1, [[RAX:%r..]] ; X64: cmpq $-1, [[RAX]] @@ -19,7 +19,7 @@ entry: cond.true: ; preds = %entry %tmp1 = load i8** @p ; <i8*> [#uses=1] %tmp2 = load i8** @p ; <i8*> [#uses=1] - %1 = call i64 @llvm.objectsize.i64(i8* %tmp2, i1 1) ; <i64> [#uses=1] + %1 = call i64 @llvm.objectsize.i64.p0i8(i8* %tmp2, i1 1) ; <i64> [#uses=1] %call = call i8* @__strcpy_chk(i8* %tmp1, i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i64 %1) ssp ; <i8*> [#uses=1] br label %cond.end @@ -33,7 +33,7 @@ cond.end: ; preds = %cond.false, %cond.t ret void } -declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readonly +declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) nounwind readonly declare i8* @__strcpy_chk(i8*, i8*, i64) ssp @@ -47,7 +47,7 @@ entry: %tmp = load i8** %__dest.addr ; <i8*> [#uses=1] %tmp1 = load i8** %__src.addr ; <i8*> [#uses=1] %tmp2 = load i8** %__dest.addr ; <i8*> [#uses=1] - %0 = call i64 @llvm.objectsize.i64(i8* %tmp2, i1 1) ; <i64> [#uses=1] + %0 = call i64 @llvm.objectsize.i64.p0i8(i8* %tmp2, i1 1) ; <i64> [#uses=1] %call = call i8* @__strcpy_chk(i8* %tmp, i8* %tmp1, i64 %0) ssp ; <i8*> [#uses=1] store i8* %call, i8** %retval %1 = load i8** %retval ; <i8*> [#uses=1] diff --git a/test/CodeGen/X86/opt-shuff-tstore.ll b/test/CodeGen/X86/opt-shuff-tstore.ll index 3e72084..fc43e81 100644 --- a/test/CodeGen/X86/opt-shuff-tstore.ll +++ b/test/CodeGen/X86/opt-shuff-tstore.ll @@ -1,4 +1,4 @@ -; RUN: llc 
-mcpu=corei7 -mtriple=x86_64-linux < %s -mattr=+sse2,+sse41 | FileCheck %s +; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s -mattr=+sse2,+sse4.1 | FileCheck %s ; CHECK: func_4_8 ; A single memory write diff --git a/test/CodeGen/X86/palignr.ll b/test/CodeGen/X86/palignr.ll index c76cbbe..ec6564d 100644 --- a/test/CodeGen/X86/palignr.ll +++ b/test/CodeGen/X86/palignr.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 -mcpu=core2 -mattr=+ssse3 | FileCheck %s -; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck --check-prefix=YONAH %s +; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck --check-prefix=CHECK-YONAH %s define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind { ; CHECK-LABEL: test1: diff --git a/test/CodeGen/X86/patchpoint.ll b/test/CodeGen/X86/patchpoint.ll new file mode 100644 index 0000000..d534639 --- /dev/null +++ b/test/CodeGen/X86/patchpoint.ll @@ -0,0 +1,100 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -disable-fp-elim | FileCheck %s + +; Trivial patchpoint codegen +; +define i64 @trivial_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) { +entry: +; CHECK-LABEL: trivial_patchpoint_codegen: +; CHECK: movabsq $-559038736, %r11 +; CHECK-NEXT: callq *%r11 +; CHECK-NEXT: nop +; CHECK: movq %rax, %[[REG:r.+]] +; CHECK: callq *%r11 +; CHECK-NEXT: nop +; CHECK: movq %[[REG]], %rax +; CHECK: ret + %resolveCall2 = inttoptr i64 -559038736 to i8* + %result = tail call i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 2, i32 15, i8* %resolveCall2, i32 4, i64 %p1, i64 %p2, i64 %p3, i64 %p4) + %resolveCall3 = inttoptr i64 -559038737 to i8* + tail call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 3, i32 15, i8* %resolveCall3, i32 2, i64 %p1, i64 %result) + ret i64 %result +} + +; Caller frame metadata with stackmaps. This should not be optimized +; as a leaf function. 
+; +; CHECK-LABEL: caller_meta_leaf +; CHECK: subq $32, %rsp +; CHECK: Ltmp +; CHECK: addq $32, %rsp +; CHECK: ret +define void @caller_meta_leaf() { +entry: + %metadata = alloca i64, i32 3, align 8 + store i64 11, i64* %metadata + store i64 12, i64* %metadata + store i64 13, i64* %metadata + call void (i32, i32, ...)* @llvm.experimental.stackmap(i32 4, i32 0, i64* %metadata) + ret void +} + +; Test the webkit_jscc calling convention. +; Two arguments will be pushed on the stack. +; Return value in $rax. +define void @jscall_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) { +entry: +; CHECK-LABEL: jscall_patchpoint_codegen: +; CHECK: Ltmp +; CHECK: movq %r{{.+}}, 8(%rsp) +; CHECK: movq %r{{.+}}, (%rsp) +; CHECK: Ltmp +; CHECK-NEXT: movabsq $-559038736, %r11 +; CHECK-NEXT: callq *%r11 +; CHECK: movq %rax, 8(%rsp) +; CHECK: callq + %resolveCall2 = inttoptr i64 -559038736 to i8* + %result = tail call webkit_jscc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 5, i32 15, i8* %resolveCall2, i32 2, i64 %p1, i64 %p2) + %resolveCall3 = inttoptr i64 -559038737 to i8* + tail call webkit_jscc void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 6, i32 15, i8* %resolveCall3, i32 2, i64 %p1, i64 %result) + ret void +} + +; Test patchpoints reusing the same TargetConstant. +; <rdar:15390785> Assertion failed: (CI.getNumArgOperands() >= NumArgs + 4) +; There is no way to verify this, since it depends on memory allocation. +; But I think it's useful to include as a working example. 
+define i64 @testLowerConstant(i64 %arg, i64 %tmp2, i64 %tmp10, i64* %tmp33, i64 %tmp79) { +entry: + %tmp80 = add i64 %tmp79, -16 + %tmp81 = inttoptr i64 %tmp80 to i64* + %tmp82 = load i64* %tmp81, align 8 + tail call void (i32, i32, ...)* @llvm.experimental.stackmap(i32 14, i32 5, i64 %arg, i64 %tmp2, i64 %tmp10, i64 %tmp82) + tail call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 15, i32 30, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp82) + %tmp83 = load i64* %tmp33, align 8 + %tmp84 = add i64 %tmp83, -24 + %tmp85 = inttoptr i64 %tmp84 to i64* + %tmp86 = load i64* %tmp85, align 8 + tail call void (i32, i32, ...)* @llvm.experimental.stackmap(i32 17, i32 5, i64 %arg, i64 %tmp10, i64 %tmp86) + tail call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 18, i32 30, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp86) + ret i64 10 +} + +; Test small patchpoints that don't emit calls. +define void @small_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) { +entry: +; CHECK-LABEL: small_patchpoint_codegen: +; CHECK: Ltmp +; CHECK: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: popq +; CHECK-NEXT: ret + %result = tail call i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 5, i32 5, i8* null, i32 2, i64 %p1, i64 %p2) + ret void +} + +declare void @llvm.experimental.stackmap(i32, i32, ...) +declare void @llvm.experimental.patchpoint.void(i32, i32, i8*, i32, ...) +declare i64 @llvm.experimental.patchpoint.i64(i32, i32, i8*, i32, ...) 
diff --git a/test/CodeGen/X86/peep-vector-extract-concat.ll b/test/CodeGen/X86/peep-vector-extract-concat.ll index 606a9be..f73ebb9 100644 --- a/test/CodeGen/X86/peep-vector-extract-concat.ll +++ b/test/CodeGen/X86/peep-vector-extract-concat.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse2,-sse41 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse2,-sse4.1 | FileCheck %s ; CHECK: pshufd $3, %xmm0, %xmm0 -; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse2,-sse41 | FileCheck %s -check-prefix=WIN64 +; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse2,-sse4.1 | FileCheck %s -check-prefix=WIN64 ; %a is passed indirectly on Win64. ; WIN64: movss 12(%rcx), %xmm0 diff --git a/test/CodeGen/X86/pmovext.ll b/test/CodeGen/X86/pmovext.ll index b85b4c3..f0e468f 100644 --- a/test/CodeGen/X86/pmovext.ll +++ b/test/CodeGen/X86/pmovext.ll @@ -18,5 +18,28 @@ define void @intrin_pmov(i16* noalias %dest, i8* noalias %src) nounwind uwtable } declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone - declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind + +; rdar://15245794 + +define <4 x i32> @foo0(double %v.coerce) nounwind ssp { +; CHECK-LABEL: foo0 +; CHECK: pmovzxwd %xmm0, %xmm0 +; CHECK-NEXT: ret + %tmp = bitcast double %v.coerce to <4 x i16> + %tmp1 = shufflevector <4 x i16> %tmp, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> + %tmp2 = tail call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp1) nounwind + ret <4 x i32> %tmp2 +} + +define <8 x i16> @foo1(double %v.coerce) nounwind ssp { +; CHECK-LABEL: foo1 +; CHECK: pmovzxbw %xmm0, %xmm0 +; CHECK-NEXT: ret + %tmp = bitcast double %v.coerce to <8 x i8> + %tmp1 = shufflevector <8 x i8> %tmp, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %tmp2 = tail call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x 
i8> %tmp1) + ret <8 x i16> %tmp2 +} + +declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone diff --git a/test/CodeGen/X86/pmovsx-inreg.ll b/test/CodeGen/X86/pmovsx-inreg.ll index d30d7d0..07979f6 100644 --- a/test/CodeGen/X86/pmovsx-inreg.ll +++ b/test/CodeGen/X86/pmovsx-inreg.ll @@ -86,8 +86,7 @@ define void @test6(<16 x i8>* %in, <16 x i16>* %out) nounwind { ret void ; AVX2-LABEL: test6: -; FIXME: v16i8 -> v16i16 is scalarized. -; AVX2-NOT: pmovsx +; AVX2: vpmovsxbw } define void @test7(<2 x i16>* %in, <2 x i64>* %out) nounwind { diff --git a/test/CodeGen/X86/pmul.ll b/test/CodeGen/X86/pmul.ll index da4af81..7bf8a61 100644 --- a/test/CodeGen/X86/pmul.ll +++ b/test/CodeGen/X86/pmul.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=x86 -mattr=sse41 -mcpu=nehalem -stack-alignment=16 > %t +; RUN: llc < %s -march=x86 -mattr=sse4.1 -mcpu=nehalem -stack-alignment=16 > %t ; RUN: grep pmul %t | count 12 -; RUN: grep mov %t | count 11 +; RUN: grep mov %t | count 14 define <4 x i32> @a(<4 x i32> %i) nounwind { %A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 > diff --git a/test/CodeGen/X86/pmulld.ll b/test/CodeGen/X86/pmulld.ll index 4103eab..3db0f73 100644 --- a/test/CodeGen/X86/pmulld.ll +++ b/test/CodeGen/X86/pmulld.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse41 -asm-verbose=0 | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse41 -asm-verbose=0 | FileCheck %s -check-prefix=WIN64 +; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse4.1 -asm-verbose=0 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse4.1 -asm-verbose=0 | FileCheck %s -check-prefix=WIN64 define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind { ; CHECK-LABEL: test1: diff --git a/test/CodeGen/X86/pr10523.ll b/test/CodeGen/X86/pr10523.ll index 7191d69..0ec22a0 100644 --- a/test/CodeGen/X86/pr10523.ll +++ b/test/CodeGen/X86/pr10523.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse41 +; RUN: llc < %s -march=x86-64 
-mattr=+sse2,+sse4.1 ; No check in a crash test diff --git a/test/CodeGen/X86/pr10524.ll b/test/CodeGen/X86/pr10524.ll index ed3e7c5..12bdba9 100644 --- a/test/CodeGen/X86/pr10524.ll +++ b/test/CodeGen/X86/pr10524.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse41 +; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse4.1 ; No check in a crash test diff --git a/test/CodeGen/X86/pr10525.ll b/test/CodeGen/X86/pr10525.ll index 342c1d6..30ce297 100644 --- a/test/CodeGen/X86/pr10525.ll +++ b/test/CodeGen/X86/pr10525.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse41 +; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse4.1 ; No check in a crash test diff --git a/test/CodeGen/X86/pr10526.ll b/test/CodeGen/X86/pr10526.ll index 6963fe5..9fa83ce 100644 --- a/test/CodeGen/X86/pr10526.ll +++ b/test/CodeGen/X86/pr10526.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse41 +; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse4.1 ; No check in a crash test diff --git a/test/CodeGen/X86/pr12312.ll b/test/CodeGen/X86/pr12312.ll index 087b8d7..81aaf91 100644 --- a/test/CodeGen/X86/pr12312.ll +++ b/test/CodeGen/X86/pr12312.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse41,-avx < %s | FileCheck %s --check-prefix SSE41 +; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1,-avx < %s | FileCheck %s --check-prefix SSE41 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx,-avx2 < %s | FileCheck %s --check-prefix AVX define i32 @veccond128(<4 x i32> %input) { diff --git a/test/CodeGen/X86/pr14088.ll b/test/CodeGen/X86/pr14088.ll index 505e3b5..16f20d0 100644 --- a/test/CodeGen/X86/pr14088.ll +++ b/test/CodeGen/X86/pr14088.ll @@ -19,7 +19,14 @@ return: ret i32 %retval.0 } -; We were miscompiling this and using %ax instead of %cx in the movw. 
-; CHECK: movswl %cx, %ecx -; CHECK: movw %cx, (%rsi) -; CHECK: movslq %ecx, %rcx +; We were miscompiling this and using %ax instead of %cx in the movw +; in the following sequence: +; movswl %cx, %ecx +; movw %cx, (%rsi) +; movslq %ecx, %rcx +; +; We can't produce the above sequence without special SD-level +; heuristics. Now we produce this: +; CHECK: movw %ax, (%rsi) +; CHECK: cwtl +; CHECK: cltq diff --git a/test/CodeGen/X86/pr14090.ll b/test/CodeGen/X86/pr14090.ll index d76b912..2f7c720 100644 --- a/test/CodeGen/X86/pr14090.ll +++ b/test/CodeGen/X86/pr14090.ll @@ -48,11 +48,11 @@ entry: %fifteen = bitcast i64* %retval.i.i to i32** %sixteen = bitcast i64* %retval.i.i to i8* call void @llvm.lifetime.start(i64 8, i8* %sixteen) - store i32* %.ph.i80, i32** %fifteen, align 8, !tbaa !0 + store i32* %.ph.i80, i32** %fifteen, align 8 %sunkaddr = ptrtoint i64* %retval.i.i to i32 %sunkaddr86 = add i32 %sunkaddr, 4 %sunkaddr87 = inttoptr i32 %sunkaddr86 to i32* - store i32 %fourteen, i32* %sunkaddr87, align 4, !tbaa !3 + store i32 %fourteen, i32* %sunkaddr87, align 4 %seventeen = load i64* %retval.i.i, align 8 call void @llvm.lifetime.end(i64 8, i8* %sixteen) %eighteen = lshr i64 %seventeen, 32 @@ -68,9 +68,3 @@ entry: declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} -!3 = metadata !{metadata !"any pointer", metadata !1} -!4 = metadata !{metadata !"vtable pointer", metadata !2} diff --git a/test/CodeGen/X86/pr1505b.ll b/test/CodeGen/X86/pr1505b.ll index 9b0ef83..c348fec 100644 --- a/test/CodeGen/X86/pr1505b.ll +++ b/test/CodeGen/X86/pr1505b.ll @@ -57,11 +57,10 @@ entry: %tmp22 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZNSolsEd( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp16, double %tmp1920 ) 
; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1] %tmp30 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp22 ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=0] ; reload: -; CHECK: fld -; CHECK: fstps ; CHECK: ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc %tmp34 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc( %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4cout, i8* getelementptr ([13 x i8]* @.str1, i32 0, i32 0) ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1] %tmp3940 = fpext float %tmp1314 to double ; <double> [#uses=1] +; CHECK: fld ; CHECK: fstpl ; CHECK: ZNSolsEd %tmp42 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZNSolsEd( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp34, double %tmp3940 ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1] diff --git a/test/CodeGen/X86/pr16031.ll b/test/CodeGen/X86/pr16031.ll index ab0b5ef..ecf6218 100644 --- a/test/CodeGen/X86/pr16031.ll +++ b/test/CodeGen/X86/pr16031.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=corei7-avx -enable-misched=false | FileCheck %s ; CHECK-LABEL: main: ; CHECK: pushl %esi diff --git a/test/CodeGen/X86/pr16807.ll b/test/CodeGen/X86/pr16807.ll new file mode 100644 index 0000000..6d55d99 --- /dev/null +++ b/test/CodeGen/X86/pr16807.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=x86_64-linux-gnu -mcpu=core-avx-i | FileCheck %s + +define <16 x i16> @f_fu(<16 x i16> %bf) { +allocas: + %avg.i.i = sdiv <16 x i16> %bf, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, 
i16 4, i16 4, i16 4, i16 4> + ret <16 x i16> %avg.i.i +} + +; CHECK: f_fu +; CHECK: psraw +; CHECK: psrlw +; CHECK: paddw +; CHECK: psraw +; CHECK: psraw +; CHECK: psrlw +; CHECK: paddw +; CHECK: psraw +; CHECK: ret diff --git a/test/CodeGen/X86/pr17546.ll b/test/CodeGen/X86/pr17546.ll new file mode 100644 index 0000000..174fa5c --- /dev/null +++ b/test/CodeGen/X86/pr17546.ll @@ -0,0 +1,10 @@ +; RUN: llc < %s -mtriple=x86_64-linux-gnu -mcpu=core-avx2 | FileCheck %s + +define i32 @f_f___un_3C_unf_3E_un_3C_unf_3E_(<8 x i32> %__mask, i64 %BBBB) { + %QQQ = trunc i64 %BBBB to i32 + %1 = extractelement <8 x i32> %__mask, i32 %QQQ + ret i32 %1 +} + +; CHECK: f_f___un_3C_unf_3E_un_3C_unf_3E_ +; CHECK: ret diff --git a/test/CodeGen/X86/pr17631.ll b/test/CodeGen/X86/pr17631.ll new file mode 100644 index 0000000..98f951f --- /dev/null +++ b/test/CodeGen/X86/pr17631.ll @@ -0,0 +1,34 @@ +; RUN: llc < %s -mcpu=core-avx-i -mtriple=i386-pc-win32 | FileCheck %s + +%struct_type = type { [64 x <8 x float>], <8 x float> } + +; Function Attrs: nounwind readnone +declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) + +; Function Attrs: nounwind +define i32 @equal(<8 x i32> %A) { +allocas: + %first_alloc = alloca [64 x <8 x i32>] + %second_alloc = alloca %struct_type + + %A1 = bitcast <8 x i32> %A to <8 x float> + %A2 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %A1) + ret i32 %A2 +} + +; CHECK: equal +; CHECK-NOT: vzeroupper +; CHECK: _chkstk +; CHECK: ret + +define <8 x float> @foo(<8 x float> %y, i64* %p, double %x) { + %i = fptoui double %x to i64 + store i64 %i, i64* %p + %ret = fadd <8 x float> %y, %y + ret <8 x float> %ret +} + +; CHECK: foo +; CHECK-NOT: vzeroupper +; CHECK: _ftol2 +; CHECK: ret diff --git a/test/CodeGen/X86/pr17764.ll b/test/CodeGen/X86/pr17764.ll new file mode 100644 index 0000000..7a3fd6d --- /dev/null +++ b/test/CodeGen/X86/pr17764.ll @@ -0,0 +1,10 @@ +; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core-avx2 | FileCheck %s + +define <16 x i16> @foo(<16 x 
i1> %mask, <16 x i16> %x, <16 x i16> %y) { + %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %y + ret <16 x i16> %ret +} + +; CHECK: foo +; CHECK: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 +; CHECK: ret diff --git a/test/CodeGen/X86/pr18014.ll b/test/CodeGen/X86/pr18014.ll new file mode 100644 index 0000000..e3860b8 --- /dev/null +++ b/test/CodeGen/X86/pr18014.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=x86_64-linux-pc -mcpu=penryn | FileCheck %s + +; Ensure PSRAD is generated as the condition is consumed by both PADD and +; BLENDVPS. PAND requires all bits setting properly. + +define <4 x i32> @foo(<4 x i32>* %p, <4 x i1> %cond, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { + %sext_cond = sext <4 x i1> %cond to <4 x i32> + %t1 = add <4 x i32> %v1, %sext_cond + %t2 = select <4 x i1> %cond, <4 x i32> %v1, <4 x i32> %v2 + store <4 x i32> %t2, <4 x i32>* %p + ret <4 x i32> %t1 +; CHECK: foo +; CHECK: pslld +; CHECK: psrad +; CHECK: ret +} diff --git a/test/CodeGen/X86/pr18023.ll b/test/CodeGen/X86/pr18023.ll new file mode 100644 index 0000000..4c6f8cf --- /dev/null +++ b/test/CodeGen/X86/pr18023.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -mtriple x86_64-apple-macosx10.9.0 | FileCheck %s +; PR18023 + +; CHECK: movabsq $4294967296, %rcx +; CHECK: movq %rcx, (%rax) +; CHECK: movl $1, 4(%rax) +; CHECK: movl $0, 4(%rax) +; CHECK: movq $1, 4(%rax) + +@c = common global i32 0, align 4 +@a = common global [3 x i32] zeroinitializer, align 4 +@b = common global i32 0, align 4 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 + +define void @func() { + store i32 1, i32* getelementptr inbounds ([3 x i32]* @a, i64 0, i64 1), align 4 + store i32 0, i32* getelementptr inbounds ([3 x i32]* @a, i64 0, i64 0), align 4 + %1 = load volatile i32* @b, align 4 + store i32 1, i32* getelementptr inbounds ([3 x i32]* @a, i64 0, i64 1), align 4 + store i32 0, i32* getelementptr inbounds ([3 x i32]* @a, i64 0, i64 1), align 4 + %2 = load volatile i32* @b, align 4 + store i32 
1, i32* getelementptr inbounds ([3 x i32]* @a, i64 0, i64 1), align 4 + store i32 0, i32* getelementptr inbounds ([3 x i32]* @a, i64 0, i64 2), align 4 + %3 = load volatile i32* @b, align 4 + store i32 3, i32* @c, align 4 + %4 = load i32* getelementptr inbounds ([3 x i32]* @a, i64 0, i64 1), align 4 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %4) + ret void +} + +declare i32 @printf(i8*, ...) diff --git a/test/CodeGen/X86/pr18054.ll b/test/CodeGen/X86/pr18054.ll new file mode 100644 index 0000000..b7af516 --- /dev/null +++ b/test/CodeGen/X86/pr18054.ll @@ -0,0 +1,10 @@ +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=penryn | FileCheck %s + +define void @foo(<16 x i32>* %p, <16 x i1> %x) { + %ret = sext <16 x i1> %x to <16 x i32> + store <16 x i32> %ret, <16 x i32>* %p + ret void +; CHECK: foo +; CHECK-NOT: pmovsxbd +; CHECK: ret +} diff --git a/test/CodeGen/X86/pr18162.ll b/test/CodeGen/X86/pr18162.ll new file mode 100644 index 0000000..523e47d --- /dev/null +++ b/test/CodeGen/X86/pr18162.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s + +; Make sure we are not crashing on this one. 
+ +target triple = "x86_64-unknown-linux-gnu" + +%"Iterator" = type { i32* } + +declare { i64, <2 x float> } @Call() +declare { i64, <2 x float> }* @CallPtr() + +define { i64, <2 x float> } @Foo(%"Iterator"* %this) { +entry: + %retval = alloca i32 + %this.addr = alloca %"Iterator"* + %this1 = load %"Iterator"** %this.addr + %bundle_ = getelementptr inbounds %"Iterator"* %this1, i32 0, i32 0 + %0 = load i32** %bundle_ + %1 = call { i64, <2 x float> } @Call() + %2 = call { i64, <2 x float> }* @CallPtr() + %3 = getelementptr { i64, <2 x float> }* %2, i32 0, i32 1 + %4 = extractvalue { i64, <2 x float> } %1, 1 + store <2 x float> %4, <2 x float>* %3 + %5 = load { i64, <2 x float> }* %2 + ret { i64, <2 x float> } %5 +} + diff --git a/test/CodeGen/X86/pre-ra-sched.ll b/test/CodeGen/X86/pre-ra-sched.ll index b792ffa..70135d4 100644 --- a/test/CodeGen/X86/pre-ra-sched.ll +++ b/test/CodeGen/X86/pre-ra-sched.ll @@ -1,5 +1,6 @@ -; RUN: llc < %s -mtriple=x86_64-apple-macosx -debug-only=pre-RA-sched \ -; RUN: 2>&1 | FileCheck %s +; RUN-disabled: llc < %s -mtriple=x86_64-apple-macosx -pre-RA-sched=ilp -debug-only=pre-RA-sched \ +; RUN-disabled: 2>&1 | FileCheck %s +; RUN: true ; REQUIRES: asserts ; ; rdar:13279013: pre-RA-sched should not check all interferences and diff --git a/test/CodeGen/X86/prefetch.ll b/test/CodeGen/X86/prefetch.ll index efb5191..d6571ac 100644 --- a/test/CodeGen/X86/prefetch.ll +++ b/test/CodeGen/X86/prefetch.ll @@ -1,6 +1,9 @@ ; RUN: llc < %s -march=x86 -mattr=+sse | FileCheck %s ; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s ; RUN: llc < %s -march=x86 -mattr=+sse -mattr=+prfchw | FileCheck %s -check-prefix=PRFCHW +; RUN: llc < %s -march=x86 -mcpu=slm | FileCheck %s -check-prefix=SLM +; RUN: llc < %s -march=x86 -mcpu=btver2 | FileCheck %s -check-prefix=PRFCHW +; RUN: llc < %s -march=x86 -mcpu=btver2 -mattr=-prfchw | FileCheck %s -check-prefix=NOPRFCHW ; rdar://10538297 @@ -11,6 +14,8 @@ entry: ; CHECK: prefetcht0 ; CHECK: prefetchnta ; PRFCHW: 
prefetchw +; NOPRFCHW-NOT: prefetchw +; SLM: prefetchw tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1, i32 1 ) tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2, i32 1 ) tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 ) diff --git a/test/CodeGen/X86/prefixdata.ll b/test/CodeGen/X86/prefixdata.ll new file mode 100644 index 0000000..2ec1892 --- /dev/null +++ b/test/CodeGen/X86/prefixdata.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s + +@i = linkonce_odr global i32 1 + +; CHECK: f: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: .long 1 +define void @f() prefix i32 1 { + ret void +} + +; CHECK: g: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: .quad i +define void @g() prefix i32* @i { + ret void +} diff --git a/test/CodeGen/X86/rdrand.ll b/test/CodeGen/X86/rdrand.ll index 1b16a2d..48182d0 100644 --- a/test/CodeGen/X86/rdrand.ll +++ b/test/CodeGen/X86/rdrand.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core-avx-i -mattr=+rdrand | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core-avx-i -mattr=+rdrnd | FileCheck %s declare {i16, i32} @llvm.x86.rdrand.16() declare {i32, i32} @llvm.x86.rdrand.32() declare {i64, i32} @llvm.x86.rdrand.64() @@ -11,10 +11,10 @@ define i32 @_rdrand16_step(i16* %random_val) { ret i32 %isvalid ; CHECK-LABEL: _rdrand16_step: ; CHECK: rdrandw %ax -; CHECK: movw %ax, (%r[[A0:di|cx]]) ; CHECK: movzwl %ax, %ecx ; CHECK: movl $1, %eax ; CHECK: cmovael %ecx, %eax +; CHECK: movw %cx, (%r[[A0:di|cx]]) ; CHECK: ret } @@ -26,9 +26,9 @@ define i32 @_rdrand32_step(i32* %random_val) { ret i32 %isvalid ; CHECK-LABEL: _rdrand32_step: ; CHECK: rdrandl %e[[T0:[a-z]+]] -; CHECK: movl %e[[T0]], (%r[[A0]]) ; CHECK: movl $1, %eax ; CHECK: cmovael %e[[T0]], %eax +; CHECK: movl %e[[T0]], (%r[[A0]]) ; CHECK: ret } @@ -40,9 +40,9 @@ define i32 @_rdrand64_step(i64* %random_val) { ret i32 %isvalid ; CHECK-LABEL: _rdrand64_step: ; CHECK: rdrandq %r[[T1:[a-z]+]] -; 
CHECK: movq %r[[T1]], (%r[[A0]]) ; CHECK: movl $1, %eax ; CHECK: cmovael %e[[T1]], %eax +; CHECK: movq %r[[T1]], (%r[[A0]]) ; CHECK: ret } diff --git a/test/CodeGen/X86/rdseed.ll b/test/CodeGen/X86/rdseed.ll index edc5069..c219b4a 100644 --- a/test/CodeGen/X86/rdseed.ll +++ b/test/CodeGen/X86/rdseed.ll @@ -12,10 +12,10 @@ define i32 @_rdseed16_step(i16* %random_val) { ret i32 %isvalid ; CHECK-LABEL: _rdseed16_step: ; CHECK: rdseedw %ax -; CHECK: movw %ax, (%r[[A0:di|cx]]) ; CHECK: movzwl %ax, %ecx ; CHECK: movl $1, %eax ; CHECK: cmovael %ecx, %eax +; CHECK: movw %cx, (%r[[A0:di|cx]]) ; CHECK: ret } @@ -27,9 +27,9 @@ define i32 @_rdseed32_step(i32* %random_val) { ret i32 %isvalid ; CHECK-LABEL: _rdseed32_step: ; CHECK: rdseedl %e[[T0:[a-z]+]] -; CHECK: movl %e[[T0]], (%r[[A0]]) ; CHECK: movl $1, %eax ; CHECK: cmovael %e[[T0]], %eax +; CHECK: movl %e[[T0]], (%r[[A0]]) ; CHECK: ret } @@ -41,8 +41,8 @@ define i32 @_rdseed64_step(i64* %random_val) { ret i32 %isvalid ; CHECK-LABEL: _rdseed64_step: ; CHECK: rdseedq %r[[T1:[a-z]+]] -; CHECK: movq %r[[T1]], (%r[[A0]]) ; CHECK: movl $1, %eax ; CHECK: cmovael %e[[T1]], %eax +; CHECK: movq %r[[T1]], (%r[[A0]]) ; CHECK: ret } diff --git a/test/CodeGen/X86/rem-2.ll b/test/CodeGen/X86/rem-2.ll deleted file mode 100644 index 1b2af4b..0000000 --- a/test/CodeGen/X86/rem-2.ll +++ /dev/null @@ -1,7 +0,0 @@ -; RUN: llc < %s -march=x86 | not grep cltd - -define i32 @test(i32 %X) nounwind readnone { -entry: - %0 = srem i32 41, %X - ret i32 %0 -} diff --git a/test/CodeGen/X86/rem.ll b/test/CodeGen/X86/rem.ll index 394070e..733b794 100644 --- a/test/CodeGen/X86/rem.ll +++ b/test/CodeGen/X86/rem.ll @@ -1,22 +1,37 @@ -; RUN: llc < %s -march=x86 | not grep div +; RUN: llc < %s -march=x86 | FileCheck %s +; CHECK-LABEL: test1: +; CHECK-NOT: div define i32 @test1(i32 %X) { %tmp1 = srem i32 %X, 255 ; <i32> [#uses=1] ret i32 %tmp1 } +; CHECK-LABEL: test2: +; CHECK-NOT: div define i32 @test2(i32 %X) { %tmp1 = srem i32 %X, 256 ; <i32> [#uses=1] ret 
i32 %tmp1 } +; CHECK-LABEL: test3: +; CHECK-NOT: div define i32 @test3(i32 %X) { %tmp1 = urem i32 %X, 255 ; <i32> [#uses=1] ret i32 %tmp1 } +; CHECK-LABEL: test4: +; CHECK-NOT: div define i32 @test4(i32 %X) { %tmp1 = urem i32 %X, 256 ; <i32> [#uses=1] ret i32 %tmp1 } +; CHECK-LABEL: test5: +; CHECK-NOT: cltd +define i32 @test5(i32 %X) nounwind readnone { +entry: + %0 = srem i32 41, %X + ret i32 %0 +} diff --git a/test/CodeGen/X86/rounding-ops.ll b/test/CodeGen/X86/rounding-ops.ll index ace31cf..69f4bfb 100644 --- a/test/CodeGen/X86/rounding-ops.ll +++ b/test/CodeGen/X86/rounding-ops.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse41 | FileCheck -check-prefix=CHECK-SSE %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse4.1 | FileCheck -check-prefix=CHECK-SSE %s ; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+avx | FileCheck -check-prefix=CHECK-AVX %s define float @test1(float %x) nounwind { diff --git a/test/CodeGen/X86/scalar_widen_div.ll b/test/CodeGen/X86/scalar_widen_div.ll index e99ea93..5807d5b 100644 --- a/test/CodeGen/X86/scalar_widen_div.ll +++ b/test/CodeGen/X86/scalar_widen_div.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mattr=+sse42 | FileCheck %s +; RUN: llc < %s -march=x86-64 -mattr=+sse4.2 | FileCheck %s ; Verify when widening a divide/remainder operation, we only generate a ; divide/rem per element since divide/remainder can trap. 
diff --git a/test/CodeGen/X86/segmented-stacks-dynamic.ll b/test/CodeGen/X86/segmented-stacks-dynamic.ll index c2aa617..e170762 100644 --- a/test/CodeGen/X86/segmented-stacks-dynamic.ll +++ b/test/CodeGen/X86/segmented-stacks-dynamic.ll @@ -31,7 +31,7 @@ false: ; X32-NEXT: ret ; X32: movl %esp, %eax -; X32-NEXT: subl %ecx, %eax +; X32: subl %ecx, %eax ; X32-NEXT: cmpl %eax, %gs:48 ; X32: movl %eax, %esp @@ -52,7 +52,7 @@ false: ; X64-NEXT: ret ; X64: movq %rsp, %[[RDI:rdi|rax]] -; X64-NEXT: subq %{{.*}}, %[[RDI]] +; X64: subq %{{.*}}, %[[RDI]] ; X64-NEXT: cmpq %[[RDI]], %fs:112 ; X64: movq %[[RDI]], %rsp diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll index 5fe2b70..cdd258d 100644 --- a/test/CodeGen/X86/select.ll +++ b/test/CodeGen/X86/select.ll @@ -34,12 +34,12 @@ bb90: ; preds = %bb84, %bb72 bb91: ; preds = %bb84 ret i32 0 ; CHECK-LABEL: test2: -; CHECK: movnew -; CHECK: movswl +; CHECK: cmovnew +; CHECK: cwtl ; ATOM-LABEL: test2: -; ATOM: movnew -; ATOM: movswl +; ATOM: cmovnew +; ATOM: cwtl } declare i1 @return_false() @@ -256,8 +256,8 @@ entry: %call = tail call noalias i8* @_Znam(i64 %D) nounwind noredzone ret i8* %call ; CHECK-LABEL: test12: -; CHECK: movq $-1, %[[R:r..]] ; CHECK: mulq +; CHECK: movq $-1, %[[R:r..]] ; CHECK: cmovnoq %rax, %[[R]] ; CHECK: jmp __Znam diff --git a/test/CodeGen/X86/setcc-narrowing.ll b/test/CodeGen/X86/setcc-narrowing.ll new file mode 100644 index 0000000..25cb2c8 --- /dev/null +++ b/test/CodeGen/X86/setcc-narrowing.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck %s +; PR17338 + +@t1.global = internal global i64 -1, align 8 + +define i32 @t1() nounwind ssp { +entry: +; CHECK-LABEL: t1: +; CHECK: cmpl $0, _t1.global +; CHECK-NEXT: setne %al +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: ret + %0 = load i64* @t1.global, align 8 + %and = and i64 4294967295, %0 + %cmp = icmp sgt i64 %and, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} diff --git 
a/test/CodeGen/X86/setcc-sentinals.ll b/test/CodeGen/X86/setcc-sentinals.ll new file mode 100644 index 0000000..d36e678 --- /dev/null +++ b/test/CodeGen/X86/setcc-sentinals.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -mcpu=generic -march=x86-64 -asm-verbose=false | FileCheck %s + +define zeroext i1 @test0(i64 %x) nounwind { +; CHECK-LABEL: test0: +; CHECK-NEXT: incq %[[X:rdi|rcx]] +; CHECK-NEXT: cmpq $1, %[[X]] +; CHECK-NEXT: seta %al +; CHECK-NEXT: ret + %cmp1 = icmp ne i64 %x, -1 + %not.cmp = icmp ne i64 %x, 0 + %.cmp1 = and i1 %cmp1, %not.cmp + ret i1 %.cmp1 +} diff --git a/test/CodeGen/X86/sha.ll b/test/CodeGen/X86/sha.ll new file mode 100644 index 0000000..bf81e99 --- /dev/null +++ b/test/CodeGen/X86/sha.ll @@ -0,0 +1,139 @@ +; RUN: llc < %s -mattr=+sha -mtriple=x86_64-unknown-unknown | FileCheck %s +; RUN: not llc < %s -mtriple=x86_64-unknown-unknown + +declare <4 x i32> @llvm.x86.sha1rnds4(<4 x i32>, <4 x i32>, i8) nounwind readnone + +define <4 x i32> @test_sha1rnds4rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { +entry: + %0 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %b, i8 3) + ret <4 x i32> %0 + ; CHECK: test_sha1rnds4rr + ; CHECK: sha1rnds4 $3, %xmm1, %xmm0 +} + +define <4 x i32> @test_sha1rnds4rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable { +entry: + %0 = load <4 x i32>* %b + %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3) + ret <4 x i32> %1 + ; CHECK: test_sha1rnds4rm + ; CHECK: sha1rnds4 $3, (%rdi), %xmm0 +} + +declare <4 x i32> @llvm.x86.sha1nexte(<4 x i32>, <4 x i32>) nounwind readnone + +define <4 x i32> @test_sha1nexterr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { +entry: + %0 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %0 + ; CHECK: test_sha1nexterr + ; CHECK: sha1nexte %xmm1, %xmm0 +} + +define <4 x i32> @test_sha1nexterm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable { +entry: + %0 = load <4 x i32>* %b + %1 = tail call <4 x i32> 
@llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %0) + ret <4 x i32> %1 + ; CHECK: test_sha1nexterm + ; CHECK: sha1nexte (%rdi), %xmm0 +} + +declare <4 x i32> @llvm.x86.sha1msg1(<4 x i32>, <4 x i32>) nounwind readnone + +define <4 x i32> @test_sha1msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { +entry: + %0 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %0 + ; CHECK: test_sha1msg1rr + ; CHECK: sha1msg1 %xmm1, %xmm0 +} + +define <4 x i32> @test_sha1msg1rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable { +entry: + %0 = load <4 x i32>* %b + %1 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %0) + ret <4 x i32> %1 + ; CHECK: test_sha1msg1rm + ; CHECK: sha1msg1 (%rdi), %xmm0 +} + +declare <4 x i32> @llvm.x86.sha1msg2(<4 x i32>, <4 x i32>) nounwind readnone + +define <4 x i32> @test_sha1msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { +entry: + %0 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %0 + ; CHECK: test_sha1msg2rr + ; CHECK: sha1msg2 %xmm1, %xmm0 +} + +define <4 x i32> @test_sha1msg2rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable { +entry: + %0 = load <4 x i32>* %b + %1 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %0) + ret <4 x i32> %1 + ; CHECK: test_sha1msg2rm + ; CHECK: sha1msg2 (%rdi), %xmm0 +} + +declare <4 x i32> @llvm.x86.sha256rnds2(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone + +define <4 x i32> @test_sha256rnds2rr(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind uwtable { +entry: + %0 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) + ret <4 x i32> %0 + ; CHECK: test_sha256rnds2rr + ; CHECK: movaps %xmm0, [[XMM_TMP1:%xmm[1-9][0-9]?]] + ; CHECK: movaps %xmm2, %xmm0 + ; CHECK: sha256rnds2 %xmm1, [[XMM_TMP1]] +} + +define <4 x i32> @test_sha256rnds2rm(<4 x i32> %a, <4 x i32>* %b, <4 x i32> %c) nounwind uwtable { +entry: + %0 = load <4 x i32>* %b + %1 = tail call <4 x i32> 
@llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %0, <4 x i32> %c) + ret <4 x i32> %1 + ; CHECK: test_sha256rnds2rm + ; CHECK: movaps %xmm0, [[XMM_TMP2:%xmm[1-9][0-9]?]] + ; CHECK: movaps %xmm1, %xmm0 + ; CHECK: sha256rnds2 (%rdi), [[XMM_TMP2]] +} + +declare <4 x i32> @llvm.x86.sha256msg1(<4 x i32>, <4 x i32>) nounwind readnone + +define <4 x i32> @test_sha256msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { +entry: + %0 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %0 + ; CHECK: test_sha256msg1rr + ; CHECK: sha256msg1 %xmm1, %xmm0 +} + +define <4 x i32> @test_sha256msg1rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable { +entry: + %0 = load <4 x i32>* %b + %1 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %0) + ret <4 x i32> %1 + ; CHECK: test_sha256msg1rm + ; CHECK: sha256msg1 (%rdi), %xmm0 +} + +declare <4 x i32> @llvm.x86.sha256msg2(<4 x i32>, <4 x i32>) nounwind readnone + +define <4 x i32> @test_sha256msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { +entry: + %0 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %0 + ; CHECK: test_sha256msg2rr + ; CHECK: sha256msg2 %xmm1, %xmm0 +} + +define <4 x i32> @test_sha256msg2rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable { +entry: + %0 = load <4 x i32>* %b + %1 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %0) + ret <4 x i32> %1 + ; CHECK: test_sha256msg2rm + ; CHECK: sha256msg2 (%rdi), %xmm0 +}
\ No newline at end of file diff --git a/test/CodeGen/X86/shift-bmi2.ll b/test/CodeGen/X86/shift-bmi2.ll index 0116789..7615754 100644 --- a/test/CodeGen/X86/shift-bmi2.ll +++ b/test/CodeGen/X86/shift-bmi2.ll @@ -30,10 +30,11 @@ entry: %x = load i32* %p %shl = shl i32 %x, %shamt ; BMI2: shl32p -; BMI2: shlxl %{{.+}}, ({{.+}}), %{{.+}} +; Source order scheduling prevents folding, rdar:14208996. +; BMI2: shlxl %{{.+}}, %{{.+}}, %{{.+}} ; BMI2: ret ; BMI264: shl32p -; BMI264: shlxl %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: shlxl %{{.+}}, %{{.+}}, %{{.+}} ; BMI264: ret ret i32 %shl } @@ -74,7 +75,7 @@ entry: %x = load i64* %p %shl = shl i64 %x, %shamt ; BMI264: shl64p -; BMI264: shlxq %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: shlxq %{{.+}}, %{{.+}}, %{{.+}} ; BMI264: ret ret i64 %shl } @@ -106,10 +107,11 @@ entry: %x = load i32* %p %shl = lshr i32 %x, %shamt ; BMI2: lshr32p -; BMI2: shrxl %{{.+}}, ({{.+}}), %{{.+}} +; Source order scheduling prevents folding, rdar:14208996. +; BMI2: shrxl %{{.+}}, %{{.+}}, %{{.+}} ; BMI2: ret ; BMI264: lshr32p -; BMI264: shrxl %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: shrxl %{{.+}}, %{{.+}}, %{{.+}} ; BMI264: ret ret i32 %shl } @@ -128,7 +130,7 @@ entry: %x = load i64* %p %shl = lshr i64 %x, %shamt ; BMI264: lshr64p -; BMI264: shrxq %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: shrxq %{{.+}}, %{{.+}}, %{{.+}} ; BMI264: ret ret i64 %shl } @@ -150,10 +152,11 @@ entry: %x = load i32* %p %shl = ashr i32 %x, %shamt ; BMI2: ashr32p -; BMI2: sarxl %{{.+}}, ({{.+}}), %{{.+}} +; Source order scheduling prevents folding, rdar:14208996. 
+; BMI2: sarxl %{{.+}}, %{{.+}}, %{{.+}} ; BMI2: ret ; BMI264: ashr32p -; BMI264: sarxl %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: sarxl %{{.+}}, %{{.+}}, %{{.+}} ; BMI264: ret ret i32 %shl } @@ -172,7 +175,7 @@ entry: %x = load i64* %p %shl = ashr i64 %x, %shamt ; BMI264: ashr64p -; BMI264: sarxq %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: sarxq %{{.+}}, %{{.+}}, %{{.+}} ; BMI264: ret ret i64 %shl } diff --git a/test/CodeGen/X86/sibcall.ll b/test/CodeGen/X86/sibcall.ll index 7b774f6..589e9ec 100644 --- a/test/CodeGen/X86/sibcall.ll +++ b/test/CodeGen/X86/sibcall.ll @@ -106,10 +106,10 @@ declare i32 @bar2(i32, i32, i32) define signext i16 @t8() nounwind ssp { entry: ; 32-LABEL: t8: -; 32: calll {{_?}}bar3 +; 32: jmp {{_?}}bar3 ; 64-LABEL: t8: -; 64: callq {{_?}}bar3 +; 64: jmp {{_?}}bar3 %0 = tail call signext i16 @bar3() nounwind ; <i16> [#uses=1] ret i16 %0 } @@ -122,7 +122,7 @@ entry: ; 32: calll * ; 64-LABEL: t9: -; 64: callq * +; 64: jmpq * %0 = bitcast i32 (i32)* %x to i16 (i32)* %1 = tail call signext i16 %0(i32 0) nounwind ret i16 %1 diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll index 0741635..64f5311 100644 --- a/test/CodeGen/X86/sink-hoist.ll +++ b/test/CodeGen/X86/sink-hoist.ll @@ -26,11 +26,10 @@ define double @foo(double %x, double %y, i1 %c) nounwind { ; CHECK-LABEL: split: ; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: jne -; CHECK-NEXT: movaps -; CHECK-NEXT: ret +; CHECK-NEXT: je ; CHECK: divsd -; CHECK-NEXT: ret +; CHECK: movaps +; CHECK: ret define double @split(double %x, double %y, i1 %c) nounwind { %a = fdiv double %x, 3.2 %z = select i1 %c, double %a, double %y @@ -65,7 +64,7 @@ return: ; Sink instructions with dead EFLAGS defs. ; FIXME: Unfail the zzz test if we can correctly mark pregs with the kill flag. -; +; ; See <rdar://problem/8030636>. 
This test isn't valid after we made machine ; sinking more conservative about sinking instructions that define a preg into a ; block when we don't know if the preg is killed within the current block. diff --git a/test/CodeGen/X86/sqrt-fastmath.ll b/test/CodeGen/X86/sqrt-fastmath.ll index 9b5179e..fc79e31 100644 --- a/test/CodeGen/X86/sqrt-fastmath.ll +++ b/test/CodeGen/X86/sqrt-fastmath.ll @@ -55,6 +55,6 @@ entry: ; Function Attrs: nounwind readnone declare x86_fp80 @__sqrtl_finite(x86_fp80) #1 -attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" "use-soft-float"="false" } -attributes #1 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #1 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" "use-soft-float"="false" } attributes #2 = { nounwind readnone } diff --git a/test/CodeGen/X86/sse-intrinsics-x86.ll b/test/CodeGen/X86/sse-intrinsics-x86.ll new file mode 100644 index 0000000..65d44bf --- /dev/null +++ b/test/CodeGen/X86/sse-intrinsics-x86.ll @@ -0,0 +1,308 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse | FileCheck %s + +define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: addss + %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.add.ss(<4 x 
float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: cmpordps + %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone + + +define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: cmpordss + %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone + + +define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: comiss + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: comiss + ; CHECK: setae + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: comiss + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: comiss + ; CHECK: setbe + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone + + +define 
i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: comiss + ; CHECK: sbb + %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: comiss + ; CHECK: setne + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) { + ; CHECK: movl + ; CHECK: cvtsi2ss + %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone + + +define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) { + ; CHECK: cvtss2si + %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone + + +define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) { + ; CHECK: cvttss2si + %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: divss + %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone + + +define void @test_x86_sse_ldmxcsr(i8* %a0) { + ; CHECK: movl + ; CHECK: ldmxcsr + call void @llvm.x86.sse.ldmxcsr(i8* %a0) + ret void +} +declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind + + + +define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) { + ; 
CHECK: maxps + %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: maxss + %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: minps + %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: minss + %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) { + ; CHECK: movmskps + %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone + + + +define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: mulss + %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) { + ; CHECK: rcpps + %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) 
nounwind readnone + + +define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) { + ; CHECK: rcpss + %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) { + ; CHECK: rsqrtps + %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) { + ; CHECK: rsqrtss + %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) { + ; CHECK: sqrtps + %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) { + ; CHECK: sqrtss + %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone + + +define void @test_x86_sse_stmxcsr(i8* %a0) { + ; CHECK: movl + ; CHECK: stmxcsr + call void @llvm.x86.sse.stmxcsr(i8* %a0) + ret void +} +declare void @llvm.x86.sse.stmxcsr(i8*) nounwind + + +define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) { + ; CHECK: movl + ; CHECK: movups + call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1) + ret void +} +declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind + + +define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: subss + %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x 
float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: ucomiss + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: ucomiss + ; CHECK: setae + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: ucomiss + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: ucomiss + ; CHECK: setbe + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: ucomiss + ; CHECK: sbbl + %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: ucomiss + ; CHECK: setne + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + ret i32 
%res +} +declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone diff --git a/test/CodeGen/X86/sse2-blend.ll b/test/CodeGen/X86/sse2-blend.ll index 30a0fbe..1ac9832 100644 --- a/test/CodeGen/X86/sse2-blend.ll +++ b/test/CodeGen/X86/sse2-blend.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse41 | FileCheck %s +; RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse4.1 | FileCheck %s ; CHECK: vsel_float ; CHECK: pandn diff --git a/test/CodeGen/X86/sse2-intrinsics-x86.ll b/test/CodeGen/X86/sse2-intrinsics-x86.ll new file mode 100644 index 0000000..ff6c10b --- /dev/null +++ b/test/CodeGen/X86/sse2-intrinsics-x86.ll @@ -0,0 +1,712 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s + +define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: addsd + %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: cmpordpd + %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone + + +define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: cmpordsd + %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone + + +define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: comisd + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; 
<i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: comisd + ; CHECK: setae + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: comisd + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: comisd + ; CHECK: setbe + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: comisd + ; CHECK: sbbl %eax, %eax + ; CHECK: andl $1, %eax + %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: comisd + ; CHECK: setne + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) { + ; CHECK: cvtdq2pd + %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} 
+declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone + + +define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) { + ; CHECK: cvtdq2ps + %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) { + ; CHECK: cvtpd2dq + %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) { + ; CHECK: cvtpd2ps + %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) { + ; CHECK: cvtps2dq + %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone + + +define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) { + ; CHECK: cvtps2pd + %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone + + +define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) { + ; CHECK: cvtsd2si + %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) { + ; CHECK: cvtsd2ss + ; CHECK-NOT: cvtsd2ss %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}} + %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> 
[#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) { + ; CHECK: movl + ; CHECK: cvtsi2sd + %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone + + +define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) { + ; CHECK: cvtss2sd + %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) { + ; CHECK: cvttpd2dq + %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) { + ; CHECK: cvttps2dq + %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone + + +define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) { + ; CHECK: cvttsd2si + %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: divsd + %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone + + + +define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> 
%a1) { + ; CHECK: maxpd + %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: maxsd + %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: minpd + %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: minsd + %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) { + ; CHECK: movmskpd + %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone + + + + +define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: test_x86_sse2_mul_sd + ; CHECK: mulsd + %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: packssdw + %res = call <8 x i16> 
@llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: packsswb + %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: packuswb + %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: paddsb + %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: paddsw + %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: paddusb + %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: paddusw + %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> 
@llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: pavgb + %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: pavgw + %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: pmaddwd + %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: pmaxsw + %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: pmaxub + %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: pminsw + %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: pminub + %res = call <16 x i8> 
@llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) { + ; CHECK: pmovmskb + %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: pmulhw + %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: pmulhuw + %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: pmuludq + %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: psadbw + %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: pslld + %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> 
%a0) { + ; CHECK: pslldq + %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) { + ; CHECK: pslldq + %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: psllq + %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: psllw + %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) { + ; CHECK: pslld + %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) { + ; CHECK: psllq + %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) { + ; CHECK: psllw + %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: 
psrad + %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: psraw + %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) { + ; CHECK: psrad + %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) { + ; CHECK: psraw + %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: psrld + %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) { + ; CHECK: psrldq + %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) { + ; CHECK: psrldq + %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: psrlq 
+ %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: psrlw + %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) { + ; CHECK: psrld + %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) { + ; CHECK: psrlq + %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) { + ; CHECK: psrlw + %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone + + +define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: psubsb + %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: psubsw + %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) 
{ + ; CHECK: psubusb + %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: psubusw + %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) { + ; CHECK: sqrtpd + %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) { + ; CHECK: sqrtsd + %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone + + +define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) { + ; CHECK: test_x86_sse2_storel_dq + ; CHECK: movl + ; CHECK: movq + call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1) + ret void +} +declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind + + +define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) { + ; CHECK: test_x86_sse2_storeu_dq + ; CHECK: movl + ; CHECK: movdqu + ; add operation forces the execution domain. 
+ %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2) + ret void +} +declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind + + +define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) { + ; CHECK: test_x86_sse2_storeu_pd + ; CHECK: movl + ; CHECK: movupd + ; fadd operation forces the execution domain. + %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000> + call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2) + ret void +} +declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind + + +define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: test_x86_sse2_sub_sd + ; CHECK: subsd + %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: ucomisd + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: ucomisd + ; CHECK: setae + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: ucomisd + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x 
double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: ucomisd + ; CHECK: setbe + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: ucomisd + ; CHECK: sbbl + %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: ucomisd + ; CHECK: setne + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone diff --git a/test/CodeGen/X86/sse2-vector-shifts.ll b/test/CodeGen/X86/sse2-vector-shifts.ll index e2d6125..462def9 100644 --- a/test/CodeGen/X86/sse2-vector-shifts.ll +++ b/test/CodeGen/X86/sse2-vector-shifts.ll @@ -121,7 +121,7 @@ entry: } ; CHECK-LABEL: test_sraw_3: -; CHECK: psraw $16, %xmm0 +; CHECK: psraw $15, %xmm0 ; CHECK-NEXT: ret define <4 x i32> @test_srad_1(<4 x i32> %InVec) { @@ -151,7 +151,7 @@ entry: } ; CHECK-LABEL: test_srad_3: -; CHECK: psrad $32, %xmm0 +; CHECK: psrad $31, %xmm0 ; CHECK-NEXT: ret ; SSE Logical Shift Right diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll index 217139a..9147c22 100644 --- a/test/CodeGen/X86/sse2.ll +++ b/test/CodeGen/X86/sse2.ll @@ -7,7 +7,7 @@ define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 > store <2 x double> %tmp9, <2 x double>* %r, align 16 ret void - + ; CHECK-LABEL: 
test1: ; CHECK: movl 8(%esp), %eax ; CHECK-NEXT: movapd (%eax), %xmm0 @@ -23,12 +23,12 @@ define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 > store <2 x double> %tmp9, <2 x double>* %r, align 16 ret void - + ; CHECK-LABEL: test2: -; CHECK: movl 8(%esp), %eax -; CHECK-NEXT: movapd (%eax), %xmm0 +; CHECK: movl 4(%esp), %eax +; CHECK: movl 8(%esp), %ecx +; CHECK-NEXT: movapd (%ecx), %xmm0 ; CHECK-NEXT: movhpd 12(%esp), %xmm0 -; CHECK-NEXT: movl 4(%esp), %eax ; CHECK-NEXT: movapd %xmm0, (%eax) ; CHECK-NEXT: ret } @@ -48,7 +48,7 @@ define void @test3(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) nounwind store <4 x float> %tmp13, <4 x float>* %res ret void ; CHECK: @test3 -; CHECK: unpcklps +; CHECK: unpcklps } define void @test4(<4 x float> %X, <4 x float>* %res) nounwind { @@ -85,9 +85,9 @@ define void @test6(<4 x float>* %res, <4 x float>* %A) nounwind { %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>> [#uses=1] store <4 x float> %tmp2, <4 x float>* %res ret void - + ; CHECK-LABEL: test6: -; CHECK: movaps (%eax), %xmm0 +; CHECK: movaps (%ecx), %xmm0 ; CHECK: movaps %xmm0, (%eax) } @@ -96,7 +96,7 @@ define void @test7() nounwind { shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer ; <<4 x float>>:2 [#uses=1] store <4 x float> %2, <4 x float>* null ret void - + ; CHECK-LABEL: test7: ; CHECK: xorps %xmm0, %xmm0 ; CHECK: movaps %xmm0, 0 @@ -166,7 +166,7 @@ define void @test13(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x fl store <4 x float> %tmp11, <4 x float>* %res ret void ; CHECK: test13 -; CHECK: shufps $69, (%eax), %xmm0 +; CHECK: shufps $69, (%ecx), %xmm0 ; CHECK: pshufd $-40, %xmm0, %xmm0 } @@ -178,8 +178,8 @@ define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind { %tmp27 = shufflevector <4 x float> %tmp9, <4 x 
float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x float>> [#uses=1] ret <4 x float> %tmp27 ; CHECK-LABEL: test14: -; CHECK: subps [[X1:%xmm[0-9]+]], [[X2:%xmm[0-9]+]] -; CHECK: addps [[X1]], [[X0:%xmm[0-9]+]] +; CHECK: addps [[X1:%xmm[0-9]+]], [[X0:%xmm[0-9]+]] +; CHECK: subps [[X1]], [[X2:%xmm[0-9]+]] ; CHECK: movlhps [[X2]], [[X0]] } @@ -221,4 +221,3 @@ entry: %double2float.i = fptrunc <4 x double> %0 to <4 x float> ret <4 x float> %double2float.i } - diff --git a/test/CodeGen/X86/sse3-intrinsics-x86.ll b/test/CodeGen/X86/sse3-intrinsics-x86.ll new file mode 100644 index 0000000..dbd14b8 --- /dev/null +++ b/test/CodeGen/X86/sse3-intrinsics-x86.ll @@ -0,0 +1,57 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse3 | FileCheck %s + +define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: addsubpd + %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: addsubps + %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone + + +define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: haddpd + %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: haddps + %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x 
float> %res +} +declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone + + +define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: hsubpd + %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: hsubps + %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone + + +define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) { + ; CHECK: movl + ; CHECK: lddqu + %res = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly diff --git a/test/CodeGen/X86/sse41-blend.ll b/test/CodeGen/X86/sse41-blend.ll index bd92d22..a32f5de 100644 --- a/test/CodeGen/X86/sse41-blend.ll +++ b/test/CodeGen/X86/sse41-blend.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse41 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s ;CHECK-LABEL: vsel_float: ;CHECK: blendvps diff --git a/test/CodeGen/X86/sse41-intrinsics-x86.ll b/test/CodeGen/X86/sse41-intrinsics-x86.ll new file mode 100644 index 0000000..37eff43 --- /dev/null +++ b/test/CodeGen/X86/sse41-intrinsics-x86.ll @@ -0,0 +1,326 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse4.1 | FileCheck %s + +define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: blendpd + %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare 
<2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone + + +define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: blendps + %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone + + +define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { + ; CHECK: blendvpd + %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { + ; CHECK: blendvps + %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone + + +define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: dppd + %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone + + +define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: dpps + %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone + + +define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: insertps + %res = call <4 x float> 
@llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone + + + +define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: mpsadbw + %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone + + +define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: packusdw + %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone + + +define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) { + ; CHECK: pblendvb + %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: pblendw + %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone + + +define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) { + ; CHECK: phminposuw + %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: pmaxsb + %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x 
i8> %res +} +declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: pmaxsd + %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: pmaxud + %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: pmaxuw + %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: pminsb + %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: pminsd + %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: pminud + %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: 
pminuw + %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) { + ; CHECK: pmovsxbd + %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) { + ; CHECK: pmovsxbq + %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) { + ; CHECK: pmovsxbw + %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) { + ; CHECK: pmovsxdq + %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) { + ; CHECK: pmovsxwd + %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) { + ; CHECK: pmovsxwq + %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) { + ; CHECK: pmovzxbd + %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1] 
+ ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) { + ; CHECK: pmovzxbq + %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) { + ; CHECK: pmovzxbw + %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) { + ; CHECK: pmovzxdq + %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) { + ; CHECK: pmovzxwd + %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) { + ; CHECK: pmovzxwq + %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: pmuldq + %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone + + +define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: ptest + ; CHECK: sbbl + %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse41.ptestc(<2 x 
i64>, <2 x i64>) nounwind readnone + + +define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: ptest + ; CHECK: seta + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone + + +define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) { + ; CHECK: ptest + ; CHECK: sete + ; CHECK: movzbl + %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone + + +define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) { + ; CHECK: roundpd + %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone + + +define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) { + ; CHECK: roundps + %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone + + +define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) { + ; CHECK: roundsd + %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone + + +define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) { + ; CHECK: roundss + %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone diff --git a/test/CodeGen/X86/sse41.ll 
b/test/CodeGen/X86/sse41.ll index 87b64e5..c15e24c 100644 --- a/test/CodeGen/X86/sse41.ll +++ b/test/CodeGen/X86/sse41.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse41 -mcpu=penryn | FileCheck %s -check-prefix=X32 -; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse41 -mcpu=penryn | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse4.1 -mcpu=penryn | FileCheck %s -check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse4.1 -mcpu=penryn | FileCheck %s -check-prefix=X64 @g16 = external global i16 diff --git a/test/CodeGen/X86/sse42-intrinsics-x86.ll b/test/CodeGen/X86/sse42-intrinsics-x86.ll new file mode 100644 index 0000000..5ca8009 --- /dev/null +++ b/test/CodeGen/X86/sse42-intrinsics-x86.ll @@ -0,0 +1,182 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse4.2 | FileCheck %s + +define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl $7 + ; CHECK: movl $7 + ; CHECK: pcmpestri $7 + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpestri128_load(<16 x i8>* %a0, <16 x i8>* %a2) { + ; CHECK: movl $7 + ; CHECK: movl $7 + ; CHECK: pcmpestri $7, ( + ; CHECK: movl + %1 = load <16 x i8>* %a0 + %2 = load <16 x i8>* %a2 + %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %1, i32 7, <16 x i8> %2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} + + +define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: pcmpestri + ; CHECK: seta + %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + 
+define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: pcmpestri + ; CHECK: sbbl + %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: pcmpestri + ; CHECK: seto + %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: pcmpestri + ; CHECK: sets + %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: pcmpestri + ; CHECK: sete + %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) { + ; CHECK: movl + ; CHECK: movl + ; CHECK: pcmpestrm + ; CHECK-NOT: vmov + %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone + + +define <16 x i8> @test_x86_sse42_pcmpestrm128_load(<16 x i8> %a0, <16 x i8>* 
%a2) { + ; CHECK: movl $7 + ; CHECK: movl $7 + ; CHECK: pcmpestrm $7, + ; CHECK-NOT: vmov + %1 = load <16 x i8>* %a2 + %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %1, i32 7, i8 7) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} + + +define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: pcmpistri $7 + ; CHECK: movl + %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) { + ; CHECK: pcmpistri $7, ( + ; CHECK: movl + %1 = load <16 x i8>* %a0 + %2 = load <16 x i8>* %a1 + %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %1, <16 x i8> %2, i8 7) ; <i32> [#uses=1] + ret i32 %res +} + + +define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: pcmpistri + ; CHECK: seta + %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: pcmpistri + ; CHECK: sbbl + %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: pcmpistri + ; CHECK: seto + %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: pcmpistri + ; CHECK: sets + %res = call i32 
@llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: pcmpistri + ; CHECK: sete + %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: pcmpistrm $7 + ; CHECK-NOT: vmov + %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone + + +define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1) { + ; CHECK: pcmpistrm $7, ( + ; CHECK-NOT: vmov + %1 = load <16 x i8>* %a1 + %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} diff --git a/test/CodeGen/X86/sse42.ll b/test/CodeGen/X86/sse42.ll index c787523..db51d99 100644 --- a/test/CodeGen/X86/sse42.ll +++ b/test/CodeGen/X86/sse42.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse42 | FileCheck %s -check-prefix=X32 -; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse42 | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse4.2 | FileCheck %s -check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse4.2 | FileCheck %s -check-prefix=X64 declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind diff --git a/test/CodeGen/X86/sse42_64.ll b/test/CodeGen/X86/sse42_64.ll index 8b3a69b..b39e76c 100644 --- a/test/CodeGen/X86/sse42_64.ll +++ 
b/test/CodeGen/X86/sse42_64.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse42 | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse4.2 | FileCheck %s -check-prefix=X64 declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind diff --git a/test/CodeGen/X86/ssse3-intrinsics-x86.ll b/test/CodeGen/X86/ssse3-intrinsics-x86.ll new file mode 100644 index 0000000..728cbc9 --- /dev/null +++ b/test/CodeGen/X86/ssse3-intrinsics-x86.ll @@ -0,0 +1,120 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+ssse3 | FileCheck %s + +define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) { + ; CHECK: pabsb + %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) { + ; CHECK: pabsd + %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) { + ; CHECK: pabsw + %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: phaddd + %res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_phadd_sw_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: phaddsw + %res = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x 
i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: phaddw + %res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: phsubd + %res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: phsubsw + %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: phsubw + %res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: pmaddubsw + %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: pmulhrsw + %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x 
i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: pshufb + %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone + + +define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) { + ; CHECK: psignb + %res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone + + +define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) { + ; CHECK: psignd + %res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) { + ; CHECK: psignw + %res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone diff --git a/test/CodeGen/X86/stack-protector-dbginfo.ll b/test/CodeGen/X86/stack-protector-dbginfo.ll new file mode 100644 index 0000000..bd27ac3 --- /dev/null +++ b/test/CodeGen/X86/stack-protector-dbginfo.ll @@ -0,0 +1,97 @@ +; RUN: llc -mtriple=x86_64-apple-darwin < %s -o - + +; PR16954 +; +; Make sure that when we splice off the end of a machine basic block, we include +; DBG_VALUE MI in the terminator sequence. 
+ +@a = external global { i64, [56 x i8] }, align 32 + +; Function Attrs: nounwind sspreq +define i32 @_Z18read_response_sizev() #0 { +entry: + tail call void @llvm.dbg.value(metadata !22, i64 0, metadata !23), !dbg !39 + %0 = load i64* getelementptr inbounds ({ i64, [56 x i8] }* @a, i32 0, i32 0), align 8, !dbg !40 + tail call void @llvm.dbg.value(metadata !63, i64 0, metadata !64), !dbg !71 + %1 = trunc i64 %0 to i32 + ret i32 %1 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.value(metadata, i64, metadata) + +attributes #0 = { sspreq } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!21, !72} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 true, metadata !"", i32 0, metadata !2, metadata !5, metadata !8, metadata !20, metadata !5, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/matt/ryan_bug/<unknown>] [DW_LANG_C_plus_plus] +!1 = metadata !{metadata !"<unknown>", metadata !"/Users/matt/ryan_bug"} +!2 = metadata !{metadata !3} +!3 = metadata !{i32 786436, metadata !1, metadata !4, metadata !"", i32 20, i64 32, i64 32, i32 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [line 20, size 32, align 32, offset 0] [def] [from ] +!4 = metadata !{i32 786451, metadata !1, null, metadata !"C", i32 19, i64 8, i64 8, i32 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [C] [line 19, size 8, align 8, offset 0] [def] [from ] +!5 = metadata !{i32 0} +!6 = metadata !{metadata !7} +!7 = metadata !{i32 786472, metadata !"max_frame_size", i64 0} ; [ DW_TAG_enumerator ] [max_frame_size :: 0] +!8 = metadata !{metadata !9} +!9 = metadata !{i32 786478, metadata !1, metadata !10, metadata !"read_response_size", metadata !"read_response_size", metadata !"_Z18read_response_sizev", i32 27, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @_Z18read_response_sizev, null, null, metadata !14, i32 27} ; [ DW_TAG_subprogram ] [line 27] [def] 
[read_response_size] +!10 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/Users/matt/ryan_bug/<unknown>] +!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!12 = metadata !{metadata !13} +!13 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!14 = metadata !{metadata !15, metadata !19} +!15 = metadata !{i32 786688, metadata !9, metadata !"b", metadata !10, i32 28, metadata !16, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [b] [line 28] +!16 = metadata !{i32 786451, metadata !1, null, metadata !"B", i32 16, i64 32, i64 32, i32 0, i32 0, null, metadata !17, i32 0, null, null} ; [ DW_TAG_structure_type ] [B] [line 16, size 32, align 32, offset 0] [def] [from ] +!17 = metadata !{metadata !18} +!18 = metadata !{i32 786445, metadata !1, metadata !16, metadata !"end_of_file", i32 17, i64 32, i64 32, i64 0, i32 0, metadata !13} ; [ DW_TAG_member ] [end_of_file] [line 17, size 32, align 32, offset 0] [from int] +!19 = metadata !{i32 786688, metadata !9, metadata !"c", metadata !10, i32 29, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [c] [line 29] +!20 = metadata !{} +!21 = metadata !{i32 2, metadata !"Dwarf Version", i32 2} +!22 = metadata !{i64* getelementptr inbounds ({ i64, [56 x i8] }* @a, i32 0, i32 0)} +!23 = metadata !{i32 786689, metadata !24, metadata !"p2", metadata !10, i32 33554444, metadata !32, i32 0, metadata !38} ; [ DW_TAG_arg_variable ] [p2] [line 12] +!24 = metadata !{i32 786478, metadata !1, metadata !25, metadata !"min<unsigned long long>", metadata !"min<unsigned long long>", metadata !"_ZN3__13minIyEERKT_S3_RS1_", i32 12, metadata !27, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, metadata !33, null, metadata !35, i32 12} ; 
[ DW_TAG_subprogram ] [line 12] [def] [min<unsigned long long>] +!25 = metadata !{i32 786489, metadata !26, null, metadata !"__1", i32 1} ; [ DW_TAG_namespace ] [__1] [line 1] +!26 = metadata !{metadata !"main.cpp", metadata !"/Users/matt/ryan_bug"} +!27 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !28, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!28 = metadata !{metadata !29, metadata !29, metadata !32} +!29 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !30} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ] +!30 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !31} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from long long unsigned int] +!31 = metadata !{i32 786468, null, null, metadata !"long long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned] +!32 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !31} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from long long unsigned int] +!33 = metadata !{metadata !34} +!34 = metadata !{i32 786479, null, metadata !"_Tp", metadata !31, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ] +!35 = metadata !{metadata !36, metadata !37} +!36 = metadata !{i32 786689, metadata !24, metadata !"p1", metadata !10, i32 16777228, metadata !29, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p1] [line 12] +!37 = metadata !{i32 786689, metadata !24, metadata !"p2", metadata !10, i32 33554444, metadata !32, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p2] [line 12] +!38 = metadata !{i32 33, i32 0, metadata !9, null} +!39 = metadata !{i32 12, i32 0, metadata !24, metadata !38} +!40 = metadata !{i32 9, i32 0, metadata !41, metadata !59} +!41 = 
metadata !{i32 786478, metadata !1, metadata !25, metadata !"min<unsigned long long, __1::A>", metadata !"min<unsigned long long, __1::A>", metadata !"_ZN3__13minIyNS_1AEEERKT_S4_RS2_T0_", i32 7, metadata !42, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, metadata !53, null, metadata !55, i32 8} ; [ DW_TAG_subprogram ] [line 7] [def] [scope 8] [min<unsigned long long, __1::A>] +!42 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !43, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!43 = metadata !{metadata !29, metadata !29, metadata !32, metadata !44} +!44 = metadata !{i32 786451, metadata !1, metadata !25, metadata !"A", i32 0, i64 8, i64 8, i32 0, i32 0, null, metadata !45, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [A] [line 0, size 8, align 8, offset 0] [def] [from ] +!45 = metadata !{metadata !46} +!46 = metadata !{i32 786478, metadata !1, metadata !44, metadata !"operator()", metadata !"operator()", metadata !"_ZN3__11AclERKiS2_", i32 1, metadata !47, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !52, i32 1} ; [ DW_TAG_subprogram ] [line 1] [operator()] +!47 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !48, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!48 = metadata !{metadata !13, metadata !49, metadata !50, metadata !50} +!49 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !44} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from A] +!50 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !51} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ] +!51 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata 
!13} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from int] +!52 = metadata !{i32 786468} +!53 = metadata !{metadata !34, metadata !54} +!54 = metadata !{i32 786479, null, metadata !"_Compare", metadata !44, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ] +!55 = metadata !{metadata !56, metadata !57, metadata !58} +!56 = metadata !{i32 786689, metadata !41, metadata !"p1", metadata !10, i32 16777223, metadata !29, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p1] [line 7] +!57 = metadata !{i32 786689, metadata !41, metadata !"p2", metadata !10, i32 33554439, metadata !32, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p2] [line 7] +!58 = metadata !{i32 786689, metadata !41, metadata !"p3", metadata !10, i32 50331656, metadata !44, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p3] [line 8] +!59 = metadata !{i32 13, i32 0, metadata !24, metadata !38} +!63 = metadata !{i32 undef} +!64 = metadata !{i32 786689, metadata !65, metadata !"p1", metadata !10, i32 33554433, metadata !50, i32 0, metadata !40} ; [ DW_TAG_arg_variable ] [p1] [line 1] +!65 = metadata !{i32 786478, metadata !1, metadata !25, metadata !"operator()", metadata !"operator()", metadata !"_ZN3__11AclERKiS2_", i32 1, metadata !47, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, metadata !46, metadata !66, i32 2} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [operator()] +!66 = metadata !{metadata !67, metadata !69, metadata !70} +!67 = metadata !{i32 786689, metadata !65, metadata !"this", null, i32 16777216, metadata !68, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0] +!68 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !44} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A] +!69 = metadata !{i32 786689, metadata !65, metadata !"p1", metadata !10, i32 33554433, metadata !50, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p1] [line 1] +!70 = metadata !{i32 786689, metadata !65, metadata !"", 
metadata !10, i32 50331650, metadata !50, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [line 2] +!71 = metadata !{i32 1, i32 0, metadata !65, metadata !40} +!72 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/stack-protector-vreg-to-vreg-copy.ll b/test/CodeGen/X86/stack-protector-vreg-to-vreg-copy.ll new file mode 100644 index 0000000..7d499f9 --- /dev/null +++ b/test/CodeGen/X86/stack-protector-vreg-to-vreg-copy.ll @@ -0,0 +1,61 @@ +; RUN: llc -mtriple i386-unknown-freebsd10.0 -march=x86 --relocation-model=pic %s -o - + +; PR16979 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" +target triple = "i386-unknown-freebsd10.0" + +@state = internal unnamed_addr global i32 0, align 4 + +; Function Attrs: nounwind sspreq +define void @set_state(i32 %s) #0 { +entry: + store i32 %s, i32* @state, align 4 + ret void +} + +; Function Attrs: nounwind sspreq +define void @zero_char(i8* nocapture %p) #0 { +entry: + store i8 0, i8* %p, align 1 + tail call void @g(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) #2 + ret void +} + +declare void @g(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) #1 + +; Function Attrs: nounwind sspreq +define void @do_something(i32 %i) #0 { +entry: + 
%data = alloca [8 x i8], align 1 + %0 = load i32* @state, align 4 + %cmp = icmp eq i32 %0, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + tail call fastcc void @send_int(i32 0) + br label %if.end + +if.else: ; preds = %entry + tail call fastcc void @send_int(i32 %i) + %arrayidx = getelementptr inbounds [8 x i8]* %data, i32 0, i32 0 + call void @zero_char(i8* %arrayidx) + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + +; Function Attrs: nounwind sspreq +define internal fastcc void @send_int(i32 %p) #0 { +entry: + tail call void @f(i32 %p) #2 + tail call void @g(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) #2 + ret void +} + +declare void @f(i32) #1 + +attributes #0 = { nounwind sspreq "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } diff --git a/test/CodeGen/X86/stack-protector.ll b/test/CodeGen/X86/stack-protector.ll index a4dbbb9..265ec80 100644 --- a/test/CodeGen/X86/stack-protector.ll +++ b/test/CodeGen/X86/stack-protector.ll @@ -2313,18 +2313,22 @@ entry: ; LINUX-I386-LABEL: test19d: ; LINUX-I386: mov{{l|q}} %gs: ; LINUX-I386: calll __stack_chk_fail +; 
LINUX-I386-NOT: calll __stack_chk_fail ; LINUX-X64-LABEL: test19d: ; LINUX-X64: mov{{l|q}} %fs: ; LINUX-X64: callq __stack_chk_fail +; LINUX-X64-NOT: callq __stack_chk_fail ; LINUX-KERNEL-X64-LABEL: test19d: ; LINUX-KERNEL-X64: mov{{l|q}} %gs: ; LINUX-KERNEL-X64: callq __stack_chk_fail +; LINUX-KERNEL-X64-NOT: callq ___stack_chk_fail ; DARWIN-X64-LABEL: test19d: ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard ; DARWIN-X64: callq ___stack_chk_fail +; DARWIN-X64-NOT: callq ___stack_chk_fail %c = alloca %struct.pair, align 4 %exn.slot = alloca i8* %ehselector.slot = alloca i32 diff --git a/test/CodeGen/X86/stackmap.ll b/test/CodeGen/X86/stackmap.ll new file mode 100644 index 0000000..ed95583 --- /dev/null +++ b/test/CodeGen/X86/stackmap.ll @@ -0,0 +1,292 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -disable-fp-elim | FileCheck %s +; +; Note: Print verbose stackmaps using -debug-only=stackmaps. + +; CHECK-LABEL: .section __LLVM_STACKMAPS,__llvm_stackmaps +; CHECK-NEXT: __LLVM_StackMaps: +; CHECK-NEXT: .long 0 +; Num LargeConstants +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .quad 4294967296 +; Num Callsites +; CHECK-NEXT: .long 11 + +; Constant arguments +; +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long L{{.*}}-_constantargs +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 4 +; SmallConstant +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 65535 +; SmallConstant +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 65536 +; SmallConstant +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long -1 +; LargeConstant at index 0 +; CHECK-NEXT: .byte 5 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 0 + +define void @constantargs() { +entry: + %0 = inttoptr i64 12345 to i8* + tail call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 1, i32 15, i8* %0, i32 0, i64 65535, i64 65536, i64 4294967295, i64 4294967296) + ret void +} + +; 
Inline OSR Exit +; +; CHECK-NEXT: .long 3 +; CHECK-NEXT: .long L{{.*}}-_osrinline +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 2 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +define void @osrinline(i64 %a, i64 %b) { +entry: + ; Runtime void->void call. + call void inttoptr (i64 -559038737 to void ()*)() + ; Followed by inline OSR patchpoint with 12-byte shadow and 2 live vars. + call void (i32, i32, ...)* @llvm.experimental.stackmap(i32 3, i32 12, i64 %a, i64 %b) + ret void +} + +; Cold OSR Exit +; +; 2 live variables in register. +; +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long L{{.*}}-_osrcold +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 2 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +define void @osrcold(i64 %a, i64 %b) { +entry: + %test = icmp slt i64 %a, %b + br i1 %test, label %ret, label %cold +cold: + ; OSR patchpoint with 12-byte nop-slide and 2 live vars. + %thunk = inttoptr i64 -559038737 to i8* + call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 4, i32 15, i8* %thunk, i32 0, i64 %a, i64 %b) + unreachable +ret: + ret void +} + +; Property Read +; CHECK-NEXT: .long 5 +; CHECK-NEXT: .long L{{.*}}-_propertyRead +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 0 +; +; FIXME: There are currently no stackmap entries. After moving to +; AnyRegCC, we will have entries for the object and return value. 
+define i64 @propertyRead(i64* %obj) { +entry: + %resolveRead = inttoptr i64 -559038737 to i8* + %result = call i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 5, i32 15, i8* %resolveRead, i32 1, i64* %obj) + %add = add i64 %result, 3 + ret i64 %add +} + +; Property Write +; CHECK-NEXT: .long 6 +; CHECK-NEXT: .long L{{.*}}-_propertyWrite +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 0 +; +; FIXME: There are currently no stackmap entries. After moving to +; AnyRegCC, we will have entries for the object and return value. +define void @propertyWrite(i64 %dummy1, i64* %obj, i64 %dummy2, i64 %a) { +entry: + %resolveWrite = inttoptr i64 -559038737 to i8* + call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 6, i32 15, i8* %resolveWrite, i32 2, i64* %obj, i64 %a) + ret void +} + +; Void JS Call +; +; 2 live variables in registers. +; +; CHECK-NEXT: .long 7 +; CHECK-NEXT: .long L{{.*}}-_jsVoidCall +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 2 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +define void @jsVoidCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) { +entry: + %resolveCall = inttoptr i64 -559038737 to i8* + call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 7, i32 15, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2) + ret void +} + +; i64 JS Call +; +; 2 live variables in registers. 
+; +; CHECK: .long 8 +; CHECK-NEXT: .long L{{.*}}-_jsIntCall +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 2 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +define i64 @jsIntCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) { +entry: + %resolveCall = inttoptr i64 -559038737 to i8* + %result = call i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 8, i32 15, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2) + %add = add i64 %result, 3 + ret i64 %add +} + +; Spilled stack map values. +; +; Verify 17 stack map entries. +; +; CHECK: .long 11 +; CHECK-NEXT: .long L{{.*}}-_spilledValue +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 17 +; +; Check that at least one is a spilled entry from RBP. +; Location: Indirect RBP + ... +; CHECK: .byte 3 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short 6 +define void @spilledValue(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16) { +entry: + call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 11, i32 15, i8* null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16) + ret void +} + +; Spilled stack map values. +; +; Verify 17 stack map entries. +; +; CHECK: .long 12 +; CHECK-LABEL: .long L{{.*}}-_spilledStackMapValue +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 17 +; +; Check that at least one is a spilled entry from RBP. +; Location: Indirect RBP + ... 
+; CHECK: .byte 3 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short 6 +define webkit_jscc void @spilledStackMapValue(i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16) { +entry: + call void (i32, i32, ...)* @llvm.experimental.stackmap(i32 12, i32 15, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16) + ret void +} + +; Spill a subregister stackmap operand. +; +; CHECK: .long 13 +; CHECK-LABEL: .long L{{.*}}-_spillSubReg +; CHECK-NEXT: .short 0 +; 4 locations +; CHECK-NEXT: .short 1 +; +; Check that the subregister operand is a 4-byte spill. +; Location: Indirect, 4-byte, RBP + ... +; CHECK: .byte 3 +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .short 6 +define void @spillSubReg(i64 %arg) #0 { +bb: + br i1 undef, label %bb1, label %bb2 + +bb1: + unreachable + +bb2: + %tmp = load i64* inttoptr (i64 140685446136880 to i64*) + br i1 undef, label %bb16, label %bb17 + +bb16: + unreachable + +bb17: + %tmp32 = trunc i64 %tmp to i32 + br i1 undef, label %bb60, label %bb61 + +bb60: + tail call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() nounwind + tail call void (i32, i32, ...)* @llvm.experimental.stackmap(i32 13, i32 5, i32 %tmp32) + unreachable + +bb61: + unreachable +} + +; Map a single byte subregister. There is no DWARF register number, so +; we expect the register to be encoded with the proper size and spill offset. We don't know which +; +; CHECK: .long 14 +; CHECK-LABEL: .long L{{.*}}-_subRegOffset +; CHECK-NEXT: .short 0 +; 2 locations +; CHECK-NEXT: .short 2 +; +; Check that the subregister operands are 1-byte spills. 
+; Location 0: Register, 4-byte, AL +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 0 +; +; Location 1: Register, 4-byte, BL +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .short 3 +; CHECK-NEXT: .long 0 +define void @subRegOffset(i16 %arg) { + %v = mul i16 %arg, 5 + %a0 = trunc i16 %v to i8 + tail call void asm sideeffect "nop", "~{bx}"() nounwind + %arghi = lshr i16 %v, 8 + %a1 = trunc i16 %arghi to i8 + tail call void asm sideeffect "nop", "~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() nounwind + tail call void (i32, i32, ...)* @llvm.experimental.stackmap(i32 14, i32 5, i8 %a0, i8 %a1) + ret void +} + +declare void @llvm.experimental.stackmap(i32, i32, ...) +declare void @llvm.experimental.patchpoint.void(i32, i32, i8*, i32, ...) +declare i64 @llvm.experimental.patchpoint.i64(i32, i32, i8*, i32, ...) diff --git a/test/CodeGen/X86/store-narrow.ll b/test/CodeGen/X86/store-narrow.ll index fab266f..7557f25 100644 --- a/test/CodeGen/X86/store-narrow.ll +++ b/test/CodeGen/X86/store-narrow.ll @@ -12,7 +12,7 @@ entry: %D = or i32 %C, %B store i32 %D, i32* %a0, align 4 ret void - + ; X64-LABEL: test1: ; X64: movb %sil, (%rdi) @@ -34,8 +34,8 @@ entry: ; X64: movb %sil, 1(%rdi) ; X32-LABEL: test2: -; X32: movb 8(%esp), %al -; X32: movb %al, 1(%{{.*}}) +; X32: movb 8(%esp), %[[REG:[abcd]l]] +; X32: movb %[[REG]], 1(%{{.*}}) } define void @test3(i32* nocapture %a0, i16 zeroext %a1) nounwind ssp { @@ -67,8 +67,8 @@ entry: ; X64: movw %si, 2(%rdi) ; X32-LABEL: test4: -; X32: movl 8(%esp), %eax -; X32: movw %ax, 2(%{{.*}}) +; X32: movl 8(%esp), %e[[REG:[abcd]x]] +; X32: movw %[[REG]], 2(%{{.*}}) } define void @test5(i64* nocapture %a0, i16 zeroext %a1) nounwind ssp { @@ -84,8 +84,8 @@ entry: ; X64: movw %si, 2(%rdi) ; X32-LABEL: test5: -; X32: movzwl 8(%esp), %eax -; X32: movw %ax, 2(%{{.*}}) +; X32: movzwl 8(%esp), %e[[REG:[abcd]x]] +; X32: movw %[[REG]], 2(%{{.*}}) } define void 
@test6(i64* nocapture %a0, i8 zeroext %a1) nounwind ssp { @@ -102,8 +102,8 @@ entry: ; X32-LABEL: test6: -; X32: movb 8(%esp), %al -; X32: movb %al, 5(%{{.*}}) +; X32: movb 8(%esp), %[[REG:[abcd]l]] +; X32: movb %[[REG]], 5(%{{.*}}) } define i32 @test7(i64* nocapture %a0, i8 zeroext %a1, i32* %P2) nounwind { @@ -121,8 +121,8 @@ entry: ; X32-LABEL: test7: -; X32: movb 8(%esp), %cl -; X32: movb %cl, 5(%{{.*}}) +; X32: movb 8(%esp), %[[REG:[abcd]l]] +; X32: movb %[[REG]], 5(%{{.*}}) } ; PR7833 diff --git a/test/CodeGen/X86/tail-call-attrs.ll b/test/CodeGen/X86/tail-call-attrs.ll new file mode 100644 index 0000000..17ebe99 --- /dev/null +++ b/test/CodeGen/X86/tail-call-attrs.ll @@ -0,0 +1,56 @@ +; RUN: llc -mtriple=x86_64-apple-darwin -o - %s | FileCheck %s + +; Simple case: completely identical returns, even with extensions, shouldn't be +; a barrier to tail calls. +declare zeroext i1 @give_bool() +define zeroext i1 @test_bool() { +; CHECK-LABEL: test_bool: +; CHECK: jmp + %call = tail call zeroext i1 @give_bool() + ret i1 %call +} + +; Here, there's more zero extension to be done between the call and the return, +; so a tail call is impossible (well, according to current Clang practice +; anyway. The AMD64 ABI isn't crystal clear on the matter). +declare zeroext i32 @give_i32() +define zeroext i8 @test_i32() { +; CHECK-LABEL: test_i32: +; CHECK: callq _give_i32 +; CHECK: movzbl %al, %eax +; CHECK: ret + + %call = tail call zeroext i32 @give_i32() + %val = trunc i32 %call to i8 + ret i8 %val +} + +; Here, one function is zeroext and the other is signext. To the extent that +; these both mean something they are incompatible so no tail call is possible. 
+declare zeroext i16 @give_unsigned_i16() +define signext i16 @test_incompatible_i16() { +; CHECK-LABEL: test_incompatible_i16: +; CHECK: callq _give_unsigned_i16 +; CHECK: cwtl +; CHECK: ret + + %call = tail call zeroext i16 @give_unsigned_i16() + ret i16 %call +} + +declare inreg i32 @give_i32_inreg() +define i32 @test_inreg_to_normal() { +; CHECK-LABEL: test_inreg_to_normal: +; CHECK: callq _give_i32_inreg +; CHECK: ret + %val = tail call inreg i32 @give_i32_inreg() + ret i32 %val +} + +define inreg i32 @test_normal_to_inreg() { +; CHECK-LABEL: test_normal_to_inreg: +; CHECK: callq _give_i32 +; CHECK: ret + %val = tail call i32 @give_i32() + ret i32 %val +} diff --git a/test/CodeGen/X86/tailcall-largecode.ll b/test/CodeGen/X86/tailcall-largecode.ll index e9b8721..f5662d9 100644 --- a/test/CodeGen/X86/tailcall-largecode.ll +++ b/test/CodeGen/X86/tailcall-largecode.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-linux-gnu -tailcallopt -code-model=large | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux-gnu -tailcallopt -code-model=large -enable-misched=false | FileCheck %s declare fastcc i32 @callee(i32 %arg) define fastcc i32 @directcall(i32 %arg) { diff --git a/test/CodeGen/X86/tbm-intrinsics-x86_64.ll b/test/CodeGen/X86/tbm-intrinsics-x86_64.ll new file mode 100644 index 0000000..1bc6175 --- /dev/null +++ b/test/CodeGen/X86/tbm-intrinsics-x86_64.ll @@ -0,0 +1,43 @@ +; RUN: llc -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=+tbm < %s | FileCheck %s + +define i32 @test_x86_tbm_bextri_u32(i32 %a) nounwind readnone { +entry: + ; CHECK-LABEL: test_x86_tbm_bextri_u32: + ; CHECK-NOT: mov + ; CHECK: bextr $ + %0 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 %a, i32 2814) + ret i32 %0 +} + +declare i32 @llvm.x86.tbm.bextri.u32(i32, i32) nounwind readnone + +define i32 @test_x86_tbm_bextri_u32_m(i32* nocapture %a) nounwind readonly { +entry: + ; CHECK-LABEL: test_x86_tbm_bextri_u32_m: + ; CHECK-NOT: mov + ; CHECK: bextr $ + %tmp1 = load i32* %a, align 4 + %0 
= tail call i32 @llvm.x86.tbm.bextri.u32(i32 %tmp1, i32 2814) + ret i32 %0 +} + +define i64 @test_x86_tbm_bextri_u64(i64 %a) nounwind readnone { +entry: + ; CHECK-LABEL: test_x86_tbm_bextri_u64: + ; CHECK-NOT: mov + ; CHECK: bextr $ + %0 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %a, i64 2814) + ret i64 %0 +} + +declare i64 @llvm.x86.tbm.bextri.u64(i64, i64) nounwind readnone + +define i64 @test_x86_tbm_bextri_u64_m(i64* nocapture %a) nounwind readonly { +entry: + ; CHECK-LABEl: test_x86_tbm_bextri_u64_m: + ; CHECK-NOT: mov + ; CHECK: bextr $ + %tmp1 = load i64* %a, align 8 + %0 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %tmp1, i64 2814) + ret i64 %0 +} diff --git a/test/CodeGen/X86/tbm_patterns.ll b/test/CodeGen/X86/tbm_patterns.ll new file mode 100644 index 0000000..79eea10 --- /dev/null +++ b/test/CodeGen/X86/tbm_patterns.ll @@ -0,0 +1,253 @@ +; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+tbm < %s | FileCheck %s + +define i32 @test_x86_tbm_bextri_u32(i32 %a) nounwind readnone { +entry: + ; CHECK-LABEL: test_x86_tbm_bextri_u32: + ; CHECK-NOT: mov + ; CHECK: bextr $ + %0 = lshr i32 %a, 4 + %1 = and i32 %0, 4095 + ret i32 %1 +} + +define i32 @test_x86_tbm_bextri_u32_m(i32* nocapture %a) nounwind readonly { +entry: + ; CHECK-LABEL: test_x86_tbm_bextri_u32_m: + ; CHECK-NOT: mov + ; CHECK: bextr $ + %0 = load i32* %a + %1 = lshr i32 %0, 4 + %2 = and i32 %1, 4095 + ret i32 %2 +} + +define i64 @test_x86_tbm_bextri_u64(i64 %a) nounwind readnone { +entry: + ; CHECK-LABEL: test_x86_tbm_bextri_u64: + ; CHECK-NOT: mov + ; CHECK: bextr $ + %0 = lshr i64 %a, 4 + %1 = and i64 %0, 4095 + ret i64 %1 +} + +define i64 @test_x86_tbm_bextri_u64_m(i64* nocapture %a) nounwind readonly { +entry: + ; CHECK-LABEL: test_x86_tbm_bextri_u64_m: + ; CHECK-NOT: mov + ; CHECK: bextr $ + %0 = load i64* %a + %1 = lshr i64 %0, 4 + %2 = and i64 %1, 4095 + ret i64 %2 +} + +define i32 @test_x86_tbm_blcfill_u32(i32 %a) nounwind readnone { +entry: + ; CHECK-LABEL: test_x86_tbm_blcfill_u32: + ; 
CHECK-NOT: mov + ; CHECK: blcfill % + %0 = add i32 %a, 1 + %1 = and i32 %0, %a + ret i32 %1 +} + +define i64 @test_x86_tbm_blcfill_u64(i64 %a) nounwind readnone { +entry: + ; CHECK-LABEL: test_x86_tbm_blcfill_u64: + ; CHECK-NOT: mov + ; CHECK: blcfill % + %0 = add i64 %a, 1 + %1 = and i64 %0, %a + ret i64 %1 +} + +define i32 @test_x86_tbm_blci_u32(i32 %a) nounwind readnone { +entry: + ; CHECK-LABEL: test_x86_tbm_blci_u32: + ; CHECK-NOT: mov + ; CHECK: blci % + %0 = add i32 1, %a + %1 = xor i32 %0, -1 + %2 = or i32 %1, %a + ret i32 %2 +} + +define i64 @test_x86_tbm_blci_u64(i64 %a) nounwind readnone { +entry: + ; CHECK-LABEL: test_x86_tbm_blci_u64: + ; CHECK-NOT: mov + ; CHECK: blci % + %0 = add i64 1, %a + %1 = xor i64 %0, -1 + %2 = or i64 %1, %a + ret i64 %2 +} + +define i32 @test_x86_tbm_blci_u32_b(i32 %a) nounwind readnone { +entry: + ; CHECK-LABEL: test_x86_tbm_blci_u32_b: + ; CHECK-NOT: mov + ; CHECK: blci % + %0 = sub i32 -2, %a + %1 = or i32 %0, %a + ret i32 %1 +} + +define i64 @test_x86_tbm_blci_u64_b(i64 %a) nounwind readnone { +entry: + ; CHECK-LABEL: test_x86_tbm_blci_u64_b: + ; CHECK-NOT: mov + ; CHECK: blci % + %0 = sub i64 -2, %a + %1 = or i64 %0, %a + ret i64 %1 +} + +define i32 @test_x86_tbm_blcic_u32(i32 %a) nounwind readnone { +entry: + ; CHECK-LABEL: test_x86_tbm_blcic_u32: + ; CHECK-NOT: mov + ; CHECK: blcic % + %0 = xor i32 %a, -1 + %1 = add i32 %a, 1 + %2 = and i32 %1, %0 + ret i32 %2 +} + +define i64 @test_x86_tbm_blcic_u64(i64 %a) nounwind readnone { +entry: + ; CHECK-LABEL: test_x86_tbm_blcic_u64: + ; CHECK-NOT: mov + ; CHECK: blcic % + %0 = xor i64 %a, -1 + %1 = add i64 %a, 1 + %2 = and i64 %1, %0 + ret i64 %2 +} + +define i32 @test_x86_tbm_blcmsk_u32(i32 %a) nounwind readnone { +entry: + ; CHECK-LABEL: test_x86_tbm_blcmsk_u32: + ; CHECK-NOT: mov + ; CHECK: blcmsk % + %0 = add i32 %a, 1 + %1 = xor i32 %0, %a + ret i32 %1 +} + +define i64 @test_x86_tbm_blcmsk_u64(i64 %a) nounwind readnone { +entry: + ; CHECK-LABEL: test_x86_tbm_blcmsk_u64: 
+ ; CHECK-NOT: mov + ; CHECK: blcmsk % + %0 = add i64 %a, 1 + %1 = xor i64 %0, %a + ret i64 %1 +} + +define i32 @test_x86_tbm_blcs_u32(i32 %a) nounwind readnone { +entry: + ; CHECK-LABEL: test_x86_tbm_blcs_u32: + ; CHECK-NOT: mov + ; CHECK: blcs % + %0 = add i32 %a, 1 + %1 = or i32 %0, %a + ret i32 %1 +} + +define i64 @test_x86_tbm_blcs_u64(i64 %a) nounwind readnone { +entry: + ; CHECK-LABEL: test_x86_tbm_blcs_u64: + ; CHECK-NOT: mov + ; CHECK: blcs % + %0 = add i64 %a, 1 + %1 = or i64 %0, %a + ret i64 %1 +} + +define i32 @test_x86_tbm_blsfill_u32(i32 %a) nounwind readnone { +entry: + ; CHECK-LABEL: test_x86_tbm_blsfill_u32: + ; CHECK-NOT: mov + ; CHECK: blsfill % + %0 = add i32 %a, -1 + %1 = or i32 %0, %a + ret i32 %1 +} + +define i64 @test_x86_tbm_blsfill_u64(i64 %a) nounwind readnone { +entry: + ; CHECK-LABEL: test_x86_tbm_blsfill_u64: + ; CHECK-NOT: mov + ; CHECK: blsfill % + %0 = add i64 %a, -1 + %1 = or i64 %0, %a + ret i64 %1 +} + +define i32 @test_x86_tbm_blsic_u32(i32 %a) nounwind readnone { +entry: + ; CHECK-LABEL: test_x86_tbm_blsic_u32: + ; CHECK-NOT: mov + ; CHECK: blsic % + %0 = xor i32 %a, -1 + %1 = add i32 %a, -1 + %2 = or i32 %0, %1 + ret i32 %2 +} + +define i64 @test_x86_tbm_blsic_u64(i64 %a) nounwind readnone { +entry: + ; CHECK-LABEL: test_x86_tbm_blsic_u64: + ; CHECK-NOT: mov + ; CHECK: blsic % + %0 = xor i64 %a, -1 + %1 = add i64 %a, -1 + %2 = or i64 %0, %1 + ret i64 %2 +} + +define i32 @test_x86_tbm_t1mskc_u32(i32 %a) nounwind readnone { +entry: + ; CHECK-LABEL: test_x86_tbm_t1mskc_u32: + ; CHECK-NOT: mov + ; CHECK: t1mskc % + %0 = xor i32 %a, -1 + %1 = add i32 %a, 1 + %2 = or i32 %0, %1 + ret i32 %2 +} + +define i64 @Ttest_x86_tbm_t1mskc_u64(i64 %a) nounwind readnone { +entry: + ; CHECK-LABEL: test_x86_tbm_t1mskc_u64: + ; CHECK-NOT: mov + ; CHECK: t1mskc % + %0 = xor i64 %a, -1 + %1 = add i64 %a, 1 + %2 = or i64 %0, %1 + ret i64 %2 +} + +define i32 @test_x86_tbm_tzmsk_u32(i32 %a) nounwind readnone { +entry: + ; CHECK-LABEL: 
test_x86_tbm_tzmsk_u32: + ; CHECK-NOT: mov + ; CHECK: tzmsk % + %0 = xor i32 %a, -1 + %1 = add i32 %a, -1 + %2 = and i32 %0, %1 + ret i32 %2 +} + +define i64 @test_x86_tbm_tzmsk_u64(i64 %a) nounwind readnone { +entry: + ; CHECK-LABEL: test_x86_tbm_tzmsk_u64: + ; CHECK-NOT: mov + ; CHECK: tzmsk % + %0 = xor i64 %a, -1 + %1 = add i64 %a, -1 + %2 = and i64 %0, %1 + ret i64 %2 +} diff --git a/test/CodeGen/X86/test-nofold.ll b/test/CodeGen/X86/test-nofold.ll index 97db1b3..19fbaaf 100644 --- a/test/CodeGen/X86/test-nofold.ll +++ b/test/CodeGen/X86/test-nofold.ll @@ -2,10 +2,10 @@ ; rdar://5752025 ; We want: -; CHECK: movl $42, %ecx -; CHECK-NEXT: movl 4(%esp), %eax -; CHECK-NEXT: andl $15, %eax -; CHECK-NEXT: cmovnel %ecx, %eax +; CHECK: movl 4(%esp), %ecx +; CHECK-NEXT: andl $15, %ecx +; CHECK-NEXT: movl $42, %eax +; CHECK-NEXT: cmovel %ecx, %eax ; CHECK-NEXT: ret ; ; We don't want: @@ -39,4 +39,3 @@ entry: %retval = select i1 %tmp4, i32 %tmp2, i32 42 ; <i32> [#uses=1] ret i32 %retval } - diff --git a/test/CodeGen/X86/tls.ll b/test/CodeGen/X86/tls.ll index 24284e5..76a8402 100644 --- a/test/CodeGen/X86/tls.ll +++ b/test/CodeGen/X86/tls.ll @@ -223,27 +223,22 @@ entry: define i16 @f11() { ; X32_LINUX-LABEL: f11: ; X32_LINUX: movzwl %gs:s1@NTPOFF, %eax -; Why is this kill line here, but no where else? 
-; X32_LINUX-NEXT: # kill -; X32_LINUX-NEXT: ret +; X32_LINUX: ret ; X64_LINUX-LABEL: f11: ; X64_LINUX: movzwl %fs:s1@TPOFF, %eax -; X64_LINUX-NEXT: # kill -; X64_LINUX-NEXT: ret +; X64_LINUX: ret ; X32_WIN-LABEL: f11: ; X32_WIN: movl __tls_index, %eax ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax ; X32_WIN-NEXT: movzwl _s1@SECREL32(%eax), %eax -; X32_WIN-NEXT: # kill -; X32_WIN-NEXT: ret +; X32_WIN: ret ; X64_WIN-LABEL: f11: ; X64_WIN: movl _tls_index(%rip), %eax ; X64_WIN-NEXT: movq %gs:88, %rcx ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax ; X64_WIN-NEXT: movzwl s1@SECREL32(%rax), %eax -; X64_WIN-NEXT: # kill -; X64_WIN-NEXT: ret +; X64_WIN: ret entry: %tmp1 = load i16* @s1 diff --git a/test/CodeGen/X86/tlv-3.ll b/test/CodeGen/X86/tlv-3.ll new file mode 100644 index 0000000..4f79305 --- /dev/null +++ b/test/CodeGen/X86/tlv-3.ll @@ -0,0 +1,10 @@ +; RUN: llc < %s -mtriple x86_64-apple-darwin | FileCheck %s +; PR17964 + +; CHECK: __DATA,__thread_data,thread_local_regular +; CHECK: _foo$tlv$init +@foo = weak_odr thread_local global i8 1, align 4 + +define i32 @main() { + ret i32 0 +} diff --git a/test/CodeGen/X86/trunc-ext-ld-st.ll b/test/CodeGen/X86/trunc-ext-ld-st.ll index 408bdc8..d230f1f 100644 --- a/test/CodeGen/X86/trunc-ext-ld-st.ll +++ b/test/CodeGen/X86/trunc-ext-ld-st.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse41 | FileCheck %s +; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s ;CHECK-LABEL: load_2_i8: ; A single 16-bit load diff --git a/test/CodeGen/X86/trunc-to-bool.ll b/test/CodeGen/X86/trunc-to-bool.ll index 3711cf1..0ed6347 100644 --- a/test/CodeGen/X86/trunc-to-bool.ll +++ b/test/CodeGen/X86/trunc-to-bool.ll @@ -22,7 +22,7 @@ ret_false: ret i1 false } ; CHECK-LABEL: test2: -; CHECK: btl %eax +; CHECK: btl define i32 @test3(i8* %ptr) nounwind { %val = load i8* %ptr diff --git a/test/CodeGen/X86/unaligned-spill-folding.ll b/test/CodeGen/X86/unaligned-spill-folding.ll 
new file mode 100644 index 0000000..154ce9e --- /dev/null +++ b/test/CodeGen/X86/unaligned-spill-folding.ll @@ -0,0 +1,49 @@ +; RUN: llc -mtriple=i386-unknown-freebsd -mcpu=core2 -stack-alignment=4 -relocation-model=pic < %s | FileCheck %s -check-prefix=UNALIGNED +; RUN: llc -mtriple=i386-unknown-freebsd -mcpu=core2 -stack-alignment=16 -relocation-model=pic < %s | FileCheck %s -check-prefix=ALIGNED +; RUN: llc -mtriple=i386-unknown-freebsd -mcpu=core2 -stack-alignment=4 -force-align-stack -relocation-model=pic < %s | FileCheck %s -check-prefix=FORCEALIGNED + +@arr = internal unnamed_addr global [32 x i32] zeroinitializer, align 16 + +; PR12250 +define i32 @test1() { +vector.ph: + br label %vector.body + +vector.body: + %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds [32 x i32]* @arr, i32 0, i32 %index + %1 = bitcast i32* %0 to <4 x i32>* + %wide.load = load <4 x i32>* %1, align 16 + %2 = add nsw <4 x i32> %wide.load, <i32 10, i32 10, i32 10, i32 10> + %3 = xor <4 x i32> %2, <i32 123345, i32 123345, i32 123345, i32 123345> + %4 = add nsw <4 x i32> %3, <i32 112, i32 112, i32 112, i32 112> + %5 = xor <4 x i32> %4, <i32 543345, i32 543345, i32 543345, i32 543345> + %6 = add nsw <4 x i32> %5, <i32 73, i32 73, i32 73, i32 73> + %7 = xor <4 x i32> %6, <i32 345987, i32 345987, i32 345987, i32 345987> + %8 = add nsw <4 x i32> %7, <i32 48, i32 48, i32 48, i32 48> + %9 = xor <4 x i32> %8, <i32 123987, i32 123987, i32 123987, i32 123987> + store <4 x i32> %9, <4 x i32>* %1, align 16 + %index.next = add i32 %index, 4 + %10 = icmp eq i32 %index.next, 32 + br i1 %10, label %middle.block, label %vector.body + +middle.block: + ret i32 0 + +; We can't fold the spill into a padd unless the stack is aligned. 
Just spilling +; doesn't force stack realignment though +; UNALIGNED-LABEL: @test1 +; UNALIGNED-NOT: andl $-{{..}}, %esp +; UNALIGNED: movdqu {{.*}} # 16-byte Folded Spill +; UNALIGNED-NOT: paddd {{.*}} # 16-byte Folded Reload + +; ALIGNED-LABEL: @test1 +; ALIGNED-NOT: andl $-{{..}}, %esp +; ALIGNED: movdqa {{.*}} # 16-byte Spill +; ALIGNED: paddd {{.*}} # 16-byte Folded Reload + +; FORCEALIGNED-LABEL: @test1 +; FORCEALIGNED: andl $-{{..}}, %esp +; FORCEALIGNED: movdqa {{.*}} # 16-byte Spill +; FORCEALIGNED: paddd {{.*}} # 16-byte Folded Reload +} diff --git a/test/CodeGen/X86/unknown-location.ll b/test/CodeGen/X86/unknown-location.ll index 2422de9..d7ae469 100644 --- a/test/CodeGen/X86/unknown-location.ll +++ b/test/CodeGen/X86/unknown-location.ll @@ -19,12 +19,13 @@ entry: } !llvm.dbg.cu = !{!3} +!llvm.module.flags = !{!12} !0 = metadata !{i32 786689, metadata !1, metadata !"x", metadata !2, i32 1, metadata !6} ; [ DW_TAG_arg_variable ] !1 = metadata !{i32 786478, metadata !10, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", i32 1, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, i32, i32, i32)* @foo, null, null, null, i32 1} ; [ DW_TAG_subprogram ] !2 = metadata !{i32 786473, metadata !10} ; [ DW_TAG_file_type ] !3 = metadata !{i32 786449, metadata !10, i32 12, metadata !"producer", i1 false, metadata !"", i32 0, metadata !11, metadata !11, metadata !9, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!4 = metadata !{i32 786453, metadata !10, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{i32 786453, metadata !10, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !5 = metadata !{metadata !6} !6 = metadata !{i32 786468, metadata !10, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, 
i32 0, i32 5} ; [ DW_TAG_base_type ] !7 = metadata !{i32 786443, metadata !2, metadata !1, i32 1, i32 30, i32 0} ; [ DW_TAG_lexical_block ] @@ -32,3 +33,4 @@ entry: !9 = metadata !{metadata !1} !10 = metadata !{metadata !"test.c", metadata !"/dir"} !11 = metadata !{i32 0} +!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/v-binop-widen.ll b/test/CodeGen/X86/v-binop-widen.ll index 8655c6c..fca4da6 100644 --- a/test/CodeGen/X86/v-binop-widen.ll +++ b/test/CodeGen/X86/v-binop-widen.ll @@ -1,7 +1,7 @@ ; RUN: llc -mcpu=generic -march=x86 -mattr=+sse < %s | FileCheck %s -; CHECK: divss ; CHECK: divps ; CHECK: divps +; CHECK: divss %vec = type <9 x float> define %vec @vecdiv( %vec %p1, %vec %p2) @@ -9,4 +9,3 @@ define %vec @vecdiv( %vec %p1, %vec %p2) %result = fdiv %vec %p1, %p2 ret %vec %result } - diff --git a/test/CodeGen/X86/v-binop-widen2.ll b/test/CodeGen/X86/v-binop-widen2.ll index 569586a..3342111 100644 --- a/test/CodeGen/X86/v-binop-widen2.ll +++ b/test/CodeGen/X86/v-binop-widen2.ll @@ -2,9 +2,9 @@ ; RUN: llc -march=x86 -mcpu=atom -mattr=+sse < %s | FileCheck -check-prefix=ATOM %s %vec = type <6 x float> +; CHECK: divps ; CHECK: divss ; CHECK: divss -; CHECK: divps ; Scheduler causes a different instruction order to be produced on Intel Atom ; ATOM: divps diff --git a/test/CodeGen/X86/v4i32load-crash.ll b/test/CodeGen/X86/v4i32load-crash.ll new file mode 100644 index 0000000..052c4c3 --- /dev/null +++ b/test/CodeGen/X86/v4i32load-crash.ll @@ -0,0 +1,27 @@ +; RUN: llc --mcpu=x86-64 --mattr=ssse3 < %s + +;PR18045: +;Issue of selection for 'v4i32 load'. +;This instruction is not legal for X86 CPUs with sse < 'sse4.1'. +;This node was generated by X86ISelLowering.cpp, EltsFromConsecutiveLoads +;static function after legilize stage. 
+ +@e = external global [4 x i32], align 4 +@f = external global [4 x i32], align 4 + +; Function Attrs: nounwind +define void @fn3(i32 %el) { +entry: + %0 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 0) + %1 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 1) + %2 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 2) + %3 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 3) + %4 = insertelement <4 x i32> undef, i32 %0, i32 0 + %5 = insertelement <4 x i32> %4, i32 %1, i32 1 + %6 = insertelement <4 x i32> %5, i32 %2, i32 2 + %7 = insertelement <4 x i32> %6, i32 %3, i32 3 + %8 = add <4 x i32> %6, %7 + store <4 x i32> %8, <4 x i32>* bitcast ([4 x i32]* @f to <4 x i32>*) + ret void +} + diff --git a/test/CodeGen/X86/vec_compare-sse4.ll b/test/CodeGen/X86/vec_compare-sse4.ll index a08d9f5..084d611 100644 --- a/test/CodeGen/X86/vec_compare-sse4.ll +++ b/test/CodeGen/X86/vec_compare-sse4.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=x86 -mattr=-sse3,+sse2 | FileCheck %s -check-prefix=SSE2 -; RUN: llc < %s -march=x86 -mattr=-sse42,+sse41 | FileCheck %s -check-prefix=SSE41 -; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s -check-prefix=SSE42 +; RUN: llc < %s -march=x86 -mattr=-sse4.2,+sse4.1 | FileCheck %s -check-prefix=SSE41 +; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s -check-prefix=SSE42 define <2 x i64> @test1(<2 x i64> %A, <2 x i64> %B) nounwind { ; SSE42-LABEL: test1: diff --git a/test/CodeGen/X86/vec_extract-sse4.ll b/test/CodeGen/X86/vec_extract-sse4.ll index 42d7f27..3cb519a 100644 --- a/test/CodeGen/X86/vec_extract-sse4.ll +++ b/test/CodeGen/X86/vec_extract-sse4.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse41 -o %t +; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse4.1 -o %t ; RUN: not grep extractps %t ; RUN: not grep pextrd %t ; RUN: not grep pshufd %t diff --git a/test/CodeGen/X86/vec_extract.ll b/test/CodeGen/X86/vec_extract.ll index 2c8796b..88f5a58 100644 --- 
a/test/CodeGen/X86/vec_extract.ll +++ b/test/CodeGen/X86/vec_extract.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse2,-sse41 -o %t +; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse2,-sse4.1 -o %t ; RUN: grep movss %t | count 4 ; RUN: grep movhlps %t | count 1 ; RUN: not grep pshufd %t diff --git a/test/CodeGen/X86/vec_fpext.ll b/test/CodeGen/X86/vec_fpext.ll index 863712f..7ec07ae 100644 --- a/test/CodeGen/X86/vec_fpext.ll +++ b/test/CodeGen/X86/vec_fpext.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse41,-avx | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=+sse4.1,-avx | FileCheck %s ; RUN: llc < %s -march=x86 -mcpu=corei7-avx | FileCheck --check-prefix=AVX %s ; PR11674 diff --git a/test/CodeGen/X86/vec_insert-2.ll b/test/CodeGen/X86/vec_insert-2.ll index bfac1ba..fe20a47 100644 --- a/test/CodeGen/X86/vec_insert-2.ll +++ b/test/CodeGen/X86/vec_insert-2.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | FileCheck --check-prefix=X32 %s -; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse41 | FileCheck --check-prefix=X64 %s +; RUN: llc < %s -march=x86 -mattr=+sse2,-sse4.1 | FileCheck --check-prefix=X32 %s +; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse4.1 | FileCheck --check-prefix=X64 %s define <4 x float> @t1(float %s, <4 x float> %tmp) nounwind { ; X32-LABEL: t1: diff --git a/test/CodeGen/X86/vec_insert-3.ll b/test/CodeGen/X86/vec_insert-3.ll index a18cd864..a871339 100644 --- a/test/CodeGen/X86/vec_insert-3.ll +++ b/test/CodeGen/X86/vec_insert-3.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse41 | grep punpcklqdq | count 1 +; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse4.1 | grep punpcklqdq | count 1 define <2 x i64> @t1(i64 %s, <2 x i64> %tmp) nounwind { %tmp1 = insertelement <2 x i64> %tmp, i64 %s, i32 1 diff --git a/test/CodeGen/X86/vec_insert-7.ll b/test/CodeGen/X86/vec_insert-7.ll index 268b5c4..6d4f828 100644 --- a/test/CodeGen/X86/vec_insert-7.ll +++ 
b/test/CodeGen/X86/vec_insert-7.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+mmx,+sse42 -mtriple=i686-apple-darwin9 | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=+mmx,+sse4.2 -mtriple=i686-apple-darwin9 | FileCheck %s ; MMX insertelement is not available; these are promoted to XMM. ; (Without SSE they are split to two ints, and the code is much better.) diff --git a/test/CodeGen/X86/vec_insert-8.ll b/test/CodeGen/X86/vec_insert-8.ll index 650951c..917832c 100644 --- a/test/CodeGen/X86/vec_insert-8.ll +++ b/test/CodeGen/X86/vec_insert-8.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse41 -o %t +; RUN: llc < %s -march=x86 -mattr=+sse4.1 -o %t ; tests variable insert and extract of a 4 x i32 diff --git a/test/CodeGen/X86/vec_insert-9.ll b/test/CodeGen/X86/vec_insert-9.ll index e5a7ccc..5f2e676 100644 --- a/test/CodeGen/X86/vec_insert-9.ll +++ b/test/CodeGen/X86/vec_insert-9.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse41 > %t +; RUN: llc < %s -march=x86 -mattr=+sse4.1 > %t ; RUN: grep pinsrd %t | count 1 define <4 x i32> @var_insert2(<4 x i32> %x, i32 %val, i32 %idx) nounwind { diff --git a/test/CodeGen/X86/vec_insert.ll b/test/CodeGen/X86/vec_insert.ll index 4e5d445..0ed8f10 100644 --- a/test/CodeGen/X86/vec_insert.ll +++ b/test/CodeGen/X86/vec_insert.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movss | count 1 -; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | not grep pinsrw +; RUN: llc < %s -march=x86 -mattr=+sse2,-sse4.1 | grep movss | count 1 +; RUN: llc < %s -march=x86 -mattr=+sse2,-sse4.1 | not grep pinsrw define void @test(<4 x float>* %F, i32 %I) nounwind { %tmp = load <4 x float>* %F ; <<4 x float>> [#uses=1] diff --git a/test/CodeGen/X86/vec_round.ll b/test/CodeGen/X86/vec_round.ll new file mode 100644 index 0000000..baa2f58 --- /dev/null +++ b/test/CodeGen/X86/vec_round.ll @@ -0,0 +1,22 @@ +; RUN: llc -mcpu=nehalem -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @use(<2 x double>) + +; CHECK-LABEL: @test +; CHECK callq round + +; Function Attrs: nounwind uwtable +define void @test() { +entry: + %tmp = call <2 x double> @llvm.round.v2f64(<2 x double> undef) + call void @use(<2 x double> %tmp) + ret void +} + +; Function Attrs: nounwind readonly +declare <2 x double> @llvm.round.v2f64(<2 x double>) #0 + +attributes #0 = { nounwind readonly } + diff --git a/test/CodeGen/X86/vec_set-8.ll b/test/CodeGen/X86/vec_set-8.ll index 66056d0..41061ae 100644 --- a/test/CodeGen/X86/vec_set-8.ll +++ b/test/CodeGen/X86/vec_set-8.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux -mattr=-avx | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 -mattr=-avx | FileCheck %s ; CHECK-NOT: movsd ; CHECK: movd {{%rdi|%rcx}}, %xmm0 ; CHECK-NOT: movsd diff --git a/test/CodeGen/X86/vec_set-9.ll b/test/CodeGen/X86/vec_set-9.ll index 6979f6b..a739090 100644 --- a/test/CodeGen/X86/vec_set-9.ll +++ b/test/CodeGen/X86/vec_set-9.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s +; RUN: llc < %s -march=x86-64 -mattr=-avx,-pad-short-functions | FileCheck %s ; CHECK: test3 ; CHECK: movd diff --git a/test/CodeGen/X86/vec_set-C.ll b/test/CodeGen/X86/vec_set-C.ll index 133f23b..052da30 100644 --- a/test/CodeGen/X86/vec_set-C.ll +++ b/test/CodeGen/X86/vec_set-C.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=+sse2 | grep movq -; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=+sse2 | grep mov | count 1 -; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-linux -mattr=+sse2 | grep movd +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=+sse2,-avx | grep movq +; RUN: llc < %s -march=x86 
-mtriple=i386-linux-gnu -mattr=+sse2,-avx | grep mov | count 1 +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-linux -mattr=+sse2,-avx | grep movd define <2 x i64> @t1(i64 %x) nounwind { %tmp8 = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0 diff --git a/test/CodeGen/X86/vec_set.ll b/test/CodeGen/X86/vec_set.ll index 7f5f8dd..53d880b 100644 --- a/test/CodeGen/X86/vec_set.ll +++ b/test/CodeGen/X86/vec_set.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep punpckl | count 7 +; RUN: llc < %s -march=x86 -mattr=+sse2,-sse4.1 | grep punpckl | count 7 define void @test(<8 x i16>* %b, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind { %tmp = insertelement <8 x i16> zeroinitializer, i16 %a0, i32 0 ; <<8 x i16>> [#uses=1] diff --git a/test/CodeGen/X86/vec_setcc.ll b/test/CodeGen/X86/vec_setcc.ll index bcfd4d3..fc8a56d 100644 --- a/test/CodeGen/X86/vec_setcc.ll +++ b/test/CodeGen/X86/vec_setcc.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse2 | FileCheck %s -check-prefix=SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse41 | FileCheck %s -check-prefix=SSE41 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse4.1 | FileCheck %s -check-prefix=SSE41 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx | FileCheck %s -check-prefix=AVX define <16 x i8> @v16i8_icmp_uge(<16 x i8> %a, <16 x i8> %b) nounwind readnone ssp uwtable { @@ -124,3 +124,64 @@ define <4 x i32> @v4i32_icmp_ule(<4 x i32> %a, <4 x i32> %b) nounwind readnone s ; AVX: pcmpeqd %xmm1, %xmm0, %xmm0 } +; At one point we were incorrectly constant-folding a setcc to 0x1 instead of +; 0xff, leading to a constpool load. The instruction doesn't matter here, but it +; should set all bits to 1. 
+define <16 x i8> @test_setcc_constfold_vi8(<16 x i8> %l, <16 x i8> %r) { + %test1 = icmp eq <16 x i8> %l, %r + %mask1 = sext <16 x i1> %test1 to <16 x i8> + + %test2 = icmp ne <16 x i8> %l, %r + %mask2 = sext <16 x i1> %test2 to <16 x i8> + + %res = or <16 x i8> %mask1, %mask2 + ret <16 x i8> %res +; SSE2-LABEL: test_setcc_constfold_vi8: +; SSE2: pcmpeqd %xmm0, %xmm0 + +; SSE41-LABEL: test_setcc_constfold_vi8: +; SSE41: pcmpeqd %xmm0, %xmm0 + +; AVX-LABEL: test_setcc_constfold_vi8: +; AVX: vpcmpeqd %xmm0, %xmm0, %xmm0 +} + +; Make sure sensible results come from doing extension afterwards +define <16 x i8> @test_setcc_constfold_vi1(<16 x i8> %l, <16 x i8> %r) { + %test1 = icmp eq <16 x i8> %l, %r + %test2 = icmp ne <16 x i8> %l, %r + + %res = or <16 x i1> %test1, %test2 + %mask = sext <16 x i1> %res to <16 x i8> + ret <16 x i8> %mask +; SSE2-LABEL: test_setcc_constfold_vi1: +; SSE2: pcmpeqd %xmm0, %xmm0 + +; SSE41-LABEL: test_setcc_constfold_vi1: +; SSE41: pcmpeqd %xmm0, %xmm0 + +; AVX-LABEL: test_setcc_constfold_vi1: +; AVX: vpcmpeqd %xmm0, %xmm0, %xmm0 +} + + +; 64-bit case is also particularly important, as the constant "-1" is probably +; just 32-bits wide. 
+define <2 x i64> @test_setcc_constfold_vi64(<2 x i64> %l, <2 x i64> %r) { + %test1 = icmp eq <2 x i64> %l, %r + %mask1 = sext <2 x i1> %test1 to <2 x i64> + + %test2 = icmp ne <2 x i64> %l, %r + %mask2 = sext <2 x i1> %test2 to <2 x i64> + + %res = or <2 x i64> %mask1, %mask2 + ret <2 x i64> %res +; SSE2-LABEL: test_setcc_constfold_vi64: +; SSE2: pcmpeqd %xmm0, %xmm0 + +; SSE41-LABEL: test_setcc_constfold_vi64: +; SSE41: pcmpeqd %xmm0, %xmm0 + +; AVX-LABEL: test_setcc_constfold_vi64: +; AVX: vpcmpeqd %xmm0, %xmm0, %xmm0 +} diff --git a/test/CodeGen/X86/vec_shift4.ll b/test/CodeGen/X86/vec_shift4.ll index 9ef7fbd..e2fe45c 100644 --- a/test/CodeGen/X86/vec_shift4.ll +++ b/test/CodeGen/X86/vec_shift4.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse41 | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=+sse4.1 | FileCheck %s define <2 x i64> @shl1(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp { entry: diff --git a/test/CodeGen/X86/vec_shuffle-14.ll b/test/CodeGen/X86/vec_shuffle-14.ll index 95e9a18..8f25197 100644 --- a/test/CodeGen/X86/vec_shuffle-14.ll +++ b/test/CodeGen/X86/vec_shuffle-14.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s -check-prefix=X86-32 -; RUN: llc < %s -march=x86-64 -mattr=+sse2 | FileCheck %s -check-prefix=X86-64 +; RUN: llc < %s -march=x86 -mattr=+sse2,-avx | FileCheck %s -check-prefix=X86-32 +; RUN: llc < %s -march=x86-64 -mattr=+sse2,-avx | FileCheck %s -check-prefix=X86-64 define <4 x i32> @t1(i32 %a) nounwind { entry: diff --git a/test/CodeGen/X86/vec_shuffle-17.ll b/test/CodeGen/X86/vec_shuffle-17.ll index ebc8c5b..f2f96ba 100644 --- a/test/CodeGen/X86/vec_shuffle-17.ll +++ b/test/CodeGen/X86/vec_shuffle-17.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux -mattr=-avx | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 -mattr=-avx | FileCheck %s ; CHECK-NOT: xor ; CHECK: movd 
{{%rdi|%rcx}}, %xmm0 ; CHECK-NOT: xor diff --git a/test/CodeGen/X86/vec_shuffle-25.ll b/test/CodeGen/X86/vec_shuffle-25.ll index d9b2388..3f42a13 100644 --- a/test/CodeGen/X86/vec_shuffle-25.ll +++ b/test/CodeGen/X86/vec_shuffle-25.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=sse41 -o %t +; RUN: llc < %s -march=x86 -mattr=sse4.1 -o %t ; RUN: grep unpcklps %t | count 3 ; RUN: grep unpckhps %t | count 1 diff --git a/test/CodeGen/X86/vec_shuffle-26.ll b/test/CodeGen/X86/vec_shuffle-26.ll index 4c56f84..00e8e73 100644 --- a/test/CodeGen/X86/vec_shuffle-26.ll +++ b/test/CodeGen/X86/vec_shuffle-26.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mcpu=generic -mattr=sse41 | FileCheck %s +; RUN: llc < %s -march=x86 -mcpu=generic -mattr=sse4.1 | FileCheck %s ; RUN: llc < %s -march=x86 -mcpu=atom | FileCheck -check-prefix=ATOM %s ; Transpose example using the more generic vector shuffle. Return float8 diff --git a/test/CodeGen/X86/vec_shuffle-27.ll b/test/CodeGen/X86/vec_shuffle-27.ll index 0aff822..c9b2fb5 100644 --- a/test/CodeGen/X86/vec_shuffle-27.ll +++ b/test/CodeGen/X86/vec_shuffle-27.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=sse41 | FileCheck %s +; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=sse4.1 | FileCheck %s ; ModuleID = 'vec_shuffle-27.bc' target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" @@ -7,10 +7,10 @@ target triple = "i686-apple-cl.1.0" define <8 x float> @my2filter4_1d(<4 x float> %a, <8 x float> %T0, <8 x float> %T1) nounwind readnone { entry: ; CHECK: subps -; CHECK: mulps -; CHECK: addps ; CHECK: subps ; CHECK: mulps +; CHECK: mulps +; CHECK: addps ; CHECK: addps %tmp7 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3 > ; <<8 x float>> [#uses=1] %sub = fsub <8 x float> %T1, %T0 ; <<8 x float>> [#uses=1] diff --git a/test/CodeGen/X86/vec_shuffle-36.ll 
b/test/CodeGen/X86/vec_shuffle-36.ll index 9a06015..f1d0f93 100644 --- a/test/CodeGen/X86/vec_shuffle-36.ll +++ b/test/CodeGen/X86/vec_shuffle-36.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mcpu=penryn -mattr=sse41 | FileCheck %s +; RUN: llc < %s -march=x86-64 -mcpu=penryn -mattr=sse4.1 | FileCheck %s define <8 x i16> @shuf6(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { ; CHECK: pshufb diff --git a/test/CodeGen/X86/vec_shuffle-39.ll b/test/CodeGen/X86/vec_shuffle-39.ll index 1560454..8fd9a5c 100644 --- a/test/CodeGen/X86/vec_shuffle-39.ll +++ b/test/CodeGen/X86/vec_shuffle-39.ll @@ -54,8 +54,8 @@ entry: define <2 x double> @t3() nounwind readonly { bb: ; CHECK-LABEL: t3: -; CHECK: punpcklqdq %xmm1, %xmm0 ; CHECK: movq (%rax), %xmm1 +; CHECK: punpcklqdq %xmm2, %xmm0 ; CHECK: movsd %xmm1, %xmm0 %tmp0 = load i128* null, align 1 %tmp1 = load <2 x i32>* undef, align 8 @@ -72,9 +72,9 @@ bb: define <2 x i64> @t4() nounwind readonly { bb: ; CHECK-LABEL: t4: -; CHECK: punpcklqdq %xmm0, %xmm1 ; CHECK: movq (%rax), %xmm0 -; CHECK: movsd %xmm1, %xmm0 +; CHECK: punpcklqdq %{{xmm.}}, %[[XMM:xmm[0-9]]] +; CHECK: movsd %[[XMM]], %xmm0 %tmp0 = load i128* null, align 1 %tmp1 = load <2 x i32>* undef, align 8 %tmp2 = bitcast i128 %tmp0 to <16 x i8> diff --git a/test/CodeGen/X86/vec_splat-3.ll b/test/CodeGen/X86/vec_splat-3.ll index 60e3005..754cbf4 100644 --- a/test/CodeGen/X86/vec_splat-3.ll +++ b/test/CodeGen/X86/vec_splat-3.ll @@ -1,4 +1,4 @@ -; RUN: llc <%s -march=x86 -mcpu=penryn -mattr=sse41 | FileCheck %s +; RUN: llc <%s -march=x86 -mcpu=penryn -mattr=sse4.1 | FileCheck %s ; Splat test for v8i16 define <8 x i16> @shuf_8i16_0(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { diff --git a/test/CodeGen/X86/vec_split.ll b/test/CodeGen/X86/vec_split.ll new file mode 100644 index 0000000..f9e7c20 --- /dev/null +++ b/test/CodeGen/X86/vec_split.ll @@ -0,0 +1,42 @@ +; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s -check-prefix=SSE4 +; RUN: llc -march=x86-64 
-mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1 +; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX2 + +define <16 x i16> @split16(<16 x i16> %a, <16 x i16> %b, <16 x i8> %__mask) { +; SSE4-LABEL: split16: +; SSE4: pminuw +; SSE4: pminuw +; SSE4: ret +; AVX1-LABEL: split16: +; AVX1: vpminuw +; AVX1: vpminuw +; AVX1: ret +; AVX2-LABEL: split16: +; AVX2: vpminuw +; AVX2: ret + %1 = icmp ult <16 x i16> %a, %b + %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b + ret <16 x i16> %2 +} + +define <32 x i16> @split32(<32 x i16> %a, <32 x i16> %b, <32 x i8> %__mask) { +; SSE4-LABEL: split32: +; SSE4: pminuw +; SSE4: pminuw +; SSE4: pminuw +; SSE4: pminuw +; SSE4: ret +; AVX1-LABEL: split32: +; AVX1: vpminuw +; AVX1: vpminuw +; AVX1: vpminuw +; AVX1: vpminuw +; AVX1: ret +; AVX2-LABEL: split32: +; AVX2: vpminuw +; AVX2: vpminuw +; AVX2: ret + %1 = icmp ult <32 x i16> %a, %b + %2 = select <32 x i1> %1, <32 x i16> %a, <32 x i16> %b + ret <32 x i16> %2 +} diff --git a/test/CodeGen/X86/vec_ss_load_fold.ll b/test/CodeGen/X86/vec_ss_load_fold.ll index 2eb911f..80f12a2 100644 --- a/test/CodeGen/X86/vec_ss_load_fold.ll +++ b/test/CodeGen/X86/vec_ss_load_fold.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse,+sse2,+sse41 | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=+sse,+sse2,+sse4.1 | FileCheck %s target datalayout = "e-p:32:32" target triple = "i686-apple-darwin8.7.2" diff --git a/test/CodeGen/X86/vector-variable-idx2.ll b/test/CodeGen/X86/vector-variable-idx2.ll index d47df90..6e8ae2e 100644 --- a/test/CodeGen/X86/vector-variable-idx2.ll +++ b/test/CodeGen/X86/vector-variable-idx2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mattr=+sse41 +; RUN: llc < %s -march=x86-64 -mattr=+sse4.1 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin11.0.0" diff --git 
a/test/CodeGen/X86/vsplit-and.ll b/test/CodeGen/X86/vsplit-and.ll index 3b7fdff..c16b294 100644 --- a/test/CodeGen/X86/vsplit-and.ll +++ b/test/CodeGen/X86/vsplit-and.ll @@ -14,7 +14,7 @@ define void @t0(<2 x i64>* %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind read define void @t2(<3 x i64>* %dst, <3 x i64> %src1, <3 x i64> %src2) nounwind readonly { ; CHECK: t2 -; CHECK-NOT: pand +; CHECK: pand ; CHECK: ret %cmp1 = icmp ne <3 x i64> %src1, zeroinitializer %cmp2 = icmp ne <3 x i64> %src2, zeroinitializer diff --git a/test/CodeGen/X86/weak_def_can_be_hidden.ll b/test/CodeGen/X86/weak_def_can_be_hidden.ll new file mode 100644 index 0000000..f78f357 --- /dev/null +++ b/test/CodeGen/X86/weak_def_can_be_hidden.ll @@ -0,0 +1,26 @@ +; RUN: llc -mtriple=x86_64-apple-darwin -O0 < %s | FileCheck %s + +@v1 = linkonce_odr global i32 32 +; CHECK: .globl _v1 +; CHECK: .weak_def_can_be_hidden _v1 + +define i32 @f1() { + %x = load i32 * @v1 + ret i32 %x +} + +@v2 = linkonce_odr global i32 32 +; CHECK: .globl _v2 +; CHECK: .weak_definition _v2 + +@v3 = linkonce_odr unnamed_addr global i32 32 +; CHECK: .globl _v3 +; CHECK: .weak_def_can_be_hidden _v3 + +define i32* @f2() { + ret i32* @v2 +} + +define i32* @f3() { + ret i32* @v3 +} diff --git a/test/CodeGen/X86/widen_arith-1.ll b/test/CodeGen/X86/widen_arith-1.ll index 661cde8..6041356 100644 --- a/test/CodeGen/X86/widen_arith-1.ll +++ b/test/CodeGen/X86/widen_arith-1.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse42 | FileCheck %s +; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse4.2 | FileCheck %s define void @update(<3 x i8>* %dst, <3 x i8>* %src, i32 %n) nounwind { entry: diff --git a/test/CodeGen/X86/widen_arith-2.ll b/test/CodeGen/X86/widen_arith-2.ll index d35abc3..1b81e9f 100644 --- a/test/CodeGen/X86/widen_arith-2.ll +++ b/test/CodeGen/X86/widen_arith-2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s ; CHECK: 
padd ; CHECK: pand diff --git a/test/CodeGen/X86/widen_arith-3.ll b/test/CodeGen/X86/widen_arith-3.ll index d86042a..d2b8e6e 100644 --- a/test/CodeGen/X86/widen_arith-3.ll +++ b/test/CodeGen/X86/widen_arith-3.ll @@ -1,7 +1,5 @@ -; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse42 -post-RA-scheduler=true | FileCheck %s -; CHECK: incl -; CHECK: incl -; CHECK: incl +; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse4.2 -post-RA-scheduler=true | FileCheck %s +; CHECK: paddd ; Widen a v3i16 to v8i16 to do a vector add diff --git a/test/CodeGen/X86/widen_arith-4.ll b/test/CodeGen/X86/widen_arith-4.ll index 63c8d0e..5207e1f 100644 --- a/test/CodeGen/X86/widen_arith-4.ll +++ b/test/CodeGen/X86/widen_arith-4.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mattr=+sse42 | FileCheck %s +; RUN: llc < %s -march=x86-64 -mattr=+sse4.2 | FileCheck %s ; CHECK: psubw ; CHECK-NEXT: pmullw diff --git a/test/CodeGen/X86/widen_arith-5.ll b/test/CodeGen/X86/widen_arith-5.ll index 41df0e4..70b6a8a 100644 --- a/test/CodeGen/X86/widen_arith-5.ll +++ b/test/CodeGen/X86/widen_arith-5.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mattr=+sse42 | FileCheck %s +; RUN: llc < %s -march=x86-64 -mattr=+sse4.2 | FileCheck %s ; CHECK: movdqa ; CHECK: pslld $2 ; CHECK: psubd diff --git a/test/CodeGen/X86/widen_arith-6.ll b/test/CodeGen/X86/widen_arith-6.ll index b983d14..329048a 100644 --- a/test/CodeGen/X86/widen_arith-6.ll +++ b/test/CodeGen/X86/widen_arith-6.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s ; CHECK: mulps ; CHECK: addps diff --git a/test/CodeGen/X86/widen_cast-1.ll b/test/CodeGen/X86/widen_cast-1.ll index 56c6364..d115929 100644 --- a/test/CodeGen/X86/widen_cast-1.ll +++ b/test/CodeGen/X86/widen_cast-1.ll @@ -1,8 +1,8 @@ -; RUN: llc -march=x86 -mcpu=generic -mattr=+sse42 < %s | FileCheck %s +; RUN: llc -march=x86 -mcpu=generic -mattr=+sse4.2 < %s | FileCheck %s ; RUN: llc -march=x86 
-mcpu=atom < %s | FileCheck -check-prefix=ATOM %s -; CHECK: paddd ; CHECK: movl +; CHECK: paddd ; CHECK: movlpd ; Scheduler causes produce a different instruction order diff --git a/test/CodeGen/X86/widen_cast-2.ll b/test/CodeGen/X86/widen_cast-2.ll index 3979ce4..40b42fb 100644 --- a/test/CodeGen/X86/widen_cast-2.ll +++ b/test/CodeGen/X86/widen_cast-2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mcpu=nehalem -mattr=+sse42 | FileCheck %s +; RUN: llc < %s -march=x86 -mcpu=nehalem -mattr=+sse4.2 | FileCheck %s ; CHECK: pextrd ; CHECK: pextrd ; CHECK: movd diff --git a/test/CodeGen/X86/widen_cast-3.ll b/test/CodeGen/X86/widen_cast-3.ll index 87486d9..40a8dc5 100644 --- a/test/CodeGen/X86/widen_cast-3.ll +++ b/test/CodeGen/X86/widen_cast-3.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s ; CHECK: paddd ; CHECK: pextrd ; CHECK: pextrd diff --git a/test/CodeGen/X86/widen_cast-4.ll b/test/CodeGen/X86/widen_cast-4.ll index 5ea5426..1bc06a7 100644 --- a/test/CodeGen/X86/widen_cast-4.ll +++ b/test/CodeGen/X86/widen_cast-4.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s ; CHECK: psraw ; CHECK: psraw diff --git a/test/CodeGen/X86/widen_cast-5.ll b/test/CodeGen/X86/widen_cast-5.ll index 9086d3a..ccf0bd1 100644 --- a/test/CodeGen/X86/widen_cast-5.ll +++ b/test/CodeGen/X86/widen_cast-5.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s ; CHECK: movl ; CHECK: movlpd diff --git a/test/CodeGen/X86/widen_cast-6.ll b/test/CodeGen/X86/widen_cast-6.ll index 3903234..7c06ad8 100644 --- a/test/CodeGen/X86/widen_cast-6.ll +++ b/test/CodeGen/X86/widen_cast-6.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse41 | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=+sse4.1 | FileCheck %s ; CHECK: movd ; Test bit convert that requires 
widening in the operand. diff --git a/test/CodeGen/X86/widen_conv-1.ll b/test/CodeGen/X86/widen_conv-1.ll index 51f1c88..9f6778c 100644 --- a/test/CodeGen/X86/widen_conv-1.ll +++ b/test/CodeGen/X86/widen_conv-1.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s ; CHECK: paddq ; truncate v2i64 to v2i32 diff --git a/test/CodeGen/X86/widen_conv-2.ll b/test/CodeGen/X86/widen_conv-2.ll index db8fa93..906f7cd 100644 --- a/test/CodeGen/X86/widen_conv-2.ll +++ b/test/CodeGen/X86/widen_conv-2.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s -; CHECK: cwtl -; CHECK: cwtl +; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s +; CHECK: {{cwtl|movswl}} +; CHECK: {{cwtl|movswl}} ; sign extension v2i32 to v2i16 diff --git a/test/CodeGen/X86/widen_conv-3.ll b/test/CodeGen/X86/widen_conv-3.ll index a25fae9..a2f3d7b 100644 --- a/test/CodeGen/X86/widen_conv-3.ll +++ b/test/CodeGen/X86/widen_conv-3.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s ; CHECK: cvtsi2ss ; sign to float v2i16 to v2f32 diff --git a/test/CodeGen/X86/widen_conv-4.ll b/test/CodeGen/X86/widen_conv-4.ll index 1158e04..f633592 100644 --- a/test/CodeGen/X86/widen_conv-4.ll +++ b/test/CodeGen/X86/widen_conv-4.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mcpu=nehalem -mattr=+sse42 | FileCheck %s +; RUN: llc < %s -march=x86 -mcpu=nehalem -mattr=+sse4.2 | FileCheck %s ; CHECK-NOT: cvtsi2ss ; unsigned to float v7i16 to v7f32 diff --git a/test/CodeGen/X86/widen_extract-1.ll b/test/CodeGen/X86/widen_extract-1.ll index c4fe43a..6832de1 100644 --- a/test/CodeGen/X86/widen_extract-1.ll +++ b/test/CodeGen/X86/widen_extract-1.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mcpu=nehalem -mattr=+sse42 | FileCheck %s +; RUN: llc < %s -march=x86-64 -mcpu=nehalem -mattr=+sse4.2 | FileCheck %s ; widen extract subvector define void 
@convert(<2 x double>* %dst.addr, <3 x double> %src) { diff --git a/test/CodeGen/X86/widen_load-2.ll b/test/CodeGen/X86/widen_load-2.ll index f0f94e4..26815a4 100644 --- a/test/CodeGen/X86/widen_load-2.ll +++ b/test/CodeGen/X86/widen_load-2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -o - -mcpu=generic -march=x86-64 -mattr=+sse42 | FileCheck %s +; RUN: llc < %s -o - -mcpu=generic -march=x86-64 -mattr=+sse4.2 | FileCheck %s ; Test based on pr5626 to load/store ; @@ -73,9 +73,7 @@ define void @add12i32(%i32vec12* sret %ret, %i32vec12* %ap, %i32vec12* %bp) { ; CHECK: add3i16 %i16vec3 = type <3 x i16> define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp) nounwind { -; CHECK: addl -; CHECK: addl -; CHECK: addl +; CHECK: paddd ; CHECK: ret %a = load %i16vec3* %ap, align 16 %b = load %i16vec3* %bp, align 16 @@ -135,9 +133,7 @@ define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18* ; CHECK: add3i8 %i8vec3 = type <3 x i8> define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) nounwind { -; CHECK: addb -; CHECK: addb -; CHECK: addb +; CHECK: paddd ; CHECK: ret %a = load %i8vec3* %ap, align 16 %b = load %i8vec3* %bp, align 16 diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll index c7d2044..803402b 100644 --- a/test/CodeGen/X86/widen_shuffle-1.ll +++ b/test/CodeGen/X86/widen_shuffle-1.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s ; widening shuffle v3float and then a add define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind { diff --git a/test/CodeGen/X86/win64_alloca_dynalloca.ll b/test/CodeGen/X86/win64_alloca_dynalloca.ll index 275ebf9..aff5305 100644 --- a/test/CodeGen/X86/win64_alloca_dynalloca.ll +++ b/test/CodeGen/X86/win64_alloca_dynalloca.ll @@ -1,10 +1,13 @@ -; RUN: llc < %s -mcpu=generic -mtriple=x86_64-mingw32 | FileCheck %s 
-check-prefix=M64 -; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64 -; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI +; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-mingw32 | FileCheck %s -check-prefix=M64 +; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64 +; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI ; PR8777 ; PR8778 -define i64 @foo(i64 %n, i64 %x) nounwind { +define i64 @unaligned(i64 %n, i64 %x) nounwind { +; M64-LABEL: unaligned: +; W64-LABEL: unaligned: +; EFI-LABEL: unaligned: entry: %buf0 = alloca i8, i64 4096, align 1 @@ -49,18 +52,18 @@ entry: %r = call i64 @bar(i64 %n, i64 %x, i64 %n, i8* %buf0, i8* %buf1) nounwind ; M64: subq $48, %rsp -; M64: leaq -4096(%rbp), %r9 ; M64: movq %rax, 32(%rsp) +; M64: leaq -4096(%rbp), %r9 ; M64: callq bar ; W64: subq $48, %rsp -; W64: leaq -4096(%rbp), %r9 ; W64: movq %rax, 32(%rsp) +; W64: leaq -4096(%rbp), %r9 ; W64: callq bar ; EFI: subq $48, %rsp -; EFI: leaq -[[B0OFS]](%rbp), %r9 ; EFI: movq [[R64]], 32(%rsp) +; EFI: leaq -[[B0OFS]](%rbp), %r9 ; EFI: callq _bar ret i64 %r @@ -71,4 +74,51 @@ entry: } +define i64 @aligned(i64 %n, i64 %x) nounwind { +; M64-LABEL: aligned: +; W64-LABEL: aligned: +; EFI-LABEL: aligned: +entry: + + %buf1 = alloca i8, i64 %n, align 128 + +; M64: leaq 15(%{{.*}}), %rax +; M64: andq $-16, %rax +; M64: callq ___chkstk +; M64: movq %rsp, [[R2:%r.*]] +; M64: andq $-128, [[R2]] +; M64: movq [[R2]], %rsp + +; W64: leaq 15(%{{.*}}), %rax +; W64: andq $-16, %rax +; W64: callq __chkstk +; W64: subq %rax, %rsp +; W64: movq %rsp, [[R2:%r.*]] +; W64: andq $-128, [[R2]] +; W64: movq [[R2]], %rsp + +; EFI: leaq 15(%{{.*}}), [[R1:%r.*]] +; EFI: andq $-16, [[R1]] +; EFI: movq %rsp, [[R64:%r.*]] +; EFI: subq [[R1]], [[R64]] +; EFI: andq $-128, [[R64]] +; EFI: movq [[R64]], %rsp + + %r 
= call i64 @bar(i64 %n, i64 %x, i64 %n, i8* undef, i8* %buf1) nounwind + +; M64: subq $48, %rsp +; M64: movq [[R2]], 32(%rsp) +; M64: callq bar + +; W64: subq $48, %rsp +; W64: movq [[R2]], 32(%rsp) +; W64: callq bar + +; EFI: subq $48, %rsp +; EFI: movq [[R64]], 32(%rsp) +; EFI: callq _bar + + ret i64 %r +} + declare i64 @bar(i64, i64, i64, i8* nocapture, i8* nocapture) nounwind diff --git a/test/CodeGen/X86/x86-64-pic-10.ll b/test/CodeGen/X86/x86-64-pic-10.ll index 3ec172b..da8082b 100644 --- a/test/CodeGen/X86/x86-64-pic-10.ll +++ b/test/CodeGen/X86/x86-64-pic-10.ll @@ -9,4 +9,6 @@ entry: ret void } -declare extern_weak i32 @f() +define weak i32 @f() { + ret i32 42 +} diff --git a/test/CodeGen/X86/x86-64-psub.ll b/test/CodeGen/X86/x86-64-psub.ll index be09a4f..183ddf4 100644 --- a/test/CodeGen/X86/x86-64-psub.ll +++ b/test/CodeGen/X86/x86-64-psub.ll @@ -4,8 +4,8 @@ ; This test checks that the operands of packed sub instructions are ; never interchanged by the "Two-Address instruction pass". 
-declare { i64, double } @getFirstParam() -declare { i64, double } @getSecondParam() +declare { i64, double } @getFirstParam() +declare { i64, double } @getSecondParam() define i64 @test_psubb() { entry: @@ -28,9 +28,10 @@ entry: ; CHECK-LABEL: test_psubb: ; CHECK: callq getFirstParam +; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]] ; CHECK: callq getSecondParam +; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]] ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]] -; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]] ; CHECK: psubb [[PARAM2]], [[PARAM1]] ; CHECK: ret @@ -55,9 +56,10 @@ entry: ; CHECK-LABEL: test_psubw: ; CHECK: callq getFirstParam +; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]] ; CHECK: callq getSecondParam +; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]] ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]] -; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]] ; CHECK: psubw [[PARAM2]], [[PARAM1]] ; CHECK: ret @@ -83,9 +85,10 @@ entry: ; CHECK-LABEL: test_psubd: ; CHECK: callq getFirstParam +; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]] ; CHECK: callq getSecondParam +; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]] ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]] -; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]] ; CHECK: psubd [[PARAM2]], [[PARAM1]] ; CHECK: ret @@ -110,9 +113,10 @@ entry: ; CHECK-LABEL: test_psubsb: ; CHECK: callq getFirstParam +; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]] ; CHECK: callq getSecondParam +; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]] ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]] -; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]] ; CHECK: psubsb [[PARAM2]], [[PARAM1]] ; CHECK: ret @@ -137,9 +141,10 @@ entry: ; CHECK-LABEL: test_psubswv: ; CHECK: callq getFirstParam +; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]] ; CHECK: callq getSecondParam +; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]] ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]] -; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]] ; CHECK: psubsw [[PARAM2]], [[PARAM1]] ; CHECK: ret @@ -164,9 +169,10 @@ entry: ; CHECK-LABEL: test_psubusbv: ; CHECK: callq getFirstParam +; CHECK: 
movq %rax, [[TEMP:%[a-z0-9]+]] ; CHECK: callq getSecondParam +; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]] ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]] -; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]] ; CHECK: psubusb [[PARAM2]], [[PARAM1]] ; CHECK: ret @@ -191,9 +197,10 @@ entry: ; CHECK-LABEL: test_psubuswv: ; CHECK: callq getFirstParam +; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]] ; CHECK: callq getSecondParam +; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]] ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]] -; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]] ; CHECK: psubusw [[PARAM2]], [[PARAM1]] ; CHECK: ret diff --git a/test/CodeGen/X86/x86-64-tls-1.ll b/test/CodeGen/X86/x86-64-tls-1.ll index 8d3b300..641786f 100644 --- a/test/CodeGen/X86/x86-64-tls-1.ll +++ b/test/CodeGen/X86/x86-64-tls-1.ll @@ -1,6 +1,10 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s @tm_nest_level = internal thread_local global i32 0 define i64 @z() nounwind { -; CHECK: movabsq $tm_nest_level@TPOFF, %rcx +; FIXME: The codegen here is primitive at best and could be much better. +; The add and the moves can be folded together. 
+; CHECK-DAG: movq $tm_nest_level@TPOFF, %rcx +; CHECK-DAG: movq %fs:0, %rax +; CHECK: addl %ecx, %eax ret i64 and (i64 ptrtoint (i32* @tm_nest_level to i64), i64 100) } diff --git a/test/CodeGen/X86/x86-shifts.ll b/test/CodeGen/X86/x86-shifts.ll index af57e5c..2f3adb8 100644 --- a/test/CodeGen/X86/x86-shifts.ll +++ b/test/CodeGen/X86/x86-shifts.ll @@ -6,8 +6,8 @@ define <4 x i32> @shl4(<4 x i32> %A) nounwind { entry: ; CHECK: shl4 -; CHECK: padd ; CHECK: pslld +; CHECK: padd ; CHECK: ret %B = shl <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2> %C = shl <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1> @@ -67,8 +67,8 @@ entry: define <8 x i16> @shl8(<8 x i16> %A) nounwind { entry: ; CHECK: shl8 -; CHECK: padd ; CHECK: psllw +; CHECK: padd ; CHECK: ret %B = shl <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> %C = shl <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> diff --git a/test/CodeGen/X86/xor.ll b/test/CodeGen/X86/xor.ll index b56ce0f..fd8e1b4 100644 --- a/test/CodeGen/X86/xor.ll +++ b/test/CodeGen/X86/xor.ll @@ -165,3 +165,19 @@ define <4 x i32> @test10(<4 x i32> %a) nounwind { ; X32-LABEL: test10: ; X32: andnps } + +define i32 @PR17487(i1 %tobool) { + %tmp = insertelement <2 x i1> undef, i1 %tobool, i32 1 + %tmp1 = zext <2 x i1> %tmp to <2 x i64> + %tmp2 = xor <2 x i64> %tmp1, <i64 1, i64 1> + %tmp3 = extractelement <2 x i64> %tmp2, i32 1 + %add = add nsw i64 0, %tmp3 + %cmp6 = icmp ne i64 %add, 1 + %conv7 = zext i1 %cmp6 to i32 + ret i32 %conv7 + +; X64-LABEL: PR17487: +; X64: andn +; X32-LABEL: PR17487: +; X32: andn +} diff --git a/test/CodeGen/X86/zext-fold.ll b/test/CodeGen/X86/zext-fold.ll index ff93c68..a10923f 100644 --- a/test/CodeGen/X86/zext-fold.ll +++ b/test/CodeGen/X86/zext-fold.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s +; RUN: llc < %s -mcpu=generic -march=x86 -enable-misched=false | FileCheck %s ;; Simple case define i32 @test1(i8 %x) nounwind readnone { @@ -10,7 +10,7 @@ 
define i32 @test1(i8 %x) nounwind readnone { ; CHECK: movzbl ; CHECK-NEXT: andl {{.*}}224 -;; Multiple uses of %x but easily extensible. +;; Multiple uses of %x but easily extensible. define i32 @test2(i8 %x) nounwind readnone { %A = and i8 %x, -32 %B = zext i8 %A to i32 @@ -21,8 +21,8 @@ define i32 @test2(i8 %x) nounwind readnone { } ; CHECK: test2 ; CHECK: movzbl -; CHECK: orl $63 ; CHECK: andl $224 +; CHECK: orl $63 declare void @use(i32, i8) diff --git a/test/CodeGen/X86/zext-sext.ll b/test/CodeGen/X86/zext-sext.ll index 25dabbe..5b2713d 100644 --- a/test/CodeGen/X86/zext-sext.ll +++ b/test/CodeGen/X86/zext-sext.ll @@ -34,10 +34,10 @@ entry: %tmp12 = add i64 %tmp11, 5089792279245435153 ; CHECK: addl $2138875574, %e[[REGISTER_zext:[a-z0-9]+]] -; CHECK-NEXT: cmpl $-8608074, %e[[REGISTER_zext]] -; CHECK: movslq %e[[REGISTER_zext]], [[REGISTER_tmp:%r[a-z0-9]+]] -; CHECK: movq [[REGISTER_tmp]], [[REGISTER_sext:%r[a-z0-9]+]] +; CHECK: movslq %e[[REGISTER_zext]], [[REGISTER_sext:%r[a-z0-9]+]] +; CHECK: cmpl $-8608074, %e[[REGISTER_zext]] ; CHECK-NOT: [[REGISTER_zext]] +; CHECK-DAG: testl %e[[REGISTER_zext]] ; CHECK: subq %r[[REGISTER_zext]], [[REGISTER_sext]] %tmp13 = sub i64 %tmp12, 2138875574 diff --git a/test/CodeGen/XCore/aliases.ll b/test/CodeGen/XCore/aliases.ll index d4da63c..b7ad416 100644 --- a/test/CodeGen/XCore/aliases.ll +++ b/test/CodeGen/XCore/aliases.ll @@ -1,7 +1,9 @@ ; RUN: llc < %s -march=xcore | FileCheck %s -declare void @a_val() nounwind -@b_val = external constant i32, section ".cp.rodata" -@c_val = external global i32 +define void @a_val() nounwind { + ret void +} +@b_val = constant i32 42, section ".cp.rodata" +@c_val = global i32 42 @a = alias void ()* @a_val @b = alias i32* @b_val diff --git a/test/CodeGen/XCore/alignment.ll b/test/CodeGen/XCore/alignment.ll new file mode 100644 index 0000000..28bdf3b --- /dev/null +++ b/test/CodeGen/XCore/alignment.ll @@ -0,0 +1,9 @@ +; RUN: not llc < %s -march=xcore 2>&1 | FileCheck %s + +; CHECK: 
emitPrologue unsupported alignment: 8 +define void @f() nounwind { +entry: + %BadAlignment = alloca i64, align 8 + ret void +} + diff --git a/test/CodeGen/XCore/ashr.ll b/test/CodeGen/XCore/ashr.ll index 2752f52..78cb144 100644 --- a/test/CodeGen/XCore/ashr.ll +++ b/test/CodeGen/XCore/ashr.ll @@ -1,26 +1,26 @@ ; RUN: llc < %s -march=xcore -asm-verbose=0 | FileCheck %s -define i32 @ashr(i32 %a, i32 %b) { +define i32 @ashr(i32 %a, i32 %b) nounwind { %1 = ashr i32 %a, %b ret i32 %1 } ; CHECK-LABEL: ashr: ; CHECK-NEXT: ashr r0, r0, r1 -define i32 @ashri1(i32 %a) { +define i32 @ashri1(i32 %a) nounwind { %1 = ashr i32 %a, 24 ret i32 %1 } ; CHECK-LABEL: ashri1: ; CHECK-NEXT: ashr r0, r0, 24 -define i32 @ashri2(i32 %a) { +define i32 @ashri2(i32 %a) nounwind { %1 = ashr i32 %a, 31 ret i32 %1 } ; CHECK-LABEL: ashri2: ; CHECK-NEXT: ashr r0, r0, 32 -define i32 @f1(i32 %a) { +define i32 @f1(i32 %a) nounwind nounwind { %1 = icmp slt i32 %a, 0 br i1 %1, label %less, label %not_less less: @@ -32,7 +32,7 @@ not_less: ; CHECK-NEXT: ashr r0, r0, 32 ; CHECK-NEXT: bt r0 -define i32 @f2(i32 %a) { +define i32 @f2(i32 %a) nounwind { %1 = icmp sge i32 %a, 0 br i1 %1, label %greater, label %not_greater greater: @@ -44,7 +44,7 @@ not_greater: ; CHECK-NEXT: ashr r0, r0, 32 ; CHECK-NEXT: bt r0 -define i32 @f3(i32 %a) { +define i32 @f3(i32 %a) nounwind { %1 = icmp slt i32 %a, 0 %2 = select i1 %1, i32 10, i32 17 ret i32 %2 @@ -55,7 +55,7 @@ define i32 @f3(i32 %a) { ; CHECK-NEXT: ldc r0, 17 ; CHECK: ldc r0, 10 -define i32 @f4(i32 %a) { +define i32 @f4(i32 %a) nounwind { %1 = icmp sge i32 %a, 0 %2 = select i1 %1, i32 10, i32 17 ret i32 %2 @@ -66,7 +66,7 @@ define i32 @f4(i32 %a) { ; CHECK-NEXT: ldc r0, 10 ; CHECK: ldc r0, 17 -define i32 @f5(i32 %a) { +define i32 @f5(i32 %a) nounwind { %1 = icmp sge i32 %a, 0 %2 = zext i1 %1 to i32 ret i32 %2 diff --git a/test/CodeGen/XCore/atomic.ll b/test/CodeGen/XCore/atomic.ll new file mode 100644 index 0000000..95fca9a --- /dev/null +++ 
b/test/CodeGen/XCore/atomic.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -march=xcore | FileCheck %s + +; CHECK-LABEL: atomic_fence +; CHECK: #MEMBARRIER +; CHECK: #MEMBARRIER +; CHECK: #MEMBARRIER +; CHECK: #MEMBARRIER +; CHECK: retsp 0 +define void @atomic_fence() nounwind { +entry: + fence acquire + fence release + fence acq_rel + fence seq_cst + ret void +} diff --git a/test/CodeGen/XCore/byVal.ll b/test/CodeGen/XCore/byVal.ll index a5d25d2..e9612fd 100644 --- a/test/CodeGen/XCore/byVal.ll +++ b/test/CodeGen/XCore/byVal.ll @@ -56,3 +56,18 @@ entry: call void @f2(i32 %i, %struct.st2* %s2) ret void } + +; CHECK-LABEL: f3Test +; CHECK: entsp 2 +; CHECK: ldc r1, 0 +; CHECK: ld8u r2, r0[r1] +; CHECK: ldaw r0, sp[1] +; CHECK: st8 r2, r0[r1] +; CHECK: bl f +; CHECK: retsp 2 +declare void @f3(i8*) nounwind +define void @f3Test(i8* byval %v) nounwind { +entry: + call void @f3(i8* %v) nounwind + ret void +} diff --git a/test/CodeGen/XCore/exception.ll b/test/CodeGen/XCore/exception.ll new file mode 100644 index 0000000..8018cdc --- /dev/null +++ b/test/CodeGen/XCore/exception.ll @@ -0,0 +1,129 @@ +; RUN: llc < %s -march=xcore | FileCheck %s + +declare void @g() +declare i32 @__gxx_personality_v0(...) 
+declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone +declare i8* @__cxa_begin_catch(i8*) +declare void @__cxa_end_catch() +declare i8* @__cxa_allocate_exception(i32) +declare void @__cxa_throw(i8*, i8*, i8*) + +@_ZTIi = external constant i8* +@_ZTId = external constant i8* + +; CHECK-LABEL: fn_typeid: +; CHECK: .cfi_startproc +; CHECK: mkmsk r0, 1 +; CHECK: retsp 0 +; CHECK: .cfi_endproc +define i32 @fn_typeid() { +entry: + %0 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) nounwind + ret i32 %0 +} + +; CHECK-LABEL: fn_throw +; CHECK: .cfi_startproc +; CHECK: entsp 1 +; CHECK: .cfi_def_cfa_offset 4 +; CHECK: .cfi_offset 15, 0 +; CHECK: ldc r0, 4 +; CHECK: bl __cxa_allocate_exception +; CHECK: ldaw r11, cp[_ZTIi] +; CHECK: ldc r2, 0 +; CHECK: mov r1, r11 +; CHECK: bl __cxa_throw +define void @fn_throw() { +entry: + %0 = call i8* @__cxa_allocate_exception(i32 4) nounwind + call void @__cxa_throw(i8* %0, i8* bitcast (i8** @_ZTIi to i8*), i8* null) noreturn + unreachable +} + +; CHECK-LABEL: fn_catch +; CHECK: .cfi_startproc +; CHECK: .cfi_personality 0, __gxx_personality_v0 +; CHECK: [[START:.L[a-zA-Z0-9_]+]] +; CHECK: .cfi_lsda 0, [[LSDA:.L[a-zA-Z0-9_]+]] +; CHECK: entsp 4 +; CHECK: .cfi_def_cfa_offset 16 +; CHECK: .cfi_offset 15, 0 +define void @fn_catch() { +entry: + +; N.B. 
we alloc no variables, hence force compiler to spill +; CHECK: stw r4, sp[3] +; CHECK: .cfi_offset 4, -4 +; CHECK: stw r5, sp[2] +; CHECK: .cfi_offset 5, -8 +; CHECK: stw r6, sp[1] +; CHECK: .cfi_offset 6, -12 +; CHECK: [[PRE_G:.L[a-zA-Z0-9_]+]] +; CHECK: bl g +; CHECK: [[POST_G:.L[a-zA-Z0-9_]+]] +; CHECK: [[RETURN:.L[a-zA-Z0-9_]+]] +; CHECK: ldw r6, sp[1] +; CHECK: ldw r5, sp[2] +; CHECK: ldw r4, sp[3] +; CHECK: retsp 4 + invoke void @g() to label %cont unwind label %lpad +cont: + ret void + +; CHECK: {{.L[a-zA-Z0-9_]+}} +; CHECK: [[LANDING:.L[a-zA-Z0-9_]+]] +; CHECK: mov r5, r1 +; CHECK: mov r4, r0 +; CHECK: bl __cxa_begin_catch +; CHECK: ldw r6, r0[0] +; CHECK: bl __cxa_end_catch +lpad: + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* bitcast (i8** @_ZTIi to i8*) + catch i8* bitcast (i8** @_ZTId to i8*) + %1 = extractvalue { i8*, i32 } %0, 0 + %2 = extractvalue { i8*, i32 } %0, 1 + %3 = call i8* @__cxa_begin_catch(i8* %1) nounwind + %4 = bitcast i8* %3 to i32* + %5 = load i32* %4 + call void @__cxa_end_catch() nounwind + +; CHECK: eq r0, r6, r5 +; CHECK: bf r0, [[RETURN]] +; CHECK: mov r0, r4 +; CHECK: bl _Unwind_Resume +; CHECK: .cfi_endproc +; CHECK: [[END:.L[a-zA-Z0-9_]+]] + %6 = icmp eq i32 %5, %2 + br i1 %6, label %Resume, label %Exit +Resume: + resume { i8*, i32 } %0 +Exit: + ret void +} + +; CHECK: [[LSDA]]: +; CHECK: .byte 255 +; CHECK: .byte 0 +; CHECK: .asciiz +; CHECK: .byte 3 +; CHECK: .byte 26 +; CHECK: [[SET0:.L[a-zA-Z0-9_]+]] = [[PRE_G]]-[[START]] +; CHECK: .long [[SET0]] +; CHECK: [[SET1:.L[a-zA-Z0-9_]+]] = [[POST_G]]-[[PRE_G]] +; CHECK: .long [[SET1]] +; CHECK: [[SET2:.L[a-zA-Z0-9_]+]] = [[LANDING]]-[[START]] +; CHECK: .long [[SET2]] +; CHECK: .byte 3 +; CHECK: [[SET3:.L[a-zA-Z0-9_]+]] = [[POST_G]]-[[START]] +; CHECK: .long [[SET3]] +; CHECK: [[SET4:.L[a-zA-Z0-9_]+]] = [[END]]-[[POST_G]] +; CHECK: .long [[SET4]] +; CHECK: .long 0 +; CHECK: .byte 0 +; CHECK: .byte 1 +; CHECK: .byte 0 +; 
CHECK: .byte 2 +; CHECK: .byte 125 +; CHECK: .long _ZTIi +; CHECK: .long _ZTId diff --git a/test/CodeGen/XCore/globals.ll b/test/CodeGen/XCore/globals.ll index b140587..b3a872b 100644 --- a/test/CodeGen/XCore/globals.ll +++ b/test/CodeGen/XCore/globals.ll @@ -93,4 +93,4 @@ entry: @array = global [10 x i16] zeroinitializer, align 2 ; CHECK: .globl array.globound -; CHECK: .set array.globound,10 +; CHECK: array.globound = 10 diff --git a/test/CodeGen/XCore/linkage.ll b/test/CodeGen/XCore/linkage.ll new file mode 100644 index 0000000..7a1179b --- /dev/null +++ b/test/CodeGen/XCore/linkage.ll @@ -0,0 +1,38 @@ +; RUN: llc < %s -march=xcore | FileCheck %s + +; CHECK: .weak fd +define weak void @fd() { + call void @fr(i32* @gd, i32* @gr) + ret void +} + +; CHECK-NOT: .hidden test_hidden +declare hidden void @test_hidden_declaration() +define hidden void @test_hidden() { + call void @test_hidden_declaration() + unreachable +} + +; CHECK-NOT: .protected +define protected void @test_protected() { + unreachable +} + +; CHECK: .globl array.globound +; CHECK: array.globound = 2 +; CHECK: .weak array.globound +; CHECK: .globl array +; CHECK: .weak array +@array = weak global [2 x i32] zeroinitializer + +; CHECK: .weak gd +@gd = weak global i32 0 + +; CHECK-NOT: .hidden test_hidden_declaration + +; CHECK: .weak gr +@gr = extern_weak global i32 + +; CHECK: .weak fr +declare extern_weak void @fr(i32*, i32*) + diff --git a/test/CodeGen/XCore/lit.local.cfg b/test/CodeGen/XCore/lit.local.cfg index 8756f37..3e84c1b 100644 --- a/test/CodeGen/XCore/lit.local.cfg +++ b/test/CodeGen/XCore/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp', '.test'] - targets = set(config.root.targets_to_build.split()) if not 'XCore' in targets: config.unsupported = True diff --git a/test/CodeGen/XCore/shedulingPreference.ll b/test/CodeGen/XCore/shedulingPreference.ll new file mode 100644 index 0000000..6c2ac6d --- /dev/null +++ b/test/CodeGen/XCore/shedulingPreference.ll @@ -0,0 +1,25 @@ 
+; RUN: llc < %s -march=xcore + +define void @f( ) { +entry: + + switch i32 undef, label %default [ + i32 0, label %start + ] + +start: + br label %end + +default: + %arg = fadd double undef, undef + %res = call double @f2(i32 undef, double %arg, double undef) + br label %end + +end: + %unused = phi double [ %res, %default ], [ undef, %start ] + + unreachable +} + +declare double @f2(i32, double, double) + diff --git a/test/CodeGen/XCore/threads.ll b/test/CodeGen/XCore/threads.ll index 5840e77..c50da1d 100644 --- a/test/CodeGen/XCore/threads.ll +++ b/test/CodeGen/XCore/threads.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=xcore < %s | FileCheck %s +; RUN: llc -march=xcore -O=0 < %s | FileCheck %s -check-prefix=PHINODE declare i8 addrspace(1)* @llvm.xcore.getst.p1i8.p1i8(i8 addrspace(1)* %r) declare void @llvm.xcore.msync.p1i8(i8 addrspace(1)* %r) @@ -13,55 +14,132 @@ declare void @llvm.xcore.initdp.p1i8(i8 addrspace(1)* %r, i8* %value) define i8 addrspace(1)* @test_getst(i8 addrspace(1)* %r) { ; CHECK-LABEL: test_getst: ; CHECK: getst r0, res[r0] - %result = call i8 addrspace(1)* @llvm.xcore.getst.p1i8.p1i8(i8 addrspace(1)* %r) - ret i8 addrspace(1)* %result + %result = call i8 addrspace(1)* @llvm.xcore.getst.p1i8.p1i8(i8 addrspace(1)* %r) + ret i8 addrspace(1)* %result } define void @test_ssync() { ; CHECK-LABEL: test_ssync: ; CHECK: ssync - call void @llvm.xcore.ssync() - ret void + call void @llvm.xcore.ssync() + ret void } define void @test_mjoin(i8 addrspace(1)* %r) { ; CHECK-LABEL: test_mjoin: ; CHECK: mjoin res[r0] - call void @llvm.xcore.mjoin.p1i8(i8 addrspace(1)* %r) - ret void + call void @llvm.xcore.mjoin.p1i8(i8 addrspace(1)* %r) + ret void } define void @test_initsp(i8 addrspace(1)* %t, i8* %src) { ; CHECK-LABEL: test_initsp: ; CHECK: init t[r0]:sp, r1 - call void @llvm.xcore.initsp.p1i8(i8 addrspace(1)* %t, i8* %src) - ret void + call void @llvm.xcore.initsp.p1i8(i8 addrspace(1)* %t, i8* %src) + ret void } define void @test_initpc(i8 addrspace(1)* %t, i8* 
%src) { ; CHECK-LABEL: test_initpc: ; CHECK: init t[r0]:pc, r1 - call void @llvm.xcore.initpc.p1i8(i8 addrspace(1)* %t, i8* %src) - ret void + call void @llvm.xcore.initpc.p1i8(i8 addrspace(1)* %t, i8* %src) + ret void } define void @test_initlr(i8 addrspace(1)* %t, i8* %src) { ; CHECK-LABEL: test_initlr: ; CHECK: init t[r0]:lr, r1 - call void @llvm.xcore.initlr.p1i8(i8 addrspace(1)* %t, i8* %src) - ret void + call void @llvm.xcore.initlr.p1i8(i8 addrspace(1)* %t, i8* %src) + ret void } define void @test_initcp(i8 addrspace(1)* %t, i8* %src) { ; CHECK-LABEL: test_initcp: ; CHECK: init t[r0]:cp, r1 - call void @llvm.xcore.initcp.p1i8(i8 addrspace(1)* %t, i8* %src) - ret void + call void @llvm.xcore.initcp.p1i8(i8 addrspace(1)* %t, i8* %src) + ret void } define void @test_initdp(i8 addrspace(1)* %t, i8* %src) { ; CHECK-LABEL: test_initdp: ; CHECK: init t[r0]:dp, r1 - call void @llvm.xcore.initdp.p1i8(i8 addrspace(1)* %t, i8* %src) - ret void + call void @llvm.xcore.initdp.p1i8(i8 addrspace(1)* %t, i8* %src) + ret void } + +@tl = thread_local global [3 x i32] zeroinitializer +@tle = external thread_local global [2 x i32] + +define i32* @f_tl() { +; CHECK-LABEL: f_tl: +; CHECK: get r11, id +; CHECK: ldaw [[R0:r[0-9]]], dp[tl] +; CHECK: ldc [[R1:r[0-9]]], 8 +; CHECK: ldc [[R2:r[0-9]]], 12 +; r0 = id*12 + 8 + &tl +; CHECK: lmul {{r[0-9]}}, r0, r11, [[R2]], [[R0]], [[R1]] + ret i32* getelementptr inbounds ([3 x i32]* @tl, i32 0, i32 2) +} + +define i32* @f_tle() { +; CHECK-LABEL: f_tle: +; CHECK: get r11, id +; CHECK: shl [[R0:r[0-9]]], r11, 3 +; CHECK: ldaw [[R1:r[0-9]]], dp[tle] +; r0 = &tl + id*8 +; CHECK: add r0, [[R1]], [[R0]] + ret i32* getelementptr inbounds ([2 x i32]* @tle, i32 0, i32 0) +} + +define i32 @f_tlExpr () { +; CHECK-LABEL: f_tlExpr: +; CHECK: get r11, id +; CHECK: shl [[R0:r[0-9]]], r11, 3 +; CHECK: ldaw [[R1:r[0-9]]], dp[tle] +; CHECK: add [[R2:r[0-9]]], [[R1]], [[R0]] +; CHECK: add r0, [[R2]], [[R2]] + ret i32 add( + i32 ptrtoint( i32* getelementptr 
inbounds ([2 x i32]* @tle, i32 0, i32 0) to i32), + i32 ptrtoint( i32* getelementptr inbounds ([2 x i32]* @tle, i32 0, i32 0) to i32)) +} + +define void @phiNode1() { +; N.B. lowering of duplicate constexpr in a PHI node requires -O=0 +; PHINODE-LABEL: phiNode1: +; PHINODE: get r11, id +; PHINODE-LABEL: .LBB11_1: +; PHINODE: get r11, id +; PHINODE: bu .LBB11_1 +entry: + br label %ConstantExpPhiNode +ConstantExpPhiNode: + %ptr = phi i32* [ getelementptr inbounds ([3 x i32]* @tl, i32 0, i32 0), %entry ], + [ getelementptr inbounds ([3 x i32]* @tl, i32 0, i32 0), %ConstantExpPhiNode ] + br label %ConstantExpPhiNode +exit: + ret void +} + +define void @phiNode2( i1 %bool) { +; N.B. check an extra 'Node_crit_edge' (LBB12_1) is inserted +; PHINODE-LABEL: phiNode2: +; PHINODE: bf {{r[0-9]}}, .LBB12_3 +; PHINODE: bu .LBB12_1 +; PHINODE-LABEL: .LBB12_1: +; PHINODE: get r11, id +; PHINODE-LABEL: .LBB12_2: +; PHINODE: get r11, id +; PHINODE: bu .LBB12_2 +; PHINODE-LABEL: .LBB12_3: +entry: + br i1 %bool, label %ConstantExpPhiNode, label %exit +ConstantExpPhiNode: + %ptr = phi i32* [ getelementptr inbounds ([3 x i32]* @tl, i32 0, i32 0), %entry ], + [ getelementptr inbounds ([3 x i32]* @tl, i32 0, i32 0), %ConstantExpPhiNode ] + br label %ConstantExpPhiNode +exit: + ret void +} + +; CHECK-LABEL: tl: +; CHECK: .space 96 diff --git a/test/CodeGen/XCore/zextfree.ll b/test/CodeGen/XCore/zextfree.ll new file mode 100644 index 0000000..48dce88 --- /dev/null +++ b/test/CodeGen/XCore/zextfree.ll @@ -0,0 +1,15 @@ +; RUN: llc -march=xcore < %s | FileCheck %s + +; CHECK-LABEL: test: +; CHECK-NOT: zext +define void @test(i8* %s1) { +entry: + %u8 = load i8* %s1, align 1 + %bool = icmp eq i8 %u8, 0 + br label %BB1 +BB1: + br i1 %bool, label %BB1, label %BB2 +BB2: + br i1 %bool, label %BB1, label %BB2 +} + diff --git a/test/DebugInfo/2009-11-03-InsertExtractValue.ll b/test/DebugInfo/2009-11-03-InsertExtractValue.ll index 9a3e622..21a60b8 100644 --- 
a/test/DebugInfo/2009-11-03-InsertExtractValue.ll +++ b/test/DebugInfo/2009-11-03-InsertExtractValue.ll @@ -2,6 +2,7 @@ !llvm.dbg.sp = !{!0} !llvm.dbg.cu = !{!5} +!llvm.module.flags = !{!6} !0 = metadata !{i32 786478, metadata !4, metadata !1, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 3, metadata !2, i1 false, i1 false, i32 0, i32 0, null, i32 258, i1 false, null, null, i32 0, metadata !1, i32 3} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 41, metadata !4} ; [ DW_TAG_file_type ] @@ -19,3 +20,4 @@ define <{i32, i32}> @f1() { } ; CHECK: [protected] +!6 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll b/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll index 1ca88ae..6fd7887 100644 --- a/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll +++ b/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll @@ -8,13 +8,14 @@ entry: } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!18} !0 = metadata !{i32 720913, metadata !17, i32 12, metadata !"clang version 3.0 (trunk 139632)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12, null, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} -!5 = metadata !{i32 720942, metadata !17, metadata !6, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @foo, null, null, metadata !10, i32 0} ; [ DW_TAG_subprogram ] +!5 = metadata !{i32 720942, metadata !17, metadata !6, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @foo, null, null, metadata !10, i32 0} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 0] [foo] !6 = metadata !{i32 720937, metadata !17} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!7 = 
metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9} !9 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !10 = metadata !{metadata !11} @@ -24,3 +25,4 @@ entry: !15 = metadata !{i32 3, i32 3, metadata !16, null} !16 = metadata !{i32 720907, metadata !17, metadata !5, i32 1, i32 11, i32 0} ; [ DW_TAG_lexical_block ] !17 = metadata !{metadata !"fb.c", metadata !"/private/tmp"} +!18 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/2009-11-06-NamelessGlobalVariable.ll b/test/DebugInfo/2009-11-06-NamelessGlobalVariable.ll index aec299b..5a10459 100644 --- a/test/DebugInfo/2009-11-06-NamelessGlobalVariable.ll +++ b/test/DebugInfo/2009-11-06-NamelessGlobalVariable.ll @@ -2,6 +2,7 @@ @0 = internal constant i32 1 !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!9} !0 = metadata !{i32 720913, metadata !8, i32 12, metadata !"clang version 3.0 (trunk 139632)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, null, metadata !""} ; [ DW_TAG_compile_unit ] !2 = metadata !{i32 0} @@ -10,3 +11,4 @@ !6 = metadata !{i32 720937, metadata !8} ; [ DW_TAG_file_type ] !7 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !8 = metadata !{metadata !"g.c", metadata !"/private/tmp"} +!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/2009-11-10-CurrentFn.ll b/test/DebugInfo/2009-11-10-CurrentFn.ll index 10f2e88..d154c43 100644 --- a/test/DebugInfo/2009-11-10-CurrentFn.ll +++ b/test/DebugInfo/2009-11-10-CurrentFn.ll @@ -11,13 +11,14 @@ declare void @foo(...) 
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!18} !0 = metadata !{i32 720913, metadata !17, i32 12, metadata !"clang version 3.0 (trunk 139632)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, null, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} -!5 = metadata !{i32 720942, metadata !17, metadata !6, metadata !"bar", metadata !"bar", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void (i32)* @bar, null, null, metadata !9, metadata !""} ; [ DW_TAG_subprogram ] +!5 = metadata !{i32 720942, metadata !17, metadata !6, metadata !"bar", metadata !"bar", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32)* @bar, null, null, metadata !9, metadata !""} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 0] [bar] !6 = metadata !{i32 720937, metadata !17} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{null} !9 = metadata !{metadata !11} !11 = metadata !{i32 721153, metadata !17, metadata !5, metadata !"i", i32 16777219, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ] @@ -27,3 +28,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !15 = metadata !{i32 720907, metadata !17, metadata !5, i32 3, i32 17, i32 0} ; [ DW_TAG_lexical_block ] !16 = metadata !{i32 5, i32 1, metadata !15, null} !17 = metadata !{metadata !"cf.c", metadata !"/private/tmp"} +!18 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/2010-01-05-DbgScope.ll 
b/test/DebugInfo/2010-01-05-DbgScope.ll index e421c93..809cebf 100644 --- a/test/DebugInfo/2010-01-05-DbgScope.ll +++ b/test/DebugInfo/2010-01-05-DbgScope.ll @@ -9,15 +9,17 @@ entry: } !llvm.dbg.cu = !{!3} +!llvm.module.flags = !{!14} !0 = metadata !{i32 571, i32 3, metadata !1, null} !1 = metadata !{i32 458763, metadata !11, metadata !2, i32 1, i32 1, i32 0}; [DW_TAG_lexical_block ] !2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo", metadata !"foo", metadata !"foo", i32 561, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0}; [DW_TAG_subprogram ] !3 = metadata !{i32 458769, metadata !11, i32 12, metadata !"clang 1.1", i1 true, metadata !"", i32 0, metadata !12, metadata !12, metadata !13, null, null, metadata !""}; [DW_TAG_compile_unit ] -!4 = metadata !{i32 458773, null, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0}; [DW_TAG_subroutine_type ] +!4 = metadata !{i32 458773, null, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !5 = metadata !{metadata !6} !6 = metadata !{i32 458788, null, metadata !3, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] !10 = metadata !{i32 588, i32 1, metadata !2, null} !11 = metadata !{metadata !"hashtab.c", metadata !"/usr/src/gnu/usr.bin/cc/cc_tools/../../../../contrib/gcclibs/libiberty"} !12 = metadata !{i32 0} !13 = metadata !{metadata !2} +!14 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/2010-01-19-DbgScope.ll b/test/DebugInfo/2010-01-19-DbgScope.ll index 6aedfc8..1a7e378 100644 --- a/test/DebugInfo/2010-01-19-DbgScope.ll +++ b/test/DebugInfo/2010-01-19-DbgScope.ll @@ -15,12 +15,13 @@ bb11: ; preds = %entry } !llvm.dbg.cu = !{!3} +!llvm.module.flags = !{!15} !0 = metadata !{i32 8647, i32 0, metadata !1, null} !1 = metadata 
!{i32 458763, metadata !12, metadata !2, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] !2 = metadata !{i32 458798, null, metadata !3, metadata !"bar", metadata !"bar", metadata !"bar", i32 8639, metadata !4, i1 true, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !3 = metadata !{i32 458769, metadata !12, i32 1, metadata !"LLVM build 00", i1 true, metadata !"", i32 0, metadata !13, metadata !13, metadata !14, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!4 = metadata !{i32 458773, null, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{i32 458773, null, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !5 = metadata !{metadata !6} !6 = metadata !{i32 458788, null, metadata !3, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] !7 = metadata !{i32 8648, i32 0, metadata !8, null} @@ -31,3 +32,4 @@ bb11: ; preds = %entry !12 = metadata !{metadata !"c-parser.c", metadata !"llvmgcc"} !13 = metadata !{i32 0} !14 = metadata !{metadata !2} +!15 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/2010-03-12-llc-crash.ll b/test/DebugInfo/2010-03-12-llc-crash.ll index 114c870..241bb37 100644 --- a/test/DebugInfo/2010-03-12-llc-crash.ll +++ b/test/DebugInfo/2010-03-12-llc-crash.ll @@ -13,7 +13,7 @@ entry: !1 = metadata !{i32 524334, metadata !8, metadata !3, metadata !"foo", metadata !"foo", metadata !"foo", i32 892, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !2 = metadata !{i32 524329, metadata !8} ; [ DW_TAG_file_type ] !3 = metadata !{i32 524305, metadata !9, i32 4, metadata !"clang 1.1", i1 true, metadata !"", i32 0, metadata !10, metadata !10, null, null, 
null, metadata !""} ; [ DW_TAG_compile_unit ] -!4 = metadata !{i32 524309, metadata !9, metadata !5, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{i32 524309, metadata !9, metadata !5, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !5 = metadata !{i32 524329, metadata !9} ; [ DW_TAG_file_type ] !6 = metadata !{null} !7 = metadata !{i32 524324, metadata !9, metadata !5, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] diff --git a/test/DebugInfo/2010-03-19-DbgDeclare.ll b/test/DebugInfo/2010-03-19-DbgDeclare.ll index 9f52d11..d1afade 100644 --- a/test/DebugInfo/2010-03-19-DbgDeclare.ll +++ b/test/DebugInfo/2010-03-19-DbgDeclare.ll @@ -8,6 +8,7 @@ entry: ret void } !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!5} !2 = metadata !{i32 786449, metadata !4, i32 32769, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !3, metadata !3, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/scratch.cpp] [lang 0x8001] !3 = metadata !{} !0 = metadata !{i32 662302, i32 26, metadata !1, null} @@ -15,3 +16,4 @@ entry: !4 = metadata !{metadata !"scratch.cpp", metadata !"/usr/local/google/home/blaikie/dev/scratch"} declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone +!5 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/2010-03-24-MemberFn.ll b/test/DebugInfo/2010-03-24-MemberFn.ll index dfdf87f..1689fe6 100644 --- a/test/DebugInfo/2010-03-24-MemberFn.ll +++ b/test/DebugInfo/2010-03-24-MemberFn.ll @@ -37,6 +37,7 @@ return: ; preds = %entry declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!5} +!llvm.module.flags = !{!28} !0 = metadata !{i32 786688, metadata !1, metadata 
!"s1", metadata !4, i32 3, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ] !1 = metadata !{i32 786443, metadata !25, metadata !2, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ] @@ -44,14 +45,14 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !3 = metadata !{i32 786478, metadata !25, metadata !4, metadata !"bar", metadata !"bar", metadata !"_Z3barv", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @_Z3barv, null, null, null, i32 3} ; [ DW_TAG_subprogram ] !4 = metadata !{i32 786473, metadata !25} ; [ DW_TAG_file_type ] !5 = metadata !{i32 786449, metadata !25, i32 4, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !27, metadata !27, metadata !24, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!6 = metadata !{i32 786453, metadata !25, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ] +!6 = metadata !{i32 786453, metadata !25, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !7 = metadata !{metadata !8} !8 = metadata !{i32 786468, metadata !25, metadata !4, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!9 = metadata !{i32 786451, metadata !26, metadata !4, metadata !"S", i32 2, i64 8, i64 8, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ] +!9 = metadata !{i32 786451, metadata !26, metadata !4, metadata !"S", i32 2, i64 8, i64 8, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [S] [line 2, size 8, align 8, offset 0] [def] [from ] !10 = metadata !{i32 786473, metadata !26} ; [ DW_TAG_file_type ] !11 = metadata !{metadata !12} !12 = metadata !{i32 786478, metadata !26, metadata !9, metadata !"foo", metadata !"foo", metadata !"_ZN1S3fooEv", 
i32 3, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 (%struct.S*)* @_ZN1S3fooEv, null, null, null, i32 3} ; [ DW_TAG_subprogram ] -!13 = metadata !{i32 786453, metadata !25, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null} ; [ DW_TAG_subroutine_type ] +!13 = metadata !{i32 786453, metadata !25, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !14 = metadata !{metadata !8, metadata !15} !15 = metadata !{i32 786447, metadata !25, metadata !4, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !9} ; [ DW_TAG_pointer_type ] !16 = metadata !{i32 3, i32 0, metadata !1, null} @@ -66,3 +67,4 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !25 = metadata !{metadata !"one.cc", metadata !"/tmp/"} !26 = metadata !{metadata !"one.h", metadata !"/tmp/"} !27 = metadata !{i32 0} +!28 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/2010-03-30-InvalidDbgInfoCrash.ll b/test/DebugInfo/2010-03-30-InvalidDbgInfoCrash.ll index 7958f49..81285a9 100644 --- a/test/DebugInfo/2010-03-30-InvalidDbgInfoCrash.ll +++ b/test/DebugInfo/2010-03-30-InvalidDbgInfoCrash.ll @@ -9,6 +9,7 @@ entry: declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!5} +!llvm.module.flags = !{!22} !0 = metadata !{{ [0 x i8] }** undef} !1 = metadata !{i32 524544, metadata !2, metadata !"x", metadata !4, i32 11, metadata !9} ; [ DW_TAG_auto_variable ] @@ -16,15 +17,15 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !3 = metadata !{i32 524334, metadata !20, null, metadata !"baz", metadata !"baz", metadata !"baz", i32 8, metadata !6, i1 true, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !4 = metadata !{i32 524329, metadata !20} ; [ DW_TAG_file_type ] !5 
= metadata !{i32 524305, metadata !20, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !21, metadata !21, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!6 = metadata !{i32 524309, metadata !20, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ] +!6 = metadata !{i32 524309, metadata !20, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !7 = metadata !{null, metadata !8} !8 = metadata !{i32 524324, metadata !20, metadata !4, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !9 = metadata !{i32 524303, metadata !20, metadata !4, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] -!10 = metadata !{i32 524307, metadata !20, metadata !3, metadata !"", i32 11, i64 8, i64 8, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ] +!10 = metadata !{i32 524307, metadata !20, metadata !3, metadata !"", i32 11, i64 8, i64 8, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [line 11, size 8, align 8, offset 0] [def] [from ] !11 = metadata !{metadata !12} !12 = metadata !{i32 524301, metadata !20, metadata !10, metadata !"b", i32 11, i64 8, i64 8, i64 0, i32 0, metadata !13} ; [ DW_TAG_member ] !13 = metadata !{i32 524310, metadata !20, metadata !3, metadata !"A", i32 11, i64 0, i64 0, i64 0, i32 0, metadata !14} ; [ DW_TAG_typedef ] -!14 = metadata !{i32 524289, metadata !20, metadata !4, metadata !"", i32 0, i64 8, i64 8, i64 0, i32 0, metadata !15, metadata !16, i32 0, null} ; [ DW_TAG_array_type ] +!14 = metadata !{i32 524289, metadata !20, metadata !4, metadata !"", i32 0, i64 8, i64 8, i64 0, i32 0, metadata !15, metadata !16, i32 0, null, null, null} ; [ 
DW_TAG_array_type ] [line 0, size 8, align 8, offset 0] [from char] !15 = metadata !{i32 524324, metadata !20, metadata !4, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] !16 = metadata !{metadata !17} !17 = metadata !{i32 524321, i64 0, i64 1} ; [ DW_TAG_subrange_type ] @@ -32,3 +33,4 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !19 = metadata !{metadata !"llvm.mdnode.fwdref.23"} !20 = metadata !{metadata !"2007-12-VarArrayDebug.c", metadata !"/Users/sabre/llvm/test/FrontendC/"} !21 = metadata !{i32 0} +!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll b/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll index aea98fd..4d4d616 100644 --- a/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll +++ b/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll @@ -51,6 +51,7 @@ entry: } !llvm.dbg.cu = !{!4} +!llvm.module.flags = !{!40} !37 = metadata !{metadata !2, metadata !10, metadata !23} !0 = metadata !{i32 786688, metadata !1, metadata !"b", metadata !3, i32 16, metadata !8, i32 0, null} ; [ DW_TAG_auto_variable ] @@ -58,13 +59,13 @@ entry: !2 = metadata !{i32 786478, metadata !38, metadata !3, metadata !"main", metadata !"main", metadata !"main", i32 15, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 0, i32 ()* @main, null, null, null, i32 15} ; [ DW_TAG_subprogram ] !3 = metadata !{i32 786473, metadata !38} ; [ DW_TAG_file_type ] !4 = metadata !{i32 786449, metadata !38, i32 4, metadata !"clang 1.5", i1 false, metadata !"", i32 0, metadata !39, metadata !39, metadata !37, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!5 = metadata !{i32 786453, metadata !38, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ] +!5 = metadata !{i32 786453, metadata !38, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ 
DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !6 = metadata !{metadata !7} !7 = metadata !{i32 786468, metadata !38, metadata !3, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!8 = metadata !{i32 786434, metadata !38, metadata !3, metadata !"B", i32 2, i64 8, i64 8, i64 0, i32 0, null, metadata !9, i32 0, null} ; [ DW_TAG_class_type ] +!8 = metadata !{i32 786434, metadata !38, metadata !3, metadata !"B", i32 2, i64 8, i64 8, i64 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_class_type ] [B] [line 2, size 8, align 8, offset 0] [def] [from ] !9 = metadata !{metadata !10} !10 = metadata !{i32 786478, metadata !38, metadata !8, metadata !"fn", metadata !"fn", metadata !"_ZN1B2fnEv", i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 0, i32 (%class.A*)* @_ZN1B2fnEv, null, null, null, i32 4} ; [ DW_TAG_subprogram ] -!11 = metadata !{i32 786453, metadata !38, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ] +!11 = metadata !{i32 786453, metadata !38, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !12 = metadata !{metadata !7, metadata !13} !13 = metadata !{i32 786447, metadata !38, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !8} ; [ DW_TAG_pointer_type ] !14 = metadata !{i32 16, i32 5, metadata !1, null} @@ -74,10 +75,10 @@ entry: !18 = metadata !{i32 4, i32 7, metadata !10, null} !19 = metadata !{i32 786688, metadata !20, metadata !"a", metadata !3, i32 9, metadata !21, i32 0, null} ; [ DW_TAG_auto_variable ] !20 = metadata !{i32 786443, metadata !38, metadata !10, i32 4, i32 12, i32 0} ; [ DW_TAG_lexical_block ] -!21 = metadata !{i32 786434, metadata !38, metadata !10, metadata !"A", i32 5, i64 8, i64 8, i64 0, i32 0, null, metadata 
!22, i32 0, null} ; [ DW_TAG_class_type ] +!21 = metadata !{i32 786434, metadata !38, metadata !10, metadata !"A", i32 5, i64 8, i64 8, i64 0, i32 0, null, metadata !22, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 5, size 8, align 8, offset 0] [def] [from ] !22 = metadata !{metadata !23} !23 = metadata !{i32 786478, metadata !38, metadata !21, metadata !"foo", metadata !"foo", metadata !"_ZZN1B2fnEvEN1A3fooEv", i32 7, metadata !24, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 0, i32 (%class.A*)* @_ZZN1B2fnEvEN1A3fooEv, null, null, null, i32 7} ; [ DW_TAG_subprogram ] -!24 = metadata !{i32 786453, metadata !38, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null} ; [ DW_TAG_subroutine_type ] +!24 = metadata !{i32 786453, metadata !38, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !25 = metadata !{metadata !7, metadata !26} !26 = metadata !{i32 786447, metadata !38, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !21} ; [ DW_TAG_pointer_type ] !27 = metadata !{i32 9, i32 7, metadata !20, null} @@ -92,3 +93,4 @@ entry: !36 = metadata !{i32 786443, metadata !38, metadata !23, i32 7, i32 17, i32 0} ; [ DW_TAG_lexical_block ] !38 = metadata !{metadata !"one.cc", metadata !"/tmp" } !39 = metadata !{i32 0} +!40 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/2010-04-19-FramePtr.ll b/test/DebugInfo/2010-04-19-FramePtr.ll index 30aad38..4af2fdc 100644 --- a/test/DebugInfo/2010-04-19-FramePtr.ll +++ b/test/DebugInfo/2010-04-19-FramePtr.ll @@ -20,16 +20,18 @@ return: ; preds = %entry } !llvm.dbg.cu = !{!3} +!llvm.module.flags = !{!12} !9 = metadata !{metadata !1} !0 = metadata !{i32 2, i32 0, metadata !1, null} !1 = metadata !{i32 786478, metadata !10, null, metadata !"foo", metadata !"foo", metadata !"foo", i32 2, 
metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @foo, null, null, null, i32 2} ; [ DW_TAG_subprogram ] !2 = metadata !{i32 786473, metadata !10} ; [ DW_TAG_file_type ] !3 = metadata !{i32 786449, metadata !10, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !11, metadata !11, metadata !9, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!4 = metadata !{i32 786453, metadata !10, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{i32 786453, metadata !10, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !5 = metadata !{metadata !6} !6 = metadata !{i32 786468, metadata !10, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !7 = metadata !{i32 2, i32 0, metadata !8, null} !8 = metadata !{i32 786443, metadata !10, metadata !1, i32 2, i32 0, i32 0} ; [ DW_TAG_lexical_block ] !10 = metadata !{metadata !"a.c", metadata !"/tmp"} !11 = metadata !{i32 0} +!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/2010-05-03-DisableFramePtr.ll b/test/DebugInfo/2010-05-03-DisableFramePtr.ll index 1aa2240..ba8d0e5 100644 --- a/test/DebugInfo/2010-05-03-DisableFramePtr.ll +++ b/test/DebugInfo/2010-05-03-DisableFramePtr.ll @@ -2,7 +2,7 @@ ; Radar 7937664 %struct.AppleEvent = type opaque -define void @DisposeDMNotificationUPP(void (%struct.AppleEvent*)* %userUPP) "no-frame-pointer-elim-non-leaf"="true" nounwind ssp { +define void @DisposeDMNotificationUPP(void (%struct.AppleEvent*)* %userUPP) "no-frame-pointer-elim-non-leaf" nounwind ssp { entry: %userUPP_addr = alloca void (%struct.AppleEvent*)* ; <void (%struct.AppleEvent*)**> [#uses=1] %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] 
@@ -17,22 +17,24 @@ return: ; preds = %entry declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!3} +!llvm.module.flags = !{!19} !0 = metadata !{i32 524545, metadata !1, metadata !"userUPP", metadata !2, i32 7, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] !1 = metadata !{i32 524334, metadata !16, null, metadata !"DisposeDMNotificationUPP", metadata !"DisposeDMNotificationUPP", metadata !"DisposeDMNotificationUPP", i32 7, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !2 = metadata !{i32 524329, metadata !16} ; [ DW_TAG_file_type ] !3 = metadata !{i32 524305, metadata !16, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", i1 true, metadata !"", i32 0, metadata !17, metadata !17, metadata !18, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!4 = metadata !{i32 524309, metadata !16, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{i32 524309, metadata !16, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !5 = metadata !{null, metadata !6} !6 = metadata !{i32 524310, metadata !16, metadata !2, metadata !"DMNotificationUPP", i32 6, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ] !7 = metadata !{i32 524303, metadata !16, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] -!8 = metadata !{i32 524309, metadata !16, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !9, i32 0, null} ; [ DW_TAG_subroutine_type ] +!8 = metadata !{i32 524309, metadata !16, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 
0] [from ] !9 = metadata !{null, metadata !10} !10 = metadata !{i32 524303, metadata !16, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] !11 = metadata !{i32 524310, metadata !16, metadata !2, metadata !"AppleEvent", i32 4, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ] -!12 = metadata !{i32 524307, metadata !16, metadata !2, metadata !"AEDesc", i32 1, i64 0, i64 0, i64 0, i32 4, null, null, i32 0, null} ; [ DW_TAG_structure_type ] +!12 = metadata !{i32 524307, metadata !16, metadata !2, metadata !"AEDesc", i32 1, i64 0, i64 0, i64 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [AEDesc] [line 1, size 0, align 0, offset 0] [decl] [from ] !13 = metadata !{i32 7, i32 0, metadata !1, null} !14 = metadata !{i32 8, i32 0, metadata !15, null} !15 = metadata !{i32 524299, metadata !16, metadata !1, i32 7, i32 0, i32 0} ; [ DW_TAG_lexical_block ] !16 = metadata !{metadata !"t.c", metadata !"/Users/echeng/LLVM/radars/r7937664/"} !17 = metadata !{i32 0} !18 = metadata !{metadata !1} +!19 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/2010-05-03-OriginDIE.ll b/test/DebugInfo/2010-05-03-OriginDIE.ll index b4c2bc2b..0c5d876 100644 --- a/test/DebugInfo/2010-05-03-OriginDIE.ll +++ b/test/DebugInfo/2010-05-03-OriginDIE.ll @@ -49,12 +49,13 @@ declare i64 @llvm.bswap.i64(i64) nounwind readnone declare void @uuid_LtoB(i8*, i8*) !llvm.dbg.cu = !{!4} +!llvm.module.flags = !{!41} !0 = metadata !{i32 808, i32 0, metadata !1, null} !1 = metadata !{i32 524299, metadata !39, metadata !2, i32 807, i32 0, i32 0} ; [ DW_TAG_lexical_block ] !2 = metadata !{i32 524334, metadata !39, null, metadata !"gpt2gpm", metadata !"gpt2gpm", metadata !"gpt2gpm", i32 807, metadata !5, i1 true, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !3 = metadata !{i32 524329, metadata !39} ; [ DW_TAG_file_type ] !4 = metadata 
!{i32 524305, metadata !39, i32 1, metadata !"llvm-gcc", i1 true, metadata !"", i32 0, metadata !18, metadata !18, metadata !40, null, null, i32 0} ; [ DW_TAG_compile_unit ] -!5 = metadata !{i32 524309, metadata !39, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ] +!5 = metadata !{i32 524309, metadata !39, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !6 = metadata !{null} !7 = metadata !{i32 810, i32 0, metadata !1, null} !8 = metadata !{i32 524545, metadata !9, metadata !"data", metadata !10, i32 201, metadata !11} ; [ DW_TAG_arg_variable ] @@ -74,11 +75,11 @@ declare void @uuid_LtoB(i8*, i8*) !22 = metadata !{i32 524324, metadata !39, metadata !3, metadata !"long unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] !23 = metadata !{i32 524544, metadata !24, metadata !"u", metadata !10, i32 100, metadata !25} ; [ DW_TAG_auto_variable ] !24 = metadata !{i32 524299, metadata !38, metadata !16, i32 95, i32 0, i32 0} ; [ DW_TAG_lexical_block ] -!25 = metadata !{i32 524311, metadata !38, metadata !16, metadata !"", i32 97, i64 64, i64 64, i64 0, i32 0, null, metadata !26, i32 0, null} ; [ DW_TAG_union_type ] +!25 = metadata !{i32 524311, metadata !38, metadata !16, metadata !"", i32 97, i64 64, i64 64, i64 0, i32 0, null, metadata !26, i32 0, null, null, null} ; [ DW_TAG_union_type ] [line 97, size 64, align 64, offset 0] [def] [from ] !26 = metadata !{metadata !27, metadata !28} !27 = metadata !{i32 524301, metadata !38, metadata !25, metadata !"u64", i32 98, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_member ] !28 = metadata !{i32 524301, metadata !38, metadata !25, metadata !"u32", i32 99, i64 64, i64 32, i64 0, i32 0, metadata !29} ; [ DW_TAG_member ] -!29 = metadata !{i32 524289, metadata !39, metadata !3, metadata !"", 
i32 0, i64 64, i64 32, i64 0, i32 0, metadata !30, metadata !32, i32 0, null} ; [ DW_TAG_array_type ] +!29 = metadata !{i32 524289, metadata !39, metadata !3, metadata !"", i32 0, i64 64, i64 32, i64 0, i32 0, metadata !30, metadata !32, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 64, align 32, offset 0] [from uint32_t] !30 = metadata !{i32 524310, metadata !36, metadata !3, metadata !"uint32_t", i32 55, i64 0, i64 0, i64 0, i32 0, metadata !31} ; [ DW_TAG_typedef ] !31 = metadata !{i32 524324, metadata !39, metadata !3, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] !32 = metadata !{metadata !33} @@ -90,3 +91,4 @@ declare void @uuid_LtoB(i8*, i8*) !38 = metadata !{metadata !"OSByteOrder.h", metadata !"/usr/include/libkern/ppc"} !39 = metadata !{metadata !"G.c", metadata !"/tmp"} !40 = metadata !{metadata !2, metadata !9, metadata !16} +!41 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/2010-05-10-MultipleCU.ll b/test/DebugInfo/2010-05-10-MultipleCU.ll index da0b2e8..ad7c7d1 100644 --- a/test/DebugInfo/2010-05-10-MultipleCU.ll +++ b/test/DebugInfo/2010-05-10-MultipleCU.ll @@ -27,6 +27,7 @@ return: } !llvm.dbg.cu = !{!4, !12} +!llvm.module.flags = !{!21} !16 = metadata !{metadata !2} !17 = metadata !{metadata !10} @@ -35,7 +36,7 @@ return: !2 = metadata !{i32 786478, metadata !18, metadata !3, metadata !"foo", metadata !"foo", metadata !"foo", i32 2, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !3 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ] !4 = metadata !{i32 786449, metadata !18, i32 1, metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !19, metadata !19, metadata !16, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!5 = metadata !{i32 786453, metadata !18, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ] +!5 = metadata !{i32 786453, metadata !18, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !6 = metadata !{metadata !7} !7 = metadata !{i32 786468, metadata !18, metadata !3, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !8 = metadata !{i32 3, i32 0, metadata !9, null} @@ -43,9 +44,10 @@ return: !10 = metadata !{i32 786478, metadata !20, metadata !11, metadata !"bar", metadata !"bar", metadata !"bar", i32 2, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !11 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ] !12 = metadata !{i32 786449, metadata !20, i32 1, metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !19, metadata !19, metadata !17, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!13 = metadata !{i32 786453, metadata !20, metadata !11, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null} ; [ DW_TAG_subroutine_type ] +!13 = metadata !{i32 786453, metadata !20, metadata !11, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !14 = metadata !{metadata !15} !15 = metadata !{i32 786468, metadata !20, metadata !11, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !18 = metadata !{metadata !"a.c", metadata !"/tmp/"} !19 = metadata !{i32 0} !20 = metadata !{metadata !"b.c", metadata !"/tmp/"} +!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll b/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll index 295648f..50a3422 100644 --- a/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll +++ b/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll @@ -22,20 +22,21 @@ entry: } !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!28} !0 = metadata !{i32 786478, metadata !27, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 9, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, metadata !24, i32 9} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 786473, metadata !27} ; [ DW_TAG_file_type ] !2 = metadata !{i32 786449, metadata !27, i32 1, metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !20, metadata !20, metadata !25, metadata !26, metadata !26, metadata !""} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !27, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !27, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5, metadata !5} !5 = metadata !{i32 786468, metadata !27, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !6 = metadata !{i32 786478, metadata !27, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", i32 14, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!7 = metadata !{i32 786453, metadata !27, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 786453, metadata !27, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !5} !9 = metadata !{i32 786689, metadata !0, metadata !"j", metadata !1, i32 9, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] !10 = metadata !{i32 786688, metadata !11, metadata !"xyz", metadata !1, i32 10, metadata !12, i32 0, null} ; [ DW_TAG_auto_variable ] !11 = metadata !{i32 786443, metadata !1, metadata !0, i32 9, i32 0, i32 0} ; [ DW_TAG_lexical_block ] -!12 = metadata !{i32 786451, metadata !27, metadata !0, metadata !"X", i32 10, i64 64, i64 32, i64 0, i32 0, null, metadata !13, i32 0, null} ; [ DW_TAG_structure_type ] +!12 = metadata !{i32 786451, metadata !27, metadata !0, 
metadata !"X", i32 10, i64 64, i64 32, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [X] [line 10, size 64, align 32, offset 0] [def] [from ] !13 = metadata !{metadata !14, metadata !15} !14 = metadata !{i32 786445, metadata !27, metadata !12, metadata !"a", i32 10, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ] !15 = metadata !{i32 786445, metadata !27, metadata !12, metadata !"b", i32 10, i64 32, i64 32, i64 32, i32 0, metadata !5} ; [ DW_TAG_member ] @@ -51,3 +52,4 @@ entry: !25 = metadata !{metadata !0, metadata !6} !26 = metadata !{metadata !16} !27 = metadata !{metadata !"bar.c", metadata !"/tmp/"} +!28 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/2010-07-19-Crash.ll b/test/DebugInfo/2010-07-19-Crash.ll index a395efe..6b6e61d 100644 --- a/test/DebugInfo/2010-07-19-Crash.ll +++ b/test/DebugInfo/2010-07-19-Crash.ll @@ -8,13 +8,14 @@ entry: } !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!15} !llvm.dbg.sp = !{!0, !6, !11} !llvm.dbg.lv.foo = !{!7} !0 = metadata !{i32 524334, metadata !12, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 524329, metadata !12} ; [ DW_TAG_file_type ] !2 = metadata !{i32 524305, metadata !12, i32 12, metadata !"clang 2.8", i1 true, metadata !"", i32 0, metadata !14, metadata !14, metadata !13, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 524309, metadata !12, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 524309, metadata !12, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5} !5 = 
metadata !{i32 524324, metadata !12, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !6 = metadata !{i32 524334, metadata !12, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 7, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] @@ -26,3 +27,4 @@ entry: !12 = metadata !{metadata !"one.c", metadata !"/private/tmp"} !13 = metadata !{metadata !0, metadata !6, metadata !11} !14 = metadata !{i32 0} +!15 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/2010-10-01-crash.ll b/test/DebugInfo/2010-10-01-crash.ll index ddb9acc..f8dbb6e 100644 --- a/test/DebugInfo/2010-10-01-crash.ll +++ b/test/DebugInfo/2010-10-01-crash.ll @@ -14,7 +14,8 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, !llvm.dbg.cu = !{!2} -!0 = metadata !{i32 589870, metadata !1, i32 0, metadata !"CGRectStandardize", metadata !"CGRectStandardize", metadata !"CGRectStandardize", i32 54, null, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i32*, i32*)* @CGRectStandardize, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!llvm.module.flags = !{!27} +!0 = metadata !{i32 589870, metadata !1, null, metadata !"CGRectStandardize", metadata !"CGRectStandardize", metadata !"CGRectStandardize", i32 54, null, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i32*, i32*)* @CGRectStandardize, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 54] [def] [scope 0] [CGRectStandardize] !1 = metadata !{i32 589865, metadata !25} !2 = metadata !{i32 589841, metadata !25, i32 16, metadata !"clang version 2.9 (trunk 115292)", i1 true, metadata !"", i32 1, metadata !26, metadata !26, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ] !5 = metadata !{i32 589846, metadata !25, null, metadata !"CGRect", i32 49, i64 0, i64 0, i64 0, i32 0, null} @@ -22,3 +23,4 @@ declare void 
@llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, !24 = metadata !{i32 53, i32 33, metadata !0, null} !25 = metadata !{metadata !"GSFusedSilica.m", metadata !"/Volumes/Data/Users/sabre/Desktop"} !26 = metadata !{i32 0} +!27 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/AArch64/dwarfdump.ll b/test/DebugInfo/AArch64/dwarfdump.ll index 2598d5c..4c20507 100644 --- a/test/DebugInfo/AArch64/dwarfdump.ll +++ b/test/DebugInfo/AArch64/dwarfdump.ll @@ -21,14 +21,16 @@ define i32 @main() nounwind { attributes #0 = { nounwind } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!10} !0 = metadata !{i32 786449, metadata !9, i32 12, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/timnor01/llvm/build/tmp.c] [DW_LANG_C99] !1 = metadata !{i32 0} !2 = metadata !{metadata !3} !3 = metadata !{i32 786478, metadata !9, metadata !4, metadata !"main", metadata !"main", metadata !"", i32 1, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [main] !4 = metadata !{i32 786473, metadata !9} ; [ DW_TAG_file_type ] -!5 = metadata !{i32 786453, null, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!5 = metadata !{i32 786453, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !6 = metadata !{metadata !7} !7 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !8 = metadata !{i32 2, i32 0, metadata !3, null} !9 = metadata !{metadata !"tmp.c", metadata 
!"/home/tim/llvm/build"} +!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/AArch64/lit.local.cfg b/test/DebugInfo/AArch64/lit.local.cfg index c5ce241..9a66a00 100644 --- a/test/DebugInfo/AArch64/lit.local.cfg +++ b/test/DebugInfo/AArch64/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - targets = set(config.root.targets_to_build.split()) if not 'AArch64' in targets: config.unsupported = True diff --git a/test/DebugInfo/AArch64/variable-loc.ll b/test/DebugInfo/AArch64/variable-loc.ll index 30eabc8..f42cb74 100644 --- a/test/DebugInfo/AArch64/variable-loc.ll +++ b/test/DebugInfo/AArch64/variable-loc.ll @@ -23,7 +23,10 @@ ; CHECK: add x29, sp, #416 ; CHECK: add {{x[0-9]+}}, sp, #4 - ; Now check the debugging information reflects this: +; CHECK: .Linfo_string7: +; CHECK-NEXT: main_arr + +; Now check the debugging information reflects this: ; CHECK: DW_TAG_variable ; CHECK-NEXT: .word .Linfo_string7 @@ -32,8 +35,6 @@ ; CHECK-NEXT: .byte 145 ; CHECK-NEXT: .ascii "\344|" -; CHECK: .Linfo_string7: -; CHECK-NEXT: main_arr target datalayout = "e-p:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-f128:128:128-n32:64-S128" @@ -68,25 +69,26 @@ entry: declare i32 @printf(i8*, ...) 
!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!30} !0 = metadata !{i32 786449, metadata !29, i32 12, metadata !"clang version 3.2 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/timnor01/a64-trunk/build/simple.c] [DW_LANG_C99] !1 = metadata !{i32 0} !3 = metadata !{metadata !5, metadata !11, metadata !14} !5 = metadata !{i32 786478, metadata !29, metadata !6, metadata !"populate_array", metadata !"populate_array", metadata !"", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*, i32)* @populate_array, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [populate_array] !6 = metadata !{i32 786473, metadata !29} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{null, metadata !9, metadata !10} !9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int] !10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !11 = metadata !{i32 786478, metadata !29, metadata !6, metadata !"sum_array", metadata !"sum_array", metadata !"", i32 9, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*, i32)* @sum_array, null, null, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [sum_array] -!12 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, 
i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!12 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !13 = metadata !{metadata !10, metadata !9, metadata !10} !14 = metadata !{i32 786478, metadata !29, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 18, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [main] -!15 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!15 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !16 = metadata !{metadata !10} !17 = metadata !{i32 786688, metadata !18, metadata !"main_arr", metadata !6, i32 19, metadata !19, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [main_arr] [line 19] !18 = metadata !{i32 786443, metadata !29, metadata !14, i32 18, i32 16, i32 4} ; [ DW_TAG_lexical_block ] [/home/timnor01/a64-trunk/build/simple.c] -!19 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 3200, i64 32, i32 0, i32 0, metadata !10, metadata !20, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 3200, align 32, offset 0] [from int] +!19 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 3200, i64 32, i32 0, i32 0, metadata !10, metadata !20, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 3200, align 32, offset 0] [from int] !20 = metadata !{i32 786465, i64 0, i64 99} ; [ DW_TAG_subrange_type ] [0, 99] !22 = metadata !{i32 19, i32 7, metadata !18, null} !23 = metadata !{i32 
786688, metadata !18, metadata !"val", metadata !6, i32 20, metadata !10, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [val] [line 20] @@ -96,3 +98,4 @@ declare i32 @printf(i8*, ...) !27 = metadata !{i32 24, i32 3, metadata !18, null} !28 = metadata !{i32 26, i32 3, metadata !18, null} !29 = metadata !{metadata !"simple.c", metadata !"/home/timnor01/a64-trunk/build"} +!30 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/ARM/PR16736.ll b/test/DebugInfo/ARM/PR16736.ll new file mode 100644 index 0000000..d01fa22 --- /dev/null +++ b/test/DebugInfo/ARM/PR16736.ll @@ -0,0 +1,65 @@ +; RUN: llc -filetype=asm < %s | FileCheck %s +; CHECK: @DEBUG_VALUE: h:x <- [R{{.*}}+{{.*}}] +; generated from: +; clang -cc1 -triple thumbv7 -S -O1 arm.cpp -g +; +; int f(); +; void g(float); +; void h(int, int, int, int, float x) { +; g(x = f()); +; } +; +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:32-n32-S64" +target triple = "thumbv7-apple-ios" + +; Function Attrs: nounwind +define arm_aapcscc void @_Z1hiiiif(i32, i32, i32, i32, float %x) #0 { +entry: + tail call void @llvm.dbg.value(metadata !{i32 %0}, i64 0, metadata !12), !dbg !18 + tail call void @llvm.dbg.value(metadata !{i32 %1}, i64 0, metadata !13), !dbg !18 + tail call void @llvm.dbg.value(metadata !{i32 %2}, i64 0, metadata !14), !dbg !18 + tail call void @llvm.dbg.value(metadata !{i32 %3}, i64 0, metadata !15), !dbg !18 + tail call void @llvm.dbg.value(metadata !{float %x}, i64 0, metadata !16), !dbg !18 + %call = tail call arm_aapcscc i32 @_Z1fv() #3, !dbg !19 + %conv = sitofp i32 %call to float, !dbg !19 + tail call void @llvm.dbg.value(metadata !{float %conv}, i64 0, metadata !16), !dbg !19 + tail call arm_aapcscc void @_Z1gf(float %conv) #3, !dbg !19 + ret void, !dbg !20 +} + +declare arm_aapcscc void @_Z1gf(float) + +declare arm_aapcscc i32 @_Z1fv() + +; Function Attrs: nounwind readnone +declare void 
@llvm.dbg.value(metadata, i64, metadata) #2 + +attributes #0 = { nounwind } +attributes #2 = { nounwind readnone } +attributes #3 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!17, !21} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (trunk 190804) (llvm/trunk 190797)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [//<unknown>] [DW_LANG_C_plus_plus] +!1 = metadata !{metadata !"/<unknown>", metadata !""} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"h", metadata !"h", metadata !"_Z1hiiiif", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32, i32, i32, i32, float)* @_Z1hiiiif, null, null, metadata !11, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [h] +!5 = metadata !{metadata !"/arm.cpp", metadata !""} +!6 = metadata !{i32 786473, metadata !5} ; [ DW_TAG_file_type ] [//arm.cpp] +!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!8 = metadata !{null, metadata !9, metadata !9, metadata !9, metadata !9, metadata !10} +!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!10 = metadata !{i32 786468, null, null, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float] +!11 = metadata !{metadata !12, metadata !13, metadata !14, metadata !15, metadata !16} +!12 = metadata !{i32 786689, metadata !4, metadata !"", metadata !6, i32 16777219, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [line 3] +!13 = metadata !{i32 786689, metadata !4, metadata 
!"", metadata !6, i32 33554435, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [line 3] +!14 = metadata !{i32 786689, metadata !4, metadata !"", metadata !6, i32 50331651, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [line 3] +!15 = metadata !{i32 786689, metadata !4, metadata !"", metadata !6, i32 67108867, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [line 3] +!16 = metadata !{i32 786689, metadata !4, metadata !"x", metadata !6, i32 83886083, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [x] [line 3] +!17 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!18 = metadata !{i32 3, i32 0, metadata !4, null} +!19 = metadata !{i32 4, i32 0, metadata !4, null} +!20 = metadata !{i32 5, i32 0, metadata !4, null} +!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/ARM/lit.local.cfg b/test/DebugInfo/ARM/lit.local.cfg new file mode 100644 index 0000000..8a3ba96 --- /dev/null +++ b/test/DebugInfo/ARM/lit.local.cfg @@ -0,0 +1,4 @@ +targets = set(config.root.targets_to_build.split()) +if not 'ARM' in targets: + config.unsupported = True + diff --git a/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll b/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll new file mode 100644 index 0000000..0378c75 --- /dev/null +++ b/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll @@ -0,0 +1,103 @@ +; RUN: opt -instcombine %s -S | FileCheck %s +; +; Generate me from: +; clang -cc1 -triple thumbv7-apple-ios7.0.0 -S -target-abi apcs-gnu -gdwarf-2 -Os test.c -o test.ll -emit-llvm +; void run(float r) +; { +; int count = r; +; float vla[count]; +; vla[0] = r; +; for (int i = 0; i < count; i++) +; vla[i] /= r; +; } +; rdar://problem/15464571 +; +; ModuleID = 'test.c' +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" +target triple = "thumbv7-apple-ios8.0.0" + +; Function Attrs: nounwind optsize readnone +define void @run(float %r) #0 { +entry: + tail call void 
@llvm.dbg.declare(metadata !{float %r}, metadata !11), !dbg !22 + %conv = fptosi float %r to i32, !dbg !23 + tail call void @llvm.dbg.declare(metadata !{i32 %conv}, metadata !12), !dbg !23 + %vla = alloca float, i32 %conv, align 4, !dbg !24 + tail call void @llvm.dbg.declare(metadata !{float* %vla}, metadata !14), !dbg !24 +; The VLA alloca should be described by a dbg.declare: +; CHECK: call void @llvm.dbg.declare(metadata !{float* %vla}, metadata ![[VLA:.*]]) +; The VLA alloca and following store into the array should not be lowered to like this: +; CHECK-NOT: call void @llvm.dbg.value(metadata !{float %r}, i64 0, metadata ![[VLA]]) +; the backend interprets this as "vla has the location of %r". + store float %r, float* %vla, align 4, !dbg !25, !tbaa !26 + tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !18), !dbg !30 + %cmp8 = icmp sgt i32 %conv, 0, !dbg !30 + br i1 %cmp8, label %for.body, label %for.end, !dbg !30 + +for.body: ; preds = %entry, %for.body.for.body_crit_edge + %0 = phi float [ %.pre, %for.body.for.body_crit_edge ], [ %r, %entry ] + %i.09 = phi i32 [ %inc, %for.body.for.body_crit_edge ], [ 0, %entry ] + %arrayidx2 = getelementptr inbounds float* %vla, i32 %i.09, !dbg !31 + %div = fdiv float %0, %r, !dbg !31 + store float %div, float* %arrayidx2, align 4, !dbg !31, !tbaa !26 + %inc = add nsw i32 %i.09, 1, !dbg !30 + tail call void @llvm.dbg.value(metadata !{i32 %inc}, i64 0, metadata !18), !dbg !30 + %exitcond = icmp eq i32 %inc, %conv, !dbg !30 + br i1 %exitcond, label %for.end, label %for.body.for.body_crit_edge, !dbg !30 + +for.body.for.body_crit_edge: ; preds = %for.body + %arrayidx2.phi.trans.insert = getelementptr inbounds float* %vla, i32 %inc + %.pre = load float* %arrayidx2.phi.trans.insert, align 4, !dbg !31, !tbaa !26 + br label %for.body, !dbg !30 + +for.end: ; preds = %for.body, %entry + ret void, !dbg !32 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata) #1 + +; Function Attrs: 
nounwind readnone +declare void @llvm.dbg.value(metadata, i64, metadata) #1 + +attributes #0 = { nounwind optsize readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!20, !33} +!llvm.ident = !{!21} + +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Data/radar/15464571/<unknown>] [DW_LANG_C99] +!1 = metadata !{metadata !"<unknown>", metadata !"/Volumes/Data/radar/15464571"} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"run", metadata !"run", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (float)* @run, null, null, metadata !10, i32 2} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [run] +!5 = metadata !{metadata !"test.c", metadata !"/Volumes/Data/radar/15464571"} +!6 = metadata !{i32 786473, metadata !5} ; [ DW_TAG_file_type ] [/Volumes/Data/radar/15464571/test.c] +!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!8 = metadata !{null, metadata !9} +!9 = metadata !{i32 786468, null, null, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float] +!10 = metadata !{metadata !11, metadata !12, metadata !14, metadata !18} +!11 = metadata !{i32 786689, metadata !4, metadata !"r", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [r] [line 1] 
+!12 = metadata !{i32 786688, metadata !4, metadata !"count", metadata !6, i32 3, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [count] [line 3] +!13 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!14 = metadata !{i32 786688, metadata !4, metadata !"vla", metadata !6, i32 4, metadata !15, i32 8192, i32 0} ; [ DW_TAG_auto_variable ] [vla] [line 4] +!15 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !9, metadata !16, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from float] +!16 = metadata !{metadata !17} +!17 = metadata !{i32 786465, i64 0, i64 -1} ; [ DW_TAG_subrange_type ] [unbounded] +!18 = metadata !{i32 786688, metadata !19, metadata !"i", metadata !6, i32 6, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 6] +!19 = metadata !{i32 786443, metadata !5, metadata !4, i32 6, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/Volumes/Data/radar/15464571/test.c] +!20 = metadata !{i32 2, metadata !"Dwarf Version", i32 2} +!21 = metadata !{metadata !"clang version 3.4 "} +!22 = metadata !{i32 1, i32 0, metadata !4, null} +!23 = metadata !{i32 3, i32 0, metadata !4, null} +!24 = metadata !{i32 4, i32 0, metadata !4, null} +!25 = metadata !{i32 5, i32 0, metadata !4, null} +!26 = metadata !{metadata !27, metadata !27, i64 0} +!27 = metadata !{metadata !"float", metadata !28, i64 0} +!28 = metadata !{metadata !"omnipotent char", metadata !29, i64 0} +!29 = metadata !{metadata !"Simple C/C++ TBAA"} +!30 = metadata !{i32 6, i32 0, metadata !19, null} +!31 = metadata !{i32 7, i32 0, metadata !19, null} +!32 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ] +!33 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/ARM/selectiondag-deadcode.ll 
b/test/DebugInfo/ARM/selectiondag-deadcode.ll new file mode 100644 index 0000000..cc151e0 --- /dev/null +++ b/test/DebugInfo/ARM/selectiondag-deadcode.ll @@ -0,0 +1,27 @@ +; RUN: llc -filetype=asm < %s | FileCheck %s +target triple = "thumbv7-apple-ios7.0.0" +%class.Matrix3.0.6.10 = type { [9 x float] } +define arm_aapcscc void @_Z9GetMatrixv(%class.Matrix3.0.6.10* noalias nocapture sret %agg.result) #0 { + br i1 fcmp oeq (float fadd (float fadd (float fmul (float undef, float undef), float fmul (float undef, float undef)), float fmul (float undef, float undef)), float 0.000000e+00), label %_ZN7Vector39NormalizeEv.exit, label %1 + tail call arm_aapcscc void @_ZL4Sqrtd() #3 + br label %_ZN7Vector39NormalizeEv.exit +_ZN7Vector39NormalizeEv.exit: ; preds = %1, %0 + ; rdar://problem/15094721. + ; + ; When this (partially) dead use gets eliminated (and thus the def + ; of the vreg holding %agg.result) the dbg_value becomes dangling + ; and SelectionDAGISel crashes. It should definitely not + ; crash. Drop the dbg_value instead. 
+ ; CHECK-NOT: "matrix" + tail call void @llvm.dbg.declare(metadata !{%class.Matrix3.0.6.10* %agg.result}, metadata !45) + %2 = getelementptr inbounds %class.Matrix3.0.6.10* %agg.result, i32 0, i32 0, i32 8 + ret void +} +declare void @llvm.dbg.declare(metadata, metadata) #1 +declare arm_aapcscc void @_ZL4Sqrtd() #2 +!4 = metadata !{i32 786434, metadata !5, null, metadata !"Matrix3", i32 20, i64 288, i64 32, i32 0, i32 0, null, null, i32 0, null, null, metadata !"_ZTS7Matrix3"} ; [ DW_TAG_class_type ] [Matrix3] [line 20, size 288, align 32, offset 0] [def] [from ] +!5 = metadata !{metadata !"test.ii", metadata !"/Volumes/Data/radar/15094721"} +!39 = metadata !{i32 786478, metadata !5, metadata !40, metadata !"GetMatrix", metadata !"GetMatrix", metadata !"_Z9GetMatrixv", i32 32, metadata !41, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (%class.Matrix3.0.6.10*)* @_Z9GetMatrixv, null, null, null, i32 32} ; [ DW_TAG_subprogram ] [line 32] [def] [GetMatrix] +!40 = metadata !{i32 786473, metadata !5} ; [ DW_TAG_file_type ] [/Volumes/Data/radar/15094721/test.ii] +!41 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, null, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!45 = metadata !{i32 786688, metadata !39, metadata !"matrix", metadata !40, i32 35, metadata !4, i32 8192, i32 0} ; [ DW_TAG_auto_variable ] [matrix] [line 35] diff --git a/test/DebugInfo/Inputs/dwarfdump-inl-test.cc b/test/DebugInfo/Inputs/dwarfdump-inl-test.cc index 8ffbb52..edf956d 100644 --- a/test/DebugInfo/Inputs/dwarfdump-inl-test.cc +++ b/test/DebugInfo/Inputs/dwarfdump-inl-test.cc @@ -13,3 +13,6 @@ int main() { // $ cp dwarfdump-inl-test.* /tmp/dbginfo // $ cd /tmp/dbginfo // $ clang++ -O2 -gline-tables-only -fsanitize=address -fPIC -shared dwarfdump-inl-test.cc -o <output> +// +// And similarly with with gcc 4.8.2: +// $ gcc dwarfdump-inl-test.cc -o dwarfdump-inl-test.high_pc.elf-x86-64 
-g -O2 -fPIC -shared diff --git a/test/DebugInfo/Inputs/dwarfdump-inl-test.high_pc.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-inl-test.high_pc.elf-x86-64 Binary files differnew file mode 100755 index 0000000..f108861 --- /dev/null +++ b/test/DebugInfo/Inputs/dwarfdump-inl-test.high_pc.elf-x86-64 diff --git a/test/DebugInfo/Inputs/dwarfdump-test.elf-x86-64.debuglink b/test/DebugInfo/Inputs/dwarfdump-test.elf-x86-64.debuglink Binary files differnew file mode 100755 index 0000000..8c08037 --- /dev/null +++ b/test/DebugInfo/Inputs/dwarfdump-test.elf-x86-64.debuglink diff --git a/test/DebugInfo/Inputs/dwarfdump-type-units.cc b/test/DebugInfo/Inputs/dwarfdump-type-units.cc new file mode 100644 index 0000000..06bc9a2 --- /dev/null +++ b/test/DebugInfo/Inputs/dwarfdump-type-units.cc @@ -0,0 +1,15 @@ +struct foo {}; +struct bar {}; +void sink(void*); +int main() { + foo f; + sink(&f); + bar b; + sink(&b); +} + +// Built with GCC 4.8.1 +// $ mkdir -p /tmp/dbginfo +// $ cp dwarfdump-type-units.cc /tmp/dbginfo +// $ cd /tmp/dbginfo +// $ g++-4.8.1 -g -fdebug-types-section -c dwarfdump-type-units.cc -o dwarfdump-type-units.elf-x86-64 diff --git a/test/DebugInfo/Inputs/dwarfdump-type-units.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-type-units.elf-x86-64 Binary files differnew file mode 100644 index 0000000..064b4f0 --- /dev/null +++ b/test/DebugInfo/Inputs/dwarfdump-type-units.elf-x86-64 diff --git a/test/DebugInfo/Inputs/lit.local.cfg b/test/DebugInfo/Inputs/lit.local.cfg deleted file mode 100644 index e6f55ee..0000000 --- a/test/DebugInfo/Inputs/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = [] diff --git a/test/DebugInfo/PowerPC/lit.local.cfg b/test/DebugInfo/PowerPC/lit.local.cfg index 112a1c3..193ebeb 100644 --- a/test/DebugInfo/PowerPC/lit.local.cfg +++ b/test/DebugInfo/PowerPC/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.s'] - targets = set(config.root.targets_to_build.split()) if not 'PowerPC' in targets: config.unsupported = True diff 
--git a/test/DebugInfo/PowerPC/tls-fission.ll b/test/DebugInfo/PowerPC/tls-fission.ll index 83a2cf3..4a744c7 100644 --- a/test/DebugInfo/PowerPC/tls-fission.ll +++ b/test/DebugInfo/PowerPC/tls-fission.ll @@ -19,7 +19,7 @@ @tls = thread_local global i32 0, align 4 !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!7} +!llvm.module.flags = !{!7, !8} !0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, metadata !2, metadata !"tls.dwo"} ; [ DW_TAG_compile_unit ] [/tmp/tls.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{metadata !"tls.cpp", metadata !"/tmp"} @@ -29,3 +29,4 @@ !5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/tls.cpp] !6 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !7 = metadata !{i32 2, metadata !"Dwarf Version", i32 3} +!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/PowerPC/tls.ll b/test/DebugInfo/PowerPC/tls.ll index ae32a90..6557f5e 100644 --- a/test/DebugInfo/PowerPC/tls.ll +++ b/test/DebugInfo/PowerPC/tls.ll @@ -15,7 +15,7 @@ @tls = thread_local global i32 7, align 4 !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!7} +!llvm.module.flags = !{!7, !8} !0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/tls.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{metadata !"tls.cpp", metadata !"/tmp"} @@ -26,3 +26,4 @@ !6 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !7 = metadata !{i32 2, metadata !"Dwarf Version", i32 3} +!8 = metadata !{i32 1, metadata !"Debug Info 
Version", i32 1} diff --git a/test/DebugInfo/SystemZ/lit.local.cfg b/test/DebugInfo/SystemZ/lit.local.cfg index a70a685..b12af09 100644 --- a/test/DebugInfo/SystemZ/lit.local.cfg +++ b/test/DebugInfo/SystemZ/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.s'] - targets = set(config.root.targets_to_build.split()) if not 'SystemZ' in targets: config.unsupported = True diff --git a/test/DebugInfo/SystemZ/variable-loc.ll b/test/DebugInfo/SystemZ/variable-loc.ll index 139fae8..560b477 100644 --- a/test/DebugInfo/SystemZ/variable-loc.ll +++ b/test/DebugInfo/SystemZ/variable-loc.ll @@ -11,6 +11,9 @@ ; CHECK: la %r2, 164(%r11) ; CHECK: brasl %r14, populate_array@PLT ; +; CHECK: .Linfo_string7: +; CHECK-NEXT: main_arr +; ; Now check that the debugging information reflects this: ; CHECK: DW_TAG_variable ; CHECK-NEXT: .long .Linfo_string7 @@ -21,8 +24,6 @@ ; CHECK-NEXT: .byte 145 ; CHECK-NEXT: .ascii "\244\001" ; -; CHECK: .Linfo_string7: -; CHECK-NEXT: main_arr @.str = private unnamed_addr constant [13 x i8] c"Total is %d\0A\00", align 2 @@ -54,25 +55,26 @@ entry: declare i32 @printf(i8*, ...) 
!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!30} !0 = metadata !{i32 786449, metadata !29, i32 12, metadata !"clang version 3.2 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/timnor01/a64-trunk/build/simple.c] [DW_LANG_C99] !1 = metadata !{i32 0} !3 = metadata !{metadata !5, metadata !11, metadata !14} !5 = metadata !{i32 786478, metadata !29, metadata !6, metadata !"populate_array", metadata !"populate_array", metadata !"", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*, i32)* @populate_array, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [populate_array] !6 = metadata !{i32 786473, metadata !29} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{null, metadata !9, metadata !10} !9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int] !10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !11 = metadata !{i32 786478, metadata !29, metadata !6, metadata !"sum_array", metadata !"sum_array", metadata !"", i32 9, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*, i32)* @sum_array, null, null, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [sum_array] -!12 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, 
i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!12 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !13 = metadata !{metadata !10, metadata !9, metadata !10} !14 = metadata !{i32 786478, metadata !29, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 18, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [main] -!15 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!15 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !16 = metadata !{metadata !10} !17 = metadata !{i32 786688, metadata !18, metadata !"main_arr", metadata !6, i32 19, metadata !19, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [main_arr] [line 19] !18 = metadata !{i32 786443, metadata !29, metadata !14, i32 18, i32 16, i32 4} ; [ DW_TAG_lexical_block ] [/home/timnor01/a64-trunk/build/simple.c] -!19 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 3200, i64 32, i32 0, i32 0, metadata !10, metadata !20, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 3200, align 32, offset 0] [from int] +!19 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 3200, i64 32, i32 0, i32 0, metadata !10, metadata !20, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 3200, align 32, offset 0] [from int] !20 = metadata !{i32 786465, i64 0, i64 99} ; [ DW_TAG_subrange_type ] [0, 99] !22 = metadata !{i32 19, i32 7, metadata !18, null} !23 = metadata !{i32 
786688, metadata !18, metadata !"val", metadata !6, i32 20, metadata !10, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [val] [line 20] @@ -82,3 +84,4 @@ declare i32 @printf(i8*, ...) !27 = metadata !{i32 24, i32 3, metadata !18, null} !28 = metadata !{i32 26, i32 3, metadata !18, null} !29 = metadata !{metadata !"simple.c", metadata !"/home/timnor01/a64-trunk/build"} +!30 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/2010-04-13-PubType.ll b/test/DebugInfo/X86/2010-04-13-PubType.ll index 0ec7f59..0440afc 100644 --- a/test/DebugInfo/X86/2010-04-13-PubType.ll +++ b/test/DebugInfo/X86/2010-04-13-PubType.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -asm-verbose -mtriple=x86_64-macosx < %s | FileCheck %s +; RUN: llc -O0 -asm-verbose -mtriple=x86_64-macosx -generate-dwarf-pub-sections=Enable < %s | FileCheck %s ; CHECK-NOT: .asciz "X" ## External Name ; CHECK: .asciz "Y" ## External Name ; Test to check type with no definition is listed in pubtypes section. @@ -29,18 +29,19 @@ return: ; preds = %entry declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!3} +!llvm.module.flags = !{!20} !0 = metadata !{i32 786689, metadata !1, metadata !"x", metadata !2, i32 7, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ] !1 = metadata !{i32 786478, metadata !18, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", i32 7, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 (%struct.X*, %struct.Y*)* @foo, null, null, null, i32 7} ; [ DW_TAG_subprogram ] !2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ] !3 = metadata !{i32 786449, metadata !18, i32 1, metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !19, metadata !19, metadata !17, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!4 = metadata !{i32 786453, metadata !18, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{i32 786453, metadata !18, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !5 = metadata !{metadata !6, metadata !7, metadata !9} !6 = metadata !{i32 786468, metadata !18, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !7 = metadata !{i32 786447, metadata !18, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] -!8 = metadata !{i32 786451, metadata !18, metadata !2, metadata !"X", i32 3, i64 0, i64 0, i64 0, i32 4, null, null, i32 0, null} ; [ DW_TAG_structure_type ] +!8 = metadata !{i32 786451, metadata !18, metadata !2, metadata !"X", i32 3, i64 0, i64 0, i64 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [X] [line 3, size 0, align 0, offset 0] [decl] [from ] !9 = metadata !{i32 786447, metadata !18, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] -!10 = metadata !{i32 786451, metadata !18, metadata !2, metadata !"Y", i32 4, i64 32, i64 32, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ] +!10 = metadata !{i32 786451, metadata !18, metadata !2, metadata !"Y", i32 4, i64 32, i64 32, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [Y] [line 4, size 32, align 32, offset 0] [def] [from ] !11 = metadata !{metadata !12} !12 = metadata !{i32 786445, metadata !18, metadata !10, metadata !"x", i32 5, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] !13 = 
metadata !{i32 7, i32 0, metadata !1, null} @@ -50,3 +51,4 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !17 = metadata !{metadata !1} !18 = metadata !{metadata !"a.c", metadata !"/tmp/"} !19 = metadata !{i32 0} +!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/2010-08-10-DbgConstant.ll b/test/DebugInfo/X86/2010-08-10-DbgConstant.ll index 51a375a..d0a2dfa 100644 --- a/test/DebugInfo/X86/2010-08-10-DbgConstant.ll +++ b/test/DebugInfo/X86/2010-08-10-DbgConstant.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple=i686-linux -O0 -filetype=obj -o %t %s ; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s -; CHECK: DW_TAG_constant [4] -; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x0000002c] = "ro") +; CHECK: DW_TAG_constant +; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-f]*}}] = "ro") define void @foo() nounwind ssp { entry: @@ -12,11 +12,12 @@ entry: declare void @bar(i32) !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!13} !0 = metadata !{i32 786478, metadata !12, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void ()* @foo, null, null, null, i32 3} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ] !2 = metadata !{i32 786449, metadata !12, i32 12, metadata !"clang 2.8", i1 false, metadata !"", i32 0, metadata !4, metadata !4, metadata !10, metadata !11, metadata !11, metadata !""} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !12, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !12, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{null} !5 = metadata !{i32 786471, i32 0, metadata 
!1, metadata !"ro", metadata !"ro", metadata !"ro", metadata !1, i32 1, metadata !6, i1 true, i1 true, i32 201, null} ; [ DW_TAG_constant ] !6 = metadata !{i32 786470, metadata !12, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_const_type ] @@ -26,3 +27,4 @@ declare void @bar(i32) !10 = metadata !{metadata !0} !11 = metadata !{metadata !5} !12 = metadata !{metadata !"/tmp/l.c", metadata !"/Volumes/Lalgate/clean/D"} +!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll b/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll index 1c6778c..cdfd952 100644 --- a/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll +++ b/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll @@ -17,13 +17,14 @@ define i32 @f() nounwind { declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!21} !0 = metadata !{i32 786449, metadata !20, i32 12, metadata !"clang version 3.0 (trunk)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12, metadata !12, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} -!5 = metadata !{i32 720942, metadata !6, metadata !6, metadata !"f", metadata !"f", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @f, null, null, metadata !10, i32 0} ; [ DW_TAG_subprogram ] +!5 = metadata !{i32 720942, metadata !6, metadata !6, metadata !"f", metadata !"f", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @f, null, null, metadata !10, i32 0} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 0] [f] !6 = metadata !{i32 720937, metadata !20} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 720917, i32 0, 
null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9} !9 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !10 = metadata !{metadata !11} @@ -37,13 +38,14 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !19 = metadata !{i32 5, i32 5, metadata !16, null} !20 = metadata !{metadata !"test.c", metadata !"/work/llvm/vanilla/test/DebugInfo"} -; CHECK: DW_TAG_variable [3] -; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000043] = "GLB") +; CHECK: DW_TAG_variable +; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-f]*}}] = "GLB") ; CHECK: DW_AT_decl_file [DW_FORM_data1] (0x01) ; CHECK: DW_AT_decl_line [DW_FORM_data1] (0x01) -; CHECK: DW_TAG_variable [6] -; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x0000004d] = "LOC") +; CHECK: DW_TAG_variable +; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-f]*}}] = "LOC") ; CHECK: DW_AT_decl_file [DW_FORM_data1] (0x01) ; CHECK: DW_AT_decl_line [DW_FORM_data1] (0x04) +!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/2011-12-16-BadStructRef.ll b/test/DebugInfo/X86/2011-12-16-BadStructRef.ll index 405d9f5..5e6a601 100644 --- a/test/DebugInfo/X86/2011-12-16-BadStructRef.ll +++ b/test/DebugInfo/X86/2011-12-16-BadStructRef.ll @@ -87,51 +87,52 @@ entry: } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!83} !0 = metadata !{i32 720913, metadata !82, i32 4, metadata !"clang version 3.1 (trunk 146596)", i1 false, metadata !"", i32 0, metadata !1, metadata !3, metadata !27, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5, metadata !9} -!5 = metadata !{i32 720898, metadata !82, null, metadata !"bar", i32 9, i64 128, i64 64, i32 0, i32 0, null, metadata !7, i32 0, null, null} 
; [ DW_TAG_class_type ] +!5 = metadata !{i32 720898, metadata !82, null, metadata !"bar", i32 9, i64 128, i64 64, i32 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_class_type ] [bar] [line 9, size 128, align 64, offset 0] [def] [from ] !6 = metadata !{i32 720937, metadata !82} ; [ DW_TAG_file_type ] !7 = metadata !{metadata !8, metadata !19, metadata !21} !8 = metadata !{i32 720909, metadata !82, metadata !5, metadata !"b", i32 11, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] -!9 = metadata !{i32 720898, metadata !82, null, metadata !"baz", i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !10, i32 0, null, null} ; [ DW_TAG_class_type ] +!9 = metadata !{i32 720898, metadata !82, null, metadata !"baz", i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_class_type ] [baz] [line 3, size 32, align 32, offset 0] [def] [from ] !10 = metadata !{metadata !11, metadata !13} !11 = metadata !{i32 720909, metadata !82, metadata !9, metadata !"h", i32 5, i64 32, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ] !12 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !13 = metadata !{i32 720942, metadata !82, metadata !9, metadata !"baz", metadata !"baz", metadata !"", i32 6, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !17, i32 0} ; [ DW_TAG_subprogram ] -!14 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !15, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!14 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !15 = metadata !{null, metadata !16, metadata !12} -!16 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !9} ; [ 
DW_TAG_pointer_type ] +!16 = metadata !{i32 720911, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !9} ; [ DW_TAG_pointer_type ] !17 = metadata !{metadata !18} !18 = metadata !{i32 720932} ; [ DW_TAG_base_type ] !19 = metadata !{i32 720909, metadata !82, metadata !5, metadata !"b_ref", i32 12, i64 64, i64 64, i64 64, i32 0, metadata !20} ; [ DW_TAG_member ] !20 = metadata !{i32 720912, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_reference_type ] !21 = metadata !{i32 720942, metadata !82, metadata !5, metadata !"bar", metadata !"bar", metadata !"", i32 13, metadata !22, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !25, i32 0} ; [ DW_TAG_subprogram ] -!22 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !23, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!22 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !23, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !23 = metadata !{null, metadata !24, metadata !12} -!24 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !5} ; [ DW_TAG_pointer_type ] +!24 = metadata !{i32 720911, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !5} ; [ DW_TAG_pointer_type ] !25 = metadata !{metadata !26} !26 = metadata !{i32 720932} ; [ DW_TAG_base_type ] !27 = metadata !{metadata !29, metadata !37, metadata !40, metadata !43, metadata !46} -!29 = metadata !{i32 720942, metadata !82, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 17, metadata !30, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !35, i32 0} ; [ DW_TAG_subprogram ] -!30 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !31, i32 0, i32 0} ; [ 
DW_TAG_subroutine_type ] +!29 = metadata !{i32 720942, metadata !82, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 17, metadata !30, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !47, i32 0} ; [ DW_TAG_subprogram ] [line 17] [def] [scope 0] [main] +!30 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !31, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !31 = metadata !{metadata !12, metadata !12, metadata !32} !32 = metadata !{i32 720911, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !33} ; [ DW_TAG_pointer_type ] !33 = metadata !{i32 720911, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !34} ; [ DW_TAG_pointer_type ] !34 = metadata !{i32 720932, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] !35 = metadata !{metadata !36} !36 = metadata !{i32 720932} ; [ DW_TAG_base_type ] -!37 = metadata !{i32 720942, metadata !82, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC1Ei", i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC1Ei, null, metadata !21, metadata !38, i32 0} ; [ DW_TAG_subprogram ] +!37 = metadata !{i32 720942, metadata !82, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC1Ei", i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC1Ei, null, metadata !21, metadata !47, i32 0} ; [ DW_TAG_subprogram ] [line 13] [def] [scope 0] [bar] !38 = metadata !{metadata !39} !39 = metadata !{i32 720932} ; [ DW_TAG_base_type ] -!40 = metadata !{i32 720942, metadata !82, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC2Ei", i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC2Ei, null, 
metadata !21, metadata !41, i32 0} ; [ DW_TAG_subprogram ] +!40 = metadata !{i32 720942, metadata !82, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC2Ei", i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC2Ei, null, metadata !21, metadata !47, i32 0} ; [ DW_TAG_subprogram ] [line 13] [def] [scope 0] [bar] !41 = metadata !{metadata !42} !42 = metadata !{i32 720932} ; [ DW_TAG_base_type ] -!43 = metadata !{i32 720942, metadata !82, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC1Ei", i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC1Ei, null, metadata !13, metadata !44, i32 0} ; [ DW_TAG_subprogram ] +!43 = metadata !{i32 720942, metadata !82, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC1Ei", i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC1Ei, null, metadata !13, metadata !47, i32 0} ; [ DW_TAG_subprogram ] [line 6] [def] [scope 0] [baz] !44 = metadata !{metadata !45} !45 = metadata !{i32 720932} ; [ DW_TAG_base_type ] -!46 = metadata !{i32 720942, metadata !82, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC2Ei", i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC2Ei, null, metadata !13, metadata !47, i32 0} ; [ DW_TAG_subprogram ] +!46 = metadata !{i32 720942, metadata !82, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC2Ei", i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC2Ei, null, metadata !13, metadata !47, i32 0} ; [ DW_TAG_subprogram ] [line 6] [def] [scope 0] [baz] !47 = metadata !{metadata !48} !48 = metadata !{i32 720932} ; [ DW_TAG_base_type ] !49 = metadata !{i32 721153, metadata !29, metadata !"argc", metadata !6, i32 16777232, metadata !12, i32 0, i32 0} ; [ 
DW_TAG_arg_variable ] @@ -168,3 +169,4 @@ entry: !80 = metadata !{i32 6, i32 24, metadata !81, null} !81 = metadata !{i32 720907, metadata !82, metadata !46, i32 6, i32 23, i32 2} ; [ DW_TAG_lexical_block ] !82 = metadata !{metadata !"main.cpp", metadata !"/Users/echristo/tmp/bad-struct-ref"} +!83 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/DW_AT_byte_size.ll b/test/DebugInfo/X86/DW_AT_byte_size.ll index ef55839..87e242a 100644 --- a/test/DebugInfo/X86/DW_AT_byte_size.ll +++ b/test/DebugInfo/X86/DW_AT_byte_size.ll @@ -5,6 +5,7 @@ ; CHECK: DW_TAG_pointer_type ; CHECK-NEXT: DW_AT_type ; CHECK-NOT: DW_AT_byte_size +; CHECK: DW_TAG ; CHECK: .debug_info contents %struct.A = type { i32 } @@ -23,17 +24,18 @@ entry: declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!21} !0 = metadata !{i32 786449, metadata !20, i32 4, metadata !"clang version 3.1 (trunk 150996)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 786478, metadata !20, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooP1A", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%struct.A*)* @_Z3fooP1A, null, null, metadata !14, i32 3} ; [ DW_TAG_subprogram ] !6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9, metadata !10} !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 
0, i32 5} ; [ DW_TAG_base_type ] !10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] -!11 = metadata !{i32 786434, metadata !20, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !12, i32 0, null, null} ; [ DW_TAG_class_type ] +!11 = metadata !{i32 786434, metadata !20, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 32, align 32, offset 0] [def] [from ] !12 = metadata !{metadata !13} !13 = metadata !{i32 786445, metadata !20, metadata !11, metadata !"b", i32 1, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] !14 = metadata !{metadata !15} @@ -43,3 +45,4 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !18 = metadata !{i32 4, i32 3, metadata !19, null} !19 = metadata !{i32 786443, metadata !20, metadata !5, i32 3, i32 16, i32 0} ; [ DW_TAG_lexical_block ] !20 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo"} +!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/DW_AT_location-reference.ll b/test/DebugInfo/X86/DW_AT_location-reference.ll index f0f4f48..bdd0e04 100644 --- a/test/DebugInfo/X86/DW_AT_location-reference.ll +++ b/test/DebugInfo/X86/DW_AT_location-reference.ll @@ -86,11 +86,12 @@ declare i32 @g(i32, i32) declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!24} -!0 = metadata !{i32 786478, metadata !23, metadata !1, metadata !"f", metadata !"f", metadata !"", i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @f, null, null, metadata !22, i32 4} ; [ DW_TAG_subprogram ] +!0 = metadata !{i32 786478, metadata !23, metadata !1, metadata !"f", metadata !"f", metadata !"", i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @f, null, null, 
metadata !22, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [f] !1 = metadata !{i32 786473, metadata !23} ; [ DW_TAG_file_type ] !2 = metadata !{i32 786449, metadata !23, i32 12, metadata !"clang version 3.0 (trunk)", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !21, null, null, null} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !23, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !23, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{null} !5 = metadata !{i32 786688, metadata !6, metadata !"x", metadata !1, i32 5, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ] !6 = metadata !{i32 786443, metadata !23, metadata !0, i32 4, i32 14, i32 0} ; [ DW_TAG_lexical_block ] @@ -108,3 +109,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !21 = metadata !{metadata !0} !22 = metadata !{metadata !5} !23 = metadata !{metadata !"simple.c", metadata !"/home/rengol01/temp/tests/dwarf/relocation"} +!24 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/DW_AT_object_pointer.ll b/test/DebugInfo/X86/DW_AT_object_pointer.ll index 789f556..6e6c3a1 100644 --- a/test/DebugInfo/X86/DW_AT_object_pointer.ll +++ b/test/DebugInfo/X86/DW_AT_object_pointer.ll @@ -2,10 +2,12 @@ ; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s ; CHECK: DW_TAG_formal_parameter [ +; CHECK-NOT: "" +; CHECK: DW_TAG ; CHECK: DW_TAG_class_type -; CHECK: DW_AT_object_pointer [DW_FORM_ref4] (cu + 0x00fd => {0x000000fd}) -; CHECK: 0x000000fd: DW_TAG_formal_parameter [13] -; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000086] = "this") +; CHECK: DW_AT_object_pointer [DW_FORM_ref4] (cu + 0x{{[0-9a-f]*}} => {[[PARAM:0x[0-9a-f]*]]}) +; CHECK: 
[[PARAM]]: DW_TAG_formal_parameter +; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-f]*}}] = "this") %class.A = type { i32 } @@ -46,20 +48,21 @@ entry: } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!38} !0 = metadata !{i32 786449, metadata !37, i32 4, metadata !"clang version 3.2 (trunk 163586) (llvm/trunk 163570)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/echristo/debug-tests/bar.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{i32 0} !3 = metadata !{metadata !5, metadata !10, metadata !20} !5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooi", i32 7, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z3fooi, null, null, metadata !1, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [foo] !6 = metadata !{i32 786473, metadata !37} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9} !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !10 = metadata !{i32 786478, metadata !6, null, metadata !"A", metadata !"A", metadata !"_ZN1AC1Ev", i32 3, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*)* @_ZN1AC1Ev, null, metadata !17, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [A] -!11 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, i32 0} ; [ 
DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!11 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !12 = metadata !{null, metadata !13} -!13 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !14} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A] -!14 = metadata !{i32 786434, metadata !37, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !15, i32 0, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 32, align 32, offset 0] [from ] +!13 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !14} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A] +!14 = metadata !{i32 786434, metadata !37, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 32, align 32, offset 0] [def] [from ] !15 = metadata !{metadata !16, metadata !17} !16 = metadata !{i32 786445, metadata !37, metadata !14, metadata !"m_a", i32 4, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [m_a] [line 4, size 32, align 32, offset 0] [from int] !17 = metadata !{i32 786478, metadata !6, metadata !14, metadata !"A", metadata !"A", metadata !"", i32 3, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !18, i32 3} ; [ DW_TAG_subprogram ] [line 3] [A] @@ -83,3 +86,4 @@ entry: !35 = metadata !{i32 7, i32 0, metadata !5, null} !36 = metadata !{i32 786689, metadata !5, metadata !"", metadata !6, i32 16777223, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [line 7] !37 = metadata !{metadata !"bar.cpp", metadata !"/Users/echristo/debug-tests"} +!38 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git 
a/test/DebugInfo/X86/DW_AT_specification.ll b/test/DebugInfo/X86/DW_AT_specification.ll index 93e1ecf..4d7ef4f 100644 --- a/test/DebugInfo/X86/DW_AT_specification.ll +++ b/test/DebugInfo/X86/DW_AT_specification.ll @@ -3,8 +3,10 @@ ; test that the DW_AT_specification is a back edge in the file. -; CHECK: 0x0000003a: DW_TAG_subprogram [5] * -; CHECK: 0x00000060: DW_AT_specification [DW_FORM_ref4] (cu + 0x003a => {0x0000003a}) +; CHECK: DW_TAG_subprogram [{{[0-9]+}}] * +; CHECK: DW_AT_specification [DW_FORM_ref4] (cu + 0x[[OFFSET:[0-9a-f]*]] => {0x0000[[OFFSET]]}) +; CHECK: 0x0000[[OFFSET]]: DW_TAG_subprogram [{{[0-9]+}}] * +; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-f]*}}] = "bar") @_ZZN3foo3barEvE1x = constant i32 0, align 4 @@ -15,16 +17,17 @@ entry: } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!28} !0 = metadata !{i32 786449, metadata !27, i32 4, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18, metadata !18, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} -!5 = metadata !{i32 720942, metadata !6, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @_ZN3foo3barEv, null, metadata !11, metadata !16, i32 4} ; [ DW_TAG_subprogram ] +!5 = metadata !{i32 720942, metadata !6, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZN3foo3barEv, null, metadata !11, metadata !16, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [bar] !6 = metadata !{i32 720937, metadata !27} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, 
i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{null, metadata !9} -!9 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ] -!10 = metadata !{i32 786451, metadata !27, null, metadata !"foo", i32 1, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!9 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ] +!10 = metadata !{i32 786451, metadata !27, null, metadata !"foo", i32 1, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [foo] [line 1, size 0, align 0, offset 0] [decl] [from ] !11 = metadata !{i32 720942, metadata !6, metadata !12, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !14, i32 2} ; [ DW_TAG_subprogram ] !12 = metadata !{i32 720898, metadata !27, null, metadata !"foo", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !13, i32 0, null, null} ; [ DW_TAG_class_type ] !13 = metadata !{metadata !11} @@ -39,3 +42,4 @@ entry: !25 = metadata !{i32 6, i32 1, metadata !26, null} !26 = metadata !{i32 786443, metadata !5, i32 4, i32 17, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] !27 = metadata !{metadata !"nsNativeAppSupportBase.ii", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/toolkit/library"} +!28 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/DW_AT_stmt_list_sec_offset.ll b/test/DebugInfo/X86/DW_AT_stmt_list_sec_offset.ll new file mode 100644 index 0000000..0c08f23 --- /dev/null +++ b/test/DebugInfo/X86/DW_AT_stmt_list_sec_offset.ll @@ -0,0 +1,41 @@ +; RUN: llc -mtriple=i686-w64-mingw32 -o %t -filetype=obj %s +; RUN: llvm-dwarfdump -debug-dump=all %t | FileCheck %s + +; CHECK: 
DW_AT_stmt_list [DW_FORM_sec_offset] +; +; generated from: +; clang -g -S -emit-llvm test.c -o test.ll +; int main() +; { +; return 0; +; } + +; ModuleID = 'test.c' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S32" +target triple = "i686-pc-win32" + +; Function Attrs: nounwind +define i32 @main() #0 { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + ret i32 0, !dbg !10 +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!9, !11} + +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [C:\Projects/test.c] [DW_LANG_C99] +!1 = metadata !{metadata !"test.c", metadata !"C:\5CProjects"} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [main] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [C:\Projects/test.c] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{metadata !8} +!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc 
DW_ATE_signed] +!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 3} +!10 = metadata !{i32 3, i32 0, metadata !4, null} +!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/DW_TAG_friend.ll b/test/DebugInfo/X86/DW_TAG_friend.ll index 2e23222..2da9627 100644 --- a/test/DebugInfo/X86/DW_TAG_friend.ll +++ b/test/DebugInfo/X86/DW_TAG_friend.ll @@ -3,10 +3,10 @@ ; Check that the friend tag is there and is followed by a DW_AT_friend that has a reference back. -; CHECK: 0x00000032: DW_TAG_class_type [4] -; CHECK: 0x00000077: DW_TAG_class_type [4] -; CHECK: 0x000000a0: DW_TAG_friend [9] -; CHECK: 0x000000a1: DW_AT_friend [DW_FORM_ref4] (cu + 0x0032 => {0x00000032}) +; CHECK: [[BACK:0x[0-9a-f]*]]: DW_TAG_class_type +; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[{{.*}}] = "A") +; CHECK: DW_TAG_friend +; CHECK-NEXT: DW_AT_friend [DW_FORM_ref4] (cu + 0x{{[0-9a-f]*}} => {[[BACK]]}) %class.A = type { i32 } @@ -16,31 +16,33 @@ @b = global %class.B zeroinitializer, align 4 !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!29} !0 = metadata !{i32 786449, metadata !28, i32 4, metadata !"clang version 3.1 (trunk 153413) (llvm/trunk 153428)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5, metadata !17} !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 10, metadata !7, i32 0, i32 1, %class.A* @a, null} ; [ DW_TAG_variable ] !6 = metadata !{i32 786473, metadata !28} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786434, metadata !28, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null} ; [ DW_TAG_class_type ] +!7 = metadata !{i32 786434, metadata !28, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 32, align 32, 
offset 0] [def] [from ] !8 = metadata !{metadata !9, metadata !11} !9 = metadata !{i32 786445, metadata !28, metadata !7, metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ] !10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !11 = metadata !{i32 786478, metadata !6, metadata !7, metadata !"A", metadata !"A", metadata !"", i32 1, metadata !12, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !15, i32 1} ; [ DW_TAG_subprogram ] -!12 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!12 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !13 = metadata !{null, metadata !14} -!14 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !7} ; [ DW_TAG_pointer_type ] +!14 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !7} ; [ DW_TAG_pointer_type ] !15 = metadata !{metadata !16} !16 = metadata !{i32 786468} ; [ DW_TAG_base_type ] !17 = metadata !{i32 786484, i32 0, null, metadata !"b", metadata !"b", metadata !"", metadata !6, i32 11, metadata !18, i32 0, i32 1, %class.B* @b, null} ; [ DW_TAG_variable ] -!18 = metadata !{i32 786434, metadata !28, null, metadata !"B", i32 5, i64 32, i64 32, i32 0, i32 0, null, metadata !19, i32 0, null, null} ; [ DW_TAG_class_type ] +!18 = metadata !{i32 786434, metadata !28, null, metadata !"B", i32 5, i64 32, i64 32, i32 0, i32 0, null, metadata !19, i32 0, null, null, null} ; [ DW_TAG_class_type ] [B] [line 5, size 32, align 32, offset 0] [def] [from ] !19 = metadata !{metadata !20, metadata !21, metadata !27} !20 = metadata !{i32 786445, metadata !28, metadata 
!18, metadata !"b", i32 7, i64 32, i64 32, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ] !21 = metadata !{i32 786478, metadata !6, metadata !18, metadata !"B", metadata !"B", metadata !"", i32 5, metadata !22, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !25, i32 5} ; [ DW_TAG_subprogram ] -!22 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!22 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !23 = metadata !{null, metadata !24} -!24 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !18} ; [ DW_TAG_pointer_type ] +!24 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !18} ; [ DW_TAG_pointer_type ] !25 = metadata !{metadata !26} !26 = metadata !{i32 786468} ; [ DW_TAG_base_type ] !27 = metadata !{i32 786474, metadata !18, null, metadata !6, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_friend ] !28 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo/tmp"} +!29 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/aligned_stack_var.ll b/test/DebugInfo/X86/aligned_stack_var.ll index 5b23f64..d733dfd 100644 --- a/test/DebugInfo/X86/aligned_stack_var.ll +++ b/test/DebugInfo/X86/aligned_stack_var.ll @@ -25,13 +25,14 @@ entry: declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!15} !0 = metadata !{i32 786449, metadata !14, i32 4, metadata !"clang version 3.2 (trunk 155696:155697) (llvm/trunk 155696)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = 
metadata !{metadata !5} !5 = metadata !{i32 786478, metadata !14, metadata !6, metadata !"run", metadata !"run", metadata !"_Z3runv", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] !6 = metadata !{i32 786473, metadata !14} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{null} !9 = metadata !{i32 786688, metadata !10, metadata !"x", metadata !6, i32 2, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] !10 = metadata !{i32 786443, metadata !14, metadata !5, i32 1, i32 12, i32 0} ; [ DW_TAG_lexical_block ] @@ -39,3 +40,4 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !12 = metadata !{i32 2, i32 7, metadata !10, null} !13 = metadata !{i32 3, i32 1, metadata !10, null} !14 = metadata !{metadata !"test.cc", metadata !"/home/samsonov/debuginfo"} +!15 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/arguments.ll b/test/DebugInfo/X86/arguments.ll index 6f99f87..1d51049 100644 --- a/test/DebugInfo/X86/arguments.ll +++ b/test/DebugInfo/X86/arguments.ll @@ -44,6 +44,7 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe attributes #1 = { nounwind readnone } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!24} !0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/scratch.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{metadata 
!"scratch.cpp", metadata !"/usr/local/google/home/blaikie/dev/scratch"} @@ -51,16 +52,16 @@ attributes #1 = { nounwind readnone } !3 = metadata !{metadata !4} !4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"func", metadata !"func", metadata !"_Z4func3fooS_", i32 6, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.foo*, %struct.foo*)* @_Z4func3fooS_, null, null, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [func] !5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/usr/local/google/home/blaikie/dev/scratch/scratch.cpp] -!6 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !7 = metadata !{null, metadata !8, metadata !8} -!8 = metadata !{i32 786451, metadata !1, null, metadata !"foo", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !9, i32 0, null, null} ; [ DW_TAG_structure_type ] [foo] [line 1, size 32, align 32, offset 0] [from ] +!8 = metadata !{i32 786451, metadata !1, null, metadata !"foo", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [foo] [line 1, size 32, align 32, offset 0] [def] [from ] !9 = metadata !{metadata !10, metadata !12} !10 = metadata !{i32 786445, metadata !1, metadata !8, metadata !"i", i32 3, i64 32, i64 32, i64 0, i32 0, metadata !11} ; [ DW_TAG_member ] [i] [line 3, size 32, align 32, offset 0] [from int] !11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !12 = metadata !{i32 786478, metadata !1, metadata !8, metadata 
!"foo", metadata !"foo", metadata !"", i32 2, metadata !13, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !18, i32 2} ; [ DW_TAG_subprogram ] [line 2] [foo] -!13 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!13 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !14 = metadata !{null, metadata !15, metadata !16} -!15 = metadata !{i32 786447, i32 0, i32 0, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from foo] +!15 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from foo] !16 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !17} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ] !17 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from foo] !18 = metadata !{i32 786468} @@ -69,3 +70,4 @@ attributes #1 = { nounwind readnone } !21 = metadata !{i32 786689, metadata !4, metadata !"g", metadata !5, i32 33554438, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [g] [line 6] !22 = metadata !{i32 7, i32 0, metadata !4, null} !23 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ] +!24 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/block-capture.ll b/test/DebugInfo/X86/block-capture.ll index 0046730..2f966a7 100644 --- a/test/DebugInfo/X86/block-capture.ll +++ 
b/test/DebugInfo/X86/block-capture.ll @@ -2,10 +2,10 @@ ; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s ; Checks that we emit debug info for the block variable declare. -; CHECK: 0x00000030: DW_TAG_subprogram [3] -; CHECK: 0x0000005b: DW_TAG_variable [5] -; CHECK: 0x0000005c: DW_AT_name [DW_FORM_strp] ( .debug_str[0x000000e6] = "block") -; CHECK: 0x00000066: DW_AT_location [DW_FORM_data4] (0x00000023) +; CHECK: DW_TAG_subprogram [3] +; CHECK: DW_TAG_variable [5] +; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[{{.*}}] = "block") +; CHECK: DW_AT_location [DW_FORM_sec_offset] ({{.*}}) %struct.__block_descriptor = type { i64, i64 } %struct.__block_literal_generic = type { i8*, i32, i32, i8*, %struct.__block_descriptor* } @@ -60,18 +60,18 @@ declare void @objc_end_catch() declare i32 @__objc_personality_v0(...) !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!35, !36, !37, !38} +!llvm.module.flags = !{!35, !36, !37, !38, !64} !0 = metadata !{i32 786449, metadata !63, i32 16, metadata !"clang version 3.1 (trunk 151227)", i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5, metadata !28, metadata !31, metadata !34} !5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"foo", metadata !"foo", metadata !"", i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26, i32 5} ; [ DW_TAG_subprogram ] !6 = metadata !{i32 786473, metadata !63} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{null, metadata !9} !9 = metadata !{i32 786454, 
metadata !63, null, metadata !"dispatch_block_t", i32 1, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ] !10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] -!11 = metadata !{i32 786451, metadata !63, metadata !6, metadata !"__block_literal_generic", i32 5, i64 256, i64 0, i32 0, i32 8, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!11 = metadata !{i32 786451, metadata !63, metadata !6, metadata !"__block_literal_generic", i32 5, i64 256, i64 0, i32 0, i32 8, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_literal_generic] [line 5, size 256, align 0, offset 0] [def] [from ] !12 = metadata !{metadata !13, metadata !15, metadata !17, metadata !18, metadata !19} !13 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__isa", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_member ] !14 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] @@ -81,7 +81,7 @@ declare i32 @__objc_personality_v0(...) 
!18 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__FuncPtr", i32 0, i64 64, i64 64, i64 128, i32 0, metadata !14} ; [ DW_TAG_member ] !19 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__descriptor", i32 5, i64 64, i64 64, i64 192, i32 0, metadata !20} ; [ DW_TAG_member ] !20 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !21} ; [ DW_TAG_pointer_type ] -!21 = metadata !{i32 786451, metadata !63, metadata !6, metadata !"__block_descriptor", i32 5, i64 128, i64 0, i32 0, i32 8, null, metadata !22, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!21 = metadata !{i32 786451, metadata !63, metadata !6, metadata !"__block_descriptor", i32 5, i64 128, i64 0, i32 0, i32 8, null, metadata !22, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_descriptor] [line 5, size 128, align 0, offset 0] [def] [from ] !22 = metadata !{metadata !23, metadata !25} !23 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"reserved", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_member ] !24 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] @@ -89,10 +89,10 @@ declare i32 @__objc_personality_v0(...) 
!26 = metadata !{metadata !27} !27 = metadata !{i32 786468} ; [ DW_TAG_base_type ] !28 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"__foo_block_invoke_0", metadata !"__foo_block_invoke_0", metadata !"", i32 7, metadata !29, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @__foo_block_invoke_0, null, null, metadata !26, i32 7} ; [ DW_TAG_subprogram ] -!29 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !30, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!29 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !30, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !30 = metadata !{null, metadata !14} !31 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"__copy_helper_block_", metadata !"__copy_helper_block_", metadata !"", i32 10, metadata !32, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26, i32 10} ; [ DW_TAG_subprogram ] -!32 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !33, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!32 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !33, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !33 = metadata !{null, metadata !14, metadata !14} !34 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"__destroy_helper_block_", metadata !"__destroy_helper_block_", metadata !"", i32 10, metadata !29, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26, i32 10} ; [ DW_TAG_subprogram ] !35 = metadata !{i32 1, metadata !"Objective-C Version", i32 2} @@ -101,7 +101,7 @@ declare i32 @__objc_personality_v0(...) 
!38 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0} !39 = metadata !{i32 786689, metadata !28, metadata !".block_descriptor", metadata !6, i32 16777223, metadata !40, i32 64, i32 0} ; [ DW_TAG_arg_variable ] !40 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !41} ; [ DW_TAG_pointer_type ] -!41 = metadata !{i32 786451, metadata !63, metadata !6, metadata !"__block_literal_1", i32 7, i64 320, i64 64, i32 0, i32 0, null, metadata !42, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!41 = metadata !{i32 786451, metadata !63, metadata !6, metadata !"__block_literal_1", i32 7, i64 320, i64 64, i32 0, i32 0, null, metadata !42, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_literal_1] [line 7, size 320, align 64, offset 0] [def] [from ] !42 = metadata !{metadata !43, metadata !44, metadata !45, metadata !46, metadata !47, metadata !50} !43 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__isa", i32 7, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_member ] !44 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__flags", i32 7, i64 32, i64 32, i64 64, i32 0, metadata !16} ; [ DW_TAG_member ] @@ -109,7 +109,7 @@ declare i32 @__objc_personality_v0(...) 
!46 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__FuncPtr", i32 7, i64 64, i64 64, i64 128, i32 0, metadata !14} ; [ DW_TAG_member ] !47 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__descriptor", i32 7, i64 64, i64 64, i64 192, i32 0, metadata !48} ; [ DW_TAG_member ] !48 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !49} ; [ DW_TAG_pointer_type ] -!49 = metadata !{i32 786451, metadata !63, null, metadata !"__block_descriptor_withcopydispose", i32 7, i32 0, i32 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] +!49 = metadata !{i32 786451, metadata !63, null, metadata !"__block_descriptor_withcopydispose", i32 7, i32 0, i32 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_descriptor_withcopydispose] [line 7, size 0, align 0, offset 0] [decl] [from ] !50 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"block", i32 7, i64 64, i64 64, i64 256, i32 0, metadata !9} ; [ DW_TAG_member ] !51 = metadata !{i32 7, i32 18, metadata !28, null} !52 = metadata !{i32 7, i32 19, metadata !28, null} @@ -124,3 +124,4 @@ declare i32 @__objc_personality_v0(...) 
!61 = metadata !{i32 10, i32 21, metadata !28, null} !62 = metadata !{i32 9, i32 20, metadata !56, null} !63 = metadata !{metadata !"foo.m", metadata !"/Users/echristo"} +!64 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/byvalstruct.ll b/test/DebugInfo/X86/byvalstruct.ll index 7d42c9f..3dea863 100644 --- a/test/DebugInfo/X86/byvalstruct.ll +++ b/test/DebugInfo/X86/byvalstruct.ll @@ -84,25 +84,25 @@ attributes #0 = { ssp uwtable } attributes #1 = { nounwind readnone } !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!24, !25, !26, !27} +!llvm.module.flags = !{!24, !25, !26, !27, !38} !0 = metadata !{i32 786449, metadata !1, i32 17, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 2, metadata !2, metadata !3, metadata !6, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/t.mm] [DW_LANG_ObjC_plus_plus] !1 = metadata !{metadata !"t.mm", metadata !""} !2 = metadata !{i32 0} !3 = metadata !{metadata !4} -!4 = metadata !{i32 786451, metadata !1, metadata !5, metadata !"Bitmap", i32 8, i64 8, i64 8, i32 0, i32 512, null, metadata !2, i32 17, null, null} ; [ DW_TAG_structure_type ] [Bitmap] [line 8, size 8, align 8, offset 0] [from ] +!4 = metadata !{i32 786451, metadata !1, metadata !5, metadata !"Bitmap", i32 8, i64 8, i64 8, i32 0, i32 512, null, metadata !2, i32 17, null, null, null} ; [ DW_TAG_structure_type ] [Bitmap] [line 8, size 8, align 8, offset 0] [def] [from ] !5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/t.mm] !6 = metadata !{metadata !7} !7 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"-[Bitmap initWithCopy:andInfo:andLength:]", metadata !"-[Bitmap initWithCopy:andInfo:andLength:]", metadata !"", i32 9, metadata !8, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, i8* (%0*, i8*, %0*, %struct.ImageInfo*, i64)* @"\01-[Bitmap initWithCopy:andInfo:andLength:]", null, null, metadata !2, i32 9} ; [ DW_TAG_subprogram ] [line 9] [local] [def] [-[Bitmap 
initWithCopy:andInfo:andLength:]] -!8 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !9, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!8 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !9 = metadata !{metadata !4, metadata !10, metadata !11, metadata !14, metadata !15, metadata !19} -!10 = metadata !{i32 786447, i32 0, i32 0, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !4} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from Bitmap] -!11 = metadata !{i32 786454, metadata !1, i32 0, metadata !"SEL", i32 9, i64 0, i64 0, i64 0, i32 64, metadata !12} ; [ DW_TAG_typedef ] [SEL] [line 9, size 0, align 0, offset 0] [artificial] [from ] +!10 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !4} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from Bitmap] +!11 = metadata !{i32 786454, metadata !1, null, metadata !"SEL", i32 9, i64 0, i64 0, i64 0, i32 64, metadata !12} ; [ DW_TAG_typedef ] [SEL] [line 9, size 0, align 0, offset 0] [artificial] [from ] !12 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !13} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_selector] -!13 = metadata !{i32 786451, metadata !1, null, metadata !"objc_selector", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [objc_selector] [line 0, size 0, align 0, offset 0] [fwd] [from ] +!13 = metadata !{i32 786451, metadata !1, null, metadata !"objc_selector", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [objc_selector] [line 0, size 0, align 0, offset 0] [decl] [from 
] !14 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !4} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from Bitmap] !15 = metadata !{i32 786454, metadata !1, null, metadata !"ImageInfo", i32 7, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_typedef ] [ImageInfo] [line 7, size 0, align 0, offset 0] [from ] -!16 = metadata !{i32 786451, metadata !1, null, metadata !"", i32 2, i64 192, i64 64, i32 0, i32 0, null, metadata !17, i32 0, null, null} ; [ DW_TAG_structure_type ] [line 2, size 192, align 64, offset 0] [from ] +!16 = metadata !{i32 786451, metadata !1, null, metadata !"", i32 2, i64 192, i64 64, i32 0, i32 0, null, metadata !17, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [line 2, size 192, align 64, offset 0] [def] [from ] !17 = metadata !{metadata !18, metadata !21, metadata !22} !18 = metadata !{i32 786445, metadata !1, metadata !16, metadata !"width", i32 4, i64 64, i64 64, i64 0, i32 0, metadata !19} ; [ DW_TAG_member ] [width] [line 4, size 64, align 64, offset 0] [from NSUInteger] !19 = metadata !{i32 786454, metadata !1, null, metadata !"NSUInteger", i32 1, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_typedef ] [NSUInteger] [line 1, size 0, align 0, offset 0] [from long unsigned int] @@ -124,3 +124,4 @@ attributes #1 = { nounwind readnone } !35 = metadata !{i32 786689, metadata !7, metadata !"length", metadata !5, i32 83886091, metadata !19, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [length] [line 11] !36 = metadata !{i32 11, i32 0, metadata !7, null} !37 = metadata !{i32 13, i32 0, metadata !7, null} +!38 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/coff_relative_names.ll b/test/DebugInfo/X86/coff_relative_names.ll index 8e46e0b..4cc38a6 100644 --- a/test/DebugInfo/X86/coff_relative_names.ll +++ b/test/DebugInfo/X86/coff_relative_names.ll @@ -1,40 +1,41 @@ -; RUN: llc -mtriple=i686-w64-mingw32 -filetype=asm -O0 < %s 
| FileCheck %s
-
-; CHECK: .secrel32 Linfo_string0
-; CHECK: .secrel32 Linfo_string1
-;
-; generated from:
-; clang -g -S -emit-llvm test.c -o test.ll
-; int main()
-; {
-; return 0;
-; }
-
-; ModuleID = 'test.c'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S32"
-target triple = "i686-pc-win32"
-
-; Function Attrs: nounwind
-define i32 @main() #0 {
-entry:
- %retval = alloca i32, align 4
- store i32 0, i32* %retval
- ret i32 0, !dbg !10
-}
-
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!9}
-
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [C:\Projects/test.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"test.c", metadata !"C:\5CProjects"}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [main]
-!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [C:\Projects/test.c]
-!6 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
-!10 = metadata !{i32 3, i32 0, metadata !4, null}
+; RUN: llc -mtriple=i686-w64-mingw32 -filetype=asm -O0 < %s | FileCheck %s + +; CHECK: .secrel32 Linfo_string0 +; CHECK: .secrel32 Linfo_string1 +; +; generated from: +; clang -g -S -emit-llvm test.c -o test.ll +; int main() +; { +; return 0; +; } + +; ModuleID = 'test.c' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S32" +target triple = "i686-pc-win32" + +; Function Attrs: nounwind +define i32 @main() #0 { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + ret i32 0, !dbg !10 +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!9, !11} + +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [C:\Projects/test.c] [DW_LANG_C99] +!1 = metadata !{metadata !"test.c", metadata !"C:\5CProjects"} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [main] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [C:\Projects/test.c] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{metadata !8} +!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, 
i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 3} +!10 = metadata !{i32 3, i32 0, metadata !4, null} +!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/concrete_out_of_line.ll b/test/DebugInfo/X86/concrete_out_of_line.ll index 3b9aefc..4a15296 100644 --- a/test/DebugInfo/X86/concrete_out_of_line.ll +++ b/test/DebugInfo/X86/concrete_out_of_line.ll @@ -7,15 +7,15 @@ ; first check that we have a TAG_subprogram at a given offset and it has ; AT_inline. -; CHECK: 0x0000011e: DW_TAG_subprogram [18] +; CHECK: 0x0000011c: DW_TAG_subprogram [17] ; CHECK-NEXT: DW_AT_specification ; CHECK-NEXT: DW_AT_inline ; and then that a TAG_subprogram refers to it with AT_abstract_origin. -; CHECK: 0x0000015f: DW_TAG_subprogram [20] -; CHECK-NEXT: DW_AT_abstract_origin [DW_FORM_ref4] (cu + 0x011e => {0x0000011e}) +; CHECK: 0x0000015d: DW_TAG_subprogram [19] +; CHECK-NEXT: DW_AT_abstract_origin [DW_FORM_ref4] (cu + 0x011c => {0x0000011c}) define i32 @_ZN17nsAutoRefCnt7ReleaseEv() { entry: @@ -33,43 +33,44 @@ entry: declare void @_Z8moz_freePv(i8*) !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!60} !0 = metadata !{i32 786449, metadata !59, i32 4, metadata !"clang version 3.1 ()", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !47, metadata !47, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5, metadata !23, metadata !27, metadata !31} -!5 = metadata !{i32 720942, metadata !6, null, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", i32 14, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !12, metadata !20, i32 14} ; [ DW_TAG_subprogram ] +!5 = metadata !{i32 720942, metadata !6, null, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", i32 14, 
metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32* null, null, metadata !12, metadata !20, i32 14} ; [ DW_TAG_subprogram ] [line 14] [def] [Release] !6 = metadata !{i32 720937, metadata !59} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9, metadata !10} !9 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!10 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !11} ; [ DW_TAG_pointer_type ] -!11 = metadata !{i32 786451, metadata !59, null, metadata !"nsAutoRefCnt", i32 10, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!10 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !11} ; [ DW_TAG_pointer_type ] +!11 = metadata !{i32 786451, metadata !59, null, metadata !"nsAutoRefCnt", i32 10, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [nsAutoRefCnt] [line 10, size 0, align 0, offset 0] [decl] [from ] !12 = metadata !{i32 720942, metadata !6, metadata !13, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", i32 11, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 11} ; [ DW_TAG_subprogram ] -!13 = metadata !{i32 720898, metadata !59, null, metadata !"nsAutoRefCnt", i32 10, i64 8, i64 8, i32 0, i32 0, null, metadata !14, i32 0, null, null} ; [ DW_TAG_class_type ] +!13 = metadata !{i32 720898, metadata !59, null, metadata !"nsAutoRefCnt", i32 10, i64 8, i64 8, i32 0, 
i32 0, null, metadata !14, null, null, null} ; [ DW_TAG_class_type ] !14 = metadata !{metadata !12, metadata !15} !15 = metadata !{i32 720942, metadata !6, metadata !13, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"", i32 12, metadata !16, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 12} ; [ DW_TAG_subprogram ] -!16 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!16 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !17, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !17 = metadata !{null, metadata !10} !18 = metadata !{i32 720932} ; [ DW_TAG_base_type ] !20 = metadata !{metadata !22} !22 = metadata !{i32 786689, metadata !5, metadata !"this", metadata !6, i32 16777230, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ] -!23 = metadata !{i32 720942, metadata !6, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD1Ev", i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !15, metadata !24, i32 18} ; [ DW_TAG_subprogram ] +!23 = metadata !{i32 720942, metadata !6, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD1Ev", i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32* null, null, metadata !15, metadata !24, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [~nsAutoRefCnt] !24 = metadata !{metadata !26} !26 = metadata !{i32 786689, metadata !23, metadata !"this", metadata !6, i32 16777234, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ] -!27 = metadata !{i32 720942, metadata !6, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD2Ev", i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, i32 0, 
i32 256, i1 true, i32* null, null, metadata !15, metadata !28, i32 18} ; [ DW_TAG_subprogram ] +!27 = metadata !{i32 720942, metadata !6, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD2Ev", i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32* null, null, metadata !15, metadata !28, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [~nsAutoRefCnt] !28 = metadata !{metadata !30} !30 = metadata !{i32 786689, metadata !27, metadata !"this", metadata !6, i32 16777234, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ] -!31 = metadata !{i32 720942, metadata !6, null, metadata !"operator=", metadata !"operator=", metadata !"_ZN12nsAutoRefCntaSEi", i32 4, metadata !32, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null, metadata !36, metadata !43, i32 4} ; [ DW_TAG_subprogram ] -!32 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !33, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!31 = metadata !{i32 720942, metadata !6, null, metadata !"operator=", metadata !"operator=", metadata !"_ZN12nsAutoRefCntaSEi", i32 4, metadata !32, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, metadata !36, metadata !43, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [operator=] +!32 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !33, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !33 = metadata !{metadata !9, metadata !34, metadata !9} -!34 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !35} ; [ DW_TAG_pointer_type ] -!35 = metadata !{i32 786451, metadata !59, null, metadata !"nsAutoRefCnt", i32 2, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!34 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata 
!35} ; [ DW_TAG_pointer_type ] +!35 = metadata !{i32 786451, metadata !59, null, metadata !"nsAutoRefCnt", i32 2, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [nsAutoRefCnt] [line 2, size 0, align 0, offset 0] [decl] [from ] !36 = metadata !{i32 720942, metadata !6, metadata !37, metadata !"operator=", metadata !"operator=", metadata !"_ZN12nsAutoRefCntaSEi", i32 4, metadata !32, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 4} ; [ DW_TAG_subprogram ] -!37 = metadata !{i32 720898, metadata !59, null, metadata !"nsAutoRefCnt", i32 2, i64 32, i64 32, i32 0, i32 0, null, metadata !38, i32 0, null, null} ; [ DW_TAG_class_type ] +!37 = metadata !{i32 720898, metadata !59, null, metadata !"nsAutoRefCnt", i32 2, i64 32, i64 32, i32 0, i32 0, null, metadata !38, i32 0, null, null, null} ; [ DW_TAG_class_type ] [nsAutoRefCnt] [line 2, size 32, align 32, offset 0] [def] [from ] !38 = metadata !{metadata !39, metadata !40, metadata !36} !39 = metadata !{i32 786445, metadata !59, metadata !37, metadata !"mValue", i32 7, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] !40 = metadata !{i32 720942, metadata !6, metadata !37, metadata !"nsAutoRefCnt", metadata !"nsAutoRefCnt", metadata !"", i32 3, metadata !41, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 3} ; [ DW_TAG_subprogram ] -!41 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !42, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!41 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !42, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !42 = metadata !{null, metadata !34} !43 = metadata !{metadata !45, metadata !46} !45 = metadata !{i32 786689, metadata !31, metadata !"this", metadata !6, i32 16777220, metadata !34, i32 
64, i32 0} ; [ DW_TAG_arg_variable ] @@ -86,3 +87,4 @@ declare void @_Z8moz_freePv(i8*) !57 = metadata !{i32 19, i32 3, metadata !55, metadata !58} !58 = metadata !{i32 18, i32 41, metadata !23, null} !59 = metadata !{metadata !"nsAutoRefCnt.ii", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/netwerk/base/src"} +!60 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/data_member_location.ll b/test/DebugInfo/X86/data_member_location.ll new file mode 100644 index 0000000..1adddb9 --- /dev/null +++ b/test/DebugInfo/X86/data_member_location.ll @@ -0,0 +1,44 @@ +; RUN: llc -mtriple=x86_64-linux -O0 -o - -filetype=obj < %s | llvm-dwarfdump -debug-dump=info -| FileCheck %s + +; Generated from Clang with the following source: +; +; struct foo { +; char c; +; int i; +; }; +; +; foo f; + +; CHECK: DW_AT_name {{.*}} "c" +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_data_member_location {{.*}} (0x00) + +; CHECK: DW_AT_name {{.*}} "i" +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_data_member_location {{.*}} (0x04) + +%struct.foo = type { i8, i32 } + +@f = global %struct.foo zeroinitializer, align 4 + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!13, !15} +!llvm.ident = !{!14} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !2, metadata !10, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/data_member_location.cpp] [DW_LANG_C_plus_plus] +!1 = metadata !{metadata !"data_member_location.cpp", metadata !"/tmp/dbginfo"} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786451, metadata !1, null, metadata !"foo", i32 1, i64 64, i64 32, i32 0, i32 0, null, metadata !5, i32 0, null, null, metadata !"_ZTS3foo"} ; [ DW_TAG_structure_type ] [foo] [line 1, size 64, align 32, offset 0] [def] [from ] +!5 = metadata !{metadata !6, metadata !8} +!6 = metadata !{i32 786445, metadata !1, metadata 
!"_ZTS3foo", metadata !"c", i32 2, i64 8, i64 8, i64 0, i32 0, metadata !7} ; [ DW_TAG_member ] [c] [line 2, size 8, align 8, offset 0] [from char] +!7 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char] +!8 = metadata !{i32 786445, metadata !1, metadata !"_ZTS3foo", metadata !"i", i32 3, i64 32, i64 32, i64 32, i32 0, metadata !9} ; [ DW_TAG_member ] [i] [line 3, size 32, align 32, offset 32] [from int] +!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!10 = metadata !{metadata !11} +!11 = metadata !{i32 786484, i32 0, null, metadata !"f", metadata !"f", metadata !"", metadata !12, i32 6, metadata !4, i32 0, i32 1, %struct.foo* @f, null} ; [ DW_TAG_variable ] [f] [line 6] [def] +!12 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/dbginfo/data_member_location.cpp] +!13 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!14 = metadata !{metadata !"clang version 3.4 "} + +!15 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/dbg-at-specficiation.ll b/test/DebugInfo/X86/dbg-at-specficiation.ll index a6eebcb..8003a0f 100644 --- a/test/CodeGen/X86/dbg-at-specficiation.ll +++ b/test/DebugInfo/X86/dbg-at-specficiation.ll @@ -6,14 +6,16 @@ @a = common global [10 x i32] zeroinitializer, align 16 !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!12} !0 = metadata !{i32 720913, metadata !11, i32 12, metadata !"clang version 3.0 (trunk 140253)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, null, i32 0} ; [ DW_TAG_compile_unit ] !2 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 720948, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, [10 x 
i32]* @a, null} ; [ DW_TAG_variable ] !6 = metadata !{i32 720937, metadata !11} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 320, i64 32, i32 0, i32 0, metadata !8, metadata !9, i32 0, i32 0} ; [ DW_TAG_array_type ] +!7 = metadata !{i32 720897, null, null, null, i32 0, i64 320, i64 32, i32 0, i32 0, metadata !8, metadata !9, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 320, align 32, offset 0] [from int] !8 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !9 = metadata !{metadata !10} !10 = metadata !{i32 720929, i64 0, i64 10} ; [ DW_TAG_subrange_type ] !11 = metadata !{metadata !"x.c", metadata !"/private/tmp"} +!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/dbg-byval-parameter.ll b/test/DebugInfo/X86/dbg-byval-parameter.ll index ef9e03c..d66486d 100644 --- a/test/CodeGen/X86/dbg-byval-parameter.ll +++ b/test/DebugInfo/X86/dbg-byval-parameter.ll @@ -26,18 +26,19 @@ return: ; preds = %entry declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!3} +!llvm.module.flags = !{!21} !0 = metadata !{i32 786689, metadata !1, metadata !"my_r0", metadata !2, i32 11, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ] !1 = metadata !{i32 786478, metadata !19, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (%struct.Rect*)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !2 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ] !3 = metadata !{i32 786449, metadata !19, i32 1, metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !20, metadata !20, metadata !18, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!4 = metadata !{i32 786453, metadata !19, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{i32 786453, metadata !19, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !5 = metadata !{metadata !6, metadata !7} !6 = metadata !{i32 786468, metadata !19, metadata !2, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] -!7 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Rect", i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_structure_type ] +!7 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Rect", i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [Rect] [line 6, size 256, align 64, offset 0] [def] [from ] !8 = metadata !{metadata !9, metadata !14} !9 = metadata !{i32 786445, metadata !19, metadata !7, metadata !"P1", i32 7, i64 128, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ] -!10 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Pt", i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ] +!10 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Pt", i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [Pt] [line 1, size 128, align 64, offset 0] [def] [from ] !11 = metadata !{metadata !12, metadata !13} !12 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"x", i32 2, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] !13 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"y", 
i32 3, i64 64, i64 64, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ] @@ -48,3 +49,4 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !18 = metadata !{metadata !1} !19 = metadata !{metadata !"b2.c", metadata !"/tmp/"} !20 = metadata !{i32 0} +!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/dbg-const-int.ll b/test/DebugInfo/X86/dbg-const-int.ll index fc4ff6d..f2f51c9 100644 --- a/test/CodeGen/X86/dbg-const-int.ll +++ b/test/DebugInfo/X86/dbg-const-int.ll @@ -1,9 +1,13 @@ -; RUN: llc < %s - | FileCheck %s +; RUN: llc -mtriple=x86_64-apple-darwin12 -filetype=obj %s -o %t +; RUN: llvm-dwarfdump %t | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-macosx10.6.7" ; Radar 9511391 -;CHECK: .byte 4 ## DW_AT_const_value +; CHECK: DW_TAG_variable +; CHECK: "i" +; CHECK: DW_AT_const_value [DW_FORM_sdata] (42) + define i32 @foo() nounwind uwtable readnone optsize ssp { entry: tail call void @llvm.dbg.value(metadata !8, i64 0, metadata !6), !dbg !9 @@ -13,11 +17,12 @@ entry: declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!15} !0 = metadata !{i32 786449, metadata !13, i32 12, metadata !"clang version 3.0 (trunk 132191)", i1 true, metadata !"", i32 0, metadata !14, metadata !14, metadata !11, null, null, null} ; [ DW_TAG_compile_unit ] -!1 = metadata !{i32 786478, metadata !13, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @foo, null, null, metadata !12, i32 0} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786478, metadata !13, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @foo, null, null, 
metadata !12, i32 0} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 0] [foo] !2 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 786453, metadata !13, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !13, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !6 = metadata !{i32 786688, metadata !7, metadata !"i", metadata !2, i32 2, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] @@ -29,3 +34,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !12 = metadata !{metadata !6} !13 = metadata !{metadata !"a.c", metadata !"/private/tmp"} !14 = metadata !{i32 0} +!15 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/dbg-const.ll b/test/DebugInfo/X86/dbg-const.ll index b37eb0a..12dc154 100644 --- a/test/CodeGen/X86/dbg-const.ll +++ b/test/DebugInfo/X86/dbg-const.ll @@ -29,11 +29,12 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone declare i32 @bar() nounwind readnone !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!17} !0 = metadata !{i32 786478, metadata !15, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"foobar", i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @foobar, null, null, metadata !14, i32 0} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 786473, metadata !15} ; [ DW_TAG_file_type ] !2 = metadata !{i32 786449, metadata !15, i32 12, metadata !"clang version 2.9 (trunk 114183)", i1 true, metadata !"", i32 0, metadata !16, metadata !16, metadata !13, null, null, metadata !""} ; [ DW_TAG_compile_unit 
] -!3 = metadata !{i32 786453, metadata !15, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} +!3 = metadata !{i32 786453, metadata !15, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, metadata !15, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} !6 = metadata !{i32 786688, metadata !7, metadata !"j", metadata !1, i32 15, metadata !5, i32 0, null} @@ -47,3 +48,4 @@ declare i32 @bar() nounwind readnone !14 = metadata !{metadata !6} !15 = metadata !{metadata !"mu.c", metadata !"/private/tmp"} !16 = metadata !{i32 0} +!17 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/dbg-declare-arg.ll b/test/DebugInfo/X86/dbg-declare-arg.ll index 55b4238..7bf6f4f 100644 --- a/test/CodeGen/X86/dbg-declare-arg.ll +++ b/test/DebugInfo/X86/dbg-declare-arg.ll @@ -70,9 +70,10 @@ entry: } !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!52} !0 = metadata !{i32 786478, metadata !51, metadata !1, metadata !"~A", metadata !"~A", metadata !"", i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 589826, metadata !51, metadata !2, metadata !"A", i32 2, i64 128, i64 32, i32 0, i32 0, null, metadata !4, i32 0, null, null} ; [ DW_TAG_class_type ] +!1 = metadata !{i32 589826, metadata !51, metadata !2, metadata !"A", i32 2, i64 128, i64 32, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 2, size 128, align 32, offset 0] [def] [from ] !2 = metadata !{i32 786449, metadata !51, i32 4, metadata !"clang version 3.0 (trunk 130127)", i1 false, metadata !"", i32 0, metadata !24, metadata !24, metadata !50, null, null, null} ; [ DW_TAG_compile_unit ] !3 = metadata !{i32 
786473, metadata !51} ; [ DW_TAG_file_type ] !4 = metadata !{metadata !5, metadata !7, metadata !8, metadata !9, metadata !0, metadata !10, metadata !14} @@ -82,21 +83,21 @@ entry: !8 = metadata !{i32 786445, metadata !51, metadata !3, metadata !"z", i32 2, i64 32, i64 32, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ] !9 = metadata !{i32 786445, metadata !51, metadata !3, metadata !"o", i32 2, i64 32, i64 32, i64 96, i32 0, metadata !6} ; [ DW_TAG_member ] !10 = metadata !{i32 786478, metadata !51, metadata !1, metadata !"A", metadata !"A", metadata !"", i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!11 = metadata !{i32 786453, metadata !51, metadata !3, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!11 = metadata !{i32 786453, metadata !51, metadata !3, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !12 = metadata !{null, metadata !13} -!13 = metadata !{i32 786447, metadata !2, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ] +!13 = metadata !{i32 786447, metadata !2, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ] !14 = metadata !{i32 786478, metadata !51, metadata !1, metadata !"A", metadata !"A", metadata !"", i32 2, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!15 = metadata !{i32 786453, metadata !51, metadata !3, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!15 = metadata !{i32 786453, metadata !51, metadata !3, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, 
size 0, align 0, offset 0] [from ] !16 = metadata !{null, metadata !13, metadata !17} !17 = metadata !{i32 589840, null, metadata !2, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_reference_type ] -!18 = metadata !{i32 786470, metadata !2, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !1} ; [ DW_TAG_const_type ] -!19 = metadata !{i32 786478, metadata !51, metadata !3, metadata !"foo", metadata !"foo", metadata !"_Z3fooi", i32 4, metadata !20, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*, i32)* @_Z3fooi, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!20 = metadata !{i32 786453, metadata !51, metadata !3, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !21, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!18 = metadata !{i32 786470, metadata !2, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !1} ; [ DW_TAG_const_type ] +!19 = metadata !{i32 786478, metadata !51, metadata !3, metadata !"foo", metadata !"foo", metadata !"_Z3fooi", i32 4, metadata !20, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*, i32)* @_Z3fooi, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 4] [def] [scope 0] [foo] +!20 = metadata !{i32 786453, metadata !51, metadata !3, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !21, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !21 = metadata !{metadata !1} -!22 = metadata !{i32 786478, metadata !51, metadata !3, metadata !"~A", metadata !"~A", metadata !"_ZN1AD1Ev", i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*)* @_ZN1AD1Ev, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!23 = metadata !{i32 786453, metadata !51, metadata !3, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !24, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!22 = metadata !{i32 786478, metadata !51, metadata !3, metadata !"~A", 
metadata !"~A", metadata !"_ZN1AD1Ev", i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*)* @_ZN1AD1Ev, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [~A] +!23 = metadata !{i32 786453, metadata !51, metadata !3, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !24, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !24 = metadata !{null} -!25 = metadata !{i32 786478, metadata !51, metadata !3, metadata !"~A", metadata !"~A", metadata !"_ZN1AD2Ev", i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*)* @_ZN1AD2Ev, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!25 = metadata !{i32 786478, metadata !51, metadata !3, metadata !"~A", metadata !"~A", metadata !"_ZN1AD2Ev", i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*)* @_ZN1AD2Ev, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [~A] !26 = metadata !{i32 786689, metadata !19, metadata !"i", metadata !3, i32 16777220, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] !27 = metadata !{i32 4, i32 11, metadata !19, null} !28 = metadata !{i32 786688, metadata !29, metadata !"j", metadata !3, i32 5, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ] @@ -123,3 +124,4 @@ entry: !49 = metadata !{i32 786443, metadata !51, metadata !25, i32 2, i32 52, i32 2} ; [ DW_TAG_lexical_block ] !50 = metadata !{metadata !0, metadata !10, metadata !14, metadata !19, metadata !22, metadata !25} !51 = metadata !{metadata !"a.cc", metadata !"/private/tmp"} +!52 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/dbg-declare.ll b/test/DebugInfo/X86/dbg-declare.ll index d74e270..988d0bc 100644 --- a/test/CodeGen/X86/dbg-declare.ll +++ b/test/DebugInfo/X86/dbg-declare.ll @@ -28,17 +28,18 @@ declare i8* @llvm.stacksave() nounwind declare void 
@llvm.stackrestore(i8*) nounwind !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!27} !0 = metadata !{i32 786449, metadata !26, i32 12, metadata !"clang version 3.1 (trunk 153698)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, null, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 786478, metadata !26, metadata !0, metadata !"foo", metadata !"foo", metadata !"", i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*)* @foo, null, null, metadata !12, i32 0} ; [ DW_TAG_subprogram ] !6 = metadata !{i32 786473, metadata !26} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9, metadata !10} !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] -!11 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_const_type ] +!10 = metadata !{i32 786447, null, null, null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] +!11 = metadata !{i32 786470, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_const_type ] !12 = metadata !{metadata !13} !13 = metadata !{i32 786468} ; [ DW_TAG_base_type ] !14 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !6, i32 16777221, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ] @@ -46,7 +47,7 @@ declare void @llvm.stackrestore(i8*) nounwind !16 = 
metadata !{i32 7, i32 13, metadata !17, null} !17 = metadata !{i32 786443, metadata !26, metadata !5, i32 6, i32 1, i32 0} ; [ DW_TAG_lexical_block ] !18 = metadata !{i32 786688, metadata !17, metadata !"a", metadata !6, i32 7, metadata !19, i32 0, i32 0} ; [ DW_TAG_auto_variable ] -!19 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 0, i64 8, i32 0, i32 0, metadata !20, metadata !21, i32 0, i32 0} ; [ DW_TAG_array_type ] +!19 = metadata !{i32 786433, null, null, null, i32 0, i64 0, i64 8, i32 0, i32 0, metadata !20, metadata !21, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 8, offset 0] [from char] !20 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] !21 = metadata !{metadata !22} !22 = metadata !{i32 786465, i64 0, i64 -1} ; [ DW_TAG_subrange_type ] @@ -54,3 +55,4 @@ declare void @llvm.stackrestore(i8*) nounwind !24 = metadata !{i32 9, i32 1, metadata !17, null} !25 = metadata !{i32 8, i32 3, metadata !17, null} !26 = metadata !{metadata !"20020104-2.c", metadata !"/Volumes/Sandbox/llvm"} +!27 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/dbg-file-name.ll b/test/DebugInfo/X86/dbg-file-name.ll index 797b4b5..e9c61c1 100644 --- a/test/CodeGen/X86/dbg-file-name.ll +++ b/test/DebugInfo/X86/dbg-file-name.ll @@ -10,13 +10,15 @@ define i32 @main() nounwind { } !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!12} !1 = metadata !{i32 786473, metadata !10} ; [ DW_TAG_file_type ] !2 = metadata !{i32 786449, metadata !10, i32 1, metadata !"LLVM build 00", i1 true, metadata !"", i32 0, metadata !11, metadata !11, metadata !9, null, null, metadata !""} ; [ DW_TAG_compile_unit ] !5 = metadata !{i32 786468, metadata !10, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !6 = metadata !{i32 786478, metadata !10, metadata !1, metadata !"main", metadata !"main", metadata !"main", i32 9, 
metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!7 = metadata !{i32 786453, metadata !10, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 786453, metadata !10, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !5} !9 = metadata !{metadata !6} !10 = metadata !{metadata !"simple.c", metadata !"/Users/manav/one/two"} !11 = metadata !{i32 0} +!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/dbg-i128-const.ll b/test/DebugInfo/X86/dbg-i128-const.ll index f413909..01b105f 100644 --- a/test/CodeGen/X86/dbg-i128-const.ll +++ b/test/DebugInfo/X86/dbg-i128-const.ll @@ -13,6 +13,7 @@ entry: declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!5} +!llvm.module.flags = !{!16} !0 = metadata !{i128 42 } !1 = metadata !{i32 786688, metadata !2, metadata !"MAX", metadata !4, i32 29, metadata !8, i32 0, null} ; [ DW_TAG_auto_variable ] @@ -20,7 +21,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !3 = metadata !{i32 786478, metadata !13, metadata !4, metadata !"__foo", metadata !"__foo", metadata !"__foo", i32 26, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 0, i128 (i128, i128)* @__foo, null, null, null, i32 26} ; [ DW_TAG_subprogram ] !4 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ] !5 = metadata !{i32 786449, metadata !13, i32 1, metadata !"clang", i1 true, metadata !"", i32 0, metadata !15, metadata !15, metadata !12, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!6 = metadata !{i32 786453, metadata !13, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; 
[ DW_TAG_subroutine_type ] +!6 = metadata !{i32 786453, metadata !13, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !7 = metadata !{metadata !8, metadata !8, metadata !8} !8 = metadata !{i32 786454, metadata !14, metadata !4, metadata !"ti_int", i32 78, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ] !9 = metadata !{i32 786473, metadata !14} ; [ DW_TAG_file_type ] @@ -30,3 +31,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !13 = metadata !{metadata !"foo.c", metadata !"/tmp"} !14 = metadata !{metadata !"myint.h", metadata !"/tmp"} !15 = metadata !{i32 0} +!16 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/dbg-large-unsigned-const.ll b/test/DebugInfo/X86/dbg-large-unsigned-const.ll index c5cbf06..a037f3c 100644 --- a/test/CodeGen/X86/dbg-large-unsigned-const.ll +++ b/test/DebugInfo/X86/dbg-large-unsigned-const.ll @@ -26,6 +26,7 @@ entry: declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!34} !29 = metadata !{metadata !1, metadata !6} !30 = metadata !{metadata !7, metadata !11} !31 = metadata !{metadata !12} @@ -33,13 +34,13 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !0 = metadata !{i32 786449, metadata !32, i32 4, metadata !"clang version 3.0 (trunk 135593)", i1 true, metadata !"", i32 0, metadata !33, metadata !33, metadata !29, null, null, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 786478, metadata !32, null, metadata !"ise", metadata !"ise", metadata !"_Z3iseRKxS0_", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i1 (i64*, i64*)* @_Z3iseRKxS0_, null, null, metadata !30, i32 2} ; [ DW_TAG_subprogram ] !2 = metadata !{i32 786473, metadata !32} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 786453, metadata 
!32, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !32, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, null, metadata !0, metadata !"bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 786478, metadata !32, null, metadata !"fn", metadata !"fn", metadata !"_Z2fnx", i32 6, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i1 (i64)* @_Z2fnx, null, null, metadata !31, i32 6} ; [ DW_TAG_subprogram ] +!6 = metadata !{i32 786478, metadata !32, null, metadata !"fn", metadata !"fn", metadata !"_Z2fnx", i32 6, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i1 (i64)* @_Z2fnx, null, null, metadata !31, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [fn] !7 = metadata !{i32 786689, metadata !1, metadata !"LHS", metadata !2, i32 16777218, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] !8 = metadata !{i32 786448, metadata !0, null, null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_reference_type ] -!9 = metadata !{i32 786470, metadata !0, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_const_type ] +!9 = metadata !{i32 786470, metadata !0, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_const_type ] !10 = metadata !{i32 786468, null, metadata !0, metadata !"long long int", i32 0, i64 64, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !11 = metadata !{i32 786689, metadata !1, metadata !"RHS", metadata !2, i32 33554434, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] !12 = metadata !{i32 786689, metadata !6, metadata !"a", metadata !2, i32 16777222, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ] @@ -58,3 
+59,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !28 = metadata !{i32 3, i32 3, metadata !16, metadata !22} !32 = metadata !{metadata !"lli.cc", metadata !"/private/tmp"} !33 = metadata !{i32 0} +!34 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/dbg-merge-loc-entry.ll b/test/DebugInfo/X86/dbg-merge-loc-entry.ll index ccf4808..8b619ea 100644 --- a/test/CodeGen/X86/dbg-merge-loc-entry.ll +++ b/test/DebugInfo/X86/dbg-merge-loc-entry.ll @@ -38,18 +38,19 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone declare %0 @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!32} !0 = metadata !{i32 786478, metadata !29, metadata !1, metadata !"__udivmodti4", metadata !"__udivmodti4", metadata !"", i32 879, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, null, i32 879} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 786473, metadata !29} ; [ DW_TAG_file_type ] !2 = metadata !{i32 786449, metadata !29, i32 1, metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !31, metadata !31, metadata !28, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !29, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !29, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5, metadata !5, metadata !5, metadata !8} !5 = metadata !{i32 786454, metadata !30, metadata !6, metadata !"UTItype", i32 166, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ] !6 = metadata !{i32 786473, metadata !30} ; [ DW_TAG_file_type ] !7 = metadata !{i32 786468, metadata !29, metadata !1, metadata !"", i32 0, i64 128, i64 128, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] !8 = metadata !{i32 786447, metadata !29, metadata !1, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ] !9 = metadata !{i32 786478, metadata !29, metadata !1, metadata !"__divti3", metadata !"__divti3", metadata !"__divti3", i32 1094, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i128 (i128, i128)* @__divti3, null, null, null, i32 1094} ; [ DW_TAG_subprogram ] -!10 = metadata !{i32 786453, metadata !29, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ] +!10 = metadata !{i32 786453, metadata !29, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !11 = metadata !{metadata !12, metadata !12, metadata !12} !12 = metadata !{i32 786454, metadata !30, metadata !6, metadata !"TItype", i32 160, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_typedef ] !13 = metadata 
!{i32 786468, metadata !29, metadata !1, metadata !"", i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] @@ -71,3 +72,4 @@ declare %0 @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone !29 = metadata !{metadata !"foobar.c", metadata !"/tmp"} !30 = metadata !{metadata !"foobar.h", metadata !"/tmp"} !31 = metadata !{i32 0} +!32 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/dbg-prolog-end.ll b/test/DebugInfo/X86/dbg-prolog-end.ll index c8d8499..a7c6cb5 100644 --- a/test/CodeGen/X86/dbg-prolog-end.ll +++ b/test/DebugInfo/X86/dbg-prolog-end.ll @@ -33,15 +33,16 @@ entry: } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!21} !18 = metadata !{metadata !1, metadata !6} !0 = metadata !{i32 786449, metadata !19, i32 12, metadata !"clang version 3.0 (trunk 131100)", i1 false, metadata !"", i32 0, metadata !20, metadata !20, metadata !18, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!1 = metadata !{i32 786478, metadata !19, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32)* @foo, null, null, null, i32 1} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786478, metadata !19, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @foo, null, null, null, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo] !2 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 786453, metadata !19, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !19, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, null, 
metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 786478, metadata !19, metadata !2, metadata !"main", metadata !"main", metadata !"", i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @main, null, null, null, i32 7} ; [ DW_TAG_subprogram ] +!6 = metadata !{i32 786478, metadata !19, metadata !2, metadata !"main", metadata !"main", metadata !"", i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, null, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [main] !7 = metadata !{i32 786689, metadata !1, metadata !"i", metadata !2, i32 16777217, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] !8 = metadata !{i32 1, i32 13, metadata !1, null} !9 = metadata !{i32 786688, metadata !10, metadata !"j", metadata !2, i32 2, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] @@ -55,3 +56,4 @@ entry: !17 = metadata !{i32 786443, metadata !19, metadata !6, i32 7, i32 12, i32 1} ; [ DW_TAG_lexical_block ] !19 = metadata !{metadata !"/tmp/a.c", metadata !"/private/tmp"} !20 = metadata !{i32 0} +!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/dbg-subrange.ll b/test/DebugInfo/X86/dbg-subrange.ll index ffb5f2d..5bf330c 100644 --- a/test/CodeGen/X86/dbg-subrange.ll +++ b/test/DebugInfo/X86/dbg-subrange.ll @@ -13,19 +13,20 @@ entry: } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!22} !0 = metadata !{i32 786449, metadata !21, i32 12, metadata !"clang version 3.1 (trunk 144833)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !11, metadata !11, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} -!5 = metadata !{i32 720942, metadata !21, metadata !6, metadata !"bar", metadata !"bar", metadata !"", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @bar, null, null, metadata !9, i32 0} ; 
[ DW_TAG_subprogram ] +!5 = metadata !{i32 720942, metadata !21, metadata !6, metadata !"bar", metadata !"bar", metadata !"", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @bar, null, null, metadata !9, i32 0} ; [ DW_TAG_subprogram ] [line 4] [def] [scope 0] [bar] !6 = metadata !{i32 720937, metadata !21} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{null} !9 = metadata !{metadata !10} !10 = metadata !{i32 720932} ; [ DW_TAG_base_type ] !11 = metadata !{metadata !13} !13 = metadata !{i32 720948, i32 0, null, metadata !"s", metadata !"s", metadata !"", metadata !6, i32 2, metadata !14, i32 0, i32 1, [4294967296 x i8]* @s, null} ; [ DW_TAG_variable ] -!14 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 34359738368, i64 8, i32 0, i32 0, metadata !15, metadata !16, i32 0, i32 0} ; [ DW_TAG_array_type ] +!14 = metadata !{i32 720897, null, null, null, i32 0, i64 34359738368, i64 8, i32 0, i32 0, metadata !15, metadata !16, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 34359738368, align 8, offset 0] [from char] !15 = metadata !{i32 720932, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] !16 = metadata !{metadata !17} !17 = metadata !{i32 720929, i64 0, i64 4294967296} ; [ DW_TAG_subrange_type ] @@ -33,3 +34,4 @@ entry: !19 = metadata !{i32 786443, metadata !21, metadata !5, i32 4, i32 1, i32 0} ; [ DW_TAG_lexical_block ] !20 = metadata !{i32 6, i32 1, metadata !19, null} !21 = metadata !{metadata !"small.c", metadata !"/private/tmp"} +!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git 
a/test/CodeGen/X86/dbg-value-dag-combine.ll b/test/DebugInfo/X86/dbg-value-dag-combine.ll index e281493..12aa61b 100644 --- a/test/CodeGen/X86/dbg-value-dag-combine.ll +++ b/test/DebugInfo/X86/dbg-value-dag-combine.ll @@ -16,7 +16,7 @@ entry: call void @llvm.dbg.value(metadata !12, i64 0, metadata !13), !dbg !14 %tmp2 = load i32 addrspace(1)* %ip, align 4, !dbg !15 %tmp3 = add i32 0, %tmp2, !dbg !15 -; CHECK: ##DEBUG_VALUE: idx <- EAX{{$}} +; CHECK: ##DEBUG_VALUE: idx <- E{{..$}} call void @llvm.dbg.value(metadata !{i32 %tmp3}, i64 0, metadata !13), !dbg !15 %arrayidx = getelementptr i32 addrspace(1)* %ip, i32 %1, !dbg !16 @@ -24,11 +24,12 @@ entry: ret void, !dbg !17 } !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!20} -!0 = metadata !{i32 786478, metadata !19, metadata !1, metadata !"__OpenCL_test_kernel", metadata !"__OpenCL_test_kernel", metadata !"__OpenCL_test_kernel", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!0 = metadata !{i32 786478, metadata !19, metadata !1, metadata !"__OpenCL_test_kernel", metadata !"__OpenCL_test_kernel", metadata !"__OpenCL_test_kernel", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [__OpenCL_test_kernel] !1 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ] !2 = metadata !{i32 786449, metadata !19, i32 1, metadata !"clc", i1 false, metadata !"", i32 0, metadata !12, metadata !12, metadata !18, null, null, null} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !19, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !19, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = 
metadata !{null, metadata !5} !5 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ] !6 = metadata !{i32 786468, null, metadata !2, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] @@ -45,3 +46,4 @@ entry: !17 = metadata !{i32 7, i32 1, metadata !0, null} !18 = metadata !{metadata !0} !19 = metadata !{metadata !"OCL6368.tmp.cl", metadata !"E:\5CUsers\5Cmvillmow.AMD\5CAppData\5CLocal\5CTemp"} +!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll index de9f672..1a78772 100644 --- a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll +++ b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll @@ -47,20 +47,21 @@ entry: declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!43} -!0 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.S1*, i32)* @foo, null, null, metadata !41, i32 8} ; [ DW_TAG_subprogram ] +!0 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (%struct.S1*, i32)* @foo, null, null, metadata !41, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [foo] !1 = metadata !{i32 786473, metadata !42} ; [ DW_TAG_file_type ] !2 = metadata !{i32 786449, metadata !42, i32 12, metadata !"clang version 2.9 (trunk 125693)", i1 true, metadata !"", i32 0, metadata !8, metadata !8, metadata !39, metadata !40, metadata !40, null} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !42, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ 
DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !42, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"", i32 15, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, void ()* @foobar, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!7 = metadata !{i32 786453, metadata !42, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!6 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"", i32 15, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, void ()* @foobar, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 15] [def] [scope 0] [foobar] +!7 = metadata !{i32 786453, metadata !42, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{null} !9 = metadata !{i32 786689, metadata !0, metadata !"sp", metadata !1, i32 7, metadata !10, i32 0, metadata !32} ; [ DW_TAG_arg_variable ] !10 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] !11 = metadata !{i32 786454, metadata !42, metadata !2, metadata !"S1", i32 4, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ] -!12 = metadata !{i32 786451, metadata !42, metadata !2, metadata !"S1", i32 1, i64 128, i64 64, i32 0, i32 0, i32 0, metadata !13, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!12 = metadata !{i32 786451, metadata !42, 
metadata !2, metadata !"S1", i32 1, i64 128, i64 64, i32 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [S1] [line 1, size 128, align 64, offset 0] [def] [from ] !13 = metadata !{metadata !14, metadata !17} !14 = metadata !{i32 786445, metadata !42, metadata !1, metadata !"m", i32 2, i64 64, i64 64, i64 0, i32 0, metadata !15} ; [ DW_TAG_member ] !15 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] @@ -87,3 +88,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !40 = metadata !{metadata !19} !41 = metadata !{metadata !9, metadata !18} !42 = metadata !{metadata !"nm2.c", metadata !"/private/tmp"} +!43 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/dbg-value-isel.ll b/test/DebugInfo/X86/dbg-value-isel.ll index 0013385..f899f48 100644 --- a/test/CodeGen/X86/dbg-value-isel.ll +++ b/test/DebugInfo/X86/dbg-value-isel.ll @@ -9,7 +9,7 @@ target triple = "x86_64-apple-darwin10.0.0" @sgv = internal addrspace(2) constant [1 x i8] zeroinitializer @fgv = internal addrspace(2) constant [1 x i8] zeroinitializer @lvgv = internal constant [0 x i8*] zeroinitializer -@llvm.global.annotations = appending global [1 x %0] [%0 { i8* bitcast (void (i32 addrspace(1)*)* @__OpenCL_nbt02_kernel to i8*), i8* bitcast ([1 x i8] addrspace(2)* @sgv to i8*), i8* bitcast ([1 x i8] addrspace(2)* @fgv to i8*), i8* bitcast ([0 x i8*]* @lvgv to i8*), i32 0 }], section "llvm.metadata" +@llvm.global.annotations = appending global [1 x %0] [%0 { i8* bitcast (void (i32 addrspace(1)*)* @__OpenCL_nbt02_kernel to i8*), i8* addrspacecast ([1 x i8] addrspace(2)* @sgv to i8*), i8* addrspacecast ([1 x i8] addrspace(2)* @fgv to i8*), i8* bitcast ([0 x i8*]* @lvgv to i8*), i32 0 }], section "llvm.metadata" define void @__OpenCL_nbt02_kernel(i32 addrspace(1)* %ip) nounwind { entry: @@ -79,11 +79,12 @@ declare <4 x i32> 
@__amdil_get_global_id_int() nounwind declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!22} -!0 = metadata !{i32 786478, metadata !20, metadata !1, metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!0 = metadata !{i32 786478, metadata !20, metadata !1, metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [__OpenCL_nbt02_kernel] !1 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ] !2 = metadata !{i32 786449, metadata !20, i32 1, metadata !"clc", i1 false, metadata !"", i32 0, metadata !21, metadata !21, metadata !19, null, null, null} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !20, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !20, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{null, metadata !5} !5 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ] !6 = metadata !{i32 589846, metadata !20, metadata !2, metadata !"uint", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ] @@ -102,3 +103,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !19 = metadata !{metadata !0} !20 = metadata !{metadata !"OCLlLwTXZ.cl", metadata !"/tmp"} !21 = metadata !{i32 0} +!22 = metadata !{i32 1, metadata !"Debug Info 
Version", i32 1} diff --git a/test/CodeGen/X86/dbg-value-location.ll b/test/DebugInfo/X86/dbg-value-location.ll index f896e58..1e21c6a 100644 --- a/test/CodeGen/X86/dbg-value-location.ll +++ b/test/DebugInfo/X86/dbg-value-location.ll @@ -46,24 +46,25 @@ declare hidden fastcc i32 @bar3(i32) nounwind optsize ssp declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!29} -!0 = metadata !{i32 786478, metadata !26, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 19510, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i64, i8*, i32)* @foo, null, null, null, i32 19510} ; [ DW_TAG_subprogram ] +!0 = metadata !{i32 786478, metadata !26, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 19510, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i64, i8*, i32)* @foo, null, null, null, i32 19510} ; [ DW_TAG_subprogram ] [line 19510] [def] [foo] !1 = metadata !{i32 786473, metadata !26} ; [ DW_TAG_file_type ] !2 = metadata !{i32 786449, metadata !27, i32 12, metadata !"clang version 2.9 (trunk 124753)", i1 true, metadata !"", i32 0, metadata !28, metadata !28, metadata !24, null, null, null} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !26, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !26, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 786478, metadata !26, metadata !1, metadata !"bar3", metadata !"bar3", metadata !"", i32 14827, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 
256, i1 true, i32 (i32)* @bar3, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!7 = metadata !{i32 786478, metadata !26, metadata !1, metadata !"bar2", metadata !"bar2", metadata !"", i32 15397, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @bar2, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!8 = metadata !{i32 786478, metadata !26, metadata !1, metadata !"bar", metadata !"bar", metadata !"", i32 12382, metadata !9, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i32*)* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!9 = metadata !{i32 786453, metadata !26, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !10, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!6 = metadata !{i32 786478, metadata !26, metadata !1, metadata !"bar3", metadata !"bar3", metadata !"", i32 14827, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @bar3, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 14827] [local] [def] [scope 0] [bar3] +!7 = metadata !{i32 786478, metadata !26, metadata !1, metadata !"bar2", metadata !"bar2", metadata !"", i32 15397, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @bar2, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 15397] [local] [def] [scope 0] [bar2] +!8 = metadata !{i32 786478, metadata !26, metadata !1, metadata !"bar", metadata !"bar", metadata !"", i32 12382, metadata !9, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i32*)* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 12382] [local] [def] [scope 0] [bar] +!9 = metadata !{i32 786453, metadata !26, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !10 = metadata !{metadata !11} !11 = metadata !{i32 786468, null, metadata !2, metadata !"unsigned char", i32 0, i64 8, 
i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ] !12 = metadata !{i32 786689, metadata !0, metadata !"var", metadata !1, i32 19509, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] !13 = metadata !{i32 19509, i32 20, metadata !0, null} !14 = metadata !{i32 18091, i32 2, metadata !15, metadata !17} !15 = metadata !{i32 786443, metadata !26, metadata !16, i32 18086, i32 1, i32 748} ; [ DW_TAG_lexical_block ] -!16 = metadata !{i32 786478, metadata !26, metadata !1, metadata !"foo_bar", metadata !"foo_bar", metadata !"", i32 18086, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!16 = metadata !{i32 786478, metadata !26, metadata !1, metadata !"foo_bar", metadata !"foo_bar", metadata !"", i32 18086, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 18086] [local] [def] [scope 0] [foo_bar] !17 = metadata !{i32 19514, i32 2, metadata !18, null} !18 = metadata !{i32 786443, metadata !26, metadata !0, i32 19510, i32 1, i32 99} ; [ DW_TAG_lexical_block ] !22 = metadata !{i32 18094, i32 2, metadata !15, metadata !17} @@ -73,3 +74,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !26 = metadata !{metadata !"/tmp/f.c", metadata !"/tmp"} !27 = metadata !{metadata !"f.i", metadata !"/tmp"} !28 = metadata !{i32 0} +!29 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/dbg-value-range.ll b/test/DebugInfo/X86/dbg-value-range.ll index a784cc1..d9e7a63 100644 --- a/test/DebugInfo/X86/dbg-value-range.ll +++ b/test/DebugInfo/X86/dbg-value-range.ll @@ -18,16 +18,17 @@ declare i32 @foo(...) 
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!24} -!0 = metadata !{i32 786478, metadata !22, metadata !1, metadata !"bar", metadata !"bar", metadata !"", i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.a*)* @bar, null, null, metadata !21, i32 0} ; [ DW_TAG_subprogram ] +!0 = metadata !{i32 786478, metadata !22, metadata !1, metadata !"bar", metadata !"bar", metadata !"", i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (%struct.a*)* @bar, null, null, metadata !21, i32 0} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 0] [bar] !1 = metadata !{i32 786473, metadata !22} ; [ DW_TAG_file_type ] !2 = metadata !{i32 786449, metadata !22, i32 12, metadata !"clang version 2.9 (trunk 122997)", i1 true, metadata !"", i32 0, metadata !23, metadata !23, metadata !20, null, null, null} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !22, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !22, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !6 = metadata !{i32 786689, metadata !0, metadata !"b", metadata !1, i32 5, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ] !7 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] -!8 = metadata !{i32 786451, metadata !22, metadata !2, metadata !"a", i32 1, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !9, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!8 = metadata !{i32 786451, metadata !22, metadata !2, 
metadata !"a", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [a] [line 1, size 32, align 32, offset 0] [def] [from ] !9 = metadata !{metadata !10} !10 = metadata !{i32 786445, metadata !22, metadata !1, metadata !"c", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ] !11 = metadata !{i32 786688, metadata !12, metadata !"x", metadata !1, i32 6, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] @@ -59,3 +60,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone ;CHECK-NEXT: Ltmp ;CHECK-NEXT: .quad 0 ;CHECK-NEXT: .quad 0 +!24 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/CodeGen/X86/dbg-value-terminator.ll b/test/DebugInfo/X86/dbg-value-terminator.ll index e8d70de..f08f281 100644 --- a/test/CodeGen/X86/dbg-value-terminator.ll +++ b/test/DebugInfo/X86/dbg-value-terminator.ll @@ -111,11 +111,12 @@ attributes #1 = { nounwind uwtable } declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!22} !0 = metadata !{i32 786449, metadata !20, i32 12, metadata !"Apple clang version", i1 true, metadata !"", i32 0, metadata !21, metadata !21, metadata !18, null, null, null} ; [ DW_TAG_compile_unit ] -!1 = metadata !{i32 786478, metadata !20, metadata !2, metadata !"foo", metadata !"", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, %a* ()* @test, null, null, metadata !19, i32 0} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786478, metadata !20, metadata !2, metadata !"foo", metadata !"", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, %a* ()* @test, null, null, metadata !19, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [foo] !2 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 786453, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 
0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !6 = metadata !{i32 786689, metadata !1, metadata !"i", metadata !2, i32 16777218, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] @@ -129,3 +130,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !19 = metadata !{metadata !6, metadata !7, metadata !10} !20 = metadata !{metadata !"a.c", metadata !"/private/tmp"} !21 = metadata !{i32 0} +!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/dbg_value_direct.ll b/test/DebugInfo/X86/dbg_value_direct.ll index 9a40d59..8a22cd7 100644 --- a/test/DebugInfo/X86/dbg_value_direct.ll +++ b/test/DebugInfo/X86/dbg_value_direct.ll @@ -145,7 +145,7 @@ attributes #1 = { nounwind readnone } attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "ssp-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!22} +!llvm.module.flags = !{!22, !27} !0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/crash.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{metadata !"crash.cpp", metadata !"/tmp"} @@ -153,17 +153,17 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n !3 = metadata !{metadata !4} !4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"func", 
metadata !"func", metadata !"_Z4funci", i32 6, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.A*, i32)* @_Z4funci, null, null, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [func] !5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/crash.cpp] -!6 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !7 = metadata !{metadata !8, metadata !21} -!8 = metadata !{i32 786451, metadata !1, null, metadata !"A", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !9, i32 0, null, null} ; [ DW_TAG_structure_type ] [A] [line 1, size 8, align 8, offset 0] [def] [from ] +!8 = metadata !{i32 786451, metadata !1, null, metadata !"A", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [A] [line 1, size 8, align 8, offset 0] [def] [from ] !9 = metadata !{metadata !10, metadata !15} !10 = metadata !{i32 786478, metadata !1, metadata !8, metadata !"A", metadata !"A", metadata !"", i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !14, i32 2} ; [ DW_TAG_subprogram ] [line 2] [A] -!11 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !12 = metadata !{null, metadata !13} -!13 = metadata !{i32 786447, i32 0, i32 0, 
metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from A] +!13 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from A] !14 = metadata !{i32 786468} !15 = metadata !{i32 786478, metadata !1, metadata !8, metadata !"A", metadata !"A", metadata !"", i32 3, metadata !16, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !20, i32 3} ; [ DW_TAG_subprogram ] [line 3] [A] -!16 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!16 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !17, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !17 = metadata !{null, metadata !13, metadata !18} !18 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !19} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ] !19 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from A] @@ -174,3 +174,4 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n !24 = metadata !{i32 786688, metadata !4, metadata !"a", metadata !5, i32 7, metadata !8, i32 8192, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 7] !25 = metadata !{i32 7, i32 0, metadata !4, null} !26 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ] +!27 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/debug-info-block-captured-self.ll 
b/test/DebugInfo/X86/debug-info-block-captured-self.ll index 3d36350..6e4d200 100644 --- a/test/DebugInfo/X86/debug-info-block-captured-self.ll +++ b/test/DebugInfo/X86/debug-info-block-captured-self.ll @@ -77,23 +77,24 @@ define internal void @"__24-[Main initWithContext:]_block_invoke_2"(i8* %.block_ } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!108} !0 = metadata !{i32 786449, metadata !107, i32 16, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 2, metadata !2, metadata !4, metadata !23, metadata !15, metadata !15, metadata !""} ; [ DW_TAG_compile_unit ] [llvm/tools/clang/test/CodeGenObjC/debug-info-block-captured-self.m] [DW_LANG_ObjC] !1 = metadata !{i32 786473, metadata !107} ; [ DW_TAG_file_type ] !2 = metadata !{metadata !3} -!3 = metadata !{i32 786436, metadata !107, null, metadata !"", i32 20, i64 32, i64 32, i32 0, i32 0, null, metadata !4, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] [line 20, size 32, align 32, offset 0] [from ] +!3 = metadata !{i32 786436, metadata !107, null, metadata !"", i32 20, i64 32, i64 32, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [line 20, size 32, align 32, offset 0] [def] [from ] !4 = metadata !{} !15 = metadata !{i32 0} !23 = metadata !{metadata !38, metadata !42} !27 = metadata !{i32 786454, metadata !107, null, metadata !"id", i32 31, i64 0, i64 0, i64 0, i32 0, metadata !28} ; [ DW_TAG_typedef ] [id] [line 31, size 0, align 0, offset 0] [from ] !28 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !29} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_object] -!29 = metadata !{i32 786451, metadata !107, null, metadata !"objc_object", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !30, i32 0, null, null} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [from ] +!29 = metadata !{i32 786451, metadata !107, null, metadata !"objc_object", i32 0, i64 0, i64 0, i32 
0, i32 0, null, metadata !30, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [def] [from ] !30 = metadata !{metadata !31} !31 = metadata !{i32 786445, metadata !107, metadata !29, metadata !"isa", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ] [isa] [line 0, size 64, align 0, offset 0] [from ] !32 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from objc_class] -!33 = metadata !{i32 786451, metadata !107, null, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [fwd] [from ] -!34 = metadata !{i32 786451, metadata !107, null, metadata !"Main", i32 23, i64 0, i64 0, i32 0, i32 1092, i32 0, i32 0, i32 16} ; [ DW_TAG_structure_type ] [Main] [line 23, size 0, align 0, offset 0] [artificial] [fwd] [from ] +!33 = metadata !{i32 786451, metadata !107, null, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [decl] [from ] +!34 = metadata !{i32 786451, metadata !107, null, metadata !"Main", i32 23, i64 0, i64 0, i32 0, i32 1092, null, i32 0, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [Main] [line 23, size 0, align 0, offset 0] [artificial] [decl] [from ] !38 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"__24-[Main initWithContext:]_block_invoke", metadata !"__24-[Main initWithContext:]_block_invoke", metadata !"", i32 33, metadata !39, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*, i8*)* @"__24-[Main initWithContext:]_block_invoke", null, null, metadata !15, i32 33} ; [ DW_TAG_subprogram ] [line 33] [local] [def] [__24-[Main initWithContext:]_block_invoke] -!39 = metadata !{i32 786453, null, null, metadata !"", 
i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !40, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!39 = metadata !{i32 786453, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !40, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !40 = metadata !{null, metadata !41, metadata !27} !41 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ] !42 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"__24-[Main initWithContext:]_block_invoke_2", metadata !"__24-[Main initWithContext:]_block_invoke_2", metadata !"", i32 35, metadata !39, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*, i8*)* @"__24-[Main initWithContext:]_block_invoke_2", null, null, metadata !15, i32 35} ; [ DW_TAG_subprogram ] [line 35] [local] [def] [__24-[Main initWithContext:]_block_invoke_2] @@ -104,3 +105,4 @@ define internal void @"__24-[Main initWithContext:]_block_invoke_2"(i8* %.block_ !105 = metadata !{i32 786688, metadata !42, metadata !"self", metadata !1, i32 40, metadata !34, i32 0, i32 0, i64 1, i64 32} ; [ DW_TAG_auto_variable ] [self] [line 40] !106 = metadata !{i32 40, i32 0, metadata !42, null} !107 = metadata !{metadata !"llvm/tools/clang/test/CodeGenObjC/debug-info-block-captured-self.m", metadata !""} +!108 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/debug-info-blocks.ll b/test/DebugInfo/X86/debug-info-blocks.ll index ae95033..c3bedf2 100644 --- a/test/DebugInfo/X86/debug-info-blocks.ll +++ b/test/DebugInfo/X86/debug-info-blocks.ll @@ -5,13 +5,15 @@ ; rdar://problem/9279956 ; test that the DW_AT_location of self is at ( fbreg +{{[0-9]+}}, deref, +{{[0-9]+}} ) +; CHECK: DW_TAG_subprogram ; CHECK: DW_AT_name{{.*}}_block_invoke + ; CHECK-NOT: DW_TAG_subprogram ; CHECK: 
DW_TAG_formal_parameter -; CHECK-NOT: DW_TAG -; CHECK: .block_descriptor +; CHECK-NEXT: DW_AT_name{{.*}}.block_descriptor ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_location + ; CHECK-NOT: DW_TAG_subprogram ; CHECK: DW_TAG_variable ; CHECK-NEXT: DW_AT_name{{.*}}"self" @@ -24,7 +26,7 @@ ; 0x23 = DW_OP_uconst ; 0x91 = DW_OP_fbreg ; CHECK: DW_AT_location{{.*}}91 {{[0-9]+}} 06 23 {{[0-9]+}} ) -; CHECK: DW_TAG_structure_type + ; CHECK: [[A:.*]]: DW_TAG_structure_type ; CHECK-NEXT: DW_AT_APPLE_objc_complete_type ; CHECK-NEXT: DW_AT_name{{.*}}"A" @@ -258,52 +260,52 @@ attributes #2 = { nonlazybind } attributes #3 = { nounwind } !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!56, !57, !58, !59} +!llvm.module.flags = !{!56, !57, !58, !59, !110} !0 = metadata !{i32 786449, metadata !1, i32 16, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 2, metadata !2, metadata !3, metadata !12, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [llvm/tools/clang/test/CodeGenObjC/<unknown>] [DW_LANG_ObjC] !1 = metadata !{metadata !"llvm/tools/clang/test/CodeGenObjC/<unknown>", metadata !"llvm/_build.ninja.Debug"} !2 = metadata !{i32 0} !3 = metadata !{metadata !4} -!4 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"A", i32 33, i64 32, i64 32, i32 0, i32 512, null, metadata !7, i32 16, null, null} ; [ DW_TAG_structure_type ] [A] [line 33, size 32, align 32, offset 0] [from ] +!4 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"A", i32 33, i64 32, i64 32, i32 0, i32 512, null, metadata !7, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [A] [line 33, size 32, align 32, offset 0] [def] [from ] !5 = metadata !{metadata !"llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m", metadata !"llvm/_build.ninja.Debug"} !6 = metadata !{i32 786473, metadata !5} ; [ DW_TAG_file_type ] [llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m] !7 = metadata !{metadata !8, metadata !10} !8 = metadata !{i32 786460, null, metadata !4, null, i32 0, i64 0, i64 
0, i64 0, i32 0, metadata !9} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from NSObject] -!9 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"NSObject", i32 21, i64 0, i64 8, i32 0, i32 0, null, metadata !2, i32 16, null, null} ; [ DW_TAG_structure_type ] [NSObject] [line 21, size 0, align 8, offset 0] [from ] +!9 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"NSObject", i32 21, i64 0, i64 8, i32 0, i32 0, null, metadata !2, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [NSObject] [line 21, size 0, align 8, offset 0] [def] [from ] !10 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"ivar", i32 35, i64 32, i64 32, i64 0, i32 0, metadata !11, null} ; [ DW_TAG_member ] [ivar] [line 35, size 32, align 32, offset 0] [from int] !11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !12 = metadata !{metadata !13, metadata !27, metadata !31, metadata !35, metadata !36, metadata !39} !13 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"-[A init]", metadata !"-[A init]", metadata !"", i32 46, metadata !14, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, i8* (%0*, i8*)* @"\01-[A init]", null, null, metadata !2, i32 46} ; [ DW_TAG_subprogram ] [line 46] [local] [def] [-[A init]] -!14 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!14 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !15 = metadata !{metadata !16, metadata !23, metadata !24} !16 = metadata !{i32 786454, metadata !5, null, metadata !"id", i32 46, i64 0, i64 0, i64 0, i32 0, metadata !17} ; [ 
DW_TAG_typedef ] [id] [line 46, size 0, align 0, offset 0] [from ] !17 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !18} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_object] -!18 = metadata !{i32 786451, metadata !1, null, metadata !"objc_object", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !19, i32 0, null, null} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [from ] +!18 = metadata !{i32 786451, metadata !1, null, metadata !"objc_object", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !19, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [def] [from ] !19 = metadata !{metadata !20} !20 = metadata !{i32 786445, metadata !1, metadata !18, metadata !"isa", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !21} ; [ DW_TAG_member ] [isa] [line 0, size 64, align 0, offset 0] [from ] !21 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from objc_class] -!22 = metadata !{i32 786451, metadata !1, null, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [fwd] [from ] -!23 = metadata !{i32 786447, i32 0, i32 0, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !4} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from A] -!24 = metadata !{i32 786454, metadata !5, i32 0, metadata !"SEL", i32 46, i64 0, i64 0, i64 0, i32 64, metadata !25} ; [ DW_TAG_typedef ] [SEL] [line 46, size 0, align 0, offset 0] [artificial] [from ] +!22 = metadata !{i32 786451, metadata !1, null, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [decl] [from 
] +!23 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !4} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from A] +!24 = metadata !{i32 786454, metadata !5, null, metadata !"SEL", i32 46, i64 0, i64 0, i64 0, i32 64, metadata !25} ; [ DW_TAG_typedef ] [SEL] [line 46, size 0, align 0, offset 0] [artificial] [from ] !25 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !26} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_selector] -!26 = metadata !{i32 786451, metadata !1, null, metadata !"objc_selector", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [objc_selector] [line 0, size 0, align 0, offset 0] [fwd] [from ] +!26 = metadata !{i32 786451, metadata !1, null, metadata !"objc_selector", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [objc_selector] [line 0, size 0, align 0, offset 0] [decl] [from ] !27 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"__9-[A init]_block_invoke", metadata !"__9-[A init]_block_invoke", metadata !"", i32 49, metadata !28, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @"__9-[A init]_block_invoke", null, null, metadata !2, i32 49} ; [ DW_TAG_subprogram ] [line 49] [local] [def] [__9-[A init]_block_invoke] -!28 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !29, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!28 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !29, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !29 = metadata !{null, metadata !30} !30 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ 
DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ] !31 = metadata !{i32 786478, metadata !1, metadata !32, metadata !"__copy_helper_block_", metadata !"__copy_helper_block_", metadata !"", i32 52, metadata !33, i1 true, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i8*, i8*)* @__copy_helper_block_, null, null, metadata !2, i32 52} ; [ DW_TAG_subprogram ] [line 52] [local] [def] [__copy_helper_block_] !32 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [llvm/tools/clang/test/CodeGenObjC/<unknown>] -!33 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !34, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!33 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !34, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !34 = metadata !{null, metadata !30, metadata !30} !35 = metadata !{i32 786478, metadata !1, metadata !32, metadata !"__destroy_helper_block_", metadata !"__destroy_helper_block_", metadata !"", i32 52, metadata !28, i1 true, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i8*)* @__destroy_helper_block_, null, null, metadata !2, i32 52} ; [ DW_TAG_subprogram ] [line 52] [local] [def] [__destroy_helper_block_] !36 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 59, metadata !37, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !2, i32 60} ; [ DW_TAG_subprogram ] [line 59] [def] [scope 60] [main] -!37 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !38, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!37 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !38, i32 0, null, null, null} ; [ 
DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !38 = metadata !{metadata !11} !39 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"run", metadata !"run", metadata !"", i32 39, metadata !40, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (void ()*)* @run, null, null, metadata !2, i32 40} ; [ DW_TAG_subprogram ] [line 39] [local] [def] [scope 40] [run] -!40 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !41, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!40 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !41, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !41 = metadata !{null, metadata !42} !42 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !43} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __block_literal_generic] -!43 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"__block_literal_generic", i32 40, i64 256, i64 0, i32 0, i32 8, null, metadata !44, i32 0, null, null} ; [ DW_TAG_structure_type ] [__block_literal_generic] [line 40, size 256, align 0, offset 0] [from ] +!43 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"__block_literal_generic", i32 40, i64 256, i64 0, i32 0, i32 8, null, metadata !44, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_literal_generic] [line 40, size 256, align 0, offset 0] [def] [from ] !44 = metadata !{metadata !45, metadata !46, metadata !47, metadata !48, metadata !49} !45 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__isa", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !30} ; [ DW_TAG_member ] [__isa] [line 0, size 64, align 64, offset 0] [from ] !46 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__flags", i32 0, i64 32, i64 32, i64 64, i32 
0, metadata !11} ; [ DW_TAG_member ] [__flags] [line 0, size 32, align 32, offset 64] [from int] @@ -311,7 +313,7 @@ attributes #3 = { nounwind } !48 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__FuncPtr", i32 0, i64 64, i64 64, i64 128, i32 0, metadata !30} ; [ DW_TAG_member ] [__FuncPtr] [line 0, size 64, align 64, offset 128] [from ] !49 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__descriptor", i32 40, i64 64, i64 64, i64 192, i32 0, metadata !50} ; [ DW_TAG_member ] [__descriptor] [line 40, size 64, align 64, offset 192] [from ] !50 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !51} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __block_descriptor] -!51 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"__block_descriptor", i32 40, i64 128, i64 0, i32 0, i32 8, null, metadata !52, i32 0, null, null} ; [ DW_TAG_structure_type ] [__block_descriptor] [line 40, size 128, align 0, offset 0] [from ] +!51 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"__block_descriptor", i32 40, i64 128, i64 0, i32 0, i32 8, null, metadata !52, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_descriptor] [line 40, size 128, align 0, offset 0] [def] [from ] !52 = metadata !{metadata !53, metadata !55} !53 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"reserved", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !54} ; [ DW_TAG_member ] [reserved] [line 0, size 64, align 64, offset 0] [from long unsigned int] !54 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned] @@ -338,7 +340,7 @@ attributes #3 = { nounwind } !75 = metadata !{i32 42, i32 0, metadata !39, null} !76 = metadata !{i32 786689, metadata !27, metadata !".block_descriptor", metadata !6, i32 16777265, metadata 
!77, i32 64, i32 0} ; [ DW_TAG_arg_variable ] [.block_descriptor] [line 49] !77 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !78} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __block_literal_1] -!78 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"__block_literal_1", i32 49, i64 320, i64 64, i32 0, i32 0, null, metadata !79, i32 0, null, null} ; [ DW_TAG_structure_type ] [__block_literal_1] [line 49, size 320, align 64, offset 0] [from ] +!78 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"__block_literal_1", i32 49, i64 320, i64 64, i32 0, i32 0, null, metadata !79, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_literal_1] [line 49, size 320, align 64, offset 0] [def] [from ] !79 = metadata !{metadata !80, metadata !81, metadata !82, metadata !83, metadata !84, metadata !87} !80 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__isa", i32 49, i64 64, i64 64, i64 0, i32 0, metadata !30} ; [ DW_TAG_member ] [__isa] [line 49, size 64, align 64, offset 0] [from ] !81 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__flags", i32 49, i64 32, i64 32, i64 64, i32 0, metadata !11} ; [ DW_TAG_member ] [__flags] [line 49, size 32, align 32, offset 64] [from int] @@ -346,7 +348,7 @@ attributes #3 = { nounwind } !83 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__FuncPtr", i32 49, i64 64, i64 64, i64 128, i32 0, metadata !30} ; [ DW_TAG_member ] [__FuncPtr] [line 49, size 64, align 64, offset 128] [from ] !84 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__descriptor", i32 49, i64 64, i64 64, i64 192, i32 0, metadata !85} ; [ DW_TAG_member ] [__descriptor] [line 49, size 64, align 64, offset 192] [from ] !85 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !86} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from 
__block_descriptor_withcopydispose] -!86 = metadata !{i32 786451, metadata !1, null, metadata !"__block_descriptor_withcopydispose", i32 49, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [__block_descriptor_withcopydispose] [line 49, size 0, align 0, offset 0] [fwd] [from ] +!86 = metadata !{i32 786451, metadata !1, null, metadata !"__block_descriptor_withcopydispose", i32 49, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_descriptor_withcopydispose] [line 49, size 0, align 0, offset 0] [decl] [from ] !87 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"self", i32 49, i64 64, i64 64, i64 256, i32 0, metadata !61} ; [ DW_TAG_member ] [self] [line 49, size 64, align 64, offset 256] [from ] !88 = metadata !{i32 49, i32 0, metadata !27, null} !89 = metadata !{i32 786688, metadata !27, metadata !"self", metadata !32, i32 52, metadata !23, i32 0, i32 0, i64 2, i64 1, i64 32} ; [ DW_TAG_auto_variable ] [self] [line 52] @@ -354,10 +356,10 @@ attributes #3 = { nounwind } !91 = metadata !{i32 786688, metadata !92, metadata !"d", metadata !6, i32 50, metadata !93, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [d] [line 50] !92 = metadata !{i32 786443, metadata !5, metadata !27, i32 49, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m] !93 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !94} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from NSMutableDictionary] -!94 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"NSMutableDictionary", i32 30, i64 0, i64 8, i32 0, i32 0, null, metadata !95, i32 16, null, null} ; [ DW_TAG_structure_type ] [NSMutableDictionary] [line 30, size 0, align 8, offset 0] [from ] +!94 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"NSMutableDictionary", i32 30, i64 0, i64 8, i32 0, i32 0, null, metadata !95, i32 16, null, 
null, null} ; [ DW_TAG_structure_type ] [NSMutableDictionary] [line 30, size 0, align 8, offset 0] [def] [from ] !95 = metadata !{metadata !96} !96 = metadata !{i32 786460, null, metadata !94, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !97} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from NSDictionary] -!97 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"NSDictionary", i32 26, i64 0, i64 8, i32 0, i32 0, null, metadata !98, i32 16, null, null} ; [ DW_TAG_structure_type ] [NSDictionary] [line 26, size 0, align 8, offset 0] [from ] +!97 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"NSDictionary", i32 26, i64 0, i64 8, i32 0, i32 0, null, metadata !98, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [NSDictionary] [line 26, size 0, align 8, offset 0] [def] [from ] !98 = metadata !{metadata !99} !99 = metadata !{i32 786460, null, metadata !97, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from NSObject] !100 = metadata !{i32 50, i32 0, metadata !92, null} @@ -370,3 +372,4 @@ attributes #3 = { nounwind } !107 = metadata !{i32 786688, metadata !36, metadata !"a", metadata !6, i32 61, metadata !61, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 61] !108 = metadata !{i32 61, i32 0, metadata !36, null} !109 = metadata !{i32 62, i32 0, metadata !36, null} +!110 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/debug-info-static-member.ll b/test/DebugInfo/X86/debug-info-static-member.ll index 02b8ae0..1792bb4 100644 --- a/test/DebugInfo/X86/debug-info-static-member.ll +++ b/test/DebugInfo/X86/debug-info-static-member.ll @@ -57,18 +57,19 @@ entry: declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!34} !0 = metadata !{i32 786449, metadata !33, i32 4, metadata !"clang version 3.3 (trunk 171914)", i1 false, metadata !"", i32 0, metadata !1, 
metadata !1, metadata !3, metadata !10, metadata !10, metadata !""} ; [ DW_TAG_compile_unit ] [/home/probinson/projects/upstream/static-member/test/debug-info-static-member.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 786478, metadata !33, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 18, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 23} ; [ DW_TAG_subprogram ] [line 18] [def] [scope 23] [main] !6 = metadata !{i32 786473, metadata !33} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9} !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !10 = metadata !{metadata !12, metadata !27, metadata !28} !12 = metadata !{i32 786484, i32 0, metadata !13, metadata !"a", metadata !"a", metadata !"_ZN1C1aE", metadata !6, i32 14, metadata !9, i32 0, i32 1, i32* @_ZN1C1aE, metadata !15} ; [ DW_TAG_variable ] [a] [line 14] [def] -!13 = metadata !{i32 786434, metadata !33, null, metadata !"C", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !14, i32 0, null, null} ; [ DW_TAG_class_type ] [C] [line 1, size 32, align 32, offset 0] [from ] +!13 = metadata !{i32 786434, metadata !33, null, metadata !"C", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !14, i32 0, null, null, null} ; [ DW_TAG_class_type ] [C] [line 1, size 32, align 32, offset 0] [def] [from ] !14 = metadata !{metadata !15, metadata !16, metadata 
!19, metadata !20, metadata !23, metadata !24, metadata !26} !15 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"a", i32 3, i64 0, i64 0, i64 0, i32 4097, metadata !9, null} ; [ DW_TAG_member ] [a] [line 3, size 0, align 0, offset 0] [private] [static] [from int] !16 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"const_a", i32 4, i64 0, i64 0, i64 0, i32 4097, metadata !17, i1 true} ; [ DW_TAG_member ] [const_a] [line 4, size 0, align 0, offset 0] [private] [static] [from ] @@ -120,7 +121,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone ; PRESENT: DW_TAG_member ; PRESENT-NEXT: DW_AT_name {{.*}} "const_c" ; PRESENT: DW_AT_accessibility [DW_FORM_data1] (0x01) -; PRESENT: DW_AT_const_value {{.*}} (0x00000012) +; PRESENT: DW_AT_const_value {{.*}} (18) ; While we're here, a normal member has data_member_location and ; accessibility attributes. ; PRESENT: DW_TAG_member @@ -151,7 +152,6 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone ; DARWINP: DW_AT_external ; DARWINP: DW_AT_declaration ; DARWINP: DW_AT_accessibility [DW_FORM_data1] (0x03) -; DARWINP: DW_AT_MIPS_linkage_name {{.*}} "_ZN1C1aE" ; DARWINP: DW_TAG_member ; DARWINP-NEXT: DW_AT_name {{.*}} "const_a" ; DARWINP: DW_AT_external @@ -161,7 +161,6 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone ; DARWINP: 0x[[DECL_B:[0-9a-f]+]]: DW_TAG_member ; DARWINP-NEXT: DW_AT_name {{.*}} "b" ; DARWINP: DW_AT_accessibility [DW_FORM_data1] (0x02) -; DARWINP: DW_AT_MIPS_linkage_name {{.*}} "_ZN1C1bE" ; DARWINP: DW_TAG_member ; DARWINP-NEXT: DW_AT_name {{.*}} "const_b" ; DARWINP: DW_AT_accessibility [DW_FORM_data1] (0x02) @@ -169,11 +168,10 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone ; DARWINP: 0x[[DECL_C:[0-9a-f]+]]: DW_TAG_member ; DARWINP-NEXT: DW_AT_name {{.*}} "c" ; DARWINP: DW_AT_accessibility [DW_FORM_data1] (0x01) -; DARWINP: DW_AT_MIPS_linkage_name {{.*}} "_ZN1C1cE" ; DARWINP: DW_TAG_member 
; DARWINP-NEXT: DW_AT_name {{.*}} "const_c" ; DARWINP: DW_AT_accessibility [DW_FORM_data1] (0x01) -; DARWINP: DW_AT_const_value {{.*}} (0x00000012) +; DARWINP: DW_AT_const_value {{.*}} (18) ; While we're here, a normal member has data_member_location and ; accessibility attributes. ; DARWINP: DW_TAG_member @@ -255,3 +253,4 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone ; DARWINA-NOT: DW_AT_const_value ; DARWINA-NOT: DW_AT_location ; DARWINA: NULL +!34 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/debug_frame.ll b/test/DebugInfo/X86/debug_frame.ll index 622e5d3..67f2e5d 100644 --- a/test/DebugInfo/X86/debug_frame.ll +++ b/test/DebugInfo/X86/debug_frame.ll @@ -10,11 +10,13 @@ entry: } !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!7} !5 = metadata !{metadata !0} -!0 = metadata !{i32 786478, metadata !6, metadata !1, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ] +!0 = metadata !{i32 786478, metadata !6, metadata !1, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [f] !1 = metadata !{i32 786473, metadata !6} ; [ DW_TAG_file_type ] !2 = metadata !{i32 786449, metadata !6, i32 12, metadata !"clang version 3.0 ()", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !5, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !6, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !6, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = 
metadata !{null} !6 = metadata !{metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/llvm/build"} +!7 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/dwarf-aranges-no-dwarf-labels.ll b/test/DebugInfo/X86/dwarf-aranges-no-dwarf-labels.ll new file mode 100644 index 0000000..42a57bf --- /dev/null +++ b/test/DebugInfo/X86/dwarf-aranges-no-dwarf-labels.ll @@ -0,0 +1,89 @@ +; RUN: llc < %s | FileCheck %s + +; CHECK: .short 2 # DWARF Arange version number +; CHECK: # Segment Size +; CHECK-NOT: debug_loc +; CHECK: .quad global +; CHECK-NOT: debug_loc +; CHECK: # ARange terminator + +; --- Source code --- +; Generated with "clang -g -O1 -S -emit-llvm" + +; int global = 2; +; int foo(int bar) { return bar; } +; int foo2(int bar2) { return bar2; } + +; int main() { +; return foo(2) + foo2(1) + global; +; } + + +; ModuleID = 'tmp/debug_ranges/a.cc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@global = global i32 2, align 4 + +; Function Attrs: nounwind readnone uwtable +define i32 @_Z3fooi(i32 %bar) #0 { +entry: + tail call void @llvm.dbg.value(metadata !{i32 %bar}, i64 0, metadata !10), !dbg !20 + ret i32 %bar, !dbg !20 +} + +; Function Attrs: nounwind readnone uwtable +define i32 @_Z4foo2i(i32 %bar2) #0 { +entry: + tail call void @llvm.dbg.value(metadata !{i32 %bar2}, i64 0, metadata !13), !dbg !21 + ret i32 %bar2, !dbg !21 +} + +; Function Attrs: nounwind readonly uwtable +define i32 @main() #1 { +entry: + %call = tail call i32 @_Z3fooi(i32 2), !dbg !22 + %call1 = tail call i32 @_Z4foo2i(i32 1), !dbg !22 + %add = add nsw i32 %call1, %call, !dbg !22 + %0 = load i32* @global, align 4, !dbg !22, !tbaa !23 + %add2 = add nsw i32 %add, %0, !dbg !22 + ret i32 %add2, !dbg !22 +} + +; Function Attrs: nounwind readnone +declare void 
@llvm.dbg.value(metadata, i64, metadata) #2 + +attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readonly uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!19, !26} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (191881)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !17, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/debug_ranges/a.cc] [DW_LANG_C_plus_plus] +!1 = metadata !{metadata !"tmp/debug_ranges/a.cc", metadata !"/"} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4, metadata !11, metadata !14} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"_Z3fooi", i32 2, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @_Z3fooi, null, null, metadata !9, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [foo] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/debug_ranges/a.cc] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{metadata !8, metadata !8} +!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{metadata !10} +!10 = metadata !{i32 786689, metadata !4, metadata !"bar", metadata !5, i32 16777218, metadata !8, 
i32 0, i32 0} ; [ DW_TAG_arg_variable ] [bar] [line 2] +!11 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo2", metadata !"foo2", metadata !"_Z4foo2i", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @_Z4foo2i, null, null, metadata !12, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [foo2] +!12 = metadata !{metadata !13} +!13 = metadata !{i32 786689, metadata !11, metadata !"bar2", metadata !5, i32 16777219, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [bar2] [line 3] +!14 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 5, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @main, null, null, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [main] +!15 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!16 = metadata !{metadata !8} +!17 = metadata !{metadata !18} +!18 = metadata !{i32 786484, i32 0, null, metadata !"global", metadata !"global", metadata !"", metadata !5, i32 1, metadata !8, i32 0, i32 1, i32* @global, null} ; [ DW_TAG_variable ] [global] [line 1] [def] +!19 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!20 = metadata !{i32 2, i32 0, metadata !4, null} +!21 = metadata !{i32 3, i32 0, metadata !11, null} +!22 = metadata !{i32 6, i32 0, metadata !14, null} +!23 = metadata !{metadata !"int", metadata !24} +!24 = metadata !{metadata !"omnipotent char", metadata !25} +!25 = metadata !{metadata !"Simple C/C++ TBAA"} +!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/dwarf-aranges.ll b/test/DebugInfo/X86/dwarf-aranges.ll new file mode 100644 index 0000000..203afc7 --- /dev/null +++ b/test/DebugInfo/X86/dwarf-aranges.ll @@ -0,0 +1,87 @@ +; RUN: llc < %s | FileCheck %s + + +; -- header -- +; CHECK: 
.short 2 # DWARF Arange version number +; CHECK-NEXT: .long .L.debug_info_begin0 +; CHECK-NEXT: .byte 8 # Address Size (in bytes) +; CHECK-NEXT: .byte 0 # Segment Size (in bytes) +; -- alignment -- +; CHECK-NEXT: .byte +; CHECK-NEXT: .byte +; CHECK-NEXT: .byte +; CHECK-NEXT: .byte + +; <common symbols> - it should have made one span for each symbol. +; CHECK-NEXT: .quad some_bss +; CHECK-NEXT: .quad 4 + +; <data section> - it should have made one span covering all vars in this CU. +; CHECK-NEXT: .quad some_data +; CHECK-NEXT: .Lset0 = .Ldebug_end1-some_data +; CHECK-NEXT: .quad .Lset0 + +; <text section> - it should have made one span covering all functions in this CU. +; CHECK-NEXT: .quad .Lfunc_begin0 +; CHECK-NEXT: .Lset1 = .Ldebug_end2-.Lfunc_begin0 +; CHECK-NEXT: .quad .Lset1 + +; <other sections> - it should have made one span covering all vars in this CU. +; CHECK-NEXT: .quad some_other +; CHECK-NEXT: .Lset2 = .Ldebug_end3-some_other +; CHECK-NEXT: .quad .Lset2 + +; -- finish -- +; CHECK-NEXT: # ARange terminator + + + +; -- source code -- +; Generated from: "clang -c -g -emit-llvm" +; +; int some_data = 4; +; int some_bss; +; int some_other __attribute__ ((section ("strange+section"))) = 5; +; +; void some_code() +; { +; some_bss += some_data + some_other; +; } + +target triple = "x86_64-unknown-linux-gnu" + +@some_data = global i32 4, align 4 +@some_other = global i32 5, section "strange+section", align 4 +@some_bss = common global i32 0, align 4 + +define void @some_code() { +entry: + %0 = load i32* @some_data, align 4, !dbg !14 + %1 = load i32* @some_other, align 4, !dbg !14 + %add = add nsw i32 %0, %1, !dbg !14 + %2 = load i32* @some_bss, align 4, !dbg !14 + %add1 = add nsw i32 %2, %add, !dbg !14 + store i32 %add1, i32* @some_bss, align 4, !dbg !14 + ret void, !dbg !15 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!13, !16} + +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata 
!2, metadata !2, metadata !3, metadata !8, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/home/kayamon/test.c] [DW_LANG_C99] +!1 = metadata !{metadata !"test.c", metadata !"/home/kayamon"} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"some_code", metadata !"some_code", metadata !"", i32 5, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @some_code, null, null, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 6] [some_code] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/home/kayamon/test.c] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{null} +!8 = metadata !{metadata !9, metadata !11, metadata !12} +!9 = metadata !{i32 786484, i32 0, null, metadata !"some_data", metadata !"some_data", metadata !"", metadata !5, i32 1, metadata !10, i32 0, i32 1, i32* @some_data, null} ; [ DW_TAG_variable ] [some_data] [line 1] [def] +!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!11 = metadata !{i32 786484, i32 0, null, metadata !"some_other", metadata !"some_other", metadata !"", metadata !5, i32 3, metadata !10, i32 0, i32 1, i32* @some_other, null} ; [ DW_TAG_variable ] [some_other] [line 3] [def] +!12 = metadata !{i32 786484, i32 0, null, metadata !"some_bss", metadata !"some_bss", metadata !"", metadata !5, i32 2, metadata !10, i32 0, i32 1, i32* @some_bss, null} ; [ DW_TAG_variable ] [some_bss] [line 2] [def] +!13 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!14 = metadata !{i32 7, i32 0, metadata !4, null} +!15 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ] +!16 = 
metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/dwarf-public-names.ll b/test/DebugInfo/X86/dwarf-public-names.ll new file mode 100644 index 0000000..d66e5a0 --- /dev/null +++ b/test/DebugInfo/X86/dwarf-public-names.ll @@ -0,0 +1,132 @@ +; RUN: llc -mtriple=x86_64-pc-linux-gnu -filetype=obj -o %t.o < %s +; RUN: llvm-dwarfdump -debug-dump=pubnames %t.o | FileCheck --check-prefix=LINUX %s +; RUN: llc -mtriple=x86_64-apple-darwin12 -filetype=obj -o %t.o < %s +; RUN: llvm-dwarfdump -debug-dump=pubnames %t.o | FileCheck --check-prefix=DARWIN %s +; ModuleID = 'dwarf-public-names.cpp' +; +; Generated from: +; +; struct C { +; void member_function(); +; static int static_member_function(); +; static int static_member_variable; +; }; +; +; int C::static_member_variable = 0; +; +; void C::member_function() { +; static_member_variable = 0; +; } +; +; int C::static_member_function() { +; return static_member_variable; +; } +; +; C global_variable; +; +; int global_function() { +; return -1; +; } +; +; namespace ns { +; void global_namespace_function() { +; global_variable.member_function(); +; } +; int global_namespace_variable = 1; +; } + +; Darwin shouldn't be generating the section by default +; DARWIN: debug_pubnames +; DARWIN: unit_size = 0x00000000 + +; Skip the output to the header of the pubnames section. +; LINUX: debug_pubnames + +; Check for each name in the output. 
+; LINUX: global_namespace_variable +; LINUX: global_namespace_function +; LINUX: static_member_function +; LINUX: global_variable +; LINUX: global_function +; LINUX: member_function + +%struct.C = type { i8 } + +@_ZN1C22static_member_variableE = global i32 0, align 4 +@global_variable = global %struct.C zeroinitializer, align 1 +@_ZN2ns25global_namespace_variableE = global i32 1, align 4 + +define void @_ZN1C15member_functionEv(%struct.C* %this) nounwind uwtable align 2 { +entry: + %this.addr = alloca %struct.C*, align 8 + store %struct.C* %this, %struct.C** %this.addr, align 8 + call void @llvm.dbg.declare(metadata !{%struct.C** %this.addr}, metadata !28), !dbg !30 + %this1 = load %struct.C** %this.addr + store i32 0, i32* @_ZN1C22static_member_variableE, align 4, !dbg !31 + ret void, !dbg !32 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +define i32 @_ZN1C22static_member_functionEv() nounwind uwtable align 2 { +entry: + %0 = load i32* @_ZN1C22static_member_variableE, align 4, !dbg !33 + ret i32 %0, !dbg !33 +} + +define i32 @_Z15global_functionv() nounwind uwtable { +entry: + ret i32 -1, !dbg !34 +} + +define void @_ZN2ns25global_namespace_functionEv() nounwind uwtable { +entry: + call void @_ZN1C15member_functionEv(%struct.C* @global_variable), !dbg !35 + ret void, !dbg !36 +} + +attributes #0 = { nounwind uwtable } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!38} + +!0 = metadata !{i32 786449, metadata !37, i32 4, metadata !"clang version 3.3 (http://llvm.org/git/clang.git a09cd8103a6a719cb2628cdf0c91682250a17bd2) (http://llvm.org/git/llvm.git 47d03cec0afca0c01ae42b82916d1d731716cd20)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !24, metadata !24, metadata !""} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp] [DW_LANG_C_plus_plus] +!1 = metadata !{i32 0} +!2 = metadata !{metadata !3, metadata !18, metadata !19, metadata !20} 
+!3 = metadata !{i32 786478, metadata !4, null, metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", i32 9, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.C*)* @_ZN1C15member_functionEv, null, metadata !12, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [member_function] +!4 = metadata !{i32 786473, metadata !37} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!6 = metadata !{null, metadata !7} +!7 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from C] +!8 = metadata !{i32 786451, metadata !37, null, metadata !"C", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [C] [line 1, size 8, align 8, offset 0] [def] [from ] +!9 = metadata !{metadata !10, metadata !12, metadata !14} +!10 = metadata !{i32 786445, metadata !37, metadata !8, metadata !"static_member_variable", i32 4, i64 0, i64 0, i64 0, i32 4096, metadata !11, null} ; [ DW_TAG_member ] [static_member_variable] [line 4, size 0, align 0, offset 0] [static] [from int] +!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!12 = metadata !{i32 786478, metadata !4, metadata !8, metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", i32 2, metadata !5, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !13, i32 2} ; [ DW_TAG_subprogram ] [line 2] [member_function] +!13 = metadata !{i32 786468} ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0] +!14 = 
metadata !{i32 786478, metadata !4, metadata !8, metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 3, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !17, i32 3} ; [ DW_TAG_subprogram ] [line 3] [static_member_function] +!15 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!16 = metadata !{metadata !11} +!17 = metadata !{i32 786468} ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0] +!18 = metadata !{i32 786478, metadata !4, null, metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 13, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_ZN1C22static_member_functionEv, null, metadata !14, metadata !1, i32 13} ; [ DW_TAG_subprogram ] [line 13] [def] [static_member_function] +!19 = metadata !{i32 786478, metadata !4, metadata !4, metadata !"global_function", metadata !"global_function", metadata !"_Z15global_functionv", i32 19, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z15global_functionv, null, null, metadata !1, i32 19} ; [ DW_TAG_subprogram ] [line 19] [def] [global_function] +!20 = metadata !{i32 786478, metadata !4, metadata !21, metadata !"global_namespace_function", metadata !"global_namespace_function", metadata !"_ZN2ns25global_namespace_functionEv", i32 24, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZN2ns25global_namespace_functionEv, null, null, metadata !1, i32 24} ; [ DW_TAG_subprogram ] [line 24] [def] [global_namespace_function] +!21 = metadata !{i32 786489, null, metadata !"ns", metadata !4, i32 23} ; [ DW_TAG_namespace ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp] +!22 = metadata !{i32 786453, 
i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!23 = metadata !{null} +!24 = metadata !{metadata !25, metadata !26, metadata !27} +!25 = metadata !{i32 786484, i32 0, metadata !8, metadata !"static_member_variable", metadata !"static_member_variable", metadata !"_ZN1C22static_member_variableE", metadata !4, i32 7, metadata !11, i32 0, i32 1, i32* @_ZN1C22static_member_variableE, metadata !10} ; [ DW_TAG_variable ] [static_member_variable] [line 7] [def] +!26 = metadata !{i32 786484, i32 0, null, metadata !"global_variable", metadata !"global_variable", metadata !"", metadata !4, i32 17, metadata !8, i32 0, i32 1, %struct.C* @global_variable, null} ; [ DW_TAG_variable ] [global_variable] [line 17] [def] +!27 = metadata !{i32 786484, i32 0, metadata !21, metadata !"global_namespace_variable", metadata !"global_namespace_variable", metadata !"_ZN2ns25global_namespace_variableE", metadata !4, i32 27, metadata !11, i32 0, i32 1, i32* @_ZN2ns25global_namespace_variableE, null} ; [ DW_TAG_variable ] [global_namespace_variable] [line 27] [def] +!28 = metadata !{i32 786689, metadata !3, metadata !"this", metadata !4, i32 16777225, metadata !29, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 9] +!29 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from C] +!30 = metadata !{i32 9, i32 0, metadata !3, null} +!31 = metadata !{i32 10, i32 0, metadata !3, null} +!32 = metadata !{i32 11, i32 0, metadata !3, null} +!33 = metadata !{i32 14, i32 0, metadata !18, null} +!34 = metadata !{i32 20, i32 0, metadata !19, null} +!35 = metadata !{i32 25, i32 0, metadata !20, null} +!36 = metadata !{i32 26, i32 0, metadata !20, null} +!37 = metadata !{metadata !"dwarf-public-names.cpp", metadata !"/usr2/kparzysz/s.hex/t"} +!38 = metadata !{i32 
1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/dwarf-pubnames-split.ll b/test/DebugInfo/X86/dwarf-pubnames-split.ll new file mode 100644 index 0000000..131e5aa --- /dev/null +++ b/test/DebugInfo/X86/dwarf-pubnames-split.ll @@ -0,0 +1,38 @@ +; RUN: llc -mtriple=x86_64-pc-linux-gnu -split-dwarf=Enable %s -o - | FileCheck %s +; Derived from: + +; int main (void) { +; return 0; +; } + +; Check that we get a symbol off of the debug_info section when using split dwarf and pubnames. + +; CHECK: .Lpubtypes_begin0: +; CHECK-NEXT: .short 2 # DWARF Version +; CHECK-NEXT: .long .L.debug_info_begin0 # Offset of Compilation Unit Info + +; Function Attrs: nounwind uwtable +define i32 @main() #0 { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + ret i32 0, !dbg !10 +} + +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!9, !11} + +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 189287) (llvm/trunk 189296)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.c] [DW_LANG_C99] +!1 = metadata !{metadata !"foo.c", metadata !"/usr/local/google/home/echristo/tmp"} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [main] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/foo.c] +!6 = metadata !{i32 
786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{metadata !8} +!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 3} +!10 = metadata !{i32 2, i32 0, metadata !4, null} +!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/earlydup-crash.ll b/test/DebugInfo/X86/earlydup-crash.ll index 1e66264..b5dc01e 100644 --- a/test/DebugInfo/X86/earlydup-crash.ll +++ b/test/DebugInfo/X86/earlydup-crash.ll @@ -43,12 +43,13 @@ bb33: ; preds = %bb31, %bb22, %bb18, declare void @foobar(i32) !llvm.dbg.cu = !{!4} +!llvm.module.flags = !{!47} !0 = metadata !{i32 590080, metadata !1, metadata !"frname_len", metadata !3, i32 517, metadata !38, i32 0} ; [ DW_TAG_auto_variable ] !1 = metadata !{i32 589835, metadata !44, metadata !2, i32 515, i32 0, i32 19} ; [ DW_TAG_lexical_block ] !2 = metadata !{i32 589870, metadata !44, null, metadata !"framework_construct_pathname", metadata !"framework_construct_pathname", metadata !"", i32 515, metadata !5, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8* (i8*, %struct.cpp_dir*)* @framework_construct_pathname, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !3 = metadata !{i32 589865, metadata !44} ; [ DW_TAG_file_type ] !4 = metadata !{i32 589841, metadata !44, i32 1, metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !46, metadata !46, metadata !45, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!5 = metadata !{i32 589845, metadata !44, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ] +!5 = metadata !{i32 589845, metadata !44, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !6 = metadata !{metadata !7, metadata !9, metadata !11} !7 = metadata !{i32 589839, metadata !44, metadata !3, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] !8 = metadata !{i32 589860, metadata !44, metadata !3, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] @@ -57,7 +58,7 @@ declare void @foobar(i32) !11 = metadata !{i32 589839, metadata !44, metadata !3, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ] !12 = metadata !{i32 589846, metadata !41, metadata !13, metadata !"cpp_dir", i32 45, i64 0, i64 0, i64 0, i32 0, metadata !14} ; [ DW_TAG_typedef ] !13 = metadata !{i32 589865, metadata !41} ; [ DW_TAG_file_type ] -!14 = metadata !{i32 589843, metadata !41, metadata !3, metadata !"cpp_dir", i32 43, i64 352, i64 32, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_structure_type ] +!14 = metadata !{i32 589843, metadata !41, metadata !3, metadata !"cpp_dir", i32 43, i64 352, i64 32, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [cpp_dir] [line 43, size 352, align 32, offset 0] [def] [from ] !15 = metadata !{metadata !16, metadata !18, metadata !19, metadata !21, metadata !23, metadata !25, metadata !27, metadata !29, metadata !33, metadata !36} !16 = metadata !{i32 589837, metadata !41, metadata !14, metadata !"next", i32 572, i64 32, i64 32, i64 0, i32 0, metadata 
!17} ; [ DW_TAG_member ] !17 = metadata !{i32 589839, metadata !44, metadata !3, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ] @@ -90,3 +91,4 @@ declare void @foobar(i32) !44 = metadata !{metadata !"darwin-c.c", metadata !"/Users/espindola/llvm/build-llvm-gcc/gcc/../../llvm-gcc-4.2/gcc/config"} !45 = metadata !{metadata !2} !46 = metadata !{i32 0} +!47 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/eh_symbol.ll b/test/DebugInfo/X86/eh_symbol.ll index 9ab95b1..172ca92 100644 --- a/test/DebugInfo/X86/eh_symbol.ll +++ b/test/DebugInfo/X86/eh_symbol.ll @@ -9,14 +9,16 @@ entry: } !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!9} !llvm.dbg.sp = !{!0} -!0 = metadata !{i32 589870, metadata !6, metadata !1, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 ()* @f, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!0 = metadata !{i32 589870, metadata !6, metadata !1, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @f, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 0] [f] !1 = metadata !{i32 589865, metadata !6} ; [ DW_TAG_file_type ] !2 = metadata !{i32 589841, metadata !6, i32 12, metadata !"clang version 3.0 ()", i1 true, metadata !"", i32 0, metadata !7, metadata !7, metadata !8, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !6, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 589845, metadata !6, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5} !5 = metadata !{i32 589860, null, metadata !2, metadata !"int", i32 
0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !6 = metadata !{metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/tmpfs/build"} !7 = metadata !{i32 0} !8 = metadata !{metadata !0} +!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/elf-names.ll b/test/DebugInfo/X86/elf-names.ll index 7bc532e..7b38fde 100644 --- a/test/DebugInfo/X86/elf-names.ll +++ b/test/DebugInfo/X86/elf-names.ll @@ -3,12 +3,15 @@ ; RUN: llvm-as < %s | llvm-dis | FileCheck --check-prefix=CHECK-DIS %s ; CHECK: 0x0000000b: DW_TAG_compile_unit -; CHECK: 0x00000012: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000035] = "foo.cpp") -; CHECK: 0x0000003c: DW_TAG_class_type -; CHECK: 0x0000003d: DW_AT_name [DW_FORM_strp] ( .debug_str[0x0000006d] = "D") -; CHECK: 0x00000044: DW_TAG_member -; CHECK: 0x00000045: DW_AT_name [DW_FORM_strp] ( .debug_str[0x0000005d] = "c1") -; CHECK: 0x0000008d: DW_AT_artificial [DW_FORM_flag_present] (true) +; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000035] = "foo.cpp") +; CHECK: 0x{{[0-9a-f]+}}: DW_TAG_class_type +; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-f]+}}] = "D") +; CHECK: 0x{{[0-9a-f]+}}: DW_TAG_member +; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-f]+}}] = "c1") +; CHECK: DW_TAG_subprogram +; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-f]+}}] = "D") +; CHECK: DW_TAG_formal_parameter +; CHECK: DW_AT_artificial [DW_FORM_flag_present] (true) ; CHECK-DIS: [artificial] @@ -57,16 +60,17 @@ entry: declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!54} !0 = metadata !{i32 786449, metadata !53, i32 4, metadata !"clang version 3.2 (trunk 167506) (llvm/trunk 167505)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/foo.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{i32 0} 
!3 = metadata !{metadata !5, metadata !31} !5 = metadata !{i32 786478, metadata !6, null, metadata !"D", metadata !"D", metadata !"_ZN1DC2Ev", i32 12, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (%class.D*)* @_ZN1DC2Ev, null, metadata !17, metadata !27, i32 12} ; [ DW_TAG_subprogram ] [line 12] [def] [D] !6 = metadata !{i32 786473, metadata !53} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{null, metadata !9} -!9 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from D] -!10 = metadata !{i32 786434, metadata !53, null, metadata !"D", i32 1, i64 128, i64 32, i32 0, i32 0, null, metadata !11, i32 0, null, null} ; [ DW_TAG_class_type ] [D] [line 1, size 128, align 32, offset 0] [from ] +!9 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from D] +!10 = metadata !{i32 786434, metadata !53, null, metadata !"D", i32 1, i64 128, i64 32, i32 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_class_type ] [D] [line 1, size 128, align 32, offset 0] [def] [from ] !11 = metadata !{metadata !12, metadata !14, metadata !15, metadata !16, metadata !17, metadata !20} !12 = metadata !{i32 786445, metadata !53, metadata !10, metadata !"c1", i32 6, i64 32, i64 32, i64 0, i32 1, metadata !13} ; [ DW_TAG_member ] [c1] [line 6, size 32, align 32, offset 0] [private] [from int] !13 = metadata !{i32 786468, null, null, 
metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] @@ -77,7 +81,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !18 = metadata !{metadata !19} !19 = metadata !{i32 786468} ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0] !20 = metadata !{i32 786478, metadata !6, metadata !10, metadata !"D", metadata !"D", metadata !"", i32 4, metadata !21, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !25, i32 4} ; [ DW_TAG_subprogram ] [line 4] [D] -!21 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!21 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !22 = metadata !{null, metadata !9, metadata !23} !23 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !24} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ] !24 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from D] @@ -107,3 +111,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !51 = metadata !{i32 23, i32 0, metadata !48, null} !52 = metadata !{i32 24, i32 0, metadata !48, null} !53 = metadata !{metadata !"foo.cpp", metadata !"/usr/local/google/home/echristo"} +!54 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/empty-and-one-elem-array.ll b/test/DebugInfo/X86/empty-and-one-elem-array.ll index ce3035e..a3a08f0 100644 --- a/test/DebugInfo/X86/empty-and-one-elem-array.ll +++ b/test/DebugInfo/X86/empty-and-one-elem-array.ll @@ 
-28,61 +28,68 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone ; An empty array should not have an AT_upper_bound attribute. But an array of 1 ; should. -; CHECK: 0x00000074: DW_TAG_base_type [5] -; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000043] = "int") +; CHECK: DW_TAG_base_type +; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[{{.*}}] = "int") ; CHECK-NEXT: DW_AT_encoding [DW_FORM_data1] (0x05) ; CHECK-NEXT: DW_AT_byte_size [DW_FORM_data1] (0x04) +; int foo::b[1]: +; CHECK: DW_TAG_structure_type +; CHECK: DW_AT_name{{.*}}"foo" +; CHECK: DW_TAG_member +; CHECK: DW_TAG_member +; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[{{.*}}] = "b") +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] + ; int[1]: -; CHECK: 0x00000082: DW_TAG_array_type [7] * -; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x0074 => {0x00000074}) -; CHECK: 0x00000087: DW_TAG_subrange_type [8] -; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x007b => {0x0000007b}) +; CHECK: DW_TAG_array_type [{{.*}}] * +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] +; CHECK: DW_TAG_subrange_type [{{.*}}] +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] ; CHECK-NEXT: DW_AT_upper_bound [DW_FORM_data1] (0x00) -; int foo::b[1]: -; CHECK: 0x000000a5: DW_TAG_member [10] -; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000050] = "b") -; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x0082 => {0x00000082}) +; int bar::b[0]: +; CHECK: DW_TAG_structure_type +; CHECK: DW_AT_name{{.*}}"bar" +; CHECK: DW_TAG_member +; CHECK: DW_TAG_member +; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[{{.*}}] = "b") +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] ; int[0]: -; CHECK: 0x000000b5: DW_TAG_array_type [7] * -; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x0074 => {0x00000074}) -; CHECK: 0x000000ba: DW_TAG_subrange_type [11] -; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x007b => {0x0000007b}) +; CHECK: DW_TAG_array_type [{{.*}}] * +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] +; CHECK: DW_TAG_subrange_type 
[11] +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] ; CHECK-NOT: DW_AT_upper_bound -; int bar::b[0]: -; CHECK: 0x000000d7: DW_TAG_member [10] -; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000050] = "b") -; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x00b5 => {0x000000b5}) - !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!33} !0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"clang version 3.3 (trunk 169136)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/test.c] [DW_LANG_C99] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"func", metadata !"func", metadata !"", i32 11, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @func, null, null, metadata !1, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [func] !6 = metadata !{i32 786473, metadata !32} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9} !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !10 = metadata !{i32 786688, metadata !11, metadata !"my_foo", metadata !6, i32 12, metadata !12, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [my_foo] [line 12] !11 = metadata !{i32 786443, metadata !6, metadata !5, i32 11, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/Volumes/Sandbox/llvm/test.c] -!12 = metadata !{i32 786451, metadata !32, null, metadata !"foo", i32 1, i64 64, i64 32, 
i32 0, i32 0, null, metadata !13, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [foo] [line 1, size 64, align 32, offset 0] [from ] +!12 = metadata !{i32 786451, metadata !32, null, metadata !"foo", i32 1, i64 64, i64 32, i32 0, i32 0, null, metadata !13, i32 0, null, i32 0, null} ; [ DW_TAG_structure_type ] [foo] [line 1, size 64, align 32, offset 0] [def] [from ] !13 = metadata !{metadata !14, metadata !15} !14 = metadata !{i32 786445, metadata !32, metadata !12, metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int] !15 = metadata !{i32 786445, metadata !32, metadata !12, metadata !"b", i32 3, i64 32, i64 32, i64 32, i32 0, metadata !16} ; [ DW_TAG_member ] [b] [line 3, size 32, align 32, offset 32] [from ] -!16 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 32, i64 32, i32 0, i32 0, metadata !9, metadata !17, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 32, align 32, offset 0] [from int] +!16 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 32, i64 32, i32 0, i32 0, metadata !9, metadata !17, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 32, align 32, offset 0] [from int] !17 = metadata !{metadata !18} !18 = metadata !{i32 786465, i64 0, i64 1} ; [ DW_TAG_subrange_type ] [0, 1] !19 = metadata !{i32 12, i32 0, metadata !11, null} !20 = metadata !{i32 786688, metadata !11, metadata !"my_bar", metadata !6, i32 13, metadata !21, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [my_bar] [line 13] -!21 = metadata !{i32 786451, metadata !32, null, metadata !"bar", i32 6, i64 32, i64 32, i32 0, i32 0, null, metadata !22, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [bar] [line 6, size 32, align 32, offset 0] [from ] +!21 = metadata !{i32 786451, metadata !32, null, metadata !"bar", i32 6, i64 32, i64 32, i32 0, i32 0, null, metadata !22, i32 0, null, i32 0, null} ; [ DW_TAG_structure_type ] [bar] [line 6, size 32, align 32, offset 0] 
[def] [from ] !22 = metadata !{metadata !23, metadata !24} !23 = metadata !{i32 786445, metadata !32, metadata !21, metadata !"a", i32 7, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [a] [line 7, size 32, align 32, offset 0] [from int] !24 = metadata !{i32 786445, metadata !32, metadata !21, metadata !"b", i32 8, i64 0, i64 32, i64 32, i32 0, metadata !25} ; [ DW_TAG_member ] [b] [line 8, size 0, align 32, offset 32] [from ] -!25 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !9, metadata !26, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int] +!25 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !9, metadata !26, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int] !26 = metadata !{metadata !27} !27 = metadata !{i32 786465, i64 0, i64 0} ; [ DW_TAG_subrange_type ] [0, 0] !28 = metadata !{i32 13, i32 0, metadata !11, null} @@ -90,3 +97,4 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !30 = metadata !{i32 16, i32 0, metadata !11, null} !31 = metadata !{i32 17, i32 0, metadata !11, null} !32 = metadata !{metadata !"test.c", metadata !"/Volumes/Sandbox/llvm"} +!33 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/empty-array.ll b/test/DebugInfo/X86/empty-array.ll index 1f46281..2436467 100644 --- a/test/DebugInfo/X86/empty-array.ll +++ b/test/DebugInfo/X86/empty-array.ll @@ -6,40 +6,44 @@ @a = global %class.A zeroinitializer, align 4 -; CHECK: 0x0000002d: DW_TAG_base_type [3] -; CHECK-NEXT: DW_AT_name -; CHECK-NEXT: DW_AT_byte_size [DW_FORM_data1] (0x04) -; CHECK-NEXT: DW_AT_encoding [DW_FORM_data1] (0x05) +; CHECK: DW_TAG_class_type +; CHECK: DW_TAG_member +; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-f]*}}] = "x") +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x{{[0-9a-f]*}} => 
{[[ARRAY:0x[0-9a-f]*]]}) -; CHECK: 0x00000034: DW_TAG_array_type [4] * -; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x0026 => {0x00000026}) +; CHECK: [[ARRAY]]: DW_TAG_array_type [{{.*}}] * +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x{{[0-9a-f]*}} => {[[BASETYPE:0x[0-9a-f]*]]}) -; CHECK: 0x00000039: DW_TAG_subrange_type [5] -; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x002d => {0x0000002d}) +; CHECK: DW_TAG_subrange_type +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x{{[0-9a-f]*}} => {[[BASE2:0x[0-9a-f]*]]}) ; CHECK-NOT: DW_AT_upper_bound -; CHECK: DW_TAG_member [8] -; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x0000003f] = "x") -; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x0034 => {0x00000034}) +; CHECK: [[BASETYPE]]: DW_TAG_base_type +; CHECK: [[BASE2]]: DW_TAG_base_type +; CHECK-NEXT: DW_AT_name +; CHECK-NEXT: DW_AT_byte_size [DW_FORM_data1] (0x04) +; CHECK-NEXT: DW_AT_encoding [DW_FORM_data1] (0x05) !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!21} !0 = metadata !{i32 786449, metadata !20, i32 4, metadata !"clang version 3.3 (trunk 169136)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/t.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %class.A* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def] !6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786434, metadata !20, null, metadata !"A", i32 1, i64 0, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 0, align 32, offset 0] [from ] +!7 = metadata !{i32 786434, metadata !20, null, metadata !"A", i32 1, i64 0, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 0, align 32, 
offset 0] [def] [from ] !8 = metadata !{metadata !9, metadata !14} !9 = metadata !{i32 786445, metadata !20, metadata !7, metadata !"x", i32 1, i64 0, i64 0, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ] [x] [line 1, size 0, align 0, offset 0] [private] [from ] -!10 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !11, metadata !12, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int] +!10 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !11, metadata !12, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int] !11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !12 = metadata !{metadata !13} !13 = metadata !{i32 786465, i64 0, i64 -1} ; [ DW_TAG_subrange_type ] [unbound] !14 = metadata !{i32 786478, metadata !6, metadata !7, metadata !"A", metadata !"A", metadata !"", i32 1, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !18, i32 1} ; [ DW_TAG_subprogram ] [line 1] [A] -!15 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!15 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !16 = metadata !{null, metadata !17} -!17 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A] +!17 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !7} ; [ DW_TAG_pointer_type ] 
[line 0, size 64, align 64, offset 0] [from A] !18 = metadata !{metadata !19} !19 = metadata !{i32 786468} ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0] !20 = metadata !{metadata !"t.cpp", metadata !"/Volumes/Sandbox/llvm"} +!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/ending-run.ll b/test/DebugInfo/X86/ending-run.ll index b55ccc4..ae17fd0 100644 --- a/test/DebugInfo/X86/ending-run.ll +++ b/test/DebugInfo/X86/ending-run.ll @@ -27,13 +27,14 @@ entry: declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!20} !0 = metadata !{i32 786449, metadata !19, i32 12, metadata !"clang version 3.1 (trunk 153921) (llvm/trunk 153916)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 786478, metadata !19, metadata !6, metadata !"callee", metadata !"callee", metadata !"", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 (i32)* @callee, null, null, metadata !10, i32 7} ; [ DW_TAG_subprogram ] !6 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9, metadata !9} !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !10 = metadata !{metadata !11} @@ -46,3 +47,4 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !17 = metadata !{i32 8, i32 18, metadata !15, null} !18 = metadata !{i32 9, i32 5, metadata 
!15, null} !19 = metadata !{metadata !"ending-run.c", metadata !"/Users/echristo/tmp"} +!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/enum-class.ll b/test/DebugInfo/X86/enum-class.ll index af6129c..a31e254 100644 --- a/test/DebugInfo/X86/enum-class.ll +++ b/test/DebugInfo/X86/enum-class.ll @@ -6,19 +6,20 @@ @c = global i32 0, align 4 !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!23} !0 = metadata !{i32 786449, metadata !22, i32 4, metadata !"clang version 3.2 (trunk 157269) (llvm/trunk 157264)", i1 false, metadata !"", i32 0, metadata !1, metadata !15, metadata !15, metadata !17, metadata !17, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{metadata !3, metadata !8, metadata !12} -!3 = metadata !{i32 786436, metadata !4, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, metadata !5, metadata !6, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] +!3 = metadata !{i32 786436, metadata !4, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, metadata !5, metadata !6, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [A] [line 1, size 32, align 32, offset 0] [def] [from int] !4 = metadata !{i32 786473, metadata !22} ; [ DW_TAG_file_type ] !5 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !6 = metadata !{metadata !7} !7 = metadata !{i32 786472, metadata !"A1", i64 1} ; [ DW_TAG_enumerator ] -!8 = metadata !{i32 786436, metadata !4, null, metadata !"B", i32 2, i64 64, i64 64, i32 0, i32 0, metadata !9, metadata !10, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] +!8 = metadata !{i32 786436, metadata !4, null, metadata !"B", i32 2, i64 64, i64 64, i32 0, i32 0, metadata !9, metadata !10, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [B] [line 2, size 64, align 64, offset 0] [def] [from long unsigned int] !9 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ 
DW_TAG_base_type ] !10 = metadata !{metadata !11} !11 = metadata !{i32 786472, metadata !"B1", i64 1} ; [ DW_TAG_enumerator ] -!12 = metadata !{i32 786436, metadata !4, null, metadata !"C", i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] +!12 = metadata !{i32 786436, metadata !4, null, metadata !"C", i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [C] [line 3, size 32, align 32, offset 0] [def] [from ] !13 = metadata !{metadata !14} !14 = metadata !{i32 786472, metadata !"C1", i64 1} ; [ DW_TAG_enumerator ] !15 = metadata !{i32 0} @@ -28,16 +29,17 @@ !21 = metadata !{i32 786484, i32 0, null, metadata !"c", metadata !"c", metadata !"", metadata !4, i32 6, metadata !12, i32 0, i32 1, i32* @c, null} ; [ DW_TAG_variable ] !22 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo/tmp"} -; CHECK: DW_TAG_enumeration_type [3] -; CHECK: DW_AT_type [DW_FORM_ref4] (cu + 0x0026 => {0x00000026}) -; CHECK: DW_AT_enum_class [DW_FORM_flag] (0x01) +; CHECK: DW_TAG_enumeration_type [{{.*}}] +; CHECK: DW_AT_type [DW_FORM_ref4] +; CHECK: DW_AT_enum_class [DW_FORM_flag_present] (true) ; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[{{.*}}] = "A") -; CHECK: DW_TAG_enumeration_type [3] * -; CHECK: DW_AT_type [DW_FORM_ref4] (cu + 0x0057 => {0x00000057}) -; CHECK: DW_AT_enum_class [DW_FORM_flag] (0x01) +; CHECK: DW_TAG_enumeration_type [{{.*}}] * +; CHECK: DW_AT_type [DW_FORM_ref4] +; CHECK: DW_AT_enum_class [DW_FORM_flag_present] (true) ; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[{{.*}}] = "B") ; CHECK: DW_TAG_enumeration_type [6] ; CHECK-NOT: DW_AT_enum_class ; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[{{.*}}] = "C") +!23 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/enum-fwd-decl.ll b/test/DebugInfo/X86/enum-fwd-decl.ll index 03f590c..6bfb930 100644 --- a/test/DebugInfo/X86/enum-fwd-decl.ll +++ 
b/test/DebugInfo/X86/enum-fwd-decl.ll @@ -4,16 +4,18 @@ @e = global i16 0, align 2 !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!9} !0 = metadata !{i32 786449, metadata !8, i32 4, metadata !"clang version 3.2 (trunk 165274) (llvm/trunk 165272)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/foo.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 786484, i32 0, null, metadata !"e", metadata !"e", metadata !"", metadata !6, i32 2, metadata !7, i32 0, i32 1, i16* @e, null} ; [ DW_TAG_variable ] [e] [line 2] [def] !6 = metadata !{i32 786473, metadata !8} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786436, metadata !8, null, metadata !"E", i32 1, i64 16, i64 16, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_enumeration_type ] [E] [line 1, size 16, align 16, offset 0] [fwd] [from ] +!7 = metadata !{i32 786436, metadata !8, null, metadata !"E", i32 1, i64 16, i64 16, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [E] [line 1, size 16, align 16, offset 0] [decl] [from ] !8 = metadata !{metadata !"foo.cpp", metadata !"/tmp"} ; CHECK: DW_TAG_enumeration_type ; CHECK-NEXT: DW_AT_name ; CHECK-NEXT: DW_AT_byte_size ; CHECK-NEXT: DW_AT_declaration +!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/fission-cu.ll b/test/DebugInfo/X86/fission-cu.ll index 8ad3c2d..06408d7 100644 --- a/test/DebugInfo/X86/fission-cu.ll +++ b/test/DebugInfo/X86/fission-cu.ll @@ -5,6 +5,7 @@ @a = common global i32 0, align 4 !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!9} !0 = metadata !{i32 786449, metadata !8, i32 12, metadata !"clang version 3.3 (trunk 169021) (llvm/trunk 169020)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !3, metadata !"baz.dwo"} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/baz.c] [DW_LANG_C99] !1 = 
metadata !{i32 0} @@ -23,20 +24,20 @@ ; CHECK: Abbrev table for offset: 0x00000000 ; CHECK: [1] DW_TAG_compile_unit DW_CHILDREN_no ; CHECK: DW_AT_GNU_dwo_name DW_FORM_strp -; CHECK: DW_AT_GNU_dwo_id DW_FORM_data8 ; CHECK: DW_AT_GNU_addr_base DW_FORM_sec_offset ; CHECK: DW_AT_low_pc DW_FORM_addr ; CHECK: DW_AT_stmt_list DW_FORM_sec_offset ; CHECK: DW_AT_comp_dir DW_FORM_strp +; CHECK: DW_AT_GNU_dwo_id DW_FORM_data8 ; CHECK: .debug_info contents: ; CHECK: DW_TAG_compile_unit ; CHECK: DW_AT_GNU_dwo_name [DW_FORM_strp] ( .debug_str[0x00000000] = "baz.dwo") -; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x0000000000000000) ; CHECK: DW_AT_GNU_addr_base [DW_FORM_sec_offset] (0x00000000) ; CHECK: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000) ; CHECK: DW_AT_stmt_list [DW_FORM_sec_offset] (0x00000000) ; CHECK: DW_AT_comp_dir [DW_FORM_strp] ( .debug_str[0x00000008] = "/usr/local/google/home/echristo/tmp") +; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x0000000000000000) ; CHECK: .debug_str contents: ; CHECK: 0x00000000: "baz.dwo" @@ -54,12 +55,7 @@ ; CHECK-NOT: DW_AT_comp_dir ; CHECK: DW_AT_GNU_dwo_id DW_FORM_data8 -; CHECK: [2] DW_TAG_base_type DW_CHILDREN_no -; CHECK: DW_AT_name DW_FORM_GNU_str_index -; CHECK: DW_AT_encoding DW_FORM_data1 -; CHECK: DW_AT_byte_size DW_FORM_data1 - -; CHECK: [3] DW_TAG_variable DW_CHILDREN_no +; CHECK: [2] DW_TAG_variable DW_CHILDREN_no ; CHECK: DW_AT_name DW_FORM_GNU_str_index ; CHECK: DW_AT_type DW_FORM_ref4 ; CHECK: DW_AT_external DW_FORM_flag_present @@ -67,6 +63,11 @@ ; CHECK: DW_AT_decl_line DW_FORM_data1 ; CHECK: DW_AT_location DW_FORM_block1 +; CHECK: [3] DW_TAG_base_type DW_CHILDREN_no +; CHECK: DW_AT_name DW_FORM_GNU_str_index +; CHECK: DW_AT_encoding DW_FORM_data1 +; CHECK: DW_AT_byte_size DW_FORM_data1 + ; Check that the rest of the compile units have information. 
; CHECK: .debug_info.dwo contents: ; CHECK: DW_TAG_compile_unit @@ -77,15 +78,15 @@ ; CHECK-NOT: DW_AT_stmt_list ; CHECK-NOT: DW_AT_comp_dir ; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x0000000000000000) -; CHECK: DW_TAG_base_type -; CHECK: DW_AT_name [DW_FORM_GNU_str_index] ( indexed (00000003) string = "int") ; CHECK: DW_TAG_variable ; CHECK: DW_AT_name [DW_FORM_GNU_str_index] ( indexed (00000002) string = "a") -; CHECK: DW_AT_type [DW_FORM_ref4] (cu + 0x0018 => {0x00000018}) +; CHECK: DW_AT_type [DW_FORM_ref4] (cu + 0x{{[0-9a-f]*}} => {[[TYPE:0x[0-9a-f]*]]}) ; CHECK: DW_AT_external [DW_FORM_flag_present] (true) ; CHECK: DW_AT_decl_file [DW_FORM_data1] (0x01) ; CHECK: DW_AT_decl_line [DW_FORM_data1] (0x01) ; CHECK: DW_AT_location [DW_FORM_block1] (<0x02> fb 00 ) +; CHECK: [[TYPE]]: DW_TAG_base_type +; CHECK: DW_AT_name [DW_FORM_GNU_str_index] ( indexed (00000003) string = "int") ; CHECK: .debug_str.dwo contents: @@ -110,3 +111,4 @@ ; OBJ-NEXT: R_X86_64_32 .debug_line ; OBJ-NEXT: R_X86_64_32 .debug_str ; OBJ-NEXT: } +!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/fission-hash.ll b/test/DebugInfo/X86/fission-hash.ll new file mode 100644 index 0000000..d3e46a9 --- /dev/null +++ b/test/DebugInfo/X86/fission-hash.ll @@ -0,0 +1,16 @@ +; RUN: llc -split-dwarf=Enable -generate-cu-hash -O0 %s -mtriple=x86_64-unknown-linux-gnu -filetype=obj -o %t +; RUN: llvm-dwarfdump -debug-dump=all %t | FileCheck %s + +; The source is an empty file. 
+ +; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x0c1e629c9e5ada4f) +; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x0c1e629c9e5ada4f) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} + +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 188230) (llvm/trunk 188234)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !2, metadata !2, metadata !"foo.dwo"} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.c] [DW_LANG_C99] +!1 = metadata !{metadata !"foo.c", metadata !"/usr/local/google/home/echristo/tmp"} +!2 = metadata !{i32 0} +!3 = metadata !{i32 2, metadata !"Dwarf Version", i32 3} +!4 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/fission-ranges.ll b/test/DebugInfo/X86/fission-ranges.ll new file mode 100644 index 0000000..0a10079 --- /dev/null +++ b/test/DebugInfo/X86/fission-ranges.ll @@ -0,0 +1,151 @@ +; RUN: llc -split-dwarf=Enable -O0 %s -mtriple=x86_64-unknown-linux-gnu -filetype=obj -o %t +; RUN: llvm-dwarfdump -debug-dump=all %t | FileCheck %s + +; From the code: + +; extern int c; +; static void foo (int p) +; { +; int a, b; +; unsigned int d, e; + +; for (a = 0; a < 30; a++) +; for (d = 0; d < 30; d++) +; for (b = 0; b < 30; b++) +; for (e = 0; e < 30; e++) +; { +; int *w = &c; +; *w &= p; +; } +; } + +; void +; bar () +; { +; foo (1); +; } + +; compiled with: + +; clang -g -S -gsplit-dwarf -O1 small.c + +; CHECK: DW_AT_GNU_ranges_base + +@c = external global i32 + +; Function Attrs: nounwind uwtable +define void @bar() #0 { +entry: + tail call fastcc void @foo(), !dbg !27 + ret void, !dbg !28 +} + +; Function Attrs: nounwind uwtable +define internal fastcc void @foo() #0 { +entry: + tail call void @llvm.dbg.value(metadata !29, i64 0, metadata !13), !dbg !30 + tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !14), !dbg !31 + %c.promoted9 = load i32* @c, align 4, !dbg !32, !tbaa !33 + br label %for.cond1.preheader, !dbg 
!31 + +for.cond1.preheader: ; preds = %for.inc16, %entry + %and.lcssa.lcssa.lcssa10 = phi i32 [ %c.promoted9, %entry ], [ %and, %for.inc16 ] + %a.08 = phi i32 [ 0, %entry ], [ %inc17, %for.inc16 ] + br label %for.cond4.preheader, !dbg !37 + +for.cond4.preheader: ; preds = %for.inc13, %for.cond1.preheader + %and.lcssa.lcssa7 = phi i32 [ %and.lcssa.lcssa.lcssa10, %for.cond1.preheader ], [ %and, %for.inc13 ] + %d.06 = phi i32 [ 0, %for.cond1.preheader ], [ %inc14, %for.inc13 ] + br label %for.cond7.preheader, !dbg !38 + +for.cond7.preheader: ; preds = %for.inc10, %for.cond4.preheader + %and.lcssa5 = phi i32 [ %and.lcssa.lcssa7, %for.cond4.preheader ], [ %and, %for.inc10 ] + %b.03 = phi i32 [ 0, %for.cond4.preheader ], [ %inc11, %for.inc10 ] + br label %for.body9, !dbg !39 + +for.body9: ; preds = %for.body9, %for.cond7.preheader + %and2 = phi i32 [ %and.lcssa5, %for.cond7.preheader ], [ %and, %for.body9 ], !dbg !40 + %e.01 = phi i32 [ 0, %for.cond7.preheader ], [ %inc, %for.body9 ] + tail call void @llvm.dbg.value(metadata !41, i64 0, metadata !19), !dbg !40 + %and = and i32 %and2, 1, !dbg !32 + %inc = add i32 %e.01, 1, !dbg !39 + tail call void @llvm.dbg.value(metadata !{i32 %inc}, i64 0, metadata !18), !dbg !39 + %exitcond = icmp eq i32 %inc, 30, !dbg !39 + br i1 %exitcond, label %for.inc10, label %for.body9, !dbg !39 + +for.inc10: ; preds = %for.body9 + %inc11 = add nsw i32 %b.03, 1, !dbg !38 + tail call void @llvm.dbg.value(metadata !{i32 %inc11}, i64 0, metadata !15), !dbg !38 + %exitcond11 = icmp eq i32 %inc11, 30, !dbg !38 + br i1 %exitcond11, label %for.inc13, label %for.cond7.preheader, !dbg !38 + +for.inc13: ; preds = %for.inc10 + %inc14 = add i32 %d.06, 1, !dbg !37 + tail call void @llvm.dbg.value(metadata !{i32 %inc14}, i64 0, metadata !16), !dbg !37 + %exitcond12 = icmp eq i32 %inc14, 30, !dbg !37 + br i1 %exitcond12, label %for.inc16, label %for.cond4.preheader, !dbg !37 + +for.inc16: ; preds = %for.inc13 + %inc17 = add nsw i32 %a.08, 1, !dbg !31 + tail 
call void @llvm.dbg.value(metadata !{i32 %inc17}, i64 0, metadata !14), !dbg !31 + %exitcond13 = icmp eq i32 %inc17, 30, !dbg !31 + br i1 %exitcond13, label %for.end18, label %for.cond1.preheader, !dbg !31 + +for.end18: ; preds = %for.inc16 + store i32 %and, i32* @c, align 4, !dbg !32, !tbaa !33 + ret void, !dbg !42 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.value(metadata, i64, metadata) #1 + +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!26, !43} + +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 191700) (llvm/trunk 191710)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"small.dwo"} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/small.c] [DW_LANG_C99] +!1 = metadata !{metadata !"small.c", metadata !"/usr/local/google/home/echristo/tmp"} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4, metadata !8} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"bar", metadata !"bar", metadata !"", i32 18, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, void ()* @bar, null, null, metadata !2, i32 19} ; [ DW_TAG_subprogram ] [line 18] [def] [scope 19] [bar] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/small.c] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{null} +!8 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !9, i1 true, i1 true, i32 
0, i32 0, null, i32 256, i1 true, void ()* @foo, null, null, metadata !12, i32 3} ; [ DW_TAG_subprogram ] [line 2] [local] [def] [scope 3] [foo] +!9 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!10 = metadata !{null, metadata !11} +!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!12 = metadata !{metadata !13, metadata !14, metadata !15, metadata !16, metadata !18, metadata !19} +!13 = metadata !{i32 786689, metadata !8, metadata !"p", metadata !5, i32 16777218, metadata !11, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p] [line 2] +!14 = metadata !{i32 786688, metadata !8, metadata !"a", metadata !5, i32 4, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 4] +!15 = metadata !{i32 786688, metadata !8, metadata !"b", metadata !5, i32 4, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [b] [line 4] +!16 = metadata !{i32 786688, metadata !8, metadata !"d", metadata !5, i32 5, metadata !17, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [d] [line 5] +!17 = metadata !{i32 786468, null, null, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned] +!18 = metadata !{i32 786688, metadata !8, metadata !"e", metadata !5, i32 5, metadata !17, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [e] [line 5] +!19 = metadata !{i32 786688, metadata !20, metadata !"w", metadata !5, i32 12, metadata !25, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [w] [line 12] +!20 = metadata !{i32 786443, metadata !1, metadata !21, i32 11, i32 0, i32 4} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/small.c] +!21 = metadata !{i32 786443, metadata !1, metadata !22, i32 10, i32 
0, i32 3} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/small.c] +!22 = metadata !{i32 786443, metadata !1, metadata !23, i32 9, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/small.c] +!23 = metadata !{i32 786443, metadata !1, metadata !24, i32 8, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/small.c] +!24 = metadata !{i32 786443, metadata !1, metadata !8, i32 7, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/small.c] +!25 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int] +!26 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!27 = metadata !{i32 20, i32 0, metadata !4, null} +!28 = metadata !{i32 21, i32 0, metadata !4, null} +!29 = metadata !{i32 1} +!30 = metadata !{i32 2, i32 0, metadata !8, null} +!31 = metadata !{i32 7, i32 0, metadata !24, null} +!32 = metadata !{i32 13, i32 0, metadata !20, null} +!33 = metadata !{metadata !34, metadata !34, i64 0} +!34 = metadata !{metadata !"int", metadata !35, i64 0} +!35 = metadata !{metadata !"omnipotent char", metadata !36, i64 0} +!36 = metadata !{metadata !"Simple C/C++ TBAA"} +!37 = metadata !{i32 8, i32 0, metadata !23, null} ; [ DW_TAG_imported_declaration ] +!38 = metadata !{i32 9, i32 0, metadata !22, null} +!39 = metadata !{i32 10, i32 0, metadata !21, null} +!40 = metadata !{i32 12, i32 0, metadata !20, null} +!41 = metadata !{i32* @c} +!42 = metadata !{i32 15, i32 0, metadata !8, null} +!43 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/generate-odr-hash.ll b/test/DebugInfo/X86/generate-odr-hash.ll index 956d3f6..4f9cc78 100644 --- a/test/DebugInfo/X86/generate-odr-hash.ll +++ b/test/DebugInfo/X86/generate-odr-hash.ll @@ -103,8 +103,8 @@ define void @_Z3foov() #0 { entry: %b = alloca %struct.baz, align 1 - call void 
@llvm.dbg.declare(metadata !{%struct.baz* %b}, metadata !63), !dbg !71 - ret void, !dbg !72 + call void @llvm.dbg.declare(metadata !{%struct.baz* %b}, metadata !44), !dbg !46 + ret void, !dbg !47 } ; Function Attrs: nounwind readnone @@ -112,8 +112,8 @@ declare void @llvm.dbg.declare(metadata, metadata) #1 define internal void @__cxx_global_var_init() section ".text.startup" { entry: - call void @_ZN12_GLOBAL__N_16walrusC1Ev(%"struct.<anonymous namespace>::walrus"* @w), !dbg !73 - ret void, !dbg !73 + call void @_ZN12_GLOBAL__N_16walrusC1Ev(%"struct.<anonymous namespace>::walrus"* @w), !dbg !48 + ret void, !dbg !48 } ; Function Attrs: nounwind uwtable @@ -121,98 +121,76 @@ define internal void @_ZN12_GLOBAL__N_16walrusC2Ev(%"struct.<anonymous namespace entry: %this.addr = alloca %"struct.<anonymous namespace>::walrus"*, align 8 store %"struct.<anonymous namespace>::walrus"* %this, %"struct.<anonymous namespace>::walrus"** %this.addr, align 8 - call void @llvm.dbg.declare(metadata !{%"struct.<anonymous namespace>::walrus"** %this.addr}, metadata !74), !dbg !76 + call void @llvm.dbg.declare(metadata !{%"struct.<anonymous namespace>::walrus"** %this.addr}, metadata !49), !dbg !51 %this1 = load %"struct.<anonymous namespace>::walrus"** %this.addr - ret void, !dbg !76 + ret void, !dbg !52 } define internal void @_GLOBAL__I_a() section ".text.startup" { entry: - call void @__cxx_global_var_init(), !dbg !77 - ret void, !dbg !77 + call void @__cxx_global_var_init(), !dbg !53 + ret void, !dbg !53 } -attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" 
"stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind readnone } !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!62} +!llvm.module.flags = !{!42, !54} +!llvm.ident = !{!43} -!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (trunk 187387) (llvm/trunk 187385)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !20, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/bar.cpp] [DW_LANG_C_plus_plus] -!1 = metadata !{metadata !"bar.cpp", metadata !"/usr/local/google/home/echristo/tmp"} +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !20, metadata !37, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/bar.cpp] [DW_LANG_C_plus_plus] +!1 = metadata !{metadata !"bar.cpp", metadata !"/tmp/dbginfo"} !2 = metadata !{i32 0} -!3 = metadata !{metadata !4, metadata !8, metadata !9, metadata !18} -!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"_Z3foov", i32 6, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3foov, null, null, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [foo] -!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/bar.cpp] -!6 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] -!7 = metadata !{null} -!8 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"__cxx_global_var_init", metadata !"__cxx_global_var_init", metadata !"", i32 31, metadata !6, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @__cxx_global_var_init, null, null, metadata !2, i32 31} ; [ DW_TAG_subprogram ] [line 31] [local] 
[def] [__cxx_global_var_init] -!9 = metadata !{i32 786478, metadata !1, metadata !10, metadata !"walrus", metadata !"walrus", metadata !"_ZN12_GLOBAL__N_16walrusC2Ev", i32 27, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%"struct.<anonymous namespace>::walrus"*)* @_ZN12_GLOBAL__N_16walrusC2Ev, null, metadata !16, metadata !2, i32 27} ; [ DW_TAG_subprogram ] [line 27] [local] [def] [walrus] -!10 = metadata !{i32 786489, metadata !1, null, metadata !"", i32 25} ; [ DW_TAG_namespace ] [line 25] -!11 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] -!12 = metadata !{null, metadata !13} -!13 = metadata !{i32 786447, i32 0, i32 0, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !14} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from walrus] -!14 = metadata !{i32 786451, metadata !1, metadata !10, metadata !"walrus", i32 26, i64 8, i64 8, i32 0, i32 0, null, metadata !15, i32 0, null, null} ; [ DW_TAG_structure_type ] [walrus] [line 26, size 8, align 8, offset 0] [def] [from ] -!15 = metadata !{metadata !16} -!16 = metadata !{i32 786478, metadata !1, metadata !14, metadata !"walrus", metadata !"walrus", metadata !"", i32 27, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !17, i32 27} ; [ DW_TAG_subprogram ] [line 27] [walrus] -!17 = metadata !{i32 786468} -!18 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"_GLOBAL__I_a", metadata !"_GLOBAL__I_a", metadata !"", i32 27, metadata !19, i1 true, i1 true, i32 0, i32 0, null, i32 64, i1 false, void ()* @_GLOBAL__I_a, null, null, metadata !2, i32 27} ; [ DW_TAG_subprogram ] [line 27] [local] [def] [_GLOBAL__I_a] -!19 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, i32 0} ; [ 
DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] -!20 = metadata !{metadata !21, metadata !29, metadata !43, metadata !44} -!21 = metadata !{i32 786484, i32 0, null, metadata !"b", metadata !"b", metadata !"", metadata !5, i32 4, metadata !22, i32 0, i32 1, %struct.bar* @b, null} ; [ DW_TAG_variable ] [b] [line 4] [def] -!22 = metadata !{i32 786451, metadata !1, null, metadata !"bar", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !23, i32 0, null, null} ; [ DW_TAG_structure_type ] [bar] [line 1, size 8, align 8, offset 0] [def] [from ] -!23 = metadata !{metadata !24} -!24 = metadata !{i32 786478, metadata !1, metadata !22, metadata !"bar", metadata !"bar", metadata !"", i32 1, metadata !25, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !28, i32 1} ; [ DW_TAG_subprogram ] [line 1] [bar] -!25 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !26, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] -!26 = metadata !{null, metadata !27} -!27 = metadata !{i32 786447, i32 0, i32 0, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !22} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from bar] -!28 = metadata !{i32 786468} -!29 = metadata !{i32 786484, i32 0, metadata !30, metadata !"animal", metadata !"animal", metadata !"_ZN7echidna8capybara8mongoose6animalE", metadata !5, i32 20, metadata !33, i32 0, i32 1, %"class.echidna::capybara::mongoose::fluffy"* @_ZN7echidna8capybara8mongoose6animalE, null} ; [ DW_TAG_variable ] [animal] [line 20] [def] -!30 = metadata !{i32 786489, metadata !1, metadata !31, metadata !"mongoose", i32 14} ; [ DW_TAG_namespace ] [mongoose] [line 14] -!31 = metadata !{i32 786489, metadata !1, metadata !32, metadata !"capybara", i32 13} ; [ DW_TAG_namespace ] [capybara] [line 13] -!32 = metadata !{i32 786489, metadata !1, null, metadata !"echidna", i32 12} 
; [ DW_TAG_namespace ] [echidna] [line 12] -!33 = metadata !{i32 786434, metadata !1, metadata !30, metadata !"fluffy", i32 15, i64 64, i64 32, i32 0, i32 0, null, metadata !34, i32 0, null, null} ; [ DW_TAG_class_type ] [fluffy] [line 15, size 64, align 32, offset 0] [def] [from ] -!34 = metadata !{metadata !35, metadata !37, metadata !38} -!35 = metadata !{i32 786445, metadata !1, metadata !33, metadata !"a", i32 16, i64 32, i64 32, i64 0, i32 1, metadata !36} ; [ DW_TAG_member ] [a] [line 16, size 32, align 32, offset 0] [private] [from int] -!36 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] -!37 = metadata !{i32 786445, metadata !1, metadata !33, metadata !"b", i32 17, i64 32, i64 32, i64 32, i32 1, metadata !36} ; [ DW_TAG_member ] [b] [line 17, size 32, align 32, offset 32] [private] [from int] -!38 = metadata !{i32 786478, metadata !1, metadata !33, metadata !"fluffy", metadata !"fluffy", metadata !"", i32 15, metadata !39, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !42, i32 15} ; [ DW_TAG_subprogram ] [line 15] [fluffy] -!39 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !40, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] -!40 = metadata !{null, metadata !41} -!41 = metadata !{i32 786447, i32 0, i32 0, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !33} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from fluffy] -!42 = metadata !{i32 786468} -!43 = metadata !{i32 786484, i32 0, null, metadata !"w", metadata !"w", metadata !"", metadata !5, i32 31, metadata !14, i32 1, i32 1, %"struct.<anonymous namespace>::walrus"* @w, null} ; [ DW_TAG_variable ] [w] [line 31] [local] [def] -!44 = metadata !{i32 786484, i32 0, null, metadata !"wom", 
metadata !"wom", metadata !"", metadata !5, i32 40, metadata !45, i32 0, i32 1, %struct.wombat* @wom, null} ; [ DW_TAG_variable ] [wom] [line 40] [def] -!45 = metadata !{i32 786451, metadata !1, null, metadata !"wombat", i32 33, i64 64, i64 32, i32 0, i32 0, null, metadata !46, i32 0, null, null} ; [ DW_TAG_structure_type ] [wombat] [line 33, size 64, align 32, offset 0] [def] [from ] -!46 = metadata !{metadata !47, metadata !57} -!47 = metadata !{i32 786445, metadata !1, metadata !45, metadata !"a_b", i32 37, i64 64, i64 32, i64 0, i32 0, metadata !48} ; [ DW_TAG_member ] [a_b] [line 37, size 64, align 32, offset 0] [from ] -!48 = metadata !{i32 786451, metadata !1, metadata !45, metadata !"", i32 34, i64 64, i64 32, i32 0, i32 0, null, metadata !49, i32 0, null, null} ; [ DW_TAG_structure_type ] [line 34, size 64, align 32, offset 0] [def] [from ] -!49 = metadata !{metadata !50, metadata !51, metadata !52} -!50 = metadata !{i32 786445, metadata !1, metadata !48, metadata !"a", i32 35, i64 32, i64 32, i64 0, i32 0, metadata !36} ; [ DW_TAG_member ] [a] [line 35, size 32, align 32, offset 0] [from int] -!51 = metadata !{i32 786445, metadata !1, metadata !48, metadata !"b", i32 36, i64 32, i64 32, i64 32, i32 0, metadata !36} ; [ DW_TAG_member ] [b] [line 36, size 32, align 32, offset 32] [from int] -!52 = metadata !{i32 786478, metadata !1, metadata !48, metadata !"", metadata !"", metadata !"", i32 34, metadata !53, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !56, i32 34} ; [ DW_TAG_subprogram ] [line 34] -!53 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !54, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] -!54 = metadata !{null, metadata !55} -!55 = metadata !{i32 786447, i32 0, i32 0, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !48} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] 
[artificial] [from ] -!56 = metadata !{i32 786468} -!57 = metadata !{i32 786478, metadata !1, metadata !45, metadata !"wombat", metadata !"wombat", metadata !"", i32 33, metadata !58, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !61, i32 33} ; [ DW_TAG_subprogram ] [line 33] [wombat] -!58 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !59, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] -!59 = metadata !{null, metadata !60} -!60 = metadata !{i32 786447, i32 0, i32 0, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !45} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from wombat] -!61 = metadata !{i32 786468} -!62 = metadata !{i32 2, metadata !"Dwarf Version", i32 3} -!63 = metadata !{i32 786688, metadata !4, metadata !"b", metadata !5, i32 9, metadata !64, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [b] [line 9] -!64 = metadata !{i32 786451, metadata !1, metadata !4, metadata !"baz", i32 7, i64 8, i64 8, i32 0, i32 0, null, metadata !65, i32 0, null, null} ; [ DW_TAG_structure_type ] [baz] [line 7, size 8, align 8, offset 0] [def] [from ] -!65 = metadata !{metadata !66} -!66 = metadata !{i32 786478, metadata !1, metadata !64, metadata !"baz", metadata !"baz", metadata !"", i32 7, metadata !67, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !70, i32 7} ; [ DW_TAG_subprogram ] [line 7] [baz] -!67 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !68, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] -!68 = metadata !{null, metadata !69} -!69 = metadata !{i32 786447, i32 0, i32 0, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !64} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from baz] -!70 = metadata !{i32 786468} 
-!71 = metadata !{i32 9, i32 0, metadata !4, null} -!72 = metadata !{i32 10, i32 0, metadata !4, null} -!73 = metadata !{i32 31, i32 0, metadata !8, null} -!74 = metadata !{i32 786689, metadata !9, metadata !"this", metadata !5, i32 16777243, metadata !75, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 27] -!75 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from walrus] -!76 = metadata !{i32 27, i32 0, metadata !9, null} -!77 = metadata !{i32 27, i32 0, metadata !18, null} +!3 = metadata !{metadata !4, metadata !5, metadata !13, metadata !16} +!4 = metadata !{i32 786451, metadata !1, null, metadata !"bar", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, metadata !"_ZTS3bar"} ; [ DW_TAG_structure_type ] [bar] [line 1, size 8, align 8, offset 0] [def] [from ] +!5 = metadata !{i32 786434, metadata !1, metadata !6, metadata !"fluffy", i32 13, i64 64, i64 32, i32 0, i32 0, null, metadata !9, i32 0, null, null, metadata !"_ZTSN7echidna8capybara8mongoose6fluffyE"} ; [ DW_TAG_class_type ] [fluffy] [line 13, size 64, align 32, offset 0] [def] [from ] +!6 = metadata !{i32 786489, metadata !1, metadata !7, metadata !"mongoose", i32 12} ; [ DW_TAG_namespace ] [mongoose] [line 12] +!7 = metadata !{i32 786489, metadata !1, metadata !8, metadata !"capybara", i32 11} ; [ DW_TAG_namespace ] [capybara] [line 11] +!8 = metadata !{i32 786489, metadata !1, null, metadata !"echidna", i32 10} ; [ DW_TAG_namespace ] [echidna] [line 10] +!9 = metadata !{metadata !10, metadata !12} +!10 = metadata !{i32 786445, metadata !1, metadata !"_ZTSN7echidna8capybara8mongoose6fluffyE", metadata !"a", i32 14, i64 32, i64 32, i64 0, i32 1, metadata !11} ; [ DW_TAG_member ] [a] [line 14, size 32, align 32, offset 0] [private] [from int] +!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ 
DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!12 = metadata !{i32 786445, metadata !1, metadata !"_ZTSN7echidna8capybara8mongoose6fluffyE", metadata !"b", i32 15, i64 32, i64 32, i64 32, i32 1, metadata !11} ; [ DW_TAG_member ] [b] [line 15, size 32, align 32, offset 32] [private] [from int] +!13 = metadata !{i32 786451, metadata !1, null, metadata !"wombat", i32 31, i64 64, i64 32, i32 0, i32 0, null, metadata !14, i32 0, null, null, metadata !"_ZTS6wombat"} ; [ DW_TAG_structure_type ] [wombat] [line 31, size 64, align 32, offset 0] [def] [from ] +!14 = metadata !{metadata !15} +!15 = metadata !{i32 786445, metadata !1, metadata !"_ZTS6wombat", metadata !"a_b", i32 35, i64 64, i64 32, i64 0, i32 0, metadata !"_ZTSN6wombatUt_E"} ; [ DW_TAG_member ] [a_b] [line 35, size 64, align 32, offset 0] [from _ZTSN6wombatUt_E] +!16 = metadata !{i32 786451, metadata !1, metadata !"_ZTS6wombat", metadata !"", i32 32, i64 64, i64 32, i32 0, i32 0, null, metadata !17, i32 0, null, null, metadata !"_ZTSN6wombatUt_E"} ; [ DW_TAG_structure_type ] [line 32, size 64, align 32, offset 0] [def] [from ] +!17 = metadata !{metadata !18, metadata !19} +!18 = metadata !{i32 786445, metadata !1, metadata !"_ZTSN6wombatUt_E", metadata !"a", i32 33, i64 32, i64 32, i64 0, i32 0, metadata !11} ; [ DW_TAG_member ] [a] [line 33, size 32, align 32, offset 0] [from int] +!19 = metadata !{i32 786445, metadata !1, metadata !"_ZTSN6wombatUt_E", metadata !"b", i32 34, i64 32, i64 32, i64 32, i32 0, metadata !11} ; [ DW_TAG_member ] [b] [line 34, size 32, align 32, offset 32] [from int] +!20 = metadata !{metadata !21, metadata !25, metadata !26, metadata !35} +!21 = metadata !{i32 786478, metadata !1, metadata !22, metadata !"foo", metadata !"foo", metadata !"_Z3foov", i32 5, metadata !23, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3foov, null, null, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [foo] +!22 = metadata !{i32 
786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/dbginfo/bar.cpp] +!23 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !24, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!24 = metadata !{null} +!25 = metadata !{i32 786478, metadata !1, metadata !22, metadata !"__cxx_global_var_init", metadata !"__cxx_global_var_init", metadata !"", i32 29, metadata !23, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @__cxx_global_var_init, null, null, metadata !2, i32 29} ; [ DW_TAG_subprogram ] [line 29] [local] [def] [__cxx_global_var_init] +!26 = metadata !{i32 786478, metadata !1, metadata !27, metadata !"walrus", metadata !"walrus", metadata !"_ZN12_GLOBAL__N_16walrusC2Ev", i32 25, metadata !31, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%"struct.<anonymous namespace>::walrus"*)* @_ZN12_GLOBAL__N_16walrusC2Ev, null, metadata !30, metadata !2, i32 25} ; [ DW_TAG_subprogram ] [line 25] [local] [def] [walrus] +!27 = metadata !{i32 786451, metadata !1, metadata !28, metadata !"walrus", i32 24, i64 8, i64 8, i32 0, i32 0, null, metadata !29, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [walrus] [line 24, size 8, align 8, offset 0] [def] [from ] +!28 = metadata !{i32 786489, metadata !1, null, metadata !"", i32 23} ; [ DW_TAG_namespace ] [line 23] +!29 = metadata !{metadata !30} +!30 = metadata !{i32 786478, metadata !1, metadata !27, metadata !"walrus", metadata !"walrus", metadata !"", i32 25, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !34, i32 25} ; [ DW_TAG_subprogram ] [line 25] [walrus] +!31 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !32, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!32 = metadata !{null, metadata !33} +!33 = metadata !{i32 786447, null, 
null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !27} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from walrus] +!34 = metadata !{i32 786468} +!35 = metadata !{i32 786478, metadata !1, metadata !22, metadata !"", metadata !"", metadata !"_GLOBAL__I_a", i32 25, metadata !36, i1 true, i1 true, i32 0, i32 0, null, i32 64, i1 false, void ()* @_GLOBAL__I_a, null, null, metadata !2, i32 25} ; [ DW_TAG_subprogram ] [line 25] [local] [def] +!36 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!37 = metadata !{metadata !38, metadata !39, metadata !40, metadata !41} +!38 = metadata !{i32 786484, i32 0, null, metadata !"b", metadata !"b", metadata !"", metadata !22, i32 3, metadata !4, i32 0, i32 1, %struct.bar* @b, null} ; [ DW_TAG_variable ] [b] [line 3] [def] +!39 = metadata !{i32 786484, i32 0, metadata !6, metadata !"animal", metadata !"animal", metadata !"_ZN7echidna8capybara8mongoose6animalE", metadata !22, i32 18, metadata !5, i32 0, i32 1, %"class.echidna::capybara::mongoose::fluffy"* @_ZN7echidna8capybara8mongoose6animalE, null} ; [ DW_TAG_variable ] [animal] [line 18] [def] +!40 = metadata !{i32 786484, i32 0, null, metadata !"w", metadata !"w", metadata !"", metadata !22, i32 29, metadata !27, i32 1, i32 1, %"struct.<anonymous namespace>::walrus"* @w, null} ; [ DW_TAG_variable ] [w] [line 29] [local] [def] +!41 = metadata !{i32 786484, i32 0, null, metadata !"wom", metadata !"wom", metadata !"", metadata !22, i32 38, metadata !13, i32 0, i32 1, %struct.wombat* @wom, null} ; [ DW_TAG_variable ] [wom] [line 38] [def] +!42 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!43 = metadata !{metadata !"clang version 3.4 "} +!44 = metadata !{i32 786688, metadata !21, metadata !"b", metadata !22, i32 7, metadata !45, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [b] 
[line 7] +!45 = metadata !{i32 786451, metadata !1, metadata !21, metadata !"baz", i32 6, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [baz] [line 6, size 8, align 8, offset 0] [def] [from ] +!46 = metadata !{i32 7, i32 0, metadata !21, null} +!47 = metadata !{i32 8, i32 0, metadata !21, null} ; [ DW_TAG_imported_declaration ] +!48 = metadata !{i32 29, i32 0, metadata !25, null} +!49 = metadata !{i32 786689, metadata !26, metadata !"this", null, i32 16777216, metadata !50, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0] +!50 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !27} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from walrus] +!51 = metadata !{i32 0, i32 0, metadata !26, null} +!52 = metadata !{i32 25, i32 0, metadata !26, null} +!53 = metadata !{i32 25, i32 0, metadata !35, null} +!54 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/gnu-public-names-empty.ll b/test/DebugInfo/X86/gnu-public-names-empty.ll new file mode 100644 index 0000000..8b0309c --- /dev/null +++ b/test/DebugInfo/X86/gnu-public-names-empty.ll @@ -0,0 +1,19 @@ +; RUN: llc -mtriple=x86_64-pc-linux-gnu -generate-gnu-dwarf-pub-sections -filetype=obj < %s | llvm-dwarfdump - | FileCheck %s + +; Generated from: + +; static int a __attribute__((section("a"))); + +; Check that the attributes in the compile unit both point to a correct +; location, even when nothing is exported. 
+; CHECK: DW_AT_GNU_pubnames [DW_FORM_sec_offset] (0x00000000) +; CHECK: DW_AT_GNU_pubtypes [DW_FORM_sec_offset] (0x00000000) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} + +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 191846) (llvm/trunk 191866)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.c] [DW_LANG_C99] +!1 = metadata !{metadata !"foo.c", metadata !"/usr/local/google/home/echristo/tmp"} +!2 = metadata !{i32 0} +!3 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!4 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/gnu-public-names.ll b/test/DebugInfo/X86/gnu-public-names.ll new file mode 100644 index 0000000..7ad5032 --- /dev/null +++ b/test/DebugInfo/X86/gnu-public-names.ll @@ -0,0 +1,219 @@ +; RUN: llc -mtriple=x86_64-pc-linux-gnu -generate-gnu-dwarf-pub-sections < %s | FileCheck -check-prefix=ASM %s +; RUN: llc -mtriple=x86_64-pc-linux-gnu -generate-gnu-dwarf-pub-sections -filetype=obj < %s | llvm-dwarfdump - | FileCheck %s +; ModuleID = 'dwarf-public-names.cpp' +; +; Generated from: +; +; struct C { +; void member_function(); +; static int static_member_function(); +; static int static_member_variable; +; }; +; +; int C::static_member_variable = 0; +; +; void C::member_function() { +; static_member_variable = 0; +; } +; +; int C::static_member_function() { +; return static_member_variable; +; } +; +; C global_variable; +; +; int global_function() { +; return -1; +; } +; +; namespace ns { +; void global_namespace_function() { +; global_variable.member_function(); +; } +; int global_namespace_variable = 1; +; struct D { +; int A; +; } d; +; } + +; ASM: .section .debug_gnu_pubnames +; ASM: .byte 32 # Kind: VARIABLE, EXTERNAL +; ASM-NEXT: .asciz "global_variable" # External Name + +; ASM: .section .debug_gnu_pubtypes +; ASM: .byte 16 
# Kind: TYPE, EXTERNAL +; ASM-NEXT: .asciz "C" # External Name + +; CHECK: .debug_info contents: +; CHECK: DW_AT_GNU_pubnames [DW_FORM_sec_offset] (0x00000000) +; CHECK: DW_AT_GNU_pubtypes [DW_FORM_sec_offset] (0x00000000) + +; CHECK: [[C:[0-9a-f]+]]: DW_TAG_structure_type +; CHECK-NEXT: DW_AT_name {{.*}} "C" + +; CHECK: [[STATIC_MEM_DECL:[0-9a-f]+]]: DW_TAG_member +; CHECK-NEXT: DW_AT_name {{.*}} "static_member_variable" + +; CHECK: [[MEM_FUNC_DECL:[0-9a-f]+]]: DW_TAG_subprogram +; CHECK-NEXT: DW_AT_MIPS_linkage_name +; CHECK-NEXT: DW_AT_name {{.*}} "member_function" + +; CHECK: [[STATIC_MEM_FUNC_DECL:[0-9a-f]+]]: DW_TAG_subprogram +; CHECK-NEXT: DW_AT_MIPS_linkage_name +; CHECK-NEXT: DW_AT_name {{.*}} "static_member_function" + +; CHECK: [[INT:[0-9a-f]+]]: DW_TAG_base_type +; CHECK-NEXT: DW_AT_name {{.*}} "int" + +; CHECK: [[STATIC_MEM_VAR:[0-9a-f]+]]: DW_TAG_variable +; CHECK-NEXT: DW_AT_specification {{.*}}[[STATIC_MEM_DECL]] + +; CHECK: [[GLOB_VAR:[0-9a-f]+]]: DW_TAG_variable +; CHECK-NEXT: DW_AT_name {{.*}} "global_variable" + +; CHECK: [[NS:[0-9a-f]+]]: DW_TAG_namespace +; CHECK-NEXT: DW_AT_name {{.*}} "ns" + +; CHECK: [[GLOB_NS_VAR_DECL:[0-9a-f]+]]: DW_TAG_variable +; CHECK-NEXT: DW_AT_name {{.*}} "global_namespace_variable" + +; CHECK: [[D_VAR_DECL:[0-9a-f]+]]: DW_TAG_variable +; CHECK-NEXT: DW_AT_name {{.*}} "d" + +; CHECK: [[D:[0-9a-f]+]]: DW_TAG_structure_type +; CHECK-NEXT: DW_AT_name {{.*}} "D" + +; CHECK: [[GLOB_NS_FUNC:[0-9a-f]+]]: DW_TAG_subprogram +; CHECK-NEXT: DW_AT_MIPS_linkage_name +; CHECK-NEXT: DW_AT_name {{.*}} "global_namespace_function" + +; CHECK: [[GLOB_NS_VAR:[0-9a-f]+]]: DW_TAG_variable +; CHECK-NEXT: DW_AT_specification {{.*}}[[GLOB_NS_VAR_DECL]] + +; CHECK: [[D_VAR:[0-9a-f]+]]: DW_TAG_variable +; CHECK-NEXT: DW_AT_specification {{.*}}[[D_VAR_DECL]] + +; CHECK: [[MEM_FUNC:[0-9a-f]+]]: DW_TAG_subprogram +; CHECK-NEXT: DW_AT_specification {{.*}}[[MEM_FUNC_DECL]] + +; CHECK: [[STATIC_MEM_FUNC:[0-9a-f]+]]: DW_TAG_subprogram +; 
CHECK-NEXT: DW_AT_specification {{.*}}[[STATIC_MEM_FUNC_DECL]] + +; CHECK: [[GLOBAL_FUNC:[0-9a-f]+]]: DW_TAG_subprogram +; CHECK-NEXT: DW_AT_MIPS_linkage_name +; CHECK-NEXT: DW_AT_name {{.*}} "global_function" + +; CHECK-LABEL: .debug_gnu_pubnames contents: +; CHECK-NEXT: length = 0x000000e7 version = 0x0002 unit_offset = 0x00000000 unit_size = 0x0000017b +; CHECK-NEXT: Offset Linkage Kind Name +; CHECK-DAG: [[GLOBAL_FUNC]] EXTERNAL FUNCTION "global_function" +; CHECK-DAG: [[NS]] EXTERNAL TYPE "ns" +; CHECK-DAG: [[MEM_FUNC]] EXTERNAL FUNCTION "C::member_function" +; CHECK-DAG: [[GLOB_VAR]] EXTERNAL VARIABLE "global_variable" +; CHECK-DAG: [[GLOB_NS_VAR]] EXTERNAL VARIABLE "ns::global_namespace_variable" +; CHECK-DAG: [[GLOB_NS_FUNC]] EXTERNAL FUNCTION "ns::global_namespace_function" +; CHECK-DAG: [[D_VAR]] EXTERNAL VARIABLE "ns::d" +; CHECK-DAG: [[STATIC_MEM_VAR]] EXTERNAL VARIABLE "C::static_member_variable" +; CHECK-DAG: [[STATIC_MEM_FUNC]] EXTERNAL FUNCTION "C::static_member_function" + + +; CHECK-LABEL: debug_gnu_pubtypes contents: +; CHECK: Offset Linkage Kind Name +; CHECK-DAG: [[C]] EXTERNAL TYPE "C" +; CHECK-DAG: [[D]] EXTERNAL TYPE "ns::D" +; CHECK-DAG: [[INT]] STATIC TYPE "int" + +%struct.C = type { i8 } +%"struct.ns::D" = type { i32 } + +@_ZN1C22static_member_variableE = global i32 0, align 4 +@global_variable = global %struct.C zeroinitializer, align 1 +@_ZN2ns25global_namespace_variableE = global i32 1, align 4 +@_ZN2ns1dE = global %"struct.ns::D" zeroinitializer, align 4 + +; Function Attrs: nounwind uwtable +define void @_ZN1C15member_functionEv(%struct.C* %this) #0 align 2 { +entry: + %this.addr = alloca %struct.C*, align 8 + store %struct.C* %this, %struct.C** %this.addr, align 8 + call void @llvm.dbg.declare(metadata !{%struct.C** %this.addr}, metadata !36), !dbg !38 + %this1 = load %struct.C** %this.addr + store i32 0, i32* @_ZN1C22static_member_variableE, align 4, !dbg !39 + ret void, !dbg !39 +} + +; Function Attrs: nounwind readnone +declare 
void @llvm.dbg.declare(metadata, metadata) #1 + +; Function Attrs: nounwind uwtable +define i32 @_ZN1C22static_member_functionEv() #0 align 2 { +entry: + %0 = load i32* @_ZN1C22static_member_variableE, align 4, !dbg !40 + ret i32 %0, !dbg !40 +} + +; Function Attrs: nounwind uwtable +define i32 @_Z15global_functionv() #0 { +entry: + ret i32 -1, !dbg !41 +} + +; Function Attrs: nounwind uwtable +define void @_ZN2ns25global_namespace_functionEv() #0 { +entry: + call void @_ZN1C15member_functionEv(%struct.C* @global_variable), !dbg !42 + ret void, !dbg !42 +} + +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!34, !43} +!llvm.ident = !{!35} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (trunk 192862) (llvm/trunk 192861)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !21, metadata !29, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/pubnames.cpp] [DW_LANG_C_plus_plus] +!1 = metadata !{metadata !"pubnames.cpp", metadata !"/usr/local/google/home/echristo/tmp"} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4, metadata !17} +!4 = metadata !{i32 786451, metadata !1, null, metadata !"C", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !5, i32 0, null, null, metadata !"_ZTS1C"} ; [ DW_TAG_structure_type ] [C] [line 1, size 8, align 8, offset 0] [def] [from ] +!5 = metadata !{metadata !6, metadata !8, metadata !13} +!6 = metadata !{i32 786445, metadata !1, metadata !"_ZTS1C", metadata !"static_member_variable", i32 4, i64 0, i64 0, i64 0, i32 4096, metadata !7, null} ; [ DW_TAG_member ] [static_member_variable] [line 4, size 0, align 0, offset 0] [static] [from int] +!7 = 
metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!8 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1C", metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", i32 2, metadata !9, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !12, i32 2} ; [ DW_TAG_subprogram ] [line 2] [member_function] +!9 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!10 = metadata !{null, metadata !11} +!11 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1C] +!12 = metadata !{i32 786468} +!13 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1C", metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 3, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !16, i32 3} ; [ DW_TAG_subprogram ] [line 3] [static_member_function] +!14 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!15 = metadata !{metadata !7} +!16 = metadata !{i32 786468} +!17 = metadata !{i32 786451, metadata !1, metadata !18, metadata !"D", i32 21, i64 32, i64 32, i32 0, i32 0, null, metadata !19, i32 0, null, null, metadata !"_ZTSN2ns1DE"} ; [ DW_TAG_structure_type ] [D] [line 21, size 32, align 32, offset 0] [def] [from ] +!18 = metadata !{i32 786489, metadata !1, null, metadata !"ns", i32 17} ; [ DW_TAG_namespace ] [ns] [line 17] +!19 = 
metadata !{metadata !20} +!20 = metadata !{i32 786445, metadata !1, metadata !"_ZTSN2ns1DE", metadata !"A", i32 22, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_member ] [A] [line 22, size 32, align 32, offset 0] [from int] +!21 = metadata !{metadata !22, metadata !23, metadata !24, metadata !26} +!22 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1C", metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", i32 9, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.C*)* @_ZN1C15member_functionEv, null, metadata !8, metadata !2, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [member_function] +!23 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1C", metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_ZN1C22static_member_functionEv, null, metadata !13, metadata !2, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [static_member_function] +!24 = metadata !{i32 786478, metadata !1, metadata !25, metadata !"global_function", metadata !"global_function", metadata !"_Z15global_functionv", i32 15, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z15global_functionv, null, null, metadata !2, i32 15} ; [ DW_TAG_subprogram ] [line 15] [def] [global_function] +!25 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/pubnames.cpp] +!26 = metadata !{i32 786478, metadata !1, metadata !18, metadata !"global_namespace_function", metadata !"global_namespace_function", metadata !"_ZN2ns25global_namespace_functionEv", i32 18, metadata !27, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZN2ns25global_namespace_functionEv, null, null, metadata !2, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [global_namespace_function] +!27 = metadata !{i32 
786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !28, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!28 = metadata !{null} +!29 = metadata !{metadata !30, metadata !31, metadata !32, metadata !33} +!30 = metadata !{i32 786484, i32 0, metadata !4, metadata !"static_member_variable", metadata !"static_member_variable", metadata !"_ZN1C22static_member_variableE", metadata !25, i32 7, metadata !7, i32 0, i32 1, i32* @_ZN1C22static_member_variableE, metadata !6} ; [ DW_TAG_variable ] [static_member_variable] [line 7] [def] +!31 = metadata !{i32 786484, i32 0, null, metadata !"global_variable", metadata !"global_variable", metadata !"", metadata !25, i32 13, metadata !4, i32 0, i32 1, %struct.C* @global_variable, null} ; [ DW_TAG_variable ] [global_variable] [line 13] [def] +!32 = metadata !{i32 786484, i32 0, metadata !18, metadata !"global_namespace_variable", metadata !"global_namespace_variable", metadata !"_ZN2ns25global_namespace_variableE", metadata !25, i32 19, metadata !7, i32 0, i32 1, i32* @_ZN2ns25global_namespace_variableE, null} ; [ DW_TAG_variable ] [global_namespace_variable] [line 19] [def] +!33 = metadata !{i32 786484, i32 0, metadata !18, metadata !"d", metadata !"d", metadata !"_ZN2ns1dE", metadata !25, i32 23, metadata !17, i32 0, i32 1, %"struct.ns::D"* @_ZN2ns1dE, null} ; [ DW_TAG_variable ] [d] [line 23] [def] +!34 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!35 = metadata !{metadata !"clang version 3.4 (trunk 192862) (llvm/trunk 192861)"} +!36 = metadata !{i32 786689, metadata !22, metadata !"this", null, i32 16777216, metadata !37, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0] +!37 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1C] +!38 = metadata !{i32 0, i32 0, metadata !22, null} +!39 = metadata 
!{i32 9, i32 0, metadata !22, null} +!40 = metadata !{i32 11, i32 0, metadata !23, null} +!41 = metadata !{i32 15, i32 0, metadata !24, null} +!42 = metadata !{i32 18, i32 0, metadata !26, null} + +!43 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/instcombine-instrinsics.ll b/test/DebugInfo/X86/instcombine-instrinsics.ll index 886b0eb..41dd09f 100644 --- a/test/DebugInfo/X86/instcombine-instrinsics.ll +++ b/test/DebugInfo/X86/instcombine-instrinsics.ll @@ -60,6 +60,7 @@ declare i32 @put(i64, i64*, i64, %struct.i24*) nounwind readnone declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!73} !0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.3 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !48, null, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{metadata !"i1", metadata !""} @@ -67,10 +68,10 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !3 = metadata !{metadata !4, metadata !21, metadata !33, metadata !47} !4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"i2", metadata !"i2", metadata !"", i32 31, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, %struct.i3* (i64)* @barz, null, null, metadata !16, i32 32} ; [ DW_TAG_subprogram ] [line 31] [scope 32] !5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] -!6 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !7 = metadata !{metadata !8, metadata !13} !8 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, 
i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from i3] -!9 = metadata !{i32 786451, metadata !1, null, metadata !"i3", i32 25, i64 32, i64 32, i32 0, i32 0, null, metadata !10, i32 0, null, null} ; [ DW_TAG_structure_type ] [line 25, size 32, align 32, offset 0] [from ] +!9 = metadata !{i32 786451, metadata !1, null, metadata !"i3", i32 25, i64 32, i64 32, i32 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [i3] [line 25, size 32, align 32, offset 0] [def] [from ] !10 = metadata !{metadata !11} !11 = metadata !{i32 786445, metadata !1, metadata !9, metadata !"i4", i32 26, i64 32, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ] [line 26, size 32, align 32, offset 0] [from i5] !12 = metadata !{i32 786468, null, null, metadata !"i5", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned] @@ -79,12 +80,12 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !15 = metadata !{i32 786468, null, null, metadata !"i8", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned] !16 = metadata !{} !21 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"i13", metadata !"i13", metadata !"", i32 42, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @init, null, null, metadata !24, i32 43} ; [ DW_TAG_subprogram ] [line 42] [scope 43] -!22 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!22 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !34, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !23 = metadata !{null} !24 = metadata !{metadata !25} !25 = 
metadata !{i32 786688, metadata !21, metadata !"i14", metadata !5, i32 45, metadata !27, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [line 45] !27 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !28} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from i14] -!28 = metadata !{i32 786451, metadata !1, null, metadata !"i14", i32 16, i64 32, i64 32, i32 0, i32 0, null, metadata !29, i32 0, null, null} ; [ DW_TAG_structure_type ] [line 16, size 32, align 32, offset 0] [from ] +!28 = metadata !{i32 786451, metadata !1, null, metadata !"i14", i32 16, i64 32, i64 32, i32 0, i32 0, null, metadata !29, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [i14] [line 16, size 32, align 32, offset 0] [def] [from ] !29 = metadata !{metadata !30} !30 = metadata !{i32 786445, metadata !1, metadata !28, metadata !"i16", i32 17, i64 32, i64 32, i64 0, i32 0, metadata !31} ; [ DW_TAG_member ] [line 17, size 32, align 32, offset 0] [from i17] !31 = metadata !{i32 786454, metadata !1, null, metadata !"i17", i32 7, i64 0, i64 0, i64 0, i32 0, metadata !32} ; [ DW_TAG_typedef ] [line 7, size 0, align 0, offset 0] [from int] @@ -98,3 +99,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !52 = metadata !{i64 0} !55 = metadata !{%struct.i3* null} !72 = metadata !{%struct.i24* null} +!73 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/line-info.ll b/test/DebugInfo/X86/line-info.ll index fd813b3..46daccf 100644 --- a/test/DebugInfo/X86/line-info.ll +++ b/test/DebugInfo/X86/line-info.ll @@ -36,6 +36,7 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe attributes #1 = { nounwind readnone } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!19} !0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ 
DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/list0.c] [DW_LANG_C99] !1 = metadata !{metadata !"list0.c", metadata !"/usr/local/google/home/blaikie/dev/scratch"} @@ -44,15 +45,16 @@ attributes #1 = { nounwind readnone } !4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo] !5 = metadata !{metadata !"./list0.h", metadata !"/usr/local/google/home/blaikie/dev/scratch"} !6 = metadata !{i32 786473, metadata !5} ; [ DW_TAG_file_type ] [/usr/local/google/home/blaikie/dev/scratch/./list0.h] -!7 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9, metadata !9} !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !10 = metadata !{i32 786478, metadata !1, metadata !11, metadata !"main", metadata !"main", metadata !"", i32 2, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [main] !11 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/usr/local/google/home/blaikie/dev/scratch/list0.c] -!12 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!12 = metadata !{i32 786453, i32 0, null, 
metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !13 = metadata !{metadata !9} !14 = metadata !{i32 786689, metadata !4, metadata !"x", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [x] [line 1] !15 = metadata !{i32 1, i32 0, metadata !4, null} !16 = metadata !{i32 2, i32 0, metadata !4, null} !17 = metadata !{i32 3, i32 0, metadata !18, null} !18 = metadata !{i32 786443, metadata !11, metadata !10} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/blaikie/dev/scratch/list0.c] +!19 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/linkage-name.ll b/test/DebugInfo/X86/linkage-name.ll index 1ed7b18..3d11667 100644 --- a/test/DebugInfo/X86/linkage-name.ll +++ b/test/DebugInfo/X86/linkage-name.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=x86_64-macosx -darwin-gdb-compat=Disable %s -o %t -filetype=obj +; RUN: llc -mtriple=x86_64-macosx %s -o %t -filetype=obj ; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s ; CHECK: DW_TAG_subprogram [9] * @@ -25,17 +25,18 @@ entry: declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!29} !0 = metadata !{i32 786449, metadata !28, i32 4, metadata !"clang version 3.1 (trunk 152691) (llvm/trunk 152692)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18, metadata !18, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 786478, metadata !6, null, metadata !"a", metadata !"a", metadata !"_ZN1A1aEi", i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%class.A*, i32)* @_ZN1A1aEi, null, metadata !13, metadata !16, i32 5} ; [ DW_TAG_subprogram ] !6 = metadata !{i32 786473, metadata !28} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 
0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9, metadata !10, metadata !9} !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!10 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !11} ; [ DW_TAG_pointer_type ] -!11 = metadata !{i32 786434, metadata !28, null, metadata !"A", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !12, i32 0, null, null} ; [ DW_TAG_class_type ] +!10 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !11} ; [ DW_TAG_pointer_type ] +!11 = metadata !{i32 786434, metadata !28, null, metadata !"A", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 8, align 8, offset 0] [def] [from ] !12 = metadata !{metadata !13} !13 = metadata !{i32 786478, metadata !6, metadata !11, metadata !"a", metadata !"a", metadata !"_ZN1A1aEi", i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 257, i1 false, null, null, i32 0, metadata !14, i32 0} ; [ DW_TAG_subprogram ] !14 = metadata !{metadata !15} @@ -52,3 +53,4 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !26 = metadata !{i32 6, i32 4, metadata !27, null} !27 = metadata !{i32 786443, metadata !6, metadata !5, i32 5, i32 17, i32 0} ; [ DW_TAG_lexical_block ] !28 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo"} +!29 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/lit.local.cfg b/test/DebugInfo/X86/lit.local.cfg index 60d66ea..19840aa 100644 --- a/test/DebugInfo/X86/lit.local.cfg +++ b/test/DebugInfo/X86/lit.local.cfg 
@@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.s'] - targets = set(config.root.targets_to_build.split()) if not 'X86' in targets: config.unsupported = True diff --git a/test/DebugInfo/X86/low-pc-cu.ll b/test/DebugInfo/X86/low-pc-cu.ll index f7e1aae..922ae8d 100644 --- a/test/DebugInfo/X86/low-pc-cu.ll +++ b/test/DebugInfo/X86/low-pc-cu.ll @@ -13,13 +13,14 @@ entry: } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!16} !0 = metadata !{i32 786449, metadata !15, i32 4, metadata !"clang version 3.1 (trunk 153454) (llvm/trunk 153471)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5, metadata !12} -!5 = metadata !{i32 786478, metadata !6, i32 0, metadata !"q", metadata !"q", metadata !"_Z1qv", i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z1qv, null, null, metadata !10, i32 0} ; [ DW_TAG_subprogram ] +!5 = metadata !{i32 786478, metadata !6, null, metadata !"q", metadata !"q", metadata !"_Z1qv", i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z1qv, null, null, metadata !10, i32 0} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 0] [q] !6 = metadata !{i32 786473, metadata !15} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9} !9 = metadata !{i32 786468, metadata !15, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !10 = metadata !{metadata !11} @@ -28,3 +29,4 @@ entry: !13 = metadata !{i32 7, i32 1, metadata !14, null} !14 = metadata !{i32 786443, metadata !5, i32 5, 
i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] !15 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo/tmp"} +!16 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/misched-dbg-value.ll b/test/DebugInfo/X86/misched-dbg-value.ll index 9aa362f..cfb0667 100644 --- a/test/DebugInfo/X86/misched-dbg-value.ll +++ b/test/DebugInfo/X86/misched-dbg-value.ll @@ -88,10 +88,11 @@ attributes #0 = { nounwind optsize ssp uwtable } attributes #1 = { nounwind readnone } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!83} !0 = metadata !{i32 786449, metadata !82, i32 12, metadata !"clang version 3.3 (trunk 175015)", i1 true, metadata !"", i32 0, metadata !1, metadata !10, metadata !11, metadata !29, metadata !29, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/manmanren/test-Nov/rdar_13183203/test2/dry.c] [DW_LANG_C99] !1 = metadata !{metadata !2} -!2 = metadata !{i32 786436, metadata !82, null, metadata !"", i32 128, i64 32, i64 32, i32 0, i32 0, null, metadata !4, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] [line 128, size 32, align 32, offset 0] [from ] +!2 = metadata !{i32 786436, metadata !82, null, metadata !"", i32 128, i64 32, i64 32, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [line 128, size 32, align 32, offset 0] [def] [from ] !3 = metadata !{i32 786473, metadata !82} ; [ DW_TAG_file_type ] !4 = metadata !{metadata !5, metadata !6, metadata !7, metadata !8, metadata !9} !5 = metadata !{i32 786472, metadata !"Ident1", i64 0} ; [ DW_TAG_enumerator ] [Ident1 :: 0] @@ -102,12 +103,12 @@ attributes #1 = { nounwind readnone } !10 = metadata !{i32 0} !11 = metadata !{metadata !12} !12 = metadata !{i32 786478, metadata !82, metadata !3, metadata !"Proc8", metadata !"Proc8", metadata !"", i32 180, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, void (i32*, [51 x i32]*, i32, i32)* @Proc8, null, null, metadata !22, i32 185} ; [ DW_TAG_subprogram ] [line 180] [def] 
[scope 185] [Proc8] -!13 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!13 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !14 = metadata !{null, metadata !15, metadata !17, metadata !21, metadata !21} !15 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int] !16 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !17 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !18} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ] -!18 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 1632, i64 32, i32 0, i32 0, metadata !16, metadata !19, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 1632, align 32, offset 0] [from int] +!18 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 1632, i64 32, i32 0, i32 0, metadata !16, metadata !19, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 1632, align 32, offset 0] [from int] !19 = metadata !{metadata !20} !20 = metadata !{i32 786465, i64 0, i64 51} ; [ DW_TAG_subrange_type ] [0, 50] !21 = metadata !{i32 786454, metadata !82, null, metadata !"OneToFifty", i32 132, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_typedef ] [OneToFifty] [line 132, size 0, align 0, offset 0] [from int] @@ -120,7 +121,7 @@ attributes #1 = { nounwind readnone } !28 = metadata !{i32 786688, metadata !12, metadata !"IntIndex", metadata !3, i32 187, metadata !21, i32 0, i32 0} ; [ DW_TAG_auto_variable 
] [IntIndex] [line 187] !29 = metadata !{metadata !30, metadata !35, metadata !36, metadata !38, metadata !39, metadata !40, metadata !42, metadata !46, metadata !63} !30 = metadata !{i32 786484, i32 0, null, metadata !"Version", metadata !"Version", metadata !"", metadata !3, i32 111, metadata !31, i32 0, i32 1, [4 x i8]* @Version, null} ; [ DW_TAG_variable ] [Version] [line 111] [def] -!31 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 32, i64 8, i32 0, i32 0, metadata !32, metadata !33, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 32, align 8, offset 0] [from char] +!31 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 32, i64 8, i32 0, i32 0, metadata !32, metadata !33, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 32, align 8, offset 0] [from char] !32 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char] !33 = metadata !{metadata !34} !34 = metadata !{i32 786465, i64 0, i64 4} ; [ DW_TAG_subrange_type ] [0, 3] @@ -133,13 +134,13 @@ attributes #1 = { nounwind readnone } !41 = metadata !{i32 786454, metadata !82, null, metadata !"Array1Dim", i32 135, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_typedef ] [Array1Dim] [line 135, size 0, align 0, offset 0] [from ] !42 = metadata !{i32 786484, i32 0, null, metadata !"Array2Glob", metadata !"Array2Glob", metadata !"", metadata !3, i32 176, metadata !43, i32 0, i32 1, [51 x [51 x i32]]* @Array2Glob, null} ; [ DW_TAG_variable ] [Array2Glob] [line 176] [def] !43 = metadata !{i32 786454, metadata !82, null, metadata !"Array2Dim", i32 136, i64 0, i64 0, i64 0, i32 0, metadata !44} ; [ DW_TAG_typedef ] [Array2Dim] [line 136, size 0, align 0, offset 0] [from ] -!44 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 83232, i64 32, i32 0, i32 0, metadata !16, metadata !45, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 
83232, align 32, offset 0] [from int] +!44 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 83232, i64 32, i32 0, i32 0, metadata !16, metadata !45, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 83232, align 32, offset 0] [from int] !45 = metadata !{metadata !20, metadata !20} !46 = metadata !{i32 786484, i32 0, null, metadata !"PtrGlb", metadata !"PtrGlb", metadata !"", metadata !3, i32 177, metadata !47, i32 0, i32 1, %struct.Record** @PtrGlb, null} ; [ DW_TAG_variable ] [PtrGlb] [line 177] [def] !47 = metadata !{i32 786454, metadata !82, null, metadata !"RecordPtr", i32 148, i64 0, i64 0, i64 0, i32 0, metadata !48} ; [ DW_TAG_typedef ] [RecordPtr] [line 148, size 0, align 0, offset 0] [from ] !48 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !49} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from RecordType] !49 = metadata !{i32 786454, metadata !82, null, metadata !"RecordType", i32 147, i64 0, i64 0, i64 0, i32 0, metadata !50} ; [ DW_TAG_typedef ] [RecordType] [line 147, size 0, align 0, offset 0] [from Record] -!50 = metadata !{i32 786451, metadata !82, null, metadata !"Record", i32 138, i64 448, i64 64, i32 0, i32 0, null, metadata !51, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [Record] [line 138, size 448, align 64, offset 0] [from ] +!50 = metadata !{i32 786451, metadata !82, null, metadata !"Record", i32 138, i64 448, i64 64, i32 0, i32 0, null, metadata !51, i32 0, null, i32 0, null} ; [ DW_TAG_structure_type ] [Record] [line 138, size 448, align 64, offset 0] [def] [from ] !51 = metadata !{metadata !52, metadata !54, metadata !56, metadata !57, metadata !58} !52 = metadata !{i32 786445, metadata !82, metadata !50, metadata !"PtrComp", i32 140, i64 64, i64 64, i64 0, i32 0, metadata !53} ; [ DW_TAG_member ] [PtrComp] [line 140, size 64, align 64, offset 0] [from ] !53 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, 
i32 0, metadata !50} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from Record] @@ -149,7 +150,7 @@ attributes #1 = { nounwind readnone } !57 = metadata !{i32 786445, metadata !82, metadata !50, metadata !"IntComp", i32 143, i64 32, i64 32, i64 128, i32 0, metadata !21} ; [ DW_TAG_member ] [IntComp] [line 143, size 32, align 32, offset 128] [from OneToFifty] !58 = metadata !{i32 786445, metadata !82, metadata !50, metadata !"StringComp", i32 144, i64 248, i64 8, i64 160, i32 0, metadata !59} ; [ DW_TAG_member ] [StringComp] [line 144, size 248, align 8, offset 160] [from String30] !59 = metadata !{i32 786454, metadata !82, null, metadata !"String30", i32 134, i64 0, i64 0, i64 0, i32 0, metadata !60} ; [ DW_TAG_typedef ] [String30] [line 134, size 0, align 0, offset 0] [from ] -!60 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 248, i64 8, i32 0, i32 0, metadata !32, metadata !61, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 248, align 8, offset 0] [from char] +!60 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 248, i64 8, i32 0, i32 0, metadata !32, metadata !61, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 248, align 8, offset 0] [from char] !61 = metadata !{metadata !62} !62 = metadata !{i32 786465, i64 0, i64 31} ; [ DW_TAG_subrange_type ] [0, 30] !63 = metadata !{i32 786484, i32 0, null, metadata !"PtrGlbNext", metadata !"PtrGlbNext", metadata !"", metadata !3, i32 178, metadata !47, i32 0, i32 1, %struct.Record** @PtrGlbNext, null} ; [ DW_TAG_variable ] [PtrGlbNext] [line 178] [def] @@ -169,3 +170,4 @@ attributes #1 = { nounwind readnone } !80 = metadata !{i32 197, i32 0, metadata !12, null} !81 = metadata !{i32 198, i32 0, metadata !12, null} !82 = metadata !{metadata !"dry.c", metadata !"/Users/manmanren/test-Nov/rdar_13183203/test2"} +!83 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/multiple-aranges.ll 
b/test/DebugInfo/X86/multiple-aranges.ll new file mode 100644 index 0000000..4c205d8 --- /dev/null +++ b/test/DebugInfo/X86/multiple-aranges.ll @@ -0,0 +1,66 @@ +; RUN: llc < %s | FileCheck %s + +; First CU +; CHECK: .long 44 # Length of ARange Set +; CHECK-NEXT: .short 2 # DWARF Arange version number +; CHECK-NEXT: .long .L.debug_info_begin0 # Offset Into Debug Info Section +; CHECK-NEXT: .byte 8 # Address Size (in bytes) +; CHECK-NEXT: .byte 0 # Segment Size (in bytes) +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .quad kittens +; CHECK-NEXT: .Lset0 = rainbows-kittens +; CHECK-NEXT: .quad .Lset0 +; CHECK-NEXT: .quad 0 # ARange terminator +; CHECK-NEXT: .quad 0 + +; Second CU +; CHECK-NEXT: .long 44 # Length of ARange Set +; CHECK-NEXT: .short 2 # DWARF Arange version number +; CHECK-NEXT: .long .L.debug_info_begin1 # Offset Into Debug Info Section +; CHECK-NEXT: .byte 8 # Address Size (in bytes) +; CHECK-NEXT: .byte 0 # Segment Size (in bytes) +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .byte 255 +; CHECK-NEXT: .quad rainbows +; CHECK-NEXT: .Lset1 = .Ldebug_end0-rainbows +; CHECK-NEXT: .quad .Lset1 +; CHECK-NEXT: .quad 0 # ARange terminator +; CHECK-NEXT: .quad 0 + + +; Generated from: clang -c -g -emit-llvm +; llvm-link test1.bc test2.bc -o test.bc +; test1.c: int kittens = 4; +; test2.c: int rainbows = 5; + + + + +; ModuleID = 'test.bc' +target triple = "x86_64-unknown-linux-gnu" + +@kittens = global i32 4, align 4 +@rainbows = global i32 5, align 4 + +!llvm.dbg.cu = !{!0, !7} +!llvm.module.flags = !{!12, !13} + +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/home/kayamon/test1.c] [DW_LANG_C99] +!1 = metadata !{metadata !"test1.c", metadata !"/home/kayamon"} +!2 = metadata !{i32 
0} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786484, i32 0, null, metadata !"kittens", metadata !"kittens", metadata !"", metadata !5, i32 1, metadata !6, i32 0, i32 1, i32* @kittens, null} ; [ DW_TAG_variable ] [kittens] [line 1] [def] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/home/kayamon/test1.c] +!6 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!7 = metadata !{i32 786449, metadata !8, i32 12, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !9, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/home/kayamon/test2.c] [DW_LANG_C99] +!8 = metadata !{metadata !"test2.c", metadata !"/home/kayamon"} +!9 = metadata !{metadata !10} +!10 = metadata !{i32 786484, i32 0, null, metadata !"rainbows", metadata !"rainbows", metadata !"", metadata !11, i32 1, metadata !6, i32 0, i32 1, i32* @rainbows, null} ; [ DW_TAG_variable ] [rainbows] [line 1] [def] +!11 = metadata !{i32 786473, metadata !8} ; [ DW_TAG_file_type ] [/home/kayamon/test2.c] +!12 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/multiple-at-const-val.ll b/test/DebugInfo/X86/multiple-at-const-val.ll index 2e02cbd..9a66061 100644 --- a/test/DebugInfo/X86/multiple-at-const-val.ll +++ b/test/DebugInfo/X86/multiple-at-const-val.ll @@ -8,7 +8,7 @@ ; CHECK: DW_TAG_class_type ; CHECK: DW_TAG_member ; CHECK: badbit -; CHECK: DW_AT_const_value [DW_FORM_data4] (0x00000001) +; CHECK: DW_AT_const_value [DW_FORM_sdata] (1) ; CHECK-NOT: DW_AT_const_value ; CHECK: NULL @@ -30,22 +30,23 @@ declare %"class.std::basic_ostream"* @test(%"class.std::basic_ostream"*, i8*, i6 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!1803} !0 
= metadata !{i32 786449, metadata !1802, i32 4, metadata !"clang version 3.3 (trunk 174207)", i1 true, metadata !"", i32 0, metadata !1, metadata !955, metadata !956, metadata !1786, metadata !1786, metadata !""} ; [ DW_TAG_compile_unit ] [/privite/tmp/student2.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{metadata !26} !4 = metadata !{i32 786489, null, metadata !"std", metadata !5, i32 48} ; [ DW_TAG_namespace ] !5 = metadata !{i32 786473, metadata !1801} ; [ DW_TAG_file_type ] !25 = metadata !{i32 786472, metadata !"_S_os_fmtflags_end", i64 65536} ; [ DW_TAG_enumerator ] -!26 = metadata !{i32 786436, metadata !1801, metadata !4, metadata !"_Ios_Iostate", i32 146, i64 32, i64 32, i32 0, i32 0, null, metadata !27, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] +!26 = metadata !{i32 786436, metadata !1801, metadata !4, metadata !"_Ios_Iostate", i32 146, i64 32, i64 32, i32 0, i32 0, null, metadata !27, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [_Ios_Iostate] [line 146, size 32, align 32, offset 0] [def] [from ] !27 = metadata !{metadata !28, metadata !29, metadata !30, metadata !31, metadata !32} !28 = metadata !{i32 786472, metadata !"_S_goodbit", i64 0} ; [ DW_TAG_enumerator ] [_S_goodbit :: 0] !29 = metadata !{i32 786472, metadata !"_S_badbit", i64 1} ; [ DW_TAG_enumerator ] [_S_badbit :: 1] !30 = metadata !{i32 786472, metadata !"_S_eofbit", i64 2} ; [ DW_TAG_enumerator ] [_S_eofbit :: 2] !31 = metadata !{i32 786472, metadata !"_S_failbit", i64 4} ; [ DW_TAG_enumerator ] [_S_failbit :: 4] !32 = metadata !{i32 786472, metadata !"_S_os_ostate_end", i64 65536} ; [ DW_TAG_enumerator ] [_S_os_ostate_end :: 65536] -!49 = metadata !{i32 786434, metadata !1801, metadata !4, metadata !"os_base", i32 200, i64 1728, i64 64, i32 0, i32 0, null, metadata !50, i32 0, metadata !49, null} ; [ DW_TAG_class_type ] +!49 = metadata !{i32 786434, metadata !1801, metadata !4, metadata !"os_base", i32 200, i64 1728, i64 64, i32 0, i32 0, null, metadata !50, i32 0, metadata !49, 
null, null} ; [ DW_TAG_class_type ] [os_base] [line 200, size 1728, align 64, offset 0] [def] [from ] !50 = metadata !{metadata !77} -!54 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !55, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!54 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !55, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !55 = metadata !{metadata !56} !56 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !77 = metadata !{i32 786445, metadata !1801, metadata !49, metadata !"badbit", i32 331, i64 0, i64 0, i64 0, i32 4096, metadata !78, i32 1} ; [ DW_TAG_member ] @@ -59,3 +60,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !1800 = metadata !{i32 786484, i32 0, metadata !5, metadata !"badbit", metadata !"badbit", metadata !"badbit", metadata !5, i32 331, metadata !78, i32 1, i32 1, i32 1, metadata !77} ; [ DW_TAG_variable ] !1801 = metadata !{metadata !"os_base.h", metadata !"/privite/tmp"} !1802 = metadata !{metadata !"student2.cpp", metadata !"/privite/tmp"} +!1803 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/nondefault-subrange-array.ll b/test/DebugInfo/X86/nondefault-subrange-array.ll index a5f786c..91065a3 100644 --- a/test/DebugInfo/X86/nondefault-subrange-array.ll +++ b/test/DebugInfo/X86/nondefault-subrange-array.ll @@ -8,41 +8,45 @@ ; Check that we can handle non-default array bounds. In this case, the array ; goes from [-3, 38]. 
-; CHECK: 0x0000002d: DW_TAG_base_type [3] -; CHECK-NEXT: 0x0000002e: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000041] = "int") -; CHECK-NEXT: 0x00000032: DW_AT_byte_size [DW_FORM_data1] (0x04) -; CHECK-NEXT: 0x00000033: DW_AT_encoding [DW_FORM_data1] (0x05) +; CHECK: DW_TAG_class_type +; CHECK: DW_TAG_member +; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-f]*}}] = "x") +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x{{[0-9a-f]*}} => {[[ARRAY:0x[0-9a-f]*]]}) -; CHECK: 0x00000034: DW_TAG_array_type [4] * -; CHECK-NEXT: 0x00000035: DW_AT_type [DW_FORM_ref4] (cu + 0x0026 => {0x00000026}) +; CHECK: [[ARRAY]]: DW_TAG_array_type [{{.*}}] * +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x{{[0-9a-f]*}} => {[[BASE:0x[0-9a-f]*]]}) -; CHECK: 0x00000039: DW_TAG_subrange_type [5] -; CHECK-NEXT: 0x0000003a: DW_AT_type [DW_FORM_ref4] (cu + 0x002d => {0x0000002d}) -; CHECK-NEXT: 0x0000003e: DW_AT_lower_bound [DW_FORM_data8] (0xfffffffffffffffd) -; CHECK-NEXT: 0x00000046: DW_AT_upper_bound [DW_FORM_data1] (0x26) +; CHECK: DW_TAG_subrange_type +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x{{[0-9a-f]*}} => {[[BASE2:0x[0-9a-f]*]]}) +; CHECK-NEXT: DW_AT_lower_bound [DW_FORM_data8] (0xfffffffffffffffd) +; CHECK-NEXT: DW_AT_upper_bound [DW_FORM_data1] (0x26) -; CHECK: 0x00000055: DW_TAG_member [8] -; CHECK-NEXT: 0x00000056: DW_AT_name [DW_FORM_strp] ( .debug_str[0x0000003f] = "x") -; CHECK-NEXT: 0x0000005a: DW_AT_type [DW_FORM_ref4] (cu + 0x0034 => {0x00000034}) +; CHECK: [[BASE]]: DW_TAG_base_type +; CHECK: [[BASE2]]: DW_TAG_base_type +; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-f]*}}] = "int") +; CHECK-NEXT: DW_AT_byte_size [DW_FORM_data1] (0x04) +; CHECK-NEXT: DW_AT_encoding [DW_FORM_data1] (0x05) !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!21} !0 = metadata !{i32 786449, metadata !20, i32 4, metadata !"clang version 3.3 (trunk 169136)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !3, metadata 
!""} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/t.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %class.A* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def] !6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786434, metadata !20, null, metadata !"A", i32 1, i64 0, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 0, align 32, offset 0] [from ] +!7 = metadata !{i32 786434, metadata !20, null, metadata !"A", i32 1, i64 0, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 0, align 32, offset 0] [def] [from ] !8 = metadata !{metadata !9, metadata !14} !9 = metadata !{i32 786445, metadata !20, metadata !7, metadata !"x", i32 1, i64 0, i64 0, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ] [x] [line 1, size 0, align 0, offset 0] [private] [from ] -!10 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !11, metadata !12, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int] +!10 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !11, metadata !12, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int] !11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !12 = metadata !{metadata !13} !13 = metadata !{i32 786465, i64 -3, i64 42} ; [ DW_TAG_subrange_type ] [-3, 39] !14 = metadata !{i32 786478, metadata !6, metadata !7, metadata !"A", metadata !"A", metadata !"", i32 1, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata 
!18, i32 1} ; [ DW_TAG_subprogram ] [line 1] [A] -!15 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!15 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !16 = metadata !{null, metadata !17} -!17 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A] +!17 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A] !18 = metadata !{metadata !19} !19 = metadata !{i32 786468} ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0] !20 = metadata !{metadata !"t.cpp", metadata !"/Volumes/Sandbox/llvm"} +!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/objc-fwd-decl.ll b/test/DebugInfo/X86/objc-fwd-decl.ll index 3070ff8..a5e9632 100644 --- a/test/DebugInfo/X86/objc-fwd-decl.ll +++ b/test/DebugInfo/X86/objc-fwd-decl.ll @@ -1,16 +1,16 @@ ; RUN: llc -mtriple=x86_64-macosx %s -o %t -filetype=obj ; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s -; CHECK: 0x00000027: DW_TAG_structure_type -; CHECK: 0x0000002c: DW_AT_declaration -; CHECK: 0x0000002d: DW_AT_APPLE_runtime_class +; CHECK: DW_TAG_structure_type +; CHECK: DW_AT_declaration +; CHECK: DW_AT_APPLE_runtime_class %0 = type opaque @a = common global %0* null, align 8 !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!9, !10, !11, !12} +!llvm.module.flags = !{!9, !10, !11, !12, !14} !0 = metadata !{i32 786449, metadata !13, i32 16, metadata !"clang version 3.1 (trunk 152054 trunk 152094)", i1 false, metadata !"", i32 2, metadata !1, metadata !1, 
metadata !1, metadata !3, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} @@ -18,9 +18,10 @@ !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 3, metadata !7, i32 0, i32 1, %0** @a, null} ; [ DW_TAG_variable ] !6 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ] !7 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] -!8 = metadata !{i32 786451, metadata !13, null, metadata !"FooBarBaz", i32 1, i32 0, i32 0, i32 0, i32 4, null, null, i32 16} ; [ DW_TAG_structure_type ] +!8 = metadata !{i32 786451, metadata !13, null, metadata !"FooBarBaz", i32 1, i32 0, i32 0, i32 0, i32 4, null, null, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [FooBarBaz] [line 1, size 0, align 0, offset 0] [decl] [from ] !9 = metadata !{i32 1, metadata !"Objective-C Version", i32 2} !10 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0} !11 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"} !12 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0} !13 = metadata !{metadata !"foo.m", metadata !"/Users/echristo"} +!14 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/op_deref.ll b/test/DebugInfo/X86/op_deref.ll index 864cbef..300f13d 100644 --- a/test/DebugInfo/X86/op_deref.ll +++ b/test/DebugInfo/X86/op_deref.ll @@ -4,13 +4,14 @@ ; DW-CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000067] = "vla") ; FIXME: The location here needs to be fixed, but llvm-dwarfdump doesn't handle ; DW_AT_location lists yet. 
-; DW-CHECK: DW_AT_location [DW_FORM_data4] (0x00000000) +; DW-CHECK: DW_AT_location [DW_FORM_sec_offset] (0x00000000) ; Unfortunately llvm-dwarfdump can't unparse a list of DW_AT_locations ; right now, so we check the asm output: ; RUN: llc -O0 -mtriple=x86_64-apple-darwin %s -o - -filetype=asm | FileCheck %s -check-prefix=ASM-CHECK ; vla should have a register-indirect address at one point. -; ASM-CHECK: DEBUG_VALUE: vla <- [RCX+0] +; ASM-CHECK: DEBUG_VALUE: vla <- RCX +; ASM-CHECK: DW_OP_breg2 define void @testVLAwithSize(i32 %s) nounwind uwtable ssp { entry: @@ -64,21 +65,22 @@ declare i8* @llvm.stacksave() nounwind declare void @llvm.stackrestore(i8*) nounwind !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!29} !0 = metadata !{i32 786449, metadata !28, i32 12, metadata !"clang version 3.2 (trunk 156005) (llvm/trunk 156000)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 786478, metadata !28, metadata !6, metadata !"testVLAwithSize", metadata !"testVLAwithSize", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @testVLAwithSize, null, null, metadata !1, i32 2} ; [ DW_TAG_subprogram ] !6 = metadata !{i32 786473, metadata !28} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{null, metadata !9} !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !10 = metadata !{i32 786689, metadata !5, metadata !"s", metadata !6, i32 16777217, metadata !9, i32 
0, i32 0} ; [ DW_TAG_arg_variable ] !11 = metadata !{i32 1, i32 26, metadata !5, null} !12 = metadata !{i32 3, i32 13, metadata !13, null} !13 = metadata !{i32 786443, metadata !28, metadata !5, i32 2, i32 1, i32 0} ; [ DW_TAG_lexical_block ] -!14 = metadata !{i32 786688, metadata !13, metadata !"vla", metadata !6, i32 3, metadata !15, i32 0, i32 0, i64 2} ; [ DW_TAG_auto_variable ] -!15 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !9, metadata !16, i32 0, i32 0} ; [ DW_TAG_array_type ] +!14 = metadata !{i32 786688, metadata !13, metadata !"vla", metadata !6, i32 3, metadata !15, i32 8192, i32 0, i64 2} ; [ DW_TAG_auto_variable ] +!15 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !9, metadata !16, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int] !16 = metadata !{metadata !17} !17 = metadata !{i32 786465, i64 0, i64 -1} ; [ DW_TAG_subrange_type ] !18 = metadata !{i32 3, i32 7, metadata !13, null} @@ -92,3 +94,4 @@ declare void @llvm.stackrestore(i8*) nounwind !26 = metadata !{i32 5, i32 22, metadata !22, null} !27 = metadata !{i32 8, i32 1, metadata !13, null} !28 = metadata !{metadata !"bar.c", metadata !"/Users/echristo/tmp"} +!29 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/parameters.ll b/test/DebugInfo/X86/parameters.ll index 7846924..fa91bd2 100644 --- a/test/DebugInfo/X86/parameters.ll +++ b/test/DebugInfo/X86/parameters.ll @@ -78,7 +78,7 @@ attributes #1 = { nounwind readnone } attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!21} +!llvm.module.flags = !{!21, !33} !0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, 
metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/pass.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{metadata !"pass.cpp", metadata !"/tmp"} @@ -86,19 +86,19 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n !3 = metadata !{metadata !4, metadata !17} !4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"func", metadata !"func", metadata !"_ZN7pr147634funcENS_3fooE", i32 6, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%"struct.pr14763::foo"*, %"struct.pr14763::foo"*)* @_ZN7pr147634funcENS_3fooE, null, null, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [func] !5 = metadata !{i32 786489, metadata !1, null, metadata !"pr14763", i32 1} ; [ DW_TAG_namespace ] [pr14763] [line 1] -!6 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !7 = metadata !{metadata !8, metadata !8} -!8 = metadata !{i32 786451, metadata !1, metadata !5, metadata !"foo", i32 2, i64 8, i64 8, i32 0, i32 0, null, metadata !9, i32 0, null, null} ; [ DW_TAG_structure_type ] [foo] [line 2, size 8, align 8, offset 0] [from ] +!8 = metadata !{i32 786451, metadata !1, metadata !5, metadata !"foo", i32 2, i64 8, i64 8, i32 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [foo] [line 2, size 8, align 8, offset 0] [def] [from ] !9 = metadata !{metadata !10} !10 = metadata !{i32 786478, metadata !1, metadata !8, metadata !"foo", metadata !"foo", metadata !"", i32 3, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata 
!16, i32 3} ; [ DW_TAG_subprogram ] [line 3] [foo] -!11 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !12 = metadata !{null, metadata !13, metadata !14} -!13 = metadata !{i32 786447, i32 0, i32 0, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from foo] +!13 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from foo] !14 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !15} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ] !15 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from foo] !16 = metadata !{i32 786468} !17 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"func2", metadata !"func2", metadata !"_ZN7pr147635func2EbNS_3fooE", i32 12, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i1, %"struct.pr14763::foo"*)* @_ZN7pr147635func2EbNS_3fooE, null, null, metadata !2, i32 12} ; [ DW_TAG_subprogram ] [line 12] [def] [func2] -!18 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!18 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null, null, null} ; [ 
DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !19 = metadata !{null, metadata !20, metadata !8} !20 = metadata !{i32 786468, null, null, metadata !"bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] [bool] [line 0, size 8, align 8, offset 0, enc DW_ATE_boolean] !21 = metadata !{i32 2, metadata !"Dwarf Version", i32 3} @@ -113,3 +113,4 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n !30 = metadata !{i32 786443, metadata !1, metadata !17, i32 13, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp/pass.cpp] !31 = metadata !{i32 14, i32 0, metadata !30, null} !32 = metadata !{i32 15, i32 0, metadata !17, null} +!33 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/pointer-type-size.ll b/test/DebugInfo/X86/pointer-type-size.ll index a640069..cf789b2 100644 --- a/test/DebugInfo/X86/pointer-type-size.ll +++ b/test/DebugInfo/X86/pointer-type-size.ll @@ -9,16 +9,18 @@ @crass = common global %struct.crass zeroinitializer, align 8 !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!14} !0 = metadata !{i32 786449, metadata !13, i32 12, metadata !"clang version 3.1 (trunk 147882)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 720948, i32 0, null, metadata !"crass", metadata !"crass", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %struct.crass* @crass, null} ; [ DW_TAG_variable ] !6 = metadata !{i32 720937, metadata !13} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786451, metadata !13, null, metadata !"crass", i32 1, i64 64, i64 64, i32 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!7 = metadata !{i32 786451, metadata !13, null, metadata !"crass", i32 1, i64 64, i64 64, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [crass] [line 
1, size 64, align 64, offset 0] [def] [from ] !8 = metadata !{metadata !9} !9 = metadata !{i32 786445, metadata !13, metadata !7, metadata !"ptr", i32 1, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ] !10 = metadata !{i32 720934, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_const_type ] !11 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ] !12 = metadata !{i32 720932, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] !13 = metadata !{metadata !"foo.c", metadata !"/Users/echristo/tmp"} +!14 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/pr11300.ll b/test/DebugInfo/X86/pr11300.ll index 54e0c8b..caa24ee 100644 --- a/test/DebugInfo/X86/pr11300.ll +++ b/test/DebugInfo/X86/pr11300.ll @@ -3,8 +3,11 @@ ; test that the DW_AT_specification is a back edge in the file. -; CHECK: 0x0000005c: DW_TAG_subprogram [5] -; CHECK: 0x0000007c: DW_AT_specification [DW_FORM_ref4] (cu + 0x005c => {0x0000005c}) +; CHECK: DW_TAG_subprogram +; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-f]*}}] = "zed") +; CHECK: DW_TAG_subprogram +; CHECK-NEXT: DW_AT_specification [DW_FORM_ref4] (cu + {{.*}} => {[[BACK:0x[0-9a-f]*]]}) +; CHECK: [[BACK]]: DW_TAG_subprogram %struct.foo = type { i8 } @@ -30,26 +33,27 @@ entry: } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!33} !0 = metadata !{i32 786449, metadata !32, i32 4, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5, metadata !20} -!5 = metadata !{i32 720942, metadata !6, metadata !6, metadata !"zed", metadata !"zed", metadata !"_Z3zedP3foo", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.foo*)* 
@_Z3zedP3foo, null, null, metadata !18, i32 4} ; [ DW_TAG_subprogram ] +!5 = metadata !{i32 720942, metadata !6, metadata !6, metadata !"zed", metadata !"zed", metadata !"_Z3zedP3foo", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.foo*)* @_Z3zedP3foo, null, null, metadata !21, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [zed] !6 = metadata !{i32 720937, metadata !32} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{null, metadata !9} !9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] -!10 = metadata !{i32 720898, metadata !32, null, metadata !"foo", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !11, i32 0, null, null} ; [ DW_TAG_class_type ] +!10 = metadata !{i32 720898, metadata !32, null, metadata !"foo", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_class_type ] [foo] [line 1, size 8, align 8, offset 0] [def] [from ] !11 = metadata !{metadata !12} !12 = metadata !{i32 720942, metadata !6, metadata !10, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 2, metadata !13, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !16, i32 2} ; [ DW_TAG_subprogram ] -!13 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!13 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !14, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] 
[from ] !14 = metadata !{null, metadata !15} -!15 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ] +!15 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ] !16 = metadata !{metadata !17} !17 = metadata !{i32 720932} ; [ DW_TAG_base_type ] !18 = metadata !{metadata !19} !19 = metadata !{i32 720932} ; [ DW_TAG_base_type ] -!20 = metadata !{i32 720942, metadata !6, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 2, metadata !13, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.foo*)* @_ZN3foo3barEv, null, metadata !12, metadata !21, i32 2} ; [ DW_TAG_subprogram ] +!20 = metadata !{i32 720942, metadata !6, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 2, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.foo*)* @_ZN3foo3barEv, null, metadata !12, metadata !21, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [bar] !21 = metadata !{metadata !22} !22 = metadata !{i32 720932} ; [ DW_TAG_base_type ] !23 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !6, i32 16777220, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] @@ -62,3 +66,4 @@ entry: !30 = metadata !{i32 2, i32 15, metadata !31, null} !31 = metadata !{i32 786443, metadata !6, metadata !20, i32 2, i32 14, i32 1} ; [ DW_TAG_lexical_block ] !32 = metadata !{metadata !"/home/espindola/llvm/test.cc", metadata !"/home/espindola/tmpfs/build"} +!33 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/pr12831.ll b/test/DebugInfo/X86/pr12831.ll index 0244d1e..6dea4a0 100644 --- a/test/DebugInfo/X86/pr12831.ll +++ b/test/DebugInfo/X86/pr12831.ll @@ -76,48 +76,49 @@ entry: } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!162} !0 = metadata !{i32 786449, metadata !161, i32 4, metadata !"clang version 3.2 ", i1 false, 
metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !128, null, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5, metadata !106, metadata !107, metadata !126, metadata !127} !5 = metadata !{i32 786478, metadata !6, null, metadata !"writeExpr", metadata !"writeExpr", metadata !"_ZN17BPLFunctionWriter9writeExprEv", i32 19, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.BPLFunctionWriter*)* @_ZN17BPLFunctionWriter9writeExprEv, null, metadata !103, metadata !1, i32 19} ; [ DW_TAG_subprogram ] !6 = metadata !{i32 786473, metadata !160} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{null, metadata !9} -!9 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ] -!10 = metadata !{i32 786434, metadata !160, null, metadata !"BPLFunctionWriter", i32 15, i64 64, i64 64, i32 0, i32 0, null, metadata !11, i32 0, null, null} ; [ DW_TAG_class_type ] +!9 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ] +!10 = metadata !{i32 786434, metadata !160, null, metadata !"BPLFunctionWriter", i32 15, i64 64, i64 64, i32 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_class_type ] [BPLFunctionWriter] [line 15, size 64, align 64, offset 0] [def] [from ] !11 = metadata !{metadata !12, metadata !103} !12 = metadata !{i32 786445, metadata !160, metadata !10, metadata !"MW", i32 16, i64 64, i64 64, i64 0, i32 1, metadata !13} ; [ DW_TAG_member ] -!13 = metadata !{i32 786447, null, metadata !"", 
null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ] -!14 = metadata !{i32 786434, metadata !160, null, metadata !"BPLModuleWriter", i32 12, i64 8, i64 8, i32 0, i32 0, null, metadata !15, i32 0, null, null} ; [ DW_TAG_class_type ] +!13 = metadata !{i32 786447, null, null, null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ] +!14 = metadata !{i32 786434, metadata !160, null, metadata !"BPLModuleWriter", i32 12, i64 8, i64 8, i32 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_class_type ] [BPLModuleWriter] [line 12, size 8, align 8, offset 0] [def] [from ] !15 = metadata !{metadata !16} !16 = metadata !{i32 786478, metadata !6, metadata !14, metadata !"writeIntrinsic", metadata !"writeIntrinsic", metadata !"_ZN15BPLModuleWriter14writeIntrinsicE8functionIFvvEE", i32 13, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !101, i32 13} ; [ DW_TAG_subprogram ] -!17 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!17 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !18 = metadata !{null, metadata !19, metadata !20} -!19 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !14} ; [ DW_TAG_pointer_type ] -!20 = metadata !{i32 786434, metadata !160, null, metadata !"function<void ()>", i32 6, i64 8, i64 8, i32 0, i32 0, null, metadata !21, i32 0, null, metadata !97} ; [ DW_TAG_class_type ] +!19 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !14} ; [ DW_TAG_pointer_type ] +!20 = metadata !{i32 786434, metadata !160, null, metadata !"function<void ()>", i32 6, i64 8, i64 8, i32 0, i32 0, null, metadata !21, i32 
0, null, metadata !97, null} ; [ DW_TAG_class_type ] [function<void ()>] [line 6, size 8, align 8, offset 0] [def] [from ] !21 = metadata !{metadata !22, metadata !51, metadata !58, metadata !86, metadata !92} !22 = metadata !{i32 786478, metadata !6, metadata !20, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"", i32 8, metadata !23, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !47, i32 0, metadata !49, i32 8} ; [ DW_TAG_subprogram ] -!23 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !24, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!23 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !24, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !24 = metadata !{null, metadata !25, metadata !26} -!25 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !20} ; [ DW_TAG_pointer_type ] -!26 = metadata !{i32 786434, metadata !160, metadata !5, metadata !"", i32 20, i64 8, i64 8, i32 0, i32 0, null, metadata !27, i32 0, null, null} ; [ DW_TAG_class_type ] +!25 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !20} ; [ DW_TAG_pointer_type ] +!26 = metadata !{i32 786434, metadata !160, metadata !5, metadata !"", i32 20, i64 8, i64 8, i32 0, i32 0, null, metadata !27, i32 0, null, null, null} ; [ DW_TAG_class_type ] [line 20, size 8, align 8, offset 0] [def] [from ] !27 = metadata !{metadata !28, metadata !35, metadata !41} !28 = metadata !{i32 786478, metadata !6, metadata !26, metadata !"operator()", metadata !"operator()", metadata !"", i32 20, metadata !29, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !33, i32 20} ; [ DW_TAG_subprogram ] -!29 
= metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !30, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!29 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !30, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !30 = metadata !{null, metadata !31} -!31 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !32} ; [ DW_TAG_pointer_type ] -!32 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_const_type ] +!31 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !32} ; [ DW_TAG_pointer_type ] +!32 = metadata !{i32 786470, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_const_type ] !33 = metadata !{metadata !34} !34 = metadata !{i32 786468} ; [ DW_TAG_base_type ] !35 = metadata !{i32 786478, metadata !6, metadata !26, metadata !"~", metadata !"~", metadata !"", i32 20, metadata !36, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !39, i32 20} ; [ DW_TAG_subprogram ] -!36 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !37, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!36 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !37, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !37 = metadata !{null, metadata !38} -!38 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !26} ; [ DW_TAG_pointer_type ] +!38 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !26} ; [ DW_TAG_pointer_type ] !39 = metadata !{metadata !40} !40 = metadata !{i32 786468} ; [ DW_TAG_base_type ] !41 = metadata !{i32 
786478, metadata !6, metadata !26, metadata !"", metadata !"", metadata !"", i32 20, metadata !42, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !45, i32 20} ; [ DW_TAG_subprogram ] -!42 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !43, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!42 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !43, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !43 = metadata !{null, metadata !38, metadata !44} !44 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_rvalue_reference_type ] !45 = metadata !{metadata !46} @@ -127,32 +128,32 @@ entry: !49 = metadata !{metadata !50} !50 = metadata !{i32 786468} ; [ DW_TAG_base_type ] !51 = metadata !{i32 786478, metadata !6, metadata !20, metadata !"function<function<void ()> >", metadata !"function<function<void ()> >", metadata !"", i32 8, metadata !52, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !54, i32 0, metadata !56, i32 8} ; [ DW_TAG_subprogram ] -!52 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !53, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!52 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !53, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !53 = metadata !{null, metadata !25, metadata !20} !54 = metadata !{metadata !55} !55 = metadata !{i32 786479, null, metadata !"_Functor", metadata !20, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ] !56 = metadata !{metadata !57} !57 = metadata !{i32 786468} ; [ DW_TAG_base_type ] !58 = metadata !{i32 786478, metadata !6, metadata !20, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> 
>", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"", i32 8, metadata !59, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !82, i32 0, metadata !84, i32 8} ; [ DW_TAG_subprogram ] -!59 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !60, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!59 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !60, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !60 = metadata !{null, metadata !25, metadata !61} -!61 = metadata !{i32 786434, metadata !160, metadata !5, metadata !"", i32 23, i64 8, i64 8, i32 0, i32 0, null, metadata !62, i32 0, null, null} ; [ DW_TAG_class_type ] +!61 = metadata !{i32 786434, metadata !160, metadata !5, metadata !"", i32 23, i64 8, i64 8, i32 0, i32 0, null, metadata !62, i32 0, null, null, null} ; [ DW_TAG_class_type ] [line 23, size 8, align 8, offset 0] [def] [from ] !62 = metadata !{metadata !63, metadata !70, metadata !76} !63 = metadata !{i32 786478, metadata !6, metadata !61, metadata !"operator()", metadata !"operator()", metadata !"", i32 23, metadata !64, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !68, i32 23} ; [ DW_TAG_subprogram ] -!64 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !65, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!64 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !65, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !65 = metadata !{null, metadata !66} -!66 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !67} ; [ DW_TAG_pointer_type ] -!67 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 
0, i32 0, metadata !61} ; [ DW_TAG_const_type ] +!66 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !67} ; [ DW_TAG_pointer_type ] +!67 = metadata !{i32 786470, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !61} ; [ DW_TAG_const_type ] !68 = metadata !{metadata !69} !69 = metadata !{i32 786468} ; [ DW_TAG_base_type ] !70 = metadata !{i32 786478, metadata !6, metadata !61, metadata !"~", metadata !"~", metadata !"", i32 23, metadata !71, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !74, i32 23} ; [ DW_TAG_subprogram ] -!71 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !72, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!71 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !72, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !72 = metadata !{null, metadata !73} -!73 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !61} ; [ DW_TAG_pointer_type ] +!73 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !61} ; [ DW_TAG_pointer_type ] !74 = metadata !{metadata !75} !75 = metadata !{i32 786468} ; [ DW_TAG_base_type ] !76 = metadata !{i32 786478, metadata !6, metadata !61, metadata !"", metadata !"", metadata !"", i32 23, metadata !77, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !80, i32 23} ; [ DW_TAG_subprogram ] -!77 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !78, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!77 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !78, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !78 = metadata !{null, 
metadata !73, metadata !79} !79 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !61} ; [ DW_TAG_rvalue_reference_type ] !80 = metadata !{metadata !81} @@ -162,19 +163,19 @@ entry: !84 = metadata !{metadata !85} !85 = metadata !{i32 786468} ; [ DW_TAG_base_type ] !86 = metadata !{i32 786478, metadata !6, metadata !20, metadata !"function", metadata !"function", metadata !"", i32 6, metadata !87, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !90, i32 6} ; [ DW_TAG_subprogram ] -!87 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !88, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!87 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !88, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !88 = metadata !{null, metadata !25, metadata !89} !89 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_rvalue_reference_type ] !90 = metadata !{metadata !91} !91 = metadata !{i32 786468} ; [ DW_TAG_base_type ] !92 = metadata !{i32 786478, metadata !6, metadata !20, metadata !"~function", metadata !"~function", metadata !"", i32 6, metadata !93, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !95, i32 6} ; [ DW_TAG_subprogram ] -!93 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !94, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!93 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !94, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !94 = metadata !{null, metadata !25} !95 = metadata !{metadata !96} !96 = metadata !{i32 786468} ; [ DW_TAG_base_type ] !97 = metadata !{metadata !98} !98 = metadata !{i32 786479, null, metadata 
!"T", metadata !99, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ] -!99 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !100, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!99 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !100, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !100 = metadata !{null} !101 = metadata !{metadata !102} !102 = metadata !{i32 786468} ; [ DW_TAG_base_type ] @@ -183,16 +184,16 @@ entry: !105 = metadata !{i32 786468} ; [ DW_TAG_base_type ] !106 = metadata !{i32 786478, metadata !6, null, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_", i32 8, metadata !59, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_", metadata !82, metadata !58, metadata !1, i32 8} ; [ DW_TAG_subprogram ] !107 = metadata !{i32 786478, metadata !6, null, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", i32 3, metadata !108, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.anon.0*)* @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", metadata !111, metadata !113, metadata !1, i32 3} ; [ DW_TAG_subprogram ] -!108 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !109, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!108 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !109, i32 0, 
null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !109 = metadata !{null, metadata !110} !110 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !61} ; [ DW_TAG_reference_type ] !111 = metadata !{metadata !112} !112 = metadata !{i32 786479, null, metadata !"_Tp", metadata !61, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ] !113 = metadata !{i32 786478, metadata !6, metadata !114, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", i32 3, metadata !108, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !111, i32 0, metadata !124, i32 3} ; [ DW_TAG_subprogram ] -!114 = metadata !{i32 786434, metadata !160, null, metadata !"_Base_manager", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !115, i32 0, null, null} ; [ DW_TAG_class_type ] +!114 = metadata !{i32 786434, metadata !160, null, metadata !"_Base_manager", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !115, i32 0, null, null, null} ; [ DW_TAG_class_type ] [_Base_manager] [line 1, size 8, align 8, offset 0] [def] [from ] !115 = metadata !{metadata !116, metadata !113} !116 = metadata !{i32 786478, metadata !6, metadata !114, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", i32 3, metadata !117, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !120, i32 0, metadata !122, i32 3} ; [ DW_TAG_subprogram ] -!117 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, 
null, metadata !118, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!117 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !118, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !118 = metadata !{null, metadata !119} !119 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_reference_type ] !120 = metadata !{metadata !121} @@ -208,14 +209,14 @@ entry: !131 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !132} ; [ DW_TAG_const_type ] !132 = metadata !{i32 786468, null, null, metadata !"bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] !133 = metadata !{i32 786689, metadata !5, metadata !"this", metadata !6, i32 16777235, metadata !134, i32 64, i32 0} ; [ DW_TAG_arg_variable ] -!134 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] +!134 = metadata !{i32 786447, null, null, null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] !135 = metadata !{i32 19, i32 39, metadata !5, null} !136 = metadata !{i32 20, i32 17, metadata !137, null} !137 = metadata !{i32 786443, metadata !5, i32 19, i32 51, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] !138 = metadata !{i32 23, i32 17, metadata !137, null} !139 = metadata !{i32 26, i32 15, metadata !137, null} !140 = metadata !{i32 786689, metadata !106, metadata !"this", metadata !6, i32 16777224, metadata !141, i32 64, i32 0} ; [ DW_TAG_arg_variable ] -!141 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ] +!141 = metadata !{i32 786447, null, null, null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ] !142 = metadata !{i32 8, i32 45, metadata !106, null} !143 = metadata !{i32 786689, metadata !106, metadata !"__f", 
metadata !6, i32 33554440, metadata !61, i32 0, i32 0} ; [ DW_TAG_arg_variable ] !144 = metadata !{i32 8, i32 63, metadata !106, null} @@ -236,3 +237,4 @@ entry: !159 = metadata !{i32 786473, metadata !161} ; [ DW_TAG_file_type ] !160 = metadata !{metadata !"BPLFunctionWriter2.ii", metadata !"/home/peter/crashdelta"} !161 = metadata !{metadata !"BPLFunctionWriter.cpp", metadata !"/home/peter/crashdelta"} +!162 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/pr13303.ll b/test/DebugInfo/X86/pr13303.ll index 338c576..4737862 100644 --- a/test/DebugInfo/X86/pr13303.ll +++ b/test/DebugInfo/X86/pr13303.ll @@ -13,15 +13,17 @@ entry: } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!13} !0 = metadata !{i32 786449, metadata !12, i32 12, metadata !"clang version 3.2 (trunk 160143)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/probinson/PR13303.c] [DW_LANG_C99] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 786478, metadata !12, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [main] !6 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9} !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc 
DW_ATE_signed] !10 = metadata !{i32 1, i32 14, metadata !11, null} !11 = metadata !{i32 786443, metadata !12, metadata !5, i32 1, i32 12, i32 0} ; [ DW_TAG_lexical_block ] [/home/probinson/PR13303.c] !12 = metadata !{metadata !"PR13303.c", metadata !"/home/probinson"} +!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/pr9951.ll b/test/DebugInfo/X86/pr9951.ll index db60fb1..d933beb 100644 --- a/test/DebugInfo/X86/pr9951.ll +++ b/test/DebugInfo/X86/pr9951.ll @@ -6,12 +6,13 @@ entry: } !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!9} !6 = metadata !{metadata !0} -!0 = metadata !{i32 786478, metadata !7, metadata !1, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 ()* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ] +!0 = metadata !{i32 786478, metadata !7, metadata !1, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [f] !1 = metadata !{i32 786473, metadata !7} ; [ DW_TAG_file_type ] !2 = metadata !{i32 786449, metadata !7, i32 12, metadata !"clang version 3.0 ()", i1 true, metadata !"", i32 0, metadata !8, metadata !8, metadata !6, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !7, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !7, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !7 = metadata !{metadata !"/home/espindola/llvm/test.c", metadata 
!"/home/espindola/llvm/build-rust2"} @@ -23,3 +24,4 @@ entry: ; CHECK: Ltmp9 = (Ltmp3-Ltmp2)-0 ; CHECK-NEXT: .long Ltmp9 ; CHECK-NEXT: .quad Ltmp0 +!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/prologue-stack.ll b/test/DebugInfo/X86/prologue-stack.ll index 57d164a..b37e41a 100644 --- a/test/DebugInfo/X86/prologue-stack.ll +++ b/test/DebugInfo/X86/prologue-stack.ll @@ -19,16 +19,18 @@ entry: declare i32 @callme(i32) !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!14} !0 = metadata !{i32 786449, metadata !13, i32 12, metadata !"clang version 3.2 (trunk 164980) (llvm/trunk 164979)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/bar.c] [DW_LANG_C99] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 786478, metadata !13, metadata !6, metadata !"isel_line_test2", metadata !"isel_line_test2", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @isel_line_test2, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [isel_line_test2] !6 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9} !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !10 = metadata !{i32 5, i32 3, metadata !11, null} !11 = metadata !{i32 786443, metadata !13, metadata !5, 
i32 4, i32 1, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/bar.c] !12 = metadata !{i32 6, i32 3, metadata !11, null} !13 = metadata !{metadata !"bar.c", metadata !"/usr/local/google/home/echristo/tmp"} +!14 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/ref_addr_relocation.ll b/test/DebugInfo/X86/ref_addr_relocation.ll new file mode 100644 index 0000000..fc5197d --- /dev/null +++ b/test/DebugInfo/X86/ref_addr_relocation.ll @@ -0,0 +1,71 @@ +; RUN: llc -filetype=asm -O0 -mtriple=x86_64-linux-gnu < %s | FileCheck %s +; RUN: llc -filetype=obj -O0 %s -mtriple=x86_64-linux-gnu -o %t +; RUN: llvm-dwarfdump %t | FileCheck %s -check-prefix=CHECK-DWARF + +; RUN: llc -filetype=obj %s -mtriple=x86_64-apple-darwin -o %t2 +; RUN: llvm-dwarfdump %t2 | FileCheck %s -check-prefix=DARWIN-DWARF + +; Testing case generated from: +; clang++ tu1.cpp tu2.cpp -g -emit-llvm -c +; llvm-link tu1.bc tu2.bc -o tu12.ll -S +; cat hdr.h +; struct foo { +; }; +; cat tu1.cpp +; #include "hdr.h" +; foo f; +; cat tu2.cpp +; #include "hdr.h" +; foo g; + +; Make sure we use relocation for ref_addr on non-darwin platforms. +; CHECK: DW_TAG_compile_unit +; CHECK: DW_TAG_variable +; CHECK: .long [[TYPE:.*]] # DW_AT_type +; CHECK: DW_TAG_structure_type +; CHECK: debug_info_end0 +; CHECK: DW_TAG_compile_unit +; CHECK-NOT: DW_TAG_structure_type +; This variable's type is in the 1st CU. +; CHECK: DW_TAG_variable +; Make sure this is relocatable. 
+; CHECK: .quad .Lsection_info+[[TYPE]] # DW_AT_type +; CHECK-NOT: DW_TAG_structure_type +; CHECK: debug_info_end1 + +; CHECK-DWARF: DW_TAG_compile_unit +; CHECK-DWARF: 0x[[ADDR:.*]]: DW_TAG_structure_type +; CHECK-DWARF: DW_TAG_compile_unit +; CHECK-DWARF: DW_TAG_variable +; CHECK-DWARF: DW_AT_type [DW_FORM_ref_addr] {{.*}}[[ADDR]]) + +; DARWIN-DWARF: DW_TAG_compile_unit +; DARWIN-DWARF: 0x[[ADDR:.*]]: DW_TAG_structure_type +; DARWIN-DWARF: DW_TAG_compile_unit +; DARWIN-DWARF: DW_TAG_variable +; DARWIN-DWARF: DW_AT_type [DW_FORM_ref_addr] {{.*}}[[ADDR]]) + +%struct.foo = type { i8 } + +@f = global %struct.foo zeroinitializer, align 1 +@g = global %struct.foo zeroinitializer, align 1 + +!llvm.dbg.cu = !{!0, !9} +!llvm.module.flags = !{!14, !15} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (trunk 191799)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !2, metadata !6, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/manmanren/test-Nov/type_unique_air/ref_addr/tu1.cpp] [DW_LANG_C_plus_plus] +!1 = metadata !{metadata !"tu1.cpp", metadata !"/Users/manmanren/test-Nov/type_unique_air/ref_addr"} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786451, metadata !5, null, metadata !"foo", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, metadata !"_ZTS3foo"} ; [ DW_TAG_structure_type ] [foo] [line 1, size 8, align 8, offset 0] [def] [from ] +!5 = metadata !{metadata !"./hdr.h", metadata !"/Users/manmanren/test-Nov/type_unique_air/ref_addr"} +!6 = metadata !{metadata !7} +!7 = metadata !{i32 786484, i32 0, null, metadata !"f", metadata !"f", metadata !"", metadata !8, i32 2, metadata !4, i32 0, i32 1, %struct.foo* @f, null} ; [ DW_TAG_variable ] [f] [line 2] [def] +!8 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/Users/manmanren/test-Nov/type_unique_air/ref_addr/tu1.cpp] +!9 = metadata !{i32 786449, metadata !10, i32 4, metadata !"clang 
version 3.4 (trunk 191799)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !2, metadata !11, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/manmanren/test-Nov/type_unique_air/ref_addr/tu2.cpp] [DW_LANG_C_plus_plus] +!10 = metadata !{metadata !"tu2.cpp", metadata !"/Users/manmanren/test-Nov/type_unique_air/ref_addr"} +!11 = metadata !{metadata !12} +!12 = metadata !{i32 786484, i32 0, null, metadata !"g", metadata !"g", metadata !"", metadata !13, i32 2, metadata !4, i32 0, i32 1, %struct.foo* @g, null} ; [ DW_TAG_variable ] [g] [line 2] [def] +!13 = metadata !{i32 786473, metadata !10} ; [ DW_TAG_file_type ] [/Users/manmanren/test-Nov/type_unique_air/ref_addr/tu2.cpp] +!14 = metadata !{i32 2, metadata !"Dwarf Version", i32 2} +!15 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/reference-argument.ll b/test/DebugInfo/X86/reference-argument.ll index 00846b3..be54386 100644 --- a/test/DebugInfo/X86/reference-argument.ll +++ b/test/DebugInfo/X86/reference-argument.ll @@ -30,7 +30,7 @@ declare void @_ZN4SValD1Ev(%class.SVal* %this) declare void @_ZN4SValD2Ev(%class.SVal* %this) !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!47} +!llvm.module.flags = !{!47, !68} !0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [aggregate-indirect-arg.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{metadata !"aggregate-indirect-arg.cpp", metadata !""} @@ -38,10 +38,10 @@ declare void @_ZN4SValD2Ev(%class.SVal* %this) !3 = metadata !{metadata !4, metadata !29, metadata !33, metadata !34, metadata !35} !4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"bar", metadata !"bar", metadata !"_Z3barR4SVal", i32 19, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.SVal*)* @_Z3barR4SVal, null, null, metadata !2, i32 
19} ; [ DW_TAG_subprogram ] [line 19] [def] [bar] !5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [aggregate-indirect-arg.cpp] -!6 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !7 = metadata !{null, metadata !8} !8 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from SVal] -!9 = metadata !{i32 786434, metadata !1, null, metadata !"SVal", i32 12, i64 128, i64 64, i32 0, i32 0, null, metadata !10, i32 0, null, null} ; [ DW_TAG_class_type ] [SVal] [line 12, size 128, align 64, offset 0] [def] [from ] +!9 = metadata !{i32 786434, metadata !1, null, metadata !"SVal", i32 12, i64 128, i64 64, i32 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_class_type ] [SVal] [line 12, size 128, align 64, offset 0] [def] [from ] !10 = metadata !{metadata !11, metadata !14, metadata !16, metadata !21, metadata !23} !11 = metadata !{i32 786445, metadata !1, metadata !9, metadata !"Data", i32 15, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ] [Data] [line 15, size 64, align 64, offset 0] [from ] !12 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !13} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ] @@ -49,34 +49,34 @@ declare void @_ZN4SValD2Ev(%class.SVal* %this) !14 = metadata !{i32 786445, metadata !1, metadata !9, metadata !"Kind", i32 16, i64 32, i64 32, i64 64, i32 0, metadata !15} ; [ DW_TAG_member ] [Kind] [line 16, size 32, align 32, offset 64] [from unsigned int] !15 = metadata !{i32 786468, null, 
null, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned] !16 = metadata !{i32 786478, metadata !1, metadata !9, metadata !"~SVal", metadata !"~SVal", metadata !"", i32 14, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !20, i32 14} ; [ DW_TAG_subprogram ] [line 14] [~SVal] -!17 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!17 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !18 = metadata !{null, metadata !19} -!19 = metadata !{i32 786447, i32 0, i32 0, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from SVal] +!19 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from SVal] !20 = metadata !{i32 786468} !21 = metadata !{i32 786478, metadata !1, metadata !9, metadata !"SVal", metadata !"SVal", metadata !"", i32 12, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !22, i32 12} ; [ DW_TAG_subprogram ] [line 12] [SVal] !22 = metadata !{i32 786468} !23 = metadata !{i32 786478, metadata !1, metadata !9, metadata !"SVal", metadata !"SVal", metadata !"", i32 12, metadata !24, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !28, i32 12} ; [ DW_TAG_subprogram ] [line 12] [SVal] -!24 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, 
i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!24 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !25 = metadata !{null, metadata !19, metadata !26} !26 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !27} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ] !27 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from SVal] !28 = metadata !{i32 786468} !29 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 25, metadata !30, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 25} ; [ DW_TAG_subprogram ] [line 25] [def] [main] -!30 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !31, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!30 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !31, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !31 = metadata !{metadata !32} !32 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !33 = metadata !{i32 786478, metadata !1, null, metadata !"~SVal", metadata !"~SVal", metadata !"_ZN4SValD1Ev", i32 14, metadata !17, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.SVal*)* @_ZN4SValD1Ev, null, metadata !16, metadata !2, i32 14} ; [ DW_TAG_subprogram ] [line 14] [def] [~SVal] !34 = metadata !{i32 786478, metadata !1, null, 
metadata !"~SVal", metadata !"~SVal", metadata !"_ZN4SValD2Ev", i32 14, metadata !17, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.SVal*)* @_ZN4SValD2Ev, null, metadata !16, metadata !2, i32 14} ; [ DW_TAG_subprogram ] [line 14] [def] [~SVal] !35 = metadata !{i32 786478, metadata !1, null, metadata !"foo", metadata !"foo", metadata !"_ZN1A3fooE4SVal", i32 22, metadata !36, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*, %class.SVal*)* @_ZN1A3fooE4SVal, null, metadata !41, metadata !2, i32 22} ; [ DW_TAG_subprogram ] [line 22] [def] [foo] -!36 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !37, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!36 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !37, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !37 = metadata !{null, metadata !38, metadata !9} -!38 = metadata !{i32 786447, i32 0, i32 0, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !39} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from A] -!39 = metadata !{i32 786434, metadata !1, null, metadata !"A", i32 20, i64 8, i64 8, i32 0, i32 0, null, metadata !40, i32 0, null, null} ; [ DW_TAG_class_type ] [A] [line 20, size 8, align 8, offset 0] [def] [from ] +!38 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !39} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from A] +!39 = metadata !{i32 786434, metadata !1, null, metadata !"A", i32 20, i64 8, i64 8, i32 0, i32 0, null, metadata !40, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 20, size 8, align 8, offset 0] [def] [from ] !40 = metadata !{metadata !41, metadata !43} !41 = metadata !{i32 786478, metadata !1, metadata !39, 
metadata !"foo", metadata !"foo", metadata !"_ZN1A3fooE4SVal", i32 22, metadata !36, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !42, i32 22} ; [ DW_TAG_subprogram ] [line 22] [foo] !42 = metadata !{i32 786468} !43 = metadata !{i32 786478, metadata !1, metadata !39, metadata !"A", metadata !"A", metadata !"", i32 20, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !46, i32 20} ; [ DW_TAG_subprogram ] [line 20] [A] -!44 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !45, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!44 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !45, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !45 = metadata !{null, metadata !38} !46 = metadata !{i32 786468} !47 = metadata !{i32 2, metadata !"Dwarf Version", i32 3} @@ -100,3 +100,4 @@ declare void @_ZN4SValD2Ev(%class.SVal* %this) !65 = metadata !{i32 14, i32 0, metadata !33, null} !66 = metadata !{i32 786689, metadata !34, metadata !"this", metadata !5, i32 16777230, metadata !64, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 14] !67 = metadata !{i32 14, i32 0, metadata !34, null} +!68 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/rvalue-ref.ll b/test/DebugInfo/X86/rvalue-ref.ll index d4f69fe..e9ea427 100644 --- a/test/DebugInfo/X86/rvalue-ref.ll +++ b/test/DebugInfo/X86/rvalue-ref.ll @@ -21,13 +21,14 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone declare i32 @printf(i8*, ...) 
!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!17} !0 = metadata !{i32 786449, metadata !16, i32 4, metadata !"clang version 3.2 (trunk 157054) (llvm/trunk 157060)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 786478, metadata !16, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooOi", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*)* @_Z3fooOi, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ] !6 = metadata !{i32 786473, metadata !16} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{null, metadata !9} !9 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_rvalue_reference_type ] !10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] @@ -37,3 +38,4 @@ declare i32 @printf(i8*, ...) 
!14 = metadata !{i32 786443, metadata !16, metadata !5, i32 5, i32 1, i32 0} ; [ DW_TAG_lexical_block ] !15 = metadata !{i32 7, i32 1, metadata !14, null} !16 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo/tmp"} +!17 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll b/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll index a0bed16..72eb62f 100644 --- a/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll +++ b/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll @@ -48,12 +48,13 @@ entry: } !llvm.dbg.cu = !{!0, !10} +!llvm.module.flags = !{!25} !0 = metadata !{i32 786449, metadata !23, i32 12, metadata !"clang version 3.3", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 786478, metadata !23, metadata !6, metadata !"test", metadata !"test", metadata !"", i32 2, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @test, null, null, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 3] [test] !6 = metadata !{i32 786473, metadata !23} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9, metadata !9} !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !10 = metadata !{i32 786449, metadata !24, i32 12, metadata !"clang version 3.3 (trunk 
172862)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !11, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] @@ -70,3 +71,4 @@ entry: !22 = metadata !{i32 786443, metadata !24, metadata !13, i32 1, i32 0, i32 0} ; [ DW_TAG_lexical_block ] !23 = metadata !{metadata !"simple.c", metadata !"/private/tmp"} !24 = metadata !{metadata !"simple2.c", metadata !"/private/tmp"} +!25 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/stmt-list.ll b/test/DebugInfo/X86/stmt-list.ll index c3a456e..6f846c1 100644 --- a/test/DebugInfo/X86/stmt-list.ll +++ b/test/DebugInfo/X86/stmt-list.ll @@ -11,11 +11,13 @@ entry: } !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!7} !5 = metadata !{metadata !0} -!0 = metadata !{i32 786478, metadata !6, metadata !1, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ] +!0 = metadata !{i32 786478, metadata !6, metadata !1, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [f] !1 = metadata !{i32 786473, metadata !6} ; [ DW_TAG_file_type ] !2 = metadata !{i32 786449, metadata !6, i32 12, metadata !"clang version 3.0 ()", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !5, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !6, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !6, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{null} !6 = metadata !{metadata !"test2.c", metadata 
!"/home/espindola/llvm"} +!7 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/stringpool.ll b/test/DebugInfo/X86/stringpool.ll index d9604de..fccac26 100644 --- a/test/DebugInfo/X86/stringpool.ll +++ b/test/DebugInfo/X86/stringpool.ll @@ -4,6 +4,7 @@ @yyyy = common global i32 0, align 4 !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!9} !0 = metadata !{i32 786449, metadata !8, i32 12, metadata !"clang version 3.1 (trunk 143009)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} @@ -13,9 +14,15 @@ !7 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !8 = metadata !{metadata !"z.c", metadata !"/home/nicholas"} +; Verify that "yyyy" ended up in the stringpool. +; LINUX: .section .debug_str,"MS",@progbits,1 +; LINUX: yyyy +; DARWIN: .section __DWARF,__debug_str,regular,debug +; DARWIN: yyyy + ; Verify that we refer to 'yyyy' with a relocation. ; LINUX: .long .Linfo_string3 # DW_AT_name -; LINUX-NEXT: .long 38 # DW_AT_type +; LINUX-NEXT: .long {{[0-9]+}} # DW_AT_type ; LINUX-NEXT: # DW_AT_external ; LINUX-NEXT: .byte 1 # DW_AT_decl_file ; LINUX-NEXT: .byte 1 # DW_AT_decl_line @@ -24,20 +31,13 @@ ; LINUX-NEXT: .quad yyyy ; Verify that we refer to 'yyyy' without a relocation. -; DARWIN: Lset5 = Linfo_string3-Linfo_string ## DW_AT_name -; DARWIN-NEXT: .long Lset5 -; DARWIN-NEXT: .long 39 ## DW_AT_type -; DARWIN-NEXT: .byte 1 ## DW_AT_external +; DARWIN: Lset[[ID:[0-9]+]] = Linfo_string3-Linfo_string ## DW_AT_name +; DARWIN-NEXT: .long Lset[[ID]] +; DARWIN-NEXT: .long {{[0-9]+}} ## DW_AT_type +; DARWIN-NEXT: ## DW_AT_external ; DARWIN-NEXT: .byte 1 ## DW_AT_decl_file ; DARWIN-NEXT: .byte 1 ## DW_AT_decl_line ; DARWIN-NEXT: .byte 9 ## DW_AT_location ; DARWIN-NEXT: .byte 3 ; DARWIN-NEXT: .quad _yyyy - -; Verify that "yyyy" ended up in the stringpool. 
-; LINUX: .section .debug_str,"MS",@progbits,1 -; LINUX-NOT: .section -; LINUX: yyyy -; DARWIN: .section __DWARF,__debug_str,regular,debug -; DARWIN-NOT: .section -; DARWIN: yyyy +!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/struct-loc.ll b/test/DebugInfo/X86/struct-loc.ll index fb990b2..95bdd41 100644 --- a/test/DebugInfo/X86/struct-loc.ll +++ b/test/DebugInfo/X86/struct-loc.ll @@ -2,7 +2,7 @@ ; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s ; Make sure that structures have a decl file and decl line attached. -; CHECK: DW_TAG_structure_type [3] +; CHECK: DW_TAG_structure_type ; CHECK: DW_AT_decl_file ; CHECK: DW_AT_decl_line ; CHECK: DW_TAG_member @@ -12,14 +12,16 @@ @f = common global %struct.foo zeroinitializer, align 4 !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!12} !0 = metadata !{i32 786449, metadata !11, i32 12, metadata !"clang version 3.1 (trunk 152837) (llvm/trunk 152845)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 786484, i32 0, null, metadata !"f", metadata !"f", metadata !"", metadata !6, i32 5, metadata !7, i32 0, i32 1, %struct.foo* @f, null} ; [ DW_TAG_variable ] !6 = metadata !{i32 786473, metadata !11} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786451, metadata !11, null, metadata !"foo", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!7 = metadata !{i32 786451, metadata !11, null, metadata !"foo", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [foo] [line 1, size 32, align 32, offset 0] [def] [from ] !8 = metadata !{metadata !9} !9 = metadata !{i32 786445, metadata !11, metadata !7, metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ] !10 = metadata !{i32 786468, null, null, 
metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !11 = metadata !{metadata !"struct_bug.c", metadata !"/Users/echristo/tmp"} +!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/subrange-type.ll b/test/DebugInfo/X86/subrange-type.ll index da95893..05b1477 100644 --- a/test/DebugInfo/X86/subrange-type.ll +++ b/test/DebugInfo/X86/subrange-type.ll @@ -2,10 +2,10 @@ ; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s ; Make sure that the base type from the subrange type has a name. -; CHECK: 0x0000006b: DW_TAG_base_type [6] +; CHECK: DW_TAG_subrange_type +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x{{[0-9a-f]+}} => {[[SUBTYPE:0x[0-9a-f]*]]}) +; CHECK: [[SUBTYPE]]: DW_TAG_base_type ; CHECK-NEXT: DW_AT_name -; CHECK: DW_TAG_subrange_type [8] -; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x006b => {0x0000006b}) define i32 @main() nounwind uwtable { entry: @@ -19,20 +19,22 @@ entry: declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!18} !0 = metadata !{i32 786449, metadata !17, i32 12, metadata !"clang version 3.3 (trunk 171472) (llvm/trunk 171487)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.c] [DW_LANG_C99] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 2, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 3] [main] !6 = metadata !{i32 786473, metadata !17} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] 
[from ] +!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9} !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !10 = metadata !{i32 786688, metadata !11, metadata !"i", metadata !6, i32 4, metadata !12, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 4] !11 = metadata !{i32 786443, metadata !6, metadata !5, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/foo.c] -!12 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 64, i64 32, i32 0, i32 0, metadata !9, metadata !13, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 64, align 32, offset 0] [from int] +!12 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 64, i64 32, i32 0, i32 0, metadata !9, metadata !13, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 64, align 32, offset 0] [from int] !13 = metadata !{metadata !14} !14 = metadata !{i32 786465, i64 0, i64 2} ; [ DW_TAG_subrange_type ] [0, 1] !15 = metadata !{i32 4, i32 0, metadata !11, null} !16 = metadata !{i32 6, i32 0, metadata !11, null} !17 = metadata !{metadata !"foo.c", metadata !"/usr/local/google/home/echristo/tmp"} +!18 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/subreg.ll b/test/DebugInfo/X86/subreg.ll index 9aa6e54..162c2d1 100644 --- a/test/DebugInfo/X86/subreg.ll +++ b/test/DebugInfo/X86/subreg.ll @@ -17,15 +17,17 @@ entry: declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!3} +!llvm.module.flags = !{!11} !9 = metadata !{metadata !1} !0 = metadata !{i32 786689, metadata !1, metadata !"zzz", metadata !2, i32 16777219, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] -!1 = 
metadata !{i32 786478, metadata !10, metadata !2, metadata !"f", metadata !"f", metadata !"", i32 3, metadata !4, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i16 (i16)* @f, null, null, null, i32 3} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786478, metadata !10, metadata !2, metadata !"f", metadata !"f", metadata !"", i32 3, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i16 (i16)* @f, null, null, null, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f] !2 = metadata !{i32 786473, metadata !10} ; [ DW_TAG_file_type ] !3 = metadata !{i32 786449, metadata !10, i32 12, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, metadata !5, metadata !5, metadata !9, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!4 = metadata !{i32 786453, metadata !10, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !5, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{i32 786453, metadata !10, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !5 = metadata !{null} !6 = metadata !{i32 786468, null, metadata !3, metadata !"short", i32 0, i64 16, i64 16, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !7 = metadata !{i32 4, i32 22, metadata !8, null} !8 = metadata !{i32 786443, metadata !10, metadata !1, i32 3, i32 19, i32 0} ; [ DW_TAG_lexical_block ] !10 = metadata !{metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/tmpfs/build"} +!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/template.ll b/test/DebugInfo/X86/template.ll index f80dd5c..64a8f7a 100644 --- a/test/DebugInfo/X86/template.ll +++ b/test/DebugInfo/X86/template.ll @@ -27,7 +27,7 @@ ; even as data1. 
DWARF strongly urges implementations to prefer ; _sdata/_udata rather than dataN -; CHECK-NEXT: DW_AT_const_value [DW_FORM_data4]{{.*}}(0x00000003) +; CHECK-NEXT: DW_AT_const_value [DW_FORM_sdata]{{.*}}(3) ; CHECK: DW_TAG_template_value_parameter ; CHECK-NEXT: DW_AT_type{{.*}}=> {[[INTPTR:0x[0-9a-f]*]]} @@ -47,11 +47,11 @@ ; CHECK-NOT: NULL ; CHECK: DW_TAG_template_value_parameter ; CHECK-NEXT: DW_AT_type{{.*}}=> {[[INT]]} -; CHECK-NEXT: DW_AT_const_value [DW_FORM_data4]{{.*}}(0x00000001) +; CHECK-NEXT: DW_AT_const_value [DW_FORM_sdata]{{.*}}(1) ; CHECK-NOT: NULL ; CHECK: DW_TAG_template_value_parameter ; CHECK-NEXT: DW_AT_type{{.*}}=> {[[INT]]} -; CHECK-NEXT: DW_AT_const_value [DW_FORM_data4]{{.*}}(0x00000002) +; CHECK-NEXT: DW_AT_const_value [DW_FORM_sdata]{{.*}}(2) ; CHECK: [[INTPTR]]:{{ *}}DW_TAG_pointer_type ; CHECK-NEXT: DW_AT_type{{.*}} => {[[INT]]} @@ -64,65 +64,63 @@ define internal void @__cxx_global_var_init() section ".text.startup" { entry: - %call = call i32 @_Z4funcILi3EXadL_Z4glblEE6y_implJLi1ELi2EEEiv(), !dbg !37 - store i32 %call, i32* @glbl, align 4, !dbg !37 - ret void, !dbg !37 + %call = call i32 @_Z4funcILi3EXadL_Z4glblEE6y_implJLi1ELi2EEEiv(), !dbg !33 + store i32 %call, i32* @glbl, align 4, !dbg !33 + ret void, !dbg !33 } ; Function Attrs: nounwind uwtable define linkonce_odr i32 @_Z4funcILi3EXadL_Z4glblEE6y_implJLi1ELi2EEEiv() #0 { entry: - ret i32 3, !dbg !38 + ret i32 3, !dbg !34 } define internal void @_GLOBAL__I_a() section ".text.startup" { entry: - call void @__cxx_global_var_init(), !dbg !39 - ret void, !dbg !39 + call void @__cxx_global_var_init(), !dbg !35 + ret void, !dbg !35 } -attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" 
"no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!36} +!llvm.module.flags = !{!31, !36} +!llvm.ident = !{!32} -!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !23, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/templ.cpp] [DW_LANG_C_plus_plus] -!1 = metadata !{metadata !"templ.cpp", metadata !"/tmp"} +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (trunk 192849) (llvm/trunk 192850)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !9, metadata !28, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/bar.cpp] [DW_LANG_C_plus_plus] +!1 = metadata !{metadata !"bar.cpp", metadata !"/usr/local/google/home/echristo/tmp"} !2 = metadata !{i32 0} -!3 = metadata !{metadata !4, metadata !8, metadata !21} -!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"__cxx_global_var_init", metadata !"__cxx_global_var_init", metadata !"", i32 3, metadata !6, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @__cxx_global_var_init, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [local] [def] [__cxx_global_var_init] -!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/templ.cpp] -!6 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] -!7 = metadata !{null} -!8 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"func<3, &glbl, y_impl, 1, 2>", metadata !"func<3, &glbl, y_impl, 1, 2>", metadata !"_Z4funcILi3EXadL_Z4glblEE6y_implJLi1ELi2EEEiv", i32 1, metadata !9, i1 false, i1 
true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z4funcILi3EXadL_Z4glblEE6y_implJLi1ELi2EEEiv, metadata !12, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [func<3, &glbl, y_impl, 1, 2>] -!9 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] -!10 = metadata !{metadata !11} -!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] -!12 = metadata !{metadata !13, metadata !14, metadata !16, metadata !17} -!13 = metadata !{i32 786480, null, metadata !"x", metadata !11, i32 3, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ] -!14 = metadata !{i32 786480, null, metadata !"", metadata !15, i32* @glbl, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ] -!15 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int] -!16 = metadata !{i32 803078, null, metadata !"y", null, metadata !"y_impl", null, i32 0, i32 0} ; [ DW_TAG_GNU_template_template_param ] -!17 = metadata !{i32 803079, null, metadata !"z", null, metadata !18, null, i32 0, i32 0} ; [ DW_TAG_GNU_template_parameter_pack ] -!18 = metadata !{metadata !19, metadata !20} -!19 = metadata !{i32 786480, null, metadata !"", metadata !11, i32 1, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ] -!20 = metadata !{i32 786480, null, metadata !"", metadata !11, i32 2, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ] -!21 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"_GLOBAL__I_a", metadata !"_GLOBAL__I_a", metadata !"", i32 1, metadata !22, i1 true, i1 true, i32 0, i32 0, null, i32 64, i1 false, void ()* @_GLOBAL__I_a, null, null, metadata !2, i32 1} ; [ 
DW_TAG_subprogram ] [line 1] [local] [def] [_GLOBAL__I_a] -!22 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!3 = metadata !{metadata !4, metadata !8} +!4 = metadata !{i32 786451, metadata !1, null, metadata !"y_impl<int>", i32 2, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, metadata !5, metadata !"_ZTS6y_implIiE"} ; [ DW_TAG_structure_type ] [y_impl<int>] [line 2, size 8, align 8, offset 0] [def] [from ] +!5 = metadata !{metadata !6} +!6 = metadata !{i32 786479, null, metadata !"", metadata !7, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ] +!7 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!8 = metadata !{i32 786451, metadata !1, metadata !"_ZTS6y_implIiE", metadata !"nested", i32 2, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, metadata !"_ZTSN6y_implIiE6nestedE"} ; [ DW_TAG_structure_type ] [nested] [line 2, size 8, align 8, offset 0] [def] [from ] +!9 = metadata !{metadata !10, metadata !14, metadata !26} +!10 = metadata !{i32 786478, metadata !1, metadata !11, metadata !"__cxx_global_var_init", metadata !"__cxx_global_var_init", metadata !"", i32 3, metadata !12, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @__cxx_global_var_init, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [local] [def] [__cxx_global_var_init] +!11 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/bar.cpp] +!12 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!13 = metadata !{null} +!14 = metadata !{i32 786478, metadata !1, 
metadata !11, metadata !"func<3, &glbl, y_impl, 1, 2>", metadata !"func<3, &glbl, y_impl, 1, 2>", metadata !"_Z4funcILi3EXadL_Z4glblEE6y_implJLi1ELi2EEEiv", i32 1, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z4funcILi3EXadL_Z4glblEE6y_implJLi1ELi2EEEiv, metadata !17, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [func<3, &glbl, y_impl, 1, 2>] +!15 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!16 = metadata !{metadata !7} +!17 = metadata !{metadata !18, metadata !19, metadata !21, metadata !22} +!18 = metadata !{i32 786480, null, metadata !"x", metadata !7, i32 3, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ] +!19 = metadata !{i32 786480, null, metadata !"", metadata !20, i32* @glbl, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ] +!20 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int] +!21 = metadata !{i32 803078, null, metadata !"y", null, metadata !"y_impl", null, i32 0, i32 0} ; [ DW_TAG_GNU_template_template_param ] +!22 = metadata !{i32 803079, null, metadata !"z", null, metadata !23, null, i32 0, i32 0} ; [ DW_TAG_GNU_template_parameter_pack ] !23 = metadata !{metadata !24, metadata !25} -!24 = metadata !{i32 786484, i32 0, null, metadata !"glbl", metadata !"glbl", metadata !"", metadata !5, i32 3, metadata !11, i32 0, i32 1, i32* @glbl, null} ; [ DW_TAG_variable ] [glbl] [line 3] [def] -!25 = metadata !{i32 786484, i32 0, null, metadata !"n", metadata !"n", metadata !"", metadata !5, i32 4, metadata !26, i32 0, i32 1, %"struct.y_impl<int>::nested"* @n, null} ; [ DW_TAG_variable ] [n] [line 4] [def] -!26 = metadata !{i32 786451, metadata !1, metadata !27, metadata !"nested", i32 2, i64 8, i64 8, i32 
0, i32 0, null, metadata !30, i32 0, null, null} ; [ DW_TAG_structure_type ] [nested] [line 2, size 8, align 8, offset 0] [def] [from ] -!27 = metadata !{i32 786451, metadata !1, null, metadata !"y_impl<int>", i32 2, i64 8, i64 8, i32 0, i32 0, null, null, i32 0, null, metadata !28} ; [ DW_TAG_structure_type ] [y_impl<int>] [line 2, size 8, align 8, offset 0] [def] [from ] -!28 = metadata !{metadata !29} -!29 = metadata !{i32 786479, null, metadata !"", metadata !11, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ] -!30 = metadata !{metadata !31} -!31 = metadata !{i32 786478, metadata !1, metadata !26, metadata !"nested", metadata !"nested", metadata !"", i32 2, metadata !32, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !35, i32 2} ; [ DW_TAG_subprogram ] [line 2] [nested] -!32 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !33, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] -!33 = metadata !{null, metadata !34} -!34 = metadata !{i32 786447, i32 0, i32 0, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !26} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from nested] -!35 = metadata !{i32 786468} -!36 = metadata !{i32 2, metadata !"Dwarf Version", i32 3} -!37 = metadata !{i32 3, i32 0, metadata !4, null} -!38 = metadata !{i32 1, i32 0, metadata !8, null} -!39 = metadata !{i32 1, i32 0, metadata !21, null} +!24 = metadata !{i32 786480, null, metadata !"", metadata !7, i32 1, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ] +!25 = metadata !{i32 786480, null, metadata !"", metadata !7, i32 2, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ] +!26 = metadata !{i32 786478, metadata !1, metadata !11, metadata !"", metadata !"", metadata !"_GLOBAL__I_a", i32 1, metadata !27, i1 true, i1 true, i32 0, i32 0, null, i32 64, i1 false, void ()* @_GLOBAL__I_a, null, null, 
metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [local] [def] +!27 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!28 = metadata !{metadata !29, metadata !30} +!29 = metadata !{i32 786484, i32 0, null, metadata !"glbl", metadata !"glbl", metadata !"", metadata !11, i32 3, metadata !7, i32 0, i32 1, i32* @glbl, null} ; [ DW_TAG_variable ] [glbl] [line 3] [def] +!30 = metadata !{i32 786484, i32 0, null, metadata !"n", metadata !"n", metadata !"", metadata !11, i32 4, metadata !8, i32 0, i32 1, %"struct.y_impl<int>::nested"* @n, null} ; [ DW_TAG_variable ] [n] [line 4] [def] +!31 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!32 = metadata !{metadata !"clang version 3.4 (trunk 192849) (llvm/trunk 192850)"} +!33 = metadata !{i32 3, i32 0, metadata !10, null} +!34 = metadata !{i32 1, i32 0, metadata !14, null} +!35 = metadata !{i32 1, i32 0, metadata !26, null} +!36 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/tls-fission.ll b/test/DebugInfo/X86/tls-fission.ll index b95ff40..8a25ace 100644 --- a/test/DebugInfo/X86/tls-fission.ll +++ b/test/DebugInfo/X86/tls-fission.ll @@ -19,7 +19,7 @@ @tls = thread_local global i32 0, align 4 !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!7} +!llvm.module.flags = !{!7, !8} !0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, metadata !2, metadata !"tls.dwo"} ; [ DW_TAG_compile_unit ] [/tmp/tls.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{metadata !"tls.cpp", metadata !"/tmp"} @@ -29,3 +29,4 @@ !5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/tls.cpp] !6 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, 
align 32, offset 0, enc DW_ATE_signed] !7 = metadata !{i32 2, metadata !"Dwarf Version", i32 3} +!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/tls.ll b/test/DebugInfo/X86/tls.ll index e2a9af9..745c2f4 100644 --- a/test/DebugInfo/X86/tls.ll +++ b/test/DebugInfo/X86/tls.ll @@ -22,7 +22,7 @@ @tls = thread_local global i32 7, align 4 !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!7} +!llvm.module.flags = !{!7, !8} !0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/tls.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{metadata !"tls.cpp", metadata !"/tmp"} @@ -32,3 +32,4 @@ !5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/tls.cpp] !6 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !7 = metadata !{i32 2, metadata !"Dwarf Version", i32 3} +!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/union-template.ll b/test/DebugInfo/X86/union-template.ll index 8d23cae..c70ae07 100644 --- a/test/DebugInfo/X86/union-template.ll +++ b/test/DebugInfo/X86/union-template.ll @@ -27,6 +27,7 @@ attributes #0 = { nounwind } attributes #1 = { nounwind readnone } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!28} !0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.3 (trunk 178499) (llvm/trunk 178472)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !9, metadata !9, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.cc] [DW_LANG_C_plus_plus] !1 = metadata !{metadata !"foo.cc", metadata !"/usr/local/google/home/echristo/tmp"} @@ -34,20 +35,20 @@ attributes #1 = { nounwind readnone } !3 = metadata !{metadata 
!4} !4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"g", metadata !"g", metadata !"_ZN7PR156371gEf", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (float)* @_ZN7PR156371gEf, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [g] !5 = metadata !{i32 786489, metadata !1, null, metadata !"PR15637", i32 1} ; [ DW_TAG_namespace ] [PR15637] [line 1] -!6 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !7 = metadata !{null, metadata !8} !8 = metadata !{i32 786468, null, null, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float] !9 = metadata !{metadata !10} !10 = metadata !{i32 786484, i32 0, metadata !5, metadata !"f", metadata !"f", metadata !"_ZN7PR156371fE", metadata !11, i32 6, metadata !12, i32 0, i32 1, %"union.PR15637::Value"* @_ZN7PR156371fE, null} ; [ DW_TAG_variable ] [f] [line 6] [def] !11 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/foo.cc] -!12 = metadata !{i32 786455, metadata !1, metadata !5, metadata !"Value<float>", i32 2, i64 32, i64 32, i64 0, i32 0, null, metadata !13, i32 0, null, metadata !21} ; [ DW_TAG_union_type ] [Value<float>] [line 2, size 32, align 32, offset 0] [from ] +!12 = metadata !{i32 786455, metadata !1, metadata !5, metadata !"Value<float>", i32 2, i64 32, i64 32, i64 0, i32 0, null, metadata !13, i32 0, null, metadata !21, null} ; [ DW_TAG_union_type ] [Value<float>] [line 2, size 32, align 32, offset 0] [def] [from ] !13 = metadata !{metadata !14, metadata !16} !14 = 
metadata !{i32 786445, metadata !1, metadata !12, metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !15} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int] !15 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !16 = metadata !{i32 786478, metadata !1, metadata !12, metadata !"Value", metadata !"Value", metadata !"", i32 2, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !20, i32 2} ; [ DW_TAG_subprogram ] [line 2] [Value] -!17 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!17 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !18 = metadata !{null, metadata !19} -!19 = metadata !{i32 786447, i32 0, i32 0, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !12} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from Value<float>] +!19 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !12} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from Value<float>] !20 = metadata !{i32 786468} !21 = metadata !{metadata !22} !22 = metadata !{i32 786479, null, metadata !"T", metadata !8, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ] @@ -56,3 +57,4 @@ attributes #1 = { nounwind readnone } !25 = metadata !{i32 786688, metadata !4, metadata !"tempValue", metadata !11, i32 4, metadata !12, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [tempValue] [line 4] !26 = metadata !{i32 4, i32 0, metadata !4, null} !27 = metadata !{i32 5, i32 0, metadata 
!4, null} +!28 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/vector.ll b/test/DebugInfo/X86/vector.ll index 0f33032..6e14ed6 100644 --- a/test/DebugInfo/X86/vector.ll +++ b/test/DebugInfo/X86/vector.ll @@ -10,6 +10,7 @@ @a = common global <4 x i32> zeroinitializer, align 16 !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!13} !0 = metadata !{i32 786449, metadata !12, i32 12, metadata !"clang version 3.3 (trunk 171825) (llvm/trunk 171822)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/echristo/foo.c] [DW_LANG_C99] !1 = metadata !{i32 0} @@ -17,7 +18,7 @@ !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 3, metadata !7, i32 0, i32 1, <4 x i32>* @a, null} ; [ DW_TAG_variable ] [a] [line 3] [def] !6 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ] !7 = metadata !{i32 786454, metadata !12, null, metadata !"v4si", i32 1, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ] [v4si] [line 1, size 0, align 0, offset 0] [from ] -!8 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 128, i64 128, i32 0, i32 2048, metadata !9, metadata !10, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 128, align 128, offset 0] [vector] [from int] +!8 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 128, i64 128, i32 0, i32 2048, metadata !9, metadata !10, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 128, offset 0] [vector] [from int] !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !10 = metadata !{metadata !11} !11 = metadata !{i32 786465, i64 0, i64 4} ; [ DW_TAG_subrange_type ] [0, 3] @@ -26,3 +27,4 @@ ; Check that we get an array type with a vector attribute. 
; CHECK: DW_TAG_array_type ; CHECK-NEXT: DW_AT_GNU_vector +!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/X86/vla.ll b/test/DebugInfo/X86/vla.ll index 81faec7..512b223 100644 --- a/test/DebugInfo/X86/vla.ll +++ b/test/DebugInfo/X86/vla.ll @@ -1,6 +1,7 @@ ; RUN: llc -O0 -mtriple=x86_64-apple-darwin -filetype=asm %s -o - | FileCheck %s -; Ensure that we generate a breg+0 location for the variable length array a. -; CHECK: ##DEBUG_VALUE: vla:a <- [RDX+0] +; Ensure that we generate an indirect location for the variable length array a. +; CHECK: ##DEBUG_VALUE: vla:a <- RDX +; CHECK: DW_OP_breg1 ; rdar://problem/13658587 ; ; generated from: @@ -72,6 +73,7 @@ entry: } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!29} !0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/vla.c] [DW_LANG_C99] !1 = metadata !{metadata !"vla.c", metadata !""} @@ -79,11 +81,11 @@ entry: !3 = metadata !{metadata !4, metadata !9} !4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"vla", metadata !"vla", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @vla, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [vla] !5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/vla.c] -!6 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !7 = metadata !{metadata !8, metadata !8} !8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, 
i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !9 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 7, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !2, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [main] -!10 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!10 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !11 = metadata !{metadata !8, metadata !8, metadata !12} !12 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !13} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ] !13 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char] @@ -91,8 +93,8 @@ entry: !15 = metadata !{i32 786689, metadata !4, metadata !"n", metadata !5, i32 16777217, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [n] [line 1] !16 = metadata !{i32 1, i32 0, metadata !4, null} !17 = metadata !{i32 2, i32 0, metadata !4, null} -!18 = metadata !{i32 786688, metadata !4, metadata !"a", metadata !5, i32 2, metadata !19, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 2] -!19 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !8, metadata !20, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int] +!18 = metadata !{i32 786688, metadata !4, metadata !"a", metadata !5, i32 2, metadata !19, i32 8192, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 2] +!19 
= metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !8, metadata !20, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int] !20 = metadata !{metadata !21} !21 = metadata !{i32 786465, i64 0, i64 -1} ; [ DW_TAG_subrange_type ] [unbounded] !22 = metadata !{i32 3, i32 0, metadata !4, null} @@ -102,3 +104,4 @@ entry: !26 = metadata !{i32 7, i32 0, metadata !9, null} !27 = metadata !{i32 786689, metadata !9, metadata !"argv", metadata !5, i32 33554439, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argv] [line 7] !28 = metadata !{i32 8, i32 0, metadata !9, null} +!29 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/array.ll b/test/DebugInfo/array.ll index f6556fc..e5e07ff 100644 --- a/test/DebugInfo/array.ll +++ b/test/DebugInfo/array.ll @@ -13,16 +13,17 @@ entry: declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!16} -!0 = metadata !{i32 786478, metadata !14, metadata !1, metadata !"main", metadata !"main", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @main, null, null, null, i32 3} ; [ DW_TAG_subprogram ] +!0 = metadata !{i32 786478, metadata !14, metadata !1, metadata !"main", metadata !"main", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, null, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [main] !1 = metadata !{i32 786473, metadata !14} ; [ DW_TAG_file_type ] !2 = metadata !{i32 786449, metadata !14, i32 12, metadata !"clang version 3.0 (trunk 129138)", i1 false, metadata !"", i32 0, metadata !15, metadata !15, metadata !13, null, null, null} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !14, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata 
!{i32 786453, metadata !14, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !6 = metadata !{i32 786688, metadata !7, metadata !"a", metadata !1, i32 4, metadata !8, i32 0, null} ; [ DW_TAG_auto_variable ] !7 = metadata !{i32 786443, metadata !14, metadata !0, i32 3, i32 12, i32 0} ; [ DW_TAG_lexical_block ] -!8 = metadata !{i32 786433, metadata !14, metadata !2, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !5, metadata !9, i32 0, i32 0} ; [ DW_TAG_array_type ] +!8 = metadata !{i32 786433, metadata !14, metadata !2, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !5, metadata !9, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int] !9 = metadata !{metadata !10} ;CHECK: DW_TAG_subrange_type ;CHECK-NEXT: DW_AT_type @@ -35,3 +36,4 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !13 = metadata !{metadata !0} !14 = metadata !{metadata !"array.c", metadata !"/private/tmp"} !15 = metadata !{i32 0} +!16 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/bug_null_debuginfo.ll b/test/DebugInfo/bug_null_debuginfo.ll index 06436f9..458fb58 100644 --- a/test/DebugInfo/bug_null_debuginfo.ll +++ b/test/DebugInfo/bug_null_debuginfo.ll @@ -1,6 +1,8 @@ ; RUN: llc < %s !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2} !0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"", i1 false, metadata !"", i32 0, null, null, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{metadata !"t", metadata !""} +!2 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/debuginfofinder-multiple-cu.ll 
b/test/DebugInfo/debuginfofinder-multiple-cu.ll new file mode 100644 index 0000000..74965df --- /dev/null +++ b/test/DebugInfo/debuginfofinder-multiple-cu.ll @@ -0,0 +1,41 @@ +; RUN: opt -analyze -module-debuginfo < %s | FileCheck %s + +; Produced from linking: +; /tmp/test1.c containing f() +; /tmp/test2.c containing g() + +; Verify that both compile units and both their contained functions are +; listed by DebugInfoFinder: +;CHECK: Compile Unit: [ DW_TAG_compile_unit ] [/tmp/test1.c] [DW_LANG_C99] +;CHECK: Compile Unit: [ DW_TAG_compile_unit ] [/tmp/test2.c] [DW_LANG_C99] +;CHECK: Subprogram: [ DW_TAG_subprogram ] [line 1] [def] [f] +;CHECK: Subprogram: [ DW_TAG_subprogram ] [line 1] [def] [g] + +define void @f() { + ret void, !dbg !14 +} + +define void @g() { + ret void, !dbg !15 +} + +!llvm.dbg.cu = !{!0, !8} +!llvm.module.flags = !{!13, !16} + +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (192092)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/test1.c] [DW_LANG_C99] +!1 = metadata !{metadata !"test1.c", metadata !"/tmp"} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @f, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [f] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/test1.c] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{null} +!8 = metadata !{i32 786449, metadata !9, i32 12, metadata !"clang version 3.4 (192092)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !10, metadata !2, metadata !2, metadata 
!""} ; [ DW_TAG_compile_unit ] [/tmp/test2.c] [DW_LANG_C99] +!9 = metadata !{metadata !"test2.c", metadata !"/tmp"} +!10 = metadata !{metadata !11} +!11 = metadata !{i32 786478, metadata !9, metadata !12, metadata !"g", metadata !"g", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @g, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [g] +!12 = metadata !{i32 786473, metadata !9} ; [ DW_TAG_file_type ] [/tmp/test2.c] +!13 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!14 = metadata !{i32 1, i32 0, metadata !4, null} +!15 = metadata !{i32 1, i32 0, metadata !11, null} +!16 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/dwarf-public-names.ll b/test/DebugInfo/dwarf-public-names.ll index 0733c1b..fc33631 100644 --- a/test/DebugInfo/dwarf-public-names.ll +++ b/test/DebugInfo/dwarf-public-names.ll @@ -1,6 +1,6 @@ ; REQUIRES: object-emission -; RUN: llc -generate-dwarf-pubnames -filetype=obj -o %t.o < %s +; RUN: llc -generate-dwarf-pub-sections=Enable -filetype=obj -o %t.o < %s ; RUN: llvm-dwarfdump -debug-dump=pubnames %t.o | FileCheck %s ; ModuleID = 'dwarf-public-names.cpp' ; @@ -37,6 +37,7 @@ ; Skip the output to the header of the pubnames section. ; CHECK: debug_pubnames +; CHECK: version = 0x0002 ; Check for each name in the output. 
; CHECK: global_namespace_variable @@ -85,30 +86,31 @@ attributes #0 = { nounwind uwtable } attributes #1 = { nounwind readnone } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!38} !0 = metadata !{i32 786449, metadata !37, i32 4, metadata !"clang version 3.3 (http://llvm.org/git/clang.git a09cd8103a6a719cb2628cdf0c91682250a17bd2) (http://llvm.org/git/llvm.git 47d03cec0afca0c01ae42b82916d1d731716cd20)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !24, metadata !24, metadata !""} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{i32 0} !2 = metadata !{metadata !3, metadata !18, metadata !19, metadata !20} !3 = metadata !{i32 786478, metadata !4, null, metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", i32 9, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.C*)* @_ZN1C15member_functionEv, null, metadata !12, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [member_function] !4 = metadata !{i32 786473, metadata !37} ; [ DW_TAG_file_type ] -!5 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!5 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !6 = metadata !{null, metadata !7} -!7 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from C] -!8 = metadata !{i32 786451, metadata !37, null, metadata !"C", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !9, i32 0, null, null} ; [ DW_TAG_structure_type ] [C] [line 1, size 8, align 8, offset 0] [from ] +!7 = metadata !{i32 
786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from C] +!8 = metadata !{i32 786451, metadata !37, null, metadata !"C", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [C] [line 1, size 8, align 8, offset 0] [def] [from ] !9 = metadata !{metadata !10, metadata !12, metadata !14} !10 = metadata !{i32 786445, metadata !37, metadata !8, metadata !"static_member_variable", i32 4, i64 0, i64 0, i64 0, i32 4096, metadata !11, null} ; [ DW_TAG_member ] [static_member_variable] [line 4, size 0, align 0, offset 0] [static] [from int] !11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !12 = metadata !{i32 786478, metadata !4, metadata !8, metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", i32 2, metadata !5, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !13, i32 2} ; [ DW_TAG_subprogram ] [line 2] [member_function] !13 = metadata !{i32 786468} ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0] !14 = metadata !{i32 786478, metadata !4, metadata !8, metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 3, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !17, i32 3} ; [ DW_TAG_subprogram ] [line 3] [static_member_function] -!15 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!15 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ 
DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !16 = metadata !{metadata !11} !17 = metadata !{i32 786468} ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0] !18 = metadata !{i32 786478, metadata !4, null, metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 13, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_ZN1C22static_member_functionEv, null, metadata !14, metadata !1, i32 13} ; [ DW_TAG_subprogram ] [line 13] [def] [static_member_function] !19 = metadata !{i32 786478, metadata !4, metadata !4, metadata !"global_function", metadata !"global_function", metadata !"_Z15global_functionv", i32 19, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z15global_functionv, null, null, metadata !1, i32 19} ; [ DW_TAG_subprogram ] [line 19] [def] [global_function] !20 = metadata !{i32 786478, metadata !4, metadata !21, metadata !"global_namespace_function", metadata !"global_namespace_function", metadata !"_ZN2ns25global_namespace_functionEv", i32 24, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZN2ns25global_namespace_functionEv, null, null, metadata !1, i32 24} ; [ DW_TAG_subprogram ] [line 24] [def] [global_namespace_function] !21 = metadata !{i32 786489, null, metadata !"ns", metadata !4, i32 23} ; [ DW_TAG_namespace ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp] -!22 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!22 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !23 = metadata !{null} !24 = metadata !{metadata !25, metadata !26, metadata !27} !25 = metadata !{i32 786484, 
i32 0, metadata !8, metadata !"static_member_variable", metadata !"static_member_variable", metadata !"_ZN1C22static_member_variableE", metadata !4, i32 7, metadata !11, i32 0, i32 1, i32* @_ZN1C22static_member_variableE, metadata !10} ; [ DW_TAG_variable ] [static_member_variable] [line 7] [def] @@ -124,3 +126,4 @@ attributes #1 = { nounwind readnone } !35 = metadata !{i32 25, i32 0, metadata !20, null} !36 = metadata !{i32 26, i32 0, metadata !20, null} !37 = metadata !{metadata !"dwarf-public-names.cpp", metadata !"/usr2/kparzysz/s.hex/t"} +!38 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/dwarfdump-pubnames.test b/test/DebugInfo/dwarfdump-pubnames.test index e1b16c2..215953a 100644 --- a/test/DebugInfo/dwarfdump-pubnames.test +++ b/test/DebugInfo/dwarfdump-pubnames.test @@ -2,15 +2,13 @@ RUN: llvm-dwarfdump %p/Inputs/dwarfdump-pubnames.elf-x86-64 \ RUN: -debug-dump=pubnames | FileCheck %s CHECK: .debug_pubnames contents: -CHECK: Length: 161 -CHECK: Version: 2 -CHECK: Offset in .debug_info: 0 -CHECK: Size: 321 +CHECK: length = 0x000000a1 version = 0x0002 unit_offset = 0x00000000 unit_size = 0x00000141 + +CHECK: Offset Name +CHECK: 0x00000098 "global_namespace_variable" +CHECK: 0x000000a7 "global_namespace_function" +CHECK: 0x000000ec "static_member_function" +CHECK: 0x0000007c "global_variable" +CHECK: 0x00000103 "global_function" +CHECK: 0x000000c2 "member_function" -CHECK: Offset Name -CHECK: 98 global_namespace_variable -CHECK: a7 global_namespace_function -CHECK: ec static_member_function -CHECK: 7c global_variable -CHECK: 103 global_function -CHECK: c2 member_function diff --git a/test/DebugInfo/dwarfdump-type-units.test b/test/DebugInfo/dwarfdump-type-units.test new file mode 100644 index 0000000..5fca81d --- /dev/null +++ b/test/DebugInfo/dwarfdump-type-units.test @@ -0,0 +1,32 @@ +RUN: llvm-dwarfdump %p/Inputs/dwarfdump-type-units.elf-x86-64 > %t +RUN: cat %t | FileCheck -check-prefix=FOO %s +RUN: cat %t | 
FileCheck -check-prefix=BAR %s +RUN: llvm-dwarfdump -debug-dump=types %p/Inputs/dwarfdump-type-units.elf-x86-64 | FileCheck -check-prefix=TYPES %s + +FOO: debug_info contents: +FOO: DW_TAG_variable +FOO-NEXT: DW_AT_name {{.*}}"f" +FOO: DW_AT_type [DW_FORM_ref_sig8] ([[FOO_SIG:0x[0-9a-f]*]]) + +FOO: debug_types contents: +FOO: 0x00000000: Type Unit: {{.*}} type_signature = [[FOO_SIG]] type_offset = 0x[[FOO_OFF:[0-9a-f]*]] (next unit at +FOO: DW_TAG_type_unit +FOO-NOT: NULL +FOO: 0x0000[[FOO_OFF]]: DW_TAG_structure_type +FOO-NEXT: DW_AT_name {{.*}}"foo" + +BAR: debug_info contents: +BAR: DW_TAG_variable +BAR: DW_TAG_variable +BAR-NEXT: DW_AT_name {{.*}}"b" +BAR: DW_AT_type [DW_FORM_ref_sig8] ([[BAR_SIG:0x[0-9a-f]*]]) + +BAR: debug_types contents: +BAR: 0x00000000: Type Unit: {{.*}} type_signature = [[BAR_SIG]] type_offset = 0x[[BAR_OFF:[0-9a-f]*]] (next unit at +BAR: DW_TAG_type_unit +BAR-NOT: NULL +BAR: 0x0000[[BAR_OFF]]: DW_TAG_structure_type +BAR-NEXT: DW_AT_name {{.*}}"bar" + +TYPES-NOT: debug_info contents: +TYPES: debug_types contents: diff --git a/test/DebugInfo/enum.ll b/test/DebugInfo/enum.ll index 59a303e..bc09846 100644 --- a/test/DebugInfo/enum.ll +++ b/test/DebugInfo/enum.ll @@ -47,28 +47,28 @@ entry: ; Function Attrs: nounwind readnone declare void @llvm.dbg.declare(metadata, metadata) #1 -attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind readnone } !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!19} +!llvm.module.flags = !{!19, !24} !0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 
false, metadata !"", i32 0, metadata !2, metadata !11, metadata !12, metadata !17, metadata !11, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/enum.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{metadata !"enum.cpp", metadata !"/tmp"} !2 = metadata !{metadata !3, metadata !8} -!3 = metadata !{i32 786436, metadata !1, null, metadata !"e1", i32 1, i64 64, i64 64, i32 0, i32 0, null, metadata !4, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] [e1] [line 1, size 64, align 64, offset 0] [def] [from ] +!3 = metadata !{i32 786436, metadata !1, null, metadata !"e1", i32 1, i64 64, i64 64, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [e1] [line 1, size 64, align 64, offset 0] [def] [from ] !4 = metadata !{metadata !5, metadata !6, metadata !7} !5 = metadata !{i32 786472, metadata !"I", i64 0} ; [ DW_TAG_enumerator ] [I :: 0] !6 = metadata !{i32 786472, metadata !"J", i64 4294967295} ; [ DW_TAG_enumerator ] [J :: 4294967295] !7 = metadata !{i32 786472, metadata !"K", i64 -1152921504606846976} ; [ DW_TAG_enumerator ] [K :: 17293822569102704640] -!8 = metadata !{i32 786436, metadata !1, null, metadata !"e2", i32 2, i64 32, i64 32, i32 0, i32 0, null, metadata !9, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] [e2] [line 2, size 32, align 32, offset 0] [def] [from ] +!8 = metadata !{i32 786436, metadata !1, null, metadata !"e2", i32 2, i64 32, i64 32, i32 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [e2] [line 2, size 32, align 32, offset 0] [def] [from ] !9 = metadata !{metadata !10} !10 = metadata !{i32 786472, metadata !"X", i64 0} ; [ DW_TAG_enumerator ] [X :: 0] !11 = metadata !{i32 0} !12 = metadata !{metadata !13} !13 = metadata !{i32 786478, metadata !1, metadata !14, metadata !"func", metadata !"func", metadata !"_Z4funcv", i32 3, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z4funcv, null, null, metadata !11, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [func] 
!14 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/enum.cpp] -!15 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!15 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !16 = metadata !{null} !17 = metadata !{metadata !18} !18 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !14, i32 1, metadata !3, i32 0, i32 1, i64* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def] @@ -77,3 +77,4 @@ attributes #1 = { nounwind readnone } !21 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !22 = metadata !{i32 4, i32 0, metadata !13, null} !23 = metadata !{i32 5, i32 0, metadata !13, null} +!24 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/global.ll b/test/DebugInfo/global.ll index b438305..9a0c32a 100644 --- a/test/DebugInfo/global.ll +++ b/test/DebugInfo/global.ll @@ -18,10 +18,10 @@ entry: ret i32 0, !dbg !12 } -attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!11} +!llvm.module.flags = !{!11, !13} !0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 true, metadata !"", 
i32 0, metadata !2, metadata !2, metadata !3, metadata !9, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/global.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{metadata !"global.cpp", metadata !"/tmp"} @@ -29,10 +29,11 @@ attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-fra !3 = metadata !{metadata !4} !4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 2, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @main, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [main] !5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/global.cpp] -!6 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !7 = metadata !{metadata !8} !8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !9 = metadata !{metadata !10} !10 = metadata !{i32 786484, i32 0, null, metadata !"i", metadata !"i", metadata !"_ZL1i", metadata !5, i32 1, metadata !8, i32 1, i32 1, null, null} !11 = metadata !{i32 2, metadata !"Dwarf Version", i32 3} !12 = metadata !{i32 4, i32 0, metadata !4, null} +!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/inheritance.ll b/test/DebugInfo/inheritance.ll index b0665b2..6b3ae09 100644 --- a/test/DebugInfo/inheritance.ll +++ b/test/DebugInfo/inheritance.ll @@ -110,29 +110,29 @@ declare void @_ZdlPv(i8*) nounwind !2 = metadata !{i32 458763, metadata !44, metadata !3, i32 0, i32 0, i32 0} ; [ 
DW_TAG_lexical_block ] !3 = metadata !{i32 458798, i32 0, metadata !4, metadata !"main", metadata !"main", metadata !"main", i32 11, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !4 = metadata !{i32 458769, metadata !44, i32 4, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !45, metadata !45, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!5 = metadata !{i32 458773, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ] +!5 = metadata !{i32 458773, metadata !4, null, metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !6 = metadata !{metadata !7} !7 = metadata !{i32 458788, null, metadata !4, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!8 = metadata !{i32 458771, metadata !44, metadata !4, metadata !"test1", i32 1, i64 64, i64 64, i64 0, i32 0, null, metadata !9, i32 0, metadata !8} ; [ DW_TAG_structure_type ] +!8 = metadata !{i32 458771, metadata !44, metadata !4, metadata !"test1", i32 1, i64 64, i64 64, i64 0, i32 0, null, metadata !9, i32 0, metadata !8, null, null} ; [ DW_TAG_structure_type ] [test1] [line 1, size 64, align 64, offset 0] [def] [from ] !9 = metadata !{metadata !10, metadata !14, metadata !18} !10 = metadata !{i32 458765, metadata !44, metadata !8, metadata !"_vptr$test1", i32 1, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_member ] -!11 = metadata !{i32 458767, metadata !4, metadata !"", metadata !4, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ] +!11 = metadata !{i32 458767, metadata !4, null, metadata !4, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ] !12 = metadata !{i32 458767, null, metadata !4, 
metadata !"__vtbl_ptr_type", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ] !13 = metadata !{i32 458769, metadata !46, i32 4, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !45, metadata !45, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ] !14 = metadata !{i32 458798, i32 0, metadata !8, metadata !"test1", metadata !"test1", metadata !"", i32 1, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i1 true, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!15 = metadata !{i32 458773, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null} ; [ DW_TAG_subroutine_type ] +!15 = metadata !{i32 458773, metadata !4, null, metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !16 = metadata !{null, metadata !17} -!17 = metadata !{i32 458767, metadata !4, metadata !"", metadata !4, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !8} ; [ DW_TAG_pointer_type ] +!17 = metadata !{i32 458767, metadata !4, null, metadata !4, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !8} ; [ DW_TAG_pointer_type ] !18 = metadata !{i32 458798, i32 0, metadata !8, metadata !"~test1", metadata !"~test1", metadata !"", i32 4, metadata !19, i1 false, i1 false, i32 1, i32 0, metadata !8, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!19 = metadata !{i32 458773, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !20, i32 0, null} ; [ DW_TAG_subroutine_type ] +!19 = metadata !{i32 458773, metadata !4, null, metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !20, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !20 = metadata !{null, metadata !17, metadata !7} !21 = metadata !{i32 11, i32 0, metadata !1, null} 
!22 = metadata !{i32 13, i32 0, metadata !1, null} !23 = metadata !{i32 14, i32 0, metadata !1, null} !24 = metadata !{i32 459009, metadata !25, metadata !"this", metadata !4, i32 13, metadata !26} ; [ DW_TAG_arg_variable ] !25 = metadata !{i32 458798, i32 0, metadata !4, metadata !"test1", metadata !"test1", metadata !"_ZN5test1C1Ev", i32 1, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!26 = metadata !{i32 458790, metadata !4, metadata !"", metadata !4, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !27} ; [ DW_TAG_const_type ] -!27 = metadata !{i32 458767, metadata !4, metadata !"", metadata !4, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] +!26 = metadata !{i32 458790, metadata !4, null, metadata !4, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !27} ; [ DW_TAG_const_type ] +!27 = metadata !{i32 458767, metadata !4, null, metadata !4, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] !28 = metadata !{i32 1, i32 0, metadata !25, null} !29 = metadata !{i32 1, i32 0, metadata !30, null} !30 = metadata !{i32 458763, metadata !44, metadata !31, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] diff --git a/test/DebugInfo/inline-debug-info-multiret.ll b/test/DebugInfo/inline-debug-info-multiret.ll index 108f212..594512f 100644 --- a/test/DebugInfo/inline-debug-info-multiret.ll +++ b/test/DebugInfo/inline-debug-info-multiret.ll @@ -120,6 +120,7 @@ attributes #1 = { nounwind readnone } attributes #2 = { nounwind } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!31} !0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [<unknown>] [DW_LANG_C_plus_plus] !1 = metadata !{metadata !"<unknown>", metadata !""} @@ -128,11 +129,11 @@ attributes #2 = { nounwind } !4 = metadata !{i32 
786478, metadata !5, metadata !6, metadata !"test", metadata !"test", metadata !"_Z4testi", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z4testi, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [test] !5 = metadata !{metadata !"test.cpp", metadata !""} !6 = metadata !{i32 786473, metadata !5} ; [ DW_TAG_file_type ] [test.cpp] -!7 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9, metadata !9} !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !10 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"test2", metadata !"test2", metadata !"_Z5test2v", i32 11, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z5test2v, null, null, metadata !2, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [test2] -!11 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !12 = metadata !{metadata !9} !13 = metadata !{i32 786689, metadata !4, metadata !"k", metadata !6, i32 16777220, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [k] [line 4] !14 = metadata !{i32 4, i32 0, metadata !4, null} @@ -152,3 +153,4 @@ attributes 
#2 = { nounwind } !28 = metadata !{i32 18, i32 0, metadata !27, null} !29 = metadata !{i32 19, i32 0, metadata !10, null} !30 = metadata !{i32 20, i32 0, metadata !10, null} +!31 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/inline-debug-info.ll b/test/DebugInfo/inline-debug-info.ll index 7c3267a..b56ca95 100644 --- a/test/DebugInfo/inline-debug-info.ll +++ b/test/DebugInfo/inline-debug-info.ll @@ -138,6 +138,7 @@ attributes #1 = { nounwind readnone } attributes #2 = { nounwind } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!31} !0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [<unknown>] [DW_LANG_C_plus_plus] !1 = metadata !{metadata !"<unknown>", metadata !""} @@ -146,11 +147,11 @@ attributes #2 = { nounwind } !4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"test", metadata !"test", metadata !"_Z4testi", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z4testi, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [test] !5 = metadata !{metadata !"test.cpp", metadata !""} !6 = metadata !{i32 786473, metadata !5} ; [ DW_TAG_file_type ] [test.cpp] -!7 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9, metadata !9} !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !10 = metadata 
!{i32 786478, metadata !5, metadata !6, metadata !"test2", metadata !"test2", metadata !"_Z5test2v", i32 11, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z5test2v, null, null, metadata !2, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [test2] -!11 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !12 = metadata !{metadata !9} !13 = metadata !{i32 786689, metadata !4, metadata !"k", metadata !6, i32 16777220, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [k] [line 4] !14 = metadata !{i32 4, i32 0, metadata !4, null} @@ -170,3 +171,4 @@ attributes #2 = { nounwind } !28 = metadata !{i32 18, i32 0, metadata !27, null} !29 = metadata !{i32 19, i32 0, metadata !10, null} !30 = metadata !{i32 20, i32 0, metadata !10, null} +!31 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/inlined-arguments.ll b/test/DebugInfo/inlined-arguments.ll index 50a9068..1dd5b2c 100644 --- a/test/DebugInfo/inlined-arguments.ll +++ b/test/DebugInfo/inlined-arguments.ll @@ -41,11 +41,12 @@ declare void @_Z2f3i(i32) #1 ; Function Attrs: nounwind readnone declare void @llvm.dbg.value(metadata, i64, metadata) #2 -attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } 
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #2 = { nounwind readnone } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!26} !0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/exp.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{metadata !"exp.cpp", metadata !"/usr/local/google/home/blaikie/dev/scratch"} @@ -53,10 +54,10 @@ attributes #2 = { nounwind readnone } !3 = metadata !{metadata !4, metadata !8} !4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f2", metadata !"f2", metadata !"_Z2f2v", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @_Z2f2v, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f2] !5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/usr/local/google/home/blaikie/dev/scratch/exp.cpp] -!6 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !7 = metadata !{null} !8 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f1", metadata !"f1", metadata !"_Z2f1ii", i32 6, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32, i32)* @_Z2f1ii, null, null, 
metadata !12, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [f1] -!9 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!9 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !10 = metadata !{null, metadata !11, metadata !11} !11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !12 = metadata !{metadata !13, metadata !14} @@ -73,3 +74,4 @@ attributes #2 = { nounwind readnone } !23 = metadata !{i32 6, i32 0, metadata !8, null} !24 = metadata !{i32 7, i32 0, metadata !8, null} !25 = metadata !{i32 8, i32 0, metadata !8, null} ; [ DW_TAG_imported_declaration ] +!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/inlined-vars.ll b/test/DebugInfo/inlined-vars.ll index cd98e1d..34c5101 100644 --- a/test/DebugInfo/inlined-vars.ll +++ b/test/DebugInfo/inlined-vars.ll @@ -16,17 +16,18 @@ declare void @smth(i32) declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!27} !0 = metadata !{i32 786449, metadata !26, i32 4, metadata !"clang version 3.2 (trunk 159419)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5, metadata !10} !5 = metadata !{i32 786478, metadata !26, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 10, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @main, null, null, metadata !1, i32 10} ; [ DW_TAG_subprogram ] !6 = metadata !{i32 786473, metadata 
!26} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 786453, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9} !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !10 = metadata !{i32 786478, metadata !26, metadata !6, metadata !"f", metadata !"f", metadata !"_ZL1fi", i32 3, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !13, i32 3} ; [ DW_TAG_subprogram ] -!11 = metadata !{i32 786453, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!11 = metadata !{i32 786453, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !12 = metadata !{metadata !9, metadata !9} !13 = metadata !{metadata !14} !14 = metadata !{metadata !15, metadata !16} @@ -52,3 +53,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !24 = metadata !{i32 5, i32 3, metadata !10, metadata !19} !25 = metadata !{i32 6, i32 3, metadata !10, metadata !19} !26 = metadata !{metadata !"inline-bug.cc", metadata !"/tmp/dbginfo/pr13202"} +!27 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/lit.local.cfg b/test/DebugInfo/lit.local.cfg deleted file mode 100644 index 00bd9b8..0000000 --- a/test/DebugInfo/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp', '.test'] diff --git a/test/DebugInfo/llvm-symbolizer.test b/test/DebugInfo/llvm-symbolizer.test index 4dc3699..a8799cf 100644 --- 
a/test/DebugInfo/llvm-symbolizer.test +++ b/test/DebugInfo/llvm-symbolizer.test @@ -1,7 +1,9 @@ RUN: echo "%p/Inputs/dwarfdump-test.elf-x86-64 0x400559" > %t.input +RUN: echo "%p/Inputs/dwarfdump-test.elf-x86-64.debuglink 0x400559" >> %t.input RUN: echo "%p/Inputs/dwarfdump-test.elf-x86-64 0x400436" >> %t.input RUN: echo "%p/Inputs/dwarfdump-test4.elf-x86-64 0x62c" >> %t.input RUN: echo "%p/Inputs/dwarfdump-inl-test.elf-x86-64 0x710" >> %t.input +RUN: echo "%p/Inputs/dwarfdump-inl-test.high_pc.elf-x86-64 0x568" >> %t.input RUN: echo "\"%p/Inputs/dwarfdump-test3.elf-x86-64 space\" 0x633" >> %t.input RUN: echo "%p/Inputs/macho-universal 0x1f84" >> %t.input RUN: echo "%p/Inputs/macho-universal:i386 0x1f67" >> %t.input @@ -13,6 +15,9 @@ RUN: --default-arch=i386 < %t.input | FileCheck %s CHECK: main CHECK-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:16 +CHECK: main +CHECK-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:16 + CHECK: _start CHECK: _Z1cv @@ -27,6 +32,15 @@ CHECK-NEXT: dwarfdump-inl-test.cc:3 CHECK-NEXT: main CHECK-NEXT: dwarfdump-inl-test.cc: +CHECK: inlined_h +CHECK-NEXT: dwarfdump-inl-test.h:3 +CHECK-NEXT: inlined_g +CHECK-NEXT: dwarfdump-inl-test.h:7 +CHECK-NEXT: inlined_f +CHECK-NEXT: dwarfdump-inl-test.cc:3 +CHECK-NEXT: main +CHECK-NEXT: dwarfdump-inl-test.cc: + CHECK: _Z3do1v CHECK-NEXT: dwarfdump-test3-decl.h:7 diff --git a/test/DebugInfo/member-order.ll b/test/DebugInfo/member-order.ll new file mode 100644 index 0000000..a0c283d --- /dev/null +++ b/test/DebugInfo/member-order.ll @@ -0,0 +1,66 @@ +; REQUIRES: object-emission + +; RUN: llc -filetype=obj -O0 < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s + +; generated by clang from: +; struct foo { +; void f1(); +; void f2(); +; }; +; +; void foo::f1() { +; } + +; CHECK: DW_TAG_structure_type +; CHECK-NEXT: DW_AT_name {{.*}} "foo" +; CHECK-NOT: NULL +; CHECK: DW_TAG_subprogram +; CHECK-NOT: NULL +; CHECK: DW_AT_name {{.*}} "f1" +; CHECK: DW_TAG_subprogram +; CHECK-NOT: NULL +; CHECK: DW_AT_name 
{{.*}} "f2" + + +%struct.foo = type { i8 } + +; Function Attrs: nounwind uwtable +define void @_ZN3foo2f1Ev(%struct.foo* %this) #0 align 2 { +entry: + %this.addr = alloca %struct.foo*, align 8 + store %struct.foo* %this, %struct.foo** %this.addr, align 8 + call void @llvm.dbg.declare(metadata !{%struct.foo** %this.addr}, metadata !16), !dbg !18 + %this1 = load %struct.foo** %this.addr + ret void, !dbg !19 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata) #1 + +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!15, !20} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !13, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/member-order.cpp] [DW_LANG_C_plus_plus] +!1 = metadata !{metadata !"member-order.cpp", metadata !"/tmp/dbginfo"} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786451, metadata !1, null, metadata !"foo", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !5, i32 0, null, null, metadata !"_ZTS3foo"} ; [ DW_TAG_structure_type ] [foo] [line 1, size 8, align 8, offset 0] [def] [from ] +!5 = metadata !{metadata !6, metadata !11} +!6 = metadata !{i32 786478, metadata !1, metadata !4, metadata !"f1", metadata !"f1", metadata !"_ZN3foo2f1Ev", i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !10, i32 2} ; [ DW_TAG_subprogram ] [line 2] [f1] +!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] 
[line 0, size 0, align 0, offset 0] [from ] +!8 = metadata !{null, metadata !9} +!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS3foo"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS3foo] +!10 = metadata !{i32 786468} +!11 = metadata !{i32 786478, metadata !1, metadata !4, metadata !"f2", metadata !"f2", metadata !"_ZN3foo2f2Ev", i32 3, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !12, i32 3} ; [ DW_TAG_subprogram ] [line 3] [f2] +!12 = metadata !{i32 786468} +!13 = metadata !{metadata !14} +!14 = metadata !{i32 786478, metadata !1, null, metadata !"f1", metadata !"f1", metadata !"_ZN3foo2f1Ev", i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.foo*)* @_ZN3foo2f1Ev, null, metadata !6, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [f1] +!15 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!16 = metadata !{i32 786689, metadata !14, metadata !"this", null, i32 16777216, metadata !17, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0] +!17 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS3foo"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS3foo] +!18 = metadata !{i32 0, i32 0, metadata !14, null} +!19 = metadata !{i32 7, i32 0, metadata !14, null} +!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/member-pointers.ll b/test/DebugInfo/member-pointers.ll index 7d999f1..0bc4ee6 100644 --- a/test/DebugInfo/member-pointers.ll +++ b/test/DebugInfo/member-pointers.ll @@ -4,12 +4,12 @@ ; RUN: llc -filetype=obj -O0 < %s > %t ; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s ; CHECK: DW_TAG_ptr_to_member_type -; CHECK: [[TYPE:.*]]: DW_TAG_subroutine_type +; CHECK: DW_TAG_ptr_to_member_type +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] 
(cu + {{.*}} => {[[TYPE:0x[0-9a-f]+]]}) +; CHECK: [[TYPE]]: DW_TAG_subroutine_type ; CHECK: DW_TAG_formal_parameter ; CHECK-NEXT: DW_AT_type ; CHECK-NEXT: DW_AT_artificial [DW_FORM_flag -; CHECK: DW_TAG_ptr_to_member_type -; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE]]}) ; IR generated from clang -g with the following source: ; struct S { ; }; @@ -21,6 +21,7 @@ @y = global { i64, i64 } zeroinitializer, align 8 !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!16} !0 = metadata !{i32 786449, metadata !15, i32 4, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/home/blaikie/Development/scratch/simple.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{i32 0} @@ -29,10 +30,11 @@ !6 = metadata !{i32 786473, metadata !15} ; [ DW_TAG_file_type ] !7 = metadata !{i32 786463, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !8, metadata !9} ; [ DW_TAG_ptr_to_member_type ] [line 0, size 0, align 0, offset 0] [from int] !8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] -!9 = metadata !{i32 786451, metadata !15, null, metadata !"S", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !1, i32 0, null, null} ; [ DW_TAG_structure_type ] [S] [line 1, size 8, align 8, offset 0] [from ] +!9 = metadata !{i32 786451, metadata !15, null, metadata !"S", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !1, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [S] [line 1, size 8, align 8, offset 0] [def] [from ] !10 = metadata !{i32 786484, i32 0, null, metadata !"y", metadata !"y", metadata !"", metadata !6, i32 5, metadata !11, i32 0, i32 1, { i64, i64 }* @y, null} ; [ DW_TAG_variable ] [y] [line 5] [def] !11 = metadata !{i32 786463, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !12, metadata !9} ; [ 
DW_TAG_ptr_to_member_type ] [line 0, size 0, align 0, offset 0] [from ] -!12 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!12 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !13 = metadata !{null, metadata !14, metadata !8} -!14 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from S] +!14 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from S] !15 = metadata !{metadata !"simple.cpp", metadata !"/home/blaikie/Development/scratch"} +!16 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/namespace.ll b/test/DebugInfo/namespace.ll index 81b8a87..9d0b25c 100644 --- a/test/DebugInfo/namespace.ll +++ b/test/DebugInfo/namespace.ll @@ -191,10 +191,11 @@ return: ; preds = %if.end, %if.then ret i32 %5, !dbg !51 } -attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind readnone } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!52} !0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, 
metadata !2, metadata !3, metadata !19, metadata !21, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/llvm/build/clang/debug//usr/local/google/home/blaikie/dev/llvm/src/tools/clang/test/CodeGenCXX/debug-info-namespace.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{metadata !"/usr/local/google/home/blaikie/dev/llvm/src/tools/clang/test/CodeGenCXX/debug-info-namespace.cpp", metadata !"/usr/local/google/home/blaikie/dev/llvm/build/clang/debug"} @@ -204,15 +205,15 @@ attributes #1 = { nounwind readnone } !5 = metadata !{metadata !"foo.cpp", metadata !"/usr/local/google/home/blaikie/dev/llvm/build/clang/debug"} !6 = metadata !{i32 786489, metadata !5, metadata !7, metadata !"B", i32 1} ; [ DW_TAG_namespace ] [B] [line 1] !7 = metadata !{i32 786489, metadata !1, null, metadata !"A", i32 3} ; [ DW_TAG_namespace ] [A] [line 3] -!8 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !9, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!8 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !9 = metadata !{null} !10 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"f1", metadata !"f1", metadata !"_ZN1A1B2f1Ei", i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @_ZN1A1B2f1Ei, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [f1] -!11 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !12 = 
metadata !{null, metadata !13} !13 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !14 = metadata !{i32 786478, metadata !5, metadata !15, metadata !"func", metadata !"func", metadata !"_Z4funcb", i32 13, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i1)* @_Z4funcb, null, null, metadata !2, i32 13} ; [ DW_TAG_subprogram ] [line 13] [def] [func] !15 = metadata !{i32 786473, metadata !5} ; [ DW_TAG_file_type ] [/usr/local/google/home/blaikie/dev/llvm/build/clang/debug/foo.cpp] -!16 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!16 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !17, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !17 = metadata !{metadata !13, metadata !18} !18 = metadata !{i32 786468, null, null, metadata !"bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] [bool] [line 0, size 8, align 8, offset 0, enc DW_ATE_boolean] !19 = metadata !{metadata !20} @@ -224,14 +225,14 @@ attributes #1 = { nounwind readnone } !25 = metadata !{i32 786443, metadata !5, metadata !14, i32 14, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/blaikie/dev/llvm/build/clang/debug/foo.cpp] !26 = metadata !{i32 786490, metadata !14, metadata !7, i32 18} ; [ DW_TAG_imported_module ] !27 = metadata !{i32 786440, metadata !14, metadata !28, i32 19} ; [ DW_TAG_imported_declaration ] -!28 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"foo", i32 5, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [foo] [line 5, size 0, align 0, offset 0] [fwd] [from ] +!28 = metadata !{i32 786451, metadata !5, 
metadata !6, metadata !"foo", i32 5, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [foo] [line 5, size 0, align 0, offset 0] [decl] [from ] !29 = metadata !{i32 786440, metadata !14, metadata !30, i32 20} ; [ DW_TAG_imported_declaration ] -!30 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"bar", i32 6, i64 8, i64 8, i32 0, i32 0, null, metadata !31, i32 0, null, null} ; [ DW_TAG_structure_type ] [bar] [line 6, size 8, align 8, offset 0] [from ] +!30 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"bar", i32 6, i64 8, i64 8, i32 0, i32 0, null, metadata !31, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [bar] [line 6, size 8, align 8, offset 0] [def] [from ] !31 = metadata !{metadata !32} !32 = metadata !{i32 786478, metadata !5, metadata !30, metadata !"bar", metadata !"bar", metadata !"", i32 6, metadata !33, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !36, i32 6} ; [ DW_TAG_subprogram ] [line 6] [bar] -!33 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !34, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!33 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !34, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !34 = metadata !{null, metadata !35} -!35 = metadata !{i32 786447, i32 0, i32 0, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !30} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from bar] +!35 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !30} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from bar] !36 = metadata !{i32 786468} !37 = metadata !{i32 786440, metadata !14, metadata !10, i32 21} ; [ 
DW_TAG_imported_declaration ] !38 = metadata !{i32 786440, metadata !14, metadata !20, i32 22} ; [ DW_TAG_imported_declaration ] @@ -248,3 +249,4 @@ attributes #1 = { nounwind readnone } !49 = metadata !{i32 23, i32 0, metadata !14, null} !50 = metadata !{i32 26, i32 0, metadata !14, null} !51 = metadata !{i32 27, i32 0, metadata !14, null} +!52 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/template-recursive-void.ll b/test/DebugInfo/template-recursive-void.ll new file mode 100644 index 0000000..2ed57a6 --- /dev/null +++ b/test/DebugInfo/template-recursive-void.ll @@ -0,0 +1,65 @@ +; REQUIRES: object-emission + +; RUN: llc -O0 -filetype=obj < %s > %t +; RUN: llvm-dwarfdump %t | FileCheck %s + +; This was pulled from clang's debug-info-template-recursive.cpp test. +; class base { }; + +; template <class T> class foo : public base { +; void operator=(const foo r) { } +; }; + +; class bar : public foo<void> { }; +; bar filters; + +; CHECK: DW_TAG_template_type_parameter [{{.*}}] +; CHECK-NEXT: DW_AT_name{{.*}}"T" +; CHECK-NOT: DW_AT_type +; CHECK: NULL + +%class.bar = type { i8 } + +@filters = global %class.bar zeroinitializer, align 1 + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!36, !37} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (trunk 187958) (llvm/trunk 187964)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/debug-info-template-recursive.cpp] [DW_LANG_C_plus_plus] +!1 = metadata !{metadata !"debug-info-template-recursive.cpp", metadata !"/usr/local/google/home/echristo/tmp"} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786484, i32 0, null, metadata !"filters", metadata !"filters", metadata !"", metadata !5, i32 10, metadata !6, i32 0, i32 1, %class.bar* @filters, null} ; [ DW_TAG_variable ] [filters] [line 10] [def] +!5 = 
metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/debug-info-template-recursive.cpp] +!6 = metadata !{i32 786434, metadata !1, null, metadata !"bar", i32 9, i64 8, i64 8, i32 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_class_type ] [bar] [line 9, size 8, align 8, offset 0] [def] [from ] +!7 = metadata !{metadata !8, metadata !31} +!8 = metadata !{i32 786460, null, metadata !6, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from foo<void>] +!9 = metadata !{i32 786434, metadata !1, null, metadata !"foo<void>", i32 5, i64 8, i64 8, i32 0, i32 0, null, metadata !10, i32 0, null, metadata !29, null} ; [ DW_TAG_class_type ] [foo<void>] [line 5, size 8, align 8, offset 0] [def] [from ] +!10 = metadata !{metadata !11, metadata !19, metadata !25} +!11 = metadata !{i32 786460, null, metadata !9, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from base] +!12 = metadata !{i32 786434, metadata !1, null, metadata !"base", i32 3, i64 8, i64 8, i32 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_class_type ] [base] [line 3, size 8, align 8, offset 0] [def] [from ] +!13 = metadata !{metadata !14} +!14 = metadata !{i32 786478, metadata !1, metadata !12, metadata !"base", metadata !"base", metadata !"", i32 3, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !18, i32 3} ; [ DW_TAG_subprogram ] [line 3] [base] +!15 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!16 = metadata !{null, metadata !17} +!17 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !12} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 
64, offset 0] [artificial] [from base] +!18 = metadata !{i32 786468} +!19 = metadata !{i32 786478, metadata !1, metadata !9, metadata !"operator=", metadata !"operator=", metadata !"_ZN3fooIvEaSES0_", i32 6, metadata !20, i1 false, i1 false, i32 0, i32 0, null, i32 257, i1 false, null, null, i32 0, metadata !24, i32 6} ; [ DW_TAG_subprogram ] [line 6] [private] [operator=] +!20 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !21, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!21 = metadata !{null, metadata !22, metadata !23} +!22 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from foo<void>] +!23 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from foo<void>] +!24 = metadata !{i32 786468} +!25 = metadata !{i32 786478, metadata !1, metadata !9, metadata !"foo", metadata !"foo", metadata !"", i32 5, metadata !26, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !28, i32 5} ; [ DW_TAG_subprogram ] [line 5] [foo] +!26 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !27, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!27 = metadata !{null, metadata !22} +!28 = metadata !{i32 786468} +!29 = metadata !{metadata !30} +!30 = metadata !{i32 786479, null, metadata !"T", null, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ] +!31 = metadata !{i32 786478, metadata !1, metadata !6, metadata !"bar", metadata !"bar", metadata !"", i32 9, metadata !32, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !35, i32 9} ; [ DW_TAG_subprogram ] [line 9] 
[bar] +!32 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !33, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!33 = metadata !{null, metadata !34} +!34 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !6} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from bar] +!35 = metadata !{i32 786468} +!36 = metadata !{i32 2, metadata !"Dwarf Version", i32 3} +!37 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/tu-composite.ll b/test/DebugInfo/tu-composite.ll new file mode 100644 index 0000000..f838eca --- /dev/null +++ b/test/DebugInfo/tu-composite.ll @@ -0,0 +1,185 @@ +; REQUIRES: object-emission + +; RUN: llc -filetype=obj -O0 < %s > %t +; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s +; CHECK: [[TYPE:.*]]: DW_TAG_structure_type +; Make sure we correctly handle containing type of a struct being a type identifier. +; CHECK-NEXT: DW_AT_containing_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE]]}) +; CHECK-NEXT: DW_AT_name [DW_FORM_strp] {{.*}}= "C") + +; Make sure we correctly handle context of a subprogram being a type identifier. +; CHECK: [[SP:.*]]: DW_TAG_subprogram +; CHECK: DW_AT_name [DW_FORM_strp] {{.*}}= "foo") +; Make sure we correctly handle containing type of a subprogram being a type identifier. +; CHECK: DW_AT_containing_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE]]}) +; CHECK: DW_TAG_formal_parameter +; CHECK: NULL +; CHECK: NULL + +; CHECK: [[TYPE2:.*]]: DW_TAG_structure_type +; CHECK: DW_AT_name [DW_FORM_strp] {{.*}}= "bar") +; CHECK: DW_TAG_structure_type +; CHECK: DW_AT_name [DW_FORM_strp] {{.*}}= "D") +; CHECK: DW_TAG_member +; CHECK: DW_AT_name [DW_FORM_strp] {{.*}}= "a") +; Make sure we correctly handle context of a struct being a type identifier. 
+; CHECK: DW_TAG_structure_type +; CHECK-NEXT: DW_AT_name [DW_FORM_strp] {{.*}}= "Nested") +; CHECK: DW_TAG_structure_type +; CHECK-NEXT: DW_AT_name [DW_FORM_strp] {{.*}}= "Nested2") +; CHECK-NEXT: DW_AT_declaration [DW_FORM_flag] (0x01) +; CHECK: DW_TAG_structure_type +; CHECK-NEXT: DW_AT_name [DW_FORM_strp] {{.*}}= "virt<bar>") +; Make sure we correctly handle type of a template_type being a type identifier. +; CHECK: DW_TAG_template_type_parameter +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE2]]}) +; CHECK-NEXT: DW_AT_name [DW_FORM_strp] {{.*}}= "T") +; Make sure we correctly handle derived-from of a typedef being a type identifier. +; CHECK: DW_TAG_typedef +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE2]]}) +; CHECK: DW_AT_name [DW_FORM_strp] {{.*}}= "baz2") +; Make sure we correctly handle derived-from of a pointer type being a type identifier. +; CHECK: DW_TAG_pointer_type +; CHECK: DW_AT_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE]]}) +; CHECK: DW_TAG_typedef +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE2]]}) +; CHECK: DW_AT_name [DW_FORM_strp] {{.*}}= "baz") +; Make sure we correctly handle derived-from of an array type being a type identifier. 
+; CHECK: DW_TAG_array_type +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE2]]}) +; IR generated from clang -g with the following source: +; struct C { +; virtual void foo(); +; }; +; void C::foo() { +; } +; +; struct bar { }; +; typedef bar baz; +; struct D { +; typedef bar baz2; +; static int a; +; struct Nested { }; +; struct Nested2 { }; +; template <typename T> +; struct virt { +; T* values; +; }; +; }; +; void test() { +; baz B; +; bar A[3]; +; D::baz2 B2; +; D::Nested e; +; D::Nested2 *p; +; D::virt<bar> t; +; } + +%struct.C = type { i32 (...)** } +%struct.bar = type { i8 } +%"struct.D::Nested" = type { i8 } +%"struct.D::Nested2" = type { i8 } +%"struct.D::virt" = type { %struct.bar* } + +@_ZTV1C = unnamed_addr constant [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI1C to i8*), i8* bitcast (void (%struct.C*)* @_ZN1C3fooEv to i8*)] +@_ZTVN10__cxxabiv117__class_type_infoE = external global i8* +@_ZTS1C = constant [3 x i8] c"1C\00" +@_ZTI1C = unnamed_addr constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([3 x i8]* @_ZTS1C, i32 0, i32 0) } + +; Function Attrs: nounwind ssp uwtable +define void @_ZN1C3fooEv(%struct.C* %this) unnamed_addr #0 align 2 { +entry: + %this.addr = alloca %struct.C*, align 8 + store %struct.C* %this, %struct.C** %this.addr, align 8 + call void @llvm.dbg.declare(metadata !{%struct.C** %this.addr}, metadata !36), !dbg !38 + %this1 = load %struct.C** %this.addr + ret void, !dbg !39 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata) #1 + +; Function Attrs: nounwind ssp uwtable +define void @_Z4testv() #0 { +entry: + %B = alloca %struct.bar, align 1 + %A = alloca [3 x %struct.bar], align 1 + %B2 = alloca %struct.bar, align 1 + %e = alloca %"struct.D::Nested", align 1 + %p = alloca %"struct.D::Nested2"*, align 8 + %t = alloca %"struct.D::virt", align 8 + call void 
@llvm.dbg.declare(metadata !{%struct.bar* %B}, metadata !40), !dbg !42 + call void @llvm.dbg.declare(metadata !{[3 x %struct.bar]* %A}, metadata !43), !dbg !47 + call void @llvm.dbg.declare(metadata !{%struct.bar* %B2}, metadata !48), !dbg !50 + call void @llvm.dbg.declare(metadata !{%"struct.D::Nested"* %e}, metadata !51), !dbg !52 + call void @llvm.dbg.declare(metadata !{%"struct.D::Nested2"** %p}, metadata !53), !dbg !55 + call void @llvm.dbg.declare(metadata !{%"struct.D::virt"* %t}, metadata !56), !dbg !57 + ret void, !dbg !58 +} + +attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!35, !59} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !30, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [tmp.cpp] [DW_LANG_C_plus_plus] +!1 = metadata !{metadata !"tmp.cpp", metadata !"."} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4, metadata !18, metadata !19, metadata !22, metadata !23, metadata !24} +!4 = metadata !{i32 786451, metadata !1, null, metadata !"C", i32 1, i64 64, i64 64, i32 0, i32 0, null, metadata !5, i32 0, metadata !"_ZTS1C", null, metadata !"_ZTS1C"} ; [ DW_TAG_structure_type ] [C] [line 1, size 64, align 64, offset 0] [def] [from ] +!5 = metadata !{metadata !6, metadata !13} +!6 = metadata !{i32 786445, metadata !1, metadata !7, metadata !"_vptr$C", i32 0, i64 64, i64 0, i64 0, i32 64, metadata !8} ; [ DW_TAG_member ] [_vptr$C] [line 0, size 64, align 0, offset 0] [artificial] [from ] +!7 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [tmp.cpp] +!8 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, 
i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __vtbl_ptr_type] +!9 = metadata !{i32 786447, null, null, metadata !"__vtbl_ptr_type", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [__vtbl_ptr_type] [line 0, size 64, align 0, offset 0] [from ] +!10 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!11 = metadata !{metadata !12} +!12 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!13 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1C", metadata !"foo", metadata !"foo", metadata !"_ZN1C3fooEv", i32 2, metadata !14, i1 false, i1 false, i32 1, i32 0, metadata !"_ZTS1C", i32 256, i1 false, null, null, i32 0, metadata !17, i32 2} ; [ DW_TAG_subprogram ] [line 2] [foo] +!14 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!15 = metadata !{null, metadata !16} +!16 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1C] +!17 = metadata !{i32 786468} +!18 = metadata !{i32 786451, metadata !1, null, metadata !"bar", i32 7, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, metadata !"_ZTS3bar"} ; [ DW_TAG_structure_type ] [bar] [line 7, size 8, align 8, offset 0] [def] [from ] +!19 = metadata !{i32 786451, metadata !1, null, metadata !"D", i32 9, i64 8, i64 8, i32 0, i32 0, null, metadata !20, i32 0, null, null, metadata !"_ZTS1D"} ; [ DW_TAG_structure_type ] [D] [line 9, size 8, align 
8, offset 0] [def] [from ] +!20 = metadata !{metadata !21} +!21 = metadata !{i32 786445, metadata !1, metadata !"_ZTS1D", metadata !"a", i32 11, i64 0, i64 0, i64 0, i32 4096, metadata !12, null} ; [ DW_TAG_member ] [a] [line 11, size 0, align 0, offset 0] [static] [from int] +!22 = metadata !{i32 786451, metadata !1, metadata !"_ZTS1D", metadata !"Nested", i32 12, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, metadata !"_ZTSN1D6NestedE"} ; [ DW_TAG_structure_type ] [Nested] [line 12, size 8, align 8, offset 0] [def] [from ] +!23 = metadata !{i32 786451, metadata !1, metadata !"_ZTS1D", metadata !"Nested2", i32 13, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, metadata !"_ZTSN1D7Nested2E"} ; [ DW_TAG_structure_type ] [Nested2] [line 13, size 0, align 0, offset 0] [decl] [from ] +!24 = metadata !{i32 786451, metadata !1, metadata !"_ZTS1D", metadata !"virt<bar>", i32 15, i64 64, i64 64, i32 0, i32 0, null, metadata !25, i32 0, null, metadata !28, metadata !"_ZTSN1D4virtI3barEE"} ; [ DW_TAG_structure_type ] [virt<bar>] [line 15, size 64, align 64, offset 0] [def] [from ] +!25 = metadata !{metadata !26} +!26 = metadata !{i32 786445, metadata !1, metadata !"_ZTSN1D4virtI3barEE", metadata !"values", i32 16, i64 64, i64 64, i64 0, i32 0, metadata !27} ; [ DW_TAG_member ] [values] [line 16, size 64, align 64, offset 0] [from ] +!27 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS3bar"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS3bar] +!28 = metadata !{metadata !29} +!29 = metadata !{i32 786479, null, metadata !"T", metadata !"_ZTS3bar", null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ] +!30 = metadata !{metadata !31, metadata !32} +!31 = metadata !{i32 786478, metadata !1, null, metadata !"foo", metadata !"foo", metadata !"_ZN1C3fooEv", i32 4, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.C*)* @_ZN1C3fooEv, 
null, metadata !13, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [foo] +!32 = metadata !{i32 786478, metadata !1, metadata !7, metadata !"test", metadata !"test", metadata !"_Z4testv", i32 20, metadata !33, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z4testv, null, null, metadata !2, i32 20} ; [ DW_TAG_subprogram ] [line 20] [def] [test] +!33 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !34, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!34 = metadata !{null} +!35 = metadata !{i32 2, metadata !"Dwarf Version", i32 2} +!36 = metadata !{i32 786689, metadata !31, metadata !"this", null, i32 16777216, metadata !37, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0] +!37 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1C] +!38 = metadata !{i32 0, i32 0, metadata !31, null} +!39 = metadata !{i32 5, i32 0, metadata !31, null} +!40 = metadata !{i32 786688, metadata !32, metadata !"B", metadata !7, i32 21, metadata !41, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [B] [line 21] +!41 = metadata !{i32 786454, metadata !1, null, metadata !"baz", i32 8, i64 0, i64 0, i64 0, i32 0, metadata !"_ZTS3bar"} ; [ DW_TAG_typedef ] [baz] [line 8, size 0, align 0, offset 0] [from _ZTS3bar] +!42 = metadata !{i32 21, i32 0, metadata !32, null} +!43 = metadata !{i32 786688, metadata !32, metadata !"A", metadata !7, i32 22, metadata !44, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [A] [line 22] +!44 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 24, i64 8, i32 0, i32 0, metadata !"_ZTS3bar", metadata !45, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 24, align 8, offset 0] [from _ZTS3bar] +!45 = metadata !{metadata !46} +!46 = metadata !{i32 786465, i64 0, i64 3} ; [ 
DW_TAG_subrange_type ] [0, 2] +!47 = metadata !{i32 22, i32 0, metadata !32, null} +!48 = metadata !{i32 786688, metadata !32, metadata !"B2", metadata !7, i32 23, metadata !49, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [B2] [line 23] +!49 = metadata !{i32 786454, metadata !1, metadata !"_ZTS1D", metadata !"baz2", i32 10, i64 0, i64 0, i64 0, i32 0, metadata !"_ZTS3bar"} ; [ DW_TAG_typedef ] [baz2] [line 10, size 0, align 0, offset 0] [from _ZTS3bar] +!50 = metadata !{i32 23, i32 0, metadata !32, null} +!51 = metadata !{i32 786688, metadata !32, metadata !"e", metadata !7, i32 24, metadata !22, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [e] [line 24] +!52 = metadata !{i32 24, i32 0, metadata !32, null} +!53 = metadata !{i32 786688, metadata !32, metadata !"p", metadata !7, i32 25, metadata !54, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [p] [line 25] +!54 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTSN1D7Nested2E"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTSN1D7Nested2E] +!55 = metadata !{i32 25, i32 0, metadata !32, null} +!56 = metadata !{i32 786688, metadata !32, metadata !"t", metadata !7, i32 26, metadata !24, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [t] [line 26] +!57 = metadata !{i32 26, i32 0, metadata !32, null} +!58 = metadata !{i32 27, i32 0, metadata !32, null} +!59 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/tu-member-pointer.ll b/test/DebugInfo/tu-member-pointer.ll new file mode 100644 index 0000000..b746d3b --- /dev/null +++ b/test/DebugInfo/tu-member-pointer.ll @@ -0,0 +1,30 @@ +; REQUIRES: object-emission + +; RUN: llc -filetype=obj -O0 < %s > %t +; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s +; CHECK: DW_TAG_ptr_to_member_type +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE:0x[0-9a-f]+]]}) +; CHECK: [[TYPE]]: DW_TAG_base_type +; IR generated from clang -g with the following source: +; struct Foo 
{ +; int e; +; }; +; int Foo:*x = 0; + +@x = global i64 -1, align 8 + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!10, !11} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !2, metadata !5, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [foo.cpp] [DW_LANG_C_plus_plus] +!1 = metadata !{metadata !"foo.cpp", metadata !"."} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786451, metadata !1, null, metadata !"Foo", i32 1, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, metadata !"_ZTS3Foo"} ; [ DW_TAG_structure_type ] [Foo] [line 1, size 0, align 0, offset 0] [decl] [from ] +!5 = metadata !{metadata !6} +!6 = metadata !{i32 786484, i32 0, null, metadata !"x", metadata !"x", metadata !"", metadata !7, i32 4, metadata !8, i32 0, i32 1, i64* @x, null} ; [ DW_TAG_variable ] [x] [line 4] [def] +!7 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [foo.cpp] +!8 = metadata !{i32 786463, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9, metadata !"_ZTS3Foo"} ; [ DW_TAG_ptr_to_member_type ] [line 0, size 0, align 0, offset 0] [from int] +!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!10 = metadata !{i32 2, metadata !"Dwarf Version", i32 2} +!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/two-cus-from-same-file.ll b/test/DebugInfo/two-cus-from-same-file.ll index 6d8c484..8589840 100644 --- a/test/DebugInfo/two-cus-from-same-file.ll +++ b/test/DebugInfo/two-cus-from-same-file.ll @@ -33,18 +33,19 @@ entry: declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0, !9} +!llvm.module.flags = !{!33} !0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"clang version 3.2 (trunk 156513)", i1 
true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 786478, metadata !32, metadata !6, metadata !"foo", metadata !"foo", metadata !"", i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @foo, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ] !6 = metadata !{i32 786473, metadata !32} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{null} !9 = metadata !{i32 786449, metadata !32, i32 12, metadata !"clang version 3.2 (trunk 156513)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !10, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !10 = metadata !{metadata !12} !12 = metadata !{i32 786478, metadata !32, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 11, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !19, i32 11} ; [ DW_TAG_subprogram ] -!13 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!13 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !14 = metadata !{metadata !15, metadata !15, metadata !16} !15 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !16 = metadata !{i32 786447, null, 
null, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !17} ; [ DW_TAG_pointer_type ] @@ -70,3 +71,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone ; CHECK: {{DW_TAG_compile_unit}} ; CHECK: {{foo\.c}} +!33 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/DebugInfo/version.ll b/test/DebugInfo/version.ll index b36e38e..f4dde0a 100644 --- a/test/DebugInfo/version.ll +++ b/test/DebugInfo/version.ll @@ -13,10 +13,10 @@ entry: ret i32 0, !dbg !10 } -attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!9} +!llvm.module.flags = !{!9, !11} !0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 185475)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{metadata !"CodeGen/dwarf-version.c", metadata !"test"} @@ -24,8 +24,9 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe !3 = metadata !{metadata !4} !4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 6, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [main] !5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] -!6 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 
0, size 0, align 0, offset 0] [from ] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !7 = metadata !{metadata !8} !8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !9 = metadata !{i32 2, metadata !"Dwarf Version", i32 3} !10 = metadata !{i32 7, i32 0, metadata !4, null} +!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/ExecutionEngine/MCJIT/Inputs/cross-module-b.ll b/test/ExecutionEngine/MCJIT/Inputs/cross-module-b.ll new file mode 100644 index 0000000..6870117 --- /dev/null +++ b/test/ExecutionEngine/MCJIT/Inputs/cross-module-b.ll @@ -0,0 +1,7 @@ +declare i32 @FA() + +define i32 @FB() { + %r = call i32 @FA( ) ; <i32> [#uses=1] + ret i32 %r +} + diff --git a/test/ExecutionEngine/MCJIT/Inputs/multi-module-b.ll b/test/ExecutionEngine/MCJIT/Inputs/multi-module-b.ll new file mode 100644 index 0000000..103b601 --- /dev/null +++ b/test/ExecutionEngine/MCJIT/Inputs/multi-module-b.ll @@ -0,0 +1,7 @@ +declare i32 @FC() + +define i32 @FB() { + %r = call i32 @FC( ) ; <i32> [#uses=1] + ret i32 %r +} + diff --git a/test/ExecutionEngine/MCJIT/Inputs/multi-module-c.ll b/test/ExecutionEngine/MCJIT/Inputs/multi-module-c.ll new file mode 100644 index 0000000..b39306b --- /dev/null +++ b/test/ExecutionEngine/MCJIT/Inputs/multi-module-c.ll @@ -0,0 +1,4 @@ +define i32 @FC() { + ret i32 0 +} + diff --git a/test/ExecutionEngine/MCJIT/Inputs/multi-module-eh-b.ll b/test/ExecutionEngine/MCJIT/Inputs/multi-module-eh-b.ll new file mode 100644 index 0000000..d7dbb03 --- /dev/null +++ b/test/ExecutionEngine/MCJIT/Inputs/multi-module-eh-b.ll @@ -0,0 +1,30 @@ +declare i8* @__cxa_allocate_exception(i64) +declare void @__cxa_throw(i8*, i8*, i8*) +declare i32 
@__gxx_personality_v0(...) +declare void @__cxa_end_catch() +declare i8* @__cxa_begin_catch(i8*) + +@_ZTIi = external constant i8* + +define void @throwException_B() { + %exception = tail call i8* @__cxa_allocate_exception(i64 4) + call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) + unreachable +} + +define i32 @FB() { +entry: + invoke void @throwException_B() + to label %try.cont unwind label %lpad + +lpad: + %p = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* bitcast (i8** @_ZTIi to i8*) + %e = extractvalue { i8*, i32 } %p, 0 + call i8* @__cxa_begin_catch(i8* %e) + call void @__cxa_end_catch() + br label %try.cont + +try.cont: + ret i32 0 +} diff --git a/test/ExecutionEngine/MCJIT/cross-module-a.ll b/test/ExecutionEngine/MCJIT/cross-module-a.ll new file mode 100644 index 0000000..fe8d386 --- /dev/null +++ b/test/ExecutionEngine/MCJIT/cross-module-a.ll @@ -0,0 +1,13 @@ +; RUN: %lli_mcjit -extra-module=%p/Inputs/cross-module-b.ll %s > /dev/null + +declare i32 @FB() + +define i32 @FA() { + ret i32 0 +} + +define i32 @main() { + %r = call i32 @FB( ) ; <i32> [#uses=1] + ret i32 %r +} + diff --git a/test/ExecutionEngine/MCJIT/cross-module-sm-pic-a.ll b/test/ExecutionEngine/MCJIT/cross-module-sm-pic-a.ll new file mode 100644 index 0000000..ee26702 --- /dev/null +++ b/test/ExecutionEngine/MCJIT/cross-module-sm-pic-a.ll @@ -0,0 +1,14 @@ +; RUN: %lli_mcjit -extra-module=%p/Inputs/cross-module-b.ll -relocation-model=pic -code-model=small %s > /dev/null +; XFAIL: mips, i686, i386, arm + +declare i32 @FB() + +define i32 @FA() { + ret i32 0 +} + +define i32 @main() { + %r = call i32 @FB( ) ; <i32> [#uses=1] + ret i32 %r +} + diff --git a/test/ExecutionEngine/MCJIT/eh-lg-pic.ll b/test/ExecutionEngine/MCJIT/eh-lg-pic.ll new file mode 100644 index 0000000..7c0227d --- /dev/null +++ b/test/ExecutionEngine/MCJIT/eh-lg-pic.ll @@ -0,0 +1,32 @@ +; RUN: %lli_mcjit -relocation-model=pic 
-code-model=large %s +; XFAIL: cygwin, win32, mingw, mips, powerpc64, i686, i386, aarch64, arm +declare i8* @__cxa_allocate_exception(i64) +declare void @__cxa_throw(i8*, i8*, i8*) +declare i32 @__gxx_personality_v0(...) +declare void @__cxa_end_catch() +declare i8* @__cxa_begin_catch(i8*) + +@_ZTIi = external constant i8* + +define void @throwException() { + %exception = tail call i8* @__cxa_allocate_exception(i64 4) + call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) + unreachable +} + +define i32 @main() { +entry: + invoke void @throwException() + to label %try.cont unwind label %lpad + +lpad: + %p = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* bitcast (i8** @_ZTIi to i8*) + %e = extractvalue { i8*, i32 } %p, 0 + call i8* @__cxa_begin_catch(i8* %e) + call void @__cxa_end_catch() + br label %try.cont + +try.cont: + ret i32 0 +} diff --git a/test/ExecutionEngine/MCJIT/eh-sm-pic.ll b/test/ExecutionEngine/MCJIT/eh-sm-pic.ll new file mode 100644 index 0000000..00c2bb0 --- /dev/null +++ b/test/ExecutionEngine/MCJIT/eh-sm-pic.ll @@ -0,0 +1,32 @@ +; RUN: %lli_mcjit -relocation-model=pic -code-model=small %s +; XFAIL: cygwin, win32, mingw, mips, i686, i386, darwin, aarch64, arm +declare i8* @__cxa_allocate_exception(i64) +declare void @__cxa_throw(i8*, i8*, i8*) +declare i32 @__gxx_personality_v0(...) 
+declare void @__cxa_end_catch() +declare i8* @__cxa_begin_catch(i8*) + +@_ZTIi = external constant i8* + +define void @throwException() { + %exception = tail call i8* @__cxa_allocate_exception(i64 4) + call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) + unreachable +} + +define i32 @main() { +entry: + invoke void @throwException() + to label %try.cont unwind label %lpad + +lpad: + %p = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* bitcast (i8** @_ZTIi to i8*) + %e = extractvalue { i8*, i32 } %p, 0 + call i8* @__cxa_begin_catch(i8* %e) + call void @__cxa_end_catch() + br label %try.cont + +try.cont: + ret i32 0 +} diff --git a/test/ExecutionEngine/MCJIT/hello-sm-pic.ll b/test/ExecutionEngine/MCJIT/hello-sm-pic.ll new file mode 100644 index 0000000..115846c --- /dev/null +++ b/test/ExecutionEngine/MCJIT/hello-sm-pic.ll @@ -0,0 +1,12 @@ +; RUN: %lli_mcjit -relocation-model=pic -code-model=small %s > /dev/null +; XFAIL: mips, i686, i386, darwin, aarch64, arm + +@.LC0 = internal global [12 x i8] c"Hello World\00" ; <[12 x i8]*> [#uses=1] + +declare i32 @puts(i8*) + +define i32 @main() { + %reg210 = call i32 @puts( i8* getelementptr ([12 x i8]* @.LC0, i64 0, i64 0) ) ; <i32> [#uses=0] + ret i32 0 +} + diff --git a/test/ExecutionEngine/MCJIT/lit.local.cfg b/test/ExecutionEngine/MCJIT/lit.local.cfg index f21468f..fdb36ee 100644 --- a/test/ExecutionEngine/MCJIT/lit.local.cfg +++ b/test/ExecutionEngine/MCJIT/lit.local.cfg @@ -1,12 +1,4 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - -def getRoot(config): - if not config.parent: - return config - return getRoot(config.parent) - -root = getRoot(config) - +root = config.root targets = set(root.targets_to_build.split()) if ('X86' in targets) | ('AArch64' in targets) | ('ARM' in targets) | \ ('Mips' in targets) | ('PowerPC' in targets) | ('SystemZ' in targets): @@ -23,8 +15,12 @@ if root.host_arch not in ['i386', 'x86', 'x86_64', if 'armv7' in 
root.host_arch: config.unsupported = False -if 'i386-apple-darwin' in root.target_triple: +if 'i386-apple-darwin' in root.target_triple: config.unsupported = True if 'powerpc' in root.target_triple and not 'powerpc64' in root.target_triple: config.unsupported = True + +# ExecutionEngine tests are not expected to pass in a cross-compilation setup. +if 'native' not in config.available_features: + config.unsupported = True diff --git a/test/ExecutionEngine/MCJIT/multi-module-a.ll b/test/ExecutionEngine/MCJIT/multi-module-a.ll new file mode 100644 index 0000000..8848ca6 --- /dev/null +++ b/test/ExecutionEngine/MCJIT/multi-module-a.ll @@ -0,0 +1,9 @@ +; RUN: %lli_mcjit -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll %s > /dev/null + +declare i32 @FB() + +define i32 @main() { + %r = call i32 @FB( ) ; <i32> [#uses=1] + ret i32 %r +} + diff --git a/test/ExecutionEngine/MCJIT/multi-module-eh-a.ll b/test/ExecutionEngine/MCJIT/multi-module-eh-a.ll new file mode 100644 index 0000000..66fafc9 --- /dev/null +++ b/test/ExecutionEngine/MCJIT/multi-module-eh-a.ll @@ -0,0 +1,35 @@ +; RUN: %lli_mcjit -extra-module=%p/Inputs/multi-module-eh-b.ll %s +; XFAIL: arm, cygwin, win32, mingw +declare i8* @__cxa_allocate_exception(i64) +declare void @__cxa_throw(i8*, i8*, i8*) +declare i32 @__gxx_personality_v0(...) 
+declare void @__cxa_end_catch() +declare i8* @__cxa_begin_catch(i8*) + +@_ZTIi = external constant i8* + +declare i32 @FB() + +define void @throwException() { + %exception = tail call i8* @__cxa_allocate_exception(i64 4) + call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) + unreachable +} + +define i32 @main() { +entry: + invoke void @throwException() + to label %try.cont unwind label %lpad + +lpad: + %p = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* bitcast (i8** @_ZTIi to i8*) + %e = extractvalue { i8*, i32 } %p, 0 + call i8* @__cxa_begin_catch(i8* %e) + call void @__cxa_end_catch() + br label %try.cont + +try.cont: + %r = call i32 @FB( ) + ret i32 %r +} diff --git a/test/ExecutionEngine/MCJIT/multi-module-sm-pic-a.ll b/test/ExecutionEngine/MCJIT/multi-module-sm-pic-a.ll new file mode 100644 index 0000000..f2fa59f --- /dev/null +++ b/test/ExecutionEngine/MCJIT/multi-module-sm-pic-a.ll @@ -0,0 +1,10 @@ +; RUN: %lli_mcjit -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -relocation-model=pic -code-model=small %s > /dev/null +; XFAIL: mips, i686, i386, arm + +declare i32 @FB() + +define i32 @main() { + %r = call i32 @FB( ) ; <i32> [#uses=1] + ret i32 %r +} + diff --git a/test/ExecutionEngine/MCJIT/remote/Inputs/cross-module-b.ll b/test/ExecutionEngine/MCJIT/remote/Inputs/cross-module-b.ll new file mode 100644 index 0000000..6870117 --- /dev/null +++ b/test/ExecutionEngine/MCJIT/remote/Inputs/cross-module-b.ll @@ -0,0 +1,7 @@ +declare i32 @FA() + +define i32 @FB() { + %r = call i32 @FA( ) ; <i32> [#uses=1] + ret i32 %r +} + diff --git a/test/ExecutionEngine/MCJIT/remote/Inputs/multi-module-b.ll b/test/ExecutionEngine/MCJIT/remote/Inputs/multi-module-b.ll new file mode 100644 index 0000000..103b601 --- /dev/null +++ b/test/ExecutionEngine/MCJIT/remote/Inputs/multi-module-b.ll @@ -0,0 +1,7 @@ +declare i32 @FC() + +define i32 @FB() { + %r = call 
i32 @FC( ) ; <i32> [#uses=1] + ret i32 %r +} + diff --git a/test/ExecutionEngine/MCJIT/remote/Inputs/multi-module-c.ll b/test/ExecutionEngine/MCJIT/remote/Inputs/multi-module-c.ll new file mode 100644 index 0000000..b39306b --- /dev/null +++ b/test/ExecutionEngine/MCJIT/remote/Inputs/multi-module-c.ll @@ -0,0 +1,4 @@ +define i32 @FC() { + ret i32 0 +} + diff --git a/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll b/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll new file mode 100644 index 0000000..094d362 --- /dev/null +++ b/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll @@ -0,0 +1,13 @@ +; RUN: %lli_mcjit -extra-module=%p/Inputs/cross-module-b.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target %s > /dev/null + +declare i32 @FB() + +define i32 @FA() { + ret i32 0 +} + +define i32 @main() { + %r = call i32 @FB( ) ; <i32> [#uses=1] + ret i32 %r +} + diff --git a/test/ExecutionEngine/MCJIT/remote/cross-module-sm-pic-a.ll b/test/ExecutionEngine/MCJIT/remote/cross-module-sm-pic-a.ll new file mode 100644 index 0000000..bdaa9a0 --- /dev/null +++ b/test/ExecutionEngine/MCJIT/remote/cross-module-sm-pic-a.ll @@ -0,0 +1,14 @@ +; RUN: %lli_mcjit -extra-module=%p/Inputs/cross-module-b.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target -relocation-model=pic -code-model=small %s > /dev/null +; XFAIL: mips, i686, i386, arm + +declare i32 @FB() + +define i32 @FA() { + ret i32 0 +} + +define i32 @main() { + %r = call i32 @FB( ) ; <i32> [#uses=1] + ret i32 %r +} + diff --git a/test/ExecutionEngine/MCJIT/remote/lit.local.cfg b/test/ExecutionEngine/MCJIT/remote/lit.local.cfg index 39b2a95..6b192ae 100644 --- a/test/ExecutionEngine/MCJIT/remote/lit.local.cfg +++ b/test/ExecutionEngine/MCJIT/remote/lit.local.cfg @@ -1,11 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - -def getRoot(config): - if not config.parent: - return config - return getRoot(config.parent) - -root = getRoot(config) - -if 'armv4' 
in root.target_triple or 'armv5' in root.target_triple: +if 'armv4' in config.root.target_triple or \ + 'armv5' in config.root.target_triple: config.unsupported = True diff --git a/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll b/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll new file mode 100644 index 0000000..91d0387 --- /dev/null +++ b/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll @@ -0,0 +1,9 @@ +; RUN: %lli_mcjit -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target %s > /dev/null + +declare i32 @FB() + +define i32 @main() { + %r = call i32 @FB( ) ; <i32> [#uses=1] + ret i32 %r +} + diff --git a/test/ExecutionEngine/MCJIT/remote/multi-module-sm-pic-a.ll b/test/ExecutionEngine/MCJIT/remote/multi-module-sm-pic-a.ll new file mode 100644 index 0000000..73228e4 --- /dev/null +++ b/test/ExecutionEngine/MCJIT/remote/multi-module-sm-pic-a.ll @@ -0,0 +1,10 @@ +; RUN: %lli_mcjit -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target -relocation-model=pic -code-model=small %s > /dev/null +; XFAIL: mips, i686, i386, arm + +declare i32 @FB() + +define i32 @main() { + %r = call i32 @FB( ) ; <i32> [#uses=1] + ret i32 %r +} + diff --git a/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll b/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll index f1a69d8..d10a411 100644 --- a/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll +++ b/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll @@ -1,5 +1,4 @@ -; RUN: %lli_mcjit -remote-mcjit %s > /dev/null -; XFAIL: mips +; RUN: %lli_mcjit -remote-mcjit -mcjit-remote-process=lli-child-target %s > /dev/null define i32 @bar() { ret i32 0 diff --git a/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll b/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll index 47a710d..97932bc 100644 --- 
a/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll +++ b/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll @@ -1,5 +1,6 @@ -; RUN: %lli_mcjit -remote-mcjit -disable-lazy-compilation=false %s -; XFAIL: mips +; RUN: %lli_mcjit -remote-mcjit -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target %s +; XFAIL: * +; This test should fail until remote symbol resolution is supported. define i32 @main() nounwind { entry: diff --git a/test/ExecutionEngine/MCJIT/remote/stubs-sm-pic.ll b/test/ExecutionEngine/MCJIT/remote/stubs-sm-pic.ll new file mode 100644 index 0000000..88faf21 --- /dev/null +++ b/test/ExecutionEngine/MCJIT/remote/stubs-sm-pic.ll @@ -0,0 +1,37 @@ +; RUN: %lli_mcjit -remote-mcjit -disable-lazy-compilation=false -relocation-model=pic -code-model=small %s +; XFAIL: * +; This function should fail until remote symbol resolution is supported. + +define i32 @main() nounwind { +entry: + call void @lazily_compiled_address_is_consistent() + ret i32 0 +} + +; Test PR3043: @test should have the same address before and after +; it's JIT-compiled. 
+@funcPtr = common global i1 ()* null, align 4 +@lcaic_failure = internal constant [46 x i8] c"@lazily_compiled_address_is_consistent failed\00" + +define void @lazily_compiled_address_is_consistent() nounwind { +entry: + store i1 ()* @test, i1 ()** @funcPtr + %pass = tail call i1 @test() ; <i32> [#uses=1] + br i1 %pass, label %pass_block, label %fail_block +pass_block: + ret void +fail_block: + call i32 @puts(i8* getelementptr([46 x i8]* @lcaic_failure, i32 0, i32 0)) + call void @exit(i32 1) + unreachable +} + +define i1 @test() nounwind { +entry: + %tmp = load i1 ()** @funcPtr + %eq = icmp eq i1 ()* %tmp, @test + ret i1 %eq +} + +declare i32 @puts(i8*) noreturn +declare void @exit(i32) noreturn diff --git a/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll index eb2e686..6328089 100644 --- a/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll +++ b/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll @@ -1,5 +1,4 @@ -; RUN: %lli_mcjit -remote-mcjit -O0 -disable-lazy-compilation=false %s -; XFAIL: mips +; RUN: %lli_mcjit -remote-mcjit -O0 -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target %s ; The intention of this test is to verify that symbols mapped to COMMON in ELF ; work as expected. diff --git a/test/ExecutionEngine/MCJIT/remote/test-data-align-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-data-align-remote.ll index 874eeae..6b2b97b 100644 --- a/test/ExecutionEngine/MCJIT/remote/test-data-align-remote.ll +++ b/test/ExecutionEngine/MCJIT/remote/test-data-align-remote.ll @@ -1,5 +1,4 @@ -; RUN: %lli_mcjit -remote-mcjit -O0 %s -; XFAIL: mips +; RUN: %lli_mcjit -remote-mcjit -O0 -mcjit-remote-process=lli-child-target %s ; Check that a variable is always aligned as specified. 
diff --git a/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll index d7e8c35..a8a93a8 100644 --- a/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll +++ b/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll @@ -1,5 +1,4 @@ -; RUN: %lli_mcjit -remote-mcjit %s > /dev/null -; XFAIL: mips +; RUN: %lli_mcjit -remote-mcjit -mcjit-remote-process=lli-child-target %s > /dev/null define double @test(double* %DP, double %Arg) { %D = load double* %DP ; <double> [#uses=1] diff --git a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll index 5337c5d..4181fb0 100644 --- a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll +++ b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll @@ -1,5 +1,4 @@ -; RUN: %lli_mcjit -remote-mcjit %s > /dev/null -; XFAIL: mips +; RUN: %lli_mcjit -remote-mcjit -mcjit-remote-process=lli-child-target %s > /dev/null @count = global i32 1, align 4 diff --git a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll new file mode 100644 index 0000000..29ab24b --- /dev/null +++ b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll @@ -0,0 +1,35 @@ +; RUN: %lli_mcjit -remote-mcjit -relocation-model=pic -code-model=small %s > /dev/null +; XFAIL: mips, aarch64, arm, i686, i386 + +@count = global i32 1, align 4 + +define i32 @main() nounwind uwtable { +entry: + %retval = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 0, i32* %retval + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32* %i, align 4 + %cmp = icmp slt i32 %0, 49 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %1 = load i32* @count, align 4 + %inc = 
add nsw i32 %1, 1 + store i32 %inc, i32* @count, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %2 = load i32* %i, align 4 + %inc1 = add nsw i32 %2, 1 + store i32 %inc1, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + %3 = load i32* @count, align 4 + %sub = sub nsw i32 %3, 50 + ret i32 %sub +} diff --git a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll index bd044b6..8b56297 100644 --- a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll +++ b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll @@ -1,4 +1,4 @@ -; RUN: %lli_mcjit -remote-mcjit -O0 %s +; RUN: %lli_mcjit -remote-mcjit -O0 -mcjit-remote-process=lli-child-target %s @.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1 @ptr = global i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), align 4 diff --git a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll new file mode 100644 index 0000000..bad026f --- /dev/null +++ b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll @@ -0,0 +1,17 @@ +; RUN: %lli_mcjit -remote-mcjit -O0 -relocation-model=pic -code-model=small %s +; XFAIL: mips, aarch64, arm, i686, i386 + +@.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1 +@ptr = global i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), align 4 +@.str1 = private unnamed_addr constant [6 x i8] c"data2\00", align 1 +@ptr2 = global i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0), align 4 + +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readonly { +entry: + %0 = load i8** @ptr, align 4 + %1 = load i8** @ptr2, align 4 + %cmp = icmp eq i8* %0, %1 + %. = zext i1 %cmp to i32 + ret i32 %. 
+} + diff --git a/test/ExecutionEngine/MCJIT/stubs-sm-pic.ll b/test/ExecutionEngine/MCJIT/stubs-sm-pic.ll new file mode 100644 index 0000000..9e214f5 --- /dev/null +++ b/test/ExecutionEngine/MCJIT/stubs-sm-pic.ll @@ -0,0 +1,36 @@ +; RUN: %lli_mcjit -disable-lazy-compilation=false -relocation-model=pic -code-model=small %s +; XFAIL: mips, i686, i386, aarch64, arm + +define i32 @main() nounwind { +entry: + call void @lazily_compiled_address_is_consistent() + ret i32 0 +} + +; Test PR3043: @test should have the same address before and after +; it's JIT-compiled. +@funcPtr = common global i1 ()* null, align 4 +@lcaic_failure = internal constant [46 x i8] c"@lazily_compiled_address_is_consistent failed\00" + +define void @lazily_compiled_address_is_consistent() nounwind { +entry: + store i1 ()* @test, i1 ()** @funcPtr + %pass = tail call i1 @test() ; <i32> [#uses=1] + br i1 %pass, label %pass_block, label %fail_block +pass_block: + ret void +fail_block: + call i32 @puts(i8* getelementptr([46 x i8]* @lcaic_failure, i32 0, i32 0)) + call void @exit(i32 1) + unreachable +} + +define i1 @test() nounwind { +entry: + %tmp = load i1 ()** @funcPtr + %eq = icmp eq i1 ()* %tmp, @test + ret i1 %eq +} + +declare i32 @puts(i8*) noreturn +declare void @exit(i32) noreturn diff --git a/test/ExecutionEngine/MCJIT/test-global-init-nonzero-sm-pic.ll b/test/ExecutionEngine/MCJIT/test-global-init-nonzero-sm-pic.ll new file mode 100644 index 0000000..eb031f2 --- /dev/null +++ b/test/ExecutionEngine/MCJIT/test-global-init-nonzero-sm-pic.ll @@ -0,0 +1,35 @@ +; RUN: %lli_mcjit -relocation-model=pic -code-model=small %s > /dev/null +; XFAIL: mips, aarch64, arm, i686, i386 + +@count = global i32 1, align 4 + +define i32 @main() nounwind uwtable { +entry: + %retval = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 0, i32* %retval + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32* %i, align 4 + %cmp = icmp slt i32 %0, 49 + br i1 
%cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %1 = load i32* @count, align 4 + %inc = add nsw i32 %1, 1 + store i32 %inc, i32* @count, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %2 = load i32* %i, align 4 + %inc1 = add nsw i32 %2, 1 + store i32 %inc1, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + %3 = load i32* @count, align 4 + %sub = sub nsw i32 %3, 50 + ret i32 %sub +} diff --git a/test/ExecutionEngine/MCJIT/test-ptr-reloc-sm-pic.ll b/test/ExecutionEngine/MCJIT/test-ptr-reloc-sm-pic.ll new file mode 100644 index 0000000..9e06742 --- /dev/null +++ b/test/ExecutionEngine/MCJIT/test-ptr-reloc-sm-pic.ll @@ -0,0 +1,17 @@ +; RUN: %lli_mcjit -O0 -relocation-model=pic -code-model=small %s +; XFAIL: mips, aarch64, arm, i686, i386 + +@.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1 +@ptr = global i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), align 4 +@.str1 = private unnamed_addr constant [6 x i8] c"data2\00", align 1 +@ptr2 = global i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0), align 4 + +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readonly { +entry: + %0 = load i8** @ptr, align 4 + %1 = load i8** @ptr2, align 4 + %cmp = icmp eq i8* %0, %1 + %. = zext i1 %cmp to i32 + ret i32 %. 
+} + diff --git a/test/ExecutionEngine/RuntimeDyld/Inputs/arm_secdiff_reloc.o b/test/ExecutionEngine/RuntimeDyld/Inputs/arm_secdiff_reloc.o Binary files differnew file mode 100644 index 0000000..5392266 --- /dev/null +++ b/test/ExecutionEngine/RuntimeDyld/Inputs/arm_secdiff_reloc.o diff --git a/test/ExecutionEngine/RuntimeDyld/arm_secdiff_reloc.test b/test/ExecutionEngine/RuntimeDyld/arm_secdiff_reloc.test new file mode 100644 index 0000000..92e4dd7 --- /dev/null +++ b/test/ExecutionEngine/RuntimeDyld/arm_secdiff_reloc.test @@ -0,0 +1 @@ +RUN: llvm-rtdyld -printline %p/Inputs/arm_secdiff_reloc.o diff --git a/test/ExecutionEngine/fma3-jit.ll b/test/ExecutionEngine/fma3-jit.ll new file mode 100644 index 0000000..25eaa65 --- /dev/null +++ b/test/ExecutionEngine/fma3-jit.ll @@ -0,0 +1,18 @@ +; RUN: %lli %s | FileCheck %s +; REQUIRES: fma3 +; CHECK: 12.000000 + +@msg_double = internal global [4 x i8] c"%f\0A\00" + +declare i32 @printf(i8*, ...) + +define i32 @main() { + %fma = tail call double @llvm.fma.f64(double 3.0, double 3.0, double 3.0) nounwind readnone + + %ptr1 = getelementptr [4 x i8]* @msg_double, i32 0, i32 0 + call i32 (i8*,...)* @printf(i8* %ptr1, double %fma) + + ret i32 0 +} + +declare double @llvm.fma.f64(double, double, double) nounwind readnone diff --git a/test/ExecutionEngine/lit.local.cfg b/test/ExecutionEngine/lit.local.cfg index b6945ad..28c56ad 100644 --- a/test/ExecutionEngine/lit.local.cfg +++ b/test/ExecutionEngine/lit.local.cfg @@ -1,14 +1,9 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - -def getRoot(config): - if not config.parent: - return config - return getRoot(config.parent) - -root = getRoot(config) +if config.root.host_arch in ['PowerPC', 'AArch64', 'SystemZ']: + config.unsupported = True -if root.host_arch in ['PowerPC', 'AArch64', 'SystemZ']: +if 'hexagon' in config.root.target_triple: config.unsupported = True -if 'hexagon' in root.target_triple: +# ExecutionEngine tests are not expected to pass in a cross-compilation setup. 
+if 'native' not in config.available_features: config.unsupported = True diff --git a/test/ExecutionEngine/test-interp-vec-insertelement.ll b/test/ExecutionEngine/test-interp-vec-insertelement.ll new file mode 100644 index 0000000..814b905 --- /dev/null +++ b/test/ExecutionEngine/test-interp-vec-insertelement.ll @@ -0,0 +1,41 @@ + ; RUN: %lli -force-interpreter=true %s > /dev/null + +define i32 @main() { + %v0 = insertelement <2 x i8> zeroinitializer, i8 1, i32 1 + %v1 = insertelement <3 x i8> zeroinitializer, i8 2, i32 2 + %v2 = insertelement <4 x i8> zeroinitializer, i8 3, i32 3 + %v3 = insertelement <8 x i8> zeroinitializer, i8 4, i32 4 + %v4 = insertelement <16 x i8> zeroinitializer, i8 5, i32 7 + + %v5 = insertelement <2 x i16> zeroinitializer, i16 1, i32 1 + %v6 = insertelement <3 x i16> zeroinitializer, i16 2, i32 2 + %v7 = insertelement <4 x i16> zeroinitializer, i16 3, i32 3 + %v8 = insertelement <8 x i16> zeroinitializer, i16 4, i32 4 + %v9 = insertelement <16 x i16> zeroinitializer, i16 5, i32 7 + + %v10 = insertelement <2 x i32> zeroinitializer, i32 1, i32 1 + %v11 = insertelement <3 x i32> zeroinitializer, i32 2, i32 2 + %v12 = insertelement <4 x i32> zeroinitializer, i32 3, i32 3 + %v13 = insertelement <8 x i32> zeroinitializer, i32 4, i32 4 + %v14 = insertelement <16 x i32> zeroinitializer, i32 5, i32 7 + + %v15 = insertelement <2 x i64> zeroinitializer, i64 1, i32 1 + %v16 = insertelement <3 x i64> zeroinitializer, i64 2, i32 2 + %v17 = insertelement <4 x i64> zeroinitializer, i64 3, i32 3 + %v18 = insertelement <8 x i64> zeroinitializer, i64 4, i32 4 + %v19 = insertelement <16 x i64> zeroinitializer, i64 5, i32 7 + + %v20 = insertelement <2 x float> zeroinitializer, float 1.0, i32 1 + %v21 = insertelement <3 x float> zeroinitializer, float 2.0, i32 2 + %v22 = insertelement <4 x float> zeroinitializer, float 3.0, i32 3 + %v23 = insertelement <8 x float> zeroinitializer, float 4.0, i32 4 + %v24 = insertelement <16 x float> zeroinitializer, float 5.0, 
i32 7 + + %v25 = insertelement <2 x double> zeroinitializer, double 1.0, i32 1 + %v26 = insertelement <3 x double> zeroinitializer, double 2.0, i32 2 + %v27 = insertelement <4 x double> zeroinitializer, double 3.0, i32 3 + %v28 = insertelement <8 x double> zeroinitializer, double 4.0, i32 4 + %v29 = insertelement <16 x double> zeroinitializer, double 5.0, i32 7 + + ret i32 0 +} diff --git a/test/ExecutionEngine/test-interp-vec-insertextractvalue.ll b/test/ExecutionEngine/test-interp-vec-insertextractvalue.ll new file mode 100644 index 0000000..09fbf6a --- /dev/null +++ b/test/ExecutionEngine/test-interp-vec-insertextractvalue.ll @@ -0,0 +1,21 @@ + ; RUN: %lli -force-interpreter=true %s > /dev/null + +define i32 @main() { + + %s1 = insertvalue { i32, { float, double} } undef, i32 9, 0 + %s2 = insertvalue { i32, { float, double} } %s1, float 3.0, 1, 0 + %s3 = insertvalue { i32, { float, double} } %s2, double 5.0, 1, 1 + + %s4 = extractvalue { i32, { float, double} } %s3, 1 + + %a1 = extractvalue { i32, { float, double} } %s3, 0 + + %a2 = extractvalue { i32, { float, double} } %s3, 1, 0 + %a3 = extractvalue { i32, { float, double} } %s3, 1, 1 + %a4 = extractvalue { float, double} %s4, 0 + %a5 = extractvalue { float, double} %s4, 1 + + %aa = fpext float %a4 to double + + ret i32 0 +} diff --git a/test/ExecutionEngine/test-interp-vec-select.ll b/test/ExecutionEngine/test-interp-vec-select.ll new file mode 100644 index 0000000..ce086e4 --- /dev/null +++ b/test/ExecutionEngine/test-interp-vec-select.ll @@ -0,0 +1,118 @@ +; RUN: %lli -force-interpreter=true %s > /dev/null + +define i32 @main() { + + ; Vector values + %a2_i8 = add <2 x i8> zeroinitializer, <i8 0, i8 1> + %a3_i8 = add <3 x i8> zeroinitializer, <i8 0, i8 1, i8 2> + %a4_i8 = add <4 x i8> zeroinitializer, <i8 0, i8 1, i8 2, i8 3> + %a8_i8 = add <8 x i8> zeroinitializer, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7> + %a16_i8 = add <16 x i8> zeroinitializer, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 
8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15> + + %a2_i16 = add <2 x i16> zeroinitializer, <i16 0, i16 1> + %a3_i16 = add <3 x i16> zeroinitializer, <i16 0, i16 1, i16 2> + %a4_i16 = add <4 x i16> zeroinitializer, <i16 0, i16 1, i16 2, i16 3> + %a8_i16 = add <8 x i16> zeroinitializer, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7> + %a16_i16 = add <16 x i16> zeroinitializer, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15> + + %a2_i32 = add <2 x i32> zeroinitializer, <i32 0, i32 1> + %a3_i32 = add <3 x i32> zeroinitializer, <i32 0, i32 1, i32 2> + %a4_i32 = add <4 x i32> zeroinitializer, <i32 0, i32 1, i32 2, i32 3> + %a8_i32 = add <8 x i32> zeroinitializer, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %a16_i32 = add <16 x i32> zeroinitializer, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + + %a2_i64 = add <2 x i64> zeroinitializer, <i64 0, i64 1> + %a3_i64 = add <3 x i64> zeroinitializer, <i64 0, i64 1, i64 2> + %a4_i64 = add <4 x i64> zeroinitializer, <i64 0, i64 1, i64 2, i64 3> + %a8_i64 = add <8 x i64> zeroinitializer, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7> + %a16_i64 = add <16 x i64> zeroinitializer, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15> + + %a2_float = fadd <2 x float> zeroinitializer, <float 0.0, float 1.0> + %a3_float = fadd <3 x float> zeroinitializer, <float 0.0, float 1.0, float 2.0> + %a4_float = fadd <4 x float> zeroinitializer, <float 0.0, float 1.0, float 2.0, float 3.0> + %a8_float = fadd <8 x float> zeroinitializer, <float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0> + %a16_float = fadd <16 x float> zeroinitializer, <float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 
11.0, float 12.0, float 13.0, float 14.0, float 15.0> + + %a2_double = fadd <2 x double> zeroinitializer, <double 0.0, double 1.0> + %a3_double = fadd <3 x double> zeroinitializer, <double 0.0, double 1.0, double 2.0> + %a4_double = fadd <4 x double> zeroinitializer, <double 0.0, double 1.0, double 2.0, double 3.0> + %a8_double = fadd <8 x double> zeroinitializer, <double 0.0, double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0> + %a16_double = fadd <16 x double> zeroinitializer, <double 0.0, double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0> + + %b2_i8 = sub <2 x i8> zeroinitializer, %a2_i8 + %b3_i8 = sub <3 x i8> zeroinitializer, %a3_i8 + %b4_i8 = sub <4 x i8> zeroinitializer, %a4_i8 + %b8_i8 = sub <8 x i8> zeroinitializer, %a8_i8 + %b16_i8 = sub <16 x i8> zeroinitializer, %a16_i8 + + %b2_i16 = sub <2 x i16> zeroinitializer, %a2_i16 + %b3_i16 = sub <3 x i16> zeroinitializer, %a3_i16 + %b4_i16 = sub <4 x i16> zeroinitializer, %a4_i16 + %b8_i16 = sub <8 x i16> zeroinitializer, %a8_i16 + %b16_i16 = sub <16 x i16> zeroinitializer, %a16_i16 + + %b2_i32 = sub <2 x i32> zeroinitializer, %a2_i32 + %b3_i32 = sub <3 x i32> zeroinitializer, %a3_i32 + %b4_i32 = sub <4 x i32> zeroinitializer, %a4_i32 + %b8_i32 = sub <8 x i32> zeroinitializer, %a8_i32 + %b16_i32 = sub <16 x i32> zeroinitializer, %a16_i32 + + %b2_i64 = sub <2 x i64> zeroinitializer, %a2_i64 + %b3_i64 = sub <3 x i64> zeroinitializer, %a3_i64 + %b4_i64 = sub <4 x i64> zeroinitializer, %a4_i64 + %b8_i64 = sub <8 x i64> zeroinitializer, %a8_i64 + %b16_i64 = sub <16 x i64> zeroinitializer, %a16_i64 + + %b2_float = fsub <2 x float> zeroinitializer, %a2_float + %b3_float = fsub <3 x float> zeroinitializer, %a3_float + %b4_float = fsub <4 x float> zeroinitializer, %a4_float + %b8_float = fsub <8 x float> zeroinitializer, %a8_float + %b16_float = fsub <16 x 
float> zeroinitializer, %a16_float + + %b2_double = fsub <2 x double> zeroinitializer, %a2_double + %b3_double = fsub <3 x double> zeroinitializer, %a3_double + %b4_double = fsub <4 x double> zeroinitializer, %a4_double + %b8_double = fsub <8 x double> zeroinitializer, %a8_double + %b16_double = fsub <16 x double> zeroinitializer, %a16_double + + + + %v0 = select <2 x i1> <i1 true, i1 false>, <2 x i8> %a2_i8, <2 x i8> %b2_i8 + %v1 = select <3 x i1> <i1 true, i1 false, i1 true>, <3 x i8> %a3_i8, <3 x i8> %b3_i8 + %v2 = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i8> %a4_i8, <4 x i8> %b4_i8 + %v3 = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x i8> %a8_i8, <8 x i8> %b8_i8 + %v4 = select <16 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <16 x i8> %a16_i8, <16 x i8> %b16_i8 + + %v5 = select <2 x i1> <i1 true, i1 false>, <2 x i16> %a2_i16, <2 x i16> %b2_i16 + %v6 = select <3 x i1> <i1 true, i1 false, i1 true>, <3 x i16> %a3_i16, <3 x i16> %b3_i16 + %v7 = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i16> %a4_i16, <4 x i16> %b4_i16 + %v8 = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x i16> %a8_i16, <8 x i16> %b8_i16 + %v9 = select <16 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <16 x i16> %a16_i16, <16 x i16> %b16_i16 + + %v10 = select <2 x i1> <i1 true, i1 false>, <2 x i32> %a2_i32, <2 x i32> %b2_i32 + %v11 = select <3 x i1> <i1 true, i1 false, i1 true>, <3 x i32> %a3_i32, <3 x i32> %b3_i32 + %v12 = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %a4_i32, <4 x i32> %b4_i32 + %v13 = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x i32> %a8_i32, 
<8 x i32> %b8_i32 + %v14 = select <16 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <16 x i32> %a16_i32, <16 x i32> %b16_i32 + + %v15 = select <2 x i1> <i1 true, i1 false>, <2 x i64> %a2_i64, <2 x i64> %b2_i64 + %v16 = select <3 x i1> <i1 true, i1 false, i1 true>, <3 x i64> %a3_i64, <3 x i64> %b3_i64 + %v17 = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i64> %a4_i64, <4 x i64> %b4_i64 + %v18 = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x i64> %a8_i64, <8 x i64> %b8_i64 + %v19 = select <16 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <16 x i64> %a16_i64, <16 x i64> %b16_i64 + + %v20 = select <2 x i1> <i1 true, i1 false>, <2 x float> %a2_float, <2 x float> %b2_float + %v21 = select <3 x i1> <i1 true, i1 false, i1 true>, <3 x float> %a3_float, <3 x float> %b3_float + %v22 = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %a4_float, <4 x float> %b4_float + %v23 = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x float> %a8_float, <8 x float> %b8_float + %v24 = select <16 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <16 x float> %a16_float, <16 x float> %b16_float + + %v25 = select <2 x i1> <i1 true, i1 false>, <2 x double> %a2_double, <2 x double> %b2_double + %v26 = select <3 x i1> <i1 true, i1 false, i1 true>, <3 x double> %a3_double, <3 x double> %b3_double + %v27 = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %a4_double, <4 x double> %b4_double + %v28 = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x double> 
%a8_double, <8 x double> %b8_double + %v29 = select <16 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <16 x double> %a16_double, <16 x double> %b16_double + + + ret i32 0 +} diff --git a/test/ExecutionEngine/test-interp-vec-shuffle.ll b/test/ExecutionEngine/test-interp-vec-shuffle.ll new file mode 100644 index 0000000..e55fa99 --- /dev/null +++ b/test/ExecutionEngine/test-interp-vec-shuffle.ll @@ -0,0 +1,81 @@ +; RUN: %lli -force-interpreter=true %s > /dev/null + +define i32 @main() { + + ; Vector values + %a2_i8 = add <2 x i8> zeroinitializer, <i8 0, i8 1> + %a3_i8 = add <3 x i8> zeroinitializer, <i8 0, i8 1, i8 2> + %a4_i8 = add <4 x i8> zeroinitializer, <i8 0, i8 1, i8 2, i8 3> + %a8_i8 = add <8 x i8> zeroinitializer, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7> + %a16_i8 = add <16 x i8> zeroinitializer, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15> + + %a2_i16 = add <2 x i16> zeroinitializer, <i16 0, i16 1> + %a3_i16 = add <3 x i16> zeroinitializer, <i16 0, i16 1, i16 2> + %a4_i16 = add <4 x i16> zeroinitializer, <i16 0, i16 1, i16 2, i16 3> + %a8_i16 = add <8 x i16> zeroinitializer, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7> + %a16_i16 = add <16 x i16> zeroinitializer, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15> + + %a2_i32 = add <2 x i32> zeroinitializer, <i32 0, i32 1> + %a3_i32 = add <3 x i32> zeroinitializer, <i32 0, i32 1, i32 2> + %a4_i32 = add <4 x i32> zeroinitializer, <i32 0, i32 1, i32 2, i32 3> + %a8_i32 = add <8 x i32> zeroinitializer, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %a16_i32 = add <16 x i32> zeroinitializer, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + + %a2_i64 = add <2 x i64> 
zeroinitializer, <i64 0, i64 1> + %a3_i64 = add <3 x i64> zeroinitializer, <i64 0, i64 1, i64 2> + %a4_i64 = add <4 x i64> zeroinitializer, <i64 0, i64 1, i64 2, i64 3> + %a8_i64 = add <8 x i64> zeroinitializer, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7> + %a16_i64 = add <16 x i64> zeroinitializer, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15> + + %a2_float = fadd <2 x float> zeroinitializer, <float 0.0, float 1.0> + %a3_float = fadd <3 x float> zeroinitializer, <float 0.0, float 1.0, float 2.0> + %a4_float = fadd <4 x float> zeroinitializer, <float 0.0, float 1.0, float 2.0, float 3.0> + %a8_float = fadd <8 x float> zeroinitializer, <float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0> + %a16_float = fadd <16 x float> zeroinitializer, <float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0> + + %a2_double = fadd <2 x double> zeroinitializer, <double 0.0, double 1.0> + %a3_double = fadd <3 x double> zeroinitializer, <double 0.0, double 1.0, double 2.0> + %a4_double = fadd <4 x double> zeroinitializer, <double 0.0, double 1.0, double 2.0, double 3.0> + %a8_double = fadd <8 x double> zeroinitializer, <double 0.0, double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0> + %a16_double = fadd <16 x double> zeroinitializer, <double 0.0, double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0> + + + %v0 = shufflevector <2 x i8> %a2_i8, <2 x i8>undef, <2 x i32> <i32 1, i32 0> + %v1 = shufflevector <3 x i8> %a3_i8, <3 x i8>undef, <3 x i32> <i32 2, i32 1, i32 0> + %v2 = shufflevector <4 x i8> %a4_i8, <4 x i8>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %v3 = shufflevector <8 
x i8> %a8_i8, <8 x i8>undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + %v4 = shufflevector <16 x i8> %a16_i8, <16 x i8>undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + + %v5 = shufflevector <2 x i16> %a2_i16, <2 x i16>undef, <2 x i32> <i32 1, i32 0> + %v6 = shufflevector <3 x i16> %a3_i16, <3 x i16>undef, <3 x i32> <i32 2, i32 1, i32 0> + %v7 = shufflevector <4 x i16> %a4_i16, <4 x i16>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %v8 = shufflevector <8 x i16> %a8_i16, <8 x i16>undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + %v9 = shufflevector <16 x i16> %a16_i16, <16 x i16>undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + + %v10 = shufflevector <2 x i32> %a2_i32, <2 x i32>undef, <2 x i32> <i32 1, i32 0> + %v11 = shufflevector <3 x i32> %a3_i32, <3 x i32>undef, <3 x i32> <i32 2, i32 1, i32 0> + %v12 = shufflevector <4 x i32> %a4_i32, <4 x i32>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %v13 = shufflevector <8 x i32> %a8_i32, <8 x i32>undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + %v14 = shufflevector <16 x i32> %a16_i32, <16 x i32>undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + + %v15 = shufflevector <2 x i64> %a2_i64, <2 x i64>undef, <2 x i32> <i32 1, i32 0> + %v16 = shufflevector <3 x i64> %a3_i64, <3 x i64>undef, <3 x i32> <i32 2, i32 1, i32 0> + %v17 = shufflevector <4 x i64> %a4_i64, <4 x i64>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %v18 = shufflevector <8 x i64> %a8_i64, <8 x i64>undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + %v19 = shufflevector <16 x i64> %a16_i64, <16 x i64>undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, 
i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + + %v20 = shufflevector <2 x float> %a2_float, <2 x float>undef, <2 x i32> <i32 1, i32 0> + %v21 = shufflevector <3 x float> %a3_float, <3 x float>undef, <3 x i32> <i32 2, i32 1, i32 0> + %v22 = shufflevector <4 x float> %a4_float, <4 x float>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %v23 = shufflevector <8 x float> %a8_float, <8 x float>undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + %v24 = shufflevector <16 x float> %a16_float, <16 x float>undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + + %v25 = shufflevector <2 x double> %a2_double, <2 x double>undef, <2 x i32> <i32 1, i32 0> + %v26 = shufflevector <3 x double> %a3_double, <3 x double>undef, <3 x i32> <i32 2, i32 1, i32 0> + %v27 = shufflevector <4 x double> %a4_double, <4 x double>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %v28 = shufflevector <8 x double> %a8_double, <8 x double>undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + %v29 = shufflevector <16 x double> %a16_double, <16 x double>undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + + ret i32 0 +} + diff --git a/test/Feature/lit.local.cfg b/test/Feature/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Feature/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Feature/md_on_instruction.ll b/test/Feature/md_on_instruction.ll index 8599601..955ace3 100644 --- a/test/Feature/md_on_instruction.ll +++ b/test/Feature/md_on_instruction.ll @@ -16,9 +16,12 @@ declare void @llvm.dbg.func.start(metadata) nounwind readnone declare void @llvm.dbg.region.end(metadata) nounwind readnone +!llvm.module.flags = !{!6} + !0 = metadata !{i32 458798, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", 
metadata !1, i32 1, metadata !2, i1 false, i1 true} !1 = metadata !{i32 458769, metadata !4, i32 12, metadata !"clang 1.0", i1 true, metadata !"", i32 0, metadata !5, metadata !5, metadata !4, null, null, metadata !""} !2 = metadata !{i32 458788, null, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} !3 = metadata !{i32 1, i32 13, metadata !1, metadata !1} !4 = metadata !{metadata !"foo.c", metadata !"/tmp"} !5 = metadata !{i32 0} +!6 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Feature/newcasts.ll b/test/Feature/newcasts.ll index 4cfc8bc..dd47484 100644 --- a/test/Feature/newcasts.ll +++ b/test/Feature/newcasts.ll @@ -20,6 +20,9 @@ define void @"NewCasts" (i16 %x) { %p = uitofp <4 x i32> %n to <4 x float> %q = fptosi <4 x float> %p to <4 x i32> %r = fptoui <4 x float> %p to <4 x i32> + %s = inttoptr <4 x i32> %n to <4 x i32*> + %t = addrspacecast <4 x i32*> %s to <4 x i32 addrspace(1)*> + %z = addrspacecast <4 x i32*> %s to <4 x float addrspace(2)*> ret void } diff --git a/test/Feature/optnone.ll b/test/Feature/optnone.ll new file mode 100644 index 0000000..7d8afd4 --- /dev/null +++ b/test/Feature/optnone.ll @@ -0,0 +1,12 @@ +; RUN: llvm-as < %s | llvm-dis | FileCheck %s + +; Check for the presence of attribute optnone in the disassembly. 
+ +; CHECK: @foo() #0 +define void @foo() #0 { + ret void +} + +; CHECK: attributes #0 = { noinline optnone } +attributes #0 = { optnone noinline } + diff --git a/test/Feature/prefixdata.ll b/test/Feature/prefixdata.ll new file mode 100644 index 0000000..b53945c --- /dev/null +++ b/test/Feature/prefixdata.ll @@ -0,0 +1,18 @@ +; RUN: llvm-as < %s | llvm-dis > %t1.ll +; RUN: FileCheck %s < %t1.ll +; RUN: llvm-as < %t1.ll | llvm-dis > %t2.ll +; RUN: diff %t1.ll %t2.ll +; RUN: opt -O3 -S < %t1.ll | FileCheck %s + +; CHECK: @i +@i = linkonce_odr global i32 1 + +; CHECK: f(){{.*}}prefix i32 1 +define void @f() prefix i32 1 { + ret void +} + +; CHECK: g(){{.*}}prefix i32* @i +define void @g() prefix i32* @i { + ret void +} diff --git a/test/FileCheck/check-a-b-has-b.txt b/test/FileCheck/check-a-b-has-b.txt new file mode 100644 index 0000000..4d64d09 --- /dev/null +++ b/test/FileCheck/check-a-b-has-b.txt @@ -0,0 +1,5 @@ +; RUN: FileCheck -check-prefix=A -check-prefix=B -input-file %s %s + +this is the string to be matched + +; B-DAG: this is the string to be {{matched}} diff --git a/test/FileCheck/check-b-a-has-b.txt b/test/FileCheck/check-b-a-has-b.txt new file mode 100644 index 0000000..ac14990 --- /dev/null +++ b/test/FileCheck/check-b-a-has-b.txt @@ -0,0 +1,5 @@ +; RUN: FileCheck -check-prefix=B -check-prefix=A -input-file %s %s + +this is the string to be matched + +; B-DAG: this is the string to be {{matched}} diff --git a/test/FileCheck/check-dag-multi-prefix-2.txt b/test/FileCheck/check-dag-multi-prefix-2.txt new file mode 100644 index 0000000..4add70d --- /dev/null +++ b/test/FileCheck/check-dag-multi-prefix-2.txt @@ -0,0 +1,7 @@ +; RUN: FileCheck -check-prefix=A -input-file %s %s + +this should be matched + +; B-DAG: foo + +; A-DAG: {{this}} should be matched diff --git a/test/FileCheck/check-dag-multi-prefix.txt b/test/FileCheck/check-dag-multi-prefix.txt new file mode 100644 index 0000000..95dfe5a --- /dev/null +++ b/test/FileCheck/check-dag-multi-prefix.txt @@ 
-0,0 +1,27 @@ +; RUN: FileCheck -check-prefix=A -check-prefix=B -input-file %s %s + +add r10, r1, r2 +add r11, r3, r4 +mul r5, r10, r11 + +mul r11, r3, r4 +mul r10, r1, r2 +add r5, r10, r11 + +add r11, r3, r4 +add r10, r1, r2 +mul r5, r10, r11 + +; B-DAG: add [[REG1:r[0-9]+]], r1, r2 +; B-DAG: add [[REG2:r[0-9]+]], r3, r4 +; B: mul r5, [[REG1]], [[REG2]] + +; A-DAG: mul [[REG1:r[0-9]+]], r1, r2 +; A-DAG: mul [[REG2:r[0-9]+]], r3, r4 +; A: add r5, [[REG1]], [[REG2]] + +; B-DAG: add [[REG1:r[0-9]+]], r1, r2 +; B-DAG: add [[REG2:r[0-9]+]], r3, r4 +; B-NOT: xor +; B-DAG: mul r5, [[REG1]], [[REG2]] + diff --git a/test/FileCheck/check-dag-substring-prefix.txt b/test/FileCheck/check-dag-substring-prefix.txt new file mode 100644 index 0000000..49d4b2b --- /dev/null +++ b/test/FileCheck/check-dag-substring-prefix.txt @@ -0,0 +1,7 @@ +; RUN: not FileCheck -check-prefix=A -check-prefix=AA -input-file %s %s + +this is the string to be matched +this should also be matched + +; BAA-DAG: this is the string to be {{matched}} +; BAA-DAG: this should also be {{matched}} diff --git a/test/FileCheck/check-label-dag-capture.txt b/test/FileCheck/check-label-dag-capture.txt new file mode 100644 index 0000000..d8f90f4 --- /dev/null +++ b/test/FileCheck/check-label-dag-capture.txt @@ -0,0 +1,11 @@ +; RUN: FileCheck -input-file %s %s + +bar +foo +foo +zed + +CHECK-LABEL: {{^}}bar +CHECK: {{^}}[[FOO:foo]] +CHECK-DAG: {{^}}[[FOO]] +CHECK-LABEL: {{^}}zed diff --git a/test/FileCheck/check-label-dag.txt b/test/FileCheck/check-label-dag.txt new file mode 100644 index 0000000..2f54c3e --- /dev/null +++ b/test/FileCheck/check-label-dag.txt @@ -0,0 +1,11 @@ +; RUN: not FileCheck -input-file %s %s 2>&1 | FileCheck --check-prefix=ERROR %s + +bar +zed + +CHECK-LABEL: {{^}}bar +CHECK-DAG: {{^}}foo +CHECK-LABEL: {{^}}zed + +ERROR: error: expected string not found in input +ERROR-NEXT: CHECK-DAG: {{.....}}foo diff --git a/test/FileCheck/check-multi-prefix-label.txt 
b/test/FileCheck/check-multi-prefix-label.txt new file mode 100644 index 0000000..41fe641 --- /dev/null +++ b/test/FileCheck/check-multi-prefix-label.txt @@ -0,0 +1,6 @@ +// RUN: FileCheck -check-prefix=ONE -check-prefix=TWO -input-file %s %s + +foo +bar +; ONE-LABEL: {{f}}oo +; TWO-NEXT: {{b}}ar diff --git a/test/FileCheck/check-multiple-prefixes-mixed.txt b/test/FileCheck/check-multiple-prefixes-mixed.txt new file mode 100644 index 0000000..cd3b70a --- /dev/null +++ b/test/FileCheck/check-multiple-prefixes-mixed.txt @@ -0,0 +1,10 @@ +// RUN: FileCheck -check-prefix=B -check-prefix=BOTH -input-file %s %s +// RUN: FileCheck -check-prefix=A -check-prefix=BOTH -input-file %s %s + +; A: {{a}}aaaaa +; B: {{b}}bbbb +; BOTH: {{q}}qqqqq +aaaaaa +bbbbb +qqqqqq +ccccc diff --git a/test/FileCheck/check-multiple-prefixes-nomatch.txt b/test/FileCheck/check-multiple-prefixes-nomatch.txt new file mode 100644 index 0000000..9d38359 --- /dev/null +++ b/test/FileCheck/check-multiple-prefixes-nomatch.txt @@ -0,0 +1,10 @@ +; RUN: not FileCheck -input-file %s %s -check-prefix=FOO -check-prefix=BAR 2>&1 | FileCheck %s + +BAR +bar +foo +; BAR: ba{{z}} +; FOO: fo{{o}} + +; CHECK: {{error: expected string not found in input}} +; CHECK-NEXT: {{B}}AR: ba{{[{][{]z[}][}]}} diff --git a/test/FileCheck/check-multiple-prefixes-substr.txt b/test/FileCheck/check-multiple-prefixes-substr.txt new file mode 100644 index 0000000..76a2ca8 --- /dev/null +++ b/test/FileCheck/check-multiple-prefixes-substr.txt @@ -0,0 +1,5 @@ +// RUN: FileCheck -check-prefix=CHECKER -check-prefix=CHECK -input-file %s %s +// RUN: FileCheck -check-prefix=CHECK -check-prefix=CHECKER -input-file %s %s + +foo +; CHECKER: fo{{o}} diff --git a/test/FileCheck/check-prefixes.txt b/test/FileCheck/check-prefixes.txt new file mode 100644 index 0000000..fc7a043 --- /dev/null +++ b/test/FileCheck/check-prefixes.txt @@ -0,0 +1,9 @@ +// RUN: FileCheck -check-prefix=ANOTHER-PREFIX -input-file %s %s +// RUN: not FileCheck 
-check-prefix=PREFIX -input-file %s %s 2>&1 | FileCheck -check-prefix=CHECK-NONEXISTENT-PREFIX %s + +foobar +; ANOTHER-PREFIX: foobar + +; We use regex to match the colon so that FileCheck won't think it is a check +; prefix. +; CHECK-NONEXISTENT-PREFIX: error: no check strings found with prefix 'PREFIX{{:}}' diff --git a/test/FileCheck/check-substring-multi-prefix-2.txt b/test/FileCheck/check-substring-multi-prefix-2.txt new file mode 100644 index 0000000..618a288 --- /dev/null +++ b/test/FileCheck/check-substring-multi-prefix-2.txt @@ -0,0 +1,11 @@ +; RUN: FileCheck -check-prefix=FOO -check-prefix=FOOBAR -check-prefix=BARFOO -input-file %s %s +; RUN: FileCheck -check-prefix=FOOBAR -check-prefix=FOO -check-prefix=BARFOO -input-file %s %s +; RUN: FileCheck -check-prefix=FOOBAR -check-prefix=BARFOO -check-prefix=FOO -input-file %s %s + +this is the match +this is another + +FOO +FOOBAR +FOOBAR: this is the {{match}} +BARFOO: this is {{another}} diff --git a/test/FileCheck/check-substring-multi-prefix.txt b/test/FileCheck/check-substring-multi-prefix.txt new file mode 100644 index 0000000..b7edb8b --- /dev/null +++ b/test/FileCheck/check-substring-multi-prefix.txt @@ -0,0 +1,9 @@ +// RUN: FileCheck -check-prefix=AAAOVERLAP -check-prefix=OVERLAP -input-file %s %s + +foo +bar +buzz + +OVERLAP: foo +AAAOVERLAP: bar +OVERLAP: buzz diff --git a/test/FileCheck/first-character-match.txt b/test/FileCheck/first-character-match.txt new file mode 100644 index 0000000..4b09c21 --- /dev/null +++ b/test/FileCheck/first-character-match.txt @@ -0,0 +1,2 @@ +RUN: FileCheck -check-prefix=RUN -input-file %s %s +// Prefix is at the first character in the file. The run line then matches itself. 
diff --git a/test/FileCheck/line-count-2.txt b/test/FileCheck/line-count-2.txt new file mode 100644 index 0000000..a56ab6d --- /dev/null +++ b/test/FileCheck/line-count-2.txt @@ -0,0 +1,11 @@ +// RUN: FileCheck -input-file %s %s + +something else +CHECK: {{some}}thing else + +foobar + +CHECK: {{foo}}bar +ALMOSTCHECK +10 wowomg +CHECK: [[@LINE-1]] {{wow}}omg diff --git a/test/FileCheck/line-count.txt b/test/FileCheck/line-count.txt new file mode 100644 index 0000000..6f91c20 --- /dev/null +++ b/test/FileCheck/line-count.txt @@ -0,0 +1,15 @@ +; RUN: FileCheck -input-file %s %s +2 +3 aaa +4 bbb +5 ccc +6 CHECK: [[@LINE-3]] {{a}}aa +7 CHECK: [[@LINE-3]] {{b}}bb +8 CHECK: [[@LINE-3]] {{c}}cc +9 foobar +10 CHECK: [[@LINE-1]] {{foo}}bar +11 +12 arst CHECK: [[@LINE]] {{a}}rst +13 +14 + diff --git a/test/FileCheck/lit.local.cfg b/test/FileCheck/lit.local.cfg deleted file mode 100644 index ee25f56..0000000 --- a/test/FileCheck/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.txt'] diff --git a/test/FileCheck/multiple-missing-prefixes.txt b/test/FileCheck/multiple-missing-prefixes.txt new file mode 100644 index 0000000..cb557d9 --- /dev/null +++ b/test/FileCheck/multiple-missing-prefixes.txt @@ -0,0 +1,9 @@ +// RUN: FileCheck -check-prefix=ANOTHER-PREFIX -input-file %s %s +// RUN: not FileCheck -check-prefix=PREFIX1 -check-prefix=PREFIX2 -input-file %s %s 2>&1 | FileCheck -strict-whitespace -check-prefix=CHECK-NONEXISTENT-PREFIX -check-prefix=ALSO-NONEXISTENT %s + +foobar +; ANOTHER-PREFIX: foobar + +; We use regex to match the colon so that FileCheck won't think it is a check +; prefix. 
+; CHECK-NONEXISTENT-PREFIX: error: no check strings found with prefixes 'PREFIX1{{:}}', 'PREFIX2{{:}}' diff --git a/test/FileCheck/separate-multi-prefix.txt b/test/FileCheck/separate-multi-prefix.txt new file mode 100644 index 0000000..5578d7f --- /dev/null +++ b/test/FileCheck/separate-multi-prefix.txt @@ -0,0 +1,7 @@ +// RUN: not FileCheck -check-prefix=SOMEPREFIX -input-file %s %s +// RUN: FileCheck -check-prefix=ANOTHER -input-file %s %s + +asdf +; SOMEPREFIX: {{t}}his_is_not_asdf +; ANOTHER: {{a}}sdf + diff --git a/test/FileCheck/validate-check-prefix.txt b/test/FileCheck/validate-check-prefix.txt new file mode 100644 index 0000000..db3392d --- /dev/null +++ b/test/FileCheck/validate-check-prefix.txt @@ -0,0 +1,9 @@ +// RUN: not FileCheck -check-prefix=A! -input-file %s %s 2>&1 | FileCheck -check-prefix=BAD_PREFIX %s +// RUN: FileCheck -check-prefix=A1a-B_c -input-file %s %s +// RUN: not FileCheck -check-prefix=REPEAT -check-prefix=REPEAT -input-file %s %s 2>&1 | FileCheck -check-prefix=BAD_PREFIX %s +// RUN: not FileCheck -check-prefix=VALID -check-prefix=A! -input-file %s %s 2>&1 | FileCheck -check-prefix=BAD_PREFIX %s +foobar +; A1a-B_c: foobar + +; BAD_PREFIX: Supplied check-prefix is invalid! 
Prefixes must be + unique and start with a letter and contain only alphanumeric characters, hyphens and underscores diff --git a/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll b/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll index 38168fc..2c4d82e 100644 --- a/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll +++ b/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll @@ -64,8 +64,10 @@ entry: ret void } -!0 = metadata !{metadata !"any pointer", metadata !1} +!0 = metadata !{metadata !5, metadata !5, i64 0} !1 = metadata !{metadata !"omnipotent char", metadata !2} !2 = metadata !{metadata !"Simple C/C++ TBAA", null} -!3 = metadata !{metadata !"int", metadata !1} +!3 = metadata !{metadata !6, metadata !6, i64 0} !4 = metadata !{i32 156132, i32 156164, i32 156205, i32 156238, i32 156282, i32 156332, i32 156370, i32 156408, i32 156447, i32 156486, i32 156536, i32 156574, i32 156612, i32 156651, i32 156690, i32 156740, i32 156778, i32 156816, i32 156855, i32 156894, i32 156944, i32 156982, i32 157020, i32 157059, i32 157098, i32 157148, i32 157186, i32 157224, i32 157263, i32 157302, i32 157352, i32 157390, i32 157428, i32 157467, i32 157506, i32 157556, i32 157594, i32 157632, i32 157671, i32 157710, i32 157760, i32 157798, i32 157836, i32 157875, i32 157914, i32 157952, i32 157996, i32 158046, i32 158099, i32 158140, i32 158179, i32 158218, i32 158268, i32 158321, i32 158362, i32 158401, i32 158440, i32 158490, i32 158543, i32 158584, i32 158623, i32 158662, i32 158712, i32 158765, i32 158806, i32 158845, i32 158884, i32 158922, i32 158963, i32 158996, i32 159029, i32 159062, i32 159109, i32 159154, i32 159199, i32 159243, i32 159286, i32 159329, i32 159375, i32 159422, i32 159478, i32 159522, i32 159566} +!5 = metadata !{metadata !"any pointer", metadata !1} +!6 = metadata !{metadata !"int", metadata !1} diff --git a/test/Instrumentation/AddressSanitizer/X86/lit.local.cfg b/test/Instrumentation/AddressSanitizer/X86/lit.local.cfg index 
b05ed3c..ba763cf 100644 --- a/test/Instrumentation/AddressSanitizer/X86/lit.local.cfg +++ b/test/Instrumentation/AddressSanitizer/X86/lit.local.cfg @@ -1,13 +1,4 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - -def getRoot(config): - if not config.parent: - return config - return getRoot(config.parent) - -root = getRoot(config) - -targets = set(root.targets_to_build.split()) +targets = set(config.root.targets_to_build.split()) if not 'X86' in targets: config.unsupported = True diff --git a/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll b/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll index da8f541..1087c9a 100644 --- a/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll +++ b/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll @@ -9,7 +9,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 %struct_of_7_bytes_4_aligned = type { i32, i8, i8, i8} -@f = global %struct_of_7_bytes_4_aligned zeroinitializer, align 4 +@f = external global %struct_of_7_bytes_4_aligned , align 4 ; Accessing bytes 4 and 6, not ok to widen to i32 if sanitize_address is set. 
diff --git a/test/Instrumentation/AddressSanitizer/coverage.ll b/test/Instrumentation/AddressSanitizer/coverage.ll new file mode 100644 index 0000000..47a54c0 --- /dev/null +++ b/test/Instrumentation/AddressSanitizer/coverage.ll @@ -0,0 +1,13 @@ +; RUN: opt < %s -asan -asan-coverage=1 -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" +define i32 @foo(i32* %a) sanitize_address { +entry: + ret i32 0 +} +; CHECK: define i32 @foo(i32* %a) #0 { +; CHECK: %0 = load atomic i8* @__asan_gen_cov_foo monotonic, align 1 +; CHECK: %1 = icmp eq i8 0, %0 +; CHECK: br i1 %1, label %2, label %3 +; CHECK: call void @__sanitizer_cov(i64 ptrtoint (i32 (i32*)* @foo to i64)) +; CHECK: store atomic i8 1, i8* @__asan_gen_cov_foo monotonic, align 1 diff --git a/test/Instrumentation/AddressSanitizer/debug_info.ll b/test/Instrumentation/AddressSanitizer/debug_info.ll index ec89d26..daf2957 100644 --- a/test/Instrumentation/AddressSanitizer/debug_info.ll +++ b/test/Instrumentation/AddressSanitizer/debug_info.ll @@ -31,13 +31,14 @@ entry: declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!17} !0 = metadata !{i32 786449, metadata !16, i32 4, metadata !"clang version 3.3 (trunk 169314)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, null, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/llvm_cmake_clang/tmp/debuginfo/a.cc] [DW_LANG_C_plus_plus] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 786478, metadata !16, metadata !6, metadata !"zzz", metadata !"zzz", metadata !"_Z3zzzi", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z3zzzi, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [zzz] !6 = metadata !{i32 786473, metadata 
!16} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9, metadata !9} !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !10 = metadata !{i32 786689, metadata !5, metadata !"p", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p] [line 1] @@ -57,3 +58,4 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !14 = metadata !{i32 2, i32 0, metadata !13, null} !15 = metadata !{i32 3, i32 0, metadata !13, null} !16 = metadata !{metadata !"a.cc", metadata !"/usr/local/google/llvm_cmake_clang/tmp/debuginfo"} +!17 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Instrumentation/AddressSanitizer/instrument_global.ll b/test/Instrumentation/AddressSanitizer/instrument_global.ll index 2c183f5..4717277 100644 --- a/test/Instrumentation/AddressSanitizer/instrument_global.ll +++ b/test/Instrumentation/AddressSanitizer/instrument_global.ll @@ -9,12 +9,73 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK: llvm.global_ctors ; CHECK: llvm.global_dtors -; CHECK: define internal void @asan.module_ctor +; Test that we don't instrument global arrays with static initializer +; indexed with constants in-bounds. But instrument all other cases. 
+ +@GlobSt = global [10 x i32] zeroinitializer, align 16 ; static initializer +@GlobDy = global [10 x i32] zeroinitializer, align 16 ; dynamic initializer +@GlobEx = external global [10 x i32] , align 16 ; extern initializer + +; GlobSt is declared here, and has static initializer -- ok to optimize. +define i32 @AccessGlobSt_0_2() sanitize_address { +entry: + %0 = load i32* getelementptr inbounds ([10 x i32]* @GlobSt, i64 0, i64 2), align 8 + ret i32 %0 +; CHECK-LABEL: define i32 @AccessGlobSt_0_2 +; CHECK-NOT: __asan_report +; CHECK: ret i32 %0 +} + +; GlobSt is accessed out of bounds -- can't optimize +define i32 @AccessGlobSt_0_12() sanitize_address { +entry: + %0 = load i32* getelementptr inbounds ([10 x i32]* @GlobSt, i64 0, i64 12), align 8 + ret i32 %0 +; CHECK-LABEL: define i32 @AccessGlobSt_0_12 +; CHECK: __asan_report +; CHECK: ret i32 +} + +; GlobSt is accessed with Gep that has non-0 first index -- can't optimize. +define i32 @AccessGlobSt_1_2() sanitize_address { +entry: + %0 = load i32* getelementptr inbounds ([10 x i32]* @GlobSt, i64 1, i64 2), align 8 + ret i32 %0 +; CHECK-LABEL: define i32 @AccessGlobSt_1_2 +; CHECK: __asan_report +; CHECK: ret i32 +} + +; GlobDy is declared with dynamic initializer -- can't optimize. +define i32 @AccessGlobDy_0_2() sanitize_address { +entry: + %0 = load i32* getelementptr inbounds ([10 x i32]* @GlobDy, i64 0, i64 2), align 8 + ret i32 %0 +; CHECK-LABEL: define i32 @AccessGlobDy_0_2 +; CHECK: __asan_report +; CHECK: ret i32 +} + +; GlobEx is an external global -- can't optimize. 
+define i32 @AccessGlobEx_0_2() sanitize_address { +entry: + %0 = load i32* getelementptr inbounds ([10 x i32]* @GlobEx, i64 0, i64 2), align 8 + ret i32 %0 +; CHECK-LABEL: define i32 @AccessGlobEx_0_2 +; CHECK: __asan_report +; CHECK: ret i32 +} + + +!llvm.asan.dynamically_initialized_globals = !{!0} +!0 = metadata !{[10 x i32]* @GlobDy} + +; CHECK-LABEL: define internal void @asan.module_ctor ; CHECK-NOT: ret ; CHECK: call void @__asan_register_globals ; CHECK: ret -; CHECK: define internal void @asan.module_dtor +; CHECK-LABEL: define internal void @asan.module_dtor ; CHECK-NOT: ret ; CHECK: call void @__asan_unregister_globals ; CHECK: ret diff --git a/test/Instrumentation/AddressSanitizer/lifetime-uar.ll b/test/Instrumentation/AddressSanitizer/lifetime-uar.ll new file mode 100644 index 0000000..21eaf7f --- /dev/null +++ b/test/Instrumentation/AddressSanitizer/lifetime-uar.ll @@ -0,0 +1,33 @@ +; Test handling of llvm.lifetime intrinsics in UAR mode. +; RUN: opt < %s -asan -asan-use-after-return -asan-check-lifetime -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind + +define i32 @basic_test() sanitize_address { + ; CHECK-LABEL: define i32 @basic_test() + +entry: + %retval = alloca i32, align 4 + %c = alloca i8, align 1 + + call void @llvm.lifetime.start(i64 1, i8* %c) + ; Memory is unpoisoned at llvm.lifetime.start + ; CHECK: call void @__asan_unpoison_stack_memory(i64 %{{[^ ]+}}, i64 1) + + store i32 0, i32* %retval + store i8 0, i8* %c, align 1 + + call void @llvm.lifetime.end(i64 1, i8* %c) + ; Memory is poisoned at llvm.lifetime.end + ; CHECK: call void @__asan_poison_stack_memory(i64 %{{[^ ]+}}, i64 1) + + ; No need to unpoison memory at function exit in UAR mode. 
+ ; CHECK-NOT: @__asan_unpoison_stack_memory + ; CHECK: ret void + + ret i32 0 +} + diff --git a/test/Instrumentation/AddressSanitizer/lifetime.ll b/test/Instrumentation/AddressSanitizer/lifetime.ll index 3348728..d80331e 100644 --- a/test/Instrumentation/AddressSanitizer/lifetime.ll +++ b/test/Instrumentation/AddressSanitizer/lifetime.ll @@ -15,7 +15,7 @@ entry: call void @llvm.lifetime.end(i64 -1, i8* %i.ptr) ; Check that lifetime with no size are ignored. -; CHECK: @lifetime_no_size +; CHECK-LABEL: define void @lifetime_no_size() ; CHECK-NOT: @__asan_poison_stack_memory ; CHECK-NOT: @__asan_unpoison_stack_memory ; CHECK: ret void @@ -24,7 +24,7 @@ entry: ; Generic case of lifetime analysis. define void @lifetime() sanitize_address { - ; CHECK: @lifetime + ; CHECK-LABEL: define void @lifetime() ; Regular variable lifetime intrinsics. %i = alloca i32, align 4 @@ -62,7 +62,7 @@ define void @lifetime() sanitize_address { ; Check that arguments of lifetime may come from phi nodes. define void @phi_args(i1 %x) sanitize_address { - ; CHECK: @phi_args + ; CHECK-LABEL: define void @phi_args(i1 %x) entry: %i = alloca i64, align 4 diff --git a/test/Instrumentation/AddressSanitizer/lit.local.cfg b/test/Instrumentation/AddressSanitizer/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Instrumentation/AddressSanitizer/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Instrumentation/BoundsChecking/lit.local.cfg b/test/Instrumentation/BoundsChecking/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Instrumentation/BoundsChecking/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Instrumentation/BoundsChecking/simple-32.ll b/test/Instrumentation/BoundsChecking/simple-32.ll new file mode 100644 index 0000000..38b210f --- /dev/null +++ b/test/Instrumentation/BoundsChecking/simple-32.ll @@ -0,0 +1,29 @@ +; RUN: opt < %s -bounds-checking 
-S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" + +%struct.s2_packed = type <{ i64, i32, i32, i32, i16, i8 }> + +; CHECK-LABEL: @f +; CHECK-NOT: trap +define i16 @f() { +entry: + %packed1 = alloca %struct.s2_packed, align 8 + %gep = getelementptr inbounds %struct.s2_packed* %packed1, i32 0, i32 4 + %ptr = bitcast i16* %gep to i32* + %val = load i32* %ptr, align 4 + %valt = trunc i32 %val to i16 + ret i16 %valt +} + +; CHECK-LABEL: @f +; CHECK: call void @llvm.trap() +define i16 @f2() { +entry: + %packed1 = alloca %struct.s2_packed, align 8 + %gep = getelementptr inbounds %struct.s2_packed* %packed1, i32 0, i32 4 + %ptr = bitcast i16* %gep to i48* + %val = load i48* %ptr, align 4 + %valt = trunc i48 %val to i16 + ret i16 %valt +} diff --git a/test/Instrumentation/BoundsChecking/simple.ll b/test/Instrumentation/BoundsChecking/simple.ll index 16870c7..72b58f4 100644 --- a/test/Instrumentation/BoundsChecking/simple.ll +++ b/test/Instrumentation/BoundsChecking/simple.ll @@ -126,3 +126,20 @@ define i64 @f12(i64 %x, i64 %y) nounwind { %4 = load i64* %3, align 8 ret i64 %4 } + +; PR17402 +; CHECK-LABEL: @f13 +define void @f13() nounwind { +entry: + br label %alive + +dead: + ; Self-refential GEPs can occur in dead code. 
+ %incdec.ptr = getelementptr inbounds i32* %incdec.ptr, i64 1 + ; CHECK: %incdec.ptr = getelementptr inbounds i32* %incdec.ptr + %l = load i32* %incdec.ptr + br label %alive + +alive: + ret void +} diff --git a/test/Instrumentation/DataFlowSanitizer/Inputs/abilist.txt b/test/Instrumentation/DataFlowSanitizer/Inputs/abilist.txt new file mode 100644 index 0000000..97ce5e6 --- /dev/null +++ b/test/Instrumentation/DataFlowSanitizer/Inputs/abilist.txt @@ -0,0 +1,8 @@ +fun:discard*=uninstrumented +fun:discard*=discard + +fun:functional=uninstrumented +fun:functional=functional + +fun:custom*=uninstrumented +fun:custom*=custom diff --git a/test/Instrumentation/DataFlowSanitizer/abilist.ll b/test/Instrumentation/DataFlowSanitizer/abilist.ll new file mode 100644 index 0000000..66ddc14 --- /dev/null +++ b/test/Instrumentation/DataFlowSanitizer/abilist.ll @@ -0,0 +1,75 @@ +; RUN: opt < %s -dfsan -dfsan-args-abi -dfsan-abilist=%S/Inputs/abilist.txt -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; CHECK: i32 @discard(i32 %a, i32 %b) +define i32 @discard(i32 %a, i32 %b) { + ret i32 0 +} + +; CHECK: i32 @functional(i32 %a, i32 %b) +define i32 @functional(i32 %a, i32 %b) { + %c = add i32 %a, %b + ret i32 %c +} + +declare void @custom1(i32 %a, i32 %b) + +declare i32 @custom2(i32 %a, i32 %b) + +declare void @customcb(i32 (i32)* %cb) + +declare i32 @cb(i32) + +; CHECK: @"dfs$f" +define void @f() { + ; CHECK: %[[LABELRETURN:.*]] = alloca i16 + + ; CHECK: call void @__dfsw_custom1(i32 1, i32 2, i16 0, i16 0) + call void @custom1(i32 1, i32 2) + + ; CHECK: call i32 @__dfsw_custom2(i32 1, i32 2, i16 0, i16 0, i16* %[[LABELRETURN]]) + call i32 @custom2(i32 1, i32 2) + + ; CHECK: call void @__dfsw_customcb({{.*}} @"dfst0$customcb", i8* bitcast ({{.*}} @"dfs$cb" to i8*), i16 0) + call void @customcb(i32 (i32)* @cb) + + ret void +} + +; CHECK: define 
i32 (i32, i32)* @discardg(i32) +; CHECK: %[[CALL:.*]] = call { i32 (i32, i32)*, i16 } @"dfs$g"(i32 %0, i16 0) +; CHECK: %[[XVAL:.*]] = extractvalue { i32 (i32, i32)*, i16 } %[[CALL]], 0 +; CHECK: ret {{.*}} %[[XVAL]] +@discardg = alias i32 (i32, i32)* (i32)* @g + +; CHECK: define linkonce_odr { i32, i16 } @"dfsw$custom2"(i32, i32, i16, i16) +; CHECK: %[[LABELRETURN2:.*]] = alloca i16 +; CHECK: %[[RV:.*]] = call i32 @__dfsw_custom2 +; CHECK: %[[RVSHADOW:.*]] = load i16* %[[LABELRETURN2]] +; CHECK: insertvalue {{.*}}[[RV]], 0 +; CHECK: insertvalue {{.*}}[[RVSHADOW]], 1 +; CHECK: ret { i32, i16 } + +; CHECK: @"dfs$g" +define i32 (i32, i32)* @g(i32) { + ; CHECK: ret {{.*}} @"dfsw$custom2" + ret i32 (i32, i32)* @custom2 +} + +; CHECK: define { i32, i16 } @"dfs$adiscard"(i32, i32, i16, i16) +; CHECK: %[[CALL:.*]] = call i32 @discard(i32 %0, i32 %1) +; CHECK: %[[IVAL0:.*]] = insertvalue { i32, i16 } undef, i32 %[[CALL]], 0 +; CHECK: %[[IVAL1:.*]] = insertvalue { i32, i16 } %[[IVAL0]], i16 0, 1 +; CHECK: ret { i32, i16 } %[[IVAL1]] +@adiscard = alias i32 (i32, i32)* @discard + +; CHECK: declare void @__dfsw_custom1(i32, i32, i16, i16) +; CHECK: declare i32 @__dfsw_custom2(i32, i32, i16, i16, i16*) + +; CHECK-LABEL: define linkonce_odr i32 @"dfst0$customcb"(i32 (i32)*, i32, i16, i16*) +; CHECK: %[[BC:.*]] = bitcast i32 (i32)* %0 to { i32, i16 } (i32, i16)* +; CHECK: %[[CALL:.*]] = call { i32, i16 } %[[BC]](i32 %1, i16 %2) +; CHECK: %[[XVAL0:.*]] = extractvalue { i32, i16 } %[[CALL]], 0 +; CHECK: %[[XVAL1:.*]] = extractvalue { i32, i16 } %[[CALL]], 1 +; CHECK: store i16 %[[XVAL1]], i16* %3 +; CHECK: ret i32 %[[XVAL0]] diff --git a/test/Instrumentation/DataFlowSanitizer/args-unreachable-bb.ll b/test/Instrumentation/DataFlowSanitizer/args-unreachable-bb.ll new file mode 100644 index 0000000..a699f75 --- /dev/null +++ b/test/Instrumentation/DataFlowSanitizer/args-unreachable-bb.ll @@ -0,0 +1,30 @@ +; RUN: opt < %s -dfsan -verify -dfsan-args-abi -S | FileCheck %s +target 
datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; CHECK-LABEL: @"dfs$unreachable_bb1" +define i8 @unreachable_bb1() { + ; CHECK: ret { i8, i16 } { i8 1, i16 0 } + ; CHECK-NOT: bb2: + ; CHECK-NOT: bb3: + ; CHECK-NOT: bb4: + ret i8 1 + +bb2: + ret i8 2 + +bb3: + br label %bb4 + +bb4: + br label %bb3 +} + +declare void @abort() noreturn + +; CHECK-LABEL: @"dfs$unreachable_bb2" +define i8 @unreachable_bb2() { + call void @abort() noreturn + ; CHECK-NOT: i8 12 + ; CHECK: unreachable + ret i8 12 +} diff --git a/test/Instrumentation/DataFlowSanitizer/arith.ll b/test/Instrumentation/DataFlowSanitizer/arith.ll new file mode 100644 index 0000000..dc61896 --- /dev/null +++ b/test/Instrumentation/DataFlowSanitizer/arith.ll @@ -0,0 +1,63 @@ +; RUN: opt < %s -dfsan -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +define i8 @add(i8 %a, i8 %b) { + ; CHECK: @"dfs$add" + ; CHECK-DAG: %[[ALABEL:.*]] = load{{.*}}__dfsan_arg_tls, i64 0, i64 0 + ; CHECK-DAG: %[[BLABEL:.*]] = load{{.*}}__dfsan_arg_tls, i64 0, i64 1 + ; CHECK: %[[UNION:.*]] = call{{.*}}__dfsan_union(i16 zeroext %[[ALABEL]], i16 zeroext %[[BLABEL]]) + ; CHECK: %[[ADDLABEL:.*]] = phi i16 [ %[[UNION]], {{.*}} ], [ %[[ALABEL]], {{.*}} ] + ; CHECK: add i8 + ; CHECK: store i16 %[[ADDLABEL]], i16* @__dfsan_retval_tls + ; CHECK: ret i8 + %c = add i8 %a, %b + ret i8 %c +} + +define i8 @sub(i8 %a, i8 %b) { + ; CHECK: @"dfs$sub" + ; CHECK: load{{.*}}__dfsan_arg_tls + ; CHECK: load{{.*}}__dfsan_arg_tls + ; CHECK: call{{.*}}__dfsan_union + ; CHECK: sub i8 + ; CHECK: store{{.*}}__dfsan_retval_tls + ; CHECK: ret i8 + %c = sub i8 %a, %b + ret i8 %c +} + +define i8 @mul(i8 %a, i8 %b) { + ; CHECK: @"dfs$mul" + ; CHECK: load{{.*}}__dfsan_arg_tls + ; CHECK: load{{.*}}__dfsan_arg_tls + ; 
CHECK: call{{.*}}__dfsan_union + ; CHECK: mul i8 + ; CHECK: store{{.*}}__dfsan_retval_tls + ; CHECK: ret i8 + %c = mul i8 %a, %b + ret i8 %c +} + +define i8 @sdiv(i8 %a, i8 %b) { + ; CHECK: @"dfs$sdiv" + ; CHECK: load{{.*}}__dfsan_arg_tls + ; CHECK: load{{.*}}__dfsan_arg_tls + ; CHECK: call{{.*}}__dfsan_union + ; CHECK: sdiv i8 + ; CHECK: store{{.*}}__dfsan_retval_tls + ; CHECK: ret i8 + %c = sdiv i8 %a, %b + ret i8 %c +} + +define i8 @udiv(i8 %a, i8 %b) { + ; CHECK: @"dfs$udiv" + ; CHECK: load{{.*}}__dfsan_arg_tls + ; CHECK: load{{.*}}__dfsan_arg_tls + ; CHECK: call{{.*}}__dfsan_union + ; CHECK: udiv i8 + ; CHECK: store{{.*}}__dfsan_retval_tls + ; CHECK: ret i8 + %c = udiv i8 %a, %b + ret i8 %c +} diff --git a/test/Instrumentation/DataFlowSanitizer/call.ll b/test/Instrumentation/DataFlowSanitizer/call.ll new file mode 100644 index 0000000..813f4c1 --- /dev/null +++ b/test/Instrumentation/DataFlowSanitizer/call.ll @@ -0,0 +1,23 @@ +; RUN: opt < %s -dfsan -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; CHECK: @__dfsan_arg_tls = external thread_local(initialexec) global [64 x i16] +; CHECK: @__dfsan_retval_tls = external thread_local(initialexec) global i16 + +declare i32 @f(i32) +declare float @llvm.sqrt.f32(float) + +; CHECK: @"dfs$call" +define i32 @call() { + ; CHECK: store{{.*}}__dfsan_arg_tls + ; CHECK: call{{.*}}@"dfs$f" + ; CHECK: load{{.*}}__dfsan_retval_tls + %r = call i32 @f(i32 0) + + ; CHECK-NOT: store{{.*}}__dfsan_arg_tls + %i = call float @llvm.sqrt.f32(float -1.0) + + ; CHECK: store{{.*}}__dfsan_retval_tls + ; CHECK: ret i32 + ret i32 %r +} diff --git a/test/Instrumentation/DataFlowSanitizer/debug-nonzero-labels.ll b/test/Instrumentation/DataFlowSanitizer/debug-nonzero-labels.ll new file mode 100644 index 0000000..6bcd5c5 --- /dev/null +++ b/test/Instrumentation/DataFlowSanitizer/debug-nonzero-labels.ll @@ 
-0,0 +1,23 @@ +; RUN: opt < %s -dfsan -dfsan-args-abi -dfsan-debug-nonzero-labels -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +declare i32 @g() + +; CHECK: define { i32, i16 } @"dfs$f"(i32, i16) +define i32 @f(i32) { + ; CHECK: [[LOCALLABELALLOCA:%.*]] = alloca i16 + ; CHECK: [[ARGCMP:%.*]] = icmp ne i16 %1, 0 + ; CHECK: br i1 [[ARGCMP]] + %i = alloca i32 + store i32 %0, i32* %i + ; CHECK: [[CALL:%.*]] = call { i32, i16 } @"dfs$g"() + ; CHECK: [[CALLLABEL:%.*]] = extractvalue { i32, i16 } [[CALL]], 1 + ; CHECK: [[CALLCMP:%.*]] = icmp ne i16 [[CALLLABEL]], 0 + ; CHECK: br i1 [[CALLCMP]] + %call = call i32 @g() + ; CHECK: [[LOCALLABEL:%.*]] = load i16* [[LOCALLABELALLOCA]] + ; CHECK: [[LOCALCMP:%.*]] = icmp ne i16 [[LOCALLABEL]], 0 + ; CHECK: br i1 [[LOCALCMP]] + %load = load i32* %i + ret i32 %load +} diff --git a/test/Instrumentation/DataFlowSanitizer/load.ll b/test/Instrumentation/DataFlowSanitizer/load.ll new file mode 100644 index 0000000..6431213 --- /dev/null +++ b/test/Instrumentation/DataFlowSanitizer/load.ll @@ -0,0 +1,81 @@ +; RUN: opt < %s -dfsan -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +define i8 @load8(i8* %p) { + ; CHECK: @"dfs$load8" + ; CHECK: ptrtoint + ; CHECK: and + ; CHECK: mul + ; CHECK: inttoptr + ; CHECK: load + ; CHECK: store{{.*}}__dfsan_retval_tls + ; CHECK: ret i8 + %a = load i8* %p + ret i8 %a +} + +define i16 @load16(i16* %p) { + ; CHECK: @"dfs$load16" + ; CHECK: ptrtoint + ; CHECK: and + ; CHECK: mul + ; CHECK: inttoptr + ; CHECK: load + ; CHECK: load + ; CHECK: icmp ne + ; CHECK: call{{.*}}__dfsan_union + ; CHECK: store{{.*}}__dfsan_retval_tls + ; CHECK: ret i16 + %a = load i16* %p + ret i16 %a +} + +define i32 @load32(i32* %p) { + ; CHECK: 
@"dfs$load32" + ; CHECK: ptrtoint + ; CHECK: and + ; CHECK: mul + ; CHECK: inttoptr + ; CHECK: bitcast + ; CHECK: load + ; CHECK: trunc + ; CHECK: shl + ; CHECK: lshr + ; CHECK: or + ; CHECK: icmp eq + + ; CHECK: store{{.*}}__dfsan_retval_tls + ; CHECK: ret i32 + + ; CHECK: call{{.*}}__dfsan_union_load + + %a = load i32* %p + ret i32 %a +} + +define i64 @load64(i64* %p) { + ; CHECK: @"dfs$load64" + ; CHECK: ptrtoint + ; CHECK: and + ; CHECK: mul + ; CHECK: inttoptr + ; CHECK: bitcast + ; CHECK: load + ; CHECK: trunc + ; CHECK: shl + ; CHECK: lshr + ; CHECK: or + ; CHECK: icmp eq + + ; CHECK: store{{.*}}__dfsan_retval_tls + ; CHECK: ret i64 + + ; CHECK: call{{.*}}__dfsan_union_load + + ; CHECK: getelementptr + ; CHECK: load + ; CHECK: icmp eq + + %a = load i64* %p + ret i64 %a +} diff --git a/test/Instrumentation/DataFlowSanitizer/memset.ll b/test/Instrumentation/DataFlowSanitizer/memset.ll new file mode 100644 index 0000000..062ef1a --- /dev/null +++ b/test/Instrumentation/DataFlowSanitizer/memset.ll @@ -0,0 +1,11 @@ +; RUN: opt < %s -dfsan -dfsan-args-abi -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) + +define void @ms(i8* %p, i8 %v) { + ; CHECK-LABEL: @"dfs$ms"(i8*, i8, i16, i16) + ; CHECK: call void @__dfsan_set_label(i16 %3, i8* %0, i64 1) + call void @llvm.memset.p0i8.i64(i8* %p, i8 %v, i64 1, i32 1, i1 1) + ret void +} diff --git a/test/Instrumentation/DataFlowSanitizer/prefix-rename.ll b/test/Instrumentation/DataFlowSanitizer/prefix-rename.ll new file mode 100644 index 0000000..1a56460 --- /dev/null +++ b/test/Instrumentation/DataFlowSanitizer/prefix-rename.ll @@ -0,0 +1,14 @@ +; RUN: opt < %s -dfsan -S | FileCheck %s +; RUN: opt < %s -dfsan -dfsan-args-abi -S | FileCheck %s +target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; CHECK: module asm ".symver dfs$f1,dfs$f@@version1" +module asm ".symver f1,f@@version1" + +; CHECK: @"dfs$f2" = alias {{.*}} @"dfs$f1" +@f2 = alias void ()* @f1 + +; CHECK: define void @"dfs$f1" +define void @f1() { + ret void +} diff --git a/test/Instrumentation/DataFlowSanitizer/store.ll b/test/Instrumentation/DataFlowSanitizer/store.ll new file mode 100644 index 0000000..9509177 --- /dev/null +++ b/test/Instrumentation/DataFlowSanitizer/store.ll @@ -0,0 +1,75 @@ +; RUN: opt < %s -dfsan -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +define void @store8(i8 %v, i8* %p) { + ; CHECK: @"dfs$store8" + ; CHECK: load{{.*}}__dfsan_arg_tls + ; CHECK: ptrtoint + ; CHECK: and + ; CHECK: mul + ; CHECK: inttoptr + ; CHECK: getelementptr + ; CHECK: store + ; CHECK: store + store i8 %v, i8* %p + ret void +} + +define void @store16(i16 %v, i16* %p) { + ; CHECK: @"dfs$store16" + ; CHECK: load{{.*}}__dfsan_arg_tls + ; CHECK: ptrtoint + ; CHECK: and + ; CHECK: mul + ; CHECK: inttoptr + ; CHECK: getelementptr + ; CHECK: store + ; CHECK: getelementptr + ; CHECK: store + ; CHECK: store + store i16 %v, i16* %p + ret void +} + +define void @store32(i32 %v, i32* %p) { + ; CHECK: @"dfs$store32" + ; CHECK: load{{.*}}__dfsan_arg_tls + ; CHECK: ptrtoint + ; CHECK: and + ; CHECK: mul + ; CHECK: inttoptr + ; CHECK: getelementptr + ; CHECK: store + ; CHECK: getelementptr + ; CHECK: store + ; CHECK: getelementptr + ; CHECK: store + ; CHECK: getelementptr + ; CHECK: store + ; CHECK: store + store i32 %v, i32* %p + ret void +} + +define void @store64(i64 %v, i64* %p) { + ; CHECK: @"dfs$store64" + ; CHECK: load{{.*}}__dfsan_arg_tls + ; CHECK: ptrtoint + ; CHECK: and + ; CHECK: mul + ; CHECK: inttoptr + ; 
CHECK: insertelement + ; CHECK: insertelement + ; CHECK: insertelement + ; CHECK: insertelement + ; CHECK: insertelement + ; CHECK: insertelement + ; CHECK: insertelement + ; CHECK: insertelement + ; CHECK: bitcast + ; CHECK: getelementptr + ; CHECK: store + ; CHECK: store + store i64 %v, i64* %p + ret void +} diff --git a/test/Instrumentation/MemorySanitizer/atomics.ll b/test/Instrumentation/MemorySanitizer/atomics.ll new file mode 100644 index 0000000..ff02452 --- /dev/null +++ b/test/Instrumentation/MemorySanitizer/atomics.ll @@ -0,0 +1,189 @@ +; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; atomicrmw xchg: store clean shadow, return clean shadow + +define i32 @AtomicRmwXchg(i32* %p, i32 %x) sanitize_memory { +entry: + %0 = atomicrmw xchg i32* %p, i32 %x seq_cst + ret i32 %0 +} + +; CHECK: @AtomicRmwXchg +; CHECK: store i32 0, +; CHECK: atomicrmw xchg {{.*}} seq_cst +; CHECK: store i32 0, {{.*}} @__msan_retval_tls +; CHECK: ret i32 + + +; atomicrmw max: exactly the same as above + +define i32 @AtomicRmwMax(i32* %p, i32 %x) sanitize_memory { +entry: + %0 = atomicrmw max i32* %p, i32 %x seq_cst + ret i32 %0 +} + +; CHECK: @AtomicRmwMax +; CHECK: store i32 0, +; CHECK: atomicrmw max {{.*}} seq_cst +; CHECK: store i32 0, {{.*}} @__msan_retval_tls +; CHECK: ret i32 + + +; cmpxchg: the same as above, but also check %a shadow + +define i32 @Cmpxchg(i32* %p, i32 %a, i32 %b) sanitize_memory { +entry: + %0 = cmpxchg i32* %p, i32 %a, i32 %b seq_cst + ret i32 %0 +} + +; CHECK: @Cmpxchg +; CHECK: store i32 0, +; CHECK: icmp +; CHECK: br +; CHECK: @__msan_warning +; CHECK: cmpxchg {{.*}} seq_cst +; CHECK: store i32 0, {{.*}} @__msan_retval_tls +; CHECK: ret i32 + + +; relaxed cmpxchg: bump up to "release" + +define i32 
@CmpxchgMonotonic(i32* %p, i32 %a, i32 %b) sanitize_memory { +entry: + %0 = cmpxchg i32* %p, i32 %a, i32 %b monotonic + ret i32 %0 +} + +; CHECK: @CmpxchgMonotonic +; CHECK: store i32 0, +; CHECK: icmp +; CHECK: br +; CHECK: @__msan_warning +; CHECK: cmpxchg {{.*}} release +; CHECK: store i32 0, {{.*}} @__msan_retval_tls +; CHECK: ret i32 + + +; atomic load: preserve alignment, load shadow value after app value + +define i32 @AtomicLoad(i32* %p) sanitize_memory { +entry: + %0 = load atomic i32* %p seq_cst, align 16 + ret i32 %0 +} + +; CHECK: @AtomicLoad +; CHECK: load atomic i32* {{.*}} seq_cst, align 16 +; CHECK: [[SHADOW:%[01-9a-z_]+]] = load i32* {{.*}}, align 16 +; CHECK: store i32 {{.*}}[[SHADOW]], {{.*}} @__msan_retval_tls +; CHECK: ret i32 + + +; atomic load: preserve alignment, load shadow value after app value + +define i32 @AtomicLoadAcquire(i32* %p) sanitize_memory { +entry: + %0 = load atomic i32* %p acquire, align 16 + ret i32 %0 +} + +; CHECK: @AtomicLoadAcquire +; CHECK: load atomic i32* {{.*}} acquire, align 16 +; CHECK: [[SHADOW:%[01-9a-z_]+]] = load i32* {{.*}}, align 16 +; CHECK: store i32 {{.*}}[[SHADOW]], {{.*}} @__msan_retval_tls +; CHECK: ret i32 + + +; atomic load monotonic: bump up to load acquire + +define i32 @AtomicLoadMonotonic(i32* %p) sanitize_memory { +entry: + %0 = load atomic i32* %p monotonic, align 16 + ret i32 %0 +} + +; CHECK: @AtomicLoadMonotonic +; CHECK: load atomic i32* {{.*}} acquire, align 16 +; CHECK: [[SHADOW:%[01-9a-z_]+]] = load i32* {{.*}}, align 16 +; CHECK: store i32 {{.*}}[[SHADOW]], {{.*}} @__msan_retval_tls +; CHECK: ret i32 + + +; atomic load unordered: bump up to load acquire + +define i32 @AtomicLoadUnordered(i32* %p) sanitize_memory { +entry: + %0 = load atomic i32* %p unordered, align 16 + ret i32 %0 +} + +; CHECK: @AtomicLoadUnordered +; CHECK: load atomic i32* {{.*}} acquire, align 16 +; CHECK: [[SHADOW:%[01-9a-z_]+]] = load i32* {{.*}}, align 16 +; CHECK: store i32 {{.*}}[[SHADOW]], {{.*}} 
@__msan_retval_tls +; CHECK: ret i32 + + +; atomic store: preserve alignment, store clean shadow value before app value + +define void @AtomicStore(i32* %p, i32 %x) sanitize_memory { +entry: + store atomic i32 %x, i32* %p seq_cst, align 16 + ret void +} + +; CHECK: @AtomicStore +; CHECK-NOT: @__msan_param_tls +; CHECK: store i32 0, i32* {{.*}}, align 16 +; CHECK: store atomic i32 %x, i32* %p seq_cst, align 16 +; CHECK: ret void + + +; atomic store: preserve alignment, store clean shadow value before app value + +define void @AtomicStoreRelease(i32* %p, i32 %x) sanitize_memory { +entry: + store atomic i32 %x, i32* %p release, align 16 + ret void +} + +; CHECK: @AtomicStoreRelease +; CHECK-NOT: @__msan_param_tls +; CHECK: store i32 0, i32* {{.*}}, align 16 +; CHECK: store atomic i32 %x, i32* %p release, align 16 +; CHECK: ret void + + +; atomic store monotonic: bumped up to store release + +define void @AtomicStoreMonotonic(i32* %p, i32 %x) sanitize_memory { +entry: + store atomic i32 %x, i32* %p monotonic, align 16 + ret void +} + +; CHECK: @AtomicStoreMonotonic +; CHECK-NOT: @__msan_param_tls +; CHECK: store i32 0, i32* {{.*}}, align 16 +; CHECK: store atomic i32 %x, i32* %p release, align 16 +; CHECK: ret void + + +; atomic store unordered: bumped up to store release + +define void @AtomicStoreUnordered(i32* %p, i32 %x) sanitize_memory { +entry: + store atomic i32 %x, i32* %p unordered, align 16 + ret void +} + +; CHECK: @AtomicStoreUnordered +; CHECK-NOT: @__msan_param_tls +; CHECK: store i32 0, i32* {{.*}}, align 16 +; CHECK: store atomic i32 %x, i32* %p release, align 16 +; CHECK: ret void diff --git a/test/Instrumentation/MemorySanitizer/lit.local.cfg b/test/Instrumentation/MemorySanitizer/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Instrumentation/MemorySanitizer/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Instrumentation/MemorySanitizer/msan_basic.ll 
b/test/Instrumentation/MemorySanitizer/msan_basic.ll index 4fa0319..72a992d 100644 --- a/test/Instrumentation/MemorySanitizer/msan_basic.ll +++ b/test/Instrumentation/MemorySanitizer/msan_basic.ll @@ -260,6 +260,8 @@ entry: ; CHECK: @Select ; CHECK: select +; CHECK-NEXT: sext i1 {{.*}} to i32 +; CHECK-NEXT: or i32 ; CHECK-NEXT: select ; CHECK: ret i32 @@ -274,6 +276,13 @@ entry: ret <8 x i16> %cond } +; CHECK: @SelectVector +; CHECK: select <8 x i1> +; CHECK-NEXT: sext <8 x i1> {{.*}} to <8 x i16> +; CHECK-NEXT: or <8 x i16> +; CHECK-NEXT: select <8 x i1> +; CHECK: ret <8 x i16> + ; CHECK-ORIGINS: @SelectVector ; CHECK-ORIGINS: bitcast <8 x i1> {{.*}} to i8 ; CHECK-ORIGINS: icmp ne i8 @@ -281,6 +290,38 @@ entry: ; CHECK-ORIGINS: ret <8 x i16> +; Check that we propagate origin for "select" with scalar condition and vector +; arguments. Select condition shadow is sign-extended to the vector type and +; mixed into the result shadow. + +define <8 x i16> @SelectVector2(<8 x i16> %a, <8 x i16> %b, i1 %c) nounwind uwtable readnone sanitize_memory { +entry: + %cond = select i1 %c, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %cond +} + +; CHECK: @SelectVector2 +; CHECK: select i1 +; CHECK: sext i1 {{.*}} to i128 +; CHECK: bitcast i128 {{.*}} to <8 x i16> +; CHECK: or <8 x i16> +; CHECK: select i1 +; CHECK: ret <8 x i16> + + +define { i64, i64 } @SelectStruct(i1 zeroext %x, { i64, i64 } %a, { i64, i64 } %b) readnone sanitize_memory { +entry: + %c = select i1 %x, { i64, i64 } %a, { i64, i64 } %b + ret { i64, i64 } %c +} + +; CHECK: @SelectStruct +; CHECK: select i1 {{.*}}, { i64, i64 } +; CHECK-NEXT: select i1 {{.*}}, { i64, i64 } { i64 -1, i64 -1 }, { i64, i64 } +; CHECK-NEXT: select i1 {{.*}}, { i64, i64 } +; CHECK: ret { i64, i64 } + + define i8* @IntToPtr(i64 %x) nounwind uwtable readnone sanitize_memory { entry: %0 = inttoptr i64 %x to i8* @@ -420,8 +461,8 @@ define i32 @ShadowLoadAlignmentLarge() nounwind uwtable sanitize_memory { } ; CHECK: @ShadowLoadAlignmentLarge -; 
CHECK: load i32* {{.*}} align 64 ; CHECK: load volatile i32* {{.*}} align 64 +; CHECK: load i32* {{.*}} align 64 ; CHECK: ret i32 define i32 @ShadowLoadAlignmentSmall() nounwind uwtable sanitize_memory { @@ -431,14 +472,14 @@ define i32 @ShadowLoadAlignmentSmall() nounwind uwtable sanitize_memory { } ; CHECK: @ShadowLoadAlignmentSmall -; CHECK: load i32* {{.*}} align 2 ; CHECK: load volatile i32* {{.*}} align 2 +; CHECK: load i32* {{.*}} align 2 ; CHECK: ret i32 ; CHECK-ORIGINS: @ShadowLoadAlignmentSmall +; CHECK-ORIGINS: load volatile i32* {{.*}} align 2 ; CHECK-ORIGINS: load i32* {{.*}} align 2 ; CHECK-ORIGINS: load i32* {{.*}} align 4 -; CHECK-ORIGINS: load volatile i32* {{.*}} align 2 ; CHECK-ORIGINS: ret i32 @@ -578,8 +619,8 @@ define <8 x i8*> @VectorOfPointers(<8 x i8*>* %p) nounwind uwtable sanitize_memo } ; CHECK: @VectorOfPointers -; CHECK: load <8 x i64>* ; CHECK: load <8 x i8*>* +; CHECK: load <8 x i64>* ; CHECK: store <8 x i64> {{.*}} @__msan_retval_tls ; CHECK: ret <8 x i8*> @@ -597,6 +638,31 @@ define void @VACopy(i8* %p1, i8* %p2) nounwind uwtable sanitize_memory { ; CHECK: ret void +; Test that va_start instrumentation does not use va_arg_tls*. +; It should work with a local stack copy instead. + +%struct.__va_list_tag = type { i32, i32, i8*, i8* } +declare void @llvm.va_start(i8*) nounwind + +; Function Attrs: nounwind uwtable +define void @VAStart(i32 %x, ...) { +entry: + %x.addr = alloca i32, align 4 + %va = alloca [1 x %struct.__va_list_tag], align 16 + store i32 %x, i32* %x.addr, align 4 + %arraydecay = getelementptr inbounds [1 x %struct.__va_list_tag]* %va, i32 0, i32 0 + %arraydecay1 = bitcast %struct.__va_list_tag* %arraydecay to i8* + call void @llvm.va_start(i8* %arraydecay1) + ret void +} + +; CHECK: @VAStart +; CHECK: call void @llvm.va_start +; CHECK-NOT: @__msan_va_arg_tls +; CHECK-NOT: @__msan_va_arg_overflow_size_tls +; CHECK: ret void + + ; Test handling of volatile stores. 
; Check that MemorySanitizer does not add a check of the value being stored. @@ -709,3 +775,29 @@ entry: ; CHECK-AA: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 2, i32 2, i1 false) ; CHECK-AA: ret i16 + +; Test origin propagation for insertvalue + +define { i64, i32 } @make_pair_64_32(i64 %x, i32 %y) sanitize_memory { +entry: + %a = insertvalue { i64, i32 } undef, i64 %x, 0 + %b = insertvalue { i64, i32 } %a, i32 %y, 1 + ret { i64, i32 } %b +} + +; CHECK-ORIGINS: @make_pair_64_32 +; First element shadow +; CHECK-ORIGINS: insertvalue { i64, i32 } { i64 -1, i32 -1 }, i64 {{.*}}, 0 +; First element origin +; CHECK-ORIGINS: icmp ne i64 +; CHECK-ORIGINS: select i1 +; First element app value +; CHECK-ORIGINS: insertvalue { i64, i32 } undef, i64 {{.*}}, 0 +; Second element shadow +; CHECK-ORIGINS: insertvalue { i64, i32 } {{.*}}, i32 {{.*}}, 1 +; Second element origin +; CHECK-ORIGINS: icmp ne i32 +; CHECK-ORIGINS: select i1 +; Second element app value +; CHECK-ORIGINS: insertvalue { i64, i32 } {{.*}}, i32 {{.*}}, 1 +; CHECK-ORIGINS: ret { i64, i32 } diff --git a/test/Instrumentation/MemorySanitizer/return_from_main.ll b/test/Instrumentation/MemorySanitizer/return_from_main.ll new file mode 100644 index 0000000..81dc888 --- /dev/null +++ b/test/Instrumentation/MemorySanitizer/return_from_main.ll @@ -0,0 +1,18 @@ +; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() sanitize_memory { +entry: + %call = tail call i32 @f() + ret i32 %call +} + +declare i32 @f() sanitize_memory + +; CHECK: @main +; CHECK: call i32 @f() +; CHECK: store i32 0, {{.*}} @__msan_retval_tls +; CHECK: br i1 +; CHECK: call void @__msan_warning_noreturn() +; CHECK: ret i32 diff --git a/test/Instrumentation/MemorySanitizer/vector_cvt.ll 
b/test/Instrumentation/MemorySanitizer/vector_cvt.ll new file mode 100644 index 0000000..9425e25 --- /dev/null +++ b/test/Instrumentation/MemorySanitizer/vector_cvt.ll @@ -0,0 +1,66 @@ +; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone +declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone +declare x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float>) nounwind readnone + +; Single argument vector conversion. + +define i32 @test_cvtsd2si(<2 x double> %value) sanitize_memory { +entry: + %0 = tail call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %value) + ret i32 %0 +} + +; CHECK: @test_cvtsd2si +; CHECK: [[S:%[_01-9a-z]+]] = extractelement <2 x i64> {{.*}}, i32 0 +; CHECK: icmp ne {{.*}}[[S]], 0 +; CHECK: br +; CHECK: call void @__msan_warning_noreturn +; CHECK: call i32 @llvm.x86.sse2.cvtsd2si +; CHECK: store i32 0, {{.*}} @__msan_retval_tls +; CHECK: ret i32 + +; Two-argument vector conversion. + +define <2 x double> @test_cvtsi2sd(i32 %a, double %b) sanitize_memory { +entry: + %vec = insertelement <2 x double> undef, double %b, i32 1 + %0 = tail call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %vec, i32 %a) + ret <2 x double> %0 +} + +; CHECK: @test_cvtsi2sd +; CHECK: [[Sa:%[_01-9a-z]+]] = load i32* {{.*}} @__msan_param_tls +; CHECK: [[Sout0:%[_01-9a-z]+]] = insertelement <2 x i64> <i64 -1, i64 -1>, i64 {{.*}}, i32 1 +; Clear low half of result shadow +; CHECK: [[Sout:%[_01-9a-z]+]] = insertelement <2 x i64> {{.*}}[[Sout0]], i64 0, i32 0 +; Trap on %a shadow. 
+; CHECK: icmp ne {{.*}}[[Sa]], 0 +; CHECK: br +; CHECK: call void @__msan_warning_noreturn +; CHECK: call <2 x double> @llvm.x86.sse2.cvtsi2sd +; CHECK: store <2 x i64> {{.*}}[[Sout]], {{.*}} @__msan_retval_tls +; CHECK: ret <2 x double> + +; x86_mmx packed vector conversion. + +define x86_mmx @test_cvtps2pi(<4 x float> %value) sanitize_memory { +entry: + %0 = tail call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %value) + ret x86_mmx %0 +} + +; CHECK: @test_cvtps2pi +; CHECK: extractelement <4 x i32> {{.*}}, i32 0 +; CHECK: extractelement <4 x i32> {{.*}}, i32 1 +; CHECK: [[S:%[_01-9a-z]+]] = or i32 +; CHECK: icmp ne {{.*}}[[S]], 0 +; CHECK: br +; CHECK: call void @__msan_warning_noreturn +; CHECK: call x86_mmx @llvm.x86.sse.cvtps2pi +; CHECK: store i64 0, {{.*}} @__msan_retval_tls +; CHECK: ret x86_mmx diff --git a/test/Instrumentation/MemorySanitizer/wrap_indirect_calls.ll b/test/Instrumentation/MemorySanitizer/wrap_indirect_calls.ll new file mode 100644 index 0000000..555695d --- /dev/null +++ b/test/Instrumentation/MemorySanitizer/wrap_indirect_calls.ll @@ -0,0 +1,34 @@ +; RUN: opt < %s -msan -msan-check-access-address=0 -msan-wrap-indirect-calls=zzz -msan-wrap-indirect-calls-fast=0 -S | FileCheck %s +; RUN: opt < %s -msan -msan-check-access-address=0 -msan-wrap-indirect-calls=zzz -msan-wrap-indirect-calls-fast=1 -S | FileCheck -check-prefix=CHECK-FAST %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Test for -msan-wrap-indirect-calls functionality. +; Replaces indirect call to %f with a call to whatever is returned from the +; wrapper function. + +; This does not depend on the sanitize_memory attribute. 
+define i32 @func(i32 (i32, i32)* nocapture %f, i32 %x, i32 %y) { +entry: + %call = tail call i32 %f(i32 %x, i32 %y) + ret i32 %call +} + +; CHECK: @func +; CHECK: bitcast i32 (i32, i32)* %f to void ()* +; CHECK: call void ()* (void ()*)* @zzz(void ()* +; CHECK: [[A:%[01-9a-z_.]+]] = bitcast void ()* {{.*}} to i32 (i32, i32)* +; CHECK: call i32 {{.*}}[[A]](i32 {{.*}}, i32 {{.*}}) +; CHECK: ret i32 + +; CHECK-FAST: @func +; CHECK-FAST: bitcast i32 (i32, i32)* %f to void ()* +; CHECK-FAST-DAG: icmp ult void ()* {{.*}}, bitcast (i32* @__executable_start to void ()*) +; CHECK-FAST-DAG: icmp uge void ()* {{.*}}, bitcast (i32* @_end to void ()*) +; CHECK-FAST: or i1 +; CHECK-FAST: br i1 +; CHECK-FAST: call void ()* (void ()*)* @zzz(void ()* +; CHECK-FAST: br label +; CHECK-FAST: [[A:%[01-9a-z_.]+]] = phi i32 (i32, i32)* [ %f, %entry ], [ {{.*}} ] +; CHECK-FAST: call i32 {{.*}}[[A]](i32 {{.*}}, i32 {{.*}}) +; CHECK-FAST: ret i32 diff --git a/test/Instrumentation/ThreadSanitizer/lit.local.cfg b/test/Instrumentation/ThreadSanitizer/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Instrumentation/ThreadSanitizer/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Instrumentation/ThreadSanitizer/no_sanitize_thread.ll b/test/Instrumentation/ThreadSanitizer/no_sanitize_thread.ll new file mode 100644 index 0000000..3949fd5 --- /dev/null +++ b/test/Instrumentation/ThreadSanitizer/no_sanitize_thread.ll @@ -0,0 +1,36 @@ +; RUN: opt < %s -tsan -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +; no sanitize_thread attribute here +define i32 @read_4_bytes(i32* %a) { +entry: + %tmp1 = load i32* %a, align 4 + ret i32 %tmp1 +} + +; CHECK: define i32 @read_4_bytes(i32* %a) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %tmp1 = load i32* %a, align 
4 +; CHECK: ret i32 %tmp1 + +; no sanitize_thread attribute here +define i32 @read_4_bytes_and_call(i32* %a) { +entry: + call void @foo() + %tmp1 = load i32* %a, align 4 + ret i32 %tmp1 +} + +; CHECK: define i32 @read_4_bytes_and_call(i32* %a) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call i8* @llvm.returnaddress(i32 0) +; CHECK-NEXT: call void @__tsan_func_entry(i8* %0) +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: %tmp1 = load i32* %a, align 4 +; CHECK-NEXT: call void @__tsan_func_exit() +; CHECK-NEXT: ret i32 %tmp1 + +declare void @foo() + diff --git a/test/Instrumentation/ThreadSanitizer/read_before_write.ll b/test/Instrumentation/ThreadSanitizer/read_before_write.ll index 482362a..cb6603b 100644 --- a/test/Instrumentation/ThreadSanitizer/read_before_write.ll +++ b/test/Instrumentation/ThreadSanitizer/read_before_write.ll @@ -2,7 +2,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -define void @IncrementMe(i32* nocapture %ptr) nounwind uwtable { +define void @IncrementMe(i32* nocapture %ptr) nounwind uwtable sanitize_thread { entry: %0 = load i32* %ptr, align 4 %inc = add nsw i32 %0, 1 @@ -14,7 +14,7 @@ entry: ; CHECK: __tsan_write ; CHECK: ret void -define void @IncrementMeWithCallInBetween(i32* nocapture %ptr) nounwind uwtable { +define void @IncrementMeWithCallInBetween(i32* nocapture %ptr) nounwind uwtable sanitize_thread { entry: %0 = load i32* %ptr, align 4 %inc = add nsw i32 %0, 1 diff --git a/test/Instrumentation/ThreadSanitizer/read_from_global.ll b/test/Instrumentation/ThreadSanitizer/read_from_global.ll index 7b6b94e..33614a3 100644 --- a/test/Instrumentation/ThreadSanitizer/read_from_global.ll +++ b/test/Instrumentation/ThreadSanitizer/read_from_global.ll @@ -4,7 +4,7 @@ target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @const_global = external constant i32 -define i32 @read_from_const_global() nounwind uwtable readnone { +define i32 @read_from_const_global() nounwind uwtable sanitize_thread readnone { entry: %0 = load i32* @const_global, align 4 ret i32 %0 @@ -14,7 +14,7 @@ entry: ; CHECK: ret i32 @non_const_global = global i32 0, align 4 -define i32 @read_from_non_const_global() nounwind uwtable readonly { +define i32 @read_from_non_const_global() nounwind uwtable sanitize_thread readonly { entry: %0 = load i32* @non_const_global, align 4 ret i32 %0 @@ -25,7 +25,7 @@ entry: ; CHECK: ret i32 @const_global_array = external constant [10 x i32] -define i32 @read_from_const_global_array(i32 %idx) nounwind uwtable readnone { +define i32 @read_from_const_global_array(i32 %idx) nounwind uwtable sanitize_thread readnone { entry: %idxprom = sext i32 %idx to i64 %arrayidx = getelementptr inbounds [10 x i32]* @const_global_array, i64 0, i64 %idxprom @@ -38,10 +38,10 @@ entry: ; CHECK: ret i32 %struct.Foo = type { i32 (...)** } -define void @call_virtual_func(%struct.Foo* %f) uwtable { +define void @call_virtual_func(%struct.Foo* %f) uwtable sanitize_thread { entry: %0 = bitcast %struct.Foo* %f to void (%struct.Foo*)*** - %vtable = load void (%struct.Foo*)*** %0, align 8, !tbaa !3 + %vtable = load void (%struct.Foo*)*** %0, align 8, !tbaa !2 %1 = load void (%struct.Foo*)** %vtable, align 8 call void %1(%struct.Foo* %f) ret void @@ -54,8 +54,6 @@ entry: ; CHECK: = load ; CHECK: ret void -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA", null} -!3 = metadata !{metadata !"vtable pointer", metadata !2} - +!0 = metadata !{metadata !"Simple C/C++ TBAA", null} +!1 = metadata !{metadata !"vtable pointer", metadata !0} +!2 = metadata !{metadata !1, 
metadata !1, i64 0} diff --git a/test/Instrumentation/ThreadSanitizer/tsan_basic.ll b/test/Instrumentation/ThreadSanitizer/tsan_basic.ll index 19dd45b..d449a97 100644 --- a/test/Instrumentation/ThreadSanitizer/tsan_basic.ll +++ b/test/Instrumentation/ThreadSanitizer/tsan_basic.ll @@ -3,7 +3,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" -define i32 @read_4_bytes(i32* %a) { +define i32 @read_4_bytes(i32* %a) sanitize_thread { entry: %tmp1 = load i32* %a, align 4 ret i32 %tmp1 @@ -11,7 +11,7 @@ entry: ; CHECK: @llvm.global_ctors = {{.*}}@__tsan_init -; CHECK: define i32 @read_4_bytes(i32* %a) { +; CHECK: define i32 @read_4_bytes(i32* %a) ; CHECK: call void @__tsan_func_entry(i8* %0) ; CHECK-NEXT: %1 = bitcast i32* %a to i8* ; CHECK-NEXT: call void @__tsan_read4(i8* %1) diff --git a/test/Instrumentation/ThreadSanitizer/vptr_read.ll b/test/Instrumentation/ThreadSanitizer/vptr_read.ll index 404ca3f..811ad8d 100644 --- a/test/Instrumentation/ThreadSanitizer/vptr_read.ll +++ b/test/Instrumentation/ThreadSanitizer/vptr_read.ll @@ -2,12 +2,12 @@ ; Check that vptr reads are treated in a special way. 
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -define i8 @Foo(i8* %a) nounwind uwtable { +define i8 @Foo(i8* %a) nounwind uwtable sanitize_thread { entry: ; CHECK: call void @__tsan_vptr_read %0 = load i8* %a, align 8, !tbaa !0 ret i8 %0 } -!0 = metadata !{metadata !"vtable pointer", metadata !1} +!0 = metadata !{metadata !2, metadata !2, i64 0} !1 = metadata !{metadata !"Simple C/C++ TBAA", null} - +!2 = metadata !{metadata !"vtable pointer", metadata !1} diff --git a/test/Instrumentation/ThreadSanitizer/vptr_update.ll b/test/Instrumentation/ThreadSanitizer/vptr_update.ll index f318659..95c7bb0 100644 --- a/test/Instrumentation/ThreadSanitizer/vptr_update.ll +++ b/test/Instrumentation/ThreadSanitizer/vptr_update.ll @@ -2,12 +2,12 @@ ; Check that vtable pointer updates are treated in a special way. target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -define void @Foo(i8** nocapture %a, i8* %b) nounwind uwtable { +define void @Foo(i8** nocapture %a, i8* %b) nounwind uwtable sanitize_thread { entry: ; CHECK: call void @__tsan_vptr_update store i8* %b, i8** %a, align 8, !tbaa !0 ret void } -!0 = metadata !{metadata !"vtable pointer", metadata !1} +!0 = metadata !{metadata !2, metadata !2, i64 0} !1 = metadata !{metadata !"Simple C/C++ TBAA", null} - +!2 = metadata !{metadata !"vtable pointer", metadata !1} diff --git a/test/Integer/lit.local.cfg b/test/Integer/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Integer/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/JitListener/lit.local.cfg b/test/JitListener/lit.local.cfg index a5aa6de..d995820 100644 --- a/test/JitListener/lit.local.cfg +++ b/test/JitListener/lit.local.cfg @@ -1,11 +1,3 @@ -config.suffixes = 
['.ll'] - -def getRoot(config): - if not config.parent: - return config - return getRoot(config.parent) - -root = getRoot(config) -if not root.llvm_use_intel_jitevents == "ON": +if not config.root.llvm_use_intel_jitevents == "ON": config.unsupported = True diff --git a/test/JitListener/test-common-symbols.ll b/test/JitListener/test-common-symbols.ll index 91891d8..a389bf7 100644 --- a/test/JitListener/test-common-symbols.ll +++ b/test/JitListener/test-common-symbols.ll @@ -76,13 +76,14 @@ for.end: ; preds = %for.cond declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!35} !0 = metadata !{i32 720913, metadata !34, i32 12, metadata !"clang version 3.1 ()", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12, null, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 720942, metadata !34, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !10, i32 0} ; [ DW_TAG_subprogram ] !6 = metadata !{i32 720937, metadata !34} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9} !9 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !10 = metadata !{metadata !11} @@ -92,7 +93,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !15 = metadata !{i32 720948, i32 0, null, metadata !"zero_double", metadata !"zero_double", metadata !"", metadata !6, i32 2, metadata !16, 
i32 0, i32 1, double* @zero_double, null} ; [ DW_TAG_variable ] !16 = metadata !{i32 720932, null, null, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] !17 = metadata !{i32 720948, i32 0, null, metadata !"zero_arr", metadata !"zero_arr", metadata !"", metadata !6, i32 3, metadata !18, i32 0, i32 1, [10 x i32]* @zero_arr, null} ; [ DW_TAG_variable ] -!18 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 320, i64 32, i32 0, i32 0, metadata !9, metadata !19, i32 0, i32 0} ; [ DW_TAG_array_type ] +!18 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 320, i64 32, i32 0, i32 0, metadata !9, metadata !19, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 320, align 32, offset 0] [from int] !19 = metadata !{metadata !20} !20 = metadata !{i32 720929, i64 0, i64 10} ; [ DW_TAG_subrange_type ] !21 = metadata !{i32 7, i32 5, metadata !22, null} @@ -109,3 +110,4 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !32 = metadata !{i32 12, i32 29, metadata !26, null} !33 = metadata !{i32 15, i32 5, metadata !22, null} !34 = metadata !{metadata !"test-common-symbols.c", metadata !"/store/store/llvm/build"} +!35 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/JitListener/test-inline.ll b/test/JitListener/test-inline.ll index 5c16c94..0d365b1 100644 --- a/test/JitListener/test-inline.ll +++ b/test/JitListener/test-inline.ll @@ -132,30 +132,31 @@ entry: declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!78} !0 = metadata !{i32 786449, metadata !77, i32 4, metadata !"clang version 3.3 (ssh://akaylor@git-amr-1.devtools.intel.com:29418/ssg_llvm-clang2 gitosis@miro.kw.intel.com:clang.git 39450d0469e0d5589ad39fd0b20b5742750619a0) (ssh://akaylor@git-amr-1.devtools.intel.com:29418/ssg_llvm-llvm gitosis@miro.kw.intel.com:llvm.git 376642ed620ecae05b68c7bc81f79aeb2065abe0)", i1 
true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !43, null, metadata !""} ; [ DW_TAG_compile_unit ] [/home/akaylor/dev/test-inline.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{i32 0} !3 = metadata !{metadata !5, metadata !35, metadata !40} !5 = metadata !{i32 786478, metadata !77, metadata !6, metadata !"test_parameters", metadata !"test_parameters", metadata !"_Z15test_parametersPfPA2_dR11char_structPPitm", i32 32, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, double (float*, [2 x double]*, %struct.char_struct*, i32**, i16, i64)* @_Z15test_parametersPfPA2_dR11char_structPPitm, null, null, metadata !1, i32 33} ; [ DW_TAG_subprogram ] [line 32] [def] [scope 33] [test_parameters] !6 = metadata !{i32 786473, metadata !77} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9, metadata !10, metadata !12, metadata !16, metadata !29, metadata !32, metadata !33} !9 = metadata !{i32 786468, null, null, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float] !10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from float] !11 = metadata !{i32 786468, null, null, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float] !12 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !13} ; [ 
DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ] -!13 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 128, i64 64, i32 0, i32 0, metadata !9, metadata !14, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 128, align 64, offset 0] [from double] +!13 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 128, i64 64, i32 0, i32 0, metadata !9, metadata !14, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 64, offset 0] [from double] !14 = metadata !{metadata !15} !15 = metadata !{i32 786465, i64 0, i64 2} ; [ DW_TAG_subrange_type ] [0, 1] !16 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !17} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from char_struct] -!17 = metadata !{i32 786451, metadata !77, null, metadata !"char_struct", i32 22, i64 24, i64 8, i32 0, i32 0, null, metadata !18, i32 0, null, null} ; [ DW_TAG_structure_type ] [char_struct] [line 22, size 24, align 8, offset 0] [from ] +!17 = metadata !{i32 786451, metadata !77, null, metadata !"char_struct", i32 22, i64 24, i64 8, i32 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [char_struct] [line 22, size 24, align 8, offset 0] [def] [from ] !18 = metadata !{metadata !19, metadata !21, metadata !23} !19 = metadata !{i32 786445, metadata !77, metadata !17, metadata !"c", i32 23, i64 8, i64 8, i64 0, i32 0, metadata !20} ; [ DW_TAG_member ] [c] [line 23, size 8, align 8, offset 0] [from char] !20 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char] !21 = metadata !{i32 786445, metadata !77, metadata !17, metadata !"c2", i32 24, i64 16, i64 8, i64 8, i32 0, metadata !22} ; [ DW_TAG_member ] [c2] [line 24, size 16, align 8, offset 8] [from ] -!22 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 16, i64 8, 
i32 0, i32 0, metadata !20, metadata !14, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 16, align 8, offset 0] [from char] +!22 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 16, i64 8, i32 0, i32 0, metadata !20, metadata !14, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 16, align 8, offset 0] [from char] !23 = metadata !{i32 786478, metadata !77, metadata !17, metadata !"char_struct", metadata !"char_struct", metadata !"", i32 22, metadata !24, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !27, i32 22} ; [ DW_TAG_subprogram ] [line 22] [char_struct] -!24 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!24 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !25 = metadata !{null, metadata !26} !26 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !17} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char_struct] !27 = metadata !{metadata !28} @@ -167,12 +168,12 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, !33 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !34} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from long unsigned int] !34 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned] !35 = metadata !{i32 786478, metadata !77, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 38, metadata !36, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 
false, i32 (i32, i8**)* @main, null, null, metadata !1, i32 39} ; [ DW_TAG_subprogram ] [line 38] [def] [scope 39] [main] -!36 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !37, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!36 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !37, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !37 = metadata !{metadata !31, metadata !31, metadata !38} !38 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !39} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ] !39 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char] !40 = metadata !{i32 786478, metadata !77, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3foov", i32 27, metadata !41, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z3foov, null, null, metadata !1, i32 28} ; [ DW_TAG_subprogram ] [line 27] [def] [scope 28] [foo] -!41 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !42, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!41 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !42, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !42 = metadata !{metadata !31} !43 = metadata !{metadata !45} !45 = metadata !{i32 786484, i32 0, null, metadata !"compound_char", metadata !"compound_char", metadata !"", metadata !6, i32 25, metadata !17, i32 0, i32 1, %struct.char_struct* @compound_char, null} ; [ DW_TAG_variable ] [compound_char] [line 25] [def] @@ -198,7 +199,7 @@ 
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, !65 = metadata !{i32 786688, metadata !63, metadata !"f", metadata !6, i32 41, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [f] [line 41] !66 = metadata !{i32 41, i32 0, metadata !63, null} !67 = metadata !{i32 786688, metadata !63, metadata !"d", metadata !6, i32 42, metadata !68, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [d] [line 42] -!68 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 256, i64 64, i32 0, i32 0, metadata !9, metadata !69, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 256, align 64, offset 0] [from double] +!68 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 256, i64 64, i32 0, i32 0, metadata !9, metadata !69, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 256, align 64, offset 0] [from double] !69 = metadata !{metadata !15, metadata !15} !70 = metadata !{i32 42, i32 0, metadata !63, null} !71 = metadata !{i32 44, i32 0, metadata !63, null} @@ -208,3 +209,4 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, !75 = metadata !{i32 48, i32 0, metadata !63, null} !76 = metadata !{i32 49, i32 0, metadata !63, null} !77 = metadata !{metadata !"test-inline.cpp", metadata !"/home/akaylor/dev"} +!78 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/JitListener/test-parameters.ll b/test/JitListener/test-parameters.ll index 96af18e..7feb6bb 100644 --- a/test/JitListener/test-parameters.ll +++ b/test/JitListener/test-parameters.ll @@ -131,34 +131,35 @@ entry: declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!78} !0 = metadata !{i32 786449, metadata !77, i32 4, metadata !"clang version 3.3 (ssh://akaylor@git-amr-1.devtools.intel.com:29418/ssg_llvm-clang2 gitosis@miro.kw.intel.com:clang.git 39450d0469e0d5589ad39fd0b20b5742750619a0) 
(ssh://akaylor@git-amr-1.devtools.intel.com:29418/ssg_llvm-llvm gitosis@miro.kw.intel.com:llvm.git 376642ed620ecae05b68c7bc81f79aeb2065abe0)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !43, null, metadata !""} ; [ DW_TAG_compile_unit ] [/home/akaylor/dev/test-parameters.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{i32 0} !3 = metadata !{metadata !5, metadata !10, metadata !38} !5 = metadata !{i32 786478, metadata !77, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3foov", i32 27, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z3foov, null, null, metadata !1, i32 28} ; [ DW_TAG_subprogram ] [line 27] [def] [scope 28] [foo] !6 = metadata !{i32 786473, metadata !77} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9} !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !10 = metadata !{i32 786478, metadata !77, metadata !6, metadata !"test_parameters", metadata !"test_parameters", metadata !"_Z15test_parametersPfPA2_dR11char_structPPitm", i32 32, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, double (float*, [2 x double]*, %struct.char_struct*, i32**, i16, i64)* @_Z15test_parametersPfPA2_dR11char_structPPitm, null, null, metadata !1, i32 33} ; [ DW_TAG_subprogram ] [line 32] [def] [scope 33] [test_parameters] -!11 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, i32 0} 
; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!11 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !12 = metadata !{metadata !13, metadata !14, metadata !16, metadata !20, metadata !33, metadata !35, metadata !36} !13 = metadata !{i32 786468, null, null, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float] !14 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !15} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from float] !15 = metadata !{i32 786468, null, null, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float] !16 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !17} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ] -!17 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 128, i64 64, i32 0, i32 0, metadata !13, metadata !18, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 128, align 64, offset 0] [from double] +!17 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 128, i64 64, i32 0, i32 0, metadata !13, metadata !18, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 64, offset 0] [from double] !18 = metadata !{metadata !19} !19 = metadata !{i32 786465, i64 0, i64 2} ; [ DW_TAG_subrange_type ] [0, 1] !20 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !21} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from char_struct] -!21 = metadata !{i32 786451, metadata !77, null, metadata !"char_struct", i32 22, i64 24, i64 8, i32 0, i32 0, null, metadata !22, 
i32 0, null, null} ; [ DW_TAG_structure_type ] [char_struct] [line 22, size 24, align 8, offset 0] [from ] +!21 = metadata !{i32 786451, metadata !77, null, metadata !"char_struct", i32 22, i64 24, i64 8, i32 0, i32 0, null, metadata !22, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [char_struct] [line 22, size 24, align 8, offset 0] [def] [from ] !22 = metadata !{metadata !23, metadata !25, metadata !27} !23 = metadata !{i32 786445, metadata !77, metadata !21, metadata !"c", i32 23, i64 8, i64 8, i64 0, i32 0, metadata !24} ; [ DW_TAG_member ] [c] [line 23, size 8, align 8, offset 0] [from char] !24 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char] !25 = metadata !{i32 786445, metadata !77, metadata !21, metadata !"c2", i32 24, i64 16, i64 8, i64 8, i32 0, metadata !26} ; [ DW_TAG_member ] [c2] [line 24, size 16, align 8, offset 8] [from ] -!26 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 16, i64 8, i32 0, i32 0, metadata !24, metadata !18, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 16, align 8, offset 0] [from char] +!26 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 16, i64 8, i32 0, i32 0, metadata !24, metadata !18, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 16, align 8, offset 0] [from char] !27 = metadata !{i32 786478, metadata !77, metadata !21, metadata !"char_struct", metadata !"char_struct", metadata !"", i32 22, metadata !28, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !31, i32 22} ; [ DW_TAG_subprogram ] [line 22] [char_struct] -!28 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !29, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!28 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, 
i32 0, null, metadata !29, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !29 = metadata !{null, metadata !30} !30 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !21} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char_struct] !31 = metadata !{metadata !32} @@ -169,7 +170,7 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, !36 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !37} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from long unsigned int] !37 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned] !38 = metadata !{i32 786478, metadata !77, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 38, metadata !39, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !1, i32 39} ; [ DW_TAG_subprogram ] [line 38] [def] [scope 39] [main] -!39 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !40, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!39 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !40, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !40 = metadata !{metadata !9, metadata !9, metadata !41} !41 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !42} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ] !42 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 
64, offset 0] [from char] @@ -197,7 +198,7 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, !65 = metadata !{i32 786688, metadata !63, metadata !"f", metadata !6, i32 41, metadata !15, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [f] [line 41] !66 = metadata !{i32 41, i32 0, metadata !63, null} !67 = metadata !{i32 786688, metadata !63, metadata !"d", metadata !6, i32 42, metadata !68, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [d] [line 42] -!68 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 256, i64 64, i32 0, i32 0, metadata !13, metadata !69, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 256, align 64, offset 0] [from double] +!68 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 256, i64 64, i32 0, i32 0, metadata !13, metadata !69, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 256, align 64, offset 0] [from double] !69 = metadata !{metadata !19, metadata !19} !70 = metadata !{i32 42, i32 0, metadata !63, null} !71 = metadata !{i32 44, i32 0, metadata !63, null} @@ -207,3 +208,4 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, !75 = metadata !{i32 48, i32 0, metadata !63, null} !76 = metadata !{i32 49, i32 0, metadata !63, null} !77 = metadata !{metadata !"test-parameters.cpp", metadata !"/home/akaylor/dev"} +!78 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/LTO/cfi_endproc.ll b/test/LTO/cfi_endproc.ll new file mode 100644 index 0000000..a5cc649 --- /dev/null +++ b/test/LTO/cfi_endproc.ll @@ -0,0 +1,37 @@ +; RUN: llvm-as < %s >%t1 +; RUN: llvm-lto -o %t2 %t1 +; RUN: llvm-nm %t2 | FileCheck %s -check-prefix=NOEXPORT +; RUN: llvm-lto -o %t3 -exported-symbol=main %t1 +; RUN: llvm-nm %t3 | FileCheck %s -check-prefix=EXPORT + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = 
"x86_64-unknown-linux-gnu" + +module asm ".text" +module asm ".align 16, 0x90" +module asm ".type PR14512, @function" +module asm "PR14512:.cfi_startproc" +module asm "ret" +module asm ".cfi_endproc" + +declare void @PR14512() + +; Without -exported-symbol, main should be eliminated by LTO. +; With -exported-symbol=main, main should be preserved by LTO. +define i32 @main(i32 %argc, i8** %argv) { +; NOEXPORT-NOT: main +; EXPORT: main + call void @PR14512() + ret i32 0 +} + +; RUN: llvm-lto -o %t -dso-symbol=zed1 -dso-symbol=zed2 %t1 -disable-opt +; RUN: llvm-nm %t | FileCheck %s -check-prefix=ZED1_AND_ZED2 +; ZED1_AND_ZED2: V zed1 +@zed1 = linkonce_odr global i32 42 +define i32* @get_zed1() { + ret i32* @zed1 +} + +; ZED1_AND_ZED2: d zed2 +@zed2 = linkonce_odr unnamed_addr global i32 42 diff --git a/test/LTO/linkonce_odr_func.ll b/test/LTO/linkonce_odr_func.ll new file mode 100644 index 0000000..8a49326 --- /dev/null +++ b/test/LTO/linkonce_odr_func.ll @@ -0,0 +1,45 @@ +; RUN: llvm-as < %s >%t1 +; RUN: llvm-lto -o %t2 -dso-symbol=foo1 -dso-symbol=foo2 -dso-symbol=foo3 \ +; RUN: -dso-symbol=foo4 %t1 -disable-opt +; RUN: llvm-nm %t2 | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK: t foo1 +define linkonce_odr void @foo1() noinline { + ret void +} + +; CHECK: W foo2 +define linkonce_odr void @foo2() noinline { + ret void +} + +; CHECK: t foo3 +define linkonce_odr void @foo3() noinline { + ret void +} + +; CHECK: W foo4 +define linkonce_odr void @foo4() noinline { + ret void +} + +declare void @f(void()*) + +declare void @p() + +define void @bar() { +bb0: + call void @foo1() + call void @f(void()* @foo2) + invoke void @foo3() to label %bb1 unwind label %clean +bb1: + invoke void @f(void()* @foo4) to label %bb2 unwind label %clean +bb2: + ret void +clean: + landingpad {i32, i32} 
personality void()* @p cleanup + ret void +} diff --git a/test/LTO/lit.local.cfg b/test/LTO/lit.local.cfg new file mode 100644 index 0000000..6df0e03 --- /dev/null +++ b/test/LTO/lit.local.cfg @@ -0,0 +1,3 @@ +targets = set(config.root.targets_to_build.split()) +if not 'X86' in targets: + config.unsupported = True diff --git a/test/LTO/runtime-library.ll b/test/LTO/runtime-library.ll new file mode 100644 index 0000000..76fc6f0 --- /dev/null +++ b/test/LTO/runtime-library.ll @@ -0,0 +1,27 @@ +; runtime library implementations should be added to llvm.compiler.used +; RUN: llvm-as <%s >%t1 +; RUN: llvm-lto -o %t2 %t1 +; RUN: llvm-nm -no-sort %t2 | FileCheck %s -check-prefix=KEEP -check-prefix=LOSE + +target triple = "x86_64-apple-darwin9" + +; KEEP-LABEL: _puts +define void @puts() { + ret void +} + +; KEEP-LABEL: ___divti3 +define void @__divti3() { + ret void +} + +; KEEP-LABEL: _memset +define void @memset() { + ret void +} + +; LOSE-NOT: _myprintf +define void @myprintf() { + ret void +} + diff --git a/test/Linker/2011-08-04-DebugLoc.ll b/test/Linker/2011-08-04-DebugLoc.ll index 5daf33b..d26e8cd 100644 --- a/test/Linker/2011-08-04-DebugLoc.ll +++ b/test/Linker/2011-08-04-DebugLoc.ll @@ -14,12 +14,13 @@ define i32 @foo() nounwind ssp { } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!11} !llvm.dbg.sp = !{!1} !0 = metadata !{i32 589841, metadata !8, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-209.11) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, metadata !9, metadata !9, metadata !10, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!1 = metadata !{i32 589870, metadata !8, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589870, metadata !8, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, 
i1 false, i32 ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [foo] !2 = metadata !{i32 589865, metadata !8} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 589845, metadata !8, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 589845, metadata !8, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5} !5 = metadata !{i32 589860, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !6 = metadata !{i32 2, i32 13, metadata !7, null} @@ -27,3 +28,4 @@ define i32 @foo() nounwind ssp { !8 = metadata !{metadata !"a.c", metadata !"/private/tmp"} !9 = metadata !{i32 0} !10 = metadata !{metadata !1} +!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Linker/2011-08-04-DebugLoc2.ll b/test/Linker/2011-08-04-DebugLoc2.ll index 3f8504f..c20941d 100644 --- a/test/Linker/2011-08-04-DebugLoc2.ll +++ b/test/Linker/2011-08-04-DebugLoc2.ll @@ -11,12 +11,13 @@ define i32 @bar() nounwind ssp { } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!11} !llvm.dbg.sp = !{!1} !0 = metadata !{i32 589841, metadata !8, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-209.11) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, metadata !9, metadata !9, metadata !10, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!1 = metadata !{i32 589870, metadata !8, metadata !2, metadata !"bar", metadata !"bar", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589870, metadata !8, metadata !2, metadata !"bar", metadata !"bar", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, 
i32 ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 0] [bar] !2 = metadata !{i32 589865, metadata !8} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 589845, metadata !8, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 589845, metadata !8, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5} !5 = metadata !{i32 589860, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !6 = metadata !{i32 1, i32 13, metadata !7, null} @@ -24,3 +25,4 @@ define i32 @bar() nounwind ssp { !8 = metadata !{metadata !"b.c", metadata !"/private/tmp"} !9 = metadata !{i32 0} !10 = metadata !{metadata !1} +!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Linker/2011-08-04-Metadata.ll b/test/Linker/2011-08-04-Metadata.ll index b800e5d..cdf4f6f 100644 --- a/test/Linker/2011-08-04-Metadata.ll +++ b/test/Linker/2011-08-04-Metadata.ll @@ -15,13 +15,14 @@ entry: } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!11} !llvm.dbg.sp = !{!1} !llvm.dbg.gv = !{!5} !0 = metadata !{i32 589841, metadata !9, i32 12, metadata !"clang version 3.0 ()", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !10, null, null, metadata !""} -!1 = metadata !{i32 589870, metadata !9, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, void ()* @foo, null, null, null, i32 0} +!1 = metadata !{i32 589870, metadata !9, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 0] [foo] !2 = metadata !{i32 
589865, metadata !9} -!3 = metadata !{i32 589845, metadata !9, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} +!3 = metadata !{i32 589845, metadata !9, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{null} !5 = metadata !{i32 589876, i32 0, metadata !0, metadata !"x", metadata !"x", metadata !"", metadata !2, i32 2, metadata !6, i32 1, i32 1, i32* @x} !6 = metadata !{i32 589860, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} @@ -29,3 +30,4 @@ entry: !8 = metadata !{i32 589835, metadata !9, metadata !1, i32 3, i32 12, i32 0} !9 = metadata !{metadata !"/tmp/one.c", metadata !"/Volumes/Lalgate/Slate/D"} !10 = metadata !{metadata !1} +!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Linker/2011-08-04-Metadata2.ll b/test/Linker/2011-08-04-Metadata2.ll index 311a7c6..80884cc 100644 --- a/test/Linker/2011-08-04-Metadata2.ll +++ b/test/Linker/2011-08-04-Metadata2.ll @@ -15,13 +15,14 @@ entry: } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!11} !llvm.dbg.sp = !{!1} !llvm.dbg.gv = !{!5} !0 = metadata !{i32 589841, metadata !9, i32 12, metadata !"clang version 3.0 ()", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !10, null, null, metadata !""} -!1 = metadata !{i32 589870, metadata !9, metadata !2, metadata !"bar", metadata !"bar", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, void ()* @bar, null, null, null, i32 0} +!1 = metadata !{i32 589870, metadata !9, metadata !2, metadata !"bar", metadata !"bar", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [bar] !2 = metadata !{i32 589865, metadata !9} -!3 = metadata !{i32 589845, 
metadata !9, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} +!3 = metadata !{i32 589845, metadata !9, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{null} !5 = metadata !{i32 589876, i32 0, metadata !0, metadata !"x", metadata !"x", metadata !"", metadata !2, i32 1, metadata !6, i32 1, i32 1, i32* @x} !6 = metadata !{i32 589860, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} @@ -29,3 +30,4 @@ entry: !8 = metadata !{i32 589835, metadata !9, metadata !1, i32 2, i32 12, i32 0} !9 = metadata !{metadata !"/tmp/two.c", metadata !"/Volumes/Lalgate/Slate/D"} !10 = metadata !{metadata !1} +!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Linker/2011-08-18-unique-class-type.ll b/test/Linker/2011-08-18-unique-class-type.ll index b460ac3..b077f23 100644 --- a/test/Linker/2011-08-18-unique-class-type.ll +++ b/test/Linker/2011-08-18-unique-class-type.ll @@ -18,17 +18,18 @@ entry: declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!18} !0 = metadata !{i32 720913, metadata !16, i32 4, metadata !"clang version 3.0 (trunk 137954)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{metadata !2} !2 = metadata !{i32 0} !3 = metadata !{metadata !5} -!5 = metadata !{i32 720942, metadata !16, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooN2N11AE", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @_Z3fooN2N11AE, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!5 = metadata !{i32 720942, metadata !16, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooN2N11AE", i32 4, metadata !7, i1 false, i1 true, i32 0, 
i32 0, null, i32 256, i1 false, void ()* @_Z3fooN2N11AE, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 4] [def] [scope 0] [foo] !6 = metadata !{i32 720937, metadata !16} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 720917, metadata !16, metadata !6, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 720917, metadata !16, metadata !6, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{null} !9 = metadata !{i32 721153, metadata !5, metadata !"mya", metadata !6, i32 16777220, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ] -!10 = metadata !{i32 720898, metadata !17, metadata !11, metadata !"A", i32 3, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null} ; [ DW_TAG_class_type ] +!10 = metadata !{i32 720898, metadata !17, metadata !11, metadata !"A", i32 3, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 3, size 8, align 8, offset 0] [def] [from ] !11 = metadata !{i32 720953, metadata !17, null, metadata !"N1", i32 2} ; [ DW_TAG_namespace ] !12 = metadata !{i32 720937, metadata !17} ; [ DW_TAG_file_type ] !13 = metadata !{i32 4, i32 12, metadata !5, null} @@ -36,3 +37,4 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !15 = metadata !{i32 720907, metadata !16, metadata !5, i32 4, i32 17, i32 0} ; [ DW_TAG_lexical_block ] !16 = metadata !{metadata !"n1.c", metadata !"/private/tmp"} !17 = metadata !{metadata !"./n.h", metadata !"/private/tmp"} +!18 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Linker/2011-08-18-unique-class-type2.ll b/test/Linker/2011-08-18-unique-class-type2.ll index 8bd3841..7bfcd91 100644 --- a/test/Linker/2011-08-18-unique-class-type2.ll +++ b/test/Linker/2011-08-18-unique-class-type2.ll @@ -16,17 +16,18 
@@ entry: declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!18} !0 = metadata !{i32 720913, metadata !16, i32 4, metadata !"clang version 3.0 (trunk 137954)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{metadata !2} !2 = metadata !{i32 0} !3 = metadata !{metadata !5} -!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"bar", metadata !"bar", metadata !"_Z3barN2N11AE", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @_Z3barN2N11AE, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"bar", metadata !"bar", metadata !"_Z3barN2N11AE", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3barN2N11AE, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 4] [def] [scope 0] [bar] !6 = metadata !{i32 720937, metadata !16} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 720917, metadata !16, metadata !6, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 720917, metadata !16, metadata !6, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{null} !9 = metadata !{i32 721153, metadata !5, metadata !"youra", metadata !6, i32 16777220, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ] -!10 = metadata !{i32 720898, metadata !17, metadata !11, metadata !"A", i32 3, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null} ; [ DW_TAG_class_type ] +!10 = metadata !{i32 720898, metadata !17, metadata !11, metadata !"A", i32 3, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 3, size 8, align 8, offset 0] [def] 
[from ] !11 = metadata !{i32 720953, metadata !17, null, metadata !"N1", i32 2} ; [ DW_TAG_namespace ] !12 = metadata !{i32 720937, metadata !17} ; [ DW_TAG_file_type ] !13 = metadata !{i32 4, i32 12, metadata !5, null} @@ -34,3 +35,4 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !15 = metadata !{i32 720907, metadata !16, metadata !5, i32 4, i32 19, i32 0} ; [ DW_TAG_lexical_block ] !16 = metadata !{metadata !"n2.c", metadata !"/private/tmp"} !17 = metadata !{metadata !"./n.h", metadata !"/private/tmp"} +!18 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Linker/2011-08-18-unique-debug-type.ll b/test/Linker/2011-08-18-unique-debug-type.ll index d56968d..0e14f46 100644 --- a/test/Linker/2011-08-18-unique-debug-type.ll +++ b/test/Linker/2011-08-18-unique-debug-type.ll @@ -10,16 +10,18 @@ entry: } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!13} !0 = metadata !{i32 720913, metadata !12, i32 12, metadata !"clang version 3.0 (trunk 137954)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{metadata !2} !2 = metadata !{i32 0} !3 = metadata !{metadata !5} -!5 = metadata !{i32 720942, metadata !12, metadata !6, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!5 = metadata !{i32 720942, metadata !12, metadata !6, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 0] [foo] !6 = metadata !{i32 720937, metadata !12} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 720917, metadata !12, metadata !6, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 720917, 
metadata !12, metadata !6, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9} !9 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !10 = metadata !{i32 1, i32 13, metadata !11, null} !11 = metadata !{i32 720907, metadata !12, metadata !5, i32 1, i32 11, i32 0} ; [ DW_TAG_lexical_block ] !12 = metadata !{metadata !"one.c", metadata !"/private/tmp"} +!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Linker/2011-08-18-unique-debug-type2.ll b/test/Linker/2011-08-18-unique-debug-type2.ll index e724a67..1185100 100644 --- a/test/Linker/2011-08-18-unique-debug-type2.ll +++ b/test/Linker/2011-08-18-unique-debug-type2.ll @@ -10,16 +10,18 @@ entry: } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!13} !0 = metadata !{i32 720913, metadata !12, i32 12, metadata !"clang version 3.0 (trunk 137954)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{metadata !2} !2 = metadata !{i32 0} !3 = metadata !{metadata !5} -!5 = metadata !{i32 720942, metadata !12, metadata !6, metadata !"bar", metadata !"bar", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!5 = metadata !{i32 720942, metadata !12, metadata !6, metadata !"bar", metadata !"bar", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 0] [bar] !6 = metadata !{i32 720937, metadata !12} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 720917, metadata !12, metadata !6, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type 
] +!7 = metadata !{i32 720917, metadata !12, metadata !6, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9} !9 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !10 = metadata !{i32 1, i32 13, metadata !11, null} !11 = metadata !{i32 720907, metadata !12, metadata !5, i32 1, i32 11, i32 0} ; [ DW_TAG_lexical_block ] !12 = metadata !{metadata !"two.c", metadata !"/private/tmp"} +!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Linker/DbgDeclare.ll b/test/Linker/DbgDeclare.ll index 7f3fbde..4cca9d5 100644 --- a/test/Linker/DbgDeclare.ll +++ b/test/Linker/DbgDeclare.ll @@ -35,6 +35,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone declare void @test(i32, i8**) !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!21} !0 = metadata !{i32 786449, metadata !20, i32 4, metadata !"clang version 3.3 (trunk 173515)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{metadata !2} @@ -42,12 +43,12 @@ declare void @test(i32, i8**) !3 = metadata !{metadata !5} !5 = metadata !{i32 786478, metadata !20, null, metadata !"main", metadata !"main", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] !6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = 
metadata !{metadata !9, metadata !9, metadata !10} !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] -!11 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ] -!12 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_const_type ] +!10 = metadata !{i32 786447, null, null, null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] +!11 = metadata !{i32 786447, null, null, null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ] +!12 = metadata !{i32 786470, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_const_type ] !13 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] !14 = metadata !{i32 786689, metadata !5, metadata !"argc", metadata !6, i32 16777219, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] !15 = metadata !{i32 3, i32 0, metadata !5, null} @@ -56,3 +57,4 @@ declare void @test(i32, i8**) !18 = metadata !{i32 786443, metadata !20, metadata !5, i32 4, i32 0, i32 0} ; [ DW_TAG_lexical_block ] !19 = metadata !{i32 6, i32 0, metadata !18, null} !20 = metadata !{metadata !"main.cpp", metadata !"/private/tmp"} +!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Linker/DbgDeclare2.ll b/test/Linker/DbgDeclare2.ll index fbcae30..2649fcc 100644 --- a/test/Linker/DbgDeclare2.ll +++ b/test/Linker/DbgDeclare2.ll @@ -48,6 +48,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone declare i32 @puts(i8*) !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!27} !0 = metadata !{i32 786449, metadata !25, i32 4, metadata !"clang version 3.3 (trunk 
173515)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{metadata !2} @@ -55,12 +56,12 @@ declare i32 @puts(i8*) !3 = metadata !{metadata !5} !5 = metadata !{i32 786478, metadata !26, null, metadata !"print_args", metadata !"print_args", metadata !"test", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32, i8**)* @test, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ] !6 = metadata !{i32 786473, metadata !26} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{null, metadata !9, metadata !10} !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] -!11 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ] -!12 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_const_type ] +!10 = metadata !{i32 786447, null, null, null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] +!11 = metadata !{i32 786447, null, null, null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ] +!12 = metadata !{i32 786470, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_const_type ] !13 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] !14 
= metadata !{i32 786689, metadata !5, metadata !"argc", metadata !6, i32 16777220, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] !15 = metadata !{i32 4, i32 0, metadata !5, null} @@ -75,3 +76,4 @@ declare i32 @puts(i8*) !24 = metadata !{i32 10, i32 0, metadata !19, null} !25 = metadata !{metadata !"main.cpp", metadata !"/private/tmp"} !26 = metadata !{metadata !"test.cpp", metadata !"/private/tmp"} +!27 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Linker/Inputs/type-unique-inheritance-a.ll b/test/Linker/Inputs/type-unique-inheritance-a.ll new file mode 100644 index 0000000..381210c --- /dev/null +++ b/test/Linker/Inputs/type-unique-inheritance-a.ll @@ -0,0 +1,94 @@ +; CHECK: [ DW_TAG_class_type ] [A] +; CHECK: [ DW_TAG_class_type ] [Base] +; CHECK: [ DW_TAG_class_type ] [B] +; CHECK-NOT: DW_TAG_class_type +; Content of header files: +; +; class Base; +; class A : Base { +; int x; +; }; +; +; class A; +; class Base { +; int b; +; }; +; +; class B { +; int bb; +; A *a; +; }; +; Content of foo.cpp: +; +; #include "b.hpp" +; #include "a.hpp" +; +; void f(int a) { +; A t; +; } +; Content of bar.cpp: +; +; #include "b.hpp" +; #include "a.hpp" +; void g(int a) { +; B t; +; } +; +; void f(int); +; int main() { +; A a; +; f(0); +; g(1); +; return 0; +; } +; ModuleID = 'foo.cpp' + +%class.A = type { %class.Base, i32 } +%class.Base = type { i32 } + +; Function Attrs: nounwind ssp uwtable +define void @_Z1fi(i32 %a) #0 { +entry: + %a.addr = alloca i32, align 4 + %t = alloca %class.A, align 4 + store i32 %a, i32* %a.addr, align 4 + call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !20), !dbg !21 + call void @llvm.dbg.declare(metadata !{%class.A* %t}, metadata !22), !dbg !23 + ret void, !dbg !24 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata) #1 + +attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" 
"no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!19, !25} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (http://llvm.org/git/clang.git f54e02f969d02d640103db73efc30c45439fceab) (http://llvm.org/git/llvm.git 284353b55896cb1babfaa7add7c0a363245342d2)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !14, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/mren/c_testing/type_unique_air/inher/foo.cpp] [DW_LANG_C_plus_plus] +!1 = metadata !{metadata !"foo.cpp", metadata !"/Users/mren/c_testing/type_unique_air/inher"} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4, metadata !8} +!4 = metadata !{i32 786434, metadata !5, null, metadata !"A", i32 3, i64 64, i64 32, i32 0, i32 0, null, metadata !6, i32 0, null, null, metadata !"_ZTS1A"} ; [ DW_TAG_class_type ] [A] [line 3, size 64, align 32, offset 0] [def] [from ] +!5 = metadata !{metadata !"./a.hpp", metadata !"/Users/mren/c_testing/type_unique_air/inher"} +!6 = metadata !{metadata !7, metadata !13} +!7 = metadata !{i32 786460, null, metadata !"_ZTS1A", null, i32 0, i64 0, i64 0, i64 0, i32 1, metadata !8} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [private] [from Base] +!8 = metadata !{i32 786434, metadata !9, null, metadata !"Base", i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !10, i32 0, null, null, metadata !"_ZTS4Base"} ; [ DW_TAG_class_type ] [Base] [line 3, size 32, align 32, offset 0] [def] [from ] +!9 = metadata !{metadata !"./b.hpp", metadata !"/Users/mren/c_testing/type_unique_air/inher"} +!10 = metadata !{metadata !11} +!11 = metadata !{i32 786445, metadata !9, metadata !"_ZTS4Base", metadata !"b", i32 4, i64 32, i64 32, i64 0, i32 1, metadata !12} ; [ DW_TAG_member ] [b] [line 4, size 32, align 32, offset 0] [private] [from int] 
+!12 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!13 = metadata !{i32 786445, metadata !5, metadata !"_ZTS1A", metadata !"x", i32 4, i64 32, i64 32, i64 32, i32 1, metadata !12} ; [ DW_TAG_member ] [x] [line 4, size 32, align 32, offset 32] [private] [from int] +!14 = metadata !{metadata !15} +!15 = metadata !{i32 786478, metadata !1, metadata !16, metadata !"f", metadata !"f", metadata !"_Z1fi", i32 5, metadata !17, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @_Z1fi, null, null, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [f] +!16 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/Users/mren/c_testing/type_unique_air/inher/foo.cpp] +!17 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!18 = metadata !{null, metadata !12} +!19 = metadata !{i32 2, metadata !"Dwarf Version", i32 2} +!20 = metadata !{i32 786689, metadata !15, metadata !"a", metadata !16, i32 16777221, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 5] +!21 = metadata !{i32 5, i32 0, metadata !15, null} +!22 = metadata !{i32 786688, metadata !15, metadata !"t", metadata !16, i32 6, metadata !4, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [t] [line 6] +!23 = metadata !{i32 6, i32 0, metadata !15, null} +!24 = metadata !{i32 7, i32 0, metadata !15, null} +!25 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Linker/Inputs/type-unique-inheritance-b.ll b/test/Linker/Inputs/type-unique-inheritance-b.ll new file mode 100644 index 0000000..0cd43f6 --- /dev/null +++ b/test/Linker/Inputs/type-unique-inheritance-b.ll @@ -0,0 +1,81 @@ +; ModuleID = 'bar.cpp' + +%class.B = type { i32, %class.A* } +%class.A = type { %class.Base, 
i32 } +%class.Base = type { i32 } + +; Function Attrs: nounwind ssp uwtable +define void @_Z1gi(i32 %a) #0 { +entry: + %a.addr = alloca i32, align 4 + %t = alloca %class.B, align 8 + store i32 %a, i32* %a.addr, align 4 + call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !28), !dbg !29 + call void @llvm.dbg.declare(metadata !{%class.B* %t}, metadata !30), !dbg !31 + ret void, !dbg !32 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata) #1 + +; Function Attrs: ssp uwtable +define i32 @main() #2 { +entry: + %retval = alloca i32, align 4 + %a = alloca %class.A, align 4 + store i32 0, i32* %retval + call void @llvm.dbg.declare(metadata !{%class.A* %a}, metadata !33), !dbg !34 + call void @_Z1fi(i32 0), !dbg !35 + call void @_Z1gi(i32 1), !dbg !36 + ret i32 0, !dbg !37 +} + +declare void @_Z1fi(i32) #3 + +attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } +attributes #2 = { ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!27, !38} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (http://llvm.org/git/clang.git f54e02f969d02d640103db73efc30c45439fceab) (http://llvm.org/git/llvm.git 284353b55896cb1babfaa7add7c0a363245342d2)", i1 false, metadata !"", i32 
0, metadata !2, metadata !3, metadata !19, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/mren/c_testing/type_unique_air/inher/bar.cpp] [DW_LANG_C_plus_plus] +!1 = metadata !{metadata !"bar.cpp", metadata !"/Users/mren/c_testing/type_unique_air/inher"} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4, metadata !11, metadata !15} +!4 = metadata !{i32 786434, metadata !5, null, metadata !"B", i32 7, i64 128, i64 64, i32 0, i32 0, null, metadata !6, i32 0, null, null, metadata !"_ZTS1B"} ; [ DW_TAG_class_type ] [B] [line 7, size 128, align 64, offset 0] [def] [from ] +!5 = metadata !{metadata !"./b.hpp", metadata !"/Users/mren/c_testing/type_unique_air/inher"} +!6 = metadata !{metadata !7, metadata !9} +!7 = metadata !{i32 786445, metadata !5, metadata !"_ZTS1B", metadata !"bb", i32 8, i64 32, i64 32, i64 0, i32 1, metadata !8} ; [ DW_TAG_member ] [bb] [line 8, size 32, align 32, offset 0] [private] [from int] +!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{i32 786445, metadata !5, metadata !"_ZTS1B", metadata !"a", i32 9, i64 64, i64 64, i64 64, i32 1, metadata !10} ; [ DW_TAG_member ] [a] [line 9, size 64, align 64, offset 64] [private] [from ] +!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A] +!11 = metadata !{i32 786434, metadata !12, null, metadata !"A", i32 3, i64 64, i64 32, i32 0, i32 0, null, metadata !13, i32 0, null, null, metadata !"_ZTS1A"} ; [ DW_TAG_class_type ] [A] [line 3, size 64, align 32, offset 0] [def] [from ] +!12 = metadata !{metadata !"./a.hpp", metadata !"/Users/mren/c_testing/type_unique_air/inher"} +!13 = metadata !{metadata !14, metadata !18} +!14 = metadata !{i32 786460, null, metadata !"_ZTS1A", null, i32 0, i64 0, i64 0, i64 
0, i32 1, metadata !15} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [private] [from Base] +!15 = metadata !{i32 786434, metadata !5, null, metadata !"Base", i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !16, i32 0, null, null, metadata !"_ZTS4Base"} ; [ DW_TAG_class_type ] [Base] [line 3, size 32, align 32, offset 0] [def] [from ] +!16 = metadata !{metadata !17} +!17 = metadata !{i32 786445, metadata !5, metadata !"_ZTS4Base", metadata !"b", i32 4, i64 32, i64 32, i64 0, i32 1, metadata !8} ; [ DW_TAG_member ] [b] [line 4, size 32, align 32, offset 0] [private] [from int] +!18 = metadata !{i32 786445, metadata !12, metadata !"_ZTS1A", metadata !"x", i32 4, i64 32, i64 32, i64 32, i32 1, metadata !8} ; [ DW_TAG_member ] [x] [line 4, size 32, align 32, offset 32] [private] [from int] +!19 = metadata !{metadata !20, metadata !24} +!20 = metadata !{i32 786478, metadata !1, metadata !21, metadata !"g", metadata !"g", metadata !"_Z1gi", i32 4, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @_Z1gi, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [g] +!21 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/Users/mren/c_testing/type_unique_air/inher/bar.cpp] +!22 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!23 = metadata !{null, metadata !8} +!24 = metadata !{i32 786478, metadata !1, metadata !21, metadata !"main", metadata !"main", metadata !"", i32 9, metadata !25, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [main] +!25 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !26, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] 
+!26 = metadata !{metadata !8} +!27 = metadata !{i32 2, metadata !"Dwarf Version", i32 2} +!28 = metadata !{i32 786689, metadata !20, metadata !"a", metadata !21, i32 16777220, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 4] +!29 = metadata !{i32 4, i32 0, metadata !20, null} +!30 = metadata !{i32 786688, metadata !20, metadata !"t", metadata !21, i32 5, metadata !4, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [t] [line 5] +!31 = metadata !{i32 5, i32 0, metadata !20, null} +!32 = metadata !{i32 6, i32 0, metadata !20, null} +!33 = metadata !{i32 786688, metadata !24, metadata !"a", metadata !21, i32 10, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 10] +!34 = metadata !{i32 10, i32 0, metadata !24, null} +!35 = metadata !{i32 11, i32 0, metadata !24, null} +!36 = metadata !{i32 12, i32 0, metadata !24, null} +!37 = metadata !{i32 13, i32 0, metadata !24, null} +!38 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Linker/Inputs/type-unique-simple2-a.ll b/test/Linker/Inputs/type-unique-simple2-a.ll new file mode 100644 index 0000000..63470f3 --- /dev/null +++ b/test/Linker/Inputs/type-unique-simple2-a.ll @@ -0,0 +1,88 @@ +; Make sure the backend generates a single DIE and uses ref_addr. +; CHECK: 0x[[BASE:.*]]: DW_TAG_structure_type +; CHECK-NEXT: DW_AT_name {{.*}} = "Base" +; CHECK-NOT: DW_TAG_structure_type +; CHECK: 0x[[INT:.*]]: DW_TAG_base_type +; CHECK-NEXT: DW_AT_name {{.*}} = "int" +; CHECK-NOT: DW_TAG_base_type + +; CHECK: DW_TAG_compile_unit +; CHECK: DW_TAG_formal_parameter +; CHECK: DW_AT_type [DW_FORM_ref_addr] {{.*}}[[INT]]) +; CHECK: DW_TAG_variable +; CHECK: DW_AT_type [DW_FORM_ref_addr] {{.*}}[[BASE]]) + +; Make sure llvm-link only generates a single copy of the struct. 
+; LINK: DW_TAG_structure_type +; LINK-NOT: DW_TAG_structure_type + +; Content of header files: +; struct Base { +; int a; +; Base *b; +; }; +; Content of foo.cpp: +; +; #include "a.hpp" +; void f(int a) { +; Base t; +; } +; Content of bar.cpp: +; +; #include "a.hpp" +; void f(int); +; void g(int a) { +; Base t; +; } +; int main() { +; f(0); +; g(1); +; return 0; +; } +; ModuleID = 'foo.cpp' + +%struct.Base = type { i32, %struct.Base* } + +; Function Attrs: nounwind ssp uwtable +define void @_Z1fi(i32 %a) #0 { +entry: + %a.addr = alloca i32, align 4 + %t = alloca %struct.Base, align 8 + store i32 %a, i32* %a.addr, align 4 + call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !17), !dbg !18 + call void @llvm.dbg.declare(metadata !{%struct.Base* %t}, metadata !19), !dbg !20 + ret void, !dbg !21 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata) #1 + +attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!16, !22} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (http://llvm.org/git/clang.git 8a3f9e46cb988d2c664395b21910091e3730ae82) (http://llvm.org/git/llvm.git 4699e9549358bc77824a59114548eecc3f7c523c)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !11, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [foo.cpp] [DW_LANG_C_plus_plus] +!1 = metadata !{metadata !"foo.cpp", metadata !"."} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786451, metadata !5, null, metadata !"Base", i32 1, i64 128, i64 64, i32 0, i32 0, null, metadata !6, i32 0, null, null, metadata !"_ZTS4Base"} ; [ DW_TAG_structure_type ] [Base] [line 1, size 
128, align 64, offset 0] [def] [from ] +!5 = metadata !{metadata !"./a.hpp", metadata !"."} +!6 = metadata !{metadata !7, metadata !9} +!7 = metadata !{i32 786445, metadata !5, metadata !"_ZTS4Base", metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int] +!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{i32 786445, metadata !5, metadata !"_ZTS4Base", metadata !"b", i32 3, i64 64, i64 64, i64 64, i32 0, metadata !10} ; [ DW_TAG_member ] [b] [line 3, size 64, align 64, offset 64] [from ] +!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS4Base"} +!11 = metadata !{metadata !12} +!12 = metadata !{i32 786478, metadata !1, metadata !13, metadata !"f", metadata !"f", metadata !"_Z1fi", i32 3, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @_Z1fi, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f] +!13 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [foo.cpp] +!14 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!15 = metadata !{null, metadata !8} +!16 = metadata !{i32 2, metadata !"Dwarf Version", i32 2} +!17 = metadata !{i32 786689, metadata !12, metadata !"a", metadata !13, i32 16777219, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 3] +!18 = metadata !{i32 3, i32 0, metadata !12, null} +!19 = metadata !{i32 786688, metadata !12, metadata !"t", metadata !13, i32 4, metadata !4, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [t] [line 4] +!20 = metadata !{i32 4, i32 0, metadata !12, null} +!21 = metadata !{i32 5, i32 0, metadata !12, null} +!22 
= metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Linker/Inputs/type-unique-simple2-b.ll b/test/Linker/Inputs/type-unique-simple2-b.ll new file mode 100644 index 0000000..f564d81 --- /dev/null +++ b/test/Linker/Inputs/type-unique-simple2-b.ll @@ -0,0 +1,67 @@ +; ModuleID = 'bar.cpp' + +%struct.Base = type { i32, %struct.Base* } + +; Function Attrs: nounwind ssp uwtable +define void @_Z1gi(i32 %a) #0 { +entry: + %a.addr = alloca i32, align 4 + %t = alloca %struct.Base, align 8 + store i32 %a, i32* %a.addr, align 4 + call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !20), !dbg !21 + call void @llvm.dbg.declare(metadata !{%struct.Base* %t}, metadata !22), !dbg !23 + ret void, !dbg !24 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata) #1 + +; Function Attrs: ssp uwtable +define i32 @main() #2 { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + call void @_Z1fi(i32 0), !dbg !25 + call void @_Z1gi(i32 1), !dbg !26 + ret i32 0, !dbg !27 +} + +declare void @_Z1fi(i32) #3 + +attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } +attributes #2 = { ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!19, !28} + +!0 = metadata !{i32 786449, 
metadata !1, i32 4, metadata !"clang version 3.4 (http://llvm.org/git/clang.git 8a3f9e46cb988d2c664395b21910091e3730ae82) (http://llvm.org/git/llvm.git 4699e9549358bc77824a59114548eecc3f7c523c)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !11, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [bar.cpp] [DW_LANG_C_plus_plus] +!1 = metadata !{metadata !"bar.cpp", metadata !"."} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786451, metadata !5, null, metadata !"Base", i32 1, i64 128, i64 64, i32 0, i32 0, null, metadata !6, i32 0, null, null, metadata !"_ZTS4Base"} ; [ DW_TAG_structure_type ] [Base] [line 1, size 128, align 64, offset 0] [def] [from ] +!5 = metadata !{metadata !"./a.hpp", metadata !"."} +!6 = metadata !{metadata !7, metadata !9} +!7 = metadata !{i32 786445, metadata !5, metadata !"_ZTS4Base", metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int] +!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{i32 786445, metadata !5, metadata !"_ZTS4Base", metadata !"b", i32 3, i64 64, i64 64, i64 64, i32 0, metadata !10} ; [ DW_TAG_member ] [b] [line 3, size 64, align 64, offset 64] [from ] +!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS4Base"} +!11 = metadata !{metadata !12, metadata !16} +!12 = metadata !{i32 786478, metadata !1, metadata !13, metadata !"g", metadata !"g", metadata !"_Z1gi", i32 4, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @_Z1gi, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [g] +!13 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [bar.cpp] +!14 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, 
i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!15 = metadata !{null, metadata !8} +!16 = metadata !{i32 786478, metadata !1, metadata !13, metadata !"main", metadata !"main", metadata !"", i32 7, metadata !17, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [main] +!17 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!18 = metadata !{metadata !8} +!19 = metadata !{i32 2, metadata !"Dwarf Version", i32 2} +!20 = metadata !{i32 786689, metadata !12, metadata !"a", metadata !13, i32 16777220, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 4] +!21 = metadata !{i32 4, i32 0, metadata !12, null} +!22 = metadata !{i32 786688, metadata !12, metadata !"t", metadata !13, i32 5, metadata !4, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [t] [line 5] +!23 = metadata !{i32 5, i32 0, metadata !12, null} +!24 = metadata !{i32 6, i32 0, metadata !12, null} +!25 = metadata !{i32 8, i32 0, metadata !16, null} ; [ DW_TAG_imported_declaration ] +!26 = metadata !{i32 9, i32 0, metadata !16, null} +!27 = metadata !{i32 10, i32 0, metadata !16, null} +!28 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Linker/lit.local.cfg b/test/Linker/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Linker/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Linker/prefixdata.ll b/test/Linker/prefixdata.ll new file mode 100644 index 0000000..1f11dc7 --- /dev/null +++ b/test/Linker/prefixdata.ll @@ -0,0 +1,9 @@ +; RUN: echo > %t.ll +; RUN: llvm-link %t.ll %s -S -o - | FileCheck %s + +@i = linkonce_odr global i32 1 + +; CHECK: define void @f() 
prefix i32* @i +define void @f() prefix i32* @i { + ret void +} diff --git a/test/Linker/transitive-lazy-link.ll b/test/Linker/transitive-lazy-link.ll index 32b9d21..c1cacae 100644 --- a/test/Linker/transitive-lazy-link.ll +++ b/test/Linker/transitive-lazy-link.ll @@ -1,6 +1,11 @@ ; @f and @g are lazily linked. @f requires @g - ensure @g is correctly linked. -; RUN: echo -e "declare i32 @f(i32)\ndefine i32 @h(i32 %x) {\n%1 = call i32 @f(i32 %x)\nret i32 %1\n}" | llvm-as >%t.1.bc +; RUN: echo "declare i32 @f(i32)" > %t.1.ll +; RUN: echo "define i32 @h(i32 %x) {" >> %t.1.ll +; RUN: echo " %1 = call i32 @f(i32 %x)" >> %t.1.ll +; RUN: echo " ret i32 %1" >> %t.1.ll +; RUN: echo "}" >> %t.1.ll +; RUN: llvm-as < %t.1.ll > %t.1.bc ; RUN: llvm-as < %s > %t.2.bc ; RUN: llvm-link %t.1.bc %t.2.bc diff --git a/test/Linker/type-unique-inheritance.ll b/test/Linker/type-unique-inheritance.ll new file mode 100644 index 0000000..1ba1b08 --- /dev/null +++ b/test/Linker/type-unique-inheritance.ll @@ -0,0 +1 @@ +; RUN: llvm-link %S/Inputs/type-unique-inheritance-a.ll %S/Inputs/type-unique-inheritance-b.ll -S -o - | FileCheck %S/Inputs/type-unique-inheritance-a.ll diff --git a/test/Linker/type-unique-simple-a.ll b/test/Linker/type-unique-simple-a.ll new file mode 100644 index 0000000..4bfdff9 --- /dev/null +++ b/test/Linker/type-unique-simple-a.ll @@ -0,0 +1,91 @@ +; REQUIRES: object-emission + +; RUN: llvm-link %s %p/type-unique-simple-b.ll -S -o %t +; RUN: cat %t | FileCheck %s -check-prefix=LINK +; RUN: llc -filetype=obj -O0 < %t > %t2 +; RUN: llvm-dwarfdump -debug-dump=info %t2 | FileCheck %s + +; Make sure the backend generates a single DIE and uses ref_addr. 
+; CHECK: 0x[[BASE:.*]]: DW_TAG_structure_type +; CHECK-NEXT: DW_AT_name {{.*}} = "Base" +; CHECK-NOT: DW_TAG_structure_type +; CHECK: 0x[[INT:.*]]: DW_TAG_base_type +; CHECK-NEXT: DW_AT_name {{.*}} = "int" +; CHECK-NOT: DW_TAG_base_type + +; CHECK: DW_TAG_compile_unit +; CHECK: DW_TAG_formal_parameter +; CHECK: DW_AT_type [DW_FORM_ref_addr] {{.*}}[[INT]]) +; CHECK: DW_TAG_variable +; CHECK: DW_AT_type [DW_FORM_ref_addr] {{.*}}[[BASE]]) + +; Make sure llvm-link only generates a single copy of the struct. +; LINK: DW_TAG_structure_type +; LINK-NOT: DW_TAG_structure_type +; Content of header files: +; struct Base { +; int a; +; }; +; Content of foo.cpp: +; +; #include "a.hpp" +; void f(int a) { +; Base t; +; } +; Content of bar.cpp: +; +; #include "a.hpp" +; void f(int); +; void g(int a) { +; Base t; +; } +; int main() { +; f(0); +; g(1); +; return 0; +; } +; ModuleID = 'foo.cpp' + +%struct.Base = type { i32 } + +; Function Attrs: nounwind ssp uwtable +define void @_Z1fi(i32 %a) #0 { +entry: + %a.addr = alloca i32, align 4 + %t = alloca %struct.Base, align 4 + store i32 %a, i32* %a.addr, align 4 + call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !15), !dbg !16 + call void @llvm.dbg.declare(metadata !{%struct.Base* %t}, metadata !17), !dbg !18 + ret void, !dbg !19 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata) #1 + +attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!14, !20} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (http://llvm.org/git/clang.git c23b1db6268c8e7ce64026d57d1510c1aac200a0) (http://llvm.org/git/llvm.git 09b98fe3978eddefc2145adc1056cf21580ce945)", i1 false, 
metadata !"", i32 0, metadata !2, metadata !3, metadata !9, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/mren/c_testing/type_unique_air/simple/foo.cpp] [DW_LANG_C_plus_plus] +!1 = metadata !{metadata !"foo.cpp", metadata !"/Users/mren/c_testing/type_unique_air/simple"} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786451, metadata !5, null, metadata !"Base", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !6, i32 0, null, null, metadata !"_ZTS4Base"} ; [ DW_TAG_structure_type ] [Base] [line 1, size 32, align 32, offset 0] [def] [from ] +!5 = metadata !{metadata !"./a.hpp", metadata !"/Users/mren/c_testing/type_unique_air/simple"} +!6 = metadata !{metadata !7} +!7 = metadata !{i32 786445, metadata !5, metadata !"_ZTS4Base", metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int] +!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{metadata !10} +!10 = metadata !{i32 786478, metadata !1, metadata !11, metadata !"f", metadata !"f", metadata !"_Z1fi", i32 3, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @_Z1fi, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f] +!11 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/Users/mren/c_testing/type_unique_air/simple/foo.cpp] +!12 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!13 = metadata !{null, metadata !8} +!14 = metadata !{i32 2, metadata !"Dwarf Version", i32 2} +!15 = metadata !{i32 786689, metadata !10, metadata !"a", metadata !11, i32 16777219, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] 
[a] [line 3] +!16 = metadata !{i32 3, i32 0, metadata !10, null} +!17 = metadata !{i32 786688, metadata !10, metadata !"t", metadata !11, i32 4, metadata !4, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [t] [line 4] +!18 = metadata !{i32 4, i32 0, metadata !10, null} +!19 = metadata !{i32 5, i32 0, metadata !10, null} +!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Linker/type-unique-simple-b.ll b/test/Linker/type-unique-simple-b.ll new file mode 100644 index 0000000..c46e67f --- /dev/null +++ b/test/Linker/type-unique-simple-b.ll @@ -0,0 +1,67 @@ +; RUN: true + +; ModuleID = 'bar.cpp' + +%struct.Base = type { i32 } + +; Function Attrs: nounwind ssp uwtable +define void @_Z1gi(i32 %a) #0 { +entry: + %a.addr = alloca i32, align 4 + %t = alloca %struct.Base, align 4 + store i32 %a, i32* %a.addr, align 4 + call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !18), !dbg !19 + call void @llvm.dbg.declare(metadata !{%struct.Base* %t}, metadata !20), !dbg !21 + ret void, !dbg !22 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata) #1 + +; Function Attrs: ssp uwtable +define i32 @main() #2 { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + call void @_Z1fi(i32 0), !dbg !23 + call void @_Z1gi(i32 1), !dbg !24 + ret i32 0, !dbg !25 +} + +declare void @_Z1fi(i32) #3 + +attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } +attributes #2 = { ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { 
"less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!17, !26} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (http://llvm.org/git/clang.git c23b1db6268c8e7ce64026d57d1510c1aac200a0) (http://llvm.org/git/llvm.git 09b98fe3978eddefc2145adc1056cf21580ce945)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !9, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/mren/c_testing/type_unique_air/simple/bar.cpp] [DW_LANG_C_plus_plus] +!1 = metadata !{metadata !"bar.cpp", metadata !"/Users/mren/c_testing/type_unique_air/simple"} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786451, metadata !5, null, metadata !"Base", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !6, i32 0, null, null, metadata !"_ZTS4Base"} ; [ DW_TAG_structure_type ] [Base] [line 1, size 32, align 32, offset 0] [def] [from ] +!5 = metadata !{metadata !"./a.hpp", metadata !"/Users/mren/c_testing/type_unique_air/simple"} +!6 = metadata !{metadata !7} +!7 = metadata !{i32 786445, metadata !5, metadata !"_ZTS4Base", metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int] +!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{metadata !10, metadata !14} +!10 = metadata !{i32 786478, metadata !1, metadata !11, metadata !"g", metadata !"g", metadata !"_Z1gi", i32 4, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @_Z1gi, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [g] +!11 = metadata !{i32 
786473, metadata !1} ; [ DW_TAG_file_type ] [/Users/mren/c_testing/type_unique_air/simple/bar.cpp] +!12 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!13 = metadata !{null, metadata !8} +!14 = metadata !{i32 786478, metadata !1, metadata !11, metadata !"main", metadata !"main", metadata !"", i32 7, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [main] +!15 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!16 = metadata !{metadata !8} +!17 = metadata !{i32 2, metadata !"Dwarf Version", i32 2} +!18 = metadata !{i32 786689, metadata !10, metadata !"a", metadata !11, i32 16777220, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 4] +!19 = metadata !{i32 4, i32 0, metadata !10, null} +!20 = metadata !{i32 786688, metadata !10, metadata !"t", metadata !11, i32 5, metadata !4, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [t] [line 5] +!21 = metadata !{i32 5, i32 0, metadata !10, null} +!22 = metadata !{i32 6, i32 0, metadata !10, null} +!23 = metadata !{i32 8, i32 0, metadata !14, null} ; [ DW_TAG_imported_declaration ] +!24 = metadata !{i32 9, i32 0, metadata !14, null} +!25 = metadata !{i32 10, i32 0, metadata !14, null} +!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Linker/type-unique-simple2.ll b/test/Linker/type-unique-simple2.ll new file mode 100644 index 0000000..ead91df --- /dev/null +++ b/test/Linker/type-unique-simple2.ll @@ -0,0 +1,6 @@ +; REQUIRES: object-emission + +; RUN: llvm-link %S/Inputs/type-unique-simple2-a.ll %S/Inputs/type-unique-simple2-b.ll -S -o %t +; RUN: cat %t | FileCheck 
%S/Inputs/type-unique-simple2-a.ll -check-prefix=LINK +; RUN: llc -filetype=obj -O0 < %t > %t2 +; RUN: llvm-dwarfdump -debug-dump=info %t2 | FileCheck %S/Inputs/type-unique-simple2-a.ll diff --git a/test/Linker/unnamed-addr-err-a.ll b/test/Linker/unnamed-addr-err-a.ll new file mode 100644 index 0000000..4872098 --- /dev/null +++ b/test/Linker/unnamed-addr-err-a.ll @@ -0,0 +1,4 @@ +; RUN: not llvm-link %s %p/unnamed-addr-err-b.ll -S -o - 2>&1 | FileCheck %s + +@foo = appending unnamed_addr global [1 x i32] [i32 42] +; CHECK: Appending variables with different unnamed_addr need to be linked diff --git a/test/Linker/unnamed-addr-err-b.ll b/test/Linker/unnamed-addr-err-b.ll new file mode 100644 index 0000000..5e5fed9 --- /dev/null +++ b/test/Linker/unnamed-addr-err-b.ll @@ -0,0 +1,4 @@ +; This file is for use with unnamed-addr-err-a.ll +; RUN: true + +@foo = appending global [1 x i32] [i32 42] diff --git a/test/Linker/unnamed-addr1-a.ll b/test/Linker/unnamed-addr1-a.ll index e9c03ee..adaa400 100644 --- a/test/Linker/unnamed-addr1-a.ll +++ b/test/Linker/unnamed-addr1-a.ll @@ -1,27 +1,46 @@ -; RUN: llvm-link %s %p/unnamed-addr1-b.ll -S -o - | sort | FileCheck %s +; RUN: llvm-link %s %p/unnamed-addr1-b.ll -S -o - | FileCheck %s ; Only in this file -@a = common global i32 0 -; CHECK: @a = common global i32 0 -@b = common unnamed_addr global i32 0 -; CHECK: @b = common unnamed_addr global i32 0 +@global-a = common global i32 0 +; CHECK-DAG: @global-a = common global i32 0 +@global-b = common unnamed_addr global i32 0 +; CHECK-DAG: @global-b = common unnamed_addr global i32 0 + +define weak void @func-a() { ret void } +; CHECK-DAG: define weak void @func-a() { +define weak void @func-b() unnamed_addr { ret void } +; CHECK-DAG: define weak void @func-b() unnamed_addr { ; Other file has unnamed_addr definition -@c = common unnamed_addr global i32 0 -; CHECK: @c = common unnamed_addr global i32 0 -@d = external global i32 -; CHECK: @d = unnamed_addr global i32 42 -@e = external 
unnamed_addr global i32 -; CHECK: @e = unnamed_addr global i32 42 -@f = weak global i32 42 -; CHECK: @f = unnamed_addr global i32 42 +@global-c = common unnamed_addr global i32 0 +; CHECK-DAG: @global-c = common unnamed_addr global i32 0 +@global-d = external global i32 +; CHECK-DAG: @global-d = global i32 42 +@global-e = external unnamed_addr global i32 +; CHECK-DAG: @global-e = unnamed_addr global i32 42 +@global-f = weak global i32 42 +; CHECK-DAG: @global-f = global i32 42 + +declare void @func-c() +; CHECK-DAG: define weak void @func-c() { +define weak void @func-d() { ret void } +; CHECK-DAG: define weak void @func-d() { +define weak void @func-e() unnamed_addr { ret void } +; CHECK-DAG: define weak void @func-e() unnamed_addr { ; Other file has non-unnamed_addr definition -@g = common unnamed_addr global i32 0 -; CHECK: @g = common unnamed_addr global i32 0 -@h = external global i32 -; CHECK: @h = global i32 42 -@i = external unnamed_addr global i32 -; CHECK: @i = global i32 42 -@j = weak global i32 42 -; CHECK: @j = global i32 42 +@global-g = common unnamed_addr global i32 0 +; CHECK-DAG: @global-g = common global i32 0 +@global-h = external global i32 +; CHECK-DAG: @global-h = global i32 42 +@global-i = external unnamed_addr global i32 +; CHECK-DAG: @global-i = global i32 42 +@global-j = weak global i32 42 +; CHECK-DAG: @global-j = global i32 42 + +declare void @func-g() +; CHECK-DAG: define weak void @func-g() { +define weak void @func-h() { ret void } +; CHECK-DAG: define weak void @func-h() { +define weak void @func-i() unnamed_addr { ret void } +; CHECK-DAG: define weak void @func-i() { diff --git a/test/Linker/unnamed-addr1-b.ll b/test/Linker/unnamed-addr1-b.ll index 7d94dc1..aa1507b 100644 --- a/test/Linker/unnamed-addr1-b.ll +++ b/test/Linker/unnamed-addr1-b.ll @@ -1,12 +1,20 @@ ; This file is for use with unnamed-addr1-a.ll ; RUN: true -@c = common unnamed_addr global i32 42 -@d = unnamed_addr global i32 42 -@e = unnamed_addr global i32 42 -@f = 
unnamed_addr global i32 42 - -@g = common global i32 42 -@h = global i32 42 -@i = global i32 42 -@j = global i32 42 +@global-c = common unnamed_addr global i32 42 +@global-d = unnamed_addr global i32 42 +@global-e = unnamed_addr global i32 42 +@global-f = unnamed_addr global i32 42 + +define weak void @func-c() unnamed_addr { ret void } +define weak void @func-d() unnamed_addr { ret void } +define weak void @func-e() unnamed_addr { ret void } + +@global-g = common global i32 42 +@global-h = global i32 42 +@global-i = global i32 42 +@global-j = global i32 42 + +define weak void @func-g() { ret void } +define weak void @func-h() { ret void } +define weak void @func-i() { ret void } diff --git a/test/MC/AArch64/adrp-relocation.s b/test/MC/AArch64/adrp-relocation.s new file mode 100644 index 0000000..3bcef34 --- /dev/null +++ b/test/MC/AArch64/adrp-relocation.s @@ -0,0 +1,18 @@ +// RUN: llvm-mc -triple=aarch64-linux-gnu -filetype=obj -o - %s| llvm-readobj -r - | FileCheck %s + .text +// These should produce an ADRP/ADD pair to calculate the address of +// testfn. The important point is that LLVM shouldn't think it can deal with the +// relocation on the ADRP itself (even though it knows everything about the +// relative offsets of testfn and foo) because its value depends on where this +// object file's .text section gets relocated in memory. 
+ adrp x0, sym + adrp x0, :got:sym + adrp x0, :gottprel:sym + adrp x0, :tlsdesc:sym + + .global sym +sym: +// CHECK: R_AARCH64_ADR_PREL_PG_HI21 sym +// CHECK: R_AARCH64_ADR_GOT_PAGE sym +// CHECK: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 sym +// CHECK: R_AARCH64_TLSDESC_ADR_PAGE sym diff --git a/test/MC/AArch64/basic-a64-instructions.s b/test/MC/AArch64/basic-a64-instructions.s index e4f6b21..a50efb3 100644 --- a/test/MC/AArch64/basic-a64-instructions.s +++ b/test/MC/AArch64/basic-a64-instructions.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+fp-armv8 < %s | FileCheck %s .globl _func // Check that the assembler can handle the documented syntax from the ARM ARM. diff --git a/test/MC/AArch64/basic-pic.s b/test/MC/AArch64/basic-pic.s new file mode 100644 index 0000000..a10874d --- /dev/null +++ b/test/MC/AArch64/basic-pic.s @@ -0,0 +1,98 @@ +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o -| llvm-objdump -r - | FileCheck %s + +// CHECK: RELOCATION RECORDS FOR [.rela.text] + + .file "/home/espindola/llvm/llvm/test/CodeGen/AArch64/basic-pic.ll" + .text + .globl get_globalvar + .type get_globalvar,@function +get_globalvar: // @get_globalvar + .cfi_startproc +// BB#0: + adrp x0, :got:var + ldr x0, [x0, #:got_lo12:var] + ldr w0, [x0] + ret +.Ltmp0: + .size get_globalvar, .Ltmp0-get_globalvar + .cfi_endproc + +// CHECK: R_AARCH64_ADR_GOT_PAGE var +// CHECK: R_AARCH64_LD64_GOT_LO12_NC var + + .globl get_globalvaraddr + .type get_globalvaraddr,@function +get_globalvaraddr: // @get_globalvaraddr + .cfi_startproc +// BB#0: + adrp x0, :got:var + ldr x0, [x0, #:got_lo12:var] + ret +.Ltmp1: + .size get_globalvaraddr, .Ltmp1-get_globalvaraddr + .cfi_endproc +// CHECK: R_AARCH64_ADR_GOT_PAGE var +// CHECK: R_AARCH64_LD64_GOT_LO12_NC var + + .globl get_hiddenvar + .type get_hiddenvar,@function +get_hiddenvar: // @get_hiddenvar + .cfi_startproc 
+// BB#0: + adrp x0, hiddenvar + ldr w0, [x0, #:lo12:hiddenvar] + ret +.Ltmp2: + .size get_hiddenvar, .Ltmp2-get_hiddenvar + .cfi_endproc +// CHECK: R_AARCH64_ADR_PREL_PG_HI21 hiddenvar +// CHECK: R_AARCH64_LDST32_ABS_LO12_NC hiddenvar + + .globl get_hiddenvaraddr + .type get_hiddenvaraddr,@function +get_hiddenvaraddr: // @get_hiddenvaraddr + .cfi_startproc +// BB#0: + adrp x0, hiddenvar + add x0, x0, #:lo12:hiddenvar + ret +.Ltmp3: + .size get_hiddenvaraddr, .Ltmp3-get_hiddenvaraddr + .cfi_endproc +// CHECK: R_AARCH64_ADR_PREL_PG_HI21 hiddenvar +// CHECK: R_AARCH64_ADD_ABS_LO12_NC hiddenvar + + .globl get_func + .type get_func,@function +get_func: // @get_func + .cfi_startproc +// BB#0: + adrp x0, :got:get_func + ldr x0, [x0, #:got_lo12:get_func] + ret +.Ltmp4: + .size get_func, .Ltmp4-get_func + .cfi_endproc + +// Particularly important that the ADRP gets a relocation, LLVM tends to think +// it can relax it because it knows where get_func is. It can't! +// CHECK: R_AARCH64_ADR_GOT_PAGE get_func +// CHECK: R_AARCH64_LD64_GOT_LO12_NC get_func + + .type var,@object // @var + .bss + .globl var + .align 2 +var: + .word 0 // 0x0 + .size var, 4 + + .hidden hiddenvar // @hiddenvar + .type hiddenvar,@object + .globl hiddenvar + .align 2 +hiddenvar: + .word 0 // 0x0 + .size hiddenvar, 4 + + diff --git a/test/MC/AArch64/elf-extern.s b/test/MC/AArch64/elf-extern.s new file mode 100644 index 0000000..dfa3fb0 --- /dev/null +++ b/test/MC/AArch64/elf-extern.s @@ -0,0 +1,33 @@ +// RUN: llvm-mc < %s -triple=aarch64-none-linux-gnu -filetype=obj | llvm-readobj -r | FileCheck %s + +// External symbols are a different concept to global variables but should still +// get relocations and so on when used. 
+ + .file "<stdin>" + .text + .globl check_extern + .type check_extern,@function +check_extern: // @check_extern + .cfi_startproc +// BB#0: + sub sp, sp, #16 +.Ltmp2: + .cfi_def_cfa sp, 16 + str x30, [sp, #8] // 8-byte Folded Spill +.Ltmp3: + .cfi_offset x30, -8 + bl memcpy + mov x0, xzr + ldr x30, [sp, #8] // 8-byte Folded Reload + add sp, sp, #16 + ret +.Ltmp4: + .size check_extern, .Ltmp4-check_extern + .cfi_endproc + + +// CHECK: Relocations [ +// CHECK: Section (2) .rela.text { +// CHECK: 0x{{[0-9,A-F]+}} R_AARCH64_CALL26 memcpy +// CHECK: } +// CHECK: ] diff --git a/test/MC/AArch64/elf-reloc-ldstunsimm.s b/test/MC/AArch64/elf-reloc-ldstunsimm.s index 64bf63a..371e7e5 100644 --- a/test/MC/AArch64/elf-reloc-ldstunsimm.s +++ b/test/MC/AArch64/elf-reloc-ldstunsimm.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \ +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+fp-armv8 -filetype=obj %s -o - | \ // RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s ldrb w0, [sp, #:lo12:some_label] diff --git a/test/MC/AArch64/inline-asm-modifiers.s b/test/MC/AArch64/inline-asm-modifiers.s new file mode 100644 index 0000000..cf34a95 --- /dev/null +++ b/test/MC/AArch64/inline-asm-modifiers.s @@ -0,0 +1,209 @@ +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj -mattr=+fp-armv8 < %s | llvm-objdump -r - | FileCheck %s + + .file "<stdin>" + .text + .globl test_inline_modifier_L + .type test_inline_modifier_L,@function +test_inline_modifier_L: // @test_inline_modifier_L +// BB#0: + //APP + add x0, x0, #:lo12:var_simple + //NO_APP + //APP + ldr x0, [x0, #:got_lo12:var_got] + //NO_APP + //APP + add x0, x0, #:tlsdesc_lo12:var_tlsgd + //NO_APP + //APP + add x0, x0, #:dtprel_lo12:var_tlsld + //NO_APP + //APP + ldr x0, [x0, #:gottprel_lo12:var_tlsie] + //NO_APP + //APP + add x0, x0, #:tprel_lo12:var_tlsle + //NO_APP + ret +.Ltmp0: + .size test_inline_modifier_L, .Ltmp0-test_inline_modifier_L + +// CHECK: R_AARCH64_ADD_ABS_LO12_NC 
var_simple +// CHECK: R_AARCH64_LD64_GOT_LO12_NC var_got +// CHECK: R_AARCH64_TLSDESC_ADD_LO12_NC var_tlsgd +// CHECK: R_AARCH64_TLSLD_ADD_DTPREL_LO12 var_tlsld +// CHECK: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC var_tlsie +// CHECK: R_AARCH64_TLSLE_ADD_TPREL_LO12 var_tlsle + + .globl test_inline_modifier_G + .type test_inline_modifier_G,@function +test_inline_modifier_G: // @test_inline_modifier_G +// BB#0: + //APP + add x0, x0, #:dtprel_hi12:var_tlsld, lsl #12 + //NO_APP + //APP + add x0, x0, #:tprel_hi12:var_tlsle, lsl #12 + //NO_APP + ret +.Ltmp1: + .size test_inline_modifier_G, .Ltmp1-test_inline_modifier_G + +// CHECK: R_AARCH64_TLSLD_ADD_DTPREL_HI12 var_tlsld +// CHECK: R_AARCH64_TLSLE_ADD_TPREL_HI12 var_tlsle + + .globl test_inline_modifier_A + .type test_inline_modifier_A,@function +test_inline_modifier_A: // @test_inline_modifier_A +// BB#0: + //APP + adrp x0, var_simple + //NO_APP + //APP + adrp x0, :got:var_got + //NO_APP + //APP + adrp x0, :tlsdesc:var_tlsgd + //NO_APP + //APP + adrp x0, :gottprel:var_tlsie + //NO_APP + ret +.Ltmp2: + .size test_inline_modifier_A, .Ltmp2-test_inline_modifier_A +// CHECK: R_AARCH64_ADR_PREL_PG_HI21 var_simple +// CHECK: R_AARCH64_ADR_GOT_PAGE var_got +// CHECK: R_AARCH64_TLSDESC_ADR_PAGE var_tlsgd +// CHECK: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 var_tlsie + + .globl test_inline_modifier_wx + .type test_inline_modifier_wx,@function +test_inline_modifier_wx: // @test_inline_modifier_wx +// BB#0: + mov w2, w0 + //APP + add w2, w2, w2 + //NO_APP + mov w2, w0 + //APP + add w2, w2, w2 + //NO_APP + //APP + add x0, x0, x0 + //NO_APP + mov x0, x1 + //APP + add x0, x0, x0 + //NO_APP + mov x0, x1 + //APP + add w0, w0, w0 + //NO_APP + //APP + add x1, x1, x1 + //NO_APP + //APP + add w0, wzr, wzr + //NO_APP + //APP + add x0, xzr, xzr + //NO_APP + ret +.Ltmp3: + .size test_inline_modifier_wx, .Ltmp3-test_inline_modifier_wx + + .globl test_inline_modifier_bhsdq + .type test_inline_modifier_bhsdq,@function +test_inline_modifier_bhsdq: // 
@test_inline_modifier_bhsdq +// BB#0: + //APP + ldr b0, [sp] + //NO_APP + //APP + ldr h0, [sp] + //NO_APP + //APP + ldr s0, [sp] + //NO_APP + //APP + ldr d0, [sp] + //NO_APP + //APP + ldr q0, [sp] + //NO_APP + //APP + ldr b0, [sp] + //NO_APP + //APP + ldr h0, [sp] + //NO_APP + //APP + ldr s0, [sp] + //NO_APP + //APP + ldr d0, [sp] + //NO_APP + //APP + ldr q0, [sp] + //NO_APP + ret +.Ltmp4: + .size test_inline_modifier_bhsdq, .Ltmp4-test_inline_modifier_bhsdq + + .globl test_inline_modifier_c + .type test_inline_modifier_c,@function +test_inline_modifier_c: // @test_inline_modifier_c +// BB#0: + //APP + adr x0, 3 + //NO_APP + ret +.Ltmp5: + .size test_inline_modifier_c, .Ltmp5-test_inline_modifier_c + + .hidden var_simple // @var_simple + .type var_simple,@object + .bss + .globl var_simple + .align 2 +var_simple: + .word 0 // 0x0 + .size var_simple, 4 + + .type var_got,@object // @var_got + .globl var_got + .align 2 +var_got: + .word 0 // 0x0 + .size var_got, 4 + + .type var_tlsgd,@object // @var_tlsgd + .section .tbss,"awT",@nobits + .globl var_tlsgd + .align 2 +var_tlsgd: + .word 0 // 0x0 + .size var_tlsgd, 4 + + .type var_tlsld,@object // @var_tlsld + .globl var_tlsld + .align 2 +var_tlsld: + .word 0 // 0x0 + .size var_tlsld, 4 + + .type var_tlsie,@object // @var_tlsie + .globl var_tlsie + .align 2 +var_tlsie: + .word 0 // 0x0 + .size var_tlsie, 4 + + .type var_tlsle,@object // @var_tlsle + .globl var_tlsle + .align 2 +var_tlsle: + .word 0 // 0x0 + .size var_tlsle, 4 + + diff --git a/test/MC/AArch64/jump-table.s b/test/MC/AArch64/jump-table.s new file mode 100644 index 0000000..578ebf4 --- /dev/null +++ b/test/MC/AArch64/jump-table.s @@ -0,0 +1,59 @@ +// RUN: llvm-mc < %s -triple=aarch64-none-linux-gnu -filetype=obj | llvm-readobj -r | FileCheck %s + + .file "<stdin>" + .text + .globl test_jumptable + .type test_jumptable,@function +test_jumptable: // @test_jumptable + .cfi_startproc +// BB#0: + ubfx w1, w0, #0, #32 + cmp w0, #4 + b.hi .LBB0_3 +// BB#1: + adrp 
x0, .LJTI0_0 + add x0, x0, #:lo12:.LJTI0_0 + ldr x0, [x0, x1, lsl #3] + br x0 +.LBB0_2: // %lbl1 + movz x0, #1 + ret +.LBB0_3: // %def + mov x0, xzr + ret +.LBB0_4: // %lbl2 + movz x0, #2 + ret +.LBB0_5: // %lbl3 + movz x0, #4 + ret +.LBB0_6: // %lbl4 + movz x0, #8 + ret +.Ltmp0: + .size test_jumptable, .Ltmp0-test_jumptable + .cfi_endproc + .section .rodata,"a",@progbits + .align 3 +.LJTI0_0: + .xword .LBB0_2 + .xword .LBB0_4 + .xword .LBB0_5 + .xword .LBB0_3 + .xword .LBB0_6 + + + +// First make sure we get a page/lo12 pair in .text to pick up the jump-table + +// CHECK: Relocations [ +// CHECK: Section ({{[0-9]+}}) .rela.text { +// CHECK-NEXT: 0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21 .rodata +// CHECK-NEXT: 0x{{[0-9,A-F]+}} R_AARCH64_ADD_ABS_LO12_NC .rodata +// CHECK: } + +// Also check the targets in .rodata are relocated +// CHECK: Section ({{[0-9]+}}) .rela.rodata { +// CHECK-NEXT: 0x{{[0-9,A-F]+}} R_AARCH64_ABS64 .text +// CHECK: } +// CHECK: ] diff --git a/test/MC/AArch64/lit.local.cfg b/test/MC/AArch64/lit.local.cfg index cc02173..75dba81 100644 --- a/test/MC/AArch64/lit.local.cfg +++ b/test/MC/AArch64/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp', '.s'] - targets = set(config.root.targets_to_build.split()) if not 'AArch64' in targets: config.unsupported = True
\ No newline at end of file diff --git a/test/MC/AArch64/neon-2velem.s b/test/MC/AArch64/neon-2velem.s new file mode 100644 index 0000000..cde792a --- /dev/null +++ b/test/MC/AArch64/neon-2velem.s @@ -0,0 +1,271 @@ +// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//------------------------------------------------------------------------------ +// Instructions with 2 vectors and an element +//------------------------------------------------------------------------------ + + mla v0.2s, v1.2s, v2.s[2] + mla v0.2s, v1.2s, v22.s[2] + mla v3.4s, v8.4s, v2.s[1] + mla v3.4s, v8.4s, v22.s[3] + +// CHECK: mla v0.2s, v1.2s, v2.s[2] // encoding: [0x20,0x08,0x82,0x2f] +// CHECK: mla v0.2s, v1.2s, v22.s[2] // encoding: [0x20,0x08,0x96,0x2f] +// CHECK: mla v3.4s, v8.4s, v2.s[1] // encoding: [0x03,0x01,0xa2,0x6f] +// CHECK: mla v3.4s, v8.4s, v22.s[3] // encoding: [0x03,0x09,0xb6,0x6f] + + mla v0.4h, v1.4h, v2.h[2] + mla v0.4h, v1.4h, v15.h[2] + mla v0.8h, v1.8h, v2.h[7] + mla v0.8h, v1.8h, v14.h[6] + +// CHECK: mla v0.4h, v1.4h, v2.h[2] // encoding: [0x20,0x00,0x62,0x2f] +// CHECK: mla v0.4h, v1.4h, v15.h[2] // encoding: [0x20,0x00,0x6f,0x2f] +// CHECK: mla v0.8h, v1.8h, v2.h[7] // encoding: [0x20,0x08,0x72,0x6f] +// CHECK: mla v0.8h, v1.8h, v14.h[6] // encoding: [0x20,0x08,0x6e,0x6f] + + mls v0.2s, v1.2s, v2.s[2] + mls v0.2s, v1.2s, v22.s[2] + mls v3.4s, v8.4s, v2.s[1] + mls v3.4s, v8.4s, v22.s[3] + +// CHECK: mls v0.2s, v1.2s, v2.s[2] // encoding: [0x20,0x48,0x82,0x2f] +// CHECK: mls v0.2s, v1.2s, v22.s[2] // encoding: [0x20,0x48,0x96,0x2f] +// CHECK: mls v3.4s, v8.4s, v2.s[1] // encoding: [0x03,0x41,0xa2,0x6f] +// CHECK: mls v3.4s, v8.4s, v22.s[3] // encoding: [0x03,0x49,0xb6,0x6f] + + mls v0.4h, v1.4h, v2.h[2] + mls v0.4h, v1.4h, v15.h[2] + mls v0.8h, v1.8h, v2.h[7] + mls v0.8h, v1.8h, v14.h[6] + +// CHECK: mls v0.4h, v1.4h, v2.h[2] // encoding: [0x20,0x40,0x62,0x2f] 
+// CHECK: mls v0.4h, v1.4h, v15.h[2] // encoding: [0x20,0x40,0x6f,0x2f] +// CHECK: mls v0.8h, v1.8h, v2.h[7] // encoding: [0x20,0x48,0x72,0x6f] +// CHECK: mls v0.8h, v1.8h, v14.h[6] // encoding: [0x20,0x48,0x6e,0x6f] + + fmla v0.2s, v1.2s, v2.s[2] + fmla v0.2s, v1.2s, v22.s[2] + fmla v3.4s, v8.4s, v2.s[1] + fmla v3.4s, v8.4s, v22.s[3] + fmla v0.2d, v1.2d, v2.d[1] + fmla v0.2d, v1.2d, v22.d[1] + +// CHECK: fmla v0.2s, v1.2s, v2.s[2] // encoding: [0x20,0x18,0x82,0x0f] +// CHECK: fmla v0.2s, v1.2s, v22.s[2] // encoding: [0x20,0x18,0x96,0x0f] +// CHECK: fmla v3.4s, v8.4s, v2.s[1] // encoding: [0x03,0x11,0xa2,0x4f] +// CHECK: fmla v3.4s, v8.4s, v22.s[3] // encoding: [0x03,0x19,0xb6,0x4f] +// CHECK: fmla v0.2d, v1.2d, v2.d[1] // encoding: [0x20,0x18,0xc2,0x4f] +// CHECK: fmla v0.2d, v1.2d, v22.d[1] // encoding: [0x20,0x18,0xd6,0x4f] + + fmls v0.2s, v1.2s, v2.s[2] + fmls v0.2s, v1.2s, v22.s[2] + fmls v3.4s, v8.4s, v2.s[1] + fmls v3.4s, v8.4s, v22.s[3] + fmls v0.2d, v1.2d, v2.d[1] + fmls v0.2d, v1.2d, v22.d[1] + +// CHECK: fmls v0.2s, v1.2s, v2.s[2] // encoding: [0x20,0x58,0x82,0x0f] +// CHECK: fmls v0.2s, v1.2s, v22.s[2] // encoding: [0x20,0x58,0x96,0x0f] +// CHECK: fmls v3.4s, v8.4s, v2.s[1] // encoding: [0x03,0x51,0xa2,0x4f] +// CHECK: fmls v3.4s, v8.4s, v22.s[3] // encoding: [0x03,0x59,0xb6,0x4f] +// CHECK: fmls v0.2d, v1.2d, v2.d[1] // encoding: [0x20,0x58,0xc2,0x4f] +// CHECK: fmls v0.2d, v1.2d, v22.d[1] // encoding: [0x20,0x58,0xd6,0x4f] + + smlal v0.4s, v1.4h, v2.h[2] + smlal v0.2d, v1.2s, v2.s[2] + smlal v0.2d, v1.2s, v22.s[2] + smlal2 v0.4s, v1.8h, v1.h[2] + smlal2 v0.2d, v1.4s, v1.s[2] + smlal2 v0.2d, v1.4s, v22.s[2] + +// CHECK: smlal v0.4s, v1.4h, v2.h[2] // encoding: [0x20,0x20,0x62,0x0f] +// CHECK: smlal v0.2d, v1.2s, v2.s[2] // encoding: [0x20,0x28,0x82,0x0f] +// CHECK: smlal v0.2d, v1.2s, v22.s[2] // encoding: [0x20,0x28,0x96,0x0f] +// CHECK: smlal2 v0.4s, v1.8h, v1.h[2] // encoding: [0x20,0x20,0x61,0x4f] +// CHECK: smlal2 v0.2d, v1.4s, v1.s[2] // 
encoding: [0x20,0x28,0x81,0x4f] +// CHECK: smlal2 v0.2d, v1.4s, v22.s[2] // encoding: [0x20,0x28,0x96,0x4f] + + smlsl v0.4s, v1.4h, v2.h[2] + smlsl v0.2d, v1.2s, v2.s[2] + smlsl v0.2d, v1.2s, v22.s[2] + smlsl2 v0.4s, v1.8h, v1.h[2] + smlsl2 v0.2d, v1.4s, v1.s[2] + smlsl2 v0.2d, v1.4s, v22.s[2] + +// CHECK: smlsl v0.4s, v1.4h, v2.h[2] // encoding: [0x20,0x60,0x62,0x0f] +// CHECK: smlsl v0.2d, v1.2s, v2.s[2] // encoding: [0x20,0x68,0x82,0x0f] +// CHECK: smlsl v0.2d, v1.2s, v22.s[2] // encoding: [0x20,0x68,0x96,0x0f] +// CHECK: smlsl2 v0.4s, v1.8h, v1.h[2] // encoding: [0x20,0x60,0x61,0x4f] +// CHECK: smlsl2 v0.2d, v1.4s, v1.s[2] // encoding: [0x20,0x68,0x81,0x4f] +// CHECK: smlsl2 v0.2d, v1.4s, v22.s[2] // encoding: [0x20,0x68,0x96,0x4f] + + sqdmlal v0.4s, v1.4h, v2.h[2] + sqdmlal v0.2d, v1.2s, v2.s[2] + sqdmlal v0.2d, v1.2s, v22.s[2] + sqdmlal2 v0.4s, v1.8h, v1.h[2] + sqdmlal2 v0.2d, v1.4s, v1.s[2] + sqdmlal2 v0.2d, v1.4s, v22.s[2] + +// CHECK: sqdmlal v0.4s, v1.4h, v2.h[2] // encoding: [0x20,0x30,0x62,0x0f] +// CHECK: sqdmlal v0.2d, v1.2s, v2.s[2] // encoding: [0x20,0x38,0x82,0x0f] +// CHECK: sqdmlal v0.2d, v1.2s, v22.s[2] // encoding: [0x20,0x38,0x96,0x0f] +// CHECK: sqdmlal2 v0.4s, v1.8h, v1.h[2] // encoding: [0x20,0x30,0x61,0x4f] +// CHECK: sqdmlal2 v0.2d, v1.4s, v1.s[2] // encoding: [0x20,0x38,0x81,0x4f] +// CHECK: sqdmlal2 v0.2d, v1.4s, v22.s[2] // encoding: [0x20,0x38,0x96,0x4f] + + umlal v0.4s, v1.4h, v2.h[2] + umlal v0.2d, v1.2s, v2.s[2] + umlal v0.2d, v1.2s, v22.s[2] + umlal2 v0.4s, v1.8h, v1.h[2] + umlal2 v0.2d, v1.4s, v1.s[2] + umlal2 v0.2d, v1.4s, v22.s[2] + +// CHECK: umlal v0.4s, v1.4h, v2.h[2] // encoding: [0x20,0x20,0x62,0x2f] +// CHECK: umlal v0.2d, v1.2s, v2.s[2] // encoding: [0x20,0x28,0x82,0x2f] +// CHECK: umlal v0.2d, v1.2s, v22.s[2] // encoding: [0x20,0x28,0x96,0x2f] +// CHECK: umlal2 v0.4s, v1.8h, v1.h[2] // encoding: [0x20,0x20,0x61,0x6f] +// CHECK: umlal2 v0.2d, v1.4s, v1.s[2] // encoding: [0x20,0x28,0x81,0x6f] +// CHECK: umlal2 v0.2d, 
v1.4s, v22.s[2] // encoding: [0x20,0x28,0x96,0x6f] + + umlsl v0.4s, v1.4h, v2.h[2] + umlsl v0.2d, v1.2s, v2.s[2] + umlsl v0.2d, v1.2s, v22.s[2] + umlsl2 v0.4s, v1.8h, v1.h[2] + umlsl2 v0.2d, v1.4s, v1.s[2] + umlsl2 v0.2d, v1.4s, v22.s[2] + +// CHECK: umlsl v0.4s, v1.4h, v2.h[2] // encoding: [0x20,0x60,0x62,0x2f] +// CHECK: umlsl v0.2d, v1.2s, v2.s[2] // encoding: [0x20,0x68,0x82,0x2f] +// CHECK: umlsl v0.2d, v1.2s, v22.s[2] // encoding: [0x20,0x68,0x96,0x2f] +// CHECK: umlsl2 v0.4s, v1.8h, v1.h[2] // encoding: [0x20,0x60,0x61,0x6f] +// CHECK: umlsl2 v0.2d, v1.4s, v1.s[2] // encoding: [0x20,0x68,0x81,0x6f] +// CHECK: umlsl2 v0.2d, v1.4s, v22.s[2] // encoding: [0x20,0x68,0x96,0x6f] + + sqdmlsl v0.4s, v1.4h, v2.h[2] + sqdmlsl v0.2d, v1.2s, v2.s[2] + sqdmlsl v0.2d, v1.2s, v22.s[2] + sqdmlsl2 v0.4s, v1.8h, v1.h[2] + sqdmlsl2 v0.2d, v1.4s, v1.s[2] + sqdmlsl2 v0.2d, v1.4s, v22.s[2] + +// CHECK: sqdmlsl v0.4s, v1.4h, v2.h[2] // encoding: [0x20,0x70,0x62,0x0f] +// CHECK: sqdmlsl v0.2d, v1.2s, v2.s[2] // encoding: [0x20,0x78,0x82,0x0f] +// CHECK: sqdmlsl v0.2d, v1.2s, v22.s[2] // encoding: [0x20,0x78,0x96,0x0f] +// CHECK: sqdmlsl2 v0.4s, v1.8h, v1.h[2] // encoding: [0x20,0x70,0x61,0x4f] +// CHECK: sqdmlsl2 v0.2d, v1.4s, v1.s[2] // encoding: [0x20,0x78,0x81,0x4f] +// CHECK: sqdmlsl2 v0.2d, v1.4s, v22.s[2] // encoding: [0x20,0x78,0x96,0x4f] + + mul v0.4h, v1.4h, v2.h[2] + mul v0.8h, v1.8h, v2.h[2] + mul v0.2s, v1.2s, v2.s[2] + mul v0.2s, v1.2s, v22.s[2] + mul v0.4s, v1.4s, v2.s[2] + mul v0.4s, v1.4s, v22.s[2] + +// CHECK: mul v0.4h, v1.4h, v2.h[2] // encoding: [0x20,0x80,0x62,0x0f] +// CHECK: mul v0.8h, v1.8h, v2.h[2] // encoding: [0x20,0x80,0x62,0x4f] +// CHECK: mul v0.2s, v1.2s, v2.s[2] // encoding: [0x20,0x88,0x82,0x0f] +// CHECK: mul v0.2s, v1.2s, v22.s[2] // encoding: [0x20,0x88,0x96,0x0f] +// CHECK: mul v0.4s, v1.4s, v2.s[2] // encoding: [0x20,0x88,0x82,0x4f] +// CHECK: mul v0.4s, v1.4s, v22.s[2] // encoding: [0x20,0x88,0x96,0x4f] + + fmul v0.2s, v1.2s, v2.s[2] + fmul 
v0.2s, v1.2s, v22.s[2] + fmul v0.4s, v1.4s, v2.s[2] + fmul v0.4s, v1.4s, v22.s[2] + fmul v0.2d, v1.2d, v2.d[1] + fmul v0.2d, v1.2d, v22.d[1] + +// CHECK: fmul v0.2s, v1.2s, v2.s[2] // encoding: [0x20,0x98,0x82,0x0f] +// CHECK: fmul v0.2s, v1.2s, v22.s[2] // encoding: [0x20,0x98,0x96,0x0f] +// CHECK: fmul v0.4s, v1.4s, v2.s[2] // encoding: [0x20,0x98,0x82,0x4f] +// CHECK: fmul v0.4s, v1.4s, v22.s[2] // encoding: [0x20,0x98,0x96,0x4f] +// CHECK: fmul v0.2d, v1.2d, v2.d[1] // encoding: [0x20,0x98,0xc2,0x4f] +// CHECK: fmul v0.2d, v1.2d, v22.d[1] // encoding: [0x20,0x98,0xd6,0x4f] + + fmulx v0.2s, v1.2s, v2.s[2] + fmulx v0.2s, v1.2s, v22.s[2] + fmulx v0.4s, v1.4s, v2.s[2] + fmulx v0.4s, v1.4s, v22.s[2] + fmulx v0.2d, v1.2d, v2.d[1] + fmulx v0.2d, v1.2d, v22.d[1] + +// CHECK: fmulx v0.2s, v1.2s, v2.s[2] // encoding: [0x20,0x98,0x82,0x2f] +// CHECK: fmulx v0.2s, v1.2s, v22.s[2] // encoding: [0x20,0x98,0x96,0x2f] +// CHECK: fmulx v0.4s, v1.4s, v2.s[2] // encoding: [0x20,0x98,0x82,0x6f] +// CHECK: fmulx v0.4s, v1.4s, v22.s[2] // encoding: [0x20,0x98,0x96,0x6f] +// CHECK: fmulx v0.2d, v1.2d, v2.d[1] // encoding: [0x20,0x98,0xc2,0x6f] +// CHECK: fmulx v0.2d, v1.2d, v22.d[1] // encoding: [0x20,0x98,0xd6,0x6f] + + smull v0.4s, v1.4h, v2.h[2] + smull v0.2d, v1.2s, v2.s[2] + smull v0.2d, v1.2s, v22.s[2] + smull2 v0.4s, v1.8h, v2.h[2] + smull2 v0.2d, v1.4s, v2.s[2] + smull2 v0.2d, v1.4s, v22.s[2] + +// CHECK: smull v0.4s, v1.4h, v2.h[2] // encoding: [0x20,0xa0,0x62,0x0f] +// CHECK: smull v0.2d, v1.2s, v2.s[2] // encoding: [0x20,0xa8,0x82,0x0f] +// CHECK: smull v0.2d, v1.2s, v22.s[2] // encoding: [0x20,0xa8,0x96,0x0f] +// CHECK: smull2 v0.4s, v1.8h, v2.h[2] // encoding: [0x20,0xa0,0x62,0x4f] +// CHECK: smull2 v0.2d, v1.4s, v2.s[2] // encoding: [0x20,0xa8,0x82,0x4f] +// CHECK: smull2 v0.2d, v1.4s, v22.s[2] // encoding: [0x20,0xa8,0x96,0x4f] + + umull v0.4s, v1.4h, v2.h[2] + umull v0.2d, v1.2s, v2.s[2] + umull v0.2d, v1.2s, v22.s[2] + umull2 v0.4s, v1.8h, v2.h[2] + umull2 v0.2d, 
v1.4s, v2.s[2] + umull2 v0.2d, v1.4s, v22.s[2] + +// CHECK: umull v0.4s, v1.4h, v2.h[2] // encoding: [0x20,0xa0,0x62,0x2f] +// CHECK: umull v0.2d, v1.2s, v2.s[2] // encoding: [0x20,0xa8,0x82,0x2f] +// CHECK: umull v0.2d, v1.2s, v22.s[2] // encoding: [0x20,0xa8,0x96,0x2f] +// CHECK: umull2 v0.4s, v1.8h, v2.h[2] // encoding: [0x20,0xa0,0x62,0x6f] +// CHECK: umull2 v0.2d, v1.4s, v2.s[2] // encoding: [0x20,0xa8,0x82,0x6f] +// CHECK: umull2 v0.2d, v1.4s, v22.s[2] // encoding: [0x20,0xa8,0x96,0x6f] + + sqdmull v0.4s, v1.4h, v2.h[2] + sqdmull v0.2d, v1.2s, v2.s[2] + sqdmull v0.2d, v1.2s, v22.s[2] + sqdmull2 v0.4s, v1.8h, v2.h[2] + sqdmull2 v0.2d, v1.4s, v2.s[2] + sqdmull2 v0.2d, v1.4s, v22.s[2] + +// CHECK: sqdmull v0.4s, v1.4h, v2.h[2] // encoding: [0x20,0xb0,0x62,0x0f] +// CHECK: sqdmull v0.2d, v1.2s, v2.s[2] // encoding: [0x20,0xb8,0x82,0x0f] +// CHECK: sqdmull v0.2d, v1.2s, v22.s[2] // encoding: [0x20,0xb8,0x96,0x0f] +// CHECK: sqdmull2 v0.4s, v1.8h, v2.h[2] // encoding: [0x20,0xb0,0x62,0x4f] +// CHECK: sqdmull2 v0.2d, v1.4s, v2.s[2] // encoding: [0x20,0xb8,0x82,0x4f] +// CHECK: sqdmull2 v0.2d, v1.4s, v22.s[2] // encoding: [0x20,0xb8,0x96,0x4f] + + sqdmulh v0.4h, v1.4h, v2.h[2] + sqdmulh v0.8h, v1.8h, v2.h[2] + sqdmulh v0.2s, v1.2s, v2.s[2] + sqdmulh v0.2s, v1.2s, v22.s[2] + sqdmulh v0.4s, v1.4s, v2.s[2] + sqdmulh v0.4s, v1.4s, v22.s[2] + +// CHECK: sqdmulh v0.4h, v1.4h, v2.h[2] // encoding: [0x20,0xc0,0x62,0x0f] +// CHECK: sqdmulh v0.8h, v1.8h, v2.h[2] // encoding: [0x20,0xc0,0x62,0x4f] +// CHECK: sqdmulh v0.2s, v1.2s, v2.s[2] // encoding: [0x20,0xc8,0x82,0x0f] +// CHECK: sqdmulh v0.2s, v1.2s, v22.s[2] // encoding: [0x20,0xc8,0x96,0x0f] +// CHECK: sqdmulh v0.4s, v1.4s, v2.s[2] // encoding: [0x20,0xc8,0x82,0x4f] +// CHECK: sqdmulh v0.4s, v1.4s, v22.s[2] // encoding: [0x20,0xc8,0x96,0x4f] + + sqrdmulh v0.4h, v1.4h, v2.h[2] + sqrdmulh v0.8h, v1.8h, v2.h[2] + sqrdmulh v0.2s, v1.2s, v2.s[2] + sqrdmulh v0.2s, v1.2s, v22.s[2] + sqrdmulh v0.4s, v1.4s, v2.s[2] + sqrdmulh 
v0.4s, v1.4s, v22.s[2] + +// CHECK: sqrdmulh v0.4h, v1.4h, v2.h[2] // encoding: [0x20,0xd0,0x62,0x0f] +// CHECK: sqrdmulh v0.8h, v1.8h, v2.h[2] // encoding: [0x20,0xd0,0x62,0x4f] +// CHECK: sqrdmulh v0.2s, v1.2s, v2.s[2] // encoding: [0x20,0xd8,0x82,0x0f] +// CHECK: sqrdmulh v0.2s, v1.2s, v22.s[2] // encoding: [0x20,0xd8,0x96,0x0f] +// CHECK: sqrdmulh v0.4s, v1.4s, v2.s[2] // encoding: [0x20,0xd8,0x82,0x4f] +// CHECK: sqrdmulh v0.4s, v1.4s, v22.s[2] // encoding: [0x20,0xd8,0x96,0x4f] diff --git a/test/MC/AArch64/neon-3vdiff.s b/test/MC/AArch64/neon-3vdiff.s new file mode 100644 index 0000000..3ff86bf --- /dev/null +++ b/test/MC/AArch64/neon-3vdiff.s @@ -0,0 +1,415 @@ +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//------------------------------------------------------------------------------ +// Instructions with 3 different vector data types +//------------------------------------------------------------------------------ + +//------------------------------------------------------------------------------ +// Long +//------------------------------------------------------------------------------ + +//------------------------------------------------------------------------------ +// Long - Variant 1 +//------------------------------------------------------------------------------ + + saddl v0.8h, v1.8b, v2.8b + saddl v0.4s, v1.4h, v2.4h + saddl v0.2d, v1.2s, v2.2s + +// CHECK: saddl v0.8h, v1.8b, v2.8b // encoding: [0x20,0x00,0x22,0x0e] +// CHECK: saddl v0.4s, v1.4h, v2.4h // encoding: [0x20,0x00,0x62,0x0e] +// CHECK: saddl v0.2d, v1.2s, v2.2s // encoding: [0x20,0x00,0xa2,0x0e] + + saddl2 v0.4s, v1.8h, v2.8h + saddl2 v0.8h, v1.16b, v2.16b + saddl2 v0.2d, v1.4s, v2.4s + +// CHECK: saddl2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0x00,0x62,0x4e] +// CHECK: saddl2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0x00,0x22,0x4e] +// CHECK: saddl2 v0.2d, 
v1.4s, v2.4s // encoding: [0x20,0x00,0xa2,0x4e] + + uaddl v0.8h, v1.8b, v2.8b + uaddl v0.4s, v1.4h, v2.4h + uaddl v0.2d, v1.2s, v2.2s + +// CHECK: uaddl v0.8h, v1.8b, v2.8b // encoding: [0x20,0x00,0x22,0x2e] +// CHECK: uaddl v0.4s, v1.4h, v2.4h // encoding: [0x20,0x00,0x62,0x2e] +// CHECK: uaddl v0.2d, v1.2s, v2.2s // encoding: [0x20,0x00,0xa2,0x2e] + + uaddl2 v0.8h, v1.16b, v2.16b + uaddl2 v0.4s, v1.8h, v2.8h + uaddl2 v0.2d, v1.4s, v2.4s + +// CHECK: uaddl2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0x00,0x22,0x6e] +// CHECK: uaddl2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0x00,0x62,0x6e] +// CHECK: uaddl2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0x00,0xa2,0x6e] + + ssubl v0.8h, v1.8b, v2.8b + ssubl v0.4s, v1.4h, v2.4h + ssubl v0.2d, v1.2s, v2.2s + +// CHECK: ssubl v0.8h, v1.8b, v2.8b // encoding: [0x20,0x20,0x22,0x0e] +// CHECK: ssubl v0.4s, v1.4h, v2.4h // encoding: [0x20,0x20,0x62,0x0e] +// CHECK: ssubl v0.2d, v1.2s, v2.2s // encoding: [0x20,0x20,0xa2,0x0e] + + ssubl2 v0.8h, v1.16b, v2.16b + ssubl2 v0.4s, v1.8h, v2.8h + ssubl2 v0.2d, v1.4s, v2.4s + +// CHECK: ssubl2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0x20,0x22,0x4e] +// CHECK: ssubl2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0x20,0x62,0x4e] +// CHECK: ssubl2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0x20,0xa2,0x4e] + + usubl v0.8h, v1.8b, v2.8b + usubl v0.4s, v1.4h, v2.4h + usubl v0.2d, v1.2s, v2.2s + +// CHECK: usubl v0.8h, v1.8b, v2.8b // encoding: [0x20,0x20,0x22,0x2e] +// CHECK: usubl v0.4s, v1.4h, v2.4h // encoding: [0x20,0x20,0x62,0x2e] +// CHECK: usubl v0.2d, v1.2s, v2.2s // encoding: [0x20,0x20,0xa2,0x2e] + + usubl2 v0.8h, v1.16b, v2.16b + usubl2 v0.4s, v1.8h, v2.8h + usubl2 v0.2d, v1.4s, v2.4s + +// CHECK: usubl2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0x20,0x22,0x6e] +// CHECK: usubl2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0x20,0x62,0x6e] +// CHECK: usubl2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0x20,0xa2,0x6e] + + sabal v0.8h, v1.8b, v2.8b + sabal v0.4s, v1.4h, v2.4h + sabal v0.2d, v1.2s, v2.2s + +// 
CHECK: sabal v0.8h, v1.8b, v2.8b // encoding: [0x20,0x50,0x22,0x0e] +// CHECK: sabal v0.4s, v1.4h, v2.4h // encoding: [0x20,0x50,0x62,0x0e] +// CHECK: sabal v0.2d, v1.2s, v2.2s // encoding: [0x20,0x50,0xa2,0x0e] + + sabal2 v0.8h, v1.16b, v2.16b + sabal2 v0.4s, v1.8h, v2.8h + sabal2 v0.2d, v1.4s, v2.4s + +// CHECK: sabal2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0x50,0x22,0x4e] +// CHECK: sabal2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0x50,0x62,0x4e] +// CHECK: sabal2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0x50,0xa2,0x4e] + + uabal v0.8h, v1.8b, v2.8b + uabal v0.4s, v1.4h, v2.4h + uabal v0.2d, v1.2s, v2.2s + +// CHECK: uabal v0.8h, v1.8b, v2.8b // encoding: [0x20,0x50,0x22,0x2e] +// CHECK: uabal v0.4s, v1.4h, v2.4h // encoding: [0x20,0x50,0x62,0x2e] +// CHECK: uabal v0.2d, v1.2s, v2.2s // encoding: [0x20,0x50,0xa2,0x2e] + + uabal2 v0.8h, v1.16b, v2.16b + uabal2 v0.4s, v1.8h, v2.8h + uabal2 v0.2d, v1.4s, v2.4s + +// CHECK: uabal2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0x50,0x22,0x6e] +// CHECK: uabal2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0x50,0x62,0x6e] +// CHECK: uabal2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0x50,0xa2,0x6e] + + sabdl v0.8h, v1.8b, v2.8b + sabdl v0.4s, v1.4h, v2.4h + sabdl v0.2d, v1.2s, v2.2s + +// CHECK: sabdl v0.8h, v1.8b, v2.8b // encoding: [0x20,0x70,0x22,0x0e] +// CHECK: sabdl v0.4s, v1.4h, v2.4h // encoding: [0x20,0x70,0x62,0x0e] +// CHECK: sabdl v0.2d, v1.2s, v2.2s // encoding: [0x20,0x70,0xa2,0x0e] + + sabdl2 v0.8h, v1.16b, v2.16b + sabdl2 v0.4s, v1.8h, v2.8h + sabdl2 v0.2d, v1.4s, v2.4s + +// CHECK: sabdl2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0x70,0x22,0x4e] +// CHECK: sabdl2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0x70,0x62,0x4e] +// CHECK: sabdl2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0x70,0xa2,0x4e] + + uabdl v0.8h, v1.8b, v2.8b + uabdl v0.4s, v1.4h, v2.4h + uabdl v0.2d, v1.2s, v2.2s + +// CHECK: uabdl v0.8h, v1.8b, v2.8b // encoding: [0x20,0x70,0x22,0x2e] +// CHECK: uabdl v0.4s, v1.4h, v2.4h // encoding: [0x20,0x70,0x62,0x2e] +// 
CHECK: uabdl v0.2d, v1.2s, v2.2s // encoding: [0x20,0x70,0xa2,0x2e] + + uabdl2 v0.8h, v1.16b, v2.16b + uabdl2 v0.4s, v1.8h, v2.8h + uabdl2 v0.2d, v1.4s, v2.4s + +// CHECK: uabdl2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0x70,0x22,0x6e] +// CHECK: uabdl2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0x70,0x62,0x6e] +// CHECK: uabdl2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0x70,0xa2,0x6e] + + smlal v0.8h, v1.8b, v2.8b + smlal v0.4s, v1.4h, v2.4h + smlal v0.2d, v1.2s, v2.2s + +// CHECK: smlal v0.8h, v1.8b, v2.8b // encoding: [0x20,0x80,0x22,0x0e] +// CHECK: smlal v0.4s, v1.4h, v2.4h // encoding: [0x20,0x80,0x62,0x0e] +// CHECK: smlal v0.2d, v1.2s, v2.2s // encoding: [0x20,0x80,0xa2,0x0e] + + smlal2 v0.8h, v1.16b, v2.16b + smlal2 v0.4s, v1.8h, v2.8h + smlal2 v0.2d, v1.4s, v2.4s + +// CHECK: smlal2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0x80,0x22,0x4e] +// CHECK: smlal2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0x80,0x62,0x4e] +// CHECK: smlal2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0x80,0xa2,0x4e] + + umlal v0.8h, v1.8b, v2.8b + umlal v0.4s, v1.4h, v2.4h + umlal v0.2d, v1.2s, v2.2s + +// CHECK: umlal v0.8h, v1.8b, v2.8b // encoding: [0x20,0x80,0x22,0x2e] +// CHECK: umlal v0.4s, v1.4h, v2.4h // encoding: [0x20,0x80,0x62,0x2e] +// CHECK: umlal v0.2d, v1.2s, v2.2s // encoding: [0x20,0x80,0xa2,0x2e] + + umlal2 v0.8h, v1.16b, v2.16b + umlal2 v0.4s, v1.8h, v2.8h + umlal2 v0.2d, v1.4s, v2.4s + +// CHECK: umlal2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0x80,0x22,0x6e] +// CHECK: umlal2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0x80,0x62,0x6e] +// CHECK: umlal2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0x80,0xa2,0x6e] + + smlsl v0.8h, v1.8b, v2.8b + smlsl v0.4s, v1.4h, v2.4h + smlsl v0.2d, v1.2s, v2.2s + +// CHECK: smlsl v0.8h, v1.8b, v2.8b // encoding: [0x20,0xa0,0x22,0x0e] +// CHECK: smlsl v0.4s, v1.4h, v2.4h // encoding: [0x20,0xa0,0x62,0x0e] +// CHECK: smlsl v0.2d, v1.2s, v2.2s // encoding: [0x20,0xa0,0xa2,0x0e] + + smlsl2 v0.8h, v1.16b, v2.16b + smlsl2 v0.4s, v1.8h, v2.8h + smlsl2 v0.2d, 
v1.4s, v2.4s + +// CHECK: smlsl2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0xa0,0x22,0x4e] +// CHECK: smlsl2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0xa0,0x62,0x4e] +// CHECK: smlsl2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0xa0,0xa2,0x4e] + + umlsl v0.8h, v1.8b, v2.8b + umlsl v0.4s, v1.4h, v2.4h + umlsl v0.2d, v1.2s, v2.2s + +// CHECK: umlsl v0.8h, v1.8b, v2.8b // encoding: [0x20,0xa0,0x22,0x2e] +// CHECK: umlsl v0.4s, v1.4h, v2.4h // encoding: [0x20,0xa0,0x62,0x2e] +// CHECK: umlsl v0.2d, v1.2s, v2.2s // encoding: [0x20,0xa0,0xa2,0x2e] + + umlsl2 v0.8h, v1.16b, v2.16b + umlsl2 v0.4s, v1.8h, v2.8h + umlsl2 v0.2d, v1.4s, v2.4s + +// CHECK: umlsl2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0xa0,0x22,0x6e] +// CHECK: umlsl2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0xa0,0x62,0x6e] +// CHECK: umlsl2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0xa0,0xa2,0x6e] + + smull v0.8h, v1.8b, v2.8b + smull v0.4s, v1.4h, v2.4h + smull v0.2d, v1.2s, v2.2s + +// CHECK: smull v0.8h, v1.8b, v2.8b // encoding: [0x20,0xc0,0x22,0x0e] +// CHECK: smull v0.4s, v1.4h, v2.4h // encoding: [0x20,0xc0,0x62,0x0e] +// CHECK: smull v0.2d, v1.2s, v2.2s // encoding: [0x20,0xc0,0xa2,0x0e] + + smull2 v0.8h, v1.16b, v2.16b + smull2 v0.4s, v1.8h, v2.8h + smull2 v0.2d, v1.4s, v2.4s + +// CHECK: smull2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0xc0,0x22,0x4e] +// CHECK: smull2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0xc0,0x62,0x4e] +// CHECK: smull2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0xc0,0xa2,0x4e] + + umull v0.8h, v1.8b, v2.8b + umull v0.4s, v1.4h, v2.4h + umull v0.2d, v1.2s, v2.2s + +// CHECK: umull v0.8h, v1.8b, v2.8b // encoding: [0x20,0xc0,0x22,0x2e] +// CHECK: umull v0.4s, v1.4h, v2.4h // encoding: [0x20,0xc0,0x62,0x2e] +// CHECK: umull v0.2d, v1.2s, v2.2s // encoding: [0x20,0xc0,0xa2,0x2e] + + umull2 v0.8h, v1.16b, v2.16b + umull2 v0.4s, v1.8h, v2.8h + umull2 v0.2d, v1.4s, v2.4s + +// CHECK: umull2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0xc0,0x22,0x6e] +// CHECK: umull2 v0.4s, v1.8h, v2.8h // encoding: 
[0x20,0xc0,0x62,0x6e] +// CHECK: umull2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0xc0,0xa2,0x6e] + +//------------------------------------------------------------------------------ +// Long - Variant 2 +//------------------------------------------------------------------------------ + + sqdmlal v0.4s, v1.4h, v2.4h + sqdmlal v0.2d, v1.2s, v2.2s + +// CHECK: sqdmlal v0.4s, v1.4h, v2.4h // encoding: [0x20,0x90,0x62,0x0e] +// CHECK: sqdmlal v0.2d, v1.2s, v2.2s // encoding: [0x20,0x90,0xa2,0x0e] + + sqdmlal2 v0.4s, v1.8h, v2.8h + sqdmlal2 v0.2d, v1.4s, v2.4s + +// CHECK: sqdmlal2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0x90,0x62,0x4e] +// CHECK: sqdmlal2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0x90,0xa2,0x4e] + + sqdmlsl v0.4s, v1.4h, v2.4h + sqdmlsl v0.2d, v1.2s, v2.2s + +// CHECK: sqdmlsl v0.4s, v1.4h, v2.4h // encoding: [0x20,0xb0,0x62,0x0e] +// CHECK: sqdmlsl v0.2d, v1.2s, v2.2s // encoding: [0x20,0xb0,0xa2,0x0e] + + sqdmlsl2 v0.4s, v1.8h, v2.8h + sqdmlsl2 v0.2d, v1.4s, v2.4s + +// CHECK: sqdmlsl2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0xb0,0x62,0x4e] +// CHECK: sqdmlsl2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0xb0,0xa2,0x4e] + + sqdmull v0.4s, v1.4h, v2.4h + sqdmull v0.2d, v1.2s, v2.2s + +// CHECK: sqdmull v0.4s, v1.4h, v2.4h // encoding: [0x20,0xd0,0x62,0x0e] +// CHECK: sqdmull v0.2d, v1.2s, v2.2s // encoding: [0x20,0xd0,0xa2,0x0e] + + sqdmull2 v0.4s, v1.8h, v2.8h + sqdmull2 v0.2d, v1.4s, v2.4s + +// CHECK: sqdmull2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0xd0,0x62,0x4e] +// CHECK: sqdmull2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0xd0,0xa2,0x4e] + +//------------------------------------------------------------------------------ +// Long - Variant 3 +//------------------------------------------------------------------------------ + + pmull v0.8h, v1.8b, v2.8b + pmull v0.1q, v1.1d, v2.1d + +// CHECK: pmull v0.8h, v1.8b, v2.8b // encoding: [0x20,0xe0,0x22,0x0e] +// CHECK: pmull v0.1q, v1.1d, v2.1d // encoding: [0x20,0xe0,0xe2,0x0e] + + pmull2 v0.8h, v1.16b, v2.16b + pmull2 
v0.1q, v1.2d, v2.2d + +// CHECK: pmull2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0xe0,0x22,0x4e] +// CHECK: pmull2 v0.1q, v1.2d, v2.2d // encoding: [0x20,0xe0,0xe2,0x4e] + +//------------------------------------------------------------------------------ +// Widen +//------------------------------------------------------------------------------ + + saddw v0.8h, v1.8h, v2.8b + saddw v0.4s, v1.4s, v2.4h + saddw v0.2d, v1.2d, v2.2s + +// CHECK: saddw v0.8h, v1.8h, v2.8b // encoding: [0x20,0x10,0x22,0x0e] +// CHECK: saddw v0.4s, v1.4s, v2.4h // encoding: [0x20,0x10,0x62,0x0e] +// CHECK: saddw v0.2d, v1.2d, v2.2s // encoding: [0x20,0x10,0xa2,0x0e] + + saddw2 v0.8h, v1.8h, v2.16b + saddw2 v0.4s, v1.4s, v2.8h + saddw2 v0.2d, v1.2d, v2.4s + +// CHECK: saddw2 v0.8h, v1.8h, v2.16b // encoding: [0x20,0x10,0x22,0x4e] +// CHECK: saddw2 v0.4s, v1.4s, v2.8h // encoding: [0x20,0x10,0x62,0x4e] +// CHECK: saddw2 v0.2d, v1.2d, v2.4s // encoding: [0x20,0x10,0xa2,0x4e] + + uaddw v0.8h, v1.8h, v2.8b + uaddw v0.4s, v1.4s, v2.4h + uaddw v0.2d, v1.2d, v2.2s + +// CHECK: uaddw v0.8h, v1.8h, v2.8b // encoding: [0x20,0x10,0x22,0x2e] +// CHECK: uaddw v0.4s, v1.4s, v2.4h // encoding: [0x20,0x10,0x62,0x2e] +// CHECK: uaddw v0.2d, v1.2d, v2.2s // encoding: [0x20,0x10,0xa2,0x2e] + + uaddw2 v0.8h, v1.8h, v2.16b + uaddw2 v0.4s, v1.4s, v2.8h + uaddw2 v0.2d, v1.2d, v2.4s + +// CHECK: uaddw2 v0.8h, v1.8h, v2.16b // encoding: [0x20,0x10,0x22,0x6e] +// CHECK: uaddw2 v0.4s, v1.4s, v2.8h // encoding: [0x20,0x10,0x62,0x6e] +// CHECK: uaddw2 v0.2d, v1.2d, v2.4s // encoding: [0x20,0x10,0xa2,0x6e] + + ssubw v0.8h, v1.8h, v2.8b + ssubw v0.4s, v1.4s, v2.4h + ssubw v0.2d, v1.2d, v2.2s + +// CHECK: ssubw v0.8h, v1.8h, v2.8b // encoding: [0x20,0x30,0x22,0x0e] +// CHECK: ssubw v0.4s, v1.4s, v2.4h // encoding: [0x20,0x30,0x62,0x0e] +// CHECK: ssubw v0.2d, v1.2d, v2.2s // encoding: [0x20,0x30,0xa2,0x0e] + + ssubw2 v0.8h, v1.8h, v2.16b + ssubw2 v0.4s, v1.4s, v2.8h + ssubw2 v0.2d, v1.2d, v2.4s + +// CHECK: ssubw2 v0.8h, 
v1.8h, v2.16b // encoding: [0x20,0x30,0x22,0x4e] +// CHECK: ssubw2 v0.4s, v1.4s, v2.8h // encoding: [0x20,0x30,0x62,0x4e] +// CHECK: ssubw2 v0.2d, v1.2d, v2.4s // encoding: [0x20,0x30,0xa2,0x4e] + + usubw v0.8h, v1.8h, v2.8b + usubw v0.4s, v1.4s, v2.4h + usubw v0.2d, v1.2d, v2.2s + +// CHECK: usubw v0.8h, v1.8h, v2.8b // encoding: [0x20,0x30,0x22,0x2e] +// CHECK: usubw v0.4s, v1.4s, v2.4h // encoding: [0x20,0x30,0x62,0x2e] +// CHECK: usubw v0.2d, v1.2d, v2.2s // encoding: [0x20,0x30,0xa2,0x2e] + + usubw2 v0.8h, v1.8h, v2.16b + usubw2 v0.4s, v1.4s, v2.8h + usubw2 v0.2d, v1.2d, v2.4s + +// CHECK: usubw2 v0.8h, v1.8h, v2.16b // encoding: [0x20,0x30,0x22,0x6e] +// CHECK: usubw2 v0.4s, v1.4s, v2.8h // encoding: [0x20,0x30,0x62,0x6e] +// CHECK: usubw2 v0.2d, v1.2d, v2.4s // encoding: [0x20,0x30,0xa2,0x6e] + +//------------------------------------------------------------------------------ +// Narrow +//------------------------------------------------------------------------------ + + addhn v0.8b, v1.8h, v2.8h + addhn v0.4h, v1.4s, v2.4s + addhn v0.2s, v1.2d, v2.2d + +// CHECK: addhn v0.8b, v1.8h, v2.8h // encoding: [0x20,0x40,0x22,0x0e] +// CHECK: addhn v0.4h, v1.4s, v2.4s // encoding: [0x20,0x40,0x62,0x0e] +// CHECK: addhn v0.2s, v1.2d, v2.2d // encoding: [0x20,0x40,0xa2,0x0e] + + addhn2 v0.16b, v1.8h, v2.8h + addhn2 v0.8h, v1.4s, v2.4s + addhn2 v0.4s, v1.2d, v2.2d + +// CHECK: addhn2 v0.16b, v1.8h, v2.8h // encoding: [0x20,0x40,0x22,0x4e] +// CHECK: addhn2 v0.8h, v1.4s, v2.4s // encoding: [0x20,0x40,0x62,0x4e] +// CHECK: addhn2 v0.4s, v1.2d, v2.2d // encoding: [0x20,0x40,0xa2,0x4e] + + raddhn v0.8b, v1.8h, v2.8h + raddhn v0.4h, v1.4s, v2.4s + raddhn v0.2s, v1.2d, v2.2d + +// CHECK: raddhn v0.8b, v1.8h, v2.8h // encoding: [0x20,0x40,0x22,0x2e] +// CHECK: raddhn v0.4h, v1.4s, v2.4s // encoding: [0x20,0x40,0x62,0x2e] +// CHECK: raddhn v0.2s, v1.2d, v2.2d // encoding: [0x20,0x40,0xa2,0x2e] + + raddhn2 v0.16b, v1.8h, v2.8h + raddhn2 v0.8h, v1.4s, v2.4s + raddhn2 v0.4s, 
v1.2d, v2.2d + +// CHECK: raddhn2 v0.16b, v1.8h, v2.8h // encoding: [0x20,0x40,0x22,0x6e] +// CHECK: raddhn2 v0.8h, v1.4s, v2.4s // encoding: [0x20,0x40,0x62,0x6e] +// CHECK: raddhn2 v0.4s, v1.2d, v2.2d // encoding: [0x20,0x40,0xa2,0x6e] + + rsubhn v0.8b, v1.8h, v2.8h + rsubhn v0.4h, v1.4s, v2.4s + rsubhn v0.2s, v1.2d, v2.2d + +// CHECK: rsubhn v0.8b, v1.8h, v2.8h // encoding: [0x20,0x60,0x22,0x2e] +// CHECK: rsubhn v0.4h, v1.4s, v2.4s // encoding: [0x20,0x60,0x62,0x2e] +// CHECK: rsubhn v0.2s, v1.2d, v2.2d // encoding: [0x20,0x60,0xa2,0x2e] + + rsubhn2 v0.16b, v1.8h, v2.8h + rsubhn2 v0.8h, v1.4s, v2.4s + rsubhn2 v0.4s, v1.2d, v2.2d + +// CHECK: rsubhn2 v0.16b, v1.8h, v2.8h // encoding: [0x20,0x60,0x22,0x6e] +// CHECK: rsubhn2 v0.8h, v1.4s, v2.4s // encoding: [0x20,0x60,0x62,0x6e] +// CHECK: rsubhn2 v0.4s, v1.2d, v2.2d // encoding: [0x20,0x60,0xa2,0x6e] diff --git a/test/MC/AArch64/neon-across.s b/test/MC/AArch64/neon-across.s new file mode 100644 index 0000000..8b1c2d4 --- /dev/null +++ b/test/MC/AArch64/neon-across.s @@ -0,0 +1,101 @@ +// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//------------------------------------------------------------------------------ +// Instructions across vector registers +//------------------------------------------------------------------------------ + + saddlv h0, v1.8b + saddlv h0, v1.16b + saddlv s0, v1.4h + saddlv s0, v1.8h + saddlv d0, v1.4s + +// CHECK: saddlv h0, v1.8b // encoding: [0x20,0x38,0x30,0x0e] +// CHECK: saddlv h0, v1.16b // encoding: [0x20,0x38,0x30,0x4e] +// CHECK: saddlv s0, v1.4h // encoding: [0x20,0x38,0x70,0x0e] +// CHECK: saddlv s0, v1.8h // encoding: [0x20,0x38,0x70,0x4e] +// CHECK: saddlv d0, v1.4s // encoding: [0x20,0x38,0xb0,0x4e] + + uaddlv h0, v1.8b + uaddlv h0, v1.16b + uaddlv s0, v1.4h + uaddlv s0, v1.8h + uaddlv d0, v1.4s + +// CHECK: uaddlv h0, v1.8b // encoding: [0x20,0x38,0x30,0x2e] +// 
CHECK: uaddlv h0, v1.16b // encoding: [0x20,0x38,0x30,0x6e] +// CHECK: uaddlv s0, v1.4h // encoding: [0x20,0x38,0x70,0x2e] +// CHECK: uaddlv s0, v1.8h // encoding: [0x20,0x38,0x70,0x6e] +// CHECK: uaddlv d0, v1.4s // encoding: [0x20,0x38,0xb0,0x6e] + + smaxv b0, v1.8b + smaxv b0, v1.16b + smaxv h0, v1.4h + smaxv h0, v1.8h + smaxv s0, v1.4s + +// CHECK: smaxv b0, v1.8b // encoding: [0x20,0xa8,0x30,0x0e] +// CHECK: smaxv b0, v1.16b // encoding: [0x20,0xa8,0x30,0x4e] +// CHECK: smaxv h0, v1.4h // encoding: [0x20,0xa8,0x70,0x0e] +// CHECK: smaxv h0, v1.8h // encoding: [0x20,0xa8,0x70,0x4e] +// CHECK: smaxv s0, v1.4s // encoding: [0x20,0xa8,0xb0,0x4e] + + sminv b0, v1.8b + sminv b0, v1.16b + sminv h0, v1.4h + sminv h0, v1.8h + sminv s0, v1.4s + +// CHECK: sminv b0, v1.8b // encoding: [0x20,0xa8,0x31,0x0e] +// CHECK: sminv b0, v1.16b // encoding: [0x20,0xa8,0x31,0x4e] +// CHECK: sminv h0, v1.4h // encoding: [0x20,0xa8,0x71,0x0e] +// CHECK: sminv h0, v1.8h // encoding: [0x20,0xa8,0x71,0x4e] +// CHECK: sminv s0, v1.4s // encoding: [0x20,0xa8,0xb1,0x4e] + + umaxv b0, v1.8b + umaxv b0, v1.16b + umaxv h0, v1.4h + umaxv h0, v1.8h + umaxv s0, v1.4s + +// CHECK: umaxv b0, v1.8b // encoding: [0x20,0xa8,0x30,0x2e] +// CHECK: umaxv b0, v1.16b // encoding: [0x20,0xa8,0x30,0x6e] +// CHECK: umaxv h0, v1.4h // encoding: [0x20,0xa8,0x70,0x2e] +// CHECK: umaxv h0, v1.8h // encoding: [0x20,0xa8,0x70,0x6e] +// CHECK: umaxv s0, v1.4s // encoding: [0x20,0xa8,0xb0,0x6e] + + uminv b0, v1.8b + uminv b0, v1.16b + uminv h0, v1.4h + uminv h0, v1.8h + uminv s0, v1.4s + +// CHECK: uminv b0, v1.8b // encoding: [0x20,0xa8,0x31,0x2e] +// CHECK: uminv b0, v1.16b // encoding: [0x20,0xa8,0x31,0x6e] +// CHECK: uminv h0, v1.4h // encoding: [0x20,0xa8,0x71,0x2e] +// CHECK: uminv h0, v1.8h // encoding: [0x20,0xa8,0x71,0x6e] +// CHECK: uminv s0, v1.4s // encoding: [0x20,0xa8,0xb1,0x6e] + + addv b0, v1.8b + addv b0, v1.16b + addv h0, v1.4h + addv h0, v1.8h + addv s0, v1.4s + +// CHECK: addv b0, v1.8b // 
encoding: [0x20,0xb8,0x31,0x0e] +// CHECK: addv b0, v1.16b // encoding: [0x20,0xb8,0x31,0x4e] +// CHECK: addv h0, v1.4h // encoding: [0x20,0xb8,0x71,0x0e] +// CHECK: addv h0, v1.8h // encoding: [0x20,0xb8,0x71,0x4e] +// CHECK: addv s0, v1.4s // encoding: [0x20,0xb8,0xb1,0x4e] + + fmaxnmv s0, v1.4s + fminnmv s0, v1.4s + fmaxv s0, v1.4s + fminv s0, v1.4s + +// CHECK: fmaxnmv s0, v1.4s // encoding: [0x20,0xc8,0x30,0x6e] +// CHECK: fminnmv s0, v1.4s // encoding: [0x20,0xc8,0xb0,0x6e] +// CHECK: fmaxv s0, v1.4s // encoding: [0x20,0xf8,0x30,0x6e] +// CHECK: fminv s0, v1.4s // encoding: [0x20,0xf8,0xb0,0x6e] diff --git a/test/MC/AArch64/neon-add-pairwise.s b/test/MC/AArch64/neon-add-pairwise.s index b586c22..df9938b 100644 --- a/test/MC/AArch64/neon-add-pairwise.s +++ b/test/MC/AArch64/neon-add-pairwise.s @@ -32,4 +32,3 @@ // CHECK: faddp v0.2s, v1.2s, v2.2s // encoding: [0x20,0xd4,0x22,0x2e] // CHECK: faddp v0.4s, v1.4s, v2.4s // encoding: [0x20,0xd4,0x22,0x6e] // CHECK: faddp v0.2d, v1.2d, v2.2d // encoding: [0x20,0xd4,0x62,0x6e] - diff --git a/test/MC/AArch64/neon-add-sub-instructions.s b/test/MC/AArch64/neon-add-sub-instructions.s index 863798e..68f169b 100644 --- a/test/MC/AArch64/neon-add-sub-instructions.s +++ b/test/MC/AArch64/neon-add-sub-instructions.s @@ -64,19 +64,5 @@ // CHECK: fsub v0.4s, v1.4s, v2.4s // encoding: [0x20,0xd4,0xa2,0x4e] // CHECK: fsub v0.2d, v1.2d, v2.2d // encoding: [0x20,0xd4,0xe2,0x4e] -//------------------------------------------------------------------------------ -// Scalar Integer Add -//------------------------------------------------------------------------------ - add d31, d0, d16 - -// CHECK: add d31, d0, d16 // encoding: [0x1f,0x84,0xf0,0x5e] - -//------------------------------------------------------------------------------ -// Scalar Integer Sub -//------------------------------------------------------------------------------ - sub d1, d7, d8 - -// CHECK: sub d1, d7, d8 // encoding: [0xe1,0x84,0xe8,0x7e] - diff --git 
a/test/MC/AArch64/neon-crypto.s b/test/MC/AArch64/neon-crypto.s new file mode 100644 index 0000000..2952dd5 --- /dev/null +++ b/test/MC/AArch64/neon-crypto.s @@ -0,0 +1,44 @@ +// RUN: llvm-mc -triple=aarch64 -mattr=+neon -mattr=+crypto -show-encoding < %s | FileCheck %s +// RUN: not llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s 2>&1 | FileCheck -check-prefix=CHECK-NO-CRYPTO %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//------------------------------------------------------------------------------ +// Instructions for crypto +//------------------------------------------------------------------------------ + + aese v0.16b, v1.16b + aesd v0.16b, v1.16b + aesmc v0.16b, v1.16b + aesimc v0.16b, v1.16b + +// CHECK-NO-CRYPTO: error: instruction requires a CPU feature not currently enabled +// CHECK: aese v0.16b, v1.16b // encoding: [0x20,0x48,0x28,0x4e] +// CHECK: aesd v0.16b, v1.16b // encoding: [0x20,0x58,0x28,0x4e] +// CHECK: aesmc v0.16b, v1.16b // encoding: [0x20,0x68,0x28,0x4e] +// CHECK: aesimc v0.16b, v1.16b // encoding: [0x20,0x78,0x28,0x4e] + + sha1h s0, s1 + sha1su1 v0.4s, v1.4s + sha256su0 v0.4s, v1.4s + +// CHECK: sha1h s0, s1 // encoding: [0x20,0x08,0x28,0x5e] +// CHECK: sha1su1 v0.4s, v1.4s // encoding: [0x20,0x18,0x28,0x5e] +// CHECK: sha256su0 v0.4s, v1.4s // encoding: [0x20,0x28,0x28,0x5e] + + sha1c q0, s1, v2.4s + sha1p q0, s1, v2.4s + sha1m q0, s1, v2.4s + sha1su0 v0.4s, v1.4s, v2.4s + sha256h q0, q1, v2.4s + sha256h2 q0, q1, v2.4s + sha256su1 v0.4s, v1.4s, v2.4s + +// CHECK: sha1c q0, s1, v2.4s // encoding: [0x20,0x00,0x02,0x5e] +// CHECK: sha1p q0, s1, v2.4s // encoding: [0x20,0x10,0x02,0x5e] +// CHECK: sha1m q0, s1, v2.4s // encoding: [0x20,0x20,0x02,0x5e] +// CHECK: sha1su0 v0.4s, v1.4s, v2.4s // encoding: [0x20,0x30,0x02,0x5e] +// CHECK: sha256h q0, q1, v2.4s // encoding: [0x20,0x40,0x02,0x5e] +// CHECK: sha256h2 q0, q1, v2.4s // encoding: [0x20,0x50,0x02,0x5e] +// CHECK: sha256su1 v0.4s, v1.4s, 
v2.4s // encoding: [0x20,0x60,0x02,0x5e] + diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s index 5373889..be6c163 100644 --- a/test/MC/AArch64/neon-diagnostics.s +++ b/test/MC/AArch64/neon-diagnostics.s @@ -213,6 +213,47 @@ // CHECK-ERROR: movi v1.16b, #256 // CHECK-ERROR: ^ +//---------------------------------------------------------------------- +// Scalar Floating-point Reciprocal Estimate +//---------------------------------------------------------------------- + + frecpe s19, h14 + frecpe d13, s13 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frecpe s19, h14 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frecpe d13, s13 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Floating-point Reciprocal Exponent +//---------------------------------------------------------------------- + + frecpx s18, h10 + frecpx d16, s19 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frecpx s18, h10 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frecpx d16, s19 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Floating-point Reciprocal Square Root Estimate +//---------------------------------------------------------------------- + + frsqrte s22, h13 + frsqrte d21, s12 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frsqrte s22, h13 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frsqrte d21, s12 +// CHECK-ERROR: ^ //---------------------------------------------------------------------- // Vector Move Immediate - bytemask, per doubleword @@ -826,6 +867,33 @@ // CHECK-ERROR: uqsub h1, h2, d2 // CHECK-ERROR: ^ +//---------------------------------------------------------------------- +// Scalar Integer Saturating Doubling 
Multiply Half High (Signed) +//---------------------------------------------------------------------- + + sqdmulh h10, s11, h12 + sqdmulh s20, h21, s2 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmulh h10, s11, h12 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmulh s20, h21, s2 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------ +// Scalar Integer Saturating Rounding Doubling Multiply Half High (Signed) +//------------------------------------------------------------------------ + + sqrdmulh h10, s11, h12 + sqrdmulh s20, h21, s2 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrdmulh h10, s11, h12 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrdmulh s20, h21, s2 +// CHECK-ERROR: ^ //---------------------------------------------------------------------- // Vector Shift Left (Signed and Unsigned Integer) @@ -845,12 +913,12 @@ // Vector Saturating Shift Left (Signed and Unsigned Integer) //---------------------------------------------------------------------- // Mismatched vector types - sqshl v0.2s, v15.2s, v16.2d + sqshl v0.2s, v15.4s, v16.2d uqshl v1.8b, v25.4h, v6.8h // CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR: sqshl v0.2s, v15.2s, v16.2d -// CHECK-ERROR: ^ +// CHECK-ERROR: sqshl v0.2s, v15.4s, v16.2d +// CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: uqshl v1.8b, v25.4h, v6.8h // CHECK-ERROR: ^ @@ -902,23 +970,23 @@ //---------------------------------------------------------------------- // Mismatched vector types - sqshl b0, b1, s0 - uqshl h0, h1, b0 - sqshl s0, s1, h0 - uqshl d0, d1, b0 + sqshl b0, s1, b0 + uqshl h0, b1, h0 + sqshl s0, h1, s0 + uqshl d0, b1, d0 // CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR: sqshl b0, b1, s0 -// CHECK-ERROR: ^ +// CHECK-ERROR: sqshl b0, s1, 
b0 +// CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR: uqshl h0, h1, b0 -// CHECK-ERROR: ^ +// CHECK-ERROR: uqshl h0, b1, h0 +// CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR: sqshl s0, s1, h0 -// CHECK-ERROR: ^ +// CHECK-ERROR: sqshl s0, h1, s0 +// CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR: uqshl d0, d1, b0 -// CHECK-ERROR: ^ +// CHECK-ERROR: uqshl d0, b1, d0 +// CHECK-ERROR: ^ //---------------------------------------------------------------------- // Scalar Integer Rouding Shift Left (Signed, Unsigned) @@ -1205,3 +1273,6046 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmulx v1.4h, v25.4h, v3.4h // CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector Shift Left by Immediate +//------------------------------------------------------------------------------ + // Mismatched vector types and out of range + shl v0.4s, v15,2s, #3 + shl v0.2d, v17.4s, #3 + shl v0.8b, v31.8b, #-1 + shl v0.8b, v31.8b, #8 + shl v0.4s, v21.4s, #32 + shl v0.2d, v1.2d, #64 + +// CHECK-ERROR: error: expected comma before next operand +// CHECK-ERROR: shl v0.4s, v15,2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: shl v0.2d, v17.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 7] +// CHECK-ERROR: shl v0.8b, v31.8b, #-1 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 7] +// CHECK-ERROR: shl v0.8b, v31.8b, #8 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 31] +// CHECK-ERROR: shl v0.4s, v21.4s, #32 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 63] +// CHECK-ERROR: shl v0.2d, v1.2d, #64 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Vector Shift Left Long by Immediate 
+//---------------------------------------------------------------------- + // Mismatched vector types + sshll v0.4s, v15.2s, #3 + ushll v1.16b, v25.16b, #6 + sshll2 v0.2d, v3.8s, #15 + ushll2 v1.4s, v25.4s, #7 + + // Out of range + sshll v0.8h, v1.8b, #-1 + sshll v0.8h, v1.8b, #9 + ushll v0.4s, v1.4h, #17 + ushll v0.2d, v1.2s, #33 + sshll2 v0.8h, v1.16b, #9 + sshll2 v0.4s, v1.8h, #17 + ushll2 v0.2d, v1.4s, #33 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sshll v0.4s, v15.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ushll v1.16b, v25.16b, #6 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sshll2 v0.2d, v3.8s, #15 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ushll2 v1.4s, v25.4s, #7 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 7] +// CHECK-ERROR: sshll v0.8h, v1.8b, #-1 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 7] +// CHECK-ERROR: sshll v0.8h, v1.8b, #9 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 15] +// CHECK-ERROR: ushll v0.4s, v1.4h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 31] +// CHECK-ERROR: ushll v0.2d, v1.2s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 7] +// CHECK-ERROR: sshll2 v0.8h, v1.16b, #9 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 15] +// CHECK-ERROR: sshll2 v0.4s, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 31] +// CHECK-ERROR: ushll2 v0.2d, v1.4s, #33 +// CHECK-ERROR: ^ + + +//------------------------------------------------------------------------------ +// Vector shift right by immediate +//------------------------------------------------------------------------------ + sshr v0.8b, v1.8h, #3 + sshr v0.4h, v1.4s, #3 + sshr v0.2s, v1.2d, #3 + sshr 
v0.16b, v1.16b, #9 + sshr v0.8h, v1.8h, #17 + sshr v0.4s, v1.4s, #33 + sshr v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sshr v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sshr v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sshr v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: sshr v0.16b, v1.16b, #9 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: sshr v0.8h, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: sshr v0.4s, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: sshr v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector shift right by immediate +//------------------------------------------------------------------------------ + ushr v0.8b, v1.8h, #3 + ushr v0.4h, v1.4s, #3 + ushr v0.2s, v1.2d, #3 + ushr v0.16b, v1.16b, #9 + ushr v0.8h, v1.8h, #17 + ushr v0.4s, v1.4s, #33 + ushr v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ushr v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ushr v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ushr v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: ushr v0.16b, v1.16b, #9 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: ushr v0.8h, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: ushr v0.4s, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: 
expected integer in range [1, 64] +// CHECK-ERROR: ushr v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector shift right and accumulate by immediate +//------------------------------------------------------------------------------ + ssra v0.8b, v1.8h, #3 + ssra v0.4h, v1.4s, #3 + ssra v0.2s, v1.2d, #3 + ssra v0.16b, v1.16b, #9 + ssra v0.8h, v1.8h, #17 + ssra v0.4s, v1.4s, #33 + ssra v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ssra v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ssra v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ssra v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: ssra v0.16b, v1.16b, #9 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: ssra v0.8h, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: ssra v0.4s, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: ssra v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector shift right and accumulate by immediate +//------------------------------------------------------------------------------ + usra v0.8b, v1.8h, #3 + usra v0.4h, v1.4s, #3 + usra v0.2s, v1.2d, #3 + usra v0.16b, v1.16b, #9 + usra v0.8h, v1.8h, #17 + usra v0.4s, v1.4s, #33 + usra v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usra v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usra v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usra v0.2s, 
v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: usra v0.16b, v1.16b, #9 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: usra v0.8h, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: usra v0.4s, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: usra v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector rounding shift right by immediate +//------------------------------------------------------------------------------ + srshr v0.8b, v1.8h, #3 + srshr v0.4h, v1.4s, #3 + srshr v0.2s, v1.2d, #3 + srshr v0.16b, v1.16b, #9 + srshr v0.8h, v1.8h, #17 + srshr v0.4s, v1.4s, #33 + srshr v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: srshr v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: srshr v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: srshr v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: srshr v0.16b, v1.16b, #9 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: srshr v0.8h, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: srshr v0.4s, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: srshr v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vecotr rounding shift right by immediate +//------------------------------------------------------------------------------ + urshr v0.8b, v1.8h, #3 + urshr v0.4h, v1.4s, #3 + urshr v0.2s, v1.2d, #3 + 
urshr v0.16b, v1.16b, #9 + urshr v0.8h, v1.8h, #17 + urshr v0.4s, v1.4s, #33 + urshr v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: urshr v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: urshr v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: urshr v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: urshr v0.16b, v1.16b, #9 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: urshr v0.8h, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: urshr v0.4s, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: urshr v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector rounding shift right and accumulate by immediate +//------------------------------------------------------------------------------ + srsra v0.8b, v1.8h, #3 + srsra v0.4h, v1.4s, #3 + srsra v0.2s, v1.2d, #3 + srsra v0.16b, v1.16b, #9 + srsra v0.8h, v1.8h, #17 + srsra v0.4s, v1.4s, #33 + srsra v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: srsra v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: srsra v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: srsra v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: srsra v0.16b, v1.16b, #9 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: srsra v0.8h, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: srsra v0.4s, 
v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: srsra v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector rounding shift right and accumulate by immediate +//------------------------------------------------------------------------------ + ursra v0.8b, v1.8h, #3 + ursra v0.4h, v1.4s, #3 + ursra v0.2s, v1.2d, #3 + ursra v0.16b, v1.16b, #9 + ursra v0.8h, v1.8h, #17 + ursra v0.4s, v1.4s, #33 + ursra v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ursra v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ursra v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ursra v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: ursra v0.16b, v1.16b, #9 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: ursra v0.8h, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: ursra v0.4s, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: ursra v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector shift right and insert by immediate +//------------------------------------------------------------------------------ + sri v0.8b, v1.8h, #3 + sri v0.4h, v1.4s, #3 + sri v0.2s, v1.2d, #3 + sri v0.16b, v1.16b, #9 + sri v0.8h, v1.8h, #17 + sri v0.4s, v1.4s, #33 + sri v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sri v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sri v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: 
error: invalid operand for instruction +// CHECK-ERROR: sri v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: sri v0.16b, v1.16b, #9 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: sri v0.8h, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: sri v0.4s, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: sri v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector shift left and insert by immediate +//------------------------------------------------------------------------------ + sli v0.8b, v1.8h, #3 + sli v0.4h, v1.4s, #3 + sli v0.2s, v1.2d, #3 + sli v0.16b, v1.16b, #8 + sli v0.8h, v1.8h, #16 + sli v0.4s, v1.4s, #32 + sli v0.2d, v1.2d, #64 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sli v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sli v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sli v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 7] +// CHECK-ERROR: sli v0.16b, v1.16b, #8 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 15] +// CHECK-ERROR: sli v0.8h, v1.8h, #16 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 31] +// CHECK-ERROR: sli v0.4s, v1.4s, #32 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 63] +// CHECK-ERROR: sli v0.2d, v1.2d, #64 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector saturating shift left unsigned by immediate +//------------------------------------------------------------------------------ + sqshlu v0.8b, v1.8h, #3 + sqshlu 
v0.4h, v1.4s, #3 + sqshlu v0.2s, v1.2d, #3 + sqshlu v0.16b, v1.16b, #8 + sqshlu v0.8h, v1.8h, #16 + sqshlu v0.4s, v1.4s, #32 + sqshlu v0.2d, v1.2d, #64 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshlu v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshlu v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshlu v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 7] +// CHECK-ERROR: sqshlu v0.16b, v1.16b, #8 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 15] +// CHECK-ERROR: sqshlu v0.8h, v1.8h, #16 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 31] +// CHECK-ERROR: sqshlu v0.4s, v1.4s, #32 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 63] +// CHECK-ERROR: sqshlu v0.2d, v1.2d, #64 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector saturating shift left by immediate +//------------------------------------------------------------------------------ + sqshl v0.8b, v1.8h, #3 + sqshl v0.4h, v1.4s, #3 + sqshl v0.2s, v1.2d, #3 + sqshl v0.16b, v1.16b, #8 + sqshl v0.8h, v1.8h, #16 + sqshl v0.4s, v1.4s, #32 + sqshl v0.2d, v1.2d, #64 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshl v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshl v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshl v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 7] +// CHECK-ERROR: sqshl v0.16b, v1.16b, #8 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 15] +// CHECK-ERROR: sqshl v0.8h, v1.8h, #16 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in 
range [0, 31] +// CHECK-ERROR: sqshl v0.4s, v1.4s, #32 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 63] +// CHECK-ERROR: sqshl v0.2d, v1.2d, #64 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector saturating shift left by immediate +//------------------------------------------------------------------------------ + uqshl v0.8b, v1.8h, #3 + uqshl v0.4h, v1.4s, #3 + uqshl v0.2s, v1.2d, #3 + uqshl v0.16b, v1.16b, #8 + uqshl v0.8h, v1.8h, #16 + uqshl v0.4s, v1.4s, #32 + uqshl v0.2d, v1.2d, #64 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqshl v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqshl v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqshl v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 7] +// CHECK-ERROR: uqshl v0.16b, v1.16b, #8 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 15] +// CHECK-ERROR: uqshl v0.8h, v1.8h, #16 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 31] +// CHECK-ERROR: uqshl v0.4s, v1.4s, #32 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 63] +// CHECK-ERROR: uqshl v0.2d, v1.2d, #64 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector shift right narrow by immediate +//------------------------------------------------------------------------------ + shrn v0.8b, v1.8b, #3 + shrn v0.4h, v1.4h, #3 + shrn v0.2s, v1.2s, #3 + shrn2 v0.16b, v1.8h, #17 + shrn2 v0.8h, v1.4s, #33 + shrn2 v0.4s, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: shrn v0.8b, v1.8b, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: shrn v0.4h, v1.4h, #3 +// CHECK-ERROR: ^ +// 
CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: shrn v0.2s, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: shrn2 v0.16b, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: shrn2 v0.8h, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: shrn2 v0.4s, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector saturating shift right unsigned narrow by immediate +//------------------------------------------------------------------------------ + sqshrun v0.8b, v1.8b, #3 + sqshrun v0.4h, v1.4h, #3 + sqshrun v0.2s, v1.2s, #3 + sqshrun2 v0.16b, v1.8h, #17 + sqshrun2 v0.8h, v1.4s, #33 + sqshrun2 v0.4s, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshrun v0.8b, v1.8b, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshrun v0.4h, v1.4h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshrun v0.2s, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: sqshrun2 v0.16b, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: sqshrun2 v0.8h, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: sqshrun2 v0.4s, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector rounding shift right narrow by immediate +//------------------------------------------------------------------------------ + rshrn v0.8b, v1.8b, #3 + rshrn v0.4h, v1.4h, #3 + rshrn v0.2s, v1.2s, #3 + rshrn2 v0.16b, v1.8h, #17 + rshrn2 v0.8h, v1.4s, #33 + rshrn2 v0.4s, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for 
instruction +// CHECK-ERROR: rshrn v0.8b, v1.8b, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rshrn v0.4h, v1.4h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rshrn v0.2s, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: rshrn2 v0.16b, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: rshrn2 v0.8h, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: rshrn2 v0.4s, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector saturating shift right rounded unsigned narrow by immediate +//------------------------------------------------------------------------------ + sqrshrun v0.8b, v1.8b, #3 + sqrshrun v0.4h, v1.4h, #3 + sqrshrun v0.2s, v1.2s, #3 + sqrshrun2 v0.16b, v1.8h, #17 + sqrshrun2 v0.8h, v1.4s, #33 + sqrshrun2 v0.4s, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrshrun v0.8b, v1.8b, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrshrun v0.4h, v1.4h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrshrun v0.2s, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: sqrshrun2 v0.16b, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: sqrshrun2 v0.8h, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: sqrshrun2 v0.4s, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector saturating shift right narrow by immediate 
+//------------------------------------------------------------------------------ + sqshrn v0.8b, v1.8b, #3 + sqshrn v0.4h, v1.4h, #3 + sqshrn v0.2s, v1.2s, #3 + sqshrn2 v0.16b, v1.8h, #17 + sqshrn2 v0.8h, v1.4s, #33 + sqshrn2 v0.4s, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshrn v0.8b, v1.8b, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshrn v0.4h, v1.4h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshrn v0.2s, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: sqshrn2 v0.16b, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: sqshrn2 v0.8h, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: sqshrn2 v0.4s, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector saturating shift right narrow by immediate +//------------------------------------------------------------------------------ + uqshrn v0.8b, v1.8b, #3 + uqshrn v0.4h, v1.4h, #3 + uqshrn v0.2s, v1.2s, #3 + uqshrn2 v0.16b, v1.8h, #17 + uqshrn2 v0.8h, v1.4s, #33 + uqshrn2 v0.4s, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqshrn v0.8b, v1.8b, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqshrn v0.4h, v1.4h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqshrn v0.2s, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: uqshrn2 v0.16b, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: uqshrn2 v0.8h, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// 
CHECK-ERROR: uqshrn2 v0.4s, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector saturating shift right rounded narrow by immediate +//------------------------------------------------------------------------------ + sqrshrn v0.8b, v1.8b, #3 + sqrshrn v0.4h, v1.4h, #3 + sqrshrn v0.2s, v1.2s, #3 + sqrshrn2 v0.16b, v1.8h, #17 + sqrshrn2 v0.8h, v1.4s, #33 + sqrshrn2 v0.4s, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrshrn v0.8b, v1.8b, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrshrn v0.4h, v1.4h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrshrn v0.2s, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: sqrshrn2 v0.16b, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: sqrshrn2 v0.8h, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: sqrshrn2 v0.4s, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector saturating shift right rounded narrow by immediate +//------------------------------------------------------------------------------ + uqrshrn v0.8b, v1.8b, #3 + uqrshrn v0.4h, v1.4h, #3 + uqrshrn v0.2s, v1.2s, #3 + uqrshrn2 v0.16b, v1.8h, #17 + uqrshrn2 v0.8h, v1.4s, #33 + uqrshrn2 v0.4s, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqrshrn v0.8b, v1.8b, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqrshrn v0.4h, v1.4h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqrshrn v0.2s, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: 
uqrshrn2 v0.16b, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: uqrshrn2 v0.8h, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: uqrshrn2 v0.4s, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Fixed-point convert to floating-point +//------------------------------------------------------------------------------ + scvtf v0.2s, v1.2d, #3 + scvtf v0.4s, v1.4h, #3 + scvtf v0.2d, v1.2s, #3 + ucvtf v0.2s, v1.2s, #33 + ucvtf v0.4s, v1.4s, #33 + ucvtf v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: scvtf v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: scvtf v0.4s, v1.4h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: scvtf v0.2d, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: ucvtf v0.2s, v1.2s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: ucvtf v0.4s, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: ucvtf v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Floating-point convert to fixed-point +//------------------------------------------------------------------------------ + fcvtzs v0.2s, v1.2d, #3 + fcvtzs v0.4s, v1.4h, #3 + fcvtzs v0.2d, v1.2s, #3 + fcvtzu v0.2s, v1.2s, #33 + fcvtzu v0.4s, v1.4s, #33 + fcvtzu v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtzs v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtzs v0.4s, v1.4h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for 
instruction +// CHECK-ERROR: fcvtzs v0.2d, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: fcvtzu v0.2s, v1.2s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: fcvtzu v0.4s, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: fcvtzu v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Vector operation on 3 operands with different types +//---------------------------------------------------------------------- + + // Mismatched and invalid vector types + saddl v0.8h, v1.8h, v2.8b + saddl v0.4s, v1.4s, v2.4h + saddl v0.2d, v1.2d, v2.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: saddl v0.8h, v1.8h, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: saddl v0.4s, v1.4s, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: saddl v0.2d, v1.2d, v2.2s +// CHECK-ERROR: ^ + + saddl2 v0.4s, v1.8s, v2.8h + saddl2 v0.8h, v1.16h, v2.16b + saddl2 v0.2d, v1.4d, v2.4s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: saddl2 v0.4s, v1.8s, v2.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: saddl2 v0.8h, v1.16h, v2.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: saddl2 v0.2d, v1.4d, v2.4s +// CHECK-ERROR: ^ + + uaddl v0.8h, v1.8h, v2.8b + uaddl v0.4s, v1.4s, v2.4h + uaddl v0.2d, v1.2d, v2.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uaddl v0.8h, v1.8h, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uaddl v0.4s, v1.4s, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uaddl v0.2d, v1.2d, v2.2s +// 
CHECK-ERROR: ^ + + uaddl2 v0.8h, v1.16h, v2.16b + uaddl2 v0.4s, v1.8s, v2.8h + uaddl2 v0.2d, v1.4d, v2.4s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uaddl2 v0.8h, v1.16h, v2.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uaddl2 v0.4s, v1.8s, v2.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uaddl2 v0.2d, v1.4d, v2.4s +// CHECK-ERROR: ^ + + ssubl v0.8h, v1.8h, v2.8b + ssubl v0.4s, v1.4s, v2.4h + ssubl v0.2d, v1.2d, v2.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ssubl v0.8h, v1.8h, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ssubl v0.4s, v1.4s, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ssubl v0.2d, v1.2d, v2.2s +// CHECK-ERROR: ^ + + ssubl2 v0.8h, v1.16h, v2.16b + ssubl2 v0.4s, v1.8s, v2.8h + ssubl2 v0.2d, v1.4d, v2.4s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ssubl2 v0.8h, v1.16h, v2.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ssubl2 v0.4s, v1.8s, v2.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ssubl2 v0.2d, v1.4d, v2.4s +// CHECK-ERROR: ^ + + usubl v0.8h, v1.8h, v2.8b + usubl v0.4s, v1.4s, v2.4h + usubl v0.2d, v1.2d, v2.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usubl v0.8h, v1.8h, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usubl v0.4s, v1.4s, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usubl v0.2d, v1.2d, v2.2s +// CHECK-ERROR: ^ + + usubl2 v0.8h, v1.16h, v2.16b + usubl2 v0.4s, v1.8s, v2.8h + usubl2 v0.2d, v1.4d, v2.4s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usubl2 v0.8h, v1.16h, v2.16b 
+// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usubl2 v0.4s, v1.8s, v2.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usubl2 v0.2d, v1.4d, v2.4s +// CHECK-ERROR: ^ + + sabal v0.8h, v1.8h, v2.8b + sabal v0.4s, v1.4s, v2.4h + sabal v0.2d, v1.2d, v2.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sabal v0.8h, v1.8h, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sabal v0.4s, v1.4s, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sabal v0.2d, v1.2d, v2.2s +// CHECK-ERROR: ^ + + sabal2 v0.8h, v1.16h, v2.16b + sabal2 v0.4s, v1.8s, v2.8h + sabal2 v0.2d, v1.4d, v2.4s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sabal2 v0.8h, v1.16h, v2.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sabal2 v0.4s, v1.8s, v2.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sabal2 v0.2d, v1.4d, v2.4s +// CHECK-ERROR: ^ + + uabal v0.8h, v1.8h, v2.8b + uabal v0.4s, v1.4s, v2.4h + uabal v0.2d, v1.2d, v2.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uabal v0.8h, v1.8h, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uabal v0.4s, v1.4s, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uabal v0.2d, v1.2d, v2.2s +// CHECK-ERROR: ^ + + uabal2 v0.8h, v1.16h, v2.16b + uabal2 v0.4s, v1.8s, v2.8h + uabal2 v0.2d, v1.4d, v2.4s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uabal2 v0.8h, v1.16h, v2.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uabal2 v0.4s, v1.8s, v2.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: 
uabal2 v0.2d, v1.4d, v2.4s +// CHECK-ERROR: ^ + + sabdl v0.8h, v1.8h, v2.8b + sabdl v0.4s, v1.4s, v2.4h + sabdl v0.2d, v1.2d, v2.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sabdl v0.8h, v1.8h, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sabdl v0.4s, v1.4s, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sabdl v0.2d, v1.2d, v2.2s +// CHECK-ERROR: ^ + + sabdl2 v0.8h, v1.16h, v2.16b + sabdl2 v0.4s, v1.8s, v2.8h + sabdl2 v0.2d, v1.4d, v2.4s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sabdl2 v0.8h, v1.16h, v2.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sabdl2 v0.4s, v1.8s, v2.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sabdl2 v0.2d, v1.4d, v2.4s +// CHECK-ERROR: ^ + + uabdl v0.8h, v1.8h, v2.8b + uabdl v0.4s, v1.4s, v2.4h + uabdl v0.2d, v1.2d, v2.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uabdl v0.8h, v1.8h, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uabdl v0.4s, v1.4s, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uabdl v0.2d, v1.2d, v2.2s +// CHECK-ERROR: ^ + + uabdl2 v0.8h, v1.16h, v2.16b + uabdl2 v0.4s, v1.8s, v2.8h + uabdl2 v0.2d, v1.4d, v2.4s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uabdl2 v0.8h, v1.16h, v2.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uabdl2 v0.4s, v1.8s, v2.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uabdl2 v0.2d, v1.4d, v2.4s +// CHECK-ERROR: ^ + + smlal v0.8h, v1.8h, v2.8b + smlal v0.4s, v1.4s, v2.4h + smlal v0.2d, v1.2d, v2.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: 
smlal v0.8h, v1.8h, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smlal v0.4s, v1.4s, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smlal v0.2d, v1.2d, v2.2s +// CHECK-ERROR: ^ + + smlal2 v0.8h, v1.16h, v2.16b + smlal2 v0.4s, v1.8s, v2.8h + smlal2 v0.2d, v1.4d, v2.4s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smlal2 v0.8h, v1.16h, v2.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smlal2 v0.4s, v1.8s, v2.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smlal2 v0.2d, v1.4d, v2.4s +// CHECK-ERROR: ^ + + umlal v0.8h, v1.8h, v2.8b + umlal v0.4s, v1.4s, v2.4h + umlal v0.2d, v1.2d, v2.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umlal v0.8h, v1.8h, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umlal v0.4s, v1.4s, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umlal v0.2d, v1.2d, v2.2s +// CHECK-ERROR: ^ + + umlal2 v0.8h, v1.16h, v2.16b + umlal2 v0.4s, v1.8s, v2.8h + umlal2 v0.2d, v1.4d, v2.4s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umlal2 v0.8h, v1.16h, v2.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umlal2 v0.4s, v1.8s, v2.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umlal2 v0.2d, v1.4d, v2.4s +// CHECK-ERROR: ^ + + smlsl v0.8h, v1.8h, v2.8b + smlsl v0.4s, v1.4s, v2.4h + smlsl v0.2d, v1.2d, v2.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smlsl v0.8h, v1.8h, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smlsl v0.4s, v1.4s, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for 
instruction +// CHECK-ERROR: smlsl v0.2d, v1.2d, v2.2s +// CHECK-ERROR: ^ + + smlsl2 v0.8h, v1.16h, v2.16b + smlsl2 v0.4s, v1.8s, v2.8h + smlsl2 v0.2d, v1.4d, v2.4s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smlsl2 v0.8h, v1.16h, v2.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smlsl2 v0.4s, v1.8s, v2.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smlsl2 v0.2d, v1.4d, v2.4s +// CHECK-ERROR: ^ + + umlsl v0.8h, v1.8h, v2.8b + umlsl v0.4s, v1.4s, v2.4h + umlsl v0.2d, v1.2d, v2.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umlsl v0.8h, v1.8h, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umlsl v0.4s, v1.4s, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umlsl v0.2d, v1.2d, v2.2s +// CHECK-ERROR: ^ + + umlsl2 v0.8h, v1.16h, v2.16b + umlsl2 v0.4s, v1.8s, v2.8h + umlsl2 v0.2d, v1.4d, v2.4s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umlsl2 v0.8h, v1.16h, v2.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umlsl2 v0.4s, v1.8s, v2.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umlsl2 v0.2d, v1.4d, v2.4s +// CHECK-ERROR: ^ + + smull v0.8h, v1.8h, v2.8b + smull v0.4s, v1.4s, v2.4h + smull v0.2d, v1.2d, v2.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smull v0.8h, v1.8h, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smull v0.4s, v1.4s, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smull v0.2d, v1.2d, v2.2s +// CHECK-ERROR: ^ + + smull2 v0.8h, v1.16h, v2.16b + smull2 v0.4s, v1.8s, v2.8h + smull2 v0.2d, v1.4d, v2.4s + +// CHECK-ERROR: error: invalid operand for 
instruction +// CHECK-ERROR: smull2 v0.8h, v1.16h, v2.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smull2 v0.4s, v1.8s, v2.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smull2 v0.2d, v1.4d, v2.4s +// CHECK-ERROR: ^ + + umull v0.8h, v1.8h, v2.8b + umull v0.4s, v1.4s, v2.4h + umull v0.2d, v1.2d, v2.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umull v0.8h, v1.8h, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umull v0.4s, v1.4s, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umull v0.2d, v1.2d, v2.2s +// CHECK-ERROR: ^ + + umull2 v0.8h, v1.16h, v2.16b + umull2 v0.4s, v1.8s, v2.8h + umull2 v0.2d, v1.4d, v2.4s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umull2 v0.8h, v1.16h, v2.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umull2 v0.4s, v1.8s, v2.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umull2 v0.2d, v1.4d, v2.4s +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Long - Variant 2 +//------------------------------------------------------------------------------ + + sqdmlal v0.4s, v1.4s, v2.4h + sqdmlal v0.2d, v1.2d, v2.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlal v0.4s, v1.4s, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlal v0.2d, v1.2d, v2.2s +// CHECK-ERROR: ^ + + sqdmlal2 v0.4s, v1.8s, v2.8h + sqdmlal2 v0.2d, v1.4d, v2.4s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlal2 v0.4s, v1.8s, v2.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlal2 v0.2d, v1.4d, v2.4s +// 
CHECK-ERROR: ^ + + // Mismatched vector types + sqdmlal v0.8h, v1.8b, v2.8b + sqdmlal2 v0.8h, v1.16b, v2.16b + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlal v0.8h, v1.8b, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlal2 v0.8h, v1.16b, v2.16b +// CHECK-ERROR: ^ + + sqdmlsl v0.4s, v1.4s, v2.4h + sqdmlsl v0.2d, v1.2d, v2.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlsl v0.4s, v1.4s, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlsl v0.2d, v1.2d, v2.2s +// CHECK-ERROR: ^ + + sqdmlsl2 v0.4s, v1.8s, v2.8h + sqdmlsl2 v0.2d, v1.4d, v2.4s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlsl2 v0.4s, v1.8s, v2.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlsl2 v0.2d, v1.4d, v2.4s +// CHECK-ERROR: ^ + + // Mismatched vector types + sqdmlsl v0.8h, v1.8b, v2.8b + sqdmlsl2 v0.8h, v1.16b, v2.16b + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlsl v0.8h, v1.8b, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlsl2 v0.8h, v1.16b, v2.16b +// CHECK-ERROR: ^ + + + sqdmull v0.4s, v1.4s, v2.4h + sqdmull v0.2d, v1.2d, v2.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmull v0.4s, v1.4s, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmull v0.2d, v1.2d, v2.2s +// CHECK-ERROR: ^ + + sqdmull2 v0.4s, v1.8s, v2.8h + sqdmull2 v0.2d, v1.4d, v2.4s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmull2 v0.4s, v1.8s, v2.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmull2 v0.2d, v1.4d, v2.4s +// CHECK-ERROR: ^ + + // Mismatched vector types + sqdmull v0.8h, v1.8b, v2.8b + sqdmull2 v0.8h, 
v1.16b, v2.16b + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmull v0.8h, v1.8b, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmull2 v0.8h, v1.16b, v2.16b +// CHECK-ERROR: ^ + + +//------------------------------------------------------------------------------ +// Long - Variant 3 +//------------------------------------------------------------------------------ + + pmull v0.8h, v1.8h, v2.8b + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: pmull v0.8h, v1.8h, v2.8b +// CHECK-ERROR: ^ + + pmull v0.1q, v1.2d, v2.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: pmull v0.1q, v1.2d, v2.2d +// CHECK-ERROR: ^ + + // Mismatched vector types + pmull v0.4s, v1.4h, v2.4h + pmull v0.2d, v1.2s, v2.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: pmull v0.4s, v1.4h, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: pmull v0.2d, v1.2s, v2.2s +// CHECK-ERROR: ^ + + + pmull2 v0.8h, v1.16h, v2.16b + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: pmull2 v0.8h, v1.16h, v2.16b +// CHECK-ERROR: ^ + + pmull2 v0.q, v1.2d, v2.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: pmull2 v0.q, v1.2d, v2.2d +// CHECK-ERROR: ^ + + // Mismatched vector types + pmull2 v0.4s, v1.8h v2.8h + pmull2 v0.2d, v1.4s, v2.4s + +// CHECK-ERROR: error: expected comma before next operand +// CHECK-ERROR: pmull2 v0.4s, v1.8h v2.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: pmull2 v0.2d, v1.4s, v2.4s +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Widen +//------------------------------------------------------------------------------ + + saddw v0.8h, v1.8h, v2.8h + saddw v0.4s, v1.4s, v2.4s + saddw v0.2d, v1.2d, v2.2d + +// CHECK-ERROR: error: 
invalid operand for instruction +// CHECK-ERROR: saddw v0.8h, v1.8h, v2.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: saddw v0.4s, v1.4s, v2.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: saddw v0.2d, v1.2d, v2.2d +// CHECK-ERROR: ^ + + saddw2 v0.8h, v1.8h, v2.16h + saddw2 v0.4s, v1.4s, v2.8s + saddw2 v0.2d, v1.2d, v2.4d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: saddw2 v0.8h, v1.8h, v2.16h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: saddw2 v0.4s, v1.4s, v2.8s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: saddw2 v0.2d, v1.2d, v2.4d +// CHECK-ERROR: ^ + + uaddw v0.8h, v1.8h, v2.8h + uaddw v0.4s, v1.4s, v2.4s + uaddw v0.2d, v1.2d, v2.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uaddw v0.8h, v1.8h, v2.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uaddw v0.4s, v1.4s, v2.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uaddw v0.2d, v1.2d, v2.2d +// CHECK-ERROR: ^ + + uaddw2 v0.8h, v1.8h, v2.16h + uaddw2 v0.4s, v1.4s, v2.8s + uaddw2 v0.2d, v1.2d, v2.4d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uaddw2 v0.8h, v1.8h, v2.16h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uaddw2 v0.4s, v1.4s, v2.8s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uaddw2 v0.2d, v1.2d, v2.4d +// CHECK-ERROR: ^ + + ssubw v0.8h, v1.8h, v2.8h + ssubw v0.4s, v1.4s, v2.4s + ssubw v0.2d, v1.2d, v2.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ssubw v0.8h, v1.8h, v2.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ssubw v0.4s, v1.4s, v2.4s +// CHECK-ERROR: ^ +// 
CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ssubw v0.2d, v1.2d, v2.2d +// CHECK-ERROR: ^ + + ssubw2 v0.8h, v1.8h, v2.16h + ssubw2 v0.4s, v1.4s, v2.8s + ssubw2 v0.2d, v1.2d, v2.4d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ssubw2 v0.8h, v1.8h, v2.16h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ssubw2 v0.4s, v1.4s, v2.8s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ssubw2 v0.2d, v1.2d, v2.4d +// CHECK-ERROR: ^ + + usubw v0.8h, v1.8h, v2.8h + usubw v0.4s, v1.4s, v2.4s + usubw v0.2d, v1.2d, v2.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usubw v0.8h, v1.8h, v2.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usubw v0.4s, v1.4s, v2.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usubw v0.2d, v1.2d, v2.2d +// CHECK-ERROR: ^ + + usubw2 v0.8h, v1.8h, v2.16h + usubw2 v0.4s, v1.4s, v2.8s + usubw2 v0.2d, v1.2d, v2.4d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usubw2 v0.8h, v1.8h, v2.16h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usubw2 v0.4s, v1.4s, v2.8s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usubw2 v0.2d, v1.2d, v2.4d +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Narrow +//------------------------------------------------------------------------------ + + addhn v0.8b, v1.8h, v2.8d + addhn v0.4h, v1.4s, v2.4h + addhn v0.2s, v1.2d, v2.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: addhn v0.8b, v1.8h, v2.8d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: addhn v0.4h, v1.4s, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand 
for instruction +// CHECK-ERROR: addhn v0.2s, v1.2d, v2.2s +// CHECK-ERROR: ^ + + addhn2 v0.16b, v1.8h, v2.8b + addhn2 v0.8h, v1.4s, v2.4h + addhn2 v0.4s, v1.2d, v2.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: addhn2 v0.16b, v1.8h, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: addhn2 v0.8h, v1.4s, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: addhn2 v0.4s, v1.2d, v2.2s +// CHECK-ERROR: ^ + + raddhn v0.8b, v1.8h, v2.8b + raddhn v0.4h, v1.4s, v2.4h + raddhn v0.2s, v1.2d, v2.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: raddhn v0.8b, v1.8h, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: raddhn v0.4h, v1.4s, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: raddhn v0.2s, v1.2d, v2.2s +// CHECK-ERROR: ^ + + raddhn2 v0.16b, v1.8h, v2.8b + raddhn2 v0.8h, v1.4s, v2.4h + raddhn2 v0.4s, v1.2d, v2.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: raddhn2 v0.16b, v1.8h, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: raddhn2 v0.8h, v1.4s, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: raddhn2 v0.4s, v1.2d, v2.2s +// CHECK-ERROR: ^ + + rsubhn v0.8b, v1.8h, v2.8b + rsubhn v0.4h, v1.4s, v2.4h + rsubhn v0.2s, v1.2d, v2.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rsubhn v0.8b, v1.8h, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rsubhn v0.4h, v1.4s, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rsubhn v0.2s, v1.2d, v2.2s +// CHECK-ERROR: ^ + + rsubhn2 v0.16b, v1.8h, v2.8b + rsubhn2 v0.8h, v1.4s, v2.4h + rsubhn2 v0.4s, v1.2d, v2.2s + +// CHECK-ERROR: error: 
invalid operand for instruction +// CHECK-ERROR: rsubhn2 v0.16b, v1.8h, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rsubhn2 v0.8h, v1.4s, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rsubhn2 v0.4s, v1.2d, v2.2s +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Reduce Add Pairwise (Integer) +//---------------------------------------------------------------------- + // invalid vector types + addp s0, d1.2d + addp d0, d1.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: addp s0, d1.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: addp d0, d1.2s +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Reduce Add Pairwise (Floating Point) +//---------------------------------------------------------------------- + // invalid vector types + faddp s0, d1.2d + faddp d0, d1.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: faddp s0, d1.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: faddp d0, d1.2s +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Reduce Maximum Pairwise (Floating Point) +//---------------------------------------------------------------------- + // mismatched and invalid vector types + fmaxp s0, v1.2d + fmaxp d31, v2.2s + fmaxp h3, v2.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmaxp s0, v1.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmaxp d31, v2.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmaxp h3, v2.2s +// CHECK-ERROR: ^ + + +//---------------------------------------------------------------------- +// Scalar 
Reduce Minimum Pairwise (Floating Point) +//---------------------------------------------------------------------- + // mismatched and invalid vector types + fminp s0, v1.4h + fminp d31, v2.8h + fminp b3, v2.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fminp s0, v1.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fminp d31, v2.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fminp b3, v2.2s +// CHECK-ERROR: ^ + + +//---------------------------------------------------------------------- +// Scalar Reduce maxNum Pairwise (Floating Point) +//---------------------------------------------------------------------- + // mismatched and invalid vector types + fmaxnmp s0, v1.8b + fmaxnmp d31, v2.16b + fmaxnmp v1.2s, v2.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmaxnmp s0, v1.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmaxnmp d31, v2.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: too few operands for instruction +// CHECK-ERROR: fmaxnmp v1.2s, v2.2s +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Reduce minNum Pairwise (Floating Point) +//---------------------------------------------------------------------- + // mismatched and invalid vector types + fminnmp s0, v1.2d + fminnmp d31, v2.4s + fminnmp v1.4s, v2.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fminnmp s0, v1.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fminnmp d31, v2.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fminnmp v1.4s, v2.2d +// CHECK-ERROR: ^ + + mla v0.2d, v1.2d, v16.d[1] + mla v0.2s, v1.2s, v2.s[4] + mla v0.4s, v1.4s, v2.s[4] + mla v0.2h, v1.2h, v2.h[1] + mla v0.4h, v1.4h, v2.h[8] + mla v0.8h, v1.8h, v2.h[8] + 
mla v0.4h, v1.4h, v16.h[2] + mla v0.8h, v1.8h, v16.h[2] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: mla v0.2d, v1.2d, v16.d[1] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: mla v0.2s, v1.2s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: mla v0.4s, v1.4s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: mla v0.2h, v1.2h, v2.h[1] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: mla v0.4h, v1.4h, v2.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: mla v0.8h, v1.8h, v2.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: mla v0.4h, v1.4h, v16.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: mla v0.8h, v1.8h, v16.h[2] +// CHECK-ERROR: ^ + + mls v0.2d, v1.2d, v16.d[1] + mls v0.2s, v1.2s, v2.s[4] + mls v0.4s, v1.4s, v2.s[4] + mls v0.2h, v1.2h, v2.h[1] + mls v0.4h, v1.4h, v2.h[8] + mls v0.8h, v1.8h, v2.h[8] + mls v0.4h, v1.4h, v16.h[2] + mls v0.8h, v1.8h, v16.h[2] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: mls v0.2d, v1.2d, v16.d[1] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: mls v0.2s, v1.2s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: mls v0.4s, v1.4s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: mls v0.2h, v1.2h, v2.h[1] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: mls v0.4h, v1.4h, v2.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: mls v0.8h, v1.8h, v2.h[8] +// CHECK-ERROR: ^ +// 
CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: mls v0.4h, v1.4h, v16.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: mls v0.8h, v1.8h, v16.h[2] +// CHECK-ERROR: ^ + + fmla v0.4h, v1.4h, v2.h[2] + fmla v0.8h, v1.8h, v2.h[2] + fmla v0.2s, v1.2s, v2.s[4] + fmla v0.2s, v1.2s, v22.s[4] + fmla v3.4s, v8.4s, v2.s[4] + fmla v3.4s, v8.4s, v22.s[4] + fmla v0.2d, v1.2d, v2.d[2] + fmla v0.2d, v1.2d, v22.d[2] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmla v0.4h, v1.4h, v2.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmla v0.8h, v1.8h, v2.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: fmla v0.2s, v1.2s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: fmla v0.2s, v1.2s, v22.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: fmla v3.4s, v8.4s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: fmla v3.4s, v8.4s, v22.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: fmla v0.2d, v1.2d, v2.d[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: fmla v0.2d, v1.2d, v22.d[2] +// CHECK-ERROR: ^ + + fmls v0.4h, v1.4h, v2.h[2] + fmls v0.8h, v1.8h, v2.h[2] + fmls v0.2s, v1.2s, v2.s[4] + fmls v0.2s, v1.2s, v22.s[4] + fmls v3.4s, v8.4s, v2.s[4] + fmls v3.4s, v8.4s, v22.s[4] + fmls v0.2d, v1.2d, v2.d[2] + fmls v0.2d, v1.2d, v22.d[2] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmls v0.4h, v1.4h, v2.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmls v0.8h, v1.8h, v2.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// 
CHECK-ERROR: fmls v0.2s, v1.2s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: fmls v0.2s, v1.2s, v22.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: fmls v3.4s, v8.4s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: fmls v3.4s, v8.4s, v22.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: fmls v0.2d, v1.2d, v2.d[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: fmls v0.2d, v1.2d, v22.d[2] +// CHECK-ERROR: ^ + + smlal v0.4h, v1.4h, v2.h[2] + smlal v0.4s, v1.4h, v2.h[8] + smlal v0.4s, v1.4h, v16.h[2] + smlal v0.2s, v1.2s, v2.s[4] + smlal v0.2d, v1.2s, v2.s[4] + smlal v0.2d, v1.2s, v22.s[4] + smlal2 v0.4h, v1.8h, v1.h[2] + smlal2 v0.4s, v1.8h, v1.h[8] + smlal2 v0.4s, v1.8h, v16.h[2] + smlal2 v0.2s, v1.4s, v1.s[2] + smlal2 v0.2d, v1.4s, v1.s[4] + smlal2 v0.2d, v1.4s, v22.s[4] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smlal v0.4h, v1.4h, v2.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: smlal v0.4s, v1.4h, v2.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smlal v0.4s, v1.4h, v16.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: smlal v0.2s, v1.2s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: smlal v0.2d, v1.2s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: smlal v0.2d, v1.2s, v22.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smlal2 v0.4h, v1.8h, v1.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: 
smlal2 v0.4s, v1.8h, v1.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smlal2 v0.4s, v1.8h, v16.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smlal2 v0.2s, v1.4s, v1.s[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: smlal2 v0.2d, v1.4s, v1.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: smlal2 v0.2d, v1.4s, v22.s[4] +// CHECK-ERROR: ^ + + smlsl v0.4h, v1.4h, v2.h[2] + smlsl v0.4s, v1.4h, v2.h[8] + smlsl v0.4s, v1.4h, v16.h[2] + smlsl v0.2s, v1.2s, v2.s[4] + smlsl v0.2d, v1.2s, v2.s[4] + smlsl v0.2d, v1.2s, v22.s[4] + smlsl2 v0.4h, v1.8h, v1.h[2] + smlsl2 v0.4s, v1.8h, v1.h[8] + smlsl2 v0.4s, v1.8h, v16.h[2] + smlsl2 v0.2s, v1.4s, v1.s[2] + smlsl2 v0.2d, v1.4s, v1.s[4] + smlsl2 v0.2d, v1.4s, v22.s[4] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smlsl v0.4h, v1.4h, v2.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: smlsl v0.4s, v1.4h, v2.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smlsl v0.4s, v1.4h, v16.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: smlsl v0.2s, v1.2s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: smlsl v0.2d, v1.2s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: smlsl v0.2d, v1.2s, v22.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smlsl2 v0.4h, v1.8h, v1.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: smlsl2 v0.4s, v1.8h, v1.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smlsl2 v0.4s, 
v1.8h, v16.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smlsl2 v0.2s, v1.4s, v1.s[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: smlsl2 v0.2d, v1.4s, v1.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: smlsl2 v0.2d, v1.4s, v22.s[4] +// CHECK-ERROR: ^ + + umlal v0.4h, v1.4h, v2.h[2] + umlal v0.4s, v1.4h, v2.h[8] + umlal v0.4s, v1.4h, v16.h[2] + umlal v0.2s, v1.2s, v2.s[4] + umlal v0.2d, v1.2s, v2.s[4] + umlal v0.2d, v1.2s, v22.s[4] + umlal2 v0.4h, v1.8h, v1.h[2] + umlal2 v0.4s, v1.8h, v1.h[8] + umlal2 v0.4s, v1.8h, v16.h[2] + umlal2 v0.2s, v1.4s, v1.s[2] + umlal2 v0.2d, v1.4s, v1.s[4] + umlal2 v0.2d, v1.4s, v22.s[4] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umlal v0.4h, v1.4h, v2.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: umlal v0.4s, v1.4h, v2.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umlal v0.4s, v1.4h, v16.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: umlal v0.2s, v1.2s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: umlal v0.2d, v1.2s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: umlal v0.2d, v1.2s, v22.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umlal2 v0.4h, v1.8h, v1.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: umlal2 v0.4s, v1.8h, v1.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umlal2 v0.4s, v1.8h, v16.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umlal2 v0.2s, v1.4s, v1.s[2] +// 
CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: umlal2 v0.2d, v1.4s, v1.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: umlal2 v0.2d, v1.4s, v22.s[4] +// CHECK-ERROR: ^ + + umlsl v0.4h, v1.4h, v2.h[2] + umlsl v0.4s, v1.4h, v2.h[8] + umlsl v0.4s, v1.4h, v16.h[2] + umlsl v0.2s, v1.2s, v2.s[4] + umlsl v0.2d, v1.2s, v2.s[4] + umlsl v0.2d, v1.2s, v22.s[4] + umlsl2 v0.4h, v1.8h, v1.h[2] + umlsl2 v0.4s, v1.8h, v1.h[8] + umlsl2 v0.4s, v1.8h, v16.h[2] + umlsl2 v0.2s, v1.4s, v1.s[2] + umlsl2 v0.2d, v1.4s, v1.s[4] + umlsl2 v0.2d, v1.4s, v22.s[4] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umlsl v0.4h, v1.4h, v2.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: umlsl v0.4s, v1.4h, v2.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umlsl v0.4s, v1.4h, v16.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: umlsl v0.2s, v1.2s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: umlsl v0.2d, v1.2s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: umlsl v0.2d, v1.2s, v22.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umlsl2 v0.4h, v1.8h, v1.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: umlsl2 v0.4s, v1.8h, v1.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umlsl2 v0.4s, v1.8h, v16.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umlsl2 v0.2s, v1.4s, v1.s[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: umlsl2 v0.2d, v1.4s, v1.s[4] +// CHECK-ERROR: ^ 
+// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: umlsl2 v0.2d, v1.4s, v22.s[4] +// CHECK-ERROR: ^ + + sqdmlal v0.4h, v1.4h, v2.h[2] + sqdmlal v0.4s, v1.4h, v2.h[8] + sqdmlal v0.4s, v1.4h, v16.h[2] + sqdmlal v0.2s, v1.2s, v2.s[4] + sqdmlal v0.2d, v1.2s, v2.s[4] + sqdmlal v0.2d, v1.2s, v22.s[4] + sqdmlal2 v0.4h, v1.8h, v1.h[2] + sqdmlal2 v0.4s, v1.8h, v1.h[8] + sqdmlal2 v0.4s, v1.8h, v16.h[2] + sqdmlal2 v0.2s, v1.4s, v1.s[2] + sqdmlal2 v0.2d, v1.4s, v1.s[4] + sqdmlal2 v0.2d, v1.4s, v22.s[4] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlal v0.4h, v1.4h, v2.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmlal v0.4s, v1.4h, v2.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlal v0.4s, v1.4h, v16.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmlal v0.2s, v1.2s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmlal v0.2d, v1.2s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmlal v0.2d, v1.2s, v22.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlal2 v0.4h, v1.8h, v1.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmlal2 v0.4s, v1.8h, v1.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlal2 v0.4s, v1.8h, v16.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlal2 v0.2s, v1.4s, v1.s[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmlal2 v0.2d, v1.4s, v1.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmlal2 v0.2d, 
v1.4s, v22.s[4] +// CHECK-ERROR: ^ + + sqdmlsl v0.4h, v1.4h, v2.h[2] + sqdmlsl v0.4s, v1.4h, v2.h[8] + sqdmlsl v0.4s, v1.4h, v16.h[2] + sqdmlsl v0.2s, v1.2s, v2.s[4] + sqdmlsl v0.2d, v1.2s, v2.s[4] + sqdmlsl v0.2d, v1.2s, v22.s[4] + sqdmlsl2 v0.4h, v1.8h, v1.h[2] + sqdmlsl2 v0.4s, v1.8h, v1.h[8] + sqdmlsl2 v0.4s, v1.8h, v16.h[2] + sqdmlsl2 v0.2s, v1.4s, v1.s[2] + sqdmlsl2 v0.2d, v1.4s, v1.s[4] + sqdmlsl2 v0.2d, v1.4s, v22.s[4] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlsl v0.4h, v1.4h, v2.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmlsl v0.4s, v1.4h, v2.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlsl v0.4s, v1.4h, v16.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmlsl v0.2s, v1.2s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmlsl v0.2d, v1.2s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmlsl v0.2d, v1.2s, v22.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlsl2 v0.4h, v1.8h, v1.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmlsl2 v0.4s, v1.8h, v1.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlsl2 v0.4s, v1.8h, v16.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlsl2 v0.2s, v1.4s, v1.s[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmlsl2 v0.2d, v1.4s, v1.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmlsl2 v0.2d, v1.4s, v22.s[4] +// CHECK-ERROR: ^ + + mul v0.4h, v1.4h, v2.h[8] + mul v0.4h, v1.4h, 
v16.h[8] + mul v0.8h, v1.8h, v2.h[8] + mul v0.8h, v1.8h, v16.h[8] + mul v0.2s, v1.2s, v2.s[4] + mul v0.2s, v1.2s, v22.s[4] + mul v0.4s, v1.4s, v2.s[4] + mul v0.4s, v1.4s, v22.s[4] + mul v0.2d, v1.2d, v2.d[1] + +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: mul v0.4h, v1.4h, v2.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: mul v0.4h, v1.4h, v16.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: mul v0.8h, v1.8h, v2.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: mul v0.8h, v1.8h, v16.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: mul v0.2s, v1.2s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: mul v0.2s, v1.2s, v22.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: mul v0.4s, v1.4s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: mul v0.4s, v1.4s, v22.s[4] +// CHECK-ERROR: ^ + + fmul v0.4h, v1.4h, v2.h[4] + fmul v0.2s, v1.2s, v2.s[4] + fmul v0.2s, v1.2s, v22.s[4] + fmul v0.4s, v1.4s, v2.s[4] + fmul v0.4s, v1.4s, v22.s[4] + fmul v0.2d, v1.2d, v2.d[2] + fmul v0.2d, v1.2d, v22.d[2] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: mul v0.2d, v1.2d, v2.d[1] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmul v0.4h, v1.4h, v2.h[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: fmul v0.2s, v1.2s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: fmul v0.2s, v1.2s, v22.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: fmul v0.4s, v1.4s, v2.s[4] 
+// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: fmul v0.4s, v1.4s, v22.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: fmul v0.2d, v1.2d, v2.d[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: fmul v0.2d, v1.2d, v22.d[2] +// CHECK-ERROR: ^ + + fmulx v0.4h, v1.4h, v2.h[4] + fmulx v0.2s, v1.2s, v2.s[4] + fmulx v0.2s, v1.2s, v22.s[4] + fmulx v0.4s, v1.4s, v2.s[4] + fmulx v0.4s, v1.4s, v22.s[4] + fmulx v0.2d, v1.2d, v2.d[2] + fmulx v0.2d, v1.2d, v22.d[2] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmulx v0.4h, v1.4h, v2.h[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: fmulx v0.2s, v1.2s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: fmulx v0.2s, v1.2s, v22.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: fmulx v0.4s, v1.4s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: fmulx v0.4s, v1.4s, v22.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: fmulx v0.2d, v1.2d, v2.d[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: fmulx v0.2d, v1.2d, v22.d[2] +// CHECK-ERROR: ^ + + smull v0.4h, v1.4h, v2.h[2] + smull v0.4s, v1.4h, v2.h[8] + smull v0.4s, v1.4h, v16.h[4] + smull v0.2s, v1.2s, v2.s[2] + smull v0.2d, v1.2s, v2.s[4] + smull v0.2d, v1.2s, v22.s[4] + smull2 v0.4h, v1.8h, v2.h[2] + smull2 v0.4s, v1.8h, v2.h[8] + smull2 v0.4s, v1.8h, v16.h[4] + smull2 v0.2s, v1.4s, v2.s[2] + smull2 v0.2d, v1.4s, v2.s[4] + smull2 v0.2d, v1.4s, v22.s[4] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smull v0.4h, v1.4h, v2.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: 
error: lane number incompatible with layout +// CHECK-ERROR: smull v0.4s, v1.4h, v2.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smull v0.4s, v1.4h, v16.h[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smull v0.2s, v1.2s, v2.s[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: smull v0.2d, v1.2s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: smull v0.2d, v1.2s, v22.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smull2 v0.4h, v1.8h, v2.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: smull2 v0.4s, v1.8h, v2.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smull2 v0.4s, v1.8h, v16.h[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smull2 v0.2s, v1.4s, v2.s[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: smull2 v0.2d, v1.4s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: smull2 v0.2d, v1.4s, v22.s[4] +// CHECK-ERROR: ^ + + umull v0.4h, v1.4h, v2.h[2] + umull v0.4s, v1.4h, v2.h[8] + umull v0.4s, v1.4h, v16.h[4] + umull v0.2s, v1.2s, v2.s[2] + umull v0.2d, v1.2s, v2.s[4] + umull v0.2d, v1.2s, v22.s[4] + umull2 v0.4h, v1.8h, v2.h[2] + umull2 v0.4s, v1.8h, v2.h[8] + umull2 v0.4s, v1.8h, v16.h[4] + umull2 v0.2s, v1.4s, v2.s[2] + umull2 v0.2d, v1.4s, v2.s[4] + umull2 v0.2d, v1.4s, v22.s[4] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umull v0.4h, v1.4h, v2.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: umull v0.4s, v1.4h, v2.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid 
operand for instruction +// CHECK-ERROR: umull v0.4s, v1.4h, v16.h[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umull v0.2s, v1.2s, v2.s[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: umull v0.2d, v1.2s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: umull v0.2d, v1.2s, v22.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umull2 v0.4h, v1.8h, v2.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: umull2 v0.4s, v1.8h, v2.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umull2 v0.4s, v1.8h, v16.h[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umull2 v0.2s, v1.4s, v2.s[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: umull2 v0.2d, v1.4s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: umull2 v0.2d, v1.4s, v22.s[4] +// CHECK-ERROR: ^ + + sqdmull v0.4h, v1.4h, v2.h[2] + sqdmull v0.4s, v1.4h, v2.h[8] + sqdmull v0.4s, v1.4h, v16.h[4] + sqdmull v0.2s, v1.2s, v2.s[2] + sqdmull v0.2d, v1.2s, v2.s[4] + sqdmull v0.2d, v1.2s, v22.s[4] + sqdmull2 v0.4h, v1.8h, v2.h[2] + sqdmull2 v0.4s, v1.8h, v2.h[8] + sqdmull2 v0.4s, v1.8h, v16.h[4] + sqdmull2 v0.2s, v1.4s, v2.s[2] + sqdmull2 v0.2d, v1.4s, v2.s[4] + sqdmull2 v0.2d, v1.4s, v22.s[4] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmull v0.4h, v1.4h, v2.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmull v0.4s, v1.4h, v2.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmull v0.4s, v1.4h, v16.h[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: 
invalid operand for instruction +// CHECK-ERROR: sqdmull v0.2s, v1.2s, v2.s[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmull v0.2d, v1.2s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmull v0.2d, v1.2s, v22.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmull2 v0.4h, v1.8h, v2.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmull2 v0.4s, v1.8h, v2.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmull2 v0.4s, v1.8h, v16.h[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmull2 v0.2s, v1.4s, v2.s[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmull2 v0.2d, v1.4s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmull2 v0.2d, v1.4s, v22.s[4] +// CHECK-ERROR: ^ + + sqdmulh v0.4h, v1.4h, v2.h[8] + sqdmulh v0.4h, v1.4h, v16.h[2] + sqdmulh v0.8h, v1.8h, v2.h[8] + sqdmulh v0.8h, v1.8h, v16.h[2] + sqdmulh v0.2s, v1.2s, v2.s[4] + sqdmulh v0.2s, v1.2s, v22.s[4] + sqdmulh v0.4s, v1.4s, v2.s[4] + sqdmulh v0.4s, v1.4s, v22.s[4] + sqdmulh v0.2d, v1.2d, v22.d[1] + +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmulh v0.4h, v1.4h, v2.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmulh v0.4h, v1.4h, v16.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmulh v0.8h, v1.8h, v2.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmulh v0.8h, v1.8h, v16.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmulh v0.2s, 
v1.2s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmulh v0.2s, v1.2s, v22.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmulh v0.4s, v1.4s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmulh v0.4s, v1.4s, v22.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmulh v0.2d, v1.2d, v22.d[1] +// CHECK-ERROR: ^ + + sqrdmulh v0.4h, v1.4h, v2.h[8] + sqrdmulh v0.4h, v1.4h, v16.h[2] + sqrdmulh v0.8h, v1.8h, v2.h[8] + sqrdmulh v0.8h, v1.8h, v16.h[2] + sqrdmulh v0.2s, v1.2s, v2.s[4] + sqrdmulh v0.2s, v1.2s, v22.s[4] + sqrdmulh v0.4s, v1.4s, v2.s[4] + sqrdmulh v0.4s, v1.4s, v22.s[4] + sqrdmulh v0.2d, v1.2d, v22.d[1] + +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqrdmulh v0.4h, v1.4h, v2.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrdmulh v0.4h, v1.4h, v16.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqrdmulh v0.8h, v1.8h, v2.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrdmulh v0.8h, v1.8h, v16.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqrdmulh v0.2s, v1.2s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqrdmulh v0.2s, v1.2s, v22.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqrdmulh v0.4s, v1.4s, v2.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqrdmulh v0.4s, v1.4s, v22.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrdmulh v0.2d, v1.2d, v22.d[1] +// CHECK-ERROR: ^ + 
+//---------------------------------------------------------------------- +// Across vectors +//---------------------------------------------------------------------- + + saddlv b0, v1.8b + saddlv b0, v1.16b + saddlv h0, v1.4h + saddlv h0, v1.8h + saddlv s0, v1.2s + saddlv s0, v1.4s + saddlv d0, v1.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: saddlv b0, v1.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: saddlv b0, v1.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: saddlv h0, v1.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: saddlv h0, v1.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: saddlv s0, v1.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: saddlv s0, v1.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: saddlv d0, v1.2s +// CHECK-ERROR: ^ + + uaddlv b0, v1.8b + uaddlv b0, v1.16b + uaddlv h0, v1.4h + uaddlv h0, v1.8h + uaddlv s0, v1.2s + uaddlv s0, v1.4s + uaddlv d0, v1.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uaddlv b0, v1.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uaddlv b0, v1.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uaddlv h0, v1.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uaddlv h0, v1.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uaddlv s0, v1.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uaddlv s0, v1.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uaddlv d0, v1.2s +// CHECK-ERROR: ^ + + smaxv s0, v1.2s + sminv 
s0, v1.2s + umaxv s0, v1.2s + uminv s0, v1.2s + addv s0, v1.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smaxv s0, v1.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sminv s0, v1.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umaxv s0, v1.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uminv s0, v1.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: addv s0, v1.2s +// CHECK-ERROR: ^ + + smaxv d0, v1.2d + sminv d0, v1.2d + umaxv d0, v1.2d + uminv d0, v1.2d + addv d0, v1.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smaxv d0, v1.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sminv d0, v1.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umaxv d0, v1.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uminv d0, v1.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: addv d0, v1.2d +// CHECK-ERROR: ^ + + fmaxnmv b0, v1.16b + fminnmv b0, v1.16b + fmaxv b0, v1.16b + fminv b0, v1.16b + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmaxnmv b0, v1.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fminnmv b0, v1.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmaxv b0, v1.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fminv b0, v1.16b +// CHECK-ERROR: ^ + + fmaxnmv h0, v1.8h + fminnmv h0, v1.8h + fmaxv h0, v1.8h + fminv h0, v1.8h + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmaxnmv h0, v1.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for 
instruction +// CHECK-ERROR: fminnmv h0, v1.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmaxv h0, v1.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fminv h0, v1.8h +// CHECK-ERROR: ^ + + fmaxnmv d0, v1.2d + fminnmv d0, v1.2d + fmaxv d0, v1.2d + fminv d0, v1.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmaxnmv d0, v1.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fminnmv d0, v1.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmaxv d0, v1.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fminv d0, v1.2d +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Floating-point Multiply Extended +//---------------------------------------------------------------------- + + fmulx s20, h22, s15 + fmulx d23, d11, s1 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmulx s20, h22, s15 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmulx d23, d11, s1 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Floating-point Reciprocal Step +//---------------------------------------------------------------------- + + frecps s21, s16, h13 + frecps d22, s30, d21 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frecps s21, s16, h13 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frecps d22, s30, d21 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Floating-point Reciprocal Square Root Step +//---------------------------------------------------------------------- + + frsqrts s21, h5, s12 + frsqrts d8, s22, d18 + +// CHECK-ERROR: error: invalid 
operand for instruction +// CHECK-ERROR: frsqrts s21, h5, s12 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frsqrts d8, s22, d18 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Vector load/store multiple N-element structure (class SIMD lselem) +//---------------------------------------------------------------------- + ld1 {x3}, [x2] + ld1 {v4}, [x0] + ld1 {v32.16b}, [x0] + ld1 {v15.8h}, [x32] +// CHECK-ERROR: error: expected vector type register +// CHECK-ERROR: ld1 {x3}, [x2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected vector type register +// CHECK-ERROR: ld1 {v4}, [x0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected vector type register +// CHECK-ERROR: ld1 {v32.16b}, [x0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ld1 {v15.8h}, [x32] +// CHECK-ERROR: ^ + + ld1 {v0.16b, v2.16b}, [x0] + ld1 {v0.8h, v1.8h, v2.8h, v3.8h, v4.8h}, [x0] + ld1 v0.8b, v1.8b}, [x0] + ld1 {v0.8h-v4.8h}, [x0] + ld1 {v1.8h-v1.8h}, [x0] + ld1 {v15.8h-v17.4h}, [x15] + ld1 {v0.8b-v2.8b, [x0] +// CHECK-ERROR: error: invalid space between two vectors +// CHECK-ERROR: ld1 {v0.16b, v2.16b}, [x0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid number of vectors +// CHECK-ERROR: ld1 {v0.8h, v1.8h, v2.8h, v3.8h, v4.8h}, [x0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: '{' expected +// CHECK-ERROR: ld1 v0.8b, v1.8b}, [x0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid number of vectors +// CHECK-ERROR: ld1 {v0.8h-v4.8h}, [x0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid number of vectors +// CHECK-ERROR: ld1 {v1.8h-v1.8h}, [x0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected the same vector layout +// CHECK-ERROR: ld1 {v15.8h-v17.4h}, [x15] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: '}' expected +// CHECK-ERROR: ld1 {v0.8b-v2.8b, [x0] +// CHECK-ERROR: ^ + + ld2 {v15.8h, v16.4h}, [x15] + ld2 {v0.8b, v2.8b}, [x0] + ld2 {v15.4h, 
v16.4h, v17.4h}, [x32] + ld2 {v15.8h-v16.4h}, [x15] + ld2 {v0.2d-v2.2d}, [x0] +// CHECK-ERROR: error: invalid space between two vectors +// CHECK-ERROR: ld2 {v15.8h, v16.4h}, [x15] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid space between two vectors +// CHECK-ERROR: ld2 {v0.8b, v2.8b}, [x0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ld2 {v15.4h, v16.4h, v17.4h}, [x32] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected the same vector layout +// CHECK-ERROR: ld2 {v15.8h-v16.4h}, [x15] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ld2 {v0.2d-v2.2d}, [x0] +// CHECK-ERROR: ^ + + ld3 {v15.8h, v16.8h, v17.4h}, [x15] + ld3 {v0.8b, v1,8b, v2.8b, v3.8b}, [x0] + ld3 {v0.8b, v2.8b, v3.8b}, [x0] + ld3 {v15.8h-v17.4h}, [x15] + ld3 {v31.4s-v2.4s}, [sp] +// CHECK-ERROR: error: invalid space between two vectors +// CHECK-ERROR: ld3 {v15.8h, v16.8h, v17.4h}, [x15] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected vector type register +// CHECK-ERROR: ld3 {v0.8b, v1,8b, v2.8b, v3.8b}, [x0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid space between two vectors +// CHECK-ERROR: ld3 {v0.8b, v2.8b, v3.8b}, [x0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected the same vector layout +// CHECK-ERROR: ld3 {v15.8h-v17.4h}, [x15] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ld3 {v31.4s-v2.4s}, [sp] +// CHECK-ERROR: ^ + + ld4 {v15.8h, v16.8h, v17.4h, v18.8h}, [x15] + ld4 {v0.8b, v2.8b, v3.8b, v4.8b}, [x0] + ld4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31] + ld4 {v15.8h-v18.4h}, [x15] + ld4 {v31.2s-v1.2s}, [x31] +// CHECK-ERROR: error: invalid space between two vectors +// CHECK-ERROR: ld4 {v15.8h, v16.8h, v17.4h, v18.8h}, [x15] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid space between two vectors +// CHECK-ERROR: ld4 {v0.8b, v2.8b, v3.8b, v4.8b}, [x0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid number of vectors +// 
CHECK-ERROR: ld4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected the same vector layout +// CHECK-ERROR: ld4 {v15.8h-v18.4h}, [x15] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ld4 {v31.2s-v1.2s}, [x31] +// CHECK-ERROR: ^ + + st1 {x3}, [x2] + st1 {v4}, [x0] + st1 {v32.16b}, [x0] + st1 {v15.8h}, [x32] +// CHECK-ERROR: error: expected vector type register +// CHECK-ERROR: st1 {x3}, [x2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected vector type register +// CHECK-ERROR: st1 {v4}, [x0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected vector type register +// CHECK-ERROR: st1 {v32.16b}, [x0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: st1 {v15.8h}, [x32] +// CHECK-ERROR: ^ + + st1 {v0.16b, v2.16b}, [x0] + st1 {v0.8h, v1.8h, v2.8h, v3.8h, v4.8h}, [x0] + st1 v0.8b, v1.8b}, [x0] + st1 {v0.8h-v4.8h}, [x0] + st1 {v1.8h-v1.8h}, [x0] + st1 {v15.8h-v17.4h}, [x15] + st1 {v0.8b-v2.8b, [x0] +// CHECK-ERROR: error: invalid space between two vectors +// CHECK-ERROR: st1 {v0.16b, v2.16b}, [x0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid number of vectors +// CHECK-ERROR: st1 {v0.8h, v1.8h, v2.8h, v3.8h, v4.8h}, [x0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: '{' expected +// CHECK-ERROR: st1 v0.8b, v1.8b}, [x0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid number of vectors +// CHECK-ERROR: st1 {v0.8h-v4.8h}, [x0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid number of vectors +// CHECK-ERROR: st1 {v1.8h-v1.8h}, [x0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected the same vector layout +// CHECK-ERROR: st1 {v15.8h-v17.4h}, [x15] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: '}' expected +// CHECK-ERROR: st1 {v0.8b-v2.8b, [x0] +// CHECK-ERROR: ^ + + st2 {v15.8h, v16.4h}, [x15] + st2 {v0.8b, v2.8b}, [x0] + st2 {v15.4h, v16.4h, v17.4h}, [x30] + st2 {v15.8h-v16.4h}, [x15] + st2 {v0.2d-v2.2d}, [x0] +// CHECK-ERROR: 
error: invalid space between two vectors +// CHECK-ERROR: st2 {v15.8h, v16.4h}, [x15] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid space between two vectors +// CHECK-ERROR: st2 {v0.8b, v2.8b}, [x0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: st2 {v15.4h, v16.4h, v17.4h}, [x30] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected the same vector layout +// CHECK-ERROR: st2 {v15.8h-v16.4h}, [x15] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: st2 {v0.2d-v2.2d}, [x0] +// CHECK-ERROR: ^ + + st3 {v15.8h, v16.8h, v17.4h}, [x15] + st3 {v0.8b, v1,8b, v2.8b, v3.8b}, [x0] + st3 {v0.8b, v2.8b, v3.8b}, [x0] + st3 {v15.8h-v17.4h}, [x15] + st3 {v31.4s-v2.4s}, [sp] +// CHECK-ERROR: error: invalid space between two vectors +// CHECK-ERROR: st3 {v15.8h, v16.8h, v17.4h}, [x15] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected vector type register +// CHECK-ERROR: st3 {v0.8b, v1,8b, v2.8b, v3.8b}, [x0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid space between two vectors +// CHECK-ERROR: st3 {v0.8b, v2.8b, v3.8b}, [x0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected the same vector layout +// CHECK-ERROR: st3 {v15.8h-v17.4h}, [x15] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: st3 {v31.4s-v2.4s}, [sp] +// CHECK-ERROR: ^ + + st4 {v15.8h, v16.8h, v17.4h, v18.8h}, [x15] + st4 {v0.8b, v2.8b, v3.8b, v4.8b}, [x0] + st4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31] + st4 {v15.8h-v18.4h}, [x15] + st4 {v31.2s-v1.2s}, [x31] +// CHECK-ERROR: error: invalid space between two vectors +// CHECK-ERROR: st4 {v15.8h, v16.8h, v17.4h, v18.8h}, [x15] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid space between two vectors +// CHECK-ERROR: st4 {v0.8b, v2.8b, v3.8b, v4.8b}, [x0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid number of vectors +// CHECK-ERROR: st4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31] +// CHECK-ERROR: ^ +// 
CHECK-ERROR: error: expected the same vector layout +// CHECK-ERROR: st4 {v15.8h-v18.4h}, [x15] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: st4 {v31.2s-v1.2s}, [x31] +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Vector post-index load/store multiple N-element structure +// (class SIMD lselem-post) +//---------------------------------------------------------------------- + ld1 {v0.16b}, [x0], #8 + ld1 {v0.8h, v1.16h}, [x0], x1 + ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], #24 +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ld1 {v0.16b}, [x0], #8 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected vector type register +// CHECK-ERROR: ld1 {v0.8h, v1.16h}, [x0], x1 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], #24 +// CHECK-ERROR: ^ + + ld2 {v0.16b, v1.16b}, [x0], #16 + ld3 {v5.2s, v6.2s, v7.2s}, [x1], #48 + ld4 {v31.2d, v0.2d, v1.2d, v2.1d}, [x3], x1 +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ld2 {v0.16b, v1.16b}, [x0], #16 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ld3 {v5.2s, v6.2s, v7.2s}, [x1], #48 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid space between two vectors +// CHECK-ERROR: ld4 {v31.2d, v0.2d, v1.2d, v2.1d}, [x3], x1 +// CHECK-ERROR: ^ + + st1 {v0.16b}, [x0], #8 + st1 {v0.8h, v1.16h}, [x0], x1 + st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], #24 +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: st1 {v0.16b}, [x0], #8 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected vector type register +// CHECK-ERROR: st1 {v0.8h, v1.16h}, [x0], x1 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], #24 +// CHECK-ERROR: ^ + + st2 {v0.16b, v1.16b}, [x0], #16 + st3 {v5.2s, v6.2s, v7.2s}, [x1], #48 +
 st4 {v31.2d, v0.2d, v1.2d, v2.1d}, [x3], x1 +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: st2 {v0.16b, v1.16b}, [x0], #16 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: st3 {v5.2s, v6.2s, v7.2s}, [x1], #48 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid space between two vectors +// CHECK-ERROR: st4 {v31.2d, v0.2d, v1.2d, v2.1d}, [x3], x1 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Load single N-element structure to all lanes of N consecutive +// registers (N = 1,2,3,4) +//------------------------------------------------------------------------------ + ld1r {x1}, [x0] + ld2r {v31.4s, v0.2s}, [sp] + ld3r {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] + ld4r {v31.2s, v0.2s, v1.2d, v2.2s}, [sp] +// CHECK-ERROR: error: expected vector type register +// CHECK-ERROR: ld1r {x1}, [x0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid space between two vectors +// CHECK-ERROR: ld2r {v31.4s, v0.2s}, [sp] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ld3r {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid space between two vectors +// CHECK-ERROR: ld4r {v31.2s, v0.2s, v1.2d, v2.2s}, [sp] +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Load/Store single N-element structure to/from one lane of N consecutive +// registers (N = 1, 2,3,4) +//------------------------------------------------------------------------------ + ld1 {v0.b}[16], [x0] + ld2 {v15.h, v16.h}[8], [x15] + ld3 {v31.s, v0.s, v1.s}[-1], [sp] + ld4 {v0.d, v1.d, v2.d, v3.d}[2], [x0] +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: ld1 {v0.b}[16], [x0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: ld2 {v15.h, v16.h}[8], [x15] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected 
lane number +// CHECK-ERROR: ld3 {v31.s, v0.s, v1.s}[-1], [sp] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: ld4 {v0.d, v1.d, v2.d, v3.d}[2], [x0] +// CHECK-ERROR: ^ + + st1 {v0.d}[16], [x0] + st2 {v31.s, v0.s}[3], [8] + st3 {v15.h, v16.h, v17.h}[-1], [x15] + st4 {v0.d, v1.d, v2.d, v3.d}[2], [x0] +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: st1 {v0.d}[16], [x0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: st2 {v31.s, v0.s}[3], [8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected lane number +// CHECK-ERROR: st3 {v15.h, v16.h, v17.h}[-1], [x15] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: st4 {v0.d, v1.d, v2.d, v3.d}[2], [x0] +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Post-index of load single N-element structure to all lanes of N consecutive +// registers (N = 1,2,3,4) +//------------------------------------------------------------------------------ + ld1r {v15.8h}, [x15], #5 + ld2r {v0.2d, v1.2d}, [x0], #7 + ld3r {v15.4h, v16.4h, v17.4h}, [x15], #1 + ld4r {v31.1d, v0.1d, v1.1d, v2.1d}, [sp], sp +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ld1r {v15.8h}, [x15], #5 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ld2r {v0.2d, v1.2d}, [x0], #7 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ld3r {v15.4h, v16.4h, v17.4h}, [x15], #1 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ld4r {v31.1d, v0.1d, v1.1d, v2.1d}, [sp], sp +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Post-index of Load/Store single N-element structure to/from one lane of N +// consecutive registers (N = 1, 2,3,4) 
+//------------------------------------------------------------------------------ + ld1 {v0.b}[0], [x0], #2 + ld2 {v15.h, v16.h}[0], [x15], #3 + ld3 {v31.s, v0.s, v1.d}[0], [sp], x9 + ld4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #24 +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ld1 {v0.b}[0], [x0], #2 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ld2 {v15.h, v16.h}[0], [x15], #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected the same vector layout +// CHECK-ERROR: ld3 {v31.s, v0.s, v1.d}[0], [sp], x9 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ld4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #24 +// CHECK-ERROR: ^ + + st1 {v0.d}[0], [x0], #7 + st2 {v31.s, v0.s}[0], [sp], #6 + st3 {v15.h, v16.h, v17.h}[0], [x15], #8 + st4 {v0.b, v1.b, v2.b, v3.b}[1], [x0], #1 +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: st1 {v0.d}[0], [x0], #7 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: st2 {v31.s, v0.s}[0], [sp], #6 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: st3 {v15.h, v16.h, v17.h}[0], [x15], #8 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: st4 {v0.b, v1.b, v2.b, v3.b}[1], [x0], #1 +// CHECK-ERROR: ^ + + + ins v2.b[16], w1 + ins v7.h[8], w14 + ins v20.s[5], w30 + ins v1.d[2], x7 + ins v2.b[3], b1 + ins v7.h[2], h14 + ins v20.s[1], s30 + ins v1.d[0], d7 + +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: ins v2.b[16], w1 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: ins v7.h[8], w14 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: ins v20.s[5], w30 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: ins v1.d[2], x7 +// CHECK-ERROR: ^ 
+// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ins v2.b[3], b1 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ins v7.h[2], h14 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ins v20.s[1], s30 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ins v1.d[0], d7 +// CHECK-ERROR: ^ + + smov w1, v0.b[16] + smov w14, v6.h[8] + smov x1, v0.b[16] + smov x14, v6.h[8] + smov x20, v9.s[5] + smov w1, v0.d[0] + smov w14, v6.d[1] + smov x1, v0.d[0] + smov x14, v6.d[1] + smov x20, v9.d[0] + +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: smov w1, v0.b[16] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: smov w14, v6.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: smov x1, v0.b[16] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: smov x14, v6.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: smov x20, v9.s[5] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smov w1, v0.d[0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smov w14, v6.d[1] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smov x1, v0.d[0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smov x14, v6.d[1] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: smov x20, v9.d[0] +// CHECK-ERROR: ^ + + umov w1, v0.b[16] + umov w14, v6.h[8] + umov w20, v9.s[5] + umov x7, v18.d[3] + umov w1, v0.d[0] + umov s20, v9.s[2] + umov d7, v18.d[1] + +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: umov w1, v0.b[16] +// CHECK-ERROR: ^ +//
 CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: umov w14, v6.h[8] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: umov w20, v9.s[5] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: umov x7, v18.d[3] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umov w1, v0.d[0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umov s20, v9.s[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: umov d7, v18.d[1] +// CHECK-ERROR: ^ + + Ins v1.h[2], v3.b[6] + Ins v6.h[7], v7.s[2] + Ins v15.d[0], v22.s[2] + Ins v0.d[0], v4.b[1] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: Ins v1.h[2], v3.b[6] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: Ins v6.h[7], v7.s[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: Ins v15.d[0], v22.s[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: Ins v0.d[0], v4.b[1] +// CHECK-ERROR: ^ + + dup v1.8h, v2.b[2] + dup v11.4s, v7.h[7] + dup v17.2d, v20.s[0] + dup v1.16b, v2.h[2] + dup v11.8h, v7.s[3] + dup v17.4s, v20.d[0] + dup v5.2d, v1.b[1] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: dup v1.8h, v2.b[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: dup v11.4s, v7.h[7] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: dup v17.2d, v20.s[0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: dup v1.16b, v2.h[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: dup v11.8h, v7.s[3] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: dup v17.4s, v20.d[0] +// CHECK-ERROR: ^ +//
 CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: dup v5.2d, v1.b[1] +// CHECK-ERROR: ^ + + dup v1.8b, b1 + dup v11.4h, h14 + dup v17.2s, s30 + dup v1.16b, d2 + dup v11.8s, w16 + dup v17.4d, w28 + dup v5.2d, w0 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: dup v1.8b, b1 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: dup v11.4h, h14 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: dup v17.2s, s30 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: dup v1.16b, d2 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: dup v11.8s, w16 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: dup v17.4d, w28 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: dup v5.2d, w0 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Compare Bitwise Equal +//---------------------------------------------------------------------- + + cmeq b20, d21, d22 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: cmeq b20, d21, d22 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Compare Bitwise Equal To Zero +//---------------------------------------------------------------------- + + cmeq d20, b21, #0 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: cmeq d20, b21, #0 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Compare Unsigned Higher Or Same +//---------------------------------------------------------------------- + + cmhs b20, d21, d22 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: cmhs b20, d21, d22 +// CHECK-ERROR: ^ + + 
+//---------------------------------------------------------------------- +// Scalar Compare Signed Greather Than Or Equal +//---------------------------------------------------------------------- + + cmge b20, d21, d22 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: cmge b20, d21, d22 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Compare Signed Greather Than Or Equal To Zero +//---------------------------------------------------------------------- + + cmge d20, b21, #0 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: cmge d20, b21, #0 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Compare Unsigned Higher +//---------------------------------------------------------------------- + + cmhi b20, d21, d22 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: cmhi b20, d21, d22 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Compare Signed Greater Than +//---------------------------------------------------------------------- + + cmgt b20, d21, d22 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: cmgt b20, d21, d22 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Compare Signed Greater Than Zero +//---------------------------------------------------------------------- + + cmgt d20, b21, #0 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: cmgt d20, b21, #0 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Compare Signed Less Than Or Equal To Zero +//---------------------------------------------------------------------- + + cmle d20, b21, #0 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: cmle d20, b21, #0 +// CHECK-ERROR: ^ + 
+//---------------------------------------------------------------------- +// Scalar Compare Less Than Zero +//---------------------------------------------------------------------- + + cmlt d20, b21, #0 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: cmlt d20, b21, #0 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Compare Bitwise Test Bits +//---------------------------------------------------------------------- + + cmtst b20, d21, d22 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: cmtst b20, d21, d22 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Floating-point Compare Mask Equal +//---------------------------------------------------------------------- + + fcmeq s10, h11, s12 + fcmeq d20, s21, d22 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcmeq s10, h11, s12 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcmeq d20, s21, d22 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Floating-point Compare Mask Equal To Zero +//---------------------------------------------------------------------- + + fcmeq h10, s11, #0.0 + fcmeq d20, s21, #0.0 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcmeq h10, s11, #0.0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcmeq d20, s21, #0.0 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Floating-point Compare Mask Greater Than Or Equal +//---------------------------------------------------------------------- + + fcmge s10, h11, s12 + fcmge d20, s21, d22 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcmge s10, h11, s12 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand 
for instruction +// CHECK-ERROR: fcmge d20, s21, d22 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Floating-point Compare Mask Greater Than Or Equal To Zero +//---------------------------------------------------------------------- + + fcmge h10, s11, #0.0 + fcmge d20, s21, #0.0 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcmge h10, s11, #0.0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcmge d20, s21, #0.0 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Floating-point Compare Mask Greather Than +//---------------------------------------------------------------------- + + fcmgt s10, h11, s12 + fcmgt d20, s21, d22 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcmgt s10, h11, s12 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcmgt d20, s21, d22 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Floating-point Compare Mask Greather Than Zero +//---------------------------------------------------------------------- + + fcmgt h10, s11, #0.0 + fcmgt d20, s21, #0.0 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcmgt h10, s11, #0.0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcmgt d20, s21, #0.0 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Floating-point Compare Mask Less Than Or Equal To Zero +//---------------------------------------------------------------------- + + fcmle h10, s11, #0.0 + fcmle d20, s21, #0.0 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcmle h10, s11, #0.0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcmle d20, s21, #0.0 
+// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Floating-point Compare Mask Less Than +//---------------------------------------------------------------------- + + fcmlt h10, s11, #0.0 + fcmlt d20, s21, #0.0 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcmlt h10, s11, #0.0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcmlt d20, s21, #0.0 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Floating-point Absolute Compare Mask Greater Than Or Equal +//---------------------------------------------------------------------- + + facge s10, h11, s12 + facge d20, s21, d22 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: facge s10, h11, s12 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: facge d20, s21, d22 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Floating-point Absolute Compare Mask Greater Than +//---------------------------------------------------------------------- + + facgt s10, h11, s12 + facgt d20, d21, s22 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: facgt s10, h11, s12 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: facgt d20, d21, s22 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Signed Saturating Accumulated of Unsigned Value +//---------------------------------------------------------------------- + + suqadd b0, h1 + suqadd h0, s1 + suqadd s0, d1 + suqadd d0, b0 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: suqadd b0, h1 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: suqadd h0, s1 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand 
for instruction +// CHECK-ERROR: suqadd s0, d1 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: suqadd d0, b0 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Unsigned Saturating Accumulated of Signed Value +//---------------------------------------------------------------------- + + usqadd b0, h1 + usqadd h0, s1 + usqadd s0, d1 + usqadd d0, b1 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usqadd b0, h1 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usqadd h0, s1 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usqadd s0, d1 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usqadd d0, b1 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Absolute Value +//---------------------------------------------------------------------- + + abs d29, s24 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: abs d29, s24 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Negate +//---------------------------------------------------------------------- + + neg d29, s24 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: neg d29, s24 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Signed Saturating Doubling Multiply-Add Long +//---------------------------------------------------------------------- + + sqdmlal s17, h27, s12 + sqdmlal d19, s24, d12 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlal s17, h27, s12 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: too few operands for instruction +// CHECK-ERROR: sqdmlal d19, s24, d12 +// CHECK-ERROR: ^ + 
+//---------------------------------------------------------------------- +// Signed Saturating Doubling Multiply-Subtract Long +//---------------------------------------------------------------------- + + sqdmlsl s14, h12, s25 + sqdmlsl d12, s23, d13 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlsl s14, h12, s25 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: too few operands for instruction +// CHECK-ERROR: sqdmlsl d12, s23, d13 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Signed Saturating Doubling Multiply Long +//---------------------------------------------------------------------- + + sqdmull s12, h22, s12 + sqdmull d15, s22, d12 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmull s12, h22, s12 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: too few operands for instruction +// CHECK-ERROR: sqdmull d15, s22, d12 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Signed Saturating Extract Unsigned Narrow +//---------------------------------------------------------------------- + + sqxtun b19, b14 + sqxtun h21, h15 + sqxtun s20, s12 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtun b19, b14 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtun h21, h15 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtun s20, s12 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Signed Saturating Extract Signed Narrow +//---------------------------------------------------------------------- + + sqxtn b18, b18 + sqxtn h20, h17 + sqxtn s19, s14 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtn b18, b18 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtn h20, h17 +// 
CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtn s19, s14 +// CHECK-ERROR: ^ + + +//---------------------------------------------------------------------- +// Scalar Unsigned Saturating Extract Narrow +//---------------------------------------------------------------------- + + uqxtn b18, b18 + uqxtn h20, h17 + uqxtn s19, s14 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqxtn b18, b18 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqxtn h20, h17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqxtn s19, s14 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Signed Shift Right (Immediate) +//---------------------------------------------------------------------- + sshr d15, d16, #99 + +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: sshr d15, d16, #99 +// CHECK-ERROR: ^ + + sshr d15, s16, #31 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sshr d15, s16, #31 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Unsigned Shift Right (Immediate) +//---------------------------------------------------------------------- + + ushr d10, d17, #99 + +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: ushr d10, d17, #99 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Signed Rounding Shift Right (Immediate) +//---------------------------------------------------------------------- + + srshr d19, d18, #99 + +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: srshr d19, d18, #99 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Unigned Rounding Shift Right (Immediate) 
+//---------------------------------------------------------------------- + + urshr d20, d23, #99 + +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: urshr d20, d23, #99 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Signed Shift Right and Accumulate (Immediate) +//---------------------------------------------------------------------- + + ssra d18, d12, #99 + +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: ssra d18, d12, #99 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Unsigned Shift Right and Accumulate (Immediate) +//---------------------------------------------------------------------- + + usra d20, d13, #99 + +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: usra d20, d13, #99 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Signed Rounding Shift Right and Accumulate (Immediate) +//---------------------------------------------------------------------- + + srsra d15, d11, #99 + +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: srsra d15, d11, #99 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Unsigned Rounding Shift Right and Accumulate (Immediate) +//---------------------------------------------------------------------- + + ursra d18, d10, #99 + +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: ursra d18, d10, #99 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Shift Left (Immediate) +//---------------------------------------------------------------------- + + shl d7, d10, #99 + +// CHECK-ERROR: error: expected integer in range [0, 63] +// CHECK-ERROR: shl d7, d10, #99 +// CHECK-ERROR: ^ + + shl d7, s16, #31 + +// CHECK-ERROR: error: invalid 
operand for instruction +// CHECK-ERROR: shl d7, s16, #31 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Signed Saturating Shift Left (Immediate) +//---------------------------------------------------------------------- + + sqshl b11, b19, #99 + sqshl h13, h18, #99 + sqshl s14, s17, #99 + sqshl d15, d16, #99 + +// CHECK-ERROR: error: expected integer in range [0, 7] +// CHECK-ERROR: sqshl b11, b19, #99 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 15] +// CHECK-ERROR: sqshl h13, h18, #99 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 31] +// CHECK-ERROR: sqshl s14, s17, #99 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 63] +// CHECK-ERROR: sqshl d15, d16, #99 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Unsigned Saturating Shift Left (Immediate) +//---------------------------------------------------------------------- + + uqshl b18, b15, #99 + uqshl h11, h18, #99 + uqshl s14, s19, #99 + uqshl d15, d12, #99 + +// CHECK-ERROR: error: expected integer in range [0, 7] +// CHECK-ERROR: uqshl b18, b15, #99 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 15] +// CHECK-ERROR: uqshl h11, h18, #99 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 31] +// CHECK-ERROR: uqshl s14, s19, #99 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 63] +// CHECK-ERROR: uqshl d15, d12, #99 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Signed Saturating Shift Left Unsigned (Immediate) +//---------------------------------------------------------------------- + + sqshlu b15, b18, #99 + sqshlu h19, h17, #99 + sqshlu s16, s14, #99 + sqshlu d11, d13, #99 + +// CHECK-ERROR: error: expected integer in range [0, 7] +// CHECK-ERROR: sqshlu b15, b18, #99 +// CHECK-ERROR: ^ +// CHECK-ERROR: 
error: expected integer in range [0, 15] +// CHECK-ERROR: sqshlu h19, h17, #99 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 31] +// CHECK-ERROR: sqshlu s16, s14, #99 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 63] +// CHECK-ERROR: sqshlu d11, d13, #99 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Shift Right And Insert (Immediate) +//---------------------------------------------------------------------- + + sri d10, d12, #99 + +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: sri d10, d12, #99 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Shift Left And Insert (Immediate) +//---------------------------------------------------------------------- + + sli d10, d14, #99 + +// CHECK-ERROR: error: expected integer in range [0, 63] +// CHECK-ERROR: sli d10, d14, #99 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Signed Saturating Shift Right Narrow (Immediate) +//---------------------------------------------------------------------- + + sqshrn b10, h15, #99 + sqshrn h17, s10, #99 + sqshrn s18, d10, #99 + +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: sqshrn b10, h15, #99 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: sqshrn h17, s10, #99 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: sqshrn s18, d10, #99 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Unsigned Saturating Shift Right Narrow (Immediate) +//---------------------------------------------------------------------- + + uqshrn b12, h10, #99 + uqshrn h10, s14, #99 + uqshrn s10, d12, #99 + +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: uqshrn b12, h10, #99 +// CHECK-ERROR: ^ +// 
CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: uqshrn h10, s14, #99 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: uqshrn s10, d12, #99 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Signed Saturating Rounded Shift Right Narrow (Immediate) +//---------------------------------------------------------------------- + + sqrshrn b10, h13, #99 + sqrshrn h15, s10, #99 + sqrshrn s15, d12, #99 + +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: sqrshrn b10, h13, #99 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: sqrshrn h15, s10, #99 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: sqrshrn s15, d12, #99 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Unsigned Saturating Rounded Shift Right Narrow (Immediate) +//---------------------------------------------------------------------- + + uqrshrn b10, h12, #99 + uqrshrn h12, s10, #99 + uqrshrn s10, d10, #99 + +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: uqrshrn b10, h12, #99 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: uqrshrn h12, s10, #99 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: uqrshrn s10, d10, #99 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Signed Saturating Shift Right Unsigned Narrow (Immediate) +//---------------------------------------------------------------------- + + sqshrun b15, h10, #99 + sqshrun h20, s14, #99 + sqshrun s10, d15, #99 + +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: sqshrun b15, h10, #99 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: sqshrun h20, s14, #99 +// 
CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: sqshrun s10, d15, #99 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate) +//---------------------------------------------------------------------- + + sqrshrun b17, h10, #99 + sqrshrun h10, s13, #99 + sqrshrun s22, d16, #99 + +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: sqrshrun b17, h10, #99 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: sqrshrun h10, s13, #99 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: sqrshrun s22, d16, #99 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Signed Fixed-point Convert To Floating-Point (Immediate) +//---------------------------------------------------------------------- + + scvtf s22, s13, #0 + scvtf s22, s13, #33 + scvtf d21, d12, #65 + scvtf d21, s12, #31 + +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: scvtf s22, s13, #0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: scvtf s22, s13, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: scvtf d21, d12, #65 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: scvtf d21, s12, #31 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate) +//---------------------------------------------------------------------- + + ucvtf s22, s13, #34 + ucvtf d21, d14, #65 + ucvtf d21, s14, #64 + +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: ucvtf s22, s13, #34 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 
64] +// CHECK-ERROR: ucvtf d21, d14, #65 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ucvtf d21, s14, #64 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Element reverse +//------------------------------------------------------------------------------ + rev64 v6.2d, v8.2d + rev32 v30.2s, v31.2s + rev32 v30.4s, v31.4s + rev32 v30.2d, v31.2d + rev16 v21.4h, v1.4h + rev16 v21.8h, v1.8h + rev16 v21.2s, v1.2s + rev16 v21.4s, v1.4s + rev16 v21.2d, v1.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rev64 v6.2d, v8.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rev32 v30.2s, v31.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rev32 v30.4s, v31.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rev32 v30.2d, v31.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rev16 v21.4h, v1.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rev16 v21.8h, v1.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rev16 v21.2s, v1.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rev16 v21.4s, v1.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rev16 v21.2d, v1.2d +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Signed integer pairwise add long +//------------------------------------------------------------------------------ + + saddlp v3.8h, v21.8h + saddlp v8.8b, v5.8b + saddlp v9.8h, v1.4s + saddlp v0.4s, v1.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: saddlp v3.8h, v21.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: 
error: invalid operand for instruction +// CHECK-ERROR: saddlp v8.8b, v5.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: saddlp v9.8h, v1.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: saddlp v0.4s, v1.2d +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Unsigned integer pairwise add long +//------------------------------------------------------------------------------ + + uaddlp v3.8h, v21.8h + uaddlp v8.8b, v5.8b + uaddlp v9.8h, v1.4s + uaddlp v0.4s, v1.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uaddlp v3.8h, v21.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uaddlp v8.8b, v5.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uaddlp v9.8h, v1.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uaddlp v0.4s, v1.2d +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Signed integer pairwise add and accumulate long +//------------------------------------------------------------------------------ + + sadalp v3.16b, v21.16b + sadalp v8.4h, v5.4h + sadalp v9.4s, v1.4s + sadalp v0.4h, v1.2s + sadalp v12.2d, v4.8h + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sadalp v3.16b, v21.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sadalp v8.4h, v5.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sadalp v9.4s, v1.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sadalp v0.4h, v1.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sadalp v12.2d, v4.8h +// CHECK-ERROR: ^ + 
+//------------------------------------------------------------------------------ +// Unsigned integer pairwise add and accumulate long +//------------------------------------------------------------------------------ + + uadalp v3.16b, v21.16b + uadalp v8.4h, v5.4h + uadalp v9.4s, v1.4s + uadalp v0.4h, v1.2s + uadalp v12.2d, v4.8h + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uadalp v3.16b, v21.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uadalp v8.4h, v5.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uadalp v9.4s, v1.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uadalp v0.4h, v1.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uadalp v12.2d, v4.8h +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Signed integer saturating accumulate of unsigned value +//------------------------------------------------------------------------------ + + suqadd v0.16b, v31.8b + suqadd v1.8b, v9.8h + suqadd v13.4h, v21.4s + suqadd v4.2s, v0.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: suqadd v0.16b, v31.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: suqadd v1.8b, v9.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: suqadd v13.4h, v21.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: suqadd v4.2s, v0.2d +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Unsigned integer saturating accumulate of signed value +//------------------------------------------------------------------------------ + + usqadd v0.16b, v31.8b + usqadd v2.8h, v4.4h + usqadd v13.4h, v21.4s + usqadd v4.2s, v0.2d + +// 
CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usqadd v0.16b, v31.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usqadd v2.8h, v4.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usqadd v13.4h, v21.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usqadd v4.2s, v0.2d +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Integer saturating absolute +//------------------------------------------------------------------------------ + + sqabs v0.16b, v31.8b + sqabs v2.8h, v4.4h + sqabs v6.4s, v8.2s + sqabs v6.2d, v8.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqabs v0.16b, v31.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqabs v2.8h, v4.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqabs v6.4s, v8.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqabs v6.2d, v8.2s +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Signed integer saturating negate +//------------------------------------------------------------------------------ + + sqneg v0.16b, v31.8b + sqneg v2.8h, v4.4h + sqneg v6.4s, v8.2s + sqneg v6.2d, v8.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqneg v0.16b, v31.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqneg v2.8h, v4.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqneg v6.4s, v8.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqneg v6.2d, v8.2s +// CHECK-ERROR: ^ + 
+//------------------------------------------------------------------------------ +// Integer absolute +//------------------------------------------------------------------------------ + + abs v0.16b, v31.8b + abs v2.8h, v4.4h + abs v6.4s, v8.2s + abs v6.2d, v8.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: abs v0.16b, v31.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: abs v2.8h, v4.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: abs v6.4s, v8.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: abs v6.2d, v8.2s +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Integer count leading sign bits +//------------------------------------------------------------------------------ + + cls v0.2d, v31.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: cls v0.2d, v31.2d +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Integer count leading zeros +//------------------------------------------------------------------------------ + + clz v0.2d, v31.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: clz v0.2d, v31.2d +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Population count +//------------------------------------------------------------------------------ + + cnt v2.8h, v4.8h + cnt v6.4s, v8.4s + cnt v6.2d, v8.2d + cnt v13.4h, v21.4h + cnt v4.2s, v0.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: cnt v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: cnt v6.4s, v8.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: cnt v6.2d, v8.2d +// CHECK-ERROR: ^ +// 
CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: cnt v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: cnt v4.2s, v0.2s +// CHECK-ERROR: ^ + + +//------------------------------------------------------------------------------ +// Bitwise NOT +//------------------------------------------------------------------------------ + + not v2.8h, v4.8h + not v6.4s, v8.4s + not v6.2d, v8.2d + not v13.4h, v21.4h + not v4.2s, v0.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: not v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: not v6.4s, v8.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: not v6.2d, v8.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: not v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: not v4.2s, v0.2s +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Bitwise reverse +//------------------------------------------------------------------------------ + + rbit v2.8h, v4.8h + rbit v6.4s, v8.4s + rbit v6.2d, v8.2d + rbit v13.4h, v21.4h + rbit v4.2s, v0.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rbit v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rbit v6.4s, v8.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rbit v6.2d, v8.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rbit v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rbit v4.2s, v0.2s +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Floating-point 
absolute +//------------------------------------------------------------------------------ + + fabs v0.16b, v31.16b + fabs v2.8h, v4.8h + fabs v1.8b, v9.8b + fabs v13.4h, v21.4h + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fabs v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fabs v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fabs v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fabs v13.4h, v21.4h +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Floating-point negate +//------------------------------------------------------------------------------ + + fneg v0.16b, v31.16b + fneg v2.8h, v4.8h + fneg v1.8b, v9.8b + fneg v13.4h, v21.4h + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fneg v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fneg v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fneg v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fneg v13.4h, v21.4h +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Integer extract and narrow +//------------------------------------------------------------------------------ + + xtn v0.16b, v31.8h + xtn v2.8h, v4.4s + xtn v6.4s, v8.2d + xtn2 v1.8b, v9.8h + xtn2 v13.4h, v21.4s + xtn2 v4.2s, v0.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: xtn v0.16b, v31.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: xtn v2.8h, v4.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: xtn v6.4s, v8.2d +// CHECK-ERROR: 
^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: xtn2 v1.8b, v9.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: xtn2 v13.4h, v21.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: xtn2 v4.2s, v0.2d +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Signed integer saturating extract and unsigned narrow +//------------------------------------------------------------------------------ + + sqxtun v0.16b, v31.8h + sqxtun v2.8h, v4.4s + sqxtun v6.4s, v8.2d + sqxtun2 v1.8b, v9.8h + sqxtun2 v13.4h, v21.4s + sqxtun2 v4.2s, v0.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtun v0.16b, v31.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtun v2.8h, v4.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtun v6.4s, v8.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtun2 v1.8b, v9.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtun2 v13.4h, v21.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtun2 v4.2s, v0.2d +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Signed integer saturating extract and narrow +//------------------------------------------------------------------------------ + + sqxtn v0.16b, v31.8h + sqxtn v2.8h, v4.4s + sqxtn v6.4s, v8.2d + sqxtn2 v1.8b, v9.8h + sqxtn2 v13.4h, v21.4s + sqxtn2 v4.2s, v0.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtn v0.16b, v31.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtn v2.8h, v4.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for 
instruction +// CHECK-ERROR: sqxtn v6.4s, v8.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtn2 v1.8b, v9.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtn2 v13.4h, v21.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtn2 v4.2s, v0.2d +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Unsigned integer saturating extract and narrow +//------------------------------------------------------------------------------ + + uqxtn v0.16b, v31.8h + uqxtn v2.8h, v4.4s + uqxtn v6.4s, v8.2d + uqxtn2 v1.8b, v9.8h + uqxtn2 v13.4h, v21.4s + uqxtn2 v4.2s, v0.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqxtn v0.16b, v31.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqxtn v2.8h, v4.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqxtn v6.4s, v8.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqxtn2 v1.8b, v9.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqxtn2 v13.4h, v21.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqxtn2 v4.2s, v0.2d +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Integer shift left long +//------------------------------------------------------------------------------ + + shll2 v2.8h, v4.16b, #7 + shll2 v6.4s, v8.8h, #15 + shll2 v6.2d, v8.4s, #31 + shll v2.8h, v4.16b, #8 + shll v6.4s, v8.8h, #16 + shll v6.2d, v8.4s, #32 + shll v2.8h, v4.8b, #8 + shll v6.4s, v8.4h, #16 + shll v6.2d, v8.2s, #32 + shll2 v2.8h, v4.8b, #5 + shll2 v6.4s, v8.4h, #14 + shll2 v6.2d, v8.2s, #1 + +// CHECK-ERROR: error: invalid operand for instruction +// 
CHECK-ERROR: shll2 v2.8h, v4.16b, #7 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: shll2 v6.4s, v8.8h, #15 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: shll2 v6.2d, v8.4s, #31 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: shll v2.8h, v4.16b, #8 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: shll v6.4s, v8.8h, #16 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: shll v6.2d, v8.4s, #32 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: shll2 v2.8h, v4.8b, #5 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: shll2 v6.4s, v8.4h, #14 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: shll2 v6.2d, v8.2s, #1 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Floating-point convert downsize +//------------------------------------------------------------------------------ + + fcvtn v2.8h, v4.4s + fcvtn v6.4s, v8.2d + fcvtn2 v13.4h, v21.4s + fcvtn2 v4.2s, v0.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtn v2.8h, v4.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtn v6.4s, v8.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtn2 v13.4h, v21.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtn2 v4.2s, v0.2d +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Floating-point convert downsize with inexact +//------------------------------------------------------------------------------ + + fcvtxn v6.4s, v8.2d + fcvtxn2 v4.2s, v0.2d + +// 
CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtxn v6.4s, v8.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtxn2 v4.2s, v0.2d +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Floating-point convert upsize +//------------------------------------------------------------------------------ + + fcvtl2 v9.4s, v1.4h + fcvtl2 v0.2d, v1.2s + fcvtl v12.4s, v4.8h + fcvtl v17.2d, v28.4s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtl2 v9.4s, v1.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtl2 v0.2d, v1.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtl v12.4s, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtl v17.2d, v28.4s +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Floating-point round to integral +//------------------------------------------------------------------------------ + + frintn v0.16b, v31.16b + frintn v2.8h, v4.8h + frintn v1.8b, v9.8b + frintn v13.4h, v21.4h + + frinta v0.16b, v31.16b + frinta v2.8h, v4.8h + frinta v1.8b, v9.8b + frinta v13.4h, v21.4h + + frintp v0.16b, v31.16b + frintp v2.8h, v4.8h + frintp v1.8b, v9.8b + frintp v13.4h, v21.4h + + frintm v0.16b, v31.16b + frintm v2.8h, v4.8h + frintm v1.8b, v9.8b + frintm v13.4h, v21.4h + + frintx v0.16b, v31.16b + frintx v2.8h, v4.8h + frintx v1.8b, v9.8b + frintx v13.4h, v21.4h + + frintz v0.16b, v31.16b + frintz v2.8h, v4.8h + frintz v1.8b, v9.8b + frintz v13.4h, v21.4h + + frinti v0.16b, v31.16b + frinti v2.8h, v4.8h + frinti v1.8b, v9.8b + frinti v13.4h, v21.4h + + fcvtns v0.16b, v31.16b + fcvtns v2.8h, v4.8h + fcvtns v1.8b, v9.8b + fcvtns v13.4h, v21.4h + + fcvtnu v0.16b, v31.16b + fcvtnu v2.8h, v4.8h + fcvtnu v1.8b, v9.8b + 
fcvtnu v13.4h, v21.4h + + fcvtps v0.16b, v31.16b + fcvtps v2.8h, v4.8h + fcvtps v1.8b, v9.8b + fcvtps v13.4h, v21.4h + + fcvtpu v0.16b, v31.16b + fcvtpu v2.8h, v4.8h + fcvtpu v1.8b, v9.8b + fcvtpu v13.4h, v21.4h + + fcvtms v0.16b, v31.16b + fcvtms v2.8h, v4.8h + fcvtms v1.8b, v9.8b + fcvtms v13.4h, v21.4h + + fcvtmu v0.16b, v31.16b + fcvtmu v2.8h, v4.8h + fcvtmu v1.8b, v9.8b + fcvtmu v13.4h, v21.4h + + fcvtzs v0.16b, v31.16b + fcvtzs v2.8h, v4.8h + fcvtzs v1.8b, v9.8b + fcvtzs v13.4h, v21.4h + + fcvtzu v0.16b, v31.16b + fcvtzu v2.8h, v4.8h + fcvtzu v1.8b, v9.8b + fcvtzu v13.4h, v21.4h + + fcvtas v0.16b, v31.16b + fcvtas v2.8h, v4.8h + fcvtas v1.8b, v9.8b + fcvtas v13.4h, v21.4h + + fcvtau v0.16b, v31.16b + fcvtau v2.8h, v4.8h + fcvtau v1.8b, v9.8b + fcvtau v13.4h, v21.4h + + urecpe v0.16b, v31.16b + urecpe v2.8h, v4.8h + urecpe v1.8b, v9.8b + urecpe v13.4h, v21.4h + urecpe v1.2d, v9.2d + + ursqrte v0.16b, v31.16b + ursqrte v2.8h, v4.8h + ursqrte v1.8b, v9.8b + ursqrte v13.4h, v21.4h + ursqrte v1.2d, v9.2d + + scvtf v0.16b, v31.16b + scvtf v2.8h, v4.8h + scvtf v1.8b, v9.8b + scvtf v13.4h, v21.4h + + ucvtf v0.16b, v31.16b + ucvtf v2.8h, v4.8h + ucvtf v1.8b, v9.8b + ucvtf v13.4h, v21.4h + + frecpe v0.16b, v31.16b + frecpe v2.8h, v4.8h + frecpe v1.8b, v9.8b + frecpe v13.4h, v21.4h + + frsqrte v0.16b, v31.16b + frsqrte v2.8h, v4.8h + frsqrte v1.8b, v9.8b + frsqrte v13.4h, v21.4h + + fsqrt v0.16b, v31.16b + fsqrt v2.8h, v4.8h + fsqrt v1.8b, v9.8b + fsqrt v13.4h, v21.4h + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintn v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintn v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintn v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintn v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction 
+// CHECK-ERROR: frinta v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frinta v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frinta v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frinta v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintp v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintp v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintp v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintp v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintm v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintm v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintm v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintm v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintx v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintx v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintx v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintx v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintz v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintz v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: 
error: invalid operand for instruction +// CHECK-ERROR: frintz v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintz v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frinti v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frinti v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frinti v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frinti v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtns v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtns v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtns v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtns v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtnu v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtnu v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtnu v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtnu v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtps v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtps v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtps v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtps v13.4h, 
v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtpu v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtpu v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtpu v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtpu v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtms v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtms v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtms v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtms v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtmu v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtmu v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtmu v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtmu v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtzs v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtzs v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtzs v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtzs v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtzu v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for 
instruction +// CHECK-ERROR: fcvtzu v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtzu v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtzu v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtas v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtas v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtas v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtas v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtau v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtau v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtau v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtau v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: urecpe v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: urecpe v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: urecpe v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: urecpe v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: urecpe v1.2d, v9.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ursqrte v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ursqrte v2.8h, v4.8h +// CHECK-ERROR: ^ +// 
CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ursqrte v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ursqrte v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ursqrte v1.2d, v9.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: scvtf v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: scvtf v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: scvtf v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: scvtf v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ucvtf v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ucvtf v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ucvtf v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ucvtf v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frecpe v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frecpe v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frecpe v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frecpe v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frsqrte v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frsqrte v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frsqrte 
v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frsqrte v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fsqrt v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fsqrt v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fsqrt v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fsqrt v13.4h, v21.4h +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Floating-point Convert To Signed Fixed-point (Immediate) +//---------------------------------------------------------------------- + + fcvtzs s21, s12, #0 + fcvtzs d21, d12, #65 + fcvtzs s21, d12, #1 + +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: fcvtzs s21, s12, #0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: fcvtzs d21, d12, #65 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtzs s21, d12, #1 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Floating-point Convert To Unsigned Fixed-point (Immediate) +//---------------------------------------------------------------------- + + fcvtzu s21, s12, #33 + fcvtzu d21, d12, #0 + fcvtzu s21, d12, #1 + +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: fcvtzu s21, s12, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: fcvtzu d21, d12, #0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtzu s21, d12, #1 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Unsigned Saturating Extract Narrow 
+//---------------------------------------------------------------------- + + aese v0.8h, v1.8h + aese v0.4s, v1.4s + aese v0.2d, v1.2d + aesd v0.8h, v1.8h + aesmc v0.8h, v1.8h + aesimc v0.8h, v1.8h + +// CHECK: error: invalid operand for instruction +// CHECK: aese v0.8h, v1.8h +// CHECK: ^ +// CHECK: error: invalid operand for instruction +// CHECK: aese v0.4s, v1.4s +// CHECK: ^ +// CHECK: error: invalid operand for instruction +// CHECK: aese v0.2d, v1.2d +// CHECK: ^ +// CHECK: error: invalid operand for instruction +// CHECK: aesd v0.8h, v1.8h +// CHECK: ^ +// CHECK: error: invalid operand for instruction +// CHECK: aesmc v0.8h, v1.8h +// CHECK: ^ +// CHECK: error: invalid operand for instruction +// CHECK: aesimc v0.8h, v1.8h +// CHECK: ^ + + sha1h b0, b1 + sha1h h0, h1 + sha1h d0, d1 + sha1h q0, q1 + sha1su1 v0.16b, v1.16b + sha1su1 v0.8h, v1.8h + sha1su1 v0.2d, v1.2d + sha256su0 v0.16b, v1.16b + +// CHECK: error: invalid operand for instruction +// CHECK: sha1h b0, b1 +// CHECK: ^ +// CHECK: error: invalid operand for instruction +// CHECK: sha1h h0, h1 +// CHECK: ^ +// CHECK: error: invalid operand for instruction +// CHECK: sha1h d0, d1 +// CHECK: ^ +// CHECK: error: invalid operand for instruction +// CHECK: sha1h q0, q1 +// CHECK: ^ +// CHECK: error: invalid operand for instruction +// CHECK: sha1su1 v0.16b, v1.16b +// CHECK: ^ +// CHECK: error: invalid operand for instruction +// CHECK: sha1su1 v0.8h, v1.8h +// CHECK: ^ +// CHECK: error: invalid operand for instruction +// CHECK: sha1su1 v0.2d, v1.2d +// CHECK: ^ +// CHECK: error: invalid operand for instruction +// CHECK: sha256su0 v0.16b, v1.16b +// CHECK: ^ + + sha1c q0, q1, v2.4s + sha1p q0, q1, v2.4s + sha1m q0, q1, v2.4s + sha1su0 v0.16b, v1.16b, v2.16b + sha1su0 v0.8h, v1.8h, v2.8h + sha1su0 v0.2d, v1.2d, v2.2d + sha256h q0, q1, q2 + sha256h v0.4s, v1.4s, v2.4s + sha256h2 q0, q1, q2 + sha256su1 v0.16b, v1.16b, v2.16b + +// CHECK: error: invalid operand for instruction +// CHECK: sha1c q0, q1, 
v2.4s +// CHECK: ^ +// CHECK: error: invalid operand for instruction +// CHECK: sha1p q0, q1, v2.4s +// CHECK: ^ +// CHECK: error: invalid operand for instruction +// CHECK: sha1m q0, q1, v2.4s +// CHECK: ^ +// CHECK: error: invalid operand for instruction +// CHECK: sha1su0 v0.16b, v1.16b, v2.16b +// CHECK: ^ +// CHECK: error: invalid operand for instruction +// CHECK: sha1su0 v0.8h, v1.8h, v2.8h +// CHECK: ^ +// CHECK: error: invalid operand for instruction +// CHECK: sha1su0 v0.2d, v1.2d, v2.2d +// CHECK: ^ +// CHECK: error: too few operands for instruction +// CHECK: sha256h q0, q1, q2 +// CHECK: ^ +// CHECK: error: invalid operand for instruction +// CHECK: sha256h v0.4s, v1.4s, v2.4s +// CHECK: ^ +// CHECK: error: too few operands for instruction +// CHECK: sha256h2 q0, q1, q2 +// CHECK: ^ +// CHECK: error: invalid operand for instruction +// CHECK: sha256su1 v0.16b, v1.16b, v2.16b +// CHECK: ^ + +//---------------------------------------------------------------------- +// Bitwise extract +//---------------------------------------------------------------------- + + ext v0.8b, v1.8b, v2.4h, #0x3 + ext v0.4h, v1.4h, v2.4h, #0x3 + ext v0.2s, v1.2s, v2.2s, #0x1 + ext v0.1d, v1.1d, v2.1d, #0x0 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ext v0.8b, v1.8b, v2.4h, #0x3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ext v0.4h, v1.4h, v2.4h, #0x3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ext v0.2s, v1.2s, v2.2s, #0x1 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ext v0.1d, v1.1d, v2.1d, #0x0 +// CHECK-ERROR: ^ + + ext v0.16b, v1.16b, v2.8h, #0x3 + ext v0.8h, v1.8h, v2.8h, #0x3 + ext v0.4s, v1.4s, v2.4s, #0x1 + ext v0.2d, v1.2d, v2.2d, #0x0 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ext v0.16b, v1.16b, v2.8h, #0x3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid 
operand for instruction +// CHECK-ERROR: ext v0.8h, v1.8h, v2.8h, #0x3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ext v0.4s, v1.4s, v2.4s, #0x1 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ext v0.2d, v1.2d, v2.2d, #0x0 +// CHECK-ERROR: ^ + + +//---------------------------------------------------------------------- +// Permutation with 3 vectors +//---------------------------------------------------------------------- + + uzp1 v0.16b, v1.8b, v2.8b + uzp1 v0.8b, v1.4b, v2.4b + uzp1 v0.8h, v1.4h, v2.4h + uzp1 v0.4h, v1.2h, v2.2h + uzp1 v0.4s, v1.2s, v2.2s + uzp1 v0.2s, v1.1s, v2.1s + uzp1 v0.2d, v1.1d, v2.1d + uzp1 v0.1d, v1.1d, v2.1d + +// CHECK-ERROR <stdin>:4289:22: error: invalid operand for instruction +// CHECK-ERROR uzp1 v0.16b, v1.8b, v2.8b +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4290:21: error: invalid operand for instruction +// CHECK-ERROR uzp1 v0.8b, v1.4b, v2.4b +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4291:21: error: invalid operand for instruction +// CHECK-ERROR uzp1 v0.8h, v1.4h, v2.4h +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4292:21: error: invalid operand for instruction +// CHECK-ERROR uzp1 v0.4h, v1.2h, v2.2h +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4293:21: error: invalid operand for instruction +// CHECK-ERROR uzp1 v0.4s, v1.2s, v2.2s +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4294:21: error: invalid operand for instruction +// CHECK-ERROR uzp1 v0.2s, v1.1s, v2.1s +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4295:21: error: invalid operand for instruction +// CHECK-ERROR uzp1 v0.2d, v1.1d, v2.1d +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4296:17: error: invalid operand for instruction +// CHECK-ERROR uzp1 v0.1d, v1.1d, v2.1d +// CHECK-ERROR ^ + + uzp2 v0.16b, v1.8b, v2.8b + uzp2 v0.8b, v1.4b, v2.4b + uzp2 v0.8h, v1.4h, v2.4h + uzp2 v0.4h, v1.2h, v2.2h + uzp2 v0.4s, v1.2s, v2.2s + uzp2 v0.2s, v1.1s, v2.1s + uzp2 v0.2d, v1.1d, v2.1d + uzp2 v0.1d, v1.1d, 
v2.1d + +// CHECK-ERROR <stdin>:4298:22: error: invalid operand for instruction +// CHECK-ERROR uzp2 v0.16b, v1.8b, v2.8b +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4299:21: error: invalid operand for instruction +// CHECK-ERROR uzp2 v0.8b, v1.4b, v2.4b +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4300:21: error: invalid operand for instruction +// CHECK-ERROR uzp2 v0.8h, v1.4h, v2.4h +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4301:21: error: invalid operand for instruction +// CHECK-ERROR uzp2 v0.4h, v1.2h, v2.2h +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4302:21: error: invalid operand for instruction +// CHECK-ERROR uzp2 v0.4s, v1.2s, v2.2s +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4303:21: error: invalid operand for instruction +// CHECK-ERROR uzp2 v0.2s, v1.1s, v2.1s +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4304:21: error: invalid operand for instruction +// CHECK-ERROR uzp2 v0.2d, v1.1d, v2.1d +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4305:17: error: invalid operand for instruction +// CHECK-ERROR uzp2 v0.1d, v1.1d, v2.1d +// CHECK-ERROR ^ + + zip1 v0.16b, v1.8b, v2.8b + zip1 v0.8b, v1.4b, v2.4b + zip1 v0.8h, v1.4h, v2.4h + zip1 v0.4h, v1.2h, v2.2h + zip1 v0.4s, v1.2s, v2.2s + zip1 v0.2s, v1.1s, v2.1s + zip1 v0.2d, v1.1d, v2.1d + zip1 v0.1d, v1.1d, v2.1d + +// CHECK-ERROR <stdin>:4307:22: error: invalid operand for instruction +// CHECK-ERROR zip1 v0.16b, v1.8b, v2.8b +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4308:21: error: invalid operand for instruction +// CHECK-ERROR zip1 v0.8b, v1.4b, v2.4b +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4309:21: error: invalid operand for instruction +// CHECK-ERROR zip1 v0.8h, v1.4h, v2.4h +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4310:21: error: invalid operand for instruction +// CHECK-ERROR zip1 v0.4h, v1.2h, v2.2h +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4311:21: error: invalid operand for instruction +// CHECK-ERROR zip1 v0.4s, v1.2s, v2.2s +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4312:21: error: invalid operand for 
instruction +// CHECK-ERROR zip1 v0.2s, v1.1s, v2.1s +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4313:21: error: invalid operand for instruction +// CHECK-ERROR zip1 v0.2d, v1.1d, v2.1d +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4314:17: error: invalid operand for instruction +// CHECK-ERROR zip1 v0.1d, v1.1d, v2.1d +// CHECK-ERROR ^ + + zip2 v0.16b, v1.8b, v2.8b + zip2 v0.8b, v1.4b, v2.4b + zip2 v0.8h, v1.4h, v2.4h + zip2 v0.4h, v1.2h, v2.2h + zip2 v0.4s, v1.2s, v2.2s + zip2 v0.2s, v1.1s, v2.1s + zip2 v0.2d, v1.1d, v2.1d + zip2 v0.1d, v1.1d, v2.1d + +// CHECK-ERROR <stdin>:4316:22: error: invalid operand for instruction +// CHECK-ERROR zip2 v0.16b, v1.8b, v2.8b +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4317:21: error: invalid operand for instruction +// CHECK-ERROR zip2 v0.8b, v1.4b, v2.4b +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4318:21: error: invalid operand for instruction +// CHECK-ERROR zip2 v0.8h, v1.4h, v2.4h +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4319:21: error: invalid operand for instruction +// CHECK-ERROR zip2 v0.4h, v1.2h, v2.2h +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4320:21: error: invalid operand for instruction +// CHECK-ERROR zip2 v0.4s, v1.2s, v2.2s +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4321:21: error: invalid operand for instruction +// CHECK-ERROR zip2 v0.2s, v1.1s, v2.1s +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4322:21: error: invalid operand for instruction +// CHECK-ERROR zip2 v0.2d, v1.1d, v2.1d +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4323:17: error: invalid operand for instruction +// CHECK-ERROR zip2 v0.1d, v1.1d, v2.1d +// CHECK-ERROR ^ + + trn1 v0.16b, v1.8b, v2.8b + trn1 v0.8b, v1.4b, v2.4b + trn1 v0.8h, v1.4h, v2.4h + trn1 v0.4h, v1.2h, v2.2h + trn1 v0.4s, v1.2s, v2.2s + trn1 v0.2s, v1.1s, v2.1s + trn1 v0.2d, v1.1d, v2.1d + trn1 v0.1d, v1.1d, v2.1d + +// CHECK-ERROR <stdin>:4325:22: error: invalid operand for instruction +// CHECK-ERROR trn1 v0.16b, v1.8b, v2.8b +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4326:21: error: 
invalid operand for instruction +// CHECK-ERROR trn1 v0.8b, v1.4b, v2.4b +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4327:21: error: invalid operand for instruction +// CHECK-ERROR trn1 v0.8h, v1.4h, v2.4h +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4328:21: error: invalid operand for instruction +// CHECK-ERROR trn1 v0.4h, v1.2h, v2.2h +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4329:21: error: invalid operand for instruction +// CHECK-ERROR trn1 v0.4s, v1.2s, v2.2s +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4330:21: error: invalid operand for instruction +// CHECK-ERROR trn1 v0.2s, v1.1s, v2.1s +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4331:21: error: invalid operand for instruction +// CHECK-ERROR trn1 v0.2d, v1.1d, v2.1d +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4332:17: error: invalid operand for instruction +// CHECK-ERROR trn1 v0.1d, v1.1d, v2.1d +// CHECK-ERROR ^ + + trn2 v0.16b, v1.8b, v2.8b + trn2 v0.8b, v1.4b, v2.4b + trn2 v0.8h, v1.4h, v2.4h + trn2 v0.4h, v1.2h, v2.2h + trn2 v0.4s, v1.2s, v2.2s + trn2 v0.2s, v1.1s, v2.1s + trn2 v0.2d, v1.1d, v2.1d + trn2 v0.1d, v1.1d, v2.1d + +// CHECK-ERROR <stdin>:4334:22: error: invalid operand for instruction +// CHECK-ERROR trn2 v0.16b, v1.8b, v2.8b +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4335:21: error: invalid operand for instruction +// CHECK-ERROR trn2 v0.8b, v1.4b, v2.4b +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4336:21: error: invalid operand for instruction +// CHECK-ERROR trn2 v0.8h, v1.4h, v2.4h +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4337:21: error: invalid operand for instruction +// CHECK-ERROR trn2 v0.4h, v1.2h, v2.2h +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4338:21: error: invalid operand for instruction +// CHECK-ERROR trn2 v0.4s, v1.2s, v2.2s +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4339:21: error: invalid operand for instruction +// CHECK-ERROR trn2 v0.2s, v1.1s, v2.1s +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4340:21: error: invalid operand for instruction +// CHECK-ERROR trn2 v0.2d, v1.1d, v2.1d 
+// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4341:17: error: invalid operand for instruction +// CHECK-ERROR trn2 v0.1d, v1.1d, v2.1d +// CHECK-ERROR ^ + +//---------------------------------------------------------------------- +// Permutation with 3 vectors +//---------------------------------------------------------------------- + + uzp1 v0.16b, v1.8b, v2.8b + uzp1 v0.8b, v1.4b, v2.4b + uzp1 v0.8h, v1.4h, v2.4h + uzp1 v0.4h, v1.2h, v2.2h + uzp1 v0.4s, v1.2s, v2.2s + uzp1 v0.2s, v1.1s, v2.1s + uzp1 v0.2d, v1.1d, v2.1d + uzp1 v0.1d, v1.1d, v2.1d + +// CHECK-ERROR <stdin>:4289:22: error: invalid operand for instruction +// CHECK-ERROR uzp1 v0.16b, v1.8b, v2.8b +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4290:21: error: invalid operand for instruction +// CHECK-ERROR uzp1 v0.8b, v1.4b, v2.4b +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4291:21: error: invalid operand for instruction +// CHECK-ERROR uzp1 v0.8h, v1.4h, v2.4h +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4292:21: error: invalid operand for instruction +// CHECK-ERROR uzp1 v0.4h, v1.2h, v2.2h +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4293:21: error: invalid operand for instruction +// CHECK-ERROR uzp1 v0.4s, v1.2s, v2.2s +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4294:21: error: invalid operand for instruction +// CHECK-ERROR uzp1 v0.2s, v1.1s, v2.1s +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4295:21: error: invalid operand for instruction +// CHECK-ERROR uzp1 v0.2d, v1.1d, v2.1d +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4296:17: error: invalid operand for instruction +// CHECK-ERROR uzp1 v0.1d, v1.1d, v2.1d +// CHECK-ERROR ^ + + uzp2 v0.16b, v1.8b, v2.8b + uzp2 v0.8b, v1.4b, v2.4b + uzp2 v0.8h, v1.4h, v2.4h + uzp2 v0.4h, v1.2h, v2.2h + uzp2 v0.4s, v1.2s, v2.2s + uzp2 v0.2s, v1.1s, v2.1s + uzp2 v0.2d, v1.1d, v2.1d + uzp2 v0.1d, v1.1d, v2.1d + +// CHECK-ERROR <stdin>:4298:22: error: invalid operand for instruction +// CHECK-ERROR uzp2 v0.16b, v1.8b, v2.8b +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4299:21: error: 
invalid operand for instruction +// CHECK-ERROR uzp2 v0.8b, v1.4b, v2.4b +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4300:21: error: invalid operand for instruction +// CHECK-ERROR uzp2 v0.8h, v1.4h, v2.4h +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4301:21: error: invalid operand for instruction +// CHECK-ERROR uzp2 v0.4h, v1.2h, v2.2h +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4302:21: error: invalid operand for instruction +// CHECK-ERROR uzp2 v0.4s, v1.2s, v2.2s +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4303:21: error: invalid operand for instruction +// CHECK-ERROR uzp2 v0.2s, v1.1s, v2.1s +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4304:21: error: invalid operand for instruction +// CHECK-ERROR uzp2 v0.2d, v1.1d, v2.1d +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4305:17: error: invalid operand for instruction +// CHECK-ERROR uzp2 v0.1d, v1.1d, v2.1d +// CHECK-ERROR ^ + + zip1 v0.16b, v1.8b, v2.8b + zip1 v0.8b, v1.4b, v2.4b + zip1 v0.8h, v1.4h, v2.4h + zip1 v0.4h, v1.2h, v2.2h + zip1 v0.4s, v1.2s, v2.2s + zip1 v0.2s, v1.1s, v2.1s + zip1 v0.2d, v1.1d, v2.1d + zip1 v0.1d, v1.1d, v2.1d + +// CHECK-ERROR <stdin>:4307:22: error: invalid operand for instruction +// CHECK-ERROR zip1 v0.16b, v1.8b, v2.8b +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4308:21: error: invalid operand for instruction +// CHECK-ERROR zip1 v0.8b, v1.4b, v2.4b +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4309:21: error: invalid operand for instruction +// CHECK-ERROR zip1 v0.8h, v1.4h, v2.4h +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4310:21: error: invalid operand for instruction +// CHECK-ERROR zip1 v0.4h, v1.2h, v2.2h +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4311:21: error: invalid operand for instruction +// CHECK-ERROR zip1 v0.4s, v1.2s, v2.2s +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4312:21: error: invalid operand for instruction +// CHECK-ERROR zip1 v0.2s, v1.1s, v2.1s +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4313:21: error: invalid operand for instruction +// CHECK-ERROR zip1 v0.2d, v1.1d, v2.1d 
+// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4314:17: error: invalid operand for instruction +// CHECK-ERROR zip1 v0.1d, v1.1d, v2.1d +// CHECK-ERROR ^ + + zip2 v0.16b, v1.8b, v2.8b + zip2 v0.8b, v1.4b, v2.4b + zip2 v0.8h, v1.4h, v2.4h + zip2 v0.4h, v1.2h, v2.2h + zip2 v0.4s, v1.2s, v2.2s + zip2 v0.2s, v1.1s, v2.1s + zip2 v0.2d, v1.1d, v2.1d + zip2 v0.1d, v1.1d, v2.1d + +// CHECK-ERROR <stdin>:4316:22: error: invalid operand for instruction +// CHECK-ERROR zip2 v0.16b, v1.8b, v2.8b +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4317:21: error: invalid operand for instruction +// CHECK-ERROR zip2 v0.8b, v1.4b, v2.4b +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4318:21: error: invalid operand for instruction +// CHECK-ERROR zip2 v0.8h, v1.4h, v2.4h +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4319:21: error: invalid operand for instruction +// CHECK-ERROR zip2 v0.4h, v1.2h, v2.2h +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4320:21: error: invalid operand for instruction +// CHECK-ERROR zip2 v0.4s, v1.2s, v2.2s +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4321:21: error: invalid operand for instruction +// CHECK-ERROR zip2 v0.2s, v1.1s, v2.1s +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4322:21: error: invalid operand for instruction +// CHECK-ERROR zip2 v0.2d, v1.1d, v2.1d +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4323:17: error: invalid operand for instruction +// CHECK-ERROR zip2 v0.1d, v1.1d, v2.1d +// CHECK-ERROR ^ + + trn1 v0.16b, v1.8b, v2.8b + trn1 v0.8b, v1.4b, v2.4b + trn1 v0.8h, v1.4h, v2.4h + trn1 v0.4h, v1.2h, v2.2h + trn1 v0.4s, v1.2s, v2.2s + trn1 v0.2s, v1.1s, v2.1s + trn1 v0.2d, v1.1d, v2.1d + trn1 v0.1d, v1.1d, v2.1d + +// CHECK-ERROR <stdin>:4325:22: error: invalid operand for instruction +// CHECK-ERROR trn1 v0.16b, v1.8b, v2.8b +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4326:21: error: invalid operand for instruction +// CHECK-ERROR trn1 v0.8b, v1.4b, v2.4b +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4327:21: error: invalid operand for instruction +// CHECK-ERROR trn1 
v0.8h, v1.4h, v2.4h +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4328:21: error: invalid operand for instruction +// CHECK-ERROR trn1 v0.4h, v1.2h, v2.2h +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4329:21: error: invalid operand for instruction +// CHECK-ERROR trn1 v0.4s, v1.2s, v2.2s +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4330:21: error: invalid operand for instruction +// CHECK-ERROR trn1 v0.2s, v1.1s, v2.1s +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4331:21: error: invalid operand for instruction +// CHECK-ERROR trn1 v0.2d, v1.1d, v2.1d +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4332:17: error: invalid operand for instruction +// CHECK-ERROR trn1 v0.1d, v1.1d, v2.1d +// CHECK-ERROR ^ + + trn2 v0.16b, v1.8b, v2.8b + trn2 v0.8b, v1.4b, v2.4b + trn2 v0.8h, v1.4h, v2.4h + trn2 v0.4h, v1.2h, v2.2h + trn2 v0.4s, v1.2s, v2.2s + trn2 v0.2s, v1.1s, v2.1s + trn2 v0.2d, v1.1d, v2.1d + trn2 v0.1d, v1.1d, v2.1d + +// CHECK-ERROR <stdin>:4334:22: error: invalid operand for instruction +// CHECK-ERROR trn2 v0.16b, v1.8b, v2.8b +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4335:21: error: invalid operand for instruction +// CHECK-ERROR trn2 v0.8b, v1.4b, v2.4b +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4336:21: error: invalid operand for instruction +// CHECK-ERROR trn2 v0.8h, v1.4h, v2.4h +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4337:21: error: invalid operand for instruction +// CHECK-ERROR trn2 v0.4h, v1.2h, v2.2h +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4338:21: error: invalid operand for instruction +// CHECK-ERROR trn2 v0.4s, v1.2s, v2.2s +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4339:21: error: invalid operand for instruction +// CHECK-ERROR trn2 v0.2s, v1.1s, v2.1s +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4340:21: error: invalid operand for instruction +// CHECK-ERROR trn2 v0.2d, v1.1d, v2.1d +// CHECK-ERROR ^ +// CHECK-ERROR <stdin>:4341:17: error: invalid operand for instruction +// CHECK-ERROR trn2 v0.1d, v1.1d, v2.1d +// CHECK-ERROR ^ + 
+//---------------------------------------------------------------------- +// Floating Point multiply (scalar, by element) +//---------------------------------------------------------------------- + // mismatched and invalid vector types + fmul s0, s1, v1.h[0] + fmul h0, h1, v1.s[0] + // invalid lane + fmul s2, s29, v10.s[4] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmul s0, s1, v1.h[0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmul h0, h1, v1.s[0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: fmul s2, s29, v10.s[4] +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Floating Point multiply extended (scalar, by element) +//---------------------------------------------------------------------- + // mismatched and invalid vector types + fmulx d0, d1, v1.b[0] + fmulx h0, h1, v1.d[0] + // invalid lane + fmulx d2, d29, v10.d[3] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmulx d0, d1, v1.b[0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmulx h0, h1, v1.d[0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: fmulx d2, d29, v10.d[3] +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Floating Point fused multiply-add (scalar, by element) +//---------------------------------------------------------------------- + // mismatched and invalid vector types + fmla b0, b1, v1.b[0] + fmla d30, s11, v1.d[1] + // invalid lane + fmla s16, s22, v16.s[5] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmla b0, b1, v1.b[0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmla d30, s11, v1.d[1] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout 
+// CHECK-ERROR: fmla s16, s22, v16.s[5] +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Floating Point fused multiply-subtract (scalar, by element) +//---------------------------------------------------------------------- + // mismatched and invalid vector types + fmls s29, h10, v28.s[1] + fmls h7, h17, v26.s[2] + // invalid lane + fmls d16, d22, v16.d[-1] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmls s29, h10, v28.s[1] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmls h7, h17, v26.s[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected lane number +// CHECK-ERROR: fmls d16, d22, v16.d[-1] +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Signed saturating doubling multiply-add long +// (scalar, by element) +//---------------------------------------------------------------------- + // mismatched and invalid vector types + sqdmlal s0, h0, v0.s[0] + sqdmlal s8, s9, v14.s[1] + // invalid lane + sqdmlal s4, s5, v1.s[5] + // invalid vector index + sqdmlal s0, h0, v17.h[0] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlal s0, h0, v0.s[0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlal s8, s9, v14.s[1] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmlal s4, s5, v1.s[5] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlal s0, h0, v17.h[0] +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Signed saturating doubling multiply-subtract long +// (scalar, by element) +//---------------------------------------------------------------------- + // mismatched and invalid vector types + sqdmlsl s1, h1, v1.d[0] + sqdmlsl d1, h1, v13.s[0] + // invalid lane + 
sqdmlsl d1, s1, v13.s[4] + // invalid vector index + sqdmlsl s1, h1, v20.h[7] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlsl s1, h1, v1.d[0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlsl d1, h1, v13.s[0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmlsl d1, s1, v13.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmlsl s1, h1, v20.h[7] +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Signed saturating doubling multiply long (scalar, by element) +//---------------------------------------------------------------------- + // mismatched and invalid vector types + // invalid lane + // invalid vector index + // mismatched and invalid vector types + sqdmull s1, h1, v1.s[1] + sqdmull s1, s1, v4.s[0] + // invalid lane + sqdmull s12, h17, v9.h[9] + // invalid vector index + sqdmull s1, h1, v16.h[5] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmull s1, h1, v1.s[1] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmull s1, s1, v4.s[0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmull s12, h17, v9.h[9] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmull s1, h1, v16.h[5] +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Signed saturating doubling multiply returning +// high half (scalar, by element) +//---------------------------------------------------------------------- + // mismatched and invalid vector types + sqdmulh h0, s1, v0.h[0] + sqdmulh s25, s26, v27.h[3] + // invalid lane + sqdmulh s25, s26, v27.s[4] + // invalid vector index + sqdmulh s0, h1, v30.h[0] + +// CHECK-ERROR: error: invalid 
operand for instruction +// CHECK-ERROR: sqdmulh h0, s1, v0.h[0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmulh s25, s26, v27.h[3] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqdmulh s25, s26, v27.s[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqdmulh s0, h1, v30.h[0] +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Signed saturating rounding doubling multiply +// returning high half (scalar, by element) +//---------------------------------------------------------------------- + // mismatched and invalid vector types + sqrdmulh h31, h30, v14.s[2] + sqrdmulh s5, h6, v7.s[2] + // invalid lane + sqrdmulh h31, h30, v14.h[9] + // invalid vector index + sqrdmulh h31, h30, v20.h[4] + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrdmulh h31, h30, v14.s[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrdmulh s5, h6, v7.s[2] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: sqrdmulh h31, h30, v14.h[9] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrdmulh h31, h30, v20.h[4] +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Duplicate element (scalar) +//---------------------------------------------------------------------- + // mismatched and invalid vector types + dup b0, v1.d[0] + dup h0, v31.b[8] + dup s0, v2.h[4] + dup d0, v17.s[3] + // invalid lane + dup d0, v17.d[4] + dup s0, v1.s[7] + dup h0, v31.h[16] + dup b1, v3.b[16] +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: dup b0, v1.d[0] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: dup h0, v31.b[8] +// CHECK-ERROR: ^ +// 
CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: dup s0, v2.h[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: dup d0, v17.s[3] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: dup d0, v17.d[4] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: dup s0, v1.s[7] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: dup h0, v31.h[16] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: lane number incompatible with layout +// CHECK-ERROR: dup b1, v3.b[16] +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Table look up +//---------------------------------------------------------------------- + + tbl v0.8b, {v1.8b}, v2.8b + tbl v0.8b, {v1.8b, v2.8b}, v2.8b + tbl v0.8b, {v1.8b, v2.8b, v3.8b}, v2.8b + tbl v0.8b, {v1.8b, v2.8b, v3.8b, v4.8b}, v2.8b + tbl v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b, v5.16b}, v2.8b + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: tbl v0.8b, {v1.8b}, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: tbl v0.8b, {v1.8b, v2.8b}, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: tbl v0.8b, {v1.8b, v2.8b, v3.8b}, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: tbl v0.8b, {v1.8b, v2.8b, v3.8b, v4.8b}, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid number of vectors +// CHECK-ERROR: tbl v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b, v5.16b}, v2.8b +// CHECK-ERROR: ^ + + tbx v0.8b, {v1.8b}, v2.8b + tbx v0.8b, {v1.8b, v2.8b}, v2.8b + tbx v0.8b, {v1.8b, v2.8b, v3.8b}, v2.8b + tbx v0.8b, {v1.8b, v2.8b, v3.8b, v4.8b}, v2.8b + tbx v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b, v5.16b}, v2.8b + +// CHECK-ERROR: error: invalid operand for instruction +// 
CHECK-ERROR: tbx v0.8b, {v1.8b}, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: tbx v0.8b, {v1.8b, v2.8b}, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: tbx v0.8b, {v1.8b, v2.8b, v3.8b}, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: tbx v0.8b, {v1.8b, v2.8b, v3.8b, v4.8b}, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid number of vectors +// CHECK-ERROR: tbx v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b, v5.16b}, v2.8b +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Floating-point Convert To Lower Precision Narrow, Rounding To +// Odd +//---------------------------------------------------------------------- + + fcvtxn s0, s1 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtxn s0, s1 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Floating-point Convert To Signed Integer, Rounding To Nearest +// With Ties To Away +//---------------------------------------------------------------------- + + fcvtas s0, d0 + fcvtas d0, s0 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtas s0, d0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtas d0, s0 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Floating-point Convert To Unsigned Integer, Rounding To +// Nearest With Ties To Away +//---------------------------------------------------------------------- + + fcvtau s0, d0 + fcvtau d0, s0 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtau s0, d0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtau d0, s0 +// CHECK-ERROR: ^ + 
+//---------------------------------------------------------------------- +// Scalar Floating-point Convert To Signed Integer, Rounding Toward +// Minus Infinity +//---------------------------------------------------------------------- + + fcvtms s0, d0 + fcvtms d0, s0 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtms s0, d0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtms d0, s0 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Floating-point Convert To Unsigned Integer, Rounding Toward +// Minus Infinity +//---------------------------------------------------------------------- + + fcvtmu s0, d0 + fcvtmu d0, s0 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtmu s0, d0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtmu d0, s0 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Floating-point Convert To Signed Integer, Rounding To Nearest +// With Ties To Even +//---------------------------------------------------------------------- + + fcvtns s0, d0 + fcvtns d0, s0 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtns s0, d0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtns d0, s0 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Floating-point Convert To Unsigned Integer, Rounding To +// Nearest With Ties To Even +//---------------------------------------------------------------------- + + fcvtnu s0, d0 + fcvtnu d0, s0 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtnu s0, d0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtnu d0, s0 +// CHECK-ERROR: ^ + 
+//---------------------------------------------------------------------- +// Scalar Floating-point Convert To Signed Integer, Rounding Toward +// Positive Infinity +//---------------------------------------------------------------------- + + fcvtps s0, d0 + fcvtps d0, s0 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtps s0, d0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtps d0, s0 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Floating-point Convert To Unsigned Integer, Rounding Toward +// Positive Infinity +//---------------------------------------------------------------------- + + fcvtpu s0, d0 + fcvtpu d0, s0 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtpu s0, d0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtpu d0, s0 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Floating-point Convert To Signed Integer, Rounding Toward Zero +//---------------------------------------------------------------------- + + fcvtzs s0, d0 + fcvtzs d0, s0 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtzs s0, d0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtzs d0, s0 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Floating-point Convert To Unsigned Integer, Rounding Toward +// Zero +//---------------------------------------------------------------------- + + fcvtzu s0, d0 + fcvtzu d0, s0 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtzu s0, d0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtzu d0, s0 +// CHECK-ERROR: ^ + 
+//---------------------------------------------------------------------- +// Scalar Floating-point Absolute Difference +//---------------------------------------------------------------------- + + + fabd s29, d24, s20 + fabd d29, s24, d20 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fabd s29, d24, s20 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fabd d29, s24, d20 +// CHECK-ERROR: ^ diff --git a/test/MC/AArch64/neon-extract.s b/test/MC/AArch64/neon-extract.s new file mode 100644 index 0000000..2d58a75 --- /dev/null +++ b/test/MC/AArch64/neon-extract.s @@ -0,0 +1,13 @@ +// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//------------------------------------------------------------------------------ +// Instructions for bitwise extract +//------------------------------------------------------------------------------ + + ext v0.8b, v1.8b, v2.8b, #0x3 + ext v0.16b, v1.16b, v2.16b, #0x3 + +// CHECK: ext v0.8b, v1.8b, v2.8b, #0x3 // encoding: [0x20,0x18,0x02,0x2e] +// CHECK: ext v0.16b, v1.16b, v2.16b, #0x3 // encoding: [0x20,0x18,0x02,0x6e] diff --git a/test/MC/AArch64/neon-mov.s b/test/MC/AArch64/neon-mov.s index 8331372..c2ca803 100644 --- a/test/MC/AArch64/neon-mov.s +++ b/test/MC/AArch64/neon-mov.s @@ -195,13 +195,15 @@ //---------------------------------------------------------------------- // Vector Move - register //---------------------------------------------------------------------- + + // FIXME: these should all print with the "mov" syntax. 
mov v0.8b, v31.8b mov v15.16b, v16.16b orr v0.8b, v31.8b, v31.8b orr v15.16b, v16.16b, v16.16b -// CHECK: mov v0.8b, v31.8b // encoding: [0xe0,0x1f,0xbf,0x0e] -// CHECK: mov v15.16b, v16.16b // encoding: [0x0f,0x1e,0xb0,0x4e] -// CHECK: mov v0.8b, v31.8b // encoding: [0xe0,0x1f,0xbf,0x0e] -// CHECK: mov v15.16b, v16.16b // encoding: [0x0f,0x1e,0xb0,0x4e] +// CHECK: orr v0.8b, v31.8b, v31.8b // encoding: [0xe0,0x1f,0xbf,0x0e] +// CHECK: orr v15.16b, v16.16b, v16.16b // encoding: [0x0f,0x1e,0xb0,0x4e] +// CHECK: orr v0.8b, v31.8b, v31.8b // encoding: [0xe0,0x1f,0xbf,0x0e] +// CHECK: orr v15.16b, v16.16b, v16.16b // encoding: [0x0f,0x1e,0xb0,0x4e] diff --git a/test/MC/AArch64/neon-perm.s b/test/MC/AArch64/neon-perm.s new file mode 100644 index 0000000..20a4acde --- /dev/null +++ b/test/MC/AArch64/neon-perm.s @@ -0,0 +1,103 @@ +// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//------------------------------------------------------------------------------ +// Instructions for permute +//------------------------------------------------------------------------------ + + uzp1 v0.8b, v1.8b, v2.8b + uzp1 v0.16b, v1.16b, v2.16b + uzp1 v0.4h, v1.4h, v2.4h + uzp1 v0.8h, v1.8h, v2.8h + uzp1 v0.2s, v1.2s, v2.2s + uzp1 v0.4s, v1.4s, v2.4s + uzp1 v0.2d, v1.2d, v2.2d + +// CHECK: uzp1 v0.8b, v1.8b, v2.8b // encoding: [0x20,0x18,0x02,0x0e] +// CHECK: uzp1 v0.16b, v1.16b, v2.16b // encoding: [0x20,0x18,0x02,0x4e] +// CHECK: uzp1 v0.4h, v1.4h, v2.4h // encoding: [0x20,0x18,0x42,0x0e] +// CHECK: uzp1 v0.8h, v1.8h, v2.8h // encoding: [0x20,0x18,0x42,0x4e] +// CHECK: uzp1 v0.2s, v1.2s, v2.2s // encoding: [0x20,0x18,0x82,0x0e] +// CHECK: uzp1 v0.4s, v1.4s, v2.4s // encoding: [0x20,0x18,0x82,0x4e] +// CHECK: uzp1 v0.2d, v1.2d, v2.2d // encoding: [0x20,0x18,0xc2,0x4e] + + trn1 v0.8b, v1.8b, v2.8b + trn1 v0.16b, v1.16b, v2.16b + trn1 v0.4h, v1.4h, v2.4h + trn1 v0.8h, v1.8h, v2.8h + 
trn1 v0.2s, v1.2s, v2.2s + trn1 v0.4s, v1.4s, v2.4s + trn1 v0.2d, v1.2d, v2.2d + +// CHECK: trn1 v0.8b, v1.8b, v2.8b // encoding: [0x20,0x28,0x02,0x0e] +// CHECK: trn1 v0.16b, v1.16b, v2.16b // encoding: [0x20,0x28,0x02,0x4e] +// CHECK: trn1 v0.4h, v1.4h, v2.4h // encoding: [0x20,0x28,0x42,0x0e] +// CHECK: trn1 v0.8h, v1.8h, v2.8h // encoding: [0x20,0x28,0x42,0x4e] +// CHECK: trn1 v0.2s, v1.2s, v2.2s // encoding: [0x20,0x28,0x82,0x0e] +// CHECK: trn1 v0.4s, v1.4s, v2.4s // encoding: [0x20,0x28,0x82,0x4e] +// CHECK: trn1 v0.2d, v1.2d, v2.2d // encoding: [0x20,0x28,0xc2,0x4e] + + zip1 v0.8b, v1.8b, v2.8b + zip1 v0.16b, v1.16b, v2.16b + zip1 v0.4h, v1.4h, v2.4h + zip1 v0.8h, v1.8h, v2.8h + zip1 v0.2s, v1.2s, v2.2s + zip1 v0.4s, v1.4s, v2.4s + zip1 v0.2d, v1.2d, v2.2d + +// CHECK: zip1 v0.8b, v1.8b, v2.8b // encoding: [0x20,0x38,0x02,0x0e] +// CHECK: zip1 v0.16b, v1.16b, v2.16b // encoding: [0x20,0x38,0x02,0x4e] +// CHECK: zip1 v0.4h, v1.4h, v2.4h // encoding: [0x20,0x38,0x42,0x0e] +// CHECK: zip1 v0.8h, v1.8h, v2.8h // encoding: [0x20,0x38,0x42,0x4e] +// CHECK: zip1 v0.2s, v1.2s, v2.2s // encoding: [0x20,0x38,0x82,0x0e] +// CHECK: zip1 v0.4s, v1.4s, v2.4s // encoding: [0x20,0x38,0x82,0x4e] +// CHECK: zip1 v0.2d, v1.2d, v2.2d // encoding: [0x20,0x38,0xc2,0x4e] + + uzp2 v0.8b, v1.8b, v2.8b + uzp2 v0.16b, v1.16b, v2.16b + uzp2 v0.4h, v1.4h, v2.4h + uzp2 v0.8h, v1.8h, v2.8h + uzp2 v0.2s, v1.2s, v2.2s + uzp2 v0.4s, v1.4s, v2.4s + uzp2 v0.2d, v1.2d, v2.2d + +// CHECK: uzp2 v0.8b, v1.8b, v2.8b // encoding: [0x20,0x58,0x02,0x0e] +// CHECK: uzp2 v0.16b, v1.16b, v2.16b // encoding: [0x20,0x58,0x02,0x4e] +// CHECK: uzp2 v0.4h, v1.4h, v2.4h // encoding: [0x20,0x58,0x42,0x0e] +// CHECK: uzp2 v0.8h, v1.8h, v2.8h // encoding: [0x20,0x58,0x42,0x4e] +// CHECK: uzp2 v0.2s, v1.2s, v2.2s // encoding: [0x20,0x58,0x82,0x0e] +// CHECK: uzp2 v0.4s, v1.4s, v2.4s // encoding: [0x20,0x58,0x82,0x4e] +// CHECK: uzp2 v0.2d, v1.2d, v2.2d // encoding: [0x20,0x58,0xc2,0x4e] + + trn2 v0.8b, v1.8b, 
v2.8b + trn2 v0.16b, v1.16b, v2.16b + trn2 v0.4h, v1.4h, v2.4h + trn2 v0.8h, v1.8h, v2.8h + trn2 v0.2s, v1.2s, v2.2s + trn2 v0.4s, v1.4s, v2.4s + trn2 v0.2d, v1.2d, v2.2d + +// CHECK: trn2 v0.8b, v1.8b, v2.8b // encoding: [0x20,0x68,0x02,0x0e] +// CHECK: trn2 v0.16b, v1.16b, v2.16b // encoding: [0x20,0x68,0x02,0x4e] +// CHECK: trn2 v0.4h, v1.4h, v2.4h // encoding: [0x20,0x68,0x42,0x0e] +// CHECK: trn2 v0.8h, v1.8h, v2.8h // encoding: [0x20,0x68,0x42,0x4e] +// CHECK: trn2 v0.2s, v1.2s, v2.2s // encoding: [0x20,0x68,0x82,0x0e] +// CHECK: trn2 v0.4s, v1.4s, v2.4s // encoding: [0x20,0x68,0x82,0x4e] +// CHECK: trn2 v0.2d, v1.2d, v2.2d // encoding: [0x20,0x68,0xc2,0x4e] + + zip2 v0.8b, v1.8b, v2.8b + zip2 v0.16b, v1.16b, v2.16b + zip2 v0.4h, v1.4h, v2.4h + zip2 v0.8h, v1.8h, v2.8h + zip2 v0.2s, v1.2s, v2.2s + zip2 v0.4s, v1.4s, v2.4s + zip2 v0.2d, v1.2d, v2.2d + +// CHECK: zip2 v0.8b, v1.8b, v2.8b // encoding: [0x20,0x78,0x02,0x0e] +// CHECK: zip2 v0.16b, v1.16b, v2.16b // encoding: [0x20,0x78,0x02,0x4e] +// CHECK: zip2 v0.4h, v1.4h, v2.4h // encoding: [0x20,0x78,0x42,0x0e] +// CHECK: zip2 v0.8h, v1.8h, v2.8h // encoding: [0x20,0x78,0x42,0x4e] +// CHECK: zip2 v0.2s, v1.2s, v2.2s // encoding: [0x20,0x78,0x82,0x0e] +// CHECK: zip2 v0.4s, v1.4s, v2.4s // encoding: [0x20,0x78,0x82,0x4e] +// CHECK: zip2 v0.2d, v1.2d, v2.2d // encoding: [0x20,0x78,0xc2,0x4e] diff --git a/test/MC/AArch64/neon-rounding-shift.s b/test/MC/AArch64/neon-rounding-shift.s index f3c70d7..e70f766 100644 --- a/test/MC/AArch64/neon-rounding-shift.s +++ b/test/MC/AArch64/neon-rounding-shift.s @@ -41,17 +41,5 @@ // CHECK: urshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x54,0xa2,0x6e] // CHECK: urshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x54,0xe2,0x6e] -//------------------------------------------------------------------------------ -// Scalar Integer Rounding Shift Lef (Signed) -//------------------------------------------------------------------------------ - srshl d17, d31, d8 - -// CHECK: srshl d17, d31, 
d8 // encoding: [0xf1,0x57,0xe8,0x5e] - -//------------------------------------------------------------------------------ -// Scalar Integer Rounding Shift Lef (Unsigned) -//------------------------------------------------------------------------------ - urshl d17, d31, d8 -// CHECK: urshl d17, d31, d8 // encoding: [0xf1,0x57,0xe8,0x7e] diff --git a/test/MC/AArch64/neon-saturating-add-sub.s b/test/MC/AArch64/neon-saturating-add-sub.s index 1032ae4..4a7ed10 100644 --- a/test/MC/AArch64/neon-saturating-add-sub.s +++ b/test/MC/AArch64/neon-saturating-add-sub.s @@ -79,55 +79,4 @@ // CHECK: uqsub v0.4s, v1.4s, v2.4s // encoding: [0x20,0x2c,0xa2,0x6e] // CHECK: uqsub v0.2d, v1.2d, v2.2d // encoding: [0x20,0x2c,0xe2,0x6e] -//------------------------------------------------------------------------------ -// Scalar Integer Saturating Add (Signed) -//------------------------------------------------------------------------------ - sqadd b0, b1, b2 - sqadd h10, h11, h12 - sqadd s20, s21, s2 - sqadd d17, d31, d8 - -// CHECK: sqadd b0, b1, b2 // encoding: [0x20,0x0c,0x22,0x5e] -// CHECK: sqadd h10, h11, h12 // encoding: [0x6a,0x0d,0x6c,0x5e] -// CHECK: sqadd s20, s21, s2 // encoding: [0xb4,0x0e,0xa2,0x5e] -// CHECK: sqadd d17, d31, d8 // encoding: [0xf1,0x0f,0xe8,0x5e] - -//------------------------------------------------------------------------------ -// Scalar Integer Saturating Add (Unsigned) -//------------------------------------------------------------------------------ - uqadd b0, b1, b2 - uqadd h10, h11, h12 - uqadd s20, s21, s2 - uqadd d17, d31, d8 - -// CHECK: uqadd b0, b1, b2 // encoding: [0x20,0x0c,0x22,0x7e] -// CHECK: uqadd h10, h11, h12 // encoding: [0x6a,0x0d,0x6c,0x7e] -// CHECK: uqadd s20, s21, s2 // encoding: [0xb4,0x0e,0xa2,0x7e] -// CHECK: uqadd d17, d31, d8 // encoding: [0xf1,0x0f,0xe8,0x7e] - -//------------------------------------------------------------------------------ -// Scalar Integer Saturating Sub (Signed) 
-//------------------------------------------------------------------------------ - sqsub b0, b1, b2 - sqsub h10, h11, h12 - sqsub s20, s21, s2 - sqsub d17, d31, d8 - -// CHECK: sqsub b0, b1, b2 // encoding: [0x20,0x2c,0x22,0x5e] -// CHECK: sqsub h10, h11, h12 // encoding: [0x6a,0x2d,0x6c,0x5e] -// CHECK: sqsub s20, s21, s2 // encoding: [0xb4,0x2e,0xa2,0x5e] -// CHECK: sqsub d17, d31, d8 // encoding: [0xf1,0x2f,0xe8,0x5e] - -//------------------------------------------------------------------------------ -// Scalar Integer Saturating Sub (Unsigned) -//------------------------------------------------------------------------------ - uqsub b0, b1, b2 - uqsub h10, h11, h12 - uqsub s20, s21, s2 - uqsub d17, d31, d8 - -// CHECK: uqsub b0, b1, b2 // encoding: [0x20,0x2c,0x22,0x7e] -// CHECK: uqsub h10, h11, h12 // encoding: [0x6a,0x2d,0x6c,0x7e] -// CHECK: uqsub s20, s21, s2 // encoding: [0xb4,0x2e,0xa2,0x7e] -// CHECK: uqsub d17, d31, d8 // encoding: [0xf1,0x2f,0xe8,0x7e] diff --git a/test/MC/AArch64/neon-saturating-rounding-shift.s b/test/MC/AArch64/neon-saturating-rounding-shift.s index a36e689..9215c1c 100644 --- a/test/MC/AArch64/neon-saturating-rounding-shift.s +++ b/test/MC/AArch64/neon-saturating-rounding-shift.s @@ -41,30 +41,3 @@ // CHECK: uqrshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x5c,0xa2,0x6e] // CHECK: uqrshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x5c,0xe2,0x6e] -//------------------------------------------------------------------------------ -// Scalar Integer Saturating Rounding Shift Lef (Signed) -//------------------------------------------------------------------------------ - sqrshl b0, b1, b2 - sqrshl h10, h11, h12 - sqrshl s20, s21, s2 - sqrshl d17, d31, d8 - -// CHECK: sqrshl b0, b1, b2 // encoding: [0x20,0x5c,0x22,0x5e] -// CHECK: sqrshl h10, h11, h12 // encoding: [0x6a,0x5d,0x6c,0x5e] -// CHECK: sqrshl s20, s21, s2 // encoding: [0xb4,0x5e,0xa2,0x5e] -// CHECK: sqrshl d17, d31, d8 // encoding: [0xf1,0x5f,0xe8,0x5e] - 
-//------------------------------------------------------------------------------ -// Scalar Integer Saturating Rounding Shift Lef (Unsigned) -//------------------------------------------------------------------------------ - uqrshl b0, b1, b2 - uqrshl h10, h11, h12 - uqrshl s20, s21, s2 - uqrshl d17, d31, d8 - -// CHECK: uqrshl b0, b1, b2 // encoding: [0x20,0x5c,0x22,0x7e] -// CHECK: uqrshl h10, h11, h12 // encoding: [0x6a,0x5d,0x6c,0x7e] -// CHECK: uqrshl s20, s21, s2 // encoding: [0xb4,0x5e,0xa2,0x7e] -// CHECK: uqrshl d17, d31, d8 // encoding: [0xf1,0x5f,0xe8,0x7e] - - diff --git a/test/MC/AArch64/neon-saturating-shift.s b/test/MC/AArch64/neon-saturating-shift.s index 2c8456d..9ae393a 100644 --- a/test/MC/AArch64/neon-saturating-shift.s +++ b/test/MC/AArch64/neon-saturating-shift.s @@ -41,29 +41,3 @@ // CHECK: uqshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x4c,0xa2,0x6e] // CHECK: uqshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x4c,0xe2,0x6e] -//------------------------------------------------------------------------------ -// Scalar Integer Saturating Shift Lef (Signed) -//------------------------------------------------------------------------------ - sqshl b0, b1, b2 - sqshl h10, h11, h12 - sqshl s20, s21, s2 - sqshl d17, d31, d8 - -// CHECK: sqshl b0, b1, b2 // encoding: [0x20,0x4c,0x22,0x5e] -// CHECK: sqshl h10, h11, h12 // encoding: [0x6a,0x4d,0x6c,0x5e] -// CHECK: sqshl s20, s21, s2 // encoding: [0xb4,0x4e,0xa2,0x5e] -// CHECK: sqshl d17, d31, d8 // encoding: [0xf1,0x4f,0xe8,0x5e] - -//------------------------------------------------------------------------------ -// Scalar Integer Saturating Shift Lef (Unsigned) -//------------------------------------------------------------------------------ - uqshl b0, b1, b2 - uqshl h10, h11, h12 - uqshl s20, s21, s2 - uqshl d17, d31, d8 - -// CHECK: uqshl b0, b1, b2 // encoding: [0x20,0x4c,0x22,0x7e] -// CHECK: uqshl h10, h11, h12 // encoding: [0x6a,0x4d,0x6c,0x7e] -// CHECK: uqshl s20, s21, s2 // encoding: 
[0xb4,0x4e,0xa2,0x7e] -// CHECK: uqshl d17, d31, d8 // encoding: [0xf1,0x4f,0xe8,0x7e] - diff --git a/test/MC/AArch64/neon-scalar-abs.s b/test/MC/AArch64/neon-scalar-abs.s new file mode 100644 index 0000000..d08756c --- /dev/null +++ b/test/MC/AArch64/neon-scalar-abs.s @@ -0,0 +1,35 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//---------------------------------------------------------------------- +// Scalar Absolute Value +//---------------------------------------------------------------------- + + abs d29, d24 + +// CHECK: abs d29, d24 // encoding: [0x1d,0xbb,0xe0,0x5e] + +//---------------------------------------------------------------------- +// Scalar Floating-point Absolute Difference +//---------------------------------------------------------------------- + + fabd s29, s24, s20 + fabd d29, d24, d20 + +// CHECK: fabd s29, s24, s20 // encoding: [0x1d,0xd7,0xb4,0x7e] +// CHECK: fabd d29, d24, d20 // encoding: [0x1d,0xd7,0xf4,0x7e] + +//---------------------------------------------------------------------- +// Scalar Signed Saturating Absolute Value +//---------------------------------------------------------------------- + + sqabs b19, b14 + sqabs h21, h15 + sqabs s20, s12 + sqabs d18, d12 + +// CHECK: sqabs b19, b14 // encoding: [0xd3,0x79,0x20,0x5e] +// CHECK: sqabs h21, h15 // encoding: [0xf5,0x79,0x60,0x5e] +// CHECK: sqabs s20, s12 // encoding: [0x94,0x79,0xa0,0x5e] +// CHECK: sqabs d18, d12 // encoding: [0x92,0x79,0xe0,0x5e] diff --git a/test/MC/AArch64/neon-scalar-add-sub.s b/test/MC/AArch64/neon-scalar-add-sub.s new file mode 100644 index 0000000..0a3eba7 --- /dev/null +++ b/test/MC/AArch64/neon-scalar-add-sub.s @@ -0,0 +1,16 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +//------------------------------------------------------------------------------ +// Scalar 
Integer Add +//------------------------------------------------------------------------------ + add d31, d0, d16 + +// CHECK: add d31, d0, d16 // encoding: [0x1f,0x84,0xf0,0x5e] + +//------------------------------------------------------------------------------ +// Scalar Integer Sub +//------------------------------------------------------------------------------ + sub d1, d7, d8 + +// CHECK: sub d1, d7, d8 // encoding: [0xe1,0x84,0xe8,0x7e] + diff --git a/test/MC/AArch64/neon-scalar-by-elem-mla.s b/test/MC/AArch64/neon-scalar-by-elem-mla.s new file mode 100644 index 0000000..fec9d12 --- /dev/null +++ b/test/MC/AArch64/neon-scalar-by-elem-mla.s @@ -0,0 +1,44 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +//------------------------------------------------------------------------------ +// Floating Point fused multiply-add (scalar, by element) +//------------------------------------------------------------------------------ + fmla s0, s1, v1.s[0] + fmla s30, s11, v1.s[1] + fmla s4, s5, v7.s[2] + fmla s16, s22, v16.s[3] + fmla d0, d1, v1.d[0] + fmla d30, d11, v1.d[1] + +// CHECK: fmla s0, s1, v1.s[0] // encoding: [0x20,0x10,0x81,0x5f] +// CHECK: fmla s30, s11, v1.s[1] // encoding: [0x7e,0x11,0xa1,0x5f] +// CHECK: fmla s4, s5, v7.s[2] // encoding: [0xa4,0x18,0x87,0x5f] +// CHECK: fmla s16, s22, v16.s[3] // encoding: [0xd0,0x1a,0xb0,0x5f] +// CHECK: fmla d0, d1, v1.d[0] // encoding: [0x20,0x10,0xc1,0x5f] +// CHECK: fmla d30, d11, v1.d[1] // encoding: [0x7e,0x19,0xc1,0x5f] + +//------------------------------------------------------------------------------ +// Floating Point fused multiply-subtract (scalar, by element) +//------------------------------------------------------------------------------ + + fmls s2, s3, v4.s[0] + fmls s29, s10, v28.s[1] + fmls s5, s12, v23.s[2] + fmls s7, s17, v26.s[3] + fmls d0, d1, v1.d[0] + fmls d30, d11, v1.d[1] + +// CHECK: fmls s2, s3, v4.s[0] // encoding: [0x62,0x50,0x84,0x5f] +// 
CHECK: fmls s29, s10, v28.s[1] // encoding: [0x5d,0x51,0xbc,0x5f] +// CHECK: fmls s5, s12, v23.s[2] // encoding: [0x85,0x59,0x97,0x5f] +// CHECK: fmls s7, s17, v26.s[3] // encoding: [0x27,0x5a,0xba,0x5f] +// CHECK: fmls d0, d1, v1.d[0] // encoding: [0x20,0x50,0xc1,0x5f] +// CHECK: fmls d30, d11, v1.d[1] // encoding: [0x7e,0x59,0xc1,0x5f] + + + + + + + + diff --git a/test/MC/AArch64/neon-scalar-by-elem-mul.s b/test/MC/AArch64/neon-scalar-by-elem-mul.s new file mode 100644 index 0000000..8b8a3f5 --- /dev/null +++ b/test/MC/AArch64/neon-scalar-by-elem-mul.s @@ -0,0 +1,37 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +//------------------------------------------------------------------------------ +// Floating Point multiply (scalar, by element) +//------------------------------------------------------------------------------ + fmul s0, s1, v1.s[0] + fmul s30, s11, v1.s[1] + fmul s4, s5, v7.s[2] + fmul s16, s22, v16.s[3] + fmul d0, d1, v1.d[0] + fmul d30, d11, v1.d[1] + +// CHECK: fmul s0, s1, v1.s[0] // encoding: [0x20,0x90,0x81,0x5f] +// CHECK: fmul s30, s11, v1.s[1] // encoding: [0x7e,0x91,0xa1,0x5f] +// CHECK: fmul s4, s5, v7.s[2] // encoding: [0xa4,0x98,0x87,0x5f] +// CHECK: fmul s16, s22, v16.s[3] // encoding: [0xd0,0x9a,0xb0,0x5f] +// CHECK: fmul d0, d1, v1.d[0] // encoding: [0x20,0x90,0xc1,0x5f] +// CHECK: fmul d30, d11, v1.d[1] // encoding: [0x7e,0x99,0xc1,0x5f] + + +//------------------------------------------------------------------------------ +// Floating Point multiply extended (scalar, by element) +//------------------------------------------------------------------------------ + fmulx s6, s2, v8.s[0] + fmulx s7, s3, v13.s[1] + fmulx s9, s7, v9.s[2] + fmulx s13, s21, v10.s[3] + fmulx d15, d9, v7.d[0] + fmulx d13, d12, v11.d[1] + +// CHECK: fmulx s6, s2, v8.s[0] // encoding: [0x46,0x90,0x88,0x7f] +// CHECK: fmulx s7, s3, v13.s[1] // encoding: [0x67,0x90,0xad,0x7f] +// CHECK: fmulx s9, s7, v9.s[2] // 
encoding: [0xe9,0x98,0x89,0x7f] +// CHECK: fmulx s13, s21, v10.s[3] // encoding: [0xad,0x9a,0xaa,0x7f] +// CHECK: fmulx d15, d9, v7.d[0] // encoding: [0x2f,0x91,0xc7,0x7f] +// CHECK: fmulx d13, d12, v11.d[1] // encoding: [0x8d,0x99,0xcb,0x7f] + diff --git a/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s b/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s new file mode 100644 index 0000000..e3d7e05 --- /dev/null +++ b/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s @@ -0,0 +1,46 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +//----------------------------------------------------------------------------- +// Signed saturating doubling multiply-add long (scalar, by element) +//----------------------------------------------------------------------------- + sqdmlal s0, h0, v0.h[0] + sqdmlal s7, h1, v4.h[3] + sqdmlal s11, h16, v8.h[4] + sqdmlal s30, h30, v15.h[7] + sqdmlal d0, s0, v3.s[0] + sqdmlal d30, s30, v30.s[3] + sqdmlal d8, s9, v14.s[1] + +// CHECK: sqdmlal s0, h0, v0.h[0] // encoding: [0x00,0x30,0x40,0x5f] +// CHECK: sqdmlal s7, h1, v4.h[3] // encoding: [0x27,0x30,0x74,0x5f] +// CHECK: sqdmlal s11, h16, v8.h[4] // encoding: [0x0b,0x3a,0x48,0x5f] +// CHECK: sqdmlal s30, h30, v15.h[7] // encoding: [0xde,0x3b,0x7f,0x5f] +// CHECK: sqdmlal d0, s0, v3.s[0] // encoding: [0x00,0x30,0x83,0x5f] +// CHECK: sqdmlal d30, s30, v30.s[3] // encoding: [0xde,0x3b,0xbe,0x5f] +// CHECK: sqdmlal d8, s9, v14.s[1] // encoding: [0x28,0x31,0xae,0x5f] + +//----------------------------------------------------------------------------- +// Signed saturating doubling multiply-subtract long (scalar, by element) +//----------------------------------------------------------------------------- + sqdmlsl s1, h1, v1.h[0] + sqdmlsl s8, h2, v5.h[1] + sqdmlsl s12, h13, v14.h[2] + sqdmlsl s29, h28, v11.h[7] + sqdmlsl d1, s1, v13.s[0] + sqdmlsl d31, s31, v31.s[2] + sqdmlsl d16, s18, v28.s[3] + +// CHECK: sqdmlsl s1, h1, v1.h[0] // 
encoding: [0x21,0x70,0x41,0x5f] +// CHECK: sqdmlsl s8, h2, v5.h[1] // encoding: [0x48,0x70,0x55,0x5f] +// CHECK: sqdmlsl s12, h13, v14.h[2] // encoding: [0xac,0x71,0x6e,0x5f] +// CHECK: sqdmlsl s29, h28, v11.h[7] // encoding: [0x9d,0x7b,0x7b,0x5f] +// CHECK: sqdmlsl d1, s1, v13.s[0] // encoding: [0x21,0x70,0x8d,0x5f] +// CHECK: sqdmlsl d31, s31, v31.s[2] // encoding: [0xff,0x7b,0x9f,0x5f] +// CHECK: sqdmlsl d16, s18, v28.s[3] // encoding: [0x50,0x7a,0xbc,0x5f] + + + + + + + diff --git a/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s b/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s new file mode 100644 index 0000000..8a8405e --- /dev/null +++ b/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s @@ -0,0 +1,58 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +//----------------------------------------------------------------------------- +// Signed saturating doubling multiply long (scalar, by element) +//----------------------------------------------------------------------------- + sqdmull s1, h1, v1.h[1] + sqdmull s8, h2, v5.h[2] + sqdmull s12, h17, v9.h[3] + sqdmull s31, h31, v15.h[7] + sqdmull d1, s1, v4.s[0] + sqdmull d31, s31, v31.s[3] + sqdmull d9, s10, v15.s[0] + + +// CHECK: sqdmull s1, h1, v1.h[1] // encoding: [0x21,0xb0,0x51,0x5f] +// CHECK: sqdmull s8, h2, v5.h[2] // encoding: [0x48,0xb0,0x65,0x5f] +// CHECK: sqdmull s12, h17, v9.h[3] // encoding: [0x2c,0xb2,0x79,0x5f] +// CHECK: sqdmull s31, h31, v15.h[7] // encoding: [0xff,0xbb,0x7f,0x5f] +// CHECK: sqdmull d1, s1, v4.s[0] // encoding: [0x21,0xb0,0x84,0x5f] +// CHECK: sqdmull d31, s31, v31.s[3] // encoding: [0xff,0xbb,0xbf,0x5f] +// CHECK: sqdmull d9, s10, v15.s[0] // encoding: [0x49,0xb1,0x8f,0x5f] + +//----------------------------------------------------------------------------- +// Scalar Signed saturating doubling multiply returning +// high half (scalar, by element) 
+//----------------------------------------------------------------------------- + sqdmulh h0, h1, v0.h[0] + sqdmulh h10, h11, v10.h[4] + sqdmulh h20, h21, v15.h[7] + sqdmulh s25, s26, v27.s[3] + sqdmulh s2, s6, v7.s[0] + +// CHECK: sqdmulh h0, h1, v0.h[0] // encoding: [0x20,0xc0,0x40,0x5f] +// CHECK: sqdmulh h10, h11, v10.h[4] // encoding: [0x6a,0xc9,0x4a,0x5f] +// CHECK: sqdmulh h20, h21, v15.h[7] // encoding: [0xb4,0xca,0x7f,0x5f] +// CHECK: sqdmulh s25, s26, v27.s[3] // encoding: [0x59,0xcb,0xbb,0x5f] +// CHECK: sqdmulh s2, s6, v7.s[0] // encoding: [0xc2,0xc0,0x87,0x5f] + +//----------------------------------------------------------------------------- +// Signed saturating rounding doubling multiply returning +// high half (scalar, by element) +//----------------------------------------------------------------------------- + sqrdmulh h31, h30, v14.h[2] + sqrdmulh h1, h1, v1.h[4] + sqrdmulh h21, h22, v15.h[7] + sqrdmulh s5, s6, v7.s[2] + sqrdmulh s20, s26, v27.s[1] + +// CHECK: sqrdmulh h31, h30, v14.h[2] // encoding: [0xdf,0xd3,0x6e,0x5f] +// CHECK: sqrdmulh h1, h1, v1.h[4] // encoding: [0x21,0xd8,0x41,0x5f] +// CHECK: sqrdmulh h21, h22, v15.h[7] // encoding: [0xd5,0xda,0x7f,0x5f] +// CHECK: sqrdmulh s5, s6, v7.s[2] // encoding: [0xc5,0xd8,0x87,0x5f] +// CHECK: sqrdmulh s20, s26, v27.s[1] // encoding: [0x54,0xd3,0xbb,0x5f] + + + + + diff --git a/test/MC/AArch64/neon-scalar-compare.s b/test/MC/AArch64/neon-scalar-compare.s new file mode 100644 index 0000000..55ade0e --- /dev/null +++ b/test/MC/AArch64/neon-scalar-compare.s @@ -0,0 +1,90 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//---------------------------------------------------------------------- +// Scalar Compare Bitwise Equal +//---------------------------------------------------------------------- + + cmeq d20, d21, d22 + +// CHECK: cmeq d20, d21, d22 // encoding: 
[0xb4,0x8e,0xf6,0x7e] + +//---------------------------------------------------------------------- +// Scalar Compare Bitwise Equal To Zero +//---------------------------------------------------------------------- + + cmeq d20, d21, #0x0 + +// CHECK: cmeq d20, d21, #0x0 // encoding: [0xb4,0x9a,0xe0,0x5e] + +//---------------------------------------------------------------------- +// Scalar Compare Unsigned Higher Or Same +//---------------------------------------------------------------------- + + cmhs d20, d21, d22 + +// CHECK: cmhs d20, d21, d22 // encoding: [0xb4,0x3e,0xf6,0x7e] + +//---------------------------------------------------------------------- +// Scalar Compare Signed Greater Than Or Equal +//---------------------------------------------------------------------- + + cmge d20, d21, d22 + +// CHECK: cmge d20, d21, d22 // encoding: [0xb4,0x3e,0xf6,0x5e] + +//---------------------------------------------------------------------- +// Scalar Compare Signed Greater Than Or Equal To Zero +//---------------------------------------------------------------------- + + cmge d20, d21, #0x0 + +// CHECK: cmge d20, d21, #0x0 // encoding: [0xb4,0x8a,0xe0,0x7e] + +//---------------------------------------------------------------------- +// Scalar Compare Unsigned Higher +//---------------------------------------------------------------------- + + cmhi d20, d21, d22 + +// CHECK: cmhi d20, d21, d22 // encoding: [0xb4,0x36,0xf6,0x7e] +//---------------------------------------------------------------------- +// Scalar Compare Signed Greater Than +//---------------------------------------------------------------------- + + cmgt d20, d21, d22 + +// CHECK: cmgt d20, d21, d22 // encoding: [0xb4,0x36,0xf6,0x5e] + +//---------------------------------------------------------------------- +// Scalar Compare Signed Greater Than Zero +//---------------------------------------------------------------------- + + cmgt d20, d21, #0x0 + +// CHECK: cmgt d20, d21, #0x0 // encoding: 
[0xb4,0x8a,0xe0,0x5e] + +//---------------------------------------------------------------------- +// Scalar Compare Signed Less Than Or Equal To Zero +//---------------------------------------------------------------------- + + cmle d20, d21, #0x0 + +// CHECK: cmle d20, d21, #0x0 // encoding: [0xb4,0x9a,0xe0,0x7e] + +//---------------------------------------------------------------------- +// Scalar Compare Less Than Zero +//---------------------------------------------------------------------- + + cmlt d20, d21, #0x0 + +// CHECK: cmlt d20, d21, #0x0 // encoding: [0xb4,0xaa,0xe0,0x5e] + +//---------------------------------------------------------------------- +// Scalar Compare Bitwise Test Bits +//---------------------------------------------------------------------- + + cmtst d20, d21, d22 + +// CHECK: cmtst d20, d21, d22 // encoding: [0xb4,0x8e,0xf6,0x5e] diff --git a/test/MC/AArch64/neon-scalar-cvt.s b/test/MC/AArch64/neon-scalar-cvt.s new file mode 100644 index 0000000..97416da --- /dev/null +++ b/test/MC/AArch64/neon-scalar-cvt.s @@ -0,0 +1,181 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//---------------------------------------------------------------------- +// Scalar Signed Integer Convert To Floating-point +//---------------------------------------------------------------------- + + scvtf s22, s13 + scvtf d21, d12 + +// CHECK: scvtf s22, s13 // encoding: [0xb6,0xd9,0x21,0x5e] +// CHECK: scvtf d21, d12 // encoding: [0x95,0xd9,0x61,0x5e] + +//---------------------------------------------------------------------- +// Scalar Unsigned Integer Convert To Floating-point +//---------------------------------------------------------------------- + + ucvtf s22, s13 + ucvtf d21, d14 + +// CHECK: ucvtf s22, s13 // encoding: [0xb6,0xd9,0x21,0x7e] +// CHECK: ucvtf d21, d14 // encoding: [0xd5,0xd9,0x61,0x7e] + 
+//---------------------------------------------------------------------- +// Scalar Signed Fixed-point Convert To Floating-Point (Immediate) +//---------------------------------------------------------------------- + + scvtf s22, s13, #32 + scvtf d21, d12, #64 + +// CHECK: scvtf s22, s13, #32 // encoding: [0xb6,0xe5,0x20,0x5f] +// CHECK: scvtf d21, d12, #64 // encoding: [0x95,0xe5,0x40,0x5f] + +//---------------------------------------------------------------------- +// Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate) +//---------------------------------------------------------------------- + + ucvtf s22, s13, #32 + ucvtf d21, d14, #64 + +// CHECK: ucvtf s22, s13, #32 // encoding: [0xb6,0xe5,0x20,0x7f] +// CHECK: ucvtf d21, d14, #64 // encoding: [0xd5,0xe5,0x40,0x7f] + +//---------------------------------------------------------------------- +// Scalar Floating-point Convert To Signed Fixed-point (Immediate) +//---------------------------------------------------------------------- + + fcvtzs s21, s12, #1 + fcvtzs d21, d12, #1 + +// CHECK: fcvtzs s21, s12, #1 // encoding: [0x95,0xfd,0x3f,0x5f] +// CHECK: fcvtzs d21, d12, #1 // encoding: [0x95,0xfd,0x7f,0x5f] + +//---------------------------------------------------------------------- +// Scalar Floating-point Convert To Unsigned Fixed-point (Immediate) +//---------------------------------------------------------------------- + + fcvtzu s21, s12, #1 + fcvtzu d21, d12, #1 + +// CHECK: fcvtzu s21, s12, #1 // encoding: [0x95,0xfd,0x3f,0x7f] +// CHECK: fcvtzu d21, d12, #1 // encoding: [0x95,0xfd,0x7f,0x7f] + +//---------------------------------------------------------------------- +// Scalar Floating-point Convert To Lower Precision Narrow, Rounding To +// Odd +//---------------------------------------------------------------------- + + fcvtxn s22, d13 + +// CHECK: fcvtxn s22, d13 // encoding: [0xb6,0x69,0x61,0x7e] + +//---------------------------------------------------------------------- +// Scalar 
Floating-point Convert To Signed Integer, Rounding To Nearest +// With Ties To Away +//---------------------------------------------------------------------- + + fcvtas s12, s13 + fcvtas d21, d14 + +// CHECK: fcvtas s12, s13 // encoding: [0xac,0xc9,0x21,0x5e] +// CHECK: fcvtas d21, d14 // encoding: [0xd5,0xc9,0x61,0x5e] + +//---------------------------------------------------------------------- +// Scalar Floating-point Convert To Unsigned Integer, Rounding To +// Nearest With Ties To Away +//---------------------------------------------------------------------- + + fcvtau s12, s13 + fcvtau d21, d14 + +// CHECK: fcvtau s12, s13 // encoding: [0xac,0xc9,0x21,0x7e] +// CHECK: fcvtau d21, d14 // encoding: [0xd5,0xc9,0x61,0x7e] + +//---------------------------------------------------------------------- +// Scalar Floating-point Convert To Signed Integer, Rounding Toward +// Minus Infinity +//---------------------------------------------------------------------- + + fcvtms s22, s13 + fcvtms d21, d14 + +// CHECK: fcvtms s22, s13 // encoding: [0xb6,0xb9,0x21,0x5e] +// CHECK: fcvtms d21, d14 // encoding: [0xd5,0xb9,0x61,0x5e] + +//---------------------------------------------------------------------- +// Scalar Floating-point Convert To Unsigned Integer, Rounding Toward +// Minus Infinity +//---------------------------------------------------------------------- + + fcvtmu s12, s13 + fcvtmu d21, d14 + +// CHECK: fcvtmu s12, s13 // encoding: [0xac,0xb9,0x21,0x7e] +// CHECK: fcvtmu d21, d14 // encoding: [0xd5,0xb9,0x61,0x7e] + +//---------------------------------------------------------------------- +// Scalar Floating-point Convert To Signed Integer, Rounding To Nearest +// With Ties To Even +//---------------------------------------------------------------------- + + fcvtns s22, s13 + fcvtns d21, d14 + +// CHECK: fcvtns s22, s13 // encoding: [0xb6,0xa9,0x21,0x5e] +// CHECK: fcvtns d21, d14 // encoding: [0xd5,0xa9,0x61,0x5e] + 
+//---------------------------------------------------------------------- +// Scalar Floating-point Convert To Unsigned Integer, Rounding To +// Nearest With Ties To Even +//---------------------------------------------------------------------- + + fcvtnu s12, s13 + fcvtnu d21, d14 + +// CHECK: fcvtnu s12, s13 // encoding: [0xac,0xa9,0x21,0x7e] +// CHECK: fcvtnu d21, d14 // encoding: [0xd5,0xa9,0x61,0x7e] + +//---------------------------------------------------------------------- +// Scalar Floating-point Convert To Signed Integer, Rounding Toward +// Positive Infinity +//---------------------------------------------------------------------- + + fcvtps s22, s13 + fcvtps d21, d14 + +// CHECK: fcvtps s22, s13 // encoding: [0xb6,0xa9,0xa1,0x5e] +// CHECK: fcvtps d21, d14 // encoding: [0xd5,0xa9,0xe1,0x5e] + +//---------------------------------------------------------------------- +// Scalar Floating-point Convert To Unsigned Integer, Rounding Toward +// Positive Infinity +//---------------------------------------------------------------------- + + fcvtpu s12, s13 + fcvtpu d21, d14 + +// CHECK: fcvtpu s12, s13 // encoding: [0xac,0xa9,0xa1,0x7e] +// CHECK: fcvtpu d21, d14 // encoding: [0xd5,0xa9,0xe1,0x7e] + +//---------------------------------------------------------------------- +// Scalar Floating-point Convert To Signed Integer, Rounding Toward Zero +//---------------------------------------------------------------------- + + fcvtzs s12, s13 + fcvtzs d21, d14 + +// CHECK: fcvtzs s12, s13 // encoding: [0xac,0xb9,0xa1,0x5e] +// CHECK: fcvtzs d21, d14 // encoding: [0xd5,0xb9,0xe1,0x5e] + +//---------------------------------------------------------------------- +// Scalar Floating-point Convert To Unsigned Integer, Rounding Toward +// Zero +//---------------------------------------------------------------------- + + fcvtzu s12, s13 + fcvtzu d21, d14 + +// CHECK: fcvtzu s12, s13 // encoding: [0xac,0xb9,0xa1,0x7e] +// CHECK: fcvtzu d21, d14 // encoding: 
[0xd5,0xb9,0xe1,0x7e] diff --git a/test/MC/AArch64/neon-scalar-dup.s b/test/MC/AArch64/neon-scalar-dup.s new file mode 100644 index 0000000..77c638d --- /dev/null +++ b/test/MC/AArch64/neon-scalar-dup.s @@ -0,0 +1,55 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +//------------------------------------------------------------------------------ +// Duplicate element (scalar) +//------------------------------------------------------------------------------ + dup b0, v0.b[15] + dup b1, v0.b[7] + dup b17, v0.b[0] + dup h5, v31.h[7] + dup h9, v1.h[4] + dup h11, v17.h[0] + dup s2, v2.s[3] + dup s4, v21.s[0] + dup s31, v21.s[2] + dup d3, v5.d[0] + dup d6, v5.d[1] + +// CHECK: dup b0, v0.b[15] // encoding: [0x00,0x04,0x1f,0x5e] +// CHECK: dup b1, v0.b[7] // encoding: [0x01,0x04,0x0f,0x5e] +// CHECK: dup b17, v0.b[0] // encoding: [0x11,0x04,0x01,0x5e] +// CHECK: dup h5, v31.h[7] // encoding: [0xe5,0x07,0x1e,0x5e] +// CHECK: dup h9, v1.h[4] // encoding: [0x29,0x04,0x12,0x5e] +// CHECK: dup h11, v17.h[0] // encoding: [0x2b,0x06,0x02,0x5e] +// CHECK: dup s2, v2.s[3] // encoding: [0x42,0x04,0x1c,0x5e] +// CHECK: dup s4, v21.s[0] // encoding: [0xa4,0x06,0x04,0x5e] +// CHECK: dup s31, v21.s[2] // encoding: [0xbf,0x06,0x14,0x5e] +// CHECK: dup d3, v5.d[0] // encoding: [0xa3,0x04,0x08,0x5e] +// CHECK: dup d6, v5.d[1] // encoding: [0xa6,0x04,0x18,0x5e] + +//------------------------------------------------------------------------------ +// Aliases for Duplicate element (scalar) +//------------------------------------------------------------------------------ + mov b0, v0.b[15] + mov b1, v0.b[7] + mov b17, v0.b[0] + mov h5, v31.h[7] + mov h9, v1.h[4] + mov h11, v17.h[0] + mov s2, v2.s[3] + mov s4, v21.s[0] + mov s31, v21.s[2] + mov d3, v5.d[0] + mov d6, v5.d[1] + +// CHECK: dup b0, v0.b[15] // encoding: [0x00,0x04,0x1f,0x5e] +// CHECK: dup b1, v0.b[7] // encoding: [0x01,0x04,0x0f,0x5e] +// CHECK: dup b17, v0.b[0] // encoding: 
[0x11,0x04,0x01,0x5e] +// CHECK: dup h5, v31.h[7] // encoding: [0xe5,0x07,0x1e,0x5e] +// CHECK: dup h9, v1.h[4] // encoding: [0x29,0x04,0x12,0x5e] +// CHECK: dup h11, v17.h[0] // encoding: [0x2b,0x06,0x02,0x5e] +// CHECK: dup s2, v2.s[3] // encoding: [0x42,0x04,0x1c,0x5e] +// CHECK: dup s4, v21.s[0] // encoding: [0xa4,0x06,0x04,0x5e] +// CHECK: dup s31, v21.s[2] // encoding: [0xbf,0x06,0x14,0x5e] +// CHECK: dup d3, v5.d[0] // encoding: [0xa3,0x04,0x08,0x5e] +// CHECK: dup d6, v5.d[1] // encoding: [0xa6,0x04,0x18,0x5e] diff --git a/test/MC/AArch64/neon-scalar-extract-narrow.s b/test/MC/AArch64/neon-scalar-extract-narrow.s new file mode 100644 index 0000000..e25224e --- /dev/null +++ b/test/MC/AArch64/neon-scalar-extract-narrow.s @@ -0,0 +1,40 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//---------------------------------------------------------------------- +// Scalar Signed Saturating Extract Unsigned Narrow +//---------------------------------------------------------------------- + + sqxtun b19, h14 + sqxtun h21, s15 + sqxtun s20, d12 + +// CHECK: sqxtun b19, h14 // encoding: [0xd3,0x29,0x21,0x7e] +// CHECK: sqxtun h21, s15 // encoding: [0xf5,0x29,0x61,0x7e] +// CHECK: sqxtun s20, d12 // encoding: [0x94,0x29,0xa1,0x7e] + +//---------------------------------------------------------------------- +// Scalar Signed Saturating Extract Signed Narrow +//---------------------------------------------------------------------- + + sqxtn b18, h18 + sqxtn h20, s17 + sqxtn s19, d14 + +// CHECK: sqxtn b18, h18 // encoding: [0x52,0x4a,0x21,0x5e] +// CHECK: sqxtn h20, s17 // encoding: [0x34,0x4a,0x61,0x5e] +// CHECK: sqxtn s19, d14 // encoding: [0xd3,0x49,0xa1,0x5e] + + +//---------------------------------------------------------------------- +// Scalar Unsigned Saturating Extract Narrow 
+//---------------------------------------------------------------------- + + uqxtn b18, h18 + uqxtn h20, s17 + uqxtn s19, d14 + +// CHECK: uqxtn b18, h18 // encoding: [0x52,0x4a,0x21,0x7e] +// CHECK: uqxtn h20, s17 // encoding: [0x34,0x4a,0x61,0x7e] +// CHECK: uqxtn s19, d14 // encoding: [0xd3,0x49,0xa1,0x7e] diff --git a/test/MC/AArch64/neon-scalar-fp-compare.s b/test/MC/AArch64/neon-scalar-fp-compare.s new file mode 100644 index 0000000..a59ec0d --- /dev/null +++ b/test/MC/AArch64/neon-scalar-fp-compare.s @@ -0,0 +1,103 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//---------------------------------------------------------------------- +// Scalar Floating-point Compare Mask Equal +//---------------------------------------------------------------------- + + fcmeq s10, s11, s12 + fcmeq d20, d21, d22 + +// CHECK: fcmeq s10, s11, s12 // encoding: [0x6a,0xe5,0x2c,0x5e] +// CHECK: fcmeq d20, d21, d22 // encoding: [0xb4,0xe6,0x76,0x5e] + +//---------------------------------------------------------------------- +// Scalar Floating-point Compare Mask Equal To Zero +//---------------------------------------------------------------------- + + fcmeq s10, s11, #0.0 + fcmeq d20, d21, #0.0 + +// CHECK: fcmeq s10, s11, #0.0 // encoding: [0x6a,0xd9,0xa0,0x5e] +// CHECK: fcmeq d20, d21, #0.0 // encoding: [0xb4,0xda,0xe0,0x5e] + +//---------------------------------------------------------------------- +// Scalar Floating-point Compare Mask Greater Than Or Equal +//---------------------------------------------------------------------- + + fcmge s10, s11, s12 + fcmge d20, d21, d22 + +// CHECK: fcmge s10, s11, s12 // encoding: [0x6a,0xe5,0x2c,0x7e] +// CHECK: fcmge d20, d21, d22 // encoding: [0xb4,0xe6,0x76,0x7e] + +//---------------------------------------------------------------------- +// Scalar Floating-point Compare Mask Greater Than Or Equal To 
Zero +//---------------------------------------------------------------------- + + fcmge s10, s11, #0.0 + fcmge d20, d21, #0.0 + +// CHECK: fcmge s10, s11, #0.0 // encoding: [0x6a,0xc9,0xa0,0x7e] +// CHECK: fcmge d20, d21, #0.0 // encoding: [0xb4,0xca,0xe0,0x7e] + +//---------------------------------------------------------------------- +// Scalar Floating-point Compare Mask Greather Than +//---------------------------------------------------------------------- + + fcmgt s10, s11, s12 + fcmgt d20, d21, d22 + +// CHECK: fcmgt s10, s11, s12 // encoding: [0x6a,0xe5,0xac,0x7e] +// CHECK: fcmgt d20, d21, d22 // encoding: [0xb4,0xe6,0xf6,0x7e] + +//---------------------------------------------------------------------- +// Scalar Floating-point Compare Mask Greather Than Zero +//---------------------------------------------------------------------- + + fcmgt s10, s11, #0.0 + fcmgt d20, d21, #0.0 + +// CHECK: fcmgt s10, s11, #0.0 // encoding: [0x6a,0xc9,0xa0,0x5e] +// CHECK: fcmgt d20, d21, #0.0 // encoding: [0xb4,0xca,0xe0,0x5e] + +//---------------------------------------------------------------------- +// Scalar Floating-point Compare Mask Less Than Or Equal To Zero +//---------------------------------------------------------------------- + + fcmle s10, s11, #0.0 + fcmle d20, d21, #0.0 + +// CHECK: fcmle s10, s11, #0.0 // encoding: [0x6a,0xd9,0xa0,0x7e] +// CHECK: fcmle d20, d21, #0.0 // encoding: [0xb4,0xda,0xe0,0x7e] + +//---------------------------------------------------------------------- +// Scalar Floating-point Compare Mask Less Than +//---------------------------------------------------------------------- + + fcmlt s10, s11, #0.0 + fcmlt d20, d21, #0.0 + +// CHECK: fcmlt s10, s11, #0.0 // encoding: [0x6a,0xe9,0xa0,0x5e] +// CHECK: fcmlt d20, d21, #0.0 // encoding: [0xb4,0xea,0xe0,0x5e] + +//---------------------------------------------------------------------- +// Scalar Floating-point Absolute Compare Mask Greater Than Or Equal 
+//---------------------------------------------------------------------- + + facge s10, s11, s12 + facge d20, d21, d22 + +// CHECK: facge s10, s11, s12 // encoding: [0x6a,0xed,0x2c,0x7e] +// CHECK: facge d20, d21, d22 // encoding: [0xb4,0xee,0x76,0x7e] + +//---------------------------------------------------------------------- +// Scalar Floating-point Absolute Compare Mask Greater Than +//---------------------------------------------------------------------- + + facgt s10, s11, s12 + facgt d20, d21, d22 + +// CHECK: facgt s10, s11, s12 // encoding: [0x6a,0xed,0xac,0x7e] +// CHECK: facgt d20, d21, d22 // encoding: [0xb4,0xee,0xf6,0x7e] diff --git a/test/MC/AArch64/neon-scalar-mul.s b/test/MC/AArch64/neon-scalar-mul.s new file mode 100644 index 0000000..e33bdad --- /dev/null +++ b/test/MC/AArch64/neon-scalar-mul.s @@ -0,0 +1,63 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//---------------------------------------------------------------------- +// Scalar Integer Saturating Doubling Multiply Half High +//---------------------------------------------------------------------- + + sqdmulh h10, h11, h12 + sqdmulh s20, s21, s2 + +// CHECK: sqdmulh h10, h11, h12 // encoding: [0x6a,0xb5,0x6c,0x5e] +// CHECK: sqdmulh s20, s21, s2 // encoding: [0xb4,0xb6,0xa2,0x5e] + +//---------------------------------------------------------------------- +// Scalar Integer Saturating Rounding Doubling Multiply Half High +//---------------------------------------------------------------------- + + sqrdmulh h10, h11, h12 + sqrdmulh s20, s21, s2 + +// CHECK: sqrdmulh h10, h11, h12 // encoding: [0x6a,0xb5,0x6c,0x7e] +// CHECK: sqrdmulh s20, s21, s2 // encoding: [0xb4,0xb6,0xa2,0x7e] + +//---------------------------------------------------------------------- +// Floating-point Multiply Extended 
+//---------------------------------------------------------------------- + + fmulx s20, s22, s15 + fmulx d23, d11, d1 + +// CHECK: fmulx s20, s22, s15 // encoding: [0xd4,0xde,0x2f,0x5e] +// CHECK: fmulx d23, d11, d1 // encoding: [0x77,0xdd,0x61,0x5e] + +//---------------------------------------------------------------------- +// Signed Saturating Doubling Multiply-Add Long +//---------------------------------------------------------------------- + + sqdmlal s17, h27, h12 + sqdmlal d19, s24, s12 + +// CHECK: sqdmlal s17, h27, h12 // encoding: [0x71,0x93,0x6c,0x5e] +// CHECK: sqdmlal d19, s24, s12 // encoding: [0x13,0x93,0xac,0x5e] + +//---------------------------------------------------------------------- +// Signed Saturating Doubling Multiply-Subtract Long +//---------------------------------------------------------------------- + + sqdmlsl s14, h12, h25 + sqdmlsl d12, s23, s13 + +// CHECK: sqdmlsl s14, h12, h25 // encoding: [0x8e,0xb1,0x79,0x5e] +// CHECK: sqdmlsl d12, s23, s13 // encoding: [0xec,0xb2,0xad,0x5e] + +//---------------------------------------------------------------------- +// Signed Saturating Doubling Multiply Long +//---------------------------------------------------------------------- + + sqdmull s12, h22, h12 + sqdmull d15, s22, s12 + +// CHECK: sqdmull s12, h22, h12 // encoding: [0xcc,0xd2,0x6c,0x5e] +// CHECK: sqdmull d15, s22, s12 // encoding: [0xcf,0xd2,0xac,0x5e] diff --git a/test/MC/AArch64/neon-scalar-neg.s b/test/MC/AArch64/neon-scalar-neg.s new file mode 100644 index 0000000..8e5d61d --- /dev/null +++ b/test/MC/AArch64/neon-scalar-neg.s @@ -0,0 +1,25 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//---------------------------------------------------------------------- +// Scalar Negate +//---------------------------------------------------------------------- + + neg d29, d24 + +// CHECK: neg d29, d24 // 
encoding: [0x1d,0xbb,0xe0,0x7e] + +//---------------------------------------------------------------------- +// Scalar Signed Saturating Negate +//---------------------------------------------------------------------- + + sqneg b19, b14 + sqneg h21, h15 + sqneg s20, s12 + sqneg d18, d12 + +// CHECK: sqneg b19, b14 // encoding: [0xd3,0x79,0x20,0x7e] +// CHECK: sqneg h21, h15 // encoding: [0xf5,0x79,0x60,0x7e] +// CHECK: sqneg s20, s12 // encoding: [0x94,0x79,0xa0,0x7e] +// CHECK: sqneg d18, d12 // encoding: [0x92,0x79,0xe0,0x7e] diff --git a/test/MC/AArch64/neon-scalar-recip.s b/test/MC/AArch64/neon-scalar-recip.s new file mode 100644 index 0000000..7a886f3 --- /dev/null +++ b/test/MC/AArch64/neon-scalar-recip.s @@ -0,0 +1,53 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//---------------------------------------------------------------------- +// Floating-point Reciprocal Step +//---------------------------------------------------------------------- + + frecps s21, s16, s13 + frecps d22, d30, d21 + +// CHECK: frecps s21, s16, s13 // encoding: [0x15,0xfe,0x2d,0x5e] +// CHECK: frecps d22, d30, d21 // encoding: [0xd6,0xff,0x75,0x5e] + +//---------------------------------------------------------------------- +// Floating-point Reciprocal Square Root Step +//---------------------------------------------------------------------- + + frsqrts s21, s5, s12 + frsqrts d8, d22, d18 + +// CHECK: frsqrts s21, s5, s12 // encoding: [0xb5,0xfc,0xac,0x5e] +// CHECK: frsqrts d8, d22, d18 // encoding: [0xc8,0xfe,0xf2,0x5e] + +//---------------------------------------------------------------------- +// Scalar Floating-point Reciprocal Estimate +//---------------------------------------------------------------------- + + frecpe s19, s14 + frecpe d13, d13 + +// CHECK: frecpe s19, s14 // encoding: [0xd3,0xd9,0xa1,0x5e] +// CHECK: frecpe d13, d13 // encoding: 
[0xad,0xd9,0xe1,0x5e] + +//---------------------------------------------------------------------- +// Scalar Floating-point Reciprocal Exponent +//---------------------------------------------------------------------- + + frecpx s18, s10 + frecpx d16, d19 + +// CHECK: frecpx s18, s10 // encoding: [0x52,0xf9,0xa1,0x5e] +// CHECK: frecpx d16, d19 // encoding: [0x70,0xfa,0xe1,0x5e] + +//---------------------------------------------------------------------- +// Scalar Floating-point Reciprocal Square Root Estimate +//---------------------------------------------------------------------- + + frsqrte s22, s13 + frsqrte d21, d12 + +// CHECK: frsqrte s22, s13 // encoding: [0xb6,0xd9,0xa1,0x7e] +// CHECK: frsqrte d21, d12 // encoding: [0x95,0xd9,0xe1,0x7e] diff --git a/test/MC/AArch64/neon-scalar-reduce-pairwise.s b/test/MC/AArch64/neon-scalar-reduce-pairwise.s new file mode 100644 index 0000000..403a940 --- /dev/null +++ b/test/MC/AArch64/neon-scalar-reduce-pairwise.s @@ -0,0 +1,16 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +//---------------------------------------------------------------------- +// Scalar Reduce Add Pairwise (Integer) +//---------------------------------------------------------------------- + addp d0, v1.2d + +// CHECK: addp d0, v1.2d // encoding: [0x20,0xb8,0xf1,0x5e] + +//---------------------------------------------------------------------- +// Scalar Reduce Add Pairwise (Floating Point) +//---------------------------------------------------------------------- + faddp d20, v1.2d + +// CHECK: faddp d20, v1.2d // encoding: [0x34,0xd8,0x70,0x7e] + diff --git a/test/MC/AArch64/neon-scalar-rounding-shift.s b/test/MC/AArch64/neon-scalar-rounding-shift.s new file mode 100644 index 0000000..6113e09 --- /dev/null +++ b/test/MC/AArch64/neon-scalar-rounding-shift.s @@ -0,0 +1,17 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + + 
+//------------------------------------------------------------------------------ +// Scalar Integer Rounding Shift Lef (Signed) +//------------------------------------------------------------------------------ + srshl d17, d31, d8 + +// CHECK: srshl d17, d31, d8 // encoding: [0xf1,0x57,0xe8,0x5e] + +//------------------------------------------------------------------------------ +// Scalar Integer Rounding Shift Lef (Unsigned) +//------------------------------------------------------------------------------ + urshl d17, d31, d8 + +// CHECK: urshl d17, d31, d8 // encoding: [0xf1,0x57,0xe8,0x7e] + diff --git a/test/MC/AArch64/neon-scalar-saturating-add-sub.s b/test/MC/AArch64/neon-scalar-saturating-add-sub.s new file mode 100644 index 0000000..0bf2434 --- /dev/null +++ b/test/MC/AArch64/neon-scalar-saturating-add-sub.s @@ -0,0 +1,81 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +//------------------------------------------------------------------------------ +// Scalar Integer Saturating Add (Signed) +//------------------------------------------------------------------------------ + sqadd b0, b1, b2 + sqadd h10, h11, h12 + sqadd s20, s21, s2 + sqadd d17, d31, d8 + +// CHECK: sqadd b0, b1, b2 // encoding: [0x20,0x0c,0x22,0x5e] +// CHECK: sqadd h10, h11, h12 // encoding: [0x6a,0x0d,0x6c,0x5e] +// CHECK: sqadd s20, s21, s2 // encoding: [0xb4,0x0e,0xa2,0x5e] +// CHECK: sqadd d17, d31, d8 // encoding: [0xf1,0x0f,0xe8,0x5e] + +//------------------------------------------------------------------------------ +// Scalar Integer Saturating Add (Unsigned) +//------------------------------------------------------------------------------ + uqadd b0, b1, b2 + uqadd h10, h11, h12 + uqadd s20, s21, s2 + uqadd d17, d31, d8 + +// CHECK: uqadd b0, b1, b2 // encoding: [0x20,0x0c,0x22,0x7e] +// CHECK: uqadd h10, h11, h12 // encoding: [0x6a,0x0d,0x6c,0x7e] +// CHECK: uqadd s20, s21, s2 // encoding: [0xb4,0x0e,0xa2,0x7e] +// CHECK: 
uqadd d17, d31, d8 // encoding: [0xf1,0x0f,0xe8,0x7e] + +//------------------------------------------------------------------------------ +// Scalar Integer Saturating Sub (Signed) +//------------------------------------------------------------------------------ + sqsub b0, b1, b2 + sqsub h10, h11, h12 + sqsub s20, s21, s2 + sqsub d17, d31, d8 + +// CHECK: sqsub b0, b1, b2 // encoding: [0x20,0x2c,0x22,0x5e] +// CHECK: sqsub h10, h11, h12 // encoding: [0x6a,0x2d,0x6c,0x5e] +// CHECK: sqsub s20, s21, s2 // encoding: [0xb4,0x2e,0xa2,0x5e] +// CHECK: sqsub d17, d31, d8 // encoding: [0xf1,0x2f,0xe8,0x5e] + +//------------------------------------------------------------------------------ +// Scalar Integer Saturating Sub (Unsigned) +//------------------------------------------------------------------------------ + uqsub b0, b1, b2 + uqsub h10, h11, h12 + uqsub s20, s21, s2 + uqsub d17, d31, d8 + +// CHECK: uqsub b0, b1, b2 // encoding: [0x20,0x2c,0x22,0x7e] +// CHECK: uqsub h10, h11, h12 // encoding: [0x6a,0x2d,0x6c,0x7e] +// CHECK: uqsub s20, s21, s2 // encoding: [0xb4,0x2e,0xa2,0x7e] +// CHECK: uqsub d17, d31, d8 // encoding: [0xf1,0x2f,0xe8,0x7e] + +//---------------------------------------------------------------------- +// Signed Saturating Accumulated of Unsigned Value +//---------------------------------------------------------------------- + + suqadd b19, b14 + suqadd h20, h15 + suqadd s21, s12 + suqadd d18, d22 + +// CHECK: suqadd b19, b14 // encoding: [0xd3,0x39,0x20,0x5e] +// CHECK: suqadd h20, h15 // encoding: [0xf4,0x39,0x60,0x5e] +// CHECK: suqadd s21, s12 // encoding: [0x95,0x39,0xa0,0x5e] +// CHECK: suqadd d18, d22 // encoding: [0xd2,0x3a,0xe0,0x5e] + +//---------------------------------------------------------------------- +// Unsigned Saturating Accumulated of Signed Value +//---------------------------------------------------------------------- + + usqadd b19, b14 + usqadd h20, h15 + usqadd s21, s12 + usqadd d18, d22 + +// CHECK: usqadd b19, b14 // 
encoding: [0xd3,0x39,0x20,0x7e] +// CHECK: usqadd h20, h15 // encoding: [0xf4,0x39,0x60,0x7e] +// CHECK: usqadd s21, s12 // encoding: [0x95,0x39,0xa0,0x7e] +// CHECK: usqadd d18, d22 // encoding: [0xd2,0x3a,0xe0,0x7e] diff --git a/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s b/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s new file mode 100644 index 0000000..b09a589 --- /dev/null +++ b/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s @@ -0,0 +1,28 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +//------------------------------------------------------------------------------ +// Scalar Integer Saturating Rounding Shift Lef (Signed) +//------------------------------------------------------------------------------ + sqrshl b0, b1, b2 + sqrshl h10, h11, h12 + sqrshl s20, s21, s2 + sqrshl d17, d31, d8 + +// CHECK: sqrshl b0, b1, b2 // encoding: [0x20,0x5c,0x22,0x5e] +// CHECK: sqrshl h10, h11, h12 // encoding: [0x6a,0x5d,0x6c,0x5e] +// CHECK: sqrshl s20, s21, s2 // encoding: [0xb4,0x5e,0xa2,0x5e] +// CHECK: sqrshl d17, d31, d8 // encoding: [0xf1,0x5f,0xe8,0x5e] + +//------------------------------------------------------------------------------ +// Scalar Integer Saturating Rounding Shift Lef (Unsigned) +//------------------------------------------------------------------------------ + uqrshl b0, b1, b2 + uqrshl h10, h11, h12 + uqrshl s20, s21, s2 + uqrshl d17, d31, d8 + +// CHECK: uqrshl b0, b1, b2 // encoding: [0x20,0x5c,0x22,0x7e] +// CHECK: uqrshl h10, h11, h12 // encoding: [0x6a,0x5d,0x6c,0x7e] +// CHECK: uqrshl s20, s21, s2 // encoding: [0xb4,0x5e,0xa2,0x7e] +// CHECK: uqrshl d17, d31, d8 // encoding: [0xf1,0x5f,0xe8,0x7e] + diff --git a/test/MC/AArch64/neon-scalar-saturating-shift.s b/test/MC/AArch64/neon-scalar-saturating-shift.s new file mode 100644 index 0000000..b53c9f0 --- /dev/null +++ b/test/MC/AArch64/neon-scalar-saturating-shift.s @@ -0,0 +1,29 @@ +// RUN: llvm-mc -triple 
aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +//------------------------------------------------------------------------------ +// Scalar Integer Saturating Shift Lef (Signed) +//------------------------------------------------------------------------------ + sqshl b0, b1, b2 + sqshl h10, h11, h12 + sqshl s20, s21, s2 + sqshl d17, d31, d8 + +// CHECK: sqshl b0, b1, b2 // encoding: [0x20,0x4c,0x22,0x5e] +// CHECK: sqshl h10, h11, h12 // encoding: [0x6a,0x4d,0x6c,0x5e] +// CHECK: sqshl s20, s21, s2 // encoding: [0xb4,0x4e,0xa2,0x5e] +// CHECK: sqshl d17, d31, d8 // encoding: [0xf1,0x4f,0xe8,0x5e] + +//------------------------------------------------------------------------------ +// Scalar Integer Saturating Shift Lef (Unsigned) +//------------------------------------------------------------------------------ + uqshl b0, b1, b2 + uqshl h10, h11, h12 + uqshl s20, s21, s2 + uqshl d17, d31, d8 + +// CHECK: uqshl b0, b1, b2 // encoding: [0x20,0x4c,0x22,0x7e] +// CHECK: uqshl h10, h11, h12 // encoding: [0x6a,0x4d,0x6c,0x7e] +// CHECK: uqshl s20, s21, s2 // encoding: [0xb4,0x4e,0xa2,0x7e] +// CHECK: uqshl d17, d31, d8 // encoding: [0xf1,0x4f,0xe8,0x7e] + + diff --git a/test/MC/AArch64/neon-scalar-shift-imm.s b/test/MC/AArch64/neon-scalar-shift-imm.s new file mode 100644 index 0000000..96cb815 --- /dev/null +++ b/test/MC/AArch64/neon-scalar-shift-imm.s @@ -0,0 +1,186 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//---------------------------------------------------------------------- +// Scalar Signed Shift Right (Immediate) +//---------------------------------------------------------------------- + sshr d15, d16, #12 + +// CHECK: sshr d15, d16, #12 // encoding: [0x0f,0x06,0x74,0x5f] + +//---------------------------------------------------------------------- +// Scalar Unsigned Shift Right (Immediate) 
+//---------------------------------------------------------------------- + ushr d10, d17, #18 + +// CHECK: ushr d10, d17, #18 // encoding: [0x2a,0x06,0x6e,0x7f] + +//---------------------------------------------------------------------- +// Scalar Signed Rounding Shift Right (Immediate) +//---------------------------------------------------------------------- + srshr d19, d18, #7 + +// CHECK: srshr d19, d18, #7 // encoding: [0x53,0x26,0x79,0x5f] + +//---------------------------------------------------------------------- +// Scalar Unigned Rounding Shift Right (Immediate) +//---------------------------------------------------------------------- + urshr d20, d23, #31 + +// CHECK: urshr d20, d23, #31 // encoding: [0xf4,0x26,0x61,0x7f] + +//---------------------------------------------------------------------- +// Scalar Signed Shift Right and Accumulate (Immediate) +//---------------------------------------------------------------------- + ssra d18, d12, #21 + +// CHECK: ssra d18, d12, #21 // encoding: [0x92,0x15,0x6b,0x5f] + +//---------------------------------------------------------------------- +// Scalar Unsigned Shift Right and Accumulate (Immediate) +//---------------------------------------------------------------------- + usra d20, d13, #61 + +// CHECK: usra d20, d13, #61 // encoding: [0xb4,0x15,0x43,0x7f] + +//---------------------------------------------------------------------- +// Scalar Signed Rounding Shift Right and Accumulate (Immediate) +//---------------------------------------------------------------------- + srsra d15, d11, #19 + +// CHECK: srsra d15, d11, #19 // encoding: [0x6f,0x35,0x6d,0x5f] + +//---------------------------------------------------------------------- +// Scalar Unsigned Rounding Shift Right and Accumulate (Immediate) +//---------------------------------------------------------------------- + ursra d18, d10, #13 + +// CHECK: ursra d18, d10, #13 // encoding: [0x52,0x35,0x73,0x7f] + 
+//---------------------------------------------------------------------- +// Scalar Shift Left (Immediate) +//---------------------------------------------------------------------- + shl d7, d10, #12 + +// CHECK: shl d7, d10, #12 // encoding: [0x47,0x55,0x4c,0x5f] + +//---------------------------------------------------------------------- +// Signed Saturating Shift Left (Immediate) +//---------------------------------------------------------------------- + sqshl b11, b19, #7 + sqshl h13, h18, #11 + sqshl s14, s17, #22 + sqshl d15, d16, #51 + +// CHECK: sqshl b11, b19, #7 // encoding: [0x6b,0x76,0x0f,0x5f] +// CHECK: sqshl h13, h18, #11 // encoding: [0x4d,0x76,0x1b,0x5f] +// CHECK: sqshl s14, s17, #22 // encoding: [0x2e,0x76,0x36,0x5f] +// CHECK: sqshl d15, d16, #51 // encoding: [0x0f,0x76,0x73,0x5f] + +//---------------------------------------------------------------------- +// Unsigned Saturating Shift Left (Immediate) +//---------------------------------------------------------------------- + uqshl b18, b15, #6 + uqshl h11, h18, #7 + uqshl s14, s19, #18 + uqshl d15, d12, #19 + +// CHECK: uqshl b18, b15, #6 // encoding: [0xf2,0x75,0x0e,0x7f] +// CHECK: uqshl h11, h18, #7 // encoding: [0x4b,0x76,0x17,0x7f] +// CHECK: uqshl s14, s19, #18 // encoding: [0x6e,0x76,0x32,0x7f] +// CHECK: uqshl d15, d12, #19 // encoding: [0x8f,0x75,0x53,0x7f] + +//---------------------------------------------------------------------- +// Signed Saturating Shift Left Unsigned (Immediate) +//---------------------------------------------------------------------- + sqshlu b15, b18, #6 + sqshlu h19, h17, #6 + sqshlu s16, s14, #25 + sqshlu d11, d13, #32 + +// CHECK: sqshlu b15, b18, #6 // encoding: [0x4f,0x66,0x0e,0x7f] +// CHECK: sqshlu h19, h17, #6 // encoding: [0x33,0x66,0x16,0x7f] +// CHECK: sqshlu s16, s14, #25 // encoding: [0xd0,0x65,0x39,0x7f] +// CHECK: sqshlu d11, d13, #32 // encoding: [0xab,0x65,0x60,0x7f] + +//---------------------------------------------------------------------- 
+// Shift Right And Insert (Immediate) +//---------------------------------------------------------------------- + sri d10, d12, #14 + +// CHECK: sri d10, d12, #14 // encoding: [0x8a,0x45,0x72,0x7f] + +//---------------------------------------------------------------------- +// Shift Left And Insert (Immediate) +//---------------------------------------------------------------------- + sli d10, d14, #12 + +// CHECK: sli d10, d14, #12 // encoding: [0xca,0x55,0x4c,0x7f] + +//---------------------------------------------------------------------- +// Signed Saturating Shift Right Narrow (Immediate) +//---------------------------------------------------------------------- + sqshrn b10, h15, #5 + sqshrn h17, s10, #4 + sqshrn s18, d10, #31 + +// CHECK: sqshrn b10, h15, #5 // encoding: [0xea,0x95,0x0b,0x5f] +// CHECK: sqshrn h17, s10, #4 // encoding: [0x51,0x95,0x1c,0x5f] +// CHECK: sqshrn s18, d10, #31 // encoding: [0x52,0x95,0x21,0x5f] + +//---------------------------------------------------------------------- +// Unsigned Saturating Shift Right Narrow (Immediate) +//---------------------------------------------------------------------- + uqshrn b12, h10, #7 + uqshrn h10, s14, #5 + uqshrn s10, d12, #13 + +// CHECK: uqshrn b12, h10, #7 // encoding: [0x4c,0x95,0x09,0x7f] +// CHECK: uqshrn h10, s14, #5 // encoding: [0xca,0x95,0x1b,0x7f] +// CHECK: uqshrn s10, d12, #13 // encoding: [0x8a,0x95,0x33,0x7f] + +//---------------------------------------------------------------------- +// Signed Saturating Rounded Shift Right Narrow (Immediate) +//---------------------------------------------------------------------- + sqrshrn b10, h13, #2 + sqrshrn h15, s10, #6 + sqrshrn s15, d12, #9 + +// CHECK: sqrshrn b10, h13, #2 // encoding: [0xaa,0x9d,0x0e,0x5f] +// CHECK: sqrshrn h15, s10, #6 // encoding: [0x4f,0x9d,0x1a,0x5f] +// CHECK: sqrshrn s15, d12, #9 // encoding: [0x8f,0x9d,0x37,0x5f] + +//---------------------------------------------------------------------- +// Unsigned Saturating 
Rounded Shift Right Narrow (Immediate) +//---------------------------------------------------------------------- + uqrshrn b10, h12, #5 + uqrshrn h12, s10, #14 + uqrshrn s10, d10, #25 + +// CHECK: uqrshrn b10, h12, #5 // encoding: [0x8a,0x9d,0x0b,0x7f] +// CHECK: uqrshrn h12, s10, #14 // encoding: [0x4c,0x9d,0x12,0x7f] +// CHECK: uqrshrn s10, d10, #25 // encoding: [0x4a,0x9d,0x27,0x7f] + +//---------------------------------------------------------------------- +// Signed Saturating Shift Right Unsigned Narrow (Immediate) +//---------------------------------------------------------------------- + sqshrun b15, h10, #7 + sqshrun h20, s14, #3 + sqshrun s10, d15, #15 + +// CHECK: sqshrun b15, h10, #7 // encoding: [0x4f,0x85,0x09,0x7f] +// CHECK: sqshrun h20, s14, #3 // encoding: [0xd4,0x85,0x1d,0x7f] +// CHECK: sqshrun s10, d15, #15 // encoding: [0xea,0x85,0x31,0x7f] + +//---------------------------------------------------------------------- +// Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate) +//---------------------------------------------------------------------- + + sqrshrun b17, h10, #6 + sqrshrun h10, s13, #15 + sqrshrun s22, d16, #31 + +// CHECK: sqrshrun b17, h10, #6 // encoding: [0x51,0x8d,0x0a,0x7f] +// CHECK: sqrshrun h10, s13, #15 // encoding: [0xaa,0x8d,0x11,0x7f] +// CHECK: sqrshrun s22, d16, #31 // encoding: [0x16,0x8e,0x21,0x7f] diff --git a/test/MC/AArch64/neon-scalar-shift.s b/test/MC/AArch64/neon-scalar-shift.s new file mode 100644 index 0000000..366840a --- /dev/null +++ b/test/MC/AArch64/neon-scalar-shift.s @@ -0,0 +1,16 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +//------------------------------------------------------------------------------ +// Scalar Integer Shift Lef (Signed) +//------------------------------------------------------------------------------ + sshl d17, d31, d8 + +// CHECK: sshl d17, d31, d8 // encoding: [0xf1,0x47,0xe8,0x5e] + 
+//------------------------------------------------------------------------------ +// Scalar Integer Shift Lef (Unsigned) +//------------------------------------------------------------------------------ + ushl d17, d31, d8 + +// CHECK: ushl d17, d31, d8 // encoding: [0xf1,0x47,0xe8,0x7e] + diff --git a/test/MC/AArch64/neon-shift-left-long.s b/test/MC/AArch64/neon-shift-left-long.s new file mode 100644 index 0000000..9760458 --- /dev/null +++ b/test/MC/AArch64/neon-shift-left-long.s @@ -0,0 +1,37 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//------------------------------------------------------------------------------ +// Integer shift left long (Signed) +//------------------------------------------------------------------------------ + sshll v0.8h, v1.8b, #3 + sshll v0.4s, v1.4h, #3 + sshll v0.2d, v1.2s, #3 + sshll2 v0.8h, v1.16b, #3 + sshll2 v0.4s, v1.8h, #3 + sshll2 v0.2d, v1.4s, #3 + +// CHECK: sshll v0.8h, v1.8b, #3 // encoding: [0x20,0xa4,0x0b,0x0f] +// CHECK: sshll v0.4s, v1.4h, #3 // encoding: [0x20,0xa4,0x13,0x0f] +// CHECK: sshll v0.2d, v1.2s, #3 // encoding: [0x20,0xa4,0x23,0x0f] +// CHECK: sshll2 v0.8h, v1.16b, #3 // encoding: [0x20,0xa4,0x0b,0x4f] +// CHECK: sshll2 v0.4s, v1.8h, #3 // encoding: [0x20,0xa4,0x13,0x4f] +// CHECK: sshll2 v0.2d, v1.4s, #3 // encoding: [0x20,0xa4,0x23,0x4f] + +//------------------------------------------------------------------------------ +// Integer shift left long (Unsigned) +//------------------------------------------------------------------------------ + ushll v0.8h, v1.8b, #3 + ushll v0.4s, v1.4h, #3 + ushll v0.2d, v1.2s, #3 + ushll2 v0.8h, v1.16b, #3 + ushll2 v0.4s, v1.8h, #3 + ushll2 v0.2d, v1.4s, #3 + +// CHECK: ushll v0.8h, v1.8b, #3 // encoding: [0x20,0xa4,0x0b,0x2f] +// CHECK: ushll v0.4s, v1.4h, #3 // encoding: [0x20,0xa4,0x13,0x2f] +// CHECK: ushll v0.2d, v1.2s, #3 // encoding: 
[0x20,0xa4,0x23,0x2f] +// CHECK: ushll2 v0.8h, v1.16b, #3 // encoding: [0x20,0xa4,0x0b,0x6f] +// CHECK: ushll2 v0.4s, v1.8h, #3 // encoding: [0x20,0xa4,0x13,0x6f] +// CHECK: ushll2 v0.2d, v1.4s, #3 // encoding: [0x20,0xa4,0x23,0x6f] diff --git a/test/MC/AArch64/neon-shift.s b/test/MC/AArch64/neon-shift.s index be1799e..614e6de 100644 --- a/test/MC/AArch64/neon-shift.s +++ b/test/MC/AArch64/neon-shift.s @@ -42,16 +42,20 @@ // CHECK: ushl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x44,0xe2,0x6e] //------------------------------------------------------------------------------ -// Scalar Integer Shift Lef (Signed) -//------------------------------------------------------------------------------ - sshl d17, d31, d8 - -// CHECK: sshl d17, d31, d8 // encoding: [0xf1,0x47,0xe8,0x5e] - -//------------------------------------------------------------------------------ -// Scalar Integer Shift Lef (Unsigned) -//------------------------------------------------------------------------------ - ushl d17, d31, d8 - -// CHECK: ushl d17, d31, d8 // encoding: [0xf1,0x47,0xe8,0x7e] - +// Vector Integer Shift Left by Immediate +//------------------------------------------------------------------------------ + shl v0.8b, v1.8b, #3 + shl v0.4h, v1.4h, #3 + shl v0.2s, v1.2s, #3 + shl v0.16b, v1.16b, #3 + shl v0.8h, v1.8h, #3 + shl v0.4s, v1.4s, #3 + shl v0.2d, v1.2d, #3 + +// CHECK: shl v0.8b, v1.8b, #3 // encoding: [0x20,0x54,0x0b,0x0f] +// CHECK: shl v0.4h, v1.4h, #3 // encoding: [0x20,0x54,0x13,0x0f] +// CHECK: shl v0.2s, v1.2s, #3 // encoding: [0x20,0x54,0x23,0x0f] +// CHECK: shl v0.16b, v1.16b, #3 // encoding: [0x20,0x54,0x0b,0x4f] +// CHECK: shl v0.8h, v1.8h, #3 // encoding: [0x20,0x54,0x13,0x4f] +// CHECK: shl v0.4s, v1.4s, #3 // encoding: [0x20,0x54,0x23,0x4f] +// CHECK: shl v0.2d, v1.2d, #3 // encoding: [0x20,0x54,0x43,0x4f] diff --git a/test/MC/AArch64/neon-simd-copy.s b/test/MC/AArch64/neon-simd-copy.s new file mode 100644 index 0000000..f254d65 --- /dev/null +++ 
b/test/MC/AArch64/neon-simd-copy.s @@ -0,0 +1,135 @@ +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + + +//------------------------------------------------------------------------------ +// Insert element (vector, from main) +//------------------------------------------------------------------------------ + ins v2.b[2], w1 + ins v7.h[7], w14 + ins v20.s[0], w30 + ins v1.d[1], x7 + + mov v2.b[2], w1 + mov v7.h[7], w14 + mov v20.s[0], w30 + mov v1.d[1], x7 + +// CHECK: ins v2.b[2], w1 // encoding: [0x22,0x1c,0x05,0x4e] +// CHECK: ins v7.h[7], w14 // encoding: [0xc7,0x1d,0x1e,0x4e] +// CHECK: ins v20.s[0], w30 // encoding: [0xd4,0x1f,0x04,0x4e] +// CHECK: ins v1.d[1], x7 // encoding: [0xe1,0x1c,0x18,0x4e] + +// CHECK: ins v2.b[2], w1 // encoding: [0x22,0x1c,0x05,0x4e] +// CHECK: ins v7.h[7], w14 // encoding: [0xc7,0x1d,0x1e,0x4e] +// CHECK: ins v20.s[0], w30 // encoding: [0xd4,0x1f,0x04,0x4e] +// CHECK: ins v1.d[1], x7 // encoding: [0xe1,0x1c,0x18,0x4e] + + +//------------------------------------------------------------------------------ +// Signed integer move (main, from element) +//------------------------------------------------------------------------------ + smov w1, v0.b[15] + smov w14, v6.h[4] + smov x1, v0.b[15] + smov x14, v6.h[4] + smov x20, v9.s[2] + +// CHECK: smov w1, v0.b[15] // encoding: [0x01,0x2c,0x1f,0x0e] +// CHECK: smov w14, v6.h[4] // encoding: [0xce,0x2c,0x12,0x0e] +// CHECK: smov x1, v0.b[15] // encoding: [0x01,0x2c,0x1f,0x4e] +// CHECK: smov x14, v6.h[4] // encoding: [0xce,0x2c,0x12,0x4e] +// CHECK: smov x20, v9.s[2] // encoding: [0x34,0x2d,0x14,0x4e] + + +//------------------------------------------------------------------------------ +// Unsigned integer move (main, from element) +//------------------------------------------------------------------------------ + umov w1, v0.b[15] + umov w14, v6.h[4] + umov w20, v9.s[2] 
+ umov x7, v18.d[1] + + mov w20, v9.s[2] + mov x7, v18.d[1] + +// CHECK: umov w1, v0.b[15] // encoding: [0x01,0x3c,0x1f,0x0e] +// CHECK: umov w14, v6.h[4] // encoding: [0xce,0x3c,0x12,0x0e] +// CHECK: umov w20, v9.s[2] // encoding: [0x34,0x3d,0x14,0x0e] +// CHECK: umov x7, v18.d[1] // encoding: [0x47,0x3e,0x18,0x4e] + +// CHECK: umov w20, v9.s[2] // encoding: [0x34,0x3d,0x14,0x0e] +// CHECK: umov x7, v18.d[1] // encoding: [0x47,0x3e,0x18,0x4e] + +//------------------------------------------------------------------------------ +// Insert element (vector, from element) +//------------------------------------------------------------------------------ + + ins v1.b[14], v3.b[6] + ins v6.h[7], v7.h[5] + ins v15.s[3], v22.s[2] + ins v0.d[0], v4.d[1] + + mov v1.b[14], v3.b[6] + mov v6.h[7], v7.h[5] + mov v15.s[3], v22.s[2] + mov v0.d[0], v4.d[1] + +// CHECK: ins v1.b[14], v3.b[6] // encoding: [0x61,0x34,0x1d,0x6e] +// CHECK: ins v6.h[7], v7.h[5] // encoding: [0xe6,0x54,0x1e,0x6e] +// CHECK: ins v15.s[3], v22.s[2] // encoding: [0xcf,0x46,0x1c,0x6e] +// CHECK: ins v0.d[0], v4.d[1] // encoding: [0x80,0x44,0x08,0x6e] + +// CHECK: ins v1.b[14], v3.b[6] // encoding: [0x61,0x34,0x1d,0x6e] +// CHECK: ins v6.h[7], v7.h[5] // encoding: [0xe6,0x54,0x1e,0x6e] +// CHECK: ins v15.s[3], v22.s[2] // encoding: [0xcf,0x46,0x1c,0x6e] +// CHECK: ins v0.d[0], v4.d[1] // encoding: [0x80,0x44,0x08,0x6e] + +//------------------------------------------------------------------------------ +// Duplicate to all lanes( vector, from element) +//------------------------------------------------------------------------------ + dup v1.8b, v2.b[2] + dup v11.4h, v7.h[7] + dup v17.2s, v20.s[0] + dup v1.16b, v2.b[2] + dup v11.8h, v7.h[7] + dup v17.4s, v20.s[0] + dup v5.2d, v1.d[1] + +// CHECK: dup v1.8b, v2.b[2] // encoding: [0x41,0x04,0x05,0x0e] +// CHECK: dup v11.4h, v7.h[7] // encoding: [0xeb,0x04,0x1e,0x0e] +// CHECK: dup v17.2s, v20.s[0] // encoding: [0x91,0x06,0x04,0x0e] +// CHECK: dup v1.16b, v2.b[2] // 
encoding: [0x41,0x04,0x05,0x4e] +// CHECK: dup v11.8h, v7.h[7] // encoding: [0xeb,0x04,0x1e,0x4e] +// CHECK: dup v17.4s, v20.s[0] // encoding: [0x91,0x06,0x04,0x4e] +// CHECK: dup v5.2d, v1.d[1] // encoding: [0x25,0x04,0x18,0x4e] + +//------------------------------------------------------------------------------ +// Duplicate to all lanes( vector, from main) +//------------------------------------------------------------------------------ + dup v1.8b, w1 + dup v11.4h, w14 + dup v17.2s, w30 + dup v1.16b, w2 + dup v11.8h, w16 + dup v17.4s, w28 + dup v5.2d, x0 + +// CHECK: dup v1.8b, w1 // encoding: [0x21,0x0c,0x01,0x0e] +// CHECK: dup v11.4h, w14 // encoding: [0xcb,0x0d,0x02,0x0e] +// CHECK: dup v17.2s, w30 // encoding: [0xd1,0x0f,0x04,0x0e] +// CHECK: dup v1.16b, w2 // encoding: [0x41,0x0c,0x01,0x4e] +// CHECK: dup v11.8h, w16 // encoding: [0x0b,0x0e,0x02,0x4e] +// CHECK: dup v17.4s, w28 // encoding: [0x91,0x0f,0x04,0x4e] +// CHECK: dup v5.2d, x0 // encoding: [0x05,0x0c,0x08,0x4e] + + + + + + + + + + diff --git a/test/MC/AArch64/neon-simd-ldst-multi-elem.s b/test/MC/AArch64/neon-simd-ldst-multi-elem.s new file mode 100644 index 0000000..05fe4da --- /dev/null +++ b/test/MC/AArch64/neon-simd-ldst-multi-elem.s @@ -0,0 +1,463 @@ +// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//------------------------------------------------------------------------------ +// Store multiple 1-element structures from one register +//------------------------------------------------------------------------------ + st1 {v0.16b}, [x0] + st1 {v15.8h}, [x15] + st1 {v31.4s}, [sp] + st1 {v0.2d}, [x0] + st1 {v0.8b}, [x0] + st1 {v15.4h}, [x15] + st1 {v31.2s}, [sp] + st1 {v0.1d}, [x0] +// CHECK: st1 {v0.16b}, [x0] // encoding: [0x00,0x70,0x00,0x4c] +// CHECK: st1 {v15.8h}, [x15] // encoding: [0xef,0x75,0x00,0x4c] +// CHECK: st1 {v31.4s}, [sp] // encoding: [0xff,0x7b,0x00,0x4c] +// CHECK: 
st1 {v0.2d}, [x0] // encoding: [0x00,0x7c,0x00,0x4c] +// CHECK: st1 {v0.8b}, [x0] // encoding: [0x00,0x70,0x00,0x0c] +// CHECK: st1 {v15.4h}, [x15] // encoding: [0xef,0x75,0x00,0x0c] +// CHECK: st1 {v31.2s}, [sp] // encoding: [0xff,0x7b,0x00,0x0c] +// CHECK: st1 {v0.1d}, [x0] // encoding: [0x00,0x7c,0x00,0x0c] + +//------------------------------------------------------------------------------ +// Store multiple 1-element structures from two consecutive registers +//------------------------------------------------------------------------------ + st1 {v0.16b, v1.16b}, [x0] + st1 {v15.8h, v16.8h}, [x15] + st1 {v31.4s, v0.4s}, [sp] + st1 {v0.2d, v1.2d}, [x0] + st1 {v0.8b, v1.8b}, [x0] + st1 {v15.4h, v16.4h}, [x15] + st1 {v31.2s, v0.2s}, [sp] + st1 {v0.1d, v1.1d}, [x0] +// CHECK: st1 {v0.16b, v1.16b}, [x0] // encoding: [0x00,0xa0,0x00,0x4c] +// CHECK: st1 {v15.8h, v16.8h}, [x15] // encoding: [0xef,0xa5,0x00,0x4c] +// CHECK: st1 {v31.4s, v0.4s}, [sp] // encoding: [0xff,0xab,0x00,0x4c] +// CHECK: st1 {v0.2d, v1.2d}, [x0] // encoding: [0x00,0xac,0x00,0x4c] +// CHECK: st1 {v0.8b, v1.8b}, [x0] // encoding: [0x00,0xa0,0x00,0x0c] +// CHECK: st1 {v15.4h, v16.4h}, [x15] // encoding: [0xef,0xa5,0x00,0x0c] +// CHECK: st1 {v31.2s, v0.2s}, [sp] // encoding: [0xff,0xab,0x00,0x0c] +// CHECK: st1 {v0.1d, v1.1d}, [x0] // encoding: [0x00,0xac,0x00,0x0c] + + st1 {v0.16b-v1.16b}, [x0] + st1 {v15.8h-v16.8h}, [x15] + st1 {v31.4s-v0.4s}, [sp] + st1 {v0.2d-v1.2d}, [x0] + st1 {v0.8b-v1.8b}, [x0] + st1 {v15.4h-v16.4h}, [x15] + st1 {v31.2s-v0.2s}, [sp] + st1 {v0.1d-v1.1d}, [x0] +// CHECK: st1 {v0.16b, v1.16b}, [x0] // encoding: [0x00,0xa0,0x00,0x4c] +// CHECK: st1 {v15.8h, v16.8h}, [x15] // encoding: [0xef,0xa5,0x00,0x4c] +// CHECK: st1 {v31.4s, v0.4s}, [sp] // encoding: [0xff,0xab,0x00,0x4c] +// CHECK: st1 {v0.2d, v1.2d}, [x0] // encoding: [0x00,0xac,0x00,0x4c] +// CHECK: st1 {v0.8b, v1.8b}, [x0] // encoding: [0x00,0xa0,0x00,0x0c] +// CHECK: st1 {v15.4h, v16.4h}, [x15] // encoding: 
[0xef,0xa5,0x00,0x0c] +// CHECK: st1 {v31.2s, v0.2s}, [sp] // encoding: [0xff,0xab,0x00,0x0c] +// CHECK: st1 {v0.1d, v1.1d}, [x0] // encoding: [0x00,0xac,0x00,0x0c] + +//------------------------------------------------------------------------------ +// Store multiple 1-element structures from three consecutive registers +//------------------------------------------------------------------------------ + st1 {v0.16b, v1.16b, v2.16b}, [x0] + st1 {v15.8h, v16.8h, v17.8h}, [x15] + st1 {v31.4s, v0.4s, v1.4s}, [sp] + st1 {v0.2d, v1.2d, v2.2d}, [x0] + st1 {v0.8b, v1.8b, v2.8b}, [x0] + st1 {v15.4h, v16.4h, v17.4h}, [x15] + st1 {v31.2s, v0.2s, v1.2s}, [sp] + st1 {v0.1d, v1.1d, v2.1d}, [x0] +// CHECK: st1 {v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x60,0x00,0x4c] +// CHECK: st1 {v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x65,0x00,0x4c] +// CHECK: st1 {v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x6b,0x00,0x4c] +// CHECK: st1 {v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x6c,0x00,0x4c] +// CHECK: st1 {v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x60,0x00,0x0c] +// CHECK: st1 {v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x65,0x00,0x0c] +// CHECK: st1 {v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x6b,0x00,0x0c] +// CHECK: st1 {v0.1d, v1.1d, v2.1d}, [x0] // encoding: [0x00,0x6c,0x00,0x0c] + + st1 {v0.16b-v2.16b}, [x0] + st1 {v15.8h-v17.8h}, [x15] + st1 {v31.4s-v1.4s}, [sp] + st1 {v0.2d-v2.2d}, [x0] + st1 {v0.8b-v2.8b}, [x0] + st1 {v15.4h-v17.4h}, [x15] + st1 {v31.2s-v1.2s}, [sp] + st1 {v0.1d-v2.1d}, [x0] +// CHECK: st1 {v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x60,0x00,0x4c] +// CHECK: st1 {v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x65,0x00,0x4c] +// CHECK: st1 {v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x6b,0x00,0x4c] +// CHECK: st1 {v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x6c,0x00,0x4c] +// CHECK: st1 {v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x60,0x00,0x0c] +// CHECK: st1 {v15.4h, v16.4h, v17.4h}, [x15] // 
encoding: [0xef,0x65,0x00,0x0c] +// CHECK: st1 {v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x6b,0x00,0x0c] +// CHECK: st1 {v0.1d, v1.1d, v2.1d}, [x0] // encoding: [0x00,0x6c,0x00,0x0c] + +//------------------------------------------------------------------------------ +// Store multiple 1-element structures from four consecutive registers +//------------------------------------------------------------------------------ + st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] + st1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] + st1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] + st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] + st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] + st1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] + st1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] + st1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0] +// CHECK: st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x20,0x00,0x4c] +// CHECK: st1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x25,0x00,0x4c] +// CHECK: st1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x2b,0x00,0x4c] +// CHECK: st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x2c,0x00,0x4c] +// CHECK: st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x20,0x00,0x0c] +// CHECK: st1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x25,0x00,0x0c] +// CHECK: st1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x2b,0x00,0x0c] +// CHECK: st1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0] // encoding: [0x00,0x2c,0x00,0x0c] + + st1 {v0.16b-v3.16b}, [x0] + st1 {v15.8h-v18.8h}, [x15] + st1 {v31.4s-v2.4s}, [sp] + st1 {v0.2d-v3.2d}, [x0] + st1 {v0.8b-v3.8b}, [x0] + st1 {v15.4h-v18.4h}, [x15] + st1 {v31.2s-v2.2s}, [sp] + st1 {v0.1d-v3.1d}, [x0] +// CHECK: st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x20,0x00,0x4c] +// CHECK: st1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x25,0x00,0x4c] +// CHECK: st1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x2b,0x00,0x4c] +// CHECK: st1 {v0.2d, v1.2d, v2.2d, v3.2d}, 
[x0] // encoding: [0x00,0x2c,0x00,0x4c] +// CHECK: st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x20,0x00,0x0c] +// CHECK: st1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x25,0x00,0x0c] +// CHECK: st1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x2b,0x00,0x0c] +// CHECK: st1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0] // encoding: [0x00,0x2c,0x00,0x0c] + +//------------------------------------------------------------------------------ +// Store multiple 2-element structures from two consecutive registers +//------------------------------------------------------------------------------ + st2 {v0.16b, v1.16b}, [x0] + st2 {v15.8h, v16.8h}, [x15] + st2 {v31.4s, v0.4s}, [sp] + st2 {v0.2d, v1.2d}, [x0] + st2 {v0.8b, v1.8b}, [x0] + st2 {v15.4h, v16.4h}, [x15] + st2 {v31.2s, v0.2s}, [sp] +// CHECK: st2 {v0.16b, v1.16b}, [x0] // encoding: [0x00,0x80,0x00,0x4c] +// CHECK: st2 {v15.8h, v16.8h}, [x15] // encoding: [0xef,0x85,0x00,0x4c] +// CHECK: st2 {v31.4s, v0.4s}, [sp] // encoding: [0xff,0x8b,0x00,0x4c] +// CHECK: st2 {v0.2d, v1.2d}, [x0] // encoding: [0x00,0x8c,0x00,0x4c] +// CHECK: st2 {v0.8b, v1.8b}, [x0] // encoding: [0x00,0x80,0x00,0x0c] +// CHECK: st2 {v15.4h, v16.4h}, [x15] // encoding: [0xef,0x85,0x00,0x0c] +// CHECK: st2 {v31.2s, v0.2s}, [sp] // encoding: [0xff,0x8b,0x00,0x0c] + + st2 {v0.16b-v1.16b}, [x0] + st2 {v15.8h-v16.8h}, [x15] + st2 {v31.4s-v0.4s}, [sp] + st2 {v0.2d-v1.2d}, [x0] + st2 {v0.8b-v1.8b}, [x0] + st2 {v15.4h-v16.4h}, [x15] + st2 {v31.2s-v0.2s}, [sp] +// CHECK: st2 {v0.16b, v1.16b}, [x0] // encoding: [0x00,0x80,0x00,0x4c] +// CHECK: st2 {v15.8h, v16.8h}, [x15] // encoding: [0xef,0x85,0x00,0x4c] +// CHECK: st2 {v31.4s, v0.4s}, [sp] // encoding: [0xff,0x8b,0x00,0x4c] +// CHECK: st2 {v0.2d, v1.2d}, [x0] // encoding: [0x00,0x8c,0x00,0x4c] +// CHECK: st2 {v0.8b, v1.8b}, [x0] // encoding: [0x00,0x80,0x00,0x0c] +// CHECK: st2 {v15.4h, v16.4h}, [x15] // encoding: [0xef,0x85,0x00,0x0c] +// CHECK: st2 {v31.2s, v0.2s}, [sp] // 
encoding: [0xff,0x8b,0x00,0x0c] + +//------------------------------------------------------------------------------ +// Store multiple 3-element structures from three consecutive registers +//------------------------------------------------------------------------------ + st3 {v0.16b, v1.16b, v2.16b}, [x0] + st3 {v15.8h, v16.8h, v17.8h}, [x15] + st3 {v31.4s, v0.4s, v1.4s}, [sp] + st3 {v0.2d, v1.2d, v2.2d}, [x0] + st3 {v0.8b, v1.8b, v2.8b}, [x0] + st3 {v15.4h, v16.4h, v17.4h}, [x15] + st3 {v31.2s, v0.2s, v1.2s}, [sp] +// CHECK: st3 {v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x40,0x00,0x4c] +// CHECK: st3 {v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x45,0x00,0x4c] +// CHECK: st3 {v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x4b,0x00,0x4c] +// CHECK: st3 {v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x4c,0x00,0x4c] +// CHECK: st3 {v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x40,0x00,0x0c] +// CHECK: st3 {v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x45,0x00,0x0c] +// CHECK: st3 {v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x4b,0x00,0x0c] + + st3 {v0.16b-v2.16b}, [x0] + st3 {v15.8h-v17.8h}, [x15] + st3 {v31.4s-v1.4s}, [sp] + st3 {v0.2d-v2.2d}, [x0] + st3 {v0.8b-v2.8b}, [x0] + st3 {v15.4h-v17.4h}, [x15] + st3 {v31.2s-v1.2s}, [sp] +// CHECK: st3 {v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x40,0x00,0x4c] +// CHECK: st3 {v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x45,0x00,0x4c] +// CHECK: st3 {v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x4b,0x00,0x4c] +// CHECK: st3 {v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x4c,0x00,0x4c] +// CHECK: st3 {v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x40,0x00,0x0c] +// CHECK: st3 {v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x45,0x00,0x0c] +// CHECK: st3 {v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x4b,0x00,0x0c] + +//------------------------------------------------------------------------------ +// Store multiple 4-element structures from four consecutive registers 
+//------------------------------------------------------------------------------ + st4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] + st4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] + st4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] + st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] + st4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] + st4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] + st4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] +// CHECK: st4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x00,0x00,0x4c] +// CHECK: st4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x05,0x00,0x4c] +// CHECK: st4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x0b,0x00,0x4c] +// CHECK: st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x0c,0x00,0x4c] +// CHECK: st4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x00,0x00,0x0c] +// CHECK: st4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x05,0x00,0x0c] +// CHECK: st4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x0b,0x00,0x0c] + + st4 {v0.16b-v3.16b}, [x0] + st4 {v15.8h-v18.8h}, [x15] + st4 {v31.4s-v2.4s}, [sp] + st4 {v0.2d-v3.2d}, [x0] + st4 {v0.8b-v3.8b}, [x0] + st4 {v15.4h-v18.4h}, [x15] + st4 {v31.2s-v2.2s}, [sp] +// CHECK: st4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x00,0x00,0x4c] +// CHECK: st4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x05,0x00,0x4c] +// CHECK: st4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x0b,0x00,0x4c] +// CHECK: st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x0c,0x00,0x4c] +// CHECK: st4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x00,0x00,0x0c] +// CHECK: st4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x05,0x00,0x0c] +// CHECK: st4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x0b,0x00,0x0c] + +//------------------------------------------------------------------------------ +// Load multiple 1-element structures to one register 
+//------------------------------------------------------------------------------ + ld1 {v0.16b}, [x0] + ld1 {v15.8h}, [x15] + ld1 {v31.4s}, [sp] + ld1 {v0.2d}, [x0] + ld1 {v0.8b}, [x0] + ld1 {v15.4h}, [x15] + ld1 {v31.2s}, [sp] + ld1 {v0.1d}, [x0] +// CHECK: ld1 {v0.16b}, [x0] // encoding: [0x00,0x70,0x40,0x4c] +// CHECK: ld1 {v15.8h}, [x15] // encoding: [0xef,0x75,0x40,0x4c] +// CHECK: ld1 {v31.4s}, [sp] // encoding: [0xff,0x7b,0x40,0x4c] +// CHECK: ld1 {v0.2d}, [x0] // encoding: [0x00,0x7c,0x40,0x4c] +// CHECK: ld1 {v0.8b}, [x0] // encoding: [0x00,0x70,0x40,0x0c] +// CHECK: ld1 {v15.4h}, [x15] // encoding: [0xef,0x75,0x40,0x0c] +// CHECK: ld1 {v31.2s}, [sp] // encoding: [0xff,0x7b,0x40,0x0c] +// CHECK: ld1 {v0.1d}, [x0] // encoding: [0x00,0x7c,0x40,0x0c] + +//------------------------------------------------------------------------------ +// Load multiple 1-element structures to two consecutive registers +//------------------------------------------------------------------------------ + ld1 {v0.16b, v1.16b}, [x0] + ld1 {v15.8h, v16.8h}, [x15] + ld1 {v31.4s, v0.4s}, [sp] + ld1 {v0.2d, v1.2d}, [x0] + ld1 {v0.8b, v1.8b}, [x0] + ld1 {v15.4h, v16.4h}, [x15] + ld1 {v31.2s, v0.2s}, [sp] + ld1 {v0.1d, v1.1d}, [x0] +// CHECK: ld1 {v0.16b, v1.16b}, [x0] // encoding: [0x00,0xa0,0x40,0x4c] +// CHECK: ld1 {v15.8h, v16.8h}, [x15] // encoding: [0xef,0xa5,0x40,0x4c] +// CHECK: ld1 {v31.4s, v0.4s}, [sp] // encoding: [0xff,0xab,0x40,0x4c] +// CHECK: ld1 {v0.2d, v1.2d}, [x0] // encoding: [0x00,0xac,0x40,0x4c] +// CHECK: ld1 {v0.8b, v1.8b}, [x0] // encoding: [0x00,0xa0,0x40,0x0c] +// CHECK: ld1 {v15.4h, v16.4h}, [x15] // encoding: [0xef,0xa5,0x40,0x0c] +// CHECK: ld1 {v31.2s, v0.2s}, [sp] // encoding: [0xff,0xab,0x40,0x0c] +// CHECK: ld1 {v0.1d, v1.1d}, [x0] // encoding: [0x00,0xac,0x40,0x0c] + + ld1 {v0.16b-v1.16b}, [x0] + ld1 {v15.8h-v16.8h}, [x15] + ld1 {v31.4s-v0.4s}, [sp] + ld1 {v0.2d-v1.2d}, [x0] + ld1 {v0.8b-v1.8b}, [x0] + ld1 {v15.4h-v16.4h}, [x15] + ld1 {v31.2s-v0.2s}, [sp] 
+ ld1 {v0.1d-v1.1d}, [x0] +// CHECK: ld1 {v0.16b, v1.16b}, [x0] // encoding: [0x00,0xa0,0x40,0x4c] +// CHECK: ld1 {v15.8h, v16.8h}, [x15] // encoding: [0xef,0xa5,0x40,0x4c] +// CHECK: ld1 {v31.4s, v0.4s}, [sp] // encoding: [0xff,0xab,0x40,0x4c] +// CHECK: ld1 {v0.2d, v1.2d}, [x0] // encoding: [0x00,0xac,0x40,0x4c] +// CHECK: ld1 {v0.8b, v1.8b}, [x0] // encoding: [0x00,0xa0,0x40,0x0c] +// CHECK: ld1 {v15.4h, v16.4h}, [x15] // encoding: [0xef,0xa5,0x40,0x0c] +// CHECK: ld1 {v31.2s, v0.2s}, [sp] // encoding: [0xff,0xab,0x40,0x0c] +// CHECK: ld1 {v0.1d, v1.1d}, [x0] // encoding: [0x00,0xac,0x40,0x0c] + +//------------------------------------------------------------------------------ +// Load multiple 1-element structures to three consecutive registers +//------------------------------------------------------------------------------ + ld1 {v0.16b, v1.16b, v2.16b}, [x0] + ld1 {v15.8h, v16.8h, v17.8h}, [x15] + ld1 {v31.4s, v0.4s, v1.4s}, [sp] + ld1 {v0.2d, v1.2d, v2.2d}, [x0] + ld1 {v0.8b, v1.8b, v2.8b}, [x0] + ld1 {v15.4h, v16.4h, v17.4h}, [x15] + ld1 {v31.2s, v0.2s, v1.2s}, [sp] + ld1 {v0.1d, v1.1d, v2.1d}, [x0] +// CHECK: ld1 {v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x60,0x40,0x4c] +// CHECK: ld1 {v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x65,0x40,0x4c] +// CHECK: ld1 {v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x6b,0x40,0x4c] +// CHECK: ld1 {v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x6c,0x40,0x4c] +// CHECK: ld1 {v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x60,0x40,0x0c] +// CHECK: ld1 {v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x65,0x40,0x0c] +// CHECK: ld1 {v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x6b,0x40,0x0c] +// CHECK: ld1 {v0.1d, v1.1d, v2.1d}, [x0] // encoding: [0x00,0x6c,0x40,0x0c] + + ld1 {v0.16b-v2.16b}, [x0] + ld1 {v15.8h-v17.8h}, [x15] + ld1 {v31.4s-v1.4s}, [sp] + ld1 {v0.2d-v2.2d}, [x0] + ld1 {v0.8b-v2.8b}, [x0] + ld1 {v15.4h-v17.4h}, [x15] + ld1 {v31.2s-v1.2s}, [sp] + ld1 {v0.1d-v2.1d}, [x0] +// CHECK: ld1 
{v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x60,0x40,0x4c] +// CHECK: ld1 {v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x65,0x40,0x4c] +// CHECK: ld1 {v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x6b,0x40,0x4c] +// CHECK: ld1 {v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x6c,0x40,0x4c] +// CHECK: ld1 {v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x60,0x40,0x0c] +// CHECK: ld1 {v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x65,0x40,0x0c] +// CHECK: ld1 {v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x6b,0x40,0x0c] +// CHECK: ld1 {v0.1d, v1.1d, v2.1d}, [x0] // encoding: [0x00,0x6c,0x40,0x0c] + +//------------------------------------------------------------------------------ +// Load multiple 1-element structures to four consecutive registers +//------------------------------------------------------------------------------ + ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] + ld1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] + ld1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] + ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] + ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] + ld1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] + ld1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] + ld1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0] +// CHECK: ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x20,0x40,0x4c] +// CHECK: ld1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x25,0x40,0x4c] +// CHECK: ld1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x2b,0x40,0x4c] +// CHECK: ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x2c,0x40,0x4c] +// CHECK: ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x20,0x40,0x0c] +// CHECK: ld1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x25,0x40,0x0c] +// CHECK: ld1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x2b,0x40,0x0c] +// CHECK: ld1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0] // encoding: [0x00,0x2c,0x40,0x0c] + + ld1 {v0.16b-v3.16b}, [x0] + ld1 {v15.8h-v18.8h}, [x15] + ld1 {v31.4s-v2.4s}, [sp] + ld1 
{v0.2d-v3.2d}, [x0] + ld1 {v0.8b-v3.8b}, [x0] + ld1 {v15.4h-v18.4h}, [x15] + ld1 {v31.2s-v2.2s}, [sp] + ld1 {v0.1d-v3.1d}, [x0] +// CHECK: ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x20,0x40,0x4c] +// CHECK: ld1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x25,0x40,0x4c] +// CHECK: ld1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x2b,0x40,0x4c] +// CHECK: ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x2c,0x40,0x4c] +// CHECK: ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x20,0x40,0x0c] +// CHECK: ld1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x25,0x40,0x0c] +// CHECK: ld1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x2b,0x40,0x0c] +// CHECK: ld1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0] // encoding: [0x00,0x2c,0x40,0x0c] + +//------------------------------------------------------------------------------ +// Load multiple 2-element structures to two consecutive registers +//------------------------------------------------------------------------------ + ld2 {v0.16b, v1.16b}, [x0] + ld2 {v15.8h, v16.8h}, [x15] + ld2 {v31.4s, v0.4s}, [sp] + ld2 {v0.2d, v1.2d}, [x0] + ld2 {v0.8b, v1.8b}, [x0] + ld2 {v15.4h, v16.4h}, [x15] + ld2 {v31.2s, v0.2s}, [sp] +// CHECK: ld2 {v0.16b, v1.16b}, [x0] // encoding: [0x00,0x80,0x40,0x4c] +// CHECK: ld2 {v15.8h, v16.8h}, [x15] // encoding: [0xef,0x85,0x40,0x4c] +// CHECK: ld2 {v31.4s, v0.4s}, [sp] // encoding: [0xff,0x8b,0x40,0x4c] +// CHECK: ld2 {v0.2d, v1.2d}, [x0] // encoding: [0x00,0x8c,0x40,0x4c] +// CHECK: ld2 {v0.8b, v1.8b}, [x0] // encoding: [0x00,0x80,0x40,0x0c] +// CHECK: ld2 {v15.4h, v16.4h}, [x15] // encoding: [0xef,0x85,0x40,0x0c] +// CHECK: ld2 {v31.2s, v0.2s}, [sp] // encoding: [0xff,0x8b,0x40,0x0c] + + ld2 {v0.16b-v1.16b}, [x0] + ld2 {v15.8h-v16.8h}, [x15] + ld2 {v31.4s-v0.4s}, [sp] + ld2 {v0.2d-v1.2d}, [x0] + ld2 {v0.8b-v1.8b}, [x0] + ld2 {v15.4h-v16.4h}, [x15] + ld2 {v31.2s-v0.2s}, [sp] +// CHECK: ld2 {v0.16b, v1.16b}, [x0] //
encoding: [0x00,0x80,0x40,0x4c] +// CHECK: ld2 {v15.8h, v16.8h}, [x15] // encoding: [0xef,0x85,0x40,0x4c] +// CHECK: ld2 {v31.4s, v0.4s}, [sp] // encoding: [0xff,0x8b,0x40,0x4c] +// CHECK: ld2 {v0.2d, v1.2d}, [x0] // encoding: [0x00,0x8c,0x40,0x4c] +// CHECK: ld2 {v0.8b, v1.8b}, [x0] // encoding: [0x00,0x80,0x40,0x0c] +// CHECK: ld2 {v15.4h, v16.4h}, [x15] // encoding: [0xef,0x85,0x40,0x0c] +// CHECK: ld2 {v31.2s, v0.2s}, [sp] // encoding: [0xff,0x8b,0x40,0x0c] + +//------------------------------------------------------------------------------ +// Load multiple 3-element structures to three consecutive registers +//------------------------------------------------------------------------------ + ld3 {v0.16b, v1.16b, v2.16b}, [x0] + ld3 {v15.8h, v16.8h, v17.8h}, [x15] + ld3 {v31.4s, v0.4s, v1.4s}, [sp] + ld3 {v0.2d, v1.2d, v2.2d}, [x0] + ld3 {v0.8b, v1.8b, v2.8b}, [x0] + ld3 {v15.4h, v16.4h, v17.4h}, [x15] + ld3 {v31.2s, v0.2s, v1.2s}, [sp] +// CHECK: ld3 {v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x40,0x40,0x4c] +// CHECK: ld3 {v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x45,0x40,0x4c] +// CHECK: ld3 {v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x4b,0x40,0x4c] +// CHECK: ld3 {v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x4c,0x40,0x4c] +// CHECK: ld3 {v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x40,0x40,0x0c] +// CHECK: ld3 {v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x45,0x40,0x0c] +// CHECK: ld3 {v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x4b,0x40,0x0c] + + ld3 {v0.16b-v2.16b}, [x0] + ld3 {v15.8h-v17.8h}, [x15] + ld3 {v31.4s-v1.4s}, [sp] + ld3 {v0.2d-v2.2d}, [x0] + ld3 {v0.8b-v2.8b}, [x0] + ld3 {v15.4h-v17.4h}, [x15] + ld3 {v31.2s-v1.2s}, [sp] +// CHECK: ld3 {v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x40,0x40,0x4c] +// CHECK: ld3 {v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x45,0x40,0x4c] +// CHECK: ld3 {v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x4b,0x40,0x4c] +// CHECK: ld3 {v0.2d, v1.2d, v2.2d}, [x0] // 
encoding: [0x00,0x4c,0x40,0x4c] +// CHECK: ld3 {v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x40,0x40,0x0c] +// CHECK: ld3 {v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x45,0x40,0x0c] +// CHECK: ld3 {v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x4b,0x40,0x0c] + +//------------------------------------------------------------------------------ +// Load multiple 4-element structures to four consecutive registers +//------------------------------------------------------------------------------ + ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] + ld4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] + ld4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] + ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] + ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] + ld4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] + ld4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] +// CHECK: ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x00,0x40,0x4c] +// CHECK: ld4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x05,0x40,0x4c] +// CHECK: ld4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x0b,0x40,0x4c] +// CHECK: ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x0c,0x40,0x4c] +// CHECK: ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x00,0x40,0x0c] +// CHECK: ld4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x05,0x40,0x0c] +// CHECK: ld4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x0b,0x40,0x0c] + + ld4 {v0.16b-v3.16b}, [x0] + ld4 {v15.8h-v18.8h}, [x15] + ld4 {v31.4s-v2.4s}, [sp] + ld4 {v0.2d-v3.2d}, [x0] + ld4 {v0.8b-v3.8b}, [x0] + ld4 {v15.4h-v18.4h}, [x15] + ld4 {v31.2s-v2.2s}, [sp] +// CHECK: ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x00,0x40,0x4c] +// CHECK: ld4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x05,0x40,0x4c] +// CHECK: ld4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x0b,0x40,0x4c] +// CHECK: ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x0c,0x40,0x4c] +// CHECK: ld4 {v0.8b, v1.8b, 
v2.8b, v3.8b}, [x0] // encoding: [0x00,0x00,0x40,0x0c] +// CHECK: ld4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x05,0x40,0x0c] +// CHECK: ld4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x0b,0x40,0x0c] diff --git a/test/MC/AArch64/neon-simd-ldst-one-elem.s b/test/MC/AArch64/neon-simd-ldst-one-elem.s new file mode 100644 index 0000000..140d752 --- /dev/null +++ b/test/MC/AArch64/neon-simd-ldst-one-elem.s @@ -0,0 +1,325 @@ +// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//------------------------------------------------------------------------------ +// Load single 1-element structure to all lanes of 1 register +//------------------------------------------------------------------------------ + ld1r {v0.16b}, [x0] + ld1r {v15.8h}, [x15] + ld1r {v31.4s}, [sp] + ld1r {v0.2d}, [x0] + ld1r {v0.8b}, [x0] + ld1r {v15.4h}, [x15] + ld1r {v31.2s}, [sp] + ld1r {v0.1d}, [x0] +// CHECK: ld1r {v0.16b}, [x0] // encoding: [0x00,0xc0,0x40,0x4d] +// CHECK: ld1r {v15.8h}, [x15] // encoding: [0xef,0xc5,0x40,0x4d] +// CHECK: ld1r {v31.4s}, [sp] // encoding: [0xff,0xcb,0x40,0x4d] +// CHECK: ld1r {v0.2d}, [x0] // encoding: [0x00,0xcc,0x40,0x4d] +// CHECK: ld1r {v0.8b}, [x0] // encoding: [0x00,0xc0,0x40,0x0d] +// CHECK: ld1r {v15.4h}, [x15] // encoding: [0xef,0xc5,0x40,0x0d] +// CHECK: ld1r {v31.2s}, [sp] // encoding: [0xff,0xcb,0x40,0x0d] +// CHECK: ld1r {v0.1d}, [x0] // encoding: [0x00,0xcc,0x40,0x0d] + +//------------------------------------------------------------------------------ +// Load single N-element structure to all lanes of N consecutive +// registers (N = 2,3,4) +//------------------------------------------------------------------------------ + ld2r {v0.16b, v1.16b}, [x0] + ld2r {v15.8h, v16.8h}, [x15] + ld2r {v31.4s, v0.4s}, [sp] + ld2r {v0.2d, v1.2d}, [x0] + ld2r {v0.8b, v1.8b}, [x0] + ld2r {v15.4h, v16.4h}, [x15] + ld2r {v31.2s, 
v0.2s}, [sp] + ld2r {v31.1d, v0.1d}, [sp] +// CHECK: ld2r {v0.16b, v1.16b}, [x0] // encoding: [0x00,0xc0,0x60,0x4d] +// CHECK: ld2r {v15.8h, v16.8h}, [x15] // encoding: [0xef,0xc5,0x60,0x4d] +// CHECK: ld2r {v31.4s, v0.4s}, [sp] // encoding: [0xff,0xcb,0x60,0x4d] +// CHECK: ld2r {v0.2d, v1.2d}, [x0] // encoding: [0x00,0xcc,0x60,0x4d] +// CHECK: ld2r {v0.8b, v1.8b}, [x0] // encoding: [0x00,0xc0,0x60,0x0d] +// CHECK: ld2r {v15.4h, v16.4h}, [x15] // encoding: [0xef,0xc5,0x60,0x0d] +// CHECK: ld2r {v31.2s, v0.2s}, [sp] // encoding: [0xff,0xcb,0x60,0x0d] +// CHECK: ld2r {v31.1d, v0.1d}, [sp] // encoding: [0xff,0xcf,0x60,0x0d] + + ld3r {v0.16b, v1.16b, v2.16b}, [x0] + ld3r {v15.8h, v16.8h, v17.8h}, [x15] + ld3r {v31.4s, v0.4s, v1.4s}, [sp] + ld3r {v0.2d, v1.2d, v2.2d}, [x0] + ld3r {v0.8b, v1.8b, v2.8b}, [x0] + ld3r {v15.4h, v16.4h, v17.4h}, [x15] + ld3r {v31.2s, v0.2s, v1.2s}, [sp] + ld3r {v31.1d, v0.1d, v1.1d}, [sp] +// CHECK: ld3r {v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0xe0,0x40,0x4d] +// CHECK: ld3r {v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0xe5,0x40,0x4d] +// CHECK: ld3r {v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0xeb,0x40,0x4d] +// CHECK: ld3r {v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0xec,0x40,0x4d] +// CHECK: ld3r {v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0xe0,0x40,0x0d] +// CHECK: ld3r {v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0xe5,0x40,0x0d] +// CHECK: ld3r {v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0xeb,0x40,0x0d] +// CHECK: ld3r {v31.1d, v0.1d, v1.1d}, [sp] // encoding: [0xff,0xef,0x40,0x0d] + + ld4r {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] + ld4r {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] + ld4r {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] + ld4r {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] + ld4r {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] + ld4r {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] + ld4r {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] + ld4r {v31.1d, v0.1d, v1.1d, v2.1d}, [sp] +// CHECK: ld4r {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: 
[0x00,0xe0,0x60,0x4d] +// CHECK: ld4r {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0xe5,0x60,0x4d] +// CHECK: ld4r {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0xeb,0x60,0x4d] +// CHECK: ld4r {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0xec,0x60,0x4d] +// CHECK: ld4r {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0xe0,0x60,0x0d] +// CHECK: ld4r {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0xe5,0x60,0x0d] +// CHECK: ld4r {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0xeb,0x60,0x0d] +// CHECK: ld4r {v31.1d, v0.1d, v1.1d, v2.1d}, [sp] // encoding: [0xff,0xef,0x60,0x0d] + +//------------------------------------------------------------------------------ +// Load single 1-element structure to one lane of 1 register. +//------------------------------------------------------------------------------ + ld1 {v0.b}[9], [x0] + ld1 {v15.h}[7], [x15] + ld1 {v31.s}[3], [sp] + ld1 {v0.d}[1], [x0] +// CHECK: ld1 {v0.b}[9], [x0] // encoding: [0x00,0x04,0x40,0x4d] +// CHECK: ld1 {v15.h}[7], [x15] // encoding: [0xef,0x59,0x40,0x4d] +// CHECK: ld1 {v31.s}[3], [sp] // encoding: [0xff,0x93,0x40,0x4d] +// CHECK: ld1 {v0.d}[1], [x0] // encoding: [0x00,0x84,0x40,0x4d] + +//------------------------------------------------------------------------------ +// Load single N-element structure to one lane of N consecutive registers +// (N = 2,3,4) +//------------------------------------------------------------------------------ + ld2 {v0.b, v1.b}[9], [x0] + ld2 {v15.h, v16.h}[7], [x15] + ld2 {v31.s, v0.s}[3], [sp] + ld2 {v0.d, v1.d}[1], [x0] +// CHECK: ld2 {v0.b, v1.b}[9], [x0] // encoding: [0x00,0x04,0x60,0x4d] +// CHECK: ld2 {v15.h, v16.h}[7], [x15] // encoding: [0xef,0x59,0x60,0x4d] +// CHECK: ld2 {v31.s, v0.s}[3], [sp] // encoding: [0xff,0x93,0x60,0x4d] +// CHECK: ld2 {v0.d, v1.d}[1], [x0] // encoding: [0x00,0x84,0x60,0x4d] + + ld3 {v0.b, v1.b, v2.b}[9], [x0] + ld3 {v15.h, v16.h, v17.h}[7], [x15] + ld3 {v31.s, v0.s, v1.s}[3], [sp] + 
ld3 {v0.d, v1.d, v2.d}[1], [x0] +// CHECK: ld3 {v0.b, v1.b, v2.b}[9], [x0] // encoding: [0x00,0x24,0x40,0x4d] +// CHECK: ld3 {v15.h, v16.h, v17.h}[7], [x15] // encoding: [0xef,0x79,0x40,0x4d] +// CHECK: ld3 {v31.s, v0.s, v1.s}[3], [sp] // encoding: [0xff,0xb3,0x40,0x4d] +// CHECK: ld3 {v0.d, v1.d, v2.d}[1], [x0] // encoding: [0x00,0xa4,0x40,0x4d] + + ld4 {v0.b, v1.b, v2.b, v3.b}[9], [x0] + ld4 {v15.h, v16.h, v17.h, v18.h}[7], [x15] + ld4 {v31.s, v0.s, v1.s, v2.s}[3], [sp] + ld4 {v0.d, v1.d, v2.d, v3.d}[1], [x0] +// CHECK: ld4 {v0.b, v1.b, v2.b, v3.b}[9], [x0] // encoding: [0x00,0x24,0x60,0x4d] +// CHECK: ld4 {v15.h, v16.h, v17.h, v18.h}[7], [x15] // encoding: [0xef,0x79,0x60,0x4d] +// CHECK: ld4 {v31.s, v0.s, v1.s, v2.s}[3], [sp] // encoding: [0xff,0xb3,0x60,0x4d] +// CHECK: ld4 {v0.d, v1.d, v2.d, v3.d}[1], [x0] // encoding: [0x00,0xa4,0x60,0x4d] + +//------------------------------------------------------------------------------ +// Store single 1-element structure from one lane of 1 register. 
+//------------------------------------------------------------------------------ + st1 {v0.b}[9], [x0] + st1 {v15.h}[7], [x15] + st1 {v31.s}[3], [sp] + st1 {v0.d}[1], [x0] +// CHECK: st1 {v0.b}[9], [x0] // encoding: [0x00,0x04,0x00,0x4d] +// CHECK: st1 {v15.h}[7], [x15] // encoding: [0xef,0x59,0x00,0x4d] +// CHECK: st1 {v31.s}[3], [sp] // encoding: [0xff,0x93,0x00,0x4d] +// CHECK: st1 {v0.d}[1], [x0] // encoding: [0x00,0x84,0x00,0x4d] + +//------------------------------------------------------------------------------ +// Store single N-element structure from one lane of N consecutive registers +// (N = 2,3,4) +//------------------------------------------------------------------------------ + st2 {v0.b, v1.b}[9], [x0] + st2 {v15.h, v16.h}[7], [x15] + st2 {v31.s, v0.s}[3], [sp] + st2 {v0.d, v1.d}[1], [x0] +// CHECK: st2 {v0.b, v1.b}[9], [x0] // encoding: [0x00,0x04,0x20,0x4d] +// CHECK: st2 {v15.h, v16.h}[7], [x15] // encoding: [0xef,0x59,0x20,0x4d] +// CHECK: st2 {v31.s, v0.s}[3], [sp] // encoding: [0xff,0x93,0x20,0x4d] +// CHECK: st2 {v0.d, v1.d}[1], [x0] // encoding: [0x00,0x84,0x20,0x4d] + + st3 {v0.b, v1.b, v2.b}[9], [x0] + st3 {v15.h, v16.h, v17.h}[7], [x15] + st3 {v31.s, v0.s, v1.s}[3], [sp] + st3 {v0.d, v1.d, v2.d}[1], [x0] +// CHECK: st3 {v0.b, v1.b, v2.b}[9], [x0] // encoding: [0x00,0x24,0x00,0x4d] +// CHECK: st3 {v15.h, v16.h, v17.h}[7], [x15] // encoding: [0xef,0x79,0x00,0x4d] +// CHECK: st3 {v31.s, v0.s, v1.s}[3], [sp] // encoding: [0xff,0xb3,0x00,0x4d] +// CHECK: st3 {v0.d, v1.d, v2.d}[1], [x0] // encoding: [0x00,0xa4,0x00,0x4d] + + st4 {v0.b, v1.b, v2.b, v3.b}[9], [x0] + st4 {v15.h, v16.h, v17.h, v18.h}[7], [x15] + st4 {v31.s, v0.s, v1.s, v2.s}[3], [sp] + st4 {v0.d, v1.d, v2.d, v3.d}[1], [x0] +// CHECK: st4 {v0.b, v1.b, v2.b, v3.b}[9], [x0] // encoding: [0x00,0x24,0x20,0x4d] +// CHECK: st4 {v15.h, v16.h, v17.h, v18.h}[7], [x15] // encoding: [0xef,0x79,0x20,0x4d] +// CHECK: st4 {v31.s, v0.s, v1.s, v2.s}[3], [sp] // encoding: [0xff,0xb3,0x20,0x4d] +// 
CHECK: st4 {v0.d, v1.d, v2.d, v3.d}[1], [x0] // encoding: [0x00,0xa4,0x20,0x4d] + +//------------------------------------------------------------------------------ +// Post-index load single 1-element structure to all lanes of 1 register +//------------------------------------------------------------------------------ + ld1r {v0.16b}, [x0], #1 + ld1r {v15.8h}, [x15], #2 + ld1r {v31.4s}, [sp], #4 + ld1r {v0.2d}, [x0], #8 + ld1r {v0.8b}, [x0], x0 + ld1r {v15.4h}, [x15], x1 + ld1r {v31.2s}, [sp], x2 + ld1r {v0.1d}, [x0], x3 +// CHECK: ld1r {v0.16b}, [x0], #1 // encoding: [0x00,0xc0,0xdf,0x4d] +// CHECK: ld1r {v15.8h}, [x15], #2 // encoding: [0xef,0xc5,0xdf,0x4d] +// CHECK: ld1r {v31.4s}, [sp], #4 // encoding: [0xff,0xcb,0xdf,0x4d] +// CHECK: ld1r {v0.2d}, [x0], #8 // encoding: [0x00,0xcc,0xdf,0x4d] +// CHECK: ld1r {v0.8b}, [x0], x0 // encoding: [0x00,0xc0,0xc0,0x0d] +// CHECK: ld1r {v15.4h}, [x15], x1 // encoding: [0xef,0xc5,0xc1,0x0d] +// CHECK: ld1r {v31.2s}, [sp], x2 // encoding: [0xff,0xcb,0xc2,0x0d] +// CHECK: ld1r {v0.1d}, [x0], x3 // encoding: [0x00,0xcc,0xc3,0x0d] + +//------------------------------------------------------------------------------ +// Post-index load single N-element structure to all lanes of N consecutive +// registers (N = 2,3,4) +//------------------------------------------------------------------------------ + ld2r {v0.16b, v1.16b}, [x0], #2 + ld2r {v15.8h, v16.8h}, [x15], #4 + ld2r {v31.4s, v0.4s}, [sp], #8 + ld2r {v0.2d, v1.2d}, [x0], #16 + ld2r {v0.8b, v1.8b}, [x0], x6 + ld2r {v15.4h, v16.4h}, [x15], x7 + ld2r {v31.2s, v0.2s}, [sp], x9 + ld2r {v31.1d, v0.1d}, [x0], x5 +// CHECK: ld2r {v0.16b, v1.16b}, [x0], #2 // encoding: [0x00,0xc0,0xff,0x4d] +// CHECK: ld2r {v15.8h, v16.8h}, [x15], #4 // encoding: [0xef,0xc5,0xff,0x4d] +// CHECK: ld2r {v31.4s, v0.4s}, [sp], #8 // encoding: [0xff,0xcb,0xff,0x4d] +// CHECK: ld2r {v0.2d, v1.2d}, [x0], #16 // encoding: [0x00,0xcc,0xff,0x4d] +// CHECK: ld2r {v0.8b, v1.8b}, [x0], x6 // encoding:
[0x00,0xc0,0xe6,0x0d] +// CHECK: ld2r {v15.4h, v16.4h}, [x15], x7 // encoding: [0xef,0xc5,0xe7,0x0d] +// CHECK: ld2r {v31.2s, v0.2s}, [sp], x9 // encoding: [0xff,0xcb,0xe9,0x0d] +// CHECK: ld2r {v31.1d, v0.1d}, [x0], x5 // encoding: [0x1f,0xcc,0xe5,0x0d] + + ld3r {v0.16b, v1.16b, v2.16b}, [x0], x9 + ld3r {v15.8h, v16.8h, v17.8h}, [x15], x6 + ld3r {v31.4s, v0.4s, v1.4s}, [sp], x7 + ld3r {v0.2d, v1.2d, v2.2d}, [x0], x5 + ld3r {v0.8b, v1.8b, v2.8b}, [x0], #3 + ld3r {v15.4h, v16.4h, v17.4h}, [x15], #6 + ld3r {v31.2s, v0.2s, v1.2s}, [sp], #12 + ld3r {v31.1d, v0.1d, v1.1d}, [sp], #24 +// CHECK: ld3r {v0.16b, v1.16b, v2.16b}, [x0], x9 // encoding: [0x00,0xe0,0xc9,0x4d] +// CHECK: ld3r {v15.8h, v16.8h, v17.8h}, [x15], x6 // encoding: [0xef,0xe5,0xc6,0x4d] +// CHECK: ld3r {v31.4s, v0.4s, v1.4s}, [sp], x7 // encoding: [0xff,0xeb,0xc7,0x4d] +// CHECK: ld3r {v0.2d, v1.2d, v2.2d}, [x0], x5 // encoding: [0x00,0xec,0xc5,0x4d] +// CHECK: ld3r {v0.8b, v1.8b, v2.8b}, [x0], #3 // encoding: [0x00,0xe0,0xdf,0x0d] +// CHECK: ld3r {v15.4h, v16.4h, v17.4h}, [x15], #6 // encoding: [0xef,0xe5,0xdf,0x0d] +// CHECK: ld3r {v31.2s, v0.2s, v1.2s}, [sp], #12 // encoding: [0xff,0xeb,0xdf,0x0d] +// CHECK: ld3r {v31.1d, v0.1d, v1.1d}, [sp], #24 // encoding: [0xff,0xef,0xdf,0x0d] + + ld4r {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #4 + ld4r {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], #8 + ld4r {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #16 + ld4r {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #32 + ld4r {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x5 + ld4r {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x9 + ld4r {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], x30 + ld4r {v31.1d, v0.1d, v1.1d, v2.1d}, [sp], x7 +// CHECK: ld4r {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #4 // encoding: [0x00,0xe0,0xff,0x4d] +// CHECK: ld4r {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], #8 // encoding: [0xef,0xe5,0xff,0x4d] +// CHECK: ld4r {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #16 // encoding: [0xff,0xeb,0xff,0x4d] +// CHECK: ld4r {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #32 // 
encoding: [0x00,0xec,0xff,0x4d] +// CHECK: ld4r {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x5 // encoding: [0x00,0xe0,0xe5,0x0d] +// CHECK: ld4r {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x9 // encoding: [0xef,0xe5,0xe9,0x0d] +// CHECK: ld4r {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], x30 // encoding: [0xff,0xeb,0xfe,0x0d] +// CHECK: ld4r {v31.1d, v0.1d, v1.1d, v2.1d}, [sp], x7 // encoding: [0xff,0xef,0xe7,0x0d] + +//------------------------------------------------------------------------------ +// Post-index load single 1-element structure to one lane of 1 register. +//------------------------------------------------------------------------------ + ld1 {v0.b}[9], [x0], #1 + ld1 {v15.h}[7], [x15], x9 + ld1 {v31.s}[3], [sp], x6 + ld1 {v0.d}[1], [x0], #8 +// CHECK: ld1 {v0.b}[9], [x0], #1 // encoding: [0x00,0x04,0xdf,0x4d] +// CHECK: ld1 {v15.h}[7], [x15], x9 // encoding: [0xef,0x59,0xc9,0x4d] +// CHECK: ld1 {v31.s}[3], [sp], x6 // encoding: [0xff,0x93,0xc6,0x4d] +// CHECK: ld1 {v0.d}[1], [x0], #8 // encoding: [0x00,0x84,0xdf,0x4d] + +//------------------------------------------------------------------------------ +// Post-index load single N-element structure to one lane of N consecutive +// registers (N = 2,3,4) +//------------------------------------------------------------------------------ + ld2 {v0.b, v1.b}[9], [x0], x3 + ld2 {v15.h, v16.h}[7], [x15], #4 + ld2 {v31.s, v0.s}[3], [sp], #8 + ld2 {v0.d, v1.d}[1], [x0], x0 +// CHECK: ld2 {v0.b, v1.b}[9], [x0], x3 // encoding: [0x00,0x04,0xe3,0x4d] +// CHECK: ld2 {v15.h, v16.h}[7], [x15], #4 // encoding: [0xef,0x59,0xff,0x4d] +// CHECK: ld2 {v31.s, v0.s}[3], [sp], #8 // encoding: [0xff,0x93,0xff,0x4d] +// CHECK: ld2 {v0.d, v1.d}[1], [x0], x0 // encoding: [0x00,0x84,0xe0,0x4d] + + ld3 {v0.b, v1.b, v2.b}[9], [x0], #3 + ld3 {v15.h, v16.h, v17.h}[7], [x15], #6 + ld3 {v31.s, v0.s, v1.s}[3], [sp], x3 + ld3 {v0.d, v1.d, v2.d}[1], [x0], x6 +// CHECK: ld3 {v0.b, v1.b, v2.b}[9], [x0], #3 // encoding: [0x00,0x24,0xdf,0x4d] +// CHECK: ld3 
{v15.h, v16.h, v17.h}[7], [x15], #6 // encoding: [0xef,0x79,0xdf,0x4d] +// CHECK: ld3 {v31.s, v0.s, v1.s}[3], [sp], x3 // encoding: [0xff,0xb3,0xc3,0x4d] +// CHECK: ld3 {v0.d, v1.d, v2.d}[1], [x0], x6 // encoding: [0x00,0xa4,0xc6,0x4d] + + ld4 {v0.b, v1.b, v2.b, v3.b}[9], [x0], x5 + ld4 {v15.h, v16.h, v17.h, v18.h}[7], [x15], x7 + ld4 {v31.s, v0.s, v1.s, v2.s}[3], [sp], #16 + ld4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #32 +// CHECK: ld4 {v0.b, v1.b, v2.b, v3.b}[9], [x0], x5 // encoding: [0x00,0x24,0xe5,0x4d] +// CHECK: ld4 {v15.h, v16.h, v17.h, v18.h}[7], [x15], x7 // encoding: [0xef,0x79,0xe7,0x4d] +// CHECK: ld4 {v31.s, v0.s, v1.s, v2.s}[3], [sp], #16 // encoding: [0xff,0xb3,0xff,0x4d] +// CHECK: ld4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #32 // encoding: [0x00,0xa4,0xff,0x4d] + +//------------------------------------------------------------------------------ +// Post-index store single 1-element structure from one lane of 1 register. +//------------------------------------------------------------------------------ + st1 {v0.b}[9], [x0], #1 + st1 {v15.h}[7], [x15], x9 + st1 {v31.s}[3], [sp], x6 + st1 {v0.d}[1], [x0], #8 +// CHECK: st1 {v0.b}[9], [x0], #1 // encoding: [0x00,0x04,0x9f,0x4d] +// CHECK: st1 {v15.h}[7], [x15], x9 // encoding: [0xef,0x59,0x89,0x4d] +// CHECK: st1 {v31.s}[3], [sp], x6 // encoding: [0xff,0x93,0x86,0x4d] +// CHECK: st1 {v0.d}[1], [x0], #8 // encoding: [0x00,0x84,0x9f,0x4d] + +//------------------------------------------------------------------------------ +// Post-index store single N-element structure from one lane of N consecutive +// registers (N = 2,3,4) +//------------------------------------------------------------------------------ + st2 {v0.b, v1.b}[9], [x0], x3 + st2 {v15.h, v16.h}[7], [x15], #4 + st2 {v31.s, v0.s}[3], [sp], #8 + st2 {v0.d, v1.d}[1], [x0], x0 +// CHECK: st2 {v0.b, v1.b}[9], [x0], x3 // encoding: [0x00,0x04,0xa3,0x4d] +// CHECK: st2 {v15.h, v16.h}[7], [x15], #4 // encoding: [0xef,0x59,0xbf,0x4d] +// CHECK: st2 {v31.s, 
v0.s}[3], [sp], #8 // encoding: [0xff,0x93,0xbf,0x4d] +// CHECK: st2 {v0.d, v1.d}[1], [x0], x0 // encoding: [0x00,0x84,0xa0,0x4d] + + st3 {v0.b, v1.b, v2.b}[9], [x0], #3 + st3 {v15.h, v16.h, v17.h}[7], [x15], #6 + st3 {v31.s, v0.s, v1.s}[3], [sp], x3 + st3 {v0.d, v1.d, v2.d}[1], [x0], x6 +// CHECK: st3 {v0.b, v1.b, v2.b}[9], [x0], #3 // encoding: [0x00,0x24,0x9f,0x4d] +// CHECK: st3 {v15.h, v16.h, v17.h}[7], [x15], #6 // encoding: [0xef,0x79,0x9f,0x4d] +// CHECK: st3 {v31.s, v0.s, v1.s}[3], [sp], x3 // encoding: [0xff,0xb3,0x83,0x4d] +// CHECK: st3 {v0.d, v1.d, v2.d}[1], [x0], x6 // encoding: [0x00,0xa4,0x86,0x4d] + + st4 {v0.b, v1.b, v2.b, v3.b}[9], [x0], x5 + st4 {v15.h, v16.h, v17.h, v18.h}[7], [x15], x7 + st4 {v31.s, v0.s, v1.s, v2.s}[3], [sp], #16 + st4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #32 +// CHECK: st4 {v0.b, v1.b, v2.b, v3.b}[9], [x0], x5 // encoding: [0x00,0x24,0xa5,0x4d] +// CHECK: st4 {v15.h, v16.h, v17.h, v18.h}[7], [x15], x7 // encoding: [0xef,0x79,0xa7,0x4d] +// CHECK: st4 {v31.s, v0.s, v1.s, v2.s}[3], [sp], #16 // encoding: [0xff,0xb3,0xbf,0x4d] +// CHECK: st4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #32 // encoding: [0x00,0xa4,0xbf,0x4d] diff --git a/test/MC/AArch64/neon-simd-misc.s b/test/MC/AArch64/neon-simd-misc.s new file mode 100644 index 0000000..9e0f9c5 --- /dev/null +++ b/test/MC/AArch64/neon-simd-misc.s @@ -0,0 +1,646 @@ +// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + + +//------------------------------------------------------------------------------ +// Element reverse +//------------------------------------------------------------------------------ + rev64 v0.16b, v31.16b + rev64 v2.8h, v4.8h + rev64 v6.4s, v8.4s + rev64 v1.8b, v9.8b + rev64 v13.4h, v21.4h + rev64 v4.2s, v0.2s + +// CHECK: rev64 v0.16b, v31.16b // encoding: [0xe0,0x0b,0x20,0x4e] +// CHECK: rev64 v2.8h, v4.8h // encoding: [0x82,0x08,0x60,0x4e] +// CHECK: rev64 v6.4s, 
v8.4s // encoding: [0x06,0x09,0xa0,0x4e] +// CHECK: rev64 v1.8b, v9.8b // encoding: [0x21,0x09,0x20,0x0e] +// CHECK: rev64 v13.4h, v21.4h // encoding: [0xad,0x0a,0x60,0x0e] +// CHECK: rev64 v4.2s, v0.2s // encoding: [0x04,0x08,0xa0,0x0e] + + rev32 v30.16b, v31.16b + rev32 v4.8h, v7.8h + rev32 v21.8b, v1.8b + rev32 v0.4h, v9.4h + +// CHECK: rev32 v30.16b, v31.16b // encoding: [0xfe,0x0b,0x20,0x6e] +// CHECK: rev32 v4.8h, v7.8h // encoding: [0xe4,0x08,0x60,0x6e] +// CHECK: rev32 v21.8b, v1.8b // encoding: [0x35,0x08,0x20,0x2e] +// CHECK: rev32 v0.4h, v9.4h // encoding: [0x20,0x09,0x60,0x2e] + + rev16 v30.16b, v31.16b + rev16 v21.8b, v1.8b + +// CHECK: rev16 v30.16b, v31.16b // encoding: [0xfe,0x1b,0x20,0x4e] +// CHECK: rev16 v21.8b, v1.8b // encoding: [0x35,0x18,0x20,0x0e] + +//------------------------------------------------------------------------------ +// Signed integer pairwise add long +//------------------------------------------------------------------------------ + + saddlp v3.8h, v21.16b + saddlp v8.4h, v5.8b + saddlp v9.4s, v1.8h + saddlp v0.2s, v1.4h + saddlp v12.2d, v4.4s + saddlp v17.1d, v28.2s + +// CHECK: saddlp v3.8h, v21.16b // encoding: [0xa3,0x2a,0x20,0x4e] +// CHECK: saddlp v8.4h, v5.8b // encoding: [0xa8,0x28,0x20,0x0e] +// CHECK: saddlp v9.4s, v1.8h // encoding: [0x29,0x28,0x60,0x4e] +// CHECK: saddlp v0.2s, v1.4h // encoding: [0x20,0x28,0x60,0x0e] +// CHECK: saddlp v12.2d, v4.4s // encoding: [0x8c,0x28,0xa0,0x4e] +// CHECK: saddlp v17.1d, v28.2s // encoding: [0x91,0x2b,0xa0,0x0e] + +//------------------------------------------------------------------------------ +// Unsigned integer pairwise add long +//------------------------------------------------------------------------------ + + uaddlp v3.8h, v21.16b + uaddlp v8.4h, v5.8b + uaddlp v9.4s, v1.8h + uaddlp v0.2s, v1.4h + uaddlp v12.2d, v4.4s + uaddlp v17.1d, v28.2s + +// CHECK: uaddlp v3.8h, v21.16b // encoding: [0xa3,0x2a,0x20,0x6e] +// CHECK: uaddlp v8.4h, v5.8b // encoding: 
[0xa8,0x28,0x20,0x2e] +// CHECK: uaddlp v9.4s, v1.8h // encoding: [0x29,0x28,0x60,0x6e] +// CHECK: uaddlp v0.2s, v1.4h // encoding: [0x20,0x28,0x60,0x2e] +// CHECK: uaddlp v12.2d, v4.4s // encoding: [0x8c,0x28,0xa0,0x6e] +// CHECK: uaddlp v17.1d, v28.2s // encoding: [0x91,0x2b,0xa0,0x2e] + +//------------------------------------------------------------------------------ +// Signed integer pairwise add and accumulate long +//------------------------------------------------------------------------------ + + sadalp v3.8h, v21.16b + sadalp v8.4h, v5.8b + sadalp v9.4s, v1.8h + sadalp v0.2s, v1.4h + sadalp v12.2d, v4.4s + sadalp v17.1d, v28.2s + +// CHECK: sadalp v3.8h, v21.16b // encoding: [0xa3,0x6a,0x20,0x4e] +// CHECK: sadalp v8.4h, v5.8b // encoding: [0xa8,0x68,0x20,0x0e] +// CHECK: sadalp v9.4s, v1.8h // encoding: [0x29,0x68,0x60,0x4e] +// CHECK: sadalp v0.2s, v1.4h // encoding: [0x20,0x68,0x60,0x0e] +// CHECK: sadalp v12.2d, v4.4s // encoding: [0x8c,0x68,0xa0,0x4e] +// CHECK: sadalp v17.1d, v28.2s // encoding: [0x91,0x6b,0xa0,0x0e] + +//------------------------------------------------------------------------------ +// Unsigned integer pairwise add and accumulate long +//------------------------------------------------------------------------------ + + uadalp v3.8h, v21.16b + uadalp v8.4h, v5.8b + uadalp v9.4s, v1.8h + uadalp v0.2s, v1.4h + uadalp v12.2d, v4.4s + uadalp v17.1d, v28.2s + +// CHECK: uadalp v3.8h, v21.16b // encoding: [0xa3,0x6a,0x20,0x6e] +// CHECK: uadalp v8.4h, v5.8b // encoding: [0xa8,0x68,0x20,0x2e] +// CHECK: uadalp v9.4s, v1.8h // encoding: [0x29,0x68,0x60,0x6e] +// CHECK: uadalp v0.2s, v1.4h // encoding: [0x20,0x68,0x60,0x2e] +// CHECK: uadalp v12.2d, v4.4s // encoding: [0x8c,0x68,0xa0,0x6e] +// CHECK: uadalp v17.1d, v28.2s // encoding: [0x91,0x6b,0xa0,0x2e] + +//------------------------------------------------------------------------------ +// Signed integer saturating accumulate of unsigned value 
+//------------------------------------------------------------------------------ + + suqadd v0.16b, v31.16b + suqadd v2.8h, v4.8h + suqadd v6.4s, v8.4s + suqadd v6.2d, v8.2d + suqadd v1.8b, v9.8b + suqadd v13.4h, v21.4h + suqadd v4.2s, v0.2s + +// CHECK: suqadd v0.16b, v31.16b // encoding: [0xe0,0x3b,0x20,0x4e] +// CHECK: suqadd v2.8h, v4.8h // encoding: [0x82,0x38,0x60,0x4e] +// CHECK: suqadd v6.4s, v8.4s // encoding: [0x06,0x39,0xa0,0x4e] +// CHECK: suqadd v6.2d, v8.2d // encoding: [0x06,0x39,0xe0,0x4e] +// CHECK: suqadd v1.8b, v9.8b // encoding: [0x21,0x39,0x20,0x0e] +// CHECK: suqadd v13.4h, v21.4h // encoding: [0xad,0x3a,0x60,0x0e] +// CHECK: suqadd v4.2s, v0.2s // encoding: [0x04,0x38,0xa0,0x0e] + +//------------------------------------------------------------------------------ +// Unsigned integer saturating accumulate of signed value +//------------------------------------------------------------------------------ + + usqadd v0.16b, v31.16b + usqadd v2.8h, v4.8h + usqadd v6.4s, v8.4s + usqadd v6.2d, v8.2d + usqadd v1.8b, v9.8b + usqadd v13.4h, v21.4h + usqadd v4.2s, v0.2s + +// CHECK: usqadd v0.16b, v31.16b // encoding: [0xe0,0x3b,0x20,0x6e] +// CHECK: usqadd v2.8h, v4.8h // encoding: [0x82,0x38,0x60,0x6e] +// CHECK: usqadd v6.4s, v8.4s // encoding: [0x06,0x39,0xa0,0x6e] +// CHECK: usqadd v6.2d, v8.2d // encoding: [0x06,0x39,0xe0,0x6e] +// CHECK: usqadd v1.8b, v9.8b // encoding: [0x21,0x39,0x20,0x2e] +// CHECK: usqadd v13.4h, v21.4h // encoding: [0xad,0x3a,0x60,0x2e] +// CHECK: usqadd v4.2s, v0.2s // encoding: [0x04,0x38,0xa0,0x2e] + +//------------------------------------------------------------------------------ +// Integer saturating absolute +//------------------------------------------------------------------------------ + + sqabs v0.16b, v31.16b + sqabs v2.8h, v4.8h + sqabs v6.4s, v8.4s + sqabs v6.2d, v8.2d + sqabs v1.8b, v9.8b + sqabs v13.4h, v21.4h + sqabs v4.2s, v0.2s + +// CHECK: sqabs v0.16b, v31.16b // encoding: [0xe0,0x7b,0x20,0x4e] +// CHECK: 
sqabs v2.8h, v4.8h // encoding: [0x82,0x78,0x60,0x4e] +// CHECK: sqabs v6.4s, v8.4s // encoding: [0x06,0x79,0xa0,0x4e] +// CHECK: sqabs v6.2d, v8.2d // encoding: [0x06,0x79,0xe0,0x4e] +// CHECK: sqabs v1.8b, v9.8b // encoding: [0x21,0x79,0x20,0x0e] +// CHECK: sqabs v13.4h, v21.4h // encoding: [0xad,0x7a,0x60,0x0e] +// CHECK: sqabs v4.2s, v0.2s // encoding: [0x04,0x78,0xa0,0x0e] + +//------------------------------------------------------------------------------ +// Signed integer saturating negate +//------------------------------------------------------------------------------ + + sqneg v0.16b, v31.16b + sqneg v2.8h, v4.8h + sqneg v6.4s, v8.4s + sqneg v6.2d, v8.2d + sqneg v1.8b, v9.8b + sqneg v13.4h, v21.4h + sqneg v4.2s, v0.2s + +// CHECK: sqneg v0.16b, v31.16b // encoding: [0xe0,0x7b,0x20,0x6e] +// CHECK: sqneg v2.8h, v4.8h // encoding: [0x82,0x78,0x60,0x6e] +// CHECK: sqneg v6.4s, v8.4s // encoding: [0x06,0x79,0xa0,0x6e] +// CHECK: sqneg v6.2d, v8.2d // encoding: [0x06,0x79,0xe0,0x6e] +// CHECK: sqneg v1.8b, v9.8b // encoding: [0x21,0x79,0x20,0x2e] +// CHECK: sqneg v13.4h, v21.4h // encoding: [0xad,0x7a,0x60,0x2e] +// CHECK: sqneg v4.2s, v0.2s // encoding: [0x04,0x78,0xa0,0x2e] + +//------------------------------------------------------------------------------ +// Integer absolute +//------------------------------------------------------------------------------ + + abs v0.16b, v31.16b + abs v2.8h, v4.8h + abs v6.4s, v8.4s + abs v6.2d, v8.2d + abs v1.8b, v9.8b + abs v13.4h, v21.4h + abs v4.2s, v0.2s + +// CHECK: abs v0.16b, v31.16b // encoding: [0xe0,0xbb,0x20,0x4e] +// CHECK: abs v2.8h, v4.8h // encoding: [0x82,0xb8,0x60,0x4e] +// CHECK: abs v6.4s, v8.4s // encoding: [0x06,0xb9,0xa0,0x4e] +// CHECK: abs v6.2d, v8.2d // encoding: [0x06,0xb9,0xe0,0x4e] +// CHECK: abs v1.8b, v9.8b // encoding: [0x21,0xb9,0x20,0x0e] +// CHECK: abs v13.4h, v21.4h // encoding: [0xad,0xba,0x60,0x0e] +// CHECK: abs v4.2s, v0.2s // encoding: [0x04,0xb8,0xa0,0x0e] + 
+//------------------------------------------------------------------------------ +// Integer negate +//------------------------------------------------------------------------------ + + neg v0.16b, v31.16b + neg v2.8h, v4.8h + neg v6.4s, v8.4s + neg v6.2d, v8.2d + neg v1.8b, v9.8b + neg v13.4h, v21.4h + neg v4.2s, v0.2s + +// CHECK: neg v0.16b, v31.16b // encoding: [0xe0,0xbb,0x20,0x6e] +// CHECK: neg v2.8h, v4.8h // encoding: [0x82,0xb8,0x60,0x6e] +// CHECK: neg v6.4s, v8.4s // encoding: [0x06,0xb9,0xa0,0x6e] +// CHECK: neg v6.2d, v8.2d // encoding: [0x06,0xb9,0xe0,0x6e] +// CHECK: neg v1.8b, v9.8b // encoding: [0x21,0xb9,0x20,0x2e] +// CHECK: neg v13.4h, v21.4h // encoding: [0xad,0xba,0x60,0x2e] +// CHECK: neg v4.2s, v0.2s // encoding: [0x04,0xb8,0xa0,0x2e] + +//------------------------------------------------------------------------------ +// Integer count leading sign bits +//------------------------------------------------------------------------------ + + cls v0.16b, v31.16b + cls v2.8h, v4.8h + cls v6.4s, v8.4s + cls v1.8b, v9.8b + cls v13.4h, v21.4h + cls v4.2s, v0.2s + +// CHECK: cls v0.16b, v31.16b // encoding: [0xe0,0x4b,0x20,0x4e] +// CHECK: cls v2.8h, v4.8h // encoding: [0x82,0x48,0x60,0x4e] +// CHECK: cls v6.4s, v8.4s // encoding: [0x06,0x49,0xa0,0x4e] +// CHECK: cls v1.8b, v9.8b // encoding: [0x21,0x49,0x20,0x0e] +// CHECK: cls v13.4h, v21.4h // encoding: [0xad,0x4a,0x60,0x0e] +// CHECK: cls v4.2s, v0.2s // encoding: [0x04,0x48,0xa0,0x0e] + +//------------------------------------------------------------------------------ +// Integer count leading zeros +//------------------------------------------------------------------------------ + + clz v0.16b, v31.16b + clz v2.8h, v4.8h + clz v6.4s, v8.4s + clz v1.8b, v9.8b + clz v13.4h, v21.4h + clz v4.2s, v0.2s + +// CHECK: clz v0.16b, v31.16b // encoding: [0xe0,0x4b,0x20,0x6e] +// CHECK: clz v2.8h, v4.8h // encoding: [0x82,0x48,0x60,0x6e] +// CHECK: clz v6.4s, v8.4s // encoding: [0x06,0x49,0xa0,0x6e] +// 
CHECK: clz v1.8b, v9.8b // encoding: [0x21,0x49,0x20,0x2e] +// CHECK: clz v13.4h, v21.4h // encoding: [0xad,0x4a,0x60,0x2e] +// CHECK: clz v4.2s, v0.2s // encoding: [0x04,0x48,0xa0,0x2e] + +//------------------------------------------------------------------------------ +// Population count +//------------------------------------------------------------------------------ + + cnt v0.16b, v31.16b + cnt v1.8b, v9.8b + +// CHECK: cnt v0.16b, v31.16b // encoding: [0xe0,0x5b,0x20,0x4e] +// CHECK: cnt v1.8b, v9.8b // encoding: [0x21,0x59,0x20,0x0e] + +//------------------------------------------------------------------------------ +// Bitwise NOT +//------------------------------------------------------------------------------ + + not v0.16b, v31.16b + not v1.8b, v9.8b + +// CHECK: not v0.16b, v31.16b // encoding: [0xe0,0x5b,0x20,0x6e] +// CHECK: not v1.8b, v9.8b // encoding: [0x21,0x59,0x20,0x2e] + +//------------------------------------------------------------------------------ +// Bitwise reverse +//------------------------------------------------------------------------------ + + rbit v0.16b, v31.16b + rbit v1.8b, v9.8b + +// CHECK: rbit v0.16b, v31.16b // encoding: [0xe0,0x5b,0x60,0x6e] +// CHECK: rbit v1.8b, v9.8b // encoding: [0x21,0x59,0x60,0x2e] + +//------------------------------------------------------------------------------ +// Floating-point absolute +//------------------------------------------------------------------------------ + + fabs v6.4s, v8.4s + fabs v6.2d, v8.2d + fabs v4.2s, v0.2s + +// CHECK: fabs v6.4s, v8.4s // encoding: [0x06,0xf9,0xa0,0x4e] +// CHECK: fabs v6.2d, v8.2d // encoding: [0x06,0xf9,0xe0,0x4e] +// CHECK: fabs v4.2s, v0.2s // encoding: [0x04,0xf8,0xa0,0x0e] + +//------------------------------------------------------------------------------ +// Floating-point negate +//------------------------------------------------------------------------------ + + fneg v6.4s, v8.4s + fneg v6.2d, v8.2d + fneg v4.2s, v0.2s + +// CHECK: fneg v6.4s, 
v8.4s // encoding: [0x06,0xf9,0xa0,0x6e] +// CHECK: fneg v6.2d, v8.2d // encoding: [0x06,0xf9,0xe0,0x6e] +// CHECK: fneg v4.2s, v0.2s // encoding: [0x04,0xf8,0xa0,0x2e] + +//------------------------------------------------------------------------------ +// Integer extract and narrow +//------------------------------------------------------------------------------ + + xtn2 v0.16b, v31.8h + xtn2 v2.8h, v4.4s + xtn2 v6.4s, v8.2d + xtn v1.8b, v9.8h + xtn v13.4h, v21.4s + xtn v4.2s, v0.2d + +// CHECK: xtn2 v0.16b, v31.8h // encoding: [0xe0,0x2b,0x21,0x4e] +// CHECK: xtn2 v2.8h, v4.4s // encoding: [0x82,0x28,0x61,0x4e] +// CHECK: xtn2 v6.4s, v8.2d // encoding: [0x06,0x29,0xa1,0x4e] +// CHECK: xtn v1.8b, v9.8h // encoding: [0x21,0x29,0x21,0x0e] +// CHECK: xtn v13.4h, v21.4s // encoding: [0xad,0x2a,0x61,0x0e] +// CHECK: xtn v4.2s, v0.2d // encoding: [0x04,0x28,0xa1,0x0e] + +//------------------------------------------------------------------------------ +// Signed integer saturating extract and unsigned narrow +//------------------------------------------------------------------------------ + + sqxtun2 v0.16b, v31.8h + sqxtun2 v2.8h, v4.4s + sqxtun2 v6.4s, v8.2d + sqxtun v1.8b, v9.8h + sqxtun v13.4h, v21.4s + sqxtun v4.2s, v0.2d + +// CHECK: sqxtun2 v0.16b, v31.8h // encoding: [0xe0,0x2b,0x21,0x6e] +// CHECK: sqxtun2 v2.8h, v4.4s // encoding: [0x82,0x28,0x61,0x6e] +// CHECK: sqxtun2 v6.4s, v8.2d // encoding: [0x06,0x29,0xa1,0x6e] +// CHECK: sqxtun v1.8b, v9.8h // encoding: [0x21,0x29,0x21,0x2e] +// CHECK: sqxtun v13.4h, v21.4s // encoding: [0xad,0x2a,0x61,0x2e] +// CHECK: sqxtun v4.2s, v0.2d // encoding: [0x04,0x28,0xa1,0x2e] + +//------------------------------------------------------------------------------ +// Signed integer saturating extract and narrow +//------------------------------------------------------------------------------ + + sqxtn2 v0.16b, v31.8h + sqxtn2 v2.8h, v4.4s + sqxtn2 v6.4s, v8.2d + sqxtn v1.8b, v9.8h + sqxtn v13.4h, v21.4s + sqxtn v4.2s, v0.2d + 
+// CHECK: sqxtn2 v0.16b, v31.8h // encoding: [0xe0,0x4b,0x21,0x4e] +// CHECK: sqxtn2 v2.8h, v4.4s // encoding: [0x82,0x48,0x61,0x4e] +// CHECK: sqxtn2 v6.4s, v8.2d // encoding: [0x06,0x49,0xa1,0x4e] +// CHECK: sqxtn v1.8b, v9.8h // encoding: [0x21,0x49,0x21,0x0e] +// CHECK: sqxtn v13.4h, v21.4s // encoding: [0xad,0x4a,0x61,0x0e] +// CHECK: sqxtn v4.2s, v0.2d // encoding: [0x04,0x48,0xa1,0x0e] + +//------------------------------------------------------------------------------ +// Unsigned integer saturating extract and narrow +//------------------------------------------------------------------------------ + + uqxtn2 v0.16b, v31.8h + uqxtn2 v2.8h, v4.4s + uqxtn2 v6.4s, v8.2d + uqxtn v1.8b, v9.8h + uqxtn v13.4h, v21.4s + uqxtn v4.2s, v0.2d + +// CHECK: uqxtn2 v0.16b, v31.8h // encoding: [0xe0,0x4b,0x21,0x6e] +// CHECK: uqxtn2 v2.8h, v4.4s // encoding: [0x82,0x48,0x61,0x6e] +// CHECK: uqxtn2 v6.4s, v8.2d // encoding: [0x06,0x49,0xa1,0x6e] +// CHECK: uqxtn v1.8b, v9.8h // encoding: [0x21,0x49,0x21,0x2e] +// CHECK: uqxtn v13.4h, v21.4s // encoding: [0xad,0x4a,0x61,0x2e] +// CHECK: uqxtn v4.2s, v0.2d // encoding: [0x04,0x48,0xa1,0x2e] + +//------------------------------------------------------------------------------ +// Integer shift left long +//------------------------------------------------------------------------------ + + shll2 v2.8h, v4.16b, #8 + shll2 v6.4s, v8.8h, #16 + shll2 v6.2d, v8.4s, #32 + shll v2.8h, v4.8b, #8 + shll v6.4s, v8.4h, #16 + shll v6.2d, v8.2s, #32 + +// CHECK: shll2 v2.8h, v4.16b, #8 // encoding: [0x82,0x38,0x21,0x6e] +// CHECK: shll2 v6.4s, v8.8h, #16 // encoding: [0x06,0x39,0x61,0x6e] +// CHECK: shll2 v6.2d, v8.4s, #32 // encoding: [0x06,0x39,0xa1,0x6e] +// CHECK: shll v2.8h, v4.8b, #8 // encoding: [0x82,0x38,0x21,0x2e] +// CHECK: shll v6.4s, v8.4h, #16 // encoding: [0x06,0x39,0x61,0x2e] +// CHECK: shll v6.2d, v8.2s, #32 // encoding: [0x06,0x39,0xa1,0x2e] + +//------------------------------------------------------------------------------ 
+// Floating-point convert downsize +//------------------------------------------------------------------------------ + + fcvtn2 v2.8h, v4.4s + fcvtn2 v6.4s, v8.2d + fcvtn v13.4h, v21.4s + fcvtn v4.2s, v0.2d + +// CHECK: fcvtn2 v2.8h, v4.4s // encoding: [0x82,0x68,0x21,0x4e] +// CHECK: fcvtn2 v6.4s, v8.2d // encoding: [0x06,0x69,0x61,0x4e] +// CHECK: fcvtn v13.4h, v21.4s // encoding: [0xad,0x6a,0x21,0x0e] +// CHECK: fcvtn v4.2s, v0.2d // encoding: [0x04,0x68,0x61,0x0e] + +//------------------------------------------------------------------------------ +// Floating-point convert downsize with inexact +//------------------------------------------------------------------------------ + + fcvtxn2 v6.4s, v8.2d + fcvtxn v4.2s, v0.2d + +// CHECK: fcvtxn2 v6.4s, v8.2d // encoding: [0x06,0x69,0x61,0x6e] +// CHECK: fcvtxn v4.2s, v0.2d // encoding: [0x04,0x68,0x61,0x2e] + +//------------------------------------------------------------------------------ +// Floating-point convert upsize +//------------------------------------------------------------------------------ + + fcvtl v9.4s, v1.4h + fcvtl v0.2d, v1.2s + fcvtl2 v12.4s, v4.8h + fcvtl2 v17.2d, v28.4s + +// CHECK: fcvtl v9.4s, v1.4h // encoding: [0x29,0x78,0x21,0x0e] +// CHECK: fcvtl v0.2d, v1.2s // encoding: [0x20,0x78,0x61,0x0e] +// CHECK: fcvtl2 v12.4s, v4.8h // encoding: [0x8c,0x78,0x21,0x4e] +// CHECK: fcvtl2 v17.2d, v28.4s // encoding: [0x91,0x7b,0x61,0x4e] + +//------------------------------------------------------------------------------ +// Floating-point round to integral +//------------------------------------------------------------------------------ + + frintn v6.4s, v8.4s + frintn v6.2d, v8.2d + frintn v4.2s, v0.2s + +// CHECK: frintn v6.4s, v8.4s // encoding: [0x06,0x89,0x21,0x4e] +// CHECK: frintn v6.2d, v8.2d // encoding: [0x06,0x89,0x61,0x4e] +// CHECK: frintn v4.2s, v0.2s // encoding: [0x04,0x88,0x21,0x0e] + + frinta v6.4s, v8.4s + frinta v6.2d, v8.2d + frinta v4.2s, v0.2s + +// CHECK: frinta v6.4s, 
v8.4s // encoding: [0x06,0x89,0x21,0x6e] +// CHECK: frinta v6.2d, v8.2d // encoding: [0x06,0x89,0x61,0x6e] +// CHECK: frinta v4.2s, v0.2s // encoding: [0x04,0x88,0x21,0x2e] + + frintp v6.4s, v8.4s + frintp v6.2d, v8.2d + frintp v4.2s, v0.2s + +// CHECK: frintp v6.4s, v8.4s // encoding: [0x06,0x89,0xa1,0x4e] +// CHECK: frintp v6.2d, v8.2d // encoding: [0x06,0x89,0xe1,0x4e] +// CHECK: frintp v4.2s, v0.2s // encoding: [0x04,0x88,0xa1,0x0e] + + frintm v6.4s, v8.4s + frintm v6.2d, v8.2d + frintm v4.2s, v0.2s + +// CHECK: frintm v6.4s, v8.4s // encoding: [0x06,0x99,0x21,0x4e] +// CHECK: frintm v6.2d, v8.2d // encoding: [0x06,0x99,0x61,0x4e] +// CHECK: frintm v4.2s, v0.2s // encoding: [0x04,0x98,0x21,0x0e] + + frintx v6.4s, v8.4s + frintx v6.2d, v8.2d + frintx v4.2s, v0.2s + +// CHECK: frintx v6.4s, v8.4s // encoding: [0x06,0x99,0x21,0x6e] +// CHECK: frintx v6.2d, v8.2d // encoding: [0x06,0x99,0x61,0x6e] +// CHECK: frintx v4.2s, v0.2s // encoding: [0x04,0x98,0x21,0x2e] + + frintz v6.4s, v8.4s + frintz v6.2d, v8.2d + frintz v4.2s, v0.2s + +// CHECK: frintz v6.4s, v8.4s // encoding: [0x06,0x99,0xa1,0x4e] +// CHECK: frintz v6.2d, v8.2d // encoding: [0x06,0x99,0xe1,0x4e] +// CHECK: frintz v4.2s, v0.2s // encoding: [0x04,0x98,0xa1,0x0e] + + frinti v6.4s, v8.4s + frinti v6.2d, v8.2d + frinti v4.2s, v0.2s + +// CHECK: frinti v6.4s, v8.4s // encoding: [0x06,0x99,0xa1,0x6e] +// CHECK: frinti v6.2d, v8.2d // encoding: [0x06,0x99,0xe1,0x6e] +// CHECK: frinti v4.2s, v0.2s // encoding: [0x04,0x98,0xa1,0x2e] + +//------------------------------------------------------------------------------ +// Floating-point convert to integer +//------------------------------------------------------------------------------ + + fcvtns v6.4s, v8.4s + fcvtns v6.2d, v8.2d + fcvtns v4.2s, v0.2s + +// CHECK: fcvtns v6.4s, v8.4s // encoding: [0x06,0xa9,0x21,0x4e] +// CHECK: fcvtns v6.2d, v8.2d // encoding: [0x06,0xa9,0x61,0x4e] +// CHECK: fcvtns v4.2s, v0.2s // encoding: [0x04,0xa8,0x21,0x0e] + + fcvtnu 
v6.4s, v8.4s + fcvtnu v6.2d, v8.2d + fcvtnu v4.2s, v0.2s + +// CHECK: fcvtnu v6.4s, v8.4s // encoding: [0x06,0xa9,0x21,0x6e] +// CHECK: fcvtnu v6.2d, v8.2d // encoding: [0x06,0xa9,0x61,0x6e] +// CHECK: fcvtnu v4.2s, v0.2s // encoding: [0x04,0xa8,0x21,0x2e] + + fcvtps v6.4s, v8.4s + fcvtps v6.2d, v8.2d + fcvtps v4.2s, v0.2s + +// CHECK: fcvtps v6.4s, v8.4s // encoding: [0x06,0xa9,0xa1,0x4e] +// CHECK: fcvtps v6.2d, v8.2d // encoding: [0x06,0xa9,0xe1,0x4e] +// CHECK: fcvtps v4.2s, v0.2s // encoding: [0x04,0xa8,0xa1,0x0e] + + fcvtpu v6.4s, v8.4s + fcvtpu v6.2d, v8.2d + fcvtpu v4.2s, v0.2s + +// CHECK: fcvtpu v6.4s, v8.4s // encoding: [0x06,0xa9,0xa1,0x6e] +// CHECK: fcvtpu v6.2d, v8.2d // encoding: [0x06,0xa9,0xe1,0x6e] +// CHECK: fcvtpu v4.2s, v0.2s // encoding: [0x04,0xa8,0xa1,0x2e] + + fcvtms v6.4s, v8.4s + fcvtms v6.2d, v8.2d + fcvtms v4.2s, v0.2s + +// CHECK: fcvtms v6.4s, v8.4s // encoding: [0x06,0xb9,0x21,0x4e] +// CHECK: fcvtms v6.2d, v8.2d // encoding: [0x06,0xb9,0x61,0x4e] +// CHECK: fcvtms v4.2s, v0.2s // encoding: [0x04,0xb8,0x21,0x0e] + + fcvtmu v6.4s, v8.4s + fcvtmu v6.2d, v8.2d + fcvtmu v4.2s, v0.2s + +// CHECK: fcvtmu v6.4s, v8.4s // encoding: [0x06,0xb9,0x21,0x6e] +// CHECK: fcvtmu v6.2d, v8.2d // encoding: [0x06,0xb9,0x61,0x6e] +// CHECK: fcvtmu v4.2s, v0.2s // encoding: [0x04,0xb8,0x21,0x2e] + + fcvtzs v6.4s, v8.4s + fcvtzs v6.2d, v8.2d + fcvtzs v4.2s, v0.2s + +// CHECK: fcvtzs v6.4s, v8.4s // encoding: [0x06,0xb9,0xa1,0x4e] +// CHECK: fcvtzs v6.2d, v8.2d // encoding: [0x06,0xb9,0xe1,0x4e] +// CHECK: fcvtzs v4.2s, v0.2s // encoding: [0x04,0xb8,0xa1,0x0e] + + + fcvtzu v6.4s, v8.4s + fcvtzu v6.2d, v8.2d + fcvtzu v4.2s, v0.2s + +// CHECK: fcvtzu v6.4s, v8.4s // encoding: [0x06,0xb9,0xa1,0x6e] +// CHECK: fcvtzu v6.2d, v8.2d // encoding: [0x06,0xb9,0xe1,0x6e] +// CHECK: fcvtzu v4.2s, v0.2s // encoding: [0x04,0xb8,0xa1,0x2e] + + fcvtas v6.4s, v8.4s + fcvtas v6.2d, v8.2d + fcvtas v4.2s, v0.2s + +// CHECK: fcvtas v6.4s, v8.4s // encoding: 
[0x06,0xc9,0x21,0x4e] +// CHECK: fcvtas v6.2d, v8.2d // encoding: [0x06,0xc9,0x61,0x4e] +// CHECK: fcvtas v4.2s, v0.2s // encoding: [0x04,0xc8,0x21,0x0e] + + fcvtau v6.4s, v8.4s + fcvtau v6.2d, v8.2d + fcvtau v4.2s, v0.2s + +// CHECK: fcvtau v6.4s, v8.4s // encoding: [0x06,0xc9,0x21,0x6e] +// CHECK: fcvtau v6.2d, v8.2d // encoding: [0x06,0xc9,0x61,0x6e] +// CHECK: fcvtau v4.2s, v0.2s // encoding: [0x04,0xc8,0x21,0x2e] + + urecpe v6.4s, v8.4s + urecpe v4.2s, v0.2s + +// CHECK: urecpe v6.4s, v8.4s // encoding: [0x06,0xc9,0xa1,0x4e] +// CHECK: urecpe v4.2s, v0.2s // encoding: [0x04,0xc8,0xa1,0x0e] + + ursqrte v6.4s, v8.4s + ursqrte v4.2s, v0.2s + +// CHECK: ursqrte v6.4s, v8.4s // encoding: [0x06,0xc9,0xa1,0x6e] +// CHECK: ursqrte v4.2s, v0.2s // encoding: [0x04,0xc8,0xa1,0x2e] + + scvtf v6.4s, v8.4s + scvtf v6.2d, v8.2d + scvtf v4.2s, v0.2s + +// CHECK: scvtf v6.4s, v8.4s // encoding: [0x06,0xd9,0x21,0x4e] +// CHECK: scvtf v6.2d, v8.2d // encoding: [0x06,0xd9,0x61,0x4e] +// CHECK: scvtf v4.2s, v0.2s // encoding: [0x04,0xd8,0x21,0x0e] + + ucvtf v6.4s, v8.4s + ucvtf v6.2d, v8.2d + ucvtf v4.2s, v0.2s + +// CHECK: ucvtf v6.4s, v8.4s // encoding: [0x06,0xd9,0x21,0x6e] +// CHECK: ucvtf v6.2d, v8.2d // encoding: [0x06,0xd9,0x61,0x6e] +// CHECK: ucvtf v4.2s, v0.2s // encoding: [0x04,0xd8,0x21,0x2e] + + frecpe v6.4s, v8.4s + frecpe v6.2d, v8.2d + frecpe v4.2s, v0.2s + +// CHECK: frecpe v6.4s, v8.4s // encoding: [0x06,0xd9,0xa1,0x4e] +// CHECK: frecpe v6.2d, v8.2d // encoding: [0x06,0xd9,0xe1,0x4e] +// CHECK: frecpe v4.2s, v0.2s // encoding: [0x04,0xd8,0xa1,0x0e] + + frsqrte v6.4s, v8.4s + frsqrte v6.2d, v8.2d + frsqrte v4.2s, v0.2s + +// CHECK: frsqrte v6.4s, v8.4s // encoding: [0x06,0xd9,0xa1,0x6e] +// CHECK: frsqrte v6.2d, v8.2d // encoding: [0x06,0xd9,0xe1,0x6e] +// CHECK: frsqrte v4.2s, v0.2s // encoding: [0x04,0xd8,0xa1,0x2e] + + fsqrt v6.4s, v8.4s + fsqrt v6.2d, v8.2d + fsqrt v4.2s, v0.2s + +// CHECK: fsqrt v6.4s, v8.4s // encoding: [0x06,0xf9,0xa1,0x6e] +// CHECK: 
fsqrt v6.2d, v8.2d // encoding: [0x06,0xf9,0xe1,0x6e] +// CHECK: fsqrt v4.2s, v0.2s // encoding: [0x04,0xf8,0xa1,0x2e] + + diff --git a/test/MC/AArch64/neon-simd-post-ldst-multi-elem.s b/test/MC/AArch64/neon-simd-post-ldst-multi-elem.s new file mode 100644 index 0000000..8dc271e --- /dev/null +++ b/test/MC/AArch64/neon-simd-post-ldst-multi-elem.s @@ -0,0 +1,389 @@ +// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//------------------------------------------------------------------------------ +// Load multiple 1-element structures from one register (post-index) +//------------------------------------------------------------------------------ + ld1 {v0.16b}, [x0], x1 + ld1 {v15.8h}, [x15], x2 + ld1 {v31.4s}, [sp], #16 + ld1 {v0.2d}, [x0], #16 + ld1 {v0.8b}, [x0], x2 + ld1 {v15.4h}, [x15], x3 + ld1 {v31.2s}, [sp], #8 + ld1 {v0.1d}, [x0], #8 +// CHECK: ld1 {v0.16b}, [x0], x1 +// CHECK: // encoding: [0x00,0x70,0xc1,0x4c] +// CHECK: ld1 {v15.8h}, [x15], x2 +// CHECK: // encoding: [0xef,0x75,0xc2,0x4c] +// CHECK: ld1 {v31.4s}, [sp], #16 +// CHECK: // encoding: [0xff,0x7b,0xdf,0x4c] +// CHECK: ld1 {v0.2d}, [x0], #16 +// CHECK: // encoding: [0x00,0x7c,0xdf,0x4c] +// CHECK: ld1 {v0.8b}, [x0], x2 +// CHECK: // encoding: [0x00,0x70,0xc2,0x0c] +// CHECK: ld1 {v15.4h}, [x15], x3 +// CHECK: // encoding: [0xef,0x75,0xc3,0x0c] +// CHECK: ld1 {v31.2s}, [sp], #8 +// CHECK: // encoding: [0xff,0x7b,0xdf,0x0c] +// CHECK: ld1 {v0.1d}, [x0], #8 +// CHECK: // encoding: [0x00,0x7c,0xdf,0x0c] + +//------------------------------------------------------------------------------ +// Load multiple 1-element structures from two consecutive registers +// (post-index) +//------------------------------------------------------------------------------ + ld1 {v0.16b, v1.16b}, [x0], x1 + ld1 {v15.8h, v16.8h}, [x15], x2 + ld1 {v31.4s, v0.4s}, [sp], #32 + ld1 {v0.2d, v1.2d}, [x0], #32 + ld1 
{v0.8b, v1.8b}, [x0], x2 + ld1 {v15.4h, v16.4h}, [x15], x3 + ld1 {v31.2s, v0.2s}, [sp], #16 + ld1 {v0.1d, v1.1d}, [x0], #16 +// CHECK: ld1 {v0.16b, v1.16b}, [x0], x1 +// CHECK: // encoding: [0x00,0xa0,0xc1,0x4c] +// CHECK: ld1 {v15.8h, v16.8h}, [x15], x2 +// CHECK: // encoding: [0xef,0xa5,0xc2,0x4c] +// CHECK: ld1 {v31.4s, v0.4s}, [sp], #32 +// CHECK: // encoding: [0xff,0xab,0xdf,0x4c] +// CHECK: ld1 {v0.2d, v1.2d}, [x0], #32 +// CHECK: // encoding: [0x00,0xac,0xdf,0x4c] +// CHECK: ld1 {v0.8b, v1.8b}, [x0], x2 +// CHECK: // encoding: [0x00,0xa0,0xc2,0x0c] +// CHECK: ld1 {v15.4h, v16.4h}, [x15], x3 +// CHECK: // encoding: [0xef,0xa5,0xc3,0x0c] +// CHECK: ld1 {v31.2s, v0.2s}, [sp], #16 +// CHECK: // encoding: [0xff,0xab,0xdf,0x0c] +// CHECK: ld1 {v0.1d, v1.1d}, [x0], #16 +// CHECK: // encoding: [0x00,0xac,0xdf,0x0c] + +//------------------------------------------------------------------------------ +// Load multiple 1-element structures from three consecutive registers +// (post-index) +//------------------------------------------------------------------------------ + ld1 {v0.16b, v1.16b, v2.16b}, [x0], x1 + ld1 {v15.8h, v16.8h, v17.8h}, [x15], x2 + ld1 {v31.4s, v0.4s, v1.4s}, [sp], #48 + ld1 {v0.2d, v1.2d, v2.2d}, [x0], #48 + ld1 {v0.8b, v1.8b, v2.8b}, [x0], x2 + ld1 {v15.4h, v16.4h, v17.4h}, [x15], x3 + ld1 {v31.2s, v0.2s, v1.2s}, [sp], #24 + ld1 {v0.1d, v1.1d, v2.1d}, [x0], #24 +// CHECK: ld1 {v0.16b, v1.16b, v2.16b}, [x0], x1 +// CHECK: // encoding: [0x00,0x60,0xc1,0x4c] +// CHECK: ld1 {v15.8h, v16.8h, v17.8h}, [x15], x2 +// CHECK: // encoding: [0xef,0x65,0xc2,0x4c] +// CHECK: ld1 {v31.4s, v0.4s, v1.4s}, [sp], #48 +// CHECK: // encoding: [0xff,0x6b,0xdf,0x4c] +// CHECK: ld1 {v0.2d, v1.2d, v2.2d}, [x0], #48 +// CHECK: // encoding: [0x00,0x6c,0xdf,0x4c] +// CHECK: ld1 {v0.8b, v1.8b, v2.8b}, [x0], x2 +// CHECK: // encoding: [0x00,0x60,0xc2,0x0c] +// CHECK: ld1 {v15.4h, v16.4h, v17.4h}, [x15], x3 +// CHECK: // encoding: [0xef,0x65,0xc3,0x0c] +// CHECK: ld1 {v31.2s, 
v0.2s, v1.2s}, [sp], #24 +// CHECK: // encoding: [0xff,0x6b,0xdf,0x0c] +// CHECK: ld1 {v0.1d, v1.1d, v2.1d}, [x0], #24 +// CHECK: // encoding: [0x00,0x6c,0xdf,0x0c] + +//------------------------------------------------------------------------------ +// Load multiple 1-element structures from four consecutive registers +// (post-index) +//------------------------------------------------------------------------------ + ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1 + ld1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], x2 + ld1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64 + ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #64 + ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x3 + ld1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x4 + ld1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], #32 + ld1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0], #32 +// CHECK: ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1 +// CHECK: // encoding: [0x00,0x20,0xc1,0x4c] +// CHECK: ld1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], x2 +// CHECK: // encoding: [0xef,0x25,0xc2,0x4c] +// CHECK: ld1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64 +// CHECK: // encoding: [0xff,0x2b,0xdf,0x4c] +// CHECK: ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #64 +// CHECK: // encoding: [0x00,0x2c,0xdf,0x4c] +// CHECK: ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x3 +// CHECK: // encoding: [0x00,0x20,0xc3,0x0c] +// CHECK: ld1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x4 +// CHECK: // encoding: [0xef,0x25,0xc4,0x0c] +// CHECK: ld1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], #32 +// CHECK: // encoding: [0xff,0x2b,0xdf,0x0c] +// CHECK: ld1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0], #32 +// CHECK: // encoding: [0x00,0x2c,0xdf,0x0c] + +//------------------------------------------------------------------------------ +// Load multiple 2-element structures from two consecutive registers +// (post-index) +//------------------------------------------------------------------------------ + ld2 {v0.16b, v1.16b}, [x0], x1 + ld2 {v15.8h, v16.8h}, [x15], x2 + ld2 {v31.4s, v0.4s}, [sp], #32 + ld2 {v0.2d, v1.2d}, [x0], 
#32 + ld2 {v0.8b, v1.8b}, [x0], x2 + ld2 {v15.4h, v16.4h}, [x15], x3 + ld2 {v31.2s, v0.2s}, [sp], #16 +// CHECK: ld2 {v0.16b, v1.16b}, [x0], x1 +// CHECK: // encoding: [0x00,0x80,0xc1,0x4c] +// CHECK: ld2 {v15.8h, v16.8h}, [x15], x2 +// CHECK: // encoding: [0xef,0x85,0xc2,0x4c] +// CHECK: ld2 {v31.4s, v0.4s}, [sp], #32 +// CHECK: // encoding: [0xff,0x8b,0xdf,0x4c] +// CHECK: ld2 {v0.2d, v1.2d}, [x0], #32 +// CHECK: // encoding: [0x00,0x8c,0xdf,0x4c] +// CHECK: ld2 {v0.8b, v1.8b}, [x0], x2 +// CHECK: // encoding: [0x00,0x80,0xc2,0x0c] +// CHECK: ld2 {v15.4h, v16.4h}, [x15], x3 +// CHECK: // encoding: [0xef,0x85,0xc3,0x0c] +// CHECK: ld2 {v31.2s, v0.2s}, [sp], #16 +// CHECK: // encoding: [0xff,0x8b,0xdf,0x0c] + +//------------------------------------------------------------------------------ +// Load multiple 3-element structures from three consecutive registers +// (post-index) +//------------------------------------------------------------------------------ + ld3 {v0.16b, v1.16b, v2.16b}, [x0], x1 + ld3 {v15.8h, v16.8h, v17.8h}, [x15], x2 + ld3 {v31.4s, v0.4s, v1.4s}, [sp], #48 + ld3 {v0.2d, v1.2d, v2.2d}, [x0], #48 + ld3 {v0.8b, v1.8b, v2.8b}, [x0], x2 + ld3 {v15.4h, v16.4h, v17.4h}, [x15], x3 + ld3 {v31.2s, v0.2s, v1.2s}, [sp], #24 +// CHECK: ld3 {v0.16b, v1.16b, v2.16b}, [x0], x1 +// CHECK: // encoding: [0x00,0x40,0xc1,0x4c] +// CHECK: ld3 {v15.8h, v16.8h, v17.8h}, [x15], x2 +// CHECK: // encoding: [0xef,0x45,0xc2,0x4c] +// CHECK: ld3 {v31.4s, v0.4s, v1.4s}, [sp], #48 +// CHECK: // encoding: [0xff,0x4b,0xdf,0x4c] +// CHECK: ld3 {v0.2d, v1.2d, v2.2d}, [x0], #48 +// CHECK: // encoding: [0x00,0x4c,0xdf,0x4c] +// CHECK: ld3 {v0.8b, v1.8b, v2.8b}, [x0], x2 +// CHECK: // encoding: [0x00,0x40,0xc2,0x0c] +// CHECK: ld3 {v15.4h, v16.4h, v17.4h}, [x15], x3 +// CHECK: // encoding: [0xef,0x45,0xc3,0x0c] +// CHECK: ld3 {v31.2s, v0.2s, v1.2s}, [sp], #24 +// CHECK: // encoding: [0xff,0x4b,0xdf,0x0c] + 
+//------------------------------------------------------------------------------ +// Load multiple 4-element structures from four consecutive registers +// (post-index) +//------------------------------------------------------------------------------ + ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1 + ld4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], x2 + ld4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64 + ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #64 + ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x3 + ld4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x4 + ld4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], #32 +// CHECK: ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1 +// CHECK: // encoding: [0x00,0x00,0xc1,0x4c] +// CHECK: ld4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], x2 +// CHECK: // encoding: [0xef,0x05,0xc2,0x4c] +// CHECK: ld4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64 +// CHECK: // encoding: [0xff,0x0b,0xdf,0x4c] +// CHECK: ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #64 +// CHECK: // encoding: [0x00,0x0c,0xdf,0x4c] +// CHECK: ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x3 +// CHECK: // encoding: [0x00,0x00,0xc3,0x0c] +// CHECK: ld4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x4 +// CHECK: // encoding: [0xef,0x05,0xc4,0x0c] +// CHECK: ld4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], #32 +// CHECK: // encoding: [0xff,0x0b,0xdf,0x0c] + +//------------------------------------------------------------------------------ +// Store multiple 1-element structures from one register (post-index) +//------------------------------------------------------------------------------ + st1 {v0.16b}, [x0], x1 + st1 {v15.8h}, [x15], x2 + st1 {v31.4s}, [sp], #16 + st1 {v0.2d}, [x0], #16 + st1 {v0.8b}, [x0], x2 + st1 {v15.4h}, [x15], x3 + st1 {v31.2s}, [sp], #8 + st1 {v0.1d}, [x0], #8 +// CHECK: st1 {v0.16b}, [x0], x1 +// CHECK: // encoding: [0x00,0x70,0x81,0x4c] +// CHECK: st1 {v15.8h}, [x15], x2 +// CHECK: // encoding: [0xef,0x75,0x82,0x4c] +// CHECK: st1 {v31.4s}, [sp], #16 +// CHECK: // encoding: [0xff,0x7b,0x9f,0x4c] +// CHECK: st1 
{v0.2d}, [x0], #16 +// CHECK: // encoding: [0x00,0x7c,0x9f,0x4c] +// CHECK: st1 {v0.8b}, [x0], x2 +// CHECK: // encoding: [0x00,0x70,0x82,0x0c] +// CHECK: st1 {v15.4h}, [x15], x3 +// CHECK: // encoding: [0xef,0x75,0x83,0x0c] +// CHECK: st1 {v31.2s}, [sp], #8 +// CHECK: // encoding: [0xff,0x7b,0x9f,0x0c] +// CHECK: st1 {v0.1d}, [x0], #8 +// CHECK: // encoding: [0x00,0x7c,0x9f,0x0c] + +//------------------------------------------------------------------------------ +// Store multiple 1-element structures from two consecutive registers +// (post-index) +//------------------------------------------------------------------------------ + st1 {v0.16b, v1.16b}, [x0], x1 + st1 {v15.8h, v16.8h}, [x15], x2 + st1 {v31.4s, v0.4s}, [sp], #32 + st1 {v0.2d, v1.2d}, [x0], #32 + st1 {v0.8b, v1.8b}, [x0], x2 + st1 {v15.4h, v16.4h}, [x15], x3 + st1 {v31.2s, v0.2s}, [sp], #16 + st1 {v0.1d, v1.1d}, [x0], #16 +// CHECK: st1 {v0.16b, v1.16b}, [x0], x1 +// CHECK: // encoding: [0x00,0xa0,0x81,0x4c] +// CHECK: st1 {v15.8h, v16.8h}, [x15], x2 +// CHECK: // encoding: [0xef,0xa5,0x82,0x4c] +// CHECK: st1 {v31.4s, v0.4s}, [sp], #32 +// CHECK: // encoding: [0xff,0xab,0x9f,0x4c] +// CHECK: st1 {v0.2d, v1.2d}, [x0], #32 +// CHECK: // encoding: [0x00,0xac,0x9f,0x4c] +// CHECK: st1 {v0.8b, v1.8b}, [x0], x2 +// CHECK: // encoding: [0x00,0xa0,0x82,0x0c] +// CHECK: st1 {v15.4h, v16.4h}, [x15], x3 +// CHECK: // encoding: [0xef,0xa5,0x83,0x0c] +// CHECK: st1 {v31.2s, v0.2s}, [sp], #16 +// CHECK: // encoding: [0xff,0xab,0x9f,0x0c] +// CHECK: st1 {v0.1d, v1.1d}, [x0], #16 +// CHECK: // encoding: [0x00,0xac,0x9f,0x0c] + +//------------------------------------------------------------------------------ +// Store multiple 1-element structures from three consecutive registers +// (post-index) +//------------------------------------------------------------------------------ + st1 {v0.16b, v1.16b, v2.16b}, [x0], x1 + st1 {v15.8h, v16.8h, v17.8h}, [x15], x2 + st1 {v31.4s, v0.4s, v1.4s}, [sp], #48 + st1 {v0.2d, 
v1.2d, v2.2d}, [x0], #48 + st1 {v0.8b, v1.8b, v2.8b}, [x0], x2 + st1 {v15.4h, v16.4h, v17.4h}, [x15], x3 + st1 {v31.2s, v0.2s, v1.2s}, [sp], #24 + st1 {v0.1d, v1.1d, v2.1d}, [x0], #24 +// CHECK: st1 {v0.16b, v1.16b, v2.16b}, [x0], x1 +// CHECK: // encoding: [0x00,0x60,0x81,0x4c] +// CHECK: st1 {v15.8h, v16.8h, v17.8h}, [x15], x2 +// CHECK: // encoding: [0xef,0x65,0x82,0x4c] +// CHECK: st1 {v31.4s, v0.4s, v1.4s}, [sp], #48 +// CHECK: // encoding: [0xff,0x6b,0x9f,0x4c] +// CHECK: st1 {v0.2d, v1.2d, v2.2d}, [x0], #48 +// CHECK: // encoding: [0x00,0x6c,0x9f,0x4c] +// CHECK: st1 {v0.8b, v1.8b, v2.8b}, [x0], x2 +// CHECK: // encoding: [0x00,0x60,0x82,0x0c] +// CHECK: st1 {v15.4h, v16.4h, v17.4h}, [x15], x3 +// CHECK: // encoding: [0xef,0x65,0x83,0x0c] +// CHECK: st1 {v31.2s, v0.2s, v1.2s}, [sp], #24 +// CHECK: // encoding: [0xff,0x6b,0x9f,0x0c] +// CHECK: st1 {v0.1d, v1.1d, v2.1d}, [x0], #24 +// CHECK: // encoding: [0x00,0x6c,0x9f,0x0c] + +//------------------------------------------------------------------------------ +// Store multiple 1-element structures from four consecutive registers +// (post-index) +//------------------------------------------------------------------------------ + st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1 + st1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], x2 + st1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64 + st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #64 + st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x3 + st1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x4 + st1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], #32 + st1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0], #32 +// CHECK: st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1 +// CHECK: // encoding: [0x00,0x20,0x81,0x4c] +// CHECK: st1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], x2 +// CHECK: // encoding: [0xef,0x25,0x82,0x4c] +// CHECK: st1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64 +// CHECK: // encoding: [0xff,0x2b,0x9f,0x4c] +// CHECK: st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #64 +// CHECK: // encoding: [0x00,0x2c,0x9f,0x4c] +// 
CHECK: st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x3 +// CHECK: // encoding: [0x00,0x20,0x83,0x0c] +// CHECK: st1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x4 +// CHECK: // encoding: [0xef,0x25,0x84,0x0c] +// CHECK: st1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], #32 +// CHECK: // encoding: [0xff,0x2b,0x9f,0x0c] +// CHECK: st1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0], #32 +// CHECK: // encoding: [0x00,0x2c,0x9f,0x0c] + +//------------------------------------------------------------------------------ +// Store multiple 2-element structures from two consecutive registers +// (post-index) +//------------------------------------------------------------------------------ + st2 {v0.16b, v1.16b}, [x0], x1 + st2 {v15.8h, v16.8h}, [x15], x2 + st2 {v31.4s, v0.4s}, [sp], #32 + st2 {v0.2d, v1.2d}, [x0], #32 + st2 {v0.8b, v1.8b}, [x0], x2 + st2 {v15.4h, v16.4h}, [x15], x3 + st2 {v31.2s, v0.2s}, [sp], #16 +// CHECK: st2 {v0.16b, v1.16b}, [x0], x1 +// CHECK: // encoding: [0x00,0x80,0x81,0x4c] +// CHECK: st2 {v15.8h, v16.8h}, [x15], x2 +// CHECK: // encoding: [0xef,0x85,0x82,0x4c] +// CHECK: st2 {v31.4s, v0.4s}, [sp], #32 +// CHECK: // encoding: [0xff,0x8b,0x9f,0x4c] +// CHECK: st2 {v0.2d, v1.2d}, [x0], #32 +// CHECK: // encoding: [0x00,0x8c,0x9f,0x4c] +// CHECK: st2 {v0.8b, v1.8b}, [x0], x2 +// CHECK: // encoding: [0x00,0x80,0x82,0x0c] +// CHECK: st2 {v15.4h, v16.4h}, [x15], x3 +// CHECK: // encoding: [0xef,0x85,0x83,0x0c] +// CHECK: st2 {v31.2s, v0.2s}, [sp], #16 +// CHECK: // encoding: [0xff,0x8b,0x9f,0x0c] + +//------------------------------------------------------------------------------ +// Store multiple 3-element structures from three consecutive registers +// (post-index) +//------------------------------------------------------------------------------ + st3 {v0.16b, v1.16b, v2.16b}, [x0], x1 + st3 {v15.8h, v16.8h, v17.8h}, [x15], x2 + st3 {v31.4s, v0.4s, v1.4s}, [sp], #48 + st3 {v0.2d, v1.2d, v2.2d}, [x0], #48 + st3 {v0.8b, v1.8b, v2.8b}, [x0], x2 + st3 {v15.4h, v16.4h, v17.4h}, [x15], 
x3 + st3 {v31.2s, v0.2s, v1.2s}, [sp], #24 +// CHECK: st3 {v0.16b, v1.16b, v2.16b}, [x0], x1 +// CHECK: // encoding: [0x00,0x40,0x81,0x4c] +// CHECK: st3 {v15.8h, v16.8h, v17.8h}, [x15], x2 +// CHECK: // encoding: [0xef,0x45,0x82,0x4c] +// CHECK: st3 {v31.4s, v0.4s, v1.4s}, [sp], #48 +// CHECK: // encoding: [0xff,0x4b,0x9f,0x4c] +// CHECK: st3 {v0.2d, v1.2d, v2.2d}, [x0], #48 +// CHECK: // encoding: [0x00,0x4c,0x9f,0x4c] +// CHECK: st3 {v0.8b, v1.8b, v2.8b}, [x0], x2 +// CHECK: // encoding: [0x00,0x40,0x82,0x0c] +// CHECK: st3 {v15.4h, v16.4h, v17.4h}, [x15], x3 +// CHECK: // encoding: [0xef,0x45,0x83,0x0c] +// CHECK: st3 {v31.2s, v0.2s, v1.2s}, [sp], #24 +// CHECK: // encoding: [0xff,0x4b,0x9f,0x0c] + +//------------------------------------------------------------------------------ +// Store multiple 4-element structures from four consecutive registers +// (post-index) +//------------------------------------------------------------------------------ + st4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1 + st4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], x2 + st4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64 + st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #64 + st4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x3 + st4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x4 + st4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], #32 +// CHECK: st4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1 +// CHECK: // encoding: [0x00,0x00,0x81,0x4c] +// CHECK: st4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], x2 +// CHECK: // encoding: [0xef,0x05,0x82,0x4c] +// CHECK: st4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64 +// CHECK: // encoding: [0xff,0x0b,0x9f,0x4c] +// CHECK: st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #64 +// CHECK: // encoding: [0x00,0x0c,0x9f,0x4c] +// CHECK: st4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x3 +// CHECK: // encoding: [0x00,0x00,0x83,0x0c] +// CHECK: st4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x4 +// CHECK: // encoding: [0xef,0x05,0x84,0x0c] +// CHECK: st4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], #32 +// CHECK: // encoding: 
[0xff,0x0b,0x9f,0x0c] diff --git a/test/MC/AArch64/neon-simd-shift.s b/test/MC/AArch64/neon-simd-shift.s new file mode 100644 index 0000000..a164323 --- /dev/null +++ b/test/MC/AArch64/neon-simd-shift.s @@ -0,0 +1,434 @@ +// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//------------------------------------------------------------------------------ +// Vector shift right by immediate +//------------------------------------------------------------------------------ + sshr v0.8b, v1.8b, #3 + sshr v0.4h, v1.4h, #3 + sshr v0.2s, v1.2s, #3 + sshr v0.16b, v1.16b, #3 + sshr v0.8h, v1.8h, #3 + sshr v0.4s, v1.4s, #3 + sshr v0.2d, v1.2d, #3 +// CHECK: sshr v0.8b, v1.8b, #3 // encoding: [0x20,0x04,0x0d,0x0f] +// CHECK: sshr v0.4h, v1.4h, #3 // encoding: [0x20,0x04,0x1d,0x0f] +// CHECK: sshr v0.2s, v1.2s, #3 // encoding: [0x20,0x04,0x3d,0x0f] +// CHECK: sshr v0.16b, v1.16b, #3 // encoding: [0x20,0x04,0x0d,0x4f] +// CHECK: sshr v0.8h, v1.8h, #3 // encoding: [0x20,0x04,0x1d,0x4f] +// CHECK: sshr v0.4s, v1.4s, #3 // encoding: [0x20,0x04,0x3d,0x4f] +// CHECK: sshr v0.2d, v1.2d, #3 // encoding: [0x20,0x04,0x7d,0x4f] + +//------------------------------------------------------------------------------ +// Vector shift right by immediate +//------------------------------------------------------------------------------ + ushr v0.8b, v1.8b, #3 + ushr v0.4h, v1.4h, #3 + ushr v0.2s, v1.2s, #3 + ushr v0.16b, v1.16b, #3 + ushr v0.8h, v1.8h, #3 + ushr v0.4s, v1.4s, #3 + ushr v0.2d, v1.2d, #3 + +// CHECK: ushr v0.8b, v1.8b, #3 // encoding: [0x20,0x04,0x0d,0x2f] +// CHECK: ushr v0.4h, v1.4h, #3 // encoding: [0x20,0x04,0x1d,0x2f] +// CHECK: ushr v0.2s, v1.2s, #3 // encoding: [0x20,0x04,0x3d,0x2f] +// CHECK: ushr v0.16b, v1.16b, #3 // encoding: [0x20,0x04,0x0d,0x6f] +// CHECK: ushr v0.8h, v1.8h, #3 // encoding: [0x20,0x04,0x1d,0x6f] +// CHECK: ushr v0.4s, v1.4s, #3 // 
encoding: [0x20,0x04,0x3d,0x6f] +// CHECK: ushr v0.2d, v1.2d, #3 // encoding: [0x20,0x04,0x7d,0x6f] + +//------------------------------------------------------------------------------ +// Vector shift right and accumulate by immediate +//------------------------------------------------------------------------------ + ssra v0.8b, v1.8b, #3 + ssra v0.4h, v1.4h, #3 + ssra v0.2s, v1.2s, #3 + ssra v0.16b, v1.16b, #3 + ssra v0.8h, v1.8h, #3 + ssra v0.4s, v1.4s, #3 + ssra v0.2d, v1.2d, #3 + +// CHECK: ssra v0.8b, v1.8b, #3 // encoding: [0x20,0x14,0x0d,0x0f] +// CHECK: ssra v0.4h, v1.4h, #3 // encoding: [0x20,0x14,0x1d,0x0f] +// CHECK: ssra v0.2s, v1.2s, #3 // encoding: [0x20,0x14,0x3d,0x0f] +// CHECK: ssra v0.16b, v1.16b, #3 // encoding: [0x20,0x14,0x0d,0x4f] +// CHECK: ssra v0.8h, v1.8h, #3 // encoding: [0x20,0x14,0x1d,0x4f] +// CHECK: ssra v0.4s, v1.4s, #3 // encoding: [0x20,0x14,0x3d,0x4f] +// CHECK: ssra v0.2d, v1.2d, #3 // encoding: [0x20,0x14,0x7d,0x4f] + +//------------------------------------------------------------------------------ +// Vector shift right and accumulate by immediate +//------------------------------------------------------------------------------ + usra v0.8b, v1.8b, #3 + usra v0.4h, v1.4h, #3 + usra v0.2s, v1.2s, #3 + usra v0.16b, v1.16b, #3 + usra v0.8h, v1.8h, #3 + usra v0.4s, v1.4s, #3 + usra v0.2d, v1.2d, #3 + +// CHECK: usra v0.8b, v1.8b, #3 // encoding: [0x20,0x14,0x0d,0x2f] +// CHECK: usra v0.4h, v1.4h, #3 // encoding: [0x20,0x14,0x1d,0x2f] +// CHECK: usra v0.2s, v1.2s, #3 // encoding: [0x20,0x14,0x3d,0x2f] +// CHECK: usra v0.16b, v1.16b, #3 // encoding: [0x20,0x14,0x0d,0x6f] +// CHECK: usra v0.8h, v1.8h, #3 // encoding: [0x20,0x14,0x1d,0x6f] +// CHECK: usra v0.4s, v1.4s, #3 // encoding: [0x20,0x14,0x3d,0x6f] +// CHECK: usra v0.2d, v1.2d, #3 // encoding: [0x20,0x14,0x7d,0x6f] + +//------------------------------------------------------------------------------ +// Vector rounding shift right by immediate 
+//------------------------------------------------------------------------------ + srshr v0.8b, v1.8b, #3 + srshr v0.4h, v1.4h, #3 + srshr v0.2s, v1.2s, #3 + srshr v0.16b, v1.16b, #3 + srshr v0.8h, v1.8h, #3 + srshr v0.4s, v1.4s, #3 + srshr v0.2d, v1.2d, #3 + +// CHECK: srshr v0.8b, v1.8b, #3 // encoding: [0x20,0x24,0x0d,0x0f] +// CHECK: srshr v0.4h, v1.4h, #3 // encoding: [0x20,0x24,0x1d,0x0f] +// CHECK: srshr v0.2s, v1.2s, #3 // encoding: [0x20,0x24,0x3d,0x0f] +// CHECK: srshr v0.16b, v1.16b, #3 // encoding: [0x20,0x24,0x0d,0x4f] +// CHECK: srshr v0.8h, v1.8h, #3 // encoding: [0x20,0x24,0x1d,0x4f] +// CHECK: srshr v0.4s, v1.4s, #3 // encoding: [0x20,0x24,0x3d,0x4f] +// CHECK: srshr v0.2d, v1.2d, #3 // encoding: [0x20,0x24,0x7d,0x4f] + + +//------------------------------------------------------------------------------ +// Vector rounding shift right by immediate +//------------------------------------------------------------------------------ + urshr v0.8b, v1.8b, #3 + urshr v0.4h, v1.4h, #3 + urshr v0.2s, v1.2s, #3 + urshr v0.16b, v1.16b, #3 + urshr v0.8h, v1.8h, #3 + urshr v0.4s, v1.4s, #3 + urshr v0.2d, v1.2d, #3 + +// CHECK: urshr v0.8b, v1.8b, #3 // encoding: [0x20,0x24,0x0d,0x2f] +// CHECK: urshr v0.4h, v1.4h, #3 // encoding: [0x20,0x24,0x1d,0x2f] +// CHECK: urshr v0.2s, v1.2s, #3 // encoding: [0x20,0x24,0x3d,0x2f] +// CHECK: urshr v0.16b, v1.16b, #3 // encoding: [0x20,0x24,0x0d,0x6f] +// CHECK: urshr v0.8h, v1.8h, #3 // encoding: [0x20,0x24,0x1d,0x6f] +// CHECK: urshr v0.4s, v1.4s, #3 // encoding: [0x20,0x24,0x3d,0x6f] +// CHECK: urshr v0.2d, v1.2d, #3 // encoding: [0x20,0x24,0x7d,0x6f] + + +//------------------------------------------------------------------------------ +// Vector rounding shift right and accumulate by immediate +//------------------------------------------------------------------------------ + srsra v0.8b, v1.8b, #3 + srsra v0.4h, v1.4h, #3 + srsra v0.2s, v1.2s, #3 + srsra v0.16b, v1.16b, #3 + srsra v0.8h, v1.8h, #3 + srsra v0.4s, v1.4s,
#3 + srsra v0.2d, v1.2d, #3 + +// CHECK: srsra v0.8b, v1.8b, #3 // encoding: [0x20,0x34,0x0d,0x0f] +// CHECK: srsra v0.4h, v1.4h, #3 // encoding: [0x20,0x34,0x1d,0x0f] +// CHECK: srsra v0.2s, v1.2s, #3 // encoding: [0x20,0x34,0x3d,0x0f] +// CHECK: srsra v0.16b, v1.16b, #3 // encoding: [0x20,0x34,0x0d,0x4f] +// CHECK: srsra v0.8h, v1.8h, #3 // encoding: [0x20,0x34,0x1d,0x4f] +// CHECK: srsra v0.4s, v1.4s, #3 // encoding: [0x20,0x34,0x3d,0x4f] +// CHECK: srsra v0.2d, v1.2d, #3 // encoding: [0x20,0x34,0x7d,0x4f] + + +//------------------------------------------------------------------------------ +// Vector rounding shift right and accumulate by immediate +//------------------------------------------------------------------------------ + ursra v0.8b, v1.8b, #3 + ursra v0.4h, v1.4h, #3 + ursra v0.2s, v1.2s, #3 + ursra v0.16b, v1.16b, #3 + ursra v0.8h, v1.8h, #3 + ursra v0.4s, v1.4s, #3 + ursra v0.2d, v1.2d, #3 + +// CHECK: ursra v0.8b, v1.8b, #3 // encoding: [0x20,0x34,0x0d,0x2f] +// CHECK: ursra v0.4h, v1.4h, #3 // encoding: [0x20,0x34,0x1d,0x2f] +// CHECK: ursra v0.2s, v1.2s, #3 // encoding: [0x20,0x34,0x3d,0x2f] +// CHECK: ursra v0.16b, v1.16b, #3 // encoding: [0x20,0x34,0x0d,0x6f] +// CHECK: ursra v0.8h, v1.8h, #3 // encoding: [0x20,0x34,0x1d,0x6f] +// CHECK: ursra v0.4s, v1.4s, #3 // encoding: [0x20,0x34,0x3d,0x6f] +// CHECK: ursra v0.2d, v1.2d, #3 // encoding: [0x20,0x34,0x7d,0x6f] + + +//------------------------------------------------------------------------------ +// Vector shift right and insert by immediate +//------------------------------------------------------------------------------ + sri v0.8b, v1.8b, #3 + sri v0.4h, v1.4h, #3 + sri v0.2s, v1.2s, #3 + sri v0.16b, v1.16b, #3 + sri v0.8h, v1.8h, #3 + sri v0.4s, v1.4s, #3 + sri v0.2d, v1.2d, #3 + +// CHECK: sri v0.8b, v1.8b, #3 // encoding: [0x20,0x44,0x0d,0x2f] +// CHECK: sri v0.4h, v1.4h, #3 // encoding: [0x20,0x44,0x1d,0x2f] +// CHECK: sri v0.2s, v1.2s, #3 // encoding: [0x20,0x44,0x3d,0x2f] +// CHECK: 
sri v0.16b, v1.16b, #3 // encoding: [0x20,0x44,0x0d,0x6f] +// CHECK: sri v0.8h, v1.8h, #3 // encoding: [0x20,0x44,0x1d,0x6f] +// CHECK: sri v0.4s, v1.4s, #3 // encoding: [0x20,0x44,0x3d,0x6f] + + +//------------------------------------------------------------------------------ +// Vector shift left and insert by immediate +//------------------------------------------------------------------------------ + sli v0.8b, v1.8b, #3 + sli v0.4h, v1.4h, #3 + sli v0.2s, v1.2s, #3 + sli v0.16b, v1.16b, #3 + sli v0.8h, v1.8h, #3 + sli v0.4s, v1.4s, #3 + sli v0.2d, v1.2d, #3 + +// CHECK: sli v0.8b, v1.8b, #3 // encoding: [0x20,0x54,0x0b,0x2f] +// CHECK: sli v0.4h, v1.4h, #3 // encoding: [0x20,0x54,0x13,0x2f] +// CHECK: sli v0.2s, v1.2s, #3 // encoding: [0x20,0x54,0x23,0x2f] +// CHECK: sli v0.16b, v1.16b, #3 // encoding: [0x20,0x54,0x0b,0x6f] +// CHECK: sli v0.8h, v1.8h, #3 // encoding: [0x20,0x54,0x13,0x6f] +// CHECK: sli v0.4s, v1.4s, #3 // encoding: [0x20,0x54,0x23,0x6f] +// CHECK: sli v0.2d, v1.2d, #3 // encoding: [0x20,0x54,0x43,0x6f] + +//------------------------------------------------------------------------------ +// Vector saturating shift left unsigned by immediate +//------------------------------------------------------------------------------ + sqshlu v0.8b, v1.8b, #3 + sqshlu v0.4h, v1.4h, #3 + sqshlu v0.2s, v1.2s, #3 + sqshlu v0.16b, v1.16b, #3 + sqshlu v0.8h, v1.8h, #3 + sqshlu v0.4s, v1.4s, #3 + sqshlu v0.2d, v1.2d, #3 + +// CHECK: sqshlu v0.8b, v1.8b, #3 // encoding: [0x20,0x64,0x0b,0x2f] +// CHECK: sqshlu v0.4h, v1.4h, #3 // encoding: [0x20,0x64,0x13,0x2f] +// CHECK: sqshlu v0.2s, v1.2s, #3 // encoding: [0x20,0x64,0x23,0x2f] +// CHECK: sqshlu v0.16b, v1.16b, #3 // encoding: [0x20,0x64,0x0b,0x6f] +// CHECK: sqshlu v0.8h, v1.8h, #3 // encoding: [0x20,0x64,0x13,0x6f] +// CHECK: sqshlu v0.4s, v1.4s, #3 // encoding: [0x20,0x64,0x23,0x6f] +// CHECK: sqshlu v0.2d, v1.2d, #3 // encoding: [0x20,0x64,0x43,0x6f] + + 
+//------------------------------------------------------------------------------ +// Vector saturating shift left by immediate +//------------------------------------------------------------------------------ + sqshl v0.8b, v1.8b, #3 + sqshl v0.4h, v1.4h, #3 + sqshl v0.2s, v1.2s, #3 + sqshl v0.16b, v1.16b, #3 + sqshl v0.8h, v1.8h, #3 + sqshl v0.4s, v1.4s, #3 + sqshl v0.2d, v1.2d, #3 + +// CHECK: sqshl v0.8b, v1.8b, #3 // encoding: [0x20,0x74,0x0b,0x0f] +// CHECK: sqshl v0.4h, v1.4h, #3 // encoding: [0x20,0x74,0x13,0x0f] +// CHECK: sqshl v0.2s, v1.2s, #3 // encoding: [0x20,0x74,0x23,0x0f] +// CHECK: sqshl v0.16b, v1.16b, #3 // encoding: [0x20,0x74,0x0b,0x4f] +// CHECK: sqshl v0.8h, v1.8h, #3 // encoding: [0x20,0x74,0x13,0x4f] +// CHECK: sqshl v0.4s, v1.4s, #3 // encoding: [0x20,0x74,0x23,0x4f] +// CHECK: sqshl v0.2d, v1.2d, #3 // encoding: [0x20,0x74,0x43,0x4f] + + + +//------------------------------------------------------------------------------ +// Vector saturating shift left by immediate +//------------------------------------------------------------------------------ + uqshl v0.8b, v1.8b, #3 + uqshl v0.4h, v1.4h, #3 + uqshl v0.2s, v1.2s, #3 + uqshl v0.16b, v1.16b, #3 + uqshl v0.8h, v1.8h, #3 + uqshl v0.4s, v1.4s, #3 + uqshl v0.2d, v1.2d, #3 + +// CHECK: uqshl v0.8b, v1.8b, #3 // encoding: [0x20,0x74,0x0b,0x2f] +// CHECK: uqshl v0.4h, v1.4h, #3 // encoding: [0x20,0x74,0x13,0x2f] +// CHECK: uqshl v0.2s, v1.2s, #3 // encoding: [0x20,0x74,0x23,0x2f] +// CHECK: uqshl v0.16b, v1.16b, #3 // encoding: [0x20,0x74,0x0b,0x6f] +// CHECK: uqshl v0.8h, v1.8h, #3 // encoding: [0x20,0x74,0x13,0x6f] +// CHECK: uqshl v0.4s, v1.4s, #3 // encoding: [0x20,0x74,0x23,0x6f] +// CHECK: uqshl v0.2d, v1.2d, #3 // encoding: [0x20,0x74,0x43,0x6f] + + +//------------------------------------------------------------------------------ +// Vector shift right narrow by immediate +//------------------------------------------------------------------------------ + shrn v0.8b, v1.8h, #3 + shrn 
v0.4h, v1.4s, #3 + shrn v0.2s, v1.2d, #3 + shrn2 v0.16b, v1.8h, #3 + shrn2 v0.8h, v1.4s, #3 + shrn2 v0.4s, v1.2d, #3 + +// CHECK: shrn v0.8b, v1.8h, #3 // encoding: [0x20,0x84,0x0d,0x0f] +// CHECK: shrn v0.4h, v1.4s, #3 // encoding: [0x20,0x84,0x1d,0x0f] +// CHECK: shrn v0.2s, v1.2d, #3 // encoding: [0x20,0x84,0x3d,0x0f] +// CHECK: shrn2 v0.16b, v1.8h, #3 // encoding: [0x20,0x84,0x0d,0x4f] +// CHECK: shrn2 v0.8h, v1.4s, #3 // encoding: [0x20,0x84,0x1d,0x4f] +// CHECK: shrn2 v0.4s, v1.2d, #3 // encoding: [0x20,0x84,0x3d,0x4f] + +//------------------------------------------------------------------------------ +// Vector saturating shift right unsigned narrow by immediate +//------------------------------------------------------------------------------ + sqshrun v0.8b, v1.8h, #3 + sqshrun v0.4h, v1.4s, #3 + sqshrun v0.2s, v1.2d, #3 + sqshrun2 v0.16b, v1.8h, #3 + sqshrun2 v0.8h, v1.4s, #3 + sqshrun2 v0.4s, v1.2d, #3 + +// CHECK: sqshrun v0.8b, v1.8h, #3 // encoding: [0x20,0x84,0x0d,0x2f] +// CHECK: sqshrun v0.4h, v1.4s, #3 // encoding: [0x20,0x84,0x1d,0x2f] +// CHECK: sqshrun v0.2s, v1.2d, #3 // encoding: [0x20,0x84,0x3d,0x2f] +// CHECK: sqshrun2 v0.16b, v1.8h, #3 // encoding: [0x20,0x84,0x0d,0x6f] +// CHECK: sqshrun2 v0.8h, v1.4s, #3 // encoding: [0x20,0x84,0x1d,0x6f] +// CHECK: sqshrun2 v0.4s, v1.2d, #3 // encoding: [0x20,0x84,0x3d,0x6f] + +//------------------------------------------------------------------------------ +// Vector rounding shift right narrow by immediate +//------------------------------------------------------------------------------ + rshrn v0.8b, v1.8h, #3 + rshrn v0.4h, v1.4s, #3 + rshrn v0.2s, v1.2d, #3 + rshrn2 v0.16b, v1.8h, #3 + rshrn2 v0.8h, v1.4s, #3 + rshrn2 v0.4s, v1.2d, #3 + +// CHECK: rshrn v0.8b, v1.8h, #3 // encoding: [0x20,0x8c,0x0d,0x0f] +// CHECK: rshrn v0.4h, v1.4s, #3 // encoding: [0x20,0x8c,0x1d,0x0f] +// CHECK: rshrn v0.2s, v1.2d, #3 // encoding: [0x20,0x8c,0x3d,0x0f] +// CHECK: rshrn2 v0.16b, v1.8h, #3 // encoding: 
[0x20,0x8c,0x0d,0x4f] +// CHECK: rshrn2 v0.8h, v1.4s, #3 // encoding: [0x20,0x8c,0x1d,0x4f] +// CHECK: rshrn2 v0.4s, v1.2d, #3 // encoding: [0x20,0x8c,0x3d,0x4f] + + +//------------------------------------------------------------------------------ +// Vector saturating shift right rounded unsigned narrow by immediate +//------------------------------------------------------------------------------ + sqrshrun v0.8b, v1.8h, #3 + sqrshrun v0.4h, v1.4s, #3 + sqrshrun v0.2s, v1.2d, #3 + sqrshrun2 v0.16b, v1.8h, #3 + sqrshrun2 v0.8h, v1.4s, #3 + sqrshrun2 v0.4s, v1.2d, #3 + +// CHECK: sqrshrun v0.8b, v1.8h, #3 // encoding: [0x20,0x8c,0x0d,0x2f] +// CHECK: sqrshrun v0.4h, v1.4s, #3 // encoding: [0x20,0x8c,0x1d,0x2f] +// CHECK: sqrshrun v0.2s, v1.2d, #3 // encoding: [0x20,0x8c,0x3d,0x2f] +// CHECK: sqrshrun2 v0.16b, v1.8h, #3 // encoding: [0x20,0x8c,0x0d,0x6f] +// CHECK: sqrshrun2 v0.8h, v1.4s, #3 // encoding: [0x20,0x8c,0x1d,0x6f] +// CHECK: sqrshrun2 v0.4s, v1.2d, #3 // encoding: [0x20,0x8c,0x3d,0x6f] + + +//------------------------------------------------------------------------------ +// Vector saturating shift right narrow by immediate +//------------------------------------------------------------------------------ + sqshrn v0.8b, v1.8h, #3 + sqshrn v0.4h, v1.4s, #3 + sqshrn v0.2s, v1.2d, #3 + sqshrn2 v0.16b, v1.8h, #3 + sqshrn2 v0.8h, v1.4s, #3 + sqshrn2 v0.4s, v1.2d, #3 + +// CHECK: sqshrn v0.8b, v1.8h, #3 // encoding: [0x20,0x94,0x0d,0x0f] +// CHECK: sqshrn v0.4h, v1.4s, #3 // encoding: [0x20,0x94,0x1d,0x0f] +// CHECK: sqshrn v0.2s, v1.2d, #3 // encoding: [0x20,0x94,0x3d,0x0f] +// CHECK: sqshrn2 v0.16b, v1.8h, #3 // encoding: [0x20,0x94,0x0d,0x4f] +// CHECK: sqshrn2 v0.8h, v1.4s, #3 // encoding: [0x20,0x94,0x1d,0x4f] +// CHECK: sqshrn2 v0.4s, v1.2d, #3 // encoding: [0x20,0x94,0x3d,0x4f] + + +//------------------------------------------------------------------------------ +// Vector saturating shift right narrow by immediate 
+//------------------------------------------------------------------------------ + uqshrn v0.8b, v1.8h, #3 + uqshrn v0.4h, v1.4s, #3 + uqshrn v0.2s, v1.2d, #3 + uqshrn2 v0.16b, v1.8h, #3 + uqshrn2 v0.8h, v1.4s, #3 + uqshrn2 v0.4s, v1.2d, #3 + +// CHECK: uqshrn v0.8b, v1.8h, #3 // encoding: [0x20,0x94,0x0d,0x2f] +// CHECK: uqshrn v0.4h, v1.4s, #3 // encoding: [0x20,0x94,0x1d,0x2f] +// CHECK: uqshrn v0.2s, v1.2d, #3 // encoding: [0x20,0x94,0x3d,0x2f] +// CHECK: uqshrn2 v0.16b, v1.8h, #3 // encoding: [0x20,0x94,0x0d,0x6f] +// CHECK: uqshrn2 v0.8h, v1.4s, #3 // encoding: [0x20,0x94,0x1d,0x6f] +// CHECK: uqshrn2 v0.4s, v1.2d, #3 // encoding: [0x20,0x94,0x3d,0x6f] + +//------------------------------------------------------------------------------ +// Vector saturating shift right rounded narrow by immediate +//------------------------------------------------------------------------------ + sqrshrn v0.8b, v1.8h, #3 + sqrshrn v0.4h, v1.4s, #3 + sqrshrn v0.2s, v1.2d, #3 + sqrshrn2 v0.16b, v1.8h, #3 + sqrshrn2 v0.8h, v1.4s, #3 + sqrshrn2 v0.4s, v1.2d, #3 + +// CHECK: sqrshrn v0.8b, v1.8h, #3 // encoding: [0x20,0x9c,0x0d,0x0f] +// CHECK: sqrshrn v0.4h, v1.4s, #3 // encoding: [0x20,0x9c,0x1d,0x0f] +// CHECK: sqrshrn v0.2s, v1.2d, #3 // encoding: [0x20,0x9c,0x3d,0x0f] +// CHECK: sqrshrn2 v0.16b, v1.8h, #3 // encoding: [0x20,0x9c,0x0d,0x4f] +// CHECK: sqrshrn2 v0.8h, v1.4s, #3 // encoding: [0x20,0x9c,0x1d,0x4f] +// CHECK: sqrshrn2 v0.4s, v1.2d, #3 // encoding: [0x20,0x9c,0x3d,0x4f] + + +//------------------------------------------------------------------------------ +// Vector saturating shift right rounded narrow by immediate +//------------------------------------------------------------------------------ + uqrshrn v0.8b, v1.8h, #3 + uqrshrn v0.4h, v1.4s, #3 + uqrshrn v0.2s, v1.2d, #3 + uqrshrn2 v0.16b, v1.8h, #3 + uqrshrn2 v0.8h, v1.4s, #3 + uqrshrn2 v0.4s, v1.2d, #3 + +// CHECK: uqrshrn v0.8b, v1.8h, #3 // encoding: [0x20,0x9c,0x0d,0x2f] +// CHECK: uqrshrn v0.4h, v1.4s, #3 
// encoding: [0x20,0x9c,0x1d,0x2f] +// CHECK: uqrshrn v0.2s, v1.2d, #3 // encoding: [0x20,0x9c,0x3d,0x2f] +// CHECK: uqrshrn2 v0.16b, v1.8h, #3 // encoding: [0x20,0x9c,0x0d,0x6f] +// CHECK: uqrshrn2 v0.8h, v1.4s, #3 // encoding: [0x20,0x9c,0x1d,0x6f] +// CHECK: uqrshrn2 v0.4s, v1.2d, #3 // encoding: [0x20,0x9c,0x3d,0x6f] + + +//------------------------------------------------------------------------------ +// Fixed-point convert to floating-point +//------------------------------------------------------------------------------ + scvtf v0.2s, v1.2s, #3 + scvtf v0.4s, v1.4s, #3 + scvtf v0.2d, v1.2d, #3 + ucvtf v0.2s, v1.2s, #3 + ucvtf v0.4s, v1.4s, #3 + ucvtf v0.2d, v1.2d, #3 + +// CHECK: scvtf v0.2s, v1.2s, #3 // encoding: [0x20,0xe4,0x3d,0x0f] +// CHECK: scvtf v0.4s, v1.4s, #3 // encoding: [0x20,0xe4,0x3d,0x4f] +// CHECK: scvtf v0.2d, v1.2d, #3 // encoding: [0x20,0xe4,0x7d,0x4f] +// CHECK: ucvtf v0.2s, v1.2s, #3 // encoding: [0x20,0xe4,0x3d,0x2f] +// CHECK: ucvtf v0.4s, v1.4s, #3 // encoding: [0x20,0xe4,0x3d,0x6f] +// CHECK: ucvtf v0.2d, v1.2d, #3 // encoding: [0x20,0xe4,0x7d,0x6f] + +//------------------------------------------------------------------------------ +// Floating-point convert to fixed-point +//------------------------------------------------------------------------------ + fcvtzs v0.2s, v1.2s, #3 + fcvtzs v0.4s, v1.4s, #3 + fcvtzs v0.2d, v1.2d, #3 + fcvtzu v0.2s, v1.2s, #3 + fcvtzu v0.4s, v1.4s, #3 + fcvtzu v0.2d, v1.2d, #3 + + +// CHECK: fcvtzs v0.2s, v1.2s, #3 // encoding: [0x20,0xfc,0x3d,0x0f] +// CHECK: fcvtzs v0.4s, v1.4s, #3 // encoding: [0x20,0xfc,0x3d,0x4f] +// CHECK: fcvtzs v0.2d, v1.2d, #3 // encoding: [0x20,0xfc,0x7d,0x4f] +// CHECK: fcvtzu v0.2s, v1.2s, #3 // encoding: [0x20,0xfc,0x3d,0x2f] +// CHECK: fcvtzu v0.4s, v1.4s, #3 // encoding: [0x20,0xfc,0x3d,0x6f] +// CHECK: fcvtzu v0.2d, v1.2d, #3 // encoding: [0x20,0xfc,0x7d,0x6f] + diff --git a/test/MC/AArch64/neon-tbl.s b/test/MC/AArch64/neon-tbl.s new file mode 100644 index 
0000000..ff3e86b --- /dev/null +++ b/test/MC/AArch64/neon-tbl.s @@ -0,0 +1,56 @@ +// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//------------------------------------------------------------------------------ +// Instructions across vector registers +//------------------------------------------------------------------------------ + + tbl v0.8b, {v1.16b}, v2.8b + tbl v0.8b, {v1.16b, v2.16b}, v2.8b + tbl v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b + tbl v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.8b + tbl v0.8b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.8b + +// CHECK: tbl v0.8b, {v1.16b}, v2.8b // encoding: [0x20,0x00,0x02,0x0e] +// CHECK: tbl v0.8b, {v1.16b, v2.16b}, v2.8b // encoding: [0x20,0x20,0x02,0x0e] +// CHECK: tbl v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b // encoding: [0x20,0x40,0x02,0x0e] +// CHECK: tbl v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.8b // encoding: [0x20,0x60,0x02,0x0e] +// CHECK: tbl v0.8b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.8b // encoding: [0xe0,0x63,0x02,0x0e] + + tbl v0.16b, {v1.16b}, v2.16b + tbl v0.16b, {v1.16b, v2.16b}, v2.16b + tbl v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b + tbl v0.16b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.16b + tbl v0.16b, {v30.16b, v31.16b, v0.16b, v1.16b}, v2.16b + +// CHECK: tbl v0.16b, {v1.16b}, v2.16b // encoding: [0x20,0x00,0x02,0x4e] +// CHECK: tbl v0.16b, {v1.16b, v2.16b}, v2.16b // encoding: [0x20,0x20,0x02,0x4e] +// CHECK: tbl v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b // encoding: [0x20,0x40,0x02,0x4e] +// CHECK: tbl v0.16b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.16b // encoding: [0x20,0x60,0x02,0x4e] +// CHECK: tbl v0.16b, {v30.16b, v31.16b, v0.16b, v1.16b}, v2.16b // encoding: [0xc0,0x63,0x02,0x4e] + + tbx v0.8b, {v1.16b}, v2.8b + tbx v0.8b, {v1.16b, v2.16b}, v2.8b + tbx v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b + tbx v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.8b + tbx v0.8b, {v31.16b, v0.16b, v1.16b, 
v2.16b}, v2.8b + +// CHECK: tbx v0.8b, {v1.16b}, v2.8b // encoding: [0x20,0x10,0x02,0x0e] +// CHECK: tbx v0.8b, {v1.16b, v2.16b}, v2.8b // encoding: [0x20,0x30,0x02,0x0e] +// CHECK: tbx v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b // encoding: [0x20,0x50,0x02,0x0e] +// CHECK: tbx v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.8b // encoding: [0x20,0x70,0x02,0x0e] +// CHECK: tbx v0.8b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.8b // encoding: [0xe0,0x73,0x02,0x0e] + + tbx v0.16b, {v1.16b}, v2.16b + tbx v0.16b, {v1.16b, v2.16b}, v2.16b + tbx v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b + tbx v0.16b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.16b + tbx v0.16b, {v30.16b, v31.16b, v0.16b, v1.16b}, v2.16b + +// CHECK: tbx v0.16b, {v1.16b}, v2.16b // encoding: [0x20,0x10,0x02,0x4e] +// CHECK: tbx v0.16b, {v1.16b, v2.16b}, v2.16b // encoding: [0x20,0x30,0x02,0x4e] +// CHECK: tbx v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b // encoding: [0x20,0x50,0x02,0x4e] +// CHECK: tbx v0.16b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.16b // encoding: [0x20,0x70,0x02,0x4e] +// CHECK: tbx v0.16b, {v30.16b, v31.16b, v0.16b, v1.16b}, v2.16b // encoding: [0xc0,0x73,0x02,0x4e] + diff --git a/test/MC/ARM/2010-11-30-reloc-movt.s b/test/MC/ARM/2010-11-30-reloc-movt.s new file mode 100644 index 0000000..9de88f0 --- /dev/null +++ b/test/MC/ARM/2010-11-30-reloc-movt.s @@ -0,0 +1,41 @@ +// RUN: llvm-mc %s -triple=armv7-linux-gnueabi -filetype=obj -o - | \ +// RUN: llvm-readobj -s -sr -sd | FileCheck %s + + .syntax unified + .eabi_attribute 6, 10 + .eabi_attribute 8, 1 + .eabi_attribute 9, 2 + .fpu neon + .eabi_attribute 20, 1 + .eabi_attribute 21, 1 + .eabi_attribute 23, 3 + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + .file "/home/espindola/llvm/llvm/test/CodeGen/ARM/2010-11-30-reloc-movt.ll" + .text + .globl barf + .align 2 + .type barf,%function +barf: @ @barf +@ BB#0: @ %entry + push {r11, lr} + movw r0, :lower16:a + movt r0, :upper16:a + bl foo + pop {r11, pc} +.Ltmp0: + .size barf, .Ltmp0-barf + + + +// CHECK: Section { 
+// CHECK: Name: .text +// CHECK: SectionData ( +// CHECK-NEXT: 0000: 00482DE9 000000E3 000040E3 FEFFFFEB +// CHECK-NEXT: 0010: 0088BDE8 +// CHECK-NEXT: ) +// CHECK: Relocations [ +// CHECK-NEXT: 0x4 R_ARM_MOVW_ABS_NC a +// CHECK-NEXT: 0x8 R_ARM_MOVT_ABS +// CHECK-NEXT: 0xC R_ARM_CALL foo +// CHECK-NEXT: ] diff --git a/test/MC/ARM/AlignedBundling/lit.local.cfg b/test/MC/ARM/AlignedBundling/lit.local.cfg index 6c49f08..ba763cf 100644 --- a/test/MC/ARM/AlignedBundling/lit.local.cfg +++ b/test/MC/ARM/AlignedBundling/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.s'] - targets = set(config.root.targets_to_build.split()) if not 'X86' in targets: config.unsupported = True diff --git a/test/MC/ARM/align_arm_2_thumb.s b/test/MC/ARM/align_arm_2_thumb.s new file mode 100644 index 0000000..120e964 --- /dev/null +++ b/test/MC/ARM/align_arm_2_thumb.s @@ -0,0 +1,15 @@ +@ RUN: llvm-mc -triple armv7-none-linux -filetype=obj -o %t.o %s +@ RUN: llvm-objdump -triple thumbv7-none-linux -d %t.o | FileCheck --check-prefix=ARM_2_THUMB %s + +@ RUN: llvm-mc -triple armv7-apple-darwin -filetype=obj -o %t_darwin.o %s +@ RUN: llvm-objdump -triple thumbv7-apple-darwin -d %t_darwin.o | FileCheck --check-prefix=ARM_2_THUMB %s + +.syntax unified +.code 16 +@ ARM_2_THUMB-LABEL: foo +foo: + add r0, r0 +.align 3 +@ ARM_2_THUMB: 2: 00 bf nop + add r0, r0 + diff --git a/test/MC/ARM/align_thumb_2_arm.s b/test/MC/ARM/align_thumb_2_arm.s new file mode 100644 index 0000000..328bfab --- /dev/null +++ b/test/MC/ARM/align_thumb_2_arm.s @@ -0,0 +1,15 @@ +@ RUN: llvm-mc -triple thumbv7-none-linux -filetype=obj -o %t.o %s +@ RUN: llvm-objdump -triple armv7-none-linux -d %t.o | FileCheck --check-prefix=THUMB_2_ARM %s + +@ RUN: llvm-mc -triple thumbv7-apple-darwin -filetype=obj -o %t_darwin.o %s +@ RUN: llvm-objdump -triple armv7-apple-darwin -d %t_darwin.o | FileCheck --check-prefix=THUMB_2_ARM %s + +.syntax unified +.code 32 +@ THUMB_2_ARM-LABEL: foo +foo: + add r0, r0 +.align 3 +@ THUMB_2_ARM: 4: 00 f0 20 
e3 nop + add r0, r0 + diff --git a/test/MC/ARM/arm-ldrd.s b/test/MC/ARM/arm-ldrd.s new file mode 100644 index 0000000..c26ee25 --- /dev/null +++ b/test/MC/ARM/arm-ldrd.s @@ -0,0 +1,57 @@ +// RUN: not llvm-mc -arch arm -mattr=+v5te %s 2>&1 | FileCheck %s +// +// rdar://14479793 + +ldrd r1, r2, [pc, #0] +ldrd r1, r2, [r3, #4] +ldrd r1, r2, [r3], #4 +ldrd r1, r2, [r3, #4]! +ldrd r1, r2, [r3, -r4]! +ldrd r1, r2, [r3, r4] +ldrd r1, r2, [r3], r4 +// CHECK: error: Rt must be even-numbered +// CHECK: error: Rt must be even-numbered +// CHECK: error: Rt must be even-numbered +// CHECK: error: Rt must be even-numbered +// CHECK: error: Rt must be even-numbered +// CHECK: error: Rt must be even-numbered +// CHECK: error: Rt must be even-numbered + +ldrd r0, r3, [pc, #0] +ldrd r0, r3, [r4, #4] +ldrd r0, r3, [r4], #4 +ldrd r0, r3, [r4, #4]! +ldrd r0, r3, [r4, -r5]! +ldrd r0, r3, [r4, r5] +ldrd r0, r3, [r4], r5 +// CHECK: error: destination operands must be sequential +// CHECK: error: destination operands must be sequential +// CHECK: error: destination operands must be sequential +// CHECK: error: destination operands must be sequential +// CHECK: error: destination operands must be sequential +// CHECK: error: destination operands must be sequential +// CHECK: error: destination operands must be sequential + +ldrd lr, pc, [pc, #0] +ldrd lr, pc, [r3, #4] +ldrd lr, pc, [r3], #4 +ldrd lr, pc, [r3, #4]! +ldrd lr, pc, [r3, -r4]! +ldrd lr, pc, [r3, r4] +ldrd lr, pc, [r3], r4 +// CHECK: error: Rt can't be R14 +// CHECK: error: Rt can't be R14 +// CHECK: error: Rt can't be R14 +// CHECK: error: Rt can't be R14 +// CHECK: error: Rt can't be R14 +// CHECK: error: Rt can't be R14 +// CHECK: error: Rt can't be R14 + +ldrd r0, r1, [r0], #4 +ldrd r0, r1, [r1], #4 +ldrd r0, r1, [r0, #4]! +ldrd r0, r1, [r1, #4]! 
+// CHECK: error: base register needs to be different from destination registers +// CHECK: error: base register needs to be different from destination registers +// CHECK: error: base register needs to be different from destination registers +// CHECK: error: base register needs to be different from destination registers diff --git a/test/MC/ARM/arm-memory-instructions.s b/test/MC/ARM/arm-memory-instructions.s index e9f0c3d..ad35dd2 100644 --- a/test/MC/ARM/arm-memory-instructions.s +++ b/test/MC/ARM/arm-memory-instructions.s @@ -114,21 +114,21 @@ _func: @------------------------------------------------------------------------------ @ LDRD (immediate) @------------------------------------------------------------------------------ - ldrd r3, r4, [r5] - ldrd r7, r8, [r2, #15] - ldrd r1, r2, [r9, #32]! + ldrd r2, r3, [r5] + ldrd r6, r7, [r2, #15] + ldrd r0, r1, [r9, #32]! ldrd r6, r7, [r1], #8 - ldrd r1, r2, [r8], #0 - ldrd r1, r2, [r8], #+0 - ldrd r1, r2, [r8], #-0 + ldrd r0, r1, [r8], #0 + ldrd r0, r1, [r8], #+0 + ldrd r0, r1, [r8], #-0 -@ CHECK: ldrd r3, r4, [r5] @ encoding: [0xd0,0x30,0xc5,0xe1] -@ CHECK: ldrd r7, r8, [r2, #15] @ encoding: [0xdf,0x70,0xc2,0xe1] -@ CHECK: ldrd r1, r2, [r9, #32]! @ encoding: [0xd0,0x12,0xe9,0xe1] -@ CHECK: ldrd r6, r7, [r1], #8 @ encoding: [0xd8,0x60,0xc1,0xe0] -@ CHECK: ldrd r1, r2, [r8], #0 @ encoding: [0xd0,0x10,0xc8,0xe0] -@ CHECK: ldrd r1, r2, [r8], #0 @ encoding: [0xd0,0x10,0xc8,0xe0] -@ CHECK: ldrd r1, r2, [r8], #-0 @ encoding: [0xd0,0x10,0x48,0xe0] +@ CHECK: ldrd r2, r3, [r5] @ encoding: [0xd0,0x20,0xc5,0xe1] +@ CHECK: ldrd r6, r7, [r2, #15] @ encoding: [0xdf,0x60,0xc2,0xe1] +@ CHECK: ldrd r0, r1, [r9, #32]! 
@ encoding: [0xd0,0x02,0xe9,0xe1] +@ CHECK: ldrd r6, r7, [r1], #8 @ encoding: [0xd8,0x60,0xc1,0xe0] +@ CHECK: ldrd r0, r1, [r8], #0 @ encoding: [0xd0,0x00,0xc8,0xe0] +@ CHECK: ldrd r0, r1, [r8], #0 @ encoding: [0xd0,0x00,0xc8,0xe0] +@ CHECK: ldrd r0, r1, [r8], #-0 @ encoding: [0xd0,0x00,0x48,0xe0] @------------------------------------------------------------------------------ @@ -143,15 +143,15 @@ Lbaz: .quad 0 @------------------------------------------------------------------------------ @ LDRD (register) @------------------------------------------------------------------------------ - ldrd r3, r4, [r1, r3] + ldrd r4, r5, [r1, r3] ldrd r4, r5, [r7, r2]! - ldrd r1, r2, [r8], r12 - ldrd r1, r2, [r8], -r12 + ldrd r0, r1, [r8], r12 + ldrd r0, r1, [r8], -r12 -@ CHECK: ldrd r3, r4, [r1, r3] @ encoding: [0xd3,0x30,0x81,0xe1] -@ CHECK: ldrd r4, r5, [r7, r2]! @ encoding: [0xd2,0x40,0xa7,0xe1] -@ CHECK: ldrd r1, r2, [r8], r12 @ encoding: [0xdc,0x10,0x88,0xe0] -@ CHECK: ldrd r1, r2, [r8], -r12 @ encoding: [0xdc,0x10,0x08,0xe0] +@ CHECK: ldrd r4, r5, [r1, r3] @ encoding: [0xd3,0x40,0x81,0xe1] +@ CHECK: ldrd r4, r5, [r7, r2]! 
@ encoding: [0xd2,0x40,0xa7,0xe1] +@ CHECK: ldrd r0, r1, [r8], r12 @ encoding: [0xdc,0x00,0x88,0xe0] +@ CHECK: ldrd r0, r1, [r8], -r12 @ encoding: [0xdc,0x00,0x08,0xe0] @------------------------------------------------------------------------------ diff --git a/test/MC/ARM/basic-arm-instructions-v8.s b/test/MC/ARM/basic-arm-instructions-v8.s new file mode 100644 index 0000000..4ed83c1 --- /dev/null +++ b/test/MC/ARM/basic-arm-instructions-v8.s @@ -0,0 +1,59 @@ +@ New ARMv8 A32 encodings + +@ RUN: llvm-mc -triple armv8 -show-encoding < %s | FileCheck %s --check-prefix=CHECK-V8 +@ RUN: not llvm-mc -triple armv7 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V7 + +@ HLT + hlt #0 + hlt #65535 +@ CHECK-V8: hlt #0 @ encoding: [0x70,0x00,0x00,0xe1] +@ CHECK-V8: hlt #65535 @ encoding: [0x7f,0xff,0x0f,0xe1] +@ CHECK-V7: error: instruction requires: armv8 +@ CHECK-V7: error: instruction requires: armv8 + +@ AL condition code allowable + hltal #0 +@ CHECK-V8: hlt #0 @ encoding: [0x70,0x00,0x00,0xe1] +@ CHECK-V7: error: instruction requires: armv8 + +@------------------------------------------------------------------------------ +@ DMB (v8 barriers) +@------------------------------------------------------------------------------ + dmb ishld + dmb oshld + dmb nshld + dmb ld + +@ CHECK-V8: dmb ishld @ encoding: [0x59,0xf0,0x7f,0xf5] +@ CHECK-V8: dmb oshld @ encoding: [0x51,0xf0,0x7f,0xf5] +@ CHECK-V8: dmb nshld @ encoding: [0x55,0xf0,0x7f,0xf5] +@ CHECK-V8: dmb ld @ encoding: [0x5d,0xf0,0x7f,0xf5] +@ CHECK-V7: error: invalid operand for instruction +@ CHECK-V7: error: invalid operand for instruction +@ CHECK-V7: error: invalid operand for instruction +@ CHECK-V7: error: invalid operand for instruction + +@------------------------------------------------------------------------------ +@ DSB (v8 barriers) +@------------------------------------------------------------------------------ + dsb ishld + dsb oshld + dsb nshld + dsb ld + +@ CHECK-V8: dsb ishld @ encoding: 
[0x49,0xf0,0x7f,0xf5] +@ CHECK-V8: dsb oshld @ encoding: [0x41,0xf0,0x7f,0xf5] +@ CHECK-V8: dsb nshld @ encoding: [0x45,0xf0,0x7f,0xf5] +@ CHECK-V8: dsb ld @ encoding: [0x4d,0xf0,0x7f,0xf5] +@ CHECK-V7: error: invalid operand for instruction +@ CHECK-V7: error: invalid operand for instruction +@ CHECK-V7: error: invalid operand for instruction +@ CHECK-V7: error: invalid operand for instruction + +@------------------------------------------------------------------------------ +@ SEVL +@------------------------------------------------------------------------------ + sevl + +@ CHECK-V8: sevl @ encoding: [0x05,0xf0,0x20,0xe3] +@ CHECK-V7: error: instruction requires: armv8 diff --git a/test/MC/ARM/basic-arm-instructions.s b/test/MC/ARM/basic-arm-instructions.s index ead2ce1..29bc6c0 100644 --- a/test/MC/ARM/basic-arm-instructions.s +++ b/test/MC/ARM/basic-arm-instructions.s @@ -153,7 +153,6 @@ Lforward: @ CHECK: adr r1, #301989888 @ encoding: [0x12,0x14,0x8f,0xe2] @ CHECK: adr r1, #-2147483647 @ encoding: [0x06,0x11,0x8f,0xe2] - @------------------------------------------------------------------------------ @ ADD @------------------------------------------------------------------------------ @@ -187,6 +186,7 @@ Lforward: add r0, #-4 add r4, r5, #-21 + add r0, pc, #0xc0000000 @ CHECK: add r4, r5, #61440 @ encoding: [0x0f,0x4a,0x85,0xe2] @ CHECK: add r4, r5, r6 @ encoding: [0x06,0x40,0x85,0xe0] @@ -217,6 +217,7 @@ Lforward: @ CHECK: sub r0, r0, #4 @ encoding: [0x04,0x00,0x40,0xe2] @ CHECK: sub r4, r5, #21 @ encoding: [0x15,0x40,0x45,0xe2] +@ CHECK: adr r0, #-1073741824 @ encoding: [0x03,0x01,0x8f,0xe2] @ Test right shift by 32, which is encoded as 0 add r3, r1, r2, lsr #32 @@ -459,11 +460,11 @@ Lforward: @------------------------------------------------------------------------------ cdp p7, #1, c1, c1, c1, #4 cdp2 p7, #1, c1, c1, c1, #4 - cdp2 p10, #0, c6, c12, c0, #7 + cdp2 p12, #0, c6, c12, c0, #7 @ CHECK: cdp p7, #1, c1, c1, c1, #4 @ encoding: [0x81,0x17,0x11,0xee] @ 
CHECK: cdp2 p7, #1, c1, c1, c1, #4 @ encoding: [0x81,0x17,0x11,0xfe] -@ CHECK: cdp2 p10, #0, c6, c12, c0, #7 @ encoding: [0xe0,0x6a,0x0c,0xfe] +@ CHECK: cdp2 p12, #0, c6, c12, c0, #7 @ encoding: [0xe0,0x6c,0x0c,0xfe] cdpne p7, #1, c1, c1, c1, #4 @ CHECK: cdpne p7, #1, c1, c1, c1, #4 @ encoding: [0x81,0x17,0x11,0x1e] @@ -804,8 +805,8 @@ Lforward: ldc2l p7, c1, [r8] ldc2l p8, c0, [r9, #-224] ldc2l p9, c1, [r10, #-120]! - ldc2l p10, c2, [r11], #16 - ldc2l p11, c3, [r12], #-72 + ldc2l p0, c2, [r11], #16 + ldc2l p1, c3, [r12], #-72 ldc p12, c4, [r0, #4] ldc p13, c5, [r1] @@ -845,8 +846,8 @@ Lforward: @ CHECK: ldc2l p7, c1, [r8] @ encoding: [0x00,0x17,0xd8,0xfd] @ CHECK: ldc2l p8, c0, [r9, #-224] @ encoding: [0x38,0x08,0x59,0xfd] @ CHECK: ldc2l p9, c1, [r10, #-120]! @ encoding: [0x1e,0x19,0x7a,0xfd] -@ CHECK: ldc2l p10, c2, [r11], #16 @ encoding: [0x04,0x2a,0xfb,0xfc] -@ CHECK: ldc2l p11, c3, [r12], #-72 @ encoding: [0x12,0x3b,0x7c,0xfc] +@ CHECK: ldc2l p0, c2, [r11], #16 @ encoding: [0x04,0x20,0xfb,0xfc] +@ CHECK: ldc2l p1, c3, [r12], #-72 @ encoding: [0x12,0x31,0x7c,0xfc] @ CHECK: ldc p12, c4, [r0, #4] @ encoding: [0x01,0x4c,0x90,0xed] @ CHECK: ldc p13, c5, [r1] @ encoding: [0x00,0x5d,0x91,0xed] @@ -1077,12 +1078,12 @@ Lforward: mrc p14, #0, r1, c1, c2, #4 mrc p15, #7, apsr_nzcv, c15, c6, #6 mrc2 p14, #0, r1, c1, c2, #4 - mrc2 p10, #7, apsr_nzcv, c15, c0, #1 + mrc2 p9, #7, apsr_nzcv, c15, c0, #1 @ CHECK: mrc p14, #0, r1, c1, c2, #4 @ encoding: [0x92,0x1e,0x11,0xee] @ CHECK: mrc p15, #7, apsr_nzcv, c15, c6, #6 @ encoding: [0xd6,0xff,0xff,0xee] @ CHECK: mrc2 p14, #0, r1, c1, c2, #4 @ encoding: [0x92,0x1e,0x11,0xfe] -@ CHECK: mrc2 p10, #7, apsr_nzcv, c15, c0, #1 @ encoding: [0x30,0xfa,0xff,0xfe] +@ CHECK: mrc2 p9, #7, apsr_nzcv, c15, c0, #1 @ encoding: [0x30,0xf9,0xff,0xfe] mrceq p15, #7, apsr_nzcv, c15, c6, #6 @ CHECK: mrceq p15, #7, apsr_nzcv, c15, c6, #6 @ encoding: [0xd6,0xff,0xff,0x0e] @@ -2239,8 +2240,8 @@ Lforward: stc2l p7, c1, [r8] stc2l p8, c0, [r9, #-224] stc2l 
p9, c1, [r10, #-120]! - stc2l p10, c2, [r11], #16 - stc2l p11, c3, [r12], #-72 + stc2l p0, c2, [r11], #16 + stc2l p1, c3, [r12], #-72 stc p12, c4, [r0, #4] stc p13, c5, [r1] @@ -2280,8 +2281,8 @@ Lforward: @ CHECK: stc2l p7, c1, [r8] @ encoding: [0x00,0x17,0xc8,0xfd] @ CHECK: stc2l p8, c0, [r9, #-224] @ encoding: [0x38,0x08,0x49,0xfd] @ CHECK: stc2l p9, c1, [r10, #-120]! @ encoding: [0x1e,0x19,0x6a,0xfd] -@ CHECK: stc2l p10, c2, [r11], #16 @ encoding: [0x04,0x2a,0xeb,0xfc] -@ CHECK: stc2l p11, c3, [r12], #-72 @ encoding: [0x12,0x3b,0x6c,0xfc] +@ CHECK: stc2l p0, c2, [r11], #16 @ encoding: [0x04,0x20,0xeb,0xfc] +@ CHECK: stc2l p1, c3, [r12], #-72 @ encoding: [0x12,0x31,0x6c,0xfc] @ CHECK: stc p12, c4, [r0, #4] @ encoding: [0x01,0x4c,0x80,0xed] @ CHECK: stc p13, c5, [r1] @ encoding: [0x00,0x5d,0x81,0xed] @@ -2927,6 +2928,7 @@ Lforward: hint #2 hint #1 hint #0 + hintgt #239 @ CHECK: wfe @ encoding: [0x02,0xf0,0x20,0xe3] @ CHECK: wfehi @ encoding: [0x02,0xf0,0x20,0x83] @@ -2939,3 +2941,4 @@ Lforward: @ CHECK: wfe @ encoding: [0x02,0xf0,0x20,0xe3] @ CHECK: yield @ encoding: [0x01,0xf0,0x20,0xe3] @ CHECK: nop @ encoding: [0x00,0xf0,0x20,0xe3] +@ CHECK: hintgt #239 @ encoding: [0xef,0xf0,0x20,0xc3] diff --git a/test/MC/ARM/basic-thumb-instructions.s b/test/MC/ARM/basic-thumb-instructions.s index b48db9a..dec7f5b 100644 --- a/test/MC/ARM/basic-thumb-instructions.s +++ b/test/MC/ARM/basic-thumb-instructions.s @@ -128,7 +128,7 @@ _func: beq _bar b #1838 b #-420 - beq #336 + beq #-256 beq #160 @ CHECK: b _baz @ encoding: [A,0xe0'A'] @@ -137,7 +137,7 @@ _func: @ fixup A - offset: 0, value: _bar, kind: fixup_arm_thumb_bcc @ CHECK: b #1838 @ encoding: [0x97,0xe3] @ CHECK: b #-420 @ encoding: [0x2e,0xe7] -@ CHECK: beq #336 @ encoding: [0xa8,0xd0] +@ CHECK: beq #-256 @ encoding: [0x80,0xd0] @ CHECK: beq #160 @ encoding: [0x50,0xd0] @------------------------------------------------------------------------------ @@ -216,6 +216,16 @@ _func: @ CHECK: cmp r8, r1 @ encoding: [0x88,0x45] 
@------------------------------------------------------------------------------ +@ CPS +@------------------------------------------------------------------------------ + + cpsie f + cpsid a + +@ CHECK: cpsie f @ encoding: [0x61,0xb6] +@ CHECK: cpsid a @ encoding: [0x74,0xb6] + +@------------------------------------------------------------------------------ @ EOR @------------------------------------------------------------------------------ eors r4, r5 diff --git a/test/MC/ARM/basic-thumb2-instructions-v8.s b/test/MC/ARM/basic-thumb2-instructions-v8.s new file mode 100644 index 0000000..a7882ae --- /dev/null +++ b/test/MC/ARM/basic-thumb2-instructions-v8.s @@ -0,0 +1,87 @@ +@ New ARMv8 T32 encodings + +@ RUN: llvm-mc -triple thumbv8 -show-encoding < %s | FileCheck %s --check-prefix=CHECK-V8 +@ RUN: not llvm-mc -triple thumbv7 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V7 + +@ HLT + hlt #0 + hlt #63 +@ CHECK-V8: hlt #0 @ encoding: [0x80,0xba] +@ CHECK-V8: hlt #63 @ encoding: [0xbf,0xba] +@ CHECK-V7: error: instruction requires: armv8 +@ CHECK-V7: error: instruction requires: armv8 + +@ In IT block + it pl + hlt #24 + +@ CHECK-V8: it pl @ encoding: [0x58,0xbf] +@ CHECK-V8: hlt #24 @ encoding: [0x98,0xba] +@ CHECK-V7: error: instruction requires: armv8 + +@ Can accept AL condition code + hltal #24 +@ CHECK-V8: hlt #24 @ encoding: [0x98,0xba] +@ CHECK-V7: error: instruction requires: armv8 + +@ DCPS{1,2,3} + dcps1 + dcps2 + dcps3 +@ CHECK-V8: dcps1 @ encoding: [0x8f,0xf7,0x01,0x80] +@ CHECK-V8: dcps2 @ encoding: [0x8f,0xf7,0x02,0x80] +@ CHECK-V8: dcps3 @ encoding: [0x8f,0xf7,0x03,0x80] +@ CHECK-V7: error: instruction requires: armv8 +@ CHECK-V7: error: instruction requires: armv8 +@ CHECK-V7: error: instruction requires: armv8 + +@------------------------------------------------------------------------------ +@ DMB (v8 barriers) +@------------------------------------------------------------------------------ + dmb ishld + dmb oshld + dmb nshld + dmb 
ld + +@ CHECK-V8: dmb ishld @ encoding: [0xbf,0xf3,0x59,0x8f] +@ CHECK-V8: dmb oshld @ encoding: [0xbf,0xf3,0x51,0x8f] +@ CHECK-V8: dmb nshld @ encoding: [0xbf,0xf3,0x55,0x8f] +@ CHECK-V8: dmb ld @ encoding: [0xbf,0xf3,0x5d,0x8f] +@ CHECK-V7: error: invalid operand for instruction +@ CHECK-V7: error: invalid operand for instruction +@ CHECK-V7: error: invalid operand for instruction +@ CHECK-V7: error: invalid operand for instruction + +@------------------------------------------------------------------------------ +@ DSB (v8 barriers) +@------------------------------------------------------------------------------ + dsb ishld + dsb oshld + dsb nshld + dsb ld + +@ CHECK-V8: dsb ishld @ encoding: [0xbf,0xf3,0x49,0x8f] +@ CHECK-V8: dsb oshld @ encoding: [0xbf,0xf3,0x41,0x8f] +@ CHECK-V8: dsb nshld @ encoding: [0xbf,0xf3,0x45,0x8f] +@ CHECK-V8: dsb ld @ encoding: [0xbf,0xf3,0x4d,0x8f] +@ CHECK-V7: error: invalid operand for instruction +@ CHECK-V7: error: invalid operand for instruction +@ CHECK-V7: error: invalid operand for instruction +@ CHECK-V7: error: invalid operand for instruction + +@------------------------------------------------------------------------------ +@ SEVL +@------------------------------------------------------------------------------ + sevl + sevl.w + it ge + sevlge + +@ CHECK-V8: sevl @ encoding: [0x50,0xbf] +@ CHECK-V8: sevl.w @ encoding: [0xaf,0xf3,0x05,0x80] +@ CHECK-V8: it ge @ encoding: [0xa8,0xbf] +@ CHECK-V8: sevlge @ encoding: [0x50,0xbf] +@ CHECK-V7: error: instruction requires: armv8 +@ CHECK-V7: error: instruction requires: armv8 +@ CHECK-V7: error: +@ CHECK-V7: error: instruction requires: armv8 diff --git a/test/MC/ARM/basic-thumb2-instructions.s b/test/MC/ARM/basic-thumb2-instructions.s index b5d3966..3a5f488 100644 --- a/test/MC/ARM/basic-thumb2-instructions.s +++ b/test/MC/ARM/basic-thumb2-instructions.s @@ -80,6 +80,7 @@ _func: adds r2, r2, #56 adds r2, #56 add r1, r7, #0xcbcbcbcb + add sp, sp, #0x1fe0000 adds.w r2, #-16 
adds.w r2, r2, #-16 @@ -103,6 +104,7 @@ _func: @ CHECK: adds r2, #56 @ encoding: [0x38,0x32] @ CHECK: adds r2, #56 @ encoding: [0x38,0x32] @ CHECK: add.w r1, r7, #3419130827 @ encoding: [0x07,0xf1,0xcb,0x31] +@ CHECK: add.w sp, sp, #33423360 @ encoding: [0x0d,0xf1,0xff,0x7d] @ CHECK: subs.w r2, r2, #16 @ encoding: [0xb2,0xf1,0x10,0x02] @ CHECK: subs.w r2, r2, #16 @ encoding: [0xb2,0xf1,0x10,0x02] @@ -152,12 +154,15 @@ _func: ands r3, r12, #0xf and r1, #0xff and r1, r1, #0xff + and r5, r4, #0xffffffff + ands r1, r9, #0xffffffff @ CHECK: and r2, r5, #1044480 @ encoding: [0x05,0xf4,0x7f,0x22] @ CHECK: ands r3, r12, #15 @ encoding: [0x1c,0xf0,0x0f,0x03] @ CHECK: and r1, r1, #255 @ encoding: [0x01,0xf0,0xff,0x01] @ CHECK: and r1, r1, #255 @ encoding: [0x01,0xf0,0xff,0x01] - +@ CHECK: and r5, r4, #4294967295 @ encoding: [0x04,0xf0,0xff,0x35] +@ CHECK: ands r1, r9, #4294967295 @ encoding: [0x19,0xf0,0xff,0x31] @------------------------------------------------------------------------------ @ AND (register) @@ -259,6 +264,8 @@ _func: @ BIC @------------------------------------------------------------------------------ bic r10, r1, #0xf + bic r5, r2, #0xffffffff + bics r11, r10, #0xffffffff bic r12, r3, r6 bic r11, r2, r6, lsl #12 bic r8, r4, r1, lsr #11 @@ -276,6 +283,8 @@ _func: bic r12, r6, ror #29 @ CHECK: bic r10, r1, #15 @ encoding: [0x21,0xf0,0x0f,0x0a] +@ CHECK: bic r5, r2, #4294967295 @ encoding: [0x22,0xf0,0xff,0x35] +@ CHECK: bics r11, r10, #4294967295 @ encoding: [0x3a,0xf0,0xff,0x3b] @ CHECK: bic.w r12, r3, r6 @ encoding: [0x23,0xea,0x06,0x0c] @ CHECK: bic.w r11, r2, r6, lsl #12 @ encoding: [0x22,0xea,0x06,0x3b] @ CHECK: bic.w r8, r4, r1, lsr #11 @ encoding: [0x24,0xea,0xd1,0x28] @@ -405,6 +414,31 @@ _func: @ CHECK: cmn.w r2, #2 @ encoding: [0x12,0xf1,0x02,0x0f] @ CHECK: cmp.w r9, #1 @ encoding: [0xb9,0xf1,0x01,0x0f] +@------------------------------------------------------------------------------ +@ CPS 
+@------------------------------------------------------------------------------ + + cpsie f + cpsid a + cpsie.w f + cpsid.w a + cpsie i, #3 + cpsie.w i, #3 + cpsid f, #9 + cpsid.w f, #9 + cps #0 + cps.w #0 + +@ CHECK: cpsie f @ encoding: [0x61,0xb6] +@ CHECK: cpsid a @ encoding: [0x74,0xb6] +@ CHECK: cpsie.w f @ encoding: [0xaf,0xf3,0x20,0x84] +@ CHECK: cpsid.w a @ encoding: [0xaf,0xf3,0x80,0x86] +@ CHECK: cpsie i, #3 @ encoding: [0xaf,0xf3,0x43,0x85] +@ CHECK: cpsie i, #3 @ encoding: [0xaf,0xf3,0x43,0x85] +@ CHECK: cpsid f, #9 @ encoding: [0xaf,0xf3,0x29,0x87] +@ CHECK: cpsid f, #9 @ encoding: [0xaf,0xf3,0x29,0x87] +@ CHECK: cps #0 @ encoding: [0xaf,0xf3,0x00,0x81] +@ CHECK: cps #0 @ encoding: [0xaf,0xf3,0x00,0x81] @------------------------------------------------------------------------------ @ DBG @@ -628,8 +662,8 @@ _func: ldc2l p7, c1, [r8] ldc2l p8, c0, [r9, #-224] ldc2l p9, c1, [r10, #-120]! - ldc2l p10, c2, [r11], #16 - ldc2l p11, c3, [r12], #-72 + ldc2l p0, c2, [r11], #16 + ldc2l p1, c3, [r12], #-72 ldc p12, c4, [r0, #4] ldc p13, c5, [r1] @@ -656,8 +690,8 @@ _func: @ CHECK: ldc2l p7, c1, [r8] @ encoding: [0xd8,0xfd,0x00,0x17] @ CHECK: ldc2l p8, c0, [r9, #-224] @ encoding: [0x59,0xfd,0x38,0x08] @ CHECK: ldc2l p9, c1, [r10, #-120]! 
@ encoding: [0x7a,0xfd,0x1e,0x19] -@ CHECK: ldc2l p10, c2, [r11], #16 @ encoding: [0xfb,0xfc,0x04,0x2a] -@ CHECK: ldc2l p11, c3, [r12], #-72 @ encoding: [0x7c,0xfc,0x12,0x3b] +@ CHECK: ldc2l p0, c2, [r11], #16 @ encoding: [0xfb,0xfc,0x04,0x20] +@ CHECK: ldc2l p1, c3, [r12], #-72 @ encoding: [0x7c,0xfc,0x12,0x31] @ CHECK: ldc p12, c4, [r0, #4] @ encoding: [0x90,0xed,0x01,0x4c] @ CHECK: ldc p13, c5, [r1] @ encoding: [0x91,0xed,0x00,0x5d] @@ -772,23 +806,43 @@ _func: @ CHECK: ldr.w lr, _strcmp-4 @ encoding: [0x5f'A',0xf8'A',A,0xe0'A'] @ CHECK: @ fixup A - offset: 0, value: _strcmp-4, kind: fixup_t2_ldst_pcrel_12 + ldr r7, [pc, #8] + ldr.n r7, [pc, #8] + ldr.w r7, [pc, #8] ldr r4, [pc, #1020] ldr r3, [pc, #-1020] ldr r6, [pc, #1024] ldr r0, [pc, #-1024] ldr r2, [pc, #4095] ldr r1, [pc, #-4095] - ldr.n r8, [pc, #132] - ldr.w r8, [pc, #132] + ldr r8, [pc, #132] + ldr pc, [pc, #256] + ldr pc, [pc, #-400] +@ CHECK: ldr r7, [pc, #8] @ encoding: [0x02,0x4f] +@ CHECK: ldr r7, [pc, #8] @ encoding: [0x02,0x4f] +@ CHECK: ldr.w r7, [pc, #8] @ encoding: [0xdf,0xf8,0x08,0x70] @ CHECK: ldr r4, [pc, #1020] @ encoding: [0xff,0x4c] -@ CHECK: ldr r3, [pc, #-1020] @ encoding: [0x01,0x4b] +@ CHECK: ldr.w r3, [pc, #-1020] @ encoding: [0x5f,0xf8,0xfc,0x33] @ CHECK: ldr.w r6, [pc, #1024] @ encoding: [0xdf,0xf8,0x00,0x64] @ CHECK: ldr.w r0, [pc, #-1024] @ encoding: [0x5f,0xf8,0x00,0x04] @ CHECK: ldr.w r2, [pc, #4095] @ encoding: [0xdf,0xf8,0xff,0x2f] @ CHECK: ldr.w r1, [pc, #-4095] @ encoding: [0x5f,0xf8,0xff,0x1f] -@ CHECK: ldr r8, [pc, #132] @ encoding: [0x21,0x48] @ CHECK: ldr.w r8, [pc, #132] @ encoding: [0xdf,0xf8,0x84,0x80] +@ CHECK: ldr.w pc, [pc, #256] @ encoding: [0xdf,0xf8,0x00,0xf1] +@ CHECK: ldr.w pc, [pc, #-400] @ encoding: [0x5f,0xf8,0x90,0xf1] + + ldrb r9, [pc, #-0] + ldrsb r11, [pc, #-0] + ldrh r10, [pc, #-0] + ldrsh r1, [pc, #-0] + ldr r5, [pc, #-0] + +@ CHECK: ldrb.w r9, [pc, #-0] @ encoding: [0x1f,0xf8,0x00,0x90] +@ CHECK: ldrsb.w r11, [pc, #-0] @ encoding: 
[0x1f,0xf9,0x00,0xb0] +@ CHECK: ldrh.w r10, [pc, #-0] @ encoding: [0x3f,0xf8,0x00,0xa0] +@ CHECK: ldrsh.w r1, [pc, #-0] @ encoding: [0x3f,0xf9,0x00,0x10] +@ CHECK: ldr.w r5, [pc, #-0] @ encoding: [0x5f,0xf8,0x00,0x50] @------------------------------------------------------------------------------ @ LDR(register) @@ -1269,8 +1323,15 @@ _func: movlo r1, #-1 @ alias for mvn - mov r3, #-3 + mov r3, #-3 + mov r11, #0xabcd + movs r0, #1 + it ne + movne r3, #15 + itt eq + moveq r0, #255 + moveq r1, #256 @ CHECK: movs r1, #21 @ encoding: [0x15,0x21] @ CHECK: movs.w r1, #21 @ encoding: [0x5f,0xf0,0x15,0x01] @@ -1289,6 +1350,14 @@ _func: @ CHECK: it lo @ encoding: [0x38,0xbf] @ CHECK: movlo.w r1, #-1 @ encoding: [0x4f,0xf0,0xff,0x31] @ CHECK: mvn r3, #2 @ encoding: [0x6f,0xf0,0x02,0x03] +@ CHECK: movw r11, #43981 @ encoding: [0x4a,0xf6,0xcd,0x3b] +@ CHECK: movs r0, #1 @ encoding: [0x01,0x20] +@ CHECK: it ne @ encoding: [0x18,0xbf] +@ CHECK: movne r3, #15 @ encoding: [0x0f,0x23] + +@ CHECK: itt eq @ encoding: [0x04,0xbf] +@ CHECK: moveq r0, #255 @ encoding: [0xff,0x20] +@ CHECK: movweq r1, #256 @ encoding: [0x40,0xf2,0x00,0x11] @------------------------------------------------------------------------------ @ MOV(shifted register) @@ -1348,17 +1417,17 @@ _func: @------------------------------------------------------------------------------ mrc p14, #0, r1, c1, c2, #4 mrc p15, #7, apsr_nzcv, c15, c6, #6 - mrc p11, #1, r1, c2, c2 + mrc p9, #1, r1, c2, c2 mrc2 p12, #3, r3, c3, c4 mrc2 p14, #0, r1, c1, c2, #4 - mrc2 p10, #7, apsr_nzcv, c15, c0, #1 + mrc2 p8, #7, apsr_nzcv, c15, c0, #1 @ CHECK: mrc p14, #0, r1, c1, c2, #4 @ encoding: [0x11,0xee,0x92,0x1e] @ CHECK: mrc p15, #7, apsr_nzcv, c15, c6, #6 @ encoding: [0xff,0xee,0xd6,0xff] -@ CHECK: mrc p11, #1, r1, c2, c2, #0 @ encoding: [0x32,0xee,0x12,0x1b] +@ CHECK: mrc p9, #1, r1, c2, c2, #0 @ encoding: [0x32,0xee,0x12,0x19] @ CHECK: mrc2 p12, #3, r3, c3, c4, #0 @ encoding: [0x73,0xfe,0x14,0x3c] @ CHECK: mrc2 p14, #0, r1, c1, c2, #4 
@ encoding: [0x11,0xfe,0x92,0x1e] -@ CHECK: mrc2 p10, #7, apsr_nzcv, c15, c0, #1 @ encoding: [0xff,0xfe,0x30,0xfa] +@ CHECK: mrc2 p8, #7, apsr_nzcv, c15, c0, #1 @ encoding: [0xff,0xfe,0x30,0xf8] @------------------------------------------------------------------------------ @ MRRC/MRRC2 @@ -2485,8 +2554,8 @@ _func: stc2l p7, c1, [r8] stc2l p8, c0, [r9, #-224] stc2l p9, c1, [r10, #-120]! - stc2l p10, c2, [r11], #16 - stc2l p11, c3, [r12], #-72 + stc2l p0, c2, [r11], #16 + stc2l p1, c3, [r12], #-72 stc p12, c4, [r0, #4] stc p13, c5, [r1] @@ -2513,8 +2582,8 @@ _func: @ CHECK: stc2l p7, c1, [r8] @ encoding: [0xc8,0xfd,0x00,0x17] @ CHECK: stc2l p8, c0, [r9, #-224] @ encoding: [0x49,0xfd,0x38,0x08] @ CHECK: stc2l p9, c1, [r10, #-120]! @ encoding: [0x6a,0xfd,0x1e,0x19] -@ CHECK: stc2l p10, c2, [r11], #16 @ encoding: [0xeb,0xfc,0x04,0x2a] -@ CHECK: stc2l p11, c3, [r12], #-72 @ encoding: [0x6c,0xfc,0x12,0x3b] +@ CHECK: stc2l p0, c2, [r11], #16 @ encoding: [0xeb,0xfc,0x04,0x20] +@ CHECK: stc2l p1, c3, [r12], #-72 @ encoding: [0x6c,0xfc,0x12,0x31] @ CHECK: stc p12, c4, [r0, #4] @ encoding: [0x80,0xed,0x01,0x4c] @ CHECK: stc p13, c5, [r1] @ encoding: [0x81,0xed,0x00,0x5d] @@ -3524,11 +3593,21 @@ _func: wfige yieldlt hint.w #4 + hint.w #3 + hint.w #2 + hint.w #1 + hint.w #0 + hint #4 hint #3 hint #2 hint #1 hint #0 + itet lt + hintlt #15 + hintge #16 + hintlt #239 + @ CHECK: wfe @ encoding: [0x20,0xbf] @ CHECK: wfi @ encoding: [0x30,0xbf] @ CHECK: yield @ encoding: [0x10,0xbf] @@ -3541,7 +3620,24 @@ _func: @ CHECK: wfe.w @ encoding: [0xaf,0xf3,0x02,0x80] @ CHECK: yield.w @ encoding: [0xaf,0xf3,0x01,0x80] @ CHECK: nop.w @ encoding: [0xaf,0xf3,0x00,0x80] +@ CHECK: sev @ encoding: [0x40,0xbf] +@ CHECK: wfi @ encoding: [0x30,0xbf] +@ CHECK: wfe @ encoding: [0x20,0xbf] +@ CHECK: yield @ encoding: [0x10,0xbf] +@ CHECK: nop @ encoding: [0x00,0xbf] +@ CHECK: itet lt @ encoding: [0xb6,0xbf] +@ CHECK: hintlt #15 @ encoding: [0xf0,0xbf] +@ CHECK: hintge.w #16 @ encoding: 
[0xaf,0xf3,0x10,0x80] +@ CHECK: hintlt.w #239 @ encoding: [0xaf,0xf3,0xef,0x80] + +@------------------------------------------------------------------------------ +@ Unallocated wide/narrow hints +@------------------------------------------------------------------------------ + hint #7 + hint.w #7 +@ CHECK: hint #7 @ encoding: [0x70,0xbf] +@ CHECK: hint.w #7 @ encoding: [0xaf,0xf3,0x07,0x80] @------------------------------------------------------------------------------ @ Alternate syntax for LDR*(literal) encodings diff --git a/test/MC/ARM/crc32-thumb.s b/test/MC/ARM/crc32-thumb.s new file mode 100644 index 0000000..3a0e7a9 --- /dev/null +++ b/test/MC/ARM/crc32-thumb.s @@ -0,0 +1,30 @@ +@ RUN: llvm-mc -triple=thumbv8 -show-encoding < %s | FileCheck %s +@ RUN: not llvm-mc -triple=thumbv7 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V7 +@ RUN: not llvm-mc -triple=thumbv8 -mattr=-crc -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOCRC + crc32b r0, r1, r2 + crc32h r0, r1, r2 + crc32w r0, r1, r2 + +@ CHECK: crc32b r0, r1, r2 @ encoding: [0xc1,0xfa,0x82,0xf0] +@ CHECK: crc32h r0, r1, r2 @ encoding: [0xc1,0xfa,0x92,0xf0] +@ CHECK: crc32w r0, r1, r2 @ encoding: [0xc1,0xfa,0xa2,0xf0] +@ CHECK-V7: error: instruction requires: crc armv8 +@ CHECK-V7: error: instruction requires: crc armv8 +@ CHECK-V7: error: instruction requires: crc armv8 +@ CHECK-NOCRC: error: instruction requires: crc +@ CHECK-NOCRC: error: instruction requires: crc +@ CHECK-NOCRC: error: instruction requires: crc + + crc32cb r0, r1, r2 + crc32ch r0, r1, r2 + crc32cw r0, r1, r2 + +@ CHECK: crc32cb r0, r1, r2 @ encoding: [0xd1,0xfa,0x82,0xf0] +@ CHECK: crc32ch r0, r1, r2 @ encoding: [0xd1,0xfa,0x92,0xf0] +@ CHECK: crc32cw r0, r1, r2 @ encoding: [0xd1,0xfa,0xa2,0xf0] +@ CHECK-V7: error: instruction requires: crc armv8 +@ CHECK-V7: error: instruction requires: crc armv8 +@ CHECK-V7: error: instruction requires: crc armv8 +@ CHECK-NOCRC: error: instruction requires: crc +@ 
CHECK-NOCRC: error: instruction requires: crc +@ CHECK-NOCRC: error: instruction requires: crc diff --git a/test/MC/ARM/crc32.s b/test/MC/ARM/crc32.s new file mode 100644 index 0000000..45a1f0c --- /dev/null +++ b/test/MC/ARM/crc32.s @@ -0,0 +1,30 @@ +@ RUN: llvm-mc -triple=armv8 -show-encoding < %s | FileCheck %s +@ RUN: not llvm-mc -triple=armv7 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V7 +@ RUN: not llvm-mc -triple=thumbv8 -mattr=-crc -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOCRC + crc32b r0, r1, r2 + crc32h r0, r1, r2 + crc32w r0, r1, r2 + +@ CHECK: crc32b r0, r1, r2 @ encoding: [0x42,0x00,0x01,0xe1] +@ CHECK: crc32h r0, r1, r2 @ encoding: [0x42,0x00,0x21,0xe1] +@ CHECK: crc32w r0, r1, r2 @ encoding: [0x42,0x00,0x41,0xe1] +@ CHECK-V7: error: instruction requires: crc armv8 +@ CHECK-V7: error: instruction requires: crc armv8 +@ CHECK-V7: error: instruction requires: crc armv8 +@ CHECK-NOCRC: error: instruction requires: crc +@ CHECK-NOCRC: error: instruction requires: crc +@ CHECK-NOCRC: error: instruction requires: crc + + crc32cb r0, r1, r2 + crc32ch r0, r1, r2 + crc32cw r0, r1, r2 + +@ CHECK: crc32cb r0, r1, r2 @ encoding: [0x42,0x02,0x01,0xe1] +@ CHECK: crc32ch r0, r1, r2 @ encoding: [0x42,0x02,0x21,0xe1] +@ CHECK: crc32cw r0, r1, r2 @ encoding: [0x42,0x02,0x41,0xe1] +@ CHECK-V7: error: instruction requires: crc armv8 +@ CHECK-V7: error: instruction requires: crc armv8 +@ CHECK-V7: error: instruction requires: crc armv8 +@ CHECK-NOCRC: error: instruction requires: crc +@ CHECK-NOCRC: error: instruction requires: crc +@ CHECK-NOCRC: error: instruction requires: crc diff --git a/test/MC/ARM/deprecated-v8.s b/test/MC/ARM/deprecated-v8.s index e509a35..aa72c2e 100644 --- a/test/MC/ARM/deprecated-v8.s +++ b/test/MC/ARM/deprecated-v8.s @@ -1,3 +1,51 @@ -@ RUN: llvm-mc -triple armv8 -show-encoding < %s 2>&1 | FileCheck %s +@ RUN: llvm-mc -triple armv8 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARMV8 +@ RUN: 
llvm-mc -triple thumbv8 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-THUMBV8 +@ RUN: llvm-mc -triple armv7 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARMV7 +@ RUN: llvm-mc -triple thumbv7 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-THUMBV7 +@ RUN: llvm-mc -triple armv6 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARMV6 setend be -@ CHECK: warning: deprecated on armv8 +@ CHECK-ARMV8: warning: deprecated +@ CHECK-THUMBV8: warning: deprecated +@ CHECK-ARMV7-NOT: warning: deprecated +@ CHECK-THUMBV7-NOT: warning: deprecated +mcr p15, #0, r5, c7, c5, #4 +@ CHECK-ARMV8: warning: deprecated since v7, use 'isb' +@ CHECK-THUMBV8: warning: deprecated since v7, use 'isb' +@ CHECK-ARMV7: warning: deprecated since v7, use 'isb' +@ CHECK-THUMBV7: warning: deprecated since v7, use 'isb' +@ CHECK-ARMV6-NOT: warning: deprecated since v7, use 'isb' +mcr p15, #0, r5, c7, c10, #4 +@ CHECK-ARMV8: warning: deprecated since v7, use 'dsb' +@ CHECK-THUMBV8: warning: deprecated since v7, use 'dsb' +@ CHECK-ARMV7: warning: deprecated since v7, use 'dsb' +@ CHECK-THUMBV7: warning: deprecated since v7, use 'dsb' +@ CHECK-ARMV6-NOT: warning: deprecated since v7, use 'dsb' +mcr p15, #0, r5, c7, c10, #5 +@ CHECK-ARMV8: warning: deprecated since v7, use 'dmb' +@ CHECK-THUMBV8: warning: deprecated since v7, use 'dmb' +@ CHECK-ARMV7: warning: deprecated since v7, use 'dmb' +@ CHECK-THUMBV7: warning: deprecated since v7, use 'dmb' +@ CHECK-ARMV6-NOT: warning: deprecated since v7, use 'dmb' +it ge +movge r0, #4096 +@ CHECK-THUMBV8: warning: deprecated instruction in IT block +@ CHECK-THUMBV7-NOT: warning +ite ge +addge r0, r1 +addlt r0, r2 +@ CHECK-ARMV8: warning: applying IT instruction to more than one subsequent instruction is deprecated +@ CHECK-THUMBV8: warning: applying IT instruction to more than one subsequent instruction is deprecated +@ CHECK-THUMBV7-NOT: warning +it ge +movge r0, pc // invalid operand +@ CHECK-THUMBV8: 
warning: deprecated instruction in IT block +@ CHECK-THUMBV7-NOT: warning +it ge +revge r0, r0 // invalid instruction +@ CHECK-THUMBV8: warning: deprecated instruction in IT block +@ CHECK-THUMBV7-NOT: warning +it ge +clzge r0, r0 // only has 32-bit form +@ CHECK-THUMBV8: warning: deprecated instruction in IT block +@ CHECK-THUMBV7-NOT: warning + diff --git a/test/MC/ARM/diagnostics-noneon.s b/test/MC/ARM/diagnostics-noneon.s new file mode 100644 index 0000000..310344a --- /dev/null +++ b/test/MC/ARM/diagnostics-noneon.s @@ -0,0 +1,7 @@ +@ RUN: not llvm-mc -triple=armv7-apple-darwin -mattr=-neon < %s 2> %t +@ RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s + + vmov d5, d10 + vmov q4, q5 +@ CHECK-ERRORS: error: instruction requires: NEON +@ CHECK-ERRORS: error: instruction requires: NEON diff --git a/test/MC/ARM/diagnostics.s b/test/MC/ARM/diagnostics.s index 1aea117..3c26f6d 100644 --- a/test/MC/ARM/diagnostics.s +++ b/test/MC/ARM/diagnostics.s @@ -1,5 +1,7 @@ @ RUN: not llvm-mc -triple=armv7-apple-darwin < %s 2> %t @ RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s +@ RUN: not llvm-mc -triple=armv8 < %s 2> %t +@ RUN: FileCheck --check-prefix=CHECK-ERRORS-V8 < %t %s @ Check for various assembly diagnostic messages on invalid input. @@ -93,6 +95,26 @@ @ CHECK-ERRORS: error: invalid operand for instruction + @ Out of range immediates for v8 HLT instruction. + hlt #65536 + hlt #-1 +@CHECK-ERRORS-V8: error: invalid operand for instruction +@CHECK-ERRORS-V8: hlt #65536 +@CHECK-ERRORS-V8: ^ +@CHECK-ERRORS-V8: error: invalid operand for instruction +@CHECK-ERRORS-V8: hlt #-1 +@CHECK-ERRORS-V8: ^ + + @ Illegal condition code for v8 HLT instruction. 
+ hlteq #2 + hltlt #23 +@CHECK-ERRORS-V8: error: instruction 'hlt' is not predicable, but condition code specified +@CHECK-ERRORS-V8: hlteq #2 +@CHECK-ERRORS-V8: ^ +@CHECK-ERRORS-V8: error: instruction 'hlt' is not predicable, but condition code specified +@CHECK-ERRORS-V8: hltlt #23 +@CHECK-ERRORS-V8: ^ + @ Out of range 4 and 3 bit immediates on CDP[2] @ Out of range immediates for CDP/CDP2 @@ -129,6 +151,11 @@ @ CHECK-ERRORS: error: immediate operand must be in the range [0,15] @ CHECK-ERRORS: error: immediate operand must be in the range [0,15] + @ p10 and p11 are reserved for NEON + mcr p10, #2, r5, c1, c1, #4 + mcrr p11, #8, r5, r4, c1 +@ CHECK-ERRORS: error: invalid operand for instruction +@ CHECK-ERRORS: error: invalid operand for instruction @ Out of range immediate for MOV movw r9, 0x10000 @@ -407,3 +434,34 @@ bkpteq #7 @ CHECK-ERRORS: error: instruction 'bkpt' is not predicable, but condition code specified + + ldm r2!, {r2, r3} + ldmdb r2!, {r2, r3} + ldmda r2!, {r2, r3} + popeq {sp} +@ CHECK-ERRORS: error: writeback register not allowed in register list +@ CHECK-ERRORS: error: writeback register not allowed in register list +@ CHECK-ERRORS: error: writeback register not allowed in register list +@ CHECK-ERRORS: error: writeback register not allowed in register list + + vrintz.f32.f32 s0, s1 + vrintr.f32 s0, s1 + vrintx.f64.f64 d2, d5 + vrintz.f64 d10, d9 + vrinta.f32.f32 s6, s7 + vrintn.f32 s8, s9 + vrintp.f64.f64 d10, d11 + vrintm.f64 d12, d13 +@ CHECK-ERRORS: error: instruction requires: FPARMv8 +@ CHECK-ERRORS: error: instruction requires: FPARMv8 +@ CHECK-ERRORS: error: instruction requires: FPARMv8 +@ CHECK-ERRORS: error: instruction requires: FPARMv8 +@ CHECK-ERRORS: error: instruction requires: FPARMv8 +@ CHECK-ERRORS: error: instruction requires: FPARMv8 +@ CHECK-ERRORS: error: instruction requires: FPARMv8 +@ CHECK-ERRORS: error: instruction requires: FPARMv8 + + stm sp!, {r0, pc}^ + ldm sp!, {r0}^ +@ CHECK-ERRORS: error: system STM cannot 
have writeback register +@ CHECK-ERRORS: error: writeback register only allowed on system LDM if PC in register-list diff --git a/test/MC/ARM/directive-cpu.s b/test/MC/ARM/directive-cpu.s new file mode 100644 index 0000000..952dd93 --- /dev/null +++ b/test/MC/ARM/directive-cpu.s @@ -0,0 +1,26 @@ +@ RUN: llvm-mc < %s -triple armv7-unknown-linux-gnueabi -filetype=obj -o - \ +@ RUN: | llvm-readobj -s -sd | FileCheck %s + +@ CHECK: Name: .ARM.attribute +@ CHECK: SectionData ( + +@ <format-version> +@ CHECK: 41 + +@ <section-length> +@ CHECK: 1A0000 00 + +@ <vendor-name> "aeabi\0" +@ CHECK: 616561 626900 + +@ <file-tag> +@ CHECK: 01 + +@ <size> +@ CHECK: 10000000 + + .cpu cortex-a8 +@ CHECK: 05 +@ CHECK: 434F52 5445582D 413800 + +@ CHECK: ) diff --git a/test/MC/ARM/directive-eabi_attribute.s b/test/MC/ARM/directive-eabi_attribute.s new file mode 100644 index 0000000..c060b80 --- /dev/null +++ b/test/MC/ARM/directive-eabi_attribute.s @@ -0,0 +1,56 @@ +@ RUN: llvm-mc < %s -triple armv7-unknown-linux-gnueabi -filetype=obj -o - \ +@ RUN: | llvm-readobj -s -sd | FileCheck %s + +@ CHECK: Name: .ARM.attribute +@ CHECK: SectionData ( + +@ <format-version> +@ CHECK: 41 + +@ <section-length> +@ CHECK: 250000 00 + +@ <vendor-name> "aeabi\0" +@ CHECK: 616561 626900 + +@ <file-tag> +@ CHECK: 01 + +@ <size> +@ CHECK: 1B000000 + +@ <attribute>* + + .eabi_attribute 6, 10 +@ CHECK: 060A + + .eabi_attribute 7, 65 +@ CHECK: 0741 + + .eabi_attribute 8, 1 +@ CHECK: 0801 + + .eabi_attribute 9, 2 +@ CHECK: 0902 + + .eabi_attribute 10, 3 +@ CHECK: 0A03 + + .eabi_attribute 12, 1 +@ CHECK: 0C01 + + .eabi_attribute 20, 1 +@ CHECK: 1401 + + .eabi_attribute 21, 1 +@ CHECK: 1501 + + .eabi_attribute 23, 3 +@ CHECK: 1703 + + .eabi_attribute 24, 1 +@ CHECK: 1801 + + .eabi_attribute 25, 1 +@ CHECK: 1901 +@ CHECK: ) diff --git a/test/MC/ARM/directive-fpu-multiple.s b/test/MC/ARM/directive-fpu-multiple.s new file mode 100644 index 0000000..6a93f24 --- /dev/null +++ b/test/MC/ARM/directive-fpu-multiple.s 
@@ -0,0 +1,26 @@ +@ Check multiple .fpu directives. + +@ The later .fpu directive should overwrite the earlier one. +@ See also: directive-fpu-multiple2.s. + +@ RUN: llvm-mc < %s -triple arm-unknown-linux-gnueabi -filetype=obj \ +@ RUN: | llvm-readobj -s -sd | FileCheck %s + + .fpu neon + .fpu vfpv4 + +@ CHECK: Name: .ARM.attributes +@ CHECK-NEXT: Type: SHT_ARM_ATTRIBUTES (0x70000003) +@ CHECK-NEXT: Flags [ (0x0) +@ CHECK-NEXT: ] +@ CHECK-NEXT: Address: 0x0 +@ CHECK-NEXT: Offset: 0x34 +@ CHECK-NEXT: Size: 18 +@ CHECK-NEXT: Link: 0 +@ CHECK-NEXT: Info: 0 +@ CHECK-NEXT: AddressAlignment: 1 +@ CHECK-NEXT: EntrySize: 0 +@ CHECK-NEXT: SectionData ( +@ CHECK-NEXT: 0000: 41110000 00616561 62690001 07000000 +@ CHECK-NEXT: 0010: 0A05 +@ CHECK-NEXT: ) diff --git a/test/MC/ARM/directive-fpu.s b/test/MC/ARM/directive-fpu.s new file mode 100644 index 0000000..24e159c --- /dev/null +++ b/test/MC/ARM/directive-fpu.s @@ -0,0 +1,26 @@ +@ RUN: llvm-mc < %s -triple armv7-unknown-linux-gnueabi -filetype=obj -o - \ +@ RUN: | llvm-readobj -s -sd | FileCheck %s + +@ CHECK: Name: .ARM.attribute +@ CHECK: SectionData ( + +@ <format-version> +@ CHECK: 41 + +@ <section-length> +@ CHECK: 130000 00 + +@ <vendor-name> "aeabi\0" +@ CHECK: 616561 626900 + +@ <file-tag> +@ CHECK: 01 + +@ <size> +@ CHECK: 09000000 + + .fpu neon +@ CHECK: 0A03 +@ CHECK: 0C01 + +@ CHECK: ) diff --git a/test/MC/ARM/elf-eflags-eabi-cg.ll b/test/MC/ARM/elf-eflags-eabi-cg.ll deleted file mode 100644 index 0b9de7f..0000000 --- a/test/MC/ARM/elf-eflags-eabi-cg.ll +++ /dev/null @@ -1,14 +0,0 @@ -; Codegen version to check for ELF header flags. 
-; -; RUN: llc %s -mtriple=thumbv7-linux-gnueabi -relocation-model=pic \ -; RUN: -filetype=obj -o - | llvm-readobj -h | \ -; RUN: FileCheck %s - -define void @bar() nounwind { -entry: - ret void -} - -; For now the only e_flag set is EF_ARM_EABI_VER5 -; CHECK: ElfHeader { -; CHECK: Flags [ (0x5000000) diff --git a/test/MC/ARM/elf-thumbfunc.s b/test/MC/ARM/elf-thumbfunc.s index 26f5f0b..0ea1182 100644 --- a/test/MC/ARM/elf-thumbfunc.s +++ b/test/MC/ARM/elf-thumbfunc.s @@ -5,9 +5,9 @@ .text .globl foo .align 2 - .type foo,%function .code 16 .thumb_func + .type foo,%function foo: bx lr diff --git a/test/MC/ARM/v8fp.s b/test/MC/ARM/fp-armv8.s index b12d7e2..1ffd590 100644 --- a/test/MC/ARM/v8fp.s +++ b/test/MC/ARM/fp-armv8.s @@ -1,4 +1,4 @@ -@ RUN: llvm-mc -triple armv8 -mattr=+v8fp -show-encoding < %s | FileCheck %s +@ RUN: llvm-mc -triple armv8 -mattr=+fp-armv8 -show-encoding < %s | FileCheck %s @ VCVT{B,T} @@ -122,3 +122,8 @@ @ CHECK: vrintm.f64 d3, d4 @ encoding: [0x44,0x3b,0xbb,0xfe] vrintm.f32 s12, s1 @ CHECK: vrintm.f32 s12, s1 @ encoding: [0x60,0x6a,0xbb,0xfe] + +@ MVFR2 + + vmrs sp, mvfr2 +@ CHECK: vmrs sp, mvfr2 @ encoding: [0x10,0xda,0xf5,0xee] diff --git a/test/MC/ARM/idiv.s b/test/MC/ARM/idiv.s new file mode 100644 index 0000000..6238a0f --- /dev/null +++ b/test/MC/ARM/idiv.s @@ -0,0 +1,33 @@ +@ RUN: llvm-mc -triple=armv7 -mcpu=cortex-a15 -show-encoding < %s 2>&1 | FileCheck -check-prefix A15-ARM %s +@ RUN: llvm-mc -triple=thumbv7 -mcpu=cortex-a15 -show-encoding < %s 2>&1 | FileCheck -check-prefix A15-THUMB %s + +@ RUN: llvm-mc -triple=armv7 -mcpu=cortex-a15 -mattr=-hwdiv -show-encoding < %s 2>&1 | FileCheck -check-prefix A15-ARM-NOTHUMBHWDIV %s +@ RUN: llvm-mc -triple=thumbv7 -mcpu=cortex-a15 -mattr=-hwdiv-arm -show-encoding < %s 2>&1 | FileCheck -check-prefix A15-THUMB-NOARMHWDIV %s + +@ RUN: llvm-mc -triple=armv8 -show-encoding < %s 2>&1 | FileCheck -check-prefix ARMV8 %s +@ RUN: llvm-mc -triple=thumbv8 -show-encoding < %s 2>&1 | FileCheck -check-prefix 
THUMBV8 %s + +@ RUN: llvm-mc -triple=armv8 -mattr=-hwdiv -show-encoding < %s 2>&1 | FileCheck -check-prefix ARMV8-NOTHUMBHWDIV %s +@ RUN: llvm-mc -triple=thumbv8 -mattr=-hwdiv-arm -show-encoding < %s 2>&1 | FileCheck -check-prefix THUMBV8-NOTHUMBHWDIV %s + + sdiv r1, r2, r3 + udiv r3, r4, r5 +@ A15-ARM: sdiv r1, r2, r3 @ encoding: [0x12,0xf3,0x11,0xe7] +@ A15-ARM: udiv r3, r4, r5 @ encoding: [0x14,0xf5,0x33,0xe7] +@ A15-THUMB: sdiv r1, r2, r3 @ encoding: [0x92,0xfb,0xf3,0xf1] +@ A15-THUMB: udiv r3, r4, r5 @ encoding: [0xb4,0xfb,0xf5,0xf3] + +@ A15-ARM-NOTHUMBHWDIV: sdiv r1, r2, r3 @ encoding: [0x12,0xf3,0x11,0xe7] +@ A15-ARM-NOTHUMBHWDIV: udiv r3, r4, r5 @ encoding: [0x14,0xf5,0x33,0xe7] +@ A15-THUMB-NOARMHWDIV: sdiv r1, r2, r3 @ encoding: [0x92,0xfb,0xf3,0xf1] +@ A15-THUMB-NOARMHWDIV: udiv r3, r4, r5 @ encoding: [0xb4,0xfb,0xf5,0xf3] + +@ ARMV8: sdiv r1, r2, r3 @ encoding: [0x12,0xf3,0x11,0xe7] +@ ARMV8: udiv r3, r4, r5 @ encoding: [0x14,0xf5,0x33,0xe7] +@ THUMBV8: sdiv r1, r2, r3 @ encoding: [0x92,0xfb,0xf3,0xf1] +@ THUMBV8: udiv r3, r4, r5 @ encoding: [0xb4,0xfb,0xf5,0xf3] + +@ ARMV8-NOTHUMBHWDIV: sdiv r1, r2, r3 @ encoding: [0x12,0xf3,0x11,0xe7] +@ ARMV8-NOTHUMBHWDIV: udiv r3, r4, r5 @ encoding: [0x14,0xf5,0x33,0xe7] +@ THUMBV8-NOTHUMBHWDIV: sdiv r1, r2, r3 @ encoding: [0x92,0xfb,0xf3,0xf1] +@ THUMBV8-NOTHUMBHWDIV: udiv r3, r4, r5 @ encoding: [0xb4,0xfb,0xf5,0xf3] diff --git a/test/MC/ARM/invalid-barrier.s b/test/MC/ARM/invalid-barrier.s new file mode 100644 index 0000000..29fcd8e --- /dev/null +++ b/test/MC/ARM/invalid-barrier.s @@ -0,0 +1,28 @@ +@ RUN: not llvm-mc -triple=armv7 -show-encoding < %s 2>&1 | FileCheck %s +@ RUN: not llvm-mc -triple=thumbv7 -show-encoding < %s 2>&1 | FileCheck %s + +@------------------------------------------------------------------------------ +@ DMB +@------------------------------------------------------------------------------ + dmb #0x10 + dmb imaginary_scope + +@ CHECK: error: immediate value out of range +@ CHECK: error: 
invalid operand for instruction + +@------------------------------------------------------------------------------ +@ DSB +@------------------------------------------------------------------------------ + dsb #0x10 + dsb imaginary_scope +@ CHECK: error: immediate value out of range +@ CHECK: error: invalid operand for instruction + +@------------------------------------------------------------------------------ +@ ISB +@------------------------------------------------------------------------------ + isb #0x1f + isb imaginary_domain + +@ CHECK: error: immediate value out of range +@ CHECK: error: invalid operand for instruction diff --git a/test/MC/ARM/invalid-crc32.s b/test/MC/ARM/invalid-crc32.s new file mode 100644 index 0000000..a541002 --- /dev/null +++ b/test/MC/ARM/invalid-crc32.s @@ -0,0 +1,16 @@ +@ RUN: not llvm-mc -triple=armv8 -show-encoding < %s 2>&1 | FileCheck %s +@ RUN: not llvm-mc -triple=thumbv8 -show-encoding < %s 2>&1 | FileCheck %s + + crc32cbeq r0, r1, r2 + crc32bne r0, r1, r2 + crc32chcc r0, r1, r2 + crc32hpl r0, r1, r2 + crc32cwgt r0, r1, r2 + crc32wle r0, r1, r2 + +@ CHECK: error: instruction 'crc32cb' is not predicable, but condition code specified +@ CHECK: error: instruction 'crc32b' is not predicable, but condition code specified +@ CHECK: error: instruction 'crc32ch' is not predicable, but condition code specified +@ CHECK: error: instruction 'crc32h' is not predicable, but condition code specified +@ CHECK: error: instruction 'crc32cw' is not predicable, but condition code specified +@ CHECK: error: instruction 'crc32w' is not predicable, but condition code specified diff --git a/test/MC/ARM/invalid-fp-armv8.s b/test/MC/ARM/invalid-fp-armv8.s new file mode 100644 index 0000000..21adb7e --- /dev/null +++ b/test/MC/ARM/invalid-fp-armv8.s @@ -0,0 +1,89 @@ +@ RUN: not llvm-mc -triple armv8 -show-encoding -mattr=-neon < %s 2>&1 | FileCheck %s --check-prefix=V8 + +@ VCVT{B,T} + + vcvtt.f64.f16 d3, s1 +@ V7-NOT: vcvtt.f64.f16 d3, s1 @ 
encoding: [0xe0,0x3b,0xb2,0xee] + vcvtt.f16.f64 s5, d12 +@ V7-NOT: vcvtt.f16.f64 s5, d12 @ encoding: [0xcc,0x2b,0xf3,0xee] + + vsel.f32 s3, s4, s6 +@ V8: error: invalid instruction + vselne.f32 s3, s4, s6 +@ V8: error: invalid instruction + vselmi.f32 s3, s4, s6 +@ V8: error: invalid instruction + vselpl.f32 s3, s4, s6 +@ V8: error: invalid instruction + vselvc.f32 s3, s4, s6 +@ V8: error: invalid instruction + vselcs.f32 s3, s4, s6 +@ V8: error: invalid instruction + vselcc.f32 s3, s4, s6 +@ V8: error: invalid instruction + vselhs.f32 s3, s4, s6 +@ V8: error: invalid instruction + vsello.f32 s3, s4, s6 +@ V8: error: invalid instruction + vselhi.f32 s3, s4, s6 +@ V8: error: invalid instruction + vsells.f32 s3, s4, s6 +@ V8: error: invalid instruction + vsellt.f32 s3, s4, s6 +@ V8: error: invalid instruction + vselle.f32 s3, s4, s6 +@ V8: error: invalid instruction + +vseleq.f32 s0, d2, d1 +@ V8: error: invalid operand for instruction +vselgt.f64 s3, s2, s1 +@ V8: error: invalid operand for instruction +vselgt.f32 s0, q3, q1 +@ V8: error: invalid operand for instruction +vselgt.f64 q0, s3, q1 +@ V8: error: invalid operand for instruction + +vmaxnm.f32 s0, d2, d1 +@ V8: error: invalid operand for instruction +vminnm.f64 s3, s2, s1 +@ V8: error: invalid operand for instruction +vmaxnm.f32 s0, q3, q1 +@ V8: error: invalid operand for instruction +vmaxnm.f64 q0, s3, q1 +@ V8: error: invalid operand for instruction +vmaxnmgt.f64 q0, s3, q1 +@ CHECK: error: instruction 'vmaxnm' is not predicable, but condition code specified + +vcvta.s32.f64 d3, s2 +@ V8: error: invalid operand for instruction +vcvtp.s32.f32 d3, s2 +@ V8: error: invalid operand for instruction +vcvtn.u32.f64 d3, s2 +@ V8: error: invalid operand for instruction +vcvtm.u32.f32 d3, s2 +@ V8: error: invalid operand for instruction +vcvtnge.u32.f64 d3, s2 +@ V8: error: instruction 'vcvtn' is not predicable, but condition code specified + +vcvtbgt.f64.f16 q0, d3 +@ V8: error: invalid operand for instruction 
+vcvttlt.f64.f16 s0, s3 +@ V8: error: invalid operand for instruction +vcvttvs.f16.f64 s0, s3 +@ V8: error: invalid operand for instruction +vcvtthi.f16.f64 q0, d3 +@ V8: error: invalid operand for instruction + +vrintrlo.f32.f32 d3, q0 +@ V8: error: invalid operand for instruction +vrintxcs.f32.f32 d3, d0 +@ V8: error: instruction requires: NEON + +vrinta.f64.f64 s3, q0 +@ V8: error: invalid operand for instruction +vrintn.f32.f32 d3, d0 +@ V8: error: instruction requires: NEON +vrintp.f32 q3, q0 +@ V8: error: instruction requires: NEON +vrintmlt.f32 q3, q0 +@ V8: error: instruction 'vrintm' is not predicable, but condition code specified diff --git a/test/MC/ARM/invalid-hint-arm.s b/test/MC/ARM/invalid-hint-arm.s index 3608e95..49a2e5c 100644 --- a/test/MC/ARM/invalid-hint-arm.s +++ b/test/MC/ARM/invalid-hint-arm.s @@ -1,7 +1,8 @@ @ RUN: not llvm-mc -triple=armv7-apple-darwin -mcpu=cortex-a8 < %s 2>&1 | FileCheck %s -hint #5 -hint #100 +hint #240 +hint #1000 + +@ CHECK: error: immediate operand must be in the range [0,239] +@ CHECK: error: immediate operand must be in the range [0,239] -@ CHECK: error: immediate operand must be in the range [0,4] -@ CHECK: error: immediate operand must be in the range [0,4] diff --git a/test/MC/ARM/invalid-hint-thumb.s b/test/MC/ARM/invalid-hint-thumb.s index bde987c..d2b50c4 100644 --- a/test/MC/ARM/invalid-hint-thumb.s +++ b/test/MC/ARM/invalid-hint-thumb.s @@ -1,9 +1,8 @@ @ RUN: not llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 < %s 2>&1 | FileCheck %s -hint #5 -hint.w #5 -hint #100 +hint #240 +hint #1000 + +@ CHECK: error: immediate operand must be in the range [0,239] +@ CHECK: error: immediate operand must be in the range [0,239] -@ CHECK: error: immediate operand must be in the range [0,4] -@ CHECK: error: immediate operand must be in the range [0,4] -@ CHECK: error: immediate operand must be in the range [0,4] diff --git a/test/MC/ARM/invalid-idiv.s b/test/MC/ARM/invalid-idiv.s new file mode 100644 index 
0000000..a84e66a --- /dev/null +++ b/test/MC/ARM/invalid-idiv.s @@ -0,0 +1,28 @@ +@ RUN: not llvm-mc -triple=armv7 -mcpu=cortex-a15 -mattr=-hwdiv-arm < %s 2> %t +@ RUN: FileCheck --check-prefix=ARM-A15 < %t %s +@ RUN: not llvm-mc -triple=thumbv7 -mcpu=cortex-a15 -mattr=-hwdiv < %s 2> %t +@ RUN: FileCheck --check-prefix=THUMB-A15 < %t %s +@ RUN: not llvm-mc -triple=armv7 < %s 2> %t +@ RUN: FileCheck --check-prefix=ARM < %t %s +@ RUN: not llvm-mc -triple=thumbv7 < %s 2> %t +@ RUN: FileCheck --check-prefix=THUMB < %t %s + + sdiv r1, r2, r3 + udiv r3, r4, r5 +@ ARM-A15: error: instruction requires: divide in ARM +@ ARM-A15: sdiv r1, r2, r3 +@ ARM-A15: error: instruction requires: divide in ARM +@ ARM-A15: udiv r3, r4, r5 +@ THUMB-A15: error: instruction requires: arm-mode +@ THUMB-A15: sdiv r1, r2, r3 +@ THUMB-A15: error: instruction requires: arm-mode +@ THUMB-A15: udiv r3, r4, r5 + +@ ARM: error: instruction requires: divide in ARM +@ ARM: sdiv r1, r2, r3 +@ ARM: error: instruction requires: divide in ARM +@ ARM: udiv r3, r4, r5 +@ THUMB: error: instruction requires: divide in THUMB +@ THUMB: sdiv r1, r2, r3 +@ THUMB: error: instruction requires: divide in THUMB +@ THUMB: udiv r3, r4, r5 diff --git a/test/MC/ARM/invalid-neon-v8.s b/test/MC/ARM/invalid-neon-v8.s new file mode 100644 index 0000000..361946d --- /dev/null +++ b/test/MC/ARM/invalid-neon-v8.s @@ -0,0 +1,70 @@ +@ RUN: not llvm-mc -triple armv8 -mattr=-fp-armv8 -show-encoding < %s 2>&1 | FileCheck %s + +vmaxnm.f32 s4, d5, q1 +@ CHECK: error: invalid operand for instruction +vmaxnm.f64.f64 s4, d5, q1 +@ CHECK: error: invalid operand for instruction +vmaxnmge.f64.f64 s4, d5, q1 +@ CHECK: error: instruction 'vmaxnm' is not predicable, but condition code specified + +vcvta.s32.f32 s1, s2 +@ CHECK: error: instruction requires: FPARMv8 +vcvtp.u32.f32 s1, d2 +@ CHECK: error: invalid operand for instruction +vcvtp.f32.u32 d1, q2 +@ CHECK: error: invalid operand for instruction +vcvtplo.f32.u32 s1, s2 +@ CHECK: 
error: instruction 'vcvtp' is not predicable, but condition code specified + +vrinta.f64.f64 s3, d12 +@ CHECK: error: invalid operand for instruction +vrintn.f32 d3, q12 +@ CHECK: error: invalid operand for instruction +vrintz.f32 d3, q12 +@ CHECK: error: invalid operand for instruction +vrintmge.f32.f32 d3, d4 +@ CHECK: error: instruction 'vrintm' is not predicable, but condition code specified + +aesd.8 q0, s1 +@ CHECK: error: invalid operand for instruction +aese.8 s0, q1 +@ CHECK: error: invalid operand for instruction +aesimc.8 s0, q1 +@ CHECK: error: invalid operand for instruction +aesmc.8 q0, d1 +@ CHECK: error: invalid operand for instruction +aesdge.8 q0, q1 +@ CHECK: error: instruction 'aesd' is not predicable, but condition code specified + +sha1h.32 d0, q1 +@ CHECK: error: invalid operand for instruction +sha1su1.32 q0, s1 +@ CHECK: error: invalid operand for instruction +sha256su0.32 s0, q1 +@ CHECK: error: invalid operand for instruction +sha1heq.32 q0, q1 +@ CHECK: error: instruction 'sha1h' is not predicable, but condition code specified + +sha1c.32 s0, d1, q2 +@ CHECK: error: invalid operand for instruction +sha1m.32 q0, s1, q2 +@ CHECK: error: invalid operand for instruction +sha1p.32 s0, q1, q2 +@ CHECK: error: invalid operand for instruction +sha1su0.32 d0, q1, q2 +@ CHECK: error: invalid operand for instruction +sha256h.32 q0, s1, q2 +@ CHECK: error: invalid operand for instruction +sha256h2.32 q0, q1, s2 +@ CHECK: error: invalid operand for instruction +sha256su1.32 s0, d1, q2 +@ CHECK: error: invalid operand for instruction +sha256su1lt.32 q0, d1, q2 +@ CHECK: error: instruction 'sha256su1' is not predicable, but condition code specified + +vmull.p64 q0, s1, s3 +@ CHECK: error: invalid operand for instruction +vmull.p64 s1, d2, d3 +@ CHECK: error: invalid operand for instruction +vmullge.p64 q0, d16, d17 +@ CHECK: error: instruction 'vmull' is not predicable, but condition code specified diff --git a/test/MC/ARM/invalid-v8fp.s 
b/test/MC/ARM/invalid-v8fp.s deleted file mode 100644 index 4dff188..0000000 --- a/test/MC/ARM/invalid-v8fp.s +++ /dev/null @@ -1,10 +0,0 @@ -@ RUN: not llvm-mc -triple armv7 -show-encoding < %s | FileCheck %s - -@ VCVT{B,T} - - vcvtt.f64.f16 d3, s1 -@ CHECK-NOT: vcvtt.f64.f16 d3, s1 @ encoding: [0xe0,0x3b,0xb2,0xee] - vcvtt.f16.f64 s5, d12 -@ CHECK-NOT: vcvtt.f16.f64 s5, d12 @ encoding: [0xcc,0x2b,0xf3,0xee] - - diff --git a/test/MC/ARM/lit.local.cfg b/test/MC/ARM/lit.local.cfg index 5700913..8a3ba96 100644 --- a/test/MC/ARM/lit.local.cfg +++ b/test/MC/ARM/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp', '.s'] - targets = set(config.root.targets_to_build.split()) if not 'ARM' in targets: config.unsupported = True diff --git a/test/MC/ARM/load-store-acquire-release-v8-thumb.s b/test/MC/ARM/load-store-acquire-release-v8-thumb.s new file mode 100644 index 0000000..e34a263 --- /dev/null +++ b/test/MC/ARM/load-store-acquire-release-v8-thumb.s @@ -0,0 +1,48 @@ +@ RUN: llvm-mc -triple=thumbv8 -show-encoding < %s | FileCheck %s +@ RUN: not llvm-mc -triple=thumbv7 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V7 + ldaexb r3, [r4] + ldaexh r2, [r5] + ldaex r1, [r7] + ldaexd r6, r7, [r8] + +@ CHECK: ldaexb r3, [r4] @ encoding: [0xd4,0xe8,0xcf,0x3f] +@ CHECK: ldaexh r2, [r5] @ encoding: [0xd5,0xe8,0xdf,0x2f] +@ CHECK: ldaex r1, [r7] @ encoding: [0xd7,0xe8,0xef,0x1f] +@ CHECK: ldaexd r6, r7, [r8] @ encoding: [0xd8,0xe8,0xff,0x67] +@ CHECK-V7: error: instruction requires: armv8 +@ CHECK-V7: error: instruction requires: armv8 +@ CHECK-V7: error: instruction requires: armv8 +@ CHECK-V7: error: instruction requires: armv8 + + stlexb r1, r3, [r4] + stlexh r4, r2, [r5] + stlex r2, r1, [r7] + stlexd r6, r2, r3, [r8] +@ CHECK: stlexb r1, r3, [r4] @ encoding: [0xc4,0xe8,0xc1,0x3f] +@ CHECK: stlexh r4, r2, [r5] @ encoding: [0xc5,0xe8,0xd4,0x2f] +@ CHECK: stlex r2, r1, [r7] @ encoding: [0xc7,0xe8,0xe2,0x1f] +@ CHECK: stlexd r6, r2, r3, [r8] @ 
encoding: [0xc8,0xe8,0xf6,0x23] +@ CHECK-V7: error: instruction requires: armv8 +@ CHECK-V7: error: instruction requires: armv8 +@ CHECK-V7: error: instruction requires: armv8 +@ CHECK-V7: error: instruction requires: armv8 + + lda r5, [r6] + ldab r5, [r6] + ldah r12, [r9] +@ CHECK: lda r5, [r6] @ encoding: [0xd6,0xe8,0xaf,0x5f] +@ CHECK: ldab r5, [r6] @ encoding: [0xd6,0xe8,0x8f,0x5f] +@ CHECK: ldah r12, [r9] @ encoding: [0xd9,0xe8,0x9f,0xcf] +@ CHECK-V7: error: instruction requires: armv8 +@ CHECK-V7: error: instruction requires: armv8 +@ CHECK-V7: error: instruction requires: armv8 + + stl r3, [r0] + stlb r2, [r1] + stlh r2, [r3] +@ CHECK: stl r3, [r0] @ encoding: [0xc0,0xe8,0xaf,0x3f] +@ CHECK: stlb r2, [r1] @ encoding: [0xc1,0xe8,0x8f,0x2f] +@ CHECK: stlh r2, [r3] @ encoding: [0xc3,0xe8,0x9f,0x2f] +@ CHECK-V7: error: instruction requires: armv8 +@ CHECK-V7: error: instruction requires: armv8 +@ CHECK-V7: error: instruction requires: armv8 diff --git a/test/MC/ARM/load-store-acquire-release-v8.s b/test/MC/ARM/load-store-acquire-release-v8.s new file mode 100644 index 0000000..bc55364 --- /dev/null +++ b/test/MC/ARM/load-store-acquire-release-v8.s @@ -0,0 +1,48 @@ +@ RUN: llvm-mc -triple=armv8 -show-encoding < %s | FileCheck %s +@ RUN: not llvm-mc -triple=armv7 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V7 + ldaexb r3, [r4] + ldaexh r2, [r5] + ldaex r1, [r7] + ldaexd r6, r7, [r8] + +@ CHECK: ldaexb r3, [r4] @ encoding: [0x9f,0x3e,0xd4,0xe1] +@ CHECK: ldaexh r2, [r5] @ encoding: [0x9f,0x2e,0xf5,0xe1] +@ CHECK: ldaex r1, [r7] @ encoding: [0x9f,0x1e,0x97,0xe1] +@ CHECK: ldaexd r6, r7, [r8] @ encoding: [0x9f,0x6e,0xb8,0xe1] +@ CHECK-V7: instruction requires: armv8 +@ CHECK-V7: instruction requires: armv8 +@ CHECK-V7: instruction requires: armv8 +@ CHECK-V7: instruction requires: armv8 + + stlexb r1, r3, [r4] + stlexh r4, r2, [r5] + stlex r2, r1, [r7] + stlexd r6, r2, r3, [r8] +@ CHECK: stlexb r1, r3, [r4] @ encoding: [0x93,0x1e,0xc4,0xe1] +@ CHECK: 
stlexh r4, r2, [r5] @ encoding: [0x92,0x4e,0xe5,0xe1] +@ CHECK: stlex r2, r1, [r7] @ encoding: [0x91,0x2e,0x87,0xe1] +@ CHECK: stlexd r6, r2, r3, [r8] @ encoding: [0x92,0x6e,0xa8,0xe1] +@ CHECK-V7: instruction requires: armv8 +@ CHECK-V7: instruction requires: armv8 +@ CHECK-V7: instruction requires: armv8 +@ CHECK-V7: instruction requires: armv8 + + lda r5, [r6] + ldab r5, [r6] + ldah r12, [r9] +@ CHECK: lda r5, [r6] @ encoding: [0x9f,0x5c,0x96,0xe1] +@ CHECK: ldab r5, [r6] @ encoding: [0x9f,0x5c,0xd6,0xe1] +@ CHECK: ldah r12, [r9] @ encoding: [0x9f,0xcc,0xf9,0xe1] +@ CHECK-V7: instruction requires: armv8 +@ CHECK-V7: instruction requires: armv8 +@ CHECK-V7: instruction requires: armv8 + + stl r3, [r0] + stlb r2, [r1] + stlh r2, [r3] +@ CHECK: stl r3, [r0] @ encoding: [0x93,0xfc,0x80,0xe1] +@ CHECK: stlb r2, [r1] @ encoding: [0x92,0xfc,0xc1,0xe1] +@ CHECK: stlh r2, [r3] @ encoding: [0x92,0xfc,0xe3,0xe1] +@ CHECK-V7: instruction requires: armv8 +@ CHECK-V7: instruction requires: armv8 +@ CHECK-V7: instruction requires: armv8 diff --git a/test/MC/ARM/neon-convert-encoding.s b/test/MC/ARM/neon-convert-encoding.s index 1733c52..20c7895 100644 --- a/test/MC/ARM/neon-convert-encoding.s +++ b/test/MC/ARM/neon-convert-encoding.s @@ -18,20 +18,36 @@ vcvt.f32.u32 q8, q8 @ CHECK: vcvt.s32.f32 d16, d16, #1 @ encoding: [0x30,0x0f,0xff,0xf2] vcvt.s32.f32 d16, d16, #1 +@ CHECK: vcvt.s32.f32 d16, d16 @ encoding: [0x20,0x07,0xfb,0xf3] + vcvt.s32.f32 d16, d16, #0 @ CHECK: vcvt.u32.f32 d16, d16, #1 @ encoding: [0x30,0x0f,0xff,0xf3] vcvt.u32.f32 d16, d16, #1 +@ CHECK: vcvt.u32.f32 d16, d16 @ encoding: [0xa0,0x07,0xfb,0xf3] + vcvt.u32.f32 d16, d16, #0 @ CHECK: vcvt.f32.s32 d16, d16, #1 @ encoding: [0x30,0x0e,0xff,0xf2] vcvt.f32.s32 d16, d16, #1 +@ CHECK: vcvt.f32.s32 d16, d16 @ encoding: [0x20,0x06,0xfb,0xf3] + vcvt.f32.s32 d16, d16, #0 @ CHECK: vcvt.f32.u32 d16, d16, #1 @ encoding: [0x30,0x0e,0xff,0xf3] vcvt.f32.u32 d16, d16, #1 +@ CHECK: vcvt.f32.u32 d16, d16 @ encoding: 
[0xa0,0x06,0xfb,0xf3] + vcvt.f32.u32 d16, d16, #0 @ CHECK: vcvt.s32.f32 q8, q8, #1 @ encoding: [0x70,0x0f,0xff,0xf2] vcvt.s32.f32 q8, q8, #1 +@ CHECK: vcvt.s32.f32 q8, q8 @ encoding: [0x60,0x07,0xfb,0xf3] + vcvt.s32.f32 q8, q8, #0 @ CHECK: vcvt.u32.f32 q8, q8, #1 @ encoding: [0x70,0x0f,0xff,0xf3] vcvt.u32.f32 q8, q8, #1 +@ CHECK: vcvt.u32.f32 q8, q8 @ encoding: [0xe0,0x07,0xfb,0xf3] + vcvt.u32.f32 q8, q8, #0 @ CHECK: vcvt.f32.s32 q8, q8, #1 @ encoding: [0x70,0x0e,0xff,0xf2] vcvt.f32.s32 q8, q8, #1 +@ CHECK: vcvt.f32.s32 q8, q8 @ encoding: [0x60,0x06,0xfb,0xf3] + vcvt.f32.s32 q8, q8, #0 @ CHECK: vcvt.f32.u32 q8, q8, #1 @ encoding: [0x70,0x0e,0xff,0xf3] vcvt.f32.u32 q8, q8, #1 +@ CHECK: vcvt.f32.u32 q8, q8 @ encoding: [0xe0,0x06,0xfb,0xf3] + vcvt.f32.u32 q8, q8, #0 @ CHECK: vcvt.f32.f16 q8, d16 @ encoding: [0x20,0x07,0xf6,0xf3] vcvt.f32.f16 q8, d16 @ CHECK: vcvt.f16.f32 d16, q8 @ encoding: [0x20,0x06,0xf6,0xf3] diff --git a/test/MC/ARM/neon-crypto.s b/test/MC/ARM/neon-crypto.s new file mode 100644 index 0000000..92d24da --- /dev/null +++ b/test/MC/ARM/neon-crypto.s @@ -0,0 +1,51 @@ +@ RUN: llvm-mc -triple armv8 -mattr=+neon,+crypto -show-encoding < %s | FileCheck %s +@ RUN: not llvm-mc -triple=armv7 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V7 + +aesd.8 q0, q1 +aese.8 q0, q1 +aesimc.8 q0, q1 +aesmc.8 q0, q1 +@ CHECK: aesd.8 q0, q1 @ encoding: [0x42,0x03,0xb0,0xf3] +@ CHECK: aese.8 q0, q1 @ encoding: [0x02,0x03,0xb0,0xf3] +@ CHECK: aesimc.8 q0, q1 @ encoding: [0xc2,0x03,0xb0,0xf3] +@ CHECK: aesmc.8 q0, q1 @ encoding: [0x82,0x03,0xb0,0xf3] +@ CHECK-V7: instruction requires: crypto armv8 +@ CHECK-V7: instruction requires: crypto armv8 +@ CHECK-V7: instruction requires: crypto armv8 +@ CHECK-V7: instruction requires: crypto armv8 + +sha1h.32 q0, q1 +sha1su1.32 q0, q1 +sha256su0.32 q0, q1 +@ CHECK: sha1h.32 q0, q1 @ encoding: [0xc2,0x02,0xb9,0xf3] +@ CHECK: sha1su1.32 q0, q1 @ encoding: [0x82,0x03,0xba,0xf3] +@ CHECK: sha256su0.32 q0, q1 @ encoding: 
[0xc2,0x03,0xba,0xf3] +@ CHECK-V7: instruction requires: crypto armv8 +@ CHECK-V7: instruction requires: crypto armv8 +@ CHECK-V7: instruction requires: crypto armv8 + +sha1c.32 q0, q1, q2 +sha1m.32 q0, q1, q2 +sha1p.32 q0, q1, q2 +sha1su0.32 q0, q1, q2 +sha256h.32 q0, q1, q2 +sha256h2.32 q0, q1, q2 +sha256su1.32 q0, q1, q2 +@ CHECK: sha1c.32 q0, q1, q2 @ encoding: [0x44,0x0c,0x02,0xf2] +@ CHECK: sha1m.32 q0, q1, q2 @ encoding: [0x44,0x0c,0x22,0xf2] +@ CHECK: sha1p.32 q0, q1, q2 @ encoding: [0x44,0x0c,0x12,0xf2] +@ CHECK: sha1su0.32 q0, q1, q2 @ encoding: [0x44,0x0c,0x32,0xf2] +@ CHECK: sha256h.32 q0, q1, q2 @ encoding: [0x44,0x0c,0x02,0xf3] +@ CHECK: sha256h2.32 q0, q1, q2 @ encoding: [0x44,0x0c,0x12,0xf3] +@ CHECK: sha256su1.32 q0, q1, q2 @ encoding: [0x44,0x0c,0x22,0xf3] +@ CHECK-V7: instruction requires: crypto armv8 +@ CHECK-V7: instruction requires: crypto armv8 +@ CHECK-V7: instruction requires: crypto armv8 +@ CHECK-V7: instruction requires: crypto armv8 +@ CHECK-V7: instruction requires: crypto armv8 +@ CHECK-V7: instruction requires: crypto armv8 +@ CHECK-V7: instruction requires: crypto armv8 + +vmull.p64 q8, d16, d17 +@ CHECK: vmull.p64 q8, d16, d17 @ encoding: [0xa1,0x0e,0xe0,0xf2] +@ CHECK-V7: instruction requires: crypto armv8 diff --git a/test/MC/ARM/obsolete-v8.s b/test/MC/ARM/obsolete-v8.s new file mode 100644 index 0000000..0d6176b --- /dev/null +++ b/test/MC/ARM/obsolete-v8.s @@ -0,0 +1,7 @@ +@ RUN: not llvm-mc -triple=armv8 < %s 2>&1 | FileCheck %s + +swp r0, r1, [r2] +@ CHECK: instruction requires: armv7 or earlier + +swpb r0, r1, [r2] +@ CHECK: instruction requires: armv7 or earlier diff --git a/test/MC/ARM/single-precision-fp.s b/test/MC/ARM/single-precision-fp.s new file mode 100644 index 0000000..2ed0cfe --- /dev/null +++ b/test/MC/ARM/single-precision-fp.s @@ -0,0 +1,194 @@ +@ RUN: not llvm-mc < %s -triple thumbv8-unknown-unknown -show-encoding -mattr=+fp-only-sp,-neon 2> %t > %t2 +@ RUN: FileCheck %s < %t --check-prefix=CHECK-ERRORS +@ 
RUN: FileCheck %s < %t2 + + vadd.f64 d0, d1, d2 + vsub.f64 d2, d3, d4 + vdiv.f64 d4, d5, d6 + vmul.f64 d6, d7, d8 + vnmul.f64 d8, d9, d10 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vadd.f64 d0, d1, d2 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vsub.f64 d2, d3, d4 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vdiv.f64 d4, d5, d6 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vmul.f64 d6, d7, d8 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vnmul.f64 d8, d9, d10 + + vmla.f64 d11, d10, d9 + vmls.f64 d8, d7, d6 + vnmla.f64 d5, d4, d3 + vnmls.f64 d2, d1, d0 + vfma.f64 d1, d2, d3 + vfms.f64 d4, d5, d6 + vfnma.f64 d7, d8, d9 + vfnms.f64 d10, d11, d12 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vmla.f64 d11, d10, d9 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vmls.f64 d8, d7, d6 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vnmla.f64 d5, d4, d3 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vnmls.f64 d2, d1, d0 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vfma.f64 d1, d2, d3 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vfms.f64 d4, d5, d6 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vfnma.f64 d7, d8, d9 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vfnms.f64 d10, d11, d12 + + vneg.f64 d15, d14 + vsqrt.f64 d13, d12 + vsqrt d13, d14 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vneg.f64 d15, d14 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ 
CHECK-ERRORS-NEXT: vsqrt.f64 d13, d12 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vsqrt d13, d14 + + vcmpe.f64 d0, d1 + vcmp.f64 d2, d3 + vabs.f64 d4, d5 + vcmpe.f64 d5, #0 + vcmp.f64 d6, #0 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vcmpe.f64 d0, d1 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vcmp.f64 d2, d3 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vabs.f64 d4, d5 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vcmpe.f64 d5, #0 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vcmp.f64 d6, #0 + + @ FIXME: overlapping aliases and a probable TableGen indeterminacy mean + @ that the actual reason can vary by platform. + vmov.f64 d11, d10 +@ CHECK-ERRORS: error: instruction requires: +@ CHECK-ERRORS-NEXT: vmov.f64 d11, d10 + + vcvt.f64.s32 d9, s8 + vcvt.f64.u32 d7, s6 + vcvt.s32.f64 s5, d4 + vcvt.u32.f64 s3, d2 + vcvtr.s32.f64 s1, d0 + vcvtr.u32.f64 s1, d2 + vcvt.s16.f64 d3, d4, #1 + vcvt.u16.f64 d5, d6, #2 + vcvt.s32.f64 d7, d8, #3 + vcvt.u32.f64 d9, d10, #4 + vcvt.f64.s16 d11, d12, #3 + vcvt.f64.u16 d13, d14, #2 + vcvt.f64.s32 d15, d14, #1 + vcvt.f64.u32 d13, d12, #1 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vcvt.f64.s32 d9, s8 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vcvt.f64.u32 d7, s6 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vcvt.s32.f64 s5, d4 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vcvt.u32.f64 s3, d2 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vcvtr.s32.f64 s1, d0 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vcvtr.u32.f64 s1, 
d2 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vcvt.s16.f64 d3, d4, #1 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vcvt.u16.f64 d5, d6, #2 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vcvt.s32.f64 d7, d8, #3 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vcvt.u32.f64 d9, d10, #4 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vcvt.f64.s16 d11, d12, #3 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vcvt.f64.u16 d13, d14, #2 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vcvt.f64.s32 d15, d14, #1 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vcvt.f64.u32 d13, d12, #1 + + @ v8 operations, also double precision so make sure they're rejected. + vselgt.f64 d0, d1, d2 + vselge.f64 d3, d4, d5 + vseleq.f64 d6, d7, d8 + vselvs.f64 d9, d10, d11 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vselgt.f64 d0, d1, d2 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vselge.f64 d3, d4, d5 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vseleq.f64 d6, d7, d8 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vselvs.f64 d9, d10, d11 + + vmaxnm.f64 d12, d13, d14 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vmaxnm.f64 d12, d13, d14 + + vcvtb.f64.f16 d7, s8 + vcvtb.f16.f64 s9, d10 + vcvtt.f64.f16 d11, s12 + vcvtt.f16.f64 s13, d14 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vcvtb.f64.f16 d7, s8 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vcvtb.f16.f64 s9, d10 +@ 
CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vcvtt.f64.f16 d11, s12 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vcvtt.f16.f64 s13, d14 + + vrintz.f64 d15, d14 + vrintr.f64.f64 d13, d12 + vrintx.f64 d11, d10 + vrinta.f64.f64 d9, d8 + vrintn.f64 d7, d6 + vrintp.f64.f64 d5, d4 + vrintm.f64 d3, d2 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vrintz.f64 d15, d14 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vrintr.f64.f64 d13, d12 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vrintx.f64 d11, d10 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vrinta.f64.f64 d9, d8 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vrintn.f64 d7, d6 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vrintp.f64.f64 d5, d4 +@ CHECK-ERRORS: error: instruction requires: double precision VFP +@ CHECK-ERRORS-NEXT: vrintm.f64 d3, d2 + + @ Double precisionish operations that actually *are* allowed. 
+ vldr d0, [sp] + vstr d3, [sp] + vldm r0, {d0, d1} + vstm r4, {d3, d4} + vpush {d6, d7} + vpop {d8, d9} + vmov r1, r0, d1 + vmov d2, r3, r4 + vmov.f64 r5, r6, d7 + vmov.f64 d8, r9, r10 +@ CHECK: vldr d0, [sp] +@ CHECK: vstr d3, [sp] +@ CHECK: vldmia r0, {d0, d1} +@ CHECK: vstmia r4, {d3, d4} +@ CHECK: vpush {d6, d7} +@ CHECK: vpop {d8, d9} +@ CHECK: vmov r1, r0, d1 +@ CHECK: vmov d2, r3, r4 +@ CHECK: vmov r5, r6, d7 +@ CHECK: vmov d8, r9, r10 diff --git a/test/MC/ARM/thumb-diagnostics.s b/test/MC/ARM/thumb-diagnostics.s index a194ab4..19d17c2 100644 --- a/test/MC/ARM/thumb-diagnostics.s +++ b/test/MC/ARM/thumb-diagnostics.s @@ -2,6 +2,8 @@ @ RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s @ RUN: not llvm-mc -triple=thumbv5-apple-darwin < %s 2> %t @ RUN: FileCheck --check-prefix=CHECK-ERRORS-V5 < %t %s +@ RUN: not llvm-mc -triple=thumbv8 < %s 2> %t +@ RUN: FileCheck --check-prefix=CHECK-ERRORS-V8 < %t %s @ Check for various assembly diagnostic messages on invalid input. @@ -38,10 +40,25 @@ error: invalid operand for instruction bkpt #-1 ^ +@ Out of range immediates for v8 HLT instruction. + hlt #64 + hlt #-1 +@CHECK-ERRORS: error: instruction requires: armv8 arm-mode +@CHECK-ERRORS: hlt #64 +@CHECK-ERRORS: ^ +@CHECK-ERRORS-V8: error: instruction requires: arm-mode +@CHECK-ERRORS-V8: hlt #64 +@CHECK-ERRORS-V8: ^ +@CHECK-ERRORS: error: invalid operand for instruction +@CHECK-ERRORS: hlt #-1 +@CHECK-ERRORS: ^ + @ Invalid writeback and register lists for LDM ldm r2!, {r5, r8} ldm r2, {r5, r7} ldm r2!, {r2, r3, r4} + ldm r2!, {r2, r3, r4, r10} + ldmdb r2!, {r2, r3, r4} @ CHECK-ERRORS: error: registers must be in range r0-r7 @ CHECK-ERRORS: ldm r2!, {r5, r8} @ CHECK-ERRORS: ^ @@ -51,7 +68,12 @@ error: invalid operand for instruction @ CHECK-ERRORS: error: writeback operator '!' not allowed when base register in register list @ CHECK-ERRORS: ldm r2!, {r2, r3, r4} @ CHECK-ERRORS: ^ - +@ CHECK-ERRORS-V8: error: writeback operator '!' 
not allowed when base register in register list +@ CHECK-ERRORS-V8: ldm r2!, {r2, r3, r4, r10} +@ CHECK-ERRORS-V8: ^ +@ CHECK-ERRORS-V8: error: writeback register not allowed in register list +@ CHECK-ERRORS-V8: ldmdb r2!, {r2, r3, r4} +@ CHECK-ERRORS-V8: ^ @ Invalid writeback and register lists for PUSH/POP pop {r1, r2, r10} @@ -67,12 +89,20 @@ error: invalid operand for instruction @ Invalid writeback and register lists for STM stm r1, {r2, r6} stm r1!, {r2, r9} + stm r2!, {r2, r9} + stmdb r2!, {r0, r2} @ CHECK-ERRORS: error: instruction requires: thumb2 @ CHECK-ERRORS: stm r1, {r2, r6} @ CHECK-ERRORS: ^ @ CHECK-ERRORS: error: registers must be in range r0-r7 @ CHECK-ERRORS: stm r1!, {r2, r9} @ CHECK-ERRORS: ^ +@ CHECK-ERRORS-V8: error: writeback operator '!' not allowed when base register in register list +@ CHECK-ERRORS-V8: stm r2!, {r2, r9} +@ CHECK-ERRORS-V8: ^ +@ CHECK-ERRORS-V8: error: writeback register not allowed in register list +@ CHECK-ERRORS-V8: stmdb r2!, {r0, r2} +@ CHECK-ERRORS-V8: ^ @ Out of range immediates for LSL instruction. 
lsls r4, r5, #-1 @@ -138,7 +168,26 @@ error: invalid operand for instruction @ CHECK-ERRORS: error: source register must be the same as destination @ CHECK-ERRORS: add r2, sp, ip @ CHECK-ERRORS: ^ - + + +@------------------------------------------------------------------------------ +@ B/Bcc - out of range immediates for Thumb1 branches +@------------------------------------------------------------------------------ + + beq #-258 + bne #256 + bgt #13 + b #-1048578 + b #1048576 + b #10323 + +@ CHECK-ERRORS: error: branch target out of range +@ CHECK-ERRORS: error: branch target out of range +@ CHECK-ERRORS: error: branch target out of range +@ CHECK-ERRORS: error: branch target out of range +@ CHECK-ERRORS: error: branch target out of range +@ CHECK-ERRORS: error: branch target out of range + @------------------------------------------------------------------------------ @ WFE/WFI/YIELD - are not supported pre v6T2 @------------------------------------------------------------------------------ @@ -146,13 +195,13 @@ error: invalid operand for instruction wfi yield -@ CHECK-ERRORS: error: instruction requires: thumb2 +@ CHECK-ERRORS: error: instruction requires: armv6m or armv6t2 @ CHECK-ERRORS: wfe @ CHECK-ERRORS: ^ -@ CHECK-ERRORS: error: instruction requires: thumb2 +@ CHECK-ERRORS: error: instruction requires: armv6m or armv6t2 @ CHECK-ERRORS: wfi @ CHECK-ERRORS: ^ -@ CHECK-ERRORS: error: instruction requires: thumb2 +@ CHECK-ERRORS: error: instruction requires: armv6m or armv6t2 @ CHECK-ERRORS: yield @ CHECK-ERRORS: ^ @@ -161,3 +210,11 @@ error: invalid operand for instruction @------------------------------------------------------------------------------ pldw [r0, #4] @ CHECK-ERRORS: error: instruction requires: mp-extensions + +@------------------------------------------------------------------------------ +@ LDR(lit) - invalid offsets +@------------------------------------------------------------------------------ + + ldr r4, [pc, #-12] +@ CHECK-ERRORS: error: 
instruction requires: thumb2 + diff --git a/test/MC/ARM/thumb-v8fp.s b/test/MC/ARM/thumb-fp-armv8.s index 50cd005..a730fa2 100644 --- a/test/MC/ARM/thumb-v8fp.s +++ b/test/MC/ARM/thumb-fp-armv8.s @@ -1,4 +1,4 @@ -@ RUN: llvm-mc -triple thumbv8 -mattr=+v8fp -show-encoding < %s | FileCheck %s +@ RUN: llvm-mc -triple thumbv8 -mattr=+fp-armv8 -show-encoding < %s | FileCheck %s @ VCVT{B,T} diff --git a/test/MC/ARM/thumb-hints.s b/test/MC/ARM/thumb-hints.s new file mode 100644 index 0000000..b3c4cee --- /dev/null +++ b/test/MC/ARM/thumb-hints.s @@ -0,0 +1,64 @@ +@ RUN: llvm-mc -triple=thumbv7-apple-darwin -show-encoding < %s | FileCheck %s +@ RUN: llvm-mc -triple=thumbv6-apple-darwin -mcpu=cortex-m0 -show-encoding < %s | FileCheck %s +@ RUN: not llvm-mc -triple=thumbv6-apple-darwin -show-encoding < %s > %t 2> %t2 +@ RUN: FileCheck %s --check-prefix=CHECK-EVIL-PRE-UAL < %t +@ RUN: FileCheck %s --check-prefix CHECK-ERROR < %t2 + + .syntax unified + + nop + yield + wfe + wfi + sev +@ CHECK: nop @ encoding: [0x00,0xbf] +@ CHECK: yield @ encoding: [0x10,0xbf] +@ CHECK: wfe @ encoding: [0x20,0xbf] +@ CHECK: wfi @ encoding: [0x30,0xbf] +@ CHECK: sev @ encoding: [0x40,0xbf] + +@ CHECK-EVIL-PRE-UAL: mov r8, r8 @ encoding: [0xc0,0x46] + + dmb sy + dmb + dsb sy + dsb + isb sy + isb +@ CHECK: dmb sy @ encoding: [0xbf,0xf3,0x5f,0x8f] +@ CHECK: dmb sy @ encoding: [0xbf,0xf3,0x5f,0x8f] +@ CHECK: dsb sy @ encoding: [0xbf,0xf3,0x4f,0x8f] +@ CHECK: dsb sy @ encoding: [0xbf,0xf3,0x4f,0x8f] +@ CHECK: isb sy @ encoding: [0xbf,0xf3,0x6f,0x8f] +@ CHECK: isb sy @ encoding: [0xbf,0xf3,0x6f,0x8f] + + +@ CHECK-ERROR: error: instruction requires: armv6m or armv6t2 +@ CHECK-ERROR-NEXT: yield + +@ CHECK-ERROR: error: instruction requires: armv6m or armv6t2 +@ CHECK-ERROR-NEXT: wfe + +@ CHECK-ERROR: error: instruction requires: armv6m or armv6t2 +@ CHECK-ERROR-NEXT: wfi + +@ CHECK-ERROR: error: instruction requires: armv6m or armv6t2 +@ CHECK-ERROR-NEXT: sev + +@ CHECK-ERROR: error: +@ 
CHECK-ERROR-NEXT: dmb sy + +@ CHECK-ERROR: error: instruction requires: data-barriers +@ CHECK-ERROR-NEXT: dmb + +@ CHECK-ERROR: error: +@ CHECK-ERROR-NEXT: dsb sy + +@ CHECK-ERROR: error: instruction requires: data-barriers +@ CHECK-ERROR-NEXT: dsb + +@ CHECK-ERROR: error: +@ CHECK-ERROR-NEXT: isb sy + +@ CHECK-ERROR: error: instruction requires: data-barriers +@ CHECK-ERROR-NEXT: isb diff --git a/test/MC/ARM/thumb-invalid-crypto.txt b/test/MC/ARM/thumb-invalid-crypto.txt new file mode 100644 index 0000000..a5f9a19 --- /dev/null +++ b/test/MC/ARM/thumb-invalid-crypto.txt @@ -0,0 +1,42 @@ +@ RUN: not llvm-mc -triple thumbv8 -mattr=+neon,+crypto -show-encoding < %s 2>&1 | FileCheck %s + +iteee lo +aesdlo.8 q0, q1 +@ CHECK: error: instruction 'aesd' is not predicable, but condition code specified +aesimchs.8 q0, q1 +@ CHECK: error: instruction 'aesimc' is not predicable, but condition code specified +aesmchs.8 q0, q1 +@ CHECK: error: instruction 'aesmc' is not predicable, but condition code specified +aesehs.8 q0, q1 +@ CHECK: error: instruction 'aese' is not predicable, but condition code specified + +itee hs +sha1hhs.32 q0, q1 +@ CHECK: error: instruction 'sha1h' is not predicable, but condition code specified +sha1su1lo.32 q0, q1 +@ CHECK: error: instruction 'sha1su1' is not predicable, but condition code specified +sha256su0lo.32 q0, q1 +@ CHECK: error: instruction 'sha256su0' is not predicable, but condition code specified + +iteee lo +sha1clo.32 s0, d1, q2 +@ CHECK: error: instruction 'sha1c' is not predicable, but condition code specified +sha1mhs.32 q0, s1, q2 +@ CHECK: error: instruction 'sha1m' is not predicable, but condition code specified +sha1phs.32 s0, q1, q2 +@ CHECK: error: instruction 'sha1p' is not predicable, but condition code specified +sha1su0hs.32 d0, q1, q2 +@ CHECK: error: instruction 'sha1su0' is not predicable, but condition code specified +itee hs +sha256hhs.32 q0, s1, q2 +@ CHECK: error: instruction 'sha256h' is not predicable, but 
condition code specified +sha256h2lo.32 q0, q1, s2 +@ CHECK: error: instruction 'sha256h2' is not predicable, but condition code specified +sha256su1lo.32 s0, d1, q2 +@ CHECK: error: instruction 'sha256su1' is not predicable, but condition code specified + +ite lo +vmulllo.p64 q0, s1, s3 +@ CHECK: error: instruction 'vmull' is not predicable, but condition code specified +vmullhs.p64 q0, d16, d17 +@ CHECK: error: instruction 'vmull' is not predicable, but condition code specified diff --git a/test/MC/ARM/thumb-neon-crypto.s b/test/MC/ARM/thumb-neon-crypto.s new file mode 100644 index 0000000..096e9e8 --- /dev/null +++ b/test/MC/ARM/thumb-neon-crypto.s @@ -0,0 +1,35 @@ +@ RUN: llvm-mc -triple thumbv8 -mattr=+neon,+crypto -show-encoding < %s | FileCheck %s + +aesd.8 q0, q1 +@ CHECK: aesd.8 q0, q1 @ encoding: [0xb0,0xff,0x42,0x03] +aese.8 q0, q1 +@ CHECK: aese.8 q0, q1 @ encoding: [0xb0,0xff,0x02,0x03] +aesimc.8 q0, q1 +@ CHECK: aesimc.8 q0, q1 @ encoding: [0xb0,0xff,0xc2,0x03] +aesmc.8 q0, q1 +@ CHECK: aesmc.8 q0, q1 @ encoding: [0xb0,0xff,0x82,0x03] + +sha1h.32 q0, q1 +@ CHECK: sha1h.32 q0, q1 @ encoding: [0xb9,0xff,0xc2,0x02] +sha1su1.32 q0, q1 +@ CHECK: sha1su1.32 q0, q1 @ encoding: [0xba,0xff,0x82,0x03] +sha256su0.32 q0, q1 +@ CHECK: sha256su0.32 q0, q1 @ encoding: [0xba,0xff,0xc2,0x03] + +sha1c.32 q0, q1, q2 +@ CHECK: sha1c.32 q0, q1, q2 @ encoding: [0x02,0xef,0x44,0x0c] +sha1m.32 q0, q1, q2 +@ CHECK: sha1m.32 q0, q1, q2 @ encoding: [0x22,0xef,0x44,0x0c] +sha1p.32 q0, q1, q2 +@ CHECK: sha1p.32 q0, q1, q2 @ encoding: [0x12,0xef,0x44,0x0c] +sha1su0.32 q0, q1, q2 +@ CHECK: sha1su0.32 q0, q1, q2 @ encoding: [0x32,0xef,0x44,0x0c] +sha256h.32 q0, q1, q2 +@ CHECK: sha256h.32 q0, q1, q2 @ encoding: [0x02,0xff,0x44,0x0c] +sha256h2.32 q0, q1, q2 +@ CHECK: sha256h2.32 q0, q1, q2 @ encoding: [0x12,0xff,0x44,0x0c] +sha256su1.32 q0, q1, q2 +@ CHECK: sha256su1.32 q0, q1, q2 @ encoding: [0x22,0xff,0x44,0x0c] + +vmull.p64 q8, d16, d17 +@ CHECK: vmull.p64 q8, d16, d17 @ encoding: 
[0xe0,0xef,0xa1,0x0e] diff --git a/test/MC/ARM/thumb-nop.s b/test/MC/ARM/thumb-nop.s deleted file mode 100644 index 66f61a6..0000000 --- a/test/MC/ARM/thumb-nop.s +++ /dev/null @@ -1,9 +0,0 @@ -@ RUN: llvm-mc -triple=thumbv6-apple-darwin -show-encoding < %s | FileCheck %s -check-prefix=CHECK-V6 -@ RUN: llvm-mc -triple=thumbv7-apple-darwin -show-encoding < %s | FileCheck %s -check-prefix=CHECK-V7 - - .syntax unified - - nop - -@ CHECK-V6: mov r8, r8 @ encoding: [0xc0,0x46] -@ CHECK-V7: nop @ encoding: [0x00,0xbf] diff --git a/test/MC/ARM/thumb-only-conditionals.s b/test/MC/ARM/thumb-only-conditionals.s index 6d13ce5..8693c24 100644 --- a/test/MC/ARM/thumb-only-conditionals.s +++ b/test/MC/ARM/thumb-only-conditionals.s @@ -40,10 +40,10 @@ @ CHECK-NEXT: mcrr2gt p7, #15, r5, r4, c1 ite eq - mrceq p11, #1, r1, c2, c2 + mrceq p9, #1, r1, c2, c2 mrc2ne p12, #3, r3, c3, c4 @ CHECK: ite eq -@ CHECK-NEXT: mrceq p11, #1, r1, c2, c2 +@ CHECK-NEXT: mrceq p9, #1, r1, c2, c2 @ CHECK-NEXT: mrc2ne p12, #3, r3, c3, c4 itt lo diff --git a/test/MC/ARM/thumb2-b.w-encodingT4.s b/test/MC/ARM/thumb2-b.w-encodingT4.s index be77b06..aff02e1 100644 --- a/test/MC/ARM/thumb2-b.w-encodingT4.s +++ b/test/MC/ARM/thumb2-b.w-encodingT4.s @@ -9,4 +9,4 @@ _foo: @------------------------------------------------------------------------------ b.w 0x3680c -@ CHECK: b.w #223244 @ encoding: [0x6d,0xf0,0x0c,0xb0] +@ CHECK: b.w #223244 @ encoding: [0x36,0xf0,0x06,0xbc] diff --git a/test/MC/ARM/thumb2-branches.s b/test/MC/ARM/thumb2-branches.s new file mode 100644 index 0000000..9148233 --- /dev/null +++ b/test/MC/ARM/thumb2-branches.s @@ -0,0 +1,286 @@ +@ RUN: llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 -show-encoding < %s | FileCheck %s + +@------------------------------------------------------------------------------ +@ unconditional branches accept narrow suffix and encode to short encodings +@------------------------------------------------------------------------------ + + b.n #-2048 + b.n 
#2046 + +@ CHECK: b #-2048 @ encoding: [0x00,0xe4] +@ CHECK: b #2046 @ encoding: [0xff,0xe3] + +@------------------------------------------------------------------------------ +@ unconditional branches accept wide suffix and encode to wide encodings +@------------------------------------------------------------------------------ + + b.w #-2048 + b.w #2046 + b.w #-1677216 + b.w #1677214 + +@ CHECK: b.w #-2048 @ encoding: [0xff,0xf7,0x00,0xbc] +@ CHECK: b.w #2046 @ encoding: [0x00,0xf0,0xff,0xbb] +@ CHECK: b.w #-1677216 @ encoding: [0x66,0xf6,0x30,0xbc] +@ CHECK: b.w #1677214 @ encoding: [0x99,0xf1,0xcf,0xbb] + +@------------------------------------------------------------------------------ +@ unconditional branches without width suffix encode depending of offset size +@------------------------------------------------------------------------------ + + b #-2048 + b #2046 + b #-2050 + b #2048 + b #-1677216 + b #1677214 + +@ CHECK: b #-2048 @ encoding: [0x00,0xe4] +@ CHECK: b #2046 @ encoding: [0xff,0xe3] +@ CHECK: b.w #-2050 @ encoding: [0xff,0xf7,0xff,0xbb] +@ CHECK: b.w #2048 @ encoding: [0x00,0xf0,0x00,0xbc] +@ CHECK: b.w #-1677216 @ encoding: [0x66,0xf6,0x30,0xbc] +@ CHECK: b.w #1677214 @ encoding: [0x99,0xf1,0xcf,0xbb] + +@------------------------------------------------------------------------------ +@ unconditional branches with width narrow suffix in IT block +@------------------------------------------------------------------------------ + + it eq + beq.n #-2048 + it ne + bne.n #-2046 + +@ CHECK: it eq @ encoding: [0x08,0xbf] +@ CHECK: beq #-2048 @ encoding: [0x00,0xe4] +@ CHECK: it ne @ encoding: [0x18,0xbf] +@ CHECK: bne #-2046 @ encoding: [0x01,0xe4] + +@------------------------------------------------------------------------------ +@ unconditional branches with wide suffix in IT block +@------------------------------------------------------------------------------ + + it gt + bgt.w #-2048 + it le + ble.w #2046 + it ge + bge.w #-1677216 + it lt + blt.w 
#1677214 + +@ CHECK: it gt @ encoding: [0xc8,0xbf] +@ CHECK: bgt.w #-2048 @ encoding: [0xff,0xf7,0x00,0xbc] +@ CHECK: it le @ encoding: [0xd8,0xbf] +@ CHECK: ble.w #2046 @ encoding: [0x00,0xf0,0xff,0xbb] +@ CHECK: it ge @ encoding: [0xa8,0xbf] +@ CHECK: bge.w #-1677216 @ encoding: [0x66,0xf6,0x30,0xbc] +@ CHECK: it lt @ encoding: [0xb8,0xbf] +@ CHECK: blt.w #1677214 @ encoding: [0x99,0xf1,0xcf,0xbb] + +@------------------------------------------------------------------------------ +@ conditional branches accept narrow suffix and encode to short encodings +@------------------------------------------------------------------------------ + + beq.n #-256 + bne.n #254 + +@ CHECK: beq #-256 @ encoding: [0x80,0xd0] +@ CHECK: bne #254 @ encoding: [0x7f,0xd1] + +@------------------------------------------------------------------------------ +@ unconditional branches accept wide suffix and encode to wide encodings +@------------------------------------------------------------------------------ + + bmi.w #-256 + bne.w #254 + blt.w #-1048576 + bge.w #1048574 + +@ CHECK: bmi.w #-256 @ encoding: [0x3f,0xf5,0x80,0xaf] +@ CHECK: bne.w #254 @ encoding: [0x40,0xf0,0x7f,0x80] +@ CHECK: blt.w #-1048576 @ encoding: [0xc0,0xf6,0x00,0x80] +@ CHECK: bge.w #1048574 @ encoding: [0xbf,0xf2,0xff,0xaf] + +@------------------------------------------------------------------------------ +@ unconditional branches without width suffix encode depending of offset size +@------------------------------------------------------------------------------ + + bne #-256 + bgt #254 + bne #-258 + bgt #256 + bne #-1048576 + bgt #1048574 + +@ CHECK: bne #-256 @ encoding: [0x80,0xd1] +@ CHECK: bgt #254 @ encoding: [0x7f,0xdc] +@ CHECK: bne.w #-258 @ encoding: [0x7f,0xf4,0x7f,0xaf] +@ CHECK: bgt.w #256 @ encoding: [0x00,0xf3,0x80,0x80] +@ CHECK: bne.w #-1048576 @ encoding: [0x40,0xf4,0x00,0x80] +@ CHECK: bgt.w #1048574 @ encoding: [0x3f,0xf3,0xff,0xaf] + 
+@------------------------------------------------------------------------------ +@ same branch insturction encoding to conditional or unconditional depending +@ on whether it is in an IT block or not +@------------------------------------------------------------------------------ + + it eq + addeq r0, r1 + bne #128 + +@ CHECK: it eq @ encoding: [0x08,0xbf] +@ CHECK: addeq r0, r1 @ encoding: [0x08,0x44] +@ CHECK: bne #128 @ encoding: [0x40,0xd1] + + ite eq + addeq r0, r1 + bne #128 + +@ CHECK: ite eq @ encoding: [0x0c,0xbf] +@ CHECK: addeq r0, r1 @ encoding: [0x08,0x44] +@ CHECK: bne #128 @ encoding: [0x40,0xe0] + +@ RUN: llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 -show-encoding < %s | FileCheck %s + +@------------------------------------------------------------------------------ +@ unconditional branches accept narrow suffix and encode to short encodings +@------------------------------------------------------------------------------ + + b.n #-2048 + b.n #2046 + +@ CHECK: b #-2048 @ encoding: [0x00,0xe4] +@ CHECK: b #2046 @ encoding: [0xff,0xe3] + +@------------------------------------------------------------------------------ +@ unconditional branches accept wide suffix and encode to wide encodings +@------------------------------------------------------------------------------ + + b.w #-2048 + b.w #2046 + b.w #-1677216 + b.w #1677214 + +@ CHECK: b.w #-2048 @ encoding: [0xff,0xf7,0x00,0xbc] +@ CHECK: b.w #2046 @ encoding: [0x00,0xf0,0xff,0xbb] +@ CHECK: b.w #-1677216 @ encoding: [0x66,0xf6,0x30,0xbc] +@ CHECK: b.w #1677214 @ encoding: [0x99,0xf1,0xcf,0xbb] + +@------------------------------------------------------------------------------ +@ unconditional branches without width suffix encode depending of offset size +@------------------------------------------------------------------------------ + + b #-2048 + b #2046 + b #-2050 + b #2048 + b #-1677216 + b #1677214 + +@ CHECK: b #-2048 @ encoding: [0x00,0xe4] +@ CHECK: b #2046 @ encoding: [0xff,0xe3] +@ 
CHECK: b.w #-2050 @ encoding: [0xff,0xf7,0xff,0xbb] +@ CHECK: b.w #2048 @ encoding: [0x00,0xf0,0x00,0xbc] +@ CHECK: b.w #-1677216 @ encoding: [0x66,0xf6,0x30,0xbc] +@ CHECK: b.w #1677214 @ encoding: [0x99,0xf1,0xcf,0xbb] + +@------------------------------------------------------------------------------ +@ unconditional branches with width narrow suffix in IT block +@------------------------------------------------------------------------------ + + it eq + beq.n #-2048 + it ne + bne.n #-2046 + +@ CHECK: it eq @ encoding: [0x08,0xbf] +@ CHECK: beq #-2048 @ encoding: [0x00,0xe4] +@ CHECK: it ne @ encoding: [0x18,0xbf] +@ CHECK: bne #-2046 @ encoding: [0x01,0xe4] + +@------------------------------------------------------------------------------ +@ unconditional branches with wide suffix in IT block +@------------------------------------------------------------------------------ + + it gt + bgt.w #-2048 + it le + ble.w #2046 + it ge + bge.w #-1677216 + it lt + blt.w #1677214 + +@ CHECK: it gt @ encoding: [0xc8,0xbf] +@ CHECK: bgt.w #-2048 @ encoding: [0xff,0xf7,0x00,0xbc] +@ CHECK: it le @ encoding: [0xd8,0xbf] +@ CHECK: ble.w #2046 @ encoding: [0x00,0xf0,0xff,0xbb] +@ CHECK: it ge @ encoding: [0xa8,0xbf] +@ CHECK: bge.w #-1677216 @ encoding: [0x66,0xf6,0x30,0xbc] +@ CHECK: it lt @ encoding: [0xb8,0xbf] +@ CHECK: blt.w #1677214 @ encoding: [0x99,0xf1,0xcf,0xbb] + +@------------------------------------------------------------------------------ +@ conditional branches accept narrow suffix and encode to short encodings +@------------------------------------------------------------------------------ + + beq.n #-256 + bne.n #254 + +@ CHECK: beq #-256 @ encoding: [0x80,0xd0] +@ CHECK: bne #254 @ encoding: [0x7f,0xd1] + +@------------------------------------------------------------------------------ +@ unconditional branches accept wide suffix and encode to wide encodings +@------------------------------------------------------------------------------ + + bmi.w #-256 + bne.w 
#254 + blt.w #-1048576 + bge.w #1048574 + +@ CHECK: bmi.w #-256 @ encoding: [0x3f,0xf5,0x80,0xaf] +@ CHECK: bne.w #254 @ encoding: [0x40,0xf0,0x7f,0x80] +@ CHECK: blt.w #-1048576 @ encoding: [0xc0,0xf6,0x00,0x80] +@ CHECK: bge.w #1048574 @ encoding: [0xbf,0xf2,0xff,0xaf] + +@------------------------------------------------------------------------------ +@ unconditional branches without width suffix encode depending of offset size +@------------------------------------------------------------------------------ + + bne #-256 + bgt #254 + bne #-258 + bgt #256 + bne #-1048576 + bgt #1048574 + +@ CHECK: bne #-256 @ encoding: [0x80,0xd1] +@ CHECK: bgt #254 @ encoding: [0x7f,0xdc] +@ CHECK: bne.w #-258 @ encoding: [0x7f,0xf4,0x7f,0xaf] +@ CHECK: bgt.w #256 @ encoding: [0x00,0xf3,0x80,0x80] +@ CHECK: bne.w #-1048576 @ encoding: [0x40,0xf4,0x00,0x80] +@ CHECK: bgt.w #1048574 @ encoding: [0x3f,0xf3,0xff,0xaf] + +@------------------------------------------------------------------------------ +@ same branch insturction encoding to conditional or unconditional depending +@ on whether it is in an IT block or not +@------------------------------------------------------------------------------ + + it eq + addeq r0, r1 + bne #128 + +@ CHECK: it eq @ encoding: [0x08,0xbf] +@ CHECK: addeq r0, r1 @ encoding: [0x08,0x44] +@ CHECK: bne #128 @ encoding: [0x40,0xd1] + + ite eq + addeq r0, r1 + bne #128 + +@ CHECK: ite eq @ encoding: [0x0c,0xbf] +@ CHECK: addeq r0, r1 @ encoding: [0x08,0x44] +@ CHECK: bne #128 @ encoding: [0x40,0xe0] + diff --git a/test/MC/ARM/thumb2-diagnostics.s b/test/MC/ARM/thumb2-diagnostics.s index e1c0058..6ac2db0 100644 --- a/test/MC/ARM/thumb2-diagnostics.s +++ b/test/MC/ARM/thumb2-diagnostics.s @@ -51,3 +51,22 @@ itt eq bkpteq #1 @ CHECK-ERRORS: error: instruction 'bkpt' is not predicable, but condition code specified + + nopeq + nopeq + +@ out of range operands for Thumb2 targets + + beq.w #-1048578 + bne.w #1048576 + blt.w #1013411 + b.w #-16777218 + b.w 
#16777216 + b.w #1592313 + +@ CHECK-ERRORS: error: branch target out of range +@ CHECK-ERRORS: error: branch target out of range +@ CHECK-ERRORS: error: branch target out of range +@ CHECK-ERRORS: error: branch target out of range +@ CHECK-ERRORS: error: branch target out of range +@ CHECK-ERRORS: error: branch target out of range diff --git a/test/MC/ARM/thumb2-ldrd.s b/test/MC/ARM/thumb2-ldrd.s new file mode 100644 index 0000000..4463c21 --- /dev/null +++ b/test/MC/ARM/thumb2-ldrd.s @@ -0,0 +1,9 @@ +// RUN: not llvm-mc -arch thumb -mattr=+thumb2 \ +// RUN: < %s >/dev/null 2> %t +// RUN: grep "error: destination operands can't be identical" %t | count 4 +// rdar://14479780 + +ldrd r0, r0, [pc, #0] +ldrd r0, r0, [r1, #4] +ldrd r0, r0, [r1], #4 +ldrd r0, r0, [r1, #4]! diff --git a/test/MC/ARM/v8_IT_manual.s b/test/MC/ARM/v8_IT_manual.s new file mode 100644 index 0000000..4b63aa8 --- /dev/null +++ b/test/MC/ARM/v8_IT_manual.s @@ -0,0 +1,6739 @@ +@ RUN: llvm-mc -triple thumbv8 -show-encoding < %s 2>&1 | FileCheck %s + +@ ADD reg, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +addge r1, r2, r3 +@ ADD reg, encoding T2 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +addge r1, r2 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge r1, pc +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge pc, r2 +@ ADD reg, encoding T3 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge r11, r2, r3 +@ ADD imm, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +addge r1, r2, #3 +@ ADD imm, encoding T2 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +addge r1, #3 +@ ADD imm, encoding T3 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge r11, r2, #3 +@ ADD imm, encoding T4 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge r11, r2, #333 +@ ADD SP+imm, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning 
+it ge +addge r1, sp, #32 +@ ADD SP+imm, encoding T2 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge sp, #32 +@ ADD SP+imm, encoding T3 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge r1, sp, #33 +@ ADD SP+imm, encoding T4 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge r1, sp, #333 + +@ SUB reg, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +subge r4, r3, r2 +@ SUB reg, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge r14, r3, r2 +@ SUB imm, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +subge r4, r3, #2 +@ SUB imm, encoding T2 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +subge r4, #3 +@ SUB imm, encoding T3 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge r14, r3, #2 +@ SUB imm, encoding T4 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge r14, r3, #2222 +@ SUB SP-imm, encoding T1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge sp, #32 +@ SUB SP-imm, encoding T3 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge r4, sp, #33 +@ SUB SP-imm, encoding T4 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge r4, sp, #3333 + +@ MOV reg, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +movge r4, r5 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movge r4, pc +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movge pc, r5 +@ MOV reg, encoding T3 (32-bit) -- can only appear as MOVS or MOV.W +@ MOV imm, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +movge r4, #5 +@ MOV imm, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movge r14, #5 +@ MOV imm, encoding T3 
(32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movge r14, #555 + +@ CMP reg, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +cmpge r3, r4 +@ CMP reg, encoding T2 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +cmpge r13, r4 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +cmpge r3, pc +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +cmpge pc, r4 +@ CMP reg, encoding T3 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +cmpge r3, r4, lsl #1 +@ CMP imm, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +cmpge r3, #4 +@ CMP imm, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +cmpge r13, #4 + +@ AND reg, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +andge r5, r6 +@ AND reg, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r9, r6 + +@ EOR reg, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +eorge r7, r6 +@ EOR reg, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r7, r9 + +@ LSL imm, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +lslge r7, r0, #1 +@ LSL imm, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +lslge r7, r10, #1 +@ LSL reg, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +lslge r7, r0 +@ LSL reg, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +lslge r7, r10 + +@ LSR imm, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +lsrge r3, r2, #1 +@ LSR imm, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +lsrge r3, r12, #1 +@ LSR reg, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +lsrge r3, r2 +@ LSR reg, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated 
instruction in IT block +it ge +lsrge r3, r12 + +@ ASR imm, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +asrge r2, r3, #4 +@ ASR imm, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +asrge r12, r3, #4 +@ ASR reg, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +asrge r2, r3 +@ ASR reg, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +asrge r12, r3 + +@ ADC reg, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +adcge r5, r4 +@ ADC reg, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r5, r5, r14 + +@ SBC reg, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +sbcge r5, r6 +@ SBC reg, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r9, r9, r6 + +@ ROR reg, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +rorge r7, r6 +@ ROR reg, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rorge r7, r9 + +@ TST reg, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +tstge r7, r0 +@ TST reg, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +tstge r7, r10 + +@ RSB imm, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +rsbge r1, r0, #0 +@ RSB imm, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge r11, r0, #0 + +@ CMN reg, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +cmnge r1, r2 +@ CMN reg, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +cmnge r11, r2 + +@ ORR reg, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +orrge r3, r2 +@ ORR reg, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r3, r12 + +@ MUL reg, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: 
warning +it ge +mulge r3, r4, r3 +@ MUL reg, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mulge r3, r4, r5 + +@ BIC reg, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +bicge r5, r4 +@ BIC reg, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r5, r14 + +@ MVN reg, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +mvnge r5, r6 +@ MVN reg, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mvnge r9, r6 + +@ BX, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +bxge r6 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bxge pc + +@ BLX, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +blxge r7 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +blxge pc + +@ LDR reg, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +ldrge r0, [r1, r2] +@ LDR reg, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge r10, [r1, r2] +@ LDR imm, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +ldrge r0, [r1] +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +ldrge r0, [r1, #8] +@ LDR imm, encoding T2 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +ldrge r0, [sp] +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +ldrge r0, [sp, #8] +@ LDR reg, encoding T3 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge r0, [r1, #2] +@ LDR reg, encoding T4 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge r0, [r1, #-2] +@ LDR lit, encoding T1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge r0, [pc, #8] +@ LDR lit, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge r10, [pc, #8] + +@ STR reg, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +strge 
r1, [r2, r3] +@ STR reg, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge r11, [r2, r3] +@ STR imm, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +strge r1, [r2] +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +strge r1, [r2, #4] +@ STR imm, encoding T2 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +strge r1, [sp] +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +strge r1, [sp, #4] +@ STR imm, encoding T3 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge r1, [r2, #3] +@ STR imm, encoding T4 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge r1, [r2, #-3] + +@ STRH reg, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +strhge r4, [r3, r2] +@ STRH reg, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge r14, [r3, r2] +@ STRH imm, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +strhge r4, [r3] +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +strhge r4, [r3, #2] +@ STRH imm, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge r4, [r3, #1] +@ STRH imm, encoding T3 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge r4, [r3, #-2] + +@ STRB reg, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +strbge r3, [r4, r5] +@ STRB reg, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge r3, [r14, r5] +@ STRB imm, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +strbge r3, [r4] +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +strbge r3, [r4, #5] +@ STRB reg, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge r3, [r14, #5] +@ STRB reg, encoding T3 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge r3, [r4, #-5] + +@ LDRSB reg, encoding 
T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +ldrsbge r6, [r5, r4] +@ LDRSB reg, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge r9, [r5, r4] + +@ LDRH reg, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +ldrhge r5, [r6, r7] +@ LDRH reg, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge r5, [r9, r7] +@ LDRH imm, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +ldrhge r5, [r6] +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +ldrhge r5, [r6, #8] +@ LDRH imm, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge r5, [r6, #7] +@ LDRH imm, encoding T3 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge r5, [r6, #-8] + +@ LDRB reg, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +ldrbge r0, [r7, r6] +@ LDRB reg, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge r10, [r7, r6] +@ LDRB imm, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +ldrbge r0, [r7] +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +ldrbge r0, [r7, #6] +@ LDRB reg, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge r10, [r7, #6] +@ LDRB reg, encoding T3 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge r0, [r7, #-6] + +@ LDRSH reg, encoding T1 +@ CHECK-NOT: [[@LINE+2]]:1: warning +it ge +ldrshge r7, [r0, r1] +@ LDRSH reg, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge r7, [r0, r11] + +@ ADR, encoding T1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adrge r1, #24 +@ ADR, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adrge r1, #-23 +@ ADR, encoding T3 (32-bit) +@ CHECK: [[@LINE+2]]:1: 
warning: deprecated instruction in IT block +it ge +adrge r1, #23 + +@ SXTH, encoding T1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sxthge r4, r3 +@ SXTH, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sxthge r4, r9 + +@ SXTB, encoding T1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sxtbge r4, r5 +@ SXTB, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sxtbge r14, r5 + +@ UXTH, encoding T1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +uxthge r6, r5 +@ UXTH, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +uxthge r9, r5 + +@ UXTB, encoding T1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +uxtbge r6, r7 +@ UXTB, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +uxtbge r6, r9 + +@ PUSH, encoding T1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +pushge {r1, r3, r7} +@ PUSH, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +pushge {r1, r13, r7} +@ PUSH, encoding T3 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +pushge {r13} + +@ REV, encoding T1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +revge r7, r6 +@ REV, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +revge r9, r6 + +@ REV16, encoding T1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rev16ge r7, r0 +@ REV16, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rev16ge r7, r10 + +@ REVSH, encoding T1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +revshge r1, r0 +@ REVSH, encoding T2 (32-bit) +@ 
CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +revshge r11, r0 + +@ POP, encoding T1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +popge {r1, r0, r5} +@ POP, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +popge {r1, r5, r10} +@ POP, encoding T3 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +popge {r10} + +@ NOP, encoding T1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +nopge +@ NOP, encoding T2 (32-bit) -- can only appear as NOP.W + +@ STM, encoding T1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stmge r1!, {r2, r3} +@ STM, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stmge r1, {r2, r3} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stmge r1!, {r2, r13} + +@ LDM, encoding T1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldmge r4!, {r2, r3} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldmge r4, {r2, r3} +@ LDM, encoding T2 (32-bit) +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldmge r14!, {r2, r3} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldmge r14, {r2, r3} + +@ SVC, encoding T1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +svcge #55 + +@ B, encoding T2 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bge #2014 + +@ The following Thumb instructions only have 32-bit encodings. 
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strexge r0, r0, [r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strexge r0, r0, [r1] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strexge r0, r0, [r2] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strexge r0, r0, [r3] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strexge r0, r0, [r4] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strexge r0, r0, [r5] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strexge r0, r0, [r6] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strexge r0, r0, [r7] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strexge r0, r0, [r8] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strexge r0, r0, [r9] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strexge r0, r0, [r10] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strexge r0, r0, [r11] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strexge r0, r0, [r12] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strexge r0, r0, [sp] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strexge r0, r0, [lr] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strexge r0, r0, [pc] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r0], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r1], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r2], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r3], #-0 +@ CHECK: 
[[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r4], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r5], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r6], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r7], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r8], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r9], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r10], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r11], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r12], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [sp], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [lr], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [pc], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r0], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r1], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r2], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r3], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r4], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r5], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r6], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge 
+strdge r0, r0, [r7], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r8], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r9], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r10], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r11], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r12], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [sp], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [lr], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [pc], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r0, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r1, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r2, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r3, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r4, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r5, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r6, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r7, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r8, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r9, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r10, #-0] +@ CHECK: [[@LINE+2]]:1: warning: 
deprecated instruction in IT block +it ge +strdge r0, r0, [r11, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r12, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [sp, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [lr, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [pc, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r0, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r1, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r2, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r3, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r4, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r5, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r6, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r7, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r8, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r9, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r10, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r11, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r12, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [sp, #-0]! 
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [lr, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [pc, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r1] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r2] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r3] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r4] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r5] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r6] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r7] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r8] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r9] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r10] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r11] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r12] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [sp] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [lr] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [pc] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r0, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r1, #0]! 
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r2, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r3, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r4, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r5, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r6, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r7, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r8, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r9, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r10, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r11, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [r12, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [sp, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [lr, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strdge r0, r0, [pc, #0]! 
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge.w r0, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge.w r0, r1, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge.w r0, r2, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge.w r0, r3, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge.w r0, r4, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge.w r0, r5, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge.w r0, r6, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge.w r0, r7, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge.w r0, r8, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge.w r0, r9, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge.w r0, r10, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge.w r0, r11, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge.w r0, r12, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge.w r0, lr, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andsge.w r0, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andsge.w r0, r1, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andsge.w r0, r2, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andsge.w r0, r3, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andsge.w r0, r4, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andsge.w r0, r5, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block 
+it ge +andsge.w r0, r6, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andsge.w r0, r7, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andsge.w r0, r8, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andsge.w r0, r9, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andsge.w r0, r10, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andsge.w r0, r11, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andsge.w r0, r12, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andsge.w r0, lr, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge.w r0, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge.w r0, r1, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge.w r0, r2, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge.w r0, r3, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge.w r0, r4, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge.w r0, r5, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge.w r0, r6, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge.w r0, r7, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge.w r0, r8, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge.w r0, r9, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge.w r0, r10, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge.w r0, r11, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge.w r0, r12, r0 +@ CHECK: [[@LINE+2]]:1: warning: 
deprecated instruction in IT block +it ge +bicge.w r0, lr, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicsge.w r0, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicsge.w r0, r1, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicsge.w r0, r2, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicsge.w r0, r3, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicsge.w r0, r4, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicsge.w r0, r5, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicsge.w r0, r6, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicsge.w r0, r7, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicsge.w r0, r8, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicsge.w r0, r9, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicsge.w r0, r10, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicsge.w r0, r11, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicsge.w r0, r12, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicsge.w r0, lr, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge.w r0, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge.w r0, r1, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge.w r0, r2, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge.w r0, r3, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge.w r0, r4, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge.w r0, r5, r0 
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge.w r0, r6, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge.w r0, r7, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge.w r0, r8, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge.w r0, r9, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge.w r0, r10, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge.w r0, r11, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge.w r0, r12, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge.w r0, lr, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movge.w r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrsge.w r0, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrsge.w r0, r1, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrsge.w r0, r2, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrsge.w r0, r3, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrsge.w r0, r4, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrsge.w r0, r5, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrsge.w r0, r6, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrsge.w r0, r7, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrsge.w r0, r8, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrsge.w r0, r9, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrsge.w r0, r10, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block 
+it ge +orrsge.w r0, r11, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrsge.w r0, r12, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrsge.w r0, lr, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movsge.w r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r1, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r2, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r3, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r4, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r5, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r6, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r7, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r8, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r9, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r10, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r11, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r12, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, lr, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mvnge.w r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r1, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge 
r0, r2, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r3, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r4, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r5, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r6, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r7, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r8, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r9, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r10, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r11, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r12, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, lr, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mvnsge.w r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge.w r0, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge.w r0, r1, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge.w r0, r2, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge.w r0, r3, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge.w r0, r4, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge.w r0, r5, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge.w r0, r6, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge.w r0, r7, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge.w r0, 
r8, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge.w r0, r9, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge.w r0, r10, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge.w r0, r11, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge.w r0, r12, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge.w r0, lr, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorsge.w r0, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorsge.w r0, r1, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorsge.w r0, r2, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorsge.w r0, r3, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorsge.w r0, r4, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorsge.w r0, r5, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorsge.w r0, r6, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorsge.w r0, r7, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorsge.w r0, r8, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorsge.w r0, r9, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorsge.w r0, r10, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorsge.w r0, r11, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorsge.w r0, r12, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorsge.w r0, lr, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated 
instruction in IT block +it ge +addge.w r0, r1, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r2, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r3, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r4, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r5, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r6, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r7, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r8, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r9, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r10, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r11, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r12, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, sp, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, lr, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r1, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r2, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r3, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r4, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r5, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r6, r0 +@ CHECK: 
[[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r7, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r8, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r9, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r10, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r11, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r12, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, sp, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, lr, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge.w r0, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge.w r0, r1, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge.w r0, r2, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge.w r0, r3, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge.w r0, r4, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge.w r0, r5, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge.w r0, r6, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge.w r0, r7, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge.w r0, r8, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge.w r0, r9, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge.w r0, r10, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge.w r0, r11, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge 
+adcge.w r0, r12, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge.w r0, lr, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcsge.w r0, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcsge.w r0, r1, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcsge.w r0, r2, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcsge.w r0, r3, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcsge.w r0, r4, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcsge.w r0, r5, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcsge.w r0, r6, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcsge.w r0, r7, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcsge.w r0, r8, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcsge.w r0, r9, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcsge.w r0, r10, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcsge.w r0, r11, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcsge.w r0, r12, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcsge.w r0, lr, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge.w r0, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge.w r0, r1, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge.w r0, r2, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge.w r0, r3, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge.w r0, r4, r0 +@ CHECK: [[@LINE+2]]:1: warning: 
deprecated instruction in IT block +it ge +sbcge.w r0, r5, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge.w r0, r6, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge.w r0, r7, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge.w r0, r8, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge.w r0, r9, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge.w r0, r10, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge.w r0, r11, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge.w r0, r12, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge.w r0, lr, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcsge.w r0, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcsge.w r0, r1, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcsge.w r0, r2, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcsge.w r0, r3, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcsge.w r0, r4, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcsge.w r0, r5, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcsge.w r0, r6, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcsge.w r0, r7, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcsge.w r0, r8, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcsge.w r0, r9, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcsge.w r0, r10, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcsge.w r0, r11, r0 
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcsge.w r0, r12, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcsge.w r0, lr, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r1, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r2, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r3, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r4, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r5, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r6, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r7, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r8, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r9, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r10, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r11, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r12, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, sp, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, lr, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r1, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r2, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block 
+it ge +subsge.w r0, r3, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r4, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r5, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r6, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r7, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r8, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r9, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r10, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r11, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r12, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, sp, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, lr, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge r0, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge r0, r1, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge r0, r2, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge r0, r3, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge r0, r4, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge r0, r5, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge r0, r6, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge r0, r7, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge r0, r8, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated 
instruction in IT block +it ge +rsbge r0, r9, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge r0, r10, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge r0, r11, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge r0, r12, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge r0, lr, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge r0, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge r0, r1, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge r0, r2, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge r0, r3, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge r0, r4, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge r0, r5, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge r0, r6, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge r0, r7, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge r0, r8, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge r0, r9, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge r0, r10, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge r0, r11, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge r0, r12, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge r0, lr, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r0], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r1], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated 
instruction in IT block +it ge +stcge p0, c0, [r2], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r3], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r4], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r5], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r6], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r7], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r8], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r9], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r10], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r11], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r12], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [sp], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [lr], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [pc], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r0], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r1], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r2], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r3], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r4], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r5], #-0 +@ CHECK: 
[[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r6], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r7], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r8], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r9], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r10], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r11], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r12], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [sp], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [lr], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [pc], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mcrrge p0, #0, r0, r0, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mcrrge p0, #0, r0, r1, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mcrrge p0, #0, r0, r2, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mcrrge p0, #0, r0, r3, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mcrrge p0, #0, r0, r4, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mcrrge p0, #0, r0, r5, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mcrrge p0, #0, r0, r6, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mcrrge p0, #0, r0, r7, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mcrrge p0, #0, r0, r8, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block 
+it ge +mcrrge p0, #0, r0, r9, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mcrrge p0, #0, r0, r10, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mcrrge p0, #0, r0, r11, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mcrrge p0, #0, r0, r12, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mcrrge p0, #0, r0, sp, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mcrrge p0, #0, r0, lr, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mcrrge p0, #0, r0, pc, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mrrcge p14, #0, r0, r0, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mrrcge p14, #0, r0, r1, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mrrcge p14, #0, r0, r2, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mrrcge p14, #0, r0, r3, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mrrcge p14, #0, r0, r4, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mrrcge p14, #0, r0, r5, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mrrcge p14, #0, r0, r6, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mrrcge p14, #0, r0, r7, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mrrcge p14, #0, r0, r8, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mrrcge p14, #0, r0, r9, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mrrcge p14, #0, r0, r10, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mrrcge p14, #0, r0, r11, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mrrcge p14, #0, 
r0, r12, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mrrcge p14, #0, r0, sp, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mrrcge p14, #0, r0, lr, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mrrcge p14, #0, r0, pc, c0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r0], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r1], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r2], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r3], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r4], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r5], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r6], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r7], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r8], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r9], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r10], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r11], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r12], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [sp], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [lr], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [pc], #-0 +@ CHECK: [[@LINE+2]]:1: warning: 
deprecated instruction in IT block +it ge +ldclge p0, c0, [r0], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r1], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r2], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r3], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r4], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r5], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r6], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r7], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r8], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r9], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r10], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r11], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r12], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [sp], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [lr], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [pc], #-0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r0], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r1], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r2], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, 
[r3], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r4], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r5], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r6], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r7], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r8], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r9], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r10], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r11], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r12], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [sp], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [lr], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [pc], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r0], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r1], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r2], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r3], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r4], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r5], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r6], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT 
block +it ge +ldcge p0, c0, [r7], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r8], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r9], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r10], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r11], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r12], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [sp], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [lr], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [pc], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r0], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r1], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r2], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r3], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r4], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r5], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r6], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r7], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r8], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r9], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r10], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated 
instruction in IT block +it ge +stcge p0, c0, [r11], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r12], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [sp], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [lr], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [pc], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r0], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r1], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r2], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r3], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r4], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r5], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r6], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r7], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r8], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r9], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r10], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r11], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r12], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [sp], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [lr], #0 +@ CHECK: [[@LINE+2]]:1: warning: 
deprecated instruction in IT block +it ge +ldcge p0, c0, [pc], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r0], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r1], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r2], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r3], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r4], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r5], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r6], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r7], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r8], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r9], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r10], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r11], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r12], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [sp], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [lr], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [pc], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r0], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r1], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, 
[r2], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r3], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r4], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r5], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r6], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r7], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r8], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r9], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r10], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r11], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r12], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [sp], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [lr], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [pc], {0} +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r0], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r1], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r2], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r3], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r4], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r5], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated 
instruction in IT block +it ge +stclge p0, c0, [r6], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r7], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r8], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r9], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r10], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r11], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r12], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [sp], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [lr], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [pc], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r0], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r1], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r2], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r3], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r4], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r5], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r6], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r7], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r8], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r9], #0 +@ CHECK: 
[[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r10], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r11], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r12], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [sp], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [lr], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [pc], #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r0, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r1, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r2, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r3, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r4, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r5, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r6, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r7, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r8, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r9, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r10, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r11, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r12, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge 
p0, c0, [sp, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [lr, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [pc, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r0, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r1, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r2, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r3, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r4, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r5, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r6, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r7, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r8, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r9, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r10, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r11, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r12, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [sp, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [lr, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [pc, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r0, #-0]! 
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r1, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r2, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r3, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r4, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r5, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r6, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r7, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r8, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r9, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r10, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r11, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r12, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [sp, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [lr, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [pc, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r0, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r1, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r2, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r3, #-0]! 
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r4, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r5, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r6, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r7, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r8, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r9, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r10, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r11, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r12, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [sp, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [lr, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [pc, #-0]! 
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r0, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r1, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r2, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r3, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r4, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r5, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r6, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r7, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r8, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r9, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r10, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r11, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r12, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [sp, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [lr, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [pc, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r0, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r1, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r2, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction 
in IT block +it ge +ldclge p0, c0, [r3, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r4, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r5, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r6, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r7, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r8, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r9, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r10, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r11, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r12, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [sp, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [lr, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [pc, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r0, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r1, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r2, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r3, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r4, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r5, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r6, #-0]! 
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r7, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r8, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r9, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r10, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r11, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r12, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [sp, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [lr, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [pc, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r0, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r1, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r2, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r3, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r4, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r5, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r6, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r7, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r8, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r9, #-0]! 
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r10, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r11, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r12, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [sp, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [lr, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [pc, #-0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r1] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r2] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r3] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r4] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r5] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r6] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r7] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r8] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r9] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r10] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r11] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r12] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [sp] +@ CHECK: [[@LINE+2]]:1: 
warning: deprecated instruction in IT block +it ge +stcge p0, c0, [lr] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [pc] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r1] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r2] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r3] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r4] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r5] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r6] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r7] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r8] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r9] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r10] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r11] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r12] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [sp] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [lr] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [pc] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r0, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r1, #0]! 
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r2, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r3, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r4, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r5, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r6, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r7, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r8, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r9, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r10, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r11, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [r12, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [sp, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [lr, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stcge p0, c0, [pc, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r0, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r1, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r2, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r3, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r4, #0]! 
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r5, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r6, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r7, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r8, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r9, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r10, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r11, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [r12, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [sp, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [lr, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldcge p0, c0, [pc, #0]! 
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r1] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r2] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r3] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r4] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r5] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r6] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r7] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r8] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r9] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r10] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r11] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r12] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [sp] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [lr] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [pc] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r1] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r2] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r3] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction 
in IT block +it ge +ldclge p0, c0, [r4] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r5] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r6] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r7] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r8] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r9] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r10] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r11] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r12] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [sp] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [lr] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [pc] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r0, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r1, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r2, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r3, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r4, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r5, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r6, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r7, #0]! 
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r8, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r9, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r10, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r11, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [r12, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [sp, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [lr, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +stclge p0, c0, [pc, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r0, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r1, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r2, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r3, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r4, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r5, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r6, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r7, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r8, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r9, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r10, #0]! 
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r11, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [r12, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [sp, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [lr, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldclge p0, c0, [pc, #0]! +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r0, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r1, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r2, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r3, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r4, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r5, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r6, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r7, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r8, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r9, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r10, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r11, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r12, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, lr, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r0, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it 
ge +andge r0, r1, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r2, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r3, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r4, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r5, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r6, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r7, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r8, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r9, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r10, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r11, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r12, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, lr, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r0, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r1, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r2, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r3, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r4, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r5, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r6, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r7, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r8, #0 
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r9, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r10, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r11, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r12, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, lr, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r0, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r1, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r2, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r3, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r4, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r5, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r6, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r7, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r8, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r9, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r10, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r11, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r12, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, lr, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r0, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r1, #0 +@ CHECK: 
[[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r2, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r3, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r4, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r5, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r6, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r7, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r8, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r9, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r10, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r11, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r12, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, lr, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movge.w r0, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r0, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r1, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r2, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r3, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r4, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r5, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r6, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r7, #0 +@ CHECK: [[@LINE+2]]:1: warning: 
deprecated instruction in IT block +it ge +orrge r0, r8, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r9, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r10, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r11, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r12, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, lr, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movsge.w r0, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r0, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r1, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r2, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r3, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r4, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r5, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r6, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r7, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r8, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r9, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r10, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r11, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r12, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, lr, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated 
instruction in IT block +it ge +mvnge r0, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r0, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r1, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r2, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r3, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r4, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r5, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r6, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r7, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r8, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r9, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r10, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r11, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r12, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, lr, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mvnge r0, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r0, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r1, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r2, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r3, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r4, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge 
+eorge r0, r5, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r6, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r7, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r8, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r9, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r10, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r11, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r12, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, lr, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r0, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r1, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r2, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r3, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r4, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r5, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r6, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r7, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r8, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r9, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r10, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r11, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r12, #0 
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, lr, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r0, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r1, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r2, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r3, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r4, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r5, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r6, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r7, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r8, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r9, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r10, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r11, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r12, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, sp, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, lr, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r0, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r1, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r2, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r3, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it 
ge +addsge.w r0, r4, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r5, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r6, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r7, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r8, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r9, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r10, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r11, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r12, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, sp, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, lr, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r0, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r1, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r2, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r3, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r4, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r5, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r6, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r7, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r8, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r9, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT 
block +it ge +adcge r0, r10, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r11, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r12, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, lr, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r0, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r1, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r2, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r3, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r4, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r5, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r6, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r7, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r8, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r9, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r10, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r11, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r12, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, lr, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r0, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r1, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r2, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge 
r0, r3, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r4, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r5, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r6, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r7, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r8, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r9, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r10, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r11, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r12, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, lr, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r0, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r1, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r2, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r3, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r4, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r5, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r6, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r7, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r8, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r9, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r10, #0 +@ CHECK: 
[[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r11, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r12, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, lr, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r0, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r1, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r2, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r3, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r4, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r5, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r6, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r7, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r8, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r9, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r10, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r11, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r12, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, sp, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, lr, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r0, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r1, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w 
r0, r2, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r3, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r4, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r5, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r6, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r7, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r8, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r9, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r10, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r11, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r12, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, sp, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, lr, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge.w r0, r0, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge.w r0, r1, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge.w r0, r2, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge.w r0, r3, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge.w r0, r4, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge.w r0, r5, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge.w r0, r6, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge.w r0, r7, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated 
instruction in IT block +it ge +rsbge.w r0, r8, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge.w r0, r9, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge.w r0, r10, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge.w r0, r11, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge.w r0, r12, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge.w r0, lr, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbsge.w r0, r0, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbsge.w r0, r1, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbsge.w r0, r2, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbsge.w r0, r3, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbsge.w r0, r4, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbsge.w r0, r5, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbsge.w r0, r6, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbsge.w r0, r7, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbsge.w r0, r8, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbsge.w r0, r9, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbsge.w r0, r10, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbsge.w r0, r11, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbsge.w r0, r12, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbsge.w r0, lr, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, r0, #0 +@ CHECK: 
[[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, r1, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, r2, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, r3, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, r4, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, r5, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, r6, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, r7, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, r8, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, r9, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, r10, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, r11, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, r12, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, sp, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, lr, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, pc, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #4096 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #8192 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #12288 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #16384 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #20480 +@ CHECK: 
[[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #24576 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #28672 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #32768 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #36864 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #40960 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #45056 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #49152 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #53248 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #57344 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #61440 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, r0, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, r1, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, r2, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, r3, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, r4, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, r5, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, r6, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, r7, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, r8, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, r9, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, r10, #0 +@ CHECK: 
[[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, r11, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, r12, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, sp, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, lr, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, pc, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #4096 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #8192 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #12288 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #16384 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #20480 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #24576 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #28672 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #32768 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #36864 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #40960 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #45056 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #49152 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #53248 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #57344 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #61440 +@ CHECK: 
[[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ssatge r0, #1, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ssatge r0, #1, r1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ssatge r0, #1, r2 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ssatge r0, #1, r3 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ssatge r0, #1, r4 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ssatge r0, #1, r5 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ssatge r0, #1, r6 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ssatge r0, #1, r7 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ssatge r0, #1, r8 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ssatge r0, #1, r9 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ssatge r0, #1, r10 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ssatge r0, #1, r11 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ssatge r0, #1, r12 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ssatge r0, #1, lr +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbfxge r0, r0, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbfxge r0, r1, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbfxge r0, r2, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbfxge r0, r3, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbfxge r0, r4, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbfxge r0, r5, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge 
+sbfxge r0, r6, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbfxge r0, r7, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbfxge r0, r8, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbfxge r0, r9, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbfxge r0, r10, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbfxge r0, r11, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbfxge r0, r12, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbfxge r0, lr, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bfige r0, r0, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bfige r0, r1, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bfige r0, r2, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bfige r0, r3, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bfige r0, r4, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bfige r0, r5, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bfige r0, r6, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bfige r0, r7, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bfige r0, r8, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bfige r0, r9, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bfige r0, r10, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bfige r0, r11, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bfige r0, r12, #0, #1 +@ 
CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bfige r0, lr, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bfcge r0, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +usatge r0, #0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +usatge r0, #0, r1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +usatge r0, #0, r2 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +usatge r0, #0, r3 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +usatge r0, #0, r4 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +usatge r0, #0, r5 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +usatge r0, #0, r6 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +usatge r0, #0, r7 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +usatge r0, #0, r8 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +usatge r0, #0, r9 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +usatge r0, #0, r10 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +usatge r0, #0, r11 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +usatge r0, #0, r12 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +usatge r0, #0, lr +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ubfxge r0, r0, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ubfxge r0, r1, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ubfxge r0, r2, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ubfxge r0, r3, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge 
+ubfxge r0, r4, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ubfxge r0, r5, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ubfxge r0, r6, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ubfxge r0, r7, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ubfxge r0, r8, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ubfxge r0, r9, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ubfxge r0, r10, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ubfxge r0, r11, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ubfxge r0, r12, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ubfxge r0, lr, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r0, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r1, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r2, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r3, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r4, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r5, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r6, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r7, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r8, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r9, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge 
r0, r10, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r11, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r12, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, lr, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r0, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r1, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r2, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r3, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r4, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r5, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r6, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r7, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r8, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r9, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r10, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r11, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, r12, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +andge r0, lr, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r0, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r1, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge 
+bicge r0, r2, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r3, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r4, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r5, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r6, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r7, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r8, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r9, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r10, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r11, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r12, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, lr, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r0, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r1, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r2, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r3, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r4, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r5, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r6, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r7, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it 
ge +bicge r0, r8, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r9, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r10, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r11, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, r12, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +bicge r0, lr, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r0, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r1, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r2, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r3, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r4, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r5, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r6, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r7, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r8, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r9, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r10, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r11, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r12, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, lr, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT 
block +it ge +movge.w r0, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r0, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r1, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r2, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r3, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r4, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r5, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r6, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r7, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r8, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r9, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r10, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r11, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, r12, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +orrge r0, lr, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movsge.w r0, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r0, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r1, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r2, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r3, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT 
block +it ge +ornge r0, r4, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r5, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r6, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r7, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r8, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r9, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r10, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r11, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r12, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, lr, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mvnge r0, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r0, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r1, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r2, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r3, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r4, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r5, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r6, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r7, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r8, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT 
block +it ge +ornge r0, r9, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r10, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r11, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, r12, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ornge r0, lr, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mvnge r0, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r0, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r1, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r2, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r3, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r4, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r5, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r6, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r7, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r8, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r9, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r10, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r11, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r12, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, lr, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in 
IT block +it ge +eorge r0, r0, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r1, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r2, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r3, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r4, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r5, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r6, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r7, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r8, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r9, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r10, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r11, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, r12, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +eorge r0, lr, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r0, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r1, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r2, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r3, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r4, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r5, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated 
instruction in IT block +it ge +addge.w r0, r6, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r7, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r8, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r9, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r10, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r11, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, r12, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, sp, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addge.w r0, lr, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r0, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r1, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r2, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r3, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r4, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r5, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r6, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r7, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r8, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r9, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, 
r10, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r11, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, r12, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, sp, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addsge.w r0, lr, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r0, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r1, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r2, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r3, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r4, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r5, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r6, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r7, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r8, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r9, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r10, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r11, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r12, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, lr, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r0, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block 
+it ge +adcge r0, r1, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r2, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r3, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r4, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r5, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r6, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r7, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r8, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r9, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r10, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r11, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, r12, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +adcge r0, lr, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r0, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r1, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r2, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r3, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r4, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r5, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r6, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT 
block +it ge +sbcge r0, r7, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r8, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r9, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r10, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r11, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r12, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, lr, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r0, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r1, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r2, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r3, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r4, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r5, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r6, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r7, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r8, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r9, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r10, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r11, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbcge r0, r12, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction 
in IT block +it ge +sbcge r0, lr, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r0, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r1, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r2, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r3, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r4, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r5, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r6, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r7, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r8, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r9, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r10, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r11, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, r12, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, sp, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subge.w r0, lr, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r0, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r1, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r2, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r3, #8388608 +@ CHECK: 
[[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r4, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r5, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r6, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r7, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r8, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r9, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r10, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r11, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, r12, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, sp, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subsge.w r0, lr, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge.w r0, r0, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge.w r0, r1, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge.w r0, r2, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge.w r0, r3, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge.w r0, r4, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge.w r0, r5, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge.w r0, r6, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge.w r0, r7, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction 
in IT block +it ge +rsbge.w r0, r8, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge.w r0, r9, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge.w r0, r10, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge.w r0, r11, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge.w r0, r12, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbge.w r0, lr, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbsge.w r0, r0, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbsge.w r0, r1, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbsge.w r0, r2, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbsge.w r0, r3, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbsge.w r0, r4, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbsge.w r0, r5, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbsge.w r0, r6, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbsge.w r0, r7, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbsge.w r0, r8, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbsge.w r0, r9, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbsge.w r0, r10, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbsge.w r0, r11, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbsge.w r0, r12, #8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +rsbsge.w r0, lr, 
#8388608 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, r0, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, r1, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, r2, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, r3, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, r4, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, r5, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, r6, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, r7, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, r8, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, r9, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, r10, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, r11, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, r12, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, sp, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, lr, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +addwge r0, pc, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #6144 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #10240 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #14336 +@ CHECK: [[@LINE+2]]:1: warning: 
deprecated instruction in IT block +it ge +movwge r0, #18432 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #22528 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #26624 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #30720 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #34816 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #38912 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #43008 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #47104 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #51200 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #55296 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #59392 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movwge r0, #63488 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, r0, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, r1, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, r2, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, r3, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, r4, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, r5, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, r6, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, r7, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, r8, #2048 +@ CHECK: 
[[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, r9, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, r10, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, r11, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, r12, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, sp, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, lr, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +subwge r0, pc, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #2048 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #6144 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #10240 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #14336 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #18432 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #22528 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #26624 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #30720 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #34816 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #38912 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #43008 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #47104 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #51200 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, 
#55296 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #59392 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +movtge r0, #63488 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ssatge r0, #1, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ssatge r0, #1, r1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ssatge r0, #1, r2 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ssatge r0, #1, r3 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ssatge r0, #1, r4 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ssatge r0, #1, r5 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ssatge r0, #1, r6 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ssatge r0, #1, r7 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ssatge r0, #1, r8 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ssatge r0, #1, r9 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ssatge r0, #1, r10 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ssatge r0, #1, r11 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ssatge r0, #1, r12 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ssatge r0, #1, lr +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbfxge r0, r0, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbfxge r0, r1, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbfxge r0, r2, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbfxge r0, r3, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it 
ge +sbfxge r0, r4, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbfxge r0, r5, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbfxge r0, r6, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbfxge r0, r7, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbfxge r0, r8, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbfxge r0, r9, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbfxge r0, r10, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbfxge r0, r11, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbfxge r0, r12, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +sbfxge r0, lr, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +usatge r0, #0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +usatge r0, #0, r1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +usatge r0, #0, r2 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +usatge r0, #0, r3 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +usatge r0, #0, r4 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +usatge r0, #0, r5 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +usatge r0, #0, r6 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +usatge r0, #0, r7 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +usatge r0, #0, r8 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +usatge r0, #0, r9 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +usatge r0, #0, r10 +@ CHECK: [[@LINE+2]]:1: warning: 
deprecated instruction in IT block +it ge +usatge r0, #0, r11 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +usatge r0, #0, r12 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +usatge r0, #0, lr +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ubfxge r0, r0, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ubfxge r0, r1, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ubfxge r0, r2, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ubfxge r0, r3, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ubfxge r0, r4, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ubfxge r0, r5, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ubfxge r0, r6, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ubfxge r0, r7, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ubfxge r0, r8, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ubfxge r0, r9, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ubfxge r0, r10, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ubfxge r0, r11, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ubfxge r0, r12, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ubfxge r0, lr, #0, #1 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge.w r0, [r0, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge.w r0, [r1, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge.w r0, [r2, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in 
IT block +it ge +strbge.w r0, [r3, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge.w r0, [r4, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge.w r0, [r5, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge.w r0, [r6, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge.w r0, [r7, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge.w r0, [r8, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge.w r0, [r9, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge.w r0, [r10, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge.w r0, [r11, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge.w r0, [r12, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge.w r0, [sp, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge.w r0, [lr, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [r0, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [r1, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [r2, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [r3, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [r4, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [r5, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [r6, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [r7, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge 
+ldrbge.w r0, [r8, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [r9, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [r10, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [r11, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [r12, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [sp, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [lr, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [pc, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, [r0, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, [r1, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, [r2, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, [r3, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, [r4, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, [r5, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, [r6, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, [r7, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, [r8, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, [r9, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, [r10, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, [r11, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, 
[r12, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, [sp, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, [lr, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [r0, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [r1, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [r2, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [r3, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [r4, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [r5, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [r6, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [r7, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [r8, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [r9, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [r10, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [r11, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [r12, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [sp, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [lr, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [pc, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [r0, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [r1, r0] +@ 
CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [r2, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [r3, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [r4, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [r5, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [r6, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [r7, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [r8, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [r9, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [r10, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [r11, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [r12, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [sp, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [lr, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [r0, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [r1, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [r2, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [r3, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [r4, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [r5, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [r6, r0] +@ CHECK: [[@LINE+2]]:1: warning: 
deprecated instruction in IT block +it ge +ldrge.w r0, [r7, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [r8, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [r9, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [r10, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [r11, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [r12, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [sp, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [lr, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [pc, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge.w r0, [r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge.w r0, [r1] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge.w r0, [r2] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge.w r0, [r3] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge.w r0, [r4] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge.w r0, [r5] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge.w r0, [r6] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge.w r0, [r7] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge.w r0, [r8] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge.w r0, [r9] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge.w r0, [r10] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge.w r0, [r11] +@ 
CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge.w r0, [r12] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge.w r0, [sp] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strbge.w r0, [lr] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [r1] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [r2] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [r3] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [r4] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [r5] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [r6] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [r7] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [r8] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [r9] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [r10] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [r11] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [r12] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [sp] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [lr] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrbge.w r0, [pc, #0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, [r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, [r1] 
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, [r2] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, [r3] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, [r4] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, [r5] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, [r6] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, [r7] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, [r8] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, [r9] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, [r10] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, [r11] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, [r12] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, [sp] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strhge.w r0, [lr] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [r1] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [r2] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [r3] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [r4] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [r5] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [r6] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [r7] 
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [r8] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [r9] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [r10] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [r11] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [r12] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [sp] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [lr] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrhge.w r0, [pc, #0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [r1] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [r2] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [r3] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [r4] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [r5] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [r6] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [r7] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [r8] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [r9] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [r10] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [r11] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [r12] +@ 
CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [sp] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +strge.w r0, [lr] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [r1] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [r2] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [r3] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [r4] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [r5] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [r6] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [r7] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [r8] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [r9] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [r10] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [r11] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [r12] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [sp] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [lr] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrge.w r0, [pc, #0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [r0, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [r1, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [r2, r0] +@ 
CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [r3, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [r4, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [r5, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [r6, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [r7, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [r8, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [r9, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [r10, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [r11, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [r12, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [sp, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [lr, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [pc, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [r0, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [r1, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [r2, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [r3, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [r4, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [r5, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [r6, 
r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [r7, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [r8, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [r9, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [r10, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [r11, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [r12, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [sp, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [lr, r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [pc, #-0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [r1] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [r2] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [r3] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [r4] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [r5] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [r6] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [r7] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [r8] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [r9] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [r10] +@ CHECK: [[@LINE+2]]:1: warning: 
deprecated instruction in IT block +it ge +ldrsbge.w r0, [r11] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [r12] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [sp] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [lr] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrsbge.w r0, [pc, #0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [r0] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [r1] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [r2] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [r3] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [r4] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [r5] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [r6] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [r7] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [r8] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [r9] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [r10] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [r11] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [r12] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [sp] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [lr] +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +ldrshge.w r0, [pc, #0] +@ 
CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mlage r0, r0, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mlage r0, r1, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mlage r0, r2, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mlage r0, r3, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mlage r0, r4, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mlage r0, r5, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mlage r0, r6, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mlage r0, r7, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mlage r0, r8, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mlage r0, r9, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mlage r0, r10, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mlage r0, r11, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mlage r0, r12, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +mlage r0, lr, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +smullge r0, r0, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +smullge r0, r0, r1, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +smullge r0, r0, r2, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +smullge r0, r0, r3, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +smullge r0, r0, r4, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +smullge r0, r0, r5, r0 +@ CHECK: [[@LINE+2]]:1: 
warning: deprecated instruction in IT block +it ge +smullge r0, r0, r6, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +smullge r0, r0, r7, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +smullge r0, r0, r8, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +smullge r0, r0, r9, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +smullge r0, r0, r10, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +smullge r0, r0, r11, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +smullge r0, r0, r12, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +smullge r0, r0, lr, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +umullge r0, r0, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +umullge r0, r0, r1, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +umullge r0, r0, r2, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +umullge r0, r0, r3, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +umullge r0, r0, r4, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +umullge r0, r0, r5, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +umullge r0, r0, r6, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +umullge r0, r0, r7, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +umullge r0, r0, r8, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +umullge r0, r0, r9, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +umullge r0, r0, r10, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +umullge r0, r0, r11, r0 +@ CHECK: 
[[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +umullge r0, r0, r12, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +umullge r0, r0, lr, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +smlalge r0, r0, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +smlalge r0, r0, r1, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +smlalge r0, r0, r2, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +smlalge r0, r0, r3, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +smlalge r0, r0, r4, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +smlalge r0, r0, r5, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +smlalge r0, r0, r6, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +smlalge r0, r0, r7, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +smlalge r0, r0, r8, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +smlalge r0, r0, r9, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +smlalge r0, r0, r10, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +smlalge r0, r0, r11, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +smlalge r0, r0, r12, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +smlalge r0, r0, lr, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +umlalge r0, r0, r0, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +umlalge r0, r0, r1, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +umlalge r0, r0, r2, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +umlalge r0, r0, r3, r0 +@ 
CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +umlalge r0, r0, r4, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +umlalge r0, r0, r5, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +umlalge r0, r0, r6, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +umlalge r0, r0, r7, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +umlalge r0, r0, r8, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +umlalge r0, r0, r9, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +umlalge r0, r0, r10, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +umlalge r0, r0, r11, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +umlalge r0, r0, r12, r0 +@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block +it ge +umlalge r0, r0, lr, r0 diff --git a/test/MC/ARM/vfp4.s b/test/MC/ARM/vfp4.s index 0a1fe92..8b1b0e0 100644 --- a/test/MC/ARM/vfp4.s +++ b/test/MC/ARM/vfp4.s @@ -1,9 +1,13 @@ @ RUN: llvm-mc < %s -triple armv7-unknown-unknown -show-encoding -mattr=+neon,+vfp4 | FileCheck %s --check-prefix=ARM @ RUN: llvm-mc < %s -triple thumbv7-unknown-unknown -show-encoding -mattr=+neon,+vfp4 | FileCheck %s --check-prefix=THUMB -@ RUN: llvm-mc < %s -triple thumbv7-unknown-unknown -show-encoding -mcpu=cortex-m4 | FileCheck %s --check-prefix=THUMB_V7EM +@ RUN: not llvm-mc < %s -triple thumbv7-unknown-unknown -show-encoding -mcpu=cortex-m4 > %t 2> %t2 +@ RUN: FileCheck %s < %t --check-prefix=THUMB_V7EM +@ RUN: FileCheck %s < %t2 --check-prefix=THUMB_V7EM-ERRORS @ ARM: vfma.f64 d16, d18, d17 @ encoding: [0xa1,0x0b,0xe2,0xee] @ THUMB: vfma.f64 d16, d18, d17 @ encoding: [0xe2,0xee,0xa1,0x0b] +@ THUMB_V7EM-ERRORS: error: instruction requires: double precision VFP +@ THUMB_V7EM-ERRORS-NEXT: vfma.f64 d16, d18, d17 vfma.f64 d16, d18, 
d17 @ ARM: vfma.f32 s2, s4, s0 @ encoding: [0x00,0x1a,0xa2,0xee] @@ -13,14 +17,20 @@ vfma.f32 s2, s4, s0 @ ARM: vfma.f32 d16, d18, d17 @ encoding: [0xb1,0x0c,0x42,0xf2] @ THUMB: vfma.f32 d16, d18, d17 @ encoding: [0x42,0xef,0xb1,0x0c] +@ THUMB_V7EM-ERRORS: error: instruction requires: NEON +@ THUMB_V7EM-ERRORS-NEXT: vfma.f32 d16, d18, d17 vfma.f32 d16, d18, d17 @ ARM: vfma.f32 q2, q4, q0 @ encoding: [0x50,0x4c,0x08,0xf2] @ THUMB: vfma.f32 q2, q4, q0 @ encoding: [0x08,0xef,0x50,0x4c] +@ THUMB_V7EM-ERRORS: error: instruction requires: NEON +@ THUMB_V7EM-ERRORS-NEXT: vfma.f32 q2, q4, q0 vfma.f32 q2, q4, q0 @ ARM: vfnma.f64 d16, d18, d17 @ encoding: [0xe1,0x0b,0xd2,0xee] @ THUMB: vfnma.f64 d16, d18, d17 @ encoding: [0xd2,0xee,0xe1,0x0b] +@ THUMB_V7EM-ERRORS: error: instruction requires: double precision VFP +@ THUMB_V7EM-ERRORS-NEXT: vfnma.f64 d16, d18, d17 vfnma.f64 d16, d18, d17 @ ARM: vfnma.f32 s2, s4, s0 @ encoding: [0x40,0x1a,0x92,0xee] @@ -30,6 +40,8 @@ vfnma.f32 s2, s4, s0 @ ARM: vfms.f64 d16, d18, d17 @ encoding: [0xe1,0x0b,0xe2,0xee] @ THUMB: vfms.f64 d16, d18, d17 @ encoding: [0xe2,0xee,0xe1,0x0b] +@ THUMB_V7EM-ERRORS: error: instruction requires: double precision VFP +@ THUMB_V7EM-ERRORS-NEXT: vfms.f64 d16, d18, d17 vfms.f64 d16, d18, d17 @ ARM: vfms.f32 s2, s4, s0 @ encoding: [0x40,0x1a,0xa2,0xee] @@ -39,16 +51,23 @@ vfms.f32 s2, s4, s0 @ ARM: vfms.f32 d16, d18, d17 @ encoding: [0xb1,0x0c,0x62,0xf2] @ THUMB: vfms.f32 d16, d18, d17 @ encoding: [0x62,0xef,0xb1,0x0c] +@ THUMB_V7EM-ERRORS: error: instruction requires: NEON +@ THUMB_V7EM-ERRORS-NEXT: vfms.f32 d16, d18, d17 vfms.f32 d16, d18, d17 @ ARM: vfms.f32 q2, q4, q0 @ encoding: [0x50,0x4c,0x28,0xf2] @ THUMB: vfms.f32 q2, q4, q0 @ encoding: [0x28,0xef,0x50,0x4c] +@ THUMB_V7EM-ERRORS: error: instruction requires: NEON +@ THUMB_V7EM-ERRORS-NEXT: vfms.f32 q2, q4, q0 vfms.f32 q2, q4, q0 @ ARM: vfnms.f64 d16, d18, d17 @ encoding: [0xa1,0x0b,0xd2,0xee] @ THUMB: vfnms.f64 d16, d18, d17 @ encoding: 
[0xd2,0xee,0xa1,0x0b] +@ THUMB_V7EM-ERRORS: error: instruction requires: double precision VFP +@ THUMB_V7EM-ERRORS-NEXT: vfnms.f64 d16, d18, d17 vfnms.f64 d16, d18, d17 @ ARM: vfnms.f32 s2, s4, s0 @ encoding: [0x00,0x1a,0x92,0xee] @ THUMB: vfnms.f32 s2, s4, s0 @ encoding: [0x92,0xee,0x00,0x1a] +@ THUMB_V7EM: vfnms.f32 s2, s4, s0 @ encoding: [0x92,0xee,0x00,0x1a] vfnms.f32 s2, s4, s0 diff --git a/test/MC/ARM/xscale-attributes.ll b/test/MC/ARM/xscale-attributes.ll index d1e9931..718fd8f 100644 --- a/test/MC/ARM/xscale-attributes.ll +++ b/test/MC/ARM/xscale-attributes.ll @@ -26,13 +26,14 @@ entry: ; OBJ-NEXT: ] ; OBJ-NEXT: Address: 0x0 ; OBJ-NEXT: Offset: 0x38 -; OBJ-NEXT: Size: 32 +; OBJ-NEXT: Size: 40 ; OBJ-NEXT: Link: 0 ; OBJ-NEXT: Info: 0 ; OBJ-NEXT: AddressAlignment: 1 ; OBJ-NEXT: EntrySize: 0 ; OBJ-NEXT: SectionData ( -; OBJ-NEXT: 0000: 411F0000 00616561 62690001 15000000 -; OBJ-NEXT: 0010: 06050801 09011401 15011703 18011901 +; OBJ-NEXT: 0000: 41270000 00616561 62690001 1D000000 +; OBJ-NEXT: 0010: 05585343 414C4500 06050801 09011401 +; OBJ-NEXT: 0020: 15011703 18011901 ; OBJ-NEXT: ) ; OBJ-NEXT: } diff --git a/test/MC/AsmParser/cfi-window-save.s b/test/MC/AsmParser/cfi-window-save.s new file mode 100644 index 0000000..c309436 --- /dev/null +++ b/test/MC/AsmParser/cfi-window-save.s @@ -0,0 +1,15 @@ +# RUN: llvm-mc -filetype=asm -triple x86_64-pc-linux-gnu <%s | FileCheck %s + +# Should use SPARC as the target to test this. However, SPARC does not support +# asm parsing yet. 
+ +# CHECK: .cfi_window_save + + +f: + .cfi_startproc + nop + .cfi_window_save + nop + .cfi_endproc + diff --git a/test/MC/AsmParser/directive_file.s b/test/MC/AsmParser/directive_file.s index 121890e..9b99e0f 100644 --- a/test/MC/AsmParser/directive_file.s +++ b/test/MC/AsmParser/directive_file.s @@ -1,7 +1,7 @@ # RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s .file "hello" - .file 1 "world" + .file 1 "worl\144" # "\144" is "d" .file 2 "directory" "file" # CHECK: .file "hello" diff --git a/test/MC/AsmParser/directive_fill.s b/test/MC/AsmParser/directive_fill.s index 60bd468..bb3ced0 100644 --- a/test/MC/AsmParser/directive_fill.s +++ b/test/MC/AsmParser/directive_fill.s @@ -15,3 +15,19 @@ TEST1: # CHECK: .quad 4 TEST2: .fill 1, 8, 4 + +# CHECK: TEST3 +# CHECK: .byte 0 +# CHECK: .byte 0 +# CHECK: .byte 0 +# CHECK: .byte 0 +TEST3: + .fill 4 + +# CHECK: TEST4 +# CHECK: .short 0 +# CHECK: .short 0 +# CHECK: .short 0 +# CHECK: .short 0 +TEST4: + .fill 4, 2 diff --git a/test/MC/AsmParser/directive_incbin.s b/test/MC/AsmParser/directive_incbin.s index 55f9c79..ed4e27a 100644 --- a/test/MC/AsmParser/directive_incbin.s +++ b/test/MC/AsmParser/directive_incbin.s @@ -1,6 +1,6 @@ # RUN: llvm-mc -triple i386-unknown-unknown %s -I %p | FileCheck %s .data -.incbin "incbin_abcd" +.incbin "incbin\137abcd" # "\137" is underscore "_" # CHECK: .ascii "abcd\n" diff --git a/test/MC/AsmParser/directive_include.s b/test/MC/AsmParser/directive_include.s index fabd941..f53bc67 100644 --- a/test/MC/AsmParser/directive_include.s +++ b/test/MC/AsmParser/directive_include.s @@ -5,5 +5,5 @@ # CHECK: a = 0 # CHECK: TESTB: TESTA: - .include "directive_set.s" + .include "directive\137set.s" # "\137" is underscore "_" TESTB: diff --git a/test/MC/AsmParser/directive_loc.s b/test/MC/AsmParser/directive_loc.s index 164d42a..700a32c 100644 --- a/test/MC/AsmParser/directive_loc.s +++ b/test/MC/AsmParser/directive_loc.s @@ -6,3 +6,4 @@ .loc 1 2 .loc 1 2 3 .loc 1 2 discriminator 1 + .loc 1 0 
diff --git a/test/MC/AsmParser/floating-literals.s b/test/MC/AsmParser/floating-literals.s index d44bb98..6578e32 100644 --- a/test/MC/AsmParser/floating-literals.s +++ b/test/MC/AsmParser/floating-literals.s @@ -1,4 +1,5 @@ -# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s +# RUN: not llvm-mc -triple i386-unknown-unknown %s 2> /dev/null | FileCheck %s +# RUN: not llvm-mc -triple i386-unknown-unknown %s 2>&1 > /dev/null| FileCheck %s --check-prefix=CHECK-ERROR # CHECK: .long 1067412619 # CHECK: .long 1075000115 @@ -42,3 +43,40 @@ // APFloat should reject these with an error, not crash: //.double -1.2e+ //.double -1.2e + +# CHECK: .long 1310177520 +.float 0x12f7.1ep+17 +# CHECK: .long 1084227584 +.float 0x.ap+3 +# CHECK: .quad 4602678819172646912 +.double 0x2.p-2 +# CHECK: .long 1094713344 +.float 0x3p2 +# CHECK: .long 872284160 +.float 0x7fp-30 +# CHECK: .long 3212836864 +.float -0x1.0p0 + +# CHECK-ERROR: invalid hexadecimal floating-point constant: expected at least one exponent digit +# CHECK-ERROR: unexpected token in directive +.float 0xa.apa + +# CHECK-ERROR: invalid hexadecimal floating-point constant: expected at least one exponent digit +# CHECK-ERROR: unexpected token in directive +.double -0x1.2p+ + +# CHECK-ERROR: invalid hexadecimal floating-point constant: expected at least one exponent digit +# CHECK-ERROR: unexpected token in directive +.double -0x1.2p + +# CHECK-ERROR: invalid hexadecimal floating-point constant: expected at least one significand digit +# CHECK-ERROR: unexpected token in directive +.float 0xp2 + +# CHECK-ERROR: invalid hexadecimal floating-point constant: expected at least one significand digit +# CHECK-ERROR: unexpected token in directive +.float 0x.p5 + +# CHECK-ERROR: error: invalid hexadecimal floating-point constant: expected exponent part 'p' +# CHECK-ERROR: unexpected token in directive +.float 0x1.2 diff --git a/test/MC/AsmParser/lit.local.cfg b/test/MC/AsmParser/lit.local.cfg index 6c49f08..ba763cf 100644 --- 
a/test/MC/AsmParser/lit.local.cfg +++ b/test/MC/AsmParser/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.s'] - targets = set(config.root.targets_to_build.split()) if not 'X86' in targets: config.unsupported = True diff --git a/test/MC/COFF/bss_section.ll b/test/MC/COFF/bss_section.ll new file mode 100644 index 0000000..60924f1 --- /dev/null +++ b/test/MC/COFF/bss_section.ll @@ -0,0 +1,6 @@ +; RUN: llc -mtriple i386-pc-win32 < %s | FileCheck %s + +%struct.foo = type { i32, i32 } + +@"\01?thingy@@3Ufoo@@B" = global %struct.foo zeroinitializer, align 4 +; CHECK: .bss diff --git a/test/MC/COFF/eh-frame.s b/test/MC/COFF/eh-frame.s new file mode 100644 index 0000000..e606b76 --- /dev/null +++ b/test/MC/COFF/eh-frame.s @@ -0,0 +1,14 @@ +// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s -o - | llvm-readobj -s | FileCheck %s + + .def _main; + .scl 2; + .type 32; + .endef + .text + .globl _main +_main: + .cfi_startproc + ret + .cfi_endproc + +// CHECK: Name: .eh_frame diff --git a/test/MC/COFF/feat00.s b/test/MC/COFF/feat00.s new file mode 100644 index 0000000..d08f407 --- /dev/null +++ b/test/MC/COFF/feat00.s @@ -0,0 +1,14 @@ +// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s -o - | llvm-readobj -t | FileCheck %s + +"@feat.00" = 123 +.globl @feat.00 + +// CHECK: Symbol { +// CHECK: Name: @feat.00 +// CHECK: Value: 123 +// CHECK: Section: (-1) +// CHECK: BaseType: Null (0x0) +// CHECK: ComplexType: Null (0x0) +// CHECK: StorageClass: External (0x2) +// CHECK: AuxSymbolCount: 0 +// CHECK: } diff --git a/test/MC/COFF/lit.local.cfg b/test/MC/COFF/lit.local.cfg index 41a8434..ba763cf 100644 --- a/test/MC/COFF/lit.local.cfg +++ b/test/MC/COFF/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.s', '.ll'] - targets = set(config.root.targets_to_build.split()) if not 'X86' in targets: config.unsupported = True diff --git a/test/MC/COFF/lset0.s b/test/MC/COFF/lset0.s new file mode 100755 index 0000000..f5020c8 --- /dev/null +++ b/test/MC/COFF/lset0.s @@ -0,0 +1,12 @@ 
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s -o - | llvm-nm | FileCheck %s + +not_global = 123 +global = 456 +.globl global +.Llocal = 789 + +// CHECK-NOT: not_global +// CHECK-NOT: Llocal +// CHECK: global +// CHECK-NOT: not_global +// CHECK-NOT: Llocal diff --git a/test/MC/COFF/rdata.ll b/test/MC/COFF/rdata.ll new file mode 100644 index 0000000..f041781 --- /dev/null +++ b/test/MC/COFF/rdata.ll @@ -0,0 +1,6 @@ +; RUN: llc -mtriple i386-pc-win32 < %s | FileCheck %s + +%struct.foo = type { i32, i32 } + +@"\01?thingy@@3Ufoo@@B" = constant %struct.foo zeroinitializer, align 4 +; CHECK: .section .rdata diff --git a/test/MC/COFF/section-comdat.s b/test/MC/COFF/section-comdat.s new file mode 100644 index 0000000..dd5be87 --- /dev/null +++ b/test/MC/COFF/section-comdat.s @@ -0,0 +1,188 @@ +// RUN: llvm-mc -triple i386-pc-win32 -filetype=obj %s | llvm-readobj -s -t | FileCheck %s +// RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | llvm-readobj -s -t | FileCheck %s + +.section assocSec +.linkonce +.long 1 + +.section secName, "dr", discard, "Symbol1" +.globl Symbol1 +Symbol1: +.long 1 + +.section secName, "dr", one_only, "Symbol2" +.globl Symbol2 +Symbol2: +.long 1 + +.section SecName, "dr", same_size, "Symbol3" +.globl Symbol3 +Symbol3: +.long 1 + +.section SecName, "dr", same_contents, "Symbol4" +.globl Symbol4 +Symbol4: +.long 1 + +.section SecName, "dr", associative assocSec, "Symbol5" +.globl Symbol5 +Symbol5: +.long 1 + +.section SecName, "dr", largest, "Symbol6" +.globl Symbol6 +Symbol6: +.long 1 + +.section SecName, "dr", newest, "Symbol7" +.globl Symbol7 +Symbol7: +.long 1 + +// CHECK: Sections [ +// CHECK: Section { +// CHECK: Number: 1 +// CHECK: Name: assocSec +// CHECK: Characteristics [ +// CHECK: IMAGE_SCN_LNK_COMDAT +// CHECK: ] +// CHECK: } +// CHECK: Section { +// CHECK: Number: 2 +// CHECK: Name: secName +// CHECK: Characteristics [ +// CHECK: IMAGE_SCN_LNK_COMDAT +// CHECK: ] +// CHECK: } +// CHECK: Section { +// CHECK: Number: 3 +// 
CHECK: Name: secName +// CHECK: Characteristics [ +// CHECK: IMAGE_SCN_LNK_COMDAT +// CHECK: ] +// CHECK: } +// CHECK: Section { +// CHECK: Number: 4 +// CHECK: Name: SecName +// CHECK: Characteristics [ +// CHECK: IMAGE_SCN_LNK_COMDAT +// CHECK: ] +// CHECK: } +// CHECK: Section { +// CHECK: Number: 5 +// CHECK: Name: SecName +// CHECK: Characteristics [ +// CHECK: IMAGE_SCN_LNK_COMDAT +// CHECK: ] +// CHECK: } +// CHECK: Section { +// CHECK: Number: 6 +// CHECK: Name: SecName +// CHECK: Characteristics [ +// CHECK: IMAGE_SCN_LNK_COMDAT +// CHECK: ] +// CHECK: } +// CHECK: Section { +// CHECK: Number: 7 +// CHECK: Name: SecName +// CHECK: Characteristics [ +// CHECK: IMAGE_SCN_LNK_COMDAT +// CHECK: ] +// CHECK: } +// CHECK: Section { +// CHECK: Number: 8 +// CHECK: Name: SecName +// CHECK: Characteristics [ +// CHECK: IMAGE_SCN_LNK_COMDAT +// CHECK: ] +// CHECK: } +// CHECK: ] +// CHECK: Symbols [ +// CHECK: Symbol { +// CHECK: Name: assocSec +// CHECK: Section: assocSec (1) +// CHECK: AuxSectionDef { +// CHECK: Selection: Any +// CHECK: } +// CHECK: } +// CHECK: Symbol { +// CHECK: Name: secName +// CHECK: Section: secName (2) +// CHECK: AuxSectionDef { +// CHECK: Selection: Any +// CHECK: } +// CHECK: } +// CHECK: Symbol { +// CHECK: Name: secName +// CHECK: Section: secName (3) +// CHECK: AuxSectionDef { +// CHECK: Selection: NoDuplicates +// CHECK: } +// CHECK: } +// CHECK: Symbol { +// CHECK: Name: SecName +// CHECK: Section: SecName (4) +// CHECK: AuxSectionDef { +// CHECK: Selection: SameSize +// CHECK: } +// CHECK: } +// CHECK: Symbol { +// CHECK: Name: SecName +// CHECK: Section: SecName (5) +// CHECK: AuxSymbolCount: 1 +// CHECK: AuxSectionDef { +// CHECK: Selection: ExactMatch +// CHECK: } +// CHECK: } +// CHECK: Symbol { +// CHECK: Name: SecName +// CHECK: Section: SecName (6) +// CHECK: AuxSectionDef { +// CHECK: Selection: Associative +// CHECK: AssocSection: assocSec (1) +// CHECK: } +// CHECK: } +// CHECK: Symbol { +// CHECK: Name: SecName +// 
CHECK: Section: SecName (7) +// CHECK: AuxSectionDef { +// CHECK: Selection: Largest +// CHECK: } +// CHECK: } +// CHECK: Symbol { +// CHECK: Name: SecName +// CHECK: Section: SecName (8) +// CHECK: AuxSectionDef { +// CHECK: Selection: Newest (0x7) +// CHECK: } +// CHECK: } +// CHECK: Symbol { +// CHECK: Name: Symbol1 +// CHECK: Section: secName (2) +// CHECK: } +// CHECK: Symbol { +// CHECK: Name: Symbol2 +// CHECK: Section: secName (3) +// CHECK: } +// CHECK: Symbol { +// CHECK: Name: Symbol3 +// CHECK: Section: SecName (4) +// CHECK: } +// CHECK: Symbol { +// CHECK: Name: Symbol4 +// CHECK: Section: SecName (5) +// CHECK: } +// CHECK: Symbol { +// CHECK: Name: Symbol5 +// CHECK: Section: SecName (6) +// CHECK: } +// CHECK: Symbol { +// CHECK: Name: Symbol6 +// CHECK: Section: SecName (7) +// CHECK: } +// CHECK: Symbol { +// CHECK: Name: Symbol7 +// CHECK: Section: SecName (8) +// CHECK: } +// CHECK: ] diff --git a/test/MC/COFF/seh-align1.s b/test/MC/COFF/seh-align1.s new file mode 100644 index 0000000..aafc6ed --- /dev/null +++ b/test/MC/COFF/seh-align1.s @@ -0,0 +1,65 @@ +// This test checks the alignment and padding of the unwind info. 
+ +// RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | llvm-readobj -s -sd -sr -u | FileCheck %s + +// CHECK: Sections [ +// CHECK: Section { +// CHECK: Name: .xdata +// CHECK: RawDataSize: 8 +// CHECK: RelocationCount: 0 +// CHECK: Characteristics [ +// CHECK-NEXT: ALIGN_4BYTES +// CHECK-NEXT: CNT_INITIALIZED_DATA +// CHECK-NEXT: MEM_READ +// CHECK-NEXT: ] +// CHECK: Relocations [ +// CHECK-NEXT: ] +// CHECK: SectionData ( +// CHECK-NEXT: 0000: 01000000 00000000 +// CHECK-NEXT: ) +// CHECK-NEXT: } +// CHECK: Section { +// CHECK: Name: .pdata +// CHECK: RawDataSize: 12 +// CHECK: RelocationCount: 3 +// CHECK: Characteristics [ +// CHECK-NEXT: IMAGE_SCN_ALIGN_4BYTES +// CHECK-NEXT: IMAGE_SCN_CNT_INITIALIZED_DATA +// CHECK-NEXT: IMAGE_SCN_MEM_READ +// CHECK-NEXT: ] +// CHECK: Relocations [ +// CHECK-NEXT: [[BeginDisp:0x[A-F0-9]+]] IMAGE_REL_AMD64_ADDR32NB smallFunc +// CHECK-NEXT: [[EndDisp:0x[A-F0-9]+]] IMAGE_REL_AMD64_ADDR32NB smallFunc +// CHECK-NEXT: [[UnwindDisp:0x[A-F0-9]+]] IMAGE_REL_AMD64_ADDR32NB .xdata +// CHECK-NEXT: ] +// CHECK: SectionData ( +// CHECK-NEXT: 0000: 00000000 01000000 00000000 +// CHECK-NEXT: ) +// CHECK-NEXT: } +// CHECK: UnwindInformation [ +// CHECK-NEXT: RuntimeFunction { +// CHECK-NEXT: StartAddress: smallFunc {{(\+0x[A-F0-9]+ )?}}([[BeginDisp]]) +// CHECK-NEXT: EndAddress: smallFunc {{(\+0x[A-F0-9]+ )?}}([[EndDisp]]) +// CHECK-NEXT: UnwindInfoAddress: .xdata {{(\+0x[A-F0-9]+ )?}}([[UnwindDisp]]) +// CHECK-NEXT: UnwindInfo { +// CHECK-NEXT: Version: 1 +// CHECK-NEXT: Flags [ +// CHECK-NEXT: ] +// CHECK-NEXT: PrologSize: 0 +// CHECK-NEXT: FrameRegister: - +// CHECK-NEXT: FrameOffset: - +// CHECK-NEXT: UnwindCodeCount: 0 +// CHECK-NEXT: UnwindCodes [ +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: ] + +// Generate the minimal unwind info. +// It contains only the version set to 1. All other bytes are 0. 
+ .globl smallFunc + .def smallFunc; .scl 2; .type 32; .endef + .seh_proc smallFunc +smallFunc: + ret + .seh_endproc diff --git a/test/MC/COFF/seh-align2.s b/test/MC/COFF/seh-align2.s new file mode 100644 index 0000000..5e6c49a --- /dev/null +++ b/test/MC/COFF/seh-align2.s @@ -0,0 +1,78 @@ +// This test checks the alignment and padding of the unwind info. + +// RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | llvm-readobj -s -sd -sr -u | FileCheck %s + +// CHECK: Sections [ +// CHECK: Section { +// CHECK: Name: .xdata +// CHECK: RawDataSize: 16 +// CHECK: RelocationCount: 1 +// CHECK: Characteristics [ +// CHECK-NEXT: ALIGN_4BYTES +// CHECK-NEXT: CNT_INITIALIZED_DATA +// CHECK-NEXT: MEM_READ +// CHECK-NEXT: ] +// CHECK: Relocations [ +// CHECK-NEXT: [[HandlerDisp:0x[A-F0-9]+]] IMAGE_REL_AMD64_ADDR32NB __C_specific_handler +// CHECK-NEXT: ] +// CHECK: SectionData ( +// CHECK-NEXT: 0000: 09000100 04220000 00000000 BEBAFECA +// CHECK-NEXT: ) +// CHECK-NEXT: } +// CHECK-NEXT: Section { +// CHECK: Name: .pdata +// CHECK: RawDataSize: 12 +// CHECK: RelocationCount: 3 +// CHECK: Characteristics [ +// CHECK-NEXT: IMAGE_SCN_ALIGN_4BYTES +// CHECK-NEXT: IMAGE_SCN_CNT_INITIALIZED_DATA +// CHECK-NEXT: IMAGE_SCN_MEM_READ +// CHECK-NEXT: ] +// CHECK: Relocations [ +// CHECK-NEXT: [[BeginDisp:0x[A-F0-9]+]] IMAGE_REL_AMD64_ADDR32NB func +// CHECK-NEXT: [[EndDisp:0x[A-F0-9]+]] IMAGE_REL_AMD64_ADDR32NB func +// CHECK-NEXT: [[UnwindDisp:0x[A-F0-9]+]] IMAGE_REL_AMD64_ADDR32NB .xdata +// CHECK-NEXT: ] +// CHECK: SectionData ( +// CHECK-NEXT: 0000: FCFFFFFF 05000000 00000000 +// CHECK-NEXT: ) +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK: UnwindInformation [ +// CHECK-NEXT: RuntimeFunction { +// CHECK-NEXT: StartAddress: func {{(\+0x[A-F0-9]+ )?}}([[BeginDisp]]) +// CHECK-NEXT: EndAddress: func {{(\+0x[A-F0-9]+ )?}}([[EndDisp]]) +// CHECK-NEXT: UnwindInfoAddress: .xdata {{(\+0x[A-F0-9]+ )?}}([[UnwindDisp]]) +// CHECK-NEXT: UnwindInfo { +// CHECK-NEXT: Version: 1 +// CHECK-NEXT: 
Flags [ +// CHECK-NEXT: ExceptionHandler +// CHECK-NEXT: ] +// CHECK-NEXT: PrologSize: 0 +// CHECK-NEXT: FrameRegister: - +// CHECK-NEXT: FrameOffset: - +// CHECK-NEXT: UnwindCodeCount: 1 +// CHECK-NEXT: UnwindCodes [ +// CHECK-NEXT: 0x04: ALLOC_SMALL size=24 +// CHECK-NEXT: ] +// CHECK-NEXT: Handler: __C_specific_handler ([[HandlerDisp]]) +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: ] + +// Generates only one unwind code. +// Requires padding of the unwind code array. + .globl func + .def func; .scl 2; .type 32; .endef + .seh_proc func + subq $24, %rsp + .seh_stackalloc 24 + .seh_handler __C_specific_handler, @except + .seh_handlerdata + .long 0xcafebabe + .text + .seh_endprologue +func: + addq $24, %rsp + ret + .seh_endproc diff --git a/test/MC/COFF/seh-align3.s b/test/MC/COFF/seh-align3.s new file mode 100644 index 0000000..238b5de --- /dev/null +++ b/test/MC/COFF/seh-align3.s @@ -0,0 +1,83 @@ +// This test checks the alignment and padding of the unwind info. + +// RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | llvm-readobj -s -sd -sr -u | FileCheck %s + +// CHECK: Sections [ +// CHECK: Section { +// CHECK: Name: .xdata +// CHECK: RawDataSize: 16 +// CHECK: RelocationCount: 1 +// CHECK: Characteristics [ +// CHECK-NEXT: ALIGN_4BYTES +// CHECK-NEXT: CNT_INITIALIZED_DATA +// CHECK-NEXT: MEM_READ +// CHECK-NEXT: ] +// CHECK: Relocations [ +// CHECK-NEXT: [[HandlerDisp:0x[A-F0-9]+]] IMAGE_REL_AMD64_ADDR32NB __C_specific_handler +// CHECK-NEXT: ] +// CHECK: SectionData ( +// CHECK-NEXT: 0000: 19000200 04D002C0 00000000 BEBAFECA +// CHECK-NEXT: ) +// CHECK-NEXT: } +// CHECK-NEXT: Section { +// CHECK: Name: .pdata +// CHECK: RawDataSize: 12 +// CHECK: RelocationCount: 3 +// CHECK: Characteristics [ +// CHECK-NEXT: IMAGE_SCN_ALIGN_4BYTES +// CHECK-NEXT: IMAGE_SCN_CNT_INITIALIZED_DATA +// CHECK-NEXT: IMAGE_SCN_MEM_READ +// CHECK-NEXT: ] +// CHECK: Relocations [ +// CHECK-NEXT: [[BeginDisp:0x[A-F0-9]+]] IMAGE_REL_AMD64_ADDR32NB func +// CHECK-NEXT: 
[[EndDisp:0x[A-F0-9]+]] IMAGE_REL_AMD64_ADDR32NB func +// CHECK-NEXT: [[UnwindDisp:0x[A-F0-9]+]] IMAGE_REL_AMD64_ADDR32NB .xdata +// CHECK-NEXT: ] +// CHECK: SectionData ( +// CHECK-NEXT: 0000: FCFFFFFF 05000000 00000000 +// CHECK-NEXT: ) +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK: UnwindInformation [ +// CHECK-NEXT: RuntimeFunction { +// CHECK-NEXT: StartAddress: func {{(\+0x[A-F0-9]+ )?}}([[BeginDisp]]) +// CHECK-NEXT: EndAddress: func {{(\+0x[A-F0-9]+ )?}}([[EndDisp]]) +// CHECK-NEXT: UnwindInfoAddress: .xdata {{(\+0x[A-F0-9]+ )?}}([[UnwindDisp]]) +// CHECK-NEXT: UnwindInfo { +// CHECK-NEXT: Version: 1 +// CHECK-NEXT: Flags [ +// CHECK-NEXT: ExceptionHandler +// CHECK-NEXT: TerminateHandler +// CHECK-NEXT: ] +// CHECK-NEXT: PrologSize: 0 +// CHECK-NEXT: FrameRegister: - +// CHECK-NEXT: FrameOffset: - +// CHECK-NEXT: UnwindCodeCount: 2 +// CHECK-NEXT: UnwindCodes [ +// CHECK-NEXT: 0x04: PUSH_NONVOL reg=R13 +// CHECK-NEXT: 0x02: PUSH_NONVOL reg=R12 +// CHECK-NEXT: ] +// CHECK-NEXT: Handler: __C_specific_handler ([[HandlerDisp]]) +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: ] + +// Generates two unwind codes. +// Requires no padding of the unwind code array. + .globl func + .def func; .scl 2; .type 32; .endef + .seh_proc func + push %r12 + .seh_pushreg 12 + push %r13 + .seh_pushreg 13 + .seh_handler __C_specific_handler, @except, @unwind + .seh_handlerdata + .long 0xcafebabe + .text + .seh_endprologue +func: + pop %r13 + pop %r12 + ret + .seh_endproc diff --git a/test/MC/COFF/seh.s b/test/MC/COFF/seh.s index bef425e..72d42f4 100644 --- a/test/MC/COFF/seh.s +++ b/test/MC/COFF/seh.s @@ -1,8 +1,6 @@ // This test checks that the SEH directives emit the correct unwind data. -// TODO: Expected fail because SET_FPREG has a wrong offset. 
-// XFAIL: * -// RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | llvm-readobj -s -u | FileCheck %s +// RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | llvm-readobj -s -u -r | FileCheck %s // CHECK: Sections [ // CHECK: Section { @@ -36,6 +34,27 @@ // CHECK-NEXT: } // CHECK-NEXT: ] +// CHECK-NEXT: Relocations [ +// CHECK-NEXT: Section (2) .xdata { +// CHECK-NEXT: 0x14 IMAGE_REL_AMD64_ADDR32NB __C_specific_handler +// CHECK-NEXT: 0x20 IMAGE_REL_AMD64_ADDR32NB func +// CHECK-NEXT: 0x24 IMAGE_REL_AMD64_ADDR32NB func +// CHECK-NEXT: 0x28 IMAGE_REL_AMD64_ADDR32NB .xdata +// CHECK-NEXT: } +// CHECK-NEXT: Section (3) .pdata { +// CHECK-NEXT: 0x0 IMAGE_REL_AMD64_ADDR32NB func +// CHECK-NEXT: 0x4 IMAGE_REL_AMD64_ADDR32NB func +// CHECK-NEXT: 0x8 IMAGE_REL_AMD64_ADDR32NB .xdata +// CHECK-NEXT: 0xC IMAGE_REL_AMD64_ADDR32NB func +// CHECK-NEXT: 0x10 IMAGE_REL_AMD64_ADDR32NB func +// CHECK-NEXT: 0x14 IMAGE_REL_AMD64_ADDR32NB .xdata +// CHECK-NEXT: 0x18 IMAGE_REL_AMD64_ADDR32NB smallFunc +// CHECK-NEXT: 0x1C IMAGE_REL_AMD64_ADDR32NB smallFunc +// CHECK-NEXT: 0x20 IMAGE_REL_AMD64_ADDR32NB .xdata +// CHECK-NEXT: } +// CHECK-NEXT: ] + + // CHECK: UnwindInformation [ // CHECK-NEXT: RuntimeFunction { // CHECK-NEXT: StartAddress: [[CodeSect1:[^ ]+]] [[BeginDisp1:(\+0x[A-F0-9]+)?]] diff --git a/test/MC/COFF/tricky-names.ll b/test/MC/COFF/tricky-names.ll new file mode 100644 index 0000000..6e041d3 --- /dev/null +++ b/test/MC/COFF/tricky-names.ll @@ -0,0 +1,38 @@ +; Check how tricky symbols are printed in the asm output. +; RUN: llc -mtriple=i686-pc-win32 %s -o - | FileCheck %s --check-prefix=ASM + +; Check that we can roundtrip these names through our assembler. 
+; RUN: llc -mtriple=i686-pc-win32 %s -o - | llvm-mc -triple i686-pc-win32 -filetype=obj | llvm-readobj -t | FileCheck %s --check-prefix=READOBJ + + +@"\01??__E_Generic_object@?$_Error_objects@H@std@@YAXXZ" = global i32 0 +@"\01__ZL16ExceptionHandlerP19_EXCEPTION_POINTERS@4" = global i32 0 +@"\01@foo.bar" = global i32 0 + +define weak i32 @"\01??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51"() section ".text" { + %a = load i32* @"\01??__E_Generic_object@?$_Error_objects@H@std@@YAXXZ" + %b = load i32* @"\01__ZL16ExceptionHandlerP19_EXCEPTION_POINTERS@4" + %c = load i32* @"\01@foo.bar" + %x = add i32 %a, %b + %y = add i32 %x, %c + ret i32 %y +} + +; Check that these symbols are not quoted. They occur in output that gets passed to GAS. +; ASM: .globl __ZL16ExceptionHandlerP19_EXCEPTION_POINTERS@4 +; ASM-NOT: .globl "__ZL16ExceptionHandlerP19_EXCEPTION_POINTERS@4" +; ASM: .globl @foo.bar +; ASM-NOT: .globl "@foo.bar" + +; READOBJ: Symbol +; READOBJ: Name: .text$??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51 +; READOBJ: Section: .text$??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51 +; READOBJ: Symbol +; READOBJ: Name: ??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51 +; READOBJ: Section: .text$??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51 +; READOBJ: Symbol +; READOBJ: Name: ??__E_Generic_object@?$_Error_objects@H@std@@YAXXZ +; READOBJ: Symbol +; READOBJ: Name: __ZL16ExceptionHandlerP19_EXCEPTION_POINTERS@4 +; READOBJ: Symbol +; READOBJ: Name: @foo.bar diff --git a/test/MC/Disassembler/AArch64/a64-ignored-fields.txt b/test/MC/Disassembler/AArch64/a64-ignored-fields.txt index 966530d..799ecdf 100644 --- 
a/test/MC/Disassembler/AArch64/a64-ignored-fields.txt +++ b/test/MC/Disassembler/AArch64/a64-ignored-fields.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc -triple=aarch64 -disassemble -show-encoding < %s | FileCheck %s +# RUN: llvm-mc -triple=aarch64 -mattr=fp-armv8 -disassemble -show-encoding < %s | FileCheck %s # The "Rm" bits are ignored, but the canonical representation has them filled # with 0s. This is what we should produce even if the input bit-pattern had diff --git a/test/MC/Disassembler/AArch64/basic-a64-instructions.txt b/test/MC/Disassembler/AArch64/basic-a64-instructions.txt index 4fa2d50..40926b1 100644 --- a/test/MC/Disassembler/AArch64/basic-a64-instructions.txt +++ b/test/MC/Disassembler/AArch64/basic-a64-instructions.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc -triple=aarch64 -disassemble < %s | FileCheck %s +# RUN: llvm-mc -triple=aarch64 -mattr=+fp-armv8 -disassemble < %s | FileCheck %s #------------------------------------------------------------------------------ # Add/sub (immediate) diff --git a/test/MC/Disassembler/AArch64/basic-a64-unpredictable.txt b/test/MC/Disassembler/AArch64/basic-a64-unpredictable.txt index adb8f75..5363863 100644 --- a/test/MC/Disassembler/AArch64/basic-a64-unpredictable.txt +++ b/test/MC/Disassembler/AArch64/basic-a64-unpredictable.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc -triple=aarch64 -disassemble < %s 2>&1 | FileCheck %s +# RUN: llvm-mc -triple=aarch64 -mattr=+fp-armv8 -disassemble < %s 2>&1 | FileCheck %s #------------------------------------------------------------------------------ # Load-store exclusive diff --git a/test/MC/Disassembler/AArch64/ldp-postind.predictable.txt b/test/MC/Disassembler/AArch64/ldp-postind.predictable.txt index 775660b..637ebdb 100644 --- a/test/MC/Disassembler/AArch64/ldp-postind.predictable.txt +++ b/test/MC/Disassembler/AArch64/ldp-postind.predictable.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc -triple=aarch64 -disassemble < %s 2>&1 | FileCheck %s +# RUN: llvm-mc -triple=aarch64 -mattr=+fp-armv8 -disassemble < 
%s 2>&1 | FileCheck %s # None of these instructions should be classified as unpredictable: diff --git a/test/MC/Disassembler/AArch64/ldp-preind.predictable.txt b/test/MC/Disassembler/AArch64/ldp-preind.predictable.txt index 48ea817..f52d37f 100644 --- a/test/MC/Disassembler/AArch64/ldp-preind.predictable.txt +++ b/test/MC/Disassembler/AArch64/ldp-preind.predictable.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc -triple=aarch64 -disassemble < %s 2>&1 | FileCheck %s +# RUN: llvm-mc -triple=aarch64 -mattr=+fp-armv8 -disassemble < %s 2>&1 | FileCheck %s # None of these instructions should be classified as unpredictable: diff --git a/test/MC/Disassembler/AArch64/lit.local.cfg b/test/MC/Disassembler/AArch64/lit.local.cfg index f9df30e..9a66a00 100644 --- a/test/MC/Disassembler/AArch64/lit.local.cfg +++ b/test/MC/Disassembler/AArch64/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.txt'] - targets = set(config.root.targets_to_build.split()) if not 'AArch64' in targets: config.unsupported = True diff --git a/test/MC/Disassembler/AArch64/neon-instructions.txt b/test/MC/Disassembler/AArch64/neon-instructions.txt index 40d1f4c..863730a 100644 --- a/test/MC/Disassembler/AArch64/neon-instructions.txt +++ b/test/MC/Disassembler/AArch64/neon-instructions.txt @@ -131,8 +131,11 @@ #------------------------------------------------------------------------------ # Vector Move - register #------------------------------------------------------------------------------ -# CHECK: mov v1.16b, v15.16b -# CHECK: mov v25.8b, v4.8b + +# FIXME: these should print as "mov", but TableGen can't handle it. 
+ +# CHECK: orr v1.16b, v15.16b, v15.16b +# CHECK: orr v25.8b, v4.8b, v4.8b 0xe1 0x1d 0xaf 0x4e 0x99 0x1c 0xa4 0x0e @@ -671,3 +674,1965 @@ 0xf5 0xdd 0x23 0x4e 0xab 0xdc 0x77 0x4e +#---------------------------------------------------------------------- +# Vector Shift Left long +#---------------------------------------------------------------------- +# CHECK: shll2 v2.8h, v4.16b, #8 +# CHECK: shll2 v6.4s, v8.8h, #16 +# CHECK: shll2 v6.2d, v8.4s, #32 +# CHECK: shll v2.8h, v4.8b, #8 +# CHECK: shll v6.4s, v8.4h, #16 +# CHECK: shll v6.2d, v8.2s, #32 + +0x82,0x38,0x21,0x6e +0x06,0x39,0x61,0x6e +0x06,0x39,0xa1,0x6e +0x82,0x38,0x21,0x2e +0x06,0x39,0x61,0x2e +0x06,0x39,0xa1,0x2e + +#---------------------------------------------------------------------- +# Vector Shift Left by Immediate +#---------------------------------------------------------------------- +# CHECK: shl v0.4h, v1.4h, #3 +# CHECK: shl v0.16b, v1.16b, #3 +# CHECK: shl v0.4s, v1.4s, #3 +# CHECK: shl v0.2d, v1.2d, #3 +0x20,0x54,0x13,0x0f +0x20,0x54,0x0b,0x4f +0x20,0x54,0x23,0x4f +0x20,0x54,0x43,0x4f + +#---------------------------------------------------------------------- +# Vector Shift Left Long (Signed, Unsigned) by Immediate +#---------------------------------------------------------------------- +# CHECK: sshll v0.2d, v1.2s, #3 +# CHECK: sshll2 v0.4s, v1.8h, #3 +# CHECK: ushll v0.4s, v1.4h, #3 +# CHECK: ushll2 v0.8h, v1.16b, #3 +0x20 0xa4 0x23 0x0f +0x20 0xa4 0x13 0x4f +0x20 0xa4 0x13 0x2f +0x20 0xa4 0x0b 0x6f + +#----------------------------------------------------------------------------- +#Integer shift right (Signed) +#----------------------------------------------------------------------------- +# CHECK: sshr v0.8b, v1.8b, #3 +# CHECK: sshr v0.4h, v1.4h, #3 +# CHECK: sshr v0.2s, v1.2s, #3 +# CHECK: sshr v0.16b, v1.16b, #3 +# CHECK: sshr v0.8h, v1.8h, #3 +# CHECK: sshr v0.4s, v1.4s, #3 +# CHECK: sshr v0.2d, v1.2d, #3 +0x20,0x04,0x0d,0x0f +0x20,0x04,0x1d,0x0f +0x20,0x04,0x3d,0x0f +0x20,0x04,0x0d,0x4f 
+0x20,0x04,0x1d,0x4f +0x20,0x04,0x3d,0x4f +0x20,0x04,0x7d,0x4f + +#----------------------------------------------------------------------------- +#Integer shift right (Unsigned) +#----------------------------------------------------------------------------- +# CHECK: ushr v0.8b, v1.8b, #3 +# CHECK: ushr v0.4h, v1.4h, #3 +# CHECK: ushr v0.2s, v1.2s, #3 +# CHECK: ushr v0.16b, v1.16b, #3 +# CHECK: ushr v0.8h, v1.8h, #3 +# CHECK: ushr v0.4s, v1.4s, #3 +# CHECK: ushr v0.2d, v1.2d, #3 +0x20,0x04,0x0d,0x2f +0x20,0x04,0x1d,0x2f +0x20,0x04,0x3d,0x2f +0x20,0x04,0x0d,0x6f +0x20,0x04,0x1d,0x6f +0x20,0x04,0x3d,0x6f +0x20,0x04,0x7d,0x6f + +#----------------------------------------------------------------------------- +#Integer shift right and accumulate (Signed) +#----------------------------------------------------------------------------- +# CHECK: ssra v0.8b, v1.8b, #3 +# CHECK: ssra v0.4h, v1.4h, #3 +# CHECK: ssra v0.2s, v1.2s, #3 +# CHECK: ssra v0.16b, v1.16b, #3 +# CHECK: ssra v0.8h, v1.8h, #3 +# CHECK: ssra v0.4s, v1.4s, #3 +# CHECK: ssra v0.2d, v1.2d, #3 +0x20,0x14,0x0d,0x0f +0x20,0x14,0x1d,0x0f +0x20,0x14,0x3d,0x0f +0x20,0x14,0x0d,0x4f +0x20,0x14,0x1d,0x4f +0x20,0x14,0x3d,0x4f +0x20,0x14,0x7d,0x4f + +#----------------------------------------------------------------------------- +#Integer shift right and accumulate (Unsigned) +#----------------------------------------------------------------------------- +# CHECK: usra v0.8b, v1.8b, #3 +# CHECK: usra v0.4h, v1.4h, #3 +# CHECK: usra v0.2s, v1.2s, #3 +# CHECK: usra v0.16b, v1.16b, #3 +# CHECK: usra v0.8h, v1.8h, #3 +# CHECK: usra v0.4s, v1.4s, #3 +# CHECK: usra v0.2d, v1.2d, #3 +0x20,0x14,0x0d,0x2f +0x20,0x14,0x1d,0x2f +0x20,0x14,0x3d,0x2f +0x20,0x14,0x0d,0x6f +0x20,0x14,0x1d,0x6f +0x20,0x14,0x3d,0x6f +0x20,0x14,0x7d,0x6f + +#----------------------------------------------------------------------------- +#Integer rounding shift right (Signed) +#----------------------------------------------------------------------------- +# 
CHECK: srshr v0.8b, v1.8b, #3 +# CHECK: srshr v0.4h, v1.4h, #3 +# CHECK: srshr v0.2s, v1.2s, #3 +# CHECK: srshr v0.16b, v1.16b, #3 +# CHECK: srshr v0.8h, v1.8h, #3 +# CHECK: srshr v0.4s, v1.4s, #3 +# CHECK: srshr v0.2d, v1.2d, #3 +0x20,0x24,0x0d,0x0f +0x20,0x24,0x1d,0x0f +0x20,0x24,0x3d,0x0f +0x20,0x24,0x0d,0x4f +0x20,0x24,0x1d,0x4f +0x20,0x24,0x3d,0x4f +0x20,0x24,0x7d,0x4f + +#----------------------------------------------------------------------------- +#Integer rounding shift right (Unsigned) +#----------------------------------------------------------------------------- +# CHECK: urshr v0.8b, v1.8b, #3 +# CHECK: urshr v0.4h, v1.4h, #3 +# CHECK: urshr v0.2s, v1.2s, #3 +# CHECK: urshr v0.16b, v1.16b, #3 +# CHECK: urshr v0.8h, v1.8h, #3 +# CHECK: urshr v0.4s, v1.4s, #3 +# CHECK: urshr v0.2d, v1.2d, #3 +0x20,0x24,0x0d,0x2f +0x20,0x24,0x1d,0x2f +0x20,0x24,0x3d,0x2f +0x20,0x24,0x0d,0x6f +0x20,0x24,0x1d,0x6f +0x20,0x24,0x3d,0x6f +0x20,0x24,0x7d,0x6f + +#----------------------------------------------------------------------------- +#Integer rounding shift right and accumulate (Signed) +#----------------------------------------------------------------------------- +# CHECK: srsra v0.8b, v1.8b, #3 +# CHECK: srsra v0.4h, v1.4h, #3 +# CHECK: srsra v0.2s, v1.2s, #3 +# CHECK: srsra v0.16b, v1.16b, #3 +# CHECK: srsra v0.8h, v1.8h, #3 +# CHECK: srsra v0.4s, v1.4s, #3 +# CHECK: srsra v0.2d, v1.2d, #3 +0x20,0x34,0x0d,0x0f +0x20,0x34,0x1d,0x0f +0x20,0x34,0x3d,0x0f +0x20,0x34,0x0d,0x4f +0x20,0x34,0x1d,0x4f +0x20,0x34,0x3d,0x4f +0x20,0x34,0x7d,0x4f + +#----------------------------------------------------------------------------- +#Integer rounding shift right and accumulate (Unsigned) +#----------------------------------------------------------------------------- +# CHECK: ursra v0.8b, v1.8b, #3 +# CHECK: ursra v0.4h, v1.4h, #3 +# CHECK: ursra v0.2s, v1.2s, #3 +# CHECK: ursra v0.16b, v1.16b, #3 +# CHECK: ursra v0.8h, v1.8h, #3 +# CHECK: ursra v0.4s, v1.4s, #3 +# CHECK: ursra v0.2d, 
v1.2d, #3 +0x20,0x34,0x0d,0x2f +0x20,0x34,0x1d,0x2f +0x20,0x34,0x3d,0x2f +0x20,0x34,0x0d,0x6f +0x20,0x34,0x1d,0x6f +0x20,0x34,0x3d,0x6f +0x20,0x34,0x7d,0x6f + +#----------------------------------------------------------------------------- +#Integer shift right and insert +#----------------------------------------------------------------------------- +# CHECK: sri v0.8b, v1.8b, #3 +# CHECK: sri v0.4h, v1.4h, #3 +# CHECK: sri v0.2s, v1.2s, #3 +# CHECK: sri v0.16b, v1.16b, #3 +# CHECK: sri v0.8h, v1.8h, #3 +# CHECK: sri v0.4s, v1.4s, #3 +# CHECK: sri v0.2d, v1.2d, #3 +0x20,0x44,0x0d,0x2f +0x20,0x44,0x1d,0x2f +0x20,0x44,0x3d,0x2f +0x20,0x44,0x0d,0x6f +0x20,0x44,0x1d,0x6f +0x20,0x44,0x3d,0x6f +0x20,0x44,0x7d,0x6f + +#----------------------------------------------------------------------------- +#Integer shift left and insert +#----------------------------------------------------------------------------- +# CHECK: sli v0.8b, v1.8b, #3 +# CHECK: sli v0.4h, v1.4h, #3 +# CHECK: sli v0.2s, v1.2s, #3 +# CHECK: sli v0.16b, v1.16b, #3 +# CHECK: sli v0.8h, v1.8h, #3 +# CHECK: sli v0.4s, v1.4s, #3 +# CHECK: sli v0.2d, v1.2d, #3 +0x20,0x54,0x0b,0x2f +0x20,0x54,0x13,0x2f +0x20,0x54,0x23,0x2f +0x20,0x54,0x0b,0x6f +0x20,0x54,0x13,0x6f +0x20,0x54,0x23,0x6f +0x20,0x54,0x43,0x6f + +#----------------------------------------------------------------------------- +#Integer saturating shift left unsigned +#----------------------------------------------------------------------------- +# CHECK: sqshlu v0.8b, v1.8b, #3 +# CHECK: sqshlu v0.4h, v1.4h, #3 +# CHECK: sqshlu v0.2s, v1.2s, #3 +# CHECK: sqshlu v0.16b, v1.16b, #3 +# CHECK: sqshlu v0.8h, v1.8h, #3 +# CHECK: sqshlu v0.4s, v1.4s, #3 +# CHECK: sqshlu v0.2d, v1.2d, #3 +0x20,0x64,0x0b,0x2f +0x20,0x64,0x13,0x2f +0x20,0x64,0x23,0x2f +0x20,0x64,0x0b,0x6f +0x20,0x64,0x13,0x6f +0x20,0x64,0x23,0x6f +0x20,0x64,0x43,0x6f + +#----------------------------------------------------------------------------- +#Integer saturating shift left (Signed) 
+#----------------------------------------------------------------------------- +# CHECK: sqshl v0.8b, v1.8b, #3 +# CHECK: sqshl v0.4h, v1.4h, #3 +# CHECK: sqshl v0.2s, v1.2s, #3 +# CHECK: sqshl v0.16b, v1.16b, #3 +# CHECK: sqshl v0.8h, v1.8h, #3 +# CHECK: sqshl v0.4s, v1.4s, #3 +# CHECK: sqshl v0.2d, v1.2d, #3 +0x20,0x74,0x0b,0x0f +0x20,0x74,0x13,0x0f +0x20,0x74,0x23,0x0f +0x20,0x74,0x0b,0x4f +0x20,0x74,0x13,0x4f +0x20,0x74,0x23,0x4f +0x20,0x74,0x43,0x4f + +#----------------------------------------------------------------------------- +#Integer saturating shift left (Unsigned) +#----------------------------------------------------------------------------- +# CHECK: uqshl v0.8b, v1.8b, #3 +# CHECK: uqshl v0.4h, v1.4h, #3 +# CHECK: uqshl v0.2s, v1.2s, #3 +# CHECK: uqshl v0.16b, v1.16b, #3 +# CHECK: uqshl v0.8h, v1.8h, #3 +# CHECK: uqshl v0.4s, v1.4s, #3 +# CHECK: uqshl v0.2d, v1.2d, #3 +0x20,0x74,0x0b,0x2f +0x20,0x74,0x13,0x2f +0x20,0x74,0x23,0x2f +0x20,0x74,0x0b,0x6f +0x20,0x74,0x13,0x6f +0x20,0x74,0x23,0x6f +0x20,0x74,0x43,0x6f + +#----------------------------------------------------------------------------- +#Integer shift right narrow +#----------------------------------------------------------------------------- +# CHECK: shrn v0.8b, v1.8h, #3 +# CHECK: shrn v0.4h, v1.4s, #3 +# CHECK: shrn v0.2s, v1.2d, #3 +# CHECK: shrn2 v0.16b, v1.8h, #3 +# CHECK: shrn2 v0.8h, v1.4s, #3 +# CHECK: shrn2 v0.4s, v1.2d, #3 +0x20,0x84,0x0d,0x0f +0x20,0x84,0x1d,0x0f +0x20,0x84,0x3d,0x0f +0x20,0x84,0x0d,0x4f +0x20,0x84,0x1d,0x4f +0x20,0x84,0x3d,0x4f + +#----------------------------------------------------------------------------- +#Integer saturating shift right unsigned narrow (Signed) +#----------------------------------------------------------------------------- +# CHECK: sqshrun v0.8b, v1.8h, #3 +# CHECK: sqshrun v0.4h, v1.4s, #3 +# CHECK: sqshrun v0.2s, v1.2d, #3 +# CHECK: sqshrun2 v0.16b, v1.8h, #3 +# CHECK: sqshrun2 v0.8h, v1.4s, #3 +# CHECK: sqshrun2 v0.4s, v1.2d, #3 
+0x20,0x84,0x0d,0x2f +0x20,0x84,0x1d,0x2f +0x20,0x84,0x3d,0x2f +0x20,0x84,0x0d,0x6f +0x20,0x84,0x1d,0x6f +0x20,0x84,0x3d,0x6f + +#----------------------------------------------------------------------------- +#Integer rounding shift right narrow +#----------------------------------------------------------------------------- +# CHECK: rshrn v0.8b, v1.8h, #3 +# CHECK: rshrn v0.4h, v1.4s, #3 +# CHECK: rshrn v0.2s, v1.2d, #3 +# CHECK: rshrn2 v0.16b, v1.8h, #3 +# CHECK: rshrn2 v0.8h, v1.4s, #3 +# CHECK: rshrn2 v0.4s, v1.2d, #3 +0x20,0x8c,0x0d,0x0f +0x20,0x8c,0x1d,0x0f +0x20,0x8c,0x3d,0x0f +0x20,0x8c,0x0d,0x4f +0x20,0x8c,0x1d,0x4f +0x20,0x8c,0x3d,0x4f + +#----------------------------------------------------------------------------- +#Integer saturating shift right rounded unsigned narrow (Signed) +#----------------------------------------------------------------------------- +# CHECK: sqrshrun v0.8b, v1.8h, #3 +# CHECK: sqrshrun v0.4h, v1.4s, #3 +# CHECK: sqrshrun v0.2s, v1.2d, #3 +# CHECK: sqrshrun2 v0.16b, v1.8h, #3 +# CHECK: sqrshrun2 v0.8h, v1.4s, #3 +# CHECK: sqrshrun2 v0.4s, v1.2d, #3 +0x20,0x8c,0x0d,0x2f +0x20,0x8c,0x1d,0x2f +0x20,0x8c,0x3d,0x2f +0x20,0x8c,0x0d,0x6f +0x20,0x8c,0x1d,0x6f +0x20,0x8c,0x3d,0x6f + +#----------------------------------------------------------------------------- +#Integer saturating shift right narrow (Signed) +#----------------------------------------------------------------------------- +# CHECK: sqshrn v0.8b, v1.8h, #3 +# CHECK: sqshrn v0.4h, v1.4s, #3 +# CHECK: sqshrn v0.2s, v1.2d, #3 +# CHECK: sqshrn2 v0.16b, v1.8h, #3 +# CHECK: sqshrn2 v0.8h, v1.4s, #3 +# CHECK: sqshrn2 v0.4s, v1.2d, #3 +0x20,0x94,0x0d,0x0f +0x20,0x94,0x1d,0x0f +0x20,0x94,0x3d,0x0f +0x20,0x94,0x0d,0x4f +0x20,0x94,0x1d,0x4f +0x20,0x94,0x3d,0x4f + +#----------------------------------------------------------------------------- +#Integer saturating shift right narrow (Unsigned) +#----------------------------------------------------------------------------- +# CHECK: 
uqshrn v0.8b, v1.8h, #3 +# CHECK: uqshrn v0.4h, v1.4s, #3 +# CHECK: uqshrn v0.2s, v1.2d, #3 +# CHECK: uqshrn2 v0.16b, v1.8h, #3 +# CHECK: uqshrn2 v0.8h, v1.4s, #3 +# CHECK: uqshrn2 v0.4s, v1.2d, #3 +0x20,0x94,0x0d,0x2f +0x20,0x94,0x1d,0x2f +0x20,0x94,0x3d,0x2f +0x20,0x94,0x0d,0x6f +0x20,0x94,0x1d,0x6f +0x20,0x94,0x3d,0x6f + +#----------------------------------------------------------------------------- +#Integer saturating shift right rounded narrow (Signed) +#----------------------------------------------------------------------------- +# CHECK: sqrshrn v0.8b, v1.8h, #3 +# CHECK: sqrshrn v0.4h, v1.4s, #3 +# CHECK: sqrshrn v0.2s, v1.2d, #3 +# CHECK: sqrshrn2 v0.16b, v1.8h, #3 +# CHECK: sqrshrn2 v0.8h, v1.4s, #3 +# CHECK: sqrshrn2 v0.4s, v1.2d, #3 +0x20,0x9c,0x0d,0x0f +0x20,0x9c,0x1d,0x0f +0x20,0x9c,0x3d,0x0f +0x20,0x9c,0x0d,0x4f +0x20,0x9c,0x1d,0x4f +0x20,0x9c,0x3d,0x4f + +#----------------------------------------------------------------------------- +#Integer saturating shift right rounded narrow (Unsigned) +#----------------------------------------------------------------------------- +# CHECK: uqrshrn v0.8b, v1.8h, #3 +# CHECK: uqrshrn v0.4h, v1.4s, #3 +# CHECK: uqrshrn v0.2s, v1.2d, #3 +# CHECK: uqrshrn2 v0.16b, v1.8h, #3 +# CHECK: uqrshrn2 v0.8h, v1.4s, #3 +# CHECK: uqrshrn2 v0.4s, v1.2d, #3 +0x20,0x9c,0x0d,0x2f +0x20,0x9c,0x1d,0x2f +0x20,0x9c,0x3d,0x2f +0x20,0x9c,0x0d,0x6f +0x20,0x9c,0x1d,0x6f +0x20,0x9c,0x3d,0x6f + +#----------------------------------------------------------------------------- +#Fixed-point convert to floating-point +#----------------------------------------------------------------------------- +# CHECK: scvtf v0.2s, v1.2s, #3 +# CHECK: scvtf v0.4s, v1.4s, #3 +# CHECK: scvtf v0.2d, v1.2d, #3 +# CHECK: ucvtf v0.2s, v1.2s, #3 +# CHECK: ucvtf v0.4s, v1.4s, #3 +# CHECK: ucvtf v0.2d, v1.2d, #3 + +0x20,0xe4,0x3d,0x0f +0x20,0xe4,0x3d,0x4f +0x20,0xe4,0x7d,0x4f +0x20,0xe4,0x3d,0x2f +0x20,0xe4,0x3d,0x6f +0x20,0xe4,0x7d,0x6f + 
+#----------------------------------------------------------------------------- +#Floating-point convert to fixed-point +#----------------------------------------------------------------------------- +# CHECK: fcvtzs v0.2s, v1.2s, #3 +# CHECK: fcvtzs v0.4s, v1.4s, #3 +# CHECK: fcvtzs v0.2d, v1.2d, #3 +# CHECK: fcvtzu v0.2s, v1.2s, #3 +# CHECK: fcvtzu v0.4s, v1.4s, #3 +# CHECK: fcvtzu v0.2d, v1.2d, #3 +0x20,0xfc,0x3d,0x0f +0x20,0xfc,0x3d,0x4f +0x20,0xfc,0x7d,0x4f +0x20,0xfc,0x3d,0x2f +0x20,0xfc,0x3d,0x6f +0x20,0xfc,0x7d,0x6f + + +#------------------------------------------------------------------------------ +# Vector with 3 operands having different data types +#------------------------------------------------------------------------------ + +#------------------------------------------------------------------------------ +# Long +#------------------------------------------------------------------------------ + +#------------------------------------------------------------------------------ +# Long - Variant 1 +#------------------------------------------------------------------------------ + +# CHECK: saddl v0.8h, v1.8b, v2.8b +# CHECK: saddl v0.4s, v1.4h, v2.4h +# CHECK: saddl v0.2d, v1.2s, v2.2s +0x20 0x00 0x22 0x0e +0x20 0x00 0x62 0x0e +0x20 0x00 0xa2 0x0e + +# CHECK: saddl2 v0.4s, v1.8h, v2.8h +# CHECK: saddl2 v0.8h, v1.16b, v2.16b +# CHECK: saddl2 v0.2d, v1.4s, v2.4s +0x20 0x00 0x62 0x4e +0x20 0x00 0x22 0x4e +0x20 0x00 0xa2 0x4e + +# CHECK: uaddl v0.8h, v1.8b, v2.8b +# CHECK: uaddl v0.4s, v1.4h, v2.4h +# CHECK: uaddl v0.2d, v1.2s, v2.2s +0x20 0x00 0x22 0x2e +0x20 0x00 0x62 0x2e +0x20 0x00 0xa2 0x2e + +# CHECK: uaddl2 v0.8h, v1.16b, v2.16b +# CHECK: uaddl2 v0.4s, v1.8h, v2.8h +# CHECK: uaddl2 v0.2d, v1.4s, v2.4s +0x20 0x00 0x22 0x6e +0x20 0x00 0x62 0x6e +0x20 0x00 0xa2 0x6e + +# CHECK: ssubl v0.8h, v1.8b, v2.8b +# CHECK: ssubl v0.4s, v1.4h, v2.4h +# CHECK: ssubl v0.2d, v1.2s, v2.2s +0x20 0x20 0x22 0x0e +0x20 0x20 0x62 0x0e +0x20 0x20 0xa2 0x0e + +# CHECK: ssubl2 
v0.8h, v1.16b, v2.16b +# CHECK: ssubl2 v0.4s, v1.8h, v2.8h +# CHECK: ssubl2 v0.2d, v1.4s, v2.4s +0x20 0x20 0x22 0x4e +0x20 0x20 0x62 0x4e +0x20 0x20 0xa2 0x4e + +# CHECK: usubl v0.8h, v1.8b, v2.8b +# CHECK: usubl v0.4s, v1.4h, v2.4h +# CHECK: usubl v0.2d, v1.2s, v2.2s +0x20 0x20 0x22 0x2e +0x20 0x20 0x62 0x2e +0x20 0x20 0xa2 0x2e + +# CHECK: usubl2 v0.8h, v1.16b, v2.16b +# CHECK: usubl2 v0.4s, v1.8h, v2.8h +# CHECK: usubl2 v0.2d, v1.4s, v2.4s +0x20 0x20 0x22 0x6e +0x20 0x20 0x62 0x6e +0x20 0x20 0xa2 0x6e + +# CHECK: sabal v0.8h, v1.8b, v2.8b +# CHECK: sabal v0.4s, v1.4h, v2.4h +# CHECK: sabal v0.2d, v1.2s, v2.2s +0x20 0x50 0x22 0x0e +0x20 0x50 0x62 0x0e +0x20 0x50 0xa2 0x0e + +# CHECK: sabal2 v0.8h, v1.16b, v2.16b +# CHECK: sabal2 v0.4s, v1.8h, v2.8h +# CHECK: sabal2 v0.2d, v1.4s, v2.4s +0x20 0x50 0x22 0x4e +0x20 0x50 0x62 0x4e +0x20 0x50 0xa2 0x4e + +# CHECK: uabal v0.8h, v1.8b, v2.8b +# CHECK: uabal v0.4s, v1.4h, v2.4h +# CHECK: uabal v0.2d, v1.2s, v2.2s +0x20 0x50 0x22 0x2e +0x20 0x50 0x62 0x2e +0x20 0x50 0xa2 0x2e + +# CHECK: uabal2 v0.8h, v1.16b, v2.16b +# CHECK: uabal2 v0.4s, v1.8h, v2.8h +# CHECK: uabal2 v0.2d, v1.4s, v2.4s +0x20 0x50 0x22 0x6e +0x20 0x50 0x62 0x6e +0x20 0x50 0xa2 0x6e + +# CHECK: sabdl v0.8h, v1.8b, v2.8b +# CHECK: sabdl v0.4s, v1.4h, v2.4h +# CHECK: sabdl v0.2d, v1.2s, v2.2s +0x20 0x70 0x22 0x0e +0x20 0x70 0x62 0x0e +0x20 0x70 0xa2 0x0e + +# CHECK: sabdl2 v0.8h, v1.16b, v2.16b +# CHECK: sabdl2 v0.4s, v1.8h, v2.8h +# CHECK: sabdl2 v0.2d, v1.4s, v2.4s +0x20 0x70 0x22 0x4e +0x20 0x70 0x62 0x4e +0x20 0x70 0xa2 0x4e + +# CHECK: uabdl v0.8h, v1.8b, v2.8b +# CHECK: uabdl v0.4s, v1.4h, v2.4h +# CHECK: uabdl v0.2d, v1.2s, v2.2s +0x20 0x70 0x22 0x2e +0x20 0x70 0x62 0x2e +0x20 0x70 0xa2 0x2e + +# CHECK: uabdl2 v0.8h, v1.16b, v2.16b +# CHECK: uabdl2 v0.4s, v1.8h, v2.8h +# CHECK: uabdl2 v0.2d, v1.4s, v2.4s +0x20 0x70 0x22 0x6e +0x20 0x70 0x62 0x6e +0x20 0x70 0xa2 0x6e + +# CHECK: smlal v0.8h, v1.8b, v2.8b +# CHECK: smlal v0.4s, v1.4h, v2.4h +# CHECK: 
smlal v0.2d, v1.2s, v2.2s +0x20 0x80 0x22 0x0e +0x20 0x80 0x62 0x0e +0x20 0x80 0xa2 0x0e + +# CHECK: smlal2 v0.8h, v1.16b, v2.16b +# CHECK: smlal2 v0.4s, v1.8h, v2.8h +# CHECK: smlal2 v0.2d, v1.4s, v2.4s +0x20 0x80 0x22 0x4e +0x20 0x80 0x62 0x4e +0x20 0x80 0xa2 0x4e + +# CHECK: umlal v0.8h, v1.8b, v2.8b +# CHECK: umlal v0.4s, v1.4h, v2.4h +# CHECK: umlal v0.2d, v1.2s, v2.2s + +0x20 0x80 0x22 0x2e +0x20 0x80 0x62 0x2e +0x20 0x80 0xa2 0x2e + +# CHECK: umlal2 v0.8h, v1.16b, v2.16b +# CHECK: umlal2 v0.4s, v1.8h, v2.8h +# CHECK: umlal2 v0.2d, v1.4s, v2.4s +0x20 0x80 0x22 0x6e +0x20 0x80 0x62 0x6e +0x20 0x80 0xa2 0x6e + +# CHECK: smlsl v0.8h, v1.8b, v2.8b +# CHECK: smlsl v0.4s, v1.4h, v2.4h +# CHECK: smlsl v0.2d, v1.2s, v2.2s +0x20 0xa0 0x22 0x0e +0x20 0xa0 0x62 0x0e +0x20 0xa0 0xa2 0x0e + +# CHECK: smlsl2 v0.8h, v1.16b, v2.16b +# CHECK: smlsl2 v0.4s, v1.8h, v2.8h +# CHECK: smlsl2 v0.2d, v1.4s, v2.4s +0x20 0xa0 0x22 0x4e +0x20 0xa0 0x62 0x4e +0x20 0xa0 0xa2 0x4e + +# CHECK: umlsl v0.8h, v1.8b, v2.8b +# CHECK: umlsl v0.4s, v1.4h, v2.4h +# CHECK: umlsl v0.2d, v1.2s, v2.2s +0x20 0xa0 0x22 0x2e +0x20 0xa0 0x62 0x2e +0x20 0xa0 0xa2 0x2e + +# CHECK: umlsl2 v0.8h, v1.16b, v2.16b +# CHECK: umlsl2 v0.4s, v1.8h, v2.8h +# CHECK: umlsl2 v0.2d, v1.4s, v2.4s +0x20 0xa0 0x22 0x6e +0x20 0xa0 0x62 0x6e +0x20 0xa0 0xa2 0x6e + +# CHECK: smull v0.8h, v1.8b, v2.8b +# CHECK: smull v0.4s, v1.4h, v2.4h +# CHECK: smull v0.2d, v1.2s, v2.2s +0x20 0xc0 0x22 0x0e +0x20 0xc0 0x62 0x0e +0x20 0xc0 0xa2 0x0e + +# CHECK: smull2 v0.8h, v1.16b, v2.16b +# CHECK: smull2 v0.4s, v1.8h, v2.8h +# CHECK: smull2 v0.2d, v1.4s, v2.4s +0x20 0xc0 0x22 0x4e +0x20 0xc0 0x62 0x4e +0x20 0xc0 0xa2 0x4e + +# CHECK: umull v0.8h, v1.8b, v2.8b +# CHECK: umull v0.4s, v1.4h, v2.4h +# CHECK: umull v0.2d, v1.2s, v2.2s +0x20 0xc0 0x22 0x2e +0x20 0xc0 0x62 0x2e +0x20 0xc0 0xa2 0x2e + +# CHECK: umull2 v0.8h, v1.16b, v2.16b +# CHECK: umull2 v0.4s, v1.8h, v2.8h +# CHECK: umull2 v0.2d, v1.4s, v2.4s +0x20 0xc0 0x22 0x6e +0x20 0xc0 0x62 
0x6e +0x20 0xc0 0xa2 0x6e + +#------------------------------------------------------------------------------ +# Long - Variant 2 +#------------------------------------------------------------------------------ + +# CHECK: sqdmlal v0.4s, v1.4h, v2.4h +# CHECK: sqdmlal v0.2d, v1.2s, v2.2s +0x20 0x90 0x62 0x0e +0x20 0x90 0xa2 0x0e + +# CHECK: sqdmlal2 v0.4s, v1.8h, v2.8h +# CHECK: sqdmlal2 v0.2d, v1.4s, v2.4s +0x20 0x90 0x62 0x4e +0x20 0x90 0xa2 0x4e + +# CHECK: sqdmlsl v0.4s, v1.4h, v2.4h +# CHECK: sqdmlsl v0.2d, v1.2s, v2.2s +0x20 0xb0 0x62 0x0e +0x20 0xb0 0xa2 0x0e + +# CHECK: sqdmlsl2 v0.4s, v1.8h, v2.8h +# CHECK: sqdmlsl2 v0.2d, v1.4s, v2.4s +0x20 0xb0 0x62 0x4e +0x20 0xb0 0xa2 0x4e + +# CHECK: sqdmull v0.4s, v1.4h, v2.4h +# CHECK: sqdmull v0.2d, v1.2s, v2.2s +0x20 0xd0 0x62 0x0e +0x20 0xd0 0xa2 0x0e + +# CHECK: sqdmull2 v0.4s, v1.8h, v2.8h +# CHECK: sqdmull2 v0.2d, v1.4s, v2.4s +0x20 0xd0 0x62 0x4e +0x20 0xd0 0xa2 0x4e + +#------------------------------------------------------------------------------ +# Long - Variant 3 +#------------------------------------------------------------------------------ + +# CHECK: pmull v0.8h, v1.8b, v2.8b +0x20 0xe0 0x22 0x0e + +# CHECK: pmull2 v0.8h, v1.16b, v2.16b +0x20 0xe0 0x22 0x4e + +#------------------------------------------------------------------------------ +# Widen +#------------------------------------------------------------------------------ + +# CHECK: saddw v0.8h, v1.8h, v2.8b +# CHECK: saddw v0.4s, v1.4s, v2.4h +# CHECK: saddw v0.2d, v1.2d, v2.2s +0x20 0x10 0x22 0x0e +0x20 0x10 0x62 0x0e +0x20 0x10 0xa2 0x0e + +# CHECK: saddw2 v0.8h, v1.8h, v2.16b +# CHECK: saddw2 v0.4s, v1.4s, v2.8h +# CHECK: saddw2 v0.2d, v1.2d, v2.4s +0x20 0x10 0x22 0x4e +0x20 0x10 0x62 0x4e +0x20 0x10 0xa2 0x4e + +# CHECK: uaddw v0.8h, v1.8h, v2.8b +# CHECK: uaddw v0.4s, v1.4s, v2.4h +# CHECK: uaddw v0.2d, v1.2d, v2.2s +0x20 0x10 0x22 0x2e +0x20 0x10 0x62 0x2e +0x20 0x10 0xa2 0x2e + +# CHECK: uaddw2 v0.8h, v1.8h, v2.16b +# CHECK: uaddw2 
v0.4s, v1.4s, v2.8h +# CHECK: uaddw2 v0.2d, v1.2d, v2.4s +0x20 0x10 0x22 0x6e +0x20 0x10 0x62 0x6e +0x20 0x10 0xa2 0x6e + +# CHECK: ssubw v0.8h, v1.8h, v2.8b +# CHECK: ssubw v0.4s, v1.4s, v2.4h +# CHECK: ssubw v0.2d, v1.2d, v2.2s +0x20 0x30 0x22 0x0e +0x20 0x30 0x62 0x0e +0x20 0x30 0xa2 0x0e + +# CHECK: ssubw2 v0.8h, v1.8h, v2.16b +# CHECK: ssubw2 v0.4s, v1.4s, v2.8h +# CHECK: ssubw2 v0.2d, v1.2d, v2.4s +0x20 0x30 0x22 0x4e +0x20 0x30 0x62 0x4e +0x20 0x30 0xa2 0x4e + +# CHECK: usubw v0.8h, v1.8h, v2.8b +# CHECK: usubw v0.4s, v1.4s, v2.4h +# CHECK: usubw v0.2d, v1.2d, v2.2s +0x20 0x30 0x22 0x2e +0x20 0x30 0x62 0x2e +0x20 0x30 0xa2 0x2e + +# CHECK: usubw2 v0.8h, v1.8h, v2.16b +# CHECK: usubw2 v0.4s, v1.4s, v2.8h +# CHECK: usubw2 v0.2d, v1.2d, v2.4s +0x20 0x30 0x22 0x6e +0x20 0x30 0x62 0x6e +0x20 0x30 0xa2 0x6e + +#------------------------------------------------------------------------------ +# Narrow +#------------------------------------------------------------------------------ + +# CHECK: addhn v0.8b, v1.8h, v2.8h +# CHECK: addhn v0.4h, v1.4s, v2.4s +# CHECK: addhn v0.2s, v1.2d, v2.2d +0x20 0x40 0x22 0x0e +0x20 0x40 0x62 0x0e +0x20 0x40 0xa2 0x0e + +# CHECK: addhn2 v0.16b, v1.8h, v2.8h +# CHECK: addhn2 v0.8h, v1.4s, v2.4s +# CHECK: addhn2 v0.4s, v1.2d, v2.2d +0x20 0x40 0x22 0x4e +0x20 0x40 0x62 0x4e +0x20 0x40 0xa2 0x4e + +# CHECK: raddhn v0.8b, v1.8h, v2.8h +# CHECK: raddhn v0.4h, v1.4s, v2.4s +# CHECK: raddhn v0.2s, v1.2d, v2.2d +0x20 0x40 0x22 0x2e +0x20 0x40 0x62 0x2e +0x20 0x40 0xa2 0x2e + +# CHECK: raddhn2 v0.16b, v1.8h, v2.8h +# CHECK: raddhn2 v0.8h, v1.4s, v2.4s +# CHECK: raddhn2 v0.4s, v1.2d, v2.2d +0x20 0x40 0x22 0x6e +0x20 0x40 0x62 0x6e +0x20 0x40 0xa2 0x6e + +# CHECK: rsubhn v0.8b, v1.8h, v2.8h +# CHECK: rsubhn v0.4h, v1.4s, v2.4s +# CHECK: rsubhn v0.2s, v1.2d, v2.2d +0x20 0x60 0x22 0x2e +0x20 0x60 0x62 0x2e +0x20 0x60 0xa2 0x2e + +# CHECK: rsubhn2 v0.16b, v1.8h, v2.8h +# CHECK: rsubhn2 v0.8h, v1.4s, v2.4s +# CHECK: rsubhn2 v0.4s, v1.2d, v2.2d +0x20 
0x60 0x22 0x6e +0x20 0x60 0x62 0x6e +0x20 0x60 0xa2 0x6e + +#---------------------------------------------------------------------- +# Scalar Integer Saturating Doubling Multiply Half High +#---------------------------------------------------------------------- +# CHECK: sqdmulh h10, h11, h12 +# CHECK: sqdmulh s20, s21, s2 +0x6a,0xb5,0x6c,0x5e +0xb4,0xb6,0xa2,0x5e + +#---------------------------------------------------------------------- +# Scalar Integer Saturating Rounding Doubling Multiply Half High +#---------------------------------------------------------------------- +# CHECK: sqrdmulh h10, h11, h12 +# CHECK: sqrdmulh s20, s21, s2 +0x6a,0xb5,0x6c,0x7e +0xb4,0xb6,0xa2,0x7e + +#---------------------------------------------------------------------- +# Floating-point multiply extended +#---------------------------------------------------------------------- +# CHECK: fmulx s20, s22, s15 +# CHECK: fmulx d23, d11, d1 +0xd4,0xde,0x2f,0x5e +0x77,0xdd,0x61,0x5e + +#---------------------------------------------------------------------- +# Floating-point Reciprocal Step +#---------------------------------------------------------------------- +# CHECK: frecps s21, s16, s13 +# CHECK: frecps d22, d30, d21 +0x15,0xfe,0x2d,0x5e +0xd6,0xff,0x75,0x5e + +#---------------------------------------------------------------------- +# Floating-point Reciprocal Square Root Step +#---------------------------------------------------------------------- +# CHECK: frsqrts s21, s5, s12 +# CHECK: frsqrts d8, d22, d18 +0xb5,0xfc,0xac,0x5e +0xc8,0xfe,0xf2,0x5e + +#---------------------------------------------------------------------- +# Scalar Signed Integer Convert To Floating-point +#---------------------------------------------------------------------- +# CHECK: scvtf s22, s13 +# CHECK: scvtf d21, d12 +0xb6,0xd9,0x21,0x5e +0x95,0xd9,0x61,0x5e + +#---------------------------------------------------------------------- +# Scalar Unsigned Integer Convert To Floating-point 
+#---------------------------------------------------------------------- +# CHECK: ucvtf s22, s13 +# CHECK: ucvtf d21, d14 +0xb6,0xd9,0x21,0x7e +0xd5,0xd9,0x61,0x7e + +#---------------------------------------------------------------------- +# Scalar Floating-point Reciprocal Estimate +#---------------------------------------------------------------------- +# CHECK: frecpe s19, s14 +# CHECK: frecpe d13, d13 +0xd3,0xd9,0xa1,0x5e +0xad,0xd9,0xe1,0x5e + +#---------------------------------------------------------------------- +# Scalar Floating-point Reciprocal Exponent +#---------------------------------------------------------------------- +# CHECK: frecpx s18, s10 +# CHECK: frecpx d16, d19 +0x52,0xf9,0xa1,0x5e +0x70,0xfa,0xe1,0x5e + +#---------------------------------------------------------------------- +# Scalar Floating-point Reciprocal Square Root Estimate +#---------------------------------------------------------------------- +# CHECK: frsqrte s22, s13 +# CHECK: frsqrte d21, d12 +0xb6,0xd9,0xa1,0x7e +0x95,0xd9,0xe1,0x7e + +#---------------------------------------------------------------------- +# Scalar Compare Bitwise Equal +#---------------------------------------------------------------------- +# CHECK: cmeq d20, d21, d22 +0xb4,0x8e,0xf6,0x7e + +#---------------------------------------------------------------------- +# Scalar Compare Bitwise Equal To Zero +#---------------------------------------------------------------------- +# CHECK: cmeq d20, d21, #0x0 +0xb4,0x9a,0xe0,0x5e + +#---------------------------------------------------------------------- +# Scalar Compare Unsigned Higher Or Same +#---------------------------------------------------------------------- +# CHECK: cmhs d20, d21, d22 +0xb4,0x3e,0xf6,0x7e + + +#---------------------------------------------------------------------- +# Scalar Compare Signed Greater Than Or Equal +#---------------------------------------------------------------------- +# CHECK: cmge d20, d21, d22 +0xb4,0x3e,0xf6,0x5e + 
+#---------------------------------------------------------------------- +# Scalar Compare Signed Greater Than Or Equal To Zero +#---------------------------------------------------------------------- +# CHECK: cmge d20, d21, #0x0 +0xb4,0x8a,0xe0,0x7e + +#---------------------------------------------------------------------- +# Scalar Compare Unsigned Higher +#---------------------------------------------------------------------- +# CHECK: cmhi d20, d21, d22 +0xb4,0x36,0xf6,0x7e + +#---------------------------------------------------------------------- +# Scalar Compare Signed Greater Than +#---------------------------------------------------------------------- +# CHECK: cmgt d20, d21, d22 +0xb4,0x36,0xf6,0x5e + +#---------------------------------------------------------------------- +# Scalar Compare Signed Greater Than Zero +#---------------------------------------------------------------------- +# CHECK: cmgt d20, d21, #0x0 +0xb4,0x8a,0xe0,0x5e + +#---------------------------------------------------------------------- +# Scalar Compare Signed Less Than Or Equal To Zero +#---------------------------------------------------------------------- +# CHECK: cmle d20, d21, #0x0 +0xb4,0x9a,0xe0,0x7e + +#---------------------------------------------------------------------- +# Scalar Compare Less Than Zero +#---------------------------------------------------------------------- +# CHECK: cmlt d20, d21, #0x0 +0xb4,0xaa,0xe0,0x5e + +#---------------------------------------------------------------------- +# Scalar Compare Bitwise Test Bits +#---------------------------------------------------------------------- +# CHECK: cmtst d20, d21, d22 +0xb4,0x8e,0xf6,0x5e + +#---------------------------------------------------------------------- +# Scalar Floating-point Compare Mask Equal +#---------------------------------------------------------------------- +# CHECK: fcmeq s10, s11, s12 +# CHECK: fcmeq d20, d21, d22 +0x6a,0xe5,0x2c,0x5e +0xb4,0xe6,0x76,0x5e + 
+#---------------------------------------------------------------------- +# Scalar Floating-point Compare Mask Equal To Zero +#---------------------------------------------------------------------- +# CHECK: fcmeq s10, s11, #0.0 +# CHECK: fcmeq d20, d21, #0.0 +0x6a,0xd9,0xa0,0x5e +0xb4,0xda,0xe0,0x5e + +#---------------------------------------------------------------------- +# Scalar Floating-point Compare Mask Greater Than Or Equal +#---------------------------------------------------------------------- +# CHECK: fcmge s10, s11, s12 +# CHECK: fcmge d20, d21, d22 +0x6a,0xe5,0x2c,0x7e +0xb4,0xe6,0x76,0x7e + +#---------------------------------------------------------------------- +# Scalar Floating-point Compare Mask Greater Than Or Equal To Zero +#---------------------------------------------------------------------- +# CHECK: fcmge s10, s11, #0.0 +# CHECK: fcmge d20, d21, #0.0 +0x6a,0xc9,0xa0,0x7e +0xb4,0xca,0xe0,0x7e + +#---------------------------------------------------------------------- +# Scalar Floating-point Compare Mask Greater Than +#---------------------------------------------------------------------- +# CHECK: fcmgt s10, s11, s12 +# CHECK: fcmgt d20, d21, d22 +0x6a,0xe5,0xac,0x7e +0xb4,0xe6,0xf6,0x7e + +#---------------------------------------------------------------------- +# Scalar Floating-point Compare Mask Greater Than Zero +#---------------------------------------------------------------------- +# CHECK: fcmgt s10, s11, #0.0 +# CHECK: fcmgt d20, d21, #0.0 +0x6a,0xc9,0xa0,0x5e +0xb4,0xca,0xe0,0x5e + +#---------------------------------------------------------------------- +# Scalar Floating-point Compare Mask Less Than Or Equal To Zero +#---------------------------------------------------------------------- +# CHECK: fcmle s10, s11, #0.0 +# CHECK: fcmle d20, d21, #0.0 +0x6a,0xd9,0xa0,0x7e +0xb4,0xda,0xe0,0x7e + +#---------------------------------------------------------------------- +# Scalar Floating-point Compare Mask Less Than 
+#---------------------------------------------------------------------- +# CHECK: fcmlt s10, s11, #0.0 +# CHECK: fcmlt d20, d21, #0.0 +0x6a,0xe9,0xa0,0x5e +0xb4,0xea,0xe0,0x5e + +#---------------------------------------------------------------------- +# Scalar Floating-point Absolute Compare Mask Greater Than Or Equal +#---------------------------------------------------------------------- +# CHECK: facge s10, s11, s12 +# CHECK: facge d20, d21, d22 +0x6a,0xed,0x2c,0x7e +0xb4,0xee,0x76,0x7e + +#---------------------------------------------------------------------- +# Scalar Floating-point Absolute Compare Mask Greater Than +#---------------------------------------------------------------------- +# CHECK: facgt s10, s11, s12 +# CHECK: facgt d20, d21, d22 +0x6a,0xed,0xac,0x7e +0xb4,0xee,0xf6,0x7e + +#---------------------------------------------------------------------- +# Scalar Absolute Value +#---------------------------------------------------------------------- +# CHECK: abs d29, d24 +0x1d,0xbb,0xe0,0x5e + +#---------------------------------------------------------------------- +# Scalar Signed Saturating Absolute Value +#---------------------------------------------------------------------- +# CHECK: sqabs b19, b14 +# CHECK: sqabs h21, h15 +# CHECK: sqabs s20, s12 +# CHECK: sqabs d18, d12 +0xd3,0x79,0x20,0x5e +0xf5,0x79,0x60,0x5e +0x94,0x79,0xa0,0x5e +0x92,0x79,0xe0,0x5e + +#---------------------------------------------------------------------- +# Scalar Negate +#---------------------------------------------------------------------- +# CHECK: neg d29, d24 +0x1d,0xbb,0xe0,0x7e + +#---------------------------------------------------------------------- +# Scalar Signed Saturating Negate +#---------------------------------------------------------------------- +# CHECK: sqneg b19, b14 +# CHECK: sqneg h21, h15 +# CHECK: sqneg s20, s12 +# CHECK: sqneg d18, d12 +0xd3,0x79,0x20,0x7e +0xf5,0x79,0x60,0x7e +0x94,0x79,0xa0,0x7e +0x92,0x79,0xe0,0x7e + 
+#---------------------------------------------------------------------- +# Signed Saturating Accumulated of Unsigned Value +#---------------------------------------------------------------------- +# CHECK: suqadd b19, b14 +# CHECK: suqadd h20, h15 +# CHECK: suqadd s21, s12 +# CHECK: suqadd d18, d22 +0xd3,0x39,0x20,0x5e +0xf4,0x39,0x60,0x5e +0x95,0x39,0xa0,0x5e +0xd2,0x3a,0xe0,0x5e + +#---------------------------------------------------------------------- +# Unsigned Saturating Accumulated of Signed Value +#---------------------------------------------------------------------- +# CHECK: usqadd b19, b14 +# CHECK: usqadd h20, h15 +# CHECK: usqadd s21, s12 +# CHECK: usqadd d18, d22 +0xd3,0x39,0x20,0x7e +0xf4,0x39,0x60,0x7e +0x95,0x39,0xa0,0x7e +0xd2,0x3a,0xe0,0x7e + +#---------------------------------------------------------------------- +# Signed Saturating Doubling Multiply-Add Long +#---------------------------------------------------------------------- +# CHECK: sqdmlal s17, h27, h12 +# CHECK: sqdmlal d19, s24, s12 +0x71,0x93,0x6c,0x5e +0x13,0x93,0xac,0x5e + +#---------------------------------------------------------------------- +# Signed Saturating Doubling Multiply-Subtract Long +#---------------------------------------------------------------------- +# CHECK: sqdmlsl s14, h12, h25 +# CHECK: sqdmlsl d12, s23, s13 +0x8e,0xb1,0x79,0x5e +0xec,0xb2,0xad,0x5e + +#---------------------------------------------------------------------- +# Signed Saturating Doubling Multiply Long +#---------------------------------------------------------------------- +# CHECK: sqdmull s12, h22, h12 +# CHECK: sqdmull d15, s22, s12 +0xcc,0xd2,0x6c,0x5e +0xcf,0xd2,0xac,0x5e + +#---------------------------------------------------------------------- +# Scalar Signed Saturating Extract Unsigned Narrow +#---------------------------------------------------------------------- +# CHECK: sqxtun b19, h14 +# CHECK: sqxtun h21, s15 +# CHECK: sqxtun s20, d12 +0xd3,0x29,0x21,0x7e +0xf5,0x29,0x61,0x7e 
+0x94,0x29,0xa1,0x7e + +#---------------------------------------------------------------------- +# Scalar Signed Saturating Extract Signed Narrow +#---------------------------------------------------------------------- +# CHECK: sqxtn b18, h18 +# CHECK: sqxtn h20, s17 +# CHECK: sqxtn s19, d14 +0x52,0x4a,0x21,0x5e +0x34,0x4a,0x61,0x5e +0xd3,0x49,0xa1,0x5e + +#---------------------------------------------------------------------- +# Scalar Unsigned Saturating Extract Narrow +#---------------------------------------------------------------------- +# CHECK: uqxtn b18, h18 +# CHECK: uqxtn h20, s17 +# CHECK: uqxtn s19, d14 +0x52,0x4a,0x21,0x7e +0x34,0x4a,0x61,0x7e +0xd3,0x49,0xa1,0x7e + +#---------------------------------------------------------------------- +# Scalar Signed Shift Right (Immediate) +#---------------------------------------------------------------------- +# CHECK: sshr d15, d16, #12 +0x0f,0x06,0x74,0x5f + +#---------------------------------------------------------------------- +# Scalar Unsigned Shift Right (Immediate) +#---------------------------------------------------------------------- +# CHECK: ushr d10, d17, #18 +0x2a,0x06,0x6e,0x7f + +#---------------------------------------------------------------------- +# Scalar Signed Rounding Shift Right (Immediate) +#---------------------------------------------------------------------- +# CHECK: srshr d19, d18, #7 +0x53,0x26,0x79,0x5f + +#---------------------------------------------------------------------- +# Scalar Unsigned Rounding Shift Right (Immediate) +#---------------------------------------------------------------------- +# CHECK: urshr d20, d23, #31 +0xf4,0x26,0x61,0x7f + +#---------------------------------------------------------------------- +# Scalar Signed Shift Right and Accumulate (Immediate) +#---------------------------------------------------------------------- +# CHECK: ssra d18, d12, #21 +0x92,0x15,0x6b,0x5f + +#---------------------------------------------------------------------- +# 
Scalar Unsigned Shift Right and Accumulate (Immediate) +#---------------------------------------------------------------------- +# CHECK: usra d20, d13, #61 +0xb4,0x15,0x43,0x7f + +#---------------------------------------------------------------------- +# Scalar Signed Rounding Shift Right and Accumulate (Immediate) +#---------------------------------------------------------------------- +# CHECK: srsra d15, d11, #19 +0x6f,0x35,0x6d,0x5f + +#---------------------------------------------------------------------- +# Scalar Unsigned Rounding Shift Right and Accumulate (Immediate) +#---------------------------------------------------------------------- +# CHECK: ursra d18, d10, #13 +0x52,0x35,0x73,0x7f + +#---------------------------------------------------------------------- +# Scalar Shift Left (Immediate) +#---------------------------------------------------------------------- +# CHECK: shl d7, d10, #12 +0x47,0x55,0x4c,0x5f + +#---------------------------------------------------------------------- +# Signed Saturating Shift Left (Immediate) +#---------------------------------------------------------------------- +# CHECK: sqshl b11, b19, #7 +# CHECK: sqshl h13, h18, #11 +# CHECK: sqshl s14, s17, #22 +# CHECK: sqshl d15, d16, #51 +0x6b,0x76,0x0f,0x5f +0x4d,0x76,0x1b,0x5f +0x2e,0x76,0x36,0x5f +0x0f,0x76,0x73,0x5f + +#---------------------------------------------------------------------- +# Unsigned Saturating Shift Left (Immediate) +#---------------------------------------------------------------------- +# CHECK: uqshl b18, b15, #6 +# CHECK: uqshl h11, h18, #7 +# CHECK: uqshl s14, s19, #18 +# CHECK: uqshl d15, d12, #19 +0xf2,0x75,0x0e,0x7f +0x4b,0x76,0x17,0x7f +0x6e,0x76,0x32,0x7f +0x8f,0x75,0x53,0x7f + +#---------------------------------------------------------------------- +# Signed Saturating Shift Left Unsigned (Immediate) +#---------------------------------------------------------------------- +# CHECK: sqshlu b15, b18, #6 +# CHECK: sqshlu h19, h17, #6 +# CHECK: 
sqshlu s16, s14, #25 +# CHECK: sqshlu d11, d13, #32 +0x4f,0x66,0x0e,0x7f +0x33,0x66,0x16,0x7f +0xd0,0x65,0x39,0x7f +0xab,0x65,0x60,0x7f + +#---------------------------------------------------------------------- +# Shift Right And Insert (Immediate) +#---------------------------------------------------------------------- +# CHECK: sri d10, d12, #14 +0x8a,0x45,0x72,0x7f + +#---------------------------------------------------------------------- +# Shift Left And Insert (Immediate) +#---------------------------------------------------------------------- +# CHECK: sli d10, d14, #12 +0xca,0x55,0x4c,0x7f + +#---------------------------------------------------------------------- +# Signed Saturating Shift Right Narrow (Immediate) +#---------------------------------------------------------------------- +# CHECK: sqshrn b10, h15, #5 +# CHECK: sqshrn h17, s10, #4 +# CHECK: sqshrn s18, d10, #31 +0xea,0x95,0x0b,0x5f +0x51,0x95,0x1c,0x5f +0x52,0x95,0x21,0x5f + +#---------------------------------------------------------------------- +# Unsigned Saturating Shift Right Narrow (Immediate) +#---------------------------------------------------------------------- +# CHECK: uqshrn b12, h10, #7 +# CHECK: uqshrn h10, s14, #5 +# CHECK: uqshrn s10, d12, #13 +0x4c,0x95,0x09,0x7f +0xca,0x95,0x1b,0x7f +0x8a,0x95,0x33,0x7f + +#---------------------------------------------------------------------- +# Signed Saturating Rounded Shift Right Narrow (Immediate) +#---------------------------------------------------------------------- +# CHECK: sqrshrn b10, h13, #2 +# CHECK: sqrshrn h15, s10, #6 +# CHECK: sqrshrn s15, d12, #9 +0xaa,0x9d,0x0e,0x5f +0x4f,0x9d,0x1a,0x5f +0x8f,0x9d,0x37,0x5f + +#---------------------------------------------------------------------- +# Unsigned Saturating Rounded Shift Right Narrow (Immediate) +#---------------------------------------------------------------------- +# CHECK: uqrshrn b10, h12, #5 +# CHECK: uqrshrn h12, s10, #14 +# CHECK: uqrshrn s10, d10, #25 
+0x8a,0x9d,0x0b,0x7f +0x4c,0x9d,0x12,0x7f +0x4a,0x9d,0x27,0x7f + +#---------------------------------------------------------------------- +# Signed Saturating Shift Right Unsigned Narrow (Immediate) +#---------------------------------------------------------------------- +# CHECK: sqshrun b15, h10, #7 +# CHECK: sqshrun h20, s14, #3 +# CHECK: sqshrun s10, d15, #15 +0x4f,0x85,0x09,0x7f +0xd4,0x85,0x1d,0x7f +0xea,0x85,0x31,0x7f + +#---------------------------------------------------------------------- +# Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate) +#---------------------------------------------------------------------- +# CHECK: sqrshrun b17, h10, #6 +# CHECK: sqrshrun h10, s13, #15 +# CHECK: sqrshrun s22, d16, #31 +0x51,0x8d,0x0a,0x7f +0xaa,0x8d,0x11,0x7f +0x16,0x8e,0x21,0x7f + +#---------------------------------------------------------------------- +# Scalar Signed Fixed-point Convert To Floating-Point (Immediate) +#---------------------------------------------------------------------- +# CHECK: scvtf s22, s13, #32 +# CHECK: scvtf d21, d12, #64 +0xb6,0xe5,0x20,0x5f +0x95,0xe5,0x40,0x5f + +#---------------------------------------------------------------------- +# Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate) +#---------------------------------------------------------------------- +# CHECK: ucvtf s22, s13, #32 +# CHECK: ucvtf d21, d14, #64 +0xb6,0xe5,0x20,0x7f +0xd5,0xe5,0x40,0x7f + +#---------------------------------------------------------------------- +# Scalar Floating-point Convert To Signed Fixed-point (Immediate) +#---------------------------------------------------------------------- +# CHECK: fcvtzs s21, s12, #1 +# CHECK: fcvtzs d21, d12, #1 +0x95,0xfd,0x3f,0x5f +0x95,0xfd,0x7f,0x5f + +#---------------------------------------------------------------------- +# Scalar Floating-point Convert To Unsigned Fixed-point (Immediate) +#---------------------------------------------------------------------- +# CHECK: fcvtzu s21, 
s12, #1 +# CHECK: fcvtzu d21, d12, #1 +0x95,0xfd,0x3f,0x7f +0x95,0xfd,0x7f,0x7f + +#---------------------------------------------------------------------- +# Vector load/store multiple N-element structure +#---------------------------------------------------------------------- +# CHECK: ld1 {v0.16b}, [x0] +# CHECK: ld1 {v15.8h, v16.8h}, [x15] +# CHECK: ld1 {v31.4s, v0.4s, v1.4s}, [sp] +# CHECK: ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] +0x00,0x70,0x40,0x4c +0xef,0xa5,0x40,0x4c +0xff,0x6b,0x40,0x4c +0x00,0x2c,0x40,0x4c + +# CHECK: ld2 {v0.8b, v1.8b}, [x0] +# CHECK: ld3 {v15.4h, v16.4h, v17.4h}, [x15] +# CHECK: ld4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] +0x00,0x80,0x40,0x0c +0xef,0x45,0x40,0x0c +0xff,0x0b,0x40,0x0c + +# CHECK: st1 {v0.16b}, [x0] +# CHECK: st1 {v15.8h, v16.8h}, [x15] +# CHECK: st1 {v31.4s, v0.4s, v1.4s}, [sp] +# CHECK: st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] +0x00,0x70,0x00,0x4c +0xef,0xa5,0x00,0x4c +0xff,0x6b,0x00,0x4c +0x00,0x2c,0x00,0x4c + +# CHECK: st2 {v0.8b, v1.8b}, [x0] +# CHECK: st3 {v15.4h, v16.4h, v17.4h}, [x15] +# CHECK: st4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] +0x00,0x80,0x00,0x0c +0xef,0x45,0x00,0x0c +0xff,0x0b,0x00,0x0c + +#---------------------------------------------------------------------- +# Vector load/store multiple N-element structure (post-index) +#---------------------------------------------------------------------- +# CHECK: ld1 {v15.8h}, [x15], x2 +# CHECK: ld1 {v31.4s, v0.4s}, [sp], #32 +# CHECK: ld1 {v0.2d, v1.2d, v2.2d}, [x0], #48 +# CHECK: ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x3 +0xef,0x75,0xc2,0x4c +0xff,0xab,0xdf,0x4c +0x00,0x6c,0xdf,0x4c +0x00,0x20,0xc3,0x0c + +# CHECK: ld2 {v0.16b, v1.16b}, [x0], x1 +# CHECK: ld3 {v15.8h, v16.8h, v17.8h}, [x15], x2 +# CHECK: ld4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64 +0x00,0x80,0xc1,0x4c +0xef,0x45,0xc2,0x4c +0xff,0x0b,0xdf,0x4c + + +# CHECK: st1 {v15.8h}, [x15], x2 +# CHECK: st1 {v31.4s, v0.4s}, [sp], #32 +# CHECK: st1 {v0.2d, v1.2d, v2.2d}, [x0], #48 +# CHECK: st1 {v0.8b, v1.8b, 
v2.8b, v3.8b}, [x0], x3 +0xef,0x75,0x82,0x4c +0xff,0xab,0x9f,0x4c +0x00,0x6c,0x9f,0x4c +0x00,0x20,0x83,0x0c + +# CHECK: st2 {v0.16b, v1.16b}, [x0], x1 +# CHECK: st3 {v15.8h, v16.8h, v17.8h}, [x15], x2 +# CHECK: st4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64 +0x00,0x80,0x81,0x4c +0xef,0x45,0x82,0x4c +0xff,0x0b,0x9f,0x4c + +#---------------------------------------------------------------------- +# Vector load single N-element structure to all lane of N +# consecutive registers (N = 1,2,3,4) +#---------------------------------------------------------------------- +# CHECK: ld1r {v0.16b}, [x0] +# CHECK: ld1r {v15.8h}, [x15] +# CHECK: ld2r {v31.4s, v0.4s}, [sp] +# CHECK: ld2r {v0.2d, v1.2d}, [x0] +# CHECK: ld3r {v0.8b, v1.8b, v2.8b}, [x0] +# CHECK: ld3r {v15.4h, v16.4h, v17.4h}, [x15] +# CHECK: ld4r {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] +# CHECK: ld4r {v31.1d, v0.1d, v1.1d, v2.1d}, [sp] +0x00,0xc0,0x40,0x4d +0xef,0xc5,0x40,0x4d +0xff,0xcb,0x60,0x4d +0x00,0xcc,0x60,0x4d +0x00,0xe0,0x40,0x0d +0xef,0xe5,0x40,0x0d +0xff,0xeb,0x60,0x0d +0xff,0xef,0x60,0x0d + +#---------------------------------------------------------------------- +# Vector load/store single N-element structure to/from one lane of N +# consecutive registers (N = 1,2,3,4) +#---------------------------------------------------------------------- +# CHECK: ld1 {v0.b}[9], [x0] +# CHECK: ld2 {v15.h, v16.h}[7], [x15] +# CHECK: ld3 {v31.s, v0.s, v1.s}[3], [sp] +# CHECK: ld4 {v0.d, v1.d, v2.d, v3.d}[1], [x0] +# CHECK: st1 {v0.d}[1], [x0] +# CHECK: st2 {v31.s, v0.s}[3], [sp] +# CHECK: st3 {v15.h, v16.h, v17.h}[7], [x15] +# CHECK: st4 {v0.b, v1.b, v2.b, v3.b}[9], [x0] +0x00,0x04,0x40,0x4d +0xef,0x59,0x60,0x4d +0xff,0xb3,0x40,0x4d +0x00,0xa4,0x60,0x4d +0x00,0x84,0x00,0x4d +0xff,0x93,0x20,0x4d +0xef,0x79,0x00,0x4d +0x00,0x24,0x20,0x4d + +#---------------------------------------------------------------------- +# Post-index of vector load single N-element structure to all lane of N +# consecutive registers (N = 1,2,3,4) 
+#---------------------------------------------------------------------- +# CHECK: ld1r {v0.16b}, [x0], #1 +# CHECK: ld1r {v15.8h}, [x15], #2 +# CHECK: ld2r {v31.4s, v0.4s}, [sp], #8 +# CHECK: ld2r {v0.2d, v1.2d}, [x0], #16 +# CHECK: ld3r {v0.8b, v1.8b, v2.8b}, [x0], #3 +# CHECK: ld3r {v15.4h, v16.4h, v17.4h}, [x15], #6 +# CHECK: ld4r {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], x30 +# CHECK: ld4r {v31.1d, v0.1d, v1.1d, v2.1d}, [sp], x7 +0x00,0xc0,0xdf,0x4d +0xef,0xc5,0xdf,0x4d +0xff,0xcb,0xff,0x4d +0x00,0xcc,0xff,0x4d +0x00,0xe0,0xdf,0x0d +0xef,0xe5,0xdf,0x0d +0xff,0xeb,0xfe,0x0d +0xff,0xef,0xe7,0x0d + +#---------------------------------------------------------------------- +# Post-index of vector load/store single N-element structure to/from +# one lane of N consecutive registers (N = 1,2,3,4) +#---------------------------------------------------------------------- +# CHECK: ld1 {v0.b}[9], [x0], #1 +# CHECK: ld2 {v15.h, v16.h}[7], [x15], #4 +# CHECK: ld3 {v31.s, v0.s, v1.s}[3], [sp], x3 +# CHECK: ld4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #32 +# CHECK: ld4 {v0.h, v1.h, v2.h, v3.h}[7], [x0], x0 +# CHECK: st1 {v0.d}[1], [x0], #8 +# CHECK: st2 {v31.s, v0.s}[3], [sp], #8 +# CHECK: st3 {v15.h, v16.h, v17.h}[7], [x15], #6 +# CHECK: st4 {v0.b, v1.b, v2.b, v3.b}[9], [x0], x5 +0x00,0x04,0xdf,0x4d +0xef,0x59,0xff,0x4d +0xff,0xb3,0xc3,0x4d +0x00,0xa4,0xff,0x4d +0x00,0x78,0xe0,0x4d +0x00,0x84,0x9f,0x4d +0xff,0x93,0xbf,0x4d +0xef,0x79,0x9f,0x4d +0x00,0x24,0xa5,0x4d + +#---------------------------------------------------------------------- +# Bitwise extract +#---------------------------------------------------------------------- +0x20,0x18,0x02,0x2e +0x20,0x18,0x02,0x6e +# CHECK: ext v0.8b, v1.8b, v2.8b, #0x3 +# CHECK: ext v0.16b, v1.16b, v2.16b, #0x3 + +#---------------------------------------------------------------------- +# unzip with 3 same vectors to get primary result +#---------------------------------------------------------------------- +# CHECK: uzp1 v1.8b, v1.8b, v2.8b +# 
CHECK: uzp1 v2.16b, v1.16b, v2.16b +# CHECK: uzp1 v3.4h, v1.4h, v2.4h +# CHECK: uzp1 v4.8h, v1.8h, v2.8h +# CHECK: uzp1 v5.2s, v1.2s, v2.2s +# CHECK: uzp1 v6.4s, v1.4s, v2.4s +# CHECK: uzp1 v7.2d, v1.2d, v2.2d +0x21,0x18,0x02,0x0e +0x22,0x18,0x02,0x4e +0x23,0x18,0x42,0x0e +0x24,0x18,0x42,0x4e +0x25,0x18,0x82,0x0e +0x26,0x18,0x82,0x4e +0x27,0x18,0xc2,0x4e + +#---------------------------------------------------------------------- +# transpose with 3 same vectors to get primary result +#---------------------------------------------------------------------- +# CHECK: trn1 v8.8b, v1.8b, v2.8b +# CHECK: trn1 v9.16b, v1.16b, v2.16b +# CHECK: trn1 v10.4h, v1.4h, v2.4h +# CHECK: trn1 v27.8h, v7.8h, v2.8h +# CHECK: trn1 v12.2s, v7.2s, v2.2s +# CHECK: trn1 v29.4s, v6.4s, v2.4s +# CHECK: trn1 v14.2d, v6.2d, v2.2d +0x28,0x28,0x02,0x0e +0x29,0x28,0x02,0x4e +0x2a,0x28,0x42,0x0e +0xfb,0x28,0x42,0x4e +0xec,0x28,0x82,0x0e +0xdd,0x28,0x82,0x4e +0xce,0x28,0xc2,0x4e + +#---------------------------------------------------------------------- +# zip with 3 same vectors to get primary result +#---------------------------------------------------------------------- +# CHECK: zip1 v31.8b, v5.8b, v2.8b +# CHECK: zip1 v0.16b, v5.16b, v2.16b +# CHECK: zip1 v17.4h, v4.4h, v2.4h +# CHECK: zip1 v2.8h, v4.8h, v2.8h +# CHECK: zip1 v19.2s, v3.2s, v2.2s +# CHECK: zip1 v4.4s, v3.4s, v2.4s +# CHECK: zip1 v21.2d, v2.2d, v2.2d +0xbf,0x38,0x02,0x0e +0xa0,0x38,0x02,0x4e +0x91,0x38,0x42,0x0e +0x82,0x38,0x42,0x4e +0x73,0x38,0x82,0x0e +0x64,0x38,0x82,0x4e +0x55,0x38,0xc2,0x4e + +#---------------------------------------------------------------------- +# unzip with 3 same vectors to get secondary result +#---------------------------------------------------------------------- +# CHECK: uzp2 v6.8b, v2.8b, v2.8b +# CHECK: uzp2 v23.16b, v1.16b, v2.16b +# CHECK: uzp2 v8.4h, v1.4h, v2.4h +# CHECK: uzp2 v25.8h, v0.8h, v2.8h +# CHECK: uzp2 v10.2s, v0.2s, v2.2s +# CHECK: uzp2 v27.4s, v7.4s, v2.4s +# CHECK: uzp2 v12.2d, 
v7.2d, v2.2d +0x46,0x58,0x02,0x0e +0x37,0x58,0x02,0x4e +0x28,0x58,0x42,0x0e +0x19,0x58,0x42,0x4e +0x0a,0x58,0x82,0x0e +0xfb,0x58,0x82,0x4e +0xec,0x58,0xc2,0x4e + +#---------------------------------------------------------------------- +# transpose with 3 same vectors to get secondary result +#---------------------------------------------------------------------- +# CHECK: trn2 v29.8b, v6.8b, v2.8b +# CHECK: trn2 v14.16b, v6.16b, v2.16b +# CHECK: trn2 v31.4h, v5.4h, v2.4h +# CHECK: trn2 v0.8h, v5.8h, v2.8h +# CHECK: trn2 v17.2s, v4.2s, v2.2s +# CHECK: trn2 v2.4s, v4.4s, v2.4s +# CHECK: trn2 v19.2d, v3.2d, v2.2d +0xdd,0x68,0x02,0x0e +0xce,0x68,0x02,0x4e +0xbf,0x68,0x42,0x0e +0xa0,0x68,0x42,0x4e +0x91,0x68,0x82,0x0e +0x82,0x68,0x82,0x4e +0x73,0x68,0xc2,0x4e + +#---------------------------------------------------------------------- +# zip with 3 same vectors to get secondary result +#---------------------------------------------------------------------- +# CHECK: zip2 v4.8b, v3.8b, v2.8b +# CHECK: zip2 v21.16b, v2.16b, v2.16b +# CHECK: zip2 v6.4h, v2.4h, v2.4h +# CHECK: zip2 v23.8h, v1.8h, v2.8h +# CHECK: zip2 v8.2s, v1.2s, v2.2s +# CHECK: zip2 v25.4s, v0.4s, v2.4s +# CHECK: zip2 v10.2d, v0.2d, v2.2d +0x64,0x78,0x02,0x0e +0x55,0x78,0x02,0x4e +0x46,0x78,0x42,0x0e +0x37,0x78,0x42,0x4e +0x28,0x78,0x82,0x0e +0x19,0x78,0x82,0x4e +0x0a,0x78,0xc2,0x4e + +#---------------------------------------------------------------------- +# Scalar Floating Point multiply (scalar, by element) +#---------------------------------------------------------------------- +# CHECK: fmul s0, s1, v1.s[0] +# CHECK: fmul s0, s1, v1.s[3] +# CHECK: fmul d0, d1, v1.d[0] +# CHECK: fmul d0, d1, v1.d[1] +# CHECK: fmul d15, d15, v15.d[1] +0x20 0x90 0x81 0x5f +0x20 0x98 0xa1 0x5f +0x20 0x90 0xc1 0x5f +0x20 0x98 0xc1 0x5f +0xef 0x99 0xcf 0x5f + +#---------------------------------------------------------------------- +# Scalar Floating Point multiply extended (scalar, by element) 
+#---------------------------------------------------------------------- +# CHECK: fmulx s3, s5, v7.s[0] +# CHECK: fmulx s3, s5, v7.s[3] +# CHECK: fmulx s3, s5, v15.s[3] +# CHECK: fmulx d0, d4, v8.d[0] +# CHECK: fmulx d0, d4, v8.d[1] +0xa3 0x90 0x87 0x7f +0xa3 0x98 0xa7 0x7f +0xa3 0x98 0xaf 0x7f +0x80 0x90 0xc8 0x7f +0x80 0x98 0xc8 0x7f + +#---------------------------------------------------------------------- +# Scalar Floating Point fused multiply-add (scalar, by element) +#---------------------------------------------------------------------- +# CHECK: fmla s0, s1, v1.s[0] +# CHECK: fmla s0, s1, v1.s[3] +# CHECK: fmla d0, d1, v1.d[0] +# CHECK: fmla d0, d1, v1.d[1] +# CHECK: fmla d15, d15, v15.d[1] +0x20 0x10 0x81 0x5f +0x20 0x18 0xa1 0x5f +0x20 0x10 0xc1 0x5f +0x20 0x18 0xc1 0x5f +0xef 0x19 0xcf 0x5f + +#---------------------------------------------------------------------- +# Scalar Floating Point fused multiply-sub (scalar, by element) +#---------------------------------------------------------------------- +# CHECK: fmls s3, s5, v7.s[0] +# CHECK: fmls s3, s5, v7.s[3] +# CHECK: fmls s3, s5, v15.s[3] +# CHECK: fmls d0, d4, v8.d[0] +# CHECK: fmls d0, d4, v8.d[1] +0xa3 0x50 0x87 0x5f +0xa3 0x58 0xa7 0x5f +0xa3 0x58 0xaf 0x5f +0x80 0x50 0xc8 0x5f +0x80 0x58 0xc8 0x5f + +#---------------------------------------------------------------------- +# Scalar Signed saturating doubling +# multiply-add long (scalar, by element) +#---------------------------------------------------------------------- +# CHECK: sqdmlal s0, h0, v0.h[0] +# CHECK: sqdmlal s0, h0, v0.h[1] +# CHECK: sqdmlal s0, h0, v0.h[2] +# CHECK: sqdmlal s0, h0, v0.h[3] +# CHECK: sqdmlal s0, h0, v0.h[4] +# CHECK: sqdmlal s0, h0, v0.h[5] +# CHECK: sqdmlal s0, h0, v0.h[6] +# CHECK: sqdmlal s0, h0, v0.h[7] +# CHECK: sqdmlal d8, s9, v15.s[0] +# CHECK: sqdmlal d8, s9, v15.s[1] +# CHECK: sqdmlal d8, s9, v15.s[2] +# CHECK: sqdmlal d8, s9, v15.s[3] +0x00 0x30 0x40 0x5f +0x00 0x30 0x50 0x5f +0x00 0x30 0x60 0x5f +0x00 
0x30 0x70 0x5f +0x00 0x38 0x40 0x5f +0x00 0x38 0x50 0x5f +0x00 0x38 0x60 0x5f +0x00 0x38 0x70 0x5f +0x28 0x31 0x8f 0x5f +0x28 0x31 0xaf 0x5f +0x28 0x39 0x8f 0x5f +0x28 0x39 0xaf 0x5f + +#---------------------------------------------------------------------- +# Scalar Signed saturating doubling +# multiply-sub long (scalar, by element) +#---------------------------------------------------------------------- +# CHECK: sqdmlsl s0, h0, v0.h[0] +# CHECK: sqdmlsl s0, h0, v0.h[1] +# CHECK: sqdmlsl s0, h0, v0.h[2] +# CHECK: sqdmlsl s0, h0, v0.h[3] +# CHECK: sqdmlsl s0, h0, v0.h[4] +# CHECK: sqdmlsl s0, h0, v0.h[5] +# CHECK: sqdmlsl s0, h0, v0.h[6] +# CHECK: sqdmlsl s0, h0, v0.h[7] +# CHECK: sqdmlsl d8, s9, v15.s[0] +# CHECK: sqdmlsl d8, s9, v15.s[1] +# CHECK: sqdmlsl d8, s9, v15.s[2] +# CHECK: sqdmlsl d8, s9, v15.s[3] +0x00 0x70 0x40 0x5f +0x00 0x70 0x50 0x5f +0x00 0x70 0x60 0x5f +0x00 0x70 0x70 0x5f +0x00 0x78 0x40 0x5f +0x00 0x78 0x50 0x5f +0x00 0x78 0x60 0x5f +0x00 0x78 0x70 0x5f +0x28 0x71 0x8f 0x5f +0x28 0x71 0xaf 0x5f +0x28 0x79 0x8f 0x5f +0x28 0x79 0xaf 0x5f + +#---------------------------------------------------------------------- +# Scalar Signed saturating doubling multiply long (scalar, by element) +#---------------------------------------------------------------------- +# CHECK: sqdmull s1, h1, v1.h[0] +# CHECK: sqdmull s1, h1, v1.h[1] +# CHECK: sqdmull s1, h1, v1.h[2] +# CHECK: sqdmull s1, h1, v1.h[3] +# CHECK: sqdmull s1, h1, v1.h[4] +# CHECK: sqdmull s1, h1, v1.h[5] +# CHECK: sqdmull s1, h1, v1.h[6] +# CHECK: sqdmull s1, h1, v1.h[7] +# CHECK: sqdmull d1, s1, v4.s[0] +# CHECK: sqdmull d1, s1, v4.s[1] +# CHECK: sqdmull d1, s1, v4.s[2] +# CHECK: sqdmull d1, s1, v4.s[3] +0x21 0xb0 0x41 0x5f +0x21 0xb0 0x51 0x5f +0x21 0xb0 0x61 0x5f +0x21 0xb0 0x71 0x5f +0x21 0xb8 0x41 0x5f +0x21 0xb8 0x51 0x5f +0x21 0xb8 0x61 0x5f +0x21 0xb8 0x71 0x5f +0x21 0xb0 0x84 0x5f +0x21 0xb0 0xa4 0x5f +0x21 0xb8 0x84 0x5f +0x21 0xb8 0xa4 0x5f + 
+#---------------------------------------------------------------------- +# Scalar Signed saturating doubling multiply returning +# high half (scalar, by element) +#---------------------------------------------------------------------- +# CHECK: sqdmulh h7, h1, v14.h[0] +# CHECK: sqdmulh h7, h15, v8.h[1] +# CHECK: sqdmulh h7, h15, v8.h[2] +# CHECK: sqdmulh h7, h15, v8.h[3] +# CHECK: sqdmulh h7, h15, v8.h[4] +# CHECK: sqdmulh h7, h15, v8.h[5] +# CHECK: sqdmulh h7, h15, v8.h[6] +# CHECK: sqdmulh h7, h15, v8.h[7] +# CHECK: sqdmulh s15, s3, v4.s[0] +# CHECK: sqdmulh s15, s14, v16.s[1] +# CHECK: sqdmulh s15, s15, v16.s[2] +# CHECK: sqdmulh s15, s16, v17.s[3] +0x27 0xc0 0x4e 0x5f +0xe7 0xc1 0x58 0x5f +0xe7 0xc1 0x68 0x5f +0xe7 0xc1 0x78 0x5f +0xe7 0xc9 0x48 0x5f +0xe7 0xc9 0x58 0x5f +0xe7 0xc9 0x68 0x5f +0xe7 0xc9 0x78 0x5f +0x6f 0xc0 0x84 0x5f +0xcf 0xc1 0xb0 0x5f +0xef 0xc9 0x90 0x5f +0x0f 0xca 0xb1 0x5f + +#---------------------------------------------------------------------- +# Scalar Signed saturating rounding doubling multiply +# returning high half (scalar, by element) +#---------------------------------------------------------------------- +# CHECK: sqrdmulh h7, h1, v14.h[0] +# CHECK: sqrdmulh h7, h15, v8.h[1] +# CHECK: sqrdmulh h7, h15, v8.h[2] +# CHECK: sqrdmulh h7, h15, v8.h[3] +# CHECK: sqrdmulh h7, h15, v8.h[4] +# CHECK: sqrdmulh h7, h15, v8.h[5] +# CHECK: sqrdmulh h7, h15, v8.h[6] +# CHECK: sqrdmulh h7, h15, v8.h[7] +# CHECK: sqrdmulh s15, s3, v4.s[0] +# CHECK: sqrdmulh s15, s14, v16.s[1] +# CHECK: sqrdmulh s15, s15, v16.s[2] +# CHECK: sqrdmulh s15, s16, v17.s[3] +0x27 0xd0 0x4e 0x5f +0xe7 0xd1 0x58 0x5f +0xe7 0xd1 0x68 0x5f +0xe7 0xd1 0x78 0x5f +0xe7 0xd9 0x48 0x5f +0xe7 0xd9 0x58 0x5f +0xe7 0xd9 0x68 0x5f +0xe7 0xd9 0x78 0x5f +0x6f 0xd0 0x84 0x5f +0xcf 0xd1 0xb0 0x5f +0xef 0xd9 0x90 0x5f +0x0f 0xda 0xb1 0x5f + +#---------------------------------------------------------------------- +#Duplicate element (scalar) 
+#---------------------------------------------------------------------- +# CHECK: dup b0, v0.b[15] +# CHECK: dup h2, v31.h[5] +# CHECK: dup s17, v2.s[2] +# CHECK: dup d6, v12.d[1] +0x00 0x04 0x1f 0x5e +0xe2 0x07 0x16 0x5e +0x51 0x04 0x14 0x5e +0x86 0x05 0x18 0x5e + +#---------------------------------------------------------------------- +# Table look up +#---------------------------------------------------------------------- +0x20,0x00,0x02,0x0e +0xf0,0x23,0x02,0x0e +0x20,0x40,0x02,0x0e +0xf0,0x62,0x02,0x0e +# CHECK: tbl v0.8b, {v1.16b}, v2.8b +# CHECK: tbl v16.8b, {v31.16b, v0.16b}, v2.8b +# CHECK: tbl v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b +# CHECK: tbl v16.8b, {v23.16b, v24.16b, v25.16b, v26.16b}, v2.8b + +0x20,0x00,0x02,0x4e +0xf0,0x23,0x02,0x4e +0x20,0x40,0x02,0x4e +0xe0,0x63,0x02,0x4e +# CHECK: tbl v0.16b, {v1.16b}, v2.16b +# CHECK: tbl v16.16b, {v31.16b, v0.16b}, v2.16b +# CHECK: tbl v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b +# CHECK: tbl v0.16b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.16b + +0x20,0x10,0x02,0x0e +0xf0,0x33,0x02,0x0e +0x20,0x50,0x02,0x0e +0xf0,0x72,0x02,0x0e +# CHECK: tbx v0.8b, {v1.16b}, v2.8b +# CHECK: tbx v16.8b, {v31.16b, v0.16b}, v2.8b +# CHECK: tbx v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b +# CHECK: tbx v16.8b, {v23.16b, v24.16b, v25.16b, v26.16b}, v2.8b + +0x20,0x10,0x02,0x4e +0xf0,0x33,0x02,0x4e +0x20,0x50,0x02,0x4e +0xf0,0x73,0x02,0x4e +# CHECK: tbx v0.16b, {v1.16b}, v2.16b +# CHECK: tbx v16.16b, {v31.16b, v0.16b}, v2.16b +# CHECK: tbx v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b +# CHECK: tbx v16.16b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.16b + +#---------------------------------------------------------------------- +# Scalar Floating-point Convert To Lower Precision Narrow, Rounding To +# Odd +#---------------------------------------------------------------------- +# CHECK: fcvtxn s22, d13 +0xb6,0x69,0x61,0x7e + +#---------------------------------------------------------------------- +# Scalar Floating-point Convert To Signed Integer, 
Rounding To Nearest +# With Ties To Away +#---------------------------------------------------------------------- +# CHECK: fcvtas s12, s13 +# CHECK: fcvtas d21, d14 + +0xac,0xc9,0x21,0x5e +0xd5,0xc9,0x61,0x5e + +#---------------------------------------------------------------------- +# Scalar Floating-point Convert To Unsigned Integer, Rounding To +# Nearest With Ties To Away +#---------------------------------------------------------------------- +# CHECK: fcvtau s12, s13 +# CHECK: fcvtau d21, d14 +0xac,0xc9,0x21,0x7e +0xd5,0xc9,0x61,0x7e + +#---------------------------------------------------------------------- +# Scalar Floating-point Convert To Signed Integer, Rounding Toward +# Minus Infinity +#---------------------------------------------------------------------- +# CHECK: fcvtms s22, s13 +# CHECK: fcvtms d21, d14 +0xb6,0xb9,0x21,0x5e +0xd5,0xb9,0x61,0x5e + +#---------------------------------------------------------------------- +# Scalar Floating-point Convert To Unsigned Integer, Rounding Toward +# Minus Infinity +#---------------------------------------------------------------------- +# CHECK: fcvtmu s12, s13 +# CHECK: fcvtmu d21, d14 +0xac,0xb9,0x21,0x7e +0xd5,0xb9,0x61,0x7e + +#---------------------------------------------------------------------- +# Scalar Floating-point Convert To Signed Integer, Rounding To Nearest +# With Ties To Even +#---------------------------------------------------------------------- + +# CHECK: fcvtns s22, s13 +# CHECK: fcvtns d21, d14 + +0xb6,0xa9,0x21,0x5e +0xd5,0xa9,0x61,0x5e + +#---------------------------------------------------------------------- +# Scalar Floating-point Convert To Unsigned Integer, Rounding To +# Nearest With Ties To Even +#---------------------------------------------------------------------- + +# CHECK: fcvtnu s12, s13 +# CHECK: fcvtnu d21, d14 +0xac,0xa9,0x21,0x7e +0xd5,0xa9,0x61,0x7e + +#---------------------------------------------------------------------- +# Scalar Floating-point Convert To 
Signed Integer, Rounding Toward +# Positive Infinity +#---------------------------------------------------------------------- +# CHECK: fcvtps s22, s13 +# CHECK: fcvtps d21, d14 +0xb6,0xa9,0xa1,0x5e +0xd5,0xa9,0xe1,0x5e + +#---------------------------------------------------------------------- +# Scalar Floating-point Convert To Unsigned Integer, Rounding Toward +# Positive Infinity +#---------------------------------------------------------------------- +# CHECK: fcvtpu s12, s13 +# CHECK: fcvtpu d21, d14 +0xac,0xa9,0xa1,0x7e +0xd5,0xa9,0xe1,0x7e + +#---------------------------------------------------------------------- +# Scalar Floating-point Convert To Signed Integer, Rounding Toward Zero +#---------------------------------------------------------------------- +# CHECK: fcvtzs s12, s13 +# CHECK: fcvtzs d21, d14 +0xac,0xb9,0xa1,0x5e +0xd5,0xb9,0xe1,0x5e + +#---------------------------------------------------------------------- +# Scalar Floating-point Convert To Unsigned Integer, Rounding Toward +# Zero +#---------------------------------------------------------------------- +# CHECK: fcvtzu s12, s13 +# CHECK: fcvtzu d21, d14 +0xac,0xb9,0xa1,0x7e +0xd5,0xb9,0xe1,0x7e + +#---------------------------------------------------------------------- +# Scalar Floating-point Absolute Difference +#---------------------------------------------------------------------- +# CHECK: fabd s29, s24, s20 +# CHECK: fabd d29, d24, d20 +0x1d,0xd7,0xb4,0x7e +0x1d,0xd7,0xf4,0x7e diff --git a/test/MC/Disassembler/ARM/basic-arm-instructions-v8.txt b/test/MC/Disassembler/ARM/basic-arm-instructions-v8.txt new file mode 100644 index 0000000..d9286bf --- /dev/null +++ b/test/MC/Disassembler/ARM/basic-arm-instructions-v8.txt @@ -0,0 +1,58 @@ +# RUN: llvm-mc -disassemble -triple armv8 -mattr=+db -show-encoding < %s | FileCheck %s + +# New v8 ARM instructions + +# HLT + +0x70 0x00 0x00 0xe1 +# CHECK: hlt #0 + +0x7f 0xff 0x0f 0xe1 +# CHECK: hlt #65535 + +0x59 0xf0 0x7f 0xf5 +0x51 0xf0 0x7f 0xf5 
+0x55 0xf0 0x7f 0xf5 +0x5d 0xf0 0x7f 0xf5 +# CHECK: dmb ishld +# CHECK: dmb oshld +# CHECK: dmb nshld +# CHECK: dmb ld + +0x05 0xf0 0x20 0xe3 +# CHECK: sevl + + +# These are the only coprocessor instructions that remain defined in ARMv8 +# (The operations on p10/p11 disassemble into FP/NEON instructions) + +0x10 0x0e 0x00 0xee +# CHECK: mcr p14 + +0x10 0x0f 0x00 0xee +# CHECK: mcr p15 + +0x10 0x0e 0x10 0xee +# CHECK: mrc p14 + +0x10 0x0f 0x10 0xee +# CHECK: mrc p15 + +0x00 0x0e 0x40 0xec +# CHECK: mcrr p14 + +0x00 0x0f 0x40 0xec +# CHECK: mcrr p15 + +0x00 0x0e 0x50 0xec +# CHECK: mrrc p14 + +0x00 0x0f 0x50 0xec +# CHECK: mrrc p15 + +0x00 0x0e 0x80 0xec +# CHECK: stc p14 + +0x00 0x0e 0x90 0xec +# CHECK: ldc p14 + diff --git a/test/MC/Disassembler/ARM/basic-arm-instructions.txt b/test/MC/Disassembler/ARM/basic-arm-instructions.txt index fd36268..8bcf4e6 100644 --- a/test/MC/Disassembler/ARM/basic-arm-instructions.txt +++ b/test/MC/Disassembler/ARM/basic-arm-instructions.txt @@ -2420,6 +2420,7 @@ # CHECK: wfilt # CHECK: yield # CHECK: yieldne +# CHECK: hint #5 0x02 0xf0 0x20 0xe3 0x02 0xf0 0x20 0x83 @@ -2427,3 +2428,4 @@ 0x03 0xf0 0x20 0xb3 0x01 0xf0 0x20 0xe3 0x01 0xf0 0x20 0x13 +0x05 0xf0 0x20 0xe3 diff --git a/test/MC/Disassembler/ARM/crc32-thumb.txt b/test/MC/Disassembler/ARM/crc32-thumb.txt new file mode 100644 index 0000000..2f83b58 --- /dev/null +++ b/test/MC/Disassembler/ARM/crc32-thumb.txt @@ -0,0 +1,15 @@ +# RUN: llvm-mc --disassemble %s -triple=thumbv8 2>&1 | FileCheck %s + +# CHECK: crc32b r0, r1, r2 +# CHECK: crc32h r0, r1, r2 +# CHECK: crc32w r0, r1, r2 +# CHECK: crc32cb r0, r1, r2 +# CHECK: crc32ch r0, r1, r2 +# CHECK: crc32cw r0, r1, r2 + +0xc1 0xfa 0x82 0xf0 +0xc1 0xfa 0x92 0xf0 +0xc1 0xfa 0xa2 0xf0 +0xd1 0xfa 0x82 0xf0 +0xd1 0xfa 0x92 0xf0 +0xd1 0xfa 0xa2 0xf0 diff --git a/test/MC/Disassembler/ARM/crc32.txt b/test/MC/Disassembler/ARM/crc32.txt new file mode 100644 index 0000000..17bb032 --- /dev/null +++ b/test/MC/Disassembler/ARM/crc32.txt @@ -0,0 
+1,15 @@ +# RUN: llvm-mc --disassemble %s -triple=armv8 2>&1 | FileCheck %s + +# CHECK: crc32b r0, r1, r2 +# CHECK: crc32h r0, r1, r2 +# CHECK: crc32w r0, r1, r2 +# CHECK: crc32cb r0, r1, r2 +# CHECK: crc32ch r0, r1, r2 +# CHECK: crc32cw r0, r1, r2 + +0x42 0x00 0x01 0xe1 +0x42 0x00 0x21 0xe1 +0x42 0x00 0x41 0xe1 +0x42 0x02 0x01 0xe1 +0x42 0x02 0x21 0xe1 +0x42 0x02 0x41 0xe1 diff --git a/test/MC/Disassembler/ARM/v8fp.txt b/test/MC/Disassembler/ARM/fp-armv8.txt index a6e88b6..46a26f5 100644 --- a/test/MC/Disassembler/ARM/v8fp.txt +++ b/test/MC/Disassembler/ARM/fp-armv8.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc -disassemble -triple armv8 -mattr=+v8fp -show-encoding < %s | FileCheck %s +# RUN: llvm-mc -disassemble -triple armv8 -mattr=+fp-armv8 -show-encoding < %s | FileCheck %s 0xe0 0x3b 0xb2 0xee # CHECK: vcvtt.f64.f16 d3, s1 @@ -153,3 +153,8 @@ 0x60 0x6a 0xbb 0xfe # CHECK: vrintm.f32 s12, s1 + + +0x10 0xa 0xf5 0xee +# CHECK: vmrs r0, mvfr2 + diff --git a/test/MC/Disassembler/ARM/invalid-armv7.txt b/test/MC/Disassembler/ARM/invalid-armv7.txt index be79326..550173f 100644 --- a/test/MC/Disassembler/ARM/invalid-armv7.txt +++ b/test/MC/Disassembler/ARM/invalid-armv7.txt @@ -69,14 +69,6 @@ # Undefined encoding space for hint instructions #------------------------------------------------------------------------------ -[0x05 0xf0 0x20 0xe3] -# CHECK: invalid instruction encoding -# CHECK-NEXT: [0x05 0xf0 0x20 0xe3] - -[0x41 0xf0 0x20 0xe3] -# CHECK: invalid instruction encoding -# CHECK-NEXT: [0x41 0xf0 0x20 0xe3] - # FIXME: is it "dbg #14" or not???? [0xfe 0xf0 0x20 0xe3] # CHCK: invalid instruction encoding @@ -183,7 +175,7 @@ # | 1: 1: 1: 1| 0: 0: 0: 1| 1: 0: 1: 1| 1: 1: 0: 0| 1: 1: 0: 1| 0: 0: 0: 1| 0: 0: 0: 0| 0: 0: 1: 0| # ------------------------------------------------------------------------------------------------- # To qualify as an LSL (immediate) instruction, Inst{19-16} "should" be 0b0000, instead it is = 0b1100. 
-# The instruction is UNPREDICTABLE, and is not a valid intruction. +# The instruction is UNPREDICTABLE, and is not a valid instruction. # # See also # A8.6.88 LSL (immediate) @@ -201,7 +193,7 @@ # | 1: 1: 1: 1| 0: 0: 0: 1| 1: 0: 1: 1| 1: 1: 0: 0| 1: 1: 0: 1| 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 1: 0| # ------------------------------------------------------------------------------------------------- # To qualify as a MOV (register) instruction, Inst{19-16} "should" be 0b0000, instead it is = 0b1100. -# The instruction is UNPREDICTABLE, and is not a valid intruction. +# The instruction is UNPREDICTABLE, and is not a valid instruction. # # See also # A8.6.97 MOV (register) diff --git a/test/MC/Disassembler/ARM/invalid-armv8.txt b/test/MC/Disassembler/ARM/invalid-armv8.txt new file mode 100644 index 0000000..772ff1d --- /dev/null +++ b/test/MC/Disassembler/ARM/invalid-armv8.txt @@ -0,0 +1,167 @@ +# RUN: not llvm-mc -triple armv8 -show-encoding -disassemble %s 2>&1 | FileCheck %s + +# Coprocessors other than CP10, CP11, CP14 and CP15 are undefined in ARMv8; +# but in ARMv7, all these instructions are valid + +# RUN: llvm-mc -triple armv7 -show-encoding -disassemble %s | FileCheck %s --check-prefix=CHECK-V7 + +[0x00 0x01 0x00 0xee] +# CHECK-V7: cdp +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0x01 0x00 0xee] + +[0x00 0x0e 0x00 0xee] +# CHECK-V7: cdp +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0x0e 0x00 0xee] + +[0x00 0x0f 0x00 0xee] +# CHECK-V7: cdp +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0x0f 0x00 0xee] + +[0x00 0x01 0x00 0xfe] +# CHECK-V7: cdp2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0x01 0x00 0xfe] + +[0x00 0x0e 0x00 0xfe] +# CHECK-V7: cdp2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0x0e 0x00 0xfe] + +[0x00 0x0f 0x00 0xfe] +# CHECK-V7: cdp2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0x0f 0x00 0xfe] + +[0x10 0x01 0x00 0xee] +# CHECK-V7: mcr +# CHECK: invalid instruction 
encoding +# CHECK-NEXT: [0x10 0x01 0x00 0xee] + +[0x10 0x01 0x00 0xfe] +# CHECK-V7: mcr2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x10 0x01 0x00 0xfe] + +[0x10 0x0e 0x00 0xfe] +# CHECK-V7: mcr2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x10 0x0e 0x00 0xfe] + +[0x10 0x0f 0x00 0xfe] +# CHECK-V7: mcr2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x10 0x0f 0x00 0xfe] + +[0x10 0x01 0x10 0xee] +# CHECK-V7: mrc +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x10 0x01 0x10 0xee] + +[0x10 0x01 0x10 0xfe] +# CHECK-V7: mrc2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x10 0x01 0x10 0xfe] + +[0x10 0x0e 0x10 0xfe] +# CHECK-V7: mrc2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x10 0x0e 0x10 0xfe] + +[0x10 0x0f 0x10 0xfe] +# CHECK-V7: mrc2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x10 0x0f 0x10 0xfe] + +[0x00 0x01 0x40 0xec] +# CHECK-V7: mcrr +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0x01 0x40 0xec] + +[0x00 0x01 0x40 0xfc] +# CHECK-V7: mcrr2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0x01 0x40 0xfc] + +[0x00 0x0e 0x40 0xfc] +# CHECK-V7: mcrr2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0x0e 0x40 0xfc] + +[0x00 0x0f 0x40 0xfc] +# CHECK-V7: mcrr2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0x0f 0x40 0xfc] + +[0x00 0x01 0x50 0xec] +# CHECK-V7: mrrc +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0x01 0x50 0xec] + +[0x00 0x0e 0x50 0xfc] +# CHECK-V7: mrrc2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0x0e 0x50 0xfc] + +[0x00 0x0f 0x50 0xfc] +# CHECK-V7: mrrc2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0x0f 0x50 0xfc] + +[0x00 0x01 0x50 0xfc] +# CHECK-V7: mrrc2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0x01 0x50 0xfc] + +[0x00 0x01 0x80 0xec] +# CHECK-V7: stc +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0x01 0x80 0xec] + +[0x00 0x0f 0x80 0xec] +# CHECK-V7: stc +# 
CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0x0f 0x80 0xec] + +[0x00 0x01 0x80 0xfc] +# CHECK-V7: stc2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0x01 0x80 0xfc] + +[0x00 0x0e 0x80 0xfc] +# CHECK-V7: stc2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0x0e 0x80 0xfc] + +[0x00 0x0f 0x80 0xfc] +# CHECK-V7: stc2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0x0f 0x80 0xfc] + +[0x00 0x01 0x90 0xec] +# CHECK-V7: ldc +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0x01 0x90 0xec] + +[0x00 0x0f 0x90 0xec] +# CHECK-V7: ldc +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0x0f 0x90 0xec] + +[0x00 0x01 0x90 0xfc] +# CHECK-V7: ldc2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0x01 0x90 0xfc] + +[0x00 0x0e 0x90 0xfc] +# CHECK-V7: ldc2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0x0e 0x90 0xfc] + +[0x00 0x0f 0x90 0xfc] +# CHECK-V7: ldc2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0x0f 0x90 0xfc] + diff --git a/test/MC/Disassembler/ARM/invalid-because-armv7.txt b/test/MC/Disassembler/ARM/invalid-because-armv7.txt index 4bf4833..beed8e4 100644 --- a/test/MC/Disassembler/ARM/invalid-because-armv7.txt +++ b/test/MC/Disassembler/ARM/invalid-because-armv7.txt @@ -18,3 +18,9 @@ [0x41 0x2b 0xb3 0xbe] # CHECK: invalid instruction encoding # CHECK-NEXT: [0x41 0x2b 0xb3 0xbe] + +# Would be vmrs r0, mvfr2 +[0x10 0xa 0xf5 0xee] +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x10 0xa 0xf5 0xee] + diff --git a/test/MC/Disassembler/ARM/invalid-thumbv7.txt b/test/MC/Disassembler/ARM/invalid-thumbv7.txt index f465b3c..2c84b8a 100644 --- a/test/MC/Disassembler/ARM/invalid-thumbv7.txt +++ b/test/MC/Disassembler/ARM/invalid-thumbv7.txt @@ -32,16 +32,6 @@ # CHECK: invalid instruction encoding # CHECK-NEXT: [0x6f 0xde] - -#------------------------------------------------------------------------------ -# Undefined encoding space for hint instructions 
-#------------------------------------------------------------------------------ - -[0xaf 0xf3 0x05 0x80] -# CHECK: invalid instruction encoding -# CHECK-NEXT: [0xaf 0xf3 0x05 0x80] - - #------------------------------------------------------------------------------ # Undefined encoding for it #------------------------------------------------------------------------------ @@ -50,10 +40,7 @@ # CHECK: potentially undefined instruction encoding # CHECK-NEXT: [0xff 0xbf 0x6b 0x80 0x00 0x75] -# mask = 0 -[0x50 0xbf 0x00 0x00] -# CHECK: invalid instruction encoding -# CHECK-NEXT: [0x50 0xbf 0x00 0x00] +[0x50 0xbf] # hint #5; legal as the third instruction for the iteee above # Two warnings from this block since there are two instructions in there [0xdb 0xbf 0x42 0xbb] @@ -402,3 +389,19 @@ [0x80 0xf9 0x30 0x0b] # CHECK: invalid instruction encoding # CHECK-NEXT: [0x80 0xf9 0x30 0x0b] + + +#------------------------------------------------------------------------------ +# Unpredictable STMs +#------------------------------------------------------------------------------ + +# 32-bit Thumb STM instructions cannot have a writeback register which appears +# in the list. 
+ +[0xa1,0xe8,0x07,0x04] +# CHECK: warning: potentially undefined instruction encoding +# CHECK-NEXT: [0xa1,0xe8,0x07,0x04] + +[0x21,0xe9,0x07,0x04] +# CHECK: warning: potentially undefined instruction encoding +# CHECK-NEXT: [0x21,0xe9,0x07,0x04] diff --git a/test/MC/Disassembler/ARM/invalid-thumbv8.txt b/test/MC/Disassembler/ARM/invalid-thumbv8.txt new file mode 100644 index 0000000..4c6b249 --- /dev/null +++ b/test/MC/Disassembler/ARM/invalid-thumbv8.txt @@ -0,0 +1,167 @@ +# RUN: not llvm-mc -disassemble %s -show-encoding -triple thumbv8 2>&1 | FileCheck %s + +# Coprocessors other than CP10, CP11, CP14 and CP15 are undefined in ARMv8; +# but in ARMv7, all these instructions are valid + +# RUN: llvm-mc -triple thumbv7 -show-encoding -disassemble %s | FileCheck %s --check-prefix=CHECK-V7 + +[0x00 0xee 0x00 0x01] +# CHECK-V7: cdp +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0xee 0x00 0x01] + +[0x00 0xee 0x00 0x0e] +# CHECK-V7: cdp +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0xee 0x00 0x0e] + +[0x00 0xee 0x00 0x0f] +# CHECK-V7: cdp +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0xee 0x00 0x0f] + +[0x00 0xfe 0x00 0x01] +# CHECK-V7: cdp2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0xfe 0x00 0x01] + +[0x00 0xfe 0x00 0x0e] +# CHECK-V7: cdp2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0xfe 0x00 0x0e] + +[0x00 0xfe 0x00 0x0f] +# CHECK-V7: cdp2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0xfe 0x00 0x0f] + +[0x00 0xee 0x10 0x01] +# CHECK-V7: mcr +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0xee 0x10 0x01] + +[0x00 0xfe 0x10 0x01] +# CHECK-V7: mcr2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0xfe 0x10 0x01] + +[0x00 0xfe 0x10 0x0e] +# CHECK-V7: mcr2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0xfe 0x10 0x0e] + +[0x00 0xfe 0x10 0x0f] +# CHECK-V7: mcr2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x00 0xfe 0x10 0x0f] + +[0x10 0xee 
0x10 0x01] +# CHECK-V7: mrc +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x10 0xee 0x10 0x01] + +[0x10 0xfe 0x10 0x01] +# CHECK-V7: mrc2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x10 0xfe 0x10 0x01] + +[0x10 0xfe 0x10 0x0e] +# CHECK-V7: mrc2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x10 0xfe 0x10 0x0e] + +[0x10 0xfe 0x10 0x0f] +# CHECK-V7: mrc2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x10 0xfe 0x10 0x0f] + +[0x40 0xec 0x00 0x01] +# CHECK-V7: mcrr +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x40 0xec 0x00 0x01] + +[0x40 0xfc 0x00 0x01] +# CHECK-V7: mcrr2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x40 0xfc 0x00 0x01] + +[0x40 0xfc 0x00 0x0e] +# CHECK-V7: mcrr2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x40 0xfc 0x00 0x0e] + +[0x40 0xfc 0x00 0x0f] +# CHECK-V7: mcrr2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x40 0xfc 0x00 0x0f] + +[0x50 0xec 0x00 0x01] +# CHECK-V7: mrrc +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x50 0xec 0x00 0x01] + +[0x50 0xfc 0x00 0x0e] +# CHECK-V7: mrrc2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x50 0xfc 0x00 0x0e] + +[0x50 0xfc 0x00 0x0f] +# CHECK-V7: mrrc2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x50 0xfc 0x00 0x0f] + +[0x50 0xfc 0x00 0x01] +# CHECK-V7: mrrc2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x50 0xfc 0x00 0x01] + +[0x80 0xec 0x00 0x01] +# CHECK-V7: stc +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x80 0xec 0x00 0x01] + +[0x80 0xec 0x00 0x0f] +# CHECK-V7: stc +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x80 0xec 0x00 0x0f] + +[0x80 0xfc 0x00 0x01] +# CHECK-V7: stc2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x80 0xfc 0x00 0x01] + +[0x80 0xfc 0x00 0x0e] +# CHECK-V7: stc2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x80 0xfc 0x00 0x0e] + +[0x80 0xfc 0x00 0x0f] +# CHECK-V7: stc2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x80 
0xfc 0x00 0x0f] + +[0x90 0xec 0x00 0x01] +# CHECK-V7: ldc +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x90 0xec 0x00 0x01] + +[0x90 0xec 0x00 0x0f] +# CHECK-V7: ldc +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x90 0xec 0x00 0x0f] + +[0x90 0xfc 0x00 0x01] +# CHECK-V7: ldc2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x90 0xfc 0x00 0x01] + +[0x90 0xfc 0x00 0x0e] +# CHECK-V7: ldc2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x90 0xfc 0x00 0x0e] + +[0x90 0xfc 0x00 0x0f] +# CHECK-V7: ldc2 +# CHECK: invalid instruction encoding +# CHECK-NEXT: [0x90 0xfc 0x00 0x0f] + diff --git a/test/MC/Disassembler/ARM/lit.local.cfg b/test/MC/Disassembler/ARM/lit.local.cfg index 22a76e5..8a3ba96 100644 --- a/test/MC/Disassembler/ARM/lit.local.cfg +++ b/test/MC/Disassembler/ARM/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.txt'] - targets = set(config.root.targets_to_build.split()) if not 'ARM' in targets: config.unsupported = True diff --git a/test/MC/Disassembler/ARM/load-store-acquire-release-v8-thumb.txt b/test/MC/Disassembler/ARM/load-store-acquire-release-v8-thumb.txt new file mode 100644 index 0000000..8a2ba74 --- /dev/null +++ b/test/MC/Disassembler/ARM/load-store-acquire-release-v8-thumb.txt @@ -0,0 +1,33 @@ +# RUN: llvm-mc -triple=thumbv8 -disassemble -show-encoding < %s | FileCheck %s + +0xd4 0xe8 0xcf 0x3f +0xd5 0xe8 0xdf 0x2f +0xd7 0xe8 0xef 0x1f +0xd8 0xe8 0xff 0x67 +# CHECK: ldaexb r3, [r4] @ encoding: [0xd4,0xe8,0xcf,0x3f] +# CHECK: ldaexh r2, [r5] @ encoding: [0xd5,0xe8,0xdf,0x2f] +# CHECK: ldaex r1, [r7] @ encoding: [0xd7,0xe8,0xef,0x1f] +# CHECK: ldaexd r6, r7, [r8] @ encoding: [0xd8,0xe8,0xff,0x67] + +0xc4 0xe8 0xc1 0x3f +0xc5 0xe8 0xd4 0x2f +0xc7 0xe8 0xe2 0x1f +0xc8 0xe8 0xf6 0x23 +# CHECK: stlexb r1, r3, [r4] @ encoding: [0xc4,0xe8,0xc1,0x3f] +# CHECK: stlexh r4, r2, [r5] @ encoding: [0xc5,0xe8,0xd4,0x2f] +# CHECK: stlex r2, r1, [r7] @ encoding: [0xc7,0xe8,0xe2,0x1f] +# CHECK: stlexd r6, r2, r3, [r8] @ encoding: 
[0xc8,0xe8,0xf6,0x23] + +0xd6 0xe8 0xaf 0x5f +0xd6 0xe8 0x8f 0x5f +0xd9 0xe8 0x9f 0xcf +# CHECK: lda r5, [r6] @ encoding: [0xd6,0xe8,0xaf,0x5f] +# CHECK: ldab r5, [r6] @ encoding: [0xd6,0xe8,0x8f,0x5f] +# CHECK: ldah r12, [r9] @ encoding: [0xd9,0xe8,0x9f,0xcf] + +0xc0 0xe8 0xaf 0x3f +0xc1 0xe8 0x8f 0x2f +0xc3 0xe8 0x9f 0x2f +# CHECK: stl r3, [r0] @ encoding: [0xc0,0xe8,0xaf,0x3f] +# CHECK: stlb r2, [r1] @ encoding: [0xc1,0xe8,0x8f,0x2f] +# CHECK: stlh r2, [r3] @ encoding: [0xc3,0xe8,0x9f,0x2f] diff --git a/test/MC/Disassembler/ARM/load-store-acquire-release-v8.txt b/test/MC/Disassembler/ARM/load-store-acquire-release-v8.txt new file mode 100644 index 0000000..058f9cc --- /dev/null +++ b/test/MC/Disassembler/ARM/load-store-acquire-release-v8.txt @@ -0,0 +1,32 @@ +# RUN: llvm-mc -triple=armv8 -disassemble -show-encoding < %s | FileCheck %s +0x9f 0x0e 0xd8 0xe1 +0x9f 0x1e 0xfc 0xe1 +0x9f 0x1e 0x90 0xe1 +0x9f 0x8e 0xbd 0xe1 +# CHECK: ldaexb r0, [r8] @ encoding: [0x9f,0x0e,0xd8,0xe1] +# CHECK: ldaexh r1, [r12] @ encoding: [0x9f,0x1e,0xfc,0xe1] +# CHECK: ldaex r1, [r0] @ encoding: [0x9f,0x1e,0x90,0xe1] +# CHECK: ldaexd r8, r9, [sp] @ encoding: [0x9f,0x8e,0xbd,0xe1] + +0x93 0x1e 0xc4 0xe1 +0x92 0x4e 0xe5 0xe1 +0x91 0x2e 0x87 0xe1 +0x92 0x6e 0xa8 0xe1 +# CHECK: stlexb r1, r3, [r4] @ encoding: [0x93,0x1e,0xc4,0xe1] +# CHECK: stlexh r4, r2, [r5] @ encoding: [0x92,0x4e,0xe5,0xe1] +# CHECK: stlex r2, r1, [r7] @ encoding: [0x91,0x2e,0x87,0xe1] +# CHECK: stlexd r6, r2, r3, [r8] @ encoding: [0x92,0x6e,0xa8,0xe1] + +0x9f 0x5c 0x96 0xe1 +0x9f 0x5c 0xd6 0xe1 +0x9f 0xcc 0xf9 0xe1 +# CHECK: lda r5, [r6] @ encoding: [0x9f,0x5c,0x96,0xe1] +# CHECK: ldab r5, [r6] @ encoding: [0x9f,0x5c,0xd6,0xe1] +# CHECK: ldah r12, [r9] @ encoding: [0x9f,0xcc,0xf9,0xe1] + +0x93 0xfc 0x80 0xe1 +0x92 0xfc 0xc1 0xe1 +0x92 0xfc 0xe3 0xe1 +# CHECK: stl r3, [r0] @ encoding: [0x93,0xfc,0x80,0xe1] +# CHECK: stlb r2, [r1] @ encoding: [0x92,0xfc,0xc1,0xe1] +# CHECK: stlh r2, [r3] @ encoding: [0x92,0xfc,0xe3,0xe1] 
diff --git a/test/MC/Disassembler/ARM/neon-crypto.txt b/test/MC/Disassembler/ARM/neon-crypto.txt new file mode 100644 index 0000000..086c781 --- /dev/null +++ b/test/MC/Disassembler/ARM/neon-crypto.txt @@ -0,0 +1,35 @@ +# RUN: llvm-mc -triple armv8-unknown-unknown -mattr=+neon,+crypto -disassemble < %s | FileCheck %s + +0x42,0x03,0xb0,0xf3 +# CHECK: aesd.8 q0, q1 +0x02,0x03,0xb0,0xf3 +# CHECK: aese.8 q0, q1 +0xc2,0x03,0xb0,0xf3 +# CHECK: aesimc.8 q0, q1 +0x82,0x03,0xb0,0xf3 +# CHECK: aesmc.8 q0, q1 + +0xc2,0x02,0xb9,0xf3 +# CHECK: sha1h.32 q0, q1 +0x82,0x03,0xba,0xf3 +# CHECK: sha1su1.32 q0, q1 +0xc2,0x03,0xba,0xf3 +# CHECK: sha256su0.32 q0, q1 + +0x44,0x0c,0x02,0xf2 +# CHECK: sha1c.32 q0, q1, q2 +0x44,0x0c,0x22,0xf2 +# CHECK: sha1m.32 q0, q1, q2 +0x44,0x0c,0x12,0xf2 +# CHECK: sha1p.32 q0, q1, q2 +0x44,0x0c,0x32,0xf2 +# CHECK: sha1su0.32 q0, q1, q2 +0x44,0x0c,0x02,0xf3 +# CHECK: sha256h.32 q0, q1, q2 +0x44,0x0c,0x12,0xf3 +# CHECK: sha256h2.32 q0, q1, q2 +0x44,0x0c,0x22,0xf3 +# CHECK: sha256su1.32 q0, q1, q2 + +0xa1,0x0e,0xe0,0xf2 +# CHECK: vmull.p64 q8, d16, d17 diff --git a/test/MC/Disassembler/ARM/thumb-v8fp.txt b/test/MC/Disassembler/ARM/thumb-fp-armv8.txt index 3457192..c90eed6 100644 --- a/test/MC/Disassembler/ARM/thumb-v8fp.txt +++ b/test/MC/Disassembler/ARM/thumb-fp-armv8.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc -disassemble -triple thumbv8 -mattr=+v8fp -show-encoding < %s | FileCheck %s +# RUN: llvm-mc -disassemble -triple thumbv8 -mattr=+fp-armv8 -show-encoding < %s | FileCheck %s 0xb2 0xee 0xe0 0x3b # CHECK: vcvtt.f64.f16 d3, s1 diff --git a/test/MC/Disassembler/ARM/thumb-neon-crypto.txt b/test/MC/Disassembler/ARM/thumb-neon-crypto.txt new file mode 100644 index 0000000..c725c7f --- /dev/null +++ b/test/MC/Disassembler/ARM/thumb-neon-crypto.txt @@ -0,0 +1,43 @@ +# RUN: llvm-mc -triple thumbv8-unknown-unknown -mattr=+neon,+crypto -disassemble < %s | FileCheck %s + +0xb0 0xff 0x42 0x03 +# CHECK: aesd.8 q0, q1 +0xb0 0xff 0x02 0x03 +# CHECK: aese.8 q0, q1 +0xb0 
0xff 0xc2 0x03 +# CHECK: aesimc.8 q0, q1 +0xb0 0xff 0x82 0x03 +# CHECK: aesmc.8 q0, q1 + +0xb9 0xff 0xc2 0x02 +# CHECK: sha1h.32 q0, q1 +0xba 0xff 0x82 0x03 +# CHECK: sha1su1.32 q0, q1 +0xba 0xff 0xc2 0x03 +# CHECK: sha256su0.32 q0, q1 + +0x02 0xef 0x44 0x0c +# CHECK: sha1c.32 q0, q1, q2 +0x22 0xef 0x44 0x0c +# CHECK: sha1m.32 q0, q1, q2 +0x12 0xef 0x44 0x0c +# CHECK: sha1p.32 q0, q1, q2 +0x32 0xef 0x44 0x0c +# CHECK: sha1su0.32 q0, q1, q2 +0x02 0xff 0x44 0x0c +# CHECK: sha256h.32 q0, q1, q2 +0x12 0xff 0x44 0x0c +# CHECK: sha256h2.32 q0, q1, q2 +0x22 0xff 0x44 0x0c +# CHECK: sha256su1.32 q0, q1, q2 + +0xe0 0xef 0xa1 0x0e +# CHECK: vmull.p64 q8, d16, d17 + +# This used to be incorrectly decoded into an sha256h.32 [0x00,0xff,0x40,0x0c] +# The other similar encodings are stc2 [0x00,0xfd,0x40,0x0c] and cdp2 [0x00,0xfe,0x40,0x0c] +0x00 0xfc 0x40 0x0c +# CHECK-NOT: sha256h.32 +# CHECK-NOT: stc2 +# CHECK-NOT: cdp2 + diff --git a/test/MC/Disassembler/ARM/thumb-tests.txt b/test/MC/Disassembler/ARM/thumb-tests.txt index 84dd075..df2bac1 100644 --- a/test/MC/Disassembler/ARM/thumb-tests.txt +++ b/test/MC/Disassembler/ARM/thumb-tests.txt @@ -125,7 +125,7 @@ # CHECK: cps #15 0xaf 0xf3 0x0f 0x81 -# CHECK: cpsie.w if, #10 +# CHECK: cpsie if, #10 0xaf 0xf3 0x6a 0x85 # CHECK: cpsie aif diff --git a/test/MC/Disassembler/ARM/thumb-v8.txt b/test/MC/Disassembler/ARM/thumb-v8.txt new file mode 100644 index 0000000..eb5ffea --- /dev/null +++ b/test/MC/Disassembler/ARM/thumb-v8.txt @@ -0,0 +1,28 @@ +# RUN: llvm-mc -disassemble -triple thumbv8 -mattr=+db -show-encoding < %s | FileCheck %s + +0x80 0xba +# CHECK: hlt #0 + +0xbf 0xba +# CHECK: hlt #63 + +# DCPS{1,2,3} + +0x8f 0xf7 0x01 0x80 +# CHECK: dcps1 + +0x8f 0xf7 0x02 0x80 +# CHECK: dcps2 + +0x8f 0xf7 0x03 0x80 +# CHECK: dcps3 + +0xbf 0xf3 0x59 0x8f +0xbf 0xf3 0x51 0x8f +0xbf 0xf3 0x55 0x8f +0xbf 0xf3 0x5d 0x8f + +# CHECK: dmb ishld +# CHECK: dmb oshld +# CHECK: dmb nshld +# CHECK: dmb ld diff --git 
a/test/MC/Disassembler/ARM/thumb2-v8.txt b/test/MC/Disassembler/ARM/thumb2-v8.txt new file mode 100644 index 0000000..1b2f095 --- /dev/null +++ b/test/MC/Disassembler/ARM/thumb2-v8.txt @@ -0,0 +1,40 @@ +# RUN: llvm-mc -triple=thumbv8 -disassemble < %s | FileCheck %s +# CHECK: sevl +# CHECK: sevl.w +0x50 0xbf +0xaf 0xf3 0x05 0x80 + + +# These are the only coprocessor instructions that remain defined in ARMv8 +# (The operations on p10/p11 disassemble into FP/NEON instructions) + +0x00 0xee 0x10 0x0e +# CHECK: mcr p14 + +0x00 0xee 0x10 0x0f +# CHECK: mcr p15 + +0x10 0xee 0x10 0x0e +# CHECK: mrc p14 + +0x10 0xee 0x10 0x0f +# CHECK: mrc p15 + +0x40 0xec 0x00 0x0e +# CHECK: mcrr p14 + +0x40 0xec 0x00 0x0f +# CHECK: mcrr p15 + +0x50 0xec 0x00 0x0e +# CHECK: mrrc p14 + +0x50 0xec 0x00 0x0f +# CHECK: mrrc p15 + +0x80 0xec 0x00 0x0e +# CHECK: stc p14 + +0x90 0xec 0x00 0x0e +# CHECK: ldc p14 + diff --git a/test/MC/Disassembler/ARM/thumb2.txt b/test/MC/Disassembler/ARM/thumb2.txt index 9fc166f..c8b4080 100644 --- a/test/MC/Disassembler/ARM/thumb2.txt +++ b/test/MC/Disassembler/ARM/thumb2.txt @@ -2707,3 +2707,14 @@ 0x30 0xbf 0x10 0xbf +#------------------------------------------------------------------------------ +# Unallocated hints (They execute as NOPs, but software must not use them.) 
+#------------------------------------------------------------------------------ +# CHECK: hint #6 +# CHECK: hint.w #6 +# CHECK: hint.w #102 + +0x60 0xbf +0xaf 0xf3 0x06 0x80 +0xaf 0xf3 0x66 0x80 + diff --git a/test/MC/Disassembler/Mips/lit.local.cfg b/test/MC/Disassembler/Mips/lit.local.cfg index 9b698b2..1fa54b4 100644 --- a/test/MC/Disassembler/Mips/lit.local.cfg +++ b/test/MC/Disassembler/Mips/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.txt'] - targets = set(config.root.targets_to_build.split()) if not 'Mips' in targets: config.unsupported = True diff --git a/test/MC/Disassembler/Mips/micromips.txt b/test/MC/Disassembler/Mips/micromips.txt new file mode 100644 index 0000000..b2d0cc0 --- /dev/null +++ b/test/MC/Disassembler/Mips/micromips.txt @@ -0,0 +1,287 @@ +# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mattr=micromips \ +# RUN: | FileCheck %s + +# CHECK: add $9, $6, $7 +0x00 0xe6 0x49 0x10 + +# CHECK: addi $9, $6, 17767 +0x11 0x26 0x45 0x67 + +# CHECK: addiu $9, $6, -15001 +0x31 0x26 0xc5 0x67 + +# CHECK: addi $9, $6, 17767 +0x11 0x26 0x45 0x67 + +# CHECK: addiu $9, $6, -15001 +0x31 0x26 0xc5 0x67 + +# CHECK: addu $9, $6, $7 +0x00 0xe6 0x49 0x50 + +# CHECK: sub $9, $6, $7 +0x00 0xe6 0x49 0x90 + +# CHECK: subu $4, $3, $5 +0x00 0xa3 0x21 0xd0 + +# CHECK: sub $6, $zero, $7 +0x00 0xe0 0x31 0x90 + +# CHECK: subu $6, $zero, $7 +0x00 0xe0 0x31 0xd0 + +# CHECK: addu $7, $8, $zero +0x00 0x08 0x39 0x50 + +# CHECK: slt $3, $3, $5 +0x00 0xa3 0x1b 0x50 + +# CHECK: slti $3, $3, 103 +0x90 0x63 0x00 0x67 + +# CHECK: slti $3, $3, 103 +0x90 0x63 0x00 0x67 + +# CHECK: sltiu $3, $3, 103 +0xb0 0x63 0x00 0x67 + +# CHECK: sltu $3, $3, $5 +0x00 0xa3 0x1b 0x90 + +# CHECK: lui $9, 17767 +0x41 0xa9 0x45 0x67 + +# CHECK: and $9, $6, $7 +0x00 0xe6 0x4a 0x50 + +# CHECK: andi $9, $6, 17767 +0xd1 0x26 0x45 0x67 + +# CHECK: andi $9, $6, 17767 +0xd1 0x26 0x45 0x67 + +# CHECK: or $3, $4, $5 +0x00 0xa4 0x1a 0x90 + +# CHECK: ori $9, $6, 17767 +0x51 0x26 0x45 0x67 + +# 
CHECK: xor $3, $3, $5 +0x00 0xa3 0x1b 0x10 + +# CHECK: xori $9, $6, 17767 +0x71 0x26 0x45 0x67 + +# CHECK: xori $9, $6, 17767 +0x71 0x26 0x45 0x67 + +# CHECK: nor $9, $6, $7 +0x00 0xe6 0x4a 0xd0 + +# CHECK: not $7, $8 +0x00 0x08 0x3a 0xd0 + +# CHECK: mul $9, $6, $7 +0x00 0xe6 0x4a 0x10 + +# CHECK: mult $9, $7 +0x00 0xe9 0x8b 0x3c + +# CHECK: multu $9, $7 +0x00 0xe9 0x9b 0x3c + +# CHECK-EB: div $zero, $9, $7 +0x00 0xe9 0xab 0x3c + +# CHECK-EB: divu $zero, $9, $7 +0x00 0xe9 0xbb 0x3c + +# CHECK: sll $4, $3, 7 +0x00 0x83 0x38 0x00 + +# CHECK: sllv $2, $3, $5 +0x00 0x65 0x10 0x10 + +# CHECK: sra $4, $3, 7 +0x00 0x83 0x38 0x80 + +# CHECK: srav $2, $3, $5 +0x00 0x65 0x10 0x90 + +# CHECK: srl $4, $3, 7 +0x00 0x83 0x38 0x40 + +# CHECK: srlv $2, $3, $5 +0x00 0x65 0x10 0x50 + +# CHECK: rotr $9, $6, 7 +0x01 0x26 0x38 0xc0 + +# CHECK: rotrv $9, $6, $7 +0x00 0xc7 0x48 0xd0 + +# CHECK: lb $5, 8($4) +0x1c 0xa4 0x00 0x08 + +# CHECK: lbu $6, 8($4) +0x14 0xc4 0x00 0x08 + +# CHECK: lh $2, 8($4) +0x3c 0x44 0x00 0x08 + +# CHECK: lhu $4, 8($2) +0x34 0x82 0x00 0x08 + +# CHECK: lw $6, 4($5) +0xfc 0xc5 0x00 0x04 + +# CHECK: sb $5, 8($4) +0x18 0xa4 0x00 0x08 + +# CHECK: sh $2, 8($4) +0x38 0x44 0x00 0x08 + +# CHECK: sw $5, 4($6) +0xf8 0xa6 0x00 0x04 + +# CHECK: lwl $4, 16($5) +0x60 0x85 0x00 0x10 + +# CHECK: lwr $4, 16($5) +0x60 0x85 0x10 0x10 + +# CHECK: swl $4, 16($5) +0x60 0x85 0x80 0x10 + +# CHECK: swr $4, 16($5) +0x60 0x85 0x90 0x10 + +# CHECK: movz $9, $6, $7 +0x00 0xe6 0x48 0x58 + +# CHECK: movn $9, $6, $7 +0x00 0xe6 0x48 0x18 + +# CHECK: movt $9, $6, $fcc0 +0x55 0x26 0x09 0x7b + +# CHECK: movf $9, $6, $fcc0 +0x55 0x26 0x01 0x7b + +# CHECK: mthi $6 +0x00 0x06 0x2d 0x7c + +# CHECK: mfhi $6 +0x00 0x06 0x0d 0x7c + +# CHECK: mtlo $6 +0x00 0x06 0x3d 0x7c + +# CHECK: mflo $6 +0x00 0x06 0x1d 0x7c + +# CHECK: madd $4, $5 +0x00 0xa4 0xcb 0x3c + +# CHECK: maddu $4, $5 +0x00 0xa4 0xdb 0x3c + +# CHECK: msub $4, $5 +0x00 0xa4 0xeb 0x3c + +# CHECK: msubu $4, $5 +0x00 0xa4 0xfb 0x3c + +# CHECK: clz 
$9, $6 +0x01 0x26 0x5b 0x3c + +# CHECK: clo $9, $6 +0x01 0x26 0x4b 0x3c + +# CHECK: seb $9, $6 +0x01 0x26 0x2b 0x3c + +# CHECK: seh $9, $6 +0x01 0x26 0x3b 0x3c + +# CHECK: wsbh $9, $6 +0x01 0x26 0x7b 0x3c + +# CHECK: ext $9, $6, 3, 7 +0x01 0x26 0x30 0xec + +# CHECK: ins $9, $6, 3, 7 +0x01 0x26 0x48 0xcc + +# CHECK: j 1328 +0xd4 0x00 0x02 0x98 + +# CHECK: jal 1328 +0xf4 0x00 0x02 0x98 + +# CHECK: jalr $ra, $6 +0x03 0xe6 0x0f 0x3c + +# CHECK: jr $7 +0x00 0x07 0x0f 0x3c + +# CHECK: beq $9, $6, 1332 +0x94 0xc9 0x02 0x9a + +# CHECK: bgez $6, 1332 +0x40 0x46 0x02 0x9a + +# CHECK: bgezal $6, 1332 +0x40 0x66 0x02 0x9a + +# CHECK: bltzal $6, 1332 +0x40 0x26 0x02 0x9a + +# CHECK: bgtz $6, 1332 +0x40 0xc6 0x02 0x9a + +# CHECK: blez $6, 1332 +0x40 0x86 0x02 0x9a + +# CHECK: bne $9, $6, 1332 +0xb4 0xc9 0x02 0x9a + +# CHECK: bltz $6, 1332 +0x40 0x06 0x02 0x9a + +# CHECK: teq $8, $9, 0 +0x01 0x28 0x00 0x3c + +# CHECK: tge $8, $9, 0 +0x01 0x28 0x02 0x3c + +# CHECK: tgeu $8, $9, 0 +0x01 0x28 0x04 0x3c + +# CHECK: tlt $8, $9, 0 +0x01 0x28 0x08 0x3c + +# CHECK: tltu $8, $9, 0 +0x01 0x28 0x0a 0x3c + +# CHECK: tne $8, $9, 0 +0x01 0x28 0x0c 0x3c + +# CHECK: teqi $9, 17767 +0x41,0xc9,0x45,0x67 + +# CHECK: tgei $9, 17767 +0x41 0x29 0x45 0x67 + +# CHECK: tgeiu $9, 17767 +0x41 0x69 0x45 0x67 + +# CHECK: tlti $9, 17767 +0x41 0x09 0x45 0x67 + +# CHECK: tltiu $9, 17767 +0x41 0x49 0x45 0x67 + +# CHECK: tnei $9, 17767 +0x41 0x89 0x45 0x67 diff --git a/test/MC/Disassembler/Mips/micromips_le.txt b/test/MC/Disassembler/Mips/micromips_le.txt new file mode 100644 index 0000000..5b2fe30 --- /dev/null +++ b/test/MC/Disassembler/Mips/micromips_le.txt @@ -0,0 +1,287 @@ +# RUN: llvm-mc --disassemble %s -triple=mipsel-unknown-linux -mattr=micromips \ +# RUN: | FileCheck %s + +# CHECK: add $9, $6, $7 +0xe6 0x00 0x10 0x49 + +# CHECK: addi $9, $6, 17767 +0x26 0x11 0x67 0x45 + +# CHECK: addiu $9, $6, -15001 +0x26 0x31 0x67 0xc5 + +# CHECK: addi $9, $6, 17767 +0x26 0x11 0x67 0x45 + +# CHECK: addiu $9, $6, 
-15001 +0x26 0x31 0x67 0xc5 + +# CHECK: addu $9, $6, $7 +0xe6 0x00 0x50 0x49 + +# CHECK: sub $9, $6, $7 +0xe6 0x00 0x90 0x49 + +# CHECK: subu $4, $3, $5 +0xa3 0x00 0xd0 0x21 + +# CHECK: sub $6, $zero, $7 +0xe0 0x00 0x90 0x31 + +# CHECK: subu $6, $zero, $7 +0xe0 0x00 0xd0 0x31 + +# CHECK: addu $7, $8, $zero +0x08 0x00 0x50 0x39 + +# CHECK: slt $3, $3, $5 +0xa3 0x00 0x50 0x1b + +# CHECK: slti $3, $3, 103 +0x63 0x90 0x67 0x00 + +# CHECK: slti $3, $3, 103 +0x63 0x90 0x67 0x00 + +# CHECK: sltiu $3, $3, 103 +0x63 0xb0 0x67 0x00 + +# CHECK: sltu $3, $3, $5 +0xa3 0x00 0x90 0x1b + +# CHECK: lui $9, 17767 +0xa9 0x41 0x67 0x45 + +# CHECK: and $9, $6, $7 +0xe6 0x00 0x50 0x4a + +# CHECK: andi $9, $6, 17767 +0x26 0xd1 0x67 0x45 + +# CHECK: andi $9, $6, 17767 +0x26 0xd1 0x67 0x45 + +# CHECK: or $3, $4, $5 +0xa4 0x00 0x90 0x1a + +# CHECK: ori $9, $6, 17767 +0x26 0x51 0x67 0x45 + +# CHECK: xor $3, $3, $5 +0xa3 0x00 0x10 0x1b + +# CHECK: xori $9, $6, 17767 +0x26 0x71 0x67 0x45 + +# CHECK: xori $9, $6, 17767 +0x26 0x71 0x67 0x45 + +# CHECK: nor $9, $6, $7 +0xe6 0x00 0xd0 0x4a + +# CHECK: not $7, $8 +0x08 0x00 0xd0 0x3a + +# CHECK: mul $9, $6, $7 +0xe6 0x00 0x10 0x4a + +# CHECK: mult $9, $7 +0xe9 0x00 0x3c 0x8b + +# CHECK: multu $9, $7 +0xe9 0x00 0x3c 0x9b + +# CHECK: div $zero, $9, $7 +0xe9 0x00 0x3c 0xab + +# CHECK: divu $zero, $9, $7 +0xe9 0x00 0x3c 0xbb + +# CHECK: sll $4, $3, 7 +0x83 0x00 0x00 0x38 + +# CHECK: sllv $2, $3, $5 +0x65 0x00 0x10 0x10 + +# CHECK: sra $4, $3, 7 +0x83 0x00 0x80 0x38 + +# CHECK: srav $2, $3, $5 +0x65 0x00 0x90 0x10 + +# CHECK: srl $4, $3, 7 +0x83 0x00 0x40 0x38 + +# CHECK: srlv $2, $3, $5 +0x65 0x00 0x50 0x10 + +# CHECK: rotr $9, $6, 7 +0x26 0x01 0xc0 0x38 + +# CHECK: rotrv $9, $6, $7 +0xc7 0x00 0xd0 0x48 + +# CHECK: lb $5, 8($4) +0xa4 0x1c 0x08 0x00 + +# CHECK: lbu $6, 8($4) +0xc4 0x14 0x08 0x00 + +# CHECK: lh $2, 8($4) +0x44 0x3c 0x08 0x00 + +# CHECK: lhu $4, 8($2) +0x82 0x34 0x08 0x00 + +# CHECK: lw $6, 4($5) +0xc5 0xfc 0x04 0x00 + +# CHECK: sb $5, 
8($4) +0xa4 0x18 0x08 0x00 + +# CHECK: sh $2, 8($4) +0x44 0x38 0x08 0x00 + +# CHECK: sw $5, 4($6) +0xa6 0xf8 0x04 0x00 + +# CHECK: lwl $4, 16($5) +0x85 0x60 0x10 0x00 + +# CHECK: lwr $4, 16($5) +0x85 0x60 0x10 0x10 + +# CHECK: swl $4, 16($5) +0x85 0x60 0x10 0x80 + +# CHECK: swr $4, 16($5) +0x85 0x60 0x10 0x90 + +# CHECK: movz $9, $6, $7 +0xe6 0x00 0x58 0x48 + +# CHECK: movn $9, $6, $7 +0xe6 0x00 0x18 0x48 + +# CHECK: movt $9, $6, $fcc0 +0x26 0x55 0x7b 0x09 + +# CHECK: movf $9, $6, $fcc0 +0x26 0x55 0x7b 0x01 + +# CHECK: mthi $6 +0x06 0x00 0x7c 0x2d + +# CHECK: mfhi $6 +0x06 0x00 0x7c 0x0d + +# CHECK: mtlo $6 +0x06 0x00 0x7c 0x3d + +# CHECK: mflo $6 +0x06 0x00 0x7c 0x1d + +# CHECK: madd $4, $5 +0xa4 0x00 0x3c 0xcb + +# CHECK: maddu $4, $5 +0xa4 0x00 0x3c 0xdb + +# CHECK: msub $4, $5 +0xa4 0x00 0x3c 0xeb + +# CHECK: msubu $4, $5 +0xa4 0x00 0x3c 0xfb + +# CHECK: clz $9, $6 +0x26 0x01 0x3c 0x5b + +# CHECK: clo $9, $6 +0x26 0x01 0x3c 0x4b + +# CHECK: seb $9, $6 +0x26 0x01 0x3c 0x2b + +# CHECK: seh $9, $6 +0x26 0x01 0x3c 0x3b + +# CHECK: wsbh $9, $6 +0x26 0x01 0x3c 0x7b + +# CHECK: ext $9, $6, 3, 7 +0x26 0x01 0xec 0x30 + +# CHECK: ins $9, $6, 3, 7 +0x26 0x01 0xcc 0x48 + +# CHECK: j 1328 +0x00 0xd4 0x98 0x02 + +# CHECK: jal 1328 +0x00 0xf4 0x98 0x02 + +# CHECK: jalr $ra, $6 +0xe6 0x03 0x3c 0x0f + +# CHECK: jr $7 +0x07 0x00 0x3c 0x0f + +# CHECK: beq $9, $6, 1332 +0xc9 0x94 0x9a 0x02 + +# CHECK: bgez $6, 1332 +0x46 0x40 0x9a 0x02 + +# CHECK: bgezal $6, 1332 +0x66 0x40 0x9a 0x02 + +# CHECK: bltzal $6, 1332 +0x26 0x40 0x9a 0x02 + +# CHECK: bgtz $6, 1332 +0xc6 0x40 0x9a 0x02 + +# CHECK: blez $6, 1332 +0x86 0x40 0x9a 0x02 + +# CHECK: bne $9, $6, 1332 +0xc9 0xb4 0x9a 0x02 + +# CHECK: bltz $6, 1332 +0x06 0x40 0x9a 0x02 + +# CHECK: teq $8, $9, 0 +0x28 0x01 0x3c 0x00 + +# CHECK: tge $8, $9, 0 +0x28 0x01 0x3c 0x02 + +# CHECK: tgeu $8, $9, 0 +0x28 0x01 0x3c 0x04 + +# CHECK: tlt $8, $9, 0 +0x28 0x01 0x3c 0x08 + +# CHECK: tltu $8, $9, 0 +0x28 0x01 0x3c 0x0a + +# CHECK: tne $8, $9, 0 
+0x28 0x01 0x3c 0x0c + +# CHECK: teqi $9, 17767 +0xc9 0x41 0x67 0x45 + +# CHECK: tgei $9, 17767 +0x29 0x41 0x67 0x45 + +# CHECK: tgeiu $9, 17767 +0x69 0x41 0x67 0x45 + +# CHECK: tlti $9, 17767 +0x09 0x41 0x67 0x45 + +# CHECK: tltiu $9, 17767 +0x49 0x41 0x67 0x45 + +# CHECK: tnei $9, 17767 +0x89 0x41 0x67 0x45 diff --git a/test/MC/Disassembler/Mips/mips-dsp.txt b/test/MC/Disassembler/Mips/mips-dsp.txt index d10e62c..3f60ae1 100644 --- a/test/MC/Disassembler/Mips/mips-dsp.txt +++ b/test/MC/Disassembler/Mips/mips-dsp.txt @@ -11,3 +11,12 @@ # CHECK: mtlo $21, $ac3 0x13 0x18 0xa0 0x02 + +# CHECK: lbux $10, $20($26) +0x8a 0x51 0x54 0x7f + +# CHECK: lhx $11, $21($27) +0x0a 0x59 0x75 0x7f + +# CHECK: lwx $12, $22($gp) +0x0a 0x60 0x96 0x7f diff --git a/test/MC/Disassembler/Mips/mips32r2.txt b/test/MC/Disassembler/Mips/mips32r2.txt index 48b6ad4..11d9058 100644 --- a/test/MC/Disassembler/Mips/mips32r2.txt +++ b/test/MC/Disassembler/Mips/mips32r2.txt @@ -242,6 +242,9 @@ # CHECK: lui $6, 17767 0x3c 0x06 0x45 0x67 +# CHECK: luxc1 $f0, $6($5) +0x4c 0xa6 0x00 0x05 + # CHECK: lw $4, 24($5) 0x8c 0xa4 0x00 0x18 @@ -254,6 +257,9 @@ # CHECK: lwr $3, 16($5) 0x98 0xa3 0x00 0x10 +# CHECK: lwxc1 $f20, $12($14) +0x4d 0xcc 0x05 0x00 + # CHECK: madd $6, $7 0x70 0xc7 0x00 0x00 @@ -404,6 +410,9 @@ # CHECK: subu $4, $3, $5 0x00 0x65 0x20 0x23 +# CHECK: suxc1 $f4, $24($5) +0x4c 0xb8 0x20 0x0d + # CHECK: sw $4, 24($5) 0xac 0xa4 0x00 0x18 @@ -416,6 +425,9 @@ # CHECK: swr $6, 16($7) 0xb8 0xe6 0x00 0x10 +# CHECK: swxc1 $f26, $18($22) +0x4e 0xd2 0xd0 0x08 + # CHECK: sync 7 0x00 0x00 0x01 0xcf diff --git a/test/MC/Disassembler/Mips/mips32r2_le.txt b/test/MC/Disassembler/Mips/mips32r2_le.txt index c62c695..adafcf1 100644 --- a/test/MC/Disassembler/Mips/mips32r2_le.txt +++ b/test/MC/Disassembler/Mips/mips32r2_le.txt @@ -242,6 +242,9 @@ # CHECK: lui $6, 17767 0x67 0x45 0x06 0x3c +# CHECK: luxc1 $f0, $6($5) +0x05 0x00 0xa6 0x4c + # CHECK: lw $4, 24($5) 0x18 0x00 0xa4 0x8c @@ -254,6 +257,9 @@ # CHECK: lwr 
$3, 16($5) 0x10 0x00 0xa3 0x98 +# CHECK: lwxc1 $f20, $12($14) +0x00 0x05 0xcc 0x4d + # CHECK: madd $6, $7 0x00 0x00 0xc7 0x70 @@ -404,6 +410,9 @@ # CHECK: subu $4, $3, $5 0x23 0x20 0x65 0x00 +# CHECK: suxc1 $f4, $24($5) +0x0d 0x20 0xb8 0x4c + # CHECK: sw $4, 24($5) 0x18 0x00 0xa4 0xac @@ -416,6 +425,9 @@ # CHECK: swr $6, 16($7) 0x10 0x00 0xe6 0xb8 +# CHECK: swxc1 $f26, $18($22) +0x08 0xd0 0xd2 0x4e + # CHECK: sync 7 0xcf 0x01 0x00 0x00 diff --git a/test/MC/Disassembler/Mips/mips64.txt b/test/MC/Disassembler/Mips/mips64.txt index b887473..f3d2d10 100644 --- a/test/MC/Disassembler/Mips/mips64.txt +++ b/test/MC/Disassembler/Mips/mips64.txt @@ -2,6 +2,9 @@ # CHECK: daddiu $11, $26, 31949 0x67 0x4b 0x7c 0xcd +# CHECK: daddiu $sp, $sp, -32 +0x67 0xbd 0xff 0xe0 + # CHECK: daddu $26, $1, $11 0x00 0x2b 0xd0 0x2d @@ -64,3 +67,21 @@ # CHECK: sd $6, 17767($zero) 0xfc 0x06 0x45 0x67 + +# CHECK: luxc1 $f0, $6($5) +0x4c 0xa6 0x00 0x05 + +# CHECK: lwxc1 $f20, $12($14) +0x4d 0xcc 0x05 0x00 + +# CHECK: suxc1 $f4, $24($5) +0x4c 0xb8 0x20 0x0d + +# CHECK: swxc1 $f26, $18($22) +0x4e 0xd2 0xd0 0x08 + +# CHECK: ldxc1 $f2, $2($10) +0x4d 0x42 0x00 0x81 + +# CHECK: sdxc1 $f8, $4($25) +0x4f 0x24 0x40 0x09 diff --git a/test/MC/Disassembler/Mips/mips64_le.txt b/test/MC/Disassembler/Mips/mips64_le.txt index ddc3c2b..0d3d2fa 100644 --- a/test/MC/Disassembler/Mips/mips64_le.txt +++ b/test/MC/Disassembler/Mips/mips64_le.txt @@ -64,3 +64,21 @@ # CHECK: sd $6, 17767($zero) 0x67 0x45 0x06 0xfc + +# CHECK: luxc1 $f0, $6($5) +0x05 0x00 0xa6 0x4c + +# CHECK: lwxc1 $f20, $12($14) +0x00 0x05 0xcc 0x4d + +# CHECK: suxc1 $f4, $24($5) +0x0d 0x20 0xb8 0x4c + +# CHECK: swxc1 $f26, $18($22) +0x08 0xd0 0xd2 0x4e + +# CHECK: ldxc1 $f2, $2($10) +0x81 0x00 0x42 0x4d + +# CHECK: sdxc1 $f8, $4($25) +0x09 0x40 0x24 0x4f diff --git a/test/MC/Disassembler/SystemZ/insns-pcrel.txt b/test/MC/Disassembler/SystemZ/insns-pcrel.txt index c565b6e..b7edab6 100644 --- a/test/MC/Disassembler/SystemZ/insns-pcrel.txt +++ 
b/test/MC/Disassembler/SystemZ/insns-pcrel.txt @@ -1330,3 +1330,403 @@ # 0x0000077c: # CHECK: brctg %r15, 0x1077a 0xa7 0xf7 0x7f 0xff + +# 0x00000780: +# CHECK: pfdrl 0, 0x780 +0xc6 0x02 0x00 0x00 0x00 0x00 + +# 0x00000786: +# CHECK: pfdrl 15, 0x786 +0xc6 0xf2 0x00 0x00 0x00 0x00 + +# 0x0000078c: +# CHECK: pfdrl 0, 0x78a +0xc6 0x02 0xff 0xff 0xff 0xff + +# 0x00000792: +# CHECK: pfdrl 15, 0x790 +0xc6 0xf2 0xff 0xff 0xff 0xff + +# 0x00000798: +# CHECK: pfdrl 0, 0xffffffff00000798 +0xc6 0x02 0x80 0x00 0x00 0x00 + +# 0x0000079e: +# CHECK: pfdrl 15, 0xffffffff0000079e +0xc6 0xf2 0x80 0x00 0x00 0x00 + +# 0x000007a4: +# CHECK: pfdrl 0, 0x1000007a2 +0xc6 0x02 0x7f 0xff 0xff 0xff + +# 0x000007aa: +# CHECK: pfdrl 15, 0x1000007a8 +0xc6 0xf2 0x7f 0xff 0xff 0xff + +# 0x000007b0: +# CHECK: clgrj %r0, %r0, 0, 0x7b0 +0xec 0x00 0x00 0x00 0x00 0x65 + +# 0x000007b6: +# CHECK: clgrj %r0, %r15, 0, 0x7b6 +0xec 0x0f 0x00 0x00 0x00 0x65 + +# 0x000007bc: +# CHECK: clgrj %r15, %r0, 0, 0x7bc +0xec 0xf0 0x00 0x00 0x00 0x65 + +# 0x000007c2: +# CHECK: clgrj %r7, %r8, 0, 0x7c2 +0xec 0x78 0x00 0x00 0x00 0x65 + +# 0x000007c8: +# CHECK: clgrj %r0, %r0, 0, 0x7c6 +0xec 0x00 0xff 0xff 0x00 0x65 + +# 0x000007ce: +# CHECK: clgrj %r0, %r0, 0, 0xffffffffffff07ce +0xec 0x00 0x80 0x00 0x00 0x65 + +# 0x000007d4: +# CHECK: clgrj %r0, %r0, 0, 0x107d2 +0xec 0x00 0x7f 0xff 0x00 0x65 + +# 0x000007da: +# CHECK: clgrj %r0, %r0, 1, 0x7da +0xec 0x00 0x00 0x00 0x10 0x65 + +# 0x000007e0: +# CHECK: clgrjh %r0, %r0, 0x7e0 +0xec 0x00 0x00 0x00 0x20 0x65 + +# 0x000007e6: +# CHECK: clgrj %r0, %r0, 3, 0x7e6 +0xec 0x00 0x00 0x00 0x30 0x65 + +# 0x000007ec: +# CHECK: clgrjl %r0, %r0, 0x7ec +0xec 0x00 0x00 0x00 0x40 0x65 + +# 0x000007f2: +# CHECK: clgrj %r0, %r0, 5, 0x7f2 +0xec 0x00 0x00 0x00 0x50 0x65 + +# 0x000007f8: +# CHECK: clgrjlh %r0, %r0, 0x7f8 +0xec 0x00 0x00 0x00 0x60 0x65 + +# 0x000007fe: +# CHECK: clgrj %r0, %r0, 7, 0x7fe +0xec 0x00 0x00 0x00 0x70 0x65 + +# 0x00000804: +# CHECK: clgrje %r0, %r0, 0x804 +0xec 0x00 0x00 
0x00 0x80 0x65 + +# 0x0000080a: +# CHECK: clgrj %r0, %r0, 9, 0x80a +0xec 0x00 0x00 0x00 0x90 0x65 + +# 0x00000810: +# CHECK: clgrjhe %r0, %r0, 0x810 +0xec 0x00 0x00 0x00 0xa0 0x65 + +# 0x00000816: +# CHECK: clgrj %r0, %r0, 11, 0x816 +0xec 0x00 0x00 0x00 0xb0 0x65 + +# 0x0000081c: +# CHECK: clgrjle %r0, %r0, 0x81c +0xec 0x00 0x00 0x00 0xc0 0x65 + +# 0x00000822: +# CHECK: clgrj %r0, %r0, 13, 0x822 +0xec 0x00 0x00 0x00 0xd0 0x65 + +# 0x00000828: +# CHECK: clgrj %r0, %r0, 14, 0x828 +0xec 0x00 0x00 0x00 0xe0 0x65 + +# 0x0000082e: +# CHECK: clgrj %r0, %r0, 15, 0x82e +0xec 0x00 0x00 0x00 0xf0 0x65 + +# 0x00000834: +# CHECK: clrj %r0, %r0, 0, 0x834 +0xec 0x00 0x00 0x00 0x00 0x77 + +# 0x0000083a: +# CHECK: clrj %r0, %r15, 0, 0x83a +0xec 0x0f 0x00 0x00 0x00 0x77 + +# 0x00000840: +# CHECK: clrj %r15, %r0, 0, 0x840 +0xec 0xf0 0x00 0x00 0x00 0x77 + +# 0x00000846: +# CHECK: clrj %r7, %r8, 0, 0x846 +0xec 0x78 0x00 0x00 0x00 0x77 + +# 0x0000084c: +# CHECK: clrj %r0, %r0, 0, 0x84a +0xec 0x00 0xff 0xff 0x00 0x77 + +# 0x00000852: +# CHECK: clrj %r0, %r0, 0, 0xffffffffffff0852 +0xec 0x00 0x80 0x00 0x00 0x77 + +# 0x00000858: +# CHECK: clrj %r0, %r0, 0, 0x10856 +0xec 0x00 0x7f 0xff 0x00 0x77 + +# 0x0000085e: +# CHECK: clrj %r0, %r0, 1, 0x85e +0xec 0x00 0x00 0x00 0x10 0x77 + +# 0x00000864: +# CHECK: clrjh %r0, %r0, 0x864 +0xec 0x00 0x00 0x00 0x20 0x77 + +# 0x0000086a: +# CHECK: clrj %r0, %r0, 3, 0x86a +0xec 0x00 0x00 0x00 0x30 0x77 + +# 0x00000870: +# CHECK: clrjl %r0, %r0, 0x870 +0xec 0x00 0x00 0x00 0x40 0x77 + +# 0x00000876: +# CHECK: clrj %r0, %r0, 5, 0x876 +0xec 0x00 0x00 0x00 0x50 0x77 + +# 0x0000087c: +# CHECK: clrjlh %r0, %r0, 0x87c +0xec 0x00 0x00 0x00 0x60 0x77 + +# 0x00000882: +# CHECK: clrj %r0, %r0, 7, 0x882 +0xec 0x00 0x00 0x00 0x70 0x77 + +# 0x00000888: +# CHECK: clrje %r0, %r0, 0x888 +0xec 0x00 0x00 0x00 0x80 0x77 + +# 0x0000088e: +# CHECK: clrj %r0, %r0, 9, 0x88e +0xec 0x00 0x00 0x00 0x90 0x77 + +# 0x00000894: +# CHECK: clrjhe %r0, %r0, 0x894 +0xec 0x00 0x00 0x00 0xa0 
0x77 + +# 0x0000089a: +# CHECK: clrj %r0, %r0, 11, 0x89a +0xec 0x00 0x00 0x00 0xb0 0x77 + +# 0x000008a0: +# CHECK: clrjle %r0, %r0, 0x8a0 +0xec 0x00 0x00 0x00 0xc0 0x77 + +# 0x000008a6: +# CHECK: clrj %r0, %r0, 13, 0x8a6 +0xec 0x00 0x00 0x00 0xd0 0x77 + +# 0x000008ac: +# CHECK: clrj %r0, %r0, 14, 0x8ac +0xec 0x00 0x00 0x00 0xe0 0x77 + +# 0x000008b2: +# CHECK: clrj %r0, %r0, 15, 0x8b2 +0xec 0x00 0x00 0x00 0xf0 0x77 + +# 0x000008b8: +# CHECK: clgij %r0, 0, 0, 0x8b8 +0xec 0x00 0x00 0x00 0x00 0x7d + +# 0x000008be: +# CHECK: clgij %r0, 127, 0, 0x8be +0xec 0x00 0x00 0x00 0x7f 0x7d + +# 0x000008c4: +# CHECK: clgij %r0, 128, 0, 0x8c4 +0xec 0x00 0x00 0x00 0x80 0x7d + +# 0x000008ca: +# CHECK: clgij %r0, 255, 0, 0x8ca +0xec 0x00 0x00 0x00 0xff 0x7d + +# 0x000008d0: +# CHECK: clgij %r15, 0, 0, 0x8d0 +0xec 0xf0 0x00 0x00 0x00 0x7d + +# 0x000008d6: +# CHECK: clgij %r7, 100, 0, 0x8d6 +0xec 0x70 0x00 0x00 0x64 0x7d + +# 0x000008dc: +# CHECK: clgij %r0, 0, 0, 0x8da +0xec 0x00 0xff 0xff 0x00 0x7d + +# 0x000008e2: +# CHECK: clgij %r0, 0, 0, 0xffffffffffff08e2 +0xec 0x00 0x80 0x00 0x00 0x7d + +# 0x000008e8: +# CHECK: clgij %r0, 0, 0, 0x108e6 +0xec 0x00 0x7f 0xff 0x00 0x7d + +# 0x000008ee: +# CHECK: clgij %r0, 0, 1, 0x8ee +0xec 0x01 0x00 0x00 0x00 0x7d + +# 0x000008f4: +# CHECK: clgijh %r0, 0, 0x8f4 +0xec 0x02 0x00 0x00 0x00 0x7d + +# 0x000008fa: +# CHECK: clgij %r0, 0, 3, 0x8fa +0xec 0x03 0x00 0x00 0x00 0x7d + +# 0x00000900: +# CHECK: clgijl %r0, 0, 0x900 +0xec 0x04 0x00 0x00 0x00 0x7d + +# 0x00000906: +# CHECK: clgij %r0, 0, 5, 0x906 +0xec 0x05 0x00 0x00 0x00 0x7d + +# 0x0000090c: +# CHECK: clgijlh %r0, 0, 0x90c +0xec 0x06 0x00 0x00 0x00 0x7d + +# 0x00000912: +# CHECK: clgij %r0, 0, 7, 0x912 +0xec 0x07 0x00 0x00 0x00 0x7d + +# 0x00000918: +# CHECK: clgije %r0, 0, 0x918 +0xec 0x08 0x00 0x00 0x00 0x7d + +# 0x0000091e: +# CHECK: clgij %r0, 0, 9, 0x91e +0xec 0x09 0x00 0x00 0x00 0x7d + +# 0x00000924: +# CHECK: clgijhe %r0, 0, 0x924 +0xec 0x0a 0x00 0x00 0x00 0x7d + +# 0x0000092a: +# CHECK: 
clgij %r0, 0, 11, 0x92a +0xec 0x0b 0x00 0x00 0x00 0x7d + +# 0x00000930: +# CHECK: clgijle %r0, 0, 0x930 +0xec 0x0c 0x00 0x00 0x00 0x7d + +# 0x00000936: +# CHECK: clgij %r0, 0, 13, 0x936 +0xec 0x0d 0x00 0x00 0x00 0x7d + +# 0x0000093c: +# CHECK: clgij %r0, 0, 14, 0x93c +0xec 0x0e 0x00 0x00 0x00 0x7d + +# 0x00000942: +# CHECK: clgij %r0, 0, 15, 0x942 +0xec 0x0f 0x00 0x00 0x00 0x7d + +# 0x00000948: +# CHECK: clij %r0, 0, 0, 0x948 +0xec 0x00 0x00 0x00 0x00 0x7f + +# 0x0000094e: +# CHECK: clij %r0, 127, 0, 0x94e +0xec 0x00 0x00 0x00 0x7f 0x7f + +# 0x00000954: +# CHECK: clij %r0, 128, 0, 0x954 +0xec 0x00 0x00 0x00 0x80 0x7f + +# 0x0000095a: +# CHECK: clij %r0, 255, 0, 0x95a +0xec 0x00 0x00 0x00 0xff 0x7f + +# 0x00000960: +# CHECK: clij %r15, 0, 0, 0x960 +0xec 0xf0 0x00 0x00 0x00 0x7f + +# 0x00000966: +# CHECK: clij %r7, 100, 0, 0x966 +0xec 0x70 0x00 0x00 0x64 0x7f + +# 0x0000096c: +# CHECK: clij %r0, 0, 0, 0x96a +0xec 0x00 0xff 0xff 0x00 0x7f + +# 0x00000972: +# CHECK: clij %r0, 0, 0, 0xffffffffffff0972 +0xec 0x00 0x80 0x00 0x00 0x7f + +# 0x00000978: +# CHECK: clij %r0, 0, 0, 0x10976 +0xec 0x00 0x7f 0xff 0x00 0x7f + +# 0x0000097e: +# CHECK: clij %r0, 0, 1, 0x97e +0xec 0x01 0x00 0x00 0x00 0x7f + +# 0x00000984: +# CHECK: clijh %r0, 0, 0x984 +0xec 0x02 0x00 0x00 0x00 0x7f + +# 0x0000098a: +# CHECK: clij %r0, 0, 3, 0x98a +0xec 0x03 0x00 0x00 0x00 0x7f + +# 0x00000990: +# CHECK: clijl %r0, 0, 0x990 +0xec 0x04 0x00 0x00 0x00 0x7f + +# 0x00000996: +# CHECK: clij %r0, 0, 5, 0x996 +0xec 0x05 0x00 0x00 0x00 0x7f + +# 0x0000099c: +# CHECK: clijlh %r0, 0, 0x99c +0xec 0x06 0x00 0x00 0x00 0x7f + +# 0x000009a2: +# CHECK: clij %r0, 0, 7, 0x9a2 +0xec 0x07 0x00 0x00 0x00 0x7f + +# 0x000009a8: +# CHECK: clije %r0, 0, 0x9a8 +0xec 0x08 0x00 0x00 0x00 0x7f + +# 0x000009ae: +# CHECK: clij %r0, 0, 9, 0x9ae +0xec 0x09 0x00 0x00 0x00 0x7f + +# 0x000009b4: +# CHECK: clijhe %r0, 0, 0x9b4 +0xec 0x0a 0x00 0x00 0x00 0x7f + +# 0x000009ba: +# CHECK: clij %r0, 0, 11, 0x9ba +0xec 0x0b 0x00 0x00 0x00 0x7f + 
+# 0x000009c0: +# CHECK: clijle %r0, 0, 0x9c0 +0xec 0x0c 0x00 0x00 0x00 0x7f + +# 0x000009c6: +# CHECK: clij %r0, 0, 13, 0x9c6 +0xec 0x0d 0x00 0x00 0x00 0x7f + +# 0x000009cc: +# CHECK: clij %r0, 0, 14, 0x9cc +0xec 0x0e 0x00 0x00 0x00 0x7f + +# 0x000009d2: +# CHECK: clij %r0, 0, 15, 0x9d2 +0xec 0x0f 0x00 0x00 0x00 0x7f diff --git a/test/MC/Disassembler/SystemZ/insns.txt b/test/MC/Disassembler/SystemZ/insns.txt index 51860cc..78d348d 100644 --- a/test/MC/Disassembler/SystemZ/insns.txt +++ b/test/MC/Disassembler/SystemZ/insns.txt @@ -349,6 +349,24 @@ # CHECK: ahy %r15, 0 0xe3 0xf0 0x00 0x00 0x00 0x7a +# CHECK: aih %r0, -2147483648 +0xcc 0x08 0x80 0x00 0x00 0x00 + +# CHECK: aih %r0, -1 +0xcc 0x08 0xff 0xff 0xff 0xff + +# CHECK: aih %r0, 0 +0xcc 0x08 0x00 0x00 0x00 0x00 + +# CHECK: aih %r0, 1 +0xcc 0x08 0x00 0x00 0x00 0x01 + +# CHECK: aih %r0, 2147483647 +0xcc 0x08 0x7f 0xff 0xff 0xff + +# CHECK: aih %r15, 0 +0xcc 0xf8 0x00 0x00 0x00 0x00 + # CHECK: alcgr %r0, %r0 0xb9 0x88 0x00 0x00 @@ -772,6 +790,51 @@ # CHECK: basr %r15, %r1 0x0d 0xf1 +# CHECK: bcr 0, %r14 +0x07 0x0e + +# CHECK: bor %r13 +0x07 0x1d + +# CHECK: bhr %r12 +0x07 0x2c + +# CHECK: bnler %r11 +0x07 0x3b + +# CHECK: blr %r10 +0x07 0x4a + +# CHECK: bnher %r9 +0x07 0x59 + +# CHECK: blhr %r8 +0x07 0x68 + +# CHECK: bner %r7 +0x07 0x77 + +# CHECK: ber %r6 +0x07 0x86 + +# CHECK: bnlhr %r5 +0x07 0x95 + +# CHECK: bher %r4 +0x07 0xa4 + +# CHECK: bnlr %r3 +0x07 0xb3 + +# CHECK: bler %r2 +0x07 0xc2 + +# CHECK: bnhr %r1 +0x07 0xd1 + +# CHECK: bnor %r0 +0x07 0xe0 + # CHECK: br %r1 0x07 0xf1 @@ -1198,6 +1261,36 @@ # CHECK: cgxbr %r15, 0, %f0 0xb3 0xaa 0x00 0xf0 +# CHECK: chf %r0, -524288 +0xe3 0x00 0x00 0x00 0x80 0xcd + +# CHECK: chf %r0, -1 +0xe3 0x00 0x0f 0xff 0xff 0xcd + +# CHECK: chf %r0, 0 +0xe3 0x00 0x00 0x00 0x00 0xcd + +# CHECK: chf %r0, 1 +0xe3 0x00 0x00 0x01 0x00 0xcd + +# CHECK: chf %r0, 524287 +0xe3 0x00 0x0f 0xff 0x7f 0xcd + +# CHECK: chf %r0, 0(%r1) +0xe3 0x00 0x10 0x00 0x00 0xcd + +# CHECK: chf %r0, 0(%r15) 
+0xe3 0x00 0xf0 0x00 0x00 0xcd + +# CHECK: chf %r0, 524287(%r1,%r15) +0xe3 0x01 0xff 0xff 0x7f 0xcd + +# CHECK: chf %r0, 524287(%r15,%r1) +0xe3 0x0f 0x1f 0xff 0x7f 0xcd + +# CHECK: chf %r15, 0 +0xe3 0xf0 0x00 0x00 0x00 0xcd + # CHECK: chhsi 0, 0 0xe5 0x54 0x00 0x00 0x00 0x00 @@ -1333,6 +1426,60 @@ # CHECK: chy %r15, 0 0xe3 0xf0 0x00 0x00 0x00 0x79 +# CHECK: cih %r0, -2147483648 +0xcc 0x0d 0x80 0x00 0x00 0x00 + +# CHECK: cih %r0, -1 +0xcc 0x0d 0xff 0xff 0xff 0xff + +# CHECK: cih %r0, 0 +0xcc 0x0d 0x00 0x00 0x00 0x00 + +# CHECK: cih %r0, 1 +0xcc 0x0d 0x00 0x00 0x00 0x01 + +# CHECK: cih %r0, 2147483647 +0xcc 0x0d 0x7f 0xff 0xff 0xff + +# CHECK: cih %r15, 0 +0xcc 0xfd 0x00 0x00 0x00 0x00 + +# CHECK: clc 0(1), 0 +0xd5 0x00 0x00 0x00 0x00 0x00 + +# CHECK: clc 0(1), 0(%r1) +0xd5 0x00 0x00 0x00 0x10 0x00 + +# CHECK: clc 0(1), 0(%r15) +0xd5 0x00 0x00 0x00 0xf0 0x00 + +# CHECK: clc 0(1), 4095 +0xd5 0x00 0x00 0x00 0x0f 0xff + +# CHECK: clc 0(1), 4095(%r1) +0xd5 0x00 0x00 0x00 0x1f 0xff + +# CHECK: clc 0(1), 4095(%r15) +0xd5 0x00 0x00 0x00 0xff 0xff + +# CHECK: clc 0(1,%r1), 0 +0xd5 0x00 0x10 0x00 0x00 0x00 + +# CHECK: clc 0(1,%r15), 0 +0xd5 0x00 0xf0 0x00 0x00 0x00 + +# CHECK: clc 4095(1,%r1), 0 +0xd5 0x00 0x1f 0xff 0x00 0x00 + +# CHECK: clc 4095(1,%r15), 0 +0xd5 0x00 0xff 0xff 0x00 0x00 + +# CHECK: clc 0(256,%r1), 0 +0xd5 0xff 0x10 0x00 0x00 0x00 + +# CHECK: clc 0(256,%r15), 0 +0xd5 0xff 0xf0 0x00 0x00 0x00 + # CHECK: clfhsi 0, 0 0xe5 0x5d 0x00 0x00 0x00 0x00 @@ -1477,6 +1624,36 @@ # CHECK: clg %r15, 0 0xe3 0xf0 0x00 0x00 0x00 0x21 +# CHECK: clhf %r0, -524288 +0xe3 0x00 0x00 0x00 0x80 0xcf + +# CHECK: clhf %r0, -1 +0xe3 0x00 0x0f 0xff 0xff 0xcf + +# CHECK: clhf %r0, 0 +0xe3 0x00 0x00 0x00 0x00 0xcf + +# CHECK: clhf %r0, 1 +0xe3 0x00 0x00 0x01 0x00 0xcf + +# CHECK: clhf %r0, 524287 +0xe3 0x00 0x0f 0xff 0x7f 0xcf + +# CHECK: clhf %r0, 0(%r1) +0xe3 0x00 0x10 0x00 0x00 0xcf + +# CHECK: clhf %r0, 0(%r15) +0xe3 0x00 0xf0 0x00 0x00 0xcf + +# CHECK: clhf %r0, 524287(%r1,%r15) +0xe3 
0x01 0xff 0xff 0x7f 0xcf + +# CHECK: clhf %r0, 524287(%r15,%r1) +0xe3 0x0f 0x1f 0xff 0x7f 0xcf + +# CHECK: clhf %r15, 0 +0xe3 0xf0 0x00 0x00 0x00 0xcf + # CHECK: clhhsi 0, 0 0xe5 0x55 0x00 0x00 0x00 0x00 @@ -1519,6 +1696,18 @@ # CHECK: cli 4095(%r15), 42 0x95 0x2a 0xff 0xff +# CHECK: clih %r0, 0 +0xcc 0x0f 0x00 0x00 0x00 0x00 + +# CHECK: clih %r0, 1 +0xcc 0x0f 0x00 0x00 0x00 0x01 + +# CHECK: clih %r0, 4294967295 +0xcc 0x0f 0xff 0xff 0xff 0xff + +# CHECK: clih %r15, 0 +0xcc 0xff 0x00 0x00 0x00 0x00 + # CHECK: cliy -524288, 0 0xeb 0x00 0x00 0x00 0x80 0x55 @@ -1561,6 +1750,18 @@ # CHECK: clr %r7, %r8 0x15 0x78 +# CHECK: clst %r0, %r0 +0xb2 0x5d 0x00 0x00 + +# CHECK: clst %r0, %r15 +0xb2 0x5d 0x00 0x0f + +# CHECK: clst %r15, %r0 +0xb2 0x5d 0x00 0xf0 + +# CHECK: clst %r7, %r8 +0xb2 0x5d 0x00 0x78 + # CHECK: cl %r0, 0 0x55 0x00 0x00 0x00 @@ -2101,6 +2302,24 @@ # CHECK: fidbr %f15, 0, %f0 0xb3 0x5f 0x00 0xf0 +# CHECK: fidbra %f0, 0, %f0, 1 +0xb3 0x5f 0x01 0x00 + +# CHECK: fidbra %f0, 0, %f0, 15 +0xb3 0x5f 0x0f 0x00 + +# CHECK: fidbra %f0, 0, %f15, 1 +0xb3 0x5f 0x01 0x0f + +# CHECK: fidbra %f0, 15, %f0, 1 +0xb3 0x5f 0xf1 0x00 + +# CHECK: fidbra %f4, 5, %f6, 7 +0xb3 0x5f 0x57 0x46 + +# CHECK: fidbra %f15, 0, %f0, 1 +0xb3 0x5f 0x01 0xf0 + # CHECK: fiebr %f0, 0, %f0 0xb3 0x57 0x00 0x00 @@ -2116,6 +2335,24 @@ # CHECK: fiebr %f15, 0, %f0 0xb3 0x57 0x00 0xf0 +# CHECK: fiebra %f0, 0, %f0, 1 +0xb3 0x57 0x01 0x00 + +# CHECK: fiebra %f0, 0, %f0, 15 +0xb3 0x57 0x0f 0x00 + +# CHECK: fiebra %f0, 0, %f15, 1 +0xb3 0x57 0x01 0x0f + +# CHECK: fiebra %f0, 15, %f0, 1 +0xb3 0x57 0xf1 0x00 + +# CHECK: fiebra %f4, 5, %f6, 7 +0xb3 0x57 0x57 0x46 + +# CHECK: fiebra %f15, 0, %f0, 1 +0xb3 0x57 0x01 0xf0 + # CHECK: fixbr %f0, 0, %f0 0xb3 0x47 0x00 0x00 @@ -2131,6 +2368,24 @@ # CHECK: fixbr %f13, 0, %f0 0xb3 0x47 0x00 0xd0 +# CHECK: fixbra %f0, 0, %f0, 1 +0xb3 0x47 0x01 0x00 + +# CHECK: fixbra %f0, 0, %f0, 15 +0xb3 0x47 0x0f 0x00 + +# CHECK: fixbra %f0, 0, %f13, 1 +0xb3 0x47 0x01 0x0d + +# CHECK: 
fixbra %f0, 15, %f0, 1 +0xb3 0x47 0xf1 0x00 + +# CHECK: fixbra %f4, 5, %f8, 9 +0xb3 0x47 0x59 0x48 + +# CHECK: fixbra %f13, 0, %f0, 1 +0xb3 0x47 0x01 0xd0 + # CHECK: flogr %r0, %r0 0xb9 0x83 0x00 0x00 @@ -2260,6 +2515,15 @@ # CHECK: iill %r15, 0 0xa5 0xf3 0x00 0x00 +# CHECK: ipm %r0 +0xb2 0x22 0x00 0x00 + +# CHECK: ipm %r1 +0xb2 0x22 0x00 0x10 + +# CHECK: ipm %r15 +0xb2 0x22 0x00 0xf0 + # CHECK: la %r0, 0 0x41 0x00 0x00 0x00 @@ -2350,6 +2614,36 @@ # CHECK: lb %r15, 0 0xe3 0xf0 0x00 0x00 0x00 0x76 +# CHECK: lbh %r0, -524288 +0xe3 0x00 0x00 0x00 0x80 0xc0 + +# CHECK: lbh %r0, -1 +0xe3 0x00 0x0f 0xff 0xff 0xc0 + +# CHECK: lbh %r0, 0 +0xe3 0x00 0x00 0x00 0x00 0xc0 + +# CHECK: lbh %r0, 1 +0xe3 0x00 0x00 0x01 0x00 0xc0 + +# CHECK: lbh %r0, 524287 +0xe3 0x00 0x0f 0xff 0x7f 0xc0 + +# CHECK: lbh %r0, 0(%r1) +0xe3 0x00 0x10 0x00 0x00 0xc0 + +# CHECK: lbh %r0, 0(%r15) +0xe3 0x00 0xf0 0x00 0x00 0xc0 + +# CHECK: lbh %r0, 524287(%r1,%r15) +0xe3 0x01 0xff 0xff 0x7f 0xc0 + +# CHECK: lbh %r0, 524287(%r15,%r1) +0xe3 0x0f 0x1f 0xff 0x7f 0xc0 + +# CHECK: lbh %r15, 0 +0xe3 0xf0 0x00 0x00 0x00 0xc0 + # CHECK: lcdbr %f0, %f9 0xb3 0x13 0x00 0x09 @@ -2638,6 +2932,36 @@ # CHECK: ley %f15, 0 0xed 0xf0 0x00 0x00 0x00 0x64 +# CHECK: lfh %r0, -524288 +0xe3 0x00 0x00 0x00 0x80 0xca + +# CHECK: lfh %r0, -1 +0xe3 0x00 0x0f 0xff 0xff 0xca + +# CHECK: lfh %r0, 0 +0xe3 0x00 0x00 0x00 0x00 0xca + +# CHECK: lfh %r0, 1 +0xe3 0x00 0x00 0x01 0x00 0xca + +# CHECK: lfh %r0, 524287 +0xe3 0x00 0x0f 0xff 0x7f 0xca + +# CHECK: lfh %r0, 0(%r1) +0xe3 0x00 0x10 0x00 0x00 0xca + +# CHECK: lfh %r0, 0(%r15) +0xe3 0x00 0xf0 0x00 0x00 0xca + +# CHECK: lfh %r0, 524287(%r1,%r15) +0xe3 0x01 0xff 0xff 0x7f 0xca + +# CHECK: lfh %r0, 524287(%r15,%r1) +0xe3 0x0f 0x1f 0xff 0x7f 0xca + +# CHECK: lfh %r15, 0 +0xe3 0xf0 0x00 0x00 0x00 0xca + # CHECK: lgbr %r0, %r15 0xb9 0x06 0x00 0x0f @@ -2866,6 +3190,36 @@ # CHECK: lhi %r15, 0 0xa7 0xf8 0x00 0x00 +# CHECK: lhh %r0, -524288 +0xe3 0x00 0x00 0x00 0x80 0xc4 + +# CHECK: lhh %r0, -1 
+0xe3 0x00 0x0f 0xff 0xff 0xc4 + +# CHECK: lhh %r0, 0 +0xe3 0x00 0x00 0x00 0x00 0xc4 + +# CHECK: lhh %r0, 1 +0xe3 0x00 0x00 0x01 0x00 0xc4 + +# CHECK: lhh %r0, 524287 +0xe3 0x00 0x0f 0xff 0x7f 0xc4 + +# CHECK: lhh %r0, 0(%r1) +0xe3 0x00 0x10 0x00 0x00 0xc4 + +# CHECK: lhh %r0, 0(%r15) +0xe3 0x00 0xf0 0x00 0x00 0xc4 + +# CHECK: lhh %r0, 524287(%r1,%r15) +0xe3 0x01 0xff 0xff 0x7f 0xc4 + +# CHECK: lhh %r0, 524287(%r15,%r1) +0xe3 0x0f 0x1f 0xff 0x7f 0xc4 + +# CHECK: lhh %r15, 0 +0xe3 0xf0 0x00 0x00 0x00 0xc4 + # CHECK: lhr %r0, %r15 0xb9 0x27 0x00 0x0f @@ -2965,6 +3319,36 @@ # CHECK: llc %r15, 0 0xe3 0xf0 0x00 0x00 0x00 0x94 +# CHECK: llch %r0, -524288 +0xe3 0x00 0x00 0x00 0x80 0xc2 + +# CHECK: llch %r0, -1 +0xe3 0x00 0x0f 0xff 0xff 0xc2 + +# CHECK: llch %r0, 0 +0xe3 0x00 0x00 0x00 0x00 0xc2 + +# CHECK: llch %r0, 1 +0xe3 0x00 0x00 0x01 0x00 0xc2 + +# CHECK: llch %r0, 524287 +0xe3 0x00 0x0f 0xff 0x7f 0xc2 + +# CHECK: llch %r0, 0(%r1) +0xe3 0x00 0x10 0x00 0x00 0xc2 + +# CHECK: llch %r0, 0(%r15) +0xe3 0x00 0xf0 0x00 0x00 0xc2 + +# CHECK: llch %r0, 524287(%r1,%r15) +0xe3 0x01 0xff 0xff 0x7f 0xc2 + +# CHECK: llch %r0, 524287(%r15,%r1) +0xe3 0x0f 0x1f 0xff 0x7f 0xc2 + +# CHECK: llch %r15, 0 +0xe3 0xf0 0x00 0x00 0x00 0xc2 + # CHECK: llgcr %r0, %r15 0xb9 0x84 0x00 0x0f @@ -3121,6 +3505,36 @@ # CHECK: llh %r15, 0 0xe3 0xf0 0x00 0x00 0x00 0x95 +# CHECK: llhh %r0, -524288 +0xe3 0x00 0x00 0x00 0x80 0xc6 + +# CHECK: llhh %r0, -1 +0xe3 0x00 0x0f 0xff 0xff 0xc6 + +# CHECK: llhh %r0, 0 +0xe3 0x00 0x00 0x00 0x00 0xc6 + +# CHECK: llhh %r0, 1 +0xe3 0x00 0x00 0x01 0x00 0xc6 + +# CHECK: llhh %r0, 524287 +0xe3 0x00 0x0f 0xff 0x7f 0xc6 + +# CHECK: llhh %r0, 0(%r1) +0xe3 0x00 0x10 0x00 0x00 0xc6 + +# CHECK: llhh %r0, 0(%r15) +0xe3 0x00 0xf0 0x00 0x00 0xc6 + +# CHECK: llhh %r0, 524287(%r1,%r15) +0xe3 0x01 0xff 0xff 0x7f 0xc6 + +# CHECK: llhh %r0, 524287(%r15,%r1) +0xe3 0x0f 0x1f 0xff 0x7f 0xc6 + +# CHECK: llhh %r15, 0 +0xe3 0xf0 0x00 0x00 0x00 0xc6 + # CHECK: llihf %r0, 0 0xc0 0x0e 0x00 0x00 
0x00 0x00 @@ -3250,6 +3664,42 @@ # CHECK: lnebr %f15, %f9 0xb3 0x01 0x00 0xf9 +# CHECK: lngfr %r0, %r0 +0xb9 0x11 0x00 0x00 + +# CHECK: lngfr %r0, %r15 +0xb9 0x11 0x00 0x0f + +# CHECK: lngfr %r15, %r0 +0xb9 0x11 0x00 0xf0 + +# CHECK: lngfr %r7, %r8 +0xb9 0x11 0x00 0x78 + +# CHECK: lngr %r0, %r0 +0xb9 0x01 0x00 0x00 + +# CHECK: lngr %r0, %r15 +0xb9 0x01 0x00 0x0f + +# CHECK: lngr %r15, %r0 +0xb9 0x01 0x00 0xf0 + +# CHECK: lngr %r7, %r8 +0xb9 0x01 0x00 0x78 + +# CHECK: lnr %r0, %r0 +0x11 0x00 + +# CHECK: lnr %r0, %r15 +0x11 0x0f + +# CHECK: lnr %r15, %r0 +0x11 0xf0 + +# CHECK: lnr %r7, %r8 +0x11 0x78 + # CHECK: lnxbr %f0, %f8 0xb3 0x41 0x00 0x08 @@ -3478,6 +3928,42 @@ # CHECK: lpebr %f15, %f9 0xb3 0x00 0x00 0xf9 +# CHECK: lpgfr %r0, %r0 +0xb9 0x10 0x00 0x00 + +# CHECK: lpgfr %r0, %r15 +0xb9 0x10 0x00 0x0f + +# CHECK: lpgfr %r15, %r0 +0xb9 0x10 0x00 0xf0 + +# CHECK: lpgfr %r7, %r8 +0xb9 0x10 0x00 0x78 + +# CHECK: lpgr %r0, %r0 +0xb9 0x00 0x00 0x00 + +# CHECK: lpgr %r0, %r15 +0xb9 0x00 0x00 0x0f + +# CHECK: lpgr %r15, %r0 +0xb9 0x00 0x00 0xf0 + +# CHECK: lpgr %r7, %r8 +0xb9 0x00 0x00 0x78 + +# CHECK: lpr %r0, %r0 +0x10 0x00 + +# CHECK: lpr %r0, %r15 +0x10 0x0f + +# CHECK: lpr %r15, %r0 +0x10 0xf0 + +# CHECK: lpr %r7, %r8 +0x10 0x78 + # CHECK: lpxbr %f0, %f8 0xb3 0x40 0x00 0x08 @@ -4435,6 +4921,42 @@ # CHECK: msy %r15, 0 0xe3 0xf0 0x00 0x00 0x00 0x51 +# CHECK: mvc 0(1), 0 +0xd2 0x00 0x00 0x00 0x00 0x00 + +# CHECK: mvc 0(1), 0(%r1) +0xd2 0x00 0x00 0x00 0x10 0x00 + +# CHECK: mvc 0(1), 0(%r15) +0xd2 0x00 0x00 0x00 0xf0 0x00 + +# CHECK: mvc 0(1), 4095 +0xd2 0x00 0x00 0x00 0x0f 0xff + +# CHECK: mvc 0(1), 4095(%r1) +0xd2 0x00 0x00 0x00 0x1f 0xff + +# CHECK: mvc 0(1), 4095(%r15) +0xd2 0x00 0x00 0x00 0xff 0xff + +# CHECK: mvc 0(1,%r1), 0 +0xd2 0x00 0x10 0x00 0x00 0x00 + +# CHECK: mvc 0(1,%r15), 0 +0xd2 0x00 0xf0 0x00 0x00 0x00 + +# CHECK: mvc 4095(1,%r1), 0 +0xd2 0x00 0x1f 0xff 0x00 0x00 + +# CHECK: mvc 4095(1,%r15), 0 +0xd2 0x00 0xff 0xff 0x00 0x00 + +# CHECK: mvc 0(256,%r1), 
0 +0xd2 0xff 0x10 0x00 0x00 0x00 + +# CHECK: mvc 0(256,%r15), 0 +0xd2 0xff 0xf0 0x00 0x00 0x00 + # CHECK: mvghi 0, 0 0xe5 0x48 0x00 0x00 0x00 0x00 @@ -4585,6 +5107,18 @@ # CHECK: mviy 524287(%r15), 42 0xeb 0x2a 0xff 0xff 0x7f 0x52 +# CHECK: mvst %r0, %r0 +0xb2 0x55 0x00 0x00 + +# CHECK: mvst %r0, %r15 +0xb2 0x55 0x00 0x0f + +# CHECK: mvst %r15, %r0 +0xb2 0x55 0x00 0xf0 + +# CHECK: mvst %r7, %r8 +0xb2 0x55 0x00 0x78 + # CHECK: mxbr %f0, %f0 0xb3 0x4c 0x00 0x00 @@ -4630,6 +5164,42 @@ # CHECK: mxdb %f13, 0 0xed 0xd0 0x00 0x00 0x00 0x07 +# CHECK: nc 0(1), 0 +0xd4 0x00 0x00 0x00 0x00 0x00 + +# CHECK: nc 0(1), 0(%r1) +0xd4 0x00 0x00 0x00 0x10 0x00 + +# CHECK: nc 0(1), 0(%r15) +0xd4 0x00 0x00 0x00 0xf0 0x00 + +# CHECK: nc 0(1), 4095 +0xd4 0x00 0x00 0x00 0x0f 0xff + +# CHECK: nc 0(1), 4095(%r1) +0xd4 0x00 0x00 0x00 0x1f 0xff + +# CHECK: nc 0(1), 4095(%r15) +0xd4 0x00 0x00 0x00 0xff 0xff + +# CHECK: nc 0(1,%r1), 0 +0xd4 0x00 0x10 0x00 0x00 0x00 + +# CHECK: nc 0(1,%r15), 0 +0xd4 0x00 0xf0 0x00 0x00 0x00 + +# CHECK: nc 4095(1,%r1), 0 +0xd4 0x00 0x1f 0xff 0x00 0x00 + +# CHECK: nc 4095(1,%r15), 0 +0xd4 0x00 0xff 0xff 0x00 0x00 + +# CHECK: nc 0(256,%r1), 0 +0xd4 0xff 0x10 0x00 0x00 0x00 + +# CHECK: nc 0(256,%r15), 0 +0xd4 0xff 0xf0 0x00 0x00 0x00 + # CHECK: ngr %r0, %r0 0xb9 0x80 0x00 0x00 @@ -4864,6 +5434,42 @@ # CHECK: ny %r15, 0 0xe3 0xf0 0x00 0x00 0x00 0x54 +# CHECK: oc 0(1), 0 +0xd6 0x00 0x00 0x00 0x00 0x00 + +# CHECK: oc 0(1), 0(%r1) +0xd6 0x00 0x00 0x00 0x10 0x00 + +# CHECK: oc 0(1), 0(%r15) +0xd6 0x00 0x00 0x00 0xf0 0x00 + +# CHECK: oc 0(1), 4095 +0xd6 0x00 0x00 0x00 0x0f 0xff + +# CHECK: oc 0(1), 4095(%r1) +0xd6 0x00 0x00 0x00 0x1f 0xff + +# CHECK: oc 0(1), 4095(%r15) +0xd6 0x00 0x00 0x00 0xff 0xff + +# CHECK: oc 0(1,%r1), 0 +0xd6 0x00 0x10 0x00 0x00 0x00 + +# CHECK: oc 0(1,%r15), 0 +0xd6 0x00 0xf0 0x00 0x00 0x00 + +# CHECK: oc 4095(1,%r1), 0 +0xd6 0x00 0x1f 0xff 0x00 0x00 + +# CHECK: oc 4095(1,%r15), 0 +0xd6 0x00 0xff 0xff 0x00 0x00 + +# CHECK: oc 0(256,%r1), 0 +0xd6 
0xff 0x10 0x00 0x00 0x00 + +# CHECK: oc 0(256,%r15), 0 +0xd6 0xff 0xf0 0x00 0x00 0x00 + # CHECK: ogr %r0, %r0 0xb9 0x81 0x00 0x00 @@ -5098,6 +5704,36 @@ # CHECK: oy %r15, 0 0xe3 0xf0 0x00 0x00 0x00 0x56 +# CHECK: pfd 0, -524288 +0xe3 0x00 0x00 0x00 0x80 0x36 + +# CHECK: pfd 0, -1 +0xe3 0x00 0x0f 0xff 0xff 0x36 + +# CHECK: pfd 0, 0 +0xe3 0x00 0x00 0x00 0x00 0x36 + +# CHECK: pfd 0, 1 +0xe3 0x00 0x00 0x01 0x00 0x36 + +# CHECK: pfd 0, 524287 +0xe3 0x00 0x0f 0xff 0x7f 0x36 + +# CHECK: pfd 0, 0(%r1) +0xe3 0x00 0x10 0x00 0x00 0x36 + +# CHECK: pfd 0, 0(%r15) +0xe3 0x00 0xf0 0x00 0x00 0x36 + +# CHECK: pfd 0, 524287(%r1,%r15) +0xe3 0x01 0xff 0xff 0x7f 0x36 + +# CHECK: pfd 0, 524287(%r15,%r1) +0xe3 0x0f 0x1f 0xff 0x7f 0x36 + +# CHECK: pfd 15, 0 +0xe3 0xf0 0x00 0x00 0x00 0x36 + # CHECK: risbg %r0, %r0, 0, 0, 0 0xec 0x00 0x00 0x00 0x00 0x55 @@ -6148,6 +6784,18 @@ # CHECK: srk %r2, %r3, %r4 0xb9 0xf9 0x40 0x23 +# CHECK: srst %r0, %r0 +0xb2 0x5e 0x00 0x00 + +# CHECK: srst %r0, %r15 +0xb2 0x5e 0x00 0x0f + +# CHECK: srst %r15, %r0 +0xb2 0x5e 0x00 0xf0 + +# CHECK: srst %r7, %r8 +0xb2 0x5e 0x00 0x78 + # CHECK: stc %r0, 0 0x42 0x00 0x00 0x00 @@ -6169,6 +6817,36 @@ # CHECK: stc %r15, 0 0x42 0xf0 0x00 0x00 +# CHECK: stch %r0, -524288 +0xe3 0x00 0x00 0x00 0x80 0xc3 + +# CHECK: stch %r0, -1 +0xe3 0x00 0x0f 0xff 0xff 0xc3 + +# CHECK: stch %r0, 0 +0xe3 0x00 0x00 0x00 0x00 0xc3 + +# CHECK: stch %r0, 1 +0xe3 0x00 0x00 0x01 0x00 0xc3 + +# CHECK: stch %r0, 524287 +0xe3 0x00 0x0f 0xff 0x7f 0xc3 + +# CHECK: stch %r0, 0(%r1) +0xe3 0x00 0x10 0x00 0x00 0xc3 + +# CHECK: stch %r0, 0(%r15) +0xe3 0x00 0xf0 0x00 0x00 0xc3 + +# CHECK: stch %r0, 524287(%r1,%r15) +0xe3 0x01 0xff 0xff 0x7f 0xc3 + +# CHECK: stch %r0, 524287(%r15,%r1) +0xe3 0x0f 0x1f 0xff 0x7f 0xc3 + +# CHECK: stch %r15, 0 +0xe3 0xf0 0x00 0x00 0x00 0xc3 + # CHECK: stcy %r0, -524288 0xe3 0x00 0x00 0x00 0x80 0x72 @@ -6352,6 +7030,66 @@ # CHECK: sth %r15, 0 0x40 0xf0 0x00 0x00 +# CHECK: sthh %r0, -524288 +0xe3 0x00 0x00 0x00 0x80 0xc7 + +# CHECK: 
sthh %r0, -1 +0xe3 0x00 0x0f 0xff 0xff 0xc7 + +# CHECK: sthh %r0, 0 +0xe3 0x00 0x00 0x00 0x00 0xc7 + +# CHECK: sthh %r0, 1 +0xe3 0x00 0x00 0x01 0x00 0xc7 + +# CHECK: sthh %r0, 524287 +0xe3 0x00 0x0f 0xff 0x7f 0xc7 + +# CHECK: sthh %r0, 0(%r1) +0xe3 0x00 0x10 0x00 0x00 0xc7 + +# CHECK: sthh %r0, 0(%r15) +0xe3 0x00 0xf0 0x00 0x00 0xc7 + +# CHECK: sthh %r0, 524287(%r1,%r15) +0xe3 0x01 0xff 0xff 0x7f 0xc7 + +# CHECK: sthh %r0, 524287(%r15,%r1) +0xe3 0x0f 0x1f 0xff 0x7f 0xc7 + +# CHECK: sthh %r15, 0 +0xe3 0xf0 0x00 0x00 0x00 0xc7 + +# CHECK: stfh %r0, -524288 +0xe3 0x00 0x00 0x00 0x80 0xcb + +# CHECK: stfh %r0, -1 +0xe3 0x00 0x0f 0xff 0xff 0xcb + +# CHECK: stfh %r0, 0 +0xe3 0x00 0x00 0x00 0x00 0xcb + +# CHECK: stfh %r0, 1 +0xe3 0x00 0x00 0x01 0x00 0xcb + +# CHECK: stfh %r0, 524287 +0xe3 0x00 0x0f 0xff 0x7f 0xcb + +# CHECK: stfh %r0, 0(%r1) +0xe3 0x00 0x10 0x00 0x00 0xcb + +# CHECK: stfh %r0, 0(%r15) +0xe3 0x00 0xf0 0x00 0x00 0xcb + +# CHECK: stfh %r0, 524287(%r1,%r15) +0xe3 0x01 0xff 0xff 0x7f 0xcb + +# CHECK: stfh %r0, 524287(%r15,%r1) +0xe3 0x0f 0x1f 0xff 0x7f 0xcb + +# CHECK: stfh %r15, 0 +0xe3 0xf0 0x00 0x00 0x00 0xcb + # CHECK: sthy %r0, -524288 0xe3 0x00 0x00 0x00 0x80 0x70 @@ -6691,6 +7429,141 @@ # CHECK: sy %r15, 0 0xe3 0xf0 0x00 0x00 0x00 0x5b +# CHECK: tm 0, 0 +0x91 0x00 0x00 0x00 + +# CHECK: tm 4095, 0 +0x91 0x00 0x0f 0xff + +# CHECK: tm 0, 255 +0x91 0xff 0x00 0x00 + +# CHECK: tm 0(%r1), 42 +0x91 0x2a 0x10 0x00 + +# CHECK: tm 0(%r15), 42 +0x91 0x2a 0xf0 0x00 + +# CHECK: tm 4095(%r1), 42 +0x91 0x2a 0x1f 0xff + +# CHECK: tm 4095(%r15), 42 +0x91 0x2a 0xff 0xff + +# CHECK: tmhh %r0, 0 +0xa7 0x02 0x00 0x00 + +# CHECK: tmhh %r0, 32768 +0xa7 0x02 0x80 0x00 + +# CHECK: tmhh %r0, 65535 +0xa7 0x02 0xff 0xff + +# CHECK: tmhh %r15, 0 +0xa7 0xf2 0x00 0x00 + +# CHECK: tmhl %r0, 0 +0xa7 0x03 0x00 0x00 + +# CHECK: tmhl %r0, 32768 +0xa7 0x03 0x80 0x00 + +# CHECK: tmhl %r0, 65535 +0xa7 0x03 0xff 0xff + +# CHECK: tmhl %r15, 0 +0xa7 0xf3 0x00 0x00 + +# CHECK: tmlh %r0, 0 +0xa7 
0x00 0x00 0x00 + +# CHECK: tmlh %r0, 32768 +0xa7 0x00 0x80 0x00 + +# CHECK: tmlh %r0, 65535 +0xa7 0x00 0xff 0xff + +# CHECK: tmlh %r15, 0 +0xa7 0xf0 0x00 0x00 + +# CHECK: tmll %r0, 0 +0xa7 0x01 0x00 0x00 + +# CHECK: tmll %r0, 32768 +0xa7 0x01 0x80 0x00 + +# CHECK: tmll %r0, 65535 +0xa7 0x01 0xff 0xff + +# CHECK: tmll %r15, 0 +0xa7 0xf1 0x00 0x00 + +# CHECK: tmy -524288, 0 +0xeb 0x00 0x00 0x00 0x80 0x51 + +# CHECK: tmy -1, 0 +0xeb 0x00 0x0f 0xff 0xff 0x51 + +# CHECK: tmy 0, 0 +0xeb 0x00 0x00 0x00 0x00 0x51 + +# CHECK: tmy 1, 0 +0xeb 0x00 0x00 0x01 0x00 0x51 + +# CHECK: tmy 524287, 0 +0xeb 0x00 0x0f 0xff 0x7f 0x51 + +# CHECK: tmy 0, 255 +0xeb 0xff 0x00 0x00 0x00 0x51 + +# CHECK: tmy 0(%r1), 42 +0xeb 0x2a 0x10 0x00 0x00 0x51 + +# CHECK: tmy 0(%r15), 42 +0xeb 0x2a 0xf0 0x00 0x00 0x51 + +# CHECK: tmy 524287(%r1), 42 +0xeb 0x2a 0x1f 0xff 0x7f 0x51 + +# CHECK: tmy 524287(%r15), 42 +0xeb 0x2a 0xff 0xff 0x7f 0x51 + +# CHECK: xc 0(1), 0 +0xd7 0x00 0x00 0x00 0x00 0x00 + +# CHECK: xc 0(1), 0(%r1) +0xd7 0x00 0x00 0x00 0x10 0x00 + +# CHECK: xc 0(1), 0(%r15) +0xd7 0x00 0x00 0x00 0xf0 0x00 + +# CHECK: xc 0(1), 4095 +0xd7 0x00 0x00 0x00 0x0f 0xff + +# CHECK: xc 0(1), 4095(%r1) +0xd7 0x00 0x00 0x00 0x1f 0xff + +# CHECK: xc 0(1), 4095(%r15) +0xd7 0x00 0x00 0x00 0xff 0xff + +# CHECK: xc 0(1,%r1), 0 +0xd7 0x00 0x10 0x00 0x00 0x00 + +# CHECK: xc 0(1,%r15), 0 +0xd7 0x00 0xf0 0x00 0x00 0x00 + +# CHECK: xc 4095(1,%r1), 0 +0xd7 0x00 0x1f 0xff 0x00 0x00 + +# CHECK: xc 4095(1,%r15), 0 +0xd7 0x00 0xff 0xff 0x00 0x00 + +# CHECK: xc 0(256,%r1), 0 +0xd7 0xff 0x10 0x00 0x00 0x00 + +# CHECK: xc 0(256,%r15), 0 +0xd7 0xff 0xf0 0x00 0x00 0x00 + # CHECK: xgr %r0, %r0 0xb9 0x82 0x00 0x00 diff --git a/test/MC/Disassembler/SystemZ/lit.local.cfg b/test/MC/Disassembler/SystemZ/lit.local.cfg index 1da00ea..b12af09 100644 --- a/test/MC/Disassembler/SystemZ/lit.local.cfg +++ b/test/MC/Disassembler/SystemZ/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.txt'] - targets = 
set(config.root.targets_to_build.split()) if not 'SystemZ' in targets: config.unsupported = True diff --git a/test/MC/Disassembler/X86/intel-syntax-32.txt b/test/MC/Disassembler/X86/intel-syntax-32.txt index 08bae6e..2298823 100644 --- a/test/MC/Disassembler/X86/intel-syntax-32.txt +++ b/test/MC/Disassembler/X86/intel-syntax-32.txt @@ -1,13 +1,31 @@ # RUN: llvm-mc --disassemble %s -triple=i386 --output-asm-variant=1 | FileCheck %s -# CHECK: sgdt +# CHECK: sgdt opaque ptr [eax] 0x0f 0x01 0x00 -# CHECK: sidt +# CHECK: sidt opaque ptr [eax] 0x0f 0x01 0x08 -# CHECK: lgdt +# CHECK: lgdt opaque ptr [eax] 0x0f 0x01 0x10 -# CHECK: lidt +# CHECK: lidt opaque ptr [eax] 0x0f 0x01 0x18 + +# CHECK: mov al, byte ptr [878082192] +0xa0 0x90 0x78 0x56 0x34 + +# CHECK: mov ax, word ptr [878082192] +0x66 0xa1 0x90 0x78 0x56 0x34 + +# CHECK: mov eax, dword ptr [878082192] +0xa1 0x90 0x78 0x56 0x34 + +# CHECK: mov byte ptr [878082192], al +0xa2 0x90 0x78 0x56 0x34 + +# CHECK: mov word ptr [878082192], ax +0x66 0xa3 0x90 0x78 0x56 0x34 + +# CHECK: mov dword ptr [878082192], eax +0xa3 0x90 0x78 0x56 0x34 diff --git a/test/MC/Disassembler/X86/intel-syntax.txt b/test/MC/Disassembler/X86/intel-syntax.txt index 6c0c239..3689525 100644 --- a/test/MC/Disassembler/X86/intel-syntax.txt +++ b/test/MC/Disassembler/X86/intel-syntax.txt @@ -105,6 +105,9 @@ # CHECK: retf 0x66 0xcb +# CHECK: vshufpd xmm0, xmm1, xmm2, 1 +0xc5 0xf1 0xc6 0xc2 0x01 + # CHECK: vpgatherqq ymm2, qword ptr [rdi + 2*ymm1], ymm0 0xc4 0xe2 0xfd 0x91 0x14 0x4f @@ -119,3 +122,33 @@ # CHECK: xsaveopt64 opaque ptr [rax] 0x48 0x0f 0xae 0x30 + +# CHECK: movabs al, byte ptr [-6066930261531658096] +0xa0 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab + +# CHECK: movabs al, byte ptr [-6066930261531658096] +0x48 0xa0 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab + +# CHECK: movabs ax, word ptr [-6066930261531658096] +0x66 0xa1 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab + +# CHECK: movabs eax, dword ptr [-6066930261531658096] +0xa1 0x90 0x78 0x56 0x34 0x12 
0xef 0xcd 0xab + +# CHECK: movabs rax, qword ptr [-6066930261531658096] +0x48 0xa1 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab + +# CHECK: movabs byte ptr [-6066930261531658096], al +0xa2 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab + +# CHECK: movabs byte ptr [-6066930261531658096], al +0x48 0xa2 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab + +# CHECK: movabs word ptr [-6066930261531658096], ax +0x66 0xa3 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab + +# CHECK: movabs dword ptr [-6066930261531658096], eax +0xa3 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab + +# CHECK: movabs qword ptr [-6066930261531658096], rax +0x48 0xa3 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab diff --git a/test/MC/Disassembler/X86/lit.local.cfg b/test/MC/Disassembler/X86/lit.local.cfg index 6211b3e..ba763cf 100644 --- a/test/MC/Disassembler/X86/lit.local.cfg +++ b/test/MC/Disassembler/X86/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.txt'] - targets = set(config.root.targets_to_build.split()) if not 'X86' in targets: config.unsupported = True diff --git a/test/MC/Disassembler/X86/prefixes.txt b/test/MC/Disassembler/X86/prefixes.txt new file mode 100644 index 0000000..56596e3 --- /dev/null +++ b/test/MC/Disassembler/X86/prefixes.txt @@ -0,0 +1,59 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s + +# CHECK: lock +# CHECK-NEXT: orl $16, %fs:776 +0xf0 0x64 0x83 0x0c 0x25 0x08 0x03 0x00 0x00 0x10 + +# CHECK: movq %fs:768, %rdi +0x64 0x48 0x8b 0x3c 0x25 0x00 0x03 0x00 0x00 + +# CHECK: rep +# CHECK-NEXT: stosq +0xf3 0x48 0xab + +# CHECK: rep +# CHECK-NEXT: stosl +0xf3 0x67 0x48 0xab + +# CHECK: movl 32(%rbp), %eax +0x8b 0x45 0x20 + +# CHECK: movl %es:32(%rbp), %eax +0x26 0x8b 0x45 0x20 + +# CHECK: movl %es:32(%rbp), %eax +0x2e 0x26 0x8b 0x45 0x20 + +# Test that multiple prefixes stack. 
+# (todo- the correct disassembly is actually more like "es movl %cs:32(%rbp), %eax" +# but we don't support that) +# CHECK: movl %cs:32(%rbp), %eax +0x26 0x2e 0x8b 0x45 0x20 + +# Test that 0xf3 as part of the opcode works. +# CHECK: cvtdq2pd (%rax), %xmm0 +0xf3 0x0f 0xe6 0x00 + +# CHECK: pause +0xf3 0x90 + +# CHECK: nop +0x90 + +# CHECK: lock +# CHECK-NEXT: nop +0xf0 0x90 + +# Test that multiple redundant prefixes work (redundant, but valid x86). +# CHECK: rep +# CHECK-NEXT: rep +# CHECK-NEXT: stosq +0xf3 0xf3 0x48 0xab + +# Test that a prefix on it's own works. It's debatable as to if this is +# something that is considered valid, but however as LLVM's own disassembler +# has decided to disassemble prefixes as being separate opcodes, it therefore +# should be capable of re-consuming it's own output. +# CHECK: rep +0xf3 +# ***IMPORTANT ^-- this must be at the end of the file to be a valid test *** diff --git a/test/MC/Disassembler/X86/simple-tests.txt b/test/MC/Disassembler/X86/simple-tests.txt index 940b1f7..7ca0874 100644 --- a/test/MC/Disassembler/X86/simple-tests.txt +++ b/test/MC/Disassembler/X86/simple-tests.txt @@ -129,6 +129,9 @@ # CHECK: vcvtsd2si %xmm0, %rax 0xc4 0xe1 0xfb 0x2d 0xc0 +# CHECK: vcvtsd2si %xmm0, %rax +0xc4 0xe1 0xff 0x2d 0xc0 + # CHECK: vmaskmovpd %xmm0, %xmm1, (%rax) 0xc4 0xe2 0x71 0x2f 0x00 @@ -260,6 +263,9 @@ # CHECK: vmovups %ymm0, %ymm1 0xc5 0xfc 0x11 0xc1 +# CHECK: vmovups %ymm0, %ymm1 +0xc4 0xe1 0xfc 0x11 0xc1 + # CHECK: vmovaps %ymm1, %ymm0 0xc5 0xfc 0x28 0xc1 @@ -722,9 +728,66 @@ # CHECK: vfmaddss %xmm1, (%rcx), %xmm0, %xmm0 0xc4 0xe3 0x79 0x6a 0x01 0x10 +# CHECK: vfmaddss (%rcx), %xmm1, %xmm0, %xmm0 +0xc4 0xe3 0xfd 0x6a 0x01 0x10 + +# CHECK: vfmaddss %xmm1, (%rcx), %xmm0, %xmm0 +0xc4 0xe3 0x7d 0x6a 0x01 0x10 + +# CHECK: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0 +0xc4 0xe3 0xf9 0x6a 0xc2 0x10 + +# CHECK: vfmaddss %xmm1, %xmm2, %xmm0, %xmm0 +0xc4 0xe3 0x79 0x6a 0xc2 0x10 + +# CHECK: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0 +0xc4 0xe3 0xfd 0x6a 
0xc2 0x10 + +# CHECK: vfmaddss %xmm1, %xmm2, %xmm0, %xmm0 +0xc4 0xe3 0x7d 0x6a 0xc2 0x10 + +# CHECK: vfmaddps (%rcx), %xmm1, %xmm0, %xmm0 +0xc4 0xe3 0xf9 0x68 0x01 0x10 + +# CHECK: vfmaddps %xmm1, (%rcx), %xmm0, %xmm0 +0xc4 0xe3 0x79 0x68 0x01 0x10 + +# CHECK: vfmaddps %xmm1, %xmm2, %xmm0, %xmm0 +0xc4 0xe3 0x79 0x68 0xc2 0x10 + +# CHECK: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 +0xc4 0xe3 0xf9 0x68 0xc2 0x10 + +# CHECK: vfmaddps (%rcx), %ymm1, %ymm0, %ymm0 +0xc4 0xe3 0xfd 0x68 0x01 0x10 + +# CHECK: vfmaddps %ymm1, (%rcx), %ymm0, %ymm0 +0xc4 0xe3 0x7d 0x68 0x01 0x10 + +# CHECK: vfmaddps %ymm1, %ymm2, %ymm0, %ymm0 +0xc4 0xe3 0x7d 0x68 0xc2 0x10 + +# CHECK: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 +0xc4 0xe3 0xfd 0x68 0xc2 0x10 + +# CHECK: vpermil2ps $0, %xmm4, %xmm3, %xmm2, %xmm1 +0xc4 0xe3 0x69 0x48 0xcb 0x40 + # CHECK: vpermil2ps $1, 4(%rax), %xmm2, %xmm3, %xmm0 0xc4 0xe3 0xe1 0x48 0x40 0x04 0x21 +# CHECK: vpermil2ps $2, (%rax), %ymm1, %ymm5, %ymm6 +0xc4 0xe3 0xd5 0x48 0x30 0x12 + +# CHECK: vpermil2ps $3, %xmm1, (%rax), %xmm3, %xmm4 +0xc4 0xe3 0x61 0x48 0x20 0x13 + +# CHECK: vpermil2ps $0, %ymm4, %ymm4, %ymm2, %ymm2 +0xc4 0xe3 0x6d 0x48 0xd4 0x40 + +# CHECK: vpermil2pd $1, %ymm1, 4(%rax), %ymm1, %ymm0 +0xc4 0xe3 0x75 0x49 0x40 0x04 0x11 + # CHECK: vgatherdpd %xmm0, (%rdi,%xmm1,2), %xmm2 0xc4 0xe2 0xf9 0x92 0x14 0x4f @@ -796,3 +859,60 @@ # CHECK: xacquire # CHECK-NEXT: xchgl %ebx, (%rax) 0xf2 0x87 0x18 + +# CHECK: bextr $2814, %edi, %eax +0x8f 0xea 0x78 0x10 0xc7 0xfe 0x0a 0x00 0x00 + +# CHECK: blci %rdi, %rax +0x8f 0xe9 0xf8 0x02 0xf7 + +# CHECK: vpcmov %xmm1, %xmm2, %xmm3, %xmm4 +0x8f 0xe8 0x60 0xa2 0xe2 0x10 + +# CHECK: vpcmov (%rax), %xmm2, %xmm3, %xmm4 +0x8f 0xe8 0xe0 0xa2 0x20 0x20 + +# CHECK: vpcmov %xmm1, (%rax), %xmm3, %xmm4 +0x8f 0xe8 0x60 0xa2 0x20 0x10 + +# CHECK: vpcmov %ymm1, %ymm2, %ymm3, %ymm4 +0x8f 0xe8 0x64 0xa2 0xe2 0x10 + +# CHECK: vpcmov (%rax), %ymm2, %ymm3, %ymm4 +0x8f 0xe8 0xe4 0xa2 0x20 0x20 + +# CHECK: vpcmov %ymm1, (%rax), %ymm3, %ymm4 +0x8f 0xe8 
0x64 0xa2 0x20 0x10 + +# CHECK: vpcomb $55, %xmm6, %xmm4, %xmm2 +0x8f 0xe8 0x58 0xcc 0xd6 0x37 + +# CHECK: vpcomb $56, 8(%rax), %xmm3, %xmm2 +0x8f 0xe8 0x60 0xcc 0x50 0x08 0x38 + +# CHECK: vpmacsdd %xmm4, %xmm6, %xmm4, %xmm2 +0x8f 0xe8 0x58 0x9e 0xd6 0x40 +# CHECK: vpmacsdd %xmm4, (%rax,%rcx), %xmm4, %xmm3 +0x8f 0xe8 0x58 0x9e 0x1c 0x08 0x40 + +# CHECK: vprotd (%rax), %xmm0, %xmm3 +0x8f 0xe9 0xf8 0x92 0x18 +# CHECK: vprotd %xmm2, (%rax,%rcx), %xmm4 +0x8f 0xe9 0x68 0x92 0x24 0x08 +# CHECK: vprotd %xmm5, %xmm3, %xmm2 +0x8f 0xe9 0x50 0x92 0xd3 +# CHECK: vprotd $43, (%rcx), %xmm6 +0x8f 0xe8 0x78 0xc2 0x31 0x2b +# CHECK: vprotd $44, (%rax,%rcx), %xmm7 +0x8f 0xe8 0x78 0xc2 0x3c 0x08 0x2c +# CHECK: vprotd $45, %xmm4, %xmm4 +0x8f 0xe8 0x78 0xc2 0xe4 0x2d + +# CHECK: vfrczps 4(%rax), %xmm3 +0x8f 0xe9 0x78 0x80 0x58 0x04 +# CHECK: vfrczps %xmm6, %xmm5 +0x8f 0xe9 0x78 0x80 0xee +# CHECK: vfrczps (%rcx), %xmm1 +0x8f 0xe9 0x78 0x80 0x09 +# CHECK: vfrczps %ymm2, %ymm4 +0x8f 0xe9 0x7c 0x80 0xe2 diff --git a/test/MC/Disassembler/X86/x86-32.txt b/test/MC/Disassembler/X86/x86-32.txt index 76d67d3..b6a62c4 100644 --- a/test/MC/Disassembler/X86/x86-32.txt +++ b/test/MC/Disassembler/X86/x86-32.txt @@ -648,3 +648,51 @@ # CHECK: adoxl (%eax), %eax 0xf3 0x0f 0x38 0xf6 0x00 + +# CHECK: movb 878082192, %al +0xa0 0x90 0x78 0x56 0x34 + +# CHECK: movw 878082192, %ax +0x66 0xa1 0x90 0x78 0x56 0x34 + +# CHECK: movl 878082192, %eax +0xa1 0x90 0x78 0x56 0x34 + +# CHECK: movb %al, 878082192 +0xa2 0x90 0x78 0x56 0x34 + +# CHECK: movw %ax, 878082192 +0x66 0xa3 0x90 0x78 0x56 0x34 + +# CHECK: movl %eax, 878082192 +0xa3 0x90 0x78 0x56 0x34 + +# CHECK: incl %ecx +0xff 0xc1 + +# CHECK: decl %ecx +0xff 0xc9 + +# CHECK: incw %cx +0x66 0xff 0xc1 + +# CHECK: decw %cx +0x66 0xff 0xc9 + +# CHECK: incb %cl +0xfe 0xc1 + +# CHECK: decb %cl +0xfe 0xc9 + +# CHECK: incl %ecx +0x41 + +# CHECK: decl %ecx +0x49 + +# CHECK: movq %xmm0, %xmm0 +0xf3 0x0f 0x7e 0xc0 + +# CHECK: vmovq %xmm0, %xmm0 +0xc5 0xfa 0x7e 0xc0 diff 
--git a/test/MC/Disassembler/X86/x86-64.txt b/test/MC/Disassembler/X86/x86-64.txt index bf1fa21..8c6bc0e 100644 --- a/test/MC/Disassembler/X86/x86-64.txt +++ b/test/MC/Disassembler/X86/x86-64.txt @@ -157,3 +157,87 @@ # CHECK: movabsq %rax, -6066930261531658096 0x48 0xa3 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab + +# CHECK: sha1rnds4 $1, %xmm1, %xmm2 +0x0f 0x3a 0xcc 0xd1 0x01 + +# CHECK: sha1rnds4 $1, (%rax), %xmm2 +0x0f 0x3a 0xcc 0x10 0x01 + +# CHECK: sha1nexte %xmm1, %xmm2 +0x0f 0x38 0xc8 0xd1 + +# CHECK: sha1nexte (%rax), %xmm2 +0x0f 0x38 0xc8 0x10 + +# CHECK: sha1msg1 %xmm1, %xmm2 +0x0f 0x38 0xc9 0xd1 + +# CHECK: sha1msg1 (%rax), %xmm2 +0x0f 0x38 0xc9 0x10 + +# CHECK: sha1msg2 %xmm1, %xmm2 +0x0f 0x38 0xca 0xd1 + +# CHECK: sha1msg2 (%rax), %xmm2 +0x0f 0x38 0xca 0x10 + +# CHECK: sha256rnds2 (%rax), %xmm2 +0x0f 0x38 0xcb 0x10 + +# CHECK: sha256rnds2 %xmm1, %xmm2 +0x0f 0x38 0xcb 0xd1 + +# CHECK: sha256msg1 %xmm1, %xmm2 +0x0f 0x38 0xcc 0xd1 + +# CHECK: sha256msg1 (%rax), %xmm2 +0x0f 0x38 0xcc 0x10 + +# CHECK: sha256msg2 %xmm1, %xmm2 +0x0f 0x38 0xcd 0xd1 + +# CHECK: sha256msg2 (%rax), %xmm2 +0x0f 0x38 0xcd 0x10 + +# CHECK: incl %ecx +0xff 0xc1 + +# CHECK: decl %ecx +0xff 0xc9 + +# CHECK: incw %cx +0x66 0xff 0xc1 + +# CHECK: decw %cx +0x66 0xff 0xc9 + +# CHECK: incb %cl +0xfe 0xc1 + +# CHECK: decb %cl +0xfe 0xc9 + +# CHECK: incq %rcx +0x48 0xff 0xc1 + +# CHECK: decq %rcx +0x48 0xff 0xc9 + +# CHECK: movq %xmm0, %xmm0 +0xf3 0x0f 0x7e 0xc0 + +# CHECK: vmovq %xmm0, %xmm0 +0xc5 0xfa 0x7e 0xc0 + +# CHECK: vmovq %xmm0, %rax +0xc4 0xe1 0xf9 0x7e 0xc0 + +# CHECK: movd %xmm0, %rax +0x66 0x48 0x0f 0x7e 0xc0 + +# CHECK: pextrw $3, %xmm3, %ecx +0x66 0x0f 0x3a 0x15 0xd9 0x03 + +# CHECK: pextrw $3, %xmm3, (%rax) +0x66 0x0f 0x3a 0x15 0x18 0x03 diff --git a/test/MC/Disassembler/XCore/lit.local.cfg b/test/MC/Disassembler/XCore/lit.local.cfg index 15b6583..4d17d46 100644 --- a/test/MC/Disassembler/XCore/lit.local.cfg +++ b/test/MC/Disassembler/XCore/lit.local.cfg @@ -1,5 +1,3 @@ 
-config.suffixes = ['.txt'] - targets = set(config.root.targets_to_build.split()) if not 'XCore' in targets: config.unsupported = True diff --git a/test/MC/ELF/bad-relocation.s b/test/MC/ELF/bad-relocation.s new file mode 100644 index 0000000..1a66744 --- /dev/null +++ b/test/MC/ELF/bad-relocation.s @@ -0,0 +1,7 @@ +// RUN: not llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o /dev/null 2>&1 | FileCheck %s + +// CHECK: error: invalid variant 'BADRELOC' + + .text +foo: + leal .Lfoo@BADRELOC(%ebx), %eax diff --git a/test/MC/ELF/cfi-window-save.s b/test/MC/ELF/cfi-window-save.s new file mode 100644 index 0000000..c7d438a --- /dev/null +++ b/test/MC/ELF/cfi-window-save.s @@ -0,0 +1,51 @@ +// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s + +# Should use SPARC as the target to test this. However, SPARC does not +# use MC yet. + +f: + .cfi_startproc + nop + .cfi_window_save + nop + .cfi_endproc + +// CHECK: Section { +// CHECK: Name: .eh_frame +// CHECK-NEXT: Type: SHT_PROGBITS +// CHECK-NEXT: Flags [ +// CHECK-NEXT: SHF_ALLOC +// CHECK-NEXT: ] +// CHECK-NEXT: Address: 0x0 +// CHECK-NEXT: Offset: 0x48 +// CHECK-NEXT: Size: 48 +// CHECK-NEXT: Link: 0 +// CHECK-NEXT: Info: 0 +// CHECK-NEXT: AddressAlignment: 8 +// CHECK-NEXT: EntrySize: 0 +// CHECK-NEXT: Relocations [ +// CHECK-NEXT: ] +// CHECK-NEXT: SectionData ( +// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// CHECK-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 +// CHECK-NEXT: 0020: 00000000 02000000 00412D00 00000000 +// CHECK-NEXT: ) +// CHECK-NEXT: } + +// CHECK: Section { +// CHECK: Name: .rela.eh_frame +// CHECK-NEXT: Type: SHT_RELA +// CHECK-NEXT: Flags [ +// CHECK-NEXT: ] +// CHECK-NEXT: Address: 0x0 +// CHECK-NEXT: Offset: 0x390 +// CHECK-NEXT: Size: 24 +// CHECK-NEXT: Link: 7 +// CHECK-NEXT: Info: 4 +// CHECK-NEXT: AddressAlignment: 8 +// CHECK-NEXT: EntrySize: 24 +// CHECK-NEXT: Relocations [ +// CHECK-NEXT: 0x20 R_X86_64_PC32 .text 
0x0 +// CHECK-NEXT: ] +// CHECK: } + diff --git a/test/MC/ELF/comdat-dup-group-name.s b/test/MC/ELF/comdat-dup-group-name.s new file mode 100644 index 0000000..1181e2e --- /dev/null +++ b/test/MC/ELF/comdat-dup-group-name.s @@ -0,0 +1,41 @@ +// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -t | FileCheck %s + +// Test that we produce two foo sections, each in separate groups + +// CHECK: Index: 1 +// CHECK-NEXT: Name: .group + +// CHECK: Index: 2 +// CHECK-NEXT: Name: .group + +// CHECK: Index: 6 +// CHECK-NEXT: Name: .foo + +// CHECK: Index: 7 +// CHECK-NEXT: Name: .foo + +// CHECK: Symbols [ + +// CHECK: Name: f1 +// CHECK-NOT: } +// CHECK: Section: .group (0x1) + +// CHECK: Name: f2 +// CHECK-NOT: } +// CHECK: Section: .group (0x2) + +// CHECK: Name: .foo +// CHECK-NOT: } +// CHECK: Section: .foo (0x6) + +// CHECK: Name: .foo +// CHECK-NOT: } +// CHECK: Section: .foo (0x7) + + + .section .foo,"axG",@progbits,f1,comdat + nop + + .section .foo,"axG",@progbits,f2,comdat + nop + diff --git a/test/MC/ELF/comdat.s b/test/MC/ELF/comdat.s index 6dbe583..3e4a001 100644 --- a/test/MC/ELF/comdat.s +++ b/test/MC/ELF/comdat.s @@ -82,7 +82,7 @@ g1: nop - .section .bar,"axG",@progbits,g1,comdat + .section .bar,"ax?",@progbits nop .section .zed,"axG",@progbits,g2,comdat diff --git a/test/MC/ELF/comp-dir.s b/test/MC/ELF/comp-dir.s index 59e3d7d..1b91f64 100644 --- a/test/MC/ELF/comp-dir.s +++ b/test/MC/ELF/comp-dir.s @@ -1,7 +1,17 @@ +// REQUIRES: shell +// XFAIL: mingw // RUN: llvm-mc -triple=x86_64-linux-unknown -g -fdebug-compilation-dir=/test/comp/dir %s -filetype=obj -o %t.o // RUN: llvm-dwarfdump -debug-dump=info %t.o | FileCheck %s // CHECK: DW_AT_comp_dir [DW_FORM_string] ("{{([A-Za-z]:.*)?}}/test/comp/dir") +// RUN: mkdir -p %t.foo +// RUN: ln -sf %t.foo %t.bar +// RUN: cd %t.foo +// RUN: env PWD=%t.bar llvm-mc -triple=x86_64-linux-unknown -g %s -filetype=obj -o %t.o +// RUN: llvm-dwarfdump -debug-dump=info %t.o | FileCheck 
--check-prefix=PWD %s +// PWD: DW_AT_comp_dir [DW_FORM_string] ("{{.*}}.bar") + + f: nop diff --git a/test/MC/ELF/debug-line.s b/test/MC/ELF/debug-line.s index 6766f10..38ef828 100644 --- a/test/MC/ELF/debug-line.s +++ b/test/MC/ELF/debug-line.s @@ -1,6 +1,15 @@ // RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck %s -// Test that .debug_line is populated. +// Test that .debug_line is populated. TODO: This test should really be using +// llvm-dwarfdump, but it cannot parse this particular object file. The content +// of .debug_line was checked using GNU binutils: + +// $ objdump --dwarf=decodedline debug-line.o +// [...] +// File name Line number Starting address +// foo.c 4 0 +// foo.c 5 0x4 +// foo.c 6 0x5 // CHECK: Section { // CHECK: Name: .debug_line @@ -8,18 +17,18 @@ // CHECK-NEXT: Flags [ // CHECK-NEXT: ] // CHECK-NEXT: Address: 0x0 -// CHECK-NEXT: Offset: 0x48 -// CHECK-NEXT: Size: 72 +// CHECK-NEXT: Offset: 0x50 +// CHECK-NEXT: Size: 57 // CHECK-NEXT: Link: 0 // CHECK-NEXT: Info: 0 // CHECK-NEXT: AddressAlignment: 1 // CHECK-NEXT: EntrySize: 0 // CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 44000000 02001C00 00000101 FB0E0D00 + +// CHECK-NEXT: 0000: 35000000 02001C00 00000101 FB0E0D00 // CHECK-NEXT: 0010: 01010101 00000001 00000100 666F6F2E // CHECK-NEXT: 0020: 63000000 00000009 02000000 00000000 -// CHECK-NEXT: 0030: 00150205 00010100 09020000 00000000 -// CHECK-NEXT: 0040: 00001602 43000101 +// CHECK-NEXT: 0030: 00154B21 02080001 01 // CHECK-NEXT: ) // CHECK-NEXT: } @@ -34,3 +43,7 @@ .loc 1 5 0 .byte 0xc3 + + .loc 1 6 0 +l: + .quad l diff --git a/test/MC/ELF/file-double.s b/test/MC/ELF/file-double.s new file mode 100644 index 0000000..b0731e6 --- /dev/null +++ b/test/MC/ELF/file-double.s @@ -0,0 +1,47 @@ +// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -t | FileCheck %s + +// Test that a STT_FILE symbol and a symbol of the same name can coexist. 
+ +.file "foo.c" +.file "bar.c" + .globl foo.c +foo.c: + + .globl bar.c +bar.c: + +// CHECK: Symbol { +// CHECK: Name: foo.c (1) +// CHECK-NEXT: Value: 0x0 +// CHECK-NEXT: Size: 0 +// CHECK-NEXT: Binding: Local +// CHECK-NEXT: Type: File +// CHECK-NEXT: Other: 0 +// CHECK-NEXT: Section: (0xFFF1) +// CHECK-NEXT: } +// CHECK: Name: bar.c (7) +// CHECK-NEXT: Value: 0x0 +// CHECK-NEXT: Size: 0 +// CHECK-NEXT: Binding: Local +// CHECK-NEXT: Type: File +// CHECK-NEXT: Other: 0 +// CHECK-NEXT: Section: (0xFFF1) +// CHECK-NEXT: } +// CHECK: Symbol { +// CHECK: Name: bar.c (7) +// CHECK-NEXT: Value: 0x0 +// CHECK-NEXT: Size: 0 +// CHECK-NEXT: Binding: Global +// CHECK-NEXT: Type: None +// CHECK-NEXT: Other: 0 +// CHECK-NEXT: Section: .text (0x1) +// CHECK-NEXT: } +// CHECK: Symbol { +// CHECK: Name: foo.c (1) +// CHECK-NEXT: Value: 0x0 +// CHECK-NEXT: Size: 0 +// CHECK-NEXT: Binding: Global +// CHECK-NEXT: Type: None +// CHECK-NEXT: Other: 0 +// CHECK-NEXT: Section: .text (0x1) +// CHECK-NEXT: } diff --git a/test/MC/ELF/lit.local.cfg b/test/MC/ELF/lit.local.cfg index 56bf008..ba763cf 100644 --- a/test/MC/ELF/lit.local.cfg +++ b/test/MC/ELF/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.s'] - targets = set(config.root.targets_to_build.split()) if not 'X86' in targets: config.unsupported = True diff --git a/test/MC/ELF/relocation.s b/test/MC/ELF/relocation.s index d2c4f2e..6823075 100644 --- a/test/MC/ELF/relocation.s +++ b/test/MC/ELF/relocation.s @@ -14,11 +14,14 @@ bar: leaq foo@TPOFF(%rax), %rax # R_X86_64_TPOFF32 leaq foo@TLSLD(%rip), %rdi # R_X86_64_TLSLD leaq foo@dtpoff(%rax), %rcx # R_X86_64_DTPOFF32 + movabs foo@GOT, %rax # R_X86_64_GOT64 + movabs foo@GOTOFF, %rax # R_X86_64_GOTOFF64 pushq $bar movq foo(%rip), %rdx leaq foo-bar(%r14),%r14 addq $bar,%rax # R_X86_64_32S .quad foo@DTPOFF + movabsq $baz@TPOFF, %rax // CHECK: Section { // CHECK: Name: .rela.text @@ -34,11 +37,14 @@ bar: // CHECK-NEXT: 0x3B R_X86_64_TPOFF32 foo 0x0 // CHECK-NEXT: 0x42 
R_X86_64_TLSLD foo 0xFFFFFFFFFFFFFFFC // CHECK-NEXT: 0x49 R_X86_64_DTPOFF32 foo 0x0 -// CHECK-NEXT: 0x4E R_X86_64_32S .text 0x0 -// CHECK-NEXT: 0x55 R_X86_64_PC32 foo 0xFFFFFFFFFFFFFFFC -// CHECK-NEXT: 0x5C R_X86_64_PC32 foo 0x5C -// CHECK-NEXT: 0x63 R_X86_64_32S .text 0x0 -// CHECK-NEXT: 0x67 R_X86_64_DTPOFF64 foo 0x0 +// CHECK-NEXT: 0x4F R_X86_64_GOT64 foo 0x0 +// CHECK-NEXT: 0x59 R_X86_64_GOTOFF64 foo 0x0 +// CHECK-NEXT: 0x62 R_X86_64_32S .text 0x0 +// CHECK-NEXT: 0x69 R_X86_64_PC32 foo 0xFFFFFFFFFFFFFFFC +// CHECK-NEXT: 0x70 R_X86_64_PC32 foo 0x70 +// CHECK-NEXT: 0x77 R_X86_64_32S .text 0x0 +// CHECK-NEXT: 0x7B R_X86_64_DTPOFF64 foo 0x0 +// CHECK-NEXT: 0x85 R_X86_64_TPOFF64 baz 0x0 // CHECK-NEXT: ] // CHECK-NEXT: } diff --git a/test/MC/ELF/section.s b/test/MC/ELF/section.s index a679403..7dc23c2 100644 --- a/test/MC/ELF/section.s +++ b/test/MC/ELF/section.s @@ -5,12 +5,12 @@ .section .note.GNU-stack,"",@progbits .section .note.GNU-stack2,"",%progbits .section .note.GNU-,"",@progbits -.section -.note.GNU,"",@progbits +.section -.note.GNU,"","progbits" // CHECK: Name: .note.GNU-stack (56) -// CHECK: Name: .note.GNU-stack2 (143) -// CHECK: Name: .note.GNU- (160) -// CHECK: Name: -.note.GNU (132) +// CHECK: Name: .note.GNU-stack2 (153) +// CHECK: Name: .note.GNU- (170) +// CHECK: Name: -.note.GNU (142) // Test that the defaults are used @@ -120,11 +120,28 @@ bar: // CHECK-NEXT: EntrySize: 0 // CHECK-NEXT: } +.section .excluded,"e",@progbits + +// CHECK: Section { +// CHECK: Name: .excluded (92) +// CHECK-NEXT: Type: SHT_PROGBITS (0x1) +// CHECK-NEXT: Flags [ (0x80000000) +// CHECK-NEXT: SHF_EXCLUDE (0x80000000) +// CHECK-NEXT: ] +// CHECK-NEXT: Address: 0x0 +// CHECK-NEXT: Offset: 0x50 +// CHECK-NEXT: Size: 0 +// CHECK-NEXT: Link: 0 +// CHECK-NEXT: Info: 0 +// CHECK-NEXT: AddressAlignment: 1 +// CHECK-NEXT: EntrySize: 0 +// CHECK-NEXT: } + // Test that we handle the strings like gas .section bar-"foo" .section "foo" // CHECK: Section { -// CHECK: Name: bar-"foo" 
(171) +// CHECK: Name: bar-"foo" (181) // CHECK: Section { // CHECK: Name: foo (52) diff --git a/test/MC/ELF/symbol-names.s b/test/MC/ELF/symbol-names.s new file mode 100644 index 0000000..6459ac9 --- /dev/null +++ b/test/MC/ELF/symbol-names.s @@ -0,0 +1,12 @@ +// RUN: llvm-mc -triple i686-pc-linux -filetype=obj %s -o - | llvm-readobj -t | FileCheck %s + +// MC allows ?'s in symbol names as an extension. + +.text +.globl foo?bar +.type foo?bar, @function +foo?bar: +ret + +// CHECK: Symbol +// CHECK: Name: foo?bar diff --git a/test/MC/ELF/type.s b/test/MC/ELF/type.s index a5b9812..c2f3631 100644 --- a/test/MC/ELF/type.s +++ b/test/MC/ELF/type.s @@ -31,6 +31,16 @@ tls: .type tls,@tls_object .type tls,@gnu_indirect_function +// Test that "<type>" is accepted. +tls_quoted: + .global tls_quoted + .type tls_quoted,"tls_object" + +// Test that "<type>" is accepted. +tls_upper_case: + .global tls_upper_case + .type tls_upper_case,STT_TLS + // CHECK: Symbol { // CHECK: Name: bar // CHECK-NEXT: Value: 0x0 @@ -85,3 +95,21 @@ tls: // CHECK-NEXT: Other: 0 // CHECK-NEXT: Section: .text (0x1) // CHECK-NEXT: } +// CHECK-NEXT: Symbol { +// CHECK-NEXT: Name: tls_quoted +// CHECK-NEXT: Value: 0x0 +// CHECK-NEXT: Size: 0 +// CHECK-NEXT: Binding: Global +// CHECK-NEXT: Type: TLS +// CHECK-NEXT: Other: 0 +// CHECK-NEXT: Section: .text (0x1) +// CHECK-NEXT: } +// CHECK-NEXT: Symbol { +// CHECK-NEXT: Name: tls_upper_case +// CHECK-NEXT: Value: 0x0 +// CHECK-NEXT: Size: 0 +// CHECK-NEXT: Binding: Global +// CHECK-NEXT: Type: TLS +// CHECK-NEXT: Other: 0 +// CHECK-NEXT: Section: .text (0x1) +// CHECK-NEXT: } diff --git a/test/MC/MachO/ARM/lit.local.cfg b/test/MC/MachO/ARM/lit.local.cfg index 9f0d39d..8a3ba96 100644 --- a/test/MC/MachO/ARM/lit.local.cfg +++ b/test/MC/MachO/ARM/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.s', '.ll'] - targets = set(config.root.targets_to_build.split()) if not 'ARM' in targets: config.unsupported = True diff --git 
a/test/MC/MachO/bad-darwin-x86_64-32-bit-abs-addr.s b/test/MC/MachO/bad-darwin-x86_64-32-bit-abs-addr.s new file mode 100644 index 0000000..5fcd316 --- /dev/null +++ b/test/MC/MachO/bad-darwin-x86_64-32-bit-abs-addr.s @@ -0,0 +1,5 @@ +// RUN: not llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - 2> %t.err > %t +// RUN: FileCheck --check-prefix=CHECK-ERROR < %t.err %s + +mov $_f, %rsi +// CHECK-ERROR: 32-bit absolute addressing is not supported in 64-bit mode diff --git a/test/MC/MachO/bad-darwin-x86_64-diff-relocs.s b/test/MC/MachO/bad-darwin-x86_64-diff-relocs.s new file mode 100644 index 0000000..1ccebc5 --- /dev/null +++ b/test/MC/MachO/bad-darwin-x86_64-diff-relocs.s @@ -0,0 +1,5 @@ +// RUN: not llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - 2> %t.err > %t +// RUN: FileCheck --check-prefix=CHECK-ERROR < %t.err %s + +.quad _foo - _bar +// CHECK-ERROR: unsupported relocation with subtraction expression diff --git a/test/MC/MachO/bad-indirect-symbols.s b/test/MC/MachO/bad-indirect-symbols.s new file mode 100644 index 0000000..7c16e90 --- /dev/null +++ b/test/MC/MachO/bad-indirect-symbols.s @@ -0,0 +1,5 @@ +// RUN: not llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - 2> %t.err > %t +// RUN: FileCheck --check-prefix=CHECK-ERROR < %t.err %s + +x: .indirect_symbol _y +// CHECK-ERROR: 4:4: error: indirect symbol not in a symbol pointer or stub section diff --git a/test/MC/MachO/bss.s b/test/MC/MachO/bss.s new file mode 100644 index 0000000..15d490a --- /dev/null +++ b/test/MC/MachO/bss.s @@ -0,0 +1,17 @@ +// The purpose of this test is to verify that bss sections are emited correctly. 
+ +// RUN: llvm-mc -filetype=obj -triple i686-apple-darwin9 %s | llvm-readobj -s | FileCheck %s +// RUN: llvm-mc -filetype=obj -triple x86_64-apple-darwin9 %s | llvm-readobj -s | FileCheck %s + + .bss + .globl _g0 + .align 4 +_g0: + .long 0 + +// CHECK: Name: __bss (5F 5F 62 73 73 00 00 00 00 00 00 00 00 00 00 00) +// CHECK-NEXT: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00) +// CHECK-NEXT: Address: 0x0 +// CHECK-NEXT: Size: 0x4 +// CHECK-NEXT: Offset: 0 +// CHECK-NEXT: Alignment: 4 diff --git a/test/MC/MachO/darwin-x86_64-diff-reloc-assign-2.s b/test/MC/MachO/darwin-x86_64-diff-reloc-assign-2.s new file mode 100644 index 0000000..5d54879 --- /dev/null +++ b/test/MC/MachO/darwin-x86_64-diff-reloc-assign-2.s @@ -0,0 +1,38 @@ +// RUN: llvm-mc -triple x86_64-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s + +// Test case for rdar://9356266 + +// This tests that this expression does not cause a crash and produces these +// four relocation entries: +// Relocation information (__DATA,__data) 4 entries +// address pcrel length extern type scattered symbolnum/value +// 00000004 False long False SUB False 2 (__DATA,__data) +// 00000004 False long False UNSIGND False 2 (__DATA,__data) +// 00000000 False long False SUB False 2 (__DATA,__data) +// 00000000 False long False UNSIGND False 2 (__DATA,__data) + + .data +L_var1: +L_var2: +// This was working fine + .long L_var2 - L_var1 + + .set L_var3, . + .set L_var4, . 
+// But this was causing a crash + .long L_var4 - L_var3 + +// CHECK: ('_relocations', [ +// CHECK: # Relocation 0 +// CHECK: (('word-0', 0x4), +// CHECK: ('word-1', 0x54000002)), +// CHECK: # Relocation 1 +// CHECK: (('word-0', 0x4), +// CHECK: ('word-1', 0x4000002)), +// CHECK: # Relocation 2 +// CHECK: (('word-0', 0x0), +// CHECK: ('word-1', 0x54000002)), +// CHECK: # Relocation 3 +// CHECK: (('word-0', 0x0), +// CHECK: ('word-1', 0x4000002)), +// CHECK: ]) diff --git a/test/MC/MachO/lit.local.cfg b/test/MC/MachO/lit.local.cfg index 41a8434..ba763cf 100644 --- a/test/MC/MachO/lit.local.cfg +++ b/test/MC/MachO/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.s', '.ll'] - targets = set(config.root.targets_to_build.split()) if not 'X86' in targets: config.unsupported = True diff --git a/test/MC/Mips/elf_eflags.ll b/test/MC/Mips/elf_eflags.ll index 91217bc..9432dcf 100644 --- a/test/MC/Mips/elf_eflags.ll +++ b/test/MC/Mips/elf_eflags.ll @@ -16,53 +16,53 @@ ; Note that EF_MIPS_CPIC is set by -mabicalls which is the default on Linux ; TODO need to support -mno-abicalls -; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32 -relocation-model=static %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE32 %s -; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE32_PIC %s -; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -relocation-model=static %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE32R2 %s -; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE32R2_PIC %s -; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips -relocation-model=static %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE32R2-MICROMIPS %s -; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips %s -o - | llvm-readobj -h 
| FileCheck -check-prefix=CHECK-BE32R2-MICROMIPS_PIC %s +; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32 -relocation-model=static %s -print-hack-directives -o - | FileCheck -check-prefix=CHECK-BE32 %s +; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32 -print-hack-directives %s -o - | FileCheck -check-prefix=CHECK-BE32_PIC %s +; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32r2 -relocation-model=static %s -print-hack-directives -o - | FileCheck -check-prefix=CHECK-BE32R2 %s +; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32r2 -print-hack-directives %s -o - | FileCheck -check-prefix=CHECK-BE32R2_PIC %s +; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips -relocation-model=static -print-hack-directives %s -o - | FileCheck -check-prefix=CHECK-BE32R2-MICROMIPS %s +; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips -print-hack-directives %s -o - | FileCheck -check-prefix=CHECK-BE32R2-MICROMIPS_PIC %s -; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64 -relocation-model=static %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE64 %s -; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64 %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE64_PIC %s -; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64r2 -relocation-model=static %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE64R2 %s -; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64r2 %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE64R2_PIC %s +; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips64 -relocation-model=static %s -print-hack-directives -o - | FileCheck -check-prefix=CHECK-BE64 %s +; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips64 %s -print-hack-directives -o - | FileCheck -check-prefix=CHECK-BE64_PIC %s +; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips64r2 -relocation-model=static -print-hack-directives %s -o - | FileCheck 
-check-prefix=CHECK-BE64R2 %s +; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips64r2 -print-hack-directives %s -o - | FileCheck -check-prefix=CHECK-BE64R2_PIC %s -; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+mips16 -relocation-model=pic %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-LE32R2-MIPS16 %s +; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+mips16 -relocation-model=pic -print-hack-directives %s -o - | FileCheck -check-prefix=CHECK-LE32R2-MIPS16 %s ; 32(R1) bit with NO_REORDER and static -; CHECK-BE32: Flags [ (0x50001005) +; CHECK-BE32: .mips_hack_elf_flags 0x50001005 ; ; 32(R1) bit with NO_REORDER and PIC -; CHECK-BE32_PIC: Flags [ (0x50001007) +; CHECK-BE32_PIC: .mips_hack_elf_flags 0x50001007 ; ; 32R2 bit with NO_REORDER and static -; CHECK-BE32R2: Flags [ (0x70001005) +; CHECK-BE32R2: .mips_hack_elf_flags 0x70001005 ; ; 32R2 bit with NO_REORDER and PIC -; CHECK-BE32R2_PIC: Flags [ (0x70001007) +; CHECK-BE32R2_PIC: .mips_hack_elf_flags 0x70001007 ; ; 32R2 bit MICROMIPS with NO_REORDER and static -; CHECK-BE32R2-MICROMIPS: Flags [ (0x72001005) +; CHECK-BE32R2-MICROMIPS: .mips_hack_elf_flags 0x72001005 ; ; 32R2 bit MICROMIPS with NO_REORDER and PIC -;CHECK-BE32R2-MICROMIPS_PIC: Flags [ (0x72001007) +; CHECK-BE32R2-MICROMIPS_PIC: .mips_hack_elf_flags 0x72001007 ; ; 64(R1) bit with NO_REORDER and static -; CHECK-BE64: Flags [ (0x60000005) +; CHECK-BE64: .mips_hack_elf_flags 0x60000005 ; ; 64(R1) bit with NO_REORDER and PIC -; CHECK-BE64_PIC: Flags [ (0x60000007) +; CHECK-BE64_PIC: .mips_hack_elf_flags 0x60000007 ; ; 64R2 bit with NO_REORDER and static -; CHECK-BE64R2: Flags [ (0x80000005) +; CHECK-BE64R2: .mips_hack_elf_flags 0x80000005 ; ; 64R2 bit with NO_REORDER and PIC -; CHECK-BE64R2_PIC: Flags [ (0x80000007) +; CHECK-BE64R2_PIC: .mips_hack_elf_flags 0x80000007 ; ; 32R2 bit MIPS16 with PIC -; CHECK-LE32R2-MIPS16: Flags [ (0x74001006) - +; CHECK-LE32R2-MIPS16: .mips_hack_elf_flags 0x74001006 + 
define i32 @main() nounwind { entry: ret i32 0 diff --git a/test/MC/Mips/elf_eflags.s b/test/MC/Mips/elf_eflags.s new file mode 100644 index 0000000..c565964 --- /dev/null +++ b/test/MC/Mips/elf_eflags.s @@ -0,0 +1,5 @@ +// RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux %s -o -| llvm-readobj -h | FileCheck %s + + .mips_hack_elf_flags 0x50001005 + +// CHECK: Flags [ (0x50001005) diff --git a/test/MC/Mips/elf_st_other.ll b/test/MC/Mips/elf_st_other.ll index bc56c00..31294c8 100644 --- a/test/MC/Mips/elf_st_other.ll +++ b/test/MC/Mips/elf_st_other.ll @@ -1,12 +1,11 @@ ; This tests value of ELF st_other field for function symbol table entries. ; For microMIPS value should be equal to STO_MIPS_MICROMIPS. -; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips %s -o - | llvm-readobj -t | FileCheck %s +; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips -print-hack-directives %s -o - | FileCheck %s define i32 @main() nounwind { entry: ret i32 0 } -; CHECK: Name: main -; CHECK: Other: 128 +; CHECK: .mips_hack_stocg main, 128 diff --git a/test/MC/Mips/elf_st_other.s b/test/MC/Mips/elf_st_other.s new file mode 100644 index 0000000..2d63288 --- /dev/null +++ b/test/MC/Mips/elf_st_other.s @@ -0,0 +1,13 @@ +// RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux %s -o -| llvm-readobj -t | FileCheck %s + + .text + .globl main + .align 2 + .type main,@function + .set nomips16 # @main + .ent main + .mips_hack_stocg main, 128 +main: + +// CHECK: Name: main +// CHECK: Other: 128 diff --git a/test/MC/Mips/lit.local.cfg b/test/MC/Mips/lit.local.cfg index d2e3b28..1fa54b4 100644 --- a/test/MC/Mips/lit.local.cfg +++ b/test/MC/Mips/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp', '.s'] - targets = set(config.root.targets_to_build.split()) if not 'Mips' in targets: config.unsupported = True diff --git a/test/MC/Mips/micromips-alu-instructions.s b/test/MC/Mips/micromips-alu-instructions.s index 
c541e1a..276a83e 100644 --- a/test/MC/Mips/micromips-alu-instructions.s +++ b/test/MC/Mips/micromips-alu-instructions.s @@ -1,38 +1,79 @@ -# RUN: llvm-mc %s -triple=mipsel -show-encoding -mattr=micromips | FileCheck %s +# RUN: llvm-mc %s -triple=mipsel -show-encoding -mattr=micromips | FileCheck -check-prefix=CHECK-EL %s +# RUN: llvm-mc %s -triple=mips -show-encoding -mattr=micromips | FileCheck -check-prefix=CHECK-EB %s # Check that the assembler can handle the documented syntax # for arithmetic and logical instructions. #------------------------------------------------------------------------------ # Arithmetic and Logical Instructions #------------------------------------------------------------------------------ -# CHECK: add $9, $6, $7 # encoding: [0x10,0x49,0xe6,0x00] -# CHECK: addi $9, $6, 17767 # encoding: [0x67,0x45,0x26,0x11] -# CHECK: addiu $9, $6, -15001 # encoding: [0x67,0xc5,0x26,0x31] -# CHECK: addi $9, $6, 17767 # encoding: [0x67,0x45,0x26,0x11] -# CHECK: addiu $9, $6, -15001 # encoding: [0x67,0xc5,0x26,0x31] -# CHECK: addu $9, $6, $7 # encoding: [0x50,0x49,0xe6,0x00] -# CHECK: sub $9, $6, $7 # encoding: [0x90,0x49,0xe6,0x00] -# CHECK: subu $4, $3, $5 # encoding: [0xd0,0x21,0xa3,0x00] -# CHECK: neg $6, $7 # encoding: [0x90,0x31,0xe0,0x00] -# CHECK: negu $6, $7 # encoding: [0xd0,0x31,0xe0,0x00] -# CHECK: move $7, $8 # encoding: [0x50,0x39,0x08,0x00] -# CHECK: slt $3, $3, $5 # encoding: [0x50,0x1b,0xa3,0x00] -# CHECK: slti $3, $3, 103 # encoding: [0x67,0x00,0x63,0x90] -# CHECK: slti $3, $3, 103 # encoding: [0x67,0x00,0x63,0x90] -# CHECK: sltiu $3, $3, 103 # encoding: [0x67,0x00,0x63,0xb0] -# CHECK: sltu $3, $3, $5 # encoding: [0x90,0x1b,0xa3,0x00] -# CHECK: and $9, $6, $7 # encoding: [0x50,0x4a,0xe6,0x00] -# CHECK: andi $9, $6, 17767 # encoding: [0x67,0x45,0x26,0xd1] -# CHECK: andi $9, $6, 17767 # encoding: [0x67,0x45,0x26,0xd1] -# CHECK: or $3, $4, $5 # encoding: [0x90,0x1a,0xa4,0x00] -# CHECK: ori $9, $6, 17767 # encoding: [0x67,0x45,0x26,0x51] -# 
CHECK: xor $3, $3, $5 # encoding: [0x10,0x1b,0xa3,0x00] -# CHECK: xori $9, $6, 17767 # encoding: [0x67,0x45,0x26,0x71] -# CHECK: xori $9, $6, 17767 # encoding: [0x67,0x45,0x26,0x71] -# CHECK: nor $9, $6, $7 # encoding: [0xd0,0x4a,0xe6,0x00] -# CHECK: not $7, $8 # encoding: [0xd0,0x3a,0x08,0x00] -# CHECK: mul $9, $6, $7 # encoding: [0x10,0x4a,0xe6,0x00] -# CHECK: mult $9, $7 # encoding: [0x3c,0x8b,0xe9,0x00] -# CHECK: multu $9, $7 # encoding: [0x3c,0x9b,0xe9,0x00] +# Little endian +#------------------------------------------------------------------------------ +# CHECK-EL: add $9, $6, $7 # encoding: [0xe6,0x00,0x10,0x49] +# CHECK-EL: addi $9, $6, 17767 # encoding: [0x26,0x11,0x67,0x45] +# CHECK-EL: addiu $9, $6, -15001 # encoding: [0x26,0x31,0x67,0xc5] +# CHECK-EL: addi $9, $6, 17767 # encoding: [0x26,0x11,0x67,0x45] +# CHECK-EL: addiu $9, $6, -15001 # encoding: [0x26,0x31,0x67,0xc5] +# CHECK-EL: addu $9, $6, $7 # encoding: [0xe6,0x00,0x50,0x49] +# CHECK-EL: sub $9, $6, $7 # encoding: [0xe6,0x00,0x90,0x49] +# CHECK-EL: subu $4, $3, $5 # encoding: [0xa3,0x00,0xd0,0x21] +# CHECK-EL: neg $6, $7 # encoding: [0xe0,0x00,0x90,0x31] +# CHECK-EL: negu $6, $7 # encoding: [0xe0,0x00,0xd0,0x31] +# CHECK-EL: move $7, $8 # encoding: [0x08,0x00,0x50,0x39] +# CHECK-EL: slt $3, $3, $5 # encoding: [0xa3,0x00,0x50,0x1b] +# CHECK-EL: slti $3, $3, 103 # encoding: [0x63,0x90,0x67,0x00] +# CHECK-EL: slti $3, $3, 103 # encoding: [0x63,0x90,0x67,0x00] +# CHECK-EL: sltiu $3, $3, 103 # encoding: [0x63,0xb0,0x67,0x00] +# CHECK-EL: sltu $3, $3, $5 # encoding: [0xa3,0x00,0x90,0x1b] +# CHECK-EL: lui $9, 17767 # encoding: [0xa9,0x41,0x67,0x45] +# CHECK-EL: and $9, $6, $7 # encoding: [0xe6,0x00,0x50,0x4a] +# CHECK-EL: andi $9, $6, 17767 # encoding: [0x26,0xd1,0x67,0x45] +# CHECK-EL: andi $9, $6, 17767 # encoding: [0x26,0xd1,0x67,0x45] +# CHECK-EL: or $3, $4, $5 # encoding: [0xa4,0x00,0x90,0x1a] +# CHECK-EL: ori $9, $6, 17767 # encoding: [0x26,0x51,0x67,0x45] +# CHECK-EL: xor $3, $3, $5 # encoding: 
[0xa3,0x00,0x10,0x1b] +# CHECK-EL: xori $9, $6, 17767 # encoding: [0x26,0x71,0x67,0x45] +# CHECK-EL: xori $9, $6, 17767 # encoding: [0x26,0x71,0x67,0x45] +# CHECK-EL: nor $9, $6, $7 # encoding: [0xe6,0x00,0xd0,0x4a] +# CHECK-EL: not $7, $8 # encoding: [0x08,0x00,0xd0,0x3a] +# CHECK-EL: mul $9, $6, $7 # encoding: [0xe6,0x00,0x10,0x4a] +# CHECK-EL: mult $9, $7 # encoding: [0xe9,0x00,0x3c,0x8b] +# CHECK-EL: multu $9, $7 # encoding: [0xe9,0x00,0x3c,0x9b] +# CHECK-EL: div $zero, $9, $7 # encoding: [0xe9,0x00,0x3c,0xab] +# CHECK-EL: divu $zero, $9, $7 # encoding: [0xe9,0x00,0x3c,0xbb] +#------------------------------------------------------------------------------ +# Big endian +#------------------------------------------------------------------------------ +# CHECK-EB: add $9, $6, $7 # encoding: [0x00,0xe6,0x49,0x10] +# CHECK-EB: addi $9, $6, 17767 # encoding: [0x11,0x26,0x45,0x67] +# CHECK-EB: addiu $9, $6, -15001 # encoding: [0x31,0x26,0xc5,0x67] +# CHECK-EB: addi $9, $6, 17767 # encoding: [0x11,0x26,0x45,0x67] +# CHECK-EB: addiu $9, $6, -15001 # encoding: [0x31,0x26,0xc5,0x67] +# CHECK-EB: addu $9, $6, $7 # encoding: [0x00,0xe6,0x49,0x50] +# CHECK-EB: sub $9, $6, $7 # encoding: [0x00,0xe6,0x49,0x90] +# CHECK-EB: subu $4, $3, $5 # encoding: [0x00,0xa3,0x21,0xd0] +# CHECK-EB: neg $6, $7 # encoding: [0x00,0xe0,0x31,0x90] +# CHECK-EB: negu $6, $7 # encoding: [0x00,0xe0,0x31,0xd0] +# CHECK-EB: move $7, $8 # encoding: [0x00,0x08,0x39,0x50] +# CHECK-EB: slt $3, $3, $5 # encoding: [0x00,0xa3,0x1b,0x50] +# CHECK-EB: slti $3, $3, 103 # encoding: [0x90,0x63,0x00,0x67] +# CHECK-EB: slti $3, $3, 103 # encoding: [0x90,0x63,0x00,0x67] +# CHECK-EB: sltiu $3, $3, 103 # encoding: [0xb0,0x63,0x00,0x67] +# CHECK-EB: sltu $3, $3, $5 # encoding: [0x00,0xa3,0x1b,0x90] +# CHECK-EB: lui $9, 17767 # encoding: [0x41,0xa9,0x45,0x67] +# CHECK-EB: and $9, $6, $7 # encoding: [0x00,0xe6,0x4a,0x50] +# CHECK-EB: andi $9, $6, 17767 # encoding: [0xd1,0x26,0x45,0x67] +# CHECK-EB: andi $9, $6, 17767 # 
encoding: [0xd1,0x26,0x45,0x67] +# CHECK-EB: or $3, $4, $5 # encoding: [0x00,0xa4,0x1a,0x90] +# CHECK-EB: ori $9, $6, 17767 # encoding: [0x51,0x26,0x45,0x67] +# CHECK-EB: xor $3, $3, $5 # encoding: [0x00,0xa3,0x1b,0x10] +# CHECK-EB: xori $9, $6, 17767 # encoding: [0x71,0x26,0x45,0x67] +# CHECK-EB: xori $9, $6, 17767 # encoding: [0x71,0x26,0x45,0x67] +# CHECK-EB: nor $9, $6, $7 # encoding: [0x00,0xe6,0x4a,0xd0] +# CHECK-EB: not $7, $8 # encoding: [0x00,0x08,0x3a,0xd0] +# CHECK-EB: mul $9, $6, $7 # encoding: [0x00,0xe6,0x4a,0x10] +# CHECK-EB: mult $9, $7 # encoding: [0x00,0xe9,0x8b,0x3c] +# CHECK-EB: multu $9, $7 # encoding: [0x00,0xe9,0x9b,0x3c] +# CHECK-EB: div $zero, $9, $7 # encoding: [0x00,0xe9,0xab,0x3c] +# CHECK-EB: divu $zero, $9, $7 # encoding: [0x00,0xe9,0xbb,0x3c] add $9, $6, $7 add $9, $6, 17767 addu $9, $6, -15001 @@ -49,6 +90,7 @@ slti $3, $3, 103 sltiu $3, $3, 103 sltu $3, $3, $5 + lui $9, 17767 and $9, $6, $7 and $9, $6, 17767 andi $9, $6, 17767 @@ -62,3 +104,5 @@ mul $9, $6, $7 mult $9, $7 multu $9, $7 + div $0, $9, $7 + divu $0, $9, $7 diff --git a/test/MC/Mips/micromips-branch-instructions.s b/test/MC/Mips/micromips-branch-instructions.s new file mode 100644 index 0000000..84df2a1 --- /dev/null +++ b/test/MC/Mips/micromips-branch-instructions.s @@ -0,0 +1,65 @@ +# RUN: llvm-mc %s -triple=mipsel -show-encoding -mattr=micromips \ +# RUN: | FileCheck %s -check-prefix=CHECK-EL +# RUN: llvm-mc %s -triple=mips -show-encoding -mattr=micromips \ +# RUN: | FileCheck %s -check-prefix=CHECK-EB +# Check that the assembler can handle the documented syntax +# for arithmetic and logical instructions. 
+#------------------------------------------------------------------------------ +# Branch Instructions +#------------------------------------------------------------------------------ +# Little endian +#------------------------------------------------------------------------------ +# CHECK-EL: b 1332 # encoding: [0x00,0x94,0x9a,0x02] +# CHECK-EL: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-EL: beq $9, $6, 1332 # encoding: [0xc9,0x94,0x9a,0x02] +# CHECK-EL: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-EL: bgez $6, 1332 # encoding: [0x46,0x40,0x9a,0x02] +# CHECK-EL: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-EL: bgezal $6, 1332 # encoding: [0x66,0x40,0x9a,0x02] +# CHECK-EL: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-EL: bltzal $6, 1332 # encoding: [0x26,0x40,0x9a,0x02] +# CHECK-EL: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-EL: bgtz $6, 1332 # encoding: [0xc6,0x40,0x9a,0x02] +# CHECK-EL: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-EL: blez $6, 1332 # encoding: [0x86,0x40,0x9a,0x02] +# CHECK-EL: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-EL: bne $9, $6, 1332 # encoding: [0xc9,0xb4,0x9a,0x02] +# CHECK-EL: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-EL: bal 1332 # encoding: [0x60,0x40,0x9a,0x02] +# CHECK-EL: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-EL: bltz $6, 1332 # encoding: [0x06,0x40,0x9a,0x02] +# CHECK-EL: nop # encoding: [0x00,0x00,0x00,0x00] +#------------------------------------------------------------------------------ +# Big endian +#------------------------------------------------------------------------------ +# CHECK-EB: b 1332 # encoding: [0x94,0x00,0x02,0x9a] +# CHECK-EB: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-EB: beq $9, $6, 1332 # encoding: [0x94,0xc9,0x02,0x9a] +# CHECK-EB: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-EB: bgez $6, 1332 # encoding: [0x40,0x46,0x02,0x9a] +# CHECK-EB: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-EB: bgezal $6, 1332 # encoding: [0x40,0x66,0x02,0x9a] +# CHECK-EB: nop # 
encoding: [0x00,0x00,0x00,0x00] +# CHECK-EB: bltzal $6, 1332 # encoding: [0x40,0x26,0x02,0x9a] +# CHECK-EB: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-EB: bgtz $6, 1332 # encoding: [0x40,0xc6,0x02,0x9a] +# CHECK-EB: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-EB: blez $6, 1332 # encoding: [0x40,0x86,0x02,0x9a] +# CHECK-EB: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-EB: bne $9, $6, 1332 # encoding: [0xb4,0xc9,0x02,0x9a] +# CHECK-EB: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-EB: bal 1332 # encoding: [0x40,0x60,0x02,0x9a] +# CHECK-EB: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-EB: bltz $6, 1332 # encoding: [0x40,0x06,0x02,0x9a] +# CHECK-EB: nop # encoding: [0x00,0x00,0x00,0x00] + + b 1332 + beq $9,$6,1332 + bgez $6,1332 + bgezal $6,1332 + bltzal $6,1332 + bgtz $6,1332 + blez $6,1332 + bne $9,$6,1332 + bal 1332 + bltz $6,1332 diff --git a/test/MC/Mips/micromips-branch16.s b/test/MC/Mips/micromips-branch16.s new file mode 100644 index 0000000..321ee86 --- /dev/null +++ b/test/MC/Mips/micromips-branch16.s @@ -0,0 +1,69 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding \ +# RUN: -mattr=micromips | FileCheck %s -check-prefix=CHECK-FIXUP +# RUN: llvm-mc %s -filetype=obj -triple=mipsel-unknown-linux \ +# RUN: -mattr=micromips | llvm-readobj -r \ +# RUN: | FileCheck %s -check-prefix=CHECK-ELF +#------------------------------------------------------------------------------ +# Check that the assembler can handle the documented syntax +# for relocations. 
+#------------------------------------------------------------------------------ +# CHECK-FIXUP: b bar # encoding: [A,0x94'A',0x00,0x00] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: bar, kind: fixup_MICROMIPS_PC16_S1 +# CHECK-FIXUP: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-FIXUP: beq $3, $4, bar # encoding: [0x83'A',0x94'A',0x00,0x00] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: bar, kind: fixup_MICROMIPS_PC16_S1 +# CHECK-FIXUP: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-FIXUP: bne $3, $4, bar # encoding: [0x83'A',0xb4'A',0x00,0x00] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: bar, kind: fixup_MICROMIPS_PC16_S1 +# CHECK-FIXUP: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-FIXUP: bgez $4, bar # encoding: [0x44'A',0x40'A',0x00,0x00] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: bar, kind: fixup_MICROMIPS_PC16_S1 +# CHECK-FIXUP: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-FIXUP: bgtz $4, bar # encoding: [0xc4'A',0x40'A',0x00,0x00] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: bar, kind: fixup_MICROMIPS_PC16_S1 +# CHECK-FIXUP: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-FIXUP: blez $4, bar # encoding: [0x84'A',0x40'A',0x00,0x00] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: bar, kind: fixup_MICROMIPS_PC16_S1 +# CHECK-FIXUP: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-FIXUP: bltz $4, bar # encoding: [0x04'A',0x40'A',0x00,0x00] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: bar, kind: fixup_MICROMIPS_PC16_S1 +# CHECK-FIXUP: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-FIXUP: bgezal $4, bar # encoding: [0x64'A',0x40'A',0x00,0x00] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: bar, kind: fixup_MICROMIPS_PC16_S1 +# CHECK-FIXUP: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-FIXUP: bltzal $4, bar # encoding: [0x24'A',0x40'A',0x00,0x00] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: bar, kind: 
fixup_MICROMIPS_PC16_S1 +# CHECK-FIXUP: nop # encoding: [0x00,0x00,0x00,0x00] +#------------------------------------------------------------------------------ +# Check that the appropriate relocations were created. +#------------------------------------------------------------------------------ +# CHECK-ELF: Relocations [ +# CHECK-ELF: 0x{{[0-9,A-F]+}} R_MICROMIPS_PC16_S1 +# CHECK-ELF: 0x{{[0-9,A-F]+}} R_MICROMIPS_PC16_S1 +# CHECK-ELF: 0x{{[0-9,A-F]+}} R_MICROMIPS_PC16_S1 +# CHECK-ELF: 0x{{[0-9,A-F]+}} R_MICROMIPS_PC16_S1 +# CHECK-ELF: 0x{{[0-9,A-F]+}} R_MICROMIPS_PC16_S1 +# CHECK-ELF: 0x{{[0-9,A-F]+}} R_MICROMIPS_PC16_S1 +# CHECK-ELF: 0x{{[0-9,A-F]+}} R_MICROMIPS_PC16_S1 +# CHECK-ELF: 0x{{[0-9,A-F]+}} R_MICROMIPS_PC16_S1 +# CHECK-ELF: 0x{{[0-9,A-F]+}} R_MICROMIPS_PC16_S1 +# CHECK-ELF: ] + + b bar + beq $3, $4, bar + bne $3, $4, bar + bgez $4, bar + bgtz $4, bar + blez $4, bar + bltz $4, bar + bgezal $4, bar + bltzal $4, bar diff --git a/test/MC/Mips/micromips-expansions.s b/test/MC/Mips/micromips-expansions.s new file mode 100644 index 0000000..af4d3b5 --- /dev/null +++ b/test/MC/Mips/micromips-expansions.s @@ -0,0 +1,57 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding \ +# RUN: -mcpu=mips32r2 -mattr=micromips | FileCheck %s +# Check that the assembler can handle the documented syntax +# for macro instructions +#------------------------------------------------------------------------------ +# Load immediate instructions +#------------------------------------------------------------------------------ +# CHECK: ori $5, $zero, 123 # encoding: [0xa0,0x50,0x7b,0x00] +# CHECK: addiu $6, $zero, -2345 # encoding: [0xc0,0x30,0xd7,0xf6] +# CHECK: lui $7, 1 # encoding: [0xa7,0x41,0x01,0x00] +# CHECK: ori $7, $7, 2 # encoding: [0xe7,0x50,0x02,0x00] +# CHECK: addiu $4, $zero, 20 # encoding: [0x80,0x30,0x14,0x00] +# CHECK: lui $7, 1 # encoding: [0xa7,0x41,0x01,0x00] +# CHECK: ori $7, $7, 2 # encoding: [0xe7,0x50,0x02,0x00] +# CHECK: addiu $4, $5, 20 # encoding: 
[0x85,0x30,0x14,0x00] +# CHECK: lui $7, 1 # encoding: [0xa7,0x41,0x01,0x00] +# CHECK: ori $7, $7, 2 # encoding: [0xe7,0x50,0x02,0x00] +# CHECK: addu $7, $7, $8 # encoding: [0x07,0x01,0x50,0x39] +# CHECK: lui $10, %hi(symbol) # encoding: [0xaa'A',0x41'A',0x00,0x00] +# CHECK: # fixup A - offset: 0, +# CHECK: value: symbol@ABS_HI, +# CHECK: kind: fixup_MICROMIPS_HI16 +# CHECK: addu $10, $10, $4 # encoding: [0x8a,0x00,0x50,0x51] +# CHECK: lw $10, %lo(symbol)($10) # encoding: [0x4a'A',0xfd'A',0x00,0x00] +# CHECK: # fixup A - offset: 0, +# CHECK: value: symbol@ABS_LO, +# CHECK: kind: fixup_MICROMIPS_LO16 +# CHECK: lui $1, %hi(symbol) # encoding: [0xa1'A',0x41'A',0x00,0x00] +# CHECK: # fixup A - offset: 0, +# CHECK: value: symbol@ABS_HI, +# CHECK: kind: fixup_MICROMIPS_HI16 +# CHECK: addu $1, $1, $9 # encoding: [0x21,0x01,0x50,0x09] +# CHECK: sw $10, %lo(symbol)($1) # encoding: [0x41'A',0xf9'A',0x00,0x00] +# CHECK: # fixup A - offset: 0, +# CHECK: value: symbol@ABS_LO, +# CHECK: kind: fixup_MICROMIPS_LO16 +# CHECK: lui $10, 10 # encoding: [0xaa,0x41,0x0a,0x00] +# CHECK: addu $10, $10, $4 # encoding: [0x8a,0x00,0x50,0x51] +# CHECK: lw $10, 123($10) # encoding: [0x4a,0xfd,0x7b,0x00] +# CHECK: lui $1, 2 # encoding: [0xa1,0x41,0x02,0x00] +# CHECK: addu $1, $1, $9 # encoding: [0x21,0x01,0x50,0x09] +# CHECK: sw $10, 57920($1) # encoding: [0x41,0xf9,0x40,0xe2] + + li $5,123 + li $6,-2345 + li $7,65538 + + la $a0, 20 + la $7,65538 + la $a0, 20($a1) + la $7,65538($8) + + lw $t2, symbol($a0) + sw $t2, symbol($t1) + + lw $t2, 655483($a0) + sw $t2, 123456($t1) diff --git a/test/MC/Mips/micromips-expressions.s b/test/MC/Mips/micromips-expressions.s new file mode 100644 index 0000000..509e980 --- /dev/null +++ b/test/MC/Mips/micromips-expressions.s @@ -0,0 +1,35 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding \ +# RUN: -mcpu=mips32r2 -mattr=micromips | FileCheck %s +# Check that the assembler can handle the expressions as operands. 
+# CHECK: .text +# CHECK: .globl foo +# CHECK: foo: +# CHECK: lw $4, %lo(foo)($4) # encoding: [0x84'A',0xfc'A',0x00,0x00] +# CHECK: # fixup A - offset: 0, +# CHECK: value: foo@ABS_LO, +# CHECK: kind: fixup_MICROMIPS_LO16 +# CHECK: lw $4, 56($4) # encoding: [0x84,0xfc,0x38,0x00] +# CHECK: lw $4, %lo(foo+8)($4) # encoding: [0x84'A',0xfc'A',0x08,0x00] +# CHECK: # fixup A - offset: 0, +# CHECK: value: foo@ABS_LO, +# CHECK: kind: fixup_MICROMIPS_LO16 +# CHECK: lw $4, %lo(foo+8)($4) # encoding: [0x84'A',0xfc'A',0x08,0x00] +# CHECK: # fixup A - offset: 0, +# CHECK: value: foo@ABS_LO, +# CHECK: kind: fixup_MICROMIPS_LO16 +# CHECK: lw $4, %lo(foo+8)($4) # encoding: [0x84'A',0xfc'A',0x08,0x00] +# CHECK: # fixup A - offset: 0, +# CHECK: value: foo@ABS_LO, +# CHECK: kind: fixup_MICROMIPS_LO16 +# CHECK: .space 64 + + .globl foo + .ent foo +foo: + lw $4,%lo(foo)($4) + lw $4,((10 + 4) * 4)($4) + lw $4,%lo (2 * 4) + foo($4) + lw $4,%lo((2 * 4) + foo)($4) + lw $4,(((%lo ((2 * 4) + foo))))($4) + .space 64 + .end foo diff --git a/test/MC/Mips/micromips-jump-instructions.s b/test/MC/Mips/micromips-jump-instructions.s new file mode 100644 index 0000000..6f571b6 --- /dev/null +++ b/test/MC/Mips/micromips-jump-instructions.s @@ -0,0 +1,40 @@ +# RUN: llvm-mc %s -triple=mipsel -show-encoding -mattr=micromips \ +# RUN: | FileCheck %s -check-prefix=CHECK-EL +# RUN: llvm-mc %s -triple=mips -show-encoding -mattr=micromips \ +# RUN: | FileCheck %s -check-prefix=CHECK-EB +# Check that the assembler can handle the documented syntax +# for jump and branch instructions. 
+#------------------------------------------------------------------------------ +# Jump instructions +#------------------------------------------------------------------------------ +# Little endian +#------------------------------------------------------------------------------ +# CHECK-EL: j 1328 # encoding: [0x00,0xd4,0x98,0x02] +# CHECK-EL: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-EL: jal 1328 # encoding: [0x00,0xf4,0x98,0x02] +# CHECK-EL: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-EL: jalr $6 # encoding: [0xe6,0x03,0x3c,0x0f] +# CHECK-EL: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-EL: jr $7 # encoding: [0x07,0x00,0x3c,0x0f] +# CHECK-EL: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-EL: jr $7 # encoding: [0x07,0x00,0x3c,0x0f] +# CHECK-EL: nop # encoding: [0x00,0x00,0x00,0x00] +#------------------------------------------------------------------------------ +# Big endian +#------------------------------------------------------------------------------ +# CHECK-EB: j 1328 # encoding: [0xd4,0x00,0x02,0x98] +# CHECK-EB: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-EB: jal 1328 # encoding: [0xf4,0x00,0x02,0x98] +# CHECK-EB: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-EB: jalr $6 # encoding: [0x03,0xe6,0x0f,0x3c] +# CHECK-EB: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-EB: jr $7 # encoding: [0x00,0x07,0x0f,0x3c] +# CHECK-EB: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK-EB: jr $7 # encoding: [0x00,0x07,0x0f,0x3c] +# CHECK-EB: nop # encoding: [0x00,0x00,0x00,0x00] + + j 1328 + jal 1328 + jalr $6 + jr $7 + j $7 diff --git a/test/MC/Mips/micromips-jump26.s b/test/MC/Mips/micromips-jump26.s new file mode 100644 index 0000000..936a998 --- /dev/null +++ b/test/MC/Mips/micromips-jump26.s @@ -0,0 +1,23 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding \ +# RUN: -mattr=micromips | FileCheck %s -check-prefix=CHECK-FIXUP +# RUN: llvm-mc %s -filetype=obj -triple=mipsel-unknown-linux \ +# RUN: -mattr=micromips | llvm-readobj -r \ +# RUN: | 
FileCheck %s -check-prefix=CHECK-ELF +#------------------------------------------------------------------------------ +# Check that the assembler can handle the documented syntax +# for relocations. +#------------------------------------------------------------------------------ +# CHECK-FIXUP: foo: +# CHECK-FIXUP: jal bar # encoding: [A,0xf4'A',A,0b000000AA] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: bar, kind: fixup_MICROMIPS_26_S1 +# CHECK-FIXUP: nop # encoding: [0x00,0x00,0x00,0x00] +#------------------------------------------------------------------------------ +# Check that the appropriate relocations were created. +#------------------------------------------------------------------------------ +# CHECK-ELF: Relocations [ +# CHECK-ELF: 0x{{[0-9,A-F]+}} R_MICROMIPS_26_S1 +# CHECK-ELF: ] + +foo: + jal bar diff --git a/test/MC/Mips/micromips-loadstore-instructions.s b/test/MC/Mips/micromips-loadstore-instructions.s index 623e2ac..cc7514b 100644 --- a/test/MC/Mips/micromips-loadstore-instructions.s +++ b/test/MC/Mips/micromips-loadstore-instructions.s @@ -1,17 +1,31 @@ -# RUN: llvm-mc %s -triple=mipsel -show-encoding -mattr=micromips | FileCheck %s +# RUN: llvm-mc %s -triple=mipsel -show-encoding -mattr=micromips | FileCheck -check-prefix=CHECK-EL %s +# RUN: llvm-mc %s -triple=mips -show-encoding -mattr=micromips | FileCheck -check-prefix=CHECK-EB %s # Check that the assembler can handle the documented syntax # for load and store instructions. 
#------------------------------------------------------------------------------ # Load and Store Instructions #------------------------------------------------------------------------------ -# CHECK: lb $5, 8($4) # encoding: [0x08,0x00,0xa4,0x1c] -# CHECK: lbu $6, 8($4) # encoding: [0x08,0x00,0xc4,0x14] -# CHECK: lh $2, 8($4) # encoding: [0x08,0x00,0x44,0x3c] -# CHECK: lhu $4, 8($2) # encoding: [0x08,0x00,0x82,0x34] -# CHECK: lw $6, 4($5) # encoding: [0x04,0x00,0xc5,0xfc] -# CHECK: sb $5, 8($4) # encoding: [0x08,0x00,0xa4,0x18] -# CHECK: sh $2, 8($4) # encoding: [0x08,0x00,0x44,0x38] -# CHECK: sw $5, 4($6) # encoding: [0x04,0x00,0xa6,0xf8] +# Little endian +#------------------------------------------------------------------------------ +# CHECK-EL: lb $5, 8($4) # encoding: [0xa4,0x1c,0x08,0x00] +# CHECK-EL: lbu $6, 8($4) # encoding: [0xc4,0x14,0x08,0x00] +# CHECK-EL: lh $2, 8($4) # encoding: [0x44,0x3c,0x08,0x00] +# CHECK-EL: lhu $4, 8($2) # encoding: [0x82,0x34,0x08,0x00] +# CHECK-EL: lw $6, 4($5) # encoding: [0xc5,0xfc,0x04,0x00] +# CHECK-EL: sb $5, 8($4) # encoding: [0xa4,0x18,0x08,0x00] +# CHECK-EL: sh $2, 8($4) # encoding: [0x44,0x38,0x08,0x00] +# CHECK-EL: sw $5, 4($6) # encoding: [0xa6,0xf8,0x04,0x00] +#------------------------------------------------------------------------------ +# Big endian +#------------------------------------------------------------------------------ +# CHECK-EB: lb $5, 8($4) # encoding: [0x1c,0xa4,0x00,0x08] +# CHECK-EB: lbu $6, 8($4) # encoding: [0x14,0xc4,0x00,0x08] +# CHECK-EB: lh $2, 8($4) # encoding: [0x3c,0x44,0x00,0x08] +# CHECK-EB: lhu $4, 8($2) # encoding: [0x34,0x82,0x00,0x08] +# CHECK-EB: lw $6, 4($5) # encoding: [0xfc,0xc5,0x00,0x04] +# CHECK-EB: sb $5, 8($4) # encoding: [0x18,0xa4,0x00,0x08] +# CHECK-EB: sh $2, 8($4) # encoding: [0x38,0x44,0x00,0x08] +# CHECK-EB: sw $5, 4($6) # encoding: [0xf8,0xa6,0x00,0x04] lb $5, 8($4) lbu $6, 8($4) lh $2, 8($4) diff --git a/test/MC/Mips/micromips-loadstore-unaligned.s 
b/test/MC/Mips/micromips-loadstore-unaligned.s new file mode 100644 index 0000000..ab1d8b9 --- /dev/null +++ b/test/MC/Mips/micromips-loadstore-unaligned.s @@ -0,0 +1,26 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding \ +# RUN: -mattr=micromips | FileCheck -check-prefix=CHECK-EL %s +# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding \ +# RUN: -mattr=micromips | FileCheck -check-prefix=CHECK-EB %s +# Check that the assembler can handle the documented syntax +# for loads and stores. +#------------------------------------------------------------------------------ +# Load and Store unaligned instructions +#------------------------------------------------------------------------------ +# Little endian +#------------------------------------------------------------------------------ +# CHECK-EL: lwl $4, 16($5) # encoding: [0x85,0x60,0x10,0x00] +# CHECK-EL: lwr $4, 16($5) # encoding: [0x85,0x60,0x10,0x10] +# CHECK-EL: swl $4, 16($5) # encoding: [0x85,0x60,0x10,0x80] +# CHECK-EL: swr $4, 16($5) # encoding: [0x85,0x60,0x10,0x90] +#------------------------------------------------------------------------------ +# Big endian +#------------------------------------------------------------------------------ +# CHECK-EB: lwl $4, 16($5) # encoding: [0x60,0x85,0x00,0x10] +# CHECK-EB: lwr $4, 16($5) # encoding: [0x60,0x85,0x10,0x10] +# CHECK-EB: swl $4, 16($5) # encoding: [0x60,0x85,0x80,0x10] +# CHECK-EB: swr $4, 16($5) # encoding: [0x60,0x85,0x90,0x10] + lwl $4, 16($5) + lwr $4, 16($5) + swl $4, 16($5) + swr $4, 16($5) diff --git a/test/MC/Mips/micromips-long-branch.ll b/test/MC/Mips/micromips-long-branch.ll new file mode 100644 index 0000000..3267f4a --- /dev/null +++ b/test/MC/Mips/micromips-long-branch.ll @@ -0,0 +1,16437 @@ +; RUN: llc %s -march=mipsel -mcpu=mips32r2 -mattr=micromips -filetype=asm \ +; RUN: -relocation-model=pic -O3 -o - | FileCheck %s + +@a = common global [10 x i32] zeroinitializer, align 16 + +; Function Attrs: nounwind uwtable 
+define i32 @main() #0 { +entry: + %retval = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 0, i32* %retval + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: + %0 = load i32* %i, align 4 + %cmp = icmp slt i32 %0, 10 + br i1 %cmp, label %for.body, label %for.end + +; CHECK: addiu $sp, $sp, -8 +; CHECK: sw $ra, 0($sp) +; CHECK: lui $[[REG1:[0-9]+]], 2 +; CHECK: addiu $[[REG1]], $[[REG1]], 0 +; CHECK: addu $[[REG1]], $ra, $[[REG1]] +; CHECK: lw $ra, 0($sp) +; CHECK: jr $[[REG1]] +; CHECK: addiu $sp, $sp, 8 + +for.body: + %1 = load i32* %i, align 4 + %2 = load i32* %i, align 4 + %idxprom = sext i32 %2 to i64 + %arrayidx = getelementptr inbounds [10 x i32]* @a, i32 0, i64 %idxprom + store i32 %1, i32* %arrayidx, align 4 %nop0 = alloca i1, i1 0 + %nop1 = alloca i1, i1 0 + %nop2 = alloca i1, i1 0 + %nop3 = alloca i1, i1 0 + %nop4 = alloca i1, i1 0 + %nop5 = alloca i1, i1 0 + %nop6 = alloca i1, i1 0 + %nop7 = alloca i1, i1 0 + %nop8 = alloca i1, i1 0 + %nop9 = alloca i1, i1 0 + %nop10 = alloca i1, i1 0 + %nop11 = alloca i1, i1 0 + %nop12 = alloca i1, i1 0 + %nop13 = alloca i1, i1 0 + %nop14 = alloca i1, i1 0 + %nop15 = alloca i1, i1 0 + %nop16 = alloca i1, i1 0 + %nop17 = alloca i1, i1 0 + %nop18 = alloca i1, i1 0 + %nop19 = alloca i1, i1 0 + %nop20 = alloca i1, i1 0 + %nop21 = alloca i1, i1 0 + %nop22 = alloca i1, i1 0 + %nop23 = alloca i1, i1 0 + %nop24 = alloca i1, i1 0 + %nop25 = alloca i1, i1 0 + %nop26 = alloca i1, i1 0 + %nop27 = alloca i1, i1 0 + %nop28 = alloca i1, i1 0 + %nop29 = alloca i1, i1 0 + %nop30 = alloca i1, i1 0 + %nop31 = alloca i1, i1 0 + %nop32 = alloca i1, i1 0 + %nop33 = alloca i1, i1 0 + %nop34 = alloca i1, i1 0 + %nop35 = alloca i1, i1 0 + %nop36 = alloca i1, i1 0 + %nop37 = alloca i1, i1 0 + %nop38 = alloca i1, i1 0 + %nop39 = alloca i1, i1 0 + %nop40 = alloca i1, i1 0 + %nop41 = alloca i1, i1 0 + %nop42 = alloca i1, i1 0 + %nop43 = alloca i1, i1 0 + %nop44 = alloca i1, i1 0 + %nop45 = alloca i1, i1 0 + %nop46 = 
alloca i1, i1 0 + %nop47 = alloca i1, i1 0 + %nop48 = alloca i1, i1 0 + %nop49 = alloca i1, i1 0 + %nop50 = alloca i1, i1 0 + %nop51 = alloca i1, i1 0 + %nop52 = alloca i1, i1 0 + %nop53 = alloca i1, i1 0 + %nop54 = alloca i1, i1 0 + %nop55 = alloca i1, i1 0 + %nop56 = alloca i1, i1 0 + %nop57 = alloca i1, i1 0 + %nop58 = alloca i1, i1 0 + %nop59 = alloca i1, i1 0 + %nop60 = alloca i1, i1 0 + %nop61 = alloca i1, i1 0 + %nop62 = alloca i1, i1 0 + %nop63 = alloca i1, i1 0 + %nop64 = alloca i1, i1 0 + %nop65 = alloca i1, i1 0 + %nop66 = alloca i1, i1 0 + %nop67 = alloca i1, i1 0 + %nop68 = alloca i1, i1 0 + %nop69 = alloca i1, i1 0 + %nop70 = alloca i1, i1 0 + %nop71 = alloca i1, i1 0 + %nop72 = alloca i1, i1 0 + %nop73 = alloca i1, i1 0 + %nop74 = alloca i1, i1 0 + %nop75 = alloca i1, i1 0 + %nop76 = alloca i1, i1 0 + %nop77 = alloca i1, i1 0 + %nop78 = alloca i1, i1 0 + %nop79 = alloca i1, i1 0 + %nop80 = alloca i1, i1 0 + %nop81 = alloca i1, i1 0 + %nop82 = alloca i1, i1 0 + %nop83 = alloca i1, i1 0 + %nop84 = alloca i1, i1 0 + %nop85 = alloca i1, i1 0 + %nop86 = alloca i1, i1 0 + %nop87 = alloca i1, i1 0 + %nop88 = alloca i1, i1 0 + %nop89 = alloca i1, i1 0 + %nop90 = alloca i1, i1 0 + %nop91 = alloca i1, i1 0 + %nop92 = alloca i1, i1 0 + %nop93 = alloca i1, i1 0 + %nop94 = alloca i1, i1 0 + %nop95 = alloca i1, i1 0 + %nop96 = alloca i1, i1 0 + %nop97 = alloca i1, i1 0 + %nop98 = alloca i1, i1 0 + %nop99 = alloca i1, i1 0 + %nop100 = alloca i1, i1 0 + %nop101 = alloca i1, i1 0 + %nop102 = alloca i1, i1 0 + %nop103 = alloca i1, i1 0 + %nop104 = alloca i1, i1 0 + %nop105 = alloca i1, i1 0 + %nop106 = alloca i1, i1 0 + %nop107 = alloca i1, i1 0 + %nop108 = alloca i1, i1 0 + %nop109 = alloca i1, i1 0 + %nop110 = alloca i1, i1 0 + %nop111 = alloca i1, i1 0 + %nop112 = alloca i1, i1 0 + %nop113 = alloca i1, i1 0 + %nop114 = alloca i1, i1 0 + %nop115 = alloca i1, i1 0 + %nop116 = alloca i1, i1 0 + %nop117 = alloca i1, i1 0 + %nop118 = alloca i1, i1 0 + %nop119 = alloca 
i1, i1 0 + %nop120 = alloca i1, i1 0 + %nop121 = alloca i1, i1 0 + %nop122 = alloca i1, i1 0 + %nop123 = alloca i1, i1 0 + %nop124 = alloca i1, i1 0 + %nop125 = alloca i1, i1 0 + %nop126 = alloca i1, i1 0 + %nop127 = alloca i1, i1 0 + %nop128 = alloca i1, i1 0 + %nop129 = alloca i1, i1 0 + %nop130 = alloca i1, i1 0 + %nop131 = alloca i1, i1 0 + %nop132 = alloca i1, i1 0 + %nop133 = alloca i1, i1 0 + %nop134 = alloca i1, i1 0 + %nop135 = alloca i1, i1 0 + %nop136 = alloca i1, i1 0 + %nop137 = alloca i1, i1 0 + %nop138 = alloca i1, i1 0 + %nop139 = alloca i1, i1 0 + %nop140 = alloca i1, i1 0 + %nop141 = alloca i1, i1 0 + %nop142 = alloca i1, i1 0 + %nop143 = alloca i1, i1 0 + %nop144 = alloca i1, i1 0 + %nop145 = alloca i1, i1 0 + %nop146 = alloca i1, i1 0 + %nop147 = alloca i1, i1 0 + %nop148 = alloca i1, i1 0 + %nop149 = alloca i1, i1 0 + %nop150 = alloca i1, i1 0 + %nop151 = alloca i1, i1 0 + %nop152 = alloca i1, i1 0 + %nop153 = alloca i1, i1 0 + %nop154 = alloca i1, i1 0 + %nop155 = alloca i1, i1 0 + %nop156 = alloca i1, i1 0 + %nop157 = alloca i1, i1 0 + %nop158 = alloca i1, i1 0 + %nop159 = alloca i1, i1 0 + %nop160 = alloca i1, i1 0 + %nop161 = alloca i1, i1 0 + %nop162 = alloca i1, i1 0 + %nop163 = alloca i1, i1 0 + %nop164 = alloca i1, i1 0 + %nop165 = alloca i1, i1 0 + %nop166 = alloca i1, i1 0 + %nop167 = alloca i1, i1 0 + %nop168 = alloca i1, i1 0 + %nop169 = alloca i1, i1 0 + %nop170 = alloca i1, i1 0 + %nop171 = alloca i1, i1 0 + %nop172 = alloca i1, i1 0 + %nop173 = alloca i1, i1 0 + %nop174 = alloca i1, i1 0 + %nop175 = alloca i1, i1 0 + %nop176 = alloca i1, i1 0 + %nop177 = alloca i1, i1 0 + %nop178 = alloca i1, i1 0 + %nop179 = alloca i1, i1 0 + %nop180 = alloca i1, i1 0 + %nop181 = alloca i1, i1 0 + %nop182 = alloca i1, i1 0 + %nop183 = alloca i1, i1 0 + %nop184 = alloca i1, i1 0 + %nop185 = alloca i1, i1 0 + %nop186 = alloca i1, i1 0 + %nop187 = alloca i1, i1 0 + %nop188 = alloca i1, i1 0 + %nop189 = alloca i1, i1 0 + %nop190 = alloca i1, i1 0 + 
%nop191 = alloca i1, i1 0 + %nop192 = alloca i1, i1 0 + %nop193 = alloca i1, i1 0 + %nop194 = alloca i1, i1 0 + %nop195 = alloca i1, i1 0 + %nop196 = alloca i1, i1 0 + %nop197 = alloca i1, i1 0 + %nop198 = alloca i1, i1 0 + %nop199 = alloca i1, i1 0 + %nop200 = alloca i1, i1 0 + %nop201 = alloca i1, i1 0 + %nop202 = alloca i1, i1 0 + %nop203 = alloca i1, i1 0 + %nop204 = alloca i1, i1 0 + %nop205 = alloca i1, i1 0 + %nop206 = alloca i1, i1 0 + %nop207 = alloca i1, i1 0 + %nop208 = alloca i1, i1 0 + %nop209 = alloca i1, i1 0 + %nop210 = alloca i1, i1 0 + %nop211 = alloca i1, i1 0 + %nop212 = alloca i1, i1 0 + %nop213 = alloca i1, i1 0 + %nop214 = alloca i1, i1 0 + %nop215 = alloca i1, i1 0 + %nop216 = alloca i1, i1 0 + %nop217 = alloca i1, i1 0 + %nop218 = alloca i1, i1 0 + %nop219 = alloca i1, i1 0 + %nop220 = alloca i1, i1 0 + %nop221 = alloca i1, i1 0 + %nop222 = alloca i1, i1 0 + %nop223 = alloca i1, i1 0 + %nop224 = alloca i1, i1 0 + %nop225 = alloca i1, i1 0 + %nop226 = alloca i1, i1 0 + %nop227 = alloca i1, i1 0 + %nop228 = alloca i1, i1 0 + %nop229 = alloca i1, i1 0 + %nop230 = alloca i1, i1 0 + %nop231 = alloca i1, i1 0 + %nop232 = alloca i1, i1 0 + %nop233 = alloca i1, i1 0 + %nop234 = alloca i1, i1 0 + %nop235 = alloca i1, i1 0 + %nop236 = alloca i1, i1 0 + %nop237 = alloca i1, i1 0 + %nop238 = alloca i1, i1 0 + %nop239 = alloca i1, i1 0 + %nop240 = alloca i1, i1 0 + %nop241 = alloca i1, i1 0 + %nop242 = alloca i1, i1 0 + %nop243 = alloca i1, i1 0 + %nop244 = alloca i1, i1 0 + %nop245 = alloca i1, i1 0 + %nop246 = alloca i1, i1 0 + %nop247 = alloca i1, i1 0 + %nop248 = alloca i1, i1 0 + %nop249 = alloca i1, i1 0 + %nop250 = alloca i1, i1 0 + %nop251 = alloca i1, i1 0 + %nop252 = alloca i1, i1 0 + %nop253 = alloca i1, i1 0 + %nop254 = alloca i1, i1 0 + %nop255 = alloca i1, i1 0 + %nop256 = alloca i1, i1 0 + %nop257 = alloca i1, i1 0 + %nop258 = alloca i1, i1 0 + %nop259 = alloca i1, i1 0 + %nop260 = alloca i1, i1 0 + %nop261 = alloca i1, i1 0 + %nop262 = 
alloca i1, i1 0 + %nop263 = alloca i1, i1 0 + %nop264 = alloca i1, i1 0 + %nop265 = alloca i1, i1 0 + %nop266 = alloca i1, i1 0 + %nop267 = alloca i1, i1 0 + %nop268 = alloca i1, i1 0 + %nop269 = alloca i1, i1 0 + %nop270 = alloca i1, i1 0 + %nop271 = alloca i1, i1 0 + %nop272 = alloca i1, i1 0 + %nop273 = alloca i1, i1 0 + %nop274 = alloca i1, i1 0 + %nop275 = alloca i1, i1 0 + %nop276 = alloca i1, i1 0 + %nop277 = alloca i1, i1 0 + %nop278 = alloca i1, i1 0 + %nop279 = alloca i1, i1 0 + %nop280 = alloca i1, i1 0 + %nop281 = alloca i1, i1 0 + %nop282 = alloca i1, i1 0 + %nop283 = alloca i1, i1 0 + %nop284 = alloca i1, i1 0 + %nop285 = alloca i1, i1 0 + %nop286 = alloca i1, i1 0 + %nop287 = alloca i1, i1 0 + %nop288 = alloca i1, i1 0 + %nop289 = alloca i1, i1 0 + %nop290 = alloca i1, i1 0 + %nop291 = alloca i1, i1 0 + %nop292 = alloca i1, i1 0 + %nop293 = alloca i1, i1 0 + %nop294 = alloca i1, i1 0 + %nop295 = alloca i1, i1 0 + %nop296 = alloca i1, i1 0 + %nop297 = alloca i1, i1 0 + %nop298 = alloca i1, i1 0 + %nop299 = alloca i1, i1 0 + %nop300 = alloca i1, i1 0 + %nop301 = alloca i1, i1 0 + %nop302 = alloca i1, i1 0 + %nop303 = alloca i1, i1 0 + %nop304 = alloca i1, i1 0 + %nop305 = alloca i1, i1 0 + %nop306 = alloca i1, i1 0 + %nop307 = alloca i1, i1 0 + %nop308 = alloca i1, i1 0 + %nop309 = alloca i1, i1 0 + %nop310 = alloca i1, i1 0 + %nop311 = alloca i1, i1 0 + %nop312 = alloca i1, i1 0 + %nop313 = alloca i1, i1 0 + %nop314 = alloca i1, i1 0 + %nop315 = alloca i1, i1 0 + %nop316 = alloca i1, i1 0 + %nop317 = alloca i1, i1 0 + %nop318 = alloca i1, i1 0 + %nop319 = alloca i1, i1 0 + %nop320 = alloca i1, i1 0 + %nop321 = alloca i1, i1 0 + %nop322 = alloca i1, i1 0 + %nop323 = alloca i1, i1 0 + %nop324 = alloca i1, i1 0 + %nop325 = alloca i1, i1 0 + %nop326 = alloca i1, i1 0 + %nop327 = alloca i1, i1 0 + %nop328 = alloca i1, i1 0 + %nop329 = alloca i1, i1 0 + %nop330 = alloca i1, i1 0 + %nop331 = alloca i1, i1 0 + %nop332 = alloca i1, i1 0 + %nop333 = alloca i1, 
i1 0 + %nop334 = alloca i1, i1 0 + %nop335 = alloca i1, i1 0 + %nop336 = alloca i1, i1 0 + %nop337 = alloca i1, i1 0 + %nop338 = alloca i1, i1 0 + %nop339 = alloca i1, i1 0 + %nop340 = alloca i1, i1 0 + %nop341 = alloca i1, i1 0 + %nop342 = alloca i1, i1 0 + %nop343 = alloca i1, i1 0 + %nop344 = alloca i1, i1 0 + %nop345 = alloca i1, i1 0 + %nop346 = alloca i1, i1 0 + %nop347 = alloca i1, i1 0 + %nop348 = alloca i1, i1 0 + %nop349 = alloca i1, i1 0 + %nop350 = alloca i1, i1 0 + %nop351 = alloca i1, i1 0 + %nop352 = alloca i1, i1 0 + %nop353 = alloca i1, i1 0 + %nop354 = alloca i1, i1 0 + %nop355 = alloca i1, i1 0 + %nop356 = alloca i1, i1 0 + %nop357 = alloca i1, i1 0 + %nop358 = alloca i1, i1 0 + %nop359 = alloca i1, i1 0 + %nop360 = alloca i1, i1 0 + %nop361 = alloca i1, i1 0 + %nop362 = alloca i1, i1 0 + %nop363 = alloca i1, i1 0 + %nop364 = alloca i1, i1 0 + %nop365 = alloca i1, i1 0 + %nop366 = alloca i1, i1 0 + %nop367 = alloca i1, i1 0 + %nop368 = alloca i1, i1 0 + %nop369 = alloca i1, i1 0 + %nop370 = alloca i1, i1 0 + %nop371 = alloca i1, i1 0 + %nop372 = alloca i1, i1 0 + %nop373 = alloca i1, i1 0 + %nop374 = alloca i1, i1 0 + %nop375 = alloca i1, i1 0 + %nop376 = alloca i1, i1 0 + %nop377 = alloca i1, i1 0 + %nop378 = alloca i1, i1 0 + %nop379 = alloca i1, i1 0 + %nop380 = alloca i1, i1 0 + %nop381 = alloca i1, i1 0 + %nop382 = alloca i1, i1 0 + %nop383 = alloca i1, i1 0 + %nop384 = alloca i1, i1 0 + %nop385 = alloca i1, i1 0 + %nop386 = alloca i1, i1 0 + %nop387 = alloca i1, i1 0 + %nop388 = alloca i1, i1 0 + %nop389 = alloca i1, i1 0 + %nop390 = alloca i1, i1 0 + %nop391 = alloca i1, i1 0 + %nop392 = alloca i1, i1 0 + %nop393 = alloca i1, i1 0 + %nop394 = alloca i1, i1 0 + %nop395 = alloca i1, i1 0 + %nop396 = alloca i1, i1 0 + %nop397 = alloca i1, i1 0 + %nop398 = alloca i1, i1 0 + %nop399 = alloca i1, i1 0 + %nop400 = alloca i1, i1 0 + %nop401 = alloca i1, i1 0 + %nop402 = alloca i1, i1 0 + %nop403 = alloca i1, i1 0 + %nop404 = alloca i1, i1 0 + 
%nop405 = alloca i1, i1 0 + %nop406 = alloca i1, i1 0 + %nop407 = alloca i1, i1 0 + %nop408 = alloca i1, i1 0 + %nop409 = alloca i1, i1 0 + %nop410 = alloca i1, i1 0 + %nop411 = alloca i1, i1 0 + %nop412 = alloca i1, i1 0 + %nop413 = alloca i1, i1 0 + %nop414 = alloca i1, i1 0 + %nop415 = alloca i1, i1 0 + %nop416 = alloca i1, i1 0 + %nop417 = alloca i1, i1 0 + %nop418 = alloca i1, i1 0 + %nop419 = alloca i1, i1 0 + %nop420 = alloca i1, i1 0 + %nop421 = alloca i1, i1 0 + %nop422 = alloca i1, i1 0 + %nop423 = alloca i1, i1 0 + %nop424 = alloca i1, i1 0 + %nop425 = alloca i1, i1 0 + %nop426 = alloca i1, i1 0 + %nop427 = alloca i1, i1 0 + %nop428 = alloca i1, i1 0 + %nop429 = alloca i1, i1 0 + %nop430 = alloca i1, i1 0 + %nop431 = alloca i1, i1 0 + %nop432 = alloca i1, i1 0 + %nop433 = alloca i1, i1 0 + %nop434 = alloca i1, i1 0 + %nop435 = alloca i1, i1 0 + %nop436 = alloca i1, i1 0 + %nop437 = alloca i1, i1 0 + %nop438 = alloca i1, i1 0 + %nop439 = alloca i1, i1 0 + %nop440 = alloca i1, i1 0 + %nop441 = alloca i1, i1 0 + %nop442 = alloca i1, i1 0 + %nop443 = alloca i1, i1 0 + %nop444 = alloca i1, i1 0 + %nop445 = alloca i1, i1 0 + %nop446 = alloca i1, i1 0 + %nop447 = alloca i1, i1 0 + %nop448 = alloca i1, i1 0 + %nop449 = alloca i1, i1 0 + %nop450 = alloca i1, i1 0 + %nop451 = alloca i1, i1 0 + %nop452 = alloca i1, i1 0 + %nop453 = alloca i1, i1 0 + %nop454 = alloca i1, i1 0 + %nop455 = alloca i1, i1 0 + %nop456 = alloca i1, i1 0 + %nop457 = alloca i1, i1 0 + %nop458 = alloca i1, i1 0 + %nop459 = alloca i1, i1 0 + %nop460 = alloca i1, i1 0 + %nop461 = alloca i1, i1 0 + %nop462 = alloca i1, i1 0 + %nop463 = alloca i1, i1 0 + %nop464 = alloca i1, i1 0 + %nop465 = alloca i1, i1 0 + %nop466 = alloca i1, i1 0 + %nop467 = alloca i1, i1 0 + %nop468 = alloca i1, i1 0 + %nop469 = alloca i1, i1 0 + %nop470 = alloca i1, i1 0 + %nop471 = alloca i1, i1 0 + %nop472 = alloca i1, i1 0 + %nop473 = alloca i1, i1 0 + %nop474 = alloca i1, i1 0 + %nop475 = alloca i1, i1 0 + %nop476 = 
alloca i1, i1 0 + %nop477 = alloca i1, i1 0 + %nop478 = alloca i1, i1 0 + %nop479 = alloca i1, i1 0 + %nop480 = alloca i1, i1 0 + %nop481 = alloca i1, i1 0 + %nop482 = alloca i1, i1 0 + %nop483 = alloca i1, i1 0 + %nop484 = alloca i1, i1 0 + %nop485 = alloca i1, i1 0 + %nop486 = alloca i1, i1 0 + %nop487 = alloca i1, i1 0 + %nop488 = alloca i1, i1 0 + %nop489 = alloca i1, i1 0 + %nop490 = alloca i1, i1 0 + %nop491 = alloca i1, i1 0 + %nop492 = alloca i1, i1 0 + %nop493 = alloca i1, i1 0 + %nop494 = alloca i1, i1 0 + %nop495 = alloca i1, i1 0 + %nop496 = alloca i1, i1 0 + %nop497 = alloca i1, i1 0 + %nop498 = alloca i1, i1 0 + %nop499 = alloca i1, i1 0 + %nop500 = alloca i1, i1 0 + %nop501 = alloca i1, i1 0 + %nop502 = alloca i1, i1 0 + %nop503 = alloca i1, i1 0 + %nop504 = alloca i1, i1 0 + %nop505 = alloca i1, i1 0 + %nop506 = alloca i1, i1 0 + %nop507 = alloca i1, i1 0 + %nop508 = alloca i1, i1 0 + %nop509 = alloca i1, i1 0 + %nop510 = alloca i1, i1 0 + %nop511 = alloca i1, i1 0 + %nop512 = alloca i1, i1 0 + %nop513 = alloca i1, i1 0 + %nop514 = alloca i1, i1 0 + %nop515 = alloca i1, i1 0 + %nop516 = alloca i1, i1 0 + %nop517 = alloca i1, i1 0 + %nop518 = alloca i1, i1 0 + %nop519 = alloca i1, i1 0 + %nop520 = alloca i1, i1 0 + %nop521 = alloca i1, i1 0 + %nop522 = alloca i1, i1 0 + %nop523 = alloca i1, i1 0 + %nop524 = alloca i1, i1 0 + %nop525 = alloca i1, i1 0 + %nop526 = alloca i1, i1 0 + %nop527 = alloca i1, i1 0 + %nop528 = alloca i1, i1 0 + %nop529 = alloca i1, i1 0 + %nop530 = alloca i1, i1 0 + %nop531 = alloca i1, i1 0 + %nop532 = alloca i1, i1 0 + %nop533 = alloca i1, i1 0 + %nop534 = alloca i1, i1 0 + %nop535 = alloca i1, i1 0 + %nop536 = alloca i1, i1 0 + %nop537 = alloca i1, i1 0 + %nop538 = alloca i1, i1 0 + %nop539 = alloca i1, i1 0 + %nop540 = alloca i1, i1 0 + %nop541 = alloca i1, i1 0 + %nop542 = alloca i1, i1 0 + %nop543 = alloca i1, i1 0 + %nop544 = alloca i1, i1 0 + %nop545 = alloca i1, i1 0 + %nop546 = alloca i1, i1 0 + %nop547 = alloca i1, 
i1 0 + %nop548 = alloca i1, i1 0 + %nop549 = alloca i1, i1 0 + %nop550 = alloca i1, i1 0 + %nop551 = alloca i1, i1 0 + %nop552 = alloca i1, i1 0 + %nop553 = alloca i1, i1 0 + %nop554 = alloca i1, i1 0 + %nop555 = alloca i1, i1 0 + %nop556 = alloca i1, i1 0 + %nop557 = alloca i1, i1 0 + %nop558 = alloca i1, i1 0 + %nop559 = alloca i1, i1 0 + %nop560 = alloca i1, i1 0 + %nop561 = alloca i1, i1 0 + %nop562 = alloca i1, i1 0 + %nop563 = alloca i1, i1 0 + %nop564 = alloca i1, i1 0 + %nop565 = alloca i1, i1 0 + %nop566 = alloca i1, i1 0 + %nop567 = alloca i1, i1 0 + %nop568 = alloca i1, i1 0 + %nop569 = alloca i1, i1 0 + %nop570 = alloca i1, i1 0 + %nop571 = alloca i1, i1 0 + %nop572 = alloca i1, i1 0 + %nop573 = alloca i1, i1 0 + %nop574 = alloca i1, i1 0 + %nop575 = alloca i1, i1 0 + %nop576 = alloca i1, i1 0 + %nop577 = alloca i1, i1 0 + %nop578 = alloca i1, i1 0 + %nop579 = alloca i1, i1 0 + %nop580 = alloca i1, i1 0 + %nop581 = alloca i1, i1 0 + %nop582 = alloca i1, i1 0 + %nop583 = alloca i1, i1 0 + %nop584 = alloca i1, i1 0 + %nop585 = alloca i1, i1 0 + %nop586 = alloca i1, i1 0 + %nop587 = alloca i1, i1 0 + %nop588 = alloca i1, i1 0 + %nop589 = alloca i1, i1 0 + %nop590 = alloca i1, i1 0 + %nop591 = alloca i1, i1 0 + %nop592 = alloca i1, i1 0 + %nop593 = alloca i1, i1 0 + %nop594 = alloca i1, i1 0 + %nop595 = alloca i1, i1 0 + %nop596 = alloca i1, i1 0 + %nop597 = alloca i1, i1 0 + %nop598 = alloca i1, i1 0 + %nop599 = alloca i1, i1 0 + %nop600 = alloca i1, i1 0 + %nop601 = alloca i1, i1 0 + %nop602 = alloca i1, i1 0 + %nop603 = alloca i1, i1 0 + %nop604 = alloca i1, i1 0 + %nop605 = alloca i1, i1 0 + %nop606 = alloca i1, i1 0 + %nop607 = alloca i1, i1 0 + %nop608 = alloca i1, i1 0 + %nop609 = alloca i1, i1 0 + %nop610 = alloca i1, i1 0 + %nop611 = alloca i1, i1 0 + %nop612 = alloca i1, i1 0 + %nop613 = alloca i1, i1 0 + %nop614 = alloca i1, i1 0 + %nop615 = alloca i1, i1 0 + %nop616 = alloca i1, i1 0 + %nop617 = alloca i1, i1 0 + %nop618 = alloca i1, i1 0 + 
%nop619 = alloca i1, i1 0 + %nop620 = alloca i1, i1 0 + %nop621 = alloca i1, i1 0 + %nop622 = alloca i1, i1 0 + %nop623 = alloca i1, i1 0 + %nop624 = alloca i1, i1 0 + %nop625 = alloca i1, i1 0 + %nop626 = alloca i1, i1 0 + %nop627 = alloca i1, i1 0 + %nop628 = alloca i1, i1 0 + %nop629 = alloca i1, i1 0 + %nop630 = alloca i1, i1 0 + %nop631 = alloca i1, i1 0 + %nop632 = alloca i1, i1 0 + %nop633 = alloca i1, i1 0 + %nop634 = alloca i1, i1 0 + %nop635 = alloca i1, i1 0 + %nop636 = alloca i1, i1 0 + %nop637 = alloca i1, i1 0 + %nop638 = alloca i1, i1 0 + %nop639 = alloca i1, i1 0 + %nop640 = alloca i1, i1 0 + %nop641 = alloca i1, i1 0 + %nop642 = alloca i1, i1 0 + %nop643 = alloca i1, i1 0 + %nop644 = alloca i1, i1 0 + %nop645 = alloca i1, i1 0 + %nop646 = alloca i1, i1 0 + %nop647 = alloca i1, i1 0 + %nop648 = alloca i1, i1 0 + %nop649 = alloca i1, i1 0 + %nop650 = alloca i1, i1 0 + %nop651 = alloca i1, i1 0 + %nop652 = alloca i1, i1 0 + %nop653 = alloca i1, i1 0 + %nop654 = alloca i1, i1 0 + %nop655 = alloca i1, i1 0 + %nop656 = alloca i1, i1 0 + %nop657 = alloca i1, i1 0 + %nop658 = alloca i1, i1 0 + %nop659 = alloca i1, i1 0 + %nop660 = alloca i1, i1 0 + %nop661 = alloca i1, i1 0 + %nop662 = alloca i1, i1 0 + %nop663 = alloca i1, i1 0 + %nop664 = alloca i1, i1 0 + %nop665 = alloca i1, i1 0 + %nop666 = alloca i1, i1 0 + %nop667 = alloca i1, i1 0 + %nop668 = alloca i1, i1 0 + %nop669 = alloca i1, i1 0 + %nop670 = alloca i1, i1 0 + %nop671 = alloca i1, i1 0 + %nop672 = alloca i1, i1 0 + %nop673 = alloca i1, i1 0 + %nop674 = alloca i1, i1 0 + %nop675 = alloca i1, i1 0 + %nop676 = alloca i1, i1 0 + %nop677 = alloca i1, i1 0 + %nop678 = alloca i1, i1 0 + %nop679 = alloca i1, i1 0 + %nop680 = alloca i1, i1 0 + %nop681 = alloca i1, i1 0 + %nop682 = alloca i1, i1 0 + %nop683 = alloca i1, i1 0 + %nop684 = alloca i1, i1 0 + %nop685 = alloca i1, i1 0 + %nop686 = alloca i1, i1 0 + %nop687 = alloca i1, i1 0 + %nop688 = alloca i1, i1 0 + %nop689 = alloca i1, i1 0 + %nop690 = 
alloca i1, i1 0 + %nop691 = alloca i1, i1 0 + %nop692 = alloca i1, i1 0 + %nop693 = alloca i1, i1 0 + %nop694 = alloca i1, i1 0 + %nop695 = alloca i1, i1 0 + %nop696 = alloca i1, i1 0 + %nop697 = alloca i1, i1 0 + %nop698 = alloca i1, i1 0 + %nop699 = alloca i1, i1 0 + %nop700 = alloca i1, i1 0 + %nop701 = alloca i1, i1 0 + %nop702 = alloca i1, i1 0 + %nop703 = alloca i1, i1 0 + %nop704 = alloca i1, i1 0 + %nop705 = alloca i1, i1 0 + %nop706 = alloca i1, i1 0 + %nop707 = alloca i1, i1 0 + %nop708 = alloca i1, i1 0 + %nop709 = alloca i1, i1 0 + %nop710 = alloca i1, i1 0 + %nop711 = alloca i1, i1 0 + %nop712 = alloca i1, i1 0 + %nop713 = alloca i1, i1 0 + %nop714 = alloca i1, i1 0 + %nop715 = alloca i1, i1 0 + %nop716 = alloca i1, i1 0 + %nop717 = alloca i1, i1 0 + %nop718 = alloca i1, i1 0 + %nop719 = alloca i1, i1 0 + %nop720 = alloca i1, i1 0 + %nop721 = alloca i1, i1 0 + %nop722 = alloca i1, i1 0 + %nop723 = alloca i1, i1 0 + %nop724 = alloca i1, i1 0 + %nop725 = alloca i1, i1 0 + %nop726 = alloca i1, i1 0 + %nop727 = alloca i1, i1 0 + %nop728 = alloca i1, i1 0 + %nop729 = alloca i1, i1 0 + %nop730 = alloca i1, i1 0 + %nop731 = alloca i1, i1 0 + %nop732 = alloca i1, i1 0 + %nop733 = alloca i1, i1 0 + %nop734 = alloca i1, i1 0 + %nop735 = alloca i1, i1 0 + %nop736 = alloca i1, i1 0 + %nop737 = alloca i1, i1 0 + %nop738 = alloca i1, i1 0 + %nop739 = alloca i1, i1 0 + %nop740 = alloca i1, i1 0 + %nop741 = alloca i1, i1 0 + %nop742 = alloca i1, i1 0 + %nop743 = alloca i1, i1 0 + %nop744 = alloca i1, i1 0 + %nop745 = alloca i1, i1 0 + %nop746 = alloca i1, i1 0 + %nop747 = alloca i1, i1 0 + %nop748 = alloca i1, i1 0 + %nop749 = alloca i1, i1 0 + %nop750 = alloca i1, i1 0 + %nop751 = alloca i1, i1 0 + %nop752 = alloca i1, i1 0 + %nop753 = alloca i1, i1 0 + %nop754 = alloca i1, i1 0 + %nop755 = alloca i1, i1 0 + %nop756 = alloca i1, i1 0 + %nop757 = alloca i1, i1 0 + %nop758 = alloca i1, i1 0 + %nop759 = alloca i1, i1 0 + %nop760 = alloca i1, i1 0 + %nop761 = alloca i1, 
i1 0 + %nop762 = alloca i1, i1 0 + %nop763 = alloca i1, i1 0 + %nop764 = alloca i1, i1 0 + %nop765 = alloca i1, i1 0 + %nop766 = alloca i1, i1 0 + %nop767 = alloca i1, i1 0 + %nop768 = alloca i1, i1 0 + %nop769 = alloca i1, i1 0 + %nop770 = alloca i1, i1 0 + %nop771 = alloca i1, i1 0 + %nop772 = alloca i1, i1 0 + %nop773 = alloca i1, i1 0 + %nop774 = alloca i1, i1 0 + %nop775 = alloca i1, i1 0 + %nop776 = alloca i1, i1 0 + %nop777 = alloca i1, i1 0 + %nop778 = alloca i1, i1 0 + %nop779 = alloca i1, i1 0 + %nop780 = alloca i1, i1 0 + %nop781 = alloca i1, i1 0 + %nop782 = alloca i1, i1 0 + %nop783 = alloca i1, i1 0 + %nop784 = alloca i1, i1 0 + %nop785 = alloca i1, i1 0 + %nop786 = alloca i1, i1 0 + %nop787 = alloca i1, i1 0 + %nop788 = alloca i1, i1 0 + %nop789 = alloca i1, i1 0 + %nop790 = alloca i1, i1 0 + %nop791 = alloca i1, i1 0 + %nop792 = alloca i1, i1 0 + %nop793 = alloca i1, i1 0 + %nop794 = alloca i1, i1 0 + %nop795 = alloca i1, i1 0 + %nop796 = alloca i1, i1 0 + %nop797 = alloca i1, i1 0 + %nop798 = alloca i1, i1 0 + %nop799 = alloca i1, i1 0 + %nop800 = alloca i1, i1 0 + %nop801 = alloca i1, i1 0 + %nop802 = alloca i1, i1 0 + %nop803 = alloca i1, i1 0 + %nop804 = alloca i1, i1 0 + %nop805 = alloca i1, i1 0 + %nop806 = alloca i1, i1 0 + %nop807 = alloca i1, i1 0 + %nop808 = alloca i1, i1 0 + %nop809 = alloca i1, i1 0 + %nop810 = alloca i1, i1 0 + %nop811 = alloca i1, i1 0 + %nop812 = alloca i1, i1 0 + %nop813 = alloca i1, i1 0 + %nop814 = alloca i1, i1 0 + %nop815 = alloca i1, i1 0 + %nop816 = alloca i1, i1 0 + %nop817 = alloca i1, i1 0 + %nop818 = alloca i1, i1 0 + %nop819 = alloca i1, i1 0 + %nop820 = alloca i1, i1 0 + %nop821 = alloca i1, i1 0 + %nop822 = alloca i1, i1 0 + %nop823 = alloca i1, i1 0 + %nop824 = alloca i1, i1 0 + %nop825 = alloca i1, i1 0 + %nop826 = alloca i1, i1 0 + %nop827 = alloca i1, i1 0 + %nop828 = alloca i1, i1 0 + %nop829 = alloca i1, i1 0 + %nop830 = alloca i1, i1 0 + %nop831 = alloca i1, i1 0 + %nop832 = alloca i1, i1 0 + 
%nop833 = alloca i1, i1 0 + %nop834 = alloca i1, i1 0 + %nop835 = alloca i1, i1 0 + %nop836 = alloca i1, i1 0 + %nop837 = alloca i1, i1 0 + %nop838 = alloca i1, i1 0 + %nop839 = alloca i1, i1 0 + %nop840 = alloca i1, i1 0 + %nop841 = alloca i1, i1 0 + %nop842 = alloca i1, i1 0 + %nop843 = alloca i1, i1 0 + %nop844 = alloca i1, i1 0 + %nop845 = alloca i1, i1 0 + %nop846 = alloca i1, i1 0 + %nop847 = alloca i1, i1 0 + %nop848 = alloca i1, i1 0 + %nop849 = alloca i1, i1 0 + %nop850 = alloca i1, i1 0 + %nop851 = alloca i1, i1 0 + %nop852 = alloca i1, i1 0 + %nop853 = alloca i1, i1 0 + %nop854 = alloca i1, i1 0 + %nop855 = alloca i1, i1 0 + %nop856 = alloca i1, i1 0 + %nop857 = alloca i1, i1 0 + %nop858 = alloca i1, i1 0 + %nop859 = alloca i1, i1 0 + %nop860 = alloca i1, i1 0 + %nop861 = alloca i1, i1 0 + %nop862 = alloca i1, i1 0 + %nop863 = alloca i1, i1 0 + %nop864 = alloca i1, i1 0 + %nop865 = alloca i1, i1 0 + %nop866 = alloca i1, i1 0 + %nop867 = alloca i1, i1 0 + %nop868 = alloca i1, i1 0 + %nop869 = alloca i1, i1 0 + %nop870 = alloca i1, i1 0 + %nop871 = alloca i1, i1 0 + %nop872 = alloca i1, i1 0 + %nop873 = alloca i1, i1 0 + %nop874 = alloca i1, i1 0 + %nop875 = alloca i1, i1 0 + %nop876 = alloca i1, i1 0 + %nop877 = alloca i1, i1 0 + %nop878 = alloca i1, i1 0 + %nop879 = alloca i1, i1 0 + %nop880 = alloca i1, i1 0 + %nop881 = alloca i1, i1 0 + %nop882 = alloca i1, i1 0 + %nop883 = alloca i1, i1 0 + %nop884 = alloca i1, i1 0 + %nop885 = alloca i1, i1 0 + %nop886 = alloca i1, i1 0 + %nop887 = alloca i1, i1 0 + %nop888 = alloca i1, i1 0 + %nop889 = alloca i1, i1 0 + %nop890 = alloca i1, i1 0 + %nop891 = alloca i1, i1 0 + %nop892 = alloca i1, i1 0 + %nop893 = alloca i1, i1 0 + %nop894 = alloca i1, i1 0 + %nop895 = alloca i1, i1 0 + %nop896 = alloca i1, i1 0 + %nop897 = alloca i1, i1 0 + %nop898 = alloca i1, i1 0 + %nop899 = alloca i1, i1 0 + %nop900 = alloca i1, i1 0 + %nop901 = alloca i1, i1 0 + %nop902 = alloca i1, i1 0 + %nop903 = alloca i1, i1 0 + %nop904 = 
alloca i1, i1 0 + %nop905 = alloca i1, i1 0 + %nop906 = alloca i1, i1 0 + %nop907 = alloca i1, i1 0 + %nop908 = alloca i1, i1 0 + %nop909 = alloca i1, i1 0 + %nop910 = alloca i1, i1 0 + %nop911 = alloca i1, i1 0 + %nop912 = alloca i1, i1 0 + %nop913 = alloca i1, i1 0 + %nop914 = alloca i1, i1 0 + %nop915 = alloca i1, i1 0 + %nop916 = alloca i1, i1 0 + %nop917 = alloca i1, i1 0 + %nop918 = alloca i1, i1 0 + %nop919 = alloca i1, i1 0 + %nop920 = alloca i1, i1 0 + %nop921 = alloca i1, i1 0 + %nop922 = alloca i1, i1 0 + %nop923 = alloca i1, i1 0 + %nop924 = alloca i1, i1 0 + %nop925 = alloca i1, i1 0 + %nop926 = alloca i1, i1 0 + %nop927 = alloca i1, i1 0 + %nop928 = alloca i1, i1 0 + %nop929 = alloca i1, i1 0 + %nop930 = alloca i1, i1 0 + %nop931 = alloca i1, i1 0 + %nop932 = alloca i1, i1 0 + %nop933 = alloca i1, i1 0 + %nop934 = alloca i1, i1 0 + %nop935 = alloca i1, i1 0 + %nop936 = alloca i1, i1 0 + %nop937 = alloca i1, i1 0 + %nop938 = alloca i1, i1 0 + %nop939 = alloca i1, i1 0 + %nop940 = alloca i1, i1 0 + %nop941 = alloca i1, i1 0 + %nop942 = alloca i1, i1 0 + %nop943 = alloca i1, i1 0 + %nop944 = alloca i1, i1 0 + %nop945 = alloca i1, i1 0 + %nop946 = alloca i1, i1 0 + %nop947 = alloca i1, i1 0 + %nop948 = alloca i1, i1 0 + %nop949 = alloca i1, i1 0 + %nop950 = alloca i1, i1 0 + %nop951 = alloca i1, i1 0 + %nop952 = alloca i1, i1 0 + %nop953 = alloca i1, i1 0 + %nop954 = alloca i1, i1 0 + %nop955 = alloca i1, i1 0 + %nop956 = alloca i1, i1 0 + %nop957 = alloca i1, i1 0 + %nop958 = alloca i1, i1 0 + %nop959 = alloca i1, i1 0 + %nop960 = alloca i1, i1 0 + %nop961 = alloca i1, i1 0 + %nop962 = alloca i1, i1 0 + %nop963 = alloca i1, i1 0 + %nop964 = alloca i1, i1 0 + %nop965 = alloca i1, i1 0 + %nop966 = alloca i1, i1 0 + %nop967 = alloca i1, i1 0 + %nop968 = alloca i1, i1 0 + %nop969 = alloca i1, i1 0 + %nop970 = alloca i1, i1 0 + %nop971 = alloca i1, i1 0 + %nop972 = alloca i1, i1 0 + %nop973 = alloca i1, i1 0 + %nop974 = alloca i1, i1 0 + %nop975 = alloca i1, 
i1 0 + %nop976 = alloca i1, i1 0 + %nop977 = alloca i1, i1 0 + %nop978 = alloca i1, i1 0 + %nop979 = alloca i1, i1 0 + %nop980 = alloca i1, i1 0 + %nop981 = alloca i1, i1 0 + %nop982 = alloca i1, i1 0 + %nop983 = alloca i1, i1 0 + %nop984 = alloca i1, i1 0 + %nop985 = alloca i1, i1 0 + %nop986 = alloca i1, i1 0 + %nop987 = alloca i1, i1 0 + %nop988 = alloca i1, i1 0 + %nop989 = alloca i1, i1 0 + %nop990 = alloca i1, i1 0 + %nop991 = alloca i1, i1 0 + %nop992 = alloca i1, i1 0 + %nop993 = alloca i1, i1 0 + %nop994 = alloca i1, i1 0 + %nop995 = alloca i1, i1 0 + %nop996 = alloca i1, i1 0 + %nop997 = alloca i1, i1 0 + %nop998 = alloca i1, i1 0 + %nop999 = alloca i1, i1 0 + %nop1000 = alloca i1, i1 0 + %nop1001 = alloca i1, i1 0 + %nop1002 = alloca i1, i1 0 + %nop1003 = alloca i1, i1 0 + %nop1004 = alloca i1, i1 0 + %nop1005 = alloca i1, i1 0 + %nop1006 = alloca i1, i1 0 + %nop1007 = alloca i1, i1 0 + %nop1008 = alloca i1, i1 0 + %nop1009 = alloca i1, i1 0 + %nop1010 = alloca i1, i1 0 + %nop1011 = alloca i1, i1 0 + %nop1012 = alloca i1, i1 0 + %nop1013 = alloca i1, i1 0 + %nop1014 = alloca i1, i1 0 + %nop1015 = alloca i1, i1 0 + %nop1016 = alloca i1, i1 0 + %nop1017 = alloca i1, i1 0 + %nop1018 = alloca i1, i1 0 + %nop1019 = alloca i1, i1 0 + %nop1020 = alloca i1, i1 0 + %nop1021 = alloca i1, i1 0 + %nop1022 = alloca i1, i1 0 + %nop1023 = alloca i1, i1 0 + %nop1024 = alloca i1, i1 0 + %nop1025 = alloca i1, i1 0 + %nop1026 = alloca i1, i1 0 + %nop1027 = alloca i1, i1 0 + %nop1028 = alloca i1, i1 0 + %nop1029 = alloca i1, i1 0 + %nop1030 = alloca i1, i1 0 + %nop1031 = alloca i1, i1 0 + %nop1032 = alloca i1, i1 0 + %nop1033 = alloca i1, i1 0 + %nop1034 = alloca i1, i1 0 + %nop1035 = alloca i1, i1 0 + %nop1036 = alloca i1, i1 0 + %nop1037 = alloca i1, i1 0 + %nop1038 = alloca i1, i1 0 + %nop1039 = alloca i1, i1 0 + %nop1040 = alloca i1, i1 0 + %nop1041 = alloca i1, i1 0 + %nop1042 = alloca i1, i1 0 + %nop1043 = alloca i1, i1 0 + %nop1044 = alloca i1, i1 0 + %nop1045 = 
alloca i1, i1 0 + %nop1046 = alloca i1, i1 0 + %nop1047 = alloca i1, i1 0 + %nop1048 = alloca i1, i1 0 + %nop1049 = alloca i1, i1 0 + %nop1050 = alloca i1, i1 0 + %nop1051 = alloca i1, i1 0 + %nop1052 = alloca i1, i1 0 + %nop1053 = alloca i1, i1 0 + %nop1054 = alloca i1, i1 0 + %nop1055 = alloca i1, i1 0 + %nop1056 = alloca i1, i1 0 + %nop1057 = alloca i1, i1 0 + %nop1058 = alloca i1, i1 0 + %nop1059 = alloca i1, i1 0 + %nop1060 = alloca i1, i1 0 + %nop1061 = alloca i1, i1 0 + %nop1062 = alloca i1, i1 0 + %nop1063 = alloca i1, i1 0 + %nop1064 = alloca i1, i1 0 + %nop1065 = alloca i1, i1 0 + %nop1066 = alloca i1, i1 0 + %nop1067 = alloca i1, i1 0 + %nop1068 = alloca i1, i1 0 + %nop1069 = alloca i1, i1 0 + %nop1070 = alloca i1, i1 0 + %nop1071 = alloca i1, i1 0 + %nop1072 = alloca i1, i1 0 + %nop1073 = alloca i1, i1 0 + %nop1074 = alloca i1, i1 0 + %nop1075 = alloca i1, i1 0 + %nop1076 = alloca i1, i1 0 + %nop1077 = alloca i1, i1 0 + %nop1078 = alloca i1, i1 0 + %nop1079 = alloca i1, i1 0 + %nop1080 = alloca i1, i1 0 + %nop1081 = alloca i1, i1 0 + %nop1082 = alloca i1, i1 0 + %nop1083 = alloca i1, i1 0 + %nop1084 = alloca i1, i1 0 + %nop1085 = alloca i1, i1 0 + %nop1086 = alloca i1, i1 0 + %nop1087 = alloca i1, i1 0 + %nop1088 = alloca i1, i1 0 + %nop1089 = alloca i1, i1 0 + %nop1090 = alloca i1, i1 0 + %nop1091 = alloca i1, i1 0 + %nop1092 = alloca i1, i1 0 + %nop1093 = alloca i1, i1 0 + %nop1094 = alloca i1, i1 0 + %nop1095 = alloca i1, i1 0 + %nop1096 = alloca i1, i1 0 + %nop1097 = alloca i1, i1 0 + %nop1098 = alloca i1, i1 0 + %nop1099 = alloca i1, i1 0 + %nop1100 = alloca i1, i1 0 + %nop1101 = alloca i1, i1 0 + %nop1102 = alloca i1, i1 0 + %nop1103 = alloca i1, i1 0 + %nop1104 = alloca i1, i1 0 + %nop1105 = alloca i1, i1 0 + %nop1106 = alloca i1, i1 0 + %nop1107 = alloca i1, i1 0 + %nop1108 = alloca i1, i1 0 + %nop1109 = alloca i1, i1 0 + %nop1110 = alloca i1, i1 0 + %nop1111 = alloca i1, i1 0 + %nop1112 = alloca i1, i1 0 + %nop1113 = alloca i1, i1 0 + %nop1114 
= alloca i1, i1 0 + %nop1115 = alloca i1, i1 0 + %nop1116 = alloca i1, i1 0 + %nop1117 = alloca i1, i1 0 + %nop1118 = alloca i1, i1 0 + %nop1119 = alloca i1, i1 0 + %nop1120 = alloca i1, i1 0 + %nop1121 = alloca i1, i1 0 + %nop1122 = alloca i1, i1 0 + %nop1123 = alloca i1, i1 0 + %nop1124 = alloca i1, i1 0 + %nop1125 = alloca i1, i1 0 + %nop1126 = alloca i1, i1 0 + %nop1127 = alloca i1, i1 0 + %nop1128 = alloca i1, i1 0 + %nop1129 = alloca i1, i1 0 + %nop1130 = alloca i1, i1 0 + %nop1131 = alloca i1, i1 0 + %nop1132 = alloca i1, i1 0 + %nop1133 = alloca i1, i1 0 + %nop1134 = alloca i1, i1 0 + %nop1135 = alloca i1, i1 0 + %nop1136 = alloca i1, i1 0 + %nop1137 = alloca i1, i1 0 + %nop1138 = alloca i1, i1 0 + %nop1139 = alloca i1, i1 0 + %nop1140 = alloca i1, i1 0 + %nop1141 = alloca i1, i1 0 + %nop1142 = alloca i1, i1 0 + %nop1143 = alloca i1, i1 0 + %nop1144 = alloca i1, i1 0 + %nop1145 = alloca i1, i1 0 + %nop1146 = alloca i1, i1 0 + %nop1147 = alloca i1, i1 0 + %nop1148 = alloca i1, i1 0 + %nop1149 = alloca i1, i1 0 + %nop1150 = alloca i1, i1 0 + %nop1151 = alloca i1, i1 0 + %nop1152 = alloca i1, i1 0 + %nop1153 = alloca i1, i1 0 + %nop1154 = alloca i1, i1 0 + %nop1155 = alloca i1, i1 0 + %nop1156 = alloca i1, i1 0 + %nop1157 = alloca i1, i1 0 + %nop1158 = alloca i1, i1 0 + %nop1159 = alloca i1, i1 0 + %nop1160 = alloca i1, i1 0 + %nop1161 = alloca i1, i1 0 + %nop1162 = alloca i1, i1 0 + %nop1163 = alloca i1, i1 0 + %nop1164 = alloca i1, i1 0 + %nop1165 = alloca i1, i1 0 + %nop1166 = alloca i1, i1 0 + %nop1167 = alloca i1, i1 0 + %nop1168 = alloca i1, i1 0 + %nop1169 = alloca i1, i1 0 + %nop1170 = alloca i1, i1 0 + %nop1171 = alloca i1, i1 0 + %nop1172 = alloca i1, i1 0 + %nop1173 = alloca i1, i1 0 + %nop1174 = alloca i1, i1 0 + %nop1175 = alloca i1, i1 0 + %nop1176 = alloca i1, i1 0 + %nop1177 = alloca i1, i1 0 + %nop1178 = alloca i1, i1 0 + %nop1179 = alloca i1, i1 0 + %nop1180 = alloca i1, i1 0 + %nop1181 = alloca i1, i1 0 + %nop1182 = alloca i1, i1 0 + 
%nop1183 = alloca i1, i1 0 + %nop1184 = alloca i1, i1 0 + %nop1185 = alloca i1, i1 0 + %nop1186 = alloca i1, i1 0 + %nop1187 = alloca i1, i1 0 + %nop1188 = alloca i1, i1 0 + %nop1189 = alloca i1, i1 0 + %nop1190 = alloca i1, i1 0 + %nop1191 = alloca i1, i1 0 + %nop1192 = alloca i1, i1 0 + %nop1193 = alloca i1, i1 0 + %nop1194 = alloca i1, i1 0 + %nop1195 = alloca i1, i1 0 + %nop1196 = alloca i1, i1 0 + %nop1197 = alloca i1, i1 0 + %nop1198 = alloca i1, i1 0 + %nop1199 = alloca i1, i1 0 + %nop1200 = alloca i1, i1 0 + %nop1201 = alloca i1, i1 0 + %nop1202 = alloca i1, i1 0 + %nop1203 = alloca i1, i1 0 + %nop1204 = alloca i1, i1 0 + %nop1205 = alloca i1, i1 0 + %nop1206 = alloca i1, i1 0 + %nop1207 = alloca i1, i1 0 + %nop1208 = alloca i1, i1 0 + %nop1209 = alloca i1, i1 0 + %nop1210 = alloca i1, i1 0 + %nop1211 = alloca i1, i1 0 + %nop1212 = alloca i1, i1 0 + %nop1213 = alloca i1, i1 0 + %nop1214 = alloca i1, i1 0 + %nop1215 = alloca i1, i1 0 + %nop1216 = alloca i1, i1 0 + %nop1217 = alloca i1, i1 0 + %nop1218 = alloca i1, i1 0 + %nop1219 = alloca i1, i1 0 + %nop1220 = alloca i1, i1 0 + %nop1221 = alloca i1, i1 0 + %nop1222 = alloca i1, i1 0 + %nop1223 = alloca i1, i1 0 + %nop1224 = alloca i1, i1 0 + %nop1225 = alloca i1, i1 0 + %nop1226 = alloca i1, i1 0 + %nop1227 = alloca i1, i1 0 + %nop1228 = alloca i1, i1 0 + %nop1229 = alloca i1, i1 0 + %nop1230 = alloca i1, i1 0 + %nop1231 = alloca i1, i1 0 + %nop1232 = alloca i1, i1 0 + %nop1233 = alloca i1, i1 0 + %nop1234 = alloca i1, i1 0 + %nop1235 = alloca i1, i1 0 + %nop1236 = alloca i1, i1 0 + %nop1237 = alloca i1, i1 0 + %nop1238 = alloca i1, i1 0 + %nop1239 = alloca i1, i1 0 + %nop1240 = alloca i1, i1 0 + %nop1241 = alloca i1, i1 0 + %nop1242 = alloca i1, i1 0 + %nop1243 = alloca i1, i1 0 + %nop1244 = alloca i1, i1 0 + %nop1245 = alloca i1, i1 0 + %nop1246 = alloca i1, i1 0 + %nop1247 = alloca i1, i1 0 + %nop1248 = alloca i1, i1 0 + %nop1249 = alloca i1, i1 0 + %nop1250 = alloca i1, i1 0 + %nop1251 = alloca i1, i1 0 
+ %nop1252 = alloca i1, i1 0 + %nop1253 = alloca i1, i1 0 + %nop1254 = alloca i1, i1 0 + %nop1255 = alloca i1, i1 0 + %nop1256 = alloca i1, i1 0 + %nop1257 = alloca i1, i1 0 + %nop1258 = alloca i1, i1 0 + %nop1259 = alloca i1, i1 0 + %nop1260 = alloca i1, i1 0 + %nop1261 = alloca i1, i1 0 + %nop1262 = alloca i1, i1 0 + %nop1263 = alloca i1, i1 0 + %nop1264 = alloca i1, i1 0 + %nop1265 = alloca i1, i1 0 + %nop1266 = alloca i1, i1 0 + %nop1267 = alloca i1, i1 0 + %nop1268 = alloca i1, i1 0 + %nop1269 = alloca i1, i1 0 + %nop1270 = alloca i1, i1 0 + %nop1271 = alloca i1, i1 0 + %nop1272 = alloca i1, i1 0 + %nop1273 = alloca i1, i1 0 + %nop1274 = alloca i1, i1 0 + %nop1275 = alloca i1, i1 0 + %nop1276 = alloca i1, i1 0 + %nop1277 = alloca i1, i1 0 + %nop1278 = alloca i1, i1 0 + %nop1279 = alloca i1, i1 0 + %nop1280 = alloca i1, i1 0 + %nop1281 = alloca i1, i1 0 + %nop1282 = alloca i1, i1 0 + %nop1283 = alloca i1, i1 0 + %nop1284 = alloca i1, i1 0 + %nop1285 = alloca i1, i1 0 + %nop1286 = alloca i1, i1 0 + %nop1287 = alloca i1, i1 0 + %nop1288 = alloca i1, i1 0 + %nop1289 = alloca i1, i1 0 + %nop1290 = alloca i1, i1 0 + %nop1291 = alloca i1, i1 0 + %nop1292 = alloca i1, i1 0 + %nop1293 = alloca i1, i1 0 + %nop1294 = alloca i1, i1 0 + %nop1295 = alloca i1, i1 0 + %nop1296 = alloca i1, i1 0 + %nop1297 = alloca i1, i1 0 + %nop1298 = alloca i1, i1 0 + %nop1299 = alloca i1, i1 0 + %nop1300 = alloca i1, i1 0 + %nop1301 = alloca i1, i1 0 + %nop1302 = alloca i1, i1 0 + %nop1303 = alloca i1, i1 0 + %nop1304 = alloca i1, i1 0 + %nop1305 = alloca i1, i1 0 + %nop1306 = alloca i1, i1 0 + %nop1307 = alloca i1, i1 0 + %nop1308 = alloca i1, i1 0 + %nop1309 = alloca i1, i1 0 + %nop1310 = alloca i1, i1 0 + %nop1311 = alloca i1, i1 0 + %nop1312 = alloca i1, i1 0 + %nop1313 = alloca i1, i1 0 + %nop1314 = alloca i1, i1 0 + %nop1315 = alloca i1, i1 0 + %nop1316 = alloca i1, i1 0 + %nop1317 = alloca i1, i1 0 + %nop1318 = alloca i1, i1 0 + %nop1319 = alloca i1, i1 0 + %nop1320 = alloca i1, i1 
0 + %nop1321 = alloca i1, i1 0 + %nop1322 = alloca i1, i1 0 + %nop1323 = alloca i1, i1 0 + %nop1324 = alloca i1, i1 0 + %nop1325 = alloca i1, i1 0 + %nop1326 = alloca i1, i1 0 + %nop1327 = alloca i1, i1 0 + %nop1328 = alloca i1, i1 0 + %nop1329 = alloca i1, i1 0 + %nop1330 = alloca i1, i1 0 + %nop1331 = alloca i1, i1 0 + %nop1332 = alloca i1, i1 0 + %nop1333 = alloca i1, i1 0 + %nop1334 = alloca i1, i1 0 + %nop1335 = alloca i1, i1 0 + %nop1336 = alloca i1, i1 0 + %nop1337 = alloca i1, i1 0 + %nop1338 = alloca i1, i1 0 + %nop1339 = alloca i1, i1 0 + %nop1340 = alloca i1, i1 0 + %nop1341 = alloca i1, i1 0 + %nop1342 = alloca i1, i1 0 + %nop1343 = alloca i1, i1 0 + %nop1344 = alloca i1, i1 0 + %nop1345 = alloca i1, i1 0 + %nop1346 = alloca i1, i1 0 + %nop1347 = alloca i1, i1 0 + %nop1348 = alloca i1, i1 0 + %nop1349 = alloca i1, i1 0 + %nop1350 = alloca i1, i1 0 + %nop1351 = alloca i1, i1 0 + %nop1352 = alloca i1, i1 0 + %nop1353 = alloca i1, i1 0 + %nop1354 = alloca i1, i1 0 + %nop1355 = alloca i1, i1 0 + %nop1356 = alloca i1, i1 0 + %nop1357 = alloca i1, i1 0 + %nop1358 = alloca i1, i1 0 + %nop1359 = alloca i1, i1 0 + %nop1360 = alloca i1, i1 0 + %nop1361 = alloca i1, i1 0 + %nop1362 = alloca i1, i1 0 + %nop1363 = alloca i1, i1 0 + %nop1364 = alloca i1, i1 0 + %nop1365 = alloca i1, i1 0 + %nop1366 = alloca i1, i1 0 + %nop1367 = alloca i1, i1 0 + %nop1368 = alloca i1, i1 0 + %nop1369 = alloca i1, i1 0 + %nop1370 = alloca i1, i1 0 + %nop1371 = alloca i1, i1 0 + %nop1372 = alloca i1, i1 0 + %nop1373 = alloca i1, i1 0 + %nop1374 = alloca i1, i1 0 + %nop1375 = alloca i1, i1 0 + %nop1376 = alloca i1, i1 0 + %nop1377 = alloca i1, i1 0 + %nop1378 = alloca i1, i1 0 + %nop1379 = alloca i1, i1 0 + %nop1380 = alloca i1, i1 0 + %nop1381 = alloca i1, i1 0 + %nop1382 = alloca i1, i1 0 + %nop1383 = alloca i1, i1 0 + %nop1384 = alloca i1, i1 0 + %nop1385 = alloca i1, i1 0 + %nop1386 = alloca i1, i1 0 + %nop1387 = alloca i1, i1 0 + %nop1388 = alloca i1, i1 0 + %nop1389 = alloca i1, 
i1 0 + %nop1390 = alloca i1, i1 0 + %nop1391 = alloca i1, i1 0 + %nop1392 = alloca i1, i1 0 + %nop1393 = alloca i1, i1 0 + %nop1394 = alloca i1, i1 0 + %nop1395 = alloca i1, i1 0 + %nop1396 = alloca i1, i1 0 + %nop1397 = alloca i1, i1 0 + %nop1398 = alloca i1, i1 0 + %nop1399 = alloca i1, i1 0 + %nop1400 = alloca i1, i1 0 + %nop1401 = alloca i1, i1 0 + %nop1402 = alloca i1, i1 0 + %nop1403 = alloca i1, i1 0 + %nop1404 = alloca i1, i1 0 + %nop1405 = alloca i1, i1 0 + %nop1406 = alloca i1, i1 0 + %nop1407 = alloca i1, i1 0 + %nop1408 = alloca i1, i1 0 + %nop1409 = alloca i1, i1 0 + %nop1410 = alloca i1, i1 0 + %nop1411 = alloca i1, i1 0 + %nop1412 = alloca i1, i1 0 + %nop1413 = alloca i1, i1 0 + %nop1414 = alloca i1, i1 0 + %nop1415 = alloca i1, i1 0 + %nop1416 = alloca i1, i1 0 + %nop1417 = alloca i1, i1 0 + %nop1418 = alloca i1, i1 0 + %nop1419 = alloca i1, i1 0 + %nop1420 = alloca i1, i1 0 + %nop1421 = alloca i1, i1 0 + %nop1422 = alloca i1, i1 0 + %nop1423 = alloca i1, i1 0 + %nop1424 = alloca i1, i1 0 + %nop1425 = alloca i1, i1 0 + %nop1426 = alloca i1, i1 0 + %nop1427 = alloca i1, i1 0 + %nop1428 = alloca i1, i1 0 + %nop1429 = alloca i1, i1 0 + %nop1430 = alloca i1, i1 0 + %nop1431 = alloca i1, i1 0 + %nop1432 = alloca i1, i1 0 + %nop1433 = alloca i1, i1 0 + %nop1434 = alloca i1, i1 0 + %nop1435 = alloca i1, i1 0 + %nop1436 = alloca i1, i1 0 + %nop1437 = alloca i1, i1 0 + %nop1438 = alloca i1, i1 0 + %nop1439 = alloca i1, i1 0 + %nop1440 = alloca i1, i1 0 + %nop1441 = alloca i1, i1 0 + %nop1442 = alloca i1, i1 0 + %nop1443 = alloca i1, i1 0 + %nop1444 = alloca i1, i1 0 + %nop1445 = alloca i1, i1 0 + %nop1446 = alloca i1, i1 0 + %nop1447 = alloca i1, i1 0 + %nop1448 = alloca i1, i1 0 + %nop1449 = alloca i1, i1 0 + %nop1450 = alloca i1, i1 0 + %nop1451 = alloca i1, i1 0 + %nop1452 = alloca i1, i1 0 + %nop1453 = alloca i1, i1 0 + %nop1454 = alloca i1, i1 0 + %nop1455 = alloca i1, i1 0 + %nop1456 = alloca i1, i1 0 + %nop1457 = alloca i1, i1 0 + %nop1458 = alloca 
i1, i1 0 + %nop1459 = alloca i1, i1 0 + %nop1460 = alloca i1, i1 0 + %nop1461 = alloca i1, i1 0 + %nop1462 = alloca i1, i1 0 + %nop1463 = alloca i1, i1 0 + %nop1464 = alloca i1, i1 0 + %nop1465 = alloca i1, i1 0 + %nop1466 = alloca i1, i1 0 + %nop1467 = alloca i1, i1 0 + %nop1468 = alloca i1, i1 0 + %nop1469 = alloca i1, i1 0 + %nop1470 = alloca i1, i1 0 + %nop1471 = alloca i1, i1 0 + %nop1472 = alloca i1, i1 0 + %nop1473 = alloca i1, i1 0 + %nop1474 = alloca i1, i1 0 + %nop1475 = alloca i1, i1 0 + %nop1476 = alloca i1, i1 0 + %nop1477 = alloca i1, i1 0 + %nop1478 = alloca i1, i1 0 + %nop1479 = alloca i1, i1 0 + %nop1480 = alloca i1, i1 0 + %nop1481 = alloca i1, i1 0 + %nop1482 = alloca i1, i1 0 + %nop1483 = alloca i1, i1 0 + %nop1484 = alloca i1, i1 0 + %nop1485 = alloca i1, i1 0 + %nop1486 = alloca i1, i1 0 + %nop1487 = alloca i1, i1 0 + %nop1488 = alloca i1, i1 0 + %nop1489 = alloca i1, i1 0 + %nop1490 = alloca i1, i1 0 + %nop1491 = alloca i1, i1 0 + %nop1492 = alloca i1, i1 0 + %nop1493 = alloca i1, i1 0 + %nop1494 = alloca i1, i1 0 + %nop1495 = alloca i1, i1 0 + %nop1496 = alloca i1, i1 0 + %nop1497 = alloca i1, i1 0 + %nop1498 = alloca i1, i1 0 + %nop1499 = alloca i1, i1 0 + %nop1500 = alloca i1, i1 0 + %nop1501 = alloca i1, i1 0 + %nop1502 = alloca i1, i1 0 + %nop1503 = alloca i1, i1 0 + %nop1504 = alloca i1, i1 0 + %nop1505 = alloca i1, i1 0 + %nop1506 = alloca i1, i1 0 + %nop1507 = alloca i1, i1 0 + %nop1508 = alloca i1, i1 0 + %nop1509 = alloca i1, i1 0 + %nop1510 = alloca i1, i1 0 + %nop1511 = alloca i1, i1 0 + %nop1512 = alloca i1, i1 0 + %nop1513 = alloca i1, i1 0 + %nop1514 = alloca i1, i1 0 + %nop1515 = alloca i1, i1 0 + %nop1516 = alloca i1, i1 0 + %nop1517 = alloca i1, i1 0 + %nop1518 = alloca i1, i1 0 + %nop1519 = alloca i1, i1 0 + %nop1520 = alloca i1, i1 0 + %nop1521 = alloca i1, i1 0 + %nop1522 = alloca i1, i1 0 + %nop1523 = alloca i1, i1 0 + %nop1524 = alloca i1, i1 0 + %nop1525 = alloca i1, i1 0 + %nop1526 = alloca i1, i1 0 + %nop1527 = 
alloca i1, i1 0 + %nop1528 = alloca i1, i1 0 + %nop1529 = alloca i1, i1 0 + %nop1530 = alloca i1, i1 0 + %nop1531 = alloca i1, i1 0 + %nop1532 = alloca i1, i1 0 + %nop1533 = alloca i1, i1 0 + %nop1534 = alloca i1, i1 0 + %nop1535 = alloca i1, i1 0 + %nop1536 = alloca i1, i1 0 + %nop1537 = alloca i1, i1 0 + %nop1538 = alloca i1, i1 0 + %nop1539 = alloca i1, i1 0 + %nop1540 = alloca i1, i1 0 + %nop1541 = alloca i1, i1 0 + %nop1542 = alloca i1, i1 0 + %nop1543 = alloca i1, i1 0 + %nop1544 = alloca i1, i1 0 + %nop1545 = alloca i1, i1 0 + %nop1546 = alloca i1, i1 0 + %nop1547 = alloca i1, i1 0 + %nop1548 = alloca i1, i1 0 + %nop1549 = alloca i1, i1 0 + %nop1550 = alloca i1, i1 0 + %nop1551 = alloca i1, i1 0 + %nop1552 = alloca i1, i1 0 + %nop1553 = alloca i1, i1 0 + %nop1554 = alloca i1, i1 0 + %nop1555 = alloca i1, i1 0 + %nop1556 = alloca i1, i1 0 + %nop1557 = alloca i1, i1 0 + %nop1558 = alloca i1, i1 0 + %nop1559 = alloca i1, i1 0 + %nop1560 = alloca i1, i1 0 + %nop1561 = alloca i1, i1 0 + %nop1562 = alloca i1, i1 0 + %nop1563 = alloca i1, i1 0 + %nop1564 = alloca i1, i1 0 + %nop1565 = alloca i1, i1 0 + %nop1566 = alloca i1, i1 0 + %nop1567 = alloca i1, i1 0 + %nop1568 = alloca i1, i1 0 + %nop1569 = alloca i1, i1 0 + %nop1570 = alloca i1, i1 0 + %nop1571 = alloca i1, i1 0 + %nop1572 = alloca i1, i1 0 + %nop1573 = alloca i1, i1 0 + %nop1574 = alloca i1, i1 0 + %nop1575 = alloca i1, i1 0 + %nop1576 = alloca i1, i1 0 + %nop1577 = alloca i1, i1 0 + %nop1578 = alloca i1, i1 0 + %nop1579 = alloca i1, i1 0 + %nop1580 = alloca i1, i1 0 + %nop1581 = alloca i1, i1 0 + %nop1582 = alloca i1, i1 0 + %nop1583 = alloca i1, i1 0 + %nop1584 = alloca i1, i1 0 + %nop1585 = alloca i1, i1 0 + %nop1586 = alloca i1, i1 0 + %nop1587 = alloca i1, i1 0 + %nop1588 = alloca i1, i1 0 + %nop1589 = alloca i1, i1 0 + %nop1590 = alloca i1, i1 0 + %nop1591 = alloca i1, i1 0 + %nop1592 = alloca i1, i1 0 + %nop1593 = alloca i1, i1 0 + %nop1594 = alloca i1, i1 0 + %nop1595 = alloca i1, i1 0 + %nop1596 
= alloca i1, i1 0 + %nop1597 = alloca i1, i1 0 + %nop1598 = alloca i1, i1 0 + %nop1599 = alloca i1, i1 0 + %nop1600 = alloca i1, i1 0 + %nop1601 = alloca i1, i1 0 + %nop1602 = alloca i1, i1 0 + %nop1603 = alloca i1, i1 0 + %nop1604 = alloca i1, i1 0 + %nop1605 = alloca i1, i1 0 + %nop1606 = alloca i1, i1 0 + %nop1607 = alloca i1, i1 0 + %nop1608 = alloca i1, i1 0 + %nop1609 = alloca i1, i1 0 + %nop1610 = alloca i1, i1 0 + %nop1611 = alloca i1, i1 0 + %nop1612 = alloca i1, i1 0 + %nop1613 = alloca i1, i1 0 + %nop1614 = alloca i1, i1 0 + %nop1615 = alloca i1, i1 0 + %nop1616 = alloca i1, i1 0 + %nop1617 = alloca i1, i1 0 + %nop1618 = alloca i1, i1 0 + %nop1619 = alloca i1, i1 0 + %nop1620 = alloca i1, i1 0 + %nop1621 = alloca i1, i1 0 + %nop1622 = alloca i1, i1 0 + %nop1623 = alloca i1, i1 0 + %nop1624 = alloca i1, i1 0 + %nop1625 = alloca i1, i1 0 + %nop1626 = alloca i1, i1 0 + %nop1627 = alloca i1, i1 0 + %nop1628 = alloca i1, i1 0 + %nop1629 = alloca i1, i1 0 + %nop1630 = alloca i1, i1 0 + %nop1631 = alloca i1, i1 0 + %nop1632 = alloca i1, i1 0 + %nop1633 = alloca i1, i1 0 + %nop1634 = alloca i1, i1 0 + %nop1635 = alloca i1, i1 0 + %nop1636 = alloca i1, i1 0 + %nop1637 = alloca i1, i1 0 + %nop1638 = alloca i1, i1 0 + %nop1639 = alloca i1, i1 0 + %nop1640 = alloca i1, i1 0 + %nop1641 = alloca i1, i1 0 + %nop1642 = alloca i1, i1 0 + %nop1643 = alloca i1, i1 0 + %nop1644 = alloca i1, i1 0 + %nop1645 = alloca i1, i1 0 + %nop1646 = alloca i1, i1 0 + %nop1647 = alloca i1, i1 0 + %nop1648 = alloca i1, i1 0 + %nop1649 = alloca i1, i1 0 + %nop1650 = alloca i1, i1 0 + %nop1651 = alloca i1, i1 0 + %nop1652 = alloca i1, i1 0 + %nop1653 = alloca i1, i1 0 + %nop1654 = alloca i1, i1 0 + %nop1655 = alloca i1, i1 0 + %nop1656 = alloca i1, i1 0 + %nop1657 = alloca i1, i1 0 + %nop1658 = alloca i1, i1 0 + %nop1659 = alloca i1, i1 0 + %nop1660 = alloca i1, i1 0 + %nop1661 = alloca i1, i1 0 + %nop1662 = alloca i1, i1 0 + %nop1663 = alloca i1, i1 0 + %nop1664 = alloca i1, i1 0 + 
%nop1665 = alloca i1, i1 0 + %nop1666 = alloca i1, i1 0 + %nop1667 = alloca i1, i1 0 + %nop1668 = alloca i1, i1 0 + %nop1669 = alloca i1, i1 0 + %nop1670 = alloca i1, i1 0 + %nop1671 = alloca i1, i1 0 + %nop1672 = alloca i1, i1 0 + %nop1673 = alloca i1, i1 0 + %nop1674 = alloca i1, i1 0 + %nop1675 = alloca i1, i1 0 + %nop1676 = alloca i1, i1 0 + %nop1677 = alloca i1, i1 0 + %nop1678 = alloca i1, i1 0 + %nop1679 = alloca i1, i1 0 + %nop1680 = alloca i1, i1 0 + %nop1681 = alloca i1, i1 0 + %nop1682 = alloca i1, i1 0 + %nop1683 = alloca i1, i1 0 + %nop1684 = alloca i1, i1 0 + %nop1685 = alloca i1, i1 0 + %nop1686 = alloca i1, i1 0 + %nop1687 = alloca i1, i1 0 + %nop1688 = alloca i1, i1 0 + %nop1689 = alloca i1, i1 0 + %nop1690 = alloca i1, i1 0 + %nop1691 = alloca i1, i1 0 + %nop1692 = alloca i1, i1 0 + %nop1693 = alloca i1, i1 0 + %nop1694 = alloca i1, i1 0 + %nop1695 = alloca i1, i1 0 + %nop1696 = alloca i1, i1 0 + %nop1697 = alloca i1, i1 0 + %nop1698 = alloca i1, i1 0 + %nop1699 = alloca i1, i1 0 + %nop1700 = alloca i1, i1 0 + %nop1701 = alloca i1, i1 0 + %nop1702 = alloca i1, i1 0 + %nop1703 = alloca i1, i1 0 + %nop1704 = alloca i1, i1 0 + %nop1705 = alloca i1, i1 0 + %nop1706 = alloca i1, i1 0 + %nop1707 = alloca i1, i1 0 + %nop1708 = alloca i1, i1 0 + %nop1709 = alloca i1, i1 0 + %nop1710 = alloca i1, i1 0 + %nop1711 = alloca i1, i1 0 + %nop1712 = alloca i1, i1 0 + %nop1713 = alloca i1, i1 0 + %nop1714 = alloca i1, i1 0 + %nop1715 = alloca i1, i1 0 + %nop1716 = alloca i1, i1 0 + %nop1717 = alloca i1, i1 0 + %nop1718 = alloca i1, i1 0 + %nop1719 = alloca i1, i1 0 + %nop1720 = alloca i1, i1 0 + %nop1721 = alloca i1, i1 0 + %nop1722 = alloca i1, i1 0 + %nop1723 = alloca i1, i1 0 + %nop1724 = alloca i1, i1 0 + %nop1725 = alloca i1, i1 0 + %nop1726 = alloca i1, i1 0 + %nop1727 = alloca i1, i1 0 + %nop1728 = alloca i1, i1 0 + %nop1729 = alloca i1, i1 0 + %nop1730 = alloca i1, i1 0 + %nop1731 = alloca i1, i1 0 + %nop1732 = alloca i1, i1 0 + %nop1733 = alloca i1, i1 0 
+ %nop1734 = alloca i1, i1 0 + %nop1735 = alloca i1, i1 0 + %nop1736 = alloca i1, i1 0 + %nop1737 = alloca i1, i1 0 + %nop1738 = alloca i1, i1 0 + %nop1739 = alloca i1, i1 0 + %nop1740 = alloca i1, i1 0 + %nop1741 = alloca i1, i1 0 + %nop1742 = alloca i1, i1 0 + %nop1743 = alloca i1, i1 0 + %nop1744 = alloca i1, i1 0 + %nop1745 = alloca i1, i1 0 + %nop1746 = alloca i1, i1 0 + %nop1747 = alloca i1, i1 0 + %nop1748 = alloca i1, i1 0 + %nop1749 = alloca i1, i1 0 + %nop1750 = alloca i1, i1 0 + %nop1751 = alloca i1, i1 0 + %nop1752 = alloca i1, i1 0 + %nop1753 = alloca i1, i1 0 + %nop1754 = alloca i1, i1 0 + %nop1755 = alloca i1, i1 0 + %nop1756 = alloca i1, i1 0 + %nop1757 = alloca i1, i1 0 + %nop1758 = alloca i1, i1 0 + %nop1759 = alloca i1, i1 0 + %nop1760 = alloca i1, i1 0 + %nop1761 = alloca i1, i1 0 + %nop1762 = alloca i1, i1 0 + %nop1763 = alloca i1, i1 0 + %nop1764 = alloca i1, i1 0 + %nop1765 = alloca i1, i1 0 + %nop1766 = alloca i1, i1 0 + %nop1767 = alloca i1, i1 0 + %nop1768 = alloca i1, i1 0 + %nop1769 = alloca i1, i1 0 + %nop1770 = alloca i1, i1 0 + %nop1771 = alloca i1, i1 0 + %nop1772 = alloca i1, i1 0 + %nop1773 = alloca i1, i1 0 + %nop1774 = alloca i1, i1 0 + %nop1775 = alloca i1, i1 0 + %nop1776 = alloca i1, i1 0 + %nop1777 = alloca i1, i1 0 + %nop1778 = alloca i1, i1 0 + %nop1779 = alloca i1, i1 0 + %nop1780 = alloca i1, i1 0 + %nop1781 = alloca i1, i1 0 + %nop1782 = alloca i1, i1 0 + %nop1783 = alloca i1, i1 0 + %nop1784 = alloca i1, i1 0 + %nop1785 = alloca i1, i1 0 + %nop1786 = alloca i1, i1 0 + %nop1787 = alloca i1, i1 0 + %nop1788 = alloca i1, i1 0 + %nop1789 = alloca i1, i1 0 + %nop1790 = alloca i1, i1 0 + %nop1791 = alloca i1, i1 0 + %nop1792 = alloca i1, i1 0 + %nop1793 = alloca i1, i1 0 + %nop1794 = alloca i1, i1 0 + %nop1795 = alloca i1, i1 0 + %nop1796 = alloca i1, i1 0 + %nop1797 = alloca i1, i1 0 + %nop1798 = alloca i1, i1 0 + %nop1799 = alloca i1, i1 0 + %nop1800 = alloca i1, i1 0 + %nop1801 = alloca i1, i1 0 + %nop1802 = alloca i1, i1 
0 + %nop1803 = alloca i1, i1 0 + %nop1804 = alloca i1, i1 0 + %nop1805 = alloca i1, i1 0 + %nop1806 = alloca i1, i1 0 + %nop1807 = alloca i1, i1 0 + %nop1808 = alloca i1, i1 0 + %nop1809 = alloca i1, i1 0 + %nop1810 = alloca i1, i1 0 + %nop1811 = alloca i1, i1 0 + %nop1812 = alloca i1, i1 0 + %nop1813 = alloca i1, i1 0 + %nop1814 = alloca i1, i1 0 + %nop1815 = alloca i1, i1 0 + %nop1816 = alloca i1, i1 0 + %nop1817 = alloca i1, i1 0 + %nop1818 = alloca i1, i1 0 + %nop1819 = alloca i1, i1 0 + %nop1820 = alloca i1, i1 0 + %nop1821 = alloca i1, i1 0 + %nop1822 = alloca i1, i1 0 + %nop1823 = alloca i1, i1 0 + %nop1824 = alloca i1, i1 0 + %nop1825 = alloca i1, i1 0 + %nop1826 = alloca i1, i1 0 + %nop1827 = alloca i1, i1 0 + %nop1828 = alloca i1, i1 0 + %nop1829 = alloca i1, i1 0 + %nop1830 = alloca i1, i1 0 + %nop1831 = alloca i1, i1 0 + %nop1832 = alloca i1, i1 0 + %nop1833 = alloca i1, i1 0 + %nop1834 = alloca i1, i1 0 + %nop1835 = alloca i1, i1 0 + %nop1836 = alloca i1, i1 0 + %nop1837 = alloca i1, i1 0 + %nop1838 = alloca i1, i1 0 + %nop1839 = alloca i1, i1 0 + %nop1840 = alloca i1, i1 0 + %nop1841 = alloca i1, i1 0 + %nop1842 = alloca i1, i1 0 + %nop1843 = alloca i1, i1 0 + %nop1844 = alloca i1, i1 0 + %nop1845 = alloca i1, i1 0 + %nop1846 = alloca i1, i1 0 + %nop1847 = alloca i1, i1 0 + %nop1848 = alloca i1, i1 0 + %nop1849 = alloca i1, i1 0 + %nop1850 = alloca i1, i1 0 + %nop1851 = alloca i1, i1 0 + %nop1852 = alloca i1, i1 0 + %nop1853 = alloca i1, i1 0 + %nop1854 = alloca i1, i1 0 + %nop1855 = alloca i1, i1 0 + %nop1856 = alloca i1, i1 0 + %nop1857 = alloca i1, i1 0 + %nop1858 = alloca i1, i1 0 + %nop1859 = alloca i1, i1 0 + %nop1860 = alloca i1, i1 0 + %nop1861 = alloca i1, i1 0 + %nop1862 = alloca i1, i1 0 + %nop1863 = alloca i1, i1 0 + %nop1864 = alloca i1, i1 0 + %nop1865 = alloca i1, i1 0 + %nop1866 = alloca i1, i1 0 + %nop1867 = alloca i1, i1 0 + %nop1868 = alloca i1, i1 0 + %nop1869 = alloca i1, i1 0 + %nop1870 = alloca i1, i1 0 + %nop1871 = alloca i1, 
i1 0 + %nop1872 = alloca i1, i1 0 + %nop1873 = alloca i1, i1 0 + %nop1874 = alloca i1, i1 0 + %nop1875 = alloca i1, i1 0 + %nop1876 = alloca i1, i1 0 + %nop1877 = alloca i1, i1 0 + %nop1878 = alloca i1, i1 0 + %nop1879 = alloca i1, i1 0 + %nop1880 = alloca i1, i1 0 + %nop1881 = alloca i1, i1 0 + %nop1882 = alloca i1, i1 0 + %nop1883 = alloca i1, i1 0 + %nop1884 = alloca i1, i1 0 + %nop1885 = alloca i1, i1 0 + %nop1886 = alloca i1, i1 0 + %nop1887 = alloca i1, i1 0 + %nop1888 = alloca i1, i1 0 + %nop1889 = alloca i1, i1 0 + %nop1890 = alloca i1, i1 0 + %nop1891 = alloca i1, i1 0 + %nop1892 = alloca i1, i1 0 + %nop1893 = alloca i1, i1 0 + %nop1894 = alloca i1, i1 0 + %nop1895 = alloca i1, i1 0 + %nop1896 = alloca i1, i1 0 + %nop1897 = alloca i1, i1 0 + %nop1898 = alloca i1, i1 0 + %nop1899 = alloca i1, i1 0 + %nop1900 = alloca i1, i1 0 + %nop1901 = alloca i1, i1 0 + %nop1902 = alloca i1, i1 0 + %nop1903 = alloca i1, i1 0 + %nop1904 = alloca i1, i1 0 + %nop1905 = alloca i1, i1 0 + %nop1906 = alloca i1, i1 0 + %nop1907 = alloca i1, i1 0 + %nop1908 = alloca i1, i1 0 + %nop1909 = alloca i1, i1 0 + %nop1910 = alloca i1, i1 0 + %nop1911 = alloca i1, i1 0 + %nop1912 = alloca i1, i1 0 + %nop1913 = alloca i1, i1 0 + %nop1914 = alloca i1, i1 0 + %nop1915 = alloca i1, i1 0 + %nop1916 = alloca i1, i1 0 + %nop1917 = alloca i1, i1 0 + %nop1918 = alloca i1, i1 0 + %nop1919 = alloca i1, i1 0 + %nop1920 = alloca i1, i1 0 + %nop1921 = alloca i1, i1 0 + %nop1922 = alloca i1, i1 0 + %nop1923 = alloca i1, i1 0 + %nop1924 = alloca i1, i1 0 + %nop1925 = alloca i1, i1 0 + %nop1926 = alloca i1, i1 0 + %nop1927 = alloca i1, i1 0 + %nop1928 = alloca i1, i1 0 + %nop1929 = alloca i1, i1 0 + %nop1930 = alloca i1, i1 0 + %nop1931 = alloca i1, i1 0 + %nop1932 = alloca i1, i1 0 + %nop1933 = alloca i1, i1 0 + %nop1934 = alloca i1, i1 0 + %nop1935 = alloca i1, i1 0 + %nop1936 = alloca i1, i1 0 + %nop1937 = alloca i1, i1 0 + %nop1938 = alloca i1, i1 0 + %nop1939 = alloca i1, i1 0 + %nop1940 = alloca 
i1, i1 0 + %nop1941 = alloca i1, i1 0 + %nop1942 = alloca i1, i1 0 + %nop1943 = alloca i1, i1 0 + %nop1944 = alloca i1, i1 0 + %nop1945 = alloca i1, i1 0 + %nop1946 = alloca i1, i1 0 + %nop1947 = alloca i1, i1 0 + %nop1948 = alloca i1, i1 0 + %nop1949 = alloca i1, i1 0 + %nop1950 = alloca i1, i1 0 + %nop1951 = alloca i1, i1 0 + %nop1952 = alloca i1, i1 0 + %nop1953 = alloca i1, i1 0 + %nop1954 = alloca i1, i1 0 + %nop1955 = alloca i1, i1 0 + %nop1956 = alloca i1, i1 0 + %nop1957 = alloca i1, i1 0 + %nop1958 = alloca i1, i1 0 + %nop1959 = alloca i1, i1 0 + %nop1960 = alloca i1, i1 0 + %nop1961 = alloca i1, i1 0 + %nop1962 = alloca i1, i1 0 + %nop1963 = alloca i1, i1 0 + %nop1964 = alloca i1, i1 0 + %nop1965 = alloca i1, i1 0 + %nop1966 = alloca i1, i1 0 + %nop1967 = alloca i1, i1 0 + %nop1968 = alloca i1, i1 0 + %nop1969 = alloca i1, i1 0 + %nop1970 = alloca i1, i1 0 + %nop1971 = alloca i1, i1 0 + %nop1972 = alloca i1, i1 0 + %nop1973 = alloca i1, i1 0 + %nop1974 = alloca i1, i1 0 + %nop1975 = alloca i1, i1 0 + %nop1976 = alloca i1, i1 0 + %nop1977 = alloca i1, i1 0 + %nop1978 = alloca i1, i1 0 + %nop1979 = alloca i1, i1 0 + %nop1980 = alloca i1, i1 0 + %nop1981 = alloca i1, i1 0 + %nop1982 = alloca i1, i1 0 + %nop1983 = alloca i1, i1 0 + %nop1984 = alloca i1, i1 0 + %nop1985 = alloca i1, i1 0 + %nop1986 = alloca i1, i1 0 + %nop1987 = alloca i1, i1 0 + %nop1988 = alloca i1, i1 0 + %nop1989 = alloca i1, i1 0 + %nop1990 = alloca i1, i1 0 + %nop1991 = alloca i1, i1 0 + %nop1992 = alloca i1, i1 0 + %nop1993 = alloca i1, i1 0 + %nop1994 = alloca i1, i1 0 + %nop1995 = alloca i1, i1 0 + %nop1996 = alloca i1, i1 0 + %nop1997 = alloca i1, i1 0 + %nop1998 = alloca i1, i1 0 + %nop1999 = alloca i1, i1 0 + %nop2000 = alloca i1, i1 0 + %nop2001 = alloca i1, i1 0 + %nop2002 = alloca i1, i1 0 + %nop2003 = alloca i1, i1 0 + %nop2004 = alloca i1, i1 0 + %nop2005 = alloca i1, i1 0 + %nop2006 = alloca i1, i1 0 + %nop2007 = alloca i1, i1 0 + %nop2008 = alloca i1, i1 0 + %nop2009 = 
alloca i1, i1 0 + %nop2010 = alloca i1, i1 0 + %nop2011 = alloca i1, i1 0 + %nop2012 = alloca i1, i1 0 + %nop2013 = alloca i1, i1 0 + %nop2014 = alloca i1, i1 0 + %nop2015 = alloca i1, i1 0 + %nop2016 = alloca i1, i1 0 + %nop2017 = alloca i1, i1 0 + %nop2018 = alloca i1, i1 0 + %nop2019 = alloca i1, i1 0 + %nop2020 = alloca i1, i1 0 + %nop2021 = alloca i1, i1 0 + %nop2022 = alloca i1, i1 0 + %nop2023 = alloca i1, i1 0 + %nop2024 = alloca i1, i1 0 + %nop2025 = alloca i1, i1 0 + %nop2026 = alloca i1, i1 0 + %nop2027 = alloca i1, i1 0 + %nop2028 = alloca i1, i1 0 + %nop2029 = alloca i1, i1 0 + %nop2030 = alloca i1, i1 0 + %nop2031 = alloca i1, i1 0 + %nop2032 = alloca i1, i1 0 + %nop2033 = alloca i1, i1 0 + %nop2034 = alloca i1, i1 0 + %nop2035 = alloca i1, i1 0 + %nop2036 = alloca i1, i1 0 + %nop2037 = alloca i1, i1 0 + %nop2038 = alloca i1, i1 0 + %nop2039 = alloca i1, i1 0 + %nop2040 = alloca i1, i1 0 + %nop2041 = alloca i1, i1 0 + %nop2042 = alloca i1, i1 0 + %nop2043 = alloca i1, i1 0 + %nop2044 = alloca i1, i1 0 + %nop2045 = alloca i1, i1 0 + %nop2046 = alloca i1, i1 0 + %nop2047 = alloca i1, i1 0 + %nop2048 = alloca i1, i1 0 + %nop2049 = alloca i1, i1 0 + %nop2050 = alloca i1, i1 0 + %nop2051 = alloca i1, i1 0 + %nop2052 = alloca i1, i1 0 + %nop2053 = alloca i1, i1 0 + %nop2054 = alloca i1, i1 0 + %nop2055 = alloca i1, i1 0 + %nop2056 = alloca i1, i1 0 + %nop2057 = alloca i1, i1 0 + %nop2058 = alloca i1, i1 0 + %nop2059 = alloca i1, i1 0 + %nop2060 = alloca i1, i1 0 + %nop2061 = alloca i1, i1 0 + %nop2062 = alloca i1, i1 0 + %nop2063 = alloca i1, i1 0 + %nop2064 = alloca i1, i1 0 + %nop2065 = alloca i1, i1 0 + %nop2066 = alloca i1, i1 0 + %nop2067 = alloca i1, i1 0 + %nop2068 = alloca i1, i1 0 + %nop2069 = alloca i1, i1 0 + %nop2070 = alloca i1, i1 0 + %nop2071 = alloca i1, i1 0 + %nop2072 = alloca i1, i1 0 + %nop2073 = alloca i1, i1 0 + %nop2074 = alloca i1, i1 0 + %nop2075 = alloca i1, i1 0 + %nop2076 = alloca i1, i1 0 + %nop2077 = alloca i1, i1 0 + %nop2078 
= alloca i1, i1 0 + %nop2079 = alloca i1, i1 0 + %nop2080 = alloca i1, i1 0 + %nop2081 = alloca i1, i1 0 + %nop2082 = alloca i1, i1 0 + %nop2083 = alloca i1, i1 0 + %nop2084 = alloca i1, i1 0 + %nop2085 = alloca i1, i1 0 + %nop2086 = alloca i1, i1 0 + %nop2087 = alloca i1, i1 0 + %nop2088 = alloca i1, i1 0 + %nop2089 = alloca i1, i1 0 + %nop2090 = alloca i1, i1 0 + %nop2091 = alloca i1, i1 0 + %nop2092 = alloca i1, i1 0 + %nop2093 = alloca i1, i1 0 + %nop2094 = alloca i1, i1 0 + %nop2095 = alloca i1, i1 0 + %nop2096 = alloca i1, i1 0 + %nop2097 = alloca i1, i1 0 + %nop2098 = alloca i1, i1 0 + %nop2099 = alloca i1, i1 0 + %nop2100 = alloca i1, i1 0 + %nop2101 = alloca i1, i1 0 + %nop2102 = alloca i1, i1 0 + %nop2103 = alloca i1, i1 0 + %nop2104 = alloca i1, i1 0 + %nop2105 = alloca i1, i1 0 + %nop2106 = alloca i1, i1 0 + %nop2107 = alloca i1, i1 0 + %nop2108 = alloca i1, i1 0 + %nop2109 = alloca i1, i1 0 + %nop2110 = alloca i1, i1 0 + %nop2111 = alloca i1, i1 0 + %nop2112 = alloca i1, i1 0 + %nop2113 = alloca i1, i1 0 + %nop2114 = alloca i1, i1 0 + %nop2115 = alloca i1, i1 0 + %nop2116 = alloca i1, i1 0 + %nop2117 = alloca i1, i1 0 + %nop2118 = alloca i1, i1 0 + %nop2119 = alloca i1, i1 0 + %nop2120 = alloca i1, i1 0 + %nop2121 = alloca i1, i1 0 + %nop2122 = alloca i1, i1 0 + %nop2123 = alloca i1, i1 0 + %nop2124 = alloca i1, i1 0 + %nop2125 = alloca i1, i1 0 + %nop2126 = alloca i1, i1 0 + %nop2127 = alloca i1, i1 0 + %nop2128 = alloca i1, i1 0 + %nop2129 = alloca i1, i1 0 + %nop2130 = alloca i1, i1 0 + %nop2131 = alloca i1, i1 0 + %nop2132 = alloca i1, i1 0 + %nop2133 = alloca i1, i1 0 + %nop2134 = alloca i1, i1 0 + %nop2135 = alloca i1, i1 0 + %nop2136 = alloca i1, i1 0 + %nop2137 = alloca i1, i1 0 + %nop2138 = alloca i1, i1 0 + %nop2139 = alloca i1, i1 0 + %nop2140 = alloca i1, i1 0 + %nop2141 = alloca i1, i1 0 + %nop2142 = alloca i1, i1 0 + %nop2143 = alloca i1, i1 0 + %nop2144 = alloca i1, i1 0 + %nop2145 = alloca i1, i1 0 + %nop2146 = alloca i1, i1 0 + 
%nop2147 = alloca i1, i1 0 + %nop2148 = alloca i1, i1 0 + %nop2149 = alloca i1, i1 0 + %nop2150 = alloca i1, i1 0 + %nop2151 = alloca i1, i1 0 + %nop2152 = alloca i1, i1 0 + %nop2153 = alloca i1, i1 0 + %nop2154 = alloca i1, i1 0 + %nop2155 = alloca i1, i1 0 + %nop2156 = alloca i1, i1 0 + %nop2157 = alloca i1, i1 0 + %nop2158 = alloca i1, i1 0 + %nop2159 = alloca i1, i1 0 + %nop2160 = alloca i1, i1 0 + %nop2161 = alloca i1, i1 0 + %nop2162 = alloca i1, i1 0 + %nop2163 = alloca i1, i1 0 + %nop2164 = alloca i1, i1 0 + %nop2165 = alloca i1, i1 0 + %nop2166 = alloca i1, i1 0 + %nop2167 = alloca i1, i1 0 + %nop2168 = alloca i1, i1 0 + %nop2169 = alloca i1, i1 0 + %nop2170 = alloca i1, i1 0 + %nop2171 = alloca i1, i1 0 + %nop2172 = alloca i1, i1 0 + %nop2173 = alloca i1, i1 0 + %nop2174 = alloca i1, i1 0 + %nop2175 = alloca i1, i1 0 + %nop2176 = alloca i1, i1 0 + %nop2177 = alloca i1, i1 0 + %nop2178 = alloca i1, i1 0 + %nop2179 = alloca i1, i1 0 + %nop2180 = alloca i1, i1 0 + %nop2181 = alloca i1, i1 0 + %nop2182 = alloca i1, i1 0 + %nop2183 = alloca i1, i1 0 + %nop2184 = alloca i1, i1 0 + %nop2185 = alloca i1, i1 0 + %nop2186 = alloca i1, i1 0 + %nop2187 = alloca i1, i1 0 + %nop2188 = alloca i1, i1 0 + %nop2189 = alloca i1, i1 0 + %nop2190 = alloca i1, i1 0 + %nop2191 = alloca i1, i1 0 + %nop2192 = alloca i1, i1 0 + %nop2193 = alloca i1, i1 0 + %nop2194 = alloca i1, i1 0 + %nop2195 = alloca i1, i1 0 + %nop2196 = alloca i1, i1 0 + %nop2197 = alloca i1, i1 0 + %nop2198 = alloca i1, i1 0 + %nop2199 = alloca i1, i1 0 + %nop2200 = alloca i1, i1 0 + %nop2201 = alloca i1, i1 0 + %nop2202 = alloca i1, i1 0 + %nop2203 = alloca i1, i1 0 + %nop2204 = alloca i1, i1 0 + %nop2205 = alloca i1, i1 0 + %nop2206 = alloca i1, i1 0 + %nop2207 = alloca i1, i1 0 + %nop2208 = alloca i1, i1 0 + %nop2209 = alloca i1, i1 0 + %nop2210 = alloca i1, i1 0 + %nop2211 = alloca i1, i1 0 + %nop2212 = alloca i1, i1 0 + %nop2213 = alloca i1, i1 0 + %nop2214 = alloca i1, i1 0 + %nop2215 = alloca i1, i1 0 
+ %nop2216 = alloca i1, i1 0 + %nop2217 = alloca i1, i1 0 + %nop2218 = alloca i1, i1 0 + %nop2219 = alloca i1, i1 0 + %nop2220 = alloca i1, i1 0 + %nop2221 = alloca i1, i1 0 + %nop2222 = alloca i1, i1 0 + %nop2223 = alloca i1, i1 0 + %nop2224 = alloca i1, i1 0 + %nop2225 = alloca i1, i1 0 + %nop2226 = alloca i1, i1 0 + %nop2227 = alloca i1, i1 0 + %nop2228 = alloca i1, i1 0 + %nop2229 = alloca i1, i1 0 + %nop2230 = alloca i1, i1 0 + %nop2231 = alloca i1, i1 0 + %nop2232 = alloca i1, i1 0 + %nop2233 = alloca i1, i1 0 + %nop2234 = alloca i1, i1 0 + %nop2235 = alloca i1, i1 0 + %nop2236 = alloca i1, i1 0 + %nop2237 = alloca i1, i1 0 + %nop2238 = alloca i1, i1 0 + %nop2239 = alloca i1, i1 0 + %nop2240 = alloca i1, i1 0 + %nop2241 = alloca i1, i1 0 + %nop2242 = alloca i1, i1 0 + %nop2243 = alloca i1, i1 0 + %nop2244 = alloca i1, i1 0 + %nop2245 = alloca i1, i1 0 + %nop2246 = alloca i1, i1 0 + %nop2247 = alloca i1, i1 0 + %nop2248 = alloca i1, i1 0 + %nop2249 = alloca i1, i1 0 + %nop2250 = alloca i1, i1 0 + %nop2251 = alloca i1, i1 0 + %nop2252 = alloca i1, i1 0 + %nop2253 = alloca i1, i1 0 + %nop2254 = alloca i1, i1 0 + %nop2255 = alloca i1, i1 0 + %nop2256 = alloca i1, i1 0 + %nop2257 = alloca i1, i1 0 + %nop2258 = alloca i1, i1 0 + %nop2259 = alloca i1, i1 0 + %nop2260 = alloca i1, i1 0 + %nop2261 = alloca i1, i1 0 + %nop2262 = alloca i1, i1 0 + %nop2263 = alloca i1, i1 0 + %nop2264 = alloca i1, i1 0 + %nop2265 = alloca i1, i1 0 + %nop2266 = alloca i1, i1 0 + %nop2267 = alloca i1, i1 0 + %nop2268 = alloca i1, i1 0 + %nop2269 = alloca i1, i1 0 + %nop2270 = alloca i1, i1 0 + %nop2271 = alloca i1, i1 0 + %nop2272 = alloca i1, i1 0 + %nop2273 = alloca i1, i1 0 + %nop2274 = alloca i1, i1 0 + %nop2275 = alloca i1, i1 0 + %nop2276 = alloca i1, i1 0 + %nop2277 = alloca i1, i1 0 + %nop2278 = alloca i1, i1 0 + %nop2279 = alloca i1, i1 0 + %nop2280 = alloca i1, i1 0 + %nop2281 = alloca i1, i1 0 + %nop2282 = alloca i1, i1 0 + %nop2283 = alloca i1, i1 0 + %nop2284 = alloca i1, i1 
0 + %nop2285 = alloca i1, i1 0 + %nop2286 = alloca i1, i1 0 + %nop2287 = alloca i1, i1 0 + %nop2288 = alloca i1, i1 0 + %nop2289 = alloca i1, i1 0 + %nop2290 = alloca i1, i1 0 + %nop2291 = alloca i1, i1 0 + %nop2292 = alloca i1, i1 0 + %nop2293 = alloca i1, i1 0 + %nop2294 = alloca i1, i1 0 + %nop2295 = alloca i1, i1 0 + %nop2296 = alloca i1, i1 0 + %nop2297 = alloca i1, i1 0 + %nop2298 = alloca i1, i1 0 + %nop2299 = alloca i1, i1 0 + %nop2300 = alloca i1, i1 0 + %nop2301 = alloca i1, i1 0 + %nop2302 = alloca i1, i1 0 + %nop2303 = alloca i1, i1 0 + %nop2304 = alloca i1, i1 0 + %nop2305 = alloca i1, i1 0 + %nop2306 = alloca i1, i1 0 + %nop2307 = alloca i1, i1 0 + %nop2308 = alloca i1, i1 0 + %nop2309 = alloca i1, i1 0 + %nop2310 = alloca i1, i1 0 + %nop2311 = alloca i1, i1 0 + %nop2312 = alloca i1, i1 0 + %nop2313 = alloca i1, i1 0 + %nop2314 = alloca i1, i1 0 + %nop2315 = alloca i1, i1 0 + %nop2316 = alloca i1, i1 0 + %nop2317 = alloca i1, i1 0 + %nop2318 = alloca i1, i1 0 + %nop2319 = alloca i1, i1 0 + %nop2320 = alloca i1, i1 0 + %nop2321 = alloca i1, i1 0 + %nop2322 = alloca i1, i1 0 + %nop2323 = alloca i1, i1 0 + %nop2324 = alloca i1, i1 0 + %nop2325 = alloca i1, i1 0 + %nop2326 = alloca i1, i1 0 + %nop2327 = alloca i1, i1 0 + %nop2328 = alloca i1, i1 0 + %nop2329 = alloca i1, i1 0 + %nop2330 = alloca i1, i1 0 + %nop2331 = alloca i1, i1 0 + %nop2332 = alloca i1, i1 0 + %nop2333 = alloca i1, i1 0 + %nop2334 = alloca i1, i1 0 + %nop2335 = alloca i1, i1 0 + %nop2336 = alloca i1, i1 0 + %nop2337 = alloca i1, i1 0 + %nop2338 = alloca i1, i1 0 + %nop2339 = alloca i1, i1 0 + %nop2340 = alloca i1, i1 0 + %nop2341 = alloca i1, i1 0 + %nop2342 = alloca i1, i1 0 + %nop2343 = alloca i1, i1 0 + %nop2344 = alloca i1, i1 0 + %nop2345 = alloca i1, i1 0 + %nop2346 = alloca i1, i1 0 + %nop2347 = alloca i1, i1 0 + %nop2348 = alloca i1, i1 0 + %nop2349 = alloca i1, i1 0 + %nop2350 = alloca i1, i1 0 + %nop2351 = alloca i1, i1 0 + %nop2352 = alloca i1, i1 0 + %nop2353 = alloca i1, 
i1 0 + %nop2354 = alloca i1, i1 0 + %nop2355 = alloca i1, i1 0 + %nop2356 = alloca i1, i1 0 + %nop2357 = alloca i1, i1 0 + %nop2358 = alloca i1, i1 0 + %nop2359 = alloca i1, i1 0 + %nop2360 = alloca i1, i1 0 + %nop2361 = alloca i1, i1 0 + %nop2362 = alloca i1, i1 0 + %nop2363 = alloca i1, i1 0 + %nop2364 = alloca i1, i1 0 + %nop2365 = alloca i1, i1 0 + %nop2366 = alloca i1, i1 0 + %nop2367 = alloca i1, i1 0 + %nop2368 = alloca i1, i1 0 + %nop2369 = alloca i1, i1 0 + %nop2370 = alloca i1, i1 0 + %nop2371 = alloca i1, i1 0 + %nop2372 = alloca i1, i1 0 + %nop2373 = alloca i1, i1 0 + %nop2374 = alloca i1, i1 0 + %nop2375 = alloca i1, i1 0 + %nop2376 = alloca i1, i1 0 + %nop2377 = alloca i1, i1 0 + %nop2378 = alloca i1, i1 0 + %nop2379 = alloca i1, i1 0 + %nop2380 = alloca i1, i1 0 + %nop2381 = alloca i1, i1 0 + %nop2382 = alloca i1, i1 0 + %nop2383 = alloca i1, i1 0 + %nop2384 = alloca i1, i1 0 + %nop2385 = alloca i1, i1 0 + %nop2386 = alloca i1, i1 0 + %nop2387 = alloca i1, i1 0 + %nop2388 = alloca i1, i1 0 + %nop2389 = alloca i1, i1 0 + %nop2390 = alloca i1, i1 0 + %nop2391 = alloca i1, i1 0 + %nop2392 = alloca i1, i1 0 + %nop2393 = alloca i1, i1 0 + %nop2394 = alloca i1, i1 0 + %nop2395 = alloca i1, i1 0 + %nop2396 = alloca i1, i1 0 + %nop2397 = alloca i1, i1 0 + %nop2398 = alloca i1, i1 0 + %nop2399 = alloca i1, i1 0 + %nop2400 = alloca i1, i1 0 + %nop2401 = alloca i1, i1 0 + %nop2402 = alloca i1, i1 0 + %nop2403 = alloca i1, i1 0 + %nop2404 = alloca i1, i1 0 + %nop2405 = alloca i1, i1 0 + %nop2406 = alloca i1, i1 0 + %nop2407 = alloca i1, i1 0 + %nop2408 = alloca i1, i1 0 + %nop2409 = alloca i1, i1 0 + %nop2410 = alloca i1, i1 0 + %nop2411 = alloca i1, i1 0 + %nop2412 = alloca i1, i1 0 + %nop2413 = alloca i1, i1 0 + %nop2414 = alloca i1, i1 0 + %nop2415 = alloca i1, i1 0 + %nop2416 = alloca i1, i1 0 + %nop2417 = alloca i1, i1 0 + %nop2418 = alloca i1, i1 0 + %nop2419 = alloca i1, i1 0 + %nop2420 = alloca i1, i1 0 + %nop2421 = alloca i1, i1 0 + %nop2422 = alloca 
i1, i1 0 + %nop2423 = alloca i1, i1 0 + %nop2424 = alloca i1, i1 0 + %nop2425 = alloca i1, i1 0 + %nop2426 = alloca i1, i1 0 + %nop2427 = alloca i1, i1 0 + %nop2428 = alloca i1, i1 0 + %nop2429 = alloca i1, i1 0 + %nop2430 = alloca i1, i1 0 + %nop2431 = alloca i1, i1 0 + %nop2432 = alloca i1, i1 0 + %nop2433 = alloca i1, i1 0 + %nop2434 = alloca i1, i1 0 + %nop2435 = alloca i1, i1 0 + %nop2436 = alloca i1, i1 0 + %nop2437 = alloca i1, i1 0 + %nop2438 = alloca i1, i1 0 + %nop2439 = alloca i1, i1 0 + %nop2440 = alloca i1, i1 0 + %nop2441 = alloca i1, i1 0 + %nop2442 = alloca i1, i1 0 + %nop2443 = alloca i1, i1 0 + %nop2444 = alloca i1, i1 0 + %nop2445 = alloca i1, i1 0 + %nop2446 = alloca i1, i1 0 + %nop2447 = alloca i1, i1 0 + %nop2448 = alloca i1, i1 0 + %nop2449 = alloca i1, i1 0 + %nop2450 = alloca i1, i1 0 + %nop2451 = alloca i1, i1 0 + %nop2452 = alloca i1, i1 0 + %nop2453 = alloca i1, i1 0 + %nop2454 = alloca i1, i1 0 + %nop2455 = alloca i1, i1 0 + %nop2456 = alloca i1, i1 0 + %nop2457 = alloca i1, i1 0 + %nop2458 = alloca i1, i1 0 + %nop2459 = alloca i1, i1 0 + %nop2460 = alloca i1, i1 0 + %nop2461 = alloca i1, i1 0 + %nop2462 = alloca i1, i1 0 + %nop2463 = alloca i1, i1 0 + %nop2464 = alloca i1, i1 0 + %nop2465 = alloca i1, i1 0 + %nop2466 = alloca i1, i1 0 + %nop2467 = alloca i1, i1 0 + %nop2468 = alloca i1, i1 0 + %nop2469 = alloca i1, i1 0 + %nop2470 = alloca i1, i1 0 + %nop2471 = alloca i1, i1 0 + %nop2472 = alloca i1, i1 0 + %nop2473 = alloca i1, i1 0 + %nop2474 = alloca i1, i1 0 + %nop2475 = alloca i1, i1 0 + %nop2476 = alloca i1, i1 0 + %nop2477 = alloca i1, i1 0 + %nop2478 = alloca i1, i1 0 + %nop2479 = alloca i1, i1 0 + %nop2480 = alloca i1, i1 0 + %nop2481 = alloca i1, i1 0 + %nop2482 = alloca i1, i1 0 + %nop2483 = alloca i1, i1 0 + %nop2484 = alloca i1, i1 0 + %nop2485 = alloca i1, i1 0 + %nop2486 = alloca i1, i1 0 + %nop2487 = alloca i1, i1 0 + %nop2488 = alloca i1, i1 0 + %nop2489 = alloca i1, i1 0 + %nop2490 = alloca i1, i1 0 + %nop2491 = 
alloca i1, i1 0 + %nop2492 = alloca i1, i1 0 + %nop2493 = alloca i1, i1 0 + %nop2494 = alloca i1, i1 0 + %nop2495 = alloca i1, i1 0 + %nop2496 = alloca i1, i1 0 + %nop2497 = alloca i1, i1 0 + %nop2498 = alloca i1, i1 0 + %nop2499 = alloca i1, i1 0 + %nop2500 = alloca i1, i1 0 + %nop2501 = alloca i1, i1 0 + %nop2502 = alloca i1, i1 0 + %nop2503 = alloca i1, i1 0 + %nop2504 = alloca i1, i1 0 + %nop2505 = alloca i1, i1 0 + %nop2506 = alloca i1, i1 0 + %nop2507 = alloca i1, i1 0 + %nop2508 = alloca i1, i1 0 + %nop2509 = alloca i1, i1 0 + %nop2510 = alloca i1, i1 0 + %nop2511 = alloca i1, i1 0 + %nop2512 = alloca i1, i1 0 + %nop2513 = alloca i1, i1 0 + %nop2514 = alloca i1, i1 0 + %nop2515 = alloca i1, i1 0 + %nop2516 = alloca i1, i1 0 + %nop2517 = alloca i1, i1 0 + %nop2518 = alloca i1, i1 0 + %nop2519 = alloca i1, i1 0 + %nop2520 = alloca i1, i1 0 + %nop2521 = alloca i1, i1 0 + %nop2522 = alloca i1, i1 0 + %nop2523 = alloca i1, i1 0 + %nop2524 = alloca i1, i1 0 + %nop2525 = alloca i1, i1 0 + %nop2526 = alloca i1, i1 0 + %nop2527 = alloca i1, i1 0 + %nop2528 = alloca i1, i1 0 + %nop2529 = alloca i1, i1 0 + %nop2530 = alloca i1, i1 0 + %nop2531 = alloca i1, i1 0 + %nop2532 = alloca i1, i1 0 + %nop2533 = alloca i1, i1 0 + %nop2534 = alloca i1, i1 0 + %nop2535 = alloca i1, i1 0 + %nop2536 = alloca i1, i1 0 + %nop2537 = alloca i1, i1 0 + %nop2538 = alloca i1, i1 0 + %nop2539 = alloca i1, i1 0 + %nop2540 = alloca i1, i1 0 + %nop2541 = alloca i1, i1 0 + %nop2542 = alloca i1, i1 0 + %nop2543 = alloca i1, i1 0 + %nop2544 = alloca i1, i1 0 + %nop2545 = alloca i1, i1 0 + %nop2546 = alloca i1, i1 0 + %nop2547 = alloca i1, i1 0 + %nop2548 = alloca i1, i1 0 + %nop2549 = alloca i1, i1 0 + %nop2550 = alloca i1, i1 0 + %nop2551 = alloca i1, i1 0 + %nop2552 = alloca i1, i1 0 + %nop2553 = alloca i1, i1 0 + %nop2554 = alloca i1, i1 0 + %nop2555 = alloca i1, i1 0 + %nop2556 = alloca i1, i1 0 + %nop2557 = alloca i1, i1 0 + %nop2558 = alloca i1, i1 0 + %nop2559 = alloca i1, i1 0 + %nop2560 
= alloca i1, i1 0 + %nop2561 = alloca i1, i1 0 + %nop2562 = alloca i1, i1 0 + %nop2563 = alloca i1, i1 0 + %nop2564 = alloca i1, i1 0 + %nop2565 = alloca i1, i1 0 + %nop2566 = alloca i1, i1 0 + %nop2567 = alloca i1, i1 0 + %nop2568 = alloca i1, i1 0 + %nop2569 = alloca i1, i1 0 + %nop2570 = alloca i1, i1 0 + %nop2571 = alloca i1, i1 0 + %nop2572 = alloca i1, i1 0 + %nop2573 = alloca i1, i1 0 + %nop2574 = alloca i1, i1 0 + %nop2575 = alloca i1, i1 0 + %nop2576 = alloca i1, i1 0 + %nop2577 = alloca i1, i1 0 + %nop2578 = alloca i1, i1 0 + %nop2579 = alloca i1, i1 0 + %nop2580 = alloca i1, i1 0 + %nop2581 = alloca i1, i1 0 + %nop2582 = alloca i1, i1 0 + %nop2583 = alloca i1, i1 0 + %nop2584 = alloca i1, i1 0 + %nop2585 = alloca i1, i1 0 + %nop2586 = alloca i1, i1 0 + %nop2587 = alloca i1, i1 0 + %nop2588 = alloca i1, i1 0 + %nop2589 = alloca i1, i1 0 + %nop2590 = alloca i1, i1 0 + %nop2591 = alloca i1, i1 0 + %nop2592 = alloca i1, i1 0 + %nop2593 = alloca i1, i1 0 + %nop2594 = alloca i1, i1 0 + %nop2595 = alloca i1, i1 0 + %nop2596 = alloca i1, i1 0 + %nop2597 = alloca i1, i1 0 + %nop2598 = alloca i1, i1 0 + %nop2599 = alloca i1, i1 0 + %nop2600 = alloca i1, i1 0 + %nop2601 = alloca i1, i1 0 + %nop2602 = alloca i1, i1 0 + %nop2603 = alloca i1, i1 0 + %nop2604 = alloca i1, i1 0 + %nop2605 = alloca i1, i1 0 + %nop2606 = alloca i1, i1 0 + %nop2607 = alloca i1, i1 0 + %nop2608 = alloca i1, i1 0 + %nop2609 = alloca i1, i1 0 + %nop2610 = alloca i1, i1 0 + %nop2611 = alloca i1, i1 0 + %nop2612 = alloca i1, i1 0 + %nop2613 = alloca i1, i1 0 + %nop2614 = alloca i1, i1 0 + %nop2615 = alloca i1, i1 0 + %nop2616 = alloca i1, i1 0 + %nop2617 = alloca i1, i1 0 + %nop2618 = alloca i1, i1 0 + %nop2619 = alloca i1, i1 0 + %nop2620 = alloca i1, i1 0 + %nop2621 = alloca i1, i1 0 + %nop2622 = alloca i1, i1 0 + %nop2623 = alloca i1, i1 0 + %nop2624 = alloca i1, i1 0 + %nop2625 = alloca i1, i1 0 + %nop2626 = alloca i1, i1 0 + %nop2627 = alloca i1, i1 0 + %nop2628 = alloca i1, i1 0 + 
%nop2629 = alloca i1, i1 0 + %nop2630 = alloca i1, i1 0 + %nop2631 = alloca i1, i1 0 + %nop2632 = alloca i1, i1 0 + %nop2633 = alloca i1, i1 0 + %nop2634 = alloca i1, i1 0 + %nop2635 = alloca i1, i1 0 + %nop2636 = alloca i1, i1 0 + %nop2637 = alloca i1, i1 0 + %nop2638 = alloca i1, i1 0 + %nop2639 = alloca i1, i1 0 + %nop2640 = alloca i1, i1 0 + %nop2641 = alloca i1, i1 0 + %nop2642 = alloca i1, i1 0 + %nop2643 = alloca i1, i1 0 + %nop2644 = alloca i1, i1 0 + %nop2645 = alloca i1, i1 0 + %nop2646 = alloca i1, i1 0 + %nop2647 = alloca i1, i1 0 + %nop2648 = alloca i1, i1 0 + %nop2649 = alloca i1, i1 0 + %nop2650 = alloca i1, i1 0 + %nop2651 = alloca i1, i1 0 + %nop2652 = alloca i1, i1 0 + %nop2653 = alloca i1, i1 0 + %nop2654 = alloca i1, i1 0 + %nop2655 = alloca i1, i1 0 + %nop2656 = alloca i1, i1 0 + %nop2657 = alloca i1, i1 0 + %nop2658 = alloca i1, i1 0 + %nop2659 = alloca i1, i1 0 + %nop2660 = alloca i1, i1 0 + %nop2661 = alloca i1, i1 0 + %nop2662 = alloca i1, i1 0 + %nop2663 = alloca i1, i1 0 + %nop2664 = alloca i1, i1 0 + %nop2665 = alloca i1, i1 0 + %nop2666 = alloca i1, i1 0 + %nop2667 = alloca i1, i1 0 + %nop2668 = alloca i1, i1 0 + %nop2669 = alloca i1, i1 0 + %nop2670 = alloca i1, i1 0 + %nop2671 = alloca i1, i1 0 + %nop2672 = alloca i1, i1 0 + %nop2673 = alloca i1, i1 0 + %nop2674 = alloca i1, i1 0 + %nop2675 = alloca i1, i1 0 + %nop2676 = alloca i1, i1 0 + %nop2677 = alloca i1, i1 0 + %nop2678 = alloca i1, i1 0 + %nop2679 = alloca i1, i1 0 + %nop2680 = alloca i1, i1 0 + %nop2681 = alloca i1, i1 0 + %nop2682 = alloca i1, i1 0 + %nop2683 = alloca i1, i1 0 + %nop2684 = alloca i1, i1 0 + %nop2685 = alloca i1, i1 0 + %nop2686 = alloca i1, i1 0 + %nop2687 = alloca i1, i1 0 + %nop2688 = alloca i1, i1 0 + %nop2689 = alloca i1, i1 0 + %nop2690 = alloca i1, i1 0 + %nop2691 = alloca i1, i1 0 + %nop2692 = alloca i1, i1 0 + %nop2693 = alloca i1, i1 0 + %nop2694 = alloca i1, i1 0 + %nop2695 = alloca i1, i1 0 + %nop2696 = alloca i1, i1 0 + %nop2697 = alloca i1, i1 0 
+ %nop2698 = alloca i1, i1 0 + %nop2699 = alloca i1, i1 0 + %nop2700 = alloca i1, i1 0 + %nop2701 = alloca i1, i1 0 + %nop2702 = alloca i1, i1 0 + %nop2703 = alloca i1, i1 0 + %nop2704 = alloca i1, i1 0 + %nop2705 = alloca i1, i1 0 + %nop2706 = alloca i1, i1 0 + %nop2707 = alloca i1, i1 0 + %nop2708 = alloca i1, i1 0 + %nop2709 = alloca i1, i1 0 + %nop2710 = alloca i1, i1 0 + %nop2711 = alloca i1, i1 0 + %nop2712 = alloca i1, i1 0 + %nop2713 = alloca i1, i1 0 + %nop2714 = alloca i1, i1 0 + %nop2715 = alloca i1, i1 0 + %nop2716 = alloca i1, i1 0 + %nop2717 = alloca i1, i1 0 + %nop2718 = alloca i1, i1 0 + %nop2719 = alloca i1, i1 0 + %nop2720 = alloca i1, i1 0 + %nop2721 = alloca i1, i1 0 + %nop2722 = alloca i1, i1 0 + %nop2723 = alloca i1, i1 0 + %nop2724 = alloca i1, i1 0 + %nop2725 = alloca i1, i1 0 + %nop2726 = alloca i1, i1 0 + %nop2727 = alloca i1, i1 0 + %nop2728 = alloca i1, i1 0 + %nop2729 = alloca i1, i1 0 + %nop2730 = alloca i1, i1 0 + %nop2731 = alloca i1, i1 0 + %nop2732 = alloca i1, i1 0 + %nop2733 = alloca i1, i1 0 + %nop2734 = alloca i1, i1 0 + %nop2735 = alloca i1, i1 0 + %nop2736 = alloca i1, i1 0 + %nop2737 = alloca i1, i1 0 + %nop2738 = alloca i1, i1 0 + %nop2739 = alloca i1, i1 0 + %nop2740 = alloca i1, i1 0 + %nop2741 = alloca i1, i1 0 + %nop2742 = alloca i1, i1 0 + %nop2743 = alloca i1, i1 0 + %nop2744 = alloca i1, i1 0 + %nop2745 = alloca i1, i1 0 + %nop2746 = alloca i1, i1 0 + %nop2747 = alloca i1, i1 0 + %nop2748 = alloca i1, i1 0 + %nop2749 = alloca i1, i1 0 + %nop2750 = alloca i1, i1 0 + %nop2751 = alloca i1, i1 0 + %nop2752 = alloca i1, i1 0 + %nop2753 = alloca i1, i1 0 + %nop2754 = alloca i1, i1 0 + %nop2755 = alloca i1, i1 0 + %nop2756 = alloca i1, i1 0 + %nop2757 = alloca i1, i1 0 + %nop2758 = alloca i1, i1 0 + %nop2759 = alloca i1, i1 0 + %nop2760 = alloca i1, i1 0 + %nop2761 = alloca i1, i1 0 + %nop2762 = alloca i1, i1 0 + %nop2763 = alloca i1, i1 0 + %nop2764 = alloca i1, i1 0 + %nop2765 = alloca i1, i1 0 + %nop2766 = alloca i1, i1 
0 + %nop2767 = alloca i1, i1 0 + %nop2768 = alloca i1, i1 0 + %nop2769 = alloca i1, i1 0 + %nop2770 = alloca i1, i1 0 + %nop2771 = alloca i1, i1 0 + %nop2772 = alloca i1, i1 0 + %nop2773 = alloca i1, i1 0 + %nop2774 = alloca i1, i1 0 + %nop2775 = alloca i1, i1 0 + %nop2776 = alloca i1, i1 0 + %nop2777 = alloca i1, i1 0 + %nop2778 = alloca i1, i1 0 + %nop2779 = alloca i1, i1 0 + %nop2780 = alloca i1, i1 0 + %nop2781 = alloca i1, i1 0 + %nop2782 = alloca i1, i1 0 + %nop2783 = alloca i1, i1 0 + %nop2784 = alloca i1, i1 0 + %nop2785 = alloca i1, i1 0 + %nop2786 = alloca i1, i1 0 + %nop2787 = alloca i1, i1 0 + %nop2788 = alloca i1, i1 0 + %nop2789 = alloca i1, i1 0 + %nop2790 = alloca i1, i1 0 + %nop2791 = alloca i1, i1 0 + %nop2792 = alloca i1, i1 0 + %nop2793 = alloca i1, i1 0 + %nop2794 = alloca i1, i1 0 + %nop2795 = alloca i1, i1 0 + %nop2796 = alloca i1, i1 0 + %nop2797 = alloca i1, i1 0 + %nop2798 = alloca i1, i1 0 + %nop2799 = alloca i1, i1 0 + %nop2800 = alloca i1, i1 0 + %nop2801 = alloca i1, i1 0 + %nop2802 = alloca i1, i1 0 + %nop2803 = alloca i1, i1 0 + %nop2804 = alloca i1, i1 0 + %nop2805 = alloca i1, i1 0 + %nop2806 = alloca i1, i1 0 + %nop2807 = alloca i1, i1 0 + %nop2808 = alloca i1, i1 0 + %nop2809 = alloca i1, i1 0 + %nop2810 = alloca i1, i1 0 + %nop2811 = alloca i1, i1 0 + %nop2812 = alloca i1, i1 0 + %nop2813 = alloca i1, i1 0 + %nop2814 = alloca i1, i1 0 + %nop2815 = alloca i1, i1 0 + %nop2816 = alloca i1, i1 0 + %nop2817 = alloca i1, i1 0 + %nop2818 = alloca i1, i1 0 + %nop2819 = alloca i1, i1 0 + %nop2820 = alloca i1, i1 0 + %nop2821 = alloca i1, i1 0 + %nop2822 = alloca i1, i1 0 + %nop2823 = alloca i1, i1 0 + %nop2824 = alloca i1, i1 0 + %nop2825 = alloca i1, i1 0 + %nop2826 = alloca i1, i1 0 + %nop2827 = alloca i1, i1 0 + %nop2828 = alloca i1, i1 0 + %nop2829 = alloca i1, i1 0 + %nop2830 = alloca i1, i1 0 + %nop2831 = alloca i1, i1 0 + %nop2832 = alloca i1, i1 0 + %nop2833 = alloca i1, i1 0 + %nop2834 = alloca i1, i1 0 + %nop2835 = alloca i1, 
i1 0 + %nop2836 = alloca i1, i1 0 + %nop2837 = alloca i1, i1 0 + %nop2838 = alloca i1, i1 0 + %nop2839 = alloca i1, i1 0 + %nop2840 = alloca i1, i1 0 + %nop2841 = alloca i1, i1 0 + %nop2842 = alloca i1, i1 0 + %nop2843 = alloca i1, i1 0 + %nop2844 = alloca i1, i1 0 + %nop2845 = alloca i1, i1 0 + %nop2846 = alloca i1, i1 0 + %nop2847 = alloca i1, i1 0 + %nop2848 = alloca i1, i1 0 + %nop2849 = alloca i1, i1 0 + %nop2850 = alloca i1, i1 0 + %nop2851 = alloca i1, i1 0 + %nop2852 = alloca i1, i1 0 + %nop2853 = alloca i1, i1 0 + %nop2854 = alloca i1, i1 0 + %nop2855 = alloca i1, i1 0 + %nop2856 = alloca i1, i1 0 + %nop2857 = alloca i1, i1 0 + %nop2858 = alloca i1, i1 0 + %nop2859 = alloca i1, i1 0 + %nop2860 = alloca i1, i1 0 + %nop2861 = alloca i1, i1 0 + %nop2862 = alloca i1, i1 0 + %nop2863 = alloca i1, i1 0 + %nop2864 = alloca i1, i1 0 + %nop2865 = alloca i1, i1 0 + %nop2866 = alloca i1, i1 0 + %nop2867 = alloca i1, i1 0 + %nop2868 = alloca i1, i1 0 + %nop2869 = alloca i1, i1 0 + %nop2870 = alloca i1, i1 0 + %nop2871 = alloca i1, i1 0 + %nop2872 = alloca i1, i1 0 + %nop2873 = alloca i1, i1 0 + %nop2874 = alloca i1, i1 0 + %nop2875 = alloca i1, i1 0 + %nop2876 = alloca i1, i1 0 + %nop2877 = alloca i1, i1 0 + %nop2878 = alloca i1, i1 0 + %nop2879 = alloca i1, i1 0 + %nop2880 = alloca i1, i1 0 + %nop2881 = alloca i1, i1 0 + %nop2882 = alloca i1, i1 0 + %nop2883 = alloca i1, i1 0 + %nop2884 = alloca i1, i1 0 + %nop2885 = alloca i1, i1 0 + %nop2886 = alloca i1, i1 0 + %nop2887 = alloca i1, i1 0 + %nop2888 = alloca i1, i1 0 + %nop2889 = alloca i1, i1 0 + %nop2890 = alloca i1, i1 0 + %nop2891 = alloca i1, i1 0 + %nop2892 = alloca i1, i1 0 + %nop2893 = alloca i1, i1 0 + %nop2894 = alloca i1, i1 0 + %nop2895 = alloca i1, i1 0 + %nop2896 = alloca i1, i1 0 + %nop2897 = alloca i1, i1 0 + %nop2898 = alloca i1, i1 0 + %nop2899 = alloca i1, i1 0 + %nop2900 = alloca i1, i1 0 + %nop2901 = alloca i1, i1 0 + %nop2902 = alloca i1, i1 0 + %nop2903 = alloca i1, i1 0 + %nop2904 = alloca 
i1, i1 0 + %nop2905 = alloca i1, i1 0 + %nop2906 = alloca i1, i1 0 + %nop2907 = alloca i1, i1 0 + %nop2908 = alloca i1, i1 0 + %nop2909 = alloca i1, i1 0 + %nop2910 = alloca i1, i1 0 + %nop2911 = alloca i1, i1 0 + %nop2912 = alloca i1, i1 0 + %nop2913 = alloca i1, i1 0 + %nop2914 = alloca i1, i1 0 + %nop2915 = alloca i1, i1 0 + %nop2916 = alloca i1, i1 0 + %nop2917 = alloca i1, i1 0 + %nop2918 = alloca i1, i1 0 + %nop2919 = alloca i1, i1 0 + %nop2920 = alloca i1, i1 0 + %nop2921 = alloca i1, i1 0 + %nop2922 = alloca i1, i1 0 + %nop2923 = alloca i1, i1 0 + %nop2924 = alloca i1, i1 0 + %nop2925 = alloca i1, i1 0 + %nop2926 = alloca i1, i1 0 + %nop2927 = alloca i1, i1 0 + %nop2928 = alloca i1, i1 0 + %nop2929 = alloca i1, i1 0 + %nop2930 = alloca i1, i1 0 + %nop2931 = alloca i1, i1 0 + %nop2932 = alloca i1, i1 0 + %nop2933 = alloca i1, i1 0 + %nop2934 = alloca i1, i1 0 + %nop2935 = alloca i1, i1 0 + %nop2936 = alloca i1, i1 0 + %nop2937 = alloca i1, i1 0 + %nop2938 = alloca i1, i1 0 + %nop2939 = alloca i1, i1 0 + %nop2940 = alloca i1, i1 0 + %nop2941 = alloca i1, i1 0 + %nop2942 = alloca i1, i1 0 + %nop2943 = alloca i1, i1 0 + %nop2944 = alloca i1, i1 0 + %nop2945 = alloca i1, i1 0 + %nop2946 = alloca i1, i1 0 + %nop2947 = alloca i1, i1 0 + %nop2948 = alloca i1, i1 0 + %nop2949 = alloca i1, i1 0 + %nop2950 = alloca i1, i1 0 + %nop2951 = alloca i1, i1 0 + %nop2952 = alloca i1, i1 0 + %nop2953 = alloca i1, i1 0 + %nop2954 = alloca i1, i1 0 + %nop2955 = alloca i1, i1 0 + %nop2956 = alloca i1, i1 0 + %nop2957 = alloca i1, i1 0 + %nop2958 = alloca i1, i1 0 + %nop2959 = alloca i1, i1 0 + %nop2960 = alloca i1, i1 0 + %nop2961 = alloca i1, i1 0 + %nop2962 = alloca i1, i1 0 + %nop2963 = alloca i1, i1 0 + %nop2964 = alloca i1, i1 0 + %nop2965 = alloca i1, i1 0 + %nop2966 = alloca i1, i1 0 + %nop2967 = alloca i1, i1 0 + %nop2968 = alloca i1, i1 0 + %nop2969 = alloca i1, i1 0 + %nop2970 = alloca i1, i1 0 + %nop2971 = alloca i1, i1 0 + %nop2972 = alloca i1, i1 0 + %nop2973 = 
alloca i1, i1 0 + %nop2974 = alloca i1, i1 0 + %nop2975 = alloca i1, i1 0 + %nop2976 = alloca i1, i1 0 + %nop2977 = alloca i1, i1 0 + %nop2978 = alloca i1, i1 0 + %nop2979 = alloca i1, i1 0 + %nop2980 = alloca i1, i1 0 + %nop2981 = alloca i1, i1 0 + %nop2982 = alloca i1, i1 0 + %nop2983 = alloca i1, i1 0 + %nop2984 = alloca i1, i1 0 + %nop2985 = alloca i1, i1 0 + %nop2986 = alloca i1, i1 0 + %nop2987 = alloca i1, i1 0 + %nop2988 = alloca i1, i1 0 + %nop2989 = alloca i1, i1 0 + %nop2990 = alloca i1, i1 0 + %nop2991 = alloca i1, i1 0 + %nop2992 = alloca i1, i1 0 + %nop2993 = alloca i1, i1 0 + %nop2994 = alloca i1, i1 0 + %nop2995 = alloca i1, i1 0 + %nop2996 = alloca i1, i1 0 + %nop2997 = alloca i1, i1 0 + %nop2998 = alloca i1, i1 0 + %nop2999 = alloca i1, i1 0 + %nop3000 = alloca i1, i1 0 + %nop3001 = alloca i1, i1 0 + %nop3002 = alloca i1, i1 0 + %nop3003 = alloca i1, i1 0 + %nop3004 = alloca i1, i1 0 + %nop3005 = alloca i1, i1 0 + %nop3006 = alloca i1, i1 0 + %nop3007 = alloca i1, i1 0 + %nop3008 = alloca i1, i1 0 + %nop3009 = alloca i1, i1 0 + %nop3010 = alloca i1, i1 0 + %nop3011 = alloca i1, i1 0 + %nop3012 = alloca i1, i1 0 + %nop3013 = alloca i1, i1 0 + %nop3014 = alloca i1, i1 0 + %nop3015 = alloca i1, i1 0 + %nop3016 = alloca i1, i1 0 + %nop3017 = alloca i1, i1 0 + %nop3018 = alloca i1, i1 0 + %nop3019 = alloca i1, i1 0 + %nop3020 = alloca i1, i1 0 + %nop3021 = alloca i1, i1 0 + %nop3022 = alloca i1, i1 0 + %nop3023 = alloca i1, i1 0 + %nop3024 = alloca i1, i1 0 + %nop3025 = alloca i1, i1 0 + %nop3026 = alloca i1, i1 0 + %nop3027 = alloca i1, i1 0 + %nop3028 = alloca i1, i1 0 + %nop3029 = alloca i1, i1 0 + %nop3030 = alloca i1, i1 0 + %nop3031 = alloca i1, i1 0 + %nop3032 = alloca i1, i1 0 + %nop3033 = alloca i1, i1 0 + %nop3034 = alloca i1, i1 0 + %nop3035 = alloca i1, i1 0 + %nop3036 = alloca i1, i1 0 + %nop3037 = alloca i1, i1 0 + %nop3038 = alloca i1, i1 0 + %nop3039 = alloca i1, i1 0 + %nop3040 = alloca i1, i1 0 + %nop3041 = alloca i1, i1 0 + %nop3042 
= alloca i1, i1 0 + %nop3043 = alloca i1, i1 0 + %nop3044 = alloca i1, i1 0 + %nop3045 = alloca i1, i1 0 + %nop3046 = alloca i1, i1 0 + %nop3047 = alloca i1, i1 0 + %nop3048 = alloca i1, i1 0 + %nop3049 = alloca i1, i1 0 + %nop3050 = alloca i1, i1 0 + %nop3051 = alloca i1, i1 0 + %nop3052 = alloca i1, i1 0 + %nop3053 = alloca i1, i1 0 + %nop3054 = alloca i1, i1 0 + %nop3055 = alloca i1, i1 0 + %nop3056 = alloca i1, i1 0 + %nop3057 = alloca i1, i1 0 + %nop3058 = alloca i1, i1 0 + %nop3059 = alloca i1, i1 0 + %nop3060 = alloca i1, i1 0 + %nop3061 = alloca i1, i1 0 + %nop3062 = alloca i1, i1 0 + %nop3063 = alloca i1, i1 0 + %nop3064 = alloca i1, i1 0 + %nop3065 = alloca i1, i1 0 + %nop3066 = alloca i1, i1 0 + %nop3067 = alloca i1, i1 0 + %nop3068 = alloca i1, i1 0 + %nop3069 = alloca i1, i1 0 + %nop3070 = alloca i1, i1 0 + %nop3071 = alloca i1, i1 0 + %nop3072 = alloca i1, i1 0 + %nop3073 = alloca i1, i1 0 + %nop3074 = alloca i1, i1 0 + %nop3075 = alloca i1, i1 0 + %nop3076 = alloca i1, i1 0 + %nop3077 = alloca i1, i1 0 + %nop3078 = alloca i1, i1 0 + %nop3079 = alloca i1, i1 0 + %nop3080 = alloca i1, i1 0 + %nop3081 = alloca i1, i1 0 + %nop3082 = alloca i1, i1 0 + %nop3083 = alloca i1, i1 0 + %nop3084 = alloca i1, i1 0 + %nop3085 = alloca i1, i1 0 + %nop3086 = alloca i1, i1 0 + %nop3087 = alloca i1, i1 0 + %nop3088 = alloca i1, i1 0 + %nop3089 = alloca i1, i1 0 + %nop3090 = alloca i1, i1 0 + %nop3091 = alloca i1, i1 0 + %nop3092 = alloca i1, i1 0 + %nop3093 = alloca i1, i1 0 + %nop3094 = alloca i1, i1 0 + %nop3095 = alloca i1, i1 0 + %nop3096 = alloca i1, i1 0 + %nop3097 = alloca i1, i1 0 + %nop3098 = alloca i1, i1 0 + %nop3099 = alloca i1, i1 0 + %nop3100 = alloca i1, i1 0 + %nop3101 = alloca i1, i1 0 + %nop3102 = alloca i1, i1 0 + %nop3103 = alloca i1, i1 0 + %nop3104 = alloca i1, i1 0 + %nop3105 = alloca i1, i1 0 + %nop3106 = alloca i1, i1 0 + %nop3107 = alloca i1, i1 0 + %nop3108 = alloca i1, i1 0 + %nop3109 = alloca i1, i1 0 + %nop3110 = alloca i1, i1 0 + 
%nop3111 = alloca i1, i1 0 + %nop3112 = alloca i1, i1 0 + %nop3113 = alloca i1, i1 0 + %nop3114 = alloca i1, i1 0 + %nop3115 = alloca i1, i1 0 + %nop3116 = alloca i1, i1 0 + %nop3117 = alloca i1, i1 0 + %nop3118 = alloca i1, i1 0 + %nop3119 = alloca i1, i1 0 + %nop3120 = alloca i1, i1 0 + %nop3121 = alloca i1, i1 0 + %nop3122 = alloca i1, i1 0 + %nop3123 = alloca i1, i1 0 + %nop3124 = alloca i1, i1 0 + %nop3125 = alloca i1, i1 0 + %nop3126 = alloca i1, i1 0 + %nop3127 = alloca i1, i1 0 + %nop3128 = alloca i1, i1 0 + %nop3129 = alloca i1, i1 0 + %nop3130 = alloca i1, i1 0 + %nop3131 = alloca i1, i1 0 + %nop3132 = alloca i1, i1 0 + %nop3133 = alloca i1, i1 0 + %nop3134 = alloca i1, i1 0 + %nop3135 = alloca i1, i1 0 + %nop3136 = alloca i1, i1 0 + %nop3137 = alloca i1, i1 0 + %nop3138 = alloca i1, i1 0 + %nop3139 = alloca i1, i1 0 + %nop3140 = alloca i1, i1 0 + %nop3141 = alloca i1, i1 0 + %nop3142 = alloca i1, i1 0 + %nop3143 = alloca i1, i1 0 + %nop3144 = alloca i1, i1 0 + %nop3145 = alloca i1, i1 0 + %nop3146 = alloca i1, i1 0 + %nop3147 = alloca i1, i1 0 + %nop3148 = alloca i1, i1 0 + %nop3149 = alloca i1, i1 0 + %nop3150 = alloca i1, i1 0 + %nop3151 = alloca i1, i1 0 + %nop3152 = alloca i1, i1 0 + %nop3153 = alloca i1, i1 0 + %nop3154 = alloca i1, i1 0 + %nop3155 = alloca i1, i1 0 + %nop3156 = alloca i1, i1 0 + %nop3157 = alloca i1, i1 0 + %nop3158 = alloca i1, i1 0 + %nop3159 = alloca i1, i1 0 + %nop3160 = alloca i1, i1 0 + %nop3161 = alloca i1, i1 0 + %nop3162 = alloca i1, i1 0 + %nop3163 = alloca i1, i1 0 + %nop3164 = alloca i1, i1 0 + %nop3165 = alloca i1, i1 0 + %nop3166 = alloca i1, i1 0 + %nop3167 = alloca i1, i1 0 + %nop3168 = alloca i1, i1 0 + %nop3169 = alloca i1, i1 0 + %nop3170 = alloca i1, i1 0 + %nop3171 = alloca i1, i1 0 + %nop3172 = alloca i1, i1 0 + %nop3173 = alloca i1, i1 0 + %nop3174 = alloca i1, i1 0 + %nop3175 = alloca i1, i1 0 + %nop3176 = alloca i1, i1 0 + %nop3177 = alloca i1, i1 0 + %nop3178 = alloca i1, i1 0 + %nop3179 = alloca i1, i1 0 
+ %nop3180 = alloca i1, i1 0 + %nop3181 = alloca i1, i1 0 + %nop3182 = alloca i1, i1 0 + %nop3183 = alloca i1, i1 0 + %nop3184 = alloca i1, i1 0 + %nop3185 = alloca i1, i1 0 + %nop3186 = alloca i1, i1 0 + %nop3187 = alloca i1, i1 0 + %nop3188 = alloca i1, i1 0 + %nop3189 = alloca i1, i1 0 + %nop3190 = alloca i1, i1 0 + %nop3191 = alloca i1, i1 0 + %nop3192 = alloca i1, i1 0 + %nop3193 = alloca i1, i1 0 + %nop3194 = alloca i1, i1 0 + %nop3195 = alloca i1, i1 0 + %nop3196 = alloca i1, i1 0 + %nop3197 = alloca i1, i1 0 + %nop3198 = alloca i1, i1 0 + %nop3199 = alloca i1, i1 0 + %nop3200 = alloca i1, i1 0 + %nop3201 = alloca i1, i1 0 + %nop3202 = alloca i1, i1 0 + %nop3203 = alloca i1, i1 0 + %nop3204 = alloca i1, i1 0 + %nop3205 = alloca i1, i1 0 + %nop3206 = alloca i1, i1 0 + %nop3207 = alloca i1, i1 0 + %nop3208 = alloca i1, i1 0 + %nop3209 = alloca i1, i1 0 + %nop3210 = alloca i1, i1 0 + %nop3211 = alloca i1, i1 0 + %nop3212 = alloca i1, i1 0 + %nop3213 = alloca i1, i1 0 + %nop3214 = alloca i1, i1 0 + %nop3215 = alloca i1, i1 0 + %nop3216 = alloca i1, i1 0 + %nop3217 = alloca i1, i1 0 + %nop3218 = alloca i1, i1 0 + %nop3219 = alloca i1, i1 0 + %nop3220 = alloca i1, i1 0 + %nop3221 = alloca i1, i1 0 + %nop3222 = alloca i1, i1 0 + %nop3223 = alloca i1, i1 0 + %nop3224 = alloca i1, i1 0 + %nop3225 = alloca i1, i1 0 + %nop3226 = alloca i1, i1 0 + %nop3227 = alloca i1, i1 0 + %nop3228 = alloca i1, i1 0 + %nop3229 = alloca i1, i1 0 + %nop3230 = alloca i1, i1 0 + %nop3231 = alloca i1, i1 0 + %nop3232 = alloca i1, i1 0 + %nop3233 = alloca i1, i1 0 + %nop3234 = alloca i1, i1 0 + %nop3235 = alloca i1, i1 0 + %nop3236 = alloca i1, i1 0 + %nop3237 = alloca i1, i1 0 + %nop3238 = alloca i1, i1 0 + %nop3239 = alloca i1, i1 0 + %nop3240 = alloca i1, i1 0 + %nop3241 = alloca i1, i1 0 + %nop3242 = alloca i1, i1 0 + %nop3243 = alloca i1, i1 0 + %nop3244 = alloca i1, i1 0 + %nop3245 = alloca i1, i1 0 + %nop3246 = alloca i1, i1 0 + %nop3247 = alloca i1, i1 0 + %nop3248 = alloca i1, i1 
0 + %nop3249 = alloca i1, i1 0 + %nop3250 = alloca i1, i1 0 + %nop3251 = alloca i1, i1 0 + %nop3252 = alloca i1, i1 0 + %nop3253 = alloca i1, i1 0 + %nop3254 = alloca i1, i1 0 + %nop3255 = alloca i1, i1 0 + %nop3256 = alloca i1, i1 0 + %nop3257 = alloca i1, i1 0 + %nop3258 = alloca i1, i1 0 + %nop3259 = alloca i1, i1 0 + %nop3260 = alloca i1, i1 0 + %nop3261 = alloca i1, i1 0 + %nop3262 = alloca i1, i1 0 + %nop3263 = alloca i1, i1 0 + %nop3264 = alloca i1, i1 0 + %nop3265 = alloca i1, i1 0 + %nop3266 = alloca i1, i1 0 + %nop3267 = alloca i1, i1 0 + %nop3268 = alloca i1, i1 0 + %nop3269 = alloca i1, i1 0 + %nop3270 = alloca i1, i1 0 + %nop3271 = alloca i1, i1 0 + %nop3272 = alloca i1, i1 0 + %nop3273 = alloca i1, i1 0 + %nop3274 = alloca i1, i1 0 + %nop3275 = alloca i1, i1 0 + %nop3276 = alloca i1, i1 0 + %nop3277 = alloca i1, i1 0 + %nop3278 = alloca i1, i1 0 + %nop3279 = alloca i1, i1 0 + %nop3280 = alloca i1, i1 0 + %nop3281 = alloca i1, i1 0 + %nop3282 = alloca i1, i1 0 + %nop3283 = alloca i1, i1 0 + %nop3284 = alloca i1, i1 0 + %nop3285 = alloca i1, i1 0 + %nop3286 = alloca i1, i1 0 + %nop3287 = alloca i1, i1 0 + %nop3288 = alloca i1, i1 0 + %nop3289 = alloca i1, i1 0 + %nop3290 = alloca i1, i1 0 + %nop3291 = alloca i1, i1 0 + %nop3292 = alloca i1, i1 0 + %nop3293 = alloca i1, i1 0 + %nop3294 = alloca i1, i1 0 + %nop3295 = alloca i1, i1 0 + %nop3296 = alloca i1, i1 0 + %nop3297 = alloca i1, i1 0 + %nop3298 = alloca i1, i1 0 + %nop3299 = alloca i1, i1 0 + %nop3300 = alloca i1, i1 0 + %nop3301 = alloca i1, i1 0 + %nop3302 = alloca i1, i1 0 + %nop3303 = alloca i1, i1 0 + %nop3304 = alloca i1, i1 0 + %nop3305 = alloca i1, i1 0 + %nop3306 = alloca i1, i1 0 + %nop3307 = alloca i1, i1 0 + %nop3308 = alloca i1, i1 0 + %nop3309 = alloca i1, i1 0 + %nop3310 = alloca i1, i1 0 + %nop3311 = alloca i1, i1 0 + %nop3312 = alloca i1, i1 0 + %nop3313 = alloca i1, i1 0 + %nop3314 = alloca i1, i1 0 + %nop3315 = alloca i1, i1 0 + %nop3316 = alloca i1, i1 0 + %nop3317 = alloca i1, 
i1 0 + %nop3318 = alloca i1, i1 0 + %nop3319 = alloca i1, i1 0 + %nop3320 = alloca i1, i1 0 + %nop3321 = alloca i1, i1 0 + %nop3322 = alloca i1, i1 0 + %nop3323 = alloca i1, i1 0 + %nop3324 = alloca i1, i1 0 + %nop3325 = alloca i1, i1 0 + %nop3326 = alloca i1, i1 0 + %nop3327 = alloca i1, i1 0 + %nop3328 = alloca i1, i1 0 + %nop3329 = alloca i1, i1 0 + %nop3330 = alloca i1, i1 0 + %nop3331 = alloca i1, i1 0 + %nop3332 = alloca i1, i1 0 + %nop3333 = alloca i1, i1 0 + %nop3334 = alloca i1, i1 0 + %nop3335 = alloca i1, i1 0 + %nop3336 = alloca i1, i1 0 + %nop3337 = alloca i1, i1 0 + %nop3338 = alloca i1, i1 0 + %nop3339 = alloca i1, i1 0 + %nop3340 = alloca i1, i1 0 + %nop3341 = alloca i1, i1 0 + %nop3342 = alloca i1, i1 0 + %nop3343 = alloca i1, i1 0 + %nop3344 = alloca i1, i1 0 + %nop3345 = alloca i1, i1 0 + %nop3346 = alloca i1, i1 0 + %nop3347 = alloca i1, i1 0 + %nop3348 = alloca i1, i1 0 + %nop3349 = alloca i1, i1 0 + %nop3350 = alloca i1, i1 0 + %nop3351 = alloca i1, i1 0 + %nop3352 = alloca i1, i1 0 + %nop3353 = alloca i1, i1 0 + %nop3354 = alloca i1, i1 0 + %nop3355 = alloca i1, i1 0 + %nop3356 = alloca i1, i1 0 + %nop3357 = alloca i1, i1 0 + %nop3358 = alloca i1, i1 0 + %nop3359 = alloca i1, i1 0 + %nop3360 = alloca i1, i1 0 + %nop3361 = alloca i1, i1 0 + %nop3362 = alloca i1, i1 0 + %nop3363 = alloca i1, i1 0 + %nop3364 = alloca i1, i1 0 + %nop3365 = alloca i1, i1 0 + %nop3366 = alloca i1, i1 0 + %nop3367 = alloca i1, i1 0 + %nop3368 = alloca i1, i1 0 + %nop3369 = alloca i1, i1 0 + %nop3370 = alloca i1, i1 0 + %nop3371 = alloca i1, i1 0 + %nop3372 = alloca i1, i1 0 + %nop3373 = alloca i1, i1 0 + %nop3374 = alloca i1, i1 0 + %nop3375 = alloca i1, i1 0 + %nop3376 = alloca i1, i1 0 + %nop3377 = alloca i1, i1 0 + %nop3378 = alloca i1, i1 0 + %nop3379 = alloca i1, i1 0 + %nop3380 = alloca i1, i1 0 + %nop3381 = alloca i1, i1 0 + %nop3382 = alloca i1, i1 0 + %nop3383 = alloca i1, i1 0 + %nop3384 = alloca i1, i1 0 + %nop3385 = alloca i1, i1 0 + %nop3386 = alloca 
i1, i1 0 + %nop3387 = alloca i1, i1 0 + %nop3388 = alloca i1, i1 0 + %nop3389 = alloca i1, i1 0 + %nop3390 = alloca i1, i1 0 + %nop3391 = alloca i1, i1 0 + %nop3392 = alloca i1, i1 0 + %nop3393 = alloca i1, i1 0 + %nop3394 = alloca i1, i1 0 + %nop3395 = alloca i1, i1 0 + %nop3396 = alloca i1, i1 0 + %nop3397 = alloca i1, i1 0 + %nop3398 = alloca i1, i1 0 + %nop3399 = alloca i1, i1 0 + %nop3400 = alloca i1, i1 0 + %nop3401 = alloca i1, i1 0 + %nop3402 = alloca i1, i1 0 + %nop3403 = alloca i1, i1 0 + %nop3404 = alloca i1, i1 0 + %nop3405 = alloca i1, i1 0 + %nop3406 = alloca i1, i1 0 + %nop3407 = alloca i1, i1 0 + %nop3408 = alloca i1, i1 0 + %nop3409 = alloca i1, i1 0 + %nop3410 = alloca i1, i1 0 + %nop3411 = alloca i1, i1 0 + %nop3412 = alloca i1, i1 0 + %nop3413 = alloca i1, i1 0 + %nop3414 = alloca i1, i1 0 + %nop3415 = alloca i1, i1 0 + %nop3416 = alloca i1, i1 0 + %nop3417 = alloca i1, i1 0 + %nop3418 = alloca i1, i1 0 + %nop3419 = alloca i1, i1 0 + %nop3420 = alloca i1, i1 0 + %nop3421 = alloca i1, i1 0 + %nop3422 = alloca i1, i1 0 + %nop3423 = alloca i1, i1 0 + %nop3424 = alloca i1, i1 0 + %nop3425 = alloca i1, i1 0 + %nop3426 = alloca i1, i1 0 + %nop3427 = alloca i1, i1 0 + %nop3428 = alloca i1, i1 0 + %nop3429 = alloca i1, i1 0 + %nop3430 = alloca i1, i1 0 + %nop3431 = alloca i1, i1 0 + %nop3432 = alloca i1, i1 0 + %nop3433 = alloca i1, i1 0 + %nop3434 = alloca i1, i1 0 + %nop3435 = alloca i1, i1 0 + %nop3436 = alloca i1, i1 0 + %nop3437 = alloca i1, i1 0 + %nop3438 = alloca i1, i1 0 + %nop3439 = alloca i1, i1 0 + %nop3440 = alloca i1, i1 0 + %nop3441 = alloca i1, i1 0 + %nop3442 = alloca i1, i1 0 + %nop3443 = alloca i1, i1 0 + %nop3444 = alloca i1, i1 0 + %nop3445 = alloca i1, i1 0 + %nop3446 = alloca i1, i1 0 + %nop3447 = alloca i1, i1 0 + %nop3448 = alloca i1, i1 0 + %nop3449 = alloca i1, i1 0 + %nop3450 = alloca i1, i1 0 + %nop3451 = alloca i1, i1 0 + %nop3452 = alloca i1, i1 0 + %nop3453 = alloca i1, i1 0 + %nop3454 = alloca i1, i1 0 + %nop3455 = 
alloca i1, i1 0 + %nop3456 = alloca i1, i1 0 + %nop3457 = alloca i1, i1 0 + %nop3458 = alloca i1, i1 0 + %nop3459 = alloca i1, i1 0 + %nop3460 = alloca i1, i1 0 + %nop3461 = alloca i1, i1 0 + %nop3462 = alloca i1, i1 0 + %nop3463 = alloca i1, i1 0 + %nop3464 = alloca i1, i1 0 + %nop3465 = alloca i1, i1 0 + %nop3466 = alloca i1, i1 0 + %nop3467 = alloca i1, i1 0 + %nop3468 = alloca i1, i1 0 + %nop3469 = alloca i1, i1 0 + %nop3470 = alloca i1, i1 0 + %nop3471 = alloca i1, i1 0 + %nop3472 = alloca i1, i1 0 + %nop3473 = alloca i1, i1 0 + %nop3474 = alloca i1, i1 0 + %nop3475 = alloca i1, i1 0 + %nop3476 = alloca i1, i1 0 + %nop3477 = alloca i1, i1 0 + %nop3478 = alloca i1, i1 0 + %nop3479 = alloca i1, i1 0 + %nop3480 = alloca i1, i1 0 + %nop3481 = alloca i1, i1 0 + %nop3482 = alloca i1, i1 0 + %nop3483 = alloca i1, i1 0 + %nop3484 = alloca i1, i1 0 + %nop3485 = alloca i1, i1 0 + %nop3486 = alloca i1, i1 0 + %nop3487 = alloca i1, i1 0 + %nop3488 = alloca i1, i1 0 + %nop3489 = alloca i1, i1 0 + %nop3490 = alloca i1, i1 0 + %nop3491 = alloca i1, i1 0 + %nop3492 = alloca i1, i1 0 + %nop3493 = alloca i1, i1 0 + %nop3494 = alloca i1, i1 0 + %nop3495 = alloca i1, i1 0 + %nop3496 = alloca i1, i1 0 + %nop3497 = alloca i1, i1 0 + %nop3498 = alloca i1, i1 0 + %nop3499 = alloca i1, i1 0 + %nop3500 = alloca i1, i1 0 + %nop3501 = alloca i1, i1 0 + %nop3502 = alloca i1, i1 0 + %nop3503 = alloca i1, i1 0 + %nop3504 = alloca i1, i1 0 + %nop3505 = alloca i1, i1 0 + %nop3506 = alloca i1, i1 0 + %nop3507 = alloca i1, i1 0 + %nop3508 = alloca i1, i1 0 + %nop3509 = alloca i1, i1 0 + %nop3510 = alloca i1, i1 0 + %nop3511 = alloca i1, i1 0 + %nop3512 = alloca i1, i1 0 + %nop3513 = alloca i1, i1 0 + %nop3514 = alloca i1, i1 0 + %nop3515 = alloca i1, i1 0 + %nop3516 = alloca i1, i1 0 + %nop3517 = alloca i1, i1 0 + %nop3518 = alloca i1, i1 0 + %nop3519 = alloca i1, i1 0 + %nop3520 = alloca i1, i1 0 + %nop3521 = alloca i1, i1 0 + %nop3522 = alloca i1, i1 0 + %nop3523 = alloca i1, i1 0 + %nop3524 
= alloca i1, i1 0 + %nop3525 = alloca i1, i1 0 + %nop3526 = alloca i1, i1 0 + %nop3527 = alloca i1, i1 0 + %nop3528 = alloca i1, i1 0 + %nop3529 = alloca i1, i1 0 + %nop3530 = alloca i1, i1 0 + %nop3531 = alloca i1, i1 0 + %nop3532 = alloca i1, i1 0 + %nop3533 = alloca i1, i1 0 + %nop3534 = alloca i1, i1 0 + %nop3535 = alloca i1, i1 0 + %nop3536 = alloca i1, i1 0 + %nop3537 = alloca i1, i1 0 + %nop3538 = alloca i1, i1 0 + %nop3539 = alloca i1, i1 0 + %nop3540 = alloca i1, i1 0 + %nop3541 = alloca i1, i1 0 + %nop3542 = alloca i1, i1 0 + %nop3543 = alloca i1, i1 0 + %nop3544 = alloca i1, i1 0 + %nop3545 = alloca i1, i1 0 + %nop3546 = alloca i1, i1 0 + %nop3547 = alloca i1, i1 0 + %nop3548 = alloca i1, i1 0 + %nop3549 = alloca i1, i1 0 + %nop3550 = alloca i1, i1 0 + %nop3551 = alloca i1, i1 0 + %nop3552 = alloca i1, i1 0 + %nop3553 = alloca i1, i1 0 + %nop3554 = alloca i1, i1 0 + %nop3555 = alloca i1, i1 0 + %nop3556 = alloca i1, i1 0 + %nop3557 = alloca i1, i1 0 + %nop3558 = alloca i1, i1 0 + %nop3559 = alloca i1, i1 0 + %nop3560 = alloca i1, i1 0 + %nop3561 = alloca i1, i1 0 + %nop3562 = alloca i1, i1 0 + %nop3563 = alloca i1, i1 0 + %nop3564 = alloca i1, i1 0 + %nop3565 = alloca i1, i1 0 + %nop3566 = alloca i1, i1 0 + %nop3567 = alloca i1, i1 0 + %nop3568 = alloca i1, i1 0 + %nop3569 = alloca i1, i1 0 + %nop3570 = alloca i1, i1 0 + %nop3571 = alloca i1, i1 0 + %nop3572 = alloca i1, i1 0 + %nop3573 = alloca i1, i1 0 + %nop3574 = alloca i1, i1 0 + %nop3575 = alloca i1, i1 0 + %nop3576 = alloca i1, i1 0 + %nop3577 = alloca i1, i1 0 + %nop3578 = alloca i1, i1 0 + %nop3579 = alloca i1, i1 0 + %nop3580 = alloca i1, i1 0 + %nop3581 = alloca i1, i1 0 + %nop3582 = alloca i1, i1 0 + %nop3583 = alloca i1, i1 0 + %nop3584 = alloca i1, i1 0 + %nop3585 = alloca i1, i1 0 + %nop3586 = alloca i1, i1 0 + %nop3587 = alloca i1, i1 0 + %nop3588 = alloca i1, i1 0 + %nop3589 = alloca i1, i1 0 + %nop3590 = alloca i1, i1 0 + %nop3591 = alloca i1, i1 0 + %nop3592 = alloca i1, i1 0 + 
%nop3593 = alloca i1, i1 0 + %nop3594 = alloca i1, i1 0 + %nop3595 = alloca i1, i1 0 + %nop3596 = alloca i1, i1 0 + %nop3597 = alloca i1, i1 0 + %nop3598 = alloca i1, i1 0 + %nop3599 = alloca i1, i1 0 + %nop3600 = alloca i1, i1 0 + %nop3601 = alloca i1, i1 0 + %nop3602 = alloca i1, i1 0 + %nop3603 = alloca i1, i1 0 + %nop3604 = alloca i1, i1 0 + %nop3605 = alloca i1, i1 0 + %nop3606 = alloca i1, i1 0 + %nop3607 = alloca i1, i1 0 + %nop3608 = alloca i1, i1 0 + %nop3609 = alloca i1, i1 0 + %nop3610 = alloca i1, i1 0 + %nop3611 = alloca i1, i1 0 + %nop3612 = alloca i1, i1 0 + %nop3613 = alloca i1, i1 0 + %nop3614 = alloca i1, i1 0 + %nop3615 = alloca i1, i1 0 + %nop3616 = alloca i1, i1 0 + %nop3617 = alloca i1, i1 0 + %nop3618 = alloca i1, i1 0 + %nop3619 = alloca i1, i1 0 + %nop3620 = alloca i1, i1 0 + %nop3621 = alloca i1, i1 0 + %nop3622 = alloca i1, i1 0 + %nop3623 = alloca i1, i1 0 + %nop3624 = alloca i1, i1 0 + %nop3625 = alloca i1, i1 0 + %nop3626 = alloca i1, i1 0 + %nop3627 = alloca i1, i1 0 + %nop3628 = alloca i1, i1 0 + %nop3629 = alloca i1, i1 0 + %nop3630 = alloca i1, i1 0 + %nop3631 = alloca i1, i1 0 + %nop3632 = alloca i1, i1 0 + %nop3633 = alloca i1, i1 0 + %nop3634 = alloca i1, i1 0 + %nop3635 = alloca i1, i1 0 + %nop3636 = alloca i1, i1 0 + %nop3637 = alloca i1, i1 0 + %nop3638 = alloca i1, i1 0 + %nop3639 = alloca i1, i1 0 + %nop3640 = alloca i1, i1 0 + %nop3641 = alloca i1, i1 0 + %nop3642 = alloca i1, i1 0 + %nop3643 = alloca i1, i1 0 + %nop3644 = alloca i1, i1 0 + %nop3645 = alloca i1, i1 0 + %nop3646 = alloca i1, i1 0 + %nop3647 = alloca i1, i1 0 + %nop3648 = alloca i1, i1 0 + %nop3649 = alloca i1, i1 0 + %nop3650 = alloca i1, i1 0 + %nop3651 = alloca i1, i1 0 + %nop3652 = alloca i1, i1 0 + %nop3653 = alloca i1, i1 0 + %nop3654 = alloca i1, i1 0 + %nop3655 = alloca i1, i1 0 + %nop3656 = alloca i1, i1 0 + %nop3657 = alloca i1, i1 0 + %nop3658 = alloca i1, i1 0 + %nop3659 = alloca i1, i1 0 + %nop3660 = alloca i1, i1 0 + %nop3661 = alloca i1, i1 0 
+ %nop3662 = alloca i1, i1 0 + %nop3663 = alloca i1, i1 0 + %nop3664 = alloca i1, i1 0 + %nop3665 = alloca i1, i1 0 + %nop3666 = alloca i1, i1 0 + %nop3667 = alloca i1, i1 0 + %nop3668 = alloca i1, i1 0 + %nop3669 = alloca i1, i1 0 + %nop3670 = alloca i1, i1 0 + %nop3671 = alloca i1, i1 0 + %nop3672 = alloca i1, i1 0 + %nop3673 = alloca i1, i1 0 + %nop3674 = alloca i1, i1 0 + %nop3675 = alloca i1, i1 0 + %nop3676 = alloca i1, i1 0 + %nop3677 = alloca i1, i1 0 + %nop3678 = alloca i1, i1 0 + %nop3679 = alloca i1, i1 0 + %nop3680 = alloca i1, i1 0 + %nop3681 = alloca i1, i1 0 + %nop3682 = alloca i1, i1 0 + %nop3683 = alloca i1, i1 0 + %nop3684 = alloca i1, i1 0 + %nop3685 = alloca i1, i1 0 + %nop3686 = alloca i1, i1 0 + %nop3687 = alloca i1, i1 0 + %nop3688 = alloca i1, i1 0 + %nop3689 = alloca i1, i1 0 + %nop3690 = alloca i1, i1 0 + %nop3691 = alloca i1, i1 0 + %nop3692 = alloca i1, i1 0 + %nop3693 = alloca i1, i1 0 + %nop3694 = alloca i1, i1 0 + %nop3695 = alloca i1, i1 0 + %nop3696 = alloca i1, i1 0 + %nop3697 = alloca i1, i1 0 + %nop3698 = alloca i1, i1 0 + %nop3699 = alloca i1, i1 0 + %nop3700 = alloca i1, i1 0 + %nop3701 = alloca i1, i1 0 + %nop3702 = alloca i1, i1 0 + %nop3703 = alloca i1, i1 0 + %nop3704 = alloca i1, i1 0 + %nop3705 = alloca i1, i1 0 + %nop3706 = alloca i1, i1 0 + %nop3707 = alloca i1, i1 0 + %nop3708 = alloca i1, i1 0 + %nop3709 = alloca i1, i1 0 + %nop3710 = alloca i1, i1 0 + %nop3711 = alloca i1, i1 0 + %nop3712 = alloca i1, i1 0 + %nop3713 = alloca i1, i1 0 + %nop3714 = alloca i1, i1 0 + %nop3715 = alloca i1, i1 0 + %nop3716 = alloca i1, i1 0 + %nop3717 = alloca i1, i1 0 + %nop3718 = alloca i1, i1 0 + %nop3719 = alloca i1, i1 0 + %nop3720 = alloca i1, i1 0 + %nop3721 = alloca i1, i1 0 + %nop3722 = alloca i1, i1 0 + %nop3723 = alloca i1, i1 0 + %nop3724 = alloca i1, i1 0 + %nop3725 = alloca i1, i1 0 + %nop3726 = alloca i1, i1 0 + %nop3727 = alloca i1, i1 0 + %nop3728 = alloca i1, i1 0 + %nop3729 = alloca i1, i1 0 + %nop3730 = alloca i1, i1 
0 + %nop3731 = alloca i1, i1 0 + %nop3732 = alloca i1, i1 0 + %nop3733 = alloca i1, i1 0 + %nop3734 = alloca i1, i1 0 + %nop3735 = alloca i1, i1 0 + %nop3736 = alloca i1, i1 0 + %nop3737 = alloca i1, i1 0 + %nop3738 = alloca i1, i1 0 + %nop3739 = alloca i1, i1 0 + %nop3740 = alloca i1, i1 0 + %nop3741 = alloca i1, i1 0 + %nop3742 = alloca i1, i1 0 + %nop3743 = alloca i1, i1 0 + %nop3744 = alloca i1, i1 0 + %nop3745 = alloca i1, i1 0 + %nop3746 = alloca i1, i1 0 + %nop3747 = alloca i1, i1 0 + %nop3748 = alloca i1, i1 0 + %nop3749 = alloca i1, i1 0 + %nop3750 = alloca i1, i1 0 + %nop3751 = alloca i1, i1 0 + %nop3752 = alloca i1, i1 0 + %nop3753 = alloca i1, i1 0 + %nop3754 = alloca i1, i1 0 + %nop3755 = alloca i1, i1 0 + %nop3756 = alloca i1, i1 0 + %nop3757 = alloca i1, i1 0 + %nop3758 = alloca i1, i1 0 + %nop3759 = alloca i1, i1 0 + %nop3760 = alloca i1, i1 0 + %nop3761 = alloca i1, i1 0 + %nop3762 = alloca i1, i1 0 + %nop3763 = alloca i1, i1 0 + %nop3764 = alloca i1, i1 0 + %nop3765 = alloca i1, i1 0 + %nop3766 = alloca i1, i1 0 + %nop3767 = alloca i1, i1 0 + %nop3768 = alloca i1, i1 0 + %nop3769 = alloca i1, i1 0 + %nop3770 = alloca i1, i1 0 + %nop3771 = alloca i1, i1 0 + %nop3772 = alloca i1, i1 0 + %nop3773 = alloca i1, i1 0 + %nop3774 = alloca i1, i1 0 + %nop3775 = alloca i1, i1 0 + %nop3776 = alloca i1, i1 0 + %nop3777 = alloca i1, i1 0 + %nop3778 = alloca i1, i1 0 + %nop3779 = alloca i1, i1 0 + %nop3780 = alloca i1, i1 0 + %nop3781 = alloca i1, i1 0 + %nop3782 = alloca i1, i1 0 + %nop3783 = alloca i1, i1 0 + %nop3784 = alloca i1, i1 0 + %nop3785 = alloca i1, i1 0 + %nop3786 = alloca i1, i1 0 + %nop3787 = alloca i1, i1 0 + %nop3788 = alloca i1, i1 0 + %nop3789 = alloca i1, i1 0 + %nop3790 = alloca i1, i1 0 + %nop3791 = alloca i1, i1 0 + %nop3792 = alloca i1, i1 0 + %nop3793 = alloca i1, i1 0 + %nop3794 = alloca i1, i1 0 + %nop3795 = alloca i1, i1 0 + %nop3796 = alloca i1, i1 0 + %nop3797 = alloca i1, i1 0 + %nop3798 = alloca i1, i1 0 + %nop3799 = alloca i1, 
i1 0 + %nop3800 = alloca i1, i1 0 + %nop3801 = alloca i1, i1 0 + %nop3802 = alloca i1, i1 0 + %nop3803 = alloca i1, i1 0 + %nop3804 = alloca i1, i1 0 + %nop3805 = alloca i1, i1 0 + %nop3806 = alloca i1, i1 0 + %nop3807 = alloca i1, i1 0 + %nop3808 = alloca i1, i1 0 + %nop3809 = alloca i1, i1 0 + %nop3810 = alloca i1, i1 0 + %nop3811 = alloca i1, i1 0 + %nop3812 = alloca i1, i1 0 + %nop3813 = alloca i1, i1 0 + %nop3814 = alloca i1, i1 0 + %nop3815 = alloca i1, i1 0 + %nop3816 = alloca i1, i1 0 + %nop3817 = alloca i1, i1 0 + %nop3818 = alloca i1, i1 0 + %nop3819 = alloca i1, i1 0 + %nop3820 = alloca i1, i1 0 + %nop3821 = alloca i1, i1 0 + %nop3822 = alloca i1, i1 0 + %nop3823 = alloca i1, i1 0 + %nop3824 = alloca i1, i1 0 + %nop3825 = alloca i1, i1 0 + %nop3826 = alloca i1, i1 0 + %nop3827 = alloca i1, i1 0 + %nop3828 = alloca i1, i1 0 + %nop3829 = alloca i1, i1 0 + %nop3830 = alloca i1, i1 0 + %nop3831 = alloca i1, i1 0 + %nop3832 = alloca i1, i1 0 + %nop3833 = alloca i1, i1 0 + %nop3834 = alloca i1, i1 0 + %nop3835 = alloca i1, i1 0 + %nop3836 = alloca i1, i1 0 + %nop3837 = alloca i1, i1 0 + %nop3838 = alloca i1, i1 0 + %nop3839 = alloca i1, i1 0 + %nop3840 = alloca i1, i1 0 + %nop3841 = alloca i1, i1 0 + %nop3842 = alloca i1, i1 0 + %nop3843 = alloca i1, i1 0 + %nop3844 = alloca i1, i1 0 + %nop3845 = alloca i1, i1 0 + %nop3846 = alloca i1, i1 0 + %nop3847 = alloca i1, i1 0 + %nop3848 = alloca i1, i1 0 + %nop3849 = alloca i1, i1 0 + %nop3850 = alloca i1, i1 0 + %nop3851 = alloca i1, i1 0 + %nop3852 = alloca i1, i1 0 + %nop3853 = alloca i1, i1 0 + %nop3854 = alloca i1, i1 0 + %nop3855 = alloca i1, i1 0 + %nop3856 = alloca i1, i1 0 + %nop3857 = alloca i1, i1 0 + %nop3858 = alloca i1, i1 0 + %nop3859 = alloca i1, i1 0 + %nop3860 = alloca i1, i1 0 + %nop3861 = alloca i1, i1 0 + %nop3862 = alloca i1, i1 0 + %nop3863 = alloca i1, i1 0 + %nop3864 = alloca i1, i1 0 + %nop3865 = alloca i1, i1 0 + %nop3866 = alloca i1, i1 0 + %nop3867 = alloca i1, i1 0 + %nop3868 = alloca 
i1, i1 0 + %nop3869 = alloca i1, i1 0 + %nop3870 = alloca i1, i1 0 + %nop3871 = alloca i1, i1 0 + %nop3872 = alloca i1, i1 0 + %nop3873 = alloca i1, i1 0 + %nop3874 = alloca i1, i1 0 + %nop3875 = alloca i1, i1 0 + %nop3876 = alloca i1, i1 0 + %nop3877 = alloca i1, i1 0 + %nop3878 = alloca i1, i1 0 + %nop3879 = alloca i1, i1 0 + %nop3880 = alloca i1, i1 0 + %nop3881 = alloca i1, i1 0 + %nop3882 = alloca i1, i1 0 + %nop3883 = alloca i1, i1 0 + %nop3884 = alloca i1, i1 0 + %nop3885 = alloca i1, i1 0 + %nop3886 = alloca i1, i1 0 + %nop3887 = alloca i1, i1 0 + %nop3888 = alloca i1, i1 0 + %nop3889 = alloca i1, i1 0 + %nop3890 = alloca i1, i1 0 + %nop3891 = alloca i1, i1 0 + %nop3892 = alloca i1, i1 0 + %nop3893 = alloca i1, i1 0 + %nop3894 = alloca i1, i1 0 + %nop3895 = alloca i1, i1 0 + %nop3896 = alloca i1, i1 0 + %nop3897 = alloca i1, i1 0 + %nop3898 = alloca i1, i1 0 + %nop3899 = alloca i1, i1 0 + %nop3900 = alloca i1, i1 0 + %nop3901 = alloca i1, i1 0 + %nop3902 = alloca i1, i1 0 + %nop3903 = alloca i1, i1 0 + %nop3904 = alloca i1, i1 0 + %nop3905 = alloca i1, i1 0 + %nop3906 = alloca i1, i1 0 + %nop3907 = alloca i1, i1 0 + %nop3908 = alloca i1, i1 0 + %nop3909 = alloca i1, i1 0 + %nop3910 = alloca i1, i1 0 + %nop3911 = alloca i1, i1 0 + %nop3912 = alloca i1, i1 0 + %nop3913 = alloca i1, i1 0 + %nop3914 = alloca i1, i1 0 + %nop3915 = alloca i1, i1 0 + %nop3916 = alloca i1, i1 0 + %nop3917 = alloca i1, i1 0 + %nop3918 = alloca i1, i1 0 + %nop3919 = alloca i1, i1 0 + %nop3920 = alloca i1, i1 0 + %nop3921 = alloca i1, i1 0 + %nop3922 = alloca i1, i1 0 + %nop3923 = alloca i1, i1 0 + %nop3924 = alloca i1, i1 0 + %nop3925 = alloca i1, i1 0 + %nop3926 = alloca i1, i1 0 + %nop3927 = alloca i1, i1 0 + %nop3928 = alloca i1, i1 0 + %nop3929 = alloca i1, i1 0 + %nop3930 = alloca i1, i1 0 + %nop3931 = alloca i1, i1 0 + %nop3932 = alloca i1, i1 0 + %nop3933 = alloca i1, i1 0 + %nop3934 = alloca i1, i1 0 + %nop3935 = alloca i1, i1 0 + %nop3936 = alloca i1, i1 0 + %nop3937 = 
alloca i1, i1 0 + %nop3938 = alloca i1, i1 0 + %nop3939 = alloca i1, i1 0 + %nop3940 = alloca i1, i1 0 + %nop3941 = alloca i1, i1 0 + %nop3942 = alloca i1, i1 0 + %nop3943 = alloca i1, i1 0 + %nop3944 = alloca i1, i1 0 + %nop3945 = alloca i1, i1 0 + %nop3946 = alloca i1, i1 0 + %nop3947 = alloca i1, i1 0 + %nop3948 = alloca i1, i1 0 + %nop3949 = alloca i1, i1 0 + %nop3950 = alloca i1, i1 0 + %nop3951 = alloca i1, i1 0 + %nop3952 = alloca i1, i1 0 + %nop3953 = alloca i1, i1 0 + %nop3954 = alloca i1, i1 0 + %nop3955 = alloca i1, i1 0 + %nop3956 = alloca i1, i1 0 + %nop3957 = alloca i1, i1 0 + %nop3958 = alloca i1, i1 0 + %nop3959 = alloca i1, i1 0 + %nop3960 = alloca i1, i1 0 + %nop3961 = alloca i1, i1 0 + %nop3962 = alloca i1, i1 0 + %nop3963 = alloca i1, i1 0 + %nop3964 = alloca i1, i1 0 + %nop3965 = alloca i1, i1 0 + %nop3966 = alloca i1, i1 0 + %nop3967 = alloca i1, i1 0 + %nop3968 = alloca i1, i1 0 + %nop3969 = alloca i1, i1 0 + %nop3970 = alloca i1, i1 0 + %nop3971 = alloca i1, i1 0 + %nop3972 = alloca i1, i1 0 + %nop3973 = alloca i1, i1 0 + %nop3974 = alloca i1, i1 0 + %nop3975 = alloca i1, i1 0 + %nop3976 = alloca i1, i1 0 + %nop3977 = alloca i1, i1 0 + %nop3978 = alloca i1, i1 0 + %nop3979 = alloca i1, i1 0 + %nop3980 = alloca i1, i1 0 + %nop3981 = alloca i1, i1 0 + %nop3982 = alloca i1, i1 0 + %nop3983 = alloca i1, i1 0 + %nop3984 = alloca i1, i1 0 + %nop3985 = alloca i1, i1 0 + %nop3986 = alloca i1, i1 0 + %nop3987 = alloca i1, i1 0 + %nop3988 = alloca i1, i1 0 + %nop3989 = alloca i1, i1 0 + %nop3990 = alloca i1, i1 0 + %nop3991 = alloca i1, i1 0 + %nop3992 = alloca i1, i1 0 + %nop3993 = alloca i1, i1 0 + %nop3994 = alloca i1, i1 0 + %nop3995 = alloca i1, i1 0 + %nop3996 = alloca i1, i1 0 + %nop3997 = alloca i1, i1 0 + %nop3998 = alloca i1, i1 0 + %nop3999 = alloca i1, i1 0 + %nop4000 = alloca i1, i1 0 + %nop4001 = alloca i1, i1 0 + %nop4002 = alloca i1, i1 0 + %nop4003 = alloca i1, i1 0 + %nop4004 = alloca i1, i1 0 + %nop4005 = alloca i1, i1 0 + %nop4006 
= alloca i1, i1 0 + %nop4007 = alloca i1, i1 0 + %nop4008 = alloca i1, i1 0 + %nop4009 = alloca i1, i1 0 + %nop4010 = alloca i1, i1 0 + %nop4011 = alloca i1, i1 0 + %nop4012 = alloca i1, i1 0 + %nop4013 = alloca i1, i1 0 + %nop4014 = alloca i1, i1 0 + %nop4015 = alloca i1, i1 0 + %nop4016 = alloca i1, i1 0 + %nop4017 = alloca i1, i1 0 + %nop4018 = alloca i1, i1 0 + %nop4019 = alloca i1, i1 0 + %nop4020 = alloca i1, i1 0 + %nop4021 = alloca i1, i1 0 + %nop4022 = alloca i1, i1 0 + %nop4023 = alloca i1, i1 0 + %nop4024 = alloca i1, i1 0 + %nop4025 = alloca i1, i1 0 + %nop4026 = alloca i1, i1 0 + %nop4027 = alloca i1, i1 0 + %nop4028 = alloca i1, i1 0 + %nop4029 = alloca i1, i1 0 + %nop4030 = alloca i1, i1 0 + %nop4031 = alloca i1, i1 0 + %nop4032 = alloca i1, i1 0 + %nop4033 = alloca i1, i1 0 + %nop4034 = alloca i1, i1 0 + %nop4035 = alloca i1, i1 0 + %nop4036 = alloca i1, i1 0 + %nop4037 = alloca i1, i1 0 + %nop4038 = alloca i1, i1 0 + %nop4039 = alloca i1, i1 0 + %nop4040 = alloca i1, i1 0 + %nop4041 = alloca i1, i1 0 + %nop4042 = alloca i1, i1 0 + %nop4043 = alloca i1, i1 0 + %nop4044 = alloca i1, i1 0 + %nop4045 = alloca i1, i1 0 + %nop4046 = alloca i1, i1 0 + %nop4047 = alloca i1, i1 0 + %nop4048 = alloca i1, i1 0 + %nop4049 = alloca i1, i1 0 + %nop4050 = alloca i1, i1 0 + %nop4051 = alloca i1, i1 0 + %nop4052 = alloca i1, i1 0 + %nop4053 = alloca i1, i1 0 + %nop4054 = alloca i1, i1 0 + %nop4055 = alloca i1, i1 0 + %nop4056 = alloca i1, i1 0 + %nop4057 = alloca i1, i1 0 + %nop4058 = alloca i1, i1 0 + %nop4059 = alloca i1, i1 0 + %nop4060 = alloca i1, i1 0 + %nop4061 = alloca i1, i1 0 + %nop4062 = alloca i1, i1 0 + %nop4063 = alloca i1, i1 0 + %nop4064 = alloca i1, i1 0 + %nop4065 = alloca i1, i1 0 + %nop4066 = alloca i1, i1 0 + %nop4067 = alloca i1, i1 0 + %nop4068 = alloca i1, i1 0 + %nop4069 = alloca i1, i1 0 + %nop4070 = alloca i1, i1 0 + %nop4071 = alloca i1, i1 0 + %nop4072 = alloca i1, i1 0 + %nop4073 = alloca i1, i1 0 + %nop4074 = alloca i1, i1 0 + 
%nop4075 = alloca i1, i1 0 + %nop4076 = alloca i1, i1 0 + %nop4077 = alloca i1, i1 0 + %nop4078 = alloca i1, i1 0 + %nop4079 = alloca i1, i1 0 + %nop4080 = alloca i1, i1 0 + %nop4081 = alloca i1, i1 0 + %nop4082 = alloca i1, i1 0 + %nop4083 = alloca i1, i1 0 + %nop4084 = alloca i1, i1 0 + %nop4085 = alloca i1, i1 0 + %nop4086 = alloca i1, i1 0 + %nop4087 = alloca i1, i1 0 + %nop4088 = alloca i1, i1 0 + %nop4089 = alloca i1, i1 0 + %nop4090 = alloca i1, i1 0 + %nop4091 = alloca i1, i1 0 + %nop4092 = alloca i1, i1 0 + %nop4093 = alloca i1, i1 0 + %nop4094 = alloca i1, i1 0 + %nop4095 = alloca i1, i1 0 + %nop4096 = alloca i1, i1 0 + %nop4097 = alloca i1, i1 0 + %nop4098 = alloca i1, i1 0 + %nop4099 = alloca i1, i1 0 + %nop4100 = alloca i1, i1 0 + %nop4101 = alloca i1, i1 0 + %nop4102 = alloca i1, i1 0 + %nop4103 = alloca i1, i1 0 + %nop4104 = alloca i1, i1 0 + %nop4105 = alloca i1, i1 0 + %nop4106 = alloca i1, i1 0 + %nop4107 = alloca i1, i1 0 + %nop4108 = alloca i1, i1 0 + %nop4109 = alloca i1, i1 0 + %nop4110 = alloca i1, i1 0 + %nop4111 = alloca i1, i1 0 + %nop4112 = alloca i1, i1 0 + %nop4113 = alloca i1, i1 0 + %nop4114 = alloca i1, i1 0 + %nop4115 = alloca i1, i1 0 + %nop4116 = alloca i1, i1 0 + %nop4117 = alloca i1, i1 0 + %nop4118 = alloca i1, i1 0 + %nop4119 = alloca i1, i1 0 + %nop4120 = alloca i1, i1 0 + %nop4121 = alloca i1, i1 0 + %nop4122 = alloca i1, i1 0 + %nop4123 = alloca i1, i1 0 + %nop4124 = alloca i1, i1 0 + %nop4125 = alloca i1, i1 0 + %nop4126 = alloca i1, i1 0 + %nop4127 = alloca i1, i1 0 + %nop4128 = alloca i1, i1 0 + %nop4129 = alloca i1, i1 0 + %nop4130 = alloca i1, i1 0 + %nop4131 = alloca i1, i1 0 + %nop4132 = alloca i1, i1 0 + %nop4133 = alloca i1, i1 0 + %nop4134 = alloca i1, i1 0 + %nop4135 = alloca i1, i1 0 + %nop4136 = alloca i1, i1 0 + %nop4137 = alloca i1, i1 0 + %nop4138 = alloca i1, i1 0 + %nop4139 = alloca i1, i1 0 + %nop4140 = alloca i1, i1 0 + %nop4141 = alloca i1, i1 0 + %nop4142 = alloca i1, i1 0 + %nop4143 = alloca i1, i1 0 
+ %nop4144 = alloca i1, i1 0 + %nop4145 = alloca i1, i1 0 + %nop4146 = alloca i1, i1 0 + %nop4147 = alloca i1, i1 0 + %nop4148 = alloca i1, i1 0 + %nop4149 = alloca i1, i1 0 + %nop4150 = alloca i1, i1 0 + %nop4151 = alloca i1, i1 0 + %nop4152 = alloca i1, i1 0 + %nop4153 = alloca i1, i1 0 + %nop4154 = alloca i1, i1 0 + %nop4155 = alloca i1, i1 0 + %nop4156 = alloca i1, i1 0 + %nop4157 = alloca i1, i1 0 + %nop4158 = alloca i1, i1 0 + %nop4159 = alloca i1, i1 0 + %nop4160 = alloca i1, i1 0 + %nop4161 = alloca i1, i1 0 + %nop4162 = alloca i1, i1 0 + %nop4163 = alloca i1, i1 0 + %nop4164 = alloca i1, i1 0 + %nop4165 = alloca i1, i1 0 + %nop4166 = alloca i1, i1 0 + %nop4167 = alloca i1, i1 0 + %nop4168 = alloca i1, i1 0 + %nop4169 = alloca i1, i1 0 + %nop4170 = alloca i1, i1 0 + %nop4171 = alloca i1, i1 0 + %nop4172 = alloca i1, i1 0 + %nop4173 = alloca i1, i1 0 + %nop4174 = alloca i1, i1 0 + %nop4175 = alloca i1, i1 0 + %nop4176 = alloca i1, i1 0 + %nop4177 = alloca i1, i1 0 + %nop4178 = alloca i1, i1 0 + %nop4179 = alloca i1, i1 0 + %nop4180 = alloca i1, i1 0 + %nop4181 = alloca i1, i1 0 + %nop4182 = alloca i1, i1 0 + %nop4183 = alloca i1, i1 0 + %nop4184 = alloca i1, i1 0 + %nop4185 = alloca i1, i1 0 + %nop4186 = alloca i1, i1 0 + %nop4187 = alloca i1, i1 0 + %nop4188 = alloca i1, i1 0 + %nop4189 = alloca i1, i1 0 + %nop4190 = alloca i1, i1 0 + %nop4191 = alloca i1, i1 0 + %nop4192 = alloca i1, i1 0 + %nop4193 = alloca i1, i1 0 + %nop4194 = alloca i1, i1 0 + %nop4195 = alloca i1, i1 0 + %nop4196 = alloca i1, i1 0 + %nop4197 = alloca i1, i1 0 + %nop4198 = alloca i1, i1 0 + %nop4199 = alloca i1, i1 0 + %nop4200 = alloca i1, i1 0 + %nop4201 = alloca i1, i1 0 + %nop4202 = alloca i1, i1 0 + %nop4203 = alloca i1, i1 0 + %nop4204 = alloca i1, i1 0 + %nop4205 = alloca i1, i1 0 + %nop4206 = alloca i1, i1 0 + %nop4207 = alloca i1, i1 0 + %nop4208 = alloca i1, i1 0 + %nop4209 = alloca i1, i1 0 + %nop4210 = alloca i1, i1 0 + %nop4211 = alloca i1, i1 0 + %nop4212 = alloca i1, i1 
0 + %nop4213 = alloca i1, i1 0 + %nop4214 = alloca i1, i1 0 + %nop4215 = alloca i1, i1 0 + %nop4216 = alloca i1, i1 0 + %nop4217 = alloca i1, i1 0 + %nop4218 = alloca i1, i1 0 + %nop4219 = alloca i1, i1 0 + %nop4220 = alloca i1, i1 0 + %nop4221 = alloca i1, i1 0 + %nop4222 = alloca i1, i1 0 + %nop4223 = alloca i1, i1 0 + %nop4224 = alloca i1, i1 0 + %nop4225 = alloca i1, i1 0 + %nop4226 = alloca i1, i1 0 + %nop4227 = alloca i1, i1 0 + %nop4228 = alloca i1, i1 0 + %nop4229 = alloca i1, i1 0 + %nop4230 = alloca i1, i1 0 + %nop4231 = alloca i1, i1 0 + %nop4232 = alloca i1, i1 0 + %nop4233 = alloca i1, i1 0 + %nop4234 = alloca i1, i1 0 + %nop4235 = alloca i1, i1 0 + %nop4236 = alloca i1, i1 0 + %nop4237 = alloca i1, i1 0 + %nop4238 = alloca i1, i1 0 + %nop4239 = alloca i1, i1 0 + %nop4240 = alloca i1, i1 0 + %nop4241 = alloca i1, i1 0 + %nop4242 = alloca i1, i1 0 + %nop4243 = alloca i1, i1 0 + %nop4244 = alloca i1, i1 0 + %nop4245 = alloca i1, i1 0 + %nop4246 = alloca i1, i1 0 + %nop4247 = alloca i1, i1 0 + %nop4248 = alloca i1, i1 0 + %nop4249 = alloca i1, i1 0 + %nop4250 = alloca i1, i1 0 + %nop4251 = alloca i1, i1 0 + %nop4252 = alloca i1, i1 0 + %nop4253 = alloca i1, i1 0 + %nop4254 = alloca i1, i1 0 + %nop4255 = alloca i1, i1 0 + %nop4256 = alloca i1, i1 0 + %nop4257 = alloca i1, i1 0 + %nop4258 = alloca i1, i1 0 + %nop4259 = alloca i1, i1 0 + %nop4260 = alloca i1, i1 0 + %nop4261 = alloca i1, i1 0 + %nop4262 = alloca i1, i1 0 + %nop4263 = alloca i1, i1 0 + %nop4264 = alloca i1, i1 0 + %nop4265 = alloca i1, i1 0 + %nop4266 = alloca i1, i1 0 + %nop4267 = alloca i1, i1 0 + %nop4268 = alloca i1, i1 0 + %nop4269 = alloca i1, i1 0 + %nop4270 = alloca i1, i1 0 + %nop4271 = alloca i1, i1 0 + %nop4272 = alloca i1, i1 0 + %nop4273 = alloca i1, i1 0 + %nop4274 = alloca i1, i1 0 + %nop4275 = alloca i1, i1 0 + %nop4276 = alloca i1, i1 0 + %nop4277 = alloca i1, i1 0 + %nop4278 = alloca i1, i1 0 + %nop4279 = alloca i1, i1 0 + %nop4280 = alloca i1, i1 0 + %nop4281 = alloca i1, 
i1 0 + %nop4282 = alloca i1, i1 0 + %nop4283 = alloca i1, i1 0 + %nop4284 = alloca i1, i1 0 + %nop4285 = alloca i1, i1 0 + %nop4286 = alloca i1, i1 0 + %nop4287 = alloca i1, i1 0 + %nop4288 = alloca i1, i1 0 + %nop4289 = alloca i1, i1 0 + %nop4290 = alloca i1, i1 0 + %nop4291 = alloca i1, i1 0 + %nop4292 = alloca i1, i1 0 + %nop4293 = alloca i1, i1 0 + %nop4294 = alloca i1, i1 0 + %nop4295 = alloca i1, i1 0 + %nop4296 = alloca i1, i1 0 + %nop4297 = alloca i1, i1 0 + %nop4298 = alloca i1, i1 0 + %nop4299 = alloca i1, i1 0 + %nop4300 = alloca i1, i1 0 + %nop4301 = alloca i1, i1 0 + %nop4302 = alloca i1, i1 0 + %nop4303 = alloca i1, i1 0 + %nop4304 = alloca i1, i1 0 + %nop4305 = alloca i1, i1 0 + %nop4306 = alloca i1, i1 0 + %nop4307 = alloca i1, i1 0 + %nop4308 = alloca i1, i1 0 + %nop4309 = alloca i1, i1 0 + %nop4310 = alloca i1, i1 0 + %nop4311 = alloca i1, i1 0 + %nop4312 = alloca i1, i1 0 + %nop4313 = alloca i1, i1 0 + %nop4314 = alloca i1, i1 0 + %nop4315 = alloca i1, i1 0 + %nop4316 = alloca i1, i1 0 + %nop4317 = alloca i1, i1 0 + %nop4318 = alloca i1, i1 0 + %nop4319 = alloca i1, i1 0 + %nop4320 = alloca i1, i1 0 + %nop4321 = alloca i1, i1 0 + %nop4322 = alloca i1, i1 0 + %nop4323 = alloca i1, i1 0 + %nop4324 = alloca i1, i1 0 + %nop4325 = alloca i1, i1 0 + %nop4326 = alloca i1, i1 0 + %nop4327 = alloca i1, i1 0 + %nop4328 = alloca i1, i1 0 + %nop4329 = alloca i1, i1 0 + %nop4330 = alloca i1, i1 0 + %nop4331 = alloca i1, i1 0 + %nop4332 = alloca i1, i1 0 + %nop4333 = alloca i1, i1 0 + %nop4334 = alloca i1, i1 0 + %nop4335 = alloca i1, i1 0 + %nop4336 = alloca i1, i1 0 + %nop4337 = alloca i1, i1 0 + %nop4338 = alloca i1, i1 0 + %nop4339 = alloca i1, i1 0 + %nop4340 = alloca i1, i1 0 + %nop4341 = alloca i1, i1 0 + %nop4342 = alloca i1, i1 0 + %nop4343 = alloca i1, i1 0 + %nop4344 = alloca i1, i1 0 + %nop4345 = alloca i1, i1 0 + %nop4346 = alloca i1, i1 0 + %nop4347 = alloca i1, i1 0 + %nop4348 = alloca i1, i1 0 + %nop4349 = alloca i1, i1 0 + %nop4350 = alloca 
i1, i1 0 + %nop4351 = alloca i1, i1 0 + %nop4352 = alloca i1, i1 0 + %nop4353 = alloca i1, i1 0 + %nop4354 = alloca i1, i1 0 + %nop4355 = alloca i1, i1 0 + %nop4356 = alloca i1, i1 0 + %nop4357 = alloca i1, i1 0 + %nop4358 = alloca i1, i1 0 + %nop4359 = alloca i1, i1 0 + %nop4360 = alloca i1, i1 0 + %nop4361 = alloca i1, i1 0 + %nop4362 = alloca i1, i1 0 + %nop4363 = alloca i1, i1 0 + %nop4364 = alloca i1, i1 0 + %nop4365 = alloca i1, i1 0 + %nop4366 = alloca i1, i1 0 + %nop4367 = alloca i1, i1 0 + %nop4368 = alloca i1, i1 0 + %nop4369 = alloca i1, i1 0 + %nop4370 = alloca i1, i1 0 + %nop4371 = alloca i1, i1 0 + %nop4372 = alloca i1, i1 0 + %nop4373 = alloca i1, i1 0 + %nop4374 = alloca i1, i1 0 + %nop4375 = alloca i1, i1 0 + %nop4376 = alloca i1, i1 0 + %nop4377 = alloca i1, i1 0 + %nop4378 = alloca i1, i1 0 + %nop4379 = alloca i1, i1 0 + %nop4380 = alloca i1, i1 0 + %nop4381 = alloca i1, i1 0 + %nop4382 = alloca i1, i1 0 + %nop4383 = alloca i1, i1 0 + %nop4384 = alloca i1, i1 0 + %nop4385 = alloca i1, i1 0 + %nop4386 = alloca i1, i1 0 + %nop4387 = alloca i1, i1 0 + %nop4388 = alloca i1, i1 0 + %nop4389 = alloca i1, i1 0 + %nop4390 = alloca i1, i1 0 + %nop4391 = alloca i1, i1 0 + %nop4392 = alloca i1, i1 0 + %nop4393 = alloca i1, i1 0 + %nop4394 = alloca i1, i1 0 + %nop4395 = alloca i1, i1 0 + %nop4396 = alloca i1, i1 0 + %nop4397 = alloca i1, i1 0 + %nop4398 = alloca i1, i1 0 + %nop4399 = alloca i1, i1 0 + %nop4400 = alloca i1, i1 0 + %nop4401 = alloca i1, i1 0 + %nop4402 = alloca i1, i1 0 + %nop4403 = alloca i1, i1 0 + %nop4404 = alloca i1, i1 0 + %nop4405 = alloca i1, i1 0 + %nop4406 = alloca i1, i1 0 + %nop4407 = alloca i1, i1 0 + %nop4408 = alloca i1, i1 0 + %nop4409 = alloca i1, i1 0 + %nop4410 = alloca i1, i1 0 + %nop4411 = alloca i1, i1 0 + %nop4412 = alloca i1, i1 0 + %nop4413 = alloca i1, i1 0 + %nop4414 = alloca i1, i1 0 + %nop4415 = alloca i1, i1 0 + %nop4416 = alloca i1, i1 0 + %nop4417 = alloca i1, i1 0 + %nop4418 = alloca i1, i1 0 + %nop4419 = 
alloca i1, i1 0 + %nop4420 = alloca i1, i1 0 + %nop4421 = alloca i1, i1 0 + %nop4422 = alloca i1, i1 0 + %nop4423 = alloca i1, i1 0 + %nop4424 = alloca i1, i1 0 + %nop4425 = alloca i1, i1 0 + %nop4426 = alloca i1, i1 0 + %nop4427 = alloca i1, i1 0 + %nop4428 = alloca i1, i1 0 + %nop4429 = alloca i1, i1 0 + %nop4430 = alloca i1, i1 0 + %nop4431 = alloca i1, i1 0 + %nop4432 = alloca i1, i1 0 + %nop4433 = alloca i1, i1 0 + %nop4434 = alloca i1, i1 0 + %nop4435 = alloca i1, i1 0 + %nop4436 = alloca i1, i1 0 + %nop4437 = alloca i1, i1 0 + %nop4438 = alloca i1, i1 0 + %nop4439 = alloca i1, i1 0 + %nop4440 = alloca i1, i1 0 + %nop4441 = alloca i1, i1 0 + %nop4442 = alloca i1, i1 0 + %nop4443 = alloca i1, i1 0 + %nop4444 = alloca i1, i1 0 + %nop4445 = alloca i1, i1 0 + %nop4446 = alloca i1, i1 0 + %nop4447 = alloca i1, i1 0 + %nop4448 = alloca i1, i1 0 + %nop4449 = alloca i1, i1 0 + %nop4450 = alloca i1, i1 0 + %nop4451 = alloca i1, i1 0 + %nop4452 = alloca i1, i1 0 + %nop4453 = alloca i1, i1 0 + %nop4454 = alloca i1, i1 0 + %nop4455 = alloca i1, i1 0 + %nop4456 = alloca i1, i1 0 + %nop4457 = alloca i1, i1 0 + %nop4458 = alloca i1, i1 0 + %nop4459 = alloca i1, i1 0 + %nop4460 = alloca i1, i1 0 + %nop4461 = alloca i1, i1 0 + %nop4462 = alloca i1, i1 0 + %nop4463 = alloca i1, i1 0 + %nop4464 = alloca i1, i1 0 + %nop4465 = alloca i1, i1 0 + %nop4466 = alloca i1, i1 0 + %nop4467 = alloca i1, i1 0 + %nop4468 = alloca i1, i1 0 + %nop4469 = alloca i1, i1 0 + %nop4470 = alloca i1, i1 0 + %nop4471 = alloca i1, i1 0 + %nop4472 = alloca i1, i1 0 + %nop4473 = alloca i1, i1 0 + %nop4474 = alloca i1, i1 0 + %nop4475 = alloca i1, i1 0 + %nop4476 = alloca i1, i1 0 + %nop4477 = alloca i1, i1 0 + %nop4478 = alloca i1, i1 0 + %nop4479 = alloca i1, i1 0 + %nop4480 = alloca i1, i1 0 + %nop4481 = alloca i1, i1 0 + %nop4482 = alloca i1, i1 0 + %nop4483 = alloca i1, i1 0 + %nop4484 = alloca i1, i1 0 + %nop4485 = alloca i1, i1 0 + %nop4486 = alloca i1, i1 0 + %nop4487 = alloca i1, i1 0 + %nop4488 
= alloca i1, i1 0 + %nop4489 = alloca i1, i1 0 + %nop4490 = alloca i1, i1 0 + %nop4491 = alloca i1, i1 0 + %nop4492 = alloca i1, i1 0 + %nop4493 = alloca i1, i1 0 + %nop4494 = alloca i1, i1 0 + %nop4495 = alloca i1, i1 0 + %nop4496 = alloca i1, i1 0 + %nop4497 = alloca i1, i1 0 + %nop4498 = alloca i1, i1 0 + %nop4499 = alloca i1, i1 0 + %nop4500 = alloca i1, i1 0 + %nop4501 = alloca i1, i1 0 + %nop4502 = alloca i1, i1 0 + %nop4503 = alloca i1, i1 0 + %nop4504 = alloca i1, i1 0 + %nop4505 = alloca i1, i1 0 + %nop4506 = alloca i1, i1 0 + %nop4507 = alloca i1, i1 0 + %nop4508 = alloca i1, i1 0 + %nop4509 = alloca i1, i1 0 + %nop4510 = alloca i1, i1 0 + %nop4511 = alloca i1, i1 0 + %nop4512 = alloca i1, i1 0 + %nop4513 = alloca i1, i1 0 + %nop4514 = alloca i1, i1 0 + %nop4515 = alloca i1, i1 0 + %nop4516 = alloca i1, i1 0 + %nop4517 = alloca i1, i1 0 + %nop4518 = alloca i1, i1 0 + %nop4519 = alloca i1, i1 0 + %nop4520 = alloca i1, i1 0 + %nop4521 = alloca i1, i1 0 + %nop4522 = alloca i1, i1 0 + %nop4523 = alloca i1, i1 0 + %nop4524 = alloca i1, i1 0 + %nop4525 = alloca i1, i1 0 + %nop4526 = alloca i1, i1 0 + %nop4527 = alloca i1, i1 0 + %nop4528 = alloca i1, i1 0 + %nop4529 = alloca i1, i1 0 + %nop4530 = alloca i1, i1 0 + %nop4531 = alloca i1, i1 0 + %nop4532 = alloca i1, i1 0 + %nop4533 = alloca i1, i1 0 + %nop4534 = alloca i1, i1 0 + %nop4535 = alloca i1, i1 0 + %nop4536 = alloca i1, i1 0 + %nop4537 = alloca i1, i1 0 + %nop4538 = alloca i1, i1 0 + %nop4539 = alloca i1, i1 0 + %nop4540 = alloca i1, i1 0 + %nop4541 = alloca i1, i1 0 + %nop4542 = alloca i1, i1 0 + %nop4543 = alloca i1, i1 0 + %nop4544 = alloca i1, i1 0 + %nop4545 = alloca i1, i1 0 + %nop4546 = alloca i1, i1 0 + %nop4547 = alloca i1, i1 0 + %nop4548 = alloca i1, i1 0 + %nop4549 = alloca i1, i1 0 + %nop4550 = alloca i1, i1 0 + %nop4551 = alloca i1, i1 0 + %nop4552 = alloca i1, i1 0 + %nop4553 = alloca i1, i1 0 + %nop4554 = alloca i1, i1 0 + %nop4555 = alloca i1, i1 0 + %nop4556 = alloca i1, i1 0 + 
%nop4557 = alloca i1, i1 0 + %nop4558 = alloca i1, i1 0 + %nop4559 = alloca i1, i1 0 + %nop4560 = alloca i1, i1 0 + %nop4561 = alloca i1, i1 0 + %nop4562 = alloca i1, i1 0 + %nop4563 = alloca i1, i1 0 + %nop4564 = alloca i1, i1 0 + %nop4565 = alloca i1, i1 0 + %nop4566 = alloca i1, i1 0 + %nop4567 = alloca i1, i1 0 + %nop4568 = alloca i1, i1 0 + %nop4569 = alloca i1, i1 0 + %nop4570 = alloca i1, i1 0 + %nop4571 = alloca i1, i1 0 + %nop4572 = alloca i1, i1 0 + %nop4573 = alloca i1, i1 0 + %nop4574 = alloca i1, i1 0 + %nop4575 = alloca i1, i1 0 + %nop4576 = alloca i1, i1 0 + %nop4577 = alloca i1, i1 0 + %nop4578 = alloca i1, i1 0 + %nop4579 = alloca i1, i1 0 + %nop4580 = alloca i1, i1 0 + %nop4581 = alloca i1, i1 0 + %nop4582 = alloca i1, i1 0 + %nop4583 = alloca i1, i1 0 + %nop4584 = alloca i1, i1 0 + %nop4585 = alloca i1, i1 0 + %nop4586 = alloca i1, i1 0 + %nop4587 = alloca i1, i1 0 + %nop4588 = alloca i1, i1 0 + %nop4589 = alloca i1, i1 0 + %nop4590 = alloca i1, i1 0 + %nop4591 = alloca i1, i1 0 + %nop4592 = alloca i1, i1 0 + %nop4593 = alloca i1, i1 0 + %nop4594 = alloca i1, i1 0 + %nop4595 = alloca i1, i1 0 + %nop4596 = alloca i1, i1 0 + %nop4597 = alloca i1, i1 0 + %nop4598 = alloca i1, i1 0 + %nop4599 = alloca i1, i1 0 + %nop4600 = alloca i1, i1 0 + %nop4601 = alloca i1, i1 0 + %nop4602 = alloca i1, i1 0 + %nop4603 = alloca i1, i1 0 + %nop4604 = alloca i1, i1 0 + %nop4605 = alloca i1, i1 0 + %nop4606 = alloca i1, i1 0 + %nop4607 = alloca i1, i1 0 + %nop4608 = alloca i1, i1 0 + %nop4609 = alloca i1, i1 0 + %nop4610 = alloca i1, i1 0 + %nop4611 = alloca i1, i1 0 + %nop4612 = alloca i1, i1 0 + %nop4613 = alloca i1, i1 0 + %nop4614 = alloca i1, i1 0 + %nop4615 = alloca i1, i1 0 + %nop4616 = alloca i1, i1 0 + %nop4617 = alloca i1, i1 0 + %nop4618 = alloca i1, i1 0 + %nop4619 = alloca i1, i1 0 + %nop4620 = alloca i1, i1 0 + %nop4621 = alloca i1, i1 0 + %nop4622 = alloca i1, i1 0 + %nop4623 = alloca i1, i1 0 + %nop4624 = alloca i1, i1 0 + %nop4625 = alloca i1, i1 0 
+ %nop4626 = alloca i1, i1 0 + %nop4627 = alloca i1, i1 0 + %nop4628 = alloca i1, i1 0 + %nop4629 = alloca i1, i1 0 + %nop4630 = alloca i1, i1 0 + %nop4631 = alloca i1, i1 0 + %nop4632 = alloca i1, i1 0 + %nop4633 = alloca i1, i1 0 + %nop4634 = alloca i1, i1 0 + %nop4635 = alloca i1, i1 0 + %nop4636 = alloca i1, i1 0 + %nop4637 = alloca i1, i1 0 + %nop4638 = alloca i1, i1 0 + %nop4639 = alloca i1, i1 0 + %nop4640 = alloca i1, i1 0 + %nop4641 = alloca i1, i1 0 + %nop4642 = alloca i1, i1 0 + %nop4643 = alloca i1, i1 0 + %nop4644 = alloca i1, i1 0 + %nop4645 = alloca i1, i1 0 + %nop4646 = alloca i1, i1 0 + %nop4647 = alloca i1, i1 0 + %nop4648 = alloca i1, i1 0 + %nop4649 = alloca i1, i1 0 + %nop4650 = alloca i1, i1 0 + %nop4651 = alloca i1, i1 0 + %nop4652 = alloca i1, i1 0 + %nop4653 = alloca i1, i1 0 + %nop4654 = alloca i1, i1 0 + %nop4655 = alloca i1, i1 0 + %nop4656 = alloca i1, i1 0 + %nop4657 = alloca i1, i1 0 + %nop4658 = alloca i1, i1 0 + %nop4659 = alloca i1, i1 0 + %nop4660 = alloca i1, i1 0 + %nop4661 = alloca i1, i1 0 + %nop4662 = alloca i1, i1 0 + %nop4663 = alloca i1, i1 0 + %nop4664 = alloca i1, i1 0 + %nop4665 = alloca i1, i1 0 + %nop4666 = alloca i1, i1 0 + %nop4667 = alloca i1, i1 0 + %nop4668 = alloca i1, i1 0 + %nop4669 = alloca i1, i1 0 + %nop4670 = alloca i1, i1 0 + %nop4671 = alloca i1, i1 0 + %nop4672 = alloca i1, i1 0 + %nop4673 = alloca i1, i1 0 + %nop4674 = alloca i1, i1 0 + %nop4675 = alloca i1, i1 0 + %nop4676 = alloca i1, i1 0 + %nop4677 = alloca i1, i1 0 + %nop4678 = alloca i1, i1 0 + %nop4679 = alloca i1, i1 0 + %nop4680 = alloca i1, i1 0 + %nop4681 = alloca i1, i1 0 + %nop4682 = alloca i1, i1 0 + %nop4683 = alloca i1, i1 0 + %nop4684 = alloca i1, i1 0 + %nop4685 = alloca i1, i1 0 + %nop4686 = alloca i1, i1 0 + %nop4687 = alloca i1, i1 0 + %nop4688 = alloca i1, i1 0 + %nop4689 = alloca i1, i1 0 + %nop4690 = alloca i1, i1 0 + %nop4691 = alloca i1, i1 0 + %nop4692 = alloca i1, i1 0 + %nop4693 = alloca i1, i1 0 + %nop4694 = alloca i1, i1 
0 + %nop4695 = alloca i1, i1 0 + %nop4696 = alloca i1, i1 0 + %nop4697 = alloca i1, i1 0 + %nop4698 = alloca i1, i1 0 + %nop4699 = alloca i1, i1 0 + %nop4700 = alloca i1, i1 0 + %nop4701 = alloca i1, i1 0 + %nop4702 = alloca i1, i1 0 + %nop4703 = alloca i1, i1 0 + %nop4704 = alloca i1, i1 0 + %nop4705 = alloca i1, i1 0 + %nop4706 = alloca i1, i1 0 + %nop4707 = alloca i1, i1 0 + %nop4708 = alloca i1, i1 0 + %nop4709 = alloca i1, i1 0 + %nop4710 = alloca i1, i1 0 + %nop4711 = alloca i1, i1 0 + %nop4712 = alloca i1, i1 0 + %nop4713 = alloca i1, i1 0 + %nop4714 = alloca i1, i1 0 + %nop4715 = alloca i1, i1 0 + %nop4716 = alloca i1, i1 0 + %nop4717 = alloca i1, i1 0 + %nop4718 = alloca i1, i1 0 + %nop4719 = alloca i1, i1 0 + %nop4720 = alloca i1, i1 0 + %nop4721 = alloca i1, i1 0 + %nop4722 = alloca i1, i1 0 + %nop4723 = alloca i1, i1 0 + %nop4724 = alloca i1, i1 0 + %nop4725 = alloca i1, i1 0 + %nop4726 = alloca i1, i1 0 + %nop4727 = alloca i1, i1 0 + %nop4728 = alloca i1, i1 0 + %nop4729 = alloca i1, i1 0 + %nop4730 = alloca i1, i1 0 + %nop4731 = alloca i1, i1 0 + %nop4732 = alloca i1, i1 0 + %nop4733 = alloca i1, i1 0 + %nop4734 = alloca i1, i1 0 + %nop4735 = alloca i1, i1 0 + %nop4736 = alloca i1, i1 0 + %nop4737 = alloca i1, i1 0 + %nop4738 = alloca i1, i1 0 + %nop4739 = alloca i1, i1 0 + %nop4740 = alloca i1, i1 0 + %nop4741 = alloca i1, i1 0 + %nop4742 = alloca i1, i1 0 + %nop4743 = alloca i1, i1 0 + %nop4744 = alloca i1, i1 0 + %nop4745 = alloca i1, i1 0 + %nop4746 = alloca i1, i1 0 + %nop4747 = alloca i1, i1 0 + %nop4748 = alloca i1, i1 0 + %nop4749 = alloca i1, i1 0 + %nop4750 = alloca i1, i1 0 + %nop4751 = alloca i1, i1 0 + %nop4752 = alloca i1, i1 0 + %nop4753 = alloca i1, i1 0 + %nop4754 = alloca i1, i1 0 + %nop4755 = alloca i1, i1 0 + %nop4756 = alloca i1, i1 0 + %nop4757 = alloca i1, i1 0 + %nop4758 = alloca i1, i1 0 + %nop4759 = alloca i1, i1 0 + %nop4760 = alloca i1, i1 0 + %nop4761 = alloca i1, i1 0 + %nop4762 = alloca i1, i1 0 + %nop4763 = alloca i1, 
i1 0 + %nop4764 = alloca i1, i1 0 + %nop4765 = alloca i1, i1 0 + %nop4766 = alloca i1, i1 0 + %nop4767 = alloca i1, i1 0 + %nop4768 = alloca i1, i1 0 + %nop4769 = alloca i1, i1 0 + %nop4770 = alloca i1, i1 0 + %nop4771 = alloca i1, i1 0 + %nop4772 = alloca i1, i1 0 + %nop4773 = alloca i1, i1 0 + %nop4774 = alloca i1, i1 0 + %nop4775 = alloca i1, i1 0 + %nop4776 = alloca i1, i1 0 + %nop4777 = alloca i1, i1 0 + %nop4778 = alloca i1, i1 0 + %nop4779 = alloca i1, i1 0 + %nop4780 = alloca i1, i1 0 + %nop4781 = alloca i1, i1 0 + %nop4782 = alloca i1, i1 0 + %nop4783 = alloca i1, i1 0 + %nop4784 = alloca i1, i1 0 + %nop4785 = alloca i1, i1 0 + %nop4786 = alloca i1, i1 0 + %nop4787 = alloca i1, i1 0 + %nop4788 = alloca i1, i1 0 + %nop4789 = alloca i1, i1 0 + %nop4790 = alloca i1, i1 0 + %nop4791 = alloca i1, i1 0 + %nop4792 = alloca i1, i1 0 + %nop4793 = alloca i1, i1 0 + %nop4794 = alloca i1, i1 0 + %nop4795 = alloca i1, i1 0 + %nop4796 = alloca i1, i1 0 + %nop4797 = alloca i1, i1 0 + %nop4798 = alloca i1, i1 0 + %nop4799 = alloca i1, i1 0 + %nop4800 = alloca i1, i1 0 + %nop4801 = alloca i1, i1 0 + %nop4802 = alloca i1, i1 0 + %nop4803 = alloca i1, i1 0 + %nop4804 = alloca i1, i1 0 + %nop4805 = alloca i1, i1 0 + %nop4806 = alloca i1, i1 0 + %nop4807 = alloca i1, i1 0 + %nop4808 = alloca i1, i1 0 + %nop4809 = alloca i1, i1 0 + %nop4810 = alloca i1, i1 0 + %nop4811 = alloca i1, i1 0 + %nop4812 = alloca i1, i1 0 + %nop4813 = alloca i1, i1 0 + %nop4814 = alloca i1, i1 0 + %nop4815 = alloca i1, i1 0 + %nop4816 = alloca i1, i1 0 + %nop4817 = alloca i1, i1 0 + %nop4818 = alloca i1, i1 0 + %nop4819 = alloca i1, i1 0 + %nop4820 = alloca i1, i1 0 + %nop4821 = alloca i1, i1 0 + %nop4822 = alloca i1, i1 0 + %nop4823 = alloca i1, i1 0 + %nop4824 = alloca i1, i1 0 + %nop4825 = alloca i1, i1 0 + %nop4826 = alloca i1, i1 0 + %nop4827 = alloca i1, i1 0 + %nop4828 = alloca i1, i1 0 + %nop4829 = alloca i1, i1 0 + %nop4830 = alloca i1, i1 0 + %nop4831 = alloca i1, i1 0 + %nop4832 = alloca 
i1, i1 0 + %nop4833 = alloca i1, i1 0 + %nop4834 = alloca i1, i1 0 + %nop4835 = alloca i1, i1 0 + %nop4836 = alloca i1, i1 0 + %nop4837 = alloca i1, i1 0 + %nop4838 = alloca i1, i1 0 + %nop4839 = alloca i1, i1 0 + %nop4840 = alloca i1, i1 0 + %nop4841 = alloca i1, i1 0 + %nop4842 = alloca i1, i1 0 + %nop4843 = alloca i1, i1 0 + %nop4844 = alloca i1, i1 0 + %nop4845 = alloca i1, i1 0 + %nop4846 = alloca i1, i1 0 + %nop4847 = alloca i1, i1 0 + %nop4848 = alloca i1, i1 0 + %nop4849 = alloca i1, i1 0 + %nop4850 = alloca i1, i1 0 + %nop4851 = alloca i1, i1 0 + %nop4852 = alloca i1, i1 0 + %nop4853 = alloca i1, i1 0 + %nop4854 = alloca i1, i1 0 + %nop4855 = alloca i1, i1 0 + %nop4856 = alloca i1, i1 0 + %nop4857 = alloca i1, i1 0 + %nop4858 = alloca i1, i1 0 + %nop4859 = alloca i1, i1 0 + %nop4860 = alloca i1, i1 0 + %nop4861 = alloca i1, i1 0 + %nop4862 = alloca i1, i1 0 + %nop4863 = alloca i1, i1 0 + %nop4864 = alloca i1, i1 0 + %nop4865 = alloca i1, i1 0 + %nop4866 = alloca i1, i1 0 + %nop4867 = alloca i1, i1 0 + %nop4868 = alloca i1, i1 0 + %nop4869 = alloca i1, i1 0 + %nop4870 = alloca i1, i1 0 + %nop4871 = alloca i1, i1 0 + %nop4872 = alloca i1, i1 0 + %nop4873 = alloca i1, i1 0 + %nop4874 = alloca i1, i1 0 + %nop4875 = alloca i1, i1 0 + %nop4876 = alloca i1, i1 0 + %nop4877 = alloca i1, i1 0 + %nop4878 = alloca i1, i1 0 + %nop4879 = alloca i1, i1 0 + %nop4880 = alloca i1, i1 0 + %nop4881 = alloca i1, i1 0 + %nop4882 = alloca i1, i1 0 + %nop4883 = alloca i1, i1 0 + %nop4884 = alloca i1, i1 0 + %nop4885 = alloca i1, i1 0 + %nop4886 = alloca i1, i1 0 + %nop4887 = alloca i1, i1 0 + %nop4888 = alloca i1, i1 0 + %nop4889 = alloca i1, i1 0 + %nop4890 = alloca i1, i1 0 + %nop4891 = alloca i1, i1 0 + %nop4892 = alloca i1, i1 0 + %nop4893 = alloca i1, i1 0 + %nop4894 = alloca i1, i1 0 + %nop4895 = alloca i1, i1 0 + %nop4896 = alloca i1, i1 0 + %nop4897 = alloca i1, i1 0 + %nop4898 = alloca i1, i1 0 + %nop4899 = alloca i1, i1 0 + %nop4900 = alloca i1, i1 0 + %nop4901 = 
alloca i1, i1 0 + %nop4902 = alloca i1, i1 0 + %nop4903 = alloca i1, i1 0 + %nop4904 = alloca i1, i1 0 + %nop4905 = alloca i1, i1 0 + %nop4906 = alloca i1, i1 0 + %nop4907 = alloca i1, i1 0 + %nop4908 = alloca i1, i1 0 + %nop4909 = alloca i1, i1 0 + %nop4910 = alloca i1, i1 0 + %nop4911 = alloca i1, i1 0 + %nop4912 = alloca i1, i1 0 + %nop4913 = alloca i1, i1 0 + %nop4914 = alloca i1, i1 0 + %nop4915 = alloca i1, i1 0 + %nop4916 = alloca i1, i1 0 + %nop4917 = alloca i1, i1 0 + %nop4918 = alloca i1, i1 0 + %nop4919 = alloca i1, i1 0 + %nop4920 = alloca i1, i1 0 + %nop4921 = alloca i1, i1 0 + %nop4922 = alloca i1, i1 0 + %nop4923 = alloca i1, i1 0 + %nop4924 = alloca i1, i1 0 + %nop4925 = alloca i1, i1 0 + %nop4926 = alloca i1, i1 0 + %nop4927 = alloca i1, i1 0 + %nop4928 = alloca i1, i1 0 + %nop4929 = alloca i1, i1 0 + %nop4930 = alloca i1, i1 0 + %nop4931 = alloca i1, i1 0 + %nop4932 = alloca i1, i1 0 + %nop4933 = alloca i1, i1 0 + %nop4934 = alloca i1, i1 0 + %nop4935 = alloca i1, i1 0 + %nop4936 = alloca i1, i1 0 + %nop4937 = alloca i1, i1 0 + %nop4938 = alloca i1, i1 0 + %nop4939 = alloca i1, i1 0 + %nop4940 = alloca i1, i1 0 + %nop4941 = alloca i1, i1 0 + %nop4942 = alloca i1, i1 0 + %nop4943 = alloca i1, i1 0 + %nop4944 = alloca i1, i1 0 + %nop4945 = alloca i1, i1 0 + %nop4946 = alloca i1, i1 0 + %nop4947 = alloca i1, i1 0 + %nop4948 = alloca i1, i1 0 + %nop4949 = alloca i1, i1 0 + %nop4950 = alloca i1, i1 0 + %nop4951 = alloca i1, i1 0 + %nop4952 = alloca i1, i1 0 + %nop4953 = alloca i1, i1 0 + %nop4954 = alloca i1, i1 0 + %nop4955 = alloca i1, i1 0 + %nop4956 = alloca i1, i1 0 + %nop4957 = alloca i1, i1 0 + %nop4958 = alloca i1, i1 0 + %nop4959 = alloca i1, i1 0 + %nop4960 = alloca i1, i1 0 + %nop4961 = alloca i1, i1 0 + %nop4962 = alloca i1, i1 0 + %nop4963 = alloca i1, i1 0 + %nop4964 = alloca i1, i1 0 + %nop4965 = alloca i1, i1 0 + %nop4966 = alloca i1, i1 0 + %nop4967 = alloca i1, i1 0 + %nop4968 = alloca i1, i1 0 + %nop4969 = alloca i1, i1 0 + %nop4970 
= alloca i1, i1 0 + %nop4971 = alloca i1, i1 0 + %nop4972 = alloca i1, i1 0 + %nop4973 = alloca i1, i1 0 + %nop4974 = alloca i1, i1 0 + %nop4975 = alloca i1, i1 0 + %nop4976 = alloca i1, i1 0 + %nop4977 = alloca i1, i1 0 + %nop4978 = alloca i1, i1 0 + %nop4979 = alloca i1, i1 0 + %nop4980 = alloca i1, i1 0 + %nop4981 = alloca i1, i1 0 + %nop4982 = alloca i1, i1 0 + %nop4983 = alloca i1, i1 0 + %nop4984 = alloca i1, i1 0 + %nop4985 = alloca i1, i1 0 + %nop4986 = alloca i1, i1 0 + %nop4987 = alloca i1, i1 0 + %nop4988 = alloca i1, i1 0 + %nop4989 = alloca i1, i1 0 + %nop4990 = alloca i1, i1 0 + %nop4991 = alloca i1, i1 0 + %nop4992 = alloca i1, i1 0 + %nop4993 = alloca i1, i1 0 + %nop4994 = alloca i1, i1 0 + %nop4995 = alloca i1, i1 0 + %nop4996 = alloca i1, i1 0 + %nop4997 = alloca i1, i1 0 + %nop4998 = alloca i1, i1 0 + %nop4999 = alloca i1, i1 0 + %nop5000 = alloca i1, i1 0 + %nop5001 = alloca i1, i1 0 + %nop5002 = alloca i1, i1 0 + %nop5003 = alloca i1, i1 0 + %nop5004 = alloca i1, i1 0 + %nop5005 = alloca i1, i1 0 + %nop5006 = alloca i1, i1 0 + %nop5007 = alloca i1, i1 0 + %nop5008 = alloca i1, i1 0 + %nop5009 = alloca i1, i1 0 + %nop5010 = alloca i1, i1 0 + %nop5011 = alloca i1, i1 0 + %nop5012 = alloca i1, i1 0 + %nop5013 = alloca i1, i1 0 + %nop5014 = alloca i1, i1 0 + %nop5015 = alloca i1, i1 0 + %nop5016 = alloca i1, i1 0 + %nop5017 = alloca i1, i1 0 + %nop5018 = alloca i1, i1 0 + %nop5019 = alloca i1, i1 0 + %nop5020 = alloca i1, i1 0 + %nop5021 = alloca i1, i1 0 + %nop5022 = alloca i1, i1 0 + %nop5023 = alloca i1, i1 0 + %nop5024 = alloca i1, i1 0 + %nop5025 = alloca i1, i1 0 + %nop5026 = alloca i1, i1 0 + %nop5027 = alloca i1, i1 0 + %nop5028 = alloca i1, i1 0 + %nop5029 = alloca i1, i1 0 + %nop5030 = alloca i1, i1 0 + %nop5031 = alloca i1, i1 0 + %nop5032 = alloca i1, i1 0 + %nop5033 = alloca i1, i1 0 + %nop5034 = alloca i1, i1 0 + %nop5035 = alloca i1, i1 0 + %nop5036 = alloca i1, i1 0 + %nop5037 = alloca i1, i1 0 + %nop5038 = alloca i1, i1 0 + 
%nop5039 = alloca i1, i1 0 + %nop5040 = alloca i1, i1 0 + %nop5041 = alloca i1, i1 0 + %nop5042 = alloca i1, i1 0 + %nop5043 = alloca i1, i1 0 + %nop5044 = alloca i1, i1 0 + %nop5045 = alloca i1, i1 0 + %nop5046 = alloca i1, i1 0 + %nop5047 = alloca i1, i1 0 + %nop5048 = alloca i1, i1 0 + %nop5049 = alloca i1, i1 0 + %nop5050 = alloca i1, i1 0 + %nop5051 = alloca i1, i1 0 + %nop5052 = alloca i1, i1 0 + %nop5053 = alloca i1, i1 0 + %nop5054 = alloca i1, i1 0 + %nop5055 = alloca i1, i1 0 + %nop5056 = alloca i1, i1 0 + %nop5057 = alloca i1, i1 0 + %nop5058 = alloca i1, i1 0 + %nop5059 = alloca i1, i1 0 + %nop5060 = alloca i1, i1 0 + %nop5061 = alloca i1, i1 0 + %nop5062 = alloca i1, i1 0 + %nop5063 = alloca i1, i1 0 + %nop5064 = alloca i1, i1 0 + %nop5065 = alloca i1, i1 0 + %nop5066 = alloca i1, i1 0 + %nop5067 = alloca i1, i1 0 + %nop5068 = alloca i1, i1 0 + %nop5069 = alloca i1, i1 0 + %nop5070 = alloca i1, i1 0 + %nop5071 = alloca i1, i1 0 + %nop5072 = alloca i1, i1 0 + %nop5073 = alloca i1, i1 0 + %nop5074 = alloca i1, i1 0 + %nop5075 = alloca i1, i1 0 + %nop5076 = alloca i1, i1 0 + %nop5077 = alloca i1, i1 0 + %nop5078 = alloca i1, i1 0 + %nop5079 = alloca i1, i1 0 + %nop5080 = alloca i1, i1 0 + %nop5081 = alloca i1, i1 0 + %nop5082 = alloca i1, i1 0 + %nop5083 = alloca i1, i1 0 + %nop5084 = alloca i1, i1 0 + %nop5085 = alloca i1, i1 0 + %nop5086 = alloca i1, i1 0 + %nop5087 = alloca i1, i1 0 + %nop5088 = alloca i1, i1 0 + %nop5089 = alloca i1, i1 0 + %nop5090 = alloca i1, i1 0 + %nop5091 = alloca i1, i1 0 + %nop5092 = alloca i1, i1 0 + %nop5093 = alloca i1, i1 0 + %nop5094 = alloca i1, i1 0 + %nop5095 = alloca i1, i1 0 + %nop5096 = alloca i1, i1 0 + %nop5097 = alloca i1, i1 0 + %nop5098 = alloca i1, i1 0 + %nop5099 = alloca i1, i1 0 + %nop5100 = alloca i1, i1 0 + %nop5101 = alloca i1, i1 0 + %nop5102 = alloca i1, i1 0 + %nop5103 = alloca i1, i1 0 + %nop5104 = alloca i1, i1 0 + %nop5105 = alloca i1, i1 0 + %nop5106 = alloca i1, i1 0 + %nop5107 = alloca i1, i1 0 
+ %nop5108 = alloca i1, i1 0 + %nop5109 = alloca i1, i1 0 + %nop5110 = alloca i1, i1 0 + %nop5111 = alloca i1, i1 0 + %nop5112 = alloca i1, i1 0 + %nop5113 = alloca i1, i1 0 + %nop5114 = alloca i1, i1 0 + %nop5115 = alloca i1, i1 0 + %nop5116 = alloca i1, i1 0 + %nop5117 = alloca i1, i1 0 + %nop5118 = alloca i1, i1 0 + %nop5119 = alloca i1, i1 0 + %nop5120 = alloca i1, i1 0 + %nop5121 = alloca i1, i1 0 + %nop5122 = alloca i1, i1 0 + %nop5123 = alloca i1, i1 0 + %nop5124 = alloca i1, i1 0 + %nop5125 = alloca i1, i1 0 + %nop5126 = alloca i1, i1 0 + %nop5127 = alloca i1, i1 0 + %nop5128 = alloca i1, i1 0 + %nop5129 = alloca i1, i1 0 + %nop5130 = alloca i1, i1 0 + %nop5131 = alloca i1, i1 0 + %nop5132 = alloca i1, i1 0 + %nop5133 = alloca i1, i1 0 + %nop5134 = alloca i1, i1 0 + %nop5135 = alloca i1, i1 0 + %nop5136 = alloca i1, i1 0 + %nop5137 = alloca i1, i1 0 + %nop5138 = alloca i1, i1 0 + %nop5139 = alloca i1, i1 0 + %nop5140 = alloca i1, i1 0 + %nop5141 = alloca i1, i1 0 + %nop5142 = alloca i1, i1 0 + %nop5143 = alloca i1, i1 0 + %nop5144 = alloca i1, i1 0 + %nop5145 = alloca i1, i1 0 + %nop5146 = alloca i1, i1 0 + %nop5147 = alloca i1, i1 0 + %nop5148 = alloca i1, i1 0 + %nop5149 = alloca i1, i1 0 + %nop5150 = alloca i1, i1 0 + %nop5151 = alloca i1, i1 0 + %nop5152 = alloca i1, i1 0 + %nop5153 = alloca i1, i1 0 + %nop5154 = alloca i1, i1 0 + %nop5155 = alloca i1, i1 0 + %nop5156 = alloca i1, i1 0 + %nop5157 = alloca i1, i1 0 + %nop5158 = alloca i1, i1 0 + %nop5159 = alloca i1, i1 0 + %nop5160 = alloca i1, i1 0 + %nop5161 = alloca i1, i1 0 + %nop5162 = alloca i1, i1 0 + %nop5163 = alloca i1, i1 0 + %nop5164 = alloca i1, i1 0 + %nop5165 = alloca i1, i1 0 + %nop5166 = alloca i1, i1 0 + %nop5167 = alloca i1, i1 0 + %nop5168 = alloca i1, i1 0 + %nop5169 = alloca i1, i1 0 + %nop5170 = alloca i1, i1 0 + %nop5171 = alloca i1, i1 0 + %nop5172 = alloca i1, i1 0 + %nop5173 = alloca i1, i1 0 + %nop5174 = alloca i1, i1 0 + %nop5175 = alloca i1, i1 0 + %nop5176 = alloca i1, i1 
0 + %nop5177 = alloca i1, i1 0 + %nop5178 = alloca i1, i1 0 + %nop5179 = alloca i1, i1 0 + %nop5180 = alloca i1, i1 0 + %nop5181 = alloca i1, i1 0 + %nop5182 = alloca i1, i1 0 + %nop5183 = alloca i1, i1 0 + %nop5184 = alloca i1, i1 0 + %nop5185 = alloca i1, i1 0 + %nop5186 = alloca i1, i1 0 + %nop5187 = alloca i1, i1 0 + %nop5188 = alloca i1, i1 0 + %nop5189 = alloca i1, i1 0 + %nop5190 = alloca i1, i1 0 + %nop5191 = alloca i1, i1 0 + %nop5192 = alloca i1, i1 0 + %nop5193 = alloca i1, i1 0 + %nop5194 = alloca i1, i1 0 + %nop5195 = alloca i1, i1 0 + %nop5196 = alloca i1, i1 0 + %nop5197 = alloca i1, i1 0 + %nop5198 = alloca i1, i1 0 + %nop5199 = alloca i1, i1 0 + %nop5200 = alloca i1, i1 0 + %nop5201 = alloca i1, i1 0 + %nop5202 = alloca i1, i1 0 + %nop5203 = alloca i1, i1 0 + %nop5204 = alloca i1, i1 0 + %nop5205 = alloca i1, i1 0 + %nop5206 = alloca i1, i1 0 + %nop5207 = alloca i1, i1 0 + %nop5208 = alloca i1, i1 0 + %nop5209 = alloca i1, i1 0 + %nop5210 = alloca i1, i1 0 + %nop5211 = alloca i1, i1 0 + %nop5212 = alloca i1, i1 0 + %nop5213 = alloca i1, i1 0 + %nop5214 = alloca i1, i1 0 + %nop5215 = alloca i1, i1 0 + %nop5216 = alloca i1, i1 0 + %nop5217 = alloca i1, i1 0 + %nop5218 = alloca i1, i1 0 + %nop5219 = alloca i1, i1 0 + %nop5220 = alloca i1, i1 0 + %nop5221 = alloca i1, i1 0 + %nop5222 = alloca i1, i1 0 + %nop5223 = alloca i1, i1 0 + %nop5224 = alloca i1, i1 0 + %nop5225 = alloca i1, i1 0 + %nop5226 = alloca i1, i1 0 + %nop5227 = alloca i1, i1 0 + %nop5228 = alloca i1, i1 0 + %nop5229 = alloca i1, i1 0 + %nop5230 = alloca i1, i1 0 + %nop5231 = alloca i1, i1 0 + %nop5232 = alloca i1, i1 0 + %nop5233 = alloca i1, i1 0 + %nop5234 = alloca i1, i1 0 + %nop5235 = alloca i1, i1 0 + %nop5236 = alloca i1, i1 0 + %nop5237 = alloca i1, i1 0 + %nop5238 = alloca i1, i1 0 + %nop5239 = alloca i1, i1 0 + %nop5240 = alloca i1, i1 0 + %nop5241 = alloca i1, i1 0 + %nop5242 = alloca i1, i1 0 + %nop5243 = alloca i1, i1 0 + %nop5244 = alloca i1, i1 0 + %nop5245 = alloca i1, 
i1 0 + %nop5246 = alloca i1, i1 0 + %nop5247 = alloca i1, i1 0 + %nop5248 = alloca i1, i1 0 + %nop5249 = alloca i1, i1 0 + %nop5250 = alloca i1, i1 0 + %nop5251 = alloca i1, i1 0 + %nop5252 = alloca i1, i1 0 + %nop5253 = alloca i1, i1 0 + %nop5254 = alloca i1, i1 0 + %nop5255 = alloca i1, i1 0 + %nop5256 = alloca i1, i1 0 + %nop5257 = alloca i1, i1 0 + %nop5258 = alloca i1, i1 0 + %nop5259 = alloca i1, i1 0 + %nop5260 = alloca i1, i1 0 + %nop5261 = alloca i1, i1 0 + %nop5262 = alloca i1, i1 0 + %nop5263 = alloca i1, i1 0 + %nop5264 = alloca i1, i1 0 + %nop5265 = alloca i1, i1 0 + %nop5266 = alloca i1, i1 0 + %nop5267 = alloca i1, i1 0 + %nop5268 = alloca i1, i1 0 + %nop5269 = alloca i1, i1 0 + %nop5270 = alloca i1, i1 0 + %nop5271 = alloca i1, i1 0 + %nop5272 = alloca i1, i1 0 + %nop5273 = alloca i1, i1 0 + %nop5274 = alloca i1, i1 0 + %nop5275 = alloca i1, i1 0 + %nop5276 = alloca i1, i1 0 + %nop5277 = alloca i1, i1 0 + %nop5278 = alloca i1, i1 0 + %nop5279 = alloca i1, i1 0 + %nop5280 = alloca i1, i1 0 + %nop5281 = alloca i1, i1 0 + %nop5282 = alloca i1, i1 0 + %nop5283 = alloca i1, i1 0 + %nop5284 = alloca i1, i1 0 + %nop5285 = alloca i1, i1 0 + %nop5286 = alloca i1, i1 0 + %nop5287 = alloca i1, i1 0 + %nop5288 = alloca i1, i1 0 + %nop5289 = alloca i1, i1 0 + %nop5290 = alloca i1, i1 0 + %nop5291 = alloca i1, i1 0 + %nop5292 = alloca i1, i1 0 + %nop5293 = alloca i1, i1 0 + %nop5294 = alloca i1, i1 0 + %nop5295 = alloca i1, i1 0 + %nop5296 = alloca i1, i1 0 + %nop5297 = alloca i1, i1 0 + %nop5298 = alloca i1, i1 0 + %nop5299 = alloca i1, i1 0 + %nop5300 = alloca i1, i1 0 + %nop5301 = alloca i1, i1 0 + %nop5302 = alloca i1, i1 0 + %nop5303 = alloca i1, i1 0 + %nop5304 = alloca i1, i1 0 + %nop5305 = alloca i1, i1 0 + %nop5306 = alloca i1, i1 0 + %nop5307 = alloca i1, i1 0 + %nop5308 = alloca i1, i1 0 + %nop5309 = alloca i1, i1 0 + %nop5310 = alloca i1, i1 0 + %nop5311 = alloca i1, i1 0 + %nop5312 = alloca i1, i1 0 + %nop5313 = alloca i1, i1 0 + %nop5314 = alloca 
i1, i1 0 + %nop5315 = alloca i1, i1 0 + %nop5316 = alloca i1, i1 0 + %nop5317 = alloca i1, i1 0 + %nop5318 = alloca i1, i1 0 + %nop5319 = alloca i1, i1 0 + %nop5320 = alloca i1, i1 0 + %nop5321 = alloca i1, i1 0 + %nop5322 = alloca i1, i1 0 + %nop5323 = alloca i1, i1 0 + %nop5324 = alloca i1, i1 0 + %nop5325 = alloca i1, i1 0 + %nop5326 = alloca i1, i1 0 + %nop5327 = alloca i1, i1 0 + %nop5328 = alloca i1, i1 0 + %nop5329 = alloca i1, i1 0 + %nop5330 = alloca i1, i1 0 + %nop5331 = alloca i1, i1 0 + %nop5332 = alloca i1, i1 0 + %nop5333 = alloca i1, i1 0 + %nop5334 = alloca i1, i1 0 + %nop5335 = alloca i1, i1 0 + %nop5336 = alloca i1, i1 0 + %nop5337 = alloca i1, i1 0 + %nop5338 = alloca i1, i1 0 + %nop5339 = alloca i1, i1 0 + %nop5340 = alloca i1, i1 0 + %nop5341 = alloca i1, i1 0 + %nop5342 = alloca i1, i1 0 + %nop5343 = alloca i1, i1 0 + %nop5344 = alloca i1, i1 0 + %nop5345 = alloca i1, i1 0 + %nop5346 = alloca i1, i1 0 + %nop5347 = alloca i1, i1 0 + %nop5348 = alloca i1, i1 0 + %nop5349 = alloca i1, i1 0 + %nop5350 = alloca i1, i1 0 + %nop5351 = alloca i1, i1 0 + %nop5352 = alloca i1, i1 0 + %nop5353 = alloca i1, i1 0 + %nop5354 = alloca i1, i1 0 + %nop5355 = alloca i1, i1 0 + %nop5356 = alloca i1, i1 0 + %nop5357 = alloca i1, i1 0 + %nop5358 = alloca i1, i1 0 + %nop5359 = alloca i1, i1 0 + %nop5360 = alloca i1, i1 0 + %nop5361 = alloca i1, i1 0 + %nop5362 = alloca i1, i1 0 + %nop5363 = alloca i1, i1 0 + %nop5364 = alloca i1, i1 0 + %nop5365 = alloca i1, i1 0 + %nop5366 = alloca i1, i1 0 + %nop5367 = alloca i1, i1 0 + %nop5368 = alloca i1, i1 0 + %nop5369 = alloca i1, i1 0 + %nop5370 = alloca i1, i1 0 + %nop5371 = alloca i1, i1 0 + %nop5372 = alloca i1, i1 0 + %nop5373 = alloca i1, i1 0 + %nop5374 = alloca i1, i1 0 + %nop5375 = alloca i1, i1 0 + %nop5376 = alloca i1, i1 0 + %nop5377 = alloca i1, i1 0 + %nop5378 = alloca i1, i1 0 + %nop5379 = alloca i1, i1 0 + %nop5380 = alloca i1, i1 0 + %nop5381 = alloca i1, i1 0 + %nop5382 = alloca i1, i1 0 + %nop5383 = 
alloca i1, i1 0 + %nop5384 = alloca i1, i1 0 + %nop5385 = alloca i1, i1 0 + %nop5386 = alloca i1, i1 0 + %nop5387 = alloca i1, i1 0 + %nop5388 = alloca i1, i1 0 + %nop5389 = alloca i1, i1 0 + %nop5390 = alloca i1, i1 0 + %nop5391 = alloca i1, i1 0 + %nop5392 = alloca i1, i1 0 + %nop5393 = alloca i1, i1 0 + %nop5394 = alloca i1, i1 0 + %nop5395 = alloca i1, i1 0 + %nop5396 = alloca i1, i1 0 + %nop5397 = alloca i1, i1 0 + %nop5398 = alloca i1, i1 0 + %nop5399 = alloca i1, i1 0 + %nop5400 = alloca i1, i1 0 + %nop5401 = alloca i1, i1 0 + %nop5402 = alloca i1, i1 0 + %nop5403 = alloca i1, i1 0 + %nop5404 = alloca i1, i1 0 + %nop5405 = alloca i1, i1 0 + %nop5406 = alloca i1, i1 0 + %nop5407 = alloca i1, i1 0 + %nop5408 = alloca i1, i1 0 + %nop5409 = alloca i1, i1 0 + %nop5410 = alloca i1, i1 0 + %nop5411 = alloca i1, i1 0 + %nop5412 = alloca i1, i1 0 + %nop5413 = alloca i1, i1 0 + %nop5414 = alloca i1, i1 0 + %nop5415 = alloca i1, i1 0 + %nop5416 = alloca i1, i1 0 + %nop5417 = alloca i1, i1 0 + %nop5418 = alloca i1, i1 0 + %nop5419 = alloca i1, i1 0 + %nop5420 = alloca i1, i1 0 + %nop5421 = alloca i1, i1 0 + %nop5422 = alloca i1, i1 0 + %nop5423 = alloca i1, i1 0 + %nop5424 = alloca i1, i1 0 + %nop5425 = alloca i1, i1 0 + %nop5426 = alloca i1, i1 0 + %nop5427 = alloca i1, i1 0 + %nop5428 = alloca i1, i1 0 + %nop5429 = alloca i1, i1 0 + %nop5430 = alloca i1, i1 0 + %nop5431 = alloca i1, i1 0 + %nop5432 = alloca i1, i1 0 + %nop5433 = alloca i1, i1 0 + %nop5434 = alloca i1, i1 0 + %nop5435 = alloca i1, i1 0 + %nop5436 = alloca i1, i1 0 + %nop5437 = alloca i1, i1 0 + %nop5438 = alloca i1, i1 0 + %nop5439 = alloca i1, i1 0 + %nop5440 = alloca i1, i1 0 + %nop5441 = alloca i1, i1 0 + %nop5442 = alloca i1, i1 0 + %nop5443 = alloca i1, i1 0 + %nop5444 = alloca i1, i1 0 + %nop5445 = alloca i1, i1 0 + %nop5446 = alloca i1, i1 0 + %nop5447 = alloca i1, i1 0 + %nop5448 = alloca i1, i1 0 + %nop5449 = alloca i1, i1 0 + %nop5450 = alloca i1, i1 0 + %nop5451 = alloca i1, i1 0 + %nop5452 
= alloca i1, i1 0 + %nop5453 = alloca i1, i1 0 + %nop5454 = alloca i1, i1 0 + %nop5455 = alloca i1, i1 0 + %nop5456 = alloca i1, i1 0 + %nop5457 = alloca i1, i1 0 + %nop5458 = alloca i1, i1 0 + %nop5459 = alloca i1, i1 0 + %nop5460 = alloca i1, i1 0 + %nop5461 = alloca i1, i1 0 + %nop5462 = alloca i1, i1 0 + %nop5463 = alloca i1, i1 0 + %nop5464 = alloca i1, i1 0 + %nop5465 = alloca i1, i1 0 + %nop5466 = alloca i1, i1 0 + %nop5467 = alloca i1, i1 0 + %nop5468 = alloca i1, i1 0 + %nop5469 = alloca i1, i1 0 + %nop5470 = alloca i1, i1 0 + %nop5471 = alloca i1, i1 0 + %nop5472 = alloca i1, i1 0 + %nop5473 = alloca i1, i1 0 + %nop5474 = alloca i1, i1 0 + %nop5475 = alloca i1, i1 0 + %nop5476 = alloca i1, i1 0 + %nop5477 = alloca i1, i1 0 + %nop5478 = alloca i1, i1 0 + %nop5479 = alloca i1, i1 0 + %nop5480 = alloca i1, i1 0 + %nop5481 = alloca i1, i1 0 + %nop5482 = alloca i1, i1 0 + %nop5483 = alloca i1, i1 0 + %nop5484 = alloca i1, i1 0 + %nop5485 = alloca i1, i1 0 + %nop5486 = alloca i1, i1 0 + %nop5487 = alloca i1, i1 0 + %nop5488 = alloca i1, i1 0 + %nop5489 = alloca i1, i1 0 + %nop5490 = alloca i1, i1 0 + %nop5491 = alloca i1, i1 0 + %nop5492 = alloca i1, i1 0 + %nop5493 = alloca i1, i1 0 + %nop5494 = alloca i1, i1 0 + %nop5495 = alloca i1, i1 0 + %nop5496 = alloca i1, i1 0 + %nop5497 = alloca i1, i1 0 + %nop5498 = alloca i1, i1 0 + %nop5499 = alloca i1, i1 0 + %nop5500 = alloca i1, i1 0 + %nop5501 = alloca i1, i1 0 + %nop5502 = alloca i1, i1 0 + %nop5503 = alloca i1, i1 0 + %nop5504 = alloca i1, i1 0 + %nop5505 = alloca i1, i1 0 + %nop5506 = alloca i1, i1 0 + %nop5507 = alloca i1, i1 0 + %nop5508 = alloca i1, i1 0 + %nop5509 = alloca i1, i1 0 + %nop5510 = alloca i1, i1 0 + %nop5511 = alloca i1, i1 0 + %nop5512 = alloca i1, i1 0 + %nop5513 = alloca i1, i1 0 + %nop5514 = alloca i1, i1 0 + %nop5515 = alloca i1, i1 0 + %nop5516 = alloca i1, i1 0 + %nop5517 = alloca i1, i1 0 + %nop5518 = alloca i1, i1 0 + %nop5519 = alloca i1, i1 0 + %nop5520 = alloca i1, i1 0 + 
%nop5521 = alloca i1, i1 0 + %nop5522 = alloca i1, i1 0 + %nop5523 = alloca i1, i1 0 + %nop5524 = alloca i1, i1 0 + %nop5525 = alloca i1, i1 0 + %nop5526 = alloca i1, i1 0 + %nop5527 = alloca i1, i1 0 + %nop5528 = alloca i1, i1 0 + %nop5529 = alloca i1, i1 0 + %nop5530 = alloca i1, i1 0 + %nop5531 = alloca i1, i1 0 + %nop5532 = alloca i1, i1 0 + %nop5533 = alloca i1, i1 0 + %nop5534 = alloca i1, i1 0 + %nop5535 = alloca i1, i1 0 + %nop5536 = alloca i1, i1 0 + %nop5537 = alloca i1, i1 0 + %nop5538 = alloca i1, i1 0 + %nop5539 = alloca i1, i1 0 + %nop5540 = alloca i1, i1 0 + %nop5541 = alloca i1, i1 0 + %nop5542 = alloca i1, i1 0 + %nop5543 = alloca i1, i1 0 + %nop5544 = alloca i1, i1 0 + %nop5545 = alloca i1, i1 0 + %nop5546 = alloca i1, i1 0 + %nop5547 = alloca i1, i1 0 + %nop5548 = alloca i1, i1 0 + %nop5549 = alloca i1, i1 0 + %nop5550 = alloca i1, i1 0 + %nop5551 = alloca i1, i1 0 + %nop5552 = alloca i1, i1 0 + %nop5553 = alloca i1, i1 0 + %nop5554 = alloca i1, i1 0 + %nop5555 = alloca i1, i1 0 + %nop5556 = alloca i1, i1 0 + %nop5557 = alloca i1, i1 0 + %nop5558 = alloca i1, i1 0 + %nop5559 = alloca i1, i1 0 + %nop5560 = alloca i1, i1 0 + %nop5561 = alloca i1, i1 0 + %nop5562 = alloca i1, i1 0 + %nop5563 = alloca i1, i1 0 + %nop5564 = alloca i1, i1 0 + %nop5565 = alloca i1, i1 0 + %nop5566 = alloca i1, i1 0 + %nop5567 = alloca i1, i1 0 + %nop5568 = alloca i1, i1 0 + %nop5569 = alloca i1, i1 0 + %nop5570 = alloca i1, i1 0 + %nop5571 = alloca i1, i1 0 + %nop5572 = alloca i1, i1 0 + %nop5573 = alloca i1, i1 0 + %nop5574 = alloca i1, i1 0 + %nop5575 = alloca i1, i1 0 + %nop5576 = alloca i1, i1 0 + %nop5577 = alloca i1, i1 0 + %nop5578 = alloca i1, i1 0 + %nop5579 = alloca i1, i1 0 + %nop5580 = alloca i1, i1 0 + %nop5581 = alloca i1, i1 0 + %nop5582 = alloca i1, i1 0 + %nop5583 = alloca i1, i1 0 + %nop5584 = alloca i1, i1 0 + %nop5585 = alloca i1, i1 0 + %nop5586 = alloca i1, i1 0 + %nop5587 = alloca i1, i1 0 + %nop5588 = alloca i1, i1 0 + %nop5589 = alloca i1, i1 0 
+ %nop5590 = alloca i1, i1 0 + %nop5591 = alloca i1, i1 0 + %nop5592 = alloca i1, i1 0 + %nop5593 = alloca i1, i1 0 + %nop5594 = alloca i1, i1 0 + %nop5595 = alloca i1, i1 0 + %nop5596 = alloca i1, i1 0 + %nop5597 = alloca i1, i1 0 + %nop5598 = alloca i1, i1 0 + %nop5599 = alloca i1, i1 0 + %nop5600 = alloca i1, i1 0 + %nop5601 = alloca i1, i1 0 + %nop5602 = alloca i1, i1 0 + %nop5603 = alloca i1, i1 0 + %nop5604 = alloca i1, i1 0 + %nop5605 = alloca i1, i1 0 + %nop5606 = alloca i1, i1 0 + %nop5607 = alloca i1, i1 0 + %nop5608 = alloca i1, i1 0 + %nop5609 = alloca i1, i1 0 + %nop5610 = alloca i1, i1 0 + %nop5611 = alloca i1, i1 0 + %nop5612 = alloca i1, i1 0 + %nop5613 = alloca i1, i1 0 + %nop5614 = alloca i1, i1 0 + %nop5615 = alloca i1, i1 0 + %nop5616 = alloca i1, i1 0 + %nop5617 = alloca i1, i1 0 + %nop5618 = alloca i1, i1 0 + %nop5619 = alloca i1, i1 0 + %nop5620 = alloca i1, i1 0 + %nop5621 = alloca i1, i1 0 + %nop5622 = alloca i1, i1 0 + %nop5623 = alloca i1, i1 0 + %nop5624 = alloca i1, i1 0 + %nop5625 = alloca i1, i1 0 + %nop5626 = alloca i1, i1 0 + %nop5627 = alloca i1, i1 0 + %nop5628 = alloca i1, i1 0 + %nop5629 = alloca i1, i1 0 + %nop5630 = alloca i1, i1 0 + %nop5631 = alloca i1, i1 0 + %nop5632 = alloca i1, i1 0 + %nop5633 = alloca i1, i1 0 + %nop5634 = alloca i1, i1 0 + %nop5635 = alloca i1, i1 0 + %nop5636 = alloca i1, i1 0 + %nop5637 = alloca i1, i1 0 + %nop5638 = alloca i1, i1 0 + %nop5639 = alloca i1, i1 0 + %nop5640 = alloca i1, i1 0 + %nop5641 = alloca i1, i1 0 + %nop5642 = alloca i1, i1 0 + %nop5643 = alloca i1, i1 0 + %nop5644 = alloca i1, i1 0 + %nop5645 = alloca i1, i1 0 + %nop5646 = alloca i1, i1 0 + %nop5647 = alloca i1, i1 0 + %nop5648 = alloca i1, i1 0 + %nop5649 = alloca i1, i1 0 + %nop5650 = alloca i1, i1 0 + %nop5651 = alloca i1, i1 0 + %nop5652 = alloca i1, i1 0 + %nop5653 = alloca i1, i1 0 + %nop5654 = alloca i1, i1 0 + %nop5655 = alloca i1, i1 0 + %nop5656 = alloca i1, i1 0 + %nop5657 = alloca i1, i1 0 + %nop5658 = alloca i1, i1 
0 + %nop5659 = alloca i1, i1 0 + %nop5660 = alloca i1, i1 0 + %nop5661 = alloca i1, i1 0 + %nop5662 = alloca i1, i1 0 + %nop5663 = alloca i1, i1 0 + %nop5664 = alloca i1, i1 0 + %nop5665 = alloca i1, i1 0 + %nop5666 = alloca i1, i1 0 + %nop5667 = alloca i1, i1 0 + %nop5668 = alloca i1, i1 0 + %nop5669 = alloca i1, i1 0 + %nop5670 = alloca i1, i1 0 + %nop5671 = alloca i1, i1 0 + %nop5672 = alloca i1, i1 0 + %nop5673 = alloca i1, i1 0 + %nop5674 = alloca i1, i1 0 + %nop5675 = alloca i1, i1 0 + %nop5676 = alloca i1, i1 0 + %nop5677 = alloca i1, i1 0 + %nop5678 = alloca i1, i1 0 + %nop5679 = alloca i1, i1 0 + %nop5680 = alloca i1, i1 0 + %nop5681 = alloca i1, i1 0 + %nop5682 = alloca i1, i1 0 + %nop5683 = alloca i1, i1 0 + %nop5684 = alloca i1, i1 0 + %nop5685 = alloca i1, i1 0 + %nop5686 = alloca i1, i1 0 + %nop5687 = alloca i1, i1 0 + %nop5688 = alloca i1, i1 0 + %nop5689 = alloca i1, i1 0 + %nop5690 = alloca i1, i1 0 + %nop5691 = alloca i1, i1 0 + %nop5692 = alloca i1, i1 0 + %nop5693 = alloca i1, i1 0 + %nop5694 = alloca i1, i1 0 + %nop5695 = alloca i1, i1 0 + %nop5696 = alloca i1, i1 0 + %nop5697 = alloca i1, i1 0 + %nop5698 = alloca i1, i1 0 + %nop5699 = alloca i1, i1 0 + %nop5700 = alloca i1, i1 0 + %nop5701 = alloca i1, i1 0 + %nop5702 = alloca i1, i1 0 + %nop5703 = alloca i1, i1 0 + %nop5704 = alloca i1, i1 0 + %nop5705 = alloca i1, i1 0 + %nop5706 = alloca i1, i1 0 + %nop5707 = alloca i1, i1 0 + %nop5708 = alloca i1, i1 0 + %nop5709 = alloca i1, i1 0 + %nop5710 = alloca i1, i1 0 + %nop5711 = alloca i1, i1 0 + %nop5712 = alloca i1, i1 0 + %nop5713 = alloca i1, i1 0 + %nop5714 = alloca i1, i1 0 + %nop5715 = alloca i1, i1 0 + %nop5716 = alloca i1, i1 0 + %nop5717 = alloca i1, i1 0 + %nop5718 = alloca i1, i1 0 + %nop5719 = alloca i1, i1 0 + %nop5720 = alloca i1, i1 0 + %nop5721 = alloca i1, i1 0 + %nop5722 = alloca i1, i1 0 + %nop5723 = alloca i1, i1 0 + %nop5724 = alloca i1, i1 0 + %nop5725 = alloca i1, i1 0 + %nop5726 = alloca i1, i1 0 + %nop5727 = alloca i1, 
i1 0 + %nop5728 = alloca i1, i1 0 + %nop5729 = alloca i1, i1 0 + %nop5730 = alloca i1, i1 0 + %nop5731 = alloca i1, i1 0 + %nop5732 = alloca i1, i1 0 + %nop5733 = alloca i1, i1 0 + %nop5734 = alloca i1, i1 0 + %nop5735 = alloca i1, i1 0 + %nop5736 = alloca i1, i1 0 + %nop5737 = alloca i1, i1 0 + %nop5738 = alloca i1, i1 0 + %nop5739 = alloca i1, i1 0 + %nop5740 = alloca i1, i1 0 + %nop5741 = alloca i1, i1 0 + %nop5742 = alloca i1, i1 0 + %nop5743 = alloca i1, i1 0 + %nop5744 = alloca i1, i1 0 + %nop5745 = alloca i1, i1 0 + %nop5746 = alloca i1, i1 0 + %nop5747 = alloca i1, i1 0 + %nop5748 = alloca i1, i1 0 + %nop5749 = alloca i1, i1 0 + %nop5750 = alloca i1, i1 0 + %nop5751 = alloca i1, i1 0 + %nop5752 = alloca i1, i1 0 + %nop5753 = alloca i1, i1 0 + %nop5754 = alloca i1, i1 0 + %nop5755 = alloca i1, i1 0 + %nop5756 = alloca i1, i1 0 + %nop5757 = alloca i1, i1 0 + %nop5758 = alloca i1, i1 0 + %nop5759 = alloca i1, i1 0 + %nop5760 = alloca i1, i1 0 + %nop5761 = alloca i1, i1 0 + %nop5762 = alloca i1, i1 0 + %nop5763 = alloca i1, i1 0 + %nop5764 = alloca i1, i1 0 + %nop5765 = alloca i1, i1 0 + %nop5766 = alloca i1, i1 0 + %nop5767 = alloca i1, i1 0 + %nop5768 = alloca i1, i1 0 + %nop5769 = alloca i1, i1 0 + %nop5770 = alloca i1, i1 0 + %nop5771 = alloca i1, i1 0 + %nop5772 = alloca i1, i1 0 + %nop5773 = alloca i1, i1 0 + %nop5774 = alloca i1, i1 0 + %nop5775 = alloca i1, i1 0 + %nop5776 = alloca i1, i1 0 + %nop5777 = alloca i1, i1 0 + %nop5778 = alloca i1, i1 0 + %nop5779 = alloca i1, i1 0 + %nop5780 = alloca i1, i1 0 + %nop5781 = alloca i1, i1 0 + %nop5782 = alloca i1, i1 0 + %nop5783 = alloca i1, i1 0 + %nop5784 = alloca i1, i1 0 + %nop5785 = alloca i1, i1 0 + %nop5786 = alloca i1, i1 0 + %nop5787 = alloca i1, i1 0 + %nop5788 = alloca i1, i1 0 + %nop5789 = alloca i1, i1 0 + %nop5790 = alloca i1, i1 0 + %nop5791 = alloca i1, i1 0 + %nop5792 = alloca i1, i1 0 + %nop5793 = alloca i1, i1 0 + %nop5794 = alloca i1, i1 0 + %nop5795 = alloca i1, i1 0 + %nop5796 = alloca 
i1, i1 0 + %nop5797 = alloca i1, i1 0 + %nop5798 = alloca i1, i1 0 + %nop5799 = alloca i1, i1 0 + %nop5800 = alloca i1, i1 0 + %nop5801 = alloca i1, i1 0 + %nop5802 = alloca i1, i1 0 + %nop5803 = alloca i1, i1 0 + %nop5804 = alloca i1, i1 0 + %nop5805 = alloca i1, i1 0 + %nop5806 = alloca i1, i1 0 + %nop5807 = alloca i1, i1 0 + %nop5808 = alloca i1, i1 0 + %nop5809 = alloca i1, i1 0 + %nop5810 = alloca i1, i1 0 + %nop5811 = alloca i1, i1 0 + %nop5812 = alloca i1, i1 0 + %nop5813 = alloca i1, i1 0 + %nop5814 = alloca i1, i1 0 + %nop5815 = alloca i1, i1 0 + %nop5816 = alloca i1, i1 0 + %nop5817 = alloca i1, i1 0 + %nop5818 = alloca i1, i1 0 + %nop5819 = alloca i1, i1 0 + %nop5820 = alloca i1, i1 0 + %nop5821 = alloca i1, i1 0 + %nop5822 = alloca i1, i1 0 + %nop5823 = alloca i1, i1 0 + %nop5824 = alloca i1, i1 0 + %nop5825 = alloca i1, i1 0 + %nop5826 = alloca i1, i1 0 + %nop5827 = alloca i1, i1 0 + %nop5828 = alloca i1, i1 0 + %nop5829 = alloca i1, i1 0 + %nop5830 = alloca i1, i1 0 + %nop5831 = alloca i1, i1 0 + %nop5832 = alloca i1, i1 0 + %nop5833 = alloca i1, i1 0 + %nop5834 = alloca i1, i1 0 + %nop5835 = alloca i1, i1 0 + %nop5836 = alloca i1, i1 0 + %nop5837 = alloca i1, i1 0 + %nop5838 = alloca i1, i1 0 + %nop5839 = alloca i1, i1 0 + %nop5840 = alloca i1, i1 0 + %nop5841 = alloca i1, i1 0 + %nop5842 = alloca i1, i1 0 + %nop5843 = alloca i1, i1 0 + %nop5844 = alloca i1, i1 0 + %nop5845 = alloca i1, i1 0 + %nop5846 = alloca i1, i1 0 + %nop5847 = alloca i1, i1 0 + %nop5848 = alloca i1, i1 0 + %nop5849 = alloca i1, i1 0 + %nop5850 = alloca i1, i1 0 + %nop5851 = alloca i1, i1 0 + %nop5852 = alloca i1, i1 0 + %nop5853 = alloca i1, i1 0 + %nop5854 = alloca i1, i1 0 + %nop5855 = alloca i1, i1 0 + %nop5856 = alloca i1, i1 0 + %nop5857 = alloca i1, i1 0 + %nop5858 = alloca i1, i1 0 + %nop5859 = alloca i1, i1 0 + %nop5860 = alloca i1, i1 0 + %nop5861 = alloca i1, i1 0 + %nop5862 = alloca i1, i1 0 + %nop5863 = alloca i1, i1 0 + %nop5864 = alloca i1, i1 0 + %nop5865 = 
alloca i1, i1 0 + %nop5866 = alloca i1, i1 0 + %nop5867 = alloca i1, i1 0 + %nop5868 = alloca i1, i1 0 + %nop5869 = alloca i1, i1 0 + %nop5870 = alloca i1, i1 0 + %nop5871 = alloca i1, i1 0 + %nop5872 = alloca i1, i1 0 + %nop5873 = alloca i1, i1 0 + %nop5874 = alloca i1, i1 0 + %nop5875 = alloca i1, i1 0 + %nop5876 = alloca i1, i1 0 + %nop5877 = alloca i1, i1 0 + %nop5878 = alloca i1, i1 0 + %nop5879 = alloca i1, i1 0 + %nop5880 = alloca i1, i1 0 + %nop5881 = alloca i1, i1 0 + %nop5882 = alloca i1, i1 0 + %nop5883 = alloca i1, i1 0 + %nop5884 = alloca i1, i1 0 + %nop5885 = alloca i1, i1 0 + %nop5886 = alloca i1, i1 0 + %nop5887 = alloca i1, i1 0 + %nop5888 = alloca i1, i1 0 + %nop5889 = alloca i1, i1 0 + %nop5890 = alloca i1, i1 0 + %nop5891 = alloca i1, i1 0 + %nop5892 = alloca i1, i1 0 + %nop5893 = alloca i1, i1 0 + %nop5894 = alloca i1, i1 0 + %nop5895 = alloca i1, i1 0 + %nop5896 = alloca i1, i1 0 + %nop5897 = alloca i1, i1 0 + %nop5898 = alloca i1, i1 0 + %nop5899 = alloca i1, i1 0 + %nop5900 = alloca i1, i1 0 + %nop5901 = alloca i1, i1 0 + %nop5902 = alloca i1, i1 0 + %nop5903 = alloca i1, i1 0 + %nop5904 = alloca i1, i1 0 + %nop5905 = alloca i1, i1 0 + %nop5906 = alloca i1, i1 0 + %nop5907 = alloca i1, i1 0 + %nop5908 = alloca i1, i1 0 + %nop5909 = alloca i1, i1 0 + %nop5910 = alloca i1, i1 0 + %nop5911 = alloca i1, i1 0 + %nop5912 = alloca i1, i1 0 + %nop5913 = alloca i1, i1 0 + %nop5914 = alloca i1, i1 0 + %nop5915 = alloca i1, i1 0 + %nop5916 = alloca i1, i1 0 + %nop5917 = alloca i1, i1 0 + %nop5918 = alloca i1, i1 0 + %nop5919 = alloca i1, i1 0 + %nop5920 = alloca i1, i1 0 + %nop5921 = alloca i1, i1 0 + %nop5922 = alloca i1, i1 0 + %nop5923 = alloca i1, i1 0 + %nop5924 = alloca i1, i1 0 + %nop5925 = alloca i1, i1 0 + %nop5926 = alloca i1, i1 0 + %nop5927 = alloca i1, i1 0 + %nop5928 = alloca i1, i1 0 + %nop5929 = alloca i1, i1 0 + %nop5930 = alloca i1, i1 0 + %nop5931 = alloca i1, i1 0 + %nop5932 = alloca i1, i1 0 + %nop5933 = alloca i1, i1 0 + %nop5934 
= alloca i1, i1 0 + %nop5935 = alloca i1, i1 0 + %nop5936 = alloca i1, i1 0 + %nop5937 = alloca i1, i1 0 + %nop5938 = alloca i1, i1 0 + %nop5939 = alloca i1, i1 0 + %nop5940 = alloca i1, i1 0 + %nop5941 = alloca i1, i1 0 + %nop5942 = alloca i1, i1 0 + %nop5943 = alloca i1, i1 0 + %nop5944 = alloca i1, i1 0 + %nop5945 = alloca i1, i1 0 + %nop5946 = alloca i1, i1 0 + %nop5947 = alloca i1, i1 0 + %nop5948 = alloca i1, i1 0 + %nop5949 = alloca i1, i1 0 + %nop5950 = alloca i1, i1 0 + %nop5951 = alloca i1, i1 0 + %nop5952 = alloca i1, i1 0 + %nop5953 = alloca i1, i1 0 + %nop5954 = alloca i1, i1 0 + %nop5955 = alloca i1, i1 0 + %nop5956 = alloca i1, i1 0 + %nop5957 = alloca i1, i1 0 + %nop5958 = alloca i1, i1 0 + %nop5959 = alloca i1, i1 0 + %nop5960 = alloca i1, i1 0 + %nop5961 = alloca i1, i1 0 + %nop5962 = alloca i1, i1 0 + %nop5963 = alloca i1, i1 0 + %nop5964 = alloca i1, i1 0 + %nop5965 = alloca i1, i1 0 + %nop5966 = alloca i1, i1 0 + %nop5967 = alloca i1, i1 0 + %nop5968 = alloca i1, i1 0 + %nop5969 = alloca i1, i1 0 + %nop5970 = alloca i1, i1 0 + %nop5971 = alloca i1, i1 0 + %nop5972 = alloca i1, i1 0 + %nop5973 = alloca i1, i1 0 + %nop5974 = alloca i1, i1 0 + %nop5975 = alloca i1, i1 0 + %nop5976 = alloca i1, i1 0 + %nop5977 = alloca i1, i1 0 + %nop5978 = alloca i1, i1 0 + %nop5979 = alloca i1, i1 0 + %nop5980 = alloca i1, i1 0 + %nop5981 = alloca i1, i1 0 + %nop5982 = alloca i1, i1 0 + %nop5983 = alloca i1, i1 0 + %nop5984 = alloca i1, i1 0 + %nop5985 = alloca i1, i1 0 + %nop5986 = alloca i1, i1 0 + %nop5987 = alloca i1, i1 0 + %nop5988 = alloca i1, i1 0 + %nop5989 = alloca i1, i1 0 + %nop5990 = alloca i1, i1 0 + %nop5991 = alloca i1, i1 0 + %nop5992 = alloca i1, i1 0 + %nop5993 = alloca i1, i1 0 + %nop5994 = alloca i1, i1 0 + %nop5995 = alloca i1, i1 0 + %nop5996 = alloca i1, i1 0 + %nop5997 = alloca i1, i1 0 + %nop5998 = alloca i1, i1 0 + %nop5999 = alloca i1, i1 0 + %nop6000 = alloca i1, i1 0 + %nop6001 = alloca i1, i1 0 + %nop6002 = alloca i1, i1 0 + 
%nop6003 = alloca i1, i1 0 + %nop6004 = alloca i1, i1 0 + %nop6005 = alloca i1, i1 0 + %nop6006 = alloca i1, i1 0 + %nop6007 = alloca i1, i1 0 + %nop6008 = alloca i1, i1 0 + %nop6009 = alloca i1, i1 0 + %nop6010 = alloca i1, i1 0 + %nop6011 = alloca i1, i1 0 + %nop6012 = alloca i1, i1 0 + %nop6013 = alloca i1, i1 0 + %nop6014 = alloca i1, i1 0 + %nop6015 = alloca i1, i1 0 + %nop6016 = alloca i1, i1 0 + %nop6017 = alloca i1, i1 0 + %nop6018 = alloca i1, i1 0 + %nop6019 = alloca i1, i1 0 + %nop6020 = alloca i1, i1 0 + %nop6021 = alloca i1, i1 0 + %nop6022 = alloca i1, i1 0 + %nop6023 = alloca i1, i1 0 + %nop6024 = alloca i1, i1 0 + %nop6025 = alloca i1, i1 0 + %nop6026 = alloca i1, i1 0 + %nop6027 = alloca i1, i1 0 + %nop6028 = alloca i1, i1 0 + %nop6029 = alloca i1, i1 0 + %nop6030 = alloca i1, i1 0 + %nop6031 = alloca i1, i1 0 + %nop6032 = alloca i1, i1 0 + %nop6033 = alloca i1, i1 0 + %nop6034 = alloca i1, i1 0 + %nop6035 = alloca i1, i1 0 + %nop6036 = alloca i1, i1 0 + %nop6037 = alloca i1, i1 0 + %nop6038 = alloca i1, i1 0 + %nop6039 = alloca i1, i1 0 + %nop6040 = alloca i1, i1 0 + %nop6041 = alloca i1, i1 0 + %nop6042 = alloca i1, i1 0 + %nop6043 = alloca i1, i1 0 + %nop6044 = alloca i1, i1 0 + %nop6045 = alloca i1, i1 0 + %nop6046 = alloca i1, i1 0 + %nop6047 = alloca i1, i1 0 + %nop6048 = alloca i1, i1 0 + %nop6049 = alloca i1, i1 0 + %nop6050 = alloca i1, i1 0 + %nop6051 = alloca i1, i1 0 + %nop6052 = alloca i1, i1 0 + %nop6053 = alloca i1, i1 0 + %nop6054 = alloca i1, i1 0 + %nop6055 = alloca i1, i1 0 + %nop6056 = alloca i1, i1 0 + %nop6057 = alloca i1, i1 0 + %nop6058 = alloca i1, i1 0 + %nop6059 = alloca i1, i1 0 + %nop6060 = alloca i1, i1 0 + %nop6061 = alloca i1, i1 0 + %nop6062 = alloca i1, i1 0 + %nop6063 = alloca i1, i1 0 + %nop6064 = alloca i1, i1 0 + %nop6065 = alloca i1, i1 0 + %nop6066 = alloca i1, i1 0 + %nop6067 = alloca i1, i1 0 + %nop6068 = alloca i1, i1 0 + %nop6069 = alloca i1, i1 0 + %nop6070 = alloca i1, i1 0 + %nop6071 = alloca i1, i1 0 
+ %nop6072 = alloca i1, i1 0 + %nop6073 = alloca i1, i1 0 + %nop6074 = alloca i1, i1 0 + %nop6075 = alloca i1, i1 0 + %nop6076 = alloca i1, i1 0 + %nop6077 = alloca i1, i1 0 + %nop6078 = alloca i1, i1 0 + %nop6079 = alloca i1, i1 0 + %nop6080 = alloca i1, i1 0 + %nop6081 = alloca i1, i1 0 + %nop6082 = alloca i1, i1 0 + %nop6083 = alloca i1, i1 0 + %nop6084 = alloca i1, i1 0 + %nop6085 = alloca i1, i1 0 + %nop6086 = alloca i1, i1 0 + %nop6087 = alloca i1, i1 0 + %nop6088 = alloca i1, i1 0 + %nop6089 = alloca i1, i1 0 + %nop6090 = alloca i1, i1 0 + %nop6091 = alloca i1, i1 0 + %nop6092 = alloca i1, i1 0 + %nop6093 = alloca i1, i1 0 + %nop6094 = alloca i1, i1 0 + %nop6095 = alloca i1, i1 0 + %nop6096 = alloca i1, i1 0 + %nop6097 = alloca i1, i1 0 + %nop6098 = alloca i1, i1 0 + %nop6099 = alloca i1, i1 0 + %nop6100 = alloca i1, i1 0 + %nop6101 = alloca i1, i1 0 + %nop6102 = alloca i1, i1 0 + %nop6103 = alloca i1, i1 0 + %nop6104 = alloca i1, i1 0 + %nop6105 = alloca i1, i1 0 + %nop6106 = alloca i1, i1 0 + %nop6107 = alloca i1, i1 0 + %nop6108 = alloca i1, i1 0 + %nop6109 = alloca i1, i1 0 + %nop6110 = alloca i1, i1 0 + %nop6111 = alloca i1, i1 0 + %nop6112 = alloca i1, i1 0 + %nop6113 = alloca i1, i1 0 + %nop6114 = alloca i1, i1 0 + %nop6115 = alloca i1, i1 0 + %nop6116 = alloca i1, i1 0 + %nop6117 = alloca i1, i1 0 + %nop6118 = alloca i1, i1 0 + %nop6119 = alloca i1, i1 0 + %nop6120 = alloca i1, i1 0 + %nop6121 = alloca i1, i1 0 + %nop6122 = alloca i1, i1 0 + %nop6123 = alloca i1, i1 0 + %nop6124 = alloca i1, i1 0 + %nop6125 = alloca i1, i1 0 + %nop6126 = alloca i1, i1 0 + %nop6127 = alloca i1, i1 0 + %nop6128 = alloca i1, i1 0 + %nop6129 = alloca i1, i1 0 + %nop6130 = alloca i1, i1 0 + %nop6131 = alloca i1, i1 0 + %nop6132 = alloca i1, i1 0 + %nop6133 = alloca i1, i1 0 + %nop6134 = alloca i1, i1 0 + %nop6135 = alloca i1, i1 0 + %nop6136 = alloca i1, i1 0 + %nop6137 = alloca i1, i1 0 + %nop6138 = alloca i1, i1 0 + %nop6139 = alloca i1, i1 0 + %nop6140 = alloca i1, i1 
0 + %nop6141 = alloca i1, i1 0 + %nop6142 = alloca i1, i1 0 + %nop6143 = alloca i1, i1 0 + %nop6144 = alloca i1, i1 0 + %nop6145 = alloca i1, i1 0 + %nop6146 = alloca i1, i1 0 + %nop6147 = alloca i1, i1 0 + %nop6148 = alloca i1, i1 0 + %nop6149 = alloca i1, i1 0 + %nop6150 = alloca i1, i1 0 + %nop6151 = alloca i1, i1 0 + %nop6152 = alloca i1, i1 0 + %nop6153 = alloca i1, i1 0 + %nop6154 = alloca i1, i1 0 + %nop6155 = alloca i1, i1 0 + %nop6156 = alloca i1, i1 0 + %nop6157 = alloca i1, i1 0 + %nop6158 = alloca i1, i1 0 + %nop6159 = alloca i1, i1 0 + %nop6160 = alloca i1, i1 0 + %nop6161 = alloca i1, i1 0 + %nop6162 = alloca i1, i1 0 + %nop6163 = alloca i1, i1 0 + %nop6164 = alloca i1, i1 0 + %nop6165 = alloca i1, i1 0 + %nop6166 = alloca i1, i1 0 + %nop6167 = alloca i1, i1 0 + %nop6168 = alloca i1, i1 0 + %nop6169 = alloca i1, i1 0 + %nop6170 = alloca i1, i1 0 + %nop6171 = alloca i1, i1 0 + %nop6172 = alloca i1, i1 0 + %nop6173 = alloca i1, i1 0 + %nop6174 = alloca i1, i1 0 + %nop6175 = alloca i1, i1 0 + %nop6176 = alloca i1, i1 0 + %nop6177 = alloca i1, i1 0 + %nop6178 = alloca i1, i1 0 + %nop6179 = alloca i1, i1 0 + %nop6180 = alloca i1, i1 0 + %nop6181 = alloca i1, i1 0 + %nop6182 = alloca i1, i1 0 + %nop6183 = alloca i1, i1 0 + %nop6184 = alloca i1, i1 0 + %nop6185 = alloca i1, i1 0 + %nop6186 = alloca i1, i1 0 + %nop6187 = alloca i1, i1 0 + %nop6188 = alloca i1, i1 0 + %nop6189 = alloca i1, i1 0 + %nop6190 = alloca i1, i1 0 + %nop6191 = alloca i1, i1 0 + %nop6192 = alloca i1, i1 0 + %nop6193 = alloca i1, i1 0 + %nop6194 = alloca i1, i1 0 + %nop6195 = alloca i1, i1 0 + %nop6196 = alloca i1, i1 0 + %nop6197 = alloca i1, i1 0 + %nop6198 = alloca i1, i1 0 + %nop6199 = alloca i1, i1 0 + %nop6200 = alloca i1, i1 0 + %nop6201 = alloca i1, i1 0 + %nop6202 = alloca i1, i1 0 + %nop6203 = alloca i1, i1 0 + %nop6204 = alloca i1, i1 0 + %nop6205 = alloca i1, i1 0 + %nop6206 = alloca i1, i1 0 + %nop6207 = alloca i1, i1 0 + %nop6208 = alloca i1, i1 0 + %nop6209 = alloca i1, 
i1 0 + %nop6210 = alloca i1, i1 0 + %nop6211 = alloca i1, i1 0 + %nop6212 = alloca i1, i1 0 + %nop6213 = alloca i1, i1 0 + %nop6214 = alloca i1, i1 0 + %nop6215 = alloca i1, i1 0 + %nop6216 = alloca i1, i1 0 + %nop6217 = alloca i1, i1 0 + %nop6218 = alloca i1, i1 0 + %nop6219 = alloca i1, i1 0 + %nop6220 = alloca i1, i1 0 + %nop6221 = alloca i1, i1 0 + %nop6222 = alloca i1, i1 0 + %nop6223 = alloca i1, i1 0 + %nop6224 = alloca i1, i1 0 + %nop6225 = alloca i1, i1 0 + %nop6226 = alloca i1, i1 0 + %nop6227 = alloca i1, i1 0 + %nop6228 = alloca i1, i1 0 + %nop6229 = alloca i1, i1 0 + %nop6230 = alloca i1, i1 0 + %nop6231 = alloca i1, i1 0 + %nop6232 = alloca i1, i1 0 + %nop6233 = alloca i1, i1 0 + %nop6234 = alloca i1, i1 0 + %nop6235 = alloca i1, i1 0 + %nop6236 = alloca i1, i1 0 + %nop6237 = alloca i1, i1 0 + %nop6238 = alloca i1, i1 0 + %nop6239 = alloca i1, i1 0 + %nop6240 = alloca i1, i1 0 + %nop6241 = alloca i1, i1 0 + %nop6242 = alloca i1, i1 0 + %nop6243 = alloca i1, i1 0 + %nop6244 = alloca i1, i1 0 + %nop6245 = alloca i1, i1 0 + %nop6246 = alloca i1, i1 0 + %nop6247 = alloca i1, i1 0 + %nop6248 = alloca i1, i1 0 + %nop6249 = alloca i1, i1 0 + %nop6250 = alloca i1, i1 0 + %nop6251 = alloca i1, i1 0 + %nop6252 = alloca i1, i1 0 + %nop6253 = alloca i1, i1 0 + %nop6254 = alloca i1, i1 0 + %nop6255 = alloca i1, i1 0 + %nop6256 = alloca i1, i1 0 + %nop6257 = alloca i1, i1 0 + %nop6258 = alloca i1, i1 0 + %nop6259 = alloca i1, i1 0 + %nop6260 = alloca i1, i1 0 + %nop6261 = alloca i1, i1 0 + %nop6262 = alloca i1, i1 0 + %nop6263 = alloca i1, i1 0 + %nop6264 = alloca i1, i1 0 + %nop6265 = alloca i1, i1 0 + %nop6266 = alloca i1, i1 0 + %nop6267 = alloca i1, i1 0 + %nop6268 = alloca i1, i1 0 + %nop6269 = alloca i1, i1 0 + %nop6270 = alloca i1, i1 0 + %nop6271 = alloca i1, i1 0 + %nop6272 = alloca i1, i1 0 + %nop6273 = alloca i1, i1 0 + %nop6274 = alloca i1, i1 0 + %nop6275 = alloca i1, i1 0 + %nop6276 = alloca i1, i1 0 + %nop6277 = alloca i1, i1 0 + %nop6278 = alloca 
i1, i1 0 + %nop6279 = alloca i1, i1 0 + %nop6280 = alloca i1, i1 0 + %nop6281 = alloca i1, i1 0 + %nop6282 = alloca i1, i1 0 + %nop6283 = alloca i1, i1 0 + %nop6284 = alloca i1, i1 0 + %nop6285 = alloca i1, i1 0 + %nop6286 = alloca i1, i1 0 + %nop6287 = alloca i1, i1 0 + %nop6288 = alloca i1, i1 0 + %nop6289 = alloca i1, i1 0 + %nop6290 = alloca i1, i1 0 + %nop6291 = alloca i1, i1 0 + %nop6292 = alloca i1, i1 0 + %nop6293 = alloca i1, i1 0 + %nop6294 = alloca i1, i1 0 + %nop6295 = alloca i1, i1 0 + %nop6296 = alloca i1, i1 0 + %nop6297 = alloca i1, i1 0 + %nop6298 = alloca i1, i1 0 + %nop6299 = alloca i1, i1 0 + %nop6300 = alloca i1, i1 0 + %nop6301 = alloca i1, i1 0 + %nop6302 = alloca i1, i1 0 + %nop6303 = alloca i1, i1 0 + %nop6304 = alloca i1, i1 0 + %nop6305 = alloca i1, i1 0 + %nop6306 = alloca i1, i1 0 + %nop6307 = alloca i1, i1 0 + %nop6308 = alloca i1, i1 0 + %nop6309 = alloca i1, i1 0 + %nop6310 = alloca i1, i1 0 + %nop6311 = alloca i1, i1 0 + %nop6312 = alloca i1, i1 0 + %nop6313 = alloca i1, i1 0 + %nop6314 = alloca i1, i1 0 + %nop6315 = alloca i1, i1 0 + %nop6316 = alloca i1, i1 0 + %nop6317 = alloca i1, i1 0 + %nop6318 = alloca i1, i1 0 + %nop6319 = alloca i1, i1 0 + %nop6320 = alloca i1, i1 0 + %nop6321 = alloca i1, i1 0 + %nop6322 = alloca i1, i1 0 + %nop6323 = alloca i1, i1 0 + %nop6324 = alloca i1, i1 0 + %nop6325 = alloca i1, i1 0 + %nop6326 = alloca i1, i1 0 + %nop6327 = alloca i1, i1 0 + %nop6328 = alloca i1, i1 0 + %nop6329 = alloca i1, i1 0 + %nop6330 = alloca i1, i1 0 + %nop6331 = alloca i1, i1 0 + %nop6332 = alloca i1, i1 0 + %nop6333 = alloca i1, i1 0 + %nop6334 = alloca i1, i1 0 + %nop6335 = alloca i1, i1 0 + %nop6336 = alloca i1, i1 0 + %nop6337 = alloca i1, i1 0 + %nop6338 = alloca i1, i1 0 + %nop6339 = alloca i1, i1 0 + %nop6340 = alloca i1, i1 0 + %nop6341 = alloca i1, i1 0 + %nop6342 = alloca i1, i1 0 + %nop6343 = alloca i1, i1 0 + %nop6344 = alloca i1, i1 0 + %nop6345 = alloca i1, i1 0 + %nop6346 = alloca i1, i1 0 + %nop6347 = 
alloca i1, i1 0 + %nop6348 = alloca i1, i1 0 + %nop6349 = alloca i1, i1 0 + %nop6350 = alloca i1, i1 0 + %nop6351 = alloca i1, i1 0 + %nop6352 = alloca i1, i1 0 + %nop6353 = alloca i1, i1 0 + %nop6354 = alloca i1, i1 0 + %nop6355 = alloca i1, i1 0 + %nop6356 = alloca i1, i1 0 + %nop6357 = alloca i1, i1 0 + %nop6358 = alloca i1, i1 0 + %nop6359 = alloca i1, i1 0 + %nop6360 = alloca i1, i1 0 + %nop6361 = alloca i1, i1 0 + %nop6362 = alloca i1, i1 0 + %nop6363 = alloca i1, i1 0 + %nop6364 = alloca i1, i1 0 + %nop6365 = alloca i1, i1 0 + %nop6366 = alloca i1, i1 0 + %nop6367 = alloca i1, i1 0 + %nop6368 = alloca i1, i1 0 + %nop6369 = alloca i1, i1 0 + %nop6370 = alloca i1, i1 0 + %nop6371 = alloca i1, i1 0 + %nop6372 = alloca i1, i1 0 + %nop6373 = alloca i1, i1 0 + %nop6374 = alloca i1, i1 0 + %nop6375 = alloca i1, i1 0 + %nop6376 = alloca i1, i1 0 + %nop6377 = alloca i1, i1 0 + %nop6378 = alloca i1, i1 0 + %nop6379 = alloca i1, i1 0 + %nop6380 = alloca i1, i1 0 + %nop6381 = alloca i1, i1 0 + %nop6382 = alloca i1, i1 0 + %nop6383 = alloca i1, i1 0 + %nop6384 = alloca i1, i1 0 + %nop6385 = alloca i1, i1 0 + %nop6386 = alloca i1, i1 0 + %nop6387 = alloca i1, i1 0 + %nop6388 = alloca i1, i1 0 + %nop6389 = alloca i1, i1 0 + %nop6390 = alloca i1, i1 0 + %nop6391 = alloca i1, i1 0 + %nop6392 = alloca i1, i1 0 + %nop6393 = alloca i1, i1 0 + %nop6394 = alloca i1, i1 0 + %nop6395 = alloca i1, i1 0 + %nop6396 = alloca i1, i1 0 + %nop6397 = alloca i1, i1 0 + %nop6398 = alloca i1, i1 0 + %nop6399 = alloca i1, i1 0 + %nop6400 = alloca i1, i1 0 + %nop6401 = alloca i1, i1 0 + %nop6402 = alloca i1, i1 0 + %nop6403 = alloca i1, i1 0 + %nop6404 = alloca i1, i1 0 + %nop6405 = alloca i1, i1 0 + %nop6406 = alloca i1, i1 0 + %nop6407 = alloca i1, i1 0 + %nop6408 = alloca i1, i1 0 + %nop6409 = alloca i1, i1 0 + %nop6410 = alloca i1, i1 0 + %nop6411 = alloca i1, i1 0 + %nop6412 = alloca i1, i1 0 + %nop6413 = alloca i1, i1 0 + %nop6414 = alloca i1, i1 0 + %nop6415 = alloca i1, i1 0 + %nop6416 
= alloca i1, i1 0 + %nop6417 = alloca i1, i1 0 + %nop6418 = alloca i1, i1 0 + %nop6419 = alloca i1, i1 0 + %nop6420 = alloca i1, i1 0 + %nop6421 = alloca i1, i1 0 + %nop6422 = alloca i1, i1 0 + %nop6423 = alloca i1, i1 0 + %nop6424 = alloca i1, i1 0 + %nop6425 = alloca i1, i1 0 + %nop6426 = alloca i1, i1 0 + %nop6427 = alloca i1, i1 0 + %nop6428 = alloca i1, i1 0 + %nop6429 = alloca i1, i1 0 + %nop6430 = alloca i1, i1 0 + %nop6431 = alloca i1, i1 0 + %nop6432 = alloca i1, i1 0 + %nop6433 = alloca i1, i1 0 + %nop6434 = alloca i1, i1 0 + %nop6435 = alloca i1, i1 0 + %nop6436 = alloca i1, i1 0 + %nop6437 = alloca i1, i1 0 + %nop6438 = alloca i1, i1 0 + %nop6439 = alloca i1, i1 0 + %nop6440 = alloca i1, i1 0 + %nop6441 = alloca i1, i1 0 + %nop6442 = alloca i1, i1 0 + %nop6443 = alloca i1, i1 0 + %nop6444 = alloca i1, i1 0 + %nop6445 = alloca i1, i1 0 + %nop6446 = alloca i1, i1 0 + %nop6447 = alloca i1, i1 0 + %nop6448 = alloca i1, i1 0 + %nop6449 = alloca i1, i1 0 + %nop6450 = alloca i1, i1 0 + %nop6451 = alloca i1, i1 0 + %nop6452 = alloca i1, i1 0 + %nop6453 = alloca i1, i1 0 + %nop6454 = alloca i1, i1 0 + %nop6455 = alloca i1, i1 0 + %nop6456 = alloca i1, i1 0 + %nop6457 = alloca i1, i1 0 + %nop6458 = alloca i1, i1 0 + %nop6459 = alloca i1, i1 0 + %nop6460 = alloca i1, i1 0 + %nop6461 = alloca i1, i1 0 + %nop6462 = alloca i1, i1 0 + %nop6463 = alloca i1, i1 0 + %nop6464 = alloca i1, i1 0 + %nop6465 = alloca i1, i1 0 + %nop6466 = alloca i1, i1 0 + %nop6467 = alloca i1, i1 0 + %nop6468 = alloca i1, i1 0 + %nop6469 = alloca i1, i1 0 + %nop6470 = alloca i1, i1 0 + %nop6471 = alloca i1, i1 0 + %nop6472 = alloca i1, i1 0 + %nop6473 = alloca i1, i1 0 + %nop6474 = alloca i1, i1 0 + %nop6475 = alloca i1, i1 0 + %nop6476 = alloca i1, i1 0 + %nop6477 = alloca i1, i1 0 + %nop6478 = alloca i1, i1 0 + %nop6479 = alloca i1, i1 0 + %nop6480 = alloca i1, i1 0 + %nop6481 = alloca i1, i1 0 + %nop6482 = alloca i1, i1 0 + %nop6483 = alloca i1, i1 0 + %nop6484 = alloca i1, i1 0 + 
%nop6485 = alloca i1, i1 0 + %nop6486 = alloca i1, i1 0 + %nop6487 = alloca i1, i1 0 + %nop6488 = alloca i1, i1 0 + %nop6489 = alloca i1, i1 0 + %nop6490 = alloca i1, i1 0 + %nop6491 = alloca i1, i1 0 + %nop6492 = alloca i1, i1 0 + %nop6493 = alloca i1, i1 0 + %nop6494 = alloca i1, i1 0 + %nop6495 = alloca i1, i1 0 + %nop6496 = alloca i1, i1 0 + %nop6497 = alloca i1, i1 0 + %nop6498 = alloca i1, i1 0 + %nop6499 = alloca i1, i1 0 + %nop6500 = alloca i1, i1 0 + %nop6501 = alloca i1, i1 0 + %nop6502 = alloca i1, i1 0 + %nop6503 = alloca i1, i1 0 + %nop6504 = alloca i1, i1 0 + %nop6505 = alloca i1, i1 0 + %nop6506 = alloca i1, i1 0 + %nop6507 = alloca i1, i1 0 + %nop6508 = alloca i1, i1 0 + %nop6509 = alloca i1, i1 0 + %nop6510 = alloca i1, i1 0 + %nop6511 = alloca i1, i1 0 + %nop6512 = alloca i1, i1 0 + %nop6513 = alloca i1, i1 0 + %nop6514 = alloca i1, i1 0 + %nop6515 = alloca i1, i1 0 + %nop6516 = alloca i1, i1 0 + %nop6517 = alloca i1, i1 0 + %nop6518 = alloca i1, i1 0 + %nop6519 = alloca i1, i1 0 + %nop6520 = alloca i1, i1 0 + %nop6521 = alloca i1, i1 0 + %nop6522 = alloca i1, i1 0 + %nop6523 = alloca i1, i1 0 + %nop6524 = alloca i1, i1 0 + %nop6525 = alloca i1, i1 0 + %nop6526 = alloca i1, i1 0 + %nop6527 = alloca i1, i1 0 + %nop6528 = alloca i1, i1 0 + %nop6529 = alloca i1, i1 0 + %nop6530 = alloca i1, i1 0 + %nop6531 = alloca i1, i1 0 + %nop6532 = alloca i1, i1 0 + %nop6533 = alloca i1, i1 0 + %nop6534 = alloca i1, i1 0 + %nop6535 = alloca i1, i1 0 + %nop6536 = alloca i1, i1 0 + %nop6537 = alloca i1, i1 0 + %nop6538 = alloca i1, i1 0 + %nop6539 = alloca i1, i1 0 + %nop6540 = alloca i1, i1 0 + %nop6541 = alloca i1, i1 0 + %nop6542 = alloca i1, i1 0 + %nop6543 = alloca i1, i1 0 + %nop6544 = alloca i1, i1 0 + %nop6545 = alloca i1, i1 0 + %nop6546 = alloca i1, i1 0 + %nop6547 = alloca i1, i1 0 + %nop6548 = alloca i1, i1 0 + %nop6549 = alloca i1, i1 0 + %nop6550 = alloca i1, i1 0 + %nop6551 = alloca i1, i1 0 + %nop6552 = alloca i1, i1 0 + %nop6553 = alloca i1, i1 0 
+ %nop6554 = alloca i1, i1 0 + %nop6555 = alloca i1, i1 0 + %nop6556 = alloca i1, i1 0 + %nop6557 = alloca i1, i1 0 + %nop6558 = alloca i1, i1 0 + %nop6559 = alloca i1, i1 0 + %nop6560 = alloca i1, i1 0 + %nop6561 = alloca i1, i1 0 + %nop6562 = alloca i1, i1 0 + %nop6563 = alloca i1, i1 0 + %nop6564 = alloca i1, i1 0 + %nop6565 = alloca i1, i1 0 + %nop6566 = alloca i1, i1 0 + %nop6567 = alloca i1, i1 0 + %nop6568 = alloca i1, i1 0 + %nop6569 = alloca i1, i1 0 + %nop6570 = alloca i1, i1 0 + %nop6571 = alloca i1, i1 0 + %nop6572 = alloca i1, i1 0 + %nop6573 = alloca i1, i1 0 + %nop6574 = alloca i1, i1 0 + %nop6575 = alloca i1, i1 0 + %nop6576 = alloca i1, i1 0 + %nop6577 = alloca i1, i1 0 + %nop6578 = alloca i1, i1 0 + %nop6579 = alloca i1, i1 0 + %nop6580 = alloca i1, i1 0 + %nop6581 = alloca i1, i1 0 + %nop6582 = alloca i1, i1 0 + %nop6583 = alloca i1, i1 0 + %nop6584 = alloca i1, i1 0 + %nop6585 = alloca i1, i1 0 + %nop6586 = alloca i1, i1 0 + %nop6587 = alloca i1, i1 0 + %nop6588 = alloca i1, i1 0 + %nop6589 = alloca i1, i1 0 + %nop6590 = alloca i1, i1 0 + %nop6591 = alloca i1, i1 0 + %nop6592 = alloca i1, i1 0 + %nop6593 = alloca i1, i1 0 + %nop6594 = alloca i1, i1 0 + %nop6595 = alloca i1, i1 0 + %nop6596 = alloca i1, i1 0 + %nop6597 = alloca i1, i1 0 + %nop6598 = alloca i1, i1 0 + %nop6599 = alloca i1, i1 0 + %nop6600 = alloca i1, i1 0 + %nop6601 = alloca i1, i1 0 + %nop6602 = alloca i1, i1 0 + %nop6603 = alloca i1, i1 0 + %nop6604 = alloca i1, i1 0 + %nop6605 = alloca i1, i1 0 + %nop6606 = alloca i1, i1 0 + %nop6607 = alloca i1, i1 0 + %nop6608 = alloca i1, i1 0 + %nop6609 = alloca i1, i1 0 + %nop6610 = alloca i1, i1 0 + %nop6611 = alloca i1, i1 0 + %nop6612 = alloca i1, i1 0 + %nop6613 = alloca i1, i1 0 + %nop6614 = alloca i1, i1 0 + %nop6615 = alloca i1, i1 0 + %nop6616 = alloca i1, i1 0 + %nop6617 = alloca i1, i1 0 + %nop6618 = alloca i1, i1 0 + %nop6619 = alloca i1, i1 0 + %nop6620 = alloca i1, i1 0 + %nop6621 = alloca i1, i1 0 + %nop6622 = alloca i1, i1 
0 + %nop6623 = alloca i1, i1 0 + %nop6624 = alloca i1, i1 0 + %nop6625 = alloca i1, i1 0 + %nop6626 = alloca i1, i1 0 + %nop6627 = alloca i1, i1 0 + %nop6628 = alloca i1, i1 0 + %nop6629 = alloca i1, i1 0 + %nop6630 = alloca i1, i1 0 + %nop6631 = alloca i1, i1 0 + %nop6632 = alloca i1, i1 0 + %nop6633 = alloca i1, i1 0 + %nop6634 = alloca i1, i1 0 + %nop6635 = alloca i1, i1 0 + %nop6636 = alloca i1, i1 0 + %nop6637 = alloca i1, i1 0 + %nop6638 = alloca i1, i1 0 + %nop6639 = alloca i1, i1 0 + %nop6640 = alloca i1, i1 0 + %nop6641 = alloca i1, i1 0 + %nop6642 = alloca i1, i1 0 + %nop6643 = alloca i1, i1 0 + %nop6644 = alloca i1, i1 0 + %nop6645 = alloca i1, i1 0 + %nop6646 = alloca i1, i1 0 + %nop6647 = alloca i1, i1 0 + %nop6648 = alloca i1, i1 0 + %nop6649 = alloca i1, i1 0 + %nop6650 = alloca i1, i1 0 + %nop6651 = alloca i1, i1 0 + %nop6652 = alloca i1, i1 0 + %nop6653 = alloca i1, i1 0 + %nop6654 = alloca i1, i1 0 + %nop6655 = alloca i1, i1 0 + %nop6656 = alloca i1, i1 0 + %nop6657 = alloca i1, i1 0 + %nop6658 = alloca i1, i1 0 + %nop6659 = alloca i1, i1 0 + %nop6660 = alloca i1, i1 0 + %nop6661 = alloca i1, i1 0 + %nop6662 = alloca i1, i1 0 + %nop6663 = alloca i1, i1 0 + %nop6664 = alloca i1, i1 0 + %nop6665 = alloca i1, i1 0 + %nop6666 = alloca i1, i1 0 + %nop6667 = alloca i1, i1 0 + %nop6668 = alloca i1, i1 0 + %nop6669 = alloca i1, i1 0 + %nop6670 = alloca i1, i1 0 + %nop6671 = alloca i1, i1 0 + %nop6672 = alloca i1, i1 0 + %nop6673 = alloca i1, i1 0 + %nop6674 = alloca i1, i1 0 + %nop6675 = alloca i1, i1 0 + %nop6676 = alloca i1, i1 0 + %nop6677 = alloca i1, i1 0 + %nop6678 = alloca i1, i1 0 + %nop6679 = alloca i1, i1 0 + %nop6680 = alloca i1, i1 0 + %nop6681 = alloca i1, i1 0 + %nop6682 = alloca i1, i1 0 + %nop6683 = alloca i1, i1 0 + %nop6684 = alloca i1, i1 0 + %nop6685 = alloca i1, i1 0 + %nop6686 = alloca i1, i1 0 + %nop6687 = alloca i1, i1 0 + %nop6688 = alloca i1, i1 0 + %nop6689 = alloca i1, i1 0 + %nop6690 = alloca i1, i1 0 + %nop6691 = alloca i1, 
i1 0 + %nop6692 = alloca i1, i1 0 + %nop6693 = alloca i1, i1 0 + %nop6694 = alloca i1, i1 0 + %nop6695 = alloca i1, i1 0 + %nop6696 = alloca i1, i1 0 + %nop6697 = alloca i1, i1 0 + %nop6698 = alloca i1, i1 0 + %nop6699 = alloca i1, i1 0 + %nop6700 = alloca i1, i1 0 + %nop6701 = alloca i1, i1 0 + %nop6702 = alloca i1, i1 0 + %nop6703 = alloca i1, i1 0 + %nop6704 = alloca i1, i1 0 + %nop6705 = alloca i1, i1 0 + %nop6706 = alloca i1, i1 0 + %nop6707 = alloca i1, i1 0 + %nop6708 = alloca i1, i1 0 + %nop6709 = alloca i1, i1 0 + %nop6710 = alloca i1, i1 0 + %nop6711 = alloca i1, i1 0 + %nop6712 = alloca i1, i1 0 + %nop6713 = alloca i1, i1 0 + %nop6714 = alloca i1, i1 0 + %nop6715 = alloca i1, i1 0 + %nop6716 = alloca i1, i1 0 + %nop6717 = alloca i1, i1 0 + %nop6718 = alloca i1, i1 0 + %nop6719 = alloca i1, i1 0 + %nop6720 = alloca i1, i1 0 + %nop6721 = alloca i1, i1 0 + %nop6722 = alloca i1, i1 0 + %nop6723 = alloca i1, i1 0 + %nop6724 = alloca i1, i1 0 + %nop6725 = alloca i1, i1 0 + %nop6726 = alloca i1, i1 0 + %nop6727 = alloca i1, i1 0 + %nop6728 = alloca i1, i1 0 + %nop6729 = alloca i1, i1 0 + %nop6730 = alloca i1, i1 0 + %nop6731 = alloca i1, i1 0 + %nop6732 = alloca i1, i1 0 + %nop6733 = alloca i1, i1 0 + %nop6734 = alloca i1, i1 0 + %nop6735 = alloca i1, i1 0 + %nop6736 = alloca i1, i1 0 + %nop6737 = alloca i1, i1 0 + %nop6738 = alloca i1, i1 0 + %nop6739 = alloca i1, i1 0 + %nop6740 = alloca i1, i1 0 + %nop6741 = alloca i1, i1 0 + %nop6742 = alloca i1, i1 0 + %nop6743 = alloca i1, i1 0 + %nop6744 = alloca i1, i1 0 + %nop6745 = alloca i1, i1 0 + %nop6746 = alloca i1, i1 0 + %nop6747 = alloca i1, i1 0 + %nop6748 = alloca i1, i1 0 + %nop6749 = alloca i1, i1 0 + %nop6750 = alloca i1, i1 0 + %nop6751 = alloca i1, i1 0 + %nop6752 = alloca i1, i1 0 + %nop6753 = alloca i1, i1 0 + %nop6754 = alloca i1, i1 0 + %nop6755 = alloca i1, i1 0 + %nop6756 = alloca i1, i1 0 + %nop6757 = alloca i1, i1 0 + %nop6758 = alloca i1, i1 0 + %nop6759 = alloca i1, i1 0 + %nop6760 = alloca 
i1, i1 0 + %nop6761 = alloca i1, i1 0 + %nop6762 = alloca i1, i1 0 + %nop6763 = alloca i1, i1 0 + %nop6764 = alloca i1, i1 0 + %nop6765 = alloca i1, i1 0 + %nop6766 = alloca i1, i1 0 + %nop6767 = alloca i1, i1 0 + %nop6768 = alloca i1, i1 0 + %nop6769 = alloca i1, i1 0 + %nop6770 = alloca i1, i1 0 + %nop6771 = alloca i1, i1 0 + %nop6772 = alloca i1, i1 0 + %nop6773 = alloca i1, i1 0 + %nop6774 = alloca i1, i1 0 + %nop6775 = alloca i1, i1 0 + %nop6776 = alloca i1, i1 0 + %nop6777 = alloca i1, i1 0 + %nop6778 = alloca i1, i1 0 + %nop6779 = alloca i1, i1 0 + %nop6780 = alloca i1, i1 0 + %nop6781 = alloca i1, i1 0 + %nop6782 = alloca i1, i1 0 + %nop6783 = alloca i1, i1 0 + %nop6784 = alloca i1, i1 0 + %nop6785 = alloca i1, i1 0 + %nop6786 = alloca i1, i1 0 + %nop6787 = alloca i1, i1 0 + %nop6788 = alloca i1, i1 0 + %nop6789 = alloca i1, i1 0 + %nop6790 = alloca i1, i1 0 + %nop6791 = alloca i1, i1 0 + %nop6792 = alloca i1, i1 0 + %nop6793 = alloca i1, i1 0 + %nop6794 = alloca i1, i1 0 + %nop6795 = alloca i1, i1 0 + %nop6796 = alloca i1, i1 0 + %nop6797 = alloca i1, i1 0 + %nop6798 = alloca i1, i1 0 + %nop6799 = alloca i1, i1 0 + %nop6800 = alloca i1, i1 0 + %nop6801 = alloca i1, i1 0 + %nop6802 = alloca i1, i1 0 + %nop6803 = alloca i1, i1 0 + %nop6804 = alloca i1, i1 0 + %nop6805 = alloca i1, i1 0 + %nop6806 = alloca i1, i1 0 + %nop6807 = alloca i1, i1 0 + %nop6808 = alloca i1, i1 0 + %nop6809 = alloca i1, i1 0 + %nop6810 = alloca i1, i1 0 + %nop6811 = alloca i1, i1 0 + %nop6812 = alloca i1, i1 0 + %nop6813 = alloca i1, i1 0 + %nop6814 = alloca i1, i1 0 + %nop6815 = alloca i1, i1 0 + %nop6816 = alloca i1, i1 0 + %nop6817 = alloca i1, i1 0 + %nop6818 = alloca i1, i1 0 + %nop6819 = alloca i1, i1 0 + %nop6820 = alloca i1, i1 0 + %nop6821 = alloca i1, i1 0 + %nop6822 = alloca i1, i1 0 + %nop6823 = alloca i1, i1 0 + %nop6824 = alloca i1, i1 0 + %nop6825 = alloca i1, i1 0 + %nop6826 = alloca i1, i1 0 + %nop6827 = alloca i1, i1 0 + %nop6828 = alloca i1, i1 0 + %nop6829 = 
alloca i1, i1 0 + %nop6830 = alloca i1, i1 0 + %nop6831 = alloca i1, i1 0 + %nop6832 = alloca i1, i1 0 + %nop6833 = alloca i1, i1 0 + %nop6834 = alloca i1, i1 0 + %nop6835 = alloca i1, i1 0 + %nop6836 = alloca i1, i1 0 + %nop6837 = alloca i1, i1 0 + %nop6838 = alloca i1, i1 0 + %nop6839 = alloca i1, i1 0 + %nop6840 = alloca i1, i1 0 + %nop6841 = alloca i1, i1 0 + %nop6842 = alloca i1, i1 0 + %nop6843 = alloca i1, i1 0 + %nop6844 = alloca i1, i1 0 + %nop6845 = alloca i1, i1 0 + %nop6846 = alloca i1, i1 0 + %nop6847 = alloca i1, i1 0 + %nop6848 = alloca i1, i1 0 + %nop6849 = alloca i1, i1 0 + %nop6850 = alloca i1, i1 0 + %nop6851 = alloca i1, i1 0 + %nop6852 = alloca i1, i1 0 + %nop6853 = alloca i1, i1 0 + %nop6854 = alloca i1, i1 0 + %nop6855 = alloca i1, i1 0 + %nop6856 = alloca i1, i1 0 + %nop6857 = alloca i1, i1 0 + %nop6858 = alloca i1, i1 0 + %nop6859 = alloca i1, i1 0 + %nop6860 = alloca i1, i1 0 + %nop6861 = alloca i1, i1 0 + %nop6862 = alloca i1, i1 0 + %nop6863 = alloca i1, i1 0 + %nop6864 = alloca i1, i1 0 + %nop6865 = alloca i1, i1 0 + %nop6866 = alloca i1, i1 0 + %nop6867 = alloca i1, i1 0 + %nop6868 = alloca i1, i1 0 + %nop6869 = alloca i1, i1 0 + %nop6870 = alloca i1, i1 0 + %nop6871 = alloca i1, i1 0 + %nop6872 = alloca i1, i1 0 + %nop6873 = alloca i1, i1 0 + %nop6874 = alloca i1, i1 0 + %nop6875 = alloca i1, i1 0 + %nop6876 = alloca i1, i1 0 + %nop6877 = alloca i1, i1 0 + %nop6878 = alloca i1, i1 0 + %nop6879 = alloca i1, i1 0 + %nop6880 = alloca i1, i1 0 + %nop6881 = alloca i1, i1 0 + %nop6882 = alloca i1, i1 0 + %nop6883 = alloca i1, i1 0 + %nop6884 = alloca i1, i1 0 + %nop6885 = alloca i1, i1 0 + %nop6886 = alloca i1, i1 0 + %nop6887 = alloca i1, i1 0 + %nop6888 = alloca i1, i1 0 + %nop6889 = alloca i1, i1 0 + %nop6890 = alloca i1, i1 0 + %nop6891 = alloca i1, i1 0 + %nop6892 = alloca i1, i1 0 + %nop6893 = alloca i1, i1 0 + %nop6894 = alloca i1, i1 0 + %nop6895 = alloca i1, i1 0 + %nop6896 = alloca i1, i1 0 + %nop6897 = alloca i1, i1 0 + %nop6898 
= alloca i1, i1 0 + %nop6899 = alloca i1, i1 0 + %nop6900 = alloca i1, i1 0 + %nop6901 = alloca i1, i1 0 + %nop6902 = alloca i1, i1 0 + %nop6903 = alloca i1, i1 0 + %nop6904 = alloca i1, i1 0 + %nop6905 = alloca i1, i1 0 + %nop6906 = alloca i1, i1 0 + %nop6907 = alloca i1, i1 0 + %nop6908 = alloca i1, i1 0 + %nop6909 = alloca i1, i1 0 + %nop6910 = alloca i1, i1 0 + %nop6911 = alloca i1, i1 0 + %nop6912 = alloca i1, i1 0 + %nop6913 = alloca i1, i1 0 + %nop6914 = alloca i1, i1 0 + %nop6915 = alloca i1, i1 0 + %nop6916 = alloca i1, i1 0 + %nop6917 = alloca i1, i1 0 + %nop6918 = alloca i1, i1 0 + %nop6919 = alloca i1, i1 0 + %nop6920 = alloca i1, i1 0 + %nop6921 = alloca i1, i1 0 + %nop6922 = alloca i1, i1 0 + %nop6923 = alloca i1, i1 0 + %nop6924 = alloca i1, i1 0 + %nop6925 = alloca i1, i1 0 + %nop6926 = alloca i1, i1 0 + %nop6927 = alloca i1, i1 0 + %nop6928 = alloca i1, i1 0 + %nop6929 = alloca i1, i1 0 + %nop6930 = alloca i1, i1 0 + %nop6931 = alloca i1, i1 0 + %nop6932 = alloca i1, i1 0 + %nop6933 = alloca i1, i1 0 + %nop6934 = alloca i1, i1 0 + %nop6935 = alloca i1, i1 0 + %nop6936 = alloca i1, i1 0 + %nop6937 = alloca i1, i1 0 + %nop6938 = alloca i1, i1 0 + %nop6939 = alloca i1, i1 0 + %nop6940 = alloca i1, i1 0 + %nop6941 = alloca i1, i1 0 + %nop6942 = alloca i1, i1 0 + %nop6943 = alloca i1, i1 0 + %nop6944 = alloca i1, i1 0 + %nop6945 = alloca i1, i1 0 + %nop6946 = alloca i1, i1 0 + %nop6947 = alloca i1, i1 0 + %nop6948 = alloca i1, i1 0 + %nop6949 = alloca i1, i1 0 + %nop6950 = alloca i1, i1 0 + %nop6951 = alloca i1, i1 0 + %nop6952 = alloca i1, i1 0 + %nop6953 = alloca i1, i1 0 + %nop6954 = alloca i1, i1 0 + %nop6955 = alloca i1, i1 0 + %nop6956 = alloca i1, i1 0 + %nop6957 = alloca i1, i1 0 + %nop6958 = alloca i1, i1 0 + %nop6959 = alloca i1, i1 0 + %nop6960 = alloca i1, i1 0 + %nop6961 = alloca i1, i1 0 + %nop6962 = alloca i1, i1 0 + %nop6963 = alloca i1, i1 0 + %nop6964 = alloca i1, i1 0 + %nop6965 = alloca i1, i1 0 + %nop6966 = alloca i1, i1 0 + 
%nop6967 = alloca i1, i1 0 + %nop6968 = alloca i1, i1 0 + %nop6969 = alloca i1, i1 0 + %nop6970 = alloca i1, i1 0 + %nop6971 = alloca i1, i1 0 + %nop6972 = alloca i1, i1 0 + %nop6973 = alloca i1, i1 0 + %nop6974 = alloca i1, i1 0 + %nop6975 = alloca i1, i1 0 + %nop6976 = alloca i1, i1 0 + %nop6977 = alloca i1, i1 0 + %nop6978 = alloca i1, i1 0 + %nop6979 = alloca i1, i1 0 + %nop6980 = alloca i1, i1 0 + %nop6981 = alloca i1, i1 0 + %nop6982 = alloca i1, i1 0 + %nop6983 = alloca i1, i1 0 + %nop6984 = alloca i1, i1 0 + %nop6985 = alloca i1, i1 0 + %nop6986 = alloca i1, i1 0 + %nop6987 = alloca i1, i1 0 + %nop6988 = alloca i1, i1 0 + %nop6989 = alloca i1, i1 0 + %nop6990 = alloca i1, i1 0 + %nop6991 = alloca i1, i1 0 + %nop6992 = alloca i1, i1 0 + %nop6993 = alloca i1, i1 0 + %nop6994 = alloca i1, i1 0 + %nop6995 = alloca i1, i1 0 + %nop6996 = alloca i1, i1 0 + %nop6997 = alloca i1, i1 0 + %nop6998 = alloca i1, i1 0 + %nop6999 = alloca i1, i1 0 + %nop7000 = alloca i1, i1 0 + %nop7001 = alloca i1, i1 0 + %nop7002 = alloca i1, i1 0 + %nop7003 = alloca i1, i1 0 + %nop7004 = alloca i1, i1 0 + %nop7005 = alloca i1, i1 0 + %nop7006 = alloca i1, i1 0 + %nop7007 = alloca i1, i1 0 + %nop7008 = alloca i1, i1 0 + %nop7009 = alloca i1, i1 0 + %nop7010 = alloca i1, i1 0 + %nop7011 = alloca i1, i1 0 + %nop7012 = alloca i1, i1 0 + %nop7013 = alloca i1, i1 0 + %nop7014 = alloca i1, i1 0 + %nop7015 = alloca i1, i1 0 + %nop7016 = alloca i1, i1 0 + %nop7017 = alloca i1, i1 0 + %nop7018 = alloca i1, i1 0 + %nop7019 = alloca i1, i1 0 + %nop7020 = alloca i1, i1 0 + %nop7021 = alloca i1, i1 0 + %nop7022 = alloca i1, i1 0 + %nop7023 = alloca i1, i1 0 + %nop7024 = alloca i1, i1 0 + %nop7025 = alloca i1, i1 0 + %nop7026 = alloca i1, i1 0 + %nop7027 = alloca i1, i1 0 + %nop7028 = alloca i1, i1 0 + %nop7029 = alloca i1, i1 0 + %nop7030 = alloca i1, i1 0 + %nop7031 = alloca i1, i1 0 + %nop7032 = alloca i1, i1 0 + %nop7033 = alloca i1, i1 0 + %nop7034 = alloca i1, i1 0 + %nop7035 = alloca i1, i1 0 
+ %nop7036 = alloca i1, i1 0 + %nop7037 = alloca i1, i1 0 + %nop7038 = alloca i1, i1 0 + %nop7039 = alloca i1, i1 0 + %nop7040 = alloca i1, i1 0 + %nop7041 = alloca i1, i1 0 + %nop7042 = alloca i1, i1 0 + %nop7043 = alloca i1, i1 0 + %nop7044 = alloca i1, i1 0 + %nop7045 = alloca i1, i1 0 + %nop7046 = alloca i1, i1 0 + %nop7047 = alloca i1, i1 0 + %nop7048 = alloca i1, i1 0 + %nop7049 = alloca i1, i1 0 + %nop7050 = alloca i1, i1 0 + %nop7051 = alloca i1, i1 0 + %nop7052 = alloca i1, i1 0 + %nop7053 = alloca i1, i1 0 + %nop7054 = alloca i1, i1 0 + %nop7055 = alloca i1, i1 0 + %nop7056 = alloca i1, i1 0 + %nop7057 = alloca i1, i1 0 + %nop7058 = alloca i1, i1 0 + %nop7059 = alloca i1, i1 0 + %nop7060 = alloca i1, i1 0 + %nop7061 = alloca i1, i1 0 + %nop7062 = alloca i1, i1 0 + %nop7063 = alloca i1, i1 0 + %nop7064 = alloca i1, i1 0 + %nop7065 = alloca i1, i1 0 + %nop7066 = alloca i1, i1 0 + %nop7067 = alloca i1, i1 0 + %nop7068 = alloca i1, i1 0 + %nop7069 = alloca i1, i1 0 + %nop7070 = alloca i1, i1 0 + %nop7071 = alloca i1, i1 0 + %nop7072 = alloca i1, i1 0 + %nop7073 = alloca i1, i1 0 + %nop7074 = alloca i1, i1 0 + %nop7075 = alloca i1, i1 0 + %nop7076 = alloca i1, i1 0 + %nop7077 = alloca i1, i1 0 + %nop7078 = alloca i1, i1 0 + %nop7079 = alloca i1, i1 0 + %nop7080 = alloca i1, i1 0 + %nop7081 = alloca i1, i1 0 + %nop7082 = alloca i1, i1 0 + %nop7083 = alloca i1, i1 0 + %nop7084 = alloca i1, i1 0 + %nop7085 = alloca i1, i1 0 + %nop7086 = alloca i1, i1 0 + %nop7087 = alloca i1, i1 0 + %nop7088 = alloca i1, i1 0 + %nop7089 = alloca i1, i1 0 + %nop7090 = alloca i1, i1 0 + %nop7091 = alloca i1, i1 0 + %nop7092 = alloca i1, i1 0 + %nop7093 = alloca i1, i1 0 + %nop7094 = alloca i1, i1 0 + %nop7095 = alloca i1, i1 0 + %nop7096 = alloca i1, i1 0 + %nop7097 = alloca i1, i1 0 + %nop7098 = alloca i1, i1 0 + %nop7099 = alloca i1, i1 0 + %nop7100 = alloca i1, i1 0 + %nop7101 = alloca i1, i1 0 + %nop7102 = alloca i1, i1 0 + %nop7103 = alloca i1, i1 0 + %nop7104 = alloca i1, i1 
0 + %nop7105 = alloca i1, i1 0 + %nop7106 = alloca i1, i1 0 + %nop7107 = alloca i1, i1 0 + %nop7108 = alloca i1, i1 0 + %nop7109 = alloca i1, i1 0 + %nop7110 = alloca i1, i1 0 + %nop7111 = alloca i1, i1 0 + %nop7112 = alloca i1, i1 0 + %nop7113 = alloca i1, i1 0 + %nop7114 = alloca i1, i1 0 + %nop7115 = alloca i1, i1 0 + %nop7116 = alloca i1, i1 0 + %nop7117 = alloca i1, i1 0 + %nop7118 = alloca i1, i1 0 + %nop7119 = alloca i1, i1 0 + %nop7120 = alloca i1, i1 0 + %nop7121 = alloca i1, i1 0 + %nop7122 = alloca i1, i1 0 + %nop7123 = alloca i1, i1 0 + %nop7124 = alloca i1, i1 0 + %nop7125 = alloca i1, i1 0 + %nop7126 = alloca i1, i1 0 + %nop7127 = alloca i1, i1 0 + %nop7128 = alloca i1, i1 0 + %nop7129 = alloca i1, i1 0 + %nop7130 = alloca i1, i1 0 + %nop7131 = alloca i1, i1 0 + %nop7132 = alloca i1, i1 0 + %nop7133 = alloca i1, i1 0 + %nop7134 = alloca i1, i1 0 + %nop7135 = alloca i1, i1 0 + %nop7136 = alloca i1, i1 0 + %nop7137 = alloca i1, i1 0 + %nop7138 = alloca i1, i1 0 + %nop7139 = alloca i1, i1 0 + %nop7140 = alloca i1, i1 0 + %nop7141 = alloca i1, i1 0 + %nop7142 = alloca i1, i1 0 + %nop7143 = alloca i1, i1 0 + %nop7144 = alloca i1, i1 0 + %nop7145 = alloca i1, i1 0 + %nop7146 = alloca i1, i1 0 + %nop7147 = alloca i1, i1 0 + %nop7148 = alloca i1, i1 0 + %nop7149 = alloca i1, i1 0 + %nop7150 = alloca i1, i1 0 + %nop7151 = alloca i1, i1 0 + %nop7152 = alloca i1, i1 0 + %nop7153 = alloca i1, i1 0 + %nop7154 = alloca i1, i1 0 + %nop7155 = alloca i1, i1 0 + %nop7156 = alloca i1, i1 0 + %nop7157 = alloca i1, i1 0 + %nop7158 = alloca i1, i1 0 + %nop7159 = alloca i1, i1 0 + %nop7160 = alloca i1, i1 0 + %nop7161 = alloca i1, i1 0 + %nop7162 = alloca i1, i1 0 + %nop7163 = alloca i1, i1 0 + %nop7164 = alloca i1, i1 0 + %nop7165 = alloca i1, i1 0 + %nop7166 = alloca i1, i1 0 + %nop7167 = alloca i1, i1 0 + %nop7168 = alloca i1, i1 0 + %nop7169 = alloca i1, i1 0 + %nop7170 = alloca i1, i1 0 + %nop7171 = alloca i1, i1 0 + %nop7172 = alloca i1, i1 0 + %nop7173 = alloca i1, 
i1 0 + %nop7174 = alloca i1, i1 0 + %nop7175 = alloca i1, i1 0 + %nop7176 = alloca i1, i1 0 + %nop7177 = alloca i1, i1 0 + %nop7178 = alloca i1, i1 0 + %nop7179 = alloca i1, i1 0 + %nop7180 = alloca i1, i1 0 + %nop7181 = alloca i1, i1 0 + %nop7182 = alloca i1, i1 0 + %nop7183 = alloca i1, i1 0 + %nop7184 = alloca i1, i1 0 + %nop7185 = alloca i1, i1 0 + %nop7186 = alloca i1, i1 0 + %nop7187 = alloca i1, i1 0 + %nop7188 = alloca i1, i1 0 + %nop7189 = alloca i1, i1 0 + %nop7190 = alloca i1, i1 0 + %nop7191 = alloca i1, i1 0 + %nop7192 = alloca i1, i1 0 + %nop7193 = alloca i1, i1 0 + %nop7194 = alloca i1, i1 0 + %nop7195 = alloca i1, i1 0 + %nop7196 = alloca i1, i1 0 + %nop7197 = alloca i1, i1 0 + %nop7198 = alloca i1, i1 0 + %nop7199 = alloca i1, i1 0 + %nop7200 = alloca i1, i1 0 + %nop7201 = alloca i1, i1 0 + %nop7202 = alloca i1, i1 0 + %nop7203 = alloca i1, i1 0 + %nop7204 = alloca i1, i1 0 + %nop7205 = alloca i1, i1 0 + %nop7206 = alloca i1, i1 0 + %nop7207 = alloca i1, i1 0 + %nop7208 = alloca i1, i1 0 + %nop7209 = alloca i1, i1 0 + %nop7210 = alloca i1, i1 0 + %nop7211 = alloca i1, i1 0 + %nop7212 = alloca i1, i1 0 + %nop7213 = alloca i1, i1 0 + %nop7214 = alloca i1, i1 0 + %nop7215 = alloca i1, i1 0 + %nop7216 = alloca i1, i1 0 + %nop7217 = alloca i1, i1 0 + %nop7218 = alloca i1, i1 0 + %nop7219 = alloca i1, i1 0 + %nop7220 = alloca i1, i1 0 + %nop7221 = alloca i1, i1 0 + %nop7222 = alloca i1, i1 0 + %nop7223 = alloca i1, i1 0 + %nop7224 = alloca i1, i1 0 + %nop7225 = alloca i1, i1 0 + %nop7226 = alloca i1, i1 0 + %nop7227 = alloca i1, i1 0 + %nop7228 = alloca i1, i1 0 + %nop7229 = alloca i1, i1 0 + %nop7230 = alloca i1, i1 0 + %nop7231 = alloca i1, i1 0 + %nop7232 = alloca i1, i1 0 + %nop7233 = alloca i1, i1 0 + %nop7234 = alloca i1, i1 0 + %nop7235 = alloca i1, i1 0 + %nop7236 = alloca i1, i1 0 + %nop7237 = alloca i1, i1 0 + %nop7238 = alloca i1, i1 0 + %nop7239 = alloca i1, i1 0 + %nop7240 = alloca i1, i1 0 + %nop7241 = alloca i1, i1 0 + %nop7242 = alloca 
i1, i1 0 + %nop7243 = alloca i1, i1 0 + %nop7244 = alloca i1, i1 0 + %nop7245 = alloca i1, i1 0 + %nop7246 = alloca i1, i1 0 + %nop7247 = alloca i1, i1 0 + %nop7248 = alloca i1, i1 0 + %nop7249 = alloca i1, i1 0 + %nop7250 = alloca i1, i1 0 + %nop7251 = alloca i1, i1 0 + %nop7252 = alloca i1, i1 0 + %nop7253 = alloca i1, i1 0 + %nop7254 = alloca i1, i1 0 + %nop7255 = alloca i1, i1 0 + %nop7256 = alloca i1, i1 0 + %nop7257 = alloca i1, i1 0 + %nop7258 = alloca i1, i1 0 + %nop7259 = alloca i1, i1 0 + %nop7260 = alloca i1, i1 0 + %nop7261 = alloca i1, i1 0 + %nop7262 = alloca i1, i1 0 + %nop7263 = alloca i1, i1 0 + %nop7264 = alloca i1, i1 0 + %nop7265 = alloca i1, i1 0 + %nop7266 = alloca i1, i1 0 + %nop7267 = alloca i1, i1 0 + %nop7268 = alloca i1, i1 0 + %nop7269 = alloca i1, i1 0 + %nop7270 = alloca i1, i1 0 + %nop7271 = alloca i1, i1 0 + %nop7272 = alloca i1, i1 0 + %nop7273 = alloca i1, i1 0 + %nop7274 = alloca i1, i1 0 + %nop7275 = alloca i1, i1 0 + %nop7276 = alloca i1, i1 0 + %nop7277 = alloca i1, i1 0 + %nop7278 = alloca i1, i1 0 + %nop7279 = alloca i1, i1 0 + %nop7280 = alloca i1, i1 0 + %nop7281 = alloca i1, i1 0 + %nop7282 = alloca i1, i1 0 + %nop7283 = alloca i1, i1 0 + %nop7284 = alloca i1, i1 0 + %nop7285 = alloca i1, i1 0 + %nop7286 = alloca i1, i1 0 + %nop7287 = alloca i1, i1 0 + %nop7288 = alloca i1, i1 0 + %nop7289 = alloca i1, i1 0 + %nop7290 = alloca i1, i1 0 + %nop7291 = alloca i1, i1 0 + %nop7292 = alloca i1, i1 0 + %nop7293 = alloca i1, i1 0 + %nop7294 = alloca i1, i1 0 + %nop7295 = alloca i1, i1 0 + %nop7296 = alloca i1, i1 0 + %nop7297 = alloca i1, i1 0 + %nop7298 = alloca i1, i1 0 + %nop7299 = alloca i1, i1 0 + %nop7300 = alloca i1, i1 0 + %nop7301 = alloca i1, i1 0 + %nop7302 = alloca i1, i1 0 + %nop7303 = alloca i1, i1 0 + %nop7304 = alloca i1, i1 0 + %nop7305 = alloca i1, i1 0 + %nop7306 = alloca i1, i1 0 + %nop7307 = alloca i1, i1 0 + %nop7308 = alloca i1, i1 0 + %nop7309 = alloca i1, i1 0 + %nop7310 = alloca i1, i1 0 + %nop7311 = 
alloca i1, i1 0 + %nop7312 = alloca i1, i1 0 + %nop7313 = alloca i1, i1 0 + %nop7314 = alloca i1, i1 0 + %nop7315 = alloca i1, i1 0 + %nop7316 = alloca i1, i1 0 + %nop7317 = alloca i1, i1 0 + %nop7318 = alloca i1, i1 0 + %nop7319 = alloca i1, i1 0 + %nop7320 = alloca i1, i1 0 + %nop7321 = alloca i1, i1 0 + %nop7322 = alloca i1, i1 0 + %nop7323 = alloca i1, i1 0 + %nop7324 = alloca i1, i1 0 + %nop7325 = alloca i1, i1 0 + %nop7326 = alloca i1, i1 0 + %nop7327 = alloca i1, i1 0 + %nop7328 = alloca i1, i1 0 + %nop7329 = alloca i1, i1 0 + %nop7330 = alloca i1, i1 0 + %nop7331 = alloca i1, i1 0 + %nop7332 = alloca i1, i1 0 + %nop7333 = alloca i1, i1 0 + %nop7334 = alloca i1, i1 0 + %nop7335 = alloca i1, i1 0 + %nop7336 = alloca i1, i1 0 + %nop7337 = alloca i1, i1 0 + %nop7338 = alloca i1, i1 0 + %nop7339 = alloca i1, i1 0 + %nop7340 = alloca i1, i1 0 + %nop7341 = alloca i1, i1 0 + %nop7342 = alloca i1, i1 0 + %nop7343 = alloca i1, i1 0 + %nop7344 = alloca i1, i1 0 + %nop7345 = alloca i1, i1 0 + %nop7346 = alloca i1, i1 0 + %nop7347 = alloca i1, i1 0 + %nop7348 = alloca i1, i1 0 + %nop7349 = alloca i1, i1 0 + %nop7350 = alloca i1, i1 0 + %nop7351 = alloca i1, i1 0 + %nop7352 = alloca i1, i1 0 + %nop7353 = alloca i1, i1 0 + %nop7354 = alloca i1, i1 0 + %nop7355 = alloca i1, i1 0 + %nop7356 = alloca i1, i1 0 + %nop7357 = alloca i1, i1 0 + %nop7358 = alloca i1, i1 0 + %nop7359 = alloca i1, i1 0 + %nop7360 = alloca i1, i1 0 + %nop7361 = alloca i1, i1 0 + %nop7362 = alloca i1, i1 0 + %nop7363 = alloca i1, i1 0 + %nop7364 = alloca i1, i1 0 + %nop7365 = alloca i1, i1 0 + %nop7366 = alloca i1, i1 0 + %nop7367 = alloca i1, i1 0 + %nop7368 = alloca i1, i1 0 + %nop7369 = alloca i1, i1 0 + %nop7370 = alloca i1, i1 0 + %nop7371 = alloca i1, i1 0 + %nop7372 = alloca i1, i1 0 + %nop7373 = alloca i1, i1 0 + %nop7374 = alloca i1, i1 0 + %nop7375 = alloca i1, i1 0 + %nop7376 = alloca i1, i1 0 + %nop7377 = alloca i1, i1 0 + %nop7378 = alloca i1, i1 0 + %nop7379 = alloca i1, i1 0 + %nop7380 
= alloca i1, i1 0 + %nop7381 = alloca i1, i1 0 + %nop7382 = alloca i1, i1 0 + %nop7383 = alloca i1, i1 0 + %nop7384 = alloca i1, i1 0 + %nop7385 = alloca i1, i1 0 + %nop7386 = alloca i1, i1 0 + %nop7387 = alloca i1, i1 0 + %nop7388 = alloca i1, i1 0 + %nop7389 = alloca i1, i1 0 + %nop7390 = alloca i1, i1 0 + %nop7391 = alloca i1, i1 0 + %nop7392 = alloca i1, i1 0 + %nop7393 = alloca i1, i1 0 + %nop7394 = alloca i1, i1 0 + %nop7395 = alloca i1, i1 0 + %nop7396 = alloca i1, i1 0 + %nop7397 = alloca i1, i1 0 + %nop7398 = alloca i1, i1 0 + %nop7399 = alloca i1, i1 0 + %nop7400 = alloca i1, i1 0 + %nop7401 = alloca i1, i1 0 + %nop7402 = alloca i1, i1 0 + %nop7403 = alloca i1, i1 0 + %nop7404 = alloca i1, i1 0 + %nop7405 = alloca i1, i1 0 + %nop7406 = alloca i1, i1 0 + %nop7407 = alloca i1, i1 0 + %nop7408 = alloca i1, i1 0 + %nop7409 = alloca i1, i1 0 + %nop7410 = alloca i1, i1 0 + %nop7411 = alloca i1, i1 0 + %nop7412 = alloca i1, i1 0 + %nop7413 = alloca i1, i1 0 + %nop7414 = alloca i1, i1 0 + %nop7415 = alloca i1, i1 0 + %nop7416 = alloca i1, i1 0 + %nop7417 = alloca i1, i1 0 + %nop7418 = alloca i1, i1 0 + %nop7419 = alloca i1, i1 0 + %nop7420 = alloca i1, i1 0 + %nop7421 = alloca i1, i1 0 + %nop7422 = alloca i1, i1 0 + %nop7423 = alloca i1, i1 0 + %nop7424 = alloca i1, i1 0 + %nop7425 = alloca i1, i1 0 + %nop7426 = alloca i1, i1 0 + %nop7427 = alloca i1, i1 0 + %nop7428 = alloca i1, i1 0 + %nop7429 = alloca i1, i1 0 + %nop7430 = alloca i1, i1 0 + %nop7431 = alloca i1, i1 0 + %nop7432 = alloca i1, i1 0 + %nop7433 = alloca i1, i1 0 + %nop7434 = alloca i1, i1 0 + %nop7435 = alloca i1, i1 0 + %nop7436 = alloca i1, i1 0 + %nop7437 = alloca i1, i1 0 + %nop7438 = alloca i1, i1 0 + %nop7439 = alloca i1, i1 0 + %nop7440 = alloca i1, i1 0 + %nop7441 = alloca i1, i1 0 + %nop7442 = alloca i1, i1 0 + %nop7443 = alloca i1, i1 0 + %nop7444 = alloca i1, i1 0 + %nop7445 = alloca i1, i1 0 + %nop7446 = alloca i1, i1 0 + %nop7447 = alloca i1, i1 0 + %nop7448 = alloca i1, i1 0 + 
%nop7449 = alloca i1, i1 0 + %nop7450 = alloca i1, i1 0 + %nop7451 = alloca i1, i1 0 + %nop7452 = alloca i1, i1 0 + %nop7453 = alloca i1, i1 0 + %nop7454 = alloca i1, i1 0 + %nop7455 = alloca i1, i1 0 + %nop7456 = alloca i1, i1 0 + %nop7457 = alloca i1, i1 0 + %nop7458 = alloca i1, i1 0 + %nop7459 = alloca i1, i1 0 + %nop7460 = alloca i1, i1 0 + %nop7461 = alloca i1, i1 0 + %nop7462 = alloca i1, i1 0 + %nop7463 = alloca i1, i1 0 + %nop7464 = alloca i1, i1 0 + %nop7465 = alloca i1, i1 0 + %nop7466 = alloca i1, i1 0 + %nop7467 = alloca i1, i1 0 + %nop7468 = alloca i1, i1 0 + %nop7469 = alloca i1, i1 0 + %nop7470 = alloca i1, i1 0 + %nop7471 = alloca i1, i1 0 + %nop7472 = alloca i1, i1 0 + %nop7473 = alloca i1, i1 0 + %nop7474 = alloca i1, i1 0 + %nop7475 = alloca i1, i1 0 + %nop7476 = alloca i1, i1 0 + %nop7477 = alloca i1, i1 0 + %nop7478 = alloca i1, i1 0 + %nop7479 = alloca i1, i1 0 + %nop7480 = alloca i1, i1 0 + %nop7481 = alloca i1, i1 0 + %nop7482 = alloca i1, i1 0 + %nop7483 = alloca i1, i1 0 + %nop7484 = alloca i1, i1 0 + %nop7485 = alloca i1, i1 0 + %nop7486 = alloca i1, i1 0 + %nop7487 = alloca i1, i1 0 + %nop7488 = alloca i1, i1 0 + %nop7489 = alloca i1, i1 0 + %nop7490 = alloca i1, i1 0 + %nop7491 = alloca i1, i1 0 + %nop7492 = alloca i1, i1 0 + %nop7493 = alloca i1, i1 0 + %nop7494 = alloca i1, i1 0 + %nop7495 = alloca i1, i1 0 + %nop7496 = alloca i1, i1 0 + %nop7497 = alloca i1, i1 0 + %nop7498 = alloca i1, i1 0 + %nop7499 = alloca i1, i1 0 + %nop7500 = alloca i1, i1 0 + %nop7501 = alloca i1, i1 0 + %nop7502 = alloca i1, i1 0 + %nop7503 = alloca i1, i1 0 + %nop7504 = alloca i1, i1 0 + %nop7505 = alloca i1, i1 0 + %nop7506 = alloca i1, i1 0 + %nop7507 = alloca i1, i1 0 + %nop7508 = alloca i1, i1 0 + %nop7509 = alloca i1, i1 0 + %nop7510 = alloca i1, i1 0 + %nop7511 = alloca i1, i1 0 + %nop7512 = alloca i1, i1 0 + %nop7513 = alloca i1, i1 0 + %nop7514 = alloca i1, i1 0 + %nop7515 = alloca i1, i1 0 + %nop7516 = alloca i1, i1 0 + %nop7517 = alloca i1, i1 0 
+ %nop7518 = alloca i1, i1 0 + %nop7519 = alloca i1, i1 0 + %nop7520 = alloca i1, i1 0 + %nop7521 = alloca i1, i1 0 + %nop7522 = alloca i1, i1 0 + %nop7523 = alloca i1, i1 0 + %nop7524 = alloca i1, i1 0 + %nop7525 = alloca i1, i1 0 + %nop7526 = alloca i1, i1 0 + %nop7527 = alloca i1, i1 0 + %nop7528 = alloca i1, i1 0 + %nop7529 = alloca i1, i1 0 + %nop7530 = alloca i1, i1 0 + %nop7531 = alloca i1, i1 0 + %nop7532 = alloca i1, i1 0 + %nop7533 = alloca i1, i1 0 + %nop7534 = alloca i1, i1 0 + %nop7535 = alloca i1, i1 0 + %nop7536 = alloca i1, i1 0 + %nop7537 = alloca i1, i1 0 + %nop7538 = alloca i1, i1 0 + %nop7539 = alloca i1, i1 0 + %nop7540 = alloca i1, i1 0 + %nop7541 = alloca i1, i1 0 + %nop7542 = alloca i1, i1 0 + %nop7543 = alloca i1, i1 0 + %nop7544 = alloca i1, i1 0 + %nop7545 = alloca i1, i1 0 + %nop7546 = alloca i1, i1 0 + %nop7547 = alloca i1, i1 0 + %nop7548 = alloca i1, i1 0 + %nop7549 = alloca i1, i1 0 + %nop7550 = alloca i1, i1 0 + %nop7551 = alloca i1, i1 0 + %nop7552 = alloca i1, i1 0 + %nop7553 = alloca i1, i1 0 + %nop7554 = alloca i1, i1 0 + %nop7555 = alloca i1, i1 0 + %nop7556 = alloca i1, i1 0 + %nop7557 = alloca i1, i1 0 + %nop7558 = alloca i1, i1 0 + %nop7559 = alloca i1, i1 0 + %nop7560 = alloca i1, i1 0 + %nop7561 = alloca i1, i1 0 + %nop7562 = alloca i1, i1 0 + %nop7563 = alloca i1, i1 0 + %nop7564 = alloca i1, i1 0 + %nop7565 = alloca i1, i1 0 + %nop7566 = alloca i1, i1 0 + %nop7567 = alloca i1, i1 0 + %nop7568 = alloca i1, i1 0 + %nop7569 = alloca i1, i1 0 + %nop7570 = alloca i1, i1 0 + %nop7571 = alloca i1, i1 0 + %nop7572 = alloca i1, i1 0 + %nop7573 = alloca i1, i1 0 + %nop7574 = alloca i1, i1 0 + %nop7575 = alloca i1, i1 0 + %nop7576 = alloca i1, i1 0 + %nop7577 = alloca i1, i1 0 + %nop7578 = alloca i1, i1 0 + %nop7579 = alloca i1, i1 0 + %nop7580 = alloca i1, i1 0 + %nop7581 = alloca i1, i1 0 + %nop7582 = alloca i1, i1 0 + %nop7583 = alloca i1, i1 0 + %nop7584 = alloca i1, i1 0 + %nop7585 = alloca i1, i1 0 + %nop7586 = alloca i1, i1 
0 + %nop7587 = alloca i1, i1 0 + %nop7588 = alloca i1, i1 0 + %nop7589 = alloca i1, i1 0 + %nop7590 = alloca i1, i1 0 + %nop7591 = alloca i1, i1 0 + %nop7592 = alloca i1, i1 0 + %nop7593 = alloca i1, i1 0 + %nop7594 = alloca i1, i1 0 + %nop7595 = alloca i1, i1 0 + %nop7596 = alloca i1, i1 0 + %nop7597 = alloca i1, i1 0 + %nop7598 = alloca i1, i1 0 + %nop7599 = alloca i1, i1 0 + %nop7600 = alloca i1, i1 0 + %nop7601 = alloca i1, i1 0 + %nop7602 = alloca i1, i1 0 + %nop7603 = alloca i1, i1 0 + %nop7604 = alloca i1, i1 0 + %nop7605 = alloca i1, i1 0 + %nop7606 = alloca i1, i1 0 + %nop7607 = alloca i1, i1 0 + %nop7608 = alloca i1, i1 0 + %nop7609 = alloca i1, i1 0 + %nop7610 = alloca i1, i1 0 + %nop7611 = alloca i1, i1 0 + %nop7612 = alloca i1, i1 0 + %nop7613 = alloca i1, i1 0 + %nop7614 = alloca i1, i1 0 + %nop7615 = alloca i1, i1 0 + %nop7616 = alloca i1, i1 0 + %nop7617 = alloca i1, i1 0 + %nop7618 = alloca i1, i1 0 + %nop7619 = alloca i1, i1 0 + %nop7620 = alloca i1, i1 0 + %nop7621 = alloca i1, i1 0 + %nop7622 = alloca i1, i1 0 + %nop7623 = alloca i1, i1 0 + %nop7624 = alloca i1, i1 0 + %nop7625 = alloca i1, i1 0 + %nop7626 = alloca i1, i1 0 + %nop7627 = alloca i1, i1 0 + %nop7628 = alloca i1, i1 0 + %nop7629 = alloca i1, i1 0 + %nop7630 = alloca i1, i1 0 + %nop7631 = alloca i1, i1 0 + %nop7632 = alloca i1, i1 0 + %nop7633 = alloca i1, i1 0 + %nop7634 = alloca i1, i1 0 + %nop7635 = alloca i1, i1 0 + %nop7636 = alloca i1, i1 0 + %nop7637 = alloca i1, i1 0 + %nop7638 = alloca i1, i1 0 + %nop7639 = alloca i1, i1 0 + %nop7640 = alloca i1, i1 0 + %nop7641 = alloca i1, i1 0 + %nop7642 = alloca i1, i1 0 + %nop7643 = alloca i1, i1 0 + %nop7644 = alloca i1, i1 0 + %nop7645 = alloca i1, i1 0 + %nop7646 = alloca i1, i1 0 + %nop7647 = alloca i1, i1 0 + %nop7648 = alloca i1, i1 0 + %nop7649 = alloca i1, i1 0 + %nop7650 = alloca i1, i1 0 + %nop7651 = alloca i1, i1 0 + %nop7652 = alloca i1, i1 0 + %nop7653 = alloca i1, i1 0 + %nop7654 = alloca i1, i1 0 + %nop7655 = alloca i1, 
i1 0 + %nop7656 = alloca i1, i1 0 + %nop7657 = alloca i1, i1 0 + %nop7658 = alloca i1, i1 0 + %nop7659 = alloca i1, i1 0 + %nop7660 = alloca i1, i1 0 + %nop7661 = alloca i1, i1 0 + %nop7662 = alloca i1, i1 0 + %nop7663 = alloca i1, i1 0 + %nop7664 = alloca i1, i1 0 + %nop7665 = alloca i1, i1 0 + %nop7666 = alloca i1, i1 0 + %nop7667 = alloca i1, i1 0 + %nop7668 = alloca i1, i1 0 + %nop7669 = alloca i1, i1 0 + %nop7670 = alloca i1, i1 0 + %nop7671 = alloca i1, i1 0 + %nop7672 = alloca i1, i1 0 + %nop7673 = alloca i1, i1 0 + %nop7674 = alloca i1, i1 0 + %nop7675 = alloca i1, i1 0 + %nop7676 = alloca i1, i1 0 + %nop7677 = alloca i1, i1 0 + %nop7678 = alloca i1, i1 0 + %nop7679 = alloca i1, i1 0 + %nop7680 = alloca i1, i1 0 + %nop7681 = alloca i1, i1 0 + %nop7682 = alloca i1, i1 0 + %nop7683 = alloca i1, i1 0 + %nop7684 = alloca i1, i1 0 + %nop7685 = alloca i1, i1 0 + %nop7686 = alloca i1, i1 0 + %nop7687 = alloca i1, i1 0 + %nop7688 = alloca i1, i1 0 + %nop7689 = alloca i1, i1 0 + %nop7690 = alloca i1, i1 0 + %nop7691 = alloca i1, i1 0 + %nop7692 = alloca i1, i1 0 + %nop7693 = alloca i1, i1 0 + %nop7694 = alloca i1, i1 0 + %nop7695 = alloca i1, i1 0 + %nop7696 = alloca i1, i1 0 + %nop7697 = alloca i1, i1 0 + %nop7698 = alloca i1, i1 0 + %nop7699 = alloca i1, i1 0 + %nop7700 = alloca i1, i1 0 + %nop7701 = alloca i1, i1 0 + %nop7702 = alloca i1, i1 0 + %nop7703 = alloca i1, i1 0 + %nop7704 = alloca i1, i1 0 + %nop7705 = alloca i1, i1 0 + %nop7706 = alloca i1, i1 0 + %nop7707 = alloca i1, i1 0 + %nop7708 = alloca i1, i1 0 + %nop7709 = alloca i1, i1 0 + %nop7710 = alloca i1, i1 0 + %nop7711 = alloca i1, i1 0 + %nop7712 = alloca i1, i1 0 + %nop7713 = alloca i1, i1 0 + %nop7714 = alloca i1, i1 0 + %nop7715 = alloca i1, i1 0 + %nop7716 = alloca i1, i1 0 + %nop7717 = alloca i1, i1 0 + %nop7718 = alloca i1, i1 0 + %nop7719 = alloca i1, i1 0 + %nop7720 = alloca i1, i1 0 + %nop7721 = alloca i1, i1 0 + %nop7722 = alloca i1, i1 0 + %nop7723 = alloca i1, i1 0 + %nop7724 = alloca 
i1, i1 0 + %nop7725 = alloca i1, i1 0 + %nop7726 = alloca i1, i1 0 + %nop7727 = alloca i1, i1 0 + %nop7728 = alloca i1, i1 0 + %nop7729 = alloca i1, i1 0 + %nop7730 = alloca i1, i1 0 + %nop7731 = alloca i1, i1 0 + %nop7732 = alloca i1, i1 0 + %nop7733 = alloca i1, i1 0 + %nop7734 = alloca i1, i1 0 + %nop7735 = alloca i1, i1 0 + %nop7736 = alloca i1, i1 0 + %nop7737 = alloca i1, i1 0 + %nop7738 = alloca i1, i1 0 + %nop7739 = alloca i1, i1 0 + %nop7740 = alloca i1, i1 0 + %nop7741 = alloca i1, i1 0 + %nop7742 = alloca i1, i1 0 + %nop7743 = alloca i1, i1 0 + %nop7744 = alloca i1, i1 0 + %nop7745 = alloca i1, i1 0 + %nop7746 = alloca i1, i1 0 + %nop7747 = alloca i1, i1 0 + %nop7748 = alloca i1, i1 0 + %nop7749 = alloca i1, i1 0 + %nop7750 = alloca i1, i1 0 + %nop7751 = alloca i1, i1 0 + %nop7752 = alloca i1, i1 0 + %nop7753 = alloca i1, i1 0 + %nop7754 = alloca i1, i1 0 + %nop7755 = alloca i1, i1 0 + %nop7756 = alloca i1, i1 0 + %nop7757 = alloca i1, i1 0 + %nop7758 = alloca i1, i1 0 + %nop7759 = alloca i1, i1 0 + %nop7760 = alloca i1, i1 0 + %nop7761 = alloca i1, i1 0 + %nop7762 = alloca i1, i1 0 + %nop7763 = alloca i1, i1 0 + %nop7764 = alloca i1, i1 0 + %nop7765 = alloca i1, i1 0 + %nop7766 = alloca i1, i1 0 + %nop7767 = alloca i1, i1 0 + %nop7768 = alloca i1, i1 0 + %nop7769 = alloca i1, i1 0 + %nop7770 = alloca i1, i1 0 + %nop7771 = alloca i1, i1 0 + %nop7772 = alloca i1, i1 0 + %nop7773 = alloca i1, i1 0 + %nop7774 = alloca i1, i1 0 + %nop7775 = alloca i1, i1 0 + %nop7776 = alloca i1, i1 0 + %nop7777 = alloca i1, i1 0 + %nop7778 = alloca i1, i1 0 + %nop7779 = alloca i1, i1 0 + %nop7780 = alloca i1, i1 0 + %nop7781 = alloca i1, i1 0 + %nop7782 = alloca i1, i1 0 + %nop7783 = alloca i1, i1 0 + %nop7784 = alloca i1, i1 0 + %nop7785 = alloca i1, i1 0 + %nop7786 = alloca i1, i1 0 + %nop7787 = alloca i1, i1 0 + %nop7788 = alloca i1, i1 0 + %nop7789 = alloca i1, i1 0 + %nop7790 = alloca i1, i1 0 + %nop7791 = alloca i1, i1 0 + %nop7792 = alloca i1, i1 0 + %nop7793 = 
alloca i1, i1 0 + %nop7794 = alloca i1, i1 0 + %nop7795 = alloca i1, i1 0 + %nop7796 = alloca i1, i1 0 + %nop7797 = alloca i1, i1 0 + %nop7798 = alloca i1, i1 0 + %nop7799 = alloca i1, i1 0 + %nop7800 = alloca i1, i1 0 + %nop7801 = alloca i1, i1 0 + %nop7802 = alloca i1, i1 0 + %nop7803 = alloca i1, i1 0 + %nop7804 = alloca i1, i1 0 + %nop7805 = alloca i1, i1 0 + %nop7806 = alloca i1, i1 0 + %nop7807 = alloca i1, i1 0 + %nop7808 = alloca i1, i1 0 + %nop7809 = alloca i1, i1 0 + %nop7810 = alloca i1, i1 0 + %nop7811 = alloca i1, i1 0 + %nop7812 = alloca i1, i1 0 + %nop7813 = alloca i1, i1 0 + %nop7814 = alloca i1, i1 0 + %nop7815 = alloca i1, i1 0 + %nop7816 = alloca i1, i1 0 + %nop7817 = alloca i1, i1 0 + %nop7818 = alloca i1, i1 0 + %nop7819 = alloca i1, i1 0 + %nop7820 = alloca i1, i1 0 + %nop7821 = alloca i1, i1 0 + %nop7822 = alloca i1, i1 0 + %nop7823 = alloca i1, i1 0 + %nop7824 = alloca i1, i1 0 + %nop7825 = alloca i1, i1 0 + %nop7826 = alloca i1, i1 0 + %nop7827 = alloca i1, i1 0 + %nop7828 = alloca i1, i1 0 + %nop7829 = alloca i1, i1 0 + %nop7830 = alloca i1, i1 0 + %nop7831 = alloca i1, i1 0 + %nop7832 = alloca i1, i1 0 + %nop7833 = alloca i1, i1 0 + %nop7834 = alloca i1, i1 0 + %nop7835 = alloca i1, i1 0 + %nop7836 = alloca i1, i1 0 + %nop7837 = alloca i1, i1 0 + %nop7838 = alloca i1, i1 0 + %nop7839 = alloca i1, i1 0 + %nop7840 = alloca i1, i1 0 + %nop7841 = alloca i1, i1 0 + %nop7842 = alloca i1, i1 0 + %nop7843 = alloca i1, i1 0 + %nop7844 = alloca i1, i1 0 + %nop7845 = alloca i1, i1 0 + %nop7846 = alloca i1, i1 0 + %nop7847 = alloca i1, i1 0 + %nop7848 = alloca i1, i1 0 + %nop7849 = alloca i1, i1 0 + %nop7850 = alloca i1, i1 0 + %nop7851 = alloca i1, i1 0 + %nop7852 = alloca i1, i1 0 + %nop7853 = alloca i1, i1 0 + %nop7854 = alloca i1, i1 0 + %nop7855 = alloca i1, i1 0 + %nop7856 = alloca i1, i1 0 + %nop7857 = alloca i1, i1 0 + %nop7858 = alloca i1, i1 0 + %nop7859 = alloca i1, i1 0 + %nop7860 = alloca i1, i1 0 + %nop7861 = alloca i1, i1 0 + %nop7862 
= alloca i1, i1 0 + %nop7863 = alloca i1, i1 0 + %nop7864 = alloca i1, i1 0 + %nop7865 = alloca i1, i1 0 + %nop7866 = alloca i1, i1 0 + %nop7867 = alloca i1, i1 0 + %nop7868 = alloca i1, i1 0 + %nop7869 = alloca i1, i1 0 + %nop7870 = alloca i1, i1 0 + %nop7871 = alloca i1, i1 0 + %nop7872 = alloca i1, i1 0 + %nop7873 = alloca i1, i1 0 + %nop7874 = alloca i1, i1 0 + %nop7875 = alloca i1, i1 0 + %nop7876 = alloca i1, i1 0 + %nop7877 = alloca i1, i1 0 + %nop7878 = alloca i1, i1 0 + %nop7879 = alloca i1, i1 0 + %nop7880 = alloca i1, i1 0 + %nop7881 = alloca i1, i1 0 + %nop7882 = alloca i1, i1 0 + %nop7883 = alloca i1, i1 0 + %nop7884 = alloca i1, i1 0 + %nop7885 = alloca i1, i1 0 + %nop7886 = alloca i1, i1 0 + %nop7887 = alloca i1, i1 0 + %nop7888 = alloca i1, i1 0 + %nop7889 = alloca i1, i1 0 + %nop7890 = alloca i1, i1 0 + %nop7891 = alloca i1, i1 0 + %nop7892 = alloca i1, i1 0 + %nop7893 = alloca i1, i1 0 + %nop7894 = alloca i1, i1 0 + %nop7895 = alloca i1, i1 0 + %nop7896 = alloca i1, i1 0 + %nop7897 = alloca i1, i1 0 + %nop7898 = alloca i1, i1 0 + %nop7899 = alloca i1, i1 0 + %nop7900 = alloca i1, i1 0 + %nop7901 = alloca i1, i1 0 + %nop7902 = alloca i1, i1 0 + %nop7903 = alloca i1, i1 0 + %nop7904 = alloca i1, i1 0 + %nop7905 = alloca i1, i1 0 + %nop7906 = alloca i1, i1 0 + %nop7907 = alloca i1, i1 0 + %nop7908 = alloca i1, i1 0 + %nop7909 = alloca i1, i1 0 + %nop7910 = alloca i1, i1 0 + %nop7911 = alloca i1, i1 0 + %nop7912 = alloca i1, i1 0 + %nop7913 = alloca i1, i1 0 + %nop7914 = alloca i1, i1 0 + %nop7915 = alloca i1, i1 0 + %nop7916 = alloca i1, i1 0 + %nop7917 = alloca i1, i1 0 + %nop7918 = alloca i1, i1 0 + %nop7919 = alloca i1, i1 0 + %nop7920 = alloca i1, i1 0 + %nop7921 = alloca i1, i1 0 + %nop7922 = alloca i1, i1 0 + %nop7923 = alloca i1, i1 0 + %nop7924 = alloca i1, i1 0 + %nop7925 = alloca i1, i1 0 + %nop7926 = alloca i1, i1 0 + %nop7927 = alloca i1, i1 0 + %nop7928 = alloca i1, i1 0 + %nop7929 = alloca i1, i1 0 + %nop7930 = alloca i1, i1 0 + 
%nop7931 = alloca i1, i1 0 + %nop7932 = alloca i1, i1 0 + %nop7933 = alloca i1, i1 0 + %nop7934 = alloca i1, i1 0 + %nop7935 = alloca i1, i1 0 + %nop7936 = alloca i1, i1 0 + %nop7937 = alloca i1, i1 0 + %nop7938 = alloca i1, i1 0 + %nop7939 = alloca i1, i1 0 + %nop7940 = alloca i1, i1 0 + %nop7941 = alloca i1, i1 0 + %nop7942 = alloca i1, i1 0 + %nop7943 = alloca i1, i1 0 + %nop7944 = alloca i1, i1 0 + %nop7945 = alloca i1, i1 0 + %nop7946 = alloca i1, i1 0 + %nop7947 = alloca i1, i1 0 + %nop7948 = alloca i1, i1 0 + %nop7949 = alloca i1, i1 0 + %nop7950 = alloca i1, i1 0 + %nop7951 = alloca i1, i1 0 + %nop7952 = alloca i1, i1 0 + %nop7953 = alloca i1, i1 0 + %nop7954 = alloca i1, i1 0 + %nop7955 = alloca i1, i1 0 + %nop7956 = alloca i1, i1 0 + %nop7957 = alloca i1, i1 0 + %nop7958 = alloca i1, i1 0 + %nop7959 = alloca i1, i1 0 + %nop7960 = alloca i1, i1 0 + %nop7961 = alloca i1, i1 0 + %nop7962 = alloca i1, i1 0 + %nop7963 = alloca i1, i1 0 + %nop7964 = alloca i1, i1 0 + %nop7965 = alloca i1, i1 0 + %nop7966 = alloca i1, i1 0 + %nop7967 = alloca i1, i1 0 + %nop7968 = alloca i1, i1 0 + %nop7969 = alloca i1, i1 0 + %nop7970 = alloca i1, i1 0 + %nop7971 = alloca i1, i1 0 + %nop7972 = alloca i1, i1 0 + %nop7973 = alloca i1, i1 0 + %nop7974 = alloca i1, i1 0 + %nop7975 = alloca i1, i1 0 + %nop7976 = alloca i1, i1 0 + %nop7977 = alloca i1, i1 0 + %nop7978 = alloca i1, i1 0 + %nop7979 = alloca i1, i1 0 + %nop7980 = alloca i1, i1 0 + %nop7981 = alloca i1, i1 0 + %nop7982 = alloca i1, i1 0 + %nop7983 = alloca i1, i1 0 + %nop7984 = alloca i1, i1 0 + %nop7985 = alloca i1, i1 0 + %nop7986 = alloca i1, i1 0 + %nop7987 = alloca i1, i1 0 + %nop7988 = alloca i1, i1 0 + %nop7989 = alloca i1, i1 0 + %nop7990 = alloca i1, i1 0 + %nop7991 = alloca i1, i1 0 + %nop7992 = alloca i1, i1 0 + %nop7993 = alloca i1, i1 0 + %nop7994 = alloca i1, i1 0 + %nop7995 = alloca i1, i1 0 + %nop7996 = alloca i1, i1 0 + %nop7997 = alloca i1, i1 0 + %nop7998 = alloca i1, i1 0 + %nop7999 = alloca i1, i1 0 
+ %nop8000 = alloca i1, i1 0 + %nop8001 = alloca i1, i1 0 + %nop8002 = alloca i1, i1 0 + %nop8003 = alloca i1, i1 0 + %nop8004 = alloca i1, i1 0 + %nop8005 = alloca i1, i1 0 + %nop8006 = alloca i1, i1 0 + %nop8007 = alloca i1, i1 0 + %nop8008 = alloca i1, i1 0 + %nop8009 = alloca i1, i1 0 + %nop8010 = alloca i1, i1 0 + %nop8011 = alloca i1, i1 0 + %nop8012 = alloca i1, i1 0 + %nop8013 = alloca i1, i1 0 + %nop8014 = alloca i1, i1 0 + %nop8015 = alloca i1, i1 0 + %nop8016 = alloca i1, i1 0 + %nop8017 = alloca i1, i1 0 + %nop8018 = alloca i1, i1 0 + %nop8019 = alloca i1, i1 0 + %nop8020 = alloca i1, i1 0 + %nop8021 = alloca i1, i1 0 + %nop8022 = alloca i1, i1 0 + %nop8023 = alloca i1, i1 0 + %nop8024 = alloca i1, i1 0 + %nop8025 = alloca i1, i1 0 + %nop8026 = alloca i1, i1 0 + %nop8027 = alloca i1, i1 0 + %nop8028 = alloca i1, i1 0 + %nop8029 = alloca i1, i1 0 + %nop8030 = alloca i1, i1 0 + %nop8031 = alloca i1, i1 0 + %nop8032 = alloca i1, i1 0 + %nop8033 = alloca i1, i1 0 + %nop8034 = alloca i1, i1 0 + %nop8035 = alloca i1, i1 0 + %nop8036 = alloca i1, i1 0 + %nop8037 = alloca i1, i1 0 + %nop8038 = alloca i1, i1 0 + %nop8039 = alloca i1, i1 0 + %nop8040 = alloca i1, i1 0 + %nop8041 = alloca i1, i1 0 + %nop8042 = alloca i1, i1 0 + %nop8043 = alloca i1, i1 0 + %nop8044 = alloca i1, i1 0 + %nop8045 = alloca i1, i1 0 + %nop8046 = alloca i1, i1 0 + %nop8047 = alloca i1, i1 0 + %nop8048 = alloca i1, i1 0 + %nop8049 = alloca i1, i1 0 + %nop8050 = alloca i1, i1 0 + %nop8051 = alloca i1, i1 0 + %nop8052 = alloca i1, i1 0 + %nop8053 = alloca i1, i1 0 + %nop8054 = alloca i1, i1 0 + %nop8055 = alloca i1, i1 0 + %nop8056 = alloca i1, i1 0 + %nop8057 = alloca i1, i1 0 + %nop8058 = alloca i1, i1 0 + %nop8059 = alloca i1, i1 0 + %nop8060 = alloca i1, i1 0 + %nop8061 = alloca i1, i1 0 + %nop8062 = alloca i1, i1 0 + %nop8063 = alloca i1, i1 0 + %nop8064 = alloca i1, i1 0 + %nop8065 = alloca i1, i1 0 + %nop8066 = alloca i1, i1 0 + %nop8067 = alloca i1, i1 0 + %nop8068 = alloca i1, i1 
0 + %nop8069 = alloca i1, i1 0 + %nop8070 = alloca i1, i1 0 + %nop8071 = alloca i1, i1 0 + %nop8072 = alloca i1, i1 0 + %nop8073 = alloca i1, i1 0 + %nop8074 = alloca i1, i1 0 + %nop8075 = alloca i1, i1 0 + %nop8076 = alloca i1, i1 0 + %nop8077 = alloca i1, i1 0 + %nop8078 = alloca i1, i1 0 + %nop8079 = alloca i1, i1 0 + %nop8080 = alloca i1, i1 0 + %nop8081 = alloca i1, i1 0 + %nop8082 = alloca i1, i1 0 + %nop8083 = alloca i1, i1 0 + %nop8084 = alloca i1, i1 0 + %nop8085 = alloca i1, i1 0 + %nop8086 = alloca i1, i1 0 + %nop8087 = alloca i1, i1 0 + %nop8088 = alloca i1, i1 0 + %nop8089 = alloca i1, i1 0 + %nop8090 = alloca i1, i1 0 + %nop8091 = alloca i1, i1 0 + %nop8092 = alloca i1, i1 0 + %nop8093 = alloca i1, i1 0 + %nop8094 = alloca i1, i1 0 + %nop8095 = alloca i1, i1 0 + %nop8096 = alloca i1, i1 0 + %nop8097 = alloca i1, i1 0 + %nop8098 = alloca i1, i1 0 + %nop8099 = alloca i1, i1 0 + %nop8100 = alloca i1, i1 0 + %nop8101 = alloca i1, i1 0 + %nop8102 = alloca i1, i1 0 + %nop8103 = alloca i1, i1 0 + %nop8104 = alloca i1, i1 0 + %nop8105 = alloca i1, i1 0 + %nop8106 = alloca i1, i1 0 + %nop8107 = alloca i1, i1 0 + %nop8108 = alloca i1, i1 0 + %nop8109 = alloca i1, i1 0 + %nop8110 = alloca i1, i1 0 + %nop8111 = alloca i1, i1 0 + %nop8112 = alloca i1, i1 0 + %nop8113 = alloca i1, i1 0 + %nop8114 = alloca i1, i1 0 + %nop8115 = alloca i1, i1 0 + %nop8116 = alloca i1, i1 0 + %nop8117 = alloca i1, i1 0 + %nop8118 = alloca i1, i1 0 + %nop8119 = alloca i1, i1 0 + %nop8120 = alloca i1, i1 0 + %nop8121 = alloca i1, i1 0 + %nop8122 = alloca i1, i1 0 + %nop8123 = alloca i1, i1 0 + %nop8124 = alloca i1, i1 0 + %nop8125 = alloca i1, i1 0 + %nop8126 = alloca i1, i1 0 + %nop8127 = alloca i1, i1 0 + %nop8128 = alloca i1, i1 0 + %nop8129 = alloca i1, i1 0 + %nop8130 = alloca i1, i1 0 + %nop8131 = alloca i1, i1 0 + %nop8132 = alloca i1, i1 0 + %nop8133 = alloca i1, i1 0 + %nop8134 = alloca i1, i1 0 + %nop8135 = alloca i1, i1 0 + %nop8136 = alloca i1, i1 0 + %nop8137 = alloca i1, 
i1 0 + %nop8138 = alloca i1, i1 0 + %nop8139 = alloca i1, i1 0 + %nop8140 = alloca i1, i1 0 + %nop8141 = alloca i1, i1 0 + %nop8142 = alloca i1, i1 0 + %nop8143 = alloca i1, i1 0 + %nop8144 = alloca i1, i1 0 + %nop8145 = alloca i1, i1 0 + %nop8146 = alloca i1, i1 0 + %nop8147 = alloca i1, i1 0 + %nop8148 = alloca i1, i1 0 + %nop8149 = alloca i1, i1 0 + %nop8150 = alloca i1, i1 0 + %nop8151 = alloca i1, i1 0 + %nop8152 = alloca i1, i1 0 + %nop8153 = alloca i1, i1 0 + %nop8154 = alloca i1, i1 0 + %nop8155 = alloca i1, i1 0 + %nop8156 = alloca i1, i1 0 + %nop8157 = alloca i1, i1 0 + %nop8158 = alloca i1, i1 0 + %nop8159 = alloca i1, i1 0 + %nop8160 = alloca i1, i1 0 + %nop8161 = alloca i1, i1 0 + %nop8162 = alloca i1, i1 0 + %nop8163 = alloca i1, i1 0 + %nop8164 = alloca i1, i1 0 + %nop8165 = alloca i1, i1 0 + %nop8166 = alloca i1, i1 0 + %nop8167 = alloca i1, i1 0 + %nop8168 = alloca i1, i1 0 + %nop8169 = alloca i1, i1 0 + %nop8170 = alloca i1, i1 0 + %nop8171 = alloca i1, i1 0 + %nop8172 = alloca i1, i1 0 + %nop8173 = alloca i1, i1 0 + %nop8174 = alloca i1, i1 0 + %nop8175 = alloca i1, i1 0 + %nop8176 = alloca i1, i1 0 + %nop8177 = alloca i1, i1 0 + %nop8178 = alloca i1, i1 0 + %nop8179 = alloca i1, i1 0 + %nop8180 = alloca i1, i1 0 + %nop8181 = alloca i1, i1 0 + %nop8182 = alloca i1, i1 0 + %nop8183 = alloca i1, i1 0 + %nop8184 = alloca i1, i1 0 + %nop8185 = alloca i1, i1 0 + %nop8186 = alloca i1, i1 0 + %nop8187 = alloca i1, i1 0 + %nop8188 = alloca i1, i1 0 + %nop8189 = alloca i1, i1 0 + %nop8190 = alloca i1, i1 0 + %nop8191 = alloca i1, i1 0 + %nop8192 = alloca i1, i1 0 + %nop8193 = alloca i1, i1 0 + %nop8194 = alloca i1, i1 0 + %nop8195 = alloca i1, i1 0 + %nop8196 = alloca i1, i1 0 + %nop8197 = alloca i1, i1 0 + %nop8198 = alloca i1, i1 0 + %nop8199 = alloca i1, i1 0 + %nop8200 = alloca i1, i1 0 + %nop8201 = alloca i1, i1 0 + %nop8202 = alloca i1, i1 0 + %nop8203 = alloca i1, i1 0 + %nop8204 = alloca i1, i1 0 + %nop8205 = alloca i1, i1 0 + %nop8206 = alloca 
i1, i1 0 + %nop8207 = alloca i1, i1 0 + %nop8208 = alloca i1, i1 0 + %nop8209 = alloca i1, i1 0 + %nop8210 = alloca i1, i1 0 + %nop8211 = alloca i1, i1 0 + %nop8212 = alloca i1, i1 0 + %nop8213 = alloca i1, i1 0 + %nop8214 = alloca i1, i1 0 + %nop8215 = alloca i1, i1 0 + %nop8216 = alloca i1, i1 0 + %nop8217 = alloca i1, i1 0 + %nop8218 = alloca i1, i1 0 + %nop8219 = alloca i1, i1 0 + %nop8220 = alloca i1, i1 0 + %nop8221 = alloca i1, i1 0 + %nop8222 = alloca i1, i1 0 + %nop8223 = alloca i1, i1 0 + %nop8224 = alloca i1, i1 0 + %nop8225 = alloca i1, i1 0 + %nop8226 = alloca i1, i1 0 + %nop8227 = alloca i1, i1 0 + %nop8228 = alloca i1, i1 0 + %nop8229 = alloca i1, i1 0 + %nop8230 = alloca i1, i1 0 + %nop8231 = alloca i1, i1 0 + %nop8232 = alloca i1, i1 0 + %nop8233 = alloca i1, i1 0 + %nop8234 = alloca i1, i1 0 + %nop8235 = alloca i1, i1 0 + %nop8236 = alloca i1, i1 0 + %nop8237 = alloca i1, i1 0 + %nop8238 = alloca i1, i1 0 + %nop8239 = alloca i1, i1 0 + %nop8240 = alloca i1, i1 0 + %nop8241 = alloca i1, i1 0 + %nop8242 = alloca i1, i1 0 + %nop8243 = alloca i1, i1 0 + %nop8244 = alloca i1, i1 0 + %nop8245 = alloca i1, i1 0 + %nop8246 = alloca i1, i1 0 + %nop8247 = alloca i1, i1 0 + %nop8248 = alloca i1, i1 0 + %nop8249 = alloca i1, i1 0 + %nop8250 = alloca i1, i1 0 + %nop8251 = alloca i1, i1 0 + %nop8252 = alloca i1, i1 0 + %nop8253 = alloca i1, i1 0 + %nop8254 = alloca i1, i1 0 + %nop8255 = alloca i1, i1 0 + %nop8256 = alloca i1, i1 0 + %nop8257 = alloca i1, i1 0 + %nop8258 = alloca i1, i1 0 + %nop8259 = alloca i1, i1 0 + %nop8260 = alloca i1, i1 0 + %nop8261 = alloca i1, i1 0 + %nop8262 = alloca i1, i1 0 + %nop8263 = alloca i1, i1 0 + %nop8264 = alloca i1, i1 0 + %nop8265 = alloca i1, i1 0 + %nop8266 = alloca i1, i1 0 + %nop8267 = alloca i1, i1 0 + %nop8268 = alloca i1, i1 0 + %nop8269 = alloca i1, i1 0 + %nop8270 = alloca i1, i1 0 + %nop8271 = alloca i1, i1 0 + %nop8272 = alloca i1, i1 0 + %nop8273 = alloca i1, i1 0 + %nop8274 = alloca i1, i1 0 + %nop8275 = 
alloca i1, i1 0 + %nop8276 = alloca i1, i1 0 + %nop8277 = alloca i1, i1 0 + %nop8278 = alloca i1, i1 0 + %nop8279 = alloca i1, i1 0 + %nop8280 = alloca i1, i1 0 + %nop8281 = alloca i1, i1 0 + %nop8282 = alloca i1, i1 0 + %nop8283 = alloca i1, i1 0 + %nop8284 = alloca i1, i1 0 + %nop8285 = alloca i1, i1 0 + %nop8286 = alloca i1, i1 0 + %nop8287 = alloca i1, i1 0 + %nop8288 = alloca i1, i1 0 + %nop8289 = alloca i1, i1 0 + %nop8290 = alloca i1, i1 0 + %nop8291 = alloca i1, i1 0 + %nop8292 = alloca i1, i1 0 + %nop8293 = alloca i1, i1 0 + %nop8294 = alloca i1, i1 0 + %nop8295 = alloca i1, i1 0 + %nop8296 = alloca i1, i1 0 + %nop8297 = alloca i1, i1 0 + %nop8298 = alloca i1, i1 0 + %nop8299 = alloca i1, i1 0 + %nop8300 = alloca i1, i1 0 + %nop8301 = alloca i1, i1 0 + %nop8302 = alloca i1, i1 0 + %nop8303 = alloca i1, i1 0 + %nop8304 = alloca i1, i1 0 + %nop8305 = alloca i1, i1 0 + %nop8306 = alloca i1, i1 0 + %nop8307 = alloca i1, i1 0 + %nop8308 = alloca i1, i1 0 + %nop8309 = alloca i1, i1 0 + %nop8310 = alloca i1, i1 0 + %nop8311 = alloca i1, i1 0 + %nop8312 = alloca i1, i1 0 + %nop8313 = alloca i1, i1 0 + %nop8314 = alloca i1, i1 0 + %nop8315 = alloca i1, i1 0 + %nop8316 = alloca i1, i1 0 + %nop8317 = alloca i1, i1 0 + %nop8318 = alloca i1, i1 0 + %nop8319 = alloca i1, i1 0 + %nop8320 = alloca i1, i1 0 + %nop8321 = alloca i1, i1 0 + %nop8322 = alloca i1, i1 0 + %nop8323 = alloca i1, i1 0 + %nop8324 = alloca i1, i1 0 + %nop8325 = alloca i1, i1 0 + %nop8326 = alloca i1, i1 0 + %nop8327 = alloca i1, i1 0 + %nop8328 = alloca i1, i1 0 + %nop8329 = alloca i1, i1 0 + %nop8330 = alloca i1, i1 0 + %nop8331 = alloca i1, i1 0 + %nop8332 = alloca i1, i1 0 + %nop8333 = alloca i1, i1 0 + %nop8334 = alloca i1, i1 0 + %nop8335 = alloca i1, i1 0 + %nop8336 = alloca i1, i1 0 + %nop8337 = alloca i1, i1 0 + %nop8338 = alloca i1, i1 0 + %nop8339 = alloca i1, i1 0 + %nop8340 = alloca i1, i1 0 + %nop8341 = alloca i1, i1 0 + %nop8342 = alloca i1, i1 0 + %nop8343 = alloca i1, i1 0 + %nop8344 
= alloca i1, i1 0 + %nop8345 = alloca i1, i1 0 + %nop8346 = alloca i1, i1 0 + %nop8347 = alloca i1, i1 0 + %nop8348 = alloca i1, i1 0 + %nop8349 = alloca i1, i1 0 + %nop8350 = alloca i1, i1 0 + %nop8351 = alloca i1, i1 0 + %nop8352 = alloca i1, i1 0 + %nop8353 = alloca i1, i1 0 + %nop8354 = alloca i1, i1 0 + %nop8355 = alloca i1, i1 0 + %nop8356 = alloca i1, i1 0 + %nop8357 = alloca i1, i1 0 + %nop8358 = alloca i1, i1 0 + %nop8359 = alloca i1, i1 0 + %nop8360 = alloca i1, i1 0 + %nop8361 = alloca i1, i1 0 + %nop8362 = alloca i1, i1 0 + %nop8363 = alloca i1, i1 0 + %nop8364 = alloca i1, i1 0 + %nop8365 = alloca i1, i1 0 + %nop8366 = alloca i1, i1 0 + %nop8367 = alloca i1, i1 0 + %nop8368 = alloca i1, i1 0 + %nop8369 = alloca i1, i1 0 + %nop8370 = alloca i1, i1 0 + %nop8371 = alloca i1, i1 0 + %nop8372 = alloca i1, i1 0 + %nop8373 = alloca i1, i1 0 + %nop8374 = alloca i1, i1 0 + %nop8375 = alloca i1, i1 0 + %nop8376 = alloca i1, i1 0 + %nop8377 = alloca i1, i1 0 + %nop8378 = alloca i1, i1 0 + %nop8379 = alloca i1, i1 0 + %nop8380 = alloca i1, i1 0 + %nop8381 = alloca i1, i1 0 + %nop8382 = alloca i1, i1 0 + %nop8383 = alloca i1, i1 0 + %nop8384 = alloca i1, i1 0 + %nop8385 = alloca i1, i1 0 + %nop8386 = alloca i1, i1 0 + %nop8387 = alloca i1, i1 0 + %nop8388 = alloca i1, i1 0 + %nop8389 = alloca i1, i1 0 + %nop8390 = alloca i1, i1 0 + %nop8391 = alloca i1, i1 0 + %nop8392 = alloca i1, i1 0 + %nop8393 = alloca i1, i1 0 + %nop8394 = alloca i1, i1 0 + %nop8395 = alloca i1, i1 0 + %nop8396 = alloca i1, i1 0 + %nop8397 = alloca i1, i1 0 + %nop8398 = alloca i1, i1 0 + %nop8399 = alloca i1, i1 0 + %nop8400 = alloca i1, i1 0 + %nop8401 = alloca i1, i1 0 + %nop8402 = alloca i1, i1 0 + %nop8403 = alloca i1, i1 0 + %nop8404 = alloca i1, i1 0 + %nop8405 = alloca i1, i1 0 + %nop8406 = alloca i1, i1 0 + %nop8407 = alloca i1, i1 0 + %nop8408 = alloca i1, i1 0 + %nop8409 = alloca i1, i1 0 + %nop8410 = alloca i1, i1 0 + %nop8411 = alloca i1, i1 0 + %nop8412 = alloca i1, i1 0 + 
%nop8413 = alloca i1, i1 0 + %nop8414 = alloca i1, i1 0 + %nop8415 = alloca i1, i1 0 + %nop8416 = alloca i1, i1 0 + %nop8417 = alloca i1, i1 0 + %nop8418 = alloca i1, i1 0 + %nop8419 = alloca i1, i1 0 + %nop8420 = alloca i1, i1 0 + %nop8421 = alloca i1, i1 0 + %nop8422 = alloca i1, i1 0 + %nop8423 = alloca i1, i1 0 + %nop8424 = alloca i1, i1 0 + %nop8425 = alloca i1, i1 0 + %nop8426 = alloca i1, i1 0 + %nop8427 = alloca i1, i1 0 + %nop8428 = alloca i1, i1 0 + %nop8429 = alloca i1, i1 0 + %nop8430 = alloca i1, i1 0 + %nop8431 = alloca i1, i1 0 + %nop8432 = alloca i1, i1 0 + %nop8433 = alloca i1, i1 0 + %nop8434 = alloca i1, i1 0 + %nop8435 = alloca i1, i1 0 + %nop8436 = alloca i1, i1 0 + %nop8437 = alloca i1, i1 0 + %nop8438 = alloca i1, i1 0 + %nop8439 = alloca i1, i1 0 + %nop8440 = alloca i1, i1 0 + %nop8441 = alloca i1, i1 0 + %nop8442 = alloca i1, i1 0 + %nop8443 = alloca i1, i1 0 + %nop8444 = alloca i1, i1 0 + %nop8445 = alloca i1, i1 0 + %nop8446 = alloca i1, i1 0 + %nop8447 = alloca i1, i1 0 + %nop8448 = alloca i1, i1 0 + %nop8449 = alloca i1, i1 0 + %nop8450 = alloca i1, i1 0 + %nop8451 = alloca i1, i1 0 + %nop8452 = alloca i1, i1 0 + %nop8453 = alloca i1, i1 0 + %nop8454 = alloca i1, i1 0 + %nop8455 = alloca i1, i1 0 + %nop8456 = alloca i1, i1 0 + %nop8457 = alloca i1, i1 0 + %nop8458 = alloca i1, i1 0 + %nop8459 = alloca i1, i1 0 + %nop8460 = alloca i1, i1 0 + %nop8461 = alloca i1, i1 0 + %nop8462 = alloca i1, i1 0 + %nop8463 = alloca i1, i1 0 + %nop8464 = alloca i1, i1 0 + %nop8465 = alloca i1, i1 0 + %nop8466 = alloca i1, i1 0 + %nop8467 = alloca i1, i1 0 + %nop8468 = alloca i1, i1 0 + %nop8469 = alloca i1, i1 0 + %nop8470 = alloca i1, i1 0 + %nop8471 = alloca i1, i1 0 + %nop8472 = alloca i1, i1 0 + %nop8473 = alloca i1, i1 0 + %nop8474 = alloca i1, i1 0 + %nop8475 = alloca i1, i1 0 + %nop8476 = alloca i1, i1 0 + %nop8477 = alloca i1, i1 0 + %nop8478 = alloca i1, i1 0 + %nop8479 = alloca i1, i1 0 + %nop8480 = alloca i1, i1 0 + %nop8481 = alloca i1, i1 0 
+ %nop8482 = alloca i1, i1 0 + %nop8483 = alloca i1, i1 0 + %nop8484 = alloca i1, i1 0 + %nop8485 = alloca i1, i1 0 + %nop8486 = alloca i1, i1 0 + %nop8487 = alloca i1, i1 0 + %nop8488 = alloca i1, i1 0 + %nop8489 = alloca i1, i1 0 + %nop8490 = alloca i1, i1 0 + %nop8491 = alloca i1, i1 0 + %nop8492 = alloca i1, i1 0 + %nop8493 = alloca i1, i1 0 + %nop8494 = alloca i1, i1 0 + %nop8495 = alloca i1, i1 0 + %nop8496 = alloca i1, i1 0 + %nop8497 = alloca i1, i1 0 + %nop8498 = alloca i1, i1 0 + %nop8499 = alloca i1, i1 0 + %nop8500 = alloca i1, i1 0 + %nop8501 = alloca i1, i1 0 + %nop8502 = alloca i1, i1 0 + %nop8503 = alloca i1, i1 0 + %nop8504 = alloca i1, i1 0 + %nop8505 = alloca i1, i1 0 + %nop8506 = alloca i1, i1 0 + %nop8507 = alloca i1, i1 0 + %nop8508 = alloca i1, i1 0 + %nop8509 = alloca i1, i1 0 + %nop8510 = alloca i1, i1 0 + %nop8511 = alloca i1, i1 0 + %nop8512 = alloca i1, i1 0 + %nop8513 = alloca i1, i1 0 + %nop8514 = alloca i1, i1 0 + %nop8515 = alloca i1, i1 0 + %nop8516 = alloca i1, i1 0 + %nop8517 = alloca i1, i1 0 + %nop8518 = alloca i1, i1 0 + %nop8519 = alloca i1, i1 0 + %nop8520 = alloca i1, i1 0 + %nop8521 = alloca i1, i1 0 + %nop8522 = alloca i1, i1 0 + %nop8523 = alloca i1, i1 0 + %nop8524 = alloca i1, i1 0 + %nop8525 = alloca i1, i1 0 + %nop8526 = alloca i1, i1 0 + %nop8527 = alloca i1, i1 0 + %nop8528 = alloca i1, i1 0 + %nop8529 = alloca i1, i1 0 + %nop8530 = alloca i1, i1 0 + %nop8531 = alloca i1, i1 0 + %nop8532 = alloca i1, i1 0 + %nop8533 = alloca i1, i1 0 + %nop8534 = alloca i1, i1 0 + %nop8535 = alloca i1, i1 0 + %nop8536 = alloca i1, i1 0 + %nop8537 = alloca i1, i1 0 + %nop8538 = alloca i1, i1 0 + %nop8539 = alloca i1, i1 0 + %nop8540 = alloca i1, i1 0 + %nop8541 = alloca i1, i1 0 + %nop8542 = alloca i1, i1 0 + %nop8543 = alloca i1, i1 0 + %nop8544 = alloca i1, i1 0 + %nop8545 = alloca i1, i1 0 + %nop8546 = alloca i1, i1 0 + %nop8547 = alloca i1, i1 0 + %nop8548 = alloca i1, i1 0 + %nop8549 = alloca i1, i1 0 + %nop8550 = alloca i1, i1 
0 + %nop8551 = alloca i1, i1 0 + %nop8552 = alloca i1, i1 0 + %nop8553 = alloca i1, i1 0 + %nop8554 = alloca i1, i1 0 + %nop8555 = alloca i1, i1 0 + %nop8556 = alloca i1, i1 0 + %nop8557 = alloca i1, i1 0 + %nop8558 = alloca i1, i1 0 + %nop8559 = alloca i1, i1 0 + %nop8560 = alloca i1, i1 0 + %nop8561 = alloca i1, i1 0 + %nop8562 = alloca i1, i1 0 + %nop8563 = alloca i1, i1 0 + %nop8564 = alloca i1, i1 0 + %nop8565 = alloca i1, i1 0 + %nop8566 = alloca i1, i1 0 + %nop8567 = alloca i1, i1 0 + %nop8568 = alloca i1, i1 0 + %nop8569 = alloca i1, i1 0 + %nop8570 = alloca i1, i1 0 + %nop8571 = alloca i1, i1 0 + %nop8572 = alloca i1, i1 0 + %nop8573 = alloca i1, i1 0 + %nop8574 = alloca i1, i1 0 + %nop8575 = alloca i1, i1 0 + %nop8576 = alloca i1, i1 0 + %nop8577 = alloca i1, i1 0 + %nop8578 = alloca i1, i1 0 + %nop8579 = alloca i1, i1 0 + %nop8580 = alloca i1, i1 0 + %nop8581 = alloca i1, i1 0 + %nop8582 = alloca i1, i1 0 + %nop8583 = alloca i1, i1 0 + %nop8584 = alloca i1, i1 0 + %nop8585 = alloca i1, i1 0 + %nop8586 = alloca i1, i1 0 + %nop8587 = alloca i1, i1 0 + %nop8588 = alloca i1, i1 0 + %nop8589 = alloca i1, i1 0 + %nop8590 = alloca i1, i1 0 + %nop8591 = alloca i1, i1 0 + %nop8592 = alloca i1, i1 0 + %nop8593 = alloca i1, i1 0 + %nop8594 = alloca i1, i1 0 + %nop8595 = alloca i1, i1 0 + %nop8596 = alloca i1, i1 0 + %nop8597 = alloca i1, i1 0 + %nop8598 = alloca i1, i1 0 + %nop8599 = alloca i1, i1 0 + %nop8600 = alloca i1, i1 0 + %nop8601 = alloca i1, i1 0 + %nop8602 = alloca i1, i1 0 + %nop8603 = alloca i1, i1 0 + %nop8604 = alloca i1, i1 0 + %nop8605 = alloca i1, i1 0 + %nop8606 = alloca i1, i1 0 + %nop8607 = alloca i1, i1 0 + %nop8608 = alloca i1, i1 0 + %nop8609 = alloca i1, i1 0 + %nop8610 = alloca i1, i1 0 + %nop8611 = alloca i1, i1 0 + %nop8612 = alloca i1, i1 0 + %nop8613 = alloca i1, i1 0 + %nop8614 = alloca i1, i1 0 + %nop8615 = alloca i1, i1 0 + %nop8616 = alloca i1, i1 0 + %nop8617 = alloca i1, i1 0 + %nop8618 = alloca i1, i1 0 + %nop8619 = alloca i1, 
i1 0 + %nop8620 = alloca i1, i1 0 + %nop8621 = alloca i1, i1 0 + %nop8622 = alloca i1, i1 0 + %nop8623 = alloca i1, i1 0 + %nop8624 = alloca i1, i1 0 + %nop8625 = alloca i1, i1 0 + %nop8626 = alloca i1, i1 0 + %nop8627 = alloca i1, i1 0 + %nop8628 = alloca i1, i1 0 + %nop8629 = alloca i1, i1 0 + %nop8630 = alloca i1, i1 0 + %nop8631 = alloca i1, i1 0 + %nop8632 = alloca i1, i1 0 + %nop8633 = alloca i1, i1 0 + %nop8634 = alloca i1, i1 0 + %nop8635 = alloca i1, i1 0 + %nop8636 = alloca i1, i1 0 + %nop8637 = alloca i1, i1 0 + %nop8638 = alloca i1, i1 0 + %nop8639 = alloca i1, i1 0 + %nop8640 = alloca i1, i1 0 + %nop8641 = alloca i1, i1 0 + %nop8642 = alloca i1, i1 0 + %nop8643 = alloca i1, i1 0 + %nop8644 = alloca i1, i1 0 + %nop8645 = alloca i1, i1 0 + %nop8646 = alloca i1, i1 0 + %nop8647 = alloca i1, i1 0 + %nop8648 = alloca i1, i1 0 + %nop8649 = alloca i1, i1 0 + %nop8650 = alloca i1, i1 0 + %nop8651 = alloca i1, i1 0 + %nop8652 = alloca i1, i1 0 + %nop8653 = alloca i1, i1 0 + %nop8654 = alloca i1, i1 0 + %nop8655 = alloca i1, i1 0 + %nop8656 = alloca i1, i1 0 + %nop8657 = alloca i1, i1 0 + %nop8658 = alloca i1, i1 0 + %nop8659 = alloca i1, i1 0 + %nop8660 = alloca i1, i1 0 + %nop8661 = alloca i1, i1 0 + %nop8662 = alloca i1, i1 0 + %nop8663 = alloca i1, i1 0 + %nop8664 = alloca i1, i1 0 + %nop8665 = alloca i1, i1 0 + %nop8666 = alloca i1, i1 0 + %nop8667 = alloca i1, i1 0 + %nop8668 = alloca i1, i1 0 + %nop8669 = alloca i1, i1 0 + %nop8670 = alloca i1, i1 0 + %nop8671 = alloca i1, i1 0 + %nop8672 = alloca i1, i1 0 + %nop8673 = alloca i1, i1 0 + %nop8674 = alloca i1, i1 0 + %nop8675 = alloca i1, i1 0 + %nop8676 = alloca i1, i1 0 + %nop8677 = alloca i1, i1 0 + %nop8678 = alloca i1, i1 0 + %nop8679 = alloca i1, i1 0 + %nop8680 = alloca i1, i1 0 + %nop8681 = alloca i1, i1 0 + %nop8682 = alloca i1, i1 0 + %nop8683 = alloca i1, i1 0 + %nop8684 = alloca i1, i1 0 + %nop8685 = alloca i1, i1 0 + %nop8686 = alloca i1, i1 0 + %nop8687 = alloca i1, i1 0 + %nop8688 = alloca 
i1, i1 0 + %nop8689 = alloca i1, i1 0 + %nop8690 = alloca i1, i1 0 + %nop8691 = alloca i1, i1 0 + %nop8692 = alloca i1, i1 0 + %nop8693 = alloca i1, i1 0 + %nop8694 = alloca i1, i1 0 + %nop8695 = alloca i1, i1 0 + %nop8696 = alloca i1, i1 0 + %nop8697 = alloca i1, i1 0 + %nop8698 = alloca i1, i1 0 + %nop8699 = alloca i1, i1 0 + %nop8700 = alloca i1, i1 0 + %nop8701 = alloca i1, i1 0 + %nop8702 = alloca i1, i1 0 + %nop8703 = alloca i1, i1 0 + %nop8704 = alloca i1, i1 0 + %nop8705 = alloca i1, i1 0 + %nop8706 = alloca i1, i1 0 + %nop8707 = alloca i1, i1 0 + %nop8708 = alloca i1, i1 0 + %nop8709 = alloca i1, i1 0 + %nop8710 = alloca i1, i1 0 + %nop8711 = alloca i1, i1 0 + %nop8712 = alloca i1, i1 0 + %nop8713 = alloca i1, i1 0 + %nop8714 = alloca i1, i1 0 + %nop8715 = alloca i1, i1 0 + %nop8716 = alloca i1, i1 0 + %nop8717 = alloca i1, i1 0 + %nop8718 = alloca i1, i1 0 + %nop8719 = alloca i1, i1 0 + %nop8720 = alloca i1, i1 0 + %nop8721 = alloca i1, i1 0 + %nop8722 = alloca i1, i1 0 + %nop8723 = alloca i1, i1 0 + %nop8724 = alloca i1, i1 0 + %nop8725 = alloca i1, i1 0 + %nop8726 = alloca i1, i1 0 + %nop8727 = alloca i1, i1 0 + %nop8728 = alloca i1, i1 0 + %nop8729 = alloca i1, i1 0 + %nop8730 = alloca i1, i1 0 + %nop8731 = alloca i1, i1 0 + %nop8732 = alloca i1, i1 0 + %nop8733 = alloca i1, i1 0 + %nop8734 = alloca i1, i1 0 + %nop8735 = alloca i1, i1 0 + %nop8736 = alloca i1, i1 0 + %nop8737 = alloca i1, i1 0 + %nop8738 = alloca i1, i1 0 + %nop8739 = alloca i1, i1 0 + %nop8740 = alloca i1, i1 0 + %nop8741 = alloca i1, i1 0 + %nop8742 = alloca i1, i1 0 + %nop8743 = alloca i1, i1 0 + %nop8744 = alloca i1, i1 0 + %nop8745 = alloca i1, i1 0 + %nop8746 = alloca i1, i1 0 + %nop8747 = alloca i1, i1 0 + %nop8748 = alloca i1, i1 0 + %nop8749 = alloca i1, i1 0 + %nop8750 = alloca i1, i1 0 + %nop8751 = alloca i1, i1 0 + %nop8752 = alloca i1, i1 0 + %nop8753 = alloca i1, i1 0 + %nop8754 = alloca i1, i1 0 + %nop8755 = alloca i1, i1 0 + %nop8756 = alloca i1, i1 0 + %nop8757 = 
alloca i1, i1 0 + %nop8758 = alloca i1, i1 0 + %nop8759 = alloca i1, i1 0 + %nop8760 = alloca i1, i1 0 + %nop8761 = alloca i1, i1 0 + %nop8762 = alloca i1, i1 0 + %nop8763 = alloca i1, i1 0 + %nop8764 = alloca i1, i1 0 + %nop8765 = alloca i1, i1 0 + %nop8766 = alloca i1, i1 0 + %nop8767 = alloca i1, i1 0 + %nop8768 = alloca i1, i1 0 + %nop8769 = alloca i1, i1 0 + %nop8770 = alloca i1, i1 0 + %nop8771 = alloca i1, i1 0 + %nop8772 = alloca i1, i1 0 + %nop8773 = alloca i1, i1 0 + %nop8774 = alloca i1, i1 0 + %nop8775 = alloca i1, i1 0 + %nop8776 = alloca i1, i1 0 + %nop8777 = alloca i1, i1 0 + %nop8778 = alloca i1, i1 0 + %nop8779 = alloca i1, i1 0 + %nop8780 = alloca i1, i1 0 + %nop8781 = alloca i1, i1 0 + %nop8782 = alloca i1, i1 0 + %nop8783 = alloca i1, i1 0 + %nop8784 = alloca i1, i1 0 + %nop8785 = alloca i1, i1 0 + %nop8786 = alloca i1, i1 0 + %nop8787 = alloca i1, i1 0 + %nop8788 = alloca i1, i1 0 + %nop8789 = alloca i1, i1 0 + %nop8790 = alloca i1, i1 0 + %nop8791 = alloca i1, i1 0 + %nop8792 = alloca i1, i1 0 + %nop8793 = alloca i1, i1 0 + %nop8794 = alloca i1, i1 0 + %nop8795 = alloca i1, i1 0 + %nop8796 = alloca i1, i1 0 + %nop8797 = alloca i1, i1 0 + %nop8798 = alloca i1, i1 0 + %nop8799 = alloca i1, i1 0 + %nop8800 = alloca i1, i1 0 + %nop8801 = alloca i1, i1 0 + %nop8802 = alloca i1, i1 0 + %nop8803 = alloca i1, i1 0 + %nop8804 = alloca i1, i1 0 + %nop8805 = alloca i1, i1 0 + %nop8806 = alloca i1, i1 0 + %nop8807 = alloca i1, i1 0 + %nop8808 = alloca i1, i1 0 + %nop8809 = alloca i1, i1 0 + %nop8810 = alloca i1, i1 0 + %nop8811 = alloca i1, i1 0 + %nop8812 = alloca i1, i1 0 + %nop8813 = alloca i1, i1 0 + %nop8814 = alloca i1, i1 0 + %nop8815 = alloca i1, i1 0 + %nop8816 = alloca i1, i1 0 + %nop8817 = alloca i1, i1 0 + %nop8818 = alloca i1, i1 0 + %nop8819 = alloca i1, i1 0 + %nop8820 = alloca i1, i1 0 + %nop8821 = alloca i1, i1 0 + %nop8822 = alloca i1, i1 0 + %nop8823 = alloca i1, i1 0 + %nop8824 = alloca i1, i1 0 + %nop8825 = alloca i1, i1 0 + %nop8826 
= alloca i1, i1 0 + %nop8827 = alloca i1, i1 0 + %nop8828 = alloca i1, i1 0 + %nop8829 = alloca i1, i1 0 + %nop8830 = alloca i1, i1 0 + %nop8831 = alloca i1, i1 0 + %nop8832 = alloca i1, i1 0 + %nop8833 = alloca i1, i1 0 + %nop8834 = alloca i1, i1 0 + %nop8835 = alloca i1, i1 0 + %nop8836 = alloca i1, i1 0 + %nop8837 = alloca i1, i1 0 + %nop8838 = alloca i1, i1 0 + %nop8839 = alloca i1, i1 0 + %nop8840 = alloca i1, i1 0 + %nop8841 = alloca i1, i1 0 + %nop8842 = alloca i1, i1 0 + %nop8843 = alloca i1, i1 0 + %nop8844 = alloca i1, i1 0 + %nop8845 = alloca i1, i1 0 + %nop8846 = alloca i1, i1 0 + %nop8847 = alloca i1, i1 0 + %nop8848 = alloca i1, i1 0 + %nop8849 = alloca i1, i1 0 + %nop8850 = alloca i1, i1 0 + %nop8851 = alloca i1, i1 0 + %nop8852 = alloca i1, i1 0 + %nop8853 = alloca i1, i1 0 + %nop8854 = alloca i1, i1 0 + %nop8855 = alloca i1, i1 0 + %nop8856 = alloca i1, i1 0 + %nop8857 = alloca i1, i1 0 + %nop8858 = alloca i1, i1 0 + %nop8859 = alloca i1, i1 0 + %nop8860 = alloca i1, i1 0 + %nop8861 = alloca i1, i1 0 + %nop8862 = alloca i1, i1 0 + %nop8863 = alloca i1, i1 0 + %nop8864 = alloca i1, i1 0 + %nop8865 = alloca i1, i1 0 + %nop8866 = alloca i1, i1 0 + %nop8867 = alloca i1, i1 0 + %nop8868 = alloca i1, i1 0 + %nop8869 = alloca i1, i1 0 + %nop8870 = alloca i1, i1 0 + %nop8871 = alloca i1, i1 0 + %nop8872 = alloca i1, i1 0 + %nop8873 = alloca i1, i1 0 + %nop8874 = alloca i1, i1 0 + %nop8875 = alloca i1, i1 0 + %nop8876 = alloca i1, i1 0 + %nop8877 = alloca i1, i1 0 + %nop8878 = alloca i1, i1 0 + %nop8879 = alloca i1, i1 0 + %nop8880 = alloca i1, i1 0 + %nop8881 = alloca i1, i1 0 + %nop8882 = alloca i1, i1 0 + %nop8883 = alloca i1, i1 0 + %nop8884 = alloca i1, i1 0 + %nop8885 = alloca i1, i1 0 + %nop8886 = alloca i1, i1 0 + %nop8887 = alloca i1, i1 0 + %nop8888 = alloca i1, i1 0 + %nop8889 = alloca i1, i1 0 + %nop8890 = alloca i1, i1 0 + %nop8891 = alloca i1, i1 0 + %nop8892 = alloca i1, i1 0 + %nop8893 = alloca i1, i1 0 + %nop8894 = alloca i1, i1 0 + 
%nop8895 = alloca i1, i1 0 + %nop8896 = alloca i1, i1 0 + %nop8897 = alloca i1, i1 0 + %nop8898 = alloca i1, i1 0 + %nop8899 = alloca i1, i1 0 + %nop8900 = alloca i1, i1 0 + %nop8901 = alloca i1, i1 0 + %nop8902 = alloca i1, i1 0 + %nop8903 = alloca i1, i1 0 + %nop8904 = alloca i1, i1 0 + %nop8905 = alloca i1, i1 0 + %nop8906 = alloca i1, i1 0 + %nop8907 = alloca i1, i1 0 + %nop8908 = alloca i1, i1 0 + %nop8909 = alloca i1, i1 0 + %nop8910 = alloca i1, i1 0 + %nop8911 = alloca i1, i1 0 + %nop8912 = alloca i1, i1 0 + %nop8913 = alloca i1, i1 0 + %nop8914 = alloca i1, i1 0 + %nop8915 = alloca i1, i1 0 + %nop8916 = alloca i1, i1 0 + %nop8917 = alloca i1, i1 0 + %nop8918 = alloca i1, i1 0 + %nop8919 = alloca i1, i1 0 + %nop8920 = alloca i1, i1 0 + %nop8921 = alloca i1, i1 0 + %nop8922 = alloca i1, i1 0 + %nop8923 = alloca i1, i1 0 + %nop8924 = alloca i1, i1 0 + %nop8925 = alloca i1, i1 0 + %nop8926 = alloca i1, i1 0 + %nop8927 = alloca i1, i1 0 + %nop8928 = alloca i1, i1 0 + %nop8929 = alloca i1, i1 0 + %nop8930 = alloca i1, i1 0 + %nop8931 = alloca i1, i1 0 + %nop8932 = alloca i1, i1 0 + %nop8933 = alloca i1, i1 0 + %nop8934 = alloca i1, i1 0 + %nop8935 = alloca i1, i1 0 + %nop8936 = alloca i1, i1 0 + %nop8937 = alloca i1, i1 0 + %nop8938 = alloca i1, i1 0 + %nop8939 = alloca i1, i1 0 + %nop8940 = alloca i1, i1 0 + %nop8941 = alloca i1, i1 0 + %nop8942 = alloca i1, i1 0 + %nop8943 = alloca i1, i1 0 + %nop8944 = alloca i1, i1 0 + %nop8945 = alloca i1, i1 0 + %nop8946 = alloca i1, i1 0 + %nop8947 = alloca i1, i1 0 + %nop8948 = alloca i1, i1 0 + %nop8949 = alloca i1, i1 0 + %nop8950 = alloca i1, i1 0 + %nop8951 = alloca i1, i1 0 + %nop8952 = alloca i1, i1 0 + %nop8953 = alloca i1, i1 0 + %nop8954 = alloca i1, i1 0 + %nop8955 = alloca i1, i1 0 + %nop8956 = alloca i1, i1 0 + %nop8957 = alloca i1, i1 0 + %nop8958 = alloca i1, i1 0 + %nop8959 = alloca i1, i1 0 + %nop8960 = alloca i1, i1 0 + %nop8961 = alloca i1, i1 0 + %nop8962 = alloca i1, i1 0 + %nop8963 = alloca i1, i1 0 
+ %nop8964 = alloca i1, i1 0 + %nop8965 = alloca i1, i1 0 + %nop8966 = alloca i1, i1 0 + %nop8967 = alloca i1, i1 0 + %nop8968 = alloca i1, i1 0 + %nop8969 = alloca i1, i1 0 + %nop8970 = alloca i1, i1 0 + %nop8971 = alloca i1, i1 0 + %nop8972 = alloca i1, i1 0 + %nop8973 = alloca i1, i1 0 + %nop8974 = alloca i1, i1 0 + %nop8975 = alloca i1, i1 0 + %nop8976 = alloca i1, i1 0 + %nop8977 = alloca i1, i1 0 + %nop8978 = alloca i1, i1 0 + %nop8979 = alloca i1, i1 0 + %nop8980 = alloca i1, i1 0 + %nop8981 = alloca i1, i1 0 + %nop8982 = alloca i1, i1 0 + %nop8983 = alloca i1, i1 0 + %nop8984 = alloca i1, i1 0 + %nop8985 = alloca i1, i1 0 + %nop8986 = alloca i1, i1 0 + %nop8987 = alloca i1, i1 0 + %nop8988 = alloca i1, i1 0 + %nop8989 = alloca i1, i1 0 + %nop8990 = alloca i1, i1 0 + %nop8991 = alloca i1, i1 0 + %nop8992 = alloca i1, i1 0 + %nop8993 = alloca i1, i1 0 + %nop8994 = alloca i1, i1 0 + %nop8995 = alloca i1, i1 0 + %nop8996 = alloca i1, i1 0 + %nop8997 = alloca i1, i1 0 + %nop8998 = alloca i1, i1 0 + %nop8999 = alloca i1, i1 0 + %nop9000 = alloca i1, i1 0 + %nop9001 = alloca i1, i1 0 + %nop9002 = alloca i1, i1 0 + %nop9003 = alloca i1, i1 0 + %nop9004 = alloca i1, i1 0 + %nop9005 = alloca i1, i1 0 + %nop9006 = alloca i1, i1 0 + %nop9007 = alloca i1, i1 0 + %nop9008 = alloca i1, i1 0 + %nop9009 = alloca i1, i1 0 + %nop9010 = alloca i1, i1 0 + %nop9011 = alloca i1, i1 0 + %nop9012 = alloca i1, i1 0 + %nop9013 = alloca i1, i1 0 + %nop9014 = alloca i1, i1 0 + %nop9015 = alloca i1, i1 0 + %nop9016 = alloca i1, i1 0 + %nop9017 = alloca i1, i1 0 + %nop9018 = alloca i1, i1 0 + %nop9019 = alloca i1, i1 0 + %nop9020 = alloca i1, i1 0 + %nop9021 = alloca i1, i1 0 + %nop9022 = alloca i1, i1 0 + %nop9023 = alloca i1, i1 0 + %nop9024 = alloca i1, i1 0 + %nop9025 = alloca i1, i1 0 + %nop9026 = alloca i1, i1 0 + %nop9027 = alloca i1, i1 0 + %nop9028 = alloca i1, i1 0 + %nop9029 = alloca i1, i1 0 + %nop9030 = alloca i1, i1 0 + %nop9031 = alloca i1, i1 0 + %nop9032 = alloca i1, i1 
0 + %nop9033 = alloca i1, i1 0 + %nop9034 = alloca i1, i1 0 + %nop9035 = alloca i1, i1 0 + %nop9036 = alloca i1, i1 0 + %nop9037 = alloca i1, i1 0 + %nop9038 = alloca i1, i1 0 + %nop9039 = alloca i1, i1 0 + %nop9040 = alloca i1, i1 0 + %nop9041 = alloca i1, i1 0 + %nop9042 = alloca i1, i1 0 + %nop9043 = alloca i1, i1 0 + %nop9044 = alloca i1, i1 0 + %nop9045 = alloca i1, i1 0 + %nop9046 = alloca i1, i1 0 + %nop9047 = alloca i1, i1 0 + %nop9048 = alloca i1, i1 0 + %nop9049 = alloca i1, i1 0 + %nop9050 = alloca i1, i1 0 + %nop9051 = alloca i1, i1 0 + %nop9052 = alloca i1, i1 0 + %nop9053 = alloca i1, i1 0 + %nop9054 = alloca i1, i1 0 + %nop9055 = alloca i1, i1 0 + %nop9056 = alloca i1, i1 0 + %nop9057 = alloca i1, i1 0 + %nop9058 = alloca i1, i1 0 + %nop9059 = alloca i1, i1 0 + %nop9060 = alloca i1, i1 0 + %nop9061 = alloca i1, i1 0 + %nop9062 = alloca i1, i1 0 + %nop9063 = alloca i1, i1 0 + %nop9064 = alloca i1, i1 0 + %nop9065 = alloca i1, i1 0 + %nop9066 = alloca i1, i1 0 + %nop9067 = alloca i1, i1 0 + %nop9068 = alloca i1, i1 0 + %nop9069 = alloca i1, i1 0 + %nop9070 = alloca i1, i1 0 + %nop9071 = alloca i1, i1 0 + %nop9072 = alloca i1, i1 0 + %nop9073 = alloca i1, i1 0 + %nop9074 = alloca i1, i1 0 + %nop9075 = alloca i1, i1 0 + %nop9076 = alloca i1, i1 0 + %nop9077 = alloca i1, i1 0 + %nop9078 = alloca i1, i1 0 + %nop9079 = alloca i1, i1 0 + %nop9080 = alloca i1, i1 0 + %nop9081 = alloca i1, i1 0 + %nop9082 = alloca i1, i1 0 + %nop9083 = alloca i1, i1 0 + %nop9084 = alloca i1, i1 0 + %nop9085 = alloca i1, i1 0 + %nop9086 = alloca i1, i1 0 + %nop9087 = alloca i1, i1 0 + %nop9088 = alloca i1, i1 0 + %nop9089 = alloca i1, i1 0 + %nop9090 = alloca i1, i1 0 + %nop9091 = alloca i1, i1 0 + %nop9092 = alloca i1, i1 0 + %nop9093 = alloca i1, i1 0 + %nop9094 = alloca i1, i1 0 + %nop9095 = alloca i1, i1 0 + %nop9096 = alloca i1, i1 0 + %nop9097 = alloca i1, i1 0 + %nop9098 = alloca i1, i1 0 + %nop9099 = alloca i1, i1 0 + %nop9100 = alloca i1, i1 0 + %nop9101 = alloca i1, 
i1 0 + %nop9102 = alloca i1, i1 0 + %nop9103 = alloca i1, i1 0 + %nop9104 = alloca i1, i1 0 + %nop9105 = alloca i1, i1 0 + %nop9106 = alloca i1, i1 0 + %nop9107 = alloca i1, i1 0 + %nop9108 = alloca i1, i1 0 + %nop9109 = alloca i1, i1 0 + %nop9110 = alloca i1, i1 0 + %nop9111 = alloca i1, i1 0 + %nop9112 = alloca i1, i1 0 + %nop9113 = alloca i1, i1 0 + %nop9114 = alloca i1, i1 0 + %nop9115 = alloca i1, i1 0 + %nop9116 = alloca i1, i1 0 + %nop9117 = alloca i1, i1 0 + %nop9118 = alloca i1, i1 0 + %nop9119 = alloca i1, i1 0 + %nop9120 = alloca i1, i1 0 + %nop9121 = alloca i1, i1 0 + %nop9122 = alloca i1, i1 0 + %nop9123 = alloca i1, i1 0 + %nop9124 = alloca i1, i1 0 + %nop9125 = alloca i1, i1 0 + %nop9126 = alloca i1, i1 0 + %nop9127 = alloca i1, i1 0 + %nop9128 = alloca i1, i1 0 + %nop9129 = alloca i1, i1 0 + %nop9130 = alloca i1, i1 0 + %nop9131 = alloca i1, i1 0 + %nop9132 = alloca i1, i1 0 + %nop9133 = alloca i1, i1 0 + %nop9134 = alloca i1, i1 0 + %nop9135 = alloca i1, i1 0 + %nop9136 = alloca i1, i1 0 + %nop9137 = alloca i1, i1 0 + %nop9138 = alloca i1, i1 0 + %nop9139 = alloca i1, i1 0 + %nop9140 = alloca i1, i1 0 + %nop9141 = alloca i1, i1 0 + %nop9142 = alloca i1, i1 0 + %nop9143 = alloca i1, i1 0 + %nop9144 = alloca i1, i1 0 + %nop9145 = alloca i1, i1 0 + %nop9146 = alloca i1, i1 0 + %nop9147 = alloca i1, i1 0 + %nop9148 = alloca i1, i1 0 + %nop9149 = alloca i1, i1 0 + %nop9150 = alloca i1, i1 0 + %nop9151 = alloca i1, i1 0 + %nop9152 = alloca i1, i1 0 + %nop9153 = alloca i1, i1 0 + %nop9154 = alloca i1, i1 0 + %nop9155 = alloca i1, i1 0 + %nop9156 = alloca i1, i1 0 + %nop9157 = alloca i1, i1 0 + %nop9158 = alloca i1, i1 0 + %nop9159 = alloca i1, i1 0 + %nop9160 = alloca i1, i1 0 + %nop9161 = alloca i1, i1 0 + %nop9162 = alloca i1, i1 0 + %nop9163 = alloca i1, i1 0 + %nop9164 = alloca i1, i1 0 + %nop9165 = alloca i1, i1 0 + %nop9166 = alloca i1, i1 0 + %nop9167 = alloca i1, i1 0 + %nop9168 = alloca i1, i1 0 + %nop9169 = alloca i1, i1 0 + %nop9170 = alloca 
i1, i1 0 + %nop9171 = alloca i1, i1 0 + %nop9172 = alloca i1, i1 0 + %nop9173 = alloca i1, i1 0 + %nop9174 = alloca i1, i1 0 + %nop9175 = alloca i1, i1 0 + %nop9176 = alloca i1, i1 0 + %nop9177 = alloca i1, i1 0 + %nop9178 = alloca i1, i1 0 + %nop9179 = alloca i1, i1 0 + %nop9180 = alloca i1, i1 0 + %nop9181 = alloca i1, i1 0 + %nop9182 = alloca i1, i1 0 + %nop9183 = alloca i1, i1 0 + %nop9184 = alloca i1, i1 0 + %nop9185 = alloca i1, i1 0 + %nop9186 = alloca i1, i1 0 + %nop9187 = alloca i1, i1 0 + %nop9188 = alloca i1, i1 0 + %nop9189 = alloca i1, i1 0 + %nop9190 = alloca i1, i1 0 + %nop9191 = alloca i1, i1 0 + %nop9192 = alloca i1, i1 0 + %nop9193 = alloca i1, i1 0 + %nop9194 = alloca i1, i1 0 + %nop9195 = alloca i1, i1 0 + %nop9196 = alloca i1, i1 0 + %nop9197 = alloca i1, i1 0 + %nop9198 = alloca i1, i1 0 + %nop9199 = alloca i1, i1 0 + %nop9200 = alloca i1, i1 0 + %nop9201 = alloca i1, i1 0 + %nop9202 = alloca i1, i1 0 + %nop9203 = alloca i1, i1 0 + %nop9204 = alloca i1, i1 0 + %nop9205 = alloca i1, i1 0 + %nop9206 = alloca i1, i1 0 + %nop9207 = alloca i1, i1 0 + %nop9208 = alloca i1, i1 0 + %nop9209 = alloca i1, i1 0 + %nop9210 = alloca i1, i1 0 + %nop9211 = alloca i1, i1 0 + %nop9212 = alloca i1, i1 0 + %nop9213 = alloca i1, i1 0 + %nop9214 = alloca i1, i1 0 + %nop9215 = alloca i1, i1 0 + %nop9216 = alloca i1, i1 0 + %nop9217 = alloca i1, i1 0 + %nop9218 = alloca i1, i1 0 + %nop9219 = alloca i1, i1 0 + %nop9220 = alloca i1, i1 0 + %nop9221 = alloca i1, i1 0 + %nop9222 = alloca i1, i1 0 + %nop9223 = alloca i1, i1 0 + %nop9224 = alloca i1, i1 0 + %nop9225 = alloca i1, i1 0 + %nop9226 = alloca i1, i1 0 + %nop9227 = alloca i1, i1 0 + %nop9228 = alloca i1, i1 0 + %nop9229 = alloca i1, i1 0 + %nop9230 = alloca i1, i1 0 + %nop9231 = alloca i1, i1 0 + %nop9232 = alloca i1, i1 0 + %nop9233 = alloca i1, i1 0 + %nop9234 = alloca i1, i1 0 + %nop9235 = alloca i1, i1 0 + %nop9236 = alloca i1, i1 0 + %nop9237 = alloca i1, i1 0 + %nop9238 = alloca i1, i1 0 + %nop9239 = 
alloca i1, i1 0 + %nop9240 = alloca i1, i1 0 + %nop9241 = alloca i1, i1 0 + %nop9242 = alloca i1, i1 0 + %nop9243 = alloca i1, i1 0 + %nop9244 = alloca i1, i1 0 + %nop9245 = alloca i1, i1 0 + %nop9246 = alloca i1, i1 0 + %nop9247 = alloca i1, i1 0 + %nop9248 = alloca i1, i1 0 + %nop9249 = alloca i1, i1 0 + %nop9250 = alloca i1, i1 0 + %nop9251 = alloca i1, i1 0 + %nop9252 = alloca i1, i1 0 + %nop9253 = alloca i1, i1 0 + %nop9254 = alloca i1, i1 0 + %nop9255 = alloca i1, i1 0 + %nop9256 = alloca i1, i1 0 + %nop9257 = alloca i1, i1 0 + %nop9258 = alloca i1, i1 0 + %nop9259 = alloca i1, i1 0 + %nop9260 = alloca i1, i1 0 + %nop9261 = alloca i1, i1 0 + %nop9262 = alloca i1, i1 0 + %nop9263 = alloca i1, i1 0 + %nop9264 = alloca i1, i1 0 + %nop9265 = alloca i1, i1 0 + %nop9266 = alloca i1, i1 0 + %nop9267 = alloca i1, i1 0 + %nop9268 = alloca i1, i1 0 + %nop9269 = alloca i1, i1 0 + %nop9270 = alloca i1, i1 0 + %nop9271 = alloca i1, i1 0 + %nop9272 = alloca i1, i1 0 + %nop9273 = alloca i1, i1 0 + %nop9274 = alloca i1, i1 0 + %nop9275 = alloca i1, i1 0 + %nop9276 = alloca i1, i1 0 + %nop9277 = alloca i1, i1 0 + %nop9278 = alloca i1, i1 0 + %nop9279 = alloca i1, i1 0 + %nop9280 = alloca i1, i1 0 + %nop9281 = alloca i1, i1 0 + %nop9282 = alloca i1, i1 0 + %nop9283 = alloca i1, i1 0 + %nop9284 = alloca i1, i1 0 + %nop9285 = alloca i1, i1 0 + %nop9286 = alloca i1, i1 0 + %nop9287 = alloca i1, i1 0 + %nop9288 = alloca i1, i1 0 + %nop9289 = alloca i1, i1 0 + %nop9290 = alloca i1, i1 0 + %nop9291 = alloca i1, i1 0 + %nop9292 = alloca i1, i1 0 + %nop9293 = alloca i1, i1 0 + %nop9294 = alloca i1, i1 0 + %nop9295 = alloca i1, i1 0 + %nop9296 = alloca i1, i1 0 + %nop9297 = alloca i1, i1 0 + %nop9298 = alloca i1, i1 0 + %nop9299 = alloca i1, i1 0 + %nop9300 = alloca i1, i1 0 + %nop9301 = alloca i1, i1 0 + %nop9302 = alloca i1, i1 0 + %nop9303 = alloca i1, i1 0 + %nop9304 = alloca i1, i1 0 + %nop9305 = alloca i1, i1 0 + %nop9306 = alloca i1, i1 0 + %nop9307 = alloca i1, i1 0 + %nop9308 
= alloca i1, i1 0 + %nop9309 = alloca i1, i1 0 + %nop9310 = alloca i1, i1 0 + %nop9311 = alloca i1, i1 0 + %nop9312 = alloca i1, i1 0 + %nop9313 = alloca i1, i1 0 + %nop9314 = alloca i1, i1 0 + %nop9315 = alloca i1, i1 0 + %nop9316 = alloca i1, i1 0 + %nop9317 = alloca i1, i1 0 + %nop9318 = alloca i1, i1 0 + %nop9319 = alloca i1, i1 0 + %nop9320 = alloca i1, i1 0 + %nop9321 = alloca i1, i1 0 + %nop9322 = alloca i1, i1 0 + %nop9323 = alloca i1, i1 0 + %nop9324 = alloca i1, i1 0 + %nop9325 = alloca i1, i1 0 + %nop9326 = alloca i1, i1 0 + %nop9327 = alloca i1, i1 0 + %nop9328 = alloca i1, i1 0 + %nop9329 = alloca i1, i1 0 + %nop9330 = alloca i1, i1 0 + %nop9331 = alloca i1, i1 0 + %nop9332 = alloca i1, i1 0 + %nop9333 = alloca i1, i1 0 + %nop9334 = alloca i1, i1 0 + %nop9335 = alloca i1, i1 0 + %nop9336 = alloca i1, i1 0 + %nop9337 = alloca i1, i1 0 + %nop9338 = alloca i1, i1 0 + %nop9339 = alloca i1, i1 0 + %nop9340 = alloca i1, i1 0 + %nop9341 = alloca i1, i1 0 + %nop9342 = alloca i1, i1 0 + %nop9343 = alloca i1, i1 0 + %nop9344 = alloca i1, i1 0 + %nop9345 = alloca i1, i1 0 + %nop9346 = alloca i1, i1 0 + %nop9347 = alloca i1, i1 0 + %nop9348 = alloca i1, i1 0 + %nop9349 = alloca i1, i1 0 + %nop9350 = alloca i1, i1 0 + %nop9351 = alloca i1, i1 0 + %nop9352 = alloca i1, i1 0 + %nop9353 = alloca i1, i1 0 + %nop9354 = alloca i1, i1 0 + %nop9355 = alloca i1, i1 0 + %nop9356 = alloca i1, i1 0 + %nop9357 = alloca i1, i1 0 + %nop9358 = alloca i1, i1 0 + %nop9359 = alloca i1, i1 0 + %nop9360 = alloca i1, i1 0 + %nop9361 = alloca i1, i1 0 + %nop9362 = alloca i1, i1 0 + %nop9363 = alloca i1, i1 0 + %nop9364 = alloca i1, i1 0 + %nop9365 = alloca i1, i1 0 + %nop9366 = alloca i1, i1 0 + %nop9367 = alloca i1, i1 0 + %nop9368 = alloca i1, i1 0 + %nop9369 = alloca i1, i1 0 + %nop9370 = alloca i1, i1 0 + %nop9371 = alloca i1, i1 0 + %nop9372 = alloca i1, i1 0 + %nop9373 = alloca i1, i1 0 + %nop9374 = alloca i1, i1 0 + %nop9375 = alloca i1, i1 0 + %nop9376 = alloca i1, i1 0 + 
%nop9377 = alloca i1, i1 0 + %nop9378 = alloca i1, i1 0 + %nop9379 = alloca i1, i1 0 + %nop9380 = alloca i1, i1 0 + %nop9381 = alloca i1, i1 0 + %nop9382 = alloca i1, i1 0 + %nop9383 = alloca i1, i1 0 + %nop9384 = alloca i1, i1 0 + %nop9385 = alloca i1, i1 0 + %nop9386 = alloca i1, i1 0 + %nop9387 = alloca i1, i1 0 + %nop9388 = alloca i1, i1 0 + %nop9389 = alloca i1, i1 0 + %nop9390 = alloca i1, i1 0 + %nop9391 = alloca i1, i1 0 + %nop9392 = alloca i1, i1 0 + %nop9393 = alloca i1, i1 0 + %nop9394 = alloca i1, i1 0 + %nop9395 = alloca i1, i1 0 + %nop9396 = alloca i1, i1 0 + %nop9397 = alloca i1, i1 0 + %nop9398 = alloca i1, i1 0 + %nop9399 = alloca i1, i1 0 + %nop9400 = alloca i1, i1 0 + %nop9401 = alloca i1, i1 0 + %nop9402 = alloca i1, i1 0 + %nop9403 = alloca i1, i1 0 + %nop9404 = alloca i1, i1 0 + %nop9405 = alloca i1, i1 0 + %nop9406 = alloca i1, i1 0 + %nop9407 = alloca i1, i1 0 + %nop9408 = alloca i1, i1 0 + %nop9409 = alloca i1, i1 0 + %nop9410 = alloca i1, i1 0 + %nop9411 = alloca i1, i1 0 + %nop9412 = alloca i1, i1 0 + %nop9413 = alloca i1, i1 0 + %nop9414 = alloca i1, i1 0 + %nop9415 = alloca i1, i1 0 + %nop9416 = alloca i1, i1 0 + %nop9417 = alloca i1, i1 0 + %nop9418 = alloca i1, i1 0 + %nop9419 = alloca i1, i1 0 + %nop9420 = alloca i1, i1 0 + %nop9421 = alloca i1, i1 0 + %nop9422 = alloca i1, i1 0 + %nop9423 = alloca i1, i1 0 + %nop9424 = alloca i1, i1 0 + %nop9425 = alloca i1, i1 0 + %nop9426 = alloca i1, i1 0 + %nop9427 = alloca i1, i1 0 + %nop9428 = alloca i1, i1 0 + %nop9429 = alloca i1, i1 0 + %nop9430 = alloca i1, i1 0 + %nop9431 = alloca i1, i1 0 + %nop9432 = alloca i1, i1 0 + %nop9433 = alloca i1, i1 0 + %nop9434 = alloca i1, i1 0 + %nop9435 = alloca i1, i1 0 + %nop9436 = alloca i1, i1 0 + %nop9437 = alloca i1, i1 0 + %nop9438 = alloca i1, i1 0 + %nop9439 = alloca i1, i1 0 + %nop9440 = alloca i1, i1 0 + %nop9441 = alloca i1, i1 0 + %nop9442 = alloca i1, i1 0 + %nop9443 = alloca i1, i1 0 + %nop9444 = alloca i1, i1 0 + %nop9445 = alloca i1, i1 0 
+ %nop9446 = alloca i1, i1 0 + %nop9447 = alloca i1, i1 0 + %nop9448 = alloca i1, i1 0 + %nop9449 = alloca i1, i1 0 + %nop9450 = alloca i1, i1 0 + %nop9451 = alloca i1, i1 0 + %nop9452 = alloca i1, i1 0 + %nop9453 = alloca i1, i1 0 + %nop9454 = alloca i1, i1 0 + %nop9455 = alloca i1, i1 0 + %nop9456 = alloca i1, i1 0 + %nop9457 = alloca i1, i1 0 + %nop9458 = alloca i1, i1 0 + %nop9459 = alloca i1, i1 0 + %nop9460 = alloca i1, i1 0 + %nop9461 = alloca i1, i1 0 + %nop9462 = alloca i1, i1 0 + %nop9463 = alloca i1, i1 0 + %nop9464 = alloca i1, i1 0 + %nop9465 = alloca i1, i1 0 + %nop9466 = alloca i1, i1 0 + %nop9467 = alloca i1, i1 0 + %nop9468 = alloca i1, i1 0 + %nop9469 = alloca i1, i1 0 + %nop9470 = alloca i1, i1 0 + %nop9471 = alloca i1, i1 0 + %nop9472 = alloca i1, i1 0 + %nop9473 = alloca i1, i1 0 + %nop9474 = alloca i1, i1 0 + %nop9475 = alloca i1, i1 0 + %nop9476 = alloca i1, i1 0 + %nop9477 = alloca i1, i1 0 + %nop9478 = alloca i1, i1 0 + %nop9479 = alloca i1, i1 0 + %nop9480 = alloca i1, i1 0 + %nop9481 = alloca i1, i1 0 + %nop9482 = alloca i1, i1 0 + %nop9483 = alloca i1, i1 0 + %nop9484 = alloca i1, i1 0 + %nop9485 = alloca i1, i1 0 + %nop9486 = alloca i1, i1 0 + %nop9487 = alloca i1, i1 0 + %nop9488 = alloca i1, i1 0 + %nop9489 = alloca i1, i1 0 + %nop9490 = alloca i1, i1 0 + %nop9491 = alloca i1, i1 0 + %nop9492 = alloca i1, i1 0 + %nop9493 = alloca i1, i1 0 + %nop9494 = alloca i1, i1 0 + %nop9495 = alloca i1, i1 0 + %nop9496 = alloca i1, i1 0 + %nop9497 = alloca i1, i1 0 + %nop9498 = alloca i1, i1 0 + %nop9499 = alloca i1, i1 0 + %nop9500 = alloca i1, i1 0 + %nop9501 = alloca i1, i1 0 + %nop9502 = alloca i1, i1 0 + %nop9503 = alloca i1, i1 0 + %nop9504 = alloca i1, i1 0 + %nop9505 = alloca i1, i1 0 + %nop9506 = alloca i1, i1 0 + %nop9507 = alloca i1, i1 0 + %nop9508 = alloca i1, i1 0 + %nop9509 = alloca i1, i1 0 + %nop9510 = alloca i1, i1 0 + %nop9511 = alloca i1, i1 0 + %nop9512 = alloca i1, i1 0 + %nop9513 = alloca i1, i1 0 + %nop9514 = alloca i1, i1 
0 + %nop9515 = alloca i1, i1 0 + %nop9516 = alloca i1, i1 0 + %nop9517 = alloca i1, i1 0 + %nop9518 = alloca i1, i1 0 + %nop9519 = alloca i1, i1 0 + %nop9520 = alloca i1, i1 0 + %nop9521 = alloca i1, i1 0 + %nop9522 = alloca i1, i1 0 + %nop9523 = alloca i1, i1 0 + %nop9524 = alloca i1, i1 0 + %nop9525 = alloca i1, i1 0 + %nop9526 = alloca i1, i1 0 + %nop9527 = alloca i1, i1 0 + %nop9528 = alloca i1, i1 0 + %nop9529 = alloca i1, i1 0 + %nop9530 = alloca i1, i1 0 + %nop9531 = alloca i1, i1 0 + %nop9532 = alloca i1, i1 0 + %nop9533 = alloca i1, i1 0 + %nop9534 = alloca i1, i1 0 + %nop9535 = alloca i1, i1 0 + %nop9536 = alloca i1, i1 0 + %nop9537 = alloca i1, i1 0 + %nop9538 = alloca i1, i1 0 + %nop9539 = alloca i1, i1 0 + %nop9540 = alloca i1, i1 0 + %nop9541 = alloca i1, i1 0 + %nop9542 = alloca i1, i1 0 + %nop9543 = alloca i1, i1 0 + %nop9544 = alloca i1, i1 0 + %nop9545 = alloca i1, i1 0 + %nop9546 = alloca i1, i1 0 + %nop9547 = alloca i1, i1 0 + %nop9548 = alloca i1, i1 0 + %nop9549 = alloca i1, i1 0 + %nop9550 = alloca i1, i1 0 + %nop9551 = alloca i1, i1 0 + %nop9552 = alloca i1, i1 0 + %nop9553 = alloca i1, i1 0 + %nop9554 = alloca i1, i1 0 + %nop9555 = alloca i1, i1 0 + %nop9556 = alloca i1, i1 0 + %nop9557 = alloca i1, i1 0 + %nop9558 = alloca i1, i1 0 + %nop9559 = alloca i1, i1 0 + %nop9560 = alloca i1, i1 0 + %nop9561 = alloca i1, i1 0 + %nop9562 = alloca i1, i1 0 + %nop9563 = alloca i1, i1 0 + %nop9564 = alloca i1, i1 0 + %nop9565 = alloca i1, i1 0 + %nop9566 = alloca i1, i1 0 + %nop9567 = alloca i1, i1 0 + %nop9568 = alloca i1, i1 0 + %nop9569 = alloca i1, i1 0 + %nop9570 = alloca i1, i1 0 + %nop9571 = alloca i1, i1 0 + %nop9572 = alloca i1, i1 0 + %nop9573 = alloca i1, i1 0 + %nop9574 = alloca i1, i1 0 + %nop9575 = alloca i1, i1 0 + %nop9576 = alloca i1, i1 0 + %nop9577 = alloca i1, i1 0 + %nop9578 = alloca i1, i1 0 + %nop9579 = alloca i1, i1 0 + %nop9580 = alloca i1, i1 0 + %nop9581 = alloca i1, i1 0 + %nop9582 = alloca i1, i1 0 + %nop9583 = alloca i1, 
i1 0 + %nop9584 = alloca i1, i1 0 + %nop9585 = alloca i1, i1 0 + %nop9586 = alloca i1, i1 0 + %nop9587 = alloca i1, i1 0 + %nop9588 = alloca i1, i1 0 + %nop9589 = alloca i1, i1 0 + %nop9590 = alloca i1, i1 0 + %nop9591 = alloca i1, i1 0 + %nop9592 = alloca i1, i1 0 + %nop9593 = alloca i1, i1 0 + %nop9594 = alloca i1, i1 0 + %nop9595 = alloca i1, i1 0 + %nop9596 = alloca i1, i1 0 + %nop9597 = alloca i1, i1 0 + %nop9598 = alloca i1, i1 0 + %nop9599 = alloca i1, i1 0 + %nop9600 = alloca i1, i1 0 + %nop9601 = alloca i1, i1 0 + %nop9602 = alloca i1, i1 0 + %nop9603 = alloca i1, i1 0 + %nop9604 = alloca i1, i1 0 + %nop9605 = alloca i1, i1 0 + %nop9606 = alloca i1, i1 0 + %nop9607 = alloca i1, i1 0 + %nop9608 = alloca i1, i1 0 + %nop9609 = alloca i1, i1 0 + %nop9610 = alloca i1, i1 0 + %nop9611 = alloca i1, i1 0 + %nop9612 = alloca i1, i1 0 + %nop9613 = alloca i1, i1 0 + %nop9614 = alloca i1, i1 0 + %nop9615 = alloca i1, i1 0 + %nop9616 = alloca i1, i1 0 + %nop9617 = alloca i1, i1 0 + %nop9618 = alloca i1, i1 0 + %nop9619 = alloca i1, i1 0 + %nop9620 = alloca i1, i1 0 + %nop9621 = alloca i1, i1 0 + %nop9622 = alloca i1, i1 0 + %nop9623 = alloca i1, i1 0 + %nop9624 = alloca i1, i1 0 + %nop9625 = alloca i1, i1 0 + %nop9626 = alloca i1, i1 0 + %nop9627 = alloca i1, i1 0 + %nop9628 = alloca i1, i1 0 + %nop9629 = alloca i1, i1 0 + %nop9630 = alloca i1, i1 0 + %nop9631 = alloca i1, i1 0 + %nop9632 = alloca i1, i1 0 + %nop9633 = alloca i1, i1 0 + %nop9634 = alloca i1, i1 0 + %nop9635 = alloca i1, i1 0 + %nop9636 = alloca i1, i1 0 + %nop9637 = alloca i1, i1 0 + %nop9638 = alloca i1, i1 0 + %nop9639 = alloca i1, i1 0 + %nop9640 = alloca i1, i1 0 + %nop9641 = alloca i1, i1 0 + %nop9642 = alloca i1, i1 0 + %nop9643 = alloca i1, i1 0 + %nop9644 = alloca i1, i1 0 + %nop9645 = alloca i1, i1 0 + %nop9646 = alloca i1, i1 0 + %nop9647 = alloca i1, i1 0 + %nop9648 = alloca i1, i1 0 + %nop9649 = alloca i1, i1 0 + %nop9650 = alloca i1, i1 0 + %nop9651 = alloca i1, i1 0 + %nop9652 = alloca 
i1, i1 0 + %nop9653 = alloca i1, i1 0 + %nop9654 = alloca i1, i1 0 + %nop9655 = alloca i1, i1 0 + %nop9656 = alloca i1, i1 0 + %nop9657 = alloca i1, i1 0 + %nop9658 = alloca i1, i1 0 + %nop9659 = alloca i1, i1 0 + %nop9660 = alloca i1, i1 0 + %nop9661 = alloca i1, i1 0 + %nop9662 = alloca i1, i1 0 + %nop9663 = alloca i1, i1 0 + %nop9664 = alloca i1, i1 0 + %nop9665 = alloca i1, i1 0 + %nop9666 = alloca i1, i1 0 + %nop9667 = alloca i1, i1 0 + %nop9668 = alloca i1, i1 0 + %nop9669 = alloca i1, i1 0 + %nop9670 = alloca i1, i1 0 + %nop9671 = alloca i1, i1 0 + %nop9672 = alloca i1, i1 0 + %nop9673 = alloca i1, i1 0 + %nop9674 = alloca i1, i1 0 + %nop9675 = alloca i1, i1 0 + %nop9676 = alloca i1, i1 0 + %nop9677 = alloca i1, i1 0 + %nop9678 = alloca i1, i1 0 + %nop9679 = alloca i1, i1 0 + %nop9680 = alloca i1, i1 0 + %nop9681 = alloca i1, i1 0 + %nop9682 = alloca i1, i1 0 + %nop9683 = alloca i1, i1 0 + %nop9684 = alloca i1, i1 0 + %nop9685 = alloca i1, i1 0 + %nop9686 = alloca i1, i1 0 + %nop9687 = alloca i1, i1 0 + %nop9688 = alloca i1, i1 0 + %nop9689 = alloca i1, i1 0 + %nop9690 = alloca i1, i1 0 + %nop9691 = alloca i1, i1 0 + %nop9692 = alloca i1, i1 0 + %nop9693 = alloca i1, i1 0 + %nop9694 = alloca i1, i1 0 + %nop9695 = alloca i1, i1 0 + %nop9696 = alloca i1, i1 0 + %nop9697 = alloca i1, i1 0 + %nop9698 = alloca i1, i1 0 + %nop9699 = alloca i1, i1 0 + %nop9700 = alloca i1, i1 0 + %nop9701 = alloca i1, i1 0 + %nop9702 = alloca i1, i1 0 + %nop9703 = alloca i1, i1 0 + %nop9704 = alloca i1, i1 0 + %nop9705 = alloca i1, i1 0 + %nop9706 = alloca i1, i1 0 + %nop9707 = alloca i1, i1 0 + %nop9708 = alloca i1, i1 0 + %nop9709 = alloca i1, i1 0 + %nop9710 = alloca i1, i1 0 + %nop9711 = alloca i1, i1 0 + %nop9712 = alloca i1, i1 0 + %nop9713 = alloca i1, i1 0 + %nop9714 = alloca i1, i1 0 + %nop9715 = alloca i1, i1 0 + %nop9716 = alloca i1, i1 0 + %nop9717 = alloca i1, i1 0 + %nop9718 = alloca i1, i1 0 + %nop9719 = alloca i1, i1 0 + %nop9720 = alloca i1, i1 0 + %nop9721 = 
alloca i1, i1 0 + %nop9722 = alloca i1, i1 0 + %nop9723 = alloca i1, i1 0 + %nop9724 = alloca i1, i1 0 + %nop9725 = alloca i1, i1 0 + %nop9726 = alloca i1, i1 0 + %nop9727 = alloca i1, i1 0 + %nop9728 = alloca i1, i1 0 + %nop9729 = alloca i1, i1 0 + %nop9730 = alloca i1, i1 0 + %nop9731 = alloca i1, i1 0 + %nop9732 = alloca i1, i1 0 + %nop9733 = alloca i1, i1 0 + %nop9734 = alloca i1, i1 0 + %nop9735 = alloca i1, i1 0 + %nop9736 = alloca i1, i1 0 + %nop9737 = alloca i1, i1 0 + %nop9738 = alloca i1, i1 0 + %nop9739 = alloca i1, i1 0 + %nop9740 = alloca i1, i1 0 + %nop9741 = alloca i1, i1 0 + %nop9742 = alloca i1, i1 0 + %nop9743 = alloca i1, i1 0 + %nop9744 = alloca i1, i1 0 + %nop9745 = alloca i1, i1 0 + %nop9746 = alloca i1, i1 0 + %nop9747 = alloca i1, i1 0 + %nop9748 = alloca i1, i1 0 + %nop9749 = alloca i1, i1 0 + %nop9750 = alloca i1, i1 0 + %nop9751 = alloca i1, i1 0 + %nop9752 = alloca i1, i1 0 + %nop9753 = alloca i1, i1 0 + %nop9754 = alloca i1, i1 0 + %nop9755 = alloca i1, i1 0 + %nop9756 = alloca i1, i1 0 + %nop9757 = alloca i1, i1 0 + %nop9758 = alloca i1, i1 0 + %nop9759 = alloca i1, i1 0 + %nop9760 = alloca i1, i1 0 + %nop9761 = alloca i1, i1 0 + %nop9762 = alloca i1, i1 0 + %nop9763 = alloca i1, i1 0 + %nop9764 = alloca i1, i1 0 + %nop9765 = alloca i1, i1 0 + %nop9766 = alloca i1, i1 0 + %nop9767 = alloca i1, i1 0 + %nop9768 = alloca i1, i1 0 + %nop9769 = alloca i1, i1 0 + %nop9770 = alloca i1, i1 0 + %nop9771 = alloca i1, i1 0 + %nop9772 = alloca i1, i1 0 + %nop9773 = alloca i1, i1 0 + %nop9774 = alloca i1, i1 0 + %nop9775 = alloca i1, i1 0 + %nop9776 = alloca i1, i1 0 + %nop9777 = alloca i1, i1 0 + %nop9778 = alloca i1, i1 0 + %nop9779 = alloca i1, i1 0 + %nop9780 = alloca i1, i1 0 + %nop9781 = alloca i1, i1 0 + %nop9782 = alloca i1, i1 0 + %nop9783 = alloca i1, i1 0 + %nop9784 = alloca i1, i1 0 + %nop9785 = alloca i1, i1 0 + %nop9786 = alloca i1, i1 0 + %nop9787 = alloca i1, i1 0 + %nop9788 = alloca i1, i1 0 + %nop9789 = alloca i1, i1 0 + %nop9790 
= alloca i1, i1 0 + %nop9791 = alloca i1, i1 0 + %nop9792 = alloca i1, i1 0 + %nop9793 = alloca i1, i1 0 + %nop9794 = alloca i1, i1 0 + %nop9795 = alloca i1, i1 0 + %nop9796 = alloca i1, i1 0 + %nop9797 = alloca i1, i1 0 + %nop9798 = alloca i1, i1 0 + %nop9799 = alloca i1, i1 0 + %nop9800 = alloca i1, i1 0 + %nop9801 = alloca i1, i1 0 + %nop9802 = alloca i1, i1 0 + %nop9803 = alloca i1, i1 0 + %nop9804 = alloca i1, i1 0 + %nop9805 = alloca i1, i1 0 + %nop9806 = alloca i1, i1 0 + %nop9807 = alloca i1, i1 0 + %nop9808 = alloca i1, i1 0 + %nop9809 = alloca i1, i1 0 + %nop9810 = alloca i1, i1 0 + %nop9811 = alloca i1, i1 0 + %nop9812 = alloca i1, i1 0 + %nop9813 = alloca i1, i1 0 + %nop9814 = alloca i1, i1 0 + %nop9815 = alloca i1, i1 0 + %nop9816 = alloca i1, i1 0 + %nop9817 = alloca i1, i1 0 + %nop9818 = alloca i1, i1 0 + %nop9819 = alloca i1, i1 0 + %nop9820 = alloca i1, i1 0 + %nop9821 = alloca i1, i1 0 + %nop9822 = alloca i1, i1 0 + %nop9823 = alloca i1, i1 0 + %nop9824 = alloca i1, i1 0 + %nop9825 = alloca i1, i1 0 + %nop9826 = alloca i1, i1 0 + %nop9827 = alloca i1, i1 0 + %nop9828 = alloca i1, i1 0 + %nop9829 = alloca i1, i1 0 + %nop9830 = alloca i1, i1 0 + %nop9831 = alloca i1, i1 0 + %nop9832 = alloca i1, i1 0 + %nop9833 = alloca i1, i1 0 + %nop9834 = alloca i1, i1 0 + %nop9835 = alloca i1, i1 0 + %nop9836 = alloca i1, i1 0 + %nop9837 = alloca i1, i1 0 + %nop9838 = alloca i1, i1 0 + %nop9839 = alloca i1, i1 0 + %nop9840 = alloca i1, i1 0 + %nop9841 = alloca i1, i1 0 + %nop9842 = alloca i1, i1 0 + %nop9843 = alloca i1, i1 0 + %nop9844 = alloca i1, i1 0 + %nop9845 = alloca i1, i1 0 + %nop9846 = alloca i1, i1 0 + %nop9847 = alloca i1, i1 0 + %nop9848 = alloca i1, i1 0 + %nop9849 = alloca i1, i1 0 + %nop9850 = alloca i1, i1 0 + %nop9851 = alloca i1, i1 0 + %nop9852 = alloca i1, i1 0 + %nop9853 = alloca i1, i1 0 + %nop9854 = alloca i1, i1 0 + %nop9855 = alloca i1, i1 0 + %nop9856 = alloca i1, i1 0 + %nop9857 = alloca i1, i1 0 + %nop9858 = alloca i1, i1 0 + 
%nop9859 = alloca i1, i1 0 + %nop9860 = alloca i1, i1 0 + %nop9861 = alloca i1, i1 0 + %nop9862 = alloca i1, i1 0 + %nop9863 = alloca i1, i1 0 + %nop9864 = alloca i1, i1 0 + %nop9865 = alloca i1, i1 0 + %nop9866 = alloca i1, i1 0 + %nop9867 = alloca i1, i1 0 + %nop9868 = alloca i1, i1 0 + %nop9869 = alloca i1, i1 0 + %nop9870 = alloca i1, i1 0 + %nop9871 = alloca i1, i1 0 + %nop9872 = alloca i1, i1 0 + %nop9873 = alloca i1, i1 0 + %nop9874 = alloca i1, i1 0 + %nop9875 = alloca i1, i1 0 + %nop9876 = alloca i1, i1 0 + %nop9877 = alloca i1, i1 0 + %nop9878 = alloca i1, i1 0 + %nop9879 = alloca i1, i1 0 + %nop9880 = alloca i1, i1 0 + %nop9881 = alloca i1, i1 0 + %nop9882 = alloca i1, i1 0 + %nop9883 = alloca i1, i1 0 + %nop9884 = alloca i1, i1 0 + %nop9885 = alloca i1, i1 0 + %nop9886 = alloca i1, i1 0 + %nop9887 = alloca i1, i1 0 + %nop9888 = alloca i1, i1 0 + %nop9889 = alloca i1, i1 0 + %nop9890 = alloca i1, i1 0 + %nop9891 = alloca i1, i1 0 + %nop9892 = alloca i1, i1 0 + %nop9893 = alloca i1, i1 0 + %nop9894 = alloca i1, i1 0 + %nop9895 = alloca i1, i1 0 + %nop9896 = alloca i1, i1 0 + %nop9897 = alloca i1, i1 0 + %nop9898 = alloca i1, i1 0 + %nop9899 = alloca i1, i1 0 + %nop9900 = alloca i1, i1 0 + %nop9901 = alloca i1, i1 0 + %nop9902 = alloca i1, i1 0 + %nop9903 = alloca i1, i1 0 + %nop9904 = alloca i1, i1 0 + %nop9905 = alloca i1, i1 0 + %nop9906 = alloca i1, i1 0 + %nop9907 = alloca i1, i1 0 + %nop9908 = alloca i1, i1 0 + %nop9909 = alloca i1, i1 0 + %nop9910 = alloca i1, i1 0 + %nop9911 = alloca i1, i1 0 + %nop9912 = alloca i1, i1 0 + %nop9913 = alloca i1, i1 0 + %nop9914 = alloca i1, i1 0 + %nop9915 = alloca i1, i1 0 + %nop9916 = alloca i1, i1 0 + %nop9917 = alloca i1, i1 0 + %nop9918 = alloca i1, i1 0 + %nop9919 = alloca i1, i1 0 + %nop9920 = alloca i1, i1 0 + %nop9921 = alloca i1, i1 0 + %nop9922 = alloca i1, i1 0 + %nop9923 = alloca i1, i1 0 + %nop9924 = alloca i1, i1 0 + %nop9925 = alloca i1, i1 0 + %nop9926 = alloca i1, i1 0 + %nop9927 = alloca i1, i1 0 
+ %nop9928 = alloca i1, i1 0 + %nop9929 = alloca i1, i1 0 + %nop9930 = alloca i1, i1 0 + %nop9931 = alloca i1, i1 0 + %nop9932 = alloca i1, i1 0 + %nop9933 = alloca i1, i1 0 + %nop9934 = alloca i1, i1 0 + %nop9935 = alloca i1, i1 0 + %nop9936 = alloca i1, i1 0 + %nop9937 = alloca i1, i1 0 + %nop9938 = alloca i1, i1 0 + %nop9939 = alloca i1, i1 0 + %nop9940 = alloca i1, i1 0 + %nop9941 = alloca i1, i1 0 + %nop9942 = alloca i1, i1 0 + %nop9943 = alloca i1, i1 0 + %nop9944 = alloca i1, i1 0 + %nop9945 = alloca i1, i1 0 + %nop9946 = alloca i1, i1 0 + %nop9947 = alloca i1, i1 0 + %nop9948 = alloca i1, i1 0 + %nop9949 = alloca i1, i1 0 + %nop9950 = alloca i1, i1 0 + %nop9951 = alloca i1, i1 0 + %nop9952 = alloca i1, i1 0 + %nop9953 = alloca i1, i1 0 + %nop9954 = alloca i1, i1 0 + %nop9955 = alloca i1, i1 0 + %nop9956 = alloca i1, i1 0 + %nop9957 = alloca i1, i1 0 + %nop9958 = alloca i1, i1 0 + %nop9959 = alloca i1, i1 0 + %nop9960 = alloca i1, i1 0 + %nop9961 = alloca i1, i1 0 + %nop9962 = alloca i1, i1 0 + %nop9963 = alloca i1, i1 0 + %nop9964 = alloca i1, i1 0 + %nop9965 = alloca i1, i1 0 + %nop9966 = alloca i1, i1 0 + %nop9967 = alloca i1, i1 0 + %nop9968 = alloca i1, i1 0 + %nop9969 = alloca i1, i1 0 + %nop9970 = alloca i1, i1 0 + %nop9971 = alloca i1, i1 0 + %nop9972 = alloca i1, i1 0 + %nop9973 = alloca i1, i1 0 + %nop9974 = alloca i1, i1 0 + %nop9975 = alloca i1, i1 0 + %nop9976 = alloca i1, i1 0 + %nop9977 = alloca i1, i1 0 + %nop9978 = alloca i1, i1 0 + %nop9979 = alloca i1, i1 0 + %nop9980 = alloca i1, i1 0 + %nop9981 = alloca i1, i1 0 + %nop9982 = alloca i1, i1 0 + %nop9983 = alloca i1, i1 0 + %nop9984 = alloca i1, i1 0 + %nop9985 = alloca i1, i1 0 + %nop9986 = alloca i1, i1 0 + %nop9987 = alloca i1, i1 0 + %nop9988 = alloca i1, i1 0 + %nop9989 = alloca i1, i1 0 + %nop9990 = alloca i1, i1 0 + %nop9991 = alloca i1, i1 0 + %nop9992 = alloca i1, i1 0 + %nop9993 = alloca i1, i1 0 + %nop9994 = alloca i1, i1 0 + %nop9995 = alloca i1, i1 0 + %nop9996 = alloca i1, i1 
0 + %nop9997 = alloca i1, i1 0 + %nop9998 = alloca i1, i1 0 + %nop9999 = alloca i1, i1 0 + %nop10000 = alloca i1, i1 0 + %nop10001 = alloca i1, i1 0 + %nop10002 = alloca i1, i1 0 + %nop10003 = alloca i1, i1 0 + %nop10004 = alloca i1, i1 0 + %nop10005 = alloca i1, i1 0 + %nop10006 = alloca i1, i1 0 + %nop10007 = alloca i1, i1 0 + %nop10008 = alloca i1, i1 0 + %nop10009 = alloca i1, i1 0 + %nop10010 = alloca i1, i1 0 + %nop10011 = alloca i1, i1 0 + %nop10012 = alloca i1, i1 0 + %nop10013 = alloca i1, i1 0 + %nop10014 = alloca i1, i1 0 + %nop10015 = alloca i1, i1 0 + %nop10016 = alloca i1, i1 0 + %nop10017 = alloca i1, i1 0 + %nop10018 = alloca i1, i1 0 + %nop10019 = alloca i1, i1 0 + %nop10020 = alloca i1, i1 0 + %nop10021 = alloca i1, i1 0 + %nop10022 = alloca i1, i1 0 + %nop10023 = alloca i1, i1 0 + %nop10024 = alloca i1, i1 0 + %nop10025 = alloca i1, i1 0 + %nop10026 = alloca i1, i1 0 + %nop10027 = alloca i1, i1 0 + %nop10028 = alloca i1, i1 0 + %nop10029 = alloca i1, i1 0 + %nop10030 = alloca i1, i1 0 + %nop10031 = alloca i1, i1 0 + %nop10032 = alloca i1, i1 0 + %nop10033 = alloca i1, i1 0 + %nop10034 = alloca i1, i1 0 + %nop10035 = alloca i1, i1 0 + %nop10036 = alloca i1, i1 0 + %nop10037 = alloca i1, i1 0 + %nop10038 = alloca i1, i1 0 + %nop10039 = alloca i1, i1 0 + %nop10040 = alloca i1, i1 0 + %nop10041 = alloca i1, i1 0 + %nop10042 = alloca i1, i1 0 + %nop10043 = alloca i1, i1 0 + %nop10044 = alloca i1, i1 0 + %nop10045 = alloca i1, i1 0 + %nop10046 = alloca i1, i1 0 + %nop10047 = alloca i1, i1 0 + %nop10048 = alloca i1, i1 0 + %nop10049 = alloca i1, i1 0 + %nop10050 = alloca i1, i1 0 + %nop10051 = alloca i1, i1 0 + %nop10052 = alloca i1, i1 0 + %nop10053 = alloca i1, i1 0 + %nop10054 = alloca i1, i1 0 + %nop10055 = alloca i1, i1 0 + %nop10056 = alloca i1, i1 0 + %nop10057 = alloca i1, i1 0 + %nop10058 = alloca i1, i1 0 + %nop10059 = alloca i1, i1 0 + %nop10060 = alloca i1, i1 0 + %nop10061 = alloca i1, i1 0 + %nop10062 = alloca i1, i1 0 + %nop10063 = alloca 
i1, i1 0 + %nop10064 = alloca i1, i1 0 + %nop10065 = alloca i1, i1 0 + %nop10066 = alloca i1, i1 0 + %nop10067 = alloca i1, i1 0 + %nop10068 = alloca i1, i1 0 + %nop10069 = alloca i1, i1 0 + %nop10070 = alloca i1, i1 0 + %nop10071 = alloca i1, i1 0 + %nop10072 = alloca i1, i1 0 + %nop10073 = alloca i1, i1 0 + %nop10074 = alloca i1, i1 0 + %nop10075 = alloca i1, i1 0 + %nop10076 = alloca i1, i1 0 + %nop10077 = alloca i1, i1 0 + %nop10078 = alloca i1, i1 0 + %nop10079 = alloca i1, i1 0 + %nop10080 = alloca i1, i1 0 + %nop10081 = alloca i1, i1 0 + %nop10082 = alloca i1, i1 0 + %nop10083 = alloca i1, i1 0 + %nop10084 = alloca i1, i1 0 + %nop10085 = alloca i1, i1 0 + %nop10086 = alloca i1, i1 0 + %nop10087 = alloca i1, i1 0 + %nop10088 = alloca i1, i1 0 + %nop10089 = alloca i1, i1 0 + %nop10090 = alloca i1, i1 0 + %nop10091 = alloca i1, i1 0 + %nop10092 = alloca i1, i1 0 + %nop10093 = alloca i1, i1 0 + %nop10094 = alloca i1, i1 0 + %nop10095 = alloca i1, i1 0 + %nop10096 = alloca i1, i1 0 + %nop10097 = alloca i1, i1 0 + %nop10098 = alloca i1, i1 0 + %nop10099 = alloca i1, i1 0 + %nop10100 = alloca i1, i1 0 + %nop10101 = alloca i1, i1 0 + %nop10102 = alloca i1, i1 0 + %nop10103 = alloca i1, i1 0 + %nop10104 = alloca i1, i1 0 + %nop10105 = alloca i1, i1 0 + %nop10106 = alloca i1, i1 0 + %nop10107 = alloca i1, i1 0 + %nop10108 = alloca i1, i1 0 + %nop10109 = alloca i1, i1 0 + %nop10110 = alloca i1, i1 0 + %nop10111 = alloca i1, i1 0 + %nop10112 = alloca i1, i1 0 + %nop10113 = alloca i1, i1 0 + %nop10114 = alloca i1, i1 0 + %nop10115 = alloca i1, i1 0 + %nop10116 = alloca i1, i1 0 + %nop10117 = alloca i1, i1 0 + %nop10118 = alloca i1, i1 0 + %nop10119 = alloca i1, i1 0 + %nop10120 = alloca i1, i1 0 + %nop10121 = alloca i1, i1 0 + %nop10122 = alloca i1, i1 0 + %nop10123 = alloca i1, i1 0 + %nop10124 = alloca i1, i1 0 + %nop10125 = alloca i1, i1 0 + %nop10126 = alloca i1, i1 0 + %nop10127 = alloca i1, i1 0 + %nop10128 = alloca i1, i1 0 + %nop10129 = alloca i1, i1 0 + 
%nop10130 = alloca i1, i1 0 + %nop10131 = alloca i1, i1 0 + %nop10132 = alloca i1, i1 0 + %nop10133 = alloca i1, i1 0 + %nop10134 = alloca i1, i1 0 + %nop10135 = alloca i1, i1 0 + %nop10136 = alloca i1, i1 0 + %nop10137 = alloca i1, i1 0 + %nop10138 = alloca i1, i1 0 + %nop10139 = alloca i1, i1 0 + %nop10140 = alloca i1, i1 0 + %nop10141 = alloca i1, i1 0 + %nop10142 = alloca i1, i1 0 + %nop10143 = alloca i1, i1 0 + %nop10144 = alloca i1, i1 0 + %nop10145 = alloca i1, i1 0 + %nop10146 = alloca i1, i1 0 + %nop10147 = alloca i1, i1 0 + %nop10148 = alloca i1, i1 0 + %nop10149 = alloca i1, i1 0 + %nop10150 = alloca i1, i1 0 + %nop10151 = alloca i1, i1 0 + %nop10152 = alloca i1, i1 0 + %nop10153 = alloca i1, i1 0 + %nop10154 = alloca i1, i1 0 + %nop10155 = alloca i1, i1 0 + %nop10156 = alloca i1, i1 0 + %nop10157 = alloca i1, i1 0 + %nop10158 = alloca i1, i1 0 + %nop10159 = alloca i1, i1 0 + %nop10160 = alloca i1, i1 0 + %nop10161 = alloca i1, i1 0 + %nop10162 = alloca i1, i1 0 + %nop10163 = alloca i1, i1 0 + %nop10164 = alloca i1, i1 0 + %nop10165 = alloca i1, i1 0 + %nop10166 = alloca i1, i1 0 + %nop10167 = alloca i1, i1 0 + %nop10168 = alloca i1, i1 0 + %nop10169 = alloca i1, i1 0 + %nop10170 = alloca i1, i1 0 + %nop10171 = alloca i1, i1 0 + %nop10172 = alloca i1, i1 0 + %nop10173 = alloca i1, i1 0 + %nop10174 = alloca i1, i1 0 + %nop10175 = alloca i1, i1 0 + %nop10176 = alloca i1, i1 0 + %nop10177 = alloca i1, i1 0 + %nop10178 = alloca i1, i1 0 + %nop10179 = alloca i1, i1 0 + %nop10180 = alloca i1, i1 0 + %nop10181 = alloca i1, i1 0 + %nop10182 = alloca i1, i1 0 + %nop10183 = alloca i1, i1 0 + %nop10184 = alloca i1, i1 0 + %nop10185 = alloca i1, i1 0 + %nop10186 = alloca i1, i1 0 + %nop10187 = alloca i1, i1 0 + %nop10188 = alloca i1, i1 0 + %nop10189 = alloca i1, i1 0 + %nop10190 = alloca i1, i1 0 + %nop10191 = alloca i1, i1 0 + %nop10192 = alloca i1, i1 0 + %nop10193 = alloca i1, i1 0 + %nop10194 = alloca i1, i1 0 + %nop10195 = alloca i1, i1 0 + %nop10196 = alloca 
i1, i1 0 + %nop10197 = alloca i1, i1 0 + %nop10198 = alloca i1, i1 0 + %nop10199 = alloca i1, i1 0 + %nop10200 = alloca i1, i1 0 + %nop10201 = alloca i1, i1 0 + %nop10202 = alloca i1, i1 0 + %nop10203 = alloca i1, i1 0 + %nop10204 = alloca i1, i1 0 + %nop10205 = alloca i1, i1 0 + %nop10206 = alloca i1, i1 0 + %nop10207 = alloca i1, i1 0 + %nop10208 = alloca i1, i1 0 + %nop10209 = alloca i1, i1 0 + %nop10210 = alloca i1, i1 0 + %nop10211 = alloca i1, i1 0 + %nop10212 = alloca i1, i1 0 + %nop10213 = alloca i1, i1 0 + %nop10214 = alloca i1, i1 0 + %nop10215 = alloca i1, i1 0 + %nop10216 = alloca i1, i1 0 + %nop10217 = alloca i1, i1 0 + %nop10218 = alloca i1, i1 0 + %nop10219 = alloca i1, i1 0 + %nop10220 = alloca i1, i1 0 + %nop10221 = alloca i1, i1 0 + %nop10222 = alloca i1, i1 0 + %nop10223 = alloca i1, i1 0 + %nop10224 = alloca i1, i1 0 + %nop10225 = alloca i1, i1 0 + %nop10226 = alloca i1, i1 0 + %nop10227 = alloca i1, i1 0 + %nop10228 = alloca i1, i1 0 + %nop10229 = alloca i1, i1 0 + %nop10230 = alloca i1, i1 0 + %nop10231 = alloca i1, i1 0 + %nop10232 = alloca i1, i1 0 + %nop10233 = alloca i1, i1 0 + %nop10234 = alloca i1, i1 0 + %nop10235 = alloca i1, i1 0 + %nop10236 = alloca i1, i1 0 + %nop10237 = alloca i1, i1 0 + %nop10238 = alloca i1, i1 0 + %nop10239 = alloca i1, i1 0 + %nop10240 = alloca i1, i1 0 + %nop10241 = alloca i1, i1 0 + %nop10242 = alloca i1, i1 0 + %nop10243 = alloca i1, i1 0 + %nop10244 = alloca i1, i1 0 + %nop10245 = alloca i1, i1 0 + %nop10246 = alloca i1, i1 0 + %nop10247 = alloca i1, i1 0 + %nop10248 = alloca i1, i1 0 + %nop10249 = alloca i1, i1 0 + %nop10250 = alloca i1, i1 0 + %nop10251 = alloca i1, i1 0 + %nop10252 = alloca i1, i1 0 + %nop10253 = alloca i1, i1 0 + %nop10254 = alloca i1, i1 0 + %nop10255 = alloca i1, i1 0 + %nop10256 = alloca i1, i1 0 + %nop10257 = alloca i1, i1 0 + %nop10258 = alloca i1, i1 0 + %nop10259 = alloca i1, i1 0 + %nop10260 = alloca i1, i1 0 + %nop10261 = alloca i1, i1 0 + %nop10262 = alloca i1, i1 0 + 
%nop10263 = alloca i1, i1 0 + %nop10264 = alloca i1, i1 0 + %nop10265 = alloca i1, i1 0 + %nop10266 = alloca i1, i1 0 + %nop10267 = alloca i1, i1 0 + %nop10268 = alloca i1, i1 0 + %nop10269 = alloca i1, i1 0 + %nop10270 = alloca i1, i1 0 + %nop10271 = alloca i1, i1 0 + %nop10272 = alloca i1, i1 0 + %nop10273 = alloca i1, i1 0 + %nop10274 = alloca i1, i1 0 + %nop10275 = alloca i1, i1 0 + %nop10276 = alloca i1, i1 0 + %nop10277 = alloca i1, i1 0 + %nop10278 = alloca i1, i1 0 + %nop10279 = alloca i1, i1 0 + %nop10280 = alloca i1, i1 0 + %nop10281 = alloca i1, i1 0 + %nop10282 = alloca i1, i1 0 + %nop10283 = alloca i1, i1 0 + %nop10284 = alloca i1, i1 0 + %nop10285 = alloca i1, i1 0 + %nop10286 = alloca i1, i1 0 + %nop10287 = alloca i1, i1 0 + %nop10288 = alloca i1, i1 0 + %nop10289 = alloca i1, i1 0 + %nop10290 = alloca i1, i1 0 + %nop10291 = alloca i1, i1 0 + %nop10292 = alloca i1, i1 0 + %nop10293 = alloca i1, i1 0 + %nop10294 = alloca i1, i1 0 + %nop10295 = alloca i1, i1 0 + %nop10296 = alloca i1, i1 0 + %nop10297 = alloca i1, i1 0 + %nop10298 = alloca i1, i1 0 + %nop10299 = alloca i1, i1 0 + %nop10300 = alloca i1, i1 0 + %nop10301 = alloca i1, i1 0 + %nop10302 = alloca i1, i1 0 + %nop10303 = alloca i1, i1 0 + %nop10304 = alloca i1, i1 0 + %nop10305 = alloca i1, i1 0 + %nop10306 = alloca i1, i1 0 + %nop10307 = alloca i1, i1 0 + %nop10308 = alloca i1, i1 0 + %nop10309 = alloca i1, i1 0 + %nop10310 = alloca i1, i1 0 + %nop10311 = alloca i1, i1 0 + %nop10312 = alloca i1, i1 0 + %nop10313 = alloca i1, i1 0 + %nop10314 = alloca i1, i1 0 + %nop10315 = alloca i1, i1 0 + %nop10316 = alloca i1, i1 0 + %nop10317 = alloca i1, i1 0 + %nop10318 = alloca i1, i1 0 + %nop10319 = alloca i1, i1 0 + %nop10320 = alloca i1, i1 0 + %nop10321 = alloca i1, i1 0 + %nop10322 = alloca i1, i1 0 + %nop10323 = alloca i1, i1 0 + %nop10324 = alloca i1, i1 0 + %nop10325 = alloca i1, i1 0 + %nop10326 = alloca i1, i1 0 + %nop10327 = alloca i1, i1 0 + %nop10328 = alloca i1, i1 0 + %nop10329 = alloca 
i1, i1 0 + %nop10330 = alloca i1, i1 0 + %nop10331 = alloca i1, i1 0 + %nop10332 = alloca i1, i1 0 + %nop10333 = alloca i1, i1 0 + %nop10334 = alloca i1, i1 0 + %nop10335 = alloca i1, i1 0 + %nop10336 = alloca i1, i1 0 + %nop10337 = alloca i1, i1 0 + %nop10338 = alloca i1, i1 0 + %nop10339 = alloca i1, i1 0 + %nop10340 = alloca i1, i1 0 + %nop10341 = alloca i1, i1 0 + %nop10342 = alloca i1, i1 0 + %nop10343 = alloca i1, i1 0 + %nop10344 = alloca i1, i1 0 + %nop10345 = alloca i1, i1 0 + %nop10346 = alloca i1, i1 0 + %nop10347 = alloca i1, i1 0 + %nop10348 = alloca i1, i1 0 + %nop10349 = alloca i1, i1 0 + %nop10350 = alloca i1, i1 0 + %nop10351 = alloca i1, i1 0 + %nop10352 = alloca i1, i1 0 + %nop10353 = alloca i1, i1 0 + %nop10354 = alloca i1, i1 0 + %nop10355 = alloca i1, i1 0 + %nop10356 = alloca i1, i1 0 + %nop10357 = alloca i1, i1 0 + %nop10358 = alloca i1, i1 0 + %nop10359 = alloca i1, i1 0 + %nop10360 = alloca i1, i1 0 + %nop10361 = alloca i1, i1 0 + %nop10362 = alloca i1, i1 0 + %nop10363 = alloca i1, i1 0 + %nop10364 = alloca i1, i1 0 + %nop10365 = alloca i1, i1 0 + %nop10366 = alloca i1, i1 0 + %nop10367 = alloca i1, i1 0 + %nop10368 = alloca i1, i1 0 + %nop10369 = alloca i1, i1 0 + %nop10370 = alloca i1, i1 0 + %nop10371 = alloca i1, i1 0 + %nop10372 = alloca i1, i1 0 + %nop10373 = alloca i1, i1 0 + %nop10374 = alloca i1, i1 0 + %nop10375 = alloca i1, i1 0 + %nop10376 = alloca i1, i1 0 + %nop10377 = alloca i1, i1 0 + %nop10378 = alloca i1, i1 0 + %nop10379 = alloca i1, i1 0 + %nop10380 = alloca i1, i1 0 + %nop10381 = alloca i1, i1 0 + %nop10382 = alloca i1, i1 0 + %nop10383 = alloca i1, i1 0 + %nop10384 = alloca i1, i1 0 + %nop10385 = alloca i1, i1 0 + %nop10386 = alloca i1, i1 0 + %nop10387 = alloca i1, i1 0 + %nop10388 = alloca i1, i1 0 + %nop10389 = alloca i1, i1 0 + %nop10390 = alloca i1, i1 0 + %nop10391 = alloca i1, i1 0 + %nop10392 = alloca i1, i1 0 + %nop10393 = alloca i1, i1 0 + %nop10394 = alloca i1, i1 0 + %nop10395 = alloca i1, i1 0 + 
%nop10396 = alloca i1, i1 0 + %nop10397 = alloca i1, i1 0 + %nop10398 = alloca i1, i1 0 + %nop10399 = alloca i1, i1 0 + %nop10400 = alloca i1, i1 0 + %nop10401 = alloca i1, i1 0 + %nop10402 = alloca i1, i1 0 + %nop10403 = alloca i1, i1 0 + %nop10404 = alloca i1, i1 0 + %nop10405 = alloca i1, i1 0 + %nop10406 = alloca i1, i1 0 + %nop10407 = alloca i1, i1 0 + %nop10408 = alloca i1, i1 0 + %nop10409 = alloca i1, i1 0 + %nop10410 = alloca i1, i1 0 + %nop10411 = alloca i1, i1 0 + %nop10412 = alloca i1, i1 0 + %nop10413 = alloca i1, i1 0 + %nop10414 = alloca i1, i1 0 + %nop10415 = alloca i1, i1 0 + %nop10416 = alloca i1, i1 0 + %nop10417 = alloca i1, i1 0 + %nop10418 = alloca i1, i1 0 + %nop10419 = alloca i1, i1 0 + %nop10420 = alloca i1, i1 0 + %nop10421 = alloca i1, i1 0 + %nop10422 = alloca i1, i1 0 + %nop10423 = alloca i1, i1 0 + %nop10424 = alloca i1, i1 0 + %nop10425 = alloca i1, i1 0 + %nop10426 = alloca i1, i1 0 + %nop10427 = alloca i1, i1 0 + %nop10428 = alloca i1, i1 0 + %nop10429 = alloca i1, i1 0 + %nop10430 = alloca i1, i1 0 + %nop10431 = alloca i1, i1 0 + %nop10432 = alloca i1, i1 0 + %nop10433 = alloca i1, i1 0 + %nop10434 = alloca i1, i1 0 + %nop10435 = alloca i1, i1 0 + %nop10436 = alloca i1, i1 0 + %nop10437 = alloca i1, i1 0 + %nop10438 = alloca i1, i1 0 + %nop10439 = alloca i1, i1 0 + %nop10440 = alloca i1, i1 0 + %nop10441 = alloca i1, i1 0 + %nop10442 = alloca i1, i1 0 + %nop10443 = alloca i1, i1 0 + %nop10444 = alloca i1, i1 0 + %nop10445 = alloca i1, i1 0 + %nop10446 = alloca i1, i1 0 + %nop10447 = alloca i1, i1 0 + %nop10448 = alloca i1, i1 0 + %nop10449 = alloca i1, i1 0 + %nop10450 = alloca i1, i1 0 + %nop10451 = alloca i1, i1 0 + %nop10452 = alloca i1, i1 0 + %nop10453 = alloca i1, i1 0 + %nop10454 = alloca i1, i1 0 + %nop10455 = alloca i1, i1 0 + %nop10456 = alloca i1, i1 0 + %nop10457 = alloca i1, i1 0 + %nop10458 = alloca i1, i1 0 + %nop10459 = alloca i1, i1 0 + %nop10460 = alloca i1, i1 0 + %nop10461 = alloca i1, i1 0 + %nop10462 = alloca 
i1, i1 0 + %nop10463 = alloca i1, i1 0 + %nop10464 = alloca i1, i1 0 + %nop10465 = alloca i1, i1 0 + %nop10466 = alloca i1, i1 0 + %nop10467 = alloca i1, i1 0 + %nop10468 = alloca i1, i1 0 + %nop10469 = alloca i1, i1 0 + %nop10470 = alloca i1, i1 0 + %nop10471 = alloca i1, i1 0 + %nop10472 = alloca i1, i1 0 + %nop10473 = alloca i1, i1 0 + %nop10474 = alloca i1, i1 0 + %nop10475 = alloca i1, i1 0 + %nop10476 = alloca i1, i1 0 + %nop10477 = alloca i1, i1 0 + %nop10478 = alloca i1, i1 0 + %nop10479 = alloca i1, i1 0 + %nop10480 = alloca i1, i1 0 + %nop10481 = alloca i1, i1 0 + %nop10482 = alloca i1, i1 0 + %nop10483 = alloca i1, i1 0 + %nop10484 = alloca i1, i1 0 + %nop10485 = alloca i1, i1 0 + %nop10486 = alloca i1, i1 0 + %nop10487 = alloca i1, i1 0 + %nop10488 = alloca i1, i1 0 + %nop10489 = alloca i1, i1 0 + %nop10490 = alloca i1, i1 0 + %nop10491 = alloca i1, i1 0 + %nop10492 = alloca i1, i1 0 + %nop10493 = alloca i1, i1 0 + %nop10494 = alloca i1, i1 0 + %nop10495 = alloca i1, i1 0 + %nop10496 = alloca i1, i1 0 + %nop10497 = alloca i1, i1 0 + %nop10498 = alloca i1, i1 0 + %nop10499 = alloca i1, i1 0 + %nop10500 = alloca i1, i1 0 + %nop10501 = alloca i1, i1 0 + %nop10502 = alloca i1, i1 0 + %nop10503 = alloca i1, i1 0 + %nop10504 = alloca i1, i1 0 + %nop10505 = alloca i1, i1 0 + %nop10506 = alloca i1, i1 0 + %nop10507 = alloca i1, i1 0 + %nop10508 = alloca i1, i1 0 + %nop10509 = alloca i1, i1 0 + %nop10510 = alloca i1, i1 0 + %nop10511 = alloca i1, i1 0 + %nop10512 = alloca i1, i1 0 + %nop10513 = alloca i1, i1 0 + %nop10514 = alloca i1, i1 0 + %nop10515 = alloca i1, i1 0 + %nop10516 = alloca i1, i1 0 + %nop10517 = alloca i1, i1 0 + %nop10518 = alloca i1, i1 0 + %nop10519 = alloca i1, i1 0 + %nop10520 = alloca i1, i1 0 + %nop10521 = alloca i1, i1 0 + %nop10522 = alloca i1, i1 0 + %nop10523 = alloca i1, i1 0 + %nop10524 = alloca i1, i1 0 + %nop10525 = alloca i1, i1 0 + %nop10526 = alloca i1, i1 0 + %nop10527 = alloca i1, i1 0 + %nop10528 = alloca i1, i1 0 + 
%nop10529 = alloca i1, i1 0 + %nop10530 = alloca i1, i1 0 + %nop10531 = alloca i1, i1 0 + %nop10532 = alloca i1, i1 0 + %nop10533 = alloca i1, i1 0 + %nop10534 = alloca i1, i1 0 + %nop10535 = alloca i1, i1 0 + %nop10536 = alloca i1, i1 0 + %nop10537 = alloca i1, i1 0 + %nop10538 = alloca i1, i1 0 + %nop10539 = alloca i1, i1 0 + %nop10540 = alloca i1, i1 0 + %nop10541 = alloca i1, i1 0 + %nop10542 = alloca i1, i1 0 + %nop10543 = alloca i1, i1 0 + %nop10544 = alloca i1, i1 0 + %nop10545 = alloca i1, i1 0 + %nop10546 = alloca i1, i1 0 + %nop10547 = alloca i1, i1 0 + %nop10548 = alloca i1, i1 0 + %nop10549 = alloca i1, i1 0 + %nop10550 = alloca i1, i1 0 + %nop10551 = alloca i1, i1 0 + %nop10552 = alloca i1, i1 0 + %nop10553 = alloca i1, i1 0 + %nop10554 = alloca i1, i1 0 + %nop10555 = alloca i1, i1 0 + %nop10556 = alloca i1, i1 0 + %nop10557 = alloca i1, i1 0 + %nop10558 = alloca i1, i1 0 + %nop10559 = alloca i1, i1 0 + %nop10560 = alloca i1, i1 0 + %nop10561 = alloca i1, i1 0 + %nop10562 = alloca i1, i1 0 + %nop10563 = alloca i1, i1 0 + %nop10564 = alloca i1, i1 0 + %nop10565 = alloca i1, i1 0 + %nop10566 = alloca i1, i1 0 + %nop10567 = alloca i1, i1 0 + %nop10568 = alloca i1, i1 0 + %nop10569 = alloca i1, i1 0 + %nop10570 = alloca i1, i1 0 + %nop10571 = alloca i1, i1 0 + %nop10572 = alloca i1, i1 0 + %nop10573 = alloca i1, i1 0 + %nop10574 = alloca i1, i1 0 + %nop10575 = alloca i1, i1 0 + %nop10576 = alloca i1, i1 0 + %nop10577 = alloca i1, i1 0 + %nop10578 = alloca i1, i1 0 + %nop10579 = alloca i1, i1 0 + %nop10580 = alloca i1, i1 0 + %nop10581 = alloca i1, i1 0 + %nop10582 = alloca i1, i1 0 + %nop10583 = alloca i1, i1 0 + %nop10584 = alloca i1, i1 0 + %nop10585 = alloca i1, i1 0 + %nop10586 = alloca i1, i1 0 + %nop10587 = alloca i1, i1 0 + %nop10588 = alloca i1, i1 0 + %nop10589 = alloca i1, i1 0 + %nop10590 = alloca i1, i1 0 + %nop10591 = alloca i1, i1 0 + %nop10592 = alloca i1, i1 0 + %nop10593 = alloca i1, i1 0 + %nop10594 = alloca i1, i1 0 + %nop10595 = alloca 
i1, i1 0 + %nop10596 = alloca i1, i1 0 + %nop10597 = alloca i1, i1 0 + %nop10598 = alloca i1, i1 0 + %nop10599 = alloca i1, i1 0 + %nop10600 = alloca i1, i1 0 + %nop10601 = alloca i1, i1 0 + %nop10602 = alloca i1, i1 0 + %nop10603 = alloca i1, i1 0 + %nop10604 = alloca i1, i1 0 + %nop10605 = alloca i1, i1 0 + %nop10606 = alloca i1, i1 0 + %nop10607 = alloca i1, i1 0 + %nop10608 = alloca i1, i1 0 + %nop10609 = alloca i1, i1 0 + %nop10610 = alloca i1, i1 0 + %nop10611 = alloca i1, i1 0 + %nop10612 = alloca i1, i1 0 + %nop10613 = alloca i1, i1 0 + %nop10614 = alloca i1, i1 0 + %nop10615 = alloca i1, i1 0 + %nop10616 = alloca i1, i1 0 + %nop10617 = alloca i1, i1 0 + %nop10618 = alloca i1, i1 0 + %nop10619 = alloca i1, i1 0 + %nop10620 = alloca i1, i1 0 + %nop10621 = alloca i1, i1 0 + %nop10622 = alloca i1, i1 0 + %nop10623 = alloca i1, i1 0 + %nop10624 = alloca i1, i1 0 + %nop10625 = alloca i1, i1 0 + %nop10626 = alloca i1, i1 0 + %nop10627 = alloca i1, i1 0 + %nop10628 = alloca i1, i1 0 + %nop10629 = alloca i1, i1 0 + %nop10630 = alloca i1, i1 0 + %nop10631 = alloca i1, i1 0 + %nop10632 = alloca i1, i1 0 + %nop10633 = alloca i1, i1 0 + %nop10634 = alloca i1, i1 0 + %nop10635 = alloca i1, i1 0 + %nop10636 = alloca i1, i1 0 + %nop10637 = alloca i1, i1 0 + %nop10638 = alloca i1, i1 0 + %nop10639 = alloca i1, i1 0 + %nop10640 = alloca i1, i1 0 + %nop10641 = alloca i1, i1 0 + %nop10642 = alloca i1, i1 0 + %nop10643 = alloca i1, i1 0 + %nop10644 = alloca i1, i1 0 + %nop10645 = alloca i1, i1 0 + %nop10646 = alloca i1, i1 0 + %nop10647 = alloca i1, i1 0 + %nop10648 = alloca i1, i1 0 + %nop10649 = alloca i1, i1 0 + %nop10650 = alloca i1, i1 0 + %nop10651 = alloca i1, i1 0 + %nop10652 = alloca i1, i1 0 + %nop10653 = alloca i1, i1 0 + %nop10654 = alloca i1, i1 0 + %nop10655 = alloca i1, i1 0 + %nop10656 = alloca i1, i1 0 + %nop10657 = alloca i1, i1 0 + %nop10658 = alloca i1, i1 0 + %nop10659 = alloca i1, i1 0 + %nop10660 = alloca i1, i1 0 + %nop10661 = alloca i1, i1 0 + 
%nop10662 = alloca i1, i1 0 + %nop10663 = alloca i1, i1 0 + %nop10664 = alloca i1, i1 0 + %nop10665 = alloca i1, i1 0 + %nop10666 = alloca i1, i1 0 + %nop10667 = alloca i1, i1 0 + %nop10668 = alloca i1, i1 0 + %nop10669 = alloca i1, i1 0 + %nop10670 = alloca i1, i1 0 + %nop10671 = alloca i1, i1 0 + %nop10672 = alloca i1, i1 0 + %nop10673 = alloca i1, i1 0 + %nop10674 = alloca i1, i1 0 + %nop10675 = alloca i1, i1 0 + %nop10676 = alloca i1, i1 0 + %nop10677 = alloca i1, i1 0 + %nop10678 = alloca i1, i1 0 + %nop10679 = alloca i1, i1 0 + %nop10680 = alloca i1, i1 0 + %nop10681 = alloca i1, i1 0 + %nop10682 = alloca i1, i1 0 + %nop10683 = alloca i1, i1 0 + %nop10684 = alloca i1, i1 0 + %nop10685 = alloca i1, i1 0 + %nop10686 = alloca i1, i1 0 + %nop10687 = alloca i1, i1 0 + %nop10688 = alloca i1, i1 0 + %nop10689 = alloca i1, i1 0 + %nop10690 = alloca i1, i1 0 + %nop10691 = alloca i1, i1 0 + %nop10692 = alloca i1, i1 0 + %nop10693 = alloca i1, i1 0 + %nop10694 = alloca i1, i1 0 + %nop10695 = alloca i1, i1 0 + %nop10696 = alloca i1, i1 0 + %nop10697 = alloca i1, i1 0 + %nop10698 = alloca i1, i1 0 + %nop10699 = alloca i1, i1 0 + %nop10700 = alloca i1, i1 0 + %nop10701 = alloca i1, i1 0 + %nop10702 = alloca i1, i1 0 + %nop10703 = alloca i1, i1 0 + %nop10704 = alloca i1, i1 0 + %nop10705 = alloca i1, i1 0 + %nop10706 = alloca i1, i1 0 + %nop10707 = alloca i1, i1 0 + %nop10708 = alloca i1, i1 0 + %nop10709 = alloca i1, i1 0 + %nop10710 = alloca i1, i1 0 + %nop10711 = alloca i1, i1 0 + %nop10712 = alloca i1, i1 0 + %nop10713 = alloca i1, i1 0 + %nop10714 = alloca i1, i1 0 + %nop10715 = alloca i1, i1 0 + %nop10716 = alloca i1, i1 0 + %nop10717 = alloca i1, i1 0 + %nop10718 = alloca i1, i1 0 + %nop10719 = alloca i1, i1 0 + %nop10720 = alloca i1, i1 0 + %nop10721 = alloca i1, i1 0 + %nop10722 = alloca i1, i1 0 + %nop10723 = alloca i1, i1 0 + %nop10724 = alloca i1, i1 0 + %nop10725 = alloca i1, i1 0 + %nop10726 = alloca i1, i1 0 + %nop10727 = alloca i1, i1 0 + %nop10728 = alloca 
i1, i1 0 + %nop10729 = alloca i1, i1 0 + %nop10730 = alloca i1, i1 0 + %nop10731 = alloca i1, i1 0 + %nop10732 = alloca i1, i1 0 + %nop10733 = alloca i1, i1 0 + %nop10734 = alloca i1, i1 0 + %nop10735 = alloca i1, i1 0 + %nop10736 = alloca i1, i1 0 + %nop10737 = alloca i1, i1 0 + %nop10738 = alloca i1, i1 0 + %nop10739 = alloca i1, i1 0 + %nop10740 = alloca i1, i1 0 + %nop10741 = alloca i1, i1 0 + %nop10742 = alloca i1, i1 0 + %nop10743 = alloca i1, i1 0 + %nop10744 = alloca i1, i1 0 + %nop10745 = alloca i1, i1 0 + %nop10746 = alloca i1, i1 0 + %nop10747 = alloca i1, i1 0 + %nop10748 = alloca i1, i1 0 + %nop10749 = alloca i1, i1 0 + %nop10750 = alloca i1, i1 0 + %nop10751 = alloca i1, i1 0 + %nop10752 = alloca i1, i1 0 + %nop10753 = alloca i1, i1 0 + %nop10754 = alloca i1, i1 0 + %nop10755 = alloca i1, i1 0 + %nop10756 = alloca i1, i1 0 + %nop10757 = alloca i1, i1 0 + %nop10758 = alloca i1, i1 0 + %nop10759 = alloca i1, i1 0 + %nop10760 = alloca i1, i1 0 + %nop10761 = alloca i1, i1 0 + %nop10762 = alloca i1, i1 0 + %nop10763 = alloca i1, i1 0 + %nop10764 = alloca i1, i1 0 + %nop10765 = alloca i1, i1 0 + %nop10766 = alloca i1, i1 0 + %nop10767 = alloca i1, i1 0 + %nop10768 = alloca i1, i1 0 + %nop10769 = alloca i1, i1 0 + %nop10770 = alloca i1, i1 0 + %nop10771 = alloca i1, i1 0 + %nop10772 = alloca i1, i1 0 + %nop10773 = alloca i1, i1 0 + %nop10774 = alloca i1, i1 0 + %nop10775 = alloca i1, i1 0 + %nop10776 = alloca i1, i1 0 + %nop10777 = alloca i1, i1 0 + %nop10778 = alloca i1, i1 0 + %nop10779 = alloca i1, i1 0 + %nop10780 = alloca i1, i1 0 + %nop10781 = alloca i1, i1 0 + %nop10782 = alloca i1, i1 0 + %nop10783 = alloca i1, i1 0 + %nop10784 = alloca i1, i1 0 + %nop10785 = alloca i1, i1 0 + %nop10786 = alloca i1, i1 0 + %nop10787 = alloca i1, i1 0 + %nop10788 = alloca i1, i1 0 + %nop10789 = alloca i1, i1 0 + %nop10790 = alloca i1, i1 0 + %nop10791 = alloca i1, i1 0 + %nop10792 = alloca i1, i1 0 + %nop10793 = alloca i1, i1 0 + %nop10794 = alloca i1, i1 0 + 
%nop10795 = alloca i1, i1 0 + %nop10796 = alloca i1, i1 0 + %nop10797 = alloca i1, i1 0 + %nop10798 = alloca i1, i1 0 + %nop10799 = alloca i1, i1 0 + %nop10800 = alloca i1, i1 0 + %nop10801 = alloca i1, i1 0 + %nop10802 = alloca i1, i1 0 + %nop10803 = alloca i1, i1 0 + %nop10804 = alloca i1, i1 0 + %nop10805 = alloca i1, i1 0 + %nop10806 = alloca i1, i1 0 + %nop10807 = alloca i1, i1 0 + %nop10808 = alloca i1, i1 0 + %nop10809 = alloca i1, i1 0 + %nop10810 = alloca i1, i1 0 + %nop10811 = alloca i1, i1 0 + %nop10812 = alloca i1, i1 0 + %nop10813 = alloca i1, i1 0 + %nop10814 = alloca i1, i1 0 + %nop10815 = alloca i1, i1 0 + %nop10816 = alloca i1, i1 0 + %nop10817 = alloca i1, i1 0 + %nop10818 = alloca i1, i1 0 + %nop10819 = alloca i1, i1 0 + %nop10820 = alloca i1, i1 0 + %nop10821 = alloca i1, i1 0 + %nop10822 = alloca i1, i1 0 + %nop10823 = alloca i1, i1 0 + %nop10824 = alloca i1, i1 0 + %nop10825 = alloca i1, i1 0 + %nop10826 = alloca i1, i1 0 + %nop10827 = alloca i1, i1 0 + %nop10828 = alloca i1, i1 0 + %nop10829 = alloca i1, i1 0 + %nop10830 = alloca i1, i1 0 + %nop10831 = alloca i1, i1 0 + %nop10832 = alloca i1, i1 0 + %nop10833 = alloca i1, i1 0 + %nop10834 = alloca i1, i1 0 + %nop10835 = alloca i1, i1 0 + %nop10836 = alloca i1, i1 0 + %nop10837 = alloca i1, i1 0 + %nop10838 = alloca i1, i1 0 + %nop10839 = alloca i1, i1 0 + %nop10840 = alloca i1, i1 0 + %nop10841 = alloca i1, i1 0 + %nop10842 = alloca i1, i1 0 + %nop10843 = alloca i1, i1 0 + %nop10844 = alloca i1, i1 0 + %nop10845 = alloca i1, i1 0 + %nop10846 = alloca i1, i1 0 + %nop10847 = alloca i1, i1 0 + %nop10848 = alloca i1, i1 0 + %nop10849 = alloca i1, i1 0 + %nop10850 = alloca i1, i1 0 + %nop10851 = alloca i1, i1 0 + %nop10852 = alloca i1, i1 0 + %nop10853 = alloca i1, i1 0 + %nop10854 = alloca i1, i1 0 + %nop10855 = alloca i1, i1 0 + %nop10856 = alloca i1, i1 0 + %nop10857 = alloca i1, i1 0 + %nop10858 = alloca i1, i1 0 + %nop10859 = alloca i1, i1 0 + %nop10860 = alloca i1, i1 0 + %nop10861 = alloca 
i1, i1 0 + %nop10862 = alloca i1, i1 0 + %nop10863 = alloca i1, i1 0 + %nop10864 = alloca i1, i1 0 + %nop10865 = alloca i1, i1 0 + %nop10866 = alloca i1, i1 0 + %nop10867 = alloca i1, i1 0 + %nop10868 = alloca i1, i1 0 + %nop10869 = alloca i1, i1 0 + %nop10870 = alloca i1, i1 0 + %nop10871 = alloca i1, i1 0 + %nop10872 = alloca i1, i1 0 + %nop10873 = alloca i1, i1 0 + %nop10874 = alloca i1, i1 0 + %nop10875 = alloca i1, i1 0 + %nop10876 = alloca i1, i1 0 + %nop10877 = alloca i1, i1 0 + %nop10878 = alloca i1, i1 0 + %nop10879 = alloca i1, i1 0 + %nop10880 = alloca i1, i1 0 + %nop10881 = alloca i1, i1 0 + %nop10882 = alloca i1, i1 0 + %nop10883 = alloca i1, i1 0 + %nop10884 = alloca i1, i1 0 + %nop10885 = alloca i1, i1 0 + %nop10886 = alloca i1, i1 0 + %nop10887 = alloca i1, i1 0 + %nop10888 = alloca i1, i1 0 + %nop10889 = alloca i1, i1 0 + %nop10890 = alloca i1, i1 0 + %nop10891 = alloca i1, i1 0 + %nop10892 = alloca i1, i1 0 + %nop10893 = alloca i1, i1 0 + %nop10894 = alloca i1, i1 0 + %nop10895 = alloca i1, i1 0 + %nop10896 = alloca i1, i1 0 + %nop10897 = alloca i1, i1 0 + %nop10898 = alloca i1, i1 0 + %nop10899 = alloca i1, i1 0 + %nop10900 = alloca i1, i1 0 + %nop10901 = alloca i1, i1 0 + %nop10902 = alloca i1, i1 0 + %nop10903 = alloca i1, i1 0 + %nop10904 = alloca i1, i1 0 + %nop10905 = alloca i1, i1 0 + %nop10906 = alloca i1, i1 0 + %nop10907 = alloca i1, i1 0 + %nop10908 = alloca i1, i1 0 + %nop10909 = alloca i1, i1 0 + %nop10910 = alloca i1, i1 0 + %nop10911 = alloca i1, i1 0 + %nop10912 = alloca i1, i1 0 + %nop10913 = alloca i1, i1 0 + %nop10914 = alloca i1, i1 0 + %nop10915 = alloca i1, i1 0 + %nop10916 = alloca i1, i1 0 + %nop10917 = alloca i1, i1 0 + %nop10918 = alloca i1, i1 0 + %nop10919 = alloca i1, i1 0 + %nop10920 = alloca i1, i1 0 + %nop10921 = alloca i1, i1 0 + %nop10922 = alloca i1, i1 0 + %nop10923 = alloca i1, i1 0 + %nop10924 = alloca i1, i1 0 + %nop10925 = alloca i1, i1 0 + %nop10926 = alloca i1, i1 0 + %nop10927 = alloca i1, i1 0 + 
%nop10928 = alloca i1, i1 0 + %nop10929 = alloca i1, i1 0 + %nop10930 = alloca i1, i1 0 + %nop10931 = alloca i1, i1 0 + %nop10932 = alloca i1, i1 0 + %nop10933 = alloca i1, i1 0 + %nop10934 = alloca i1, i1 0 + %nop10935 = alloca i1, i1 0 + %nop10936 = alloca i1, i1 0 + %nop10937 = alloca i1, i1 0 + %nop10938 = alloca i1, i1 0 + %nop10939 = alloca i1, i1 0 + %nop10940 = alloca i1, i1 0 + %nop10941 = alloca i1, i1 0 + %nop10942 = alloca i1, i1 0 + %nop10943 = alloca i1, i1 0 + %nop10944 = alloca i1, i1 0 + %nop10945 = alloca i1, i1 0 + %nop10946 = alloca i1, i1 0 + %nop10947 = alloca i1, i1 0 + %nop10948 = alloca i1, i1 0 + %nop10949 = alloca i1, i1 0 + %nop10950 = alloca i1, i1 0 + %nop10951 = alloca i1, i1 0 + %nop10952 = alloca i1, i1 0 + %nop10953 = alloca i1, i1 0 + %nop10954 = alloca i1, i1 0 + %nop10955 = alloca i1, i1 0 + %nop10956 = alloca i1, i1 0 + %nop10957 = alloca i1, i1 0 + %nop10958 = alloca i1, i1 0 + %nop10959 = alloca i1, i1 0 + %nop10960 = alloca i1, i1 0 + %nop10961 = alloca i1, i1 0 + %nop10962 = alloca i1, i1 0 + %nop10963 = alloca i1, i1 0 + %nop10964 = alloca i1, i1 0 + %nop10965 = alloca i1, i1 0 + %nop10966 = alloca i1, i1 0 + %nop10967 = alloca i1, i1 0 + %nop10968 = alloca i1, i1 0 + %nop10969 = alloca i1, i1 0 + %nop10970 = alloca i1, i1 0 + %nop10971 = alloca i1, i1 0 + %nop10972 = alloca i1, i1 0 + %nop10973 = alloca i1, i1 0 + %nop10974 = alloca i1, i1 0 + %nop10975 = alloca i1, i1 0 + %nop10976 = alloca i1, i1 0 + %nop10977 = alloca i1, i1 0 + %nop10978 = alloca i1, i1 0 + %nop10979 = alloca i1, i1 0 + %nop10980 = alloca i1, i1 0 + %nop10981 = alloca i1, i1 0 + %nop10982 = alloca i1, i1 0 + %nop10983 = alloca i1, i1 0 + %nop10984 = alloca i1, i1 0 + %nop10985 = alloca i1, i1 0 + %nop10986 = alloca i1, i1 0 + %nop10987 = alloca i1, i1 0 + %nop10988 = alloca i1, i1 0 + %nop10989 = alloca i1, i1 0 + %nop10990 = alloca i1, i1 0 + %nop10991 = alloca i1, i1 0 + %nop10992 = alloca i1, i1 0 + %nop10993 = alloca i1, i1 0 + %nop10994 = alloca 
i1, i1 0 + %nop10995 = alloca i1, i1 0 + %nop10996 = alloca i1, i1 0 + %nop10997 = alloca i1, i1 0 + %nop10998 = alloca i1, i1 0 + %nop10999 = alloca i1, i1 0 + %nop11000 = alloca i1, i1 0 + %nop11001 = alloca i1, i1 0 + %nop11002 = alloca i1, i1 0 + %nop11003 = alloca i1, i1 0 + %nop11004 = alloca i1, i1 0 + %nop11005 = alloca i1, i1 0 + %nop11006 = alloca i1, i1 0 + %nop11007 = alloca i1, i1 0 + %nop11008 = alloca i1, i1 0 + %nop11009 = alloca i1, i1 0 + %nop11010 = alloca i1, i1 0 + %nop11011 = alloca i1, i1 0 + %nop11012 = alloca i1, i1 0 + %nop11013 = alloca i1, i1 0 + %nop11014 = alloca i1, i1 0 + %nop11015 = alloca i1, i1 0 + %nop11016 = alloca i1, i1 0 + %nop11017 = alloca i1, i1 0 + %nop11018 = alloca i1, i1 0 + %nop11019 = alloca i1, i1 0 + %nop11020 = alloca i1, i1 0 + %nop11021 = alloca i1, i1 0 + %nop11022 = alloca i1, i1 0 + %nop11023 = alloca i1, i1 0 + %nop11024 = alloca i1, i1 0 + %nop11025 = alloca i1, i1 0 + %nop11026 = alloca i1, i1 0 + %nop11027 = alloca i1, i1 0 + %nop11028 = alloca i1, i1 0 + %nop11029 = alloca i1, i1 0 + %nop11030 = alloca i1, i1 0 + %nop11031 = alloca i1, i1 0 + %nop11032 = alloca i1, i1 0 + %nop11033 = alloca i1, i1 0 + %nop11034 = alloca i1, i1 0 + %nop11035 = alloca i1, i1 0 + %nop11036 = alloca i1, i1 0 + %nop11037 = alloca i1, i1 0 + %nop11038 = alloca i1, i1 0 + %nop11039 = alloca i1, i1 0 + %nop11040 = alloca i1, i1 0 + %nop11041 = alloca i1, i1 0 + %nop11042 = alloca i1, i1 0 + %nop11043 = alloca i1, i1 0 + %nop11044 = alloca i1, i1 0 + %nop11045 = alloca i1, i1 0 + %nop11046 = alloca i1, i1 0 + %nop11047 = alloca i1, i1 0 + %nop11048 = alloca i1, i1 0 + %nop11049 = alloca i1, i1 0 + %nop11050 = alloca i1, i1 0 + %nop11051 = alloca i1, i1 0 + %nop11052 = alloca i1, i1 0 + %nop11053 = alloca i1, i1 0 + %nop11054 = alloca i1, i1 0 + %nop11055 = alloca i1, i1 0 + %nop11056 = alloca i1, i1 0 + %nop11057 = alloca i1, i1 0 + %nop11058 = alloca i1, i1 0 + %nop11059 = alloca i1, i1 0 + %nop11060 = alloca i1, i1 0 + 
%nop11061 = alloca i1, i1 0 + %nop11062 = alloca i1, i1 0 + %nop11063 = alloca i1, i1 0 + %nop11064 = alloca i1, i1 0 + %nop11065 = alloca i1, i1 0 + %nop11066 = alloca i1, i1 0 + %nop11067 = alloca i1, i1 0 + %nop11068 = alloca i1, i1 0 + %nop11069 = alloca i1, i1 0 + %nop11070 = alloca i1, i1 0 + %nop11071 = alloca i1, i1 0 + %nop11072 = alloca i1, i1 0 + %nop11073 = alloca i1, i1 0 + %nop11074 = alloca i1, i1 0 + %nop11075 = alloca i1, i1 0 + %nop11076 = alloca i1, i1 0 + %nop11077 = alloca i1, i1 0 + %nop11078 = alloca i1, i1 0 + %nop11079 = alloca i1, i1 0 + %nop11080 = alloca i1, i1 0 + %nop11081 = alloca i1, i1 0 + %nop11082 = alloca i1, i1 0 + %nop11083 = alloca i1, i1 0 + %nop11084 = alloca i1, i1 0 + %nop11085 = alloca i1, i1 0 + %nop11086 = alloca i1, i1 0 + %nop11087 = alloca i1, i1 0 + %nop11088 = alloca i1, i1 0 + %nop11089 = alloca i1, i1 0 + %nop11090 = alloca i1, i1 0 + %nop11091 = alloca i1, i1 0 + %nop11092 = alloca i1, i1 0 + %nop11093 = alloca i1, i1 0 + %nop11094 = alloca i1, i1 0 + %nop11095 = alloca i1, i1 0 + %nop11096 = alloca i1, i1 0 + %nop11097 = alloca i1, i1 0 + %nop11098 = alloca i1, i1 0 + %nop11099 = alloca i1, i1 0 + %nop11100 = alloca i1, i1 0 + %nop11101 = alloca i1, i1 0 + %nop11102 = alloca i1, i1 0 + %nop11103 = alloca i1, i1 0 + %nop11104 = alloca i1, i1 0 + %nop11105 = alloca i1, i1 0 + %nop11106 = alloca i1, i1 0 + %nop11107 = alloca i1, i1 0 + %nop11108 = alloca i1, i1 0 + %nop11109 = alloca i1, i1 0 + %nop11110 = alloca i1, i1 0 + %nop11111 = alloca i1, i1 0 + %nop11112 = alloca i1, i1 0 + %nop11113 = alloca i1, i1 0 + %nop11114 = alloca i1, i1 0 + %nop11115 = alloca i1, i1 0 + %nop11116 = alloca i1, i1 0 + %nop11117 = alloca i1, i1 0 + %nop11118 = alloca i1, i1 0 + %nop11119 = alloca i1, i1 0 + %nop11120 = alloca i1, i1 0 + %nop11121 = alloca i1, i1 0 + %nop11122 = alloca i1, i1 0 + %nop11123 = alloca i1, i1 0 + %nop11124 = alloca i1, i1 0 + %nop11125 = alloca i1, i1 0 + %nop11126 = alloca i1, i1 0 + %nop11127 = alloca 
i1, i1 0 + %nop11128 = alloca i1, i1 0 + %nop11129 = alloca i1, i1 0 + %nop11130 = alloca i1, i1 0 + %nop11131 = alloca i1, i1 0 + %nop11132 = alloca i1, i1 0 + %nop11133 = alloca i1, i1 0 + %nop11134 = alloca i1, i1 0 + %nop11135 = alloca i1, i1 0 + %nop11136 = alloca i1, i1 0 + %nop11137 = alloca i1, i1 0 + %nop11138 = alloca i1, i1 0 + %nop11139 = alloca i1, i1 0 + %nop11140 = alloca i1, i1 0 + %nop11141 = alloca i1, i1 0 + %nop11142 = alloca i1, i1 0 + %nop11143 = alloca i1, i1 0 + %nop11144 = alloca i1, i1 0 + %nop11145 = alloca i1, i1 0 + %nop11146 = alloca i1, i1 0 + %nop11147 = alloca i1, i1 0 + %nop11148 = alloca i1, i1 0 + %nop11149 = alloca i1, i1 0 + %nop11150 = alloca i1, i1 0 + %nop11151 = alloca i1, i1 0 + %nop11152 = alloca i1, i1 0 + %nop11153 = alloca i1, i1 0 + %nop11154 = alloca i1, i1 0 + %nop11155 = alloca i1, i1 0 + %nop11156 = alloca i1, i1 0 + %nop11157 = alloca i1, i1 0 + %nop11158 = alloca i1, i1 0 + %nop11159 = alloca i1, i1 0 + %nop11160 = alloca i1, i1 0 + %nop11161 = alloca i1, i1 0 + %nop11162 = alloca i1, i1 0 + %nop11163 = alloca i1, i1 0 + %nop11164 = alloca i1, i1 0 + %nop11165 = alloca i1, i1 0 + %nop11166 = alloca i1, i1 0 + %nop11167 = alloca i1, i1 0 + %nop11168 = alloca i1, i1 0 + %nop11169 = alloca i1, i1 0 + %nop11170 = alloca i1, i1 0 + %nop11171 = alloca i1, i1 0 + %nop11172 = alloca i1, i1 0 + %nop11173 = alloca i1, i1 0 + %nop11174 = alloca i1, i1 0 + %nop11175 = alloca i1, i1 0 + %nop11176 = alloca i1, i1 0 + %nop11177 = alloca i1, i1 0 + %nop11178 = alloca i1, i1 0 + %nop11179 = alloca i1, i1 0 + %nop11180 = alloca i1, i1 0 + %nop11181 = alloca i1, i1 0 + %nop11182 = alloca i1, i1 0 + %nop11183 = alloca i1, i1 0 + %nop11184 = alloca i1, i1 0 + %nop11185 = alloca i1, i1 0 + %nop11186 = alloca i1, i1 0 + %nop11187 = alloca i1, i1 0 + %nop11188 = alloca i1, i1 0 + %nop11189 = alloca i1, i1 0 + %nop11190 = alloca i1, i1 0 + %nop11191 = alloca i1, i1 0 + %nop11192 = alloca i1, i1 0 + %nop11193 = alloca i1, i1 0 + 
%nop11194 = alloca i1, i1 0 + %nop11195 = alloca i1, i1 0 + %nop11196 = alloca i1, i1 0 + %nop11197 = alloca i1, i1 0 + %nop11198 = alloca i1, i1 0 + %nop11199 = alloca i1, i1 0 + %nop11200 = alloca i1, i1 0 + %nop11201 = alloca i1, i1 0 + %nop11202 = alloca i1, i1 0 + %nop11203 = alloca i1, i1 0 + %nop11204 = alloca i1, i1 0 + %nop11205 = alloca i1, i1 0 + %nop11206 = alloca i1, i1 0 + %nop11207 = alloca i1, i1 0 + %nop11208 = alloca i1, i1 0 + %nop11209 = alloca i1, i1 0 + %nop11210 = alloca i1, i1 0 + %nop11211 = alloca i1, i1 0 + %nop11212 = alloca i1, i1 0 + %nop11213 = alloca i1, i1 0 + %nop11214 = alloca i1, i1 0 + %nop11215 = alloca i1, i1 0 + %nop11216 = alloca i1, i1 0 + %nop11217 = alloca i1, i1 0 + %nop11218 = alloca i1, i1 0 + %nop11219 = alloca i1, i1 0 + %nop11220 = alloca i1, i1 0 + %nop11221 = alloca i1, i1 0 + %nop11222 = alloca i1, i1 0 + %nop11223 = alloca i1, i1 0 + %nop11224 = alloca i1, i1 0 + %nop11225 = alloca i1, i1 0 + %nop11226 = alloca i1, i1 0 + %nop11227 = alloca i1, i1 0 + %nop11228 = alloca i1, i1 0 + %nop11229 = alloca i1, i1 0 + %nop11230 = alloca i1, i1 0 + %nop11231 = alloca i1, i1 0 + %nop11232 = alloca i1, i1 0 + %nop11233 = alloca i1, i1 0 + %nop11234 = alloca i1, i1 0 + %nop11235 = alloca i1, i1 0 + %nop11236 = alloca i1, i1 0 + %nop11237 = alloca i1, i1 0 + %nop11238 = alloca i1, i1 0 + %nop11239 = alloca i1, i1 0 + %nop11240 = alloca i1, i1 0 + %nop11241 = alloca i1, i1 0 + %nop11242 = alloca i1, i1 0 + %nop11243 = alloca i1, i1 0 + %nop11244 = alloca i1, i1 0 + %nop11245 = alloca i1, i1 0 + %nop11246 = alloca i1, i1 0 + %nop11247 = alloca i1, i1 0 + %nop11248 = alloca i1, i1 0 + %nop11249 = alloca i1, i1 0 + %nop11250 = alloca i1, i1 0 + %nop11251 = alloca i1, i1 0 + %nop11252 = alloca i1, i1 0 + %nop11253 = alloca i1, i1 0 + %nop11254 = alloca i1, i1 0 + %nop11255 = alloca i1, i1 0 + %nop11256 = alloca i1, i1 0 + %nop11257 = alloca i1, i1 0 + %nop11258 = alloca i1, i1 0 + %nop11259 = alloca i1, i1 0 + %nop11260 = alloca 
i1, i1 0 + %nop11261 = alloca i1, i1 0 + %nop11262 = alloca i1, i1 0 + %nop11263 = alloca i1, i1 0 + %nop11264 = alloca i1, i1 0 + %nop11265 = alloca i1, i1 0 + %nop11266 = alloca i1, i1 0 + %nop11267 = alloca i1, i1 0 + %nop11268 = alloca i1, i1 0 + %nop11269 = alloca i1, i1 0 + %nop11270 = alloca i1, i1 0 + %nop11271 = alloca i1, i1 0 + %nop11272 = alloca i1, i1 0 + %nop11273 = alloca i1, i1 0 + %nop11274 = alloca i1, i1 0 + %nop11275 = alloca i1, i1 0 + %nop11276 = alloca i1, i1 0 + %nop11277 = alloca i1, i1 0 + %nop11278 = alloca i1, i1 0 + %nop11279 = alloca i1, i1 0 + %nop11280 = alloca i1, i1 0 + %nop11281 = alloca i1, i1 0 + %nop11282 = alloca i1, i1 0 + %nop11283 = alloca i1, i1 0 + %nop11284 = alloca i1, i1 0 + %nop11285 = alloca i1, i1 0 + %nop11286 = alloca i1, i1 0 + %nop11287 = alloca i1, i1 0 + %nop11288 = alloca i1, i1 0 + %nop11289 = alloca i1, i1 0 + %nop11290 = alloca i1, i1 0 + %nop11291 = alloca i1, i1 0 + %nop11292 = alloca i1, i1 0 + %nop11293 = alloca i1, i1 0 + %nop11294 = alloca i1, i1 0 + %nop11295 = alloca i1, i1 0 + %nop11296 = alloca i1, i1 0 + %nop11297 = alloca i1, i1 0 + %nop11298 = alloca i1, i1 0 + %nop11299 = alloca i1, i1 0 + %nop11300 = alloca i1, i1 0 + %nop11301 = alloca i1, i1 0 + %nop11302 = alloca i1, i1 0 + %nop11303 = alloca i1, i1 0 + %nop11304 = alloca i1, i1 0 + %nop11305 = alloca i1, i1 0 + %nop11306 = alloca i1, i1 0 + %nop11307 = alloca i1, i1 0 + %nop11308 = alloca i1, i1 0 + %nop11309 = alloca i1, i1 0 + %nop11310 = alloca i1, i1 0 + %nop11311 = alloca i1, i1 0 + %nop11312 = alloca i1, i1 0 + %nop11313 = alloca i1, i1 0 + %nop11314 = alloca i1, i1 0 + %nop11315 = alloca i1, i1 0 + %nop11316 = alloca i1, i1 0 + %nop11317 = alloca i1, i1 0 + %nop11318 = alloca i1, i1 0 + %nop11319 = alloca i1, i1 0 + %nop11320 = alloca i1, i1 0 + %nop11321 = alloca i1, i1 0 + %nop11322 = alloca i1, i1 0 + %nop11323 = alloca i1, i1 0 + %nop11324 = alloca i1, i1 0 + %nop11325 = alloca i1, i1 0 + %nop11326 = alloca i1, i1 0 + 
%nop11327 = alloca i1, i1 0 + %nop11328 = alloca i1, i1 0 + %nop11329 = alloca i1, i1 0 + %nop11330 = alloca i1, i1 0 + %nop11331 = alloca i1, i1 0 + %nop11332 = alloca i1, i1 0 + %nop11333 = alloca i1, i1 0 + %nop11334 = alloca i1, i1 0 + %nop11335 = alloca i1, i1 0 + %nop11336 = alloca i1, i1 0 + %nop11337 = alloca i1, i1 0 + %nop11338 = alloca i1, i1 0 + %nop11339 = alloca i1, i1 0 + %nop11340 = alloca i1, i1 0 + %nop11341 = alloca i1, i1 0 + %nop11342 = alloca i1, i1 0 + %nop11343 = alloca i1, i1 0 + %nop11344 = alloca i1, i1 0 + %nop11345 = alloca i1, i1 0 + %nop11346 = alloca i1, i1 0 + %nop11347 = alloca i1, i1 0 + %nop11348 = alloca i1, i1 0 + %nop11349 = alloca i1, i1 0 + %nop11350 = alloca i1, i1 0 + %nop11351 = alloca i1, i1 0 + %nop11352 = alloca i1, i1 0 + %nop11353 = alloca i1, i1 0 + %nop11354 = alloca i1, i1 0 + %nop11355 = alloca i1, i1 0 + %nop11356 = alloca i1, i1 0 + %nop11357 = alloca i1, i1 0 + %nop11358 = alloca i1, i1 0 + %nop11359 = alloca i1, i1 0 + %nop11360 = alloca i1, i1 0 + %nop11361 = alloca i1, i1 0 + %nop11362 = alloca i1, i1 0 + %nop11363 = alloca i1, i1 0 + %nop11364 = alloca i1, i1 0 + %nop11365 = alloca i1, i1 0 + %nop11366 = alloca i1, i1 0 + %nop11367 = alloca i1, i1 0 + %nop11368 = alloca i1, i1 0 + %nop11369 = alloca i1, i1 0 + %nop11370 = alloca i1, i1 0 + %nop11371 = alloca i1, i1 0 + %nop11372 = alloca i1, i1 0 + %nop11373 = alloca i1, i1 0 + %nop11374 = alloca i1, i1 0 + %nop11375 = alloca i1, i1 0 + %nop11376 = alloca i1, i1 0 + %nop11377 = alloca i1, i1 0 + %nop11378 = alloca i1, i1 0 + %nop11379 = alloca i1, i1 0 + %nop11380 = alloca i1, i1 0 + %nop11381 = alloca i1, i1 0 + %nop11382 = alloca i1, i1 0 + %nop11383 = alloca i1, i1 0 + %nop11384 = alloca i1, i1 0 + %nop11385 = alloca i1, i1 0 + %nop11386 = alloca i1, i1 0 + %nop11387 = alloca i1, i1 0 + %nop11388 = alloca i1, i1 0 + %nop11389 = alloca i1, i1 0 + %nop11390 = alloca i1, i1 0 + %nop11391 = alloca i1, i1 0 + %nop11392 = alloca i1, i1 0 + %nop11393 = alloca 
i1, i1 0 + %nop11394 = alloca i1, i1 0 + %nop11395 = alloca i1, i1 0 + %nop11396 = alloca i1, i1 0 + %nop11397 = alloca i1, i1 0 + %nop11398 = alloca i1, i1 0 + %nop11399 = alloca i1, i1 0 + %nop11400 = alloca i1, i1 0 + %nop11401 = alloca i1, i1 0 + %nop11402 = alloca i1, i1 0 + %nop11403 = alloca i1, i1 0 + %nop11404 = alloca i1, i1 0 + %nop11405 = alloca i1, i1 0 + %nop11406 = alloca i1, i1 0 + %nop11407 = alloca i1, i1 0 + %nop11408 = alloca i1, i1 0 + %nop11409 = alloca i1, i1 0 + %nop11410 = alloca i1, i1 0 + %nop11411 = alloca i1, i1 0 + %nop11412 = alloca i1, i1 0 + %nop11413 = alloca i1, i1 0 + %nop11414 = alloca i1, i1 0 + %nop11415 = alloca i1, i1 0 + %nop11416 = alloca i1, i1 0 + %nop11417 = alloca i1, i1 0 + %nop11418 = alloca i1, i1 0 + %nop11419 = alloca i1, i1 0 + %nop11420 = alloca i1, i1 0 + %nop11421 = alloca i1, i1 0 + %nop11422 = alloca i1, i1 0 + %nop11423 = alloca i1, i1 0 + %nop11424 = alloca i1, i1 0 + %nop11425 = alloca i1, i1 0 + %nop11426 = alloca i1, i1 0 + %nop11427 = alloca i1, i1 0 + %nop11428 = alloca i1, i1 0 + %nop11429 = alloca i1, i1 0 + %nop11430 = alloca i1, i1 0 + %nop11431 = alloca i1, i1 0 + %nop11432 = alloca i1, i1 0 + %nop11433 = alloca i1, i1 0 + %nop11434 = alloca i1, i1 0 + %nop11435 = alloca i1, i1 0 + %nop11436 = alloca i1, i1 0 + %nop11437 = alloca i1, i1 0 + %nop11438 = alloca i1, i1 0 + %nop11439 = alloca i1, i1 0 + %nop11440 = alloca i1, i1 0 + %nop11441 = alloca i1, i1 0 + %nop11442 = alloca i1, i1 0 + %nop11443 = alloca i1, i1 0 + %nop11444 = alloca i1, i1 0 + %nop11445 = alloca i1, i1 0 + %nop11446 = alloca i1, i1 0 + %nop11447 = alloca i1, i1 0 + %nop11448 = alloca i1, i1 0 + %nop11449 = alloca i1, i1 0 + %nop11450 = alloca i1, i1 0 + %nop11451 = alloca i1, i1 0 + %nop11452 = alloca i1, i1 0 + %nop11453 = alloca i1, i1 0 + %nop11454 = alloca i1, i1 0 + %nop11455 = alloca i1, i1 0 + %nop11456 = alloca i1, i1 0 + %nop11457 = alloca i1, i1 0 + %nop11458 = alloca i1, i1 0 + %nop11459 = alloca i1, i1 0 + 
%nop11460 = alloca i1, i1 0 + %nop11461 = alloca i1, i1 0 + %nop11462 = alloca i1, i1 0 + %nop11463 = alloca i1, i1 0 + %nop11464 = alloca i1, i1 0 + %nop11465 = alloca i1, i1 0 + %nop11466 = alloca i1, i1 0 + %nop11467 = alloca i1, i1 0 + %nop11468 = alloca i1, i1 0 + %nop11469 = alloca i1, i1 0 + %nop11470 = alloca i1, i1 0 + %nop11471 = alloca i1, i1 0 + %nop11472 = alloca i1, i1 0 + %nop11473 = alloca i1, i1 0 + %nop11474 = alloca i1, i1 0 + %nop11475 = alloca i1, i1 0 + %nop11476 = alloca i1, i1 0 + %nop11477 = alloca i1, i1 0 + %nop11478 = alloca i1, i1 0 + %nop11479 = alloca i1, i1 0 + %nop11480 = alloca i1, i1 0 + %nop11481 = alloca i1, i1 0 + %nop11482 = alloca i1, i1 0 + %nop11483 = alloca i1, i1 0 + %nop11484 = alloca i1, i1 0 + %nop11485 = alloca i1, i1 0 + %nop11486 = alloca i1, i1 0 + %nop11487 = alloca i1, i1 0 + %nop11488 = alloca i1, i1 0 + %nop11489 = alloca i1, i1 0 + %nop11490 = alloca i1, i1 0 + %nop11491 = alloca i1, i1 0 + %nop11492 = alloca i1, i1 0 + %nop11493 = alloca i1, i1 0 + %nop11494 = alloca i1, i1 0 + %nop11495 = alloca i1, i1 0 + %nop11496 = alloca i1, i1 0 + %nop11497 = alloca i1, i1 0 + %nop11498 = alloca i1, i1 0 + %nop11499 = alloca i1, i1 0 + %nop11500 = alloca i1, i1 0 + %nop11501 = alloca i1, i1 0 + %nop11502 = alloca i1, i1 0 + %nop11503 = alloca i1, i1 0 + %nop11504 = alloca i1, i1 0 + %nop11505 = alloca i1, i1 0 + %nop11506 = alloca i1, i1 0 + %nop11507 = alloca i1, i1 0 + %nop11508 = alloca i1, i1 0 + %nop11509 = alloca i1, i1 0 + %nop11510 = alloca i1, i1 0 + %nop11511 = alloca i1, i1 0 + %nop11512 = alloca i1, i1 0 + %nop11513 = alloca i1, i1 0 + %nop11514 = alloca i1, i1 0 + %nop11515 = alloca i1, i1 0 + %nop11516 = alloca i1, i1 0 + %nop11517 = alloca i1, i1 0 + %nop11518 = alloca i1, i1 0 + %nop11519 = alloca i1, i1 0 + %nop11520 = alloca i1, i1 0 + %nop11521 = alloca i1, i1 0 + %nop11522 = alloca i1, i1 0 + %nop11523 = alloca i1, i1 0 + %nop11524 = alloca i1, i1 0 + %nop11525 = alloca i1, i1 0 + %nop11526 = alloca 
i1, i1 0 + %nop11527 = alloca i1, i1 0 + %nop11528 = alloca i1, i1 0 + %nop11529 = alloca i1, i1 0 + %nop11530 = alloca i1, i1 0 + %nop11531 = alloca i1, i1 0 + %nop11532 = alloca i1, i1 0 + %nop11533 = alloca i1, i1 0 + %nop11534 = alloca i1, i1 0 + %nop11535 = alloca i1, i1 0 + %nop11536 = alloca i1, i1 0 + %nop11537 = alloca i1, i1 0 + %nop11538 = alloca i1, i1 0 + %nop11539 = alloca i1, i1 0 + %nop11540 = alloca i1, i1 0 + %nop11541 = alloca i1, i1 0 + %nop11542 = alloca i1, i1 0 + %nop11543 = alloca i1, i1 0 + %nop11544 = alloca i1, i1 0 + %nop11545 = alloca i1, i1 0 + %nop11546 = alloca i1, i1 0 + %nop11547 = alloca i1, i1 0 + %nop11548 = alloca i1, i1 0 + %nop11549 = alloca i1, i1 0 + %nop11550 = alloca i1, i1 0 + %nop11551 = alloca i1, i1 0 + %nop11552 = alloca i1, i1 0 + %nop11553 = alloca i1, i1 0 + %nop11554 = alloca i1, i1 0 + %nop11555 = alloca i1, i1 0 + %nop11556 = alloca i1, i1 0 + %nop11557 = alloca i1, i1 0 + %nop11558 = alloca i1, i1 0 + %nop11559 = alloca i1, i1 0 + %nop11560 = alloca i1, i1 0 + %nop11561 = alloca i1, i1 0 + %nop11562 = alloca i1, i1 0 + %nop11563 = alloca i1, i1 0 + %nop11564 = alloca i1, i1 0 + %nop11565 = alloca i1, i1 0 + %nop11566 = alloca i1, i1 0 + %nop11567 = alloca i1, i1 0 + %nop11568 = alloca i1, i1 0 + %nop11569 = alloca i1, i1 0 + %nop11570 = alloca i1, i1 0 + %nop11571 = alloca i1, i1 0 + %nop11572 = alloca i1, i1 0 + %nop11573 = alloca i1, i1 0 + %nop11574 = alloca i1, i1 0 + %nop11575 = alloca i1, i1 0 + %nop11576 = alloca i1, i1 0 + %nop11577 = alloca i1, i1 0 + %nop11578 = alloca i1, i1 0 + %nop11579 = alloca i1, i1 0 + %nop11580 = alloca i1, i1 0 + %nop11581 = alloca i1, i1 0 + %nop11582 = alloca i1, i1 0 + %nop11583 = alloca i1, i1 0 + %nop11584 = alloca i1, i1 0 + %nop11585 = alloca i1, i1 0 + %nop11586 = alloca i1, i1 0 + %nop11587 = alloca i1, i1 0 + %nop11588 = alloca i1, i1 0 + %nop11589 = alloca i1, i1 0 + %nop11590 = alloca i1, i1 0 + %nop11591 = alloca i1, i1 0 + %nop11592 = alloca i1, i1 0 + 
%nop11593 = alloca i1, i1 0 + %nop11594 = alloca i1, i1 0 + %nop11595 = alloca i1, i1 0 + %nop11596 = alloca i1, i1 0 + %nop11597 = alloca i1, i1 0 + %nop11598 = alloca i1, i1 0 + %nop11599 = alloca i1, i1 0 + %nop11600 = alloca i1, i1 0 + %nop11601 = alloca i1, i1 0 + %nop11602 = alloca i1, i1 0 + %nop11603 = alloca i1, i1 0 + %nop11604 = alloca i1, i1 0 + %nop11605 = alloca i1, i1 0 + %nop11606 = alloca i1, i1 0 + %nop11607 = alloca i1, i1 0 + %nop11608 = alloca i1, i1 0 + %nop11609 = alloca i1, i1 0 + %nop11610 = alloca i1, i1 0 + %nop11611 = alloca i1, i1 0 + %nop11612 = alloca i1, i1 0 + %nop11613 = alloca i1, i1 0 + %nop11614 = alloca i1, i1 0 + %nop11615 = alloca i1, i1 0 + %nop11616 = alloca i1, i1 0 + %nop11617 = alloca i1, i1 0 + %nop11618 = alloca i1, i1 0 + %nop11619 = alloca i1, i1 0 + %nop11620 = alloca i1, i1 0 + %nop11621 = alloca i1, i1 0 + %nop11622 = alloca i1, i1 0 + %nop11623 = alloca i1, i1 0 + %nop11624 = alloca i1, i1 0 + %nop11625 = alloca i1, i1 0 + %nop11626 = alloca i1, i1 0 + %nop11627 = alloca i1, i1 0 + %nop11628 = alloca i1, i1 0 + %nop11629 = alloca i1, i1 0 + %nop11630 = alloca i1, i1 0 + %nop11631 = alloca i1, i1 0 + %nop11632 = alloca i1, i1 0 + %nop11633 = alloca i1, i1 0 + %nop11634 = alloca i1, i1 0 + %nop11635 = alloca i1, i1 0 + %nop11636 = alloca i1, i1 0 + %nop11637 = alloca i1, i1 0 + %nop11638 = alloca i1, i1 0 + %nop11639 = alloca i1, i1 0 + %nop11640 = alloca i1, i1 0 + %nop11641 = alloca i1, i1 0 + %nop11642 = alloca i1, i1 0 + %nop11643 = alloca i1, i1 0 + %nop11644 = alloca i1, i1 0 + %nop11645 = alloca i1, i1 0 + %nop11646 = alloca i1, i1 0 + %nop11647 = alloca i1, i1 0 + %nop11648 = alloca i1, i1 0 + %nop11649 = alloca i1, i1 0 + %nop11650 = alloca i1, i1 0 + %nop11651 = alloca i1, i1 0 + %nop11652 = alloca i1, i1 0 + %nop11653 = alloca i1, i1 0 + %nop11654 = alloca i1, i1 0 + %nop11655 = alloca i1, i1 0 + %nop11656 = alloca i1, i1 0 + %nop11657 = alloca i1, i1 0 + %nop11658 = alloca i1, i1 0 + %nop11659 = alloca 
i1, i1 0 + %nop11660 = alloca i1, i1 0 + %nop11661 = alloca i1, i1 0 + %nop11662 = alloca i1, i1 0 + %nop11663 = alloca i1, i1 0 + %nop11664 = alloca i1, i1 0 + %nop11665 = alloca i1, i1 0 + %nop11666 = alloca i1, i1 0 + %nop11667 = alloca i1, i1 0 + %nop11668 = alloca i1, i1 0 + %nop11669 = alloca i1, i1 0 + %nop11670 = alloca i1, i1 0 + %nop11671 = alloca i1, i1 0 + %nop11672 = alloca i1, i1 0 + %nop11673 = alloca i1, i1 0 + %nop11674 = alloca i1, i1 0 + %nop11675 = alloca i1, i1 0 + %nop11676 = alloca i1, i1 0 + %nop11677 = alloca i1, i1 0 + %nop11678 = alloca i1, i1 0 + %nop11679 = alloca i1, i1 0 + %nop11680 = alloca i1, i1 0 + %nop11681 = alloca i1, i1 0 + %nop11682 = alloca i1, i1 0 + %nop11683 = alloca i1, i1 0 + %nop11684 = alloca i1, i1 0 + %nop11685 = alloca i1, i1 0 + %nop11686 = alloca i1, i1 0 + %nop11687 = alloca i1, i1 0 + %nop11688 = alloca i1, i1 0 + %nop11689 = alloca i1, i1 0 + %nop11690 = alloca i1, i1 0 + %nop11691 = alloca i1, i1 0 + %nop11692 = alloca i1, i1 0 + %nop11693 = alloca i1, i1 0 + %nop11694 = alloca i1, i1 0 + %nop11695 = alloca i1, i1 0 + %nop11696 = alloca i1, i1 0 + %nop11697 = alloca i1, i1 0 + %nop11698 = alloca i1, i1 0 + %nop11699 = alloca i1, i1 0 + %nop11700 = alloca i1, i1 0 + %nop11701 = alloca i1, i1 0 + %nop11702 = alloca i1, i1 0 + %nop11703 = alloca i1, i1 0 + %nop11704 = alloca i1, i1 0 + %nop11705 = alloca i1, i1 0 + %nop11706 = alloca i1, i1 0 + %nop11707 = alloca i1, i1 0 + %nop11708 = alloca i1, i1 0 + %nop11709 = alloca i1, i1 0 + %nop11710 = alloca i1, i1 0 + %nop11711 = alloca i1, i1 0 + %nop11712 = alloca i1, i1 0 + %nop11713 = alloca i1, i1 0 + %nop11714 = alloca i1, i1 0 + %nop11715 = alloca i1, i1 0 + %nop11716 = alloca i1, i1 0 + %nop11717 = alloca i1, i1 0 + %nop11718 = alloca i1, i1 0 + %nop11719 = alloca i1, i1 0 + %nop11720 = alloca i1, i1 0 + %nop11721 = alloca i1, i1 0 + %nop11722 = alloca i1, i1 0 + %nop11723 = alloca i1, i1 0 + %nop11724 = alloca i1, i1 0 + %nop11725 = alloca i1, i1 0 + 
%nop11726 = alloca i1, i1 0 + %nop11727 = alloca i1, i1 0 + %nop11728 = alloca i1, i1 0 + %nop11729 = alloca i1, i1 0 + %nop11730 = alloca i1, i1 0 + %nop11731 = alloca i1, i1 0 + %nop11732 = alloca i1, i1 0 + %nop11733 = alloca i1, i1 0 + %nop11734 = alloca i1, i1 0 + %nop11735 = alloca i1, i1 0 + %nop11736 = alloca i1, i1 0 + %nop11737 = alloca i1, i1 0 + %nop11738 = alloca i1, i1 0 + %nop11739 = alloca i1, i1 0 + %nop11740 = alloca i1, i1 0 + %nop11741 = alloca i1, i1 0 + %nop11742 = alloca i1, i1 0 + %nop11743 = alloca i1, i1 0 + %nop11744 = alloca i1, i1 0 + %nop11745 = alloca i1, i1 0 + %nop11746 = alloca i1, i1 0 + %nop11747 = alloca i1, i1 0 + %nop11748 = alloca i1, i1 0 + %nop11749 = alloca i1, i1 0 + %nop11750 = alloca i1, i1 0 + %nop11751 = alloca i1, i1 0 + %nop11752 = alloca i1, i1 0 + %nop11753 = alloca i1, i1 0 + %nop11754 = alloca i1, i1 0 + %nop11755 = alloca i1, i1 0 + %nop11756 = alloca i1, i1 0 + %nop11757 = alloca i1, i1 0 + %nop11758 = alloca i1, i1 0 + %nop11759 = alloca i1, i1 0 + %nop11760 = alloca i1, i1 0 + %nop11761 = alloca i1, i1 0 + %nop11762 = alloca i1, i1 0 + %nop11763 = alloca i1, i1 0 + %nop11764 = alloca i1, i1 0 + %nop11765 = alloca i1, i1 0 + %nop11766 = alloca i1, i1 0 + %nop11767 = alloca i1, i1 0 + %nop11768 = alloca i1, i1 0 + %nop11769 = alloca i1, i1 0 + %nop11770 = alloca i1, i1 0 + %nop11771 = alloca i1, i1 0 + %nop11772 = alloca i1, i1 0 + %nop11773 = alloca i1, i1 0 + %nop11774 = alloca i1, i1 0 + %nop11775 = alloca i1, i1 0 + %nop11776 = alloca i1, i1 0 + %nop11777 = alloca i1, i1 0 + %nop11778 = alloca i1, i1 0 + %nop11779 = alloca i1, i1 0 + %nop11780 = alloca i1, i1 0 + %nop11781 = alloca i1, i1 0 + %nop11782 = alloca i1, i1 0 + %nop11783 = alloca i1, i1 0 + %nop11784 = alloca i1, i1 0 + %nop11785 = alloca i1, i1 0 + %nop11786 = alloca i1, i1 0 + %nop11787 = alloca i1, i1 0 + %nop11788 = alloca i1, i1 0 + %nop11789 = alloca i1, i1 0 + %nop11790 = alloca i1, i1 0 + %nop11791 = alloca i1, i1 0 + %nop11792 = alloca 
i1, i1 0 + %nop11793 = alloca i1, i1 0 + %nop11794 = alloca i1, i1 0 + %nop11795 = alloca i1, i1 0 + %nop11796 = alloca i1, i1 0 + %nop11797 = alloca i1, i1 0 + %nop11798 = alloca i1, i1 0 + %nop11799 = alloca i1, i1 0 + %nop11800 = alloca i1, i1 0 + %nop11801 = alloca i1, i1 0 + %nop11802 = alloca i1, i1 0 + %nop11803 = alloca i1, i1 0 + %nop11804 = alloca i1, i1 0 + %nop11805 = alloca i1, i1 0 + %nop11806 = alloca i1, i1 0 + %nop11807 = alloca i1, i1 0 + %nop11808 = alloca i1, i1 0 + %nop11809 = alloca i1, i1 0 + %nop11810 = alloca i1, i1 0 + %nop11811 = alloca i1, i1 0 + %nop11812 = alloca i1, i1 0 + %nop11813 = alloca i1, i1 0 + %nop11814 = alloca i1, i1 0 + %nop11815 = alloca i1, i1 0 + %nop11816 = alloca i1, i1 0 + %nop11817 = alloca i1, i1 0 + %nop11818 = alloca i1, i1 0 + %nop11819 = alloca i1, i1 0 + %nop11820 = alloca i1, i1 0 + %nop11821 = alloca i1, i1 0 + %nop11822 = alloca i1, i1 0 + %nop11823 = alloca i1, i1 0 + %nop11824 = alloca i1, i1 0 + %nop11825 = alloca i1, i1 0 + %nop11826 = alloca i1, i1 0 + %nop11827 = alloca i1, i1 0 + %nop11828 = alloca i1, i1 0 + %nop11829 = alloca i1, i1 0 + %nop11830 = alloca i1, i1 0 + %nop11831 = alloca i1, i1 0 + %nop11832 = alloca i1, i1 0 + %nop11833 = alloca i1, i1 0 + %nop11834 = alloca i1, i1 0 + %nop11835 = alloca i1, i1 0 + %nop11836 = alloca i1, i1 0 + %nop11837 = alloca i1, i1 0 + %nop11838 = alloca i1, i1 0 + %nop11839 = alloca i1, i1 0 + %nop11840 = alloca i1, i1 0 + %nop11841 = alloca i1, i1 0 + %nop11842 = alloca i1, i1 0 + %nop11843 = alloca i1, i1 0 + %nop11844 = alloca i1, i1 0 + %nop11845 = alloca i1, i1 0 + %nop11846 = alloca i1, i1 0 + %nop11847 = alloca i1, i1 0 + %nop11848 = alloca i1, i1 0 + %nop11849 = alloca i1, i1 0 + %nop11850 = alloca i1, i1 0 + %nop11851 = alloca i1, i1 0 + %nop11852 = alloca i1, i1 0 + %nop11853 = alloca i1, i1 0 + %nop11854 = alloca i1, i1 0 + %nop11855 = alloca i1, i1 0 + %nop11856 = alloca i1, i1 0 + %nop11857 = alloca i1, i1 0 + %nop11858 = alloca i1, i1 0 + 
%nop11859 = alloca i1, i1 0 + %nop11860 = alloca i1, i1 0 + %nop11861 = alloca i1, i1 0 + %nop11862 = alloca i1, i1 0 + %nop11863 = alloca i1, i1 0 + %nop11864 = alloca i1, i1 0 + %nop11865 = alloca i1, i1 0 + %nop11866 = alloca i1, i1 0 + %nop11867 = alloca i1, i1 0 + %nop11868 = alloca i1, i1 0 + %nop11869 = alloca i1, i1 0 + %nop11870 = alloca i1, i1 0 + %nop11871 = alloca i1, i1 0 + %nop11872 = alloca i1, i1 0 + %nop11873 = alloca i1, i1 0 + %nop11874 = alloca i1, i1 0 + %nop11875 = alloca i1, i1 0 + %nop11876 = alloca i1, i1 0 + %nop11877 = alloca i1, i1 0 + %nop11878 = alloca i1, i1 0 + %nop11879 = alloca i1, i1 0 + %nop11880 = alloca i1, i1 0 + %nop11881 = alloca i1, i1 0 + %nop11882 = alloca i1, i1 0 + %nop11883 = alloca i1, i1 0 + %nop11884 = alloca i1, i1 0 + %nop11885 = alloca i1, i1 0 + %nop11886 = alloca i1, i1 0 + %nop11887 = alloca i1, i1 0 + %nop11888 = alloca i1, i1 0 + %nop11889 = alloca i1, i1 0 + %nop11890 = alloca i1, i1 0 + %nop11891 = alloca i1, i1 0 + %nop11892 = alloca i1, i1 0 + %nop11893 = alloca i1, i1 0 + %nop11894 = alloca i1, i1 0 + %nop11895 = alloca i1, i1 0 + %nop11896 = alloca i1, i1 0 + %nop11897 = alloca i1, i1 0 + %nop11898 = alloca i1, i1 0 + %nop11899 = alloca i1, i1 0 + %nop11900 = alloca i1, i1 0 + %nop11901 = alloca i1, i1 0 + %nop11902 = alloca i1, i1 0 + %nop11903 = alloca i1, i1 0 + %nop11904 = alloca i1, i1 0 + %nop11905 = alloca i1, i1 0 + %nop11906 = alloca i1, i1 0 + %nop11907 = alloca i1, i1 0 + %nop11908 = alloca i1, i1 0 + %nop11909 = alloca i1, i1 0 + %nop11910 = alloca i1, i1 0 + %nop11911 = alloca i1, i1 0 + %nop11912 = alloca i1, i1 0 + %nop11913 = alloca i1, i1 0 + %nop11914 = alloca i1, i1 0 + %nop11915 = alloca i1, i1 0 + %nop11916 = alloca i1, i1 0 + %nop11917 = alloca i1, i1 0 + %nop11918 = alloca i1, i1 0 + %nop11919 = alloca i1, i1 0 + %nop11920 = alloca i1, i1 0 + %nop11921 = alloca i1, i1 0 + %nop11922 = alloca i1, i1 0 + %nop11923 = alloca i1, i1 0 + %nop11924 = alloca i1, i1 0 + %nop11925 = alloca 
i1, i1 0 + %nop11926 = alloca i1, i1 0 + %nop11927 = alloca i1, i1 0 + %nop11928 = alloca i1, i1 0 + %nop11929 = alloca i1, i1 0 + %nop11930 = alloca i1, i1 0 + %nop11931 = alloca i1, i1 0 + %nop11932 = alloca i1, i1 0 + %nop11933 = alloca i1, i1 0 + %nop11934 = alloca i1, i1 0 + %nop11935 = alloca i1, i1 0 + %nop11936 = alloca i1, i1 0 + %nop11937 = alloca i1, i1 0 + %nop11938 = alloca i1, i1 0 + %nop11939 = alloca i1, i1 0 + %nop11940 = alloca i1, i1 0 + %nop11941 = alloca i1, i1 0 + %nop11942 = alloca i1, i1 0 + %nop11943 = alloca i1, i1 0 + %nop11944 = alloca i1, i1 0 + %nop11945 = alloca i1, i1 0 + %nop11946 = alloca i1, i1 0 + %nop11947 = alloca i1, i1 0 + %nop11948 = alloca i1, i1 0 + %nop11949 = alloca i1, i1 0 + %nop11950 = alloca i1, i1 0 + %nop11951 = alloca i1, i1 0 + %nop11952 = alloca i1, i1 0 + %nop11953 = alloca i1, i1 0 + %nop11954 = alloca i1, i1 0 + %nop11955 = alloca i1, i1 0 + %nop11956 = alloca i1, i1 0 + %nop11957 = alloca i1, i1 0 + %nop11958 = alloca i1, i1 0 + %nop11959 = alloca i1, i1 0 + %nop11960 = alloca i1, i1 0 + %nop11961 = alloca i1, i1 0 + %nop11962 = alloca i1, i1 0 + %nop11963 = alloca i1, i1 0 + %nop11964 = alloca i1, i1 0 + %nop11965 = alloca i1, i1 0 + %nop11966 = alloca i1, i1 0 + %nop11967 = alloca i1, i1 0 + %nop11968 = alloca i1, i1 0 + %nop11969 = alloca i1, i1 0 + %nop11970 = alloca i1, i1 0 + %nop11971 = alloca i1, i1 0 + %nop11972 = alloca i1, i1 0 + %nop11973 = alloca i1, i1 0 + %nop11974 = alloca i1, i1 0 + %nop11975 = alloca i1, i1 0 + %nop11976 = alloca i1, i1 0 + %nop11977 = alloca i1, i1 0 + %nop11978 = alloca i1, i1 0 + %nop11979 = alloca i1, i1 0 + %nop11980 = alloca i1, i1 0 + %nop11981 = alloca i1, i1 0 + %nop11982 = alloca i1, i1 0 + %nop11983 = alloca i1, i1 0 + %nop11984 = alloca i1, i1 0 + %nop11985 = alloca i1, i1 0 + %nop11986 = alloca i1, i1 0 + %nop11987 = alloca i1, i1 0 + %nop11988 = alloca i1, i1 0 + %nop11989 = alloca i1, i1 0 + %nop11990 = alloca i1, i1 0 + %nop11991 = alloca i1, i1 0 + 
%nop11992 = alloca i1, i1 0 + %nop11993 = alloca i1, i1 0 + %nop11994 = alloca i1, i1 0 + %nop11995 = alloca i1, i1 0 + %nop11996 = alloca i1, i1 0 + %nop11997 = alloca i1, i1 0 + %nop11998 = alloca i1, i1 0 + %nop11999 = alloca i1, i1 0 + %nop12000 = alloca i1, i1 0 + %nop12001 = alloca i1, i1 0 + %nop12002 = alloca i1, i1 0 + %nop12003 = alloca i1, i1 0 + %nop12004 = alloca i1, i1 0 + %nop12005 = alloca i1, i1 0 + %nop12006 = alloca i1, i1 0 + %nop12007 = alloca i1, i1 0 + %nop12008 = alloca i1, i1 0 + %nop12009 = alloca i1, i1 0 + %nop12010 = alloca i1, i1 0 + %nop12011 = alloca i1, i1 0 + %nop12012 = alloca i1, i1 0 + %nop12013 = alloca i1, i1 0 + %nop12014 = alloca i1, i1 0 + %nop12015 = alloca i1, i1 0 + %nop12016 = alloca i1, i1 0 + %nop12017 = alloca i1, i1 0 + %nop12018 = alloca i1, i1 0 + %nop12019 = alloca i1, i1 0 + %nop12020 = alloca i1, i1 0 + %nop12021 = alloca i1, i1 0 + %nop12022 = alloca i1, i1 0 + %nop12023 = alloca i1, i1 0 + %nop12024 = alloca i1, i1 0 + %nop12025 = alloca i1, i1 0 + %nop12026 = alloca i1, i1 0 + %nop12027 = alloca i1, i1 0 + %nop12028 = alloca i1, i1 0 + %nop12029 = alloca i1, i1 0 + %nop12030 = alloca i1, i1 0 + %nop12031 = alloca i1, i1 0 + %nop12032 = alloca i1, i1 0 + %nop12033 = alloca i1, i1 0 + %nop12034 = alloca i1, i1 0 + %nop12035 = alloca i1, i1 0 + %nop12036 = alloca i1, i1 0 + %nop12037 = alloca i1, i1 0 + %nop12038 = alloca i1, i1 0 + %nop12039 = alloca i1, i1 0 + %nop12040 = alloca i1, i1 0 + %nop12041 = alloca i1, i1 0 + %nop12042 = alloca i1, i1 0 + %nop12043 = alloca i1, i1 0 + %nop12044 = alloca i1, i1 0 + %nop12045 = alloca i1, i1 0 + %nop12046 = alloca i1, i1 0 + %nop12047 = alloca i1, i1 0 + %nop12048 = alloca i1, i1 0 + %nop12049 = alloca i1, i1 0 + %nop12050 = alloca i1, i1 0 + %nop12051 = alloca i1, i1 0 + %nop12052 = alloca i1, i1 0 + %nop12053 = alloca i1, i1 0 + %nop12054 = alloca i1, i1 0 + %nop12055 = alloca i1, i1 0 + %nop12056 = alloca i1, i1 0 + %nop12057 = alloca i1, i1 0 + %nop12058 = alloca 
i1, i1 0 + %nop12059 = alloca i1, i1 0 + %nop12060 = alloca i1, i1 0 + %nop12061 = alloca i1, i1 0 + %nop12062 = alloca i1, i1 0 + %nop12063 = alloca i1, i1 0 + %nop12064 = alloca i1, i1 0 + %nop12065 = alloca i1, i1 0 + %nop12066 = alloca i1, i1 0 + %nop12067 = alloca i1, i1 0 + %nop12068 = alloca i1, i1 0 + %nop12069 = alloca i1, i1 0 + %nop12070 = alloca i1, i1 0 + %nop12071 = alloca i1, i1 0 + %nop12072 = alloca i1, i1 0 + %nop12073 = alloca i1, i1 0 + %nop12074 = alloca i1, i1 0 + %nop12075 = alloca i1, i1 0 + %nop12076 = alloca i1, i1 0 + %nop12077 = alloca i1, i1 0 + %nop12078 = alloca i1, i1 0 + %nop12079 = alloca i1, i1 0 + %nop12080 = alloca i1, i1 0 + %nop12081 = alloca i1, i1 0 + %nop12082 = alloca i1, i1 0 + %nop12083 = alloca i1, i1 0 + %nop12084 = alloca i1, i1 0 + %nop12085 = alloca i1, i1 0 + %nop12086 = alloca i1, i1 0 + %nop12087 = alloca i1, i1 0 + %nop12088 = alloca i1, i1 0 + %nop12089 = alloca i1, i1 0 + %nop12090 = alloca i1, i1 0 + %nop12091 = alloca i1, i1 0 + %nop12092 = alloca i1, i1 0 + %nop12093 = alloca i1, i1 0 + %nop12094 = alloca i1, i1 0 + %nop12095 = alloca i1, i1 0 + %nop12096 = alloca i1, i1 0 + %nop12097 = alloca i1, i1 0 + %nop12098 = alloca i1, i1 0 + %nop12099 = alloca i1, i1 0 + %nop12100 = alloca i1, i1 0 + %nop12101 = alloca i1, i1 0 + %nop12102 = alloca i1, i1 0 + %nop12103 = alloca i1, i1 0 + %nop12104 = alloca i1, i1 0 + %nop12105 = alloca i1, i1 0 + %nop12106 = alloca i1, i1 0 + %nop12107 = alloca i1, i1 0 + %nop12108 = alloca i1, i1 0 + %nop12109 = alloca i1, i1 0 + %nop12110 = alloca i1, i1 0 + %nop12111 = alloca i1, i1 0 + %nop12112 = alloca i1, i1 0 + %nop12113 = alloca i1, i1 0 + %nop12114 = alloca i1, i1 0 + %nop12115 = alloca i1, i1 0 + %nop12116 = alloca i1, i1 0 + %nop12117 = alloca i1, i1 0 + %nop12118 = alloca i1, i1 0 + %nop12119 = alloca i1, i1 0 + %nop12120 = alloca i1, i1 0 + %nop12121 = alloca i1, i1 0 + %nop12122 = alloca i1, i1 0 + %nop12123 = alloca i1, i1 0 + %nop12124 = alloca i1, i1 0 + 
%nop12125 = alloca i1, i1 0 + %nop12126 = alloca i1, i1 0 + %nop12127 = alloca i1, i1 0 + %nop12128 = alloca i1, i1 0 + %nop12129 = alloca i1, i1 0 + %nop12130 = alloca i1, i1 0 + %nop12131 = alloca i1, i1 0 + %nop12132 = alloca i1, i1 0 + %nop12133 = alloca i1, i1 0 + %nop12134 = alloca i1, i1 0 + %nop12135 = alloca i1, i1 0 + %nop12136 = alloca i1, i1 0 + %nop12137 = alloca i1, i1 0 + %nop12138 = alloca i1, i1 0 + %nop12139 = alloca i1, i1 0 + %nop12140 = alloca i1, i1 0 + %nop12141 = alloca i1, i1 0 + %nop12142 = alloca i1, i1 0 + %nop12143 = alloca i1, i1 0 + %nop12144 = alloca i1, i1 0 + %nop12145 = alloca i1, i1 0 + %nop12146 = alloca i1, i1 0 + %nop12147 = alloca i1, i1 0 + %nop12148 = alloca i1, i1 0 + %nop12149 = alloca i1, i1 0 + %nop12150 = alloca i1, i1 0 + %nop12151 = alloca i1, i1 0 + %nop12152 = alloca i1, i1 0 + %nop12153 = alloca i1, i1 0 + %nop12154 = alloca i1, i1 0 + %nop12155 = alloca i1, i1 0 + %nop12156 = alloca i1, i1 0 + %nop12157 = alloca i1, i1 0 + %nop12158 = alloca i1, i1 0 + %nop12159 = alloca i1, i1 0 + %nop12160 = alloca i1, i1 0 + %nop12161 = alloca i1, i1 0 + %nop12162 = alloca i1, i1 0 + %nop12163 = alloca i1, i1 0 + %nop12164 = alloca i1, i1 0 + %nop12165 = alloca i1, i1 0 + %nop12166 = alloca i1, i1 0 + %nop12167 = alloca i1, i1 0 + %nop12168 = alloca i1, i1 0 + %nop12169 = alloca i1, i1 0 + %nop12170 = alloca i1, i1 0 + %nop12171 = alloca i1, i1 0 + %nop12172 = alloca i1, i1 0 + %nop12173 = alloca i1, i1 0 + %nop12174 = alloca i1, i1 0 + %nop12175 = alloca i1, i1 0 + %nop12176 = alloca i1, i1 0 + %nop12177 = alloca i1, i1 0 + %nop12178 = alloca i1, i1 0 + %nop12179 = alloca i1, i1 0 + %nop12180 = alloca i1, i1 0 + %nop12181 = alloca i1, i1 0 + %nop12182 = alloca i1, i1 0 + %nop12183 = alloca i1, i1 0 + %nop12184 = alloca i1, i1 0 + %nop12185 = alloca i1, i1 0 + %nop12186 = alloca i1, i1 0 + %nop12187 = alloca i1, i1 0 + %nop12188 = alloca i1, i1 0 + %nop12189 = alloca i1, i1 0 + %nop12190 = alloca i1, i1 0 + %nop12191 = alloca 
i1, i1 0 + %nop12192 = alloca i1, i1 0 + %nop12193 = alloca i1, i1 0 + %nop12194 = alloca i1, i1 0 + %nop12195 = alloca i1, i1 0 + %nop12196 = alloca i1, i1 0 + %nop12197 = alloca i1, i1 0 + %nop12198 = alloca i1, i1 0 + %nop12199 = alloca i1, i1 0 + %nop12200 = alloca i1, i1 0 + %nop12201 = alloca i1, i1 0 + %nop12202 = alloca i1, i1 0 + %nop12203 = alloca i1, i1 0 + %nop12204 = alloca i1, i1 0 + %nop12205 = alloca i1, i1 0 + %nop12206 = alloca i1, i1 0 + %nop12207 = alloca i1, i1 0 + %nop12208 = alloca i1, i1 0 + %nop12209 = alloca i1, i1 0 + %nop12210 = alloca i1, i1 0 + %nop12211 = alloca i1, i1 0 + %nop12212 = alloca i1, i1 0 + %nop12213 = alloca i1, i1 0 + %nop12214 = alloca i1, i1 0 + %nop12215 = alloca i1, i1 0 + %nop12216 = alloca i1, i1 0 + %nop12217 = alloca i1, i1 0 + %nop12218 = alloca i1, i1 0 + %nop12219 = alloca i1, i1 0 + %nop12220 = alloca i1, i1 0 + %nop12221 = alloca i1, i1 0 + %nop12222 = alloca i1, i1 0 + %nop12223 = alloca i1, i1 0 + %nop12224 = alloca i1, i1 0 + %nop12225 = alloca i1, i1 0 + %nop12226 = alloca i1, i1 0 + %nop12227 = alloca i1, i1 0 + %nop12228 = alloca i1, i1 0 + %nop12229 = alloca i1, i1 0 + %nop12230 = alloca i1, i1 0 + %nop12231 = alloca i1, i1 0 + %nop12232 = alloca i1, i1 0 + %nop12233 = alloca i1, i1 0 + %nop12234 = alloca i1, i1 0 + %nop12235 = alloca i1, i1 0 + %nop12236 = alloca i1, i1 0 + %nop12237 = alloca i1, i1 0 + %nop12238 = alloca i1, i1 0 + %nop12239 = alloca i1, i1 0 + %nop12240 = alloca i1, i1 0 + %nop12241 = alloca i1, i1 0 + %nop12242 = alloca i1, i1 0 + %nop12243 = alloca i1, i1 0 + %nop12244 = alloca i1, i1 0 + %nop12245 = alloca i1, i1 0 + %nop12246 = alloca i1, i1 0 + %nop12247 = alloca i1, i1 0 + %nop12248 = alloca i1, i1 0 + %nop12249 = alloca i1, i1 0 + %nop12250 = alloca i1, i1 0 + %nop12251 = alloca i1, i1 0 + %nop12252 = alloca i1, i1 0 + %nop12253 = alloca i1, i1 0 + %nop12254 = alloca i1, i1 0 + %nop12255 = alloca i1, i1 0 + %nop12256 = alloca i1, i1 0 + %nop12257 = alloca i1, i1 0 + 
%nop12258 = alloca i1, i1 0 + %nop12259 = alloca i1, i1 0 + %nop12260 = alloca i1, i1 0 + %nop12261 = alloca i1, i1 0 + %nop12262 = alloca i1, i1 0 + %nop12263 = alloca i1, i1 0 + %nop12264 = alloca i1, i1 0 + %nop12265 = alloca i1, i1 0 + %nop12266 = alloca i1, i1 0 + %nop12267 = alloca i1, i1 0 + %nop12268 = alloca i1, i1 0 + %nop12269 = alloca i1, i1 0 + %nop12270 = alloca i1, i1 0 + %nop12271 = alloca i1, i1 0 + %nop12272 = alloca i1, i1 0 + %nop12273 = alloca i1, i1 0 + %nop12274 = alloca i1, i1 0 + %nop12275 = alloca i1, i1 0 + %nop12276 = alloca i1, i1 0 + %nop12277 = alloca i1, i1 0 + %nop12278 = alloca i1, i1 0 + %nop12279 = alloca i1, i1 0 + %nop12280 = alloca i1, i1 0 + %nop12281 = alloca i1, i1 0 + %nop12282 = alloca i1, i1 0 + %nop12283 = alloca i1, i1 0 + %nop12284 = alloca i1, i1 0 + %nop12285 = alloca i1, i1 0 + %nop12286 = alloca i1, i1 0 + %nop12287 = alloca i1, i1 0 + %nop12288 = alloca i1, i1 0 + %nop12289 = alloca i1, i1 0 + %nop12290 = alloca i1, i1 0 + %nop12291 = alloca i1, i1 0 + %nop12292 = alloca i1, i1 0 + %nop12293 = alloca i1, i1 0 + %nop12294 = alloca i1, i1 0 + %nop12295 = alloca i1, i1 0 + %nop12296 = alloca i1, i1 0 + %nop12297 = alloca i1, i1 0 + %nop12298 = alloca i1, i1 0 + %nop12299 = alloca i1, i1 0 + %nop12300 = alloca i1, i1 0 + %nop12301 = alloca i1, i1 0 + %nop12302 = alloca i1, i1 0 + %nop12303 = alloca i1, i1 0 + %nop12304 = alloca i1, i1 0 + %nop12305 = alloca i1, i1 0 + %nop12306 = alloca i1, i1 0 + %nop12307 = alloca i1, i1 0 + %nop12308 = alloca i1, i1 0 + %nop12309 = alloca i1, i1 0 + %nop12310 = alloca i1, i1 0 + %nop12311 = alloca i1, i1 0 + %nop12312 = alloca i1, i1 0 + %nop12313 = alloca i1, i1 0 + %nop12314 = alloca i1, i1 0 + %nop12315 = alloca i1, i1 0 + %nop12316 = alloca i1, i1 0 + %nop12317 = alloca i1, i1 0 + %nop12318 = alloca i1, i1 0 + %nop12319 = alloca i1, i1 0 + %nop12320 = alloca i1, i1 0 + %nop12321 = alloca i1, i1 0 + %nop12322 = alloca i1, i1 0 + %nop12323 = alloca i1, i1 0 + %nop12324 = alloca 
i1, i1 0 + %nop12325 = alloca i1, i1 0 + %nop12326 = alloca i1, i1 0 + %nop12327 = alloca i1, i1 0 + %nop12328 = alloca i1, i1 0 + %nop12329 = alloca i1, i1 0 + %nop12330 = alloca i1, i1 0 + %nop12331 = alloca i1, i1 0 + %nop12332 = alloca i1, i1 0 + %nop12333 = alloca i1, i1 0 + %nop12334 = alloca i1, i1 0 + %nop12335 = alloca i1, i1 0 + %nop12336 = alloca i1, i1 0 + %nop12337 = alloca i1, i1 0 + %nop12338 = alloca i1, i1 0 + %nop12339 = alloca i1, i1 0 + %nop12340 = alloca i1, i1 0 + %nop12341 = alloca i1, i1 0 + %nop12342 = alloca i1, i1 0 + %nop12343 = alloca i1, i1 0 + %nop12344 = alloca i1, i1 0 + %nop12345 = alloca i1, i1 0 + %nop12346 = alloca i1, i1 0 + %nop12347 = alloca i1, i1 0 + %nop12348 = alloca i1, i1 0 + %nop12349 = alloca i1, i1 0 + %nop12350 = alloca i1, i1 0 + %nop12351 = alloca i1, i1 0 + %nop12352 = alloca i1, i1 0 + %nop12353 = alloca i1, i1 0 + %nop12354 = alloca i1, i1 0 + %nop12355 = alloca i1, i1 0 + %nop12356 = alloca i1, i1 0 + %nop12357 = alloca i1, i1 0 + %nop12358 = alloca i1, i1 0 + %nop12359 = alloca i1, i1 0 + %nop12360 = alloca i1, i1 0 + %nop12361 = alloca i1, i1 0 + %nop12362 = alloca i1, i1 0 + %nop12363 = alloca i1, i1 0 + %nop12364 = alloca i1, i1 0 + %nop12365 = alloca i1, i1 0 + %nop12366 = alloca i1, i1 0 + %nop12367 = alloca i1, i1 0 + %nop12368 = alloca i1, i1 0 + %nop12369 = alloca i1, i1 0 + %nop12370 = alloca i1, i1 0 + %nop12371 = alloca i1, i1 0 + %nop12372 = alloca i1, i1 0 + %nop12373 = alloca i1, i1 0 + %nop12374 = alloca i1, i1 0 + %nop12375 = alloca i1, i1 0 + %nop12376 = alloca i1, i1 0 + %nop12377 = alloca i1, i1 0 + %nop12378 = alloca i1, i1 0 + %nop12379 = alloca i1, i1 0 + %nop12380 = alloca i1, i1 0 + %nop12381 = alloca i1, i1 0 + %nop12382 = alloca i1, i1 0 + %nop12383 = alloca i1, i1 0 + %nop12384 = alloca i1, i1 0 + %nop12385 = alloca i1, i1 0 + %nop12386 = alloca i1, i1 0 + %nop12387 = alloca i1, i1 0 + %nop12388 = alloca i1, i1 0 + %nop12389 = alloca i1, i1 0 + %nop12390 = alloca i1, i1 0 + 
%nop12391 = alloca i1, i1 0 + %nop12392 = alloca i1, i1 0 + %nop12393 = alloca i1, i1 0 + %nop12394 = alloca i1, i1 0 + %nop12395 = alloca i1, i1 0 + %nop12396 = alloca i1, i1 0 + %nop12397 = alloca i1, i1 0 + %nop12398 = alloca i1, i1 0 + %nop12399 = alloca i1, i1 0 + %nop12400 = alloca i1, i1 0 + %nop12401 = alloca i1, i1 0 + %nop12402 = alloca i1, i1 0 + %nop12403 = alloca i1, i1 0 + %nop12404 = alloca i1, i1 0 + %nop12405 = alloca i1, i1 0 + %nop12406 = alloca i1, i1 0 + %nop12407 = alloca i1, i1 0 + %nop12408 = alloca i1, i1 0 + %nop12409 = alloca i1, i1 0 + %nop12410 = alloca i1, i1 0 + %nop12411 = alloca i1, i1 0 + %nop12412 = alloca i1, i1 0 + %nop12413 = alloca i1, i1 0 + %nop12414 = alloca i1, i1 0 + %nop12415 = alloca i1, i1 0 + %nop12416 = alloca i1, i1 0 + %nop12417 = alloca i1, i1 0 + %nop12418 = alloca i1, i1 0 + %nop12419 = alloca i1, i1 0 + %nop12420 = alloca i1, i1 0 + %nop12421 = alloca i1, i1 0 + %nop12422 = alloca i1, i1 0 + %nop12423 = alloca i1, i1 0 + %nop12424 = alloca i1, i1 0 + %nop12425 = alloca i1, i1 0 + %nop12426 = alloca i1, i1 0 + %nop12427 = alloca i1, i1 0 + %nop12428 = alloca i1, i1 0 + %nop12429 = alloca i1, i1 0 + %nop12430 = alloca i1, i1 0 + %nop12431 = alloca i1, i1 0 + %nop12432 = alloca i1, i1 0 + %nop12433 = alloca i1, i1 0 + %nop12434 = alloca i1, i1 0 + %nop12435 = alloca i1, i1 0 + %nop12436 = alloca i1, i1 0 + %nop12437 = alloca i1, i1 0 + %nop12438 = alloca i1, i1 0 + %nop12439 = alloca i1, i1 0 + %nop12440 = alloca i1, i1 0 + %nop12441 = alloca i1, i1 0 + %nop12442 = alloca i1, i1 0 + %nop12443 = alloca i1, i1 0 + %nop12444 = alloca i1, i1 0 + %nop12445 = alloca i1, i1 0 + %nop12446 = alloca i1, i1 0 + %nop12447 = alloca i1, i1 0 + %nop12448 = alloca i1, i1 0 + %nop12449 = alloca i1, i1 0 + %nop12450 = alloca i1, i1 0 + %nop12451 = alloca i1, i1 0 + %nop12452 = alloca i1, i1 0 + %nop12453 = alloca i1, i1 0 + %nop12454 = alloca i1, i1 0 + %nop12455 = alloca i1, i1 0 + %nop12456 = alloca i1, i1 0 + %nop12457 = alloca 
i1, i1 0 + %nop12458 = alloca i1, i1 0 + %nop12459 = alloca i1, i1 0 + %nop12460 = alloca i1, i1 0 + %nop12461 = alloca i1, i1 0 + %nop12462 = alloca i1, i1 0 + %nop12463 = alloca i1, i1 0 + %nop12464 = alloca i1, i1 0 + %nop12465 = alloca i1, i1 0 + %nop12466 = alloca i1, i1 0 + %nop12467 = alloca i1, i1 0 + %nop12468 = alloca i1, i1 0 + %nop12469 = alloca i1, i1 0 + %nop12470 = alloca i1, i1 0 + %nop12471 = alloca i1, i1 0 + %nop12472 = alloca i1, i1 0 + %nop12473 = alloca i1, i1 0 + %nop12474 = alloca i1, i1 0 + %nop12475 = alloca i1, i1 0 + %nop12476 = alloca i1, i1 0 + %nop12477 = alloca i1, i1 0 + %nop12478 = alloca i1, i1 0 + %nop12479 = alloca i1, i1 0 + %nop12480 = alloca i1, i1 0 + %nop12481 = alloca i1, i1 0 + %nop12482 = alloca i1, i1 0 + %nop12483 = alloca i1, i1 0 + %nop12484 = alloca i1, i1 0 + %nop12485 = alloca i1, i1 0 + %nop12486 = alloca i1, i1 0 + %nop12487 = alloca i1, i1 0 + %nop12488 = alloca i1, i1 0 + %nop12489 = alloca i1, i1 0 + %nop12490 = alloca i1, i1 0 + %nop12491 = alloca i1, i1 0 + %nop12492 = alloca i1, i1 0 + %nop12493 = alloca i1, i1 0 + %nop12494 = alloca i1, i1 0 + %nop12495 = alloca i1, i1 0 + %nop12496 = alloca i1, i1 0 + %nop12497 = alloca i1, i1 0 + %nop12498 = alloca i1, i1 0 + %nop12499 = alloca i1, i1 0 + %nop12500 = alloca i1, i1 0 + %nop12501 = alloca i1, i1 0 + %nop12502 = alloca i1, i1 0 + %nop12503 = alloca i1, i1 0 + %nop12504 = alloca i1, i1 0 + %nop12505 = alloca i1, i1 0 + %nop12506 = alloca i1, i1 0 + %nop12507 = alloca i1, i1 0 + %nop12508 = alloca i1, i1 0 + %nop12509 = alloca i1, i1 0 + %nop12510 = alloca i1, i1 0 + %nop12511 = alloca i1, i1 0 + %nop12512 = alloca i1, i1 0 + %nop12513 = alloca i1, i1 0 + %nop12514 = alloca i1, i1 0 + %nop12515 = alloca i1, i1 0 + %nop12516 = alloca i1, i1 0 + %nop12517 = alloca i1, i1 0 + %nop12518 = alloca i1, i1 0 + %nop12519 = alloca i1, i1 0 + %nop12520 = alloca i1, i1 0 + %nop12521 = alloca i1, i1 0 + %nop12522 = alloca i1, i1 0 + %nop12523 = alloca i1, i1 0 + 
%nop12524 = alloca i1, i1 0 + %nop12525 = alloca i1, i1 0 + %nop12526 = alloca i1, i1 0 + %nop12527 = alloca i1, i1 0 + %nop12528 = alloca i1, i1 0 + %nop12529 = alloca i1, i1 0 + %nop12530 = alloca i1, i1 0 + %nop12531 = alloca i1, i1 0 + %nop12532 = alloca i1, i1 0 + %nop12533 = alloca i1, i1 0 + %nop12534 = alloca i1, i1 0 + %nop12535 = alloca i1, i1 0 + %nop12536 = alloca i1, i1 0 + %nop12537 = alloca i1, i1 0 + %nop12538 = alloca i1, i1 0 + %nop12539 = alloca i1, i1 0 + %nop12540 = alloca i1, i1 0 + %nop12541 = alloca i1, i1 0 + %nop12542 = alloca i1, i1 0 + %nop12543 = alloca i1, i1 0 + %nop12544 = alloca i1, i1 0 + %nop12545 = alloca i1, i1 0 + %nop12546 = alloca i1, i1 0 + %nop12547 = alloca i1, i1 0 + %nop12548 = alloca i1, i1 0 + %nop12549 = alloca i1, i1 0 + %nop12550 = alloca i1, i1 0 + %nop12551 = alloca i1, i1 0 + %nop12552 = alloca i1, i1 0 + %nop12553 = alloca i1, i1 0 + %nop12554 = alloca i1, i1 0 + %nop12555 = alloca i1, i1 0 + %nop12556 = alloca i1, i1 0 + %nop12557 = alloca i1, i1 0 + %nop12558 = alloca i1, i1 0 + %nop12559 = alloca i1, i1 0 + %nop12560 = alloca i1, i1 0 + %nop12561 = alloca i1, i1 0 + %nop12562 = alloca i1, i1 0 + %nop12563 = alloca i1, i1 0 + %nop12564 = alloca i1, i1 0 + %nop12565 = alloca i1, i1 0 + %nop12566 = alloca i1, i1 0 + %nop12567 = alloca i1, i1 0 + %nop12568 = alloca i1, i1 0 + %nop12569 = alloca i1, i1 0 + %nop12570 = alloca i1, i1 0 + %nop12571 = alloca i1, i1 0 + %nop12572 = alloca i1, i1 0 + %nop12573 = alloca i1, i1 0 + %nop12574 = alloca i1, i1 0 + %nop12575 = alloca i1, i1 0 + %nop12576 = alloca i1, i1 0 + %nop12577 = alloca i1, i1 0 + %nop12578 = alloca i1, i1 0 + %nop12579 = alloca i1, i1 0 + %nop12580 = alloca i1, i1 0 + %nop12581 = alloca i1, i1 0 + %nop12582 = alloca i1, i1 0 + %nop12583 = alloca i1, i1 0 + %nop12584 = alloca i1, i1 0 + %nop12585 = alloca i1, i1 0 + %nop12586 = alloca i1, i1 0 + %nop12587 = alloca i1, i1 0 + %nop12588 = alloca i1, i1 0 + %nop12589 = alloca i1, i1 0 + %nop12590 = alloca 
i1, i1 0 + %nop12591 = alloca i1, i1 0 + %nop12592 = alloca i1, i1 0 + %nop12593 = alloca i1, i1 0 + %nop12594 = alloca i1, i1 0 + %nop12595 = alloca i1, i1 0 + %nop12596 = alloca i1, i1 0 + %nop12597 = alloca i1, i1 0 + %nop12598 = alloca i1, i1 0 + %nop12599 = alloca i1, i1 0 + %nop12600 = alloca i1, i1 0 + %nop12601 = alloca i1, i1 0 + %nop12602 = alloca i1, i1 0 + %nop12603 = alloca i1, i1 0 + %nop12604 = alloca i1, i1 0 + %nop12605 = alloca i1, i1 0 + %nop12606 = alloca i1, i1 0 + %nop12607 = alloca i1, i1 0 + %nop12608 = alloca i1, i1 0 + %nop12609 = alloca i1, i1 0 + %nop12610 = alloca i1, i1 0 + %nop12611 = alloca i1, i1 0 + %nop12612 = alloca i1, i1 0 + %nop12613 = alloca i1, i1 0 + %nop12614 = alloca i1, i1 0 + %nop12615 = alloca i1, i1 0 + %nop12616 = alloca i1, i1 0 + %nop12617 = alloca i1, i1 0 + %nop12618 = alloca i1, i1 0 + %nop12619 = alloca i1, i1 0 + %nop12620 = alloca i1, i1 0 + %nop12621 = alloca i1, i1 0 + %nop12622 = alloca i1, i1 0 + %nop12623 = alloca i1, i1 0 + %nop12624 = alloca i1, i1 0 + %nop12625 = alloca i1, i1 0 + %nop12626 = alloca i1, i1 0 + %nop12627 = alloca i1, i1 0 + %nop12628 = alloca i1, i1 0 + %nop12629 = alloca i1, i1 0 + %nop12630 = alloca i1, i1 0 + %nop12631 = alloca i1, i1 0 + %nop12632 = alloca i1, i1 0 + %nop12633 = alloca i1, i1 0 + %nop12634 = alloca i1, i1 0 + %nop12635 = alloca i1, i1 0 + %nop12636 = alloca i1, i1 0 + %nop12637 = alloca i1, i1 0 + %nop12638 = alloca i1, i1 0 + %nop12639 = alloca i1, i1 0 + %nop12640 = alloca i1, i1 0 + %nop12641 = alloca i1, i1 0 + %nop12642 = alloca i1, i1 0 + %nop12643 = alloca i1, i1 0 + %nop12644 = alloca i1, i1 0 + %nop12645 = alloca i1, i1 0 + %nop12646 = alloca i1, i1 0 + %nop12647 = alloca i1, i1 0 + %nop12648 = alloca i1, i1 0 + %nop12649 = alloca i1, i1 0 + %nop12650 = alloca i1, i1 0 + %nop12651 = alloca i1, i1 0 + %nop12652 = alloca i1, i1 0 + %nop12653 = alloca i1, i1 0 + %nop12654 = alloca i1, i1 0 + %nop12655 = alloca i1, i1 0 + %nop12656 = alloca i1, i1 0 + 
%nop12657 = alloca i1, i1 0 + %nop12658 = alloca i1, i1 0 + %nop12659 = alloca i1, i1 0 + %nop12660 = alloca i1, i1 0 + %nop12661 = alloca i1, i1 0 + %nop12662 = alloca i1, i1 0 + %nop12663 = alloca i1, i1 0 + %nop12664 = alloca i1, i1 0 + %nop12665 = alloca i1, i1 0 + %nop12666 = alloca i1, i1 0 + %nop12667 = alloca i1, i1 0 + %nop12668 = alloca i1, i1 0 + %nop12669 = alloca i1, i1 0 + %nop12670 = alloca i1, i1 0 + %nop12671 = alloca i1, i1 0 + %nop12672 = alloca i1, i1 0 + %nop12673 = alloca i1, i1 0 + %nop12674 = alloca i1, i1 0 + %nop12675 = alloca i1, i1 0 + %nop12676 = alloca i1, i1 0 + %nop12677 = alloca i1, i1 0 + %nop12678 = alloca i1, i1 0 + %nop12679 = alloca i1, i1 0 + %nop12680 = alloca i1, i1 0 + %nop12681 = alloca i1, i1 0 + %nop12682 = alloca i1, i1 0 + %nop12683 = alloca i1, i1 0 + %nop12684 = alloca i1, i1 0 + %nop12685 = alloca i1, i1 0 + %nop12686 = alloca i1, i1 0 + %nop12687 = alloca i1, i1 0 + %nop12688 = alloca i1, i1 0 + %nop12689 = alloca i1, i1 0 + %nop12690 = alloca i1, i1 0 + %nop12691 = alloca i1, i1 0 + %nop12692 = alloca i1, i1 0 + %nop12693 = alloca i1, i1 0 + %nop12694 = alloca i1, i1 0 + %nop12695 = alloca i1, i1 0 + %nop12696 = alloca i1, i1 0 + %nop12697 = alloca i1, i1 0 + %nop12698 = alloca i1, i1 0 + %nop12699 = alloca i1, i1 0 + %nop12700 = alloca i1, i1 0 + %nop12701 = alloca i1, i1 0 + %nop12702 = alloca i1, i1 0 + %nop12703 = alloca i1, i1 0 + %nop12704 = alloca i1, i1 0 + %nop12705 = alloca i1, i1 0 + %nop12706 = alloca i1, i1 0 + %nop12707 = alloca i1, i1 0 + %nop12708 = alloca i1, i1 0 + %nop12709 = alloca i1, i1 0 + %nop12710 = alloca i1, i1 0 + %nop12711 = alloca i1, i1 0 + %nop12712 = alloca i1, i1 0 + %nop12713 = alloca i1, i1 0 + %nop12714 = alloca i1, i1 0 + %nop12715 = alloca i1, i1 0 + %nop12716 = alloca i1, i1 0 + %nop12717 = alloca i1, i1 0 + %nop12718 = alloca i1, i1 0 + %nop12719 = alloca i1, i1 0 + %nop12720 = alloca i1, i1 0 + %nop12721 = alloca i1, i1 0 + %nop12722 = alloca i1, i1 0 + %nop12723 = alloca 
i1, i1 0 + %nop12724 = alloca i1, i1 0 + %nop12725 = alloca i1, i1 0 + %nop12726 = alloca i1, i1 0 + %nop12727 = alloca i1, i1 0 + %nop12728 = alloca i1, i1 0 + %nop12729 = alloca i1, i1 0 + %nop12730 = alloca i1, i1 0 + %nop12731 = alloca i1, i1 0 + %nop12732 = alloca i1, i1 0 + %nop12733 = alloca i1, i1 0 + %nop12734 = alloca i1, i1 0 + %nop12735 = alloca i1, i1 0 + %nop12736 = alloca i1, i1 0 + %nop12737 = alloca i1, i1 0 + %nop12738 = alloca i1, i1 0 + %nop12739 = alloca i1, i1 0 + %nop12740 = alloca i1, i1 0 + %nop12741 = alloca i1, i1 0 + %nop12742 = alloca i1, i1 0 + %nop12743 = alloca i1, i1 0 + %nop12744 = alloca i1, i1 0 + %nop12745 = alloca i1, i1 0 + %nop12746 = alloca i1, i1 0 + %nop12747 = alloca i1, i1 0 + %nop12748 = alloca i1, i1 0 + %nop12749 = alloca i1, i1 0 + %nop12750 = alloca i1, i1 0 + %nop12751 = alloca i1, i1 0 + %nop12752 = alloca i1, i1 0 + %nop12753 = alloca i1, i1 0 + %nop12754 = alloca i1, i1 0 + %nop12755 = alloca i1, i1 0 + %nop12756 = alloca i1, i1 0 + %nop12757 = alloca i1, i1 0 + %nop12758 = alloca i1, i1 0 + %nop12759 = alloca i1, i1 0 + %nop12760 = alloca i1, i1 0 + %nop12761 = alloca i1, i1 0 + %nop12762 = alloca i1, i1 0 + %nop12763 = alloca i1, i1 0 + %nop12764 = alloca i1, i1 0 + %nop12765 = alloca i1, i1 0 + %nop12766 = alloca i1, i1 0 + %nop12767 = alloca i1, i1 0 + %nop12768 = alloca i1, i1 0 + %nop12769 = alloca i1, i1 0 + %nop12770 = alloca i1, i1 0 + %nop12771 = alloca i1, i1 0 + %nop12772 = alloca i1, i1 0 + %nop12773 = alloca i1, i1 0 + %nop12774 = alloca i1, i1 0 + %nop12775 = alloca i1, i1 0 + %nop12776 = alloca i1, i1 0 + %nop12777 = alloca i1, i1 0 + %nop12778 = alloca i1, i1 0 + %nop12779 = alloca i1, i1 0 + %nop12780 = alloca i1, i1 0 + %nop12781 = alloca i1, i1 0 + %nop12782 = alloca i1, i1 0 + %nop12783 = alloca i1, i1 0 + %nop12784 = alloca i1, i1 0 + %nop12785 = alloca i1, i1 0 + %nop12786 = alloca i1, i1 0 + %nop12787 = alloca i1, i1 0 + %nop12788 = alloca i1, i1 0 + %nop12789 = alloca i1, i1 0 + 
%nop12790 = alloca i1, i1 0 + %nop12791 = alloca i1, i1 0 + %nop12792 = alloca i1, i1 0 + %nop12793 = alloca i1, i1 0 + %nop12794 = alloca i1, i1 0 + %nop12795 = alloca i1, i1 0 + %nop12796 = alloca i1, i1 0 + %nop12797 = alloca i1, i1 0 + %nop12798 = alloca i1, i1 0 + %nop12799 = alloca i1, i1 0 + %nop12800 = alloca i1, i1 0 + %nop12801 = alloca i1, i1 0 + %nop12802 = alloca i1, i1 0 + %nop12803 = alloca i1, i1 0 + %nop12804 = alloca i1, i1 0 + %nop12805 = alloca i1, i1 0 + %nop12806 = alloca i1, i1 0 + %nop12807 = alloca i1, i1 0 + %nop12808 = alloca i1, i1 0 + %nop12809 = alloca i1, i1 0 + %nop12810 = alloca i1, i1 0 + %nop12811 = alloca i1, i1 0 + %nop12812 = alloca i1, i1 0 + %nop12813 = alloca i1, i1 0 + %nop12814 = alloca i1, i1 0 + %nop12815 = alloca i1, i1 0 + %nop12816 = alloca i1, i1 0 + %nop12817 = alloca i1, i1 0 + %nop12818 = alloca i1, i1 0 + %nop12819 = alloca i1, i1 0 + %nop12820 = alloca i1, i1 0 + %nop12821 = alloca i1, i1 0 + %nop12822 = alloca i1, i1 0 + %nop12823 = alloca i1, i1 0 + %nop12824 = alloca i1, i1 0 + %nop12825 = alloca i1, i1 0 + %nop12826 = alloca i1, i1 0 + %nop12827 = alloca i1, i1 0 + %nop12828 = alloca i1, i1 0 + %nop12829 = alloca i1, i1 0 + %nop12830 = alloca i1, i1 0 + %nop12831 = alloca i1, i1 0 + %nop12832 = alloca i1, i1 0 + %nop12833 = alloca i1, i1 0 + %nop12834 = alloca i1, i1 0 + %nop12835 = alloca i1, i1 0 + %nop12836 = alloca i1, i1 0 + %nop12837 = alloca i1, i1 0 + %nop12838 = alloca i1, i1 0 + %nop12839 = alloca i1, i1 0 + %nop12840 = alloca i1, i1 0 + %nop12841 = alloca i1, i1 0 + %nop12842 = alloca i1, i1 0 + %nop12843 = alloca i1, i1 0 + %nop12844 = alloca i1, i1 0 + %nop12845 = alloca i1, i1 0 + %nop12846 = alloca i1, i1 0 + %nop12847 = alloca i1, i1 0 + %nop12848 = alloca i1, i1 0 + %nop12849 = alloca i1, i1 0 + %nop12850 = alloca i1, i1 0 + %nop12851 = alloca i1, i1 0 + %nop12852 = alloca i1, i1 0 + %nop12853 = alloca i1, i1 0 + %nop12854 = alloca i1, i1 0 + %nop12855 = alloca i1, i1 0 + %nop12856 = alloca 
i1, i1 0 + %nop12857 = alloca i1, i1 0 + %nop12858 = alloca i1, i1 0 + %nop12859 = alloca i1, i1 0 + %nop12860 = alloca i1, i1 0 + %nop12861 = alloca i1, i1 0 + %nop12862 = alloca i1, i1 0 + %nop12863 = alloca i1, i1 0 + %nop12864 = alloca i1, i1 0 + %nop12865 = alloca i1, i1 0 + %nop12866 = alloca i1, i1 0 + %nop12867 = alloca i1, i1 0 + %nop12868 = alloca i1, i1 0 + %nop12869 = alloca i1, i1 0 + %nop12870 = alloca i1, i1 0 + %nop12871 = alloca i1, i1 0 + %nop12872 = alloca i1, i1 0 + %nop12873 = alloca i1, i1 0 + %nop12874 = alloca i1, i1 0 + %nop12875 = alloca i1, i1 0 + %nop12876 = alloca i1, i1 0 + %nop12877 = alloca i1, i1 0 + %nop12878 = alloca i1, i1 0 + %nop12879 = alloca i1, i1 0 + %nop12880 = alloca i1, i1 0 + %nop12881 = alloca i1, i1 0 + %nop12882 = alloca i1, i1 0 + %nop12883 = alloca i1, i1 0 + %nop12884 = alloca i1, i1 0 + %nop12885 = alloca i1, i1 0 + %nop12886 = alloca i1, i1 0 + %nop12887 = alloca i1, i1 0 + %nop12888 = alloca i1, i1 0 + %nop12889 = alloca i1, i1 0 + %nop12890 = alloca i1, i1 0 + %nop12891 = alloca i1, i1 0 + %nop12892 = alloca i1, i1 0 + %nop12893 = alloca i1, i1 0 + %nop12894 = alloca i1, i1 0 + %nop12895 = alloca i1, i1 0 + %nop12896 = alloca i1, i1 0 + %nop12897 = alloca i1, i1 0 + %nop12898 = alloca i1, i1 0 + %nop12899 = alloca i1, i1 0 + %nop12900 = alloca i1, i1 0 + %nop12901 = alloca i1, i1 0 + %nop12902 = alloca i1, i1 0 + %nop12903 = alloca i1, i1 0 + %nop12904 = alloca i1, i1 0 + %nop12905 = alloca i1, i1 0 + %nop12906 = alloca i1, i1 0 + %nop12907 = alloca i1, i1 0 + %nop12908 = alloca i1, i1 0 + %nop12909 = alloca i1, i1 0 + %nop12910 = alloca i1, i1 0 + %nop12911 = alloca i1, i1 0 + %nop12912 = alloca i1, i1 0 + %nop12913 = alloca i1, i1 0 + %nop12914 = alloca i1, i1 0 + %nop12915 = alloca i1, i1 0 + %nop12916 = alloca i1, i1 0 + %nop12917 = alloca i1, i1 0 + %nop12918 = alloca i1, i1 0 + %nop12919 = alloca i1, i1 0 + %nop12920 = alloca i1, i1 0 + %nop12921 = alloca i1, i1 0 + %nop12922 = alloca i1, i1 0 + 
%nop12923 = alloca i1, i1 0 + %nop12924 = alloca i1, i1 0 + %nop12925 = alloca i1, i1 0 + %nop12926 = alloca i1, i1 0 + %nop12927 = alloca i1, i1 0 + %nop12928 = alloca i1, i1 0 + %nop12929 = alloca i1, i1 0 + %nop12930 = alloca i1, i1 0 + %nop12931 = alloca i1, i1 0 + %nop12932 = alloca i1, i1 0 + %nop12933 = alloca i1, i1 0 + %nop12934 = alloca i1, i1 0 + %nop12935 = alloca i1, i1 0 + %nop12936 = alloca i1, i1 0 + %nop12937 = alloca i1, i1 0 + %nop12938 = alloca i1, i1 0 + %nop12939 = alloca i1, i1 0 + %nop12940 = alloca i1, i1 0 + %nop12941 = alloca i1, i1 0 + %nop12942 = alloca i1, i1 0 + %nop12943 = alloca i1, i1 0 + %nop12944 = alloca i1, i1 0 + %nop12945 = alloca i1, i1 0 + %nop12946 = alloca i1, i1 0 + %nop12947 = alloca i1, i1 0 + %nop12948 = alloca i1, i1 0 + %nop12949 = alloca i1, i1 0 + %nop12950 = alloca i1, i1 0 + %nop12951 = alloca i1, i1 0 + %nop12952 = alloca i1, i1 0 + %nop12953 = alloca i1, i1 0 + %nop12954 = alloca i1, i1 0 + %nop12955 = alloca i1, i1 0 + %nop12956 = alloca i1, i1 0 + %nop12957 = alloca i1, i1 0 + %nop12958 = alloca i1, i1 0 + %nop12959 = alloca i1, i1 0 + %nop12960 = alloca i1, i1 0 + %nop12961 = alloca i1, i1 0 + %nop12962 = alloca i1, i1 0 + %nop12963 = alloca i1, i1 0 + %nop12964 = alloca i1, i1 0 + %nop12965 = alloca i1, i1 0 + %nop12966 = alloca i1, i1 0 + %nop12967 = alloca i1, i1 0 + %nop12968 = alloca i1, i1 0 + %nop12969 = alloca i1, i1 0 + %nop12970 = alloca i1, i1 0 + %nop12971 = alloca i1, i1 0 + %nop12972 = alloca i1, i1 0 + %nop12973 = alloca i1, i1 0 + %nop12974 = alloca i1, i1 0 + %nop12975 = alloca i1, i1 0 + %nop12976 = alloca i1, i1 0 + %nop12977 = alloca i1, i1 0 + %nop12978 = alloca i1, i1 0 + %nop12979 = alloca i1, i1 0 + %nop12980 = alloca i1, i1 0 + %nop12981 = alloca i1, i1 0 + %nop12982 = alloca i1, i1 0 + %nop12983 = alloca i1, i1 0 + %nop12984 = alloca i1, i1 0 + %nop12985 = alloca i1, i1 0 + %nop12986 = alloca i1, i1 0 + %nop12987 = alloca i1, i1 0 + %nop12988 = alloca i1, i1 0 + %nop12989 = alloca 
i1, i1 0 + %nop12990 = alloca i1, i1 0 + %nop12991 = alloca i1, i1 0 + %nop12992 = alloca i1, i1 0 + %nop12993 = alloca i1, i1 0 + %nop12994 = alloca i1, i1 0 + %nop12995 = alloca i1, i1 0 + %nop12996 = alloca i1, i1 0 + %nop12997 = alloca i1, i1 0 + %nop12998 = alloca i1, i1 0 + %nop12999 = alloca i1, i1 0 + %nop13000 = alloca i1, i1 0 + %nop13001 = alloca i1, i1 0 + %nop13002 = alloca i1, i1 0 + %nop13003 = alloca i1, i1 0 + %nop13004 = alloca i1, i1 0 + %nop13005 = alloca i1, i1 0 + %nop13006 = alloca i1, i1 0 + %nop13007 = alloca i1, i1 0 + %nop13008 = alloca i1, i1 0 + %nop13009 = alloca i1, i1 0 + %nop13010 = alloca i1, i1 0 + %nop13011 = alloca i1, i1 0 + %nop13012 = alloca i1, i1 0 + %nop13013 = alloca i1, i1 0 + %nop13014 = alloca i1, i1 0 + %nop13015 = alloca i1, i1 0 + %nop13016 = alloca i1, i1 0 + %nop13017 = alloca i1, i1 0 + %nop13018 = alloca i1, i1 0 + %nop13019 = alloca i1, i1 0 + %nop13020 = alloca i1, i1 0 + %nop13021 = alloca i1, i1 0 + %nop13022 = alloca i1, i1 0 + %nop13023 = alloca i1, i1 0 + %nop13024 = alloca i1, i1 0 + %nop13025 = alloca i1, i1 0 + %nop13026 = alloca i1, i1 0 + %nop13027 = alloca i1, i1 0 + %nop13028 = alloca i1, i1 0 + %nop13029 = alloca i1, i1 0 + %nop13030 = alloca i1, i1 0 + %nop13031 = alloca i1, i1 0 + %nop13032 = alloca i1, i1 0 + %nop13033 = alloca i1, i1 0 + %nop13034 = alloca i1, i1 0 + %nop13035 = alloca i1, i1 0 + %nop13036 = alloca i1, i1 0 + %nop13037 = alloca i1, i1 0 + %nop13038 = alloca i1, i1 0 + %nop13039 = alloca i1, i1 0 + %nop13040 = alloca i1, i1 0 + %nop13041 = alloca i1, i1 0 + %nop13042 = alloca i1, i1 0 + %nop13043 = alloca i1, i1 0 + %nop13044 = alloca i1, i1 0 + %nop13045 = alloca i1, i1 0 + %nop13046 = alloca i1, i1 0 + %nop13047 = alloca i1, i1 0 + %nop13048 = alloca i1, i1 0 + %nop13049 = alloca i1, i1 0 + %nop13050 = alloca i1, i1 0 + %nop13051 = alloca i1, i1 0 + %nop13052 = alloca i1, i1 0 + %nop13053 = alloca i1, i1 0 + %nop13054 = alloca i1, i1 0 + %nop13055 = alloca i1, i1 0 + 
%nop13056 = alloca i1, i1 0 + %nop13057 = alloca i1, i1 0 + %nop13058 = alloca i1, i1 0 + %nop13059 = alloca i1, i1 0 + %nop13060 = alloca i1, i1 0 + %nop13061 = alloca i1, i1 0 + %nop13062 = alloca i1, i1 0 + %nop13063 = alloca i1, i1 0 + %nop13064 = alloca i1, i1 0 + %nop13065 = alloca i1, i1 0 + %nop13066 = alloca i1, i1 0 + %nop13067 = alloca i1, i1 0 + %nop13068 = alloca i1, i1 0 + %nop13069 = alloca i1, i1 0 + %nop13070 = alloca i1, i1 0 + %nop13071 = alloca i1, i1 0 + %nop13072 = alloca i1, i1 0 + %nop13073 = alloca i1, i1 0 + %nop13074 = alloca i1, i1 0 + %nop13075 = alloca i1, i1 0 + %nop13076 = alloca i1, i1 0 + %nop13077 = alloca i1, i1 0 + %nop13078 = alloca i1, i1 0 + %nop13079 = alloca i1, i1 0 + %nop13080 = alloca i1, i1 0 + %nop13081 = alloca i1, i1 0 + %nop13082 = alloca i1, i1 0 + %nop13083 = alloca i1, i1 0 + %nop13084 = alloca i1, i1 0 + %nop13085 = alloca i1, i1 0 + %nop13086 = alloca i1, i1 0 + %nop13087 = alloca i1, i1 0 + %nop13088 = alloca i1, i1 0 + %nop13089 = alloca i1, i1 0 + %nop13090 = alloca i1, i1 0 + %nop13091 = alloca i1, i1 0 + %nop13092 = alloca i1, i1 0 + %nop13093 = alloca i1, i1 0 + %nop13094 = alloca i1, i1 0 + %nop13095 = alloca i1, i1 0 + %nop13096 = alloca i1, i1 0 + %nop13097 = alloca i1, i1 0 + %nop13098 = alloca i1, i1 0 + %nop13099 = alloca i1, i1 0 + %nop13100 = alloca i1, i1 0 + %nop13101 = alloca i1, i1 0 + %nop13102 = alloca i1, i1 0 + %nop13103 = alloca i1, i1 0 + %nop13104 = alloca i1, i1 0 + %nop13105 = alloca i1, i1 0 + %nop13106 = alloca i1, i1 0 + %nop13107 = alloca i1, i1 0 + %nop13108 = alloca i1, i1 0 + %nop13109 = alloca i1, i1 0 + %nop13110 = alloca i1, i1 0 + %nop13111 = alloca i1, i1 0 + %nop13112 = alloca i1, i1 0 + %nop13113 = alloca i1, i1 0 + %nop13114 = alloca i1, i1 0 + %nop13115 = alloca i1, i1 0 + %nop13116 = alloca i1, i1 0 + %nop13117 = alloca i1, i1 0 + %nop13118 = alloca i1, i1 0 + %nop13119 = alloca i1, i1 0 + %nop13120 = alloca i1, i1 0 + %nop13121 = alloca i1, i1 0 + %nop13122 = alloca 
i1, i1 0 + %nop13123 = alloca i1, i1 0 + %nop13124 = alloca i1, i1 0 + %nop13125 = alloca i1, i1 0 + %nop13126 = alloca i1, i1 0 + %nop13127 = alloca i1, i1 0 + %nop13128 = alloca i1, i1 0 + %nop13129 = alloca i1, i1 0 + %nop13130 = alloca i1, i1 0 + %nop13131 = alloca i1, i1 0 + %nop13132 = alloca i1, i1 0 + %nop13133 = alloca i1, i1 0 + %nop13134 = alloca i1, i1 0 + %nop13135 = alloca i1, i1 0 + %nop13136 = alloca i1, i1 0 + %nop13137 = alloca i1, i1 0 + %nop13138 = alloca i1, i1 0 + %nop13139 = alloca i1, i1 0 + %nop13140 = alloca i1, i1 0 + %nop13141 = alloca i1, i1 0 + %nop13142 = alloca i1, i1 0 + %nop13143 = alloca i1, i1 0 + %nop13144 = alloca i1, i1 0 + %nop13145 = alloca i1, i1 0 + %nop13146 = alloca i1, i1 0 + %nop13147 = alloca i1, i1 0 + %nop13148 = alloca i1, i1 0 + %nop13149 = alloca i1, i1 0 + %nop13150 = alloca i1, i1 0 + %nop13151 = alloca i1, i1 0 + %nop13152 = alloca i1, i1 0 + %nop13153 = alloca i1, i1 0 + %nop13154 = alloca i1, i1 0 + %nop13155 = alloca i1, i1 0 + %nop13156 = alloca i1, i1 0 + %nop13157 = alloca i1, i1 0 + %nop13158 = alloca i1, i1 0 + %nop13159 = alloca i1, i1 0 + %nop13160 = alloca i1, i1 0 + %nop13161 = alloca i1, i1 0 + %nop13162 = alloca i1, i1 0 + %nop13163 = alloca i1, i1 0 + %nop13164 = alloca i1, i1 0 + %nop13165 = alloca i1, i1 0 + %nop13166 = alloca i1, i1 0 + %nop13167 = alloca i1, i1 0 + %nop13168 = alloca i1, i1 0 + %nop13169 = alloca i1, i1 0 + %nop13170 = alloca i1, i1 0 + %nop13171 = alloca i1, i1 0 + %nop13172 = alloca i1, i1 0 + %nop13173 = alloca i1, i1 0 + %nop13174 = alloca i1, i1 0 + %nop13175 = alloca i1, i1 0 + %nop13176 = alloca i1, i1 0 + %nop13177 = alloca i1, i1 0 + %nop13178 = alloca i1, i1 0 + %nop13179 = alloca i1, i1 0 + %nop13180 = alloca i1, i1 0 + %nop13181 = alloca i1, i1 0 + %nop13182 = alloca i1, i1 0 + %nop13183 = alloca i1, i1 0 + %nop13184 = alloca i1, i1 0 + %nop13185 = alloca i1, i1 0 + %nop13186 = alloca i1, i1 0 + %nop13187 = alloca i1, i1 0 + %nop13188 = alloca i1, i1 0 + 
%nop13189 = alloca i1, i1 0 + %nop13190 = alloca i1, i1 0 + %nop13191 = alloca i1, i1 0 + %nop13192 = alloca i1, i1 0 + %nop13193 = alloca i1, i1 0 + %nop13194 = alloca i1, i1 0 + %nop13195 = alloca i1, i1 0 + %nop13196 = alloca i1, i1 0 + %nop13197 = alloca i1, i1 0 + %nop13198 = alloca i1, i1 0 + %nop13199 = alloca i1, i1 0 + %nop13200 = alloca i1, i1 0 + %nop13201 = alloca i1, i1 0 + %nop13202 = alloca i1, i1 0 + %nop13203 = alloca i1, i1 0 + %nop13204 = alloca i1, i1 0 + %nop13205 = alloca i1, i1 0 + %nop13206 = alloca i1, i1 0 + %nop13207 = alloca i1, i1 0 + %nop13208 = alloca i1, i1 0 + %nop13209 = alloca i1, i1 0 + %nop13210 = alloca i1, i1 0 + %nop13211 = alloca i1, i1 0 + %nop13212 = alloca i1, i1 0 + %nop13213 = alloca i1, i1 0 + %nop13214 = alloca i1, i1 0 + %nop13215 = alloca i1, i1 0 + %nop13216 = alloca i1, i1 0 + %nop13217 = alloca i1, i1 0 + %nop13218 = alloca i1, i1 0 + %nop13219 = alloca i1, i1 0 + %nop13220 = alloca i1, i1 0 + %nop13221 = alloca i1, i1 0 + %nop13222 = alloca i1, i1 0 + %nop13223 = alloca i1, i1 0 + %nop13224 = alloca i1, i1 0 + %nop13225 = alloca i1, i1 0 + %nop13226 = alloca i1, i1 0 + %nop13227 = alloca i1, i1 0 + %nop13228 = alloca i1, i1 0 + %nop13229 = alloca i1, i1 0 + %nop13230 = alloca i1, i1 0 + %nop13231 = alloca i1, i1 0 + %nop13232 = alloca i1, i1 0 + %nop13233 = alloca i1, i1 0 + %nop13234 = alloca i1, i1 0 + %nop13235 = alloca i1, i1 0 + %nop13236 = alloca i1, i1 0 + %nop13237 = alloca i1, i1 0 + %nop13238 = alloca i1, i1 0 + %nop13239 = alloca i1, i1 0 + %nop13240 = alloca i1, i1 0 + %nop13241 = alloca i1, i1 0 + %nop13242 = alloca i1, i1 0 + %nop13243 = alloca i1, i1 0 + %nop13244 = alloca i1, i1 0 + %nop13245 = alloca i1, i1 0 + %nop13246 = alloca i1, i1 0 + %nop13247 = alloca i1, i1 0 + %nop13248 = alloca i1, i1 0 + %nop13249 = alloca i1, i1 0 + %nop13250 = alloca i1, i1 0 + %nop13251 = alloca i1, i1 0 + %nop13252 = alloca i1, i1 0 + %nop13253 = alloca i1, i1 0 + %nop13254 = alloca i1, i1 0 + %nop13255 = alloca 
i1, i1 0 + %nop13256 = alloca i1, i1 0 + %nop13257 = alloca i1, i1 0 + %nop13258 = alloca i1, i1 0 + %nop13259 = alloca i1, i1 0 + %nop13260 = alloca i1, i1 0 + %nop13261 = alloca i1, i1 0 + %nop13262 = alloca i1, i1 0 + %nop13263 = alloca i1, i1 0 + %nop13264 = alloca i1, i1 0 + %nop13265 = alloca i1, i1 0 + %nop13266 = alloca i1, i1 0 + %nop13267 = alloca i1, i1 0 + %nop13268 = alloca i1, i1 0 + %nop13269 = alloca i1, i1 0 + %nop13270 = alloca i1, i1 0 + %nop13271 = alloca i1, i1 0 + %nop13272 = alloca i1, i1 0 + %nop13273 = alloca i1, i1 0 + %nop13274 = alloca i1, i1 0 + %nop13275 = alloca i1, i1 0 + %nop13276 = alloca i1, i1 0 + %nop13277 = alloca i1, i1 0 + %nop13278 = alloca i1, i1 0 + %nop13279 = alloca i1, i1 0 + %nop13280 = alloca i1, i1 0 + %nop13281 = alloca i1, i1 0 + %nop13282 = alloca i1, i1 0 + %nop13283 = alloca i1, i1 0 + %nop13284 = alloca i1, i1 0 + %nop13285 = alloca i1, i1 0 + %nop13286 = alloca i1, i1 0 + %nop13287 = alloca i1, i1 0 + %nop13288 = alloca i1, i1 0 + %nop13289 = alloca i1, i1 0 + %nop13290 = alloca i1, i1 0 + %nop13291 = alloca i1, i1 0 + %nop13292 = alloca i1, i1 0 + %nop13293 = alloca i1, i1 0 + %nop13294 = alloca i1, i1 0 + %nop13295 = alloca i1, i1 0 + %nop13296 = alloca i1, i1 0 + %nop13297 = alloca i1, i1 0 + %nop13298 = alloca i1, i1 0 + %nop13299 = alloca i1, i1 0 + %nop13300 = alloca i1, i1 0 + %nop13301 = alloca i1, i1 0 + %nop13302 = alloca i1, i1 0 + %nop13303 = alloca i1, i1 0 + %nop13304 = alloca i1, i1 0 + %nop13305 = alloca i1, i1 0 + %nop13306 = alloca i1, i1 0 + %nop13307 = alloca i1, i1 0 + %nop13308 = alloca i1, i1 0 + %nop13309 = alloca i1, i1 0 + %nop13310 = alloca i1, i1 0 + %nop13311 = alloca i1, i1 0 + %nop13312 = alloca i1, i1 0 + %nop13313 = alloca i1, i1 0 + %nop13314 = alloca i1, i1 0 + %nop13315 = alloca i1, i1 0 + %nop13316 = alloca i1, i1 0 + %nop13317 = alloca i1, i1 0 + %nop13318 = alloca i1, i1 0 + %nop13319 = alloca i1, i1 0 + %nop13320 = alloca i1, i1 0 + %nop13321 = alloca i1, i1 0 + 
%nop13322 = alloca i1, i1 0 + %nop13323 = alloca i1, i1 0 + %nop13324 = alloca i1, i1 0 + %nop13325 = alloca i1, i1 0 + %nop13326 = alloca i1, i1 0 + %nop13327 = alloca i1, i1 0 + %nop13328 = alloca i1, i1 0 + %nop13329 = alloca i1, i1 0 + %nop13330 = alloca i1, i1 0 + %nop13331 = alloca i1, i1 0 + %nop13332 = alloca i1, i1 0 + %nop13333 = alloca i1, i1 0 + %nop13334 = alloca i1, i1 0 + %nop13335 = alloca i1, i1 0 + %nop13336 = alloca i1, i1 0 + %nop13337 = alloca i1, i1 0 + %nop13338 = alloca i1, i1 0 + %nop13339 = alloca i1, i1 0 + %nop13340 = alloca i1, i1 0 + %nop13341 = alloca i1, i1 0 + %nop13342 = alloca i1, i1 0 + %nop13343 = alloca i1, i1 0 + %nop13344 = alloca i1, i1 0 + %nop13345 = alloca i1, i1 0 + %nop13346 = alloca i1, i1 0 + %nop13347 = alloca i1, i1 0 + %nop13348 = alloca i1, i1 0 + %nop13349 = alloca i1, i1 0 + %nop13350 = alloca i1, i1 0 + %nop13351 = alloca i1, i1 0 + %nop13352 = alloca i1, i1 0 + %nop13353 = alloca i1, i1 0 + %nop13354 = alloca i1, i1 0 + %nop13355 = alloca i1, i1 0 + %nop13356 = alloca i1, i1 0 + %nop13357 = alloca i1, i1 0 + %nop13358 = alloca i1, i1 0 + %nop13359 = alloca i1, i1 0 + %nop13360 = alloca i1, i1 0 + %nop13361 = alloca i1, i1 0 + %nop13362 = alloca i1, i1 0 + %nop13363 = alloca i1, i1 0 + %nop13364 = alloca i1, i1 0 + %nop13365 = alloca i1, i1 0 + %nop13366 = alloca i1, i1 0 + %nop13367 = alloca i1, i1 0 + %nop13368 = alloca i1, i1 0 + %nop13369 = alloca i1, i1 0 + %nop13370 = alloca i1, i1 0 + %nop13371 = alloca i1, i1 0 + %nop13372 = alloca i1, i1 0 + %nop13373 = alloca i1, i1 0 + %nop13374 = alloca i1, i1 0 + %nop13375 = alloca i1, i1 0 + %nop13376 = alloca i1, i1 0 + %nop13377 = alloca i1, i1 0 + %nop13378 = alloca i1, i1 0 + %nop13379 = alloca i1, i1 0 + %nop13380 = alloca i1, i1 0 + %nop13381 = alloca i1, i1 0 + %nop13382 = alloca i1, i1 0 + %nop13383 = alloca i1, i1 0 + %nop13384 = alloca i1, i1 0 + %nop13385 = alloca i1, i1 0 + %nop13386 = alloca i1, i1 0 + %nop13387 = alloca i1, i1 0 + %nop13388 = alloca 
i1, i1 0 + %nop13389 = alloca i1, i1 0 + %nop13390 = alloca i1, i1 0 + %nop13391 = alloca i1, i1 0 + %nop13392 = alloca i1, i1 0 + %nop13393 = alloca i1, i1 0 + %nop13394 = alloca i1, i1 0 + %nop13395 = alloca i1, i1 0 + %nop13396 = alloca i1, i1 0 + %nop13397 = alloca i1, i1 0 + %nop13398 = alloca i1, i1 0 + %nop13399 = alloca i1, i1 0 + %nop13400 = alloca i1, i1 0 + %nop13401 = alloca i1, i1 0 + %nop13402 = alloca i1, i1 0 + %nop13403 = alloca i1, i1 0 + %nop13404 = alloca i1, i1 0 + %nop13405 = alloca i1, i1 0 + %nop13406 = alloca i1, i1 0 + %nop13407 = alloca i1, i1 0 + %nop13408 = alloca i1, i1 0 + %nop13409 = alloca i1, i1 0 + %nop13410 = alloca i1, i1 0 + %nop13411 = alloca i1, i1 0 + %nop13412 = alloca i1, i1 0 + %nop13413 = alloca i1, i1 0 + %nop13414 = alloca i1, i1 0 + %nop13415 = alloca i1, i1 0 + %nop13416 = alloca i1, i1 0 + %nop13417 = alloca i1, i1 0 + %nop13418 = alloca i1, i1 0 + %nop13419 = alloca i1, i1 0 + %nop13420 = alloca i1, i1 0 + %nop13421 = alloca i1, i1 0 + %nop13422 = alloca i1, i1 0 + %nop13423 = alloca i1, i1 0 + %nop13424 = alloca i1, i1 0 + %nop13425 = alloca i1, i1 0 + %nop13426 = alloca i1, i1 0 + %nop13427 = alloca i1, i1 0 + %nop13428 = alloca i1, i1 0 + %nop13429 = alloca i1, i1 0 + %nop13430 = alloca i1, i1 0 + %nop13431 = alloca i1, i1 0 + %nop13432 = alloca i1, i1 0 + %nop13433 = alloca i1, i1 0 + %nop13434 = alloca i1, i1 0 + %nop13435 = alloca i1, i1 0 + %nop13436 = alloca i1, i1 0 + %nop13437 = alloca i1, i1 0 + %nop13438 = alloca i1, i1 0 + %nop13439 = alloca i1, i1 0 + %nop13440 = alloca i1, i1 0 + %nop13441 = alloca i1, i1 0 + %nop13442 = alloca i1, i1 0 + %nop13443 = alloca i1, i1 0 + %nop13444 = alloca i1, i1 0 + %nop13445 = alloca i1, i1 0 + %nop13446 = alloca i1, i1 0 + %nop13447 = alloca i1, i1 0 + %nop13448 = alloca i1, i1 0 + %nop13449 = alloca i1, i1 0 + %nop13450 = alloca i1, i1 0 + %nop13451 = alloca i1, i1 0 + %nop13452 = alloca i1, i1 0 + %nop13453 = alloca i1, i1 0 + %nop13454 = alloca i1, i1 0 + 
%nop13455 = alloca i1, i1 0 + %nop13456 = alloca i1, i1 0 + %nop13457 = alloca i1, i1 0 + %nop13458 = alloca i1, i1 0 + %nop13459 = alloca i1, i1 0 + %nop13460 = alloca i1, i1 0 + %nop13461 = alloca i1, i1 0 + %nop13462 = alloca i1, i1 0 + %nop13463 = alloca i1, i1 0 + %nop13464 = alloca i1, i1 0 + %nop13465 = alloca i1, i1 0 + %nop13466 = alloca i1, i1 0 + %nop13467 = alloca i1, i1 0 + %nop13468 = alloca i1, i1 0 + %nop13469 = alloca i1, i1 0 + %nop13470 = alloca i1, i1 0 + %nop13471 = alloca i1, i1 0 + %nop13472 = alloca i1, i1 0 + %nop13473 = alloca i1, i1 0 + %nop13474 = alloca i1, i1 0 + %nop13475 = alloca i1, i1 0 + %nop13476 = alloca i1, i1 0 + %nop13477 = alloca i1, i1 0 + %nop13478 = alloca i1, i1 0 + %nop13479 = alloca i1, i1 0 + %nop13480 = alloca i1, i1 0 + %nop13481 = alloca i1, i1 0 + %nop13482 = alloca i1, i1 0 + %nop13483 = alloca i1, i1 0 + %nop13484 = alloca i1, i1 0 + %nop13485 = alloca i1, i1 0 + %nop13486 = alloca i1, i1 0 + %nop13487 = alloca i1, i1 0 + %nop13488 = alloca i1, i1 0 + %nop13489 = alloca i1, i1 0 + %nop13490 = alloca i1, i1 0 + %nop13491 = alloca i1, i1 0 + %nop13492 = alloca i1, i1 0 + %nop13493 = alloca i1, i1 0 + %nop13494 = alloca i1, i1 0 + %nop13495 = alloca i1, i1 0 + %nop13496 = alloca i1, i1 0 + %nop13497 = alloca i1, i1 0 + %nop13498 = alloca i1, i1 0 + %nop13499 = alloca i1, i1 0 + %nop13500 = alloca i1, i1 0 + %nop13501 = alloca i1, i1 0 + %nop13502 = alloca i1, i1 0 + %nop13503 = alloca i1, i1 0 + %nop13504 = alloca i1, i1 0 + %nop13505 = alloca i1, i1 0 + %nop13506 = alloca i1, i1 0 + %nop13507 = alloca i1, i1 0 + %nop13508 = alloca i1, i1 0 + %nop13509 = alloca i1, i1 0 + %nop13510 = alloca i1, i1 0 + %nop13511 = alloca i1, i1 0 + %nop13512 = alloca i1, i1 0 + %nop13513 = alloca i1, i1 0 + %nop13514 = alloca i1, i1 0 + %nop13515 = alloca i1, i1 0 + %nop13516 = alloca i1, i1 0 + %nop13517 = alloca i1, i1 0 + %nop13518 = alloca i1, i1 0 + %nop13519 = alloca i1, i1 0 + %nop13520 = alloca i1, i1 0 + %nop13521 = alloca 
i1, i1 0 + %nop13522 = alloca i1, i1 0 + %nop13523 = alloca i1, i1 0 + %nop13524 = alloca i1, i1 0 + %nop13525 = alloca i1, i1 0 + %nop13526 = alloca i1, i1 0 + %nop13527 = alloca i1, i1 0 + %nop13528 = alloca i1, i1 0 + %nop13529 = alloca i1, i1 0 + %nop13530 = alloca i1, i1 0 + %nop13531 = alloca i1, i1 0 + %nop13532 = alloca i1, i1 0 + %nop13533 = alloca i1, i1 0 + %nop13534 = alloca i1, i1 0 + %nop13535 = alloca i1, i1 0 + %nop13536 = alloca i1, i1 0 + %nop13537 = alloca i1, i1 0 + %nop13538 = alloca i1, i1 0 + %nop13539 = alloca i1, i1 0 + %nop13540 = alloca i1, i1 0 + %nop13541 = alloca i1, i1 0 + %nop13542 = alloca i1, i1 0 + %nop13543 = alloca i1, i1 0 + %nop13544 = alloca i1, i1 0 + %nop13545 = alloca i1, i1 0 + %nop13546 = alloca i1, i1 0 + %nop13547 = alloca i1, i1 0 + %nop13548 = alloca i1, i1 0 + %nop13549 = alloca i1, i1 0 + %nop13550 = alloca i1, i1 0 + %nop13551 = alloca i1, i1 0 + %nop13552 = alloca i1, i1 0 + %nop13553 = alloca i1, i1 0 + %nop13554 = alloca i1, i1 0 + %nop13555 = alloca i1, i1 0 + %nop13556 = alloca i1, i1 0 + %nop13557 = alloca i1, i1 0 + %nop13558 = alloca i1, i1 0 + %nop13559 = alloca i1, i1 0 + %nop13560 = alloca i1, i1 0 + %nop13561 = alloca i1, i1 0 + %nop13562 = alloca i1, i1 0 + %nop13563 = alloca i1, i1 0 + %nop13564 = alloca i1, i1 0 + %nop13565 = alloca i1, i1 0 + %nop13566 = alloca i1, i1 0 + %nop13567 = alloca i1, i1 0 + %nop13568 = alloca i1, i1 0 + %nop13569 = alloca i1, i1 0 + %nop13570 = alloca i1, i1 0 + %nop13571 = alloca i1, i1 0 + %nop13572 = alloca i1, i1 0 + %nop13573 = alloca i1, i1 0 + %nop13574 = alloca i1, i1 0 + %nop13575 = alloca i1, i1 0 + %nop13576 = alloca i1, i1 0 + %nop13577 = alloca i1, i1 0 + %nop13578 = alloca i1, i1 0 + %nop13579 = alloca i1, i1 0 + %nop13580 = alloca i1, i1 0 + %nop13581 = alloca i1, i1 0 + %nop13582 = alloca i1, i1 0 + %nop13583 = alloca i1, i1 0 + %nop13584 = alloca i1, i1 0 + %nop13585 = alloca i1, i1 0 + %nop13586 = alloca i1, i1 0 + %nop13587 = alloca i1, i1 0 + 
%nop13588 = alloca i1, i1 0 + %nop13589 = alloca i1, i1 0 + %nop13590 = alloca i1, i1 0 + %nop13591 = alloca i1, i1 0 + %nop13592 = alloca i1, i1 0 + %nop13593 = alloca i1, i1 0 + %nop13594 = alloca i1, i1 0 + %nop13595 = alloca i1, i1 0 + %nop13596 = alloca i1, i1 0 + %nop13597 = alloca i1, i1 0 + %nop13598 = alloca i1, i1 0 + %nop13599 = alloca i1, i1 0 + %nop13600 = alloca i1, i1 0 + %nop13601 = alloca i1, i1 0 + %nop13602 = alloca i1, i1 0 + %nop13603 = alloca i1, i1 0 + %nop13604 = alloca i1, i1 0 + %nop13605 = alloca i1, i1 0 + %nop13606 = alloca i1, i1 0 + %nop13607 = alloca i1, i1 0 + %nop13608 = alloca i1, i1 0 + %nop13609 = alloca i1, i1 0 + %nop13610 = alloca i1, i1 0 + %nop13611 = alloca i1, i1 0 + %nop13612 = alloca i1, i1 0 + %nop13613 = alloca i1, i1 0 + %nop13614 = alloca i1, i1 0 + %nop13615 = alloca i1, i1 0 + %nop13616 = alloca i1, i1 0 + %nop13617 = alloca i1, i1 0 + %nop13618 = alloca i1, i1 0 + %nop13619 = alloca i1, i1 0 + %nop13620 = alloca i1, i1 0 + %nop13621 = alloca i1, i1 0 + %nop13622 = alloca i1, i1 0 + %nop13623 = alloca i1, i1 0 + %nop13624 = alloca i1, i1 0 + %nop13625 = alloca i1, i1 0 + %nop13626 = alloca i1, i1 0 + %nop13627 = alloca i1, i1 0 + %nop13628 = alloca i1, i1 0 + %nop13629 = alloca i1, i1 0 + %nop13630 = alloca i1, i1 0 + %nop13631 = alloca i1, i1 0 + %nop13632 = alloca i1, i1 0 + %nop13633 = alloca i1, i1 0 + %nop13634 = alloca i1, i1 0 + %nop13635 = alloca i1, i1 0 + %nop13636 = alloca i1, i1 0 + %nop13637 = alloca i1, i1 0 + %nop13638 = alloca i1, i1 0 + %nop13639 = alloca i1, i1 0 + %nop13640 = alloca i1, i1 0 + %nop13641 = alloca i1, i1 0 + %nop13642 = alloca i1, i1 0 + %nop13643 = alloca i1, i1 0 + %nop13644 = alloca i1, i1 0 + %nop13645 = alloca i1, i1 0 + %nop13646 = alloca i1, i1 0 + %nop13647 = alloca i1, i1 0 + %nop13648 = alloca i1, i1 0 + %nop13649 = alloca i1, i1 0 + %nop13650 = alloca i1, i1 0 + %nop13651 = alloca i1, i1 0 + %nop13652 = alloca i1, i1 0 + %nop13653 = alloca i1, i1 0 + %nop13654 = alloca 
i1, i1 0 + %nop13655 = alloca i1, i1 0 + %nop13656 = alloca i1, i1 0 + %nop13657 = alloca i1, i1 0 + %nop13658 = alloca i1, i1 0 + %nop13659 = alloca i1, i1 0 + %nop13660 = alloca i1, i1 0 + %nop13661 = alloca i1, i1 0 + %nop13662 = alloca i1, i1 0 + %nop13663 = alloca i1, i1 0 + %nop13664 = alloca i1, i1 0 + %nop13665 = alloca i1, i1 0 + %nop13666 = alloca i1, i1 0 + %nop13667 = alloca i1, i1 0 + %nop13668 = alloca i1, i1 0 + %nop13669 = alloca i1, i1 0 + %nop13670 = alloca i1, i1 0 + %nop13671 = alloca i1, i1 0 + %nop13672 = alloca i1, i1 0 + %nop13673 = alloca i1, i1 0 + %nop13674 = alloca i1, i1 0 + %nop13675 = alloca i1, i1 0 + %nop13676 = alloca i1, i1 0 + %nop13677 = alloca i1, i1 0 + %nop13678 = alloca i1, i1 0 + %nop13679 = alloca i1, i1 0 + %nop13680 = alloca i1, i1 0 + %nop13681 = alloca i1, i1 0 + %nop13682 = alloca i1, i1 0 + %nop13683 = alloca i1, i1 0 + %nop13684 = alloca i1, i1 0 + %nop13685 = alloca i1, i1 0 + %nop13686 = alloca i1, i1 0 + %nop13687 = alloca i1, i1 0 + %nop13688 = alloca i1, i1 0 + %nop13689 = alloca i1, i1 0 + %nop13690 = alloca i1, i1 0 + %nop13691 = alloca i1, i1 0 + %nop13692 = alloca i1, i1 0 + %nop13693 = alloca i1, i1 0 + %nop13694 = alloca i1, i1 0 + %nop13695 = alloca i1, i1 0 + %nop13696 = alloca i1, i1 0 + %nop13697 = alloca i1, i1 0 + %nop13698 = alloca i1, i1 0 + %nop13699 = alloca i1, i1 0 + %nop13700 = alloca i1, i1 0 + %nop13701 = alloca i1, i1 0 + %nop13702 = alloca i1, i1 0 + %nop13703 = alloca i1, i1 0 + %nop13704 = alloca i1, i1 0 + %nop13705 = alloca i1, i1 0 + %nop13706 = alloca i1, i1 0 + %nop13707 = alloca i1, i1 0 + %nop13708 = alloca i1, i1 0 + %nop13709 = alloca i1, i1 0 + %nop13710 = alloca i1, i1 0 + %nop13711 = alloca i1, i1 0 + %nop13712 = alloca i1, i1 0 + %nop13713 = alloca i1, i1 0 + %nop13714 = alloca i1, i1 0 + %nop13715 = alloca i1, i1 0 + %nop13716 = alloca i1, i1 0 + %nop13717 = alloca i1, i1 0 + %nop13718 = alloca i1, i1 0 + %nop13719 = alloca i1, i1 0 + %nop13720 = alloca i1, i1 0 + 
%nop13721 = alloca i1, i1 0 + %nop13722 = alloca i1, i1 0 + %nop13723 = alloca i1, i1 0 + %nop13724 = alloca i1, i1 0 + %nop13725 = alloca i1, i1 0 + %nop13726 = alloca i1, i1 0 + %nop13727 = alloca i1, i1 0 + %nop13728 = alloca i1, i1 0 + %nop13729 = alloca i1, i1 0 + %nop13730 = alloca i1, i1 0 + %nop13731 = alloca i1, i1 0 + %nop13732 = alloca i1, i1 0 + %nop13733 = alloca i1, i1 0 + %nop13734 = alloca i1, i1 0 + %nop13735 = alloca i1, i1 0 + %nop13736 = alloca i1, i1 0 + %nop13737 = alloca i1, i1 0 + %nop13738 = alloca i1, i1 0 + %nop13739 = alloca i1, i1 0 + %nop13740 = alloca i1, i1 0 + %nop13741 = alloca i1, i1 0 + %nop13742 = alloca i1, i1 0 + %nop13743 = alloca i1, i1 0 + %nop13744 = alloca i1, i1 0 + %nop13745 = alloca i1, i1 0 + %nop13746 = alloca i1, i1 0 + %nop13747 = alloca i1, i1 0 + %nop13748 = alloca i1, i1 0 + %nop13749 = alloca i1, i1 0 + %nop13750 = alloca i1, i1 0 + %nop13751 = alloca i1, i1 0 + %nop13752 = alloca i1, i1 0 + %nop13753 = alloca i1, i1 0 + %nop13754 = alloca i1, i1 0 + %nop13755 = alloca i1, i1 0 + %nop13756 = alloca i1, i1 0 + %nop13757 = alloca i1, i1 0 + %nop13758 = alloca i1, i1 0 + %nop13759 = alloca i1, i1 0 + %nop13760 = alloca i1, i1 0 + %nop13761 = alloca i1, i1 0 + %nop13762 = alloca i1, i1 0 + %nop13763 = alloca i1, i1 0 + %nop13764 = alloca i1, i1 0 + %nop13765 = alloca i1, i1 0 + %nop13766 = alloca i1, i1 0 + %nop13767 = alloca i1, i1 0 + %nop13768 = alloca i1, i1 0 + %nop13769 = alloca i1, i1 0 + %nop13770 = alloca i1, i1 0 + %nop13771 = alloca i1, i1 0 + %nop13772 = alloca i1, i1 0 + %nop13773 = alloca i1, i1 0 + %nop13774 = alloca i1, i1 0 + %nop13775 = alloca i1, i1 0 + %nop13776 = alloca i1, i1 0 + %nop13777 = alloca i1, i1 0 + %nop13778 = alloca i1, i1 0 + %nop13779 = alloca i1, i1 0 + %nop13780 = alloca i1, i1 0 + %nop13781 = alloca i1, i1 0 + %nop13782 = alloca i1, i1 0 + %nop13783 = alloca i1, i1 0 + %nop13784 = alloca i1, i1 0 + %nop13785 = alloca i1, i1 0 + %nop13786 = alloca i1, i1 0 + %nop13787 = alloca 
i1, i1 0 + %nop13788 = alloca i1, i1 0 + %nop13789 = alloca i1, i1 0 + %nop13790 = alloca i1, i1 0 + %nop13791 = alloca i1, i1 0 + %nop13792 = alloca i1, i1 0 + %nop13793 = alloca i1, i1 0 + %nop13794 = alloca i1, i1 0 + %nop13795 = alloca i1, i1 0 + %nop13796 = alloca i1, i1 0 + %nop13797 = alloca i1, i1 0 + %nop13798 = alloca i1, i1 0 + %nop13799 = alloca i1, i1 0 + %nop13800 = alloca i1, i1 0 + %nop13801 = alloca i1, i1 0 + %nop13802 = alloca i1, i1 0 + %nop13803 = alloca i1, i1 0 + %nop13804 = alloca i1, i1 0 + %nop13805 = alloca i1, i1 0 + %nop13806 = alloca i1, i1 0 + %nop13807 = alloca i1, i1 0 + %nop13808 = alloca i1, i1 0 + %nop13809 = alloca i1, i1 0 + %nop13810 = alloca i1, i1 0 + %nop13811 = alloca i1, i1 0 + %nop13812 = alloca i1, i1 0 + %nop13813 = alloca i1, i1 0 + %nop13814 = alloca i1, i1 0 + %nop13815 = alloca i1, i1 0 + %nop13816 = alloca i1, i1 0 + %nop13817 = alloca i1, i1 0 + %nop13818 = alloca i1, i1 0 + %nop13819 = alloca i1, i1 0 + %nop13820 = alloca i1, i1 0 + %nop13821 = alloca i1, i1 0 + %nop13822 = alloca i1, i1 0 + %nop13823 = alloca i1, i1 0 + %nop13824 = alloca i1, i1 0 + %nop13825 = alloca i1, i1 0 + %nop13826 = alloca i1, i1 0 + %nop13827 = alloca i1, i1 0 + %nop13828 = alloca i1, i1 0 + %nop13829 = alloca i1, i1 0 + %nop13830 = alloca i1, i1 0 + %nop13831 = alloca i1, i1 0 + %nop13832 = alloca i1, i1 0 + %nop13833 = alloca i1, i1 0 + %nop13834 = alloca i1, i1 0 + %nop13835 = alloca i1, i1 0 + %nop13836 = alloca i1, i1 0 + %nop13837 = alloca i1, i1 0 + %nop13838 = alloca i1, i1 0 + %nop13839 = alloca i1, i1 0 + %nop13840 = alloca i1, i1 0 + %nop13841 = alloca i1, i1 0 + %nop13842 = alloca i1, i1 0 + %nop13843 = alloca i1, i1 0 + %nop13844 = alloca i1, i1 0 + %nop13845 = alloca i1, i1 0 + %nop13846 = alloca i1, i1 0 + %nop13847 = alloca i1, i1 0 + %nop13848 = alloca i1, i1 0 + %nop13849 = alloca i1, i1 0 + %nop13850 = alloca i1, i1 0 + %nop13851 = alloca i1, i1 0 + %nop13852 = alloca i1, i1 0 + %nop13853 = alloca i1, i1 0 + 
%nop13854 = alloca i1, i1 0 + %nop13855 = alloca i1, i1 0 + %nop13856 = alloca i1, i1 0 + %nop13857 = alloca i1, i1 0 + %nop13858 = alloca i1, i1 0 + %nop13859 = alloca i1, i1 0 + %nop13860 = alloca i1, i1 0 + %nop13861 = alloca i1, i1 0 + %nop13862 = alloca i1, i1 0 + %nop13863 = alloca i1, i1 0 + %nop13864 = alloca i1, i1 0 + %nop13865 = alloca i1, i1 0 + %nop13866 = alloca i1, i1 0 + %nop13867 = alloca i1, i1 0 + %nop13868 = alloca i1, i1 0 + %nop13869 = alloca i1, i1 0 + %nop13870 = alloca i1, i1 0 + %nop13871 = alloca i1, i1 0 + %nop13872 = alloca i1, i1 0 + %nop13873 = alloca i1, i1 0 + %nop13874 = alloca i1, i1 0 + %nop13875 = alloca i1, i1 0 + %nop13876 = alloca i1, i1 0 + %nop13877 = alloca i1, i1 0 + %nop13878 = alloca i1, i1 0 + %nop13879 = alloca i1, i1 0 + %nop13880 = alloca i1, i1 0 + %nop13881 = alloca i1, i1 0 + %nop13882 = alloca i1, i1 0 + %nop13883 = alloca i1, i1 0 + %nop13884 = alloca i1, i1 0 + %nop13885 = alloca i1, i1 0 + %nop13886 = alloca i1, i1 0 + %nop13887 = alloca i1, i1 0 + %nop13888 = alloca i1, i1 0 + %nop13889 = alloca i1, i1 0 + %nop13890 = alloca i1, i1 0 + %nop13891 = alloca i1, i1 0 + %nop13892 = alloca i1, i1 0 + %nop13893 = alloca i1, i1 0 + %nop13894 = alloca i1, i1 0 + %nop13895 = alloca i1, i1 0 + %nop13896 = alloca i1, i1 0 + %nop13897 = alloca i1, i1 0 + %nop13898 = alloca i1, i1 0 + %nop13899 = alloca i1, i1 0 + %nop13900 = alloca i1, i1 0 + %nop13901 = alloca i1, i1 0 + %nop13902 = alloca i1, i1 0 + %nop13903 = alloca i1, i1 0 + %nop13904 = alloca i1, i1 0 + %nop13905 = alloca i1, i1 0 + %nop13906 = alloca i1, i1 0 + %nop13907 = alloca i1, i1 0 + %nop13908 = alloca i1, i1 0 + %nop13909 = alloca i1, i1 0 + %nop13910 = alloca i1, i1 0 + %nop13911 = alloca i1, i1 0 + %nop13912 = alloca i1, i1 0 + %nop13913 = alloca i1, i1 0 + %nop13914 = alloca i1, i1 0 + %nop13915 = alloca i1, i1 0 + %nop13916 = alloca i1, i1 0 + %nop13917 = alloca i1, i1 0 + %nop13918 = alloca i1, i1 0 + %nop13919 = alloca i1, i1 0 + %nop13920 = alloca 
i1, i1 0 + %nop13921 = alloca i1, i1 0 + %nop13922 = alloca i1, i1 0 + %nop13923 = alloca i1, i1 0 + %nop13924 = alloca i1, i1 0 + %nop13925 = alloca i1, i1 0 + %nop13926 = alloca i1, i1 0 + %nop13927 = alloca i1, i1 0 + %nop13928 = alloca i1, i1 0 + %nop13929 = alloca i1, i1 0 + %nop13930 = alloca i1, i1 0 + %nop13931 = alloca i1, i1 0 + %nop13932 = alloca i1, i1 0 + %nop13933 = alloca i1, i1 0 + %nop13934 = alloca i1, i1 0 + %nop13935 = alloca i1, i1 0 + %nop13936 = alloca i1, i1 0 + %nop13937 = alloca i1, i1 0 + %nop13938 = alloca i1, i1 0 + %nop13939 = alloca i1, i1 0 + %nop13940 = alloca i1, i1 0 + %nop13941 = alloca i1, i1 0 + %nop13942 = alloca i1, i1 0 + %nop13943 = alloca i1, i1 0 + %nop13944 = alloca i1, i1 0 + %nop13945 = alloca i1, i1 0 + %nop13946 = alloca i1, i1 0 + %nop13947 = alloca i1, i1 0 + %nop13948 = alloca i1, i1 0 + %nop13949 = alloca i1, i1 0 + %nop13950 = alloca i1, i1 0 + %nop13951 = alloca i1, i1 0 + %nop13952 = alloca i1, i1 0 + %nop13953 = alloca i1, i1 0 + %nop13954 = alloca i1, i1 0 + %nop13955 = alloca i1, i1 0 + %nop13956 = alloca i1, i1 0 + %nop13957 = alloca i1, i1 0 + %nop13958 = alloca i1, i1 0 + %nop13959 = alloca i1, i1 0 + %nop13960 = alloca i1, i1 0 + %nop13961 = alloca i1, i1 0 + %nop13962 = alloca i1, i1 0 + %nop13963 = alloca i1, i1 0 + %nop13964 = alloca i1, i1 0 + %nop13965 = alloca i1, i1 0 + %nop13966 = alloca i1, i1 0 + %nop13967 = alloca i1, i1 0 + %nop13968 = alloca i1, i1 0 + %nop13969 = alloca i1, i1 0 + %nop13970 = alloca i1, i1 0 + %nop13971 = alloca i1, i1 0 + %nop13972 = alloca i1, i1 0 + %nop13973 = alloca i1, i1 0 + %nop13974 = alloca i1, i1 0 + %nop13975 = alloca i1, i1 0 + %nop13976 = alloca i1, i1 0 + %nop13977 = alloca i1, i1 0 + %nop13978 = alloca i1, i1 0 + %nop13979 = alloca i1, i1 0 + %nop13980 = alloca i1, i1 0 + %nop13981 = alloca i1, i1 0 + %nop13982 = alloca i1, i1 0 + %nop13983 = alloca i1, i1 0 + %nop13984 = alloca i1, i1 0 + %nop13985 = alloca i1, i1 0 + %nop13986 = alloca i1, i1 0 + 
%nop13987 = alloca i1, i1 0 + %nop13988 = alloca i1, i1 0 + %nop13989 = alloca i1, i1 0 + %nop13990 = alloca i1, i1 0 + %nop13991 = alloca i1, i1 0 + %nop13992 = alloca i1, i1 0 + %nop13993 = alloca i1, i1 0 + %nop13994 = alloca i1, i1 0 + %nop13995 = alloca i1, i1 0 + %nop13996 = alloca i1, i1 0 + %nop13997 = alloca i1, i1 0 + %nop13998 = alloca i1, i1 0 + %nop13999 = alloca i1, i1 0 + %nop14000 = alloca i1, i1 0 + %nop14001 = alloca i1, i1 0 + %nop14002 = alloca i1, i1 0 + %nop14003 = alloca i1, i1 0 + %nop14004 = alloca i1, i1 0 + %nop14005 = alloca i1, i1 0 + %nop14006 = alloca i1, i1 0 + %nop14007 = alloca i1, i1 0 + %nop14008 = alloca i1, i1 0 + %nop14009 = alloca i1, i1 0 + %nop14010 = alloca i1, i1 0 + %nop14011 = alloca i1, i1 0 + %nop14012 = alloca i1, i1 0 + %nop14013 = alloca i1, i1 0 + %nop14014 = alloca i1, i1 0 + %nop14015 = alloca i1, i1 0 + %nop14016 = alloca i1, i1 0 + %nop14017 = alloca i1, i1 0 + %nop14018 = alloca i1, i1 0 + %nop14019 = alloca i1, i1 0 + %nop14020 = alloca i1, i1 0 + %nop14021 = alloca i1, i1 0 + %nop14022 = alloca i1, i1 0 + %nop14023 = alloca i1, i1 0 + %nop14024 = alloca i1, i1 0 + %nop14025 = alloca i1, i1 0 + %nop14026 = alloca i1, i1 0 + %nop14027 = alloca i1, i1 0 + %nop14028 = alloca i1, i1 0 + %nop14029 = alloca i1, i1 0 + %nop14030 = alloca i1, i1 0 + %nop14031 = alloca i1, i1 0 + %nop14032 = alloca i1, i1 0 + %nop14033 = alloca i1, i1 0 + %nop14034 = alloca i1, i1 0 + %nop14035 = alloca i1, i1 0 + %nop14036 = alloca i1, i1 0 + %nop14037 = alloca i1, i1 0 + %nop14038 = alloca i1, i1 0 + %nop14039 = alloca i1, i1 0 + %nop14040 = alloca i1, i1 0 + %nop14041 = alloca i1, i1 0 + %nop14042 = alloca i1, i1 0 + %nop14043 = alloca i1, i1 0 + %nop14044 = alloca i1, i1 0 + %nop14045 = alloca i1, i1 0 + %nop14046 = alloca i1, i1 0 + %nop14047 = alloca i1, i1 0 + %nop14048 = alloca i1, i1 0 + %nop14049 = alloca i1, i1 0 + %nop14050 = alloca i1, i1 0 + %nop14051 = alloca i1, i1 0 + %nop14052 = alloca i1, i1 0 + %nop14053 = alloca 
i1, i1 0 + %nop14054 = alloca i1, i1 0 + %nop14055 = alloca i1, i1 0 + %nop14056 = alloca i1, i1 0 + %nop14057 = alloca i1, i1 0 + %nop14058 = alloca i1, i1 0 + %nop14059 = alloca i1, i1 0 + %nop14060 = alloca i1, i1 0 + %nop14061 = alloca i1, i1 0 + %nop14062 = alloca i1, i1 0 + %nop14063 = alloca i1, i1 0 + %nop14064 = alloca i1, i1 0 + %nop14065 = alloca i1, i1 0 + %nop14066 = alloca i1, i1 0 + %nop14067 = alloca i1, i1 0 + %nop14068 = alloca i1, i1 0 + %nop14069 = alloca i1, i1 0 + %nop14070 = alloca i1, i1 0 + %nop14071 = alloca i1, i1 0 + %nop14072 = alloca i1, i1 0 + %nop14073 = alloca i1, i1 0 + %nop14074 = alloca i1, i1 0 + %nop14075 = alloca i1, i1 0 + %nop14076 = alloca i1, i1 0 + %nop14077 = alloca i1, i1 0 + %nop14078 = alloca i1, i1 0 + %nop14079 = alloca i1, i1 0 + %nop14080 = alloca i1, i1 0 + %nop14081 = alloca i1, i1 0 + %nop14082 = alloca i1, i1 0 + %nop14083 = alloca i1, i1 0 + %nop14084 = alloca i1, i1 0 + %nop14085 = alloca i1, i1 0 + %nop14086 = alloca i1, i1 0 + %nop14087 = alloca i1, i1 0 + %nop14088 = alloca i1, i1 0 + %nop14089 = alloca i1, i1 0 + %nop14090 = alloca i1, i1 0 + %nop14091 = alloca i1, i1 0 + %nop14092 = alloca i1, i1 0 + %nop14093 = alloca i1, i1 0 + %nop14094 = alloca i1, i1 0 + %nop14095 = alloca i1, i1 0 + %nop14096 = alloca i1, i1 0 + %nop14097 = alloca i1, i1 0 + %nop14098 = alloca i1, i1 0 + %nop14099 = alloca i1, i1 0 + %nop14100 = alloca i1, i1 0 + %nop14101 = alloca i1, i1 0 + %nop14102 = alloca i1, i1 0 + %nop14103 = alloca i1, i1 0 + %nop14104 = alloca i1, i1 0 + %nop14105 = alloca i1, i1 0 + %nop14106 = alloca i1, i1 0 + %nop14107 = alloca i1, i1 0 + %nop14108 = alloca i1, i1 0 + %nop14109 = alloca i1, i1 0 + %nop14110 = alloca i1, i1 0 + %nop14111 = alloca i1, i1 0 + %nop14112 = alloca i1, i1 0 + %nop14113 = alloca i1, i1 0 + %nop14114 = alloca i1, i1 0 + %nop14115 = alloca i1, i1 0 + %nop14116 = alloca i1, i1 0 + %nop14117 = alloca i1, i1 0 + %nop14118 = alloca i1, i1 0 + %nop14119 = alloca i1, i1 0 + 
%nop14120 = alloca i1, i1 0 + %nop14121 = alloca i1, i1 0 + %nop14122 = alloca i1, i1 0 + %nop14123 = alloca i1, i1 0 + %nop14124 = alloca i1, i1 0 + %nop14125 = alloca i1, i1 0 + %nop14126 = alloca i1, i1 0 + %nop14127 = alloca i1, i1 0 + %nop14128 = alloca i1, i1 0 + %nop14129 = alloca i1, i1 0 + %nop14130 = alloca i1, i1 0 + %nop14131 = alloca i1, i1 0 + %nop14132 = alloca i1, i1 0 + %nop14133 = alloca i1, i1 0 + %nop14134 = alloca i1, i1 0 + %nop14135 = alloca i1, i1 0 + %nop14136 = alloca i1, i1 0 + %nop14137 = alloca i1, i1 0 + %nop14138 = alloca i1, i1 0 + %nop14139 = alloca i1, i1 0 + %nop14140 = alloca i1, i1 0 + %nop14141 = alloca i1, i1 0 + %nop14142 = alloca i1, i1 0 + %nop14143 = alloca i1, i1 0 + %nop14144 = alloca i1, i1 0 + %nop14145 = alloca i1, i1 0 + %nop14146 = alloca i1, i1 0 + %nop14147 = alloca i1, i1 0 + %nop14148 = alloca i1, i1 0 + %nop14149 = alloca i1, i1 0 + %nop14150 = alloca i1, i1 0 + %nop14151 = alloca i1, i1 0 + %nop14152 = alloca i1, i1 0 + %nop14153 = alloca i1, i1 0 + %nop14154 = alloca i1, i1 0 + %nop14155 = alloca i1, i1 0 + %nop14156 = alloca i1, i1 0 + %nop14157 = alloca i1, i1 0 + %nop14158 = alloca i1, i1 0 + %nop14159 = alloca i1, i1 0 + %nop14160 = alloca i1, i1 0 + %nop14161 = alloca i1, i1 0 + %nop14162 = alloca i1, i1 0 + %nop14163 = alloca i1, i1 0 + %nop14164 = alloca i1, i1 0 + %nop14165 = alloca i1, i1 0 + %nop14166 = alloca i1, i1 0 + %nop14167 = alloca i1, i1 0 + %nop14168 = alloca i1, i1 0 + %nop14169 = alloca i1, i1 0 + %nop14170 = alloca i1, i1 0 + %nop14171 = alloca i1, i1 0 + %nop14172 = alloca i1, i1 0 + %nop14173 = alloca i1, i1 0 + %nop14174 = alloca i1, i1 0 + %nop14175 = alloca i1, i1 0 + %nop14176 = alloca i1, i1 0 + %nop14177 = alloca i1, i1 0 + %nop14178 = alloca i1, i1 0 + %nop14179 = alloca i1, i1 0 + %nop14180 = alloca i1, i1 0 + %nop14181 = alloca i1, i1 0 + %nop14182 = alloca i1, i1 0 + %nop14183 = alloca i1, i1 0 + %nop14184 = alloca i1, i1 0 + %nop14185 = alloca i1, i1 0 + %nop14186 = alloca 
i1, i1 0 + %nop14187 = alloca i1, i1 0 + %nop14188 = alloca i1, i1 0 + %nop14189 = alloca i1, i1 0 + %nop14190 = alloca i1, i1 0 + %nop14191 = alloca i1, i1 0 + %nop14192 = alloca i1, i1 0 + %nop14193 = alloca i1, i1 0 + %nop14194 = alloca i1, i1 0 + %nop14195 = alloca i1, i1 0 + %nop14196 = alloca i1, i1 0 + %nop14197 = alloca i1, i1 0 + %nop14198 = alloca i1, i1 0 + %nop14199 = alloca i1, i1 0 + %nop14200 = alloca i1, i1 0 + %nop14201 = alloca i1, i1 0 + %nop14202 = alloca i1, i1 0 + %nop14203 = alloca i1, i1 0 + %nop14204 = alloca i1, i1 0 + %nop14205 = alloca i1, i1 0 + %nop14206 = alloca i1, i1 0 + %nop14207 = alloca i1, i1 0 + %nop14208 = alloca i1, i1 0 + %nop14209 = alloca i1, i1 0 + %nop14210 = alloca i1, i1 0 + %nop14211 = alloca i1, i1 0 + %nop14212 = alloca i1, i1 0 + %nop14213 = alloca i1, i1 0 + %nop14214 = alloca i1, i1 0 + %nop14215 = alloca i1, i1 0 + %nop14216 = alloca i1, i1 0 + %nop14217 = alloca i1, i1 0 + %nop14218 = alloca i1, i1 0 + %nop14219 = alloca i1, i1 0 + %nop14220 = alloca i1, i1 0 + %nop14221 = alloca i1, i1 0 + %nop14222 = alloca i1, i1 0 + %nop14223 = alloca i1, i1 0 + %nop14224 = alloca i1, i1 0 + %nop14225 = alloca i1, i1 0 + %nop14226 = alloca i1, i1 0 + %nop14227 = alloca i1, i1 0 + %nop14228 = alloca i1, i1 0 + %nop14229 = alloca i1, i1 0 + %nop14230 = alloca i1, i1 0 + %nop14231 = alloca i1, i1 0 + %nop14232 = alloca i1, i1 0 + %nop14233 = alloca i1, i1 0 + %nop14234 = alloca i1, i1 0 + %nop14235 = alloca i1, i1 0 + %nop14236 = alloca i1, i1 0 + %nop14237 = alloca i1, i1 0 + %nop14238 = alloca i1, i1 0 + %nop14239 = alloca i1, i1 0 + %nop14240 = alloca i1, i1 0 + %nop14241 = alloca i1, i1 0 + %nop14242 = alloca i1, i1 0 + %nop14243 = alloca i1, i1 0 + %nop14244 = alloca i1, i1 0 + %nop14245 = alloca i1, i1 0 + %nop14246 = alloca i1, i1 0 + %nop14247 = alloca i1, i1 0 + %nop14248 = alloca i1, i1 0 + %nop14249 = alloca i1, i1 0 + %nop14250 = alloca i1, i1 0 + %nop14251 = alloca i1, i1 0 + %nop14252 = alloca i1, i1 0 + 
%nop14253 = alloca i1, i1 0 + %nop14254 = alloca i1, i1 0 + %nop14255 = alloca i1, i1 0 + %nop14256 = alloca i1, i1 0 + %nop14257 = alloca i1, i1 0 + %nop14258 = alloca i1, i1 0 + %nop14259 = alloca i1, i1 0 + %nop14260 = alloca i1, i1 0 + %nop14261 = alloca i1, i1 0 + %nop14262 = alloca i1, i1 0 + %nop14263 = alloca i1, i1 0 + %nop14264 = alloca i1, i1 0 + %nop14265 = alloca i1, i1 0 + %nop14266 = alloca i1, i1 0 + %nop14267 = alloca i1, i1 0 + %nop14268 = alloca i1, i1 0 + %nop14269 = alloca i1, i1 0 + %nop14270 = alloca i1, i1 0 + %nop14271 = alloca i1, i1 0 + %nop14272 = alloca i1, i1 0 + %nop14273 = alloca i1, i1 0 + %nop14274 = alloca i1, i1 0 + %nop14275 = alloca i1, i1 0 + %nop14276 = alloca i1, i1 0 + %nop14277 = alloca i1, i1 0 + %nop14278 = alloca i1, i1 0 + %nop14279 = alloca i1, i1 0 + %nop14280 = alloca i1, i1 0 + %nop14281 = alloca i1, i1 0 + %nop14282 = alloca i1, i1 0 + %nop14283 = alloca i1, i1 0 + %nop14284 = alloca i1, i1 0 + %nop14285 = alloca i1, i1 0 + %nop14286 = alloca i1, i1 0 + %nop14287 = alloca i1, i1 0 + %nop14288 = alloca i1, i1 0 + %nop14289 = alloca i1, i1 0 + %nop14290 = alloca i1, i1 0 + %nop14291 = alloca i1, i1 0 + %nop14292 = alloca i1, i1 0 + %nop14293 = alloca i1, i1 0 + %nop14294 = alloca i1, i1 0 + %nop14295 = alloca i1, i1 0 + %nop14296 = alloca i1, i1 0 + %nop14297 = alloca i1, i1 0 + %nop14298 = alloca i1, i1 0 + %nop14299 = alloca i1, i1 0 + %nop14300 = alloca i1, i1 0 + %nop14301 = alloca i1, i1 0 + %nop14302 = alloca i1, i1 0 + %nop14303 = alloca i1, i1 0 + %nop14304 = alloca i1, i1 0 + %nop14305 = alloca i1, i1 0 + %nop14306 = alloca i1, i1 0 + %nop14307 = alloca i1, i1 0 + %nop14308 = alloca i1, i1 0 + %nop14309 = alloca i1, i1 0 + %nop14310 = alloca i1, i1 0 + %nop14311 = alloca i1, i1 0 + %nop14312 = alloca i1, i1 0 + %nop14313 = alloca i1, i1 0 + %nop14314 = alloca i1, i1 0 + %nop14315 = alloca i1, i1 0 + %nop14316 = alloca i1, i1 0 + %nop14317 = alloca i1, i1 0 + %nop14318 = alloca i1, i1 0 + %nop14319 = alloca 
i1, i1 0 + %nop14320 = alloca i1, i1 0 + %nop14321 = alloca i1, i1 0 + %nop14322 = alloca i1, i1 0 + %nop14323 = alloca i1, i1 0 + %nop14324 = alloca i1, i1 0 + %nop14325 = alloca i1, i1 0 + %nop14326 = alloca i1, i1 0 + %nop14327 = alloca i1, i1 0 + %nop14328 = alloca i1, i1 0 + %nop14329 = alloca i1, i1 0 + %nop14330 = alloca i1, i1 0 + %nop14331 = alloca i1, i1 0 + %nop14332 = alloca i1, i1 0 + %nop14333 = alloca i1, i1 0 + %nop14334 = alloca i1, i1 0 + %nop14335 = alloca i1, i1 0 + %nop14336 = alloca i1, i1 0 + %nop14337 = alloca i1, i1 0 + %nop14338 = alloca i1, i1 0 + %nop14339 = alloca i1, i1 0 + %nop14340 = alloca i1, i1 0 + %nop14341 = alloca i1, i1 0 + %nop14342 = alloca i1, i1 0 + %nop14343 = alloca i1, i1 0 + %nop14344 = alloca i1, i1 0 + %nop14345 = alloca i1, i1 0 + %nop14346 = alloca i1, i1 0 + %nop14347 = alloca i1, i1 0 + %nop14348 = alloca i1, i1 0 + %nop14349 = alloca i1, i1 0 + %nop14350 = alloca i1, i1 0 + %nop14351 = alloca i1, i1 0 + %nop14352 = alloca i1, i1 0 + %nop14353 = alloca i1, i1 0 + %nop14354 = alloca i1, i1 0 + %nop14355 = alloca i1, i1 0 + %nop14356 = alloca i1, i1 0 + %nop14357 = alloca i1, i1 0 + %nop14358 = alloca i1, i1 0 + %nop14359 = alloca i1, i1 0 + %nop14360 = alloca i1, i1 0 + %nop14361 = alloca i1, i1 0 + %nop14362 = alloca i1, i1 0 + %nop14363 = alloca i1, i1 0 + %nop14364 = alloca i1, i1 0 + %nop14365 = alloca i1, i1 0 + %nop14366 = alloca i1, i1 0 + %nop14367 = alloca i1, i1 0 + %nop14368 = alloca i1, i1 0 + %nop14369 = alloca i1, i1 0 + %nop14370 = alloca i1, i1 0 + %nop14371 = alloca i1, i1 0 + %nop14372 = alloca i1, i1 0 + %nop14373 = alloca i1, i1 0 + %nop14374 = alloca i1, i1 0 + %nop14375 = alloca i1, i1 0 + %nop14376 = alloca i1, i1 0 + %nop14377 = alloca i1, i1 0 + %nop14378 = alloca i1, i1 0 + %nop14379 = alloca i1, i1 0 + %nop14380 = alloca i1, i1 0 + %nop14381 = alloca i1, i1 0 + %nop14382 = alloca i1, i1 0 + %nop14383 = alloca i1, i1 0 + %nop14384 = alloca i1, i1 0 + %nop14385 = alloca i1, i1 0 + 
%nop14386 = alloca i1, i1 0 + %nop14387 = alloca i1, i1 0 + %nop14388 = alloca i1, i1 0 + %nop14389 = alloca i1, i1 0 + %nop14390 = alloca i1, i1 0 + %nop14391 = alloca i1, i1 0 + %nop14392 = alloca i1, i1 0 + %nop14393 = alloca i1, i1 0 + %nop14394 = alloca i1, i1 0 + %nop14395 = alloca i1, i1 0 + %nop14396 = alloca i1, i1 0 + %nop14397 = alloca i1, i1 0 + %nop14398 = alloca i1, i1 0 + %nop14399 = alloca i1, i1 0 + %nop14400 = alloca i1, i1 0 + %nop14401 = alloca i1, i1 0 + %nop14402 = alloca i1, i1 0 + %nop14403 = alloca i1, i1 0 + %nop14404 = alloca i1, i1 0 + %nop14405 = alloca i1, i1 0 + %nop14406 = alloca i1, i1 0 + %nop14407 = alloca i1, i1 0 + %nop14408 = alloca i1, i1 0 + %nop14409 = alloca i1, i1 0 + %nop14410 = alloca i1, i1 0 + %nop14411 = alloca i1, i1 0 + %nop14412 = alloca i1, i1 0 + %nop14413 = alloca i1, i1 0 + %nop14414 = alloca i1, i1 0 + %nop14415 = alloca i1, i1 0 + %nop14416 = alloca i1, i1 0 + %nop14417 = alloca i1, i1 0 + %nop14418 = alloca i1, i1 0 + %nop14419 = alloca i1, i1 0 + %nop14420 = alloca i1, i1 0 + %nop14421 = alloca i1, i1 0 + %nop14422 = alloca i1, i1 0 + %nop14423 = alloca i1, i1 0 + %nop14424 = alloca i1, i1 0 + %nop14425 = alloca i1, i1 0 + %nop14426 = alloca i1, i1 0 + %nop14427 = alloca i1, i1 0 + %nop14428 = alloca i1, i1 0 + %nop14429 = alloca i1, i1 0 + %nop14430 = alloca i1, i1 0 + %nop14431 = alloca i1, i1 0 + %nop14432 = alloca i1, i1 0 + %nop14433 = alloca i1, i1 0 + %nop14434 = alloca i1, i1 0 + %nop14435 = alloca i1, i1 0 + %nop14436 = alloca i1, i1 0 + %nop14437 = alloca i1, i1 0 + %nop14438 = alloca i1, i1 0 + %nop14439 = alloca i1, i1 0 + %nop14440 = alloca i1, i1 0 + %nop14441 = alloca i1, i1 0 + %nop14442 = alloca i1, i1 0 + %nop14443 = alloca i1, i1 0 + %nop14444 = alloca i1, i1 0 + %nop14445 = alloca i1, i1 0 + %nop14446 = alloca i1, i1 0 + %nop14447 = alloca i1, i1 0 + %nop14448 = alloca i1, i1 0 + %nop14449 = alloca i1, i1 0 + %nop14450 = alloca i1, i1 0 + %nop14451 = alloca i1, i1 0 + %nop14452 = alloca 
i1, i1 0 + %nop14453 = alloca i1, i1 0 + %nop14454 = alloca i1, i1 0 + %nop14455 = alloca i1, i1 0 + %nop14456 = alloca i1, i1 0 + %nop14457 = alloca i1, i1 0 + %nop14458 = alloca i1, i1 0 + %nop14459 = alloca i1, i1 0 + %nop14460 = alloca i1, i1 0 + %nop14461 = alloca i1, i1 0 + %nop14462 = alloca i1, i1 0 + %nop14463 = alloca i1, i1 0 + %nop14464 = alloca i1, i1 0 + %nop14465 = alloca i1, i1 0 + %nop14466 = alloca i1, i1 0 + %nop14467 = alloca i1, i1 0 + %nop14468 = alloca i1, i1 0 + %nop14469 = alloca i1, i1 0 + %nop14470 = alloca i1, i1 0 + %nop14471 = alloca i1, i1 0 + %nop14472 = alloca i1, i1 0 + %nop14473 = alloca i1, i1 0 + %nop14474 = alloca i1, i1 0 + %nop14475 = alloca i1, i1 0 + %nop14476 = alloca i1, i1 0 + %nop14477 = alloca i1, i1 0 + %nop14478 = alloca i1, i1 0 + %nop14479 = alloca i1, i1 0 + %nop14480 = alloca i1, i1 0 + %nop14481 = alloca i1, i1 0 + %nop14482 = alloca i1, i1 0 + %nop14483 = alloca i1, i1 0 + %nop14484 = alloca i1, i1 0 + %nop14485 = alloca i1, i1 0 + %nop14486 = alloca i1, i1 0 + %nop14487 = alloca i1, i1 0 + %nop14488 = alloca i1, i1 0 + %nop14489 = alloca i1, i1 0 + %nop14490 = alloca i1, i1 0 + %nop14491 = alloca i1, i1 0 + %nop14492 = alloca i1, i1 0 + %nop14493 = alloca i1, i1 0 + %nop14494 = alloca i1, i1 0 + %nop14495 = alloca i1, i1 0 + %nop14496 = alloca i1, i1 0 + %nop14497 = alloca i1, i1 0 + %nop14498 = alloca i1, i1 0 + %nop14499 = alloca i1, i1 0 + %nop14500 = alloca i1, i1 0 + %nop14501 = alloca i1, i1 0 + %nop14502 = alloca i1, i1 0 + %nop14503 = alloca i1, i1 0 + %nop14504 = alloca i1, i1 0 + %nop14505 = alloca i1, i1 0 + %nop14506 = alloca i1, i1 0 + %nop14507 = alloca i1, i1 0 + %nop14508 = alloca i1, i1 0 + %nop14509 = alloca i1, i1 0 + %nop14510 = alloca i1, i1 0 + %nop14511 = alloca i1, i1 0 + %nop14512 = alloca i1, i1 0 + %nop14513 = alloca i1, i1 0 + %nop14514 = alloca i1, i1 0 + %nop14515 = alloca i1, i1 0 + %nop14516 = alloca i1, i1 0 + %nop14517 = alloca i1, i1 0 + %nop14518 = alloca i1, i1 0 + 
%nop14519 = alloca i1, i1 0 + %nop14520 = alloca i1, i1 0 + %nop14521 = alloca i1, i1 0 + %nop14522 = alloca i1, i1 0 + %nop14523 = alloca i1, i1 0 + %nop14524 = alloca i1, i1 0 + %nop14525 = alloca i1, i1 0 + %nop14526 = alloca i1, i1 0 + %nop14527 = alloca i1, i1 0 + %nop14528 = alloca i1, i1 0 + %nop14529 = alloca i1, i1 0 + %nop14530 = alloca i1, i1 0 + %nop14531 = alloca i1, i1 0 + %nop14532 = alloca i1, i1 0 + %nop14533 = alloca i1, i1 0 + %nop14534 = alloca i1, i1 0 + %nop14535 = alloca i1, i1 0 + %nop14536 = alloca i1, i1 0 + %nop14537 = alloca i1, i1 0 + %nop14538 = alloca i1, i1 0 + %nop14539 = alloca i1, i1 0 + %nop14540 = alloca i1, i1 0 + %nop14541 = alloca i1, i1 0 + %nop14542 = alloca i1, i1 0 + %nop14543 = alloca i1, i1 0 + %nop14544 = alloca i1, i1 0 + %nop14545 = alloca i1, i1 0 + %nop14546 = alloca i1, i1 0 + %nop14547 = alloca i1, i1 0 + %nop14548 = alloca i1, i1 0 + %nop14549 = alloca i1, i1 0 + %nop14550 = alloca i1, i1 0 + %nop14551 = alloca i1, i1 0 + %nop14552 = alloca i1, i1 0 + %nop14553 = alloca i1, i1 0 + %nop14554 = alloca i1, i1 0 + %nop14555 = alloca i1, i1 0 + %nop14556 = alloca i1, i1 0 + %nop14557 = alloca i1, i1 0 + %nop14558 = alloca i1, i1 0 + %nop14559 = alloca i1, i1 0 + %nop14560 = alloca i1, i1 0 + %nop14561 = alloca i1, i1 0 + %nop14562 = alloca i1, i1 0 + %nop14563 = alloca i1, i1 0 + %nop14564 = alloca i1, i1 0 + %nop14565 = alloca i1, i1 0 + %nop14566 = alloca i1, i1 0 + %nop14567 = alloca i1, i1 0 + %nop14568 = alloca i1, i1 0 + %nop14569 = alloca i1, i1 0 + %nop14570 = alloca i1, i1 0 + %nop14571 = alloca i1, i1 0 + %nop14572 = alloca i1, i1 0 + %nop14573 = alloca i1, i1 0 + %nop14574 = alloca i1, i1 0 + %nop14575 = alloca i1, i1 0 + %nop14576 = alloca i1, i1 0 + %nop14577 = alloca i1, i1 0 + %nop14578 = alloca i1, i1 0 + %nop14579 = alloca i1, i1 0 + %nop14580 = alloca i1, i1 0 + %nop14581 = alloca i1, i1 0 + %nop14582 = alloca i1, i1 0 + %nop14583 = alloca i1, i1 0 + %nop14584 = alloca i1, i1 0 + %nop14585 = alloca 
i1, i1 0 + %nop14586 = alloca i1, i1 0 + %nop14587 = alloca i1, i1 0 + %nop14588 = alloca i1, i1 0 + %nop14589 = alloca i1, i1 0 + %nop14590 = alloca i1, i1 0 + %nop14591 = alloca i1, i1 0 + %nop14592 = alloca i1, i1 0 + %nop14593 = alloca i1, i1 0 + %nop14594 = alloca i1, i1 0 + %nop14595 = alloca i1, i1 0 + %nop14596 = alloca i1, i1 0 + %nop14597 = alloca i1, i1 0 + %nop14598 = alloca i1, i1 0 + %nop14599 = alloca i1, i1 0 + %nop14600 = alloca i1, i1 0 + %nop14601 = alloca i1, i1 0 + %nop14602 = alloca i1, i1 0 + %nop14603 = alloca i1, i1 0 + %nop14604 = alloca i1, i1 0 + %nop14605 = alloca i1, i1 0 + %nop14606 = alloca i1, i1 0 + %nop14607 = alloca i1, i1 0 + %nop14608 = alloca i1, i1 0 + %nop14609 = alloca i1, i1 0 + %nop14610 = alloca i1, i1 0 + %nop14611 = alloca i1, i1 0 + %nop14612 = alloca i1, i1 0 + %nop14613 = alloca i1, i1 0 + %nop14614 = alloca i1, i1 0 + %nop14615 = alloca i1, i1 0 + %nop14616 = alloca i1, i1 0 + %nop14617 = alloca i1, i1 0 + %nop14618 = alloca i1, i1 0 + %nop14619 = alloca i1, i1 0 + %nop14620 = alloca i1, i1 0 + %nop14621 = alloca i1, i1 0 + %nop14622 = alloca i1, i1 0 + %nop14623 = alloca i1, i1 0 + %nop14624 = alloca i1, i1 0 + %nop14625 = alloca i1, i1 0 + %nop14626 = alloca i1, i1 0 + %nop14627 = alloca i1, i1 0 + %nop14628 = alloca i1, i1 0 + %nop14629 = alloca i1, i1 0 + %nop14630 = alloca i1, i1 0 + %nop14631 = alloca i1, i1 0 + %nop14632 = alloca i1, i1 0 + %nop14633 = alloca i1, i1 0 + %nop14634 = alloca i1, i1 0 + %nop14635 = alloca i1, i1 0 + %nop14636 = alloca i1, i1 0 + %nop14637 = alloca i1, i1 0 + %nop14638 = alloca i1, i1 0 + %nop14639 = alloca i1, i1 0 + %nop14640 = alloca i1, i1 0 + %nop14641 = alloca i1, i1 0 + %nop14642 = alloca i1, i1 0 + %nop14643 = alloca i1, i1 0 + %nop14644 = alloca i1, i1 0 + %nop14645 = alloca i1, i1 0 + %nop14646 = alloca i1, i1 0 + %nop14647 = alloca i1, i1 0 + %nop14648 = alloca i1, i1 0 + %nop14649 = alloca i1, i1 0 + %nop14650 = alloca i1, i1 0 + %nop14651 = alloca i1, i1 0 + 
%nop14652 = alloca i1, i1 0 + %nop14653 = alloca i1, i1 0 + %nop14654 = alloca i1, i1 0 + %nop14655 = alloca i1, i1 0 + %nop14656 = alloca i1, i1 0 + %nop14657 = alloca i1, i1 0 + %nop14658 = alloca i1, i1 0 + %nop14659 = alloca i1, i1 0 + %nop14660 = alloca i1, i1 0 + %nop14661 = alloca i1, i1 0 + %nop14662 = alloca i1, i1 0 + %nop14663 = alloca i1, i1 0 + %nop14664 = alloca i1, i1 0 + %nop14665 = alloca i1, i1 0 + %nop14666 = alloca i1, i1 0 + %nop14667 = alloca i1, i1 0 + %nop14668 = alloca i1, i1 0 + %nop14669 = alloca i1, i1 0 + %nop14670 = alloca i1, i1 0 + %nop14671 = alloca i1, i1 0 + %nop14672 = alloca i1, i1 0 + %nop14673 = alloca i1, i1 0 + %nop14674 = alloca i1, i1 0 + %nop14675 = alloca i1, i1 0 + %nop14676 = alloca i1, i1 0 + %nop14677 = alloca i1, i1 0 + %nop14678 = alloca i1, i1 0 + %nop14679 = alloca i1, i1 0 + %nop14680 = alloca i1, i1 0 + %nop14681 = alloca i1, i1 0 + %nop14682 = alloca i1, i1 0 + %nop14683 = alloca i1, i1 0 + %nop14684 = alloca i1, i1 0 + %nop14685 = alloca i1, i1 0 + %nop14686 = alloca i1, i1 0 + %nop14687 = alloca i1, i1 0 + %nop14688 = alloca i1, i1 0 + %nop14689 = alloca i1, i1 0 + %nop14690 = alloca i1, i1 0 + %nop14691 = alloca i1, i1 0 + %nop14692 = alloca i1, i1 0 + %nop14693 = alloca i1, i1 0 + %nop14694 = alloca i1, i1 0 + %nop14695 = alloca i1, i1 0 + %nop14696 = alloca i1, i1 0 + %nop14697 = alloca i1, i1 0 + %nop14698 = alloca i1, i1 0 + %nop14699 = alloca i1, i1 0 + %nop14700 = alloca i1, i1 0 + %nop14701 = alloca i1, i1 0 + %nop14702 = alloca i1, i1 0 + %nop14703 = alloca i1, i1 0 + %nop14704 = alloca i1, i1 0 + %nop14705 = alloca i1, i1 0 + %nop14706 = alloca i1, i1 0 + %nop14707 = alloca i1, i1 0 + %nop14708 = alloca i1, i1 0 + %nop14709 = alloca i1, i1 0 + %nop14710 = alloca i1, i1 0 + %nop14711 = alloca i1, i1 0 + %nop14712 = alloca i1, i1 0 + %nop14713 = alloca i1, i1 0 + %nop14714 = alloca i1, i1 0 + %nop14715 = alloca i1, i1 0 + %nop14716 = alloca i1, i1 0 + %nop14717 = alloca i1, i1 0 + %nop14718 = alloca 
i1, i1 0 + %nop14719 = alloca i1, i1 0 + %nop14720 = alloca i1, i1 0 + %nop14721 = alloca i1, i1 0 + %nop14722 = alloca i1, i1 0 + %nop14723 = alloca i1, i1 0 + %nop14724 = alloca i1, i1 0 + %nop14725 = alloca i1, i1 0 + %nop14726 = alloca i1, i1 0 + %nop14727 = alloca i1, i1 0 + %nop14728 = alloca i1, i1 0 + %nop14729 = alloca i1, i1 0 + %nop14730 = alloca i1, i1 0 + %nop14731 = alloca i1, i1 0 + %nop14732 = alloca i1, i1 0 + %nop14733 = alloca i1, i1 0 + %nop14734 = alloca i1, i1 0 + %nop14735 = alloca i1, i1 0 + %nop14736 = alloca i1, i1 0 + %nop14737 = alloca i1, i1 0 + %nop14738 = alloca i1, i1 0 + %nop14739 = alloca i1, i1 0 + %nop14740 = alloca i1, i1 0 + %nop14741 = alloca i1, i1 0 + %nop14742 = alloca i1, i1 0 + %nop14743 = alloca i1, i1 0 + %nop14744 = alloca i1, i1 0 + %nop14745 = alloca i1, i1 0 + %nop14746 = alloca i1, i1 0 + %nop14747 = alloca i1, i1 0 + %nop14748 = alloca i1, i1 0 + %nop14749 = alloca i1, i1 0 + %nop14750 = alloca i1, i1 0 + %nop14751 = alloca i1, i1 0 + %nop14752 = alloca i1, i1 0 + %nop14753 = alloca i1, i1 0 + %nop14754 = alloca i1, i1 0 + %nop14755 = alloca i1, i1 0 + %nop14756 = alloca i1, i1 0 + %nop14757 = alloca i1, i1 0 + %nop14758 = alloca i1, i1 0 + %nop14759 = alloca i1, i1 0 + %nop14760 = alloca i1, i1 0 + %nop14761 = alloca i1, i1 0 + %nop14762 = alloca i1, i1 0 + %nop14763 = alloca i1, i1 0 + %nop14764 = alloca i1, i1 0 + %nop14765 = alloca i1, i1 0 + %nop14766 = alloca i1, i1 0 + %nop14767 = alloca i1, i1 0 + %nop14768 = alloca i1, i1 0 + %nop14769 = alloca i1, i1 0 + %nop14770 = alloca i1, i1 0 + %nop14771 = alloca i1, i1 0 + %nop14772 = alloca i1, i1 0 + %nop14773 = alloca i1, i1 0 + %nop14774 = alloca i1, i1 0 + %nop14775 = alloca i1, i1 0 + %nop14776 = alloca i1, i1 0 + %nop14777 = alloca i1, i1 0 + %nop14778 = alloca i1, i1 0 + %nop14779 = alloca i1, i1 0 + %nop14780 = alloca i1, i1 0 + %nop14781 = alloca i1, i1 0 + %nop14782 = alloca i1, i1 0 + %nop14783 = alloca i1, i1 0 + %nop14784 = alloca i1, i1 0 + 
%nop14785 = alloca i1, i1 0 + %nop14786 = alloca i1, i1 0 + %nop14787 = alloca i1, i1 0 + %nop14788 = alloca i1, i1 0 + %nop14789 = alloca i1, i1 0 + %nop14790 = alloca i1, i1 0 + %nop14791 = alloca i1, i1 0 + %nop14792 = alloca i1, i1 0 + %nop14793 = alloca i1, i1 0 + %nop14794 = alloca i1, i1 0 + %nop14795 = alloca i1, i1 0 + %nop14796 = alloca i1, i1 0 + %nop14797 = alloca i1, i1 0 + %nop14798 = alloca i1, i1 0 + %nop14799 = alloca i1, i1 0 + %nop14800 = alloca i1, i1 0 + %nop14801 = alloca i1, i1 0 + %nop14802 = alloca i1, i1 0 + %nop14803 = alloca i1, i1 0 + %nop14804 = alloca i1, i1 0 + %nop14805 = alloca i1, i1 0 + %nop14806 = alloca i1, i1 0 + %nop14807 = alloca i1, i1 0 + %nop14808 = alloca i1, i1 0 + %nop14809 = alloca i1, i1 0 + %nop14810 = alloca i1, i1 0 + %nop14811 = alloca i1, i1 0 + %nop14812 = alloca i1, i1 0 + %nop14813 = alloca i1, i1 0 + %nop14814 = alloca i1, i1 0 + %nop14815 = alloca i1, i1 0 + %nop14816 = alloca i1, i1 0 + %nop14817 = alloca i1, i1 0 + %nop14818 = alloca i1, i1 0 + %nop14819 = alloca i1, i1 0 + %nop14820 = alloca i1, i1 0 + %nop14821 = alloca i1, i1 0 + %nop14822 = alloca i1, i1 0 + %nop14823 = alloca i1, i1 0 + %nop14824 = alloca i1, i1 0 + %nop14825 = alloca i1, i1 0 + %nop14826 = alloca i1, i1 0 + %nop14827 = alloca i1, i1 0 + %nop14828 = alloca i1, i1 0 + %nop14829 = alloca i1, i1 0 + %nop14830 = alloca i1, i1 0 + %nop14831 = alloca i1, i1 0 + %nop14832 = alloca i1, i1 0 + %nop14833 = alloca i1, i1 0 + %nop14834 = alloca i1, i1 0 + %nop14835 = alloca i1, i1 0 + %nop14836 = alloca i1, i1 0 + %nop14837 = alloca i1, i1 0 + %nop14838 = alloca i1, i1 0 + %nop14839 = alloca i1, i1 0 + %nop14840 = alloca i1, i1 0 + %nop14841 = alloca i1, i1 0 + %nop14842 = alloca i1, i1 0 + %nop14843 = alloca i1, i1 0 + %nop14844 = alloca i1, i1 0 + %nop14845 = alloca i1, i1 0 + %nop14846 = alloca i1, i1 0 + %nop14847 = alloca i1, i1 0 + %nop14848 = alloca i1, i1 0 + %nop14849 = alloca i1, i1 0 + %nop14850 = alloca i1, i1 0 + %nop14851 = alloca 
i1, i1 0 + %nop14852 = alloca i1, i1 0 + %nop14853 = alloca i1, i1 0 + %nop14854 = alloca i1, i1 0 + %nop14855 = alloca i1, i1 0 + %nop14856 = alloca i1, i1 0 + %nop14857 = alloca i1, i1 0 + %nop14858 = alloca i1, i1 0 + %nop14859 = alloca i1, i1 0 + %nop14860 = alloca i1, i1 0 + %nop14861 = alloca i1, i1 0 + %nop14862 = alloca i1, i1 0 + %nop14863 = alloca i1, i1 0 + %nop14864 = alloca i1, i1 0 + %nop14865 = alloca i1, i1 0 + %nop14866 = alloca i1, i1 0 + %nop14867 = alloca i1, i1 0 + %nop14868 = alloca i1, i1 0 + %nop14869 = alloca i1, i1 0 + %nop14870 = alloca i1, i1 0 + %nop14871 = alloca i1, i1 0 + %nop14872 = alloca i1, i1 0 + %nop14873 = alloca i1, i1 0 + %nop14874 = alloca i1, i1 0 + %nop14875 = alloca i1, i1 0 + %nop14876 = alloca i1, i1 0 + %nop14877 = alloca i1, i1 0 + %nop14878 = alloca i1, i1 0 + %nop14879 = alloca i1, i1 0 + %nop14880 = alloca i1, i1 0 + %nop14881 = alloca i1, i1 0 + %nop14882 = alloca i1, i1 0 + %nop14883 = alloca i1, i1 0 + %nop14884 = alloca i1, i1 0 + %nop14885 = alloca i1, i1 0 + %nop14886 = alloca i1, i1 0 + %nop14887 = alloca i1, i1 0 + %nop14888 = alloca i1, i1 0 + %nop14889 = alloca i1, i1 0 + %nop14890 = alloca i1, i1 0 + %nop14891 = alloca i1, i1 0 + %nop14892 = alloca i1, i1 0 + %nop14893 = alloca i1, i1 0 + %nop14894 = alloca i1, i1 0 + %nop14895 = alloca i1, i1 0 + %nop14896 = alloca i1, i1 0 + %nop14897 = alloca i1, i1 0 + %nop14898 = alloca i1, i1 0 + %nop14899 = alloca i1, i1 0 + %nop14900 = alloca i1, i1 0 + %nop14901 = alloca i1, i1 0 + %nop14902 = alloca i1, i1 0 + %nop14903 = alloca i1, i1 0 + %nop14904 = alloca i1, i1 0 + %nop14905 = alloca i1, i1 0 + %nop14906 = alloca i1, i1 0 + %nop14907 = alloca i1, i1 0 + %nop14908 = alloca i1, i1 0 + %nop14909 = alloca i1, i1 0 + %nop14910 = alloca i1, i1 0 + %nop14911 = alloca i1, i1 0 + %nop14912 = alloca i1, i1 0 + %nop14913 = alloca i1, i1 0 + %nop14914 = alloca i1, i1 0 + %nop14915 = alloca i1, i1 0 + %nop14916 = alloca i1, i1 0 + %nop14917 = alloca i1, i1 0 + 
%nop14918 = alloca i1, i1 0 + %nop14919 = alloca i1, i1 0 + %nop14920 = alloca i1, i1 0 + %nop14921 = alloca i1, i1 0 + %nop14922 = alloca i1, i1 0 + %nop14923 = alloca i1, i1 0 + %nop14924 = alloca i1, i1 0 + %nop14925 = alloca i1, i1 0 + %nop14926 = alloca i1, i1 0 + %nop14927 = alloca i1, i1 0 + %nop14928 = alloca i1, i1 0 + %nop14929 = alloca i1, i1 0 + %nop14930 = alloca i1, i1 0 + %nop14931 = alloca i1, i1 0 + %nop14932 = alloca i1, i1 0 + %nop14933 = alloca i1, i1 0 + %nop14934 = alloca i1, i1 0 + %nop14935 = alloca i1, i1 0 + %nop14936 = alloca i1, i1 0 + %nop14937 = alloca i1, i1 0 + %nop14938 = alloca i1, i1 0 + %nop14939 = alloca i1, i1 0 + %nop14940 = alloca i1, i1 0 + %nop14941 = alloca i1, i1 0 + %nop14942 = alloca i1, i1 0 + %nop14943 = alloca i1, i1 0 + %nop14944 = alloca i1, i1 0 + %nop14945 = alloca i1, i1 0 + %nop14946 = alloca i1, i1 0 + %nop14947 = alloca i1, i1 0 + %nop14948 = alloca i1, i1 0 + %nop14949 = alloca i1, i1 0 + %nop14950 = alloca i1, i1 0 + %nop14951 = alloca i1, i1 0 + %nop14952 = alloca i1, i1 0 + %nop14953 = alloca i1, i1 0 + %nop14954 = alloca i1, i1 0 + %nop14955 = alloca i1, i1 0 + %nop14956 = alloca i1, i1 0 + %nop14957 = alloca i1, i1 0 + %nop14958 = alloca i1, i1 0 + %nop14959 = alloca i1, i1 0 + %nop14960 = alloca i1, i1 0 + %nop14961 = alloca i1, i1 0 + %nop14962 = alloca i1, i1 0 + %nop14963 = alloca i1, i1 0 + %nop14964 = alloca i1, i1 0 + %nop14965 = alloca i1, i1 0 + %nop14966 = alloca i1, i1 0 + %nop14967 = alloca i1, i1 0 + %nop14968 = alloca i1, i1 0 + %nop14969 = alloca i1, i1 0 + %nop14970 = alloca i1, i1 0 + %nop14971 = alloca i1, i1 0 + %nop14972 = alloca i1, i1 0 + %nop14973 = alloca i1, i1 0 + %nop14974 = alloca i1, i1 0 + %nop14975 = alloca i1, i1 0 + %nop14976 = alloca i1, i1 0 + %nop14977 = alloca i1, i1 0 + %nop14978 = alloca i1, i1 0 + %nop14979 = alloca i1, i1 0 + %nop14980 = alloca i1, i1 0 + %nop14981 = alloca i1, i1 0 + %nop14982 = alloca i1, i1 0 + %nop14983 = alloca i1, i1 0 + %nop14984 = alloca 
i1, i1 0 + %nop14985 = alloca i1, i1 0 + %nop14986 = alloca i1, i1 0 + %nop14987 = alloca i1, i1 0 + %nop14988 = alloca i1, i1 0 + %nop14989 = alloca i1, i1 0 + %nop14990 = alloca i1, i1 0 + %nop14991 = alloca i1, i1 0 + %nop14992 = alloca i1, i1 0 + %nop14993 = alloca i1, i1 0 + %nop14994 = alloca i1, i1 0 + %nop14995 = alloca i1, i1 0 + %nop14996 = alloca i1, i1 0 + %nop14997 = alloca i1, i1 0 + %nop14998 = alloca i1, i1 0 + %nop14999 = alloca i1, i1 0 + %nop15000 = alloca i1, i1 0 + %nop15001 = alloca i1, i1 0 + %nop15002 = alloca i1, i1 0 + %nop15003 = alloca i1, i1 0 + %nop15004 = alloca i1, i1 0 + %nop15005 = alloca i1, i1 0 + %nop15006 = alloca i1, i1 0 + %nop15007 = alloca i1, i1 0 + %nop15008 = alloca i1, i1 0 + %nop15009 = alloca i1, i1 0 + %nop15010 = alloca i1, i1 0 + %nop15011 = alloca i1, i1 0 + %nop15012 = alloca i1, i1 0 + %nop15013 = alloca i1, i1 0 + %nop15014 = alloca i1, i1 0 + %nop15015 = alloca i1, i1 0 + %nop15016 = alloca i1, i1 0 + %nop15017 = alloca i1, i1 0 + %nop15018 = alloca i1, i1 0 + %nop15019 = alloca i1, i1 0 + %nop15020 = alloca i1, i1 0 + %nop15021 = alloca i1, i1 0 + %nop15022 = alloca i1, i1 0 + %nop15023 = alloca i1, i1 0 + %nop15024 = alloca i1, i1 0 + %nop15025 = alloca i1, i1 0 + %nop15026 = alloca i1, i1 0 + %nop15027 = alloca i1, i1 0 + %nop15028 = alloca i1, i1 0 + %nop15029 = alloca i1, i1 0 + %nop15030 = alloca i1, i1 0 + %nop15031 = alloca i1, i1 0 + %nop15032 = alloca i1, i1 0 + %nop15033 = alloca i1, i1 0 + %nop15034 = alloca i1, i1 0 + %nop15035 = alloca i1, i1 0 + %nop15036 = alloca i1, i1 0 + %nop15037 = alloca i1, i1 0 + %nop15038 = alloca i1, i1 0 + %nop15039 = alloca i1, i1 0 + %nop15040 = alloca i1, i1 0 + %nop15041 = alloca i1, i1 0 + %nop15042 = alloca i1, i1 0 + %nop15043 = alloca i1, i1 0 + %nop15044 = alloca i1, i1 0 + %nop15045 = alloca i1, i1 0 + %nop15046 = alloca i1, i1 0 + %nop15047 = alloca i1, i1 0 + %nop15048 = alloca i1, i1 0 + %nop15049 = alloca i1, i1 0 + %nop15050 = alloca i1, i1 0 + 
%nop15051 = alloca i1, i1 0 + %nop15052 = alloca i1, i1 0 + %nop15053 = alloca i1, i1 0 + %nop15054 = alloca i1, i1 0 + %nop15055 = alloca i1, i1 0 + %nop15056 = alloca i1, i1 0 + %nop15057 = alloca i1, i1 0 + %nop15058 = alloca i1, i1 0 + %nop15059 = alloca i1, i1 0 + %nop15060 = alloca i1, i1 0 + %nop15061 = alloca i1, i1 0 + %nop15062 = alloca i1, i1 0 + %nop15063 = alloca i1, i1 0 + %nop15064 = alloca i1, i1 0 + %nop15065 = alloca i1, i1 0 + %nop15066 = alloca i1, i1 0 + %nop15067 = alloca i1, i1 0 + %nop15068 = alloca i1, i1 0 + %nop15069 = alloca i1, i1 0 + %nop15070 = alloca i1, i1 0 + %nop15071 = alloca i1, i1 0 + %nop15072 = alloca i1, i1 0 + %nop15073 = alloca i1, i1 0 + %nop15074 = alloca i1, i1 0 + %nop15075 = alloca i1, i1 0 + %nop15076 = alloca i1, i1 0 + %nop15077 = alloca i1, i1 0 + %nop15078 = alloca i1, i1 0 + %nop15079 = alloca i1, i1 0 + %nop15080 = alloca i1, i1 0 + %nop15081 = alloca i1, i1 0 + %nop15082 = alloca i1, i1 0 + %nop15083 = alloca i1, i1 0 + %nop15084 = alloca i1, i1 0 + %nop15085 = alloca i1, i1 0 + %nop15086 = alloca i1, i1 0 + %nop15087 = alloca i1, i1 0 + %nop15088 = alloca i1, i1 0 + %nop15089 = alloca i1, i1 0 + %nop15090 = alloca i1, i1 0 + %nop15091 = alloca i1, i1 0 + %nop15092 = alloca i1, i1 0 + %nop15093 = alloca i1, i1 0 + %nop15094 = alloca i1, i1 0 + %nop15095 = alloca i1, i1 0 + %nop15096 = alloca i1, i1 0 + %nop15097 = alloca i1, i1 0 + %nop15098 = alloca i1, i1 0 + %nop15099 = alloca i1, i1 0 + %nop15100 = alloca i1, i1 0 + %nop15101 = alloca i1, i1 0 + %nop15102 = alloca i1, i1 0 + %nop15103 = alloca i1, i1 0 + %nop15104 = alloca i1, i1 0 + %nop15105 = alloca i1, i1 0 + %nop15106 = alloca i1, i1 0 + %nop15107 = alloca i1, i1 0 + %nop15108 = alloca i1, i1 0 + %nop15109 = alloca i1, i1 0 + %nop15110 = alloca i1, i1 0 + %nop15111 = alloca i1, i1 0 + %nop15112 = alloca i1, i1 0 + %nop15113 = alloca i1, i1 0 + %nop15114 = alloca i1, i1 0 + %nop15115 = alloca i1, i1 0 + %nop15116 = alloca i1, i1 0 + %nop15117 = alloca 
i1, i1 0 + %nop15118 = alloca i1, i1 0 + %nop15119 = alloca i1, i1 0 + %nop15120 = alloca i1, i1 0 + %nop15121 = alloca i1, i1 0 + %nop15122 = alloca i1, i1 0 + %nop15123 = alloca i1, i1 0 + %nop15124 = alloca i1, i1 0 + %nop15125 = alloca i1, i1 0 + %nop15126 = alloca i1, i1 0 + %nop15127 = alloca i1, i1 0 + %nop15128 = alloca i1, i1 0 + %nop15129 = alloca i1, i1 0 + %nop15130 = alloca i1, i1 0 + %nop15131 = alloca i1, i1 0 + %nop15132 = alloca i1, i1 0 + %nop15133 = alloca i1, i1 0 + %nop15134 = alloca i1, i1 0 + %nop15135 = alloca i1, i1 0 + %nop15136 = alloca i1, i1 0 + %nop15137 = alloca i1, i1 0 + %nop15138 = alloca i1, i1 0 + %nop15139 = alloca i1, i1 0 + %nop15140 = alloca i1, i1 0 + %nop15141 = alloca i1, i1 0 + %nop15142 = alloca i1, i1 0 + %nop15143 = alloca i1, i1 0 + %nop15144 = alloca i1, i1 0 + %nop15145 = alloca i1, i1 0 + %nop15146 = alloca i1, i1 0 + %nop15147 = alloca i1, i1 0 + %nop15148 = alloca i1, i1 0 + %nop15149 = alloca i1, i1 0 + %nop15150 = alloca i1, i1 0 + %nop15151 = alloca i1, i1 0 + %nop15152 = alloca i1, i1 0 + %nop15153 = alloca i1, i1 0 + %nop15154 = alloca i1, i1 0 + %nop15155 = alloca i1, i1 0 + %nop15156 = alloca i1, i1 0 + %nop15157 = alloca i1, i1 0 + %nop15158 = alloca i1, i1 0 + %nop15159 = alloca i1, i1 0 + %nop15160 = alloca i1, i1 0 + %nop15161 = alloca i1, i1 0 + %nop15162 = alloca i1, i1 0 + %nop15163 = alloca i1, i1 0 + %nop15164 = alloca i1, i1 0 + %nop15165 = alloca i1, i1 0 + %nop15166 = alloca i1, i1 0 + %nop15167 = alloca i1, i1 0 + %nop15168 = alloca i1, i1 0 + %nop15169 = alloca i1, i1 0 + %nop15170 = alloca i1, i1 0 + %nop15171 = alloca i1, i1 0 + %nop15172 = alloca i1, i1 0 + %nop15173 = alloca i1, i1 0 + %nop15174 = alloca i1, i1 0 + %nop15175 = alloca i1, i1 0 + %nop15176 = alloca i1, i1 0 + %nop15177 = alloca i1, i1 0 + %nop15178 = alloca i1, i1 0 + %nop15179 = alloca i1, i1 0 + %nop15180 = alloca i1, i1 0 + %nop15181 = alloca i1, i1 0 + %nop15182 = alloca i1, i1 0 + %nop15183 = alloca i1, i1 0 + 
%nop15184 = alloca i1, i1 0 + %nop15185 = alloca i1, i1 0 + %nop15186 = alloca i1, i1 0 + %nop15187 = alloca i1, i1 0 + %nop15188 = alloca i1, i1 0 + %nop15189 = alloca i1, i1 0 + %nop15190 = alloca i1, i1 0 + %nop15191 = alloca i1, i1 0 + %nop15192 = alloca i1, i1 0 + %nop15193 = alloca i1, i1 0 + %nop15194 = alloca i1, i1 0 + %nop15195 = alloca i1, i1 0 + %nop15196 = alloca i1, i1 0 + %nop15197 = alloca i1, i1 0 + %nop15198 = alloca i1, i1 0 + %nop15199 = alloca i1, i1 0 + %nop15200 = alloca i1, i1 0 + %nop15201 = alloca i1, i1 0 + %nop15202 = alloca i1, i1 0 + %nop15203 = alloca i1, i1 0 + %nop15204 = alloca i1, i1 0 + %nop15205 = alloca i1, i1 0 + %nop15206 = alloca i1, i1 0 + %nop15207 = alloca i1, i1 0 + %nop15208 = alloca i1, i1 0 + %nop15209 = alloca i1, i1 0 + %nop15210 = alloca i1, i1 0 + %nop15211 = alloca i1, i1 0 + %nop15212 = alloca i1, i1 0 + %nop15213 = alloca i1, i1 0 + %nop15214 = alloca i1, i1 0 + %nop15215 = alloca i1, i1 0 + %nop15216 = alloca i1, i1 0 + %nop15217 = alloca i1, i1 0 + %nop15218 = alloca i1, i1 0 + %nop15219 = alloca i1, i1 0 + %nop15220 = alloca i1, i1 0 + %nop15221 = alloca i1, i1 0 + %nop15222 = alloca i1, i1 0 + %nop15223 = alloca i1, i1 0 + %nop15224 = alloca i1, i1 0 + %nop15225 = alloca i1, i1 0 + %nop15226 = alloca i1, i1 0 + %nop15227 = alloca i1, i1 0 + %nop15228 = alloca i1, i1 0 + %nop15229 = alloca i1, i1 0 + %nop15230 = alloca i1, i1 0 + %nop15231 = alloca i1, i1 0 + %nop15232 = alloca i1, i1 0 + %nop15233 = alloca i1, i1 0 + %nop15234 = alloca i1, i1 0 + %nop15235 = alloca i1, i1 0 + %nop15236 = alloca i1, i1 0 + %nop15237 = alloca i1, i1 0 + %nop15238 = alloca i1, i1 0 + %nop15239 = alloca i1, i1 0 + %nop15240 = alloca i1, i1 0 + %nop15241 = alloca i1, i1 0 + %nop15242 = alloca i1, i1 0 + %nop15243 = alloca i1, i1 0 + %nop15244 = alloca i1, i1 0 + %nop15245 = alloca i1, i1 0 + %nop15246 = alloca i1, i1 0 + %nop15247 = alloca i1, i1 0 + %nop15248 = alloca i1, i1 0 + %nop15249 = alloca i1, i1 0 + %nop15250 = alloca 
i1, i1 0 + %nop15251 = alloca i1, i1 0 + %nop15252 = alloca i1, i1 0 + %nop15253 = alloca i1, i1 0 + %nop15254 = alloca i1, i1 0 + %nop15255 = alloca i1, i1 0 + %nop15256 = alloca i1, i1 0 + %nop15257 = alloca i1, i1 0 + %nop15258 = alloca i1, i1 0 + %nop15259 = alloca i1, i1 0 + %nop15260 = alloca i1, i1 0 + %nop15261 = alloca i1, i1 0 + %nop15262 = alloca i1, i1 0 + %nop15263 = alloca i1, i1 0 + %nop15264 = alloca i1, i1 0 + %nop15265 = alloca i1, i1 0 + %nop15266 = alloca i1, i1 0 + %nop15267 = alloca i1, i1 0 + %nop15268 = alloca i1, i1 0 + %nop15269 = alloca i1, i1 0 + %nop15270 = alloca i1, i1 0 + %nop15271 = alloca i1, i1 0 + %nop15272 = alloca i1, i1 0 + %nop15273 = alloca i1, i1 0 + %nop15274 = alloca i1, i1 0 + %nop15275 = alloca i1, i1 0 + %nop15276 = alloca i1, i1 0 + %nop15277 = alloca i1, i1 0 + %nop15278 = alloca i1, i1 0 + %nop15279 = alloca i1, i1 0 + %nop15280 = alloca i1, i1 0 + %nop15281 = alloca i1, i1 0 + %nop15282 = alloca i1, i1 0 + %nop15283 = alloca i1, i1 0 + %nop15284 = alloca i1, i1 0 + %nop15285 = alloca i1, i1 0 + %nop15286 = alloca i1, i1 0 + %nop15287 = alloca i1, i1 0 + %nop15288 = alloca i1, i1 0 + %nop15289 = alloca i1, i1 0 + %nop15290 = alloca i1, i1 0 + %nop15291 = alloca i1, i1 0 + %nop15292 = alloca i1, i1 0 + %nop15293 = alloca i1, i1 0 + %nop15294 = alloca i1, i1 0 + %nop15295 = alloca i1, i1 0 + %nop15296 = alloca i1, i1 0 + %nop15297 = alloca i1, i1 0 + %nop15298 = alloca i1, i1 0 + %nop15299 = alloca i1, i1 0 + %nop15300 = alloca i1, i1 0 + %nop15301 = alloca i1, i1 0 + %nop15302 = alloca i1, i1 0 + %nop15303 = alloca i1, i1 0 + %nop15304 = alloca i1, i1 0 + %nop15305 = alloca i1, i1 0 + %nop15306 = alloca i1, i1 0 + %nop15307 = alloca i1, i1 0 + %nop15308 = alloca i1, i1 0 + %nop15309 = alloca i1, i1 0 + %nop15310 = alloca i1, i1 0 + %nop15311 = alloca i1, i1 0 + %nop15312 = alloca i1, i1 0 + %nop15313 = alloca i1, i1 0 + %nop15314 = alloca i1, i1 0 + %nop15315 = alloca i1, i1 0 + %nop15316 = alloca i1, i1 0 + 
%nop15317 = alloca i1, i1 0 + %nop15318 = alloca i1, i1 0 + %nop15319 = alloca i1, i1 0 + %nop15320 = alloca i1, i1 0 + %nop15321 = alloca i1, i1 0 + %nop15322 = alloca i1, i1 0 + %nop15323 = alloca i1, i1 0 + %nop15324 = alloca i1, i1 0 + %nop15325 = alloca i1, i1 0 + %nop15326 = alloca i1, i1 0 + %nop15327 = alloca i1, i1 0 + %nop15328 = alloca i1, i1 0 + %nop15329 = alloca i1, i1 0 + %nop15330 = alloca i1, i1 0 + %nop15331 = alloca i1, i1 0 + %nop15332 = alloca i1, i1 0 + %nop15333 = alloca i1, i1 0 + %nop15334 = alloca i1, i1 0 + %nop15335 = alloca i1, i1 0 + %nop15336 = alloca i1, i1 0 + %nop15337 = alloca i1, i1 0 + %nop15338 = alloca i1, i1 0 + %nop15339 = alloca i1, i1 0 + %nop15340 = alloca i1, i1 0 + %nop15341 = alloca i1, i1 0 + %nop15342 = alloca i1, i1 0 + %nop15343 = alloca i1, i1 0 + %nop15344 = alloca i1, i1 0 + %nop15345 = alloca i1, i1 0 + %nop15346 = alloca i1, i1 0 + %nop15347 = alloca i1, i1 0 + %nop15348 = alloca i1, i1 0 + %nop15349 = alloca i1, i1 0 + %nop15350 = alloca i1, i1 0 + %nop15351 = alloca i1, i1 0 + %nop15352 = alloca i1, i1 0 + %nop15353 = alloca i1, i1 0 + %nop15354 = alloca i1, i1 0 + %nop15355 = alloca i1, i1 0 + %nop15356 = alloca i1, i1 0 + %nop15357 = alloca i1, i1 0 + %nop15358 = alloca i1, i1 0 + %nop15359 = alloca i1, i1 0 + %nop15360 = alloca i1, i1 0 + %nop15361 = alloca i1, i1 0 + %nop15362 = alloca i1, i1 0 + %nop15363 = alloca i1, i1 0 + %nop15364 = alloca i1, i1 0 + %nop15365 = alloca i1, i1 0 + %nop15366 = alloca i1, i1 0 + %nop15367 = alloca i1, i1 0 + %nop15368 = alloca i1, i1 0 + %nop15369 = alloca i1, i1 0 + %nop15370 = alloca i1, i1 0 + %nop15371 = alloca i1, i1 0 + %nop15372 = alloca i1, i1 0 + %nop15373 = alloca i1, i1 0 + %nop15374 = alloca i1, i1 0 + %nop15375 = alloca i1, i1 0 + %nop15376 = alloca i1, i1 0 + %nop15377 = alloca i1, i1 0 + %nop15378 = alloca i1, i1 0 + %nop15379 = alloca i1, i1 0 + %nop15380 = alloca i1, i1 0 + %nop15381 = alloca i1, i1 0 + %nop15382 = alloca i1, i1 0 + %nop15383 = alloca 
i1, i1 0 + %nop15384 = alloca i1, i1 0 + %nop15385 = alloca i1, i1 0 + %nop15386 = alloca i1, i1 0 + %nop15387 = alloca i1, i1 0 + %nop15388 = alloca i1, i1 0 + %nop15389 = alloca i1, i1 0 + %nop15390 = alloca i1, i1 0 + %nop15391 = alloca i1, i1 0 + %nop15392 = alloca i1, i1 0 + %nop15393 = alloca i1, i1 0 + %nop15394 = alloca i1, i1 0 + %nop15395 = alloca i1, i1 0 + %nop15396 = alloca i1, i1 0 + %nop15397 = alloca i1, i1 0 + %nop15398 = alloca i1, i1 0 + %nop15399 = alloca i1, i1 0 + %nop15400 = alloca i1, i1 0 + %nop15401 = alloca i1, i1 0 + %nop15402 = alloca i1, i1 0 + %nop15403 = alloca i1, i1 0 + %nop15404 = alloca i1, i1 0 + %nop15405 = alloca i1, i1 0 + %nop15406 = alloca i1, i1 0 + %nop15407 = alloca i1, i1 0 + %nop15408 = alloca i1, i1 0 + %nop15409 = alloca i1, i1 0 + %nop15410 = alloca i1, i1 0 + %nop15411 = alloca i1, i1 0 + %nop15412 = alloca i1, i1 0 + %nop15413 = alloca i1, i1 0 + %nop15414 = alloca i1, i1 0 + %nop15415 = alloca i1, i1 0 + %nop15416 = alloca i1, i1 0 + %nop15417 = alloca i1, i1 0 + %nop15418 = alloca i1, i1 0 + %nop15419 = alloca i1, i1 0 + %nop15420 = alloca i1, i1 0 + %nop15421 = alloca i1, i1 0 + %nop15422 = alloca i1, i1 0 + %nop15423 = alloca i1, i1 0 + %nop15424 = alloca i1, i1 0 + %nop15425 = alloca i1, i1 0 + %nop15426 = alloca i1, i1 0 + %nop15427 = alloca i1, i1 0 + %nop15428 = alloca i1, i1 0 + %nop15429 = alloca i1, i1 0 + %nop15430 = alloca i1, i1 0 + %nop15431 = alloca i1, i1 0 + %nop15432 = alloca i1, i1 0 + %nop15433 = alloca i1, i1 0 + %nop15434 = alloca i1, i1 0 + %nop15435 = alloca i1, i1 0 + %nop15436 = alloca i1, i1 0 + %nop15437 = alloca i1, i1 0 + %nop15438 = alloca i1, i1 0 + %nop15439 = alloca i1, i1 0 + %nop15440 = alloca i1, i1 0 + %nop15441 = alloca i1, i1 0 + %nop15442 = alloca i1, i1 0 + %nop15443 = alloca i1, i1 0 + %nop15444 = alloca i1, i1 0 + %nop15445 = alloca i1, i1 0 + %nop15446 = alloca i1, i1 0 + %nop15447 = alloca i1, i1 0 + %nop15448 = alloca i1, i1 0 + %nop15449 = alloca i1, i1 0 + 
%nop15450 = alloca i1, i1 0 + %nop15451 = alloca i1, i1 0 + %nop15452 = alloca i1, i1 0 + %nop15453 = alloca i1, i1 0 + %nop15454 = alloca i1, i1 0 + %nop15455 = alloca i1, i1 0 + %nop15456 = alloca i1, i1 0 + %nop15457 = alloca i1, i1 0 + %nop15458 = alloca i1, i1 0 + %nop15459 = alloca i1, i1 0 + %nop15460 = alloca i1, i1 0 + %nop15461 = alloca i1, i1 0 + %nop15462 = alloca i1, i1 0 + %nop15463 = alloca i1, i1 0 + %nop15464 = alloca i1, i1 0 + %nop15465 = alloca i1, i1 0 + %nop15466 = alloca i1, i1 0 + %nop15467 = alloca i1, i1 0 + %nop15468 = alloca i1, i1 0 + %nop15469 = alloca i1, i1 0 + %nop15470 = alloca i1, i1 0 + %nop15471 = alloca i1, i1 0 + %nop15472 = alloca i1, i1 0 + %nop15473 = alloca i1, i1 0 + %nop15474 = alloca i1, i1 0 + %nop15475 = alloca i1, i1 0 + %nop15476 = alloca i1, i1 0 + %nop15477 = alloca i1, i1 0 + %nop15478 = alloca i1, i1 0 + %nop15479 = alloca i1, i1 0 + %nop15480 = alloca i1, i1 0 + %nop15481 = alloca i1, i1 0 + %nop15482 = alloca i1, i1 0 + %nop15483 = alloca i1, i1 0 + %nop15484 = alloca i1, i1 0 + %nop15485 = alloca i1, i1 0 + %nop15486 = alloca i1, i1 0 + %nop15487 = alloca i1, i1 0 + %nop15488 = alloca i1, i1 0 + %nop15489 = alloca i1, i1 0 + %nop15490 = alloca i1, i1 0 + %nop15491 = alloca i1, i1 0 + %nop15492 = alloca i1, i1 0 + %nop15493 = alloca i1, i1 0 + %nop15494 = alloca i1, i1 0 + %nop15495 = alloca i1, i1 0 + %nop15496 = alloca i1, i1 0 + %nop15497 = alloca i1, i1 0 + %nop15498 = alloca i1, i1 0 + %nop15499 = alloca i1, i1 0 + %nop15500 = alloca i1, i1 0 + %nop15501 = alloca i1, i1 0 + %nop15502 = alloca i1, i1 0 + %nop15503 = alloca i1, i1 0 + %nop15504 = alloca i1, i1 0 + %nop15505 = alloca i1, i1 0 + %nop15506 = alloca i1, i1 0 + %nop15507 = alloca i1, i1 0 + %nop15508 = alloca i1, i1 0 + %nop15509 = alloca i1, i1 0 + %nop15510 = alloca i1, i1 0 + %nop15511 = alloca i1, i1 0 + %nop15512 = alloca i1, i1 0 + %nop15513 = alloca i1, i1 0 + %nop15514 = alloca i1, i1 0 + %nop15515 = alloca i1, i1 0 + %nop15516 = alloca 
i1, i1 0 + %nop15517 = alloca i1, i1 0 + %nop15518 = alloca i1, i1 0 + %nop15519 = alloca i1, i1 0 + %nop15520 = alloca i1, i1 0 + %nop15521 = alloca i1, i1 0 + %nop15522 = alloca i1, i1 0 + %nop15523 = alloca i1, i1 0 + %nop15524 = alloca i1, i1 0 + %nop15525 = alloca i1, i1 0 + %nop15526 = alloca i1, i1 0 + %nop15527 = alloca i1, i1 0 + %nop15528 = alloca i1, i1 0 + %nop15529 = alloca i1, i1 0 + %nop15530 = alloca i1, i1 0 + %nop15531 = alloca i1, i1 0 + %nop15532 = alloca i1, i1 0 + %nop15533 = alloca i1, i1 0 + %nop15534 = alloca i1, i1 0 + %nop15535 = alloca i1, i1 0 + %nop15536 = alloca i1, i1 0 + %nop15537 = alloca i1, i1 0 + %nop15538 = alloca i1, i1 0 + %nop15539 = alloca i1, i1 0 + %nop15540 = alloca i1, i1 0 + %nop15541 = alloca i1, i1 0 + %nop15542 = alloca i1, i1 0 + %nop15543 = alloca i1, i1 0 + %nop15544 = alloca i1, i1 0 + %nop15545 = alloca i1, i1 0 + %nop15546 = alloca i1, i1 0 + %nop15547 = alloca i1, i1 0 + %nop15548 = alloca i1, i1 0 + %nop15549 = alloca i1, i1 0 + %nop15550 = alloca i1, i1 0 + %nop15551 = alloca i1, i1 0 + %nop15552 = alloca i1, i1 0 + %nop15553 = alloca i1, i1 0 + %nop15554 = alloca i1, i1 0 + %nop15555 = alloca i1, i1 0 + %nop15556 = alloca i1, i1 0 + %nop15557 = alloca i1, i1 0 + %nop15558 = alloca i1, i1 0 + %nop15559 = alloca i1, i1 0 + %nop15560 = alloca i1, i1 0 + %nop15561 = alloca i1, i1 0 + %nop15562 = alloca i1, i1 0 + %nop15563 = alloca i1, i1 0 + %nop15564 = alloca i1, i1 0 + %nop15565 = alloca i1, i1 0 + %nop15566 = alloca i1, i1 0 + %nop15567 = alloca i1, i1 0 + %nop15568 = alloca i1, i1 0 + %nop15569 = alloca i1, i1 0 + %nop15570 = alloca i1, i1 0 + %nop15571 = alloca i1, i1 0 + %nop15572 = alloca i1, i1 0 + %nop15573 = alloca i1, i1 0 + %nop15574 = alloca i1, i1 0 + %nop15575 = alloca i1, i1 0 + %nop15576 = alloca i1, i1 0 + %nop15577 = alloca i1, i1 0 + %nop15578 = alloca i1, i1 0 + %nop15579 = alloca i1, i1 0 + %nop15580 = alloca i1, i1 0 + %nop15581 = alloca i1, i1 0 + %nop15582 = alloca i1, i1 0 + 
%nop15583 = alloca i1, i1 0 + %nop15584 = alloca i1, i1 0 + %nop15585 = alloca i1, i1 0 + %nop15586 = alloca i1, i1 0 + %nop15587 = alloca i1, i1 0 + %nop15588 = alloca i1, i1 0 + %nop15589 = alloca i1, i1 0 + %nop15590 = alloca i1, i1 0 + %nop15591 = alloca i1, i1 0 + %nop15592 = alloca i1, i1 0 + %nop15593 = alloca i1, i1 0 + %nop15594 = alloca i1, i1 0 + %nop15595 = alloca i1, i1 0 + %nop15596 = alloca i1, i1 0 + %nop15597 = alloca i1, i1 0 + %nop15598 = alloca i1, i1 0 + %nop15599 = alloca i1, i1 0 + %nop15600 = alloca i1, i1 0 + %nop15601 = alloca i1, i1 0 + %nop15602 = alloca i1, i1 0 + %nop15603 = alloca i1, i1 0 + %nop15604 = alloca i1, i1 0 + %nop15605 = alloca i1, i1 0 + %nop15606 = alloca i1, i1 0 + %nop15607 = alloca i1, i1 0 + %nop15608 = alloca i1, i1 0 + %nop15609 = alloca i1, i1 0 + %nop15610 = alloca i1, i1 0 + %nop15611 = alloca i1, i1 0 + %nop15612 = alloca i1, i1 0 + %nop15613 = alloca i1, i1 0 + %nop15614 = alloca i1, i1 0 + %nop15615 = alloca i1, i1 0 + %nop15616 = alloca i1, i1 0 + %nop15617 = alloca i1, i1 0 + %nop15618 = alloca i1, i1 0 + %nop15619 = alloca i1, i1 0 + %nop15620 = alloca i1, i1 0 + %nop15621 = alloca i1, i1 0 + %nop15622 = alloca i1, i1 0 + %nop15623 = alloca i1, i1 0 + %nop15624 = alloca i1, i1 0 + %nop15625 = alloca i1, i1 0 + %nop15626 = alloca i1, i1 0 + %nop15627 = alloca i1, i1 0 + %nop15628 = alloca i1, i1 0 + %nop15629 = alloca i1, i1 0 + %nop15630 = alloca i1, i1 0 + %nop15631 = alloca i1, i1 0 + %nop15632 = alloca i1, i1 0 + %nop15633 = alloca i1, i1 0 + %nop15634 = alloca i1, i1 0 + %nop15635 = alloca i1, i1 0 + %nop15636 = alloca i1, i1 0 + %nop15637 = alloca i1, i1 0 + %nop15638 = alloca i1, i1 0 + %nop15639 = alloca i1, i1 0 + %nop15640 = alloca i1, i1 0 + %nop15641 = alloca i1, i1 0 + %nop15642 = alloca i1, i1 0 + %nop15643 = alloca i1, i1 0 + %nop15644 = alloca i1, i1 0 + %nop15645 = alloca i1, i1 0 + %nop15646 = alloca i1, i1 0 + %nop15647 = alloca i1, i1 0 + %nop15648 = alloca i1, i1 0 + %nop15649 = alloca 
i1, i1 0 + %nop15650 = alloca i1, i1 0 + %nop15651 = alloca i1, i1 0 + %nop15652 = alloca i1, i1 0 + %nop15653 = alloca i1, i1 0 + %nop15654 = alloca i1, i1 0 + %nop15655 = alloca i1, i1 0 + %nop15656 = alloca i1, i1 0 + %nop15657 = alloca i1, i1 0 + %nop15658 = alloca i1, i1 0 + %nop15659 = alloca i1, i1 0 + %nop15660 = alloca i1, i1 0 + %nop15661 = alloca i1, i1 0 + %nop15662 = alloca i1, i1 0 + %nop15663 = alloca i1, i1 0 + %nop15664 = alloca i1, i1 0 + %nop15665 = alloca i1, i1 0 + %nop15666 = alloca i1, i1 0 + %nop15667 = alloca i1, i1 0 + %nop15668 = alloca i1, i1 0 + %nop15669 = alloca i1, i1 0 + %nop15670 = alloca i1, i1 0 + %nop15671 = alloca i1, i1 0 + %nop15672 = alloca i1, i1 0 + %nop15673 = alloca i1, i1 0 + %nop15674 = alloca i1, i1 0 + %nop15675 = alloca i1, i1 0 + %nop15676 = alloca i1, i1 0 + %nop15677 = alloca i1, i1 0 + %nop15678 = alloca i1, i1 0 + %nop15679 = alloca i1, i1 0 + %nop15680 = alloca i1, i1 0 + %nop15681 = alloca i1, i1 0 + %nop15682 = alloca i1, i1 0 + %nop15683 = alloca i1, i1 0 + %nop15684 = alloca i1, i1 0 + %nop15685 = alloca i1, i1 0 + %nop15686 = alloca i1, i1 0 + %nop15687 = alloca i1, i1 0 + %nop15688 = alloca i1, i1 0 + %nop15689 = alloca i1, i1 0 + %nop15690 = alloca i1, i1 0 + %nop15691 = alloca i1, i1 0 + %nop15692 = alloca i1, i1 0 + %nop15693 = alloca i1, i1 0 + %nop15694 = alloca i1, i1 0 + %nop15695 = alloca i1, i1 0 + %nop15696 = alloca i1, i1 0 + %nop15697 = alloca i1, i1 0 + %nop15698 = alloca i1, i1 0 + %nop15699 = alloca i1, i1 0 + %nop15700 = alloca i1, i1 0 + %nop15701 = alloca i1, i1 0 + %nop15702 = alloca i1, i1 0 + %nop15703 = alloca i1, i1 0 + %nop15704 = alloca i1, i1 0 + %nop15705 = alloca i1, i1 0 + %nop15706 = alloca i1, i1 0 + %nop15707 = alloca i1, i1 0 + %nop15708 = alloca i1, i1 0 + %nop15709 = alloca i1, i1 0 + %nop15710 = alloca i1, i1 0 + %nop15711 = alloca i1, i1 0 + %nop15712 = alloca i1, i1 0 + %nop15713 = alloca i1, i1 0 + %nop15714 = alloca i1, i1 0 + %nop15715 = alloca i1, i1 0 + 
%nop15716 = alloca i1, i1 0 + %nop15717 = alloca i1, i1 0 + %nop15718 = alloca i1, i1 0 + %nop15719 = alloca i1, i1 0 + %nop15720 = alloca i1, i1 0 + %nop15721 = alloca i1, i1 0 + %nop15722 = alloca i1, i1 0 + %nop15723 = alloca i1, i1 0 + %nop15724 = alloca i1, i1 0 + %nop15725 = alloca i1, i1 0 + %nop15726 = alloca i1, i1 0 + %nop15727 = alloca i1, i1 0 + %nop15728 = alloca i1, i1 0 + %nop15729 = alloca i1, i1 0 + %nop15730 = alloca i1, i1 0 + %nop15731 = alloca i1, i1 0 + %nop15732 = alloca i1, i1 0 + %nop15733 = alloca i1, i1 0 + %nop15734 = alloca i1, i1 0 + %nop15735 = alloca i1, i1 0 + %nop15736 = alloca i1, i1 0 + %nop15737 = alloca i1, i1 0 + %nop15738 = alloca i1, i1 0 + %nop15739 = alloca i1, i1 0 + %nop15740 = alloca i1, i1 0 + %nop15741 = alloca i1, i1 0 + %nop15742 = alloca i1, i1 0 + %nop15743 = alloca i1, i1 0 + %nop15744 = alloca i1, i1 0 + %nop15745 = alloca i1, i1 0 + %nop15746 = alloca i1, i1 0 + %nop15747 = alloca i1, i1 0 + %nop15748 = alloca i1, i1 0 + %nop15749 = alloca i1, i1 0 + %nop15750 = alloca i1, i1 0 + %nop15751 = alloca i1, i1 0 + %nop15752 = alloca i1, i1 0 + %nop15753 = alloca i1, i1 0 + %nop15754 = alloca i1, i1 0 + %nop15755 = alloca i1, i1 0 + %nop15756 = alloca i1, i1 0 + %nop15757 = alloca i1, i1 0 + %nop15758 = alloca i1, i1 0 + %nop15759 = alloca i1, i1 0 + %nop15760 = alloca i1, i1 0 + %nop15761 = alloca i1, i1 0 + %nop15762 = alloca i1, i1 0 + %nop15763 = alloca i1, i1 0 + %nop15764 = alloca i1, i1 0 + %nop15765 = alloca i1, i1 0 + %nop15766 = alloca i1, i1 0 + %nop15767 = alloca i1, i1 0 + %nop15768 = alloca i1, i1 0 + %nop15769 = alloca i1, i1 0 + %nop15770 = alloca i1, i1 0 + %nop15771 = alloca i1, i1 0 + %nop15772 = alloca i1, i1 0 + %nop15773 = alloca i1, i1 0 + %nop15774 = alloca i1, i1 0 + %nop15775 = alloca i1, i1 0 + %nop15776 = alloca i1, i1 0 + %nop15777 = alloca i1, i1 0 + %nop15778 = alloca i1, i1 0 + %nop15779 = alloca i1, i1 0 + %nop15780 = alloca i1, i1 0 + %nop15781 = alloca i1, i1 0 + %nop15782 = alloca 
i1, i1 0 + %nop15783 = alloca i1, i1 0 + %nop15784 = alloca i1, i1 0 + %nop15785 = alloca i1, i1 0 + %nop15786 = alloca i1, i1 0 + %nop15787 = alloca i1, i1 0 + %nop15788 = alloca i1, i1 0 + %nop15789 = alloca i1, i1 0 + %nop15790 = alloca i1, i1 0 + %nop15791 = alloca i1, i1 0 + %nop15792 = alloca i1, i1 0 + %nop15793 = alloca i1, i1 0 + %nop15794 = alloca i1, i1 0 + %nop15795 = alloca i1, i1 0 + %nop15796 = alloca i1, i1 0 + %nop15797 = alloca i1, i1 0 + %nop15798 = alloca i1, i1 0 + %nop15799 = alloca i1, i1 0 + %nop15800 = alloca i1, i1 0 + %nop15801 = alloca i1, i1 0 + %nop15802 = alloca i1, i1 0 + %nop15803 = alloca i1, i1 0 + %nop15804 = alloca i1, i1 0 + %nop15805 = alloca i1, i1 0 + %nop15806 = alloca i1, i1 0 + %nop15807 = alloca i1, i1 0 + %nop15808 = alloca i1, i1 0 + %nop15809 = alloca i1, i1 0 + %nop15810 = alloca i1, i1 0 + %nop15811 = alloca i1, i1 0 + %nop15812 = alloca i1, i1 0 + %nop15813 = alloca i1, i1 0 + %nop15814 = alloca i1, i1 0 + %nop15815 = alloca i1, i1 0 + %nop15816 = alloca i1, i1 0 + %nop15817 = alloca i1, i1 0 + %nop15818 = alloca i1, i1 0 + %nop15819 = alloca i1, i1 0 + %nop15820 = alloca i1, i1 0 + %nop15821 = alloca i1, i1 0 + %nop15822 = alloca i1, i1 0 + %nop15823 = alloca i1, i1 0 + %nop15824 = alloca i1, i1 0 + %nop15825 = alloca i1, i1 0 + %nop15826 = alloca i1, i1 0 + %nop15827 = alloca i1, i1 0 + %nop15828 = alloca i1, i1 0 + %nop15829 = alloca i1, i1 0 + %nop15830 = alloca i1, i1 0 + %nop15831 = alloca i1, i1 0 + %nop15832 = alloca i1, i1 0 + %nop15833 = alloca i1, i1 0 + %nop15834 = alloca i1, i1 0 + %nop15835 = alloca i1, i1 0 + %nop15836 = alloca i1, i1 0 + %nop15837 = alloca i1, i1 0 + %nop15838 = alloca i1, i1 0 + %nop15839 = alloca i1, i1 0 + %nop15840 = alloca i1, i1 0 + %nop15841 = alloca i1, i1 0 + %nop15842 = alloca i1, i1 0 + %nop15843 = alloca i1, i1 0 + %nop15844 = alloca i1, i1 0 + %nop15845 = alloca i1, i1 0 + %nop15846 = alloca i1, i1 0 + %nop15847 = alloca i1, i1 0 + %nop15848 = alloca i1, i1 0 + 
%nop15849 = alloca i1, i1 0 + %nop15850 = alloca i1, i1 0 + %nop15851 = alloca i1, i1 0 + %nop15852 = alloca i1, i1 0 + %nop15853 = alloca i1, i1 0 + %nop15854 = alloca i1, i1 0 + %nop15855 = alloca i1, i1 0 + %nop15856 = alloca i1, i1 0 + %nop15857 = alloca i1, i1 0 + %nop15858 = alloca i1, i1 0 + %nop15859 = alloca i1, i1 0 + %nop15860 = alloca i1, i1 0 + %nop15861 = alloca i1, i1 0 + %nop15862 = alloca i1, i1 0 + %nop15863 = alloca i1, i1 0 + %nop15864 = alloca i1, i1 0 + %nop15865 = alloca i1, i1 0 + %nop15866 = alloca i1, i1 0 + %nop15867 = alloca i1, i1 0 + %nop15868 = alloca i1, i1 0 + %nop15869 = alloca i1, i1 0 + %nop15870 = alloca i1, i1 0 + %nop15871 = alloca i1, i1 0 + %nop15872 = alloca i1, i1 0 + %nop15873 = alloca i1, i1 0 + %nop15874 = alloca i1, i1 0 + %nop15875 = alloca i1, i1 0 + %nop15876 = alloca i1, i1 0 + %nop15877 = alloca i1, i1 0 + %nop15878 = alloca i1, i1 0 + %nop15879 = alloca i1, i1 0 + %nop15880 = alloca i1, i1 0 + %nop15881 = alloca i1, i1 0 + %nop15882 = alloca i1, i1 0 + %nop15883 = alloca i1, i1 0 + %nop15884 = alloca i1, i1 0 + %nop15885 = alloca i1, i1 0 + %nop15886 = alloca i1, i1 0 + %nop15887 = alloca i1, i1 0 + %nop15888 = alloca i1, i1 0 + %nop15889 = alloca i1, i1 0 + %nop15890 = alloca i1, i1 0 + %nop15891 = alloca i1, i1 0 + %nop15892 = alloca i1, i1 0 + %nop15893 = alloca i1, i1 0 + %nop15894 = alloca i1, i1 0 + %nop15895 = alloca i1, i1 0 + %nop15896 = alloca i1, i1 0 + %nop15897 = alloca i1, i1 0 + %nop15898 = alloca i1, i1 0 + %nop15899 = alloca i1, i1 0 + %nop15900 = alloca i1, i1 0 + %nop15901 = alloca i1, i1 0 + %nop15902 = alloca i1, i1 0 + %nop15903 = alloca i1, i1 0 + %nop15904 = alloca i1, i1 0 + %nop15905 = alloca i1, i1 0 + %nop15906 = alloca i1, i1 0 + %nop15907 = alloca i1, i1 0 + %nop15908 = alloca i1, i1 0 + %nop15909 = alloca i1, i1 0 + %nop15910 = alloca i1, i1 0 + %nop15911 = alloca i1, i1 0 + %nop15912 = alloca i1, i1 0 + %nop15913 = alloca i1, i1 0 + %nop15914 = alloca i1, i1 0 + %nop15915 = alloca 
i1, i1 0 + %nop15916 = alloca i1, i1 0 + %nop15917 = alloca i1, i1 0 + %nop15918 = alloca i1, i1 0 + %nop15919 = alloca i1, i1 0 + %nop15920 = alloca i1, i1 0 + %nop15921 = alloca i1, i1 0 + %nop15922 = alloca i1, i1 0 + %nop15923 = alloca i1, i1 0 + %nop15924 = alloca i1, i1 0 + %nop15925 = alloca i1, i1 0 + %nop15926 = alloca i1, i1 0 + %nop15927 = alloca i1, i1 0 + %nop15928 = alloca i1, i1 0 + %nop15929 = alloca i1, i1 0 + %nop15930 = alloca i1, i1 0 + %nop15931 = alloca i1, i1 0 + %nop15932 = alloca i1, i1 0 + %nop15933 = alloca i1, i1 0 + %nop15934 = alloca i1, i1 0 + %nop15935 = alloca i1, i1 0 + %nop15936 = alloca i1, i1 0 + %nop15937 = alloca i1, i1 0 + %nop15938 = alloca i1, i1 0 + %nop15939 = alloca i1, i1 0 + %nop15940 = alloca i1, i1 0 + %nop15941 = alloca i1, i1 0 + %nop15942 = alloca i1, i1 0 + %nop15943 = alloca i1, i1 0 + %nop15944 = alloca i1, i1 0 + %nop15945 = alloca i1, i1 0 + %nop15946 = alloca i1, i1 0 + %nop15947 = alloca i1, i1 0 + %nop15948 = alloca i1, i1 0 + %nop15949 = alloca i1, i1 0 + %nop15950 = alloca i1, i1 0 + %nop15951 = alloca i1, i1 0 + %nop15952 = alloca i1, i1 0 + %nop15953 = alloca i1, i1 0 + %nop15954 = alloca i1, i1 0 + %nop15955 = alloca i1, i1 0 + %nop15956 = alloca i1, i1 0 + %nop15957 = alloca i1, i1 0 + %nop15958 = alloca i1, i1 0 + %nop15959 = alloca i1, i1 0 + %nop15960 = alloca i1, i1 0 + %nop15961 = alloca i1, i1 0 + %nop15962 = alloca i1, i1 0 + %nop15963 = alloca i1, i1 0 + %nop15964 = alloca i1, i1 0 + %nop15965 = alloca i1, i1 0 + %nop15966 = alloca i1, i1 0 + %nop15967 = alloca i1, i1 0 + %nop15968 = alloca i1, i1 0 + %nop15969 = alloca i1, i1 0 + %nop15970 = alloca i1, i1 0 + %nop15971 = alloca i1, i1 0 + %nop15972 = alloca i1, i1 0 + %nop15973 = alloca i1, i1 0 + %nop15974 = alloca i1, i1 0 + %nop15975 = alloca i1, i1 0 + %nop15976 = alloca i1, i1 0 + %nop15977 = alloca i1, i1 0 + %nop15978 = alloca i1, i1 0 + %nop15979 = alloca i1, i1 0 + %nop15980 = alloca i1, i1 0 + %nop15981 = alloca i1, i1 0 + 
%nop15982 = alloca i1, i1 0 + %nop15983 = alloca i1, i1 0 + %nop15984 = alloca i1, i1 0 + %nop15985 = alloca i1, i1 0 + %nop15986 = alloca i1, i1 0 + %nop15987 = alloca i1, i1 0 + %nop15988 = alloca i1, i1 0 + %nop15989 = alloca i1, i1 0 + %nop15990 = alloca i1, i1 0 + %nop15991 = alloca i1, i1 0 + %nop15992 = alloca i1, i1 0 + %nop15993 = alloca i1, i1 0 + %nop15994 = alloca i1, i1 0 + %nop15995 = alloca i1, i1 0 + %nop15996 = alloca i1, i1 0 + %nop15997 = alloca i1, i1 0 + %nop15998 = alloca i1, i1 0 + %nop15999 = alloca i1, i1 0 + %nop16000 = alloca i1, i1 0 + %nop16001 = alloca i1, i1 0 + %nop16002 = alloca i1, i1 0 + %nop16003 = alloca i1, i1 0 + %nop16004 = alloca i1, i1 0 + %nop16005 = alloca i1, i1 0 + %nop16006 = alloca i1, i1 0 + %nop16007 = alloca i1, i1 0 + %nop16008 = alloca i1, i1 0 + %nop16009 = alloca i1, i1 0 + %nop16010 = alloca i1, i1 0 + %nop16011 = alloca i1, i1 0 + %nop16012 = alloca i1, i1 0 + %nop16013 = alloca i1, i1 0 + %nop16014 = alloca i1, i1 0 + %nop16015 = alloca i1, i1 0 + %nop16016 = alloca i1, i1 0 + %nop16017 = alloca i1, i1 0 + %nop16018 = alloca i1, i1 0 + %nop16019 = alloca i1, i1 0 + %nop16020 = alloca i1, i1 0 + %nop16021 = alloca i1, i1 0 + %nop16022 = alloca i1, i1 0 + %nop16023 = alloca i1, i1 0 + %nop16024 = alloca i1, i1 0 + %nop16025 = alloca i1, i1 0 + %nop16026 = alloca i1, i1 0 + %nop16027 = alloca i1, i1 0 + %nop16028 = alloca i1, i1 0 + %nop16029 = alloca i1, i1 0 + %nop16030 = alloca i1, i1 0 + %nop16031 = alloca i1, i1 0 + %nop16032 = alloca i1, i1 0 + %nop16033 = alloca i1, i1 0 + %nop16034 = alloca i1, i1 0 + %nop16035 = alloca i1, i1 0 + %nop16036 = alloca i1, i1 0 + %nop16037 = alloca i1, i1 0 + %nop16038 = alloca i1, i1 0 + %nop16039 = alloca i1, i1 0 + %nop16040 = alloca i1, i1 0 + %nop16041 = alloca i1, i1 0 + %nop16042 = alloca i1, i1 0 + %nop16043 = alloca i1, i1 0 + %nop16044 = alloca i1, i1 0 + %nop16045 = alloca i1, i1 0 + %nop16046 = alloca i1, i1 0 + %nop16047 = alloca i1, i1 0 + %nop16048 = alloca 
i1, i1 0 + %nop16049 = alloca i1, i1 0 + %nop16050 = alloca i1, i1 0 + %nop16051 = alloca i1, i1 0 + %nop16052 = alloca i1, i1 0 + %nop16053 = alloca i1, i1 0 + %nop16054 = alloca i1, i1 0 + %nop16055 = alloca i1, i1 0 + %nop16056 = alloca i1, i1 0 + %nop16057 = alloca i1, i1 0 + %nop16058 = alloca i1, i1 0 + %nop16059 = alloca i1, i1 0 + %nop16060 = alloca i1, i1 0 + %nop16061 = alloca i1, i1 0 + %nop16062 = alloca i1, i1 0 + %nop16063 = alloca i1, i1 0 + %nop16064 = alloca i1, i1 0 + %nop16065 = alloca i1, i1 0 + %nop16066 = alloca i1, i1 0 + %nop16067 = alloca i1, i1 0 + %nop16068 = alloca i1, i1 0 + %nop16069 = alloca i1, i1 0 + %nop16070 = alloca i1, i1 0 + %nop16071 = alloca i1, i1 0 + %nop16072 = alloca i1, i1 0 + %nop16073 = alloca i1, i1 0 + %nop16074 = alloca i1, i1 0 + %nop16075 = alloca i1, i1 0 + %nop16076 = alloca i1, i1 0 + %nop16077 = alloca i1, i1 0 + %nop16078 = alloca i1, i1 0 + %nop16079 = alloca i1, i1 0 + %nop16080 = alloca i1, i1 0 + %nop16081 = alloca i1, i1 0 + %nop16082 = alloca i1, i1 0 + %nop16083 = alloca i1, i1 0 + %nop16084 = alloca i1, i1 0 + %nop16085 = alloca i1, i1 0 + %nop16086 = alloca i1, i1 0 + %nop16087 = alloca i1, i1 0 + %nop16088 = alloca i1, i1 0 + %nop16089 = alloca i1, i1 0 + %nop16090 = alloca i1, i1 0 + %nop16091 = alloca i1, i1 0 + %nop16092 = alloca i1, i1 0 + %nop16093 = alloca i1, i1 0 + %nop16094 = alloca i1, i1 0 + %nop16095 = alloca i1, i1 0 + %nop16096 = alloca i1, i1 0 + %nop16097 = alloca i1, i1 0 + %nop16098 = alloca i1, i1 0 + %nop16099 = alloca i1, i1 0 + %nop16100 = alloca i1, i1 0 + %nop16101 = alloca i1, i1 0 + %nop16102 = alloca i1, i1 0 + %nop16103 = alloca i1, i1 0 + %nop16104 = alloca i1, i1 0 + %nop16105 = alloca i1, i1 0 + %nop16106 = alloca i1, i1 0 + %nop16107 = alloca i1, i1 0 + %nop16108 = alloca i1, i1 0 + %nop16109 = alloca i1, i1 0 + %nop16110 = alloca i1, i1 0 + %nop16111 = alloca i1, i1 0 + %nop16112 = alloca i1, i1 0 + %nop16113 = alloca i1, i1 0 + %nop16114 = alloca i1, i1 0 + 
%nop16115 = alloca i1, i1 0 + %nop16116 = alloca i1, i1 0 + %nop16117 = alloca i1, i1 0 + %nop16118 = alloca i1, i1 0 + %nop16119 = alloca i1, i1 0 + %nop16120 = alloca i1, i1 0 + %nop16121 = alloca i1, i1 0 + %nop16122 = alloca i1, i1 0 + %nop16123 = alloca i1, i1 0 + %nop16124 = alloca i1, i1 0 + %nop16125 = alloca i1, i1 0 + %nop16126 = alloca i1, i1 0 + %nop16127 = alloca i1, i1 0 + %nop16128 = alloca i1, i1 0 + %nop16129 = alloca i1, i1 0 + %nop16130 = alloca i1, i1 0 + %nop16131 = alloca i1, i1 0 + %nop16132 = alloca i1, i1 0 + %nop16133 = alloca i1, i1 0 + %nop16134 = alloca i1, i1 0 + %nop16135 = alloca i1, i1 0 + %nop16136 = alloca i1, i1 0 + %nop16137 = alloca i1, i1 0 + %nop16138 = alloca i1, i1 0 + %nop16139 = alloca i1, i1 0 + %nop16140 = alloca i1, i1 0 + %nop16141 = alloca i1, i1 0 + %nop16142 = alloca i1, i1 0 + %nop16143 = alloca i1, i1 0 + %nop16144 = alloca i1, i1 0 + %nop16145 = alloca i1, i1 0 + %nop16146 = alloca i1, i1 0 + %nop16147 = alloca i1, i1 0 + %nop16148 = alloca i1, i1 0 + %nop16149 = alloca i1, i1 0 + %nop16150 = alloca i1, i1 0 + %nop16151 = alloca i1, i1 0 + %nop16152 = alloca i1, i1 0 + %nop16153 = alloca i1, i1 0 + %nop16154 = alloca i1, i1 0 + %nop16155 = alloca i1, i1 0 + %nop16156 = alloca i1, i1 0 + %nop16157 = alloca i1, i1 0 + %nop16158 = alloca i1, i1 0 + %nop16159 = alloca i1, i1 0 + %nop16160 = alloca i1, i1 0 + %nop16161 = alloca i1, i1 0 + %nop16162 = alloca i1, i1 0 + %nop16163 = alloca i1, i1 0 + %nop16164 = alloca i1, i1 0 + %nop16165 = alloca i1, i1 0 + %nop16166 = alloca i1, i1 0 + %nop16167 = alloca i1, i1 0 + %nop16168 = alloca i1, i1 0 + %nop16169 = alloca i1, i1 0 + %nop16170 = alloca i1, i1 0 + %nop16171 = alloca i1, i1 0 + %nop16172 = alloca i1, i1 0 + %nop16173 = alloca i1, i1 0 + %nop16174 = alloca i1, i1 0 + %nop16175 = alloca i1, i1 0 + %nop16176 = alloca i1, i1 0 + %nop16177 = alloca i1, i1 0 + %nop16178 = alloca i1, i1 0 + %nop16179 = alloca i1, i1 0 + %nop16180 = alloca i1, i1 0 + %nop16181 = alloca 
i1, i1 0 + %nop16182 = alloca i1, i1 0 + %nop16183 = alloca i1, i1 0 + %nop16184 = alloca i1, i1 0 + %nop16185 = alloca i1, i1 0 + %nop16186 = alloca i1, i1 0 + %nop16187 = alloca i1, i1 0 + %nop16188 = alloca i1, i1 0 + %nop16189 = alloca i1, i1 0 + %nop16190 = alloca i1, i1 0 + %nop16191 = alloca i1, i1 0 + %nop16192 = alloca i1, i1 0 + %nop16193 = alloca i1, i1 0 + %nop16194 = alloca i1, i1 0 + %nop16195 = alloca i1, i1 0 + %nop16196 = alloca i1, i1 0 + %nop16197 = alloca i1, i1 0 + %nop16198 = alloca i1, i1 0 + %nop16199 = alloca i1, i1 0 + %nop16200 = alloca i1, i1 0 + %nop16201 = alloca i1, i1 0 + %nop16202 = alloca i1, i1 0 + %nop16203 = alloca i1, i1 0 + %nop16204 = alloca i1, i1 0 + %nop16205 = alloca i1, i1 0 + %nop16206 = alloca i1, i1 0 + %nop16207 = alloca i1, i1 0 + %nop16208 = alloca i1, i1 0 + %nop16209 = alloca i1, i1 0 + %nop16210 = alloca i1, i1 0 + %nop16211 = alloca i1, i1 0 + %nop16212 = alloca i1, i1 0 + %nop16213 = alloca i1, i1 0 + %nop16214 = alloca i1, i1 0 + %nop16215 = alloca i1, i1 0 + %nop16216 = alloca i1, i1 0 + %nop16217 = alloca i1, i1 0 + %nop16218 = alloca i1, i1 0 + %nop16219 = alloca i1, i1 0 + %nop16220 = alloca i1, i1 0 + %nop16221 = alloca i1, i1 0 + %nop16222 = alloca i1, i1 0 + %nop16223 = alloca i1, i1 0 + %nop16224 = alloca i1, i1 0 + %nop16225 = alloca i1, i1 0 + %nop16226 = alloca i1, i1 0 + %nop16227 = alloca i1, i1 0 + %nop16228 = alloca i1, i1 0 + %nop16229 = alloca i1, i1 0 + %nop16230 = alloca i1, i1 0 + %nop16231 = alloca i1, i1 0 + %nop16232 = alloca i1, i1 0 + %nop16233 = alloca i1, i1 0 + %nop16234 = alloca i1, i1 0 + %nop16235 = alloca i1, i1 0 + %nop16236 = alloca i1, i1 0 + %nop16237 = alloca i1, i1 0 + %nop16238 = alloca i1, i1 0 + %nop16239 = alloca i1, i1 0 + %nop16240 = alloca i1, i1 0 + %nop16241 = alloca i1, i1 0 + %nop16242 = alloca i1, i1 0 + %nop16243 = alloca i1, i1 0 + %nop16244 = alloca i1, i1 0 + %nop16245 = alloca i1, i1 0 + %nop16246 = alloca i1, i1 0 + %nop16247 = alloca i1, i1 0 + 
%nop16248 = alloca i1, i1 0 + %nop16249 = alloca i1, i1 0 + %nop16250 = alloca i1, i1 0 + %nop16251 = alloca i1, i1 0 + %nop16252 = alloca i1, i1 0 + %nop16253 = alloca i1, i1 0 + %nop16254 = alloca i1, i1 0 + %nop16255 = alloca i1, i1 0 + %nop16256 = alloca i1, i1 0 + %nop16257 = alloca i1, i1 0 + %nop16258 = alloca i1, i1 0 + %nop16259 = alloca i1, i1 0 + %nop16260 = alloca i1, i1 0 + %nop16261 = alloca i1, i1 0 + %nop16262 = alloca i1, i1 0 + %nop16263 = alloca i1, i1 0 + %nop16264 = alloca i1, i1 0 + %nop16265 = alloca i1, i1 0 + %nop16266 = alloca i1, i1 0 + %nop16267 = alloca i1, i1 0 + %nop16268 = alloca i1, i1 0 + %nop16269 = alloca i1, i1 0 + %nop16270 = alloca i1, i1 0 + %nop16271 = alloca i1, i1 0 + %nop16272 = alloca i1, i1 0 + %nop16273 = alloca i1, i1 0 + %nop16274 = alloca i1, i1 0 + %nop16275 = alloca i1, i1 0 + %nop16276 = alloca i1, i1 0 + %nop16277 = alloca i1, i1 0 + %nop16278 = alloca i1, i1 0 + %nop16279 = alloca i1, i1 0 + %nop16280 = alloca i1, i1 0 + %nop16281 = alloca i1, i1 0 + %nop16282 = alloca i1, i1 0 + %nop16283 = alloca i1, i1 0 + %nop16284 = alloca i1, i1 0 + %nop16285 = alloca i1, i1 0 + %nop16286 = alloca i1, i1 0 + %nop16287 = alloca i1, i1 0 + %nop16288 = alloca i1, i1 0 + %nop16289 = alloca i1, i1 0 + %nop16290 = alloca i1, i1 0 + %nop16291 = alloca i1, i1 0 + %nop16292 = alloca i1, i1 0 + %nop16293 = alloca i1, i1 0 + %nop16294 = alloca i1, i1 0 + %nop16295 = alloca i1, i1 0 + %nop16296 = alloca i1, i1 0 + %nop16297 = alloca i1, i1 0 + %nop16298 = alloca i1, i1 0 + %nop16299 = alloca i1, i1 0 + %nop16300 = alloca i1, i1 0 + %nop16301 = alloca i1, i1 0 + %nop16302 = alloca i1, i1 0 + %nop16303 = alloca i1, i1 0 + %nop16304 = alloca i1, i1 0 + %nop16305 = alloca i1, i1 0 + %nop16306 = alloca i1, i1 0 + %nop16307 = alloca i1, i1 0 + %nop16308 = alloca i1, i1 0 + %nop16309 = alloca i1, i1 0 + %nop16310 = alloca i1, i1 0 + %nop16311 = alloca i1, i1 0 + %nop16312 = alloca i1, i1 0 + %nop16313 = alloca i1, i1 0 + %nop16314 = alloca 
i1, i1 0 + %nop16315 = alloca i1, i1 0 + %nop16316 = alloca i1, i1 0 + %nop16317 = alloca i1, i1 0 + %nop16318 = alloca i1, i1 0 + %nop16319 = alloca i1, i1 0 + %nop16320 = alloca i1, i1 0 + %nop16321 = alloca i1, i1 0 + %nop16322 = alloca i1, i1 0 + %nop16323 = alloca i1, i1 0 + %nop16324 = alloca i1, i1 0 + %nop16325 = alloca i1, i1 0 + %nop16326 = alloca i1, i1 0 + %nop16327 = alloca i1, i1 0 + %nop16328 = alloca i1, i1 0 + %nop16329 = alloca i1, i1 0 + %nop16330 = alloca i1, i1 0 + %nop16331 = alloca i1, i1 0 + %nop16332 = alloca i1, i1 0 + %nop16333 = alloca i1, i1 0 + %nop16334 = alloca i1, i1 0 + %nop16335 = alloca i1, i1 0 + %nop16336 = alloca i1, i1 0 + %nop16337 = alloca i1, i1 0 + %nop16338 = alloca i1, i1 0 + %nop16339 = alloca i1, i1 0 + %nop16340 = alloca i1, i1 0 + %nop16341 = alloca i1, i1 0 + %nop16342 = alloca i1, i1 0 + %nop16343 = alloca i1, i1 0 + %nop16344 = alloca i1, i1 0 + %nop16345 = alloca i1, i1 0 + %nop16346 = alloca i1, i1 0 + %nop16347 = alloca i1, i1 0 + %nop16348 = alloca i1, i1 0 + %nop16349 = alloca i1, i1 0 + %nop16350 = alloca i1, i1 0 + %nop16351 = alloca i1, i1 0 + %nop16352 = alloca i1, i1 0 + %nop16353 = alloca i1, i1 0 + %nop16354 = alloca i1, i1 0 + %nop16355 = alloca i1, i1 0 + %nop16356 = alloca i1, i1 0 + %nop16357 = alloca i1, i1 0 + %nop16358 = alloca i1, i1 0 + %nop16359 = alloca i1, i1 0 + %nop16360 = alloca i1, i1 0 + %nop16361 = alloca i1, i1 0 + %nop16362 = alloca i1, i1 0 + %nop16363 = alloca i1, i1 0 + %nop16364 = alloca i1, i1 0 + %nop16365 = alloca i1, i1 0 + %nop16366 = alloca i1, i1 0 + %nop16367 = alloca i1, i1 0 + %nop16368 = alloca i1, i1 0 + %nop16369 = alloca i1, i1 0 + %nop16370 = alloca i1, i1 0 + %nop16371 = alloca i1, i1 0 + %nop16372 = alloca i1, i1 0 + %nop16373 = alloca i1, i1 0 + %nop16374 = alloca i1, i1 0 + %nop16375 = alloca i1, i1 0 + %nop16376 = alloca i1, i1 0 + %nop16377 = alloca i1, i1 0 + br label %for.inc + +for.inc: + %3 = load i32* %i, align 4 + %inc = add nsw i32 %3, 1 + store i32 
%inc, i32* %i, align 4 + br label %for.cond + +; CHECK: addiu $sp, $sp, -8 +; CHECK: sw $ra, 0($sp) +; CHECK: lui $[[REG1:[0-9]+]], 65534 +; CHECK: addiu $[[REG1]], $[[REG1]], -12 +; CHECK: addu $[[REG1]], $ra, $[[REG1]] +; CHECK: lw $ra, 0($sp) +; CHECK: jr $[[REG1]] +; CHECK: addiu $sp, $sp, 8 + +for.end: + ret i32 0 +} + +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" + "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" + "no-infs-fp-math"="false" "no-nans-fp-math"="false" + "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" + "use-soft-float"="false" } diff --git a/test/MC/Mips/micromips-movcond-instructions.s b/test/MC/Mips/micromips-movcond-instructions.s new file mode 100644 index 0000000..5da8702 --- /dev/null +++ b/test/MC/Mips/micromips-movcond-instructions.s @@ -0,0 +1,26 @@ +# RUN: llvm-mc %s -triple=mipsel -show-encoding -mattr=micromips \ +# RUN: | FileCheck -check-prefix=CHECK-EL %s +# RUN: llvm-mc %s -triple=mips -show-encoding -mattr=micromips \ +# RUN: | FileCheck -check-prefix=CHECK-EB %s +# Check that the assembler can handle the documented syntax +# for move conditional instructions. 
+#------------------------------------------------------------------------------ +# Move Conditional +#------------------------------------------------------------------------------ +# Little endian +#------------------------------------------------------------------------------ +# CHECK-EL: movz $9, $6, $7 # encoding: [0xe6,0x00,0x58,0x48] +# CHECK-EL: movn $9, $6, $7 # encoding: [0xe6,0x00,0x18,0x48] +# CHECK-EL: movt $9, $6, $fcc0 # encoding: [0x26,0x55,0x7b,0x09] +# CHECK-EL: movf $9, $6, $fcc0 # encoding: [0x26,0x55,0x7b,0x01] +#------------------------------------------------------------------------------ +# Big endian +#------------------------------------------------------------------------------ +# CHECK-EB: movz $9, $6, $7 # encoding: [0x00,0xe6,0x48,0x58] +# CHECK-EB: movn $9, $6, $7 # encoding: [0x00,0xe6,0x48,0x18] +# CHECK-EB: movt $9, $6, $fcc0 # encoding: [0x55,0x26,0x09,0x7b] +# CHECK-EB: movf $9, $6, $fcc0 # encoding: [0x55,0x26,0x01,0x7b] + movz $9, $6, $7 + movn $9, $6, $7 + movt $9, $6, $fcc0 + movf $9, $6, $fcc0 diff --git a/test/MC/Mips/micromips-multiply-instructions.s b/test/MC/Mips/micromips-multiply-instructions.s new file mode 100644 index 0000000..7c3c518 --- /dev/null +++ b/test/MC/Mips/micromips-multiply-instructions.s @@ -0,0 +1,26 @@ +# RUN: llvm-mc %s -triple=mipsel -show-encoding -mattr=micromips \ +# RUN: | FileCheck -check-prefix=CHECK-EL %s +# RUN: llvm-mc %s -triple=mips -show-encoding -mattr=micromips \ +# RUN: | FileCheck -check-prefix=CHECK-EB %s +# Check that the assembler can handle the documented syntax +# for Multiply Add/Sub instructions. 
+#------------------------------------------------------------------------------ +# Multiply Add/Sub Instructions +#------------------------------------------------------------------------------ +# Little endian +#------------------------------------------------------------------------------ +# CHECK-EL: madd $4, $5 # encoding: [0xa4,0x00,0x3c,0xcb] +# CHECK-EL: maddu $4, $5 # encoding: [0xa4,0x00,0x3c,0xdb] +# CHECK-EL: msub $4, $5 # encoding: [0xa4,0x00,0x3c,0xeb] +# CHECK-EL: msubu $4, $5 # encoding: [0xa4,0x00,0x3c,0xfb] +#------------------------------------------------------------------------------ +# Big endian +#------------------------------------------------------------------------------ +# CHECK-EB: madd $4, $5 # encoding: [0x00,0xa4,0xcb,0x3c] +# CHECK-EB: maddu $4, $5 # encoding: [0x00,0xa4,0xdb,0x3c] +# CHECK-EB: msub $4, $5 # encoding: [0x00,0xa4,0xeb,0x3c] +# CHECK-EB: msubu $4, $5 # encoding: [0x00,0xa4,0xfb,0x3c] + madd $4, $5 + maddu $4, $5 + msub $4, $5 + msubu $4, $5 diff --git a/test/MC/Mips/micromips-relocations.s b/test/MC/Mips/micromips-relocations.s new file mode 100644 index 0000000..804dd2f --- /dev/null +++ b/test/MC/Mips/micromips-relocations.s @@ -0,0 +1,99 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding \ +# RUN: -mattr=micromips | FileCheck %s -check-prefix=CHECK-FIXUP +# RUN: llvm-mc %s -filetype=obj -triple=mipsel-unknown-linux \ +# RUN: -mattr=micromips | llvm-readobj -r \ +# RUN: | FileCheck %s -check-prefix=CHECK-ELF +#------------------------------------------------------------------------------ +# Check that the assembler can handle the documented syntax +# for relocations. 
+#------------------------------------------------------------------------------ +# CHECK-FIXUP: lui $2, %hi(_gp_disp) +# CHECK-FIXUP: # encoding: [0xa2'A',0x41'A',0x00,0x00] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: _gp_disp@ABS_HI, +# CHECK-FIXUP: kind: fixup_MICROMIPS_HI16 +# CHECK-FIXUP: addiu $2, $2, %lo(_gp_disp) +# CHECK-FIXUP: # encoding: [0x42'A',0x30'A',0x00,0x00] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: _gp_disp@ABS_LO, +# CHECK-FIXUP: kind: fixup_MICROMIPS_LO16 +# CHECK-FIXUP: lw $25, %call16(strchr)($gp) +# CHECK-FIXUP: # encoding: [0x3c'A',0xff'A',0x00,0x00] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: strchr@GOT_CALL, +# CHECK-FIXUP: kind: fixup_MICROMIPS_CALL16 +# CHECK-FIXUP: lw $3, %got(loop_1)($2) +# CHECK-FIXUP: # encoding: [0x62'A',0xfc'A',0x00,0x00] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: loop_1@GOT, +# CHECK-FIXUP: kind: fixup_MICROMIPS_GOT16 +# CHECK-FIXUP: lui $2, %dtprel_hi(_gp_disp) +# CHECK-FIXUP: # encoding: [0xa2'A',0x41'A',0x00,0x00] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: _gp_disp@DTPREL_HI, +# CHECK-FIXUP: kind: fixup_MICROMIPS_TLS_DTPREL_HI16 +# CHECK-FIXUP: addiu $2, $2, %dtprel_lo(_gp_disp) +# CHECK-FIXUP: # encoding: [0x42'A',0x30'A',0x00,0x00] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: _gp_disp@DTPREL_LO, +# CHECK-FIXUP: kind: fixup_MICROMIPS_TLS_DTPREL_LO16 +# CHECK-FIXUP: lw $3, %got(loop_1)($2) +# CHECK-FIXUP: # encoding: [0x62'A',0xfc'A',0x00,0x00] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: loop_1@GOT, +# CHECK-FIXUP: kind: fixup_MICROMIPS_GOT16 +# CHECK-FIXUP: lw $4, %got_disp(loop_2)($3) +# CHECK-FIXUP: # encoding: [0x83'A',0xfc'A',0x00,0x00] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: loop_2@GOT_DISP, +# CHECK-FIXUP: kind: fixup_MICROMIPS_GOT_DISP +# CHECK-FIXUP: lw $5, %got_page(loop_3)($4) +# CHECK-FIXUP: # encoding: [0xa4'A',0xfc'A',0x00,0x00] +# CHECK-FIXUP: # 
fixup A - offset: 0, +# CHECK-FIXUP: value: loop_3@GOT_PAGE, +# CHECK-FIXUP: kind: fixup_MICROMIPS_GOT_PAGE +# CHECK-FIXUP: lw $6, %got_ofst(loop_4)($5) +# CHECK-FIXUP: # encoding: [0xc5'A',0xfc'A',0x00,0x00] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: loop_4@GOT_OFST, +# CHECK-FIXUP: kind: fixup_MICROMIPS_GOT_OFST +# CHECK-FIXUP: lui $2, %tprel_hi(_gp_disp) +# CHECK-FIXUP: # encoding: [0xa2'A',0x41'A',0x00,0x00] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: _gp_disp@TPREL_HI, +# CHECK-FIXUP: kind: fixup_MICROMIPS_TLS_TPREL_HI16 +# CHECK-FIXUP: addiu $2, $2, %tprel_lo(_gp_disp) +# CHECK-FIXUP: # encoding: [0x42'A',0x30'A',0x00,0x00] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: _gp_disp@TPREL_LO, +# CHECK-FIXUP: kind: fixup_MICROMIPS_TLS_TPREL_LO16 +#------------------------------------------------------------------------------ +# Check that the appropriate relocations were created. +#------------------------------------------------------------------------------ +# CHECK-ELF: Relocations [ +# CHECK-ELF: 0x{{[0-9,A-F]+}} R_MICROMIPS_HI16 +# CHECK-ELF: 0x{{[0-9,A-F]+}} R_MICROMIPS_LO16 +# CHECK-ELF: 0x{{[0-9,A-F]+}} R_MICROMIPS_CALL16 +# CHECK-ELF: 0x{{[0-9,A-F]+}} R_MICROMIPS_GOT16 +# CHECK-ELF: 0x{{[0-9,A-F]+}} R_MICROMIPS_TLS_DTPREL_HI16 +# CHECK-ELF: 0x{{[0-9,A-F]+}} R_MICROMIPS_TLS_DTPREL_LO16 +# CHECK-ELF: 0x{{[0-9,A-F]+}} R_MICROMIPS_GOT16 +# CHECK-ELF: 0x{{[0-9,A-F]+}} R_MICROMIPS_GOT_DISP +# CHECK-ELF: 0x{{[0-9,A-F]+}} R_MICROMIPS_GOT_PAGE +# CHECK-ELF: 0x{{[0-9,A-F]+}} R_MICROMIPS_GOT_OFST +# CHECK-ELF: 0x{{[0-9,A-F]+}} R_MICROMIPS_TLS_TPREL_HI16 +# CHECK-ELF: 0x{{[0-9,A-F]+}} R_MICROMIPS_TLS_TPREL_LO16 +# CHECK-ELF: ] + + lui $2, %hi(_gp_disp) + addiu $2, $2, %lo(_gp_disp) + lw $25, %call16(strchr)($gp) + lw $3, %got(loop_1)($2) + lui $2, %dtprel_hi(_gp_disp) + addiu $2, $2, %dtprel_lo(_gp_disp) + lw $3, %got(loop_1)($2) + lw $4, %got_disp(loop_2)($3) + lw $5, %got_page(loop_3)($4) + lw $6, 
%got_ofst(loop_4)($5) + lui $2, %tprel_hi(_gp_disp) + addiu $2, $2, %tprel_lo(_gp_disp) diff --git a/test/MC/Mips/micromips-shift-instructions.s b/test/MC/Mips/micromips-shift-instructions.s index 3b5060f..bbb71ac 100644 --- a/test/MC/Mips/micromips-shift-instructions.s +++ b/test/MC/Mips/micromips-shift-instructions.s @@ -1,17 +1,31 @@ -# RUN: llvm-mc %s -triple=mipsel -show-encoding -mcpu=mips32r2 -mattr=micromips | FileCheck %s +# RUN: llvm-mc %s -triple=mipsel -show-encoding -mcpu=mips32r2 -mattr=micromips | FileCheck -check-prefix=CHECK-EL %s +# RUN: llvm-mc %s -triple=mips -show-encoding -mcpu=mips32r2 -mattr=micromips | FileCheck -check-prefix=CHECK-EB %s # Check that the assembler can handle the documented syntax # for shift instructions. #------------------------------------------------------------------------------ # Shift Instructions #------------------------------------------------------------------------------ -# CHECK: sll $4, $3, 7 # encoding: [0x00,0x38,0x83,0x00] -# CHECK: sllv $2, $3, $5 # encoding: [0x10,0x10,0x65,0x00] -# CHECK: sra $4, $3, 7 # encoding: [0x80,0x38,0x83,0x00] -# CHECK: srav $2, $3, $5 # encoding: [0x90,0x10,0x65,0x00] -# CHECK: srl $4, $3, 7 # encoding: [0x40,0x38,0x83,0x00] -# CHECK: srlv $2, $3, $5 # encoding: [0x50,0x10,0x65,0x00] -# CHECK: rotr $9, $6, 7 # encoding: [0xc0,0x38,0x26,0x01] -# CHECK: rotrv $9, $6, $7 # encoding: [0xd0,0x48,0xc7,0x00] +# Little endian +#------------------------------------------------------------------------------ +# CHECK-EL: sll $4, $3, 7 # encoding: [0x83,0x00,0x00,0x38] +# CHECK-EL: sllv $2, $3, $5 # encoding: [0x65,0x00,0x10,0x10] +# CHECK-EL: sra $4, $3, 7 # encoding: [0x83,0x00,0x80,0x38] +# CHECK-EL: srav $2, $3, $5 # encoding: [0x65,0x00,0x90,0x10] +# CHECK-EL: srl $4, $3, 7 # encoding: [0x83,0x00,0x40,0x38] +# CHECK-EL: srlv $2, $3, $5 # encoding: [0x65,0x00,0x50,0x10] +# CHECK-EL: rotr $9, $6, 7 # encoding: [0x26,0x01,0xc0,0x38] +# CHECK-EL: rotrv $9, $6, $7 # encoding: 
[0xc7,0x00,0xd0,0x48] +#------------------------------------------------------------------------------ +# Big endian +#------------------------------------------------------------------------------ +# CHECK-EB: sll $4, $3, 7 # encoding: [0x00,0x83,0x38,0x00] +# CHECK-EB: sllv $2, $3, $5 # encoding: [0x00,0x65,0x10,0x10] +# CHECK-EB: sra $4, $3, 7 # encoding: [0x00,0x83,0x38,0x80] +# CHECK-EB: srav $2, $3, $5 # encoding: [0x00,0x65,0x10,0x90] +# CHECK-EB: srl $4, $3, 7 # encoding: [0x00,0x83,0x38,0x40] +# CHECK-EB: srlv $2, $3, $5 # encoding: [0x00,0x65,0x10,0x50] +# CHECK-EB: rotr $9, $6, 7 # encoding: [0x01,0x26,0x38,0xc0] +# CHECK-EB: rotrv $9, $6, $7 # encoding: [0x00,0xc7,0x48,0xd0] sll $4, $3, 7 sllv $2, $3, $5 sra $4, $3, 7 diff --git a/test/MC/Mips/micromips-tailr.s b/test/MC/Mips/micromips-tailr.s new file mode 100644 index 0000000..0c21a7b --- /dev/null +++ b/test/MC/Mips/micromips-tailr.s @@ -0,0 +1,26 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding \ +# RUN: -mattr=micromips | FileCheck %s -check-prefix=CHECK-FIXUP +# RUN: llvm-mc %s -filetype=obj -triple=mipsel-unknown-linux \ +# RUN: -mattr=micromips | llvm-readobj -r \ +# RUN: | FileCheck %s -check-prefix=CHECK-ELF +#------------------------------------------------------------------------------ +# Check that the assembler can handle the documented syntax +# for relocations. +#------------------------------------------------------------------------------ +# CHECK-FIXUP: foo: +# CHECK-FIXUP: addiu $2, $zero, 1332 +# CHECK-FIXUP: # encoding: [0x40,0x30,0x34,0x05] +# CHECK-FIXUP: j foo # encoding: [A,0xd4'A',A,0b000000AA] +# CHECK-FIXUP: # fixup A - offset: 0, +# CHECK-FIXUP: value: foo, kind: fixup_MICROMIPS_26_S1 +# CHECK-FIXUP: nop # encoding: [0x00,0x00,0x00,0x00] +#------------------------------------------------------------------------------ +# Check that the appropriate relocations were created. 
+#------------------------------------------------------------------------------ +# CHECK-ELF: Relocations [ +# CHECK-ELF: 0x{{[0-9,A-F]+}} R_MICROMIPS_26_S1 +# CHECK-ELF: ] + +foo: + addiu $2, $0, 1332 + j foo diff --git a/test/MC/Mips/micromips-trap-instructions.s b/test/MC/Mips/micromips-trap-instructions.s new file mode 100644 index 0000000..404006c --- /dev/null +++ b/test/MC/Mips/micromips-trap-instructions.s @@ -0,0 +1,50 @@ +# RUN: llvm-mc %s -triple=mipsel -show-encoding -mattr=micromips \ +# RUN: | FileCheck -check-prefix=CHECK-EL %s +# RUN: llvm-mc %s -triple=mips -show-encoding -mattr=micromips \ +# RUN: | FileCheck -check-prefix=CHECK-EB %s +# Check that the assembler can handle the documented syntax +# for miscellaneous instructions +#------------------------------------------------------------------------------ +# Miscellaneous Instructions +#------------------------------------------------------------------------------ +# Little endian +#------------------------------------------------------------------------------ +# CHECK-EL: teq $8, $9 # encoding: [0x28,0x01,0x3c,0x00] +# CHECK-EL: tge $8, $9 # encoding: [0x28,0x01,0x3c,0x02] +# CHECK-EL: tgeu $8, $9 # encoding: [0x28,0x01,0x3c,0x04] +# CHECK-EL: tlt $8, $9 # encoding: [0x28,0x01,0x3c,0x08] +# CHECK-EL: tltu $8, $9 # encoding: [0x28,0x01,0x3c,0x0a] +# CHECK-EL: tne $8, $9 # encoding: [0x28,0x01,0x3c,0x0c] +# CHECK-EL: teqi $9, 17767 # encoding: [0xc9,0x41,0x67,0x45] +# CHECK-EL: tgei $9, 17767 # encoding: [0x29,0x41,0x67,0x45] +# CHECK-EL: tgeiu $9, 17767 # encoding: [0x69,0x41,0x67,0x45] +# CHECK-EL: tlti $9, 17767 # encoding: [0x09,0x41,0x67,0x45] +# CHECK-EL: tltiu $9, 17767 # encoding: [0x49,0x41,0x67,0x45] +# CHECK-EL: tnei $9, 17767 # encoding: [0x89,0x41,0x67,0x45] +#------------------------------------------------------------------------------ +# Big endian +#------------------------------------------------------------------------------ +# CHECK-EB: teq $8, $9 # encoding: 
[0x01,0x28,0x00,0x3c] +# CHECK-EB: tge $8, $9 # encoding: [0x01,0x28,0x02,0x3c] +# CHECK-EB: tgeu $8, $9 # encoding: [0x01,0x28,0x04,0x3c] +# CHECK-EB: tlt $8, $9 # encoding: [0x01,0x28,0x08,0x3c] +# CHECK-EB: tltu $8, $9 # encoding: [0x01,0x28,0x0a,0x3c] +# CHECK-EB: tne $8, $9 # encoding: [0x01,0x28,0x0c,0x3c] +# CHECK-EB: teqi $9, 17767 # encoding: [0x41,0xc9,0x45,0x67] +# CHECK-EB: tgei $9, 17767 # encoding: [0x41,0x29,0x45,0x67] +# CHECK-EB: tgeiu $9, 17767 # encoding: [0x41,0x69,0x45,0x67] +# CHECK-EB: tlti $9, 17767 # encoding: [0x41,0x09,0x45,0x67] +# CHECK-EB: tltiu $9, 17767 # encoding: [0x41,0x49,0x45,0x67] +# CHECK-EB: tnei $9, 17767 # encoding: [0x41,0x89,0x45,0x67] + teq $8, $9, 0 + tge $8, $9, 0 + tgeu $8, $9, 0 + tlt $8, $9, 0 + tltu $8, $9, 0 + tne $8, $9, 0 + teqi $9, 17767 + tgei $9, 17767 + tgeiu $9, 17767 + tlti $9, 17767 + tltiu $9, 17767 + tnei $9, 17767 diff --git a/test/MC/Mips/mips-alu-instructions.s b/test/MC/Mips/mips-alu-instructions.s index eccc288..68a8da0 100644 --- a/test/MC/Mips/mips-alu-instructions.s +++ b/test/MC/Mips/mips-alu-instructions.s @@ -86,7 +86,9 @@ # CHECK: mult $3, $5 # encoding: [0x18,0x00,0x65,0x00] # CHECK: multu $3, $5 # encoding: [0x19,0x00,0x65,0x00] # CHECK: sub $9, $6, $7 # encoding: [0x22,0x48,0xc7,0x00] +# CHECK: addi $sp, $sp, -56 # encoding: [0xc8,0xff,0xbd,0x23] # CHECK: subu $4, $3, $5 # encoding: [0x23,0x20,0x65,0x00] +# CHECK: addiu $sp, $sp, -40 # encoding: [0xd8,0xff,0xbd,0x27] # CHECK: neg $6, $7 # encoding: [0x22,0x30,0x07,0x00] # CHECK: negu $6, $7 # encoding: [0x23,0x30,0x07,0x00] # CHECK: move $7, $8 # encoding: [0x21,0x38,0x00,0x01] @@ -109,7 +111,9 @@ mult $3,$5 multu $3,$5 sub $9,$6,$7 + sub $sp,$sp,56 subu $4,$3,$5 + subu $sp,$sp,40 neg $6,$7 negu $6,$7 move $7,$8 diff --git a/test/MC/Mips/mips-control-instructions.s b/test/MC/Mips/mips-control-instructions.s index ee70940..4a16c53 100644 --- a/test/MC/Mips/mips-control-instructions.s +++ b/test/MC/Mips/mips-control-instructions.s @@ -1,7 
+1,7 @@ # RUN: llvm-mc %s -triple=mips-unknown-unknown -show-encoding -mcpu=mips32r2 | \ # RUN: FileCheck -check-prefix=CHECK32 %s -# RUN: llvm-mc %s -triple=mips-unknown-unknown -show-encoding -mcpu=mips64r2 | \ -# RUN: FileCheck -check-prefix=CHECK64 %s +# RUN: llvm-mc %s -triple=mips64-unknown-unknown -show-encoding -mcpu=mips64r2 \ +# RUN: | FileCheck -check-prefix=CHECK64 %s # CHECK32: break # encoding: [0x00,0x00,0x00,0x0d] # CHECK32: break 7, 0 # encoding: [0x00,0x07,0x00,0x0d] @@ -10,6 +10,31 @@ # CHECK32: syscall 13396 # encoding: [0x00,0x0d,0x15,0x0c] # CHECK32: eret # encoding: [0x42,0x00,0x00,0x18] # CHECK32: deret # encoding: [0x42,0x00,0x00,0x1f] +# CHECK32: di # encoding: [0x41,0x60,0x60,0x00] +# CHECK32: di # encoding: [0x41,0x60,0x60,0x00] +# CHECK32: di $10 # encoding: [0x41,0x6a,0x60,0x00] +# CHECK32: ei # encoding: [0x41,0x60,0x60,0x20] +# CHECK32: ei # encoding: [0x41,0x60,0x60,0x20] +# CHECK32: ei $10 # encoding: [0x41,0x6a,0x60,0x20] +# CHECK32: wait # encoding: [0x42,0x00,0x00,0x20] +# CHECK32: teq $zero, $3 # encoding: [0x00,0x03,0x00,0x34] +# CHECK32: teq $zero, $3, 1 # encoding: [0x00,0x03,0x00,0x74] +# CHECK32: teqi $3, 1 # encoding: [0x04,0x6c,0x00,0x01] +# CHECK32: tge $zero, $3 # encoding: [0x00,0x03,0x00,0x30] +# CHECK32: tge $zero, $3, 3 # encoding: [0x00,0x03,0x00,0xf0] +# CHECK32: tgei $3, 3 # encoding: [0x04,0x68,0x00,0x03] +# CHECK32: tgeu $zero, $3 # encoding: [0x00,0x03,0x00,0x31] +# CHECK32: tgeu $zero, $3, 7 # encoding: [0x00,0x03,0x01,0xf1] +# CHECK32: tgeiu $3, 7 # encoding: [0x04,0x69,0x00,0x07] +# CHECK32: tlt $zero, $3 # encoding: [0x00,0x03,0x00,0x32] +# CHECK32: tlt $zero, $3, 31 # encoding: [0x00,0x03,0x07,0xf2] +# CHECK32: tlti $3, 31 # encoding: [0x04,0x6a,0x00,0x1f] +# CHECK32: tltu $zero, $3 # encoding: [0x00,0x03,0x00,0x33] +# CHECK32: tltu $zero, $3, 255 # encoding: [0x00,0x03,0x3f,0xf3] +# CHECK32: tltiu $3, 255 # encoding: [0x04,0x6b,0x00,0xff] +# CHECK32: tne $zero, $3 # encoding: [0x00,0x03,0x00,0x36] +# 
CHECK32: tne $zero, $3, 1023 # encoding: [0x00,0x03,0xff,0xf6] +# CHECK32: tnei $3, 1023 # encoding: [0x04,0x6e,0x03,0xff] # CHECK64: break # encoding: [0x00,0x00,0x00,0x0d] # CHECK64: break 7, 0 # encoding: [0x00,0x07,0x00,0x0d] @@ -18,6 +43,31 @@ # CHECK64: syscall 13396 # encoding: [0x00,0x0d,0x15,0x0c] # CHECK64: eret # encoding: [0x42,0x00,0x00,0x18] # CHECK64: deret # encoding: [0x42,0x00,0x00,0x1f] +# CHECK64: di # encoding: [0x41,0x60,0x60,0x00] +# CHECK64: di # encoding: [0x41,0x60,0x60,0x00] +# CHECK64: di $10 # encoding: [0x41,0x6a,0x60,0x00] +# CHECK64: ei # encoding: [0x41,0x60,0x60,0x20] +# CHECK64: ei # encoding: [0x41,0x60,0x60,0x20] +# CHECK64: ei $10 # encoding: [0x41,0x6a,0x60,0x20] +# CHECK64: wait # encoding: [0x42,0x00,0x00,0x20] +# CHECK64: teq $zero, $3 # encoding: [0x00,0x03,0x00,0x34] +# CHECK64: teq $zero, $3, 1 # encoding: [0x00,0x03,0x00,0x74] +# CHECK64: teqi $3, 1 # encoding: [0x04,0x6c,0x00,0x01] +# CHECK64: tge $zero, $3 # encoding: [0x00,0x03,0x00,0x30] +# CHECK64: tge $zero, $3, 3 # encoding: [0x00,0x03,0x00,0xf0] +# CHECK64: tgei $3, 3 # encoding: [0x04,0x68,0x00,0x03] +# CHECK64: tgeu $zero, $3 # encoding: [0x00,0x03,0x00,0x31] +# CHECK64: tgeu $zero, $3, 7 # encoding: [0x00,0x03,0x01,0xf1] +# CHECK64: tgeiu $3, 7 # encoding: [0x04,0x69,0x00,0x07] +# CHECK64: tlt $zero, $3 # encoding: [0x00,0x03,0x00,0x32] +# CHECK64: tlt $zero, $3, 31 # encoding: [0x00,0x03,0x07,0xf2] +# CHECK64: tlti $3, 31 # encoding: [0x04,0x6a,0x00,0x1f] +# CHECK64: tltu $zero, $3 # encoding: [0x00,0x03,0x00,0x33] +# CHECK64: tltu $zero, $3, 255 # encoding: [0x00,0x03,0x3f,0xf3] +# CHECK64: tltiu $3, 255 # encoding: [0x04,0x6b,0x00,0xff] +# CHECK64: tne $zero, $3 # encoding: [0x00,0x03,0x00,0x36] +# CHECK64: tne $zero, $3, 1023 # encoding: [0x00,0x03,0xff,0xf6] +# CHECK64: tnei $3, 1023 # encoding: [0x04,0x6e,0x03,0xff] break break 7 @@ -26,3 +76,31 @@ syscall 0x3454 eret deret + di + di $0 + di $10 + + ei + ei $0 + ei $10 + + wait + + teq $0,$3 + teq 
$0,$3,1 + teqi $3,1 + tge $0,$3 + tge $0,$3,3 + tgei $3,3 + tgeu $0,$3 + tgeu $0,$3,7 + tgeiu $3,7 + tlt $0,$3 + tlt $0,$3,31 + tlti $3,31 + tltu $0,$3 + tltu $0,$3,255 + tltiu $3,255 + tne $0,$3 + tne $0,$3,1023 + tnei $3,1023 diff --git a/test/MC/Mips/mips-dsp-instructions.s b/test/MC/Mips/mips-dsp-instructions.s index 4de88ce..5a9e8ea 100644 --- a/test/MC/Mips/mips-dsp-instructions.s +++ b/test/MC/Mips/mips-dsp-instructions.s @@ -22,6 +22,33 @@ # CHECK: precr_sra_r.ph.w $25, $26, 0 # encoding: [0x7f,0x59,0x07,0xd1] # CHECK: precr_sra_r.ph.w $25, $26, 31 # encoding: [0x7f,0x59,0xff,0xd1] +# CHECK: lbux $10, $20($26) # encoding: [0x7f,0x54,0x51,0x8a] +# CHECK: lhx $11, $21($27) # encoding: [0x7f,0x75,0x59,0x0a] +# CHECK: lwx $12, $22($gp) # encoding: [0x7f,0x96,0x60,0x0a] + +# CHECK: mult $ac3, $2, $3 # encoding: [0x00,0x43,0x18,0x18] +# CHECK: multu $ac2, $4, $5 # encoding: [0x00,0x85,0x10,0x19] +# CHECK: madd $ac1, $6, $7 # encoding: [0x70,0xc7,0x08,0x00] +# CHECK: maddu $ac0, $8, $9 # encoding: [0x71,0x09,0x00,0x01] +# CHECK: msub $ac3, $10, $11 # encoding: [0x71,0x4b,0x18,0x04] +# CHECK: msubu $ac2, $12, $13 # encoding: [0x71,0x8d,0x10,0x05] +# CHECK: mfhi $14, $ac1 # encoding: [0x00,0x20,0x70,0x10] +# CHECK: mflo $15, $ac0 # encoding: [0x00,0x00,0x78,0x12] +# CHECK: mthi $16, $ac3 # encoding: [0x02,0x00,0x18,0x11] +# CHECK: mtlo $17, $ac2 # encoding: [0x02,0x20,0x10,0x13] + +# CHECK: mult $2, $3 # encoding: [0x00,0x43,0x00,0x18] +# CHECK: multu $4, $5 # encoding: [0x00,0x85,0x00,0x19] +# CHECK: madd $6, $7 # encoding: [0x70,0xc7,0x00,0x00] +# CHECK: maddu $8, $9 # encoding: [0x71,0x09,0x00,0x01] +# CHECK: msub $10, $11 # encoding: [0x71,0x4b,0x00,0x04] +# CHECK: msubu $12, $13 # encoding: [0x71,0x8d,0x00,0x05] +# CHECK: mfhi $14 # encoding: [0x00,0x00,0x70,0x10] +# CHECK: mflo $15 # encoding: [0x00,0x00,0x78,0x12] +# CHECK: mthi $16 # encoding: [0x02,0x00,0x00,0x11] +# CHECK: mtlo $17 # encoding: [0x02,0x20,0x00,0x13] + + precrq.qb.ph $16,$17,$18 precrq.ph.w 
$17,$18,$19 precrq_rs.ph.w $18,$19,$20 @@ -42,3 +69,29 @@ precr_sra.ph.w $24,$25,31 precr_sra_r.ph.w $25,$26,0 precr_sra_r.ph.w $25,$26,31 + + lbux $10, $s4($26) + lhx $11, $s5($27) + lwx $12, $s6($28) + + mult $ac3, $2, $3 + multu $ac2, $4, $5 + madd $ac1, $6, $7 + maddu $ac0, $8, $9 + msub $ac3, $10, $11 + msubu $ac2, $12, $13 + mfhi $14, $ac1 + mflo $15, $ac0 + mthi $16, $ac3 + mtlo $17, $ac2 + + mult $2, $3 + multu $4, $5 + madd $6, $7 + maddu $8, $9 + msub $10, $11 + msubu $12, $13 + mfhi $14 + mflo $15 + mthi $16 + mtlo $17 diff --git a/test/MC/Mips/mips-fpu-instructions.s b/test/MC/Mips/mips-fpu-instructions.s index dc52676..bfaef9e 100644 --- a/test/MC/Mips/mips-fpu-instructions.s +++ b/test/MC/Mips/mips-fpu-instructions.s @@ -1,4 +1,5 @@ # RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | FileCheck %s +# RUN: llvm-mc %s -triple=mips64el-unknown-linux -show-encoding -mcpu=mips64r2 | FileCheck %s # Check that the assembler can handle the documented syntax # for FPU instructions. 
#------------------------------------------------------------------------------ @@ -137,6 +138,8 @@ #------------------------------------------------------------------------------ # FP move instructions #------------------------------------------------------------------------------ +# CHECK: bc1f $BB_1 # encoding: [A,A,0x00,0x45] +# CHECK: # fixup A - offset: 0, value: ($BB_1), kind: fixup_Mips_PC16 # CHECK: cfc1 $6, $0 # encoding: [0x00,0x00,0x46,0x44] # CHECK: ctc1 $10, $31 # encoding: [0x00,0xf8,0xca,0x44] @@ -164,7 +167,15 @@ # CHECK: movf.s $f4, $f6, $fcc5 # encoding: [0x11,0x31,0x14,0x46] # CHECK: luxc1 $f0, $6($5) # encoding: [0x05,0x00,0xa6,0x4c] # CHECK: suxc1 $f4, $24($5) # encoding: [0x0d,0x20,0xb8,0x4c] - +# CHECK: lwxc1 $f20, $12($14) # encoding: [0x00,0x05,0xcc,0x4d] +# CHECK: swxc1 $f26, $18($22) # encoding: [0x08,0xd0,0xd2,0x4e] +# CHECK: mfhc1 $17, $f4 # encoding: [0x00,0x20,0x71,0x44] +# CHECK: mthc1 $17, $f6 # encoding: [0x00,0x30,0xf1,0x44] +# CHECK: swc2 $4, 16($sp) # encoding: [0x10,0x00,0xa4,0xeb] +# CHECK: sdc2 $4, 16($sp) # encoding: [0x10,0x00,0xa4,0xfb] +# CHECK: lwc2 $11, 12($ra) # encoding: [0x0c,0x00,0xeb,0xcb] +# CHECK: ldc2 $11, 12($ra) # encoding: [0x0c,0x00,0xeb,0xdb] + bc1f $fcc0, $BB_1 cfc1 $a2,$0 ctc1 $10,$31 mfc1 $a2,$f7 @@ -189,5 +200,13 @@ movt $4, $5, $fcc4 movf.d $f4, $f6, $fcc2 movf.s $f4, $f6, $fcc5 - luxc1 $f0, $a2($a1) - suxc1 $f4, $t8($a1)
\ No newline at end of file + luxc1 $f0, $a2($a1) + suxc1 $f4, $t8($a1) + lwxc1 $f20, $12($14) + swxc1 $f26, $s2($s6) + mfhc1 $17, $f4 + mthc1 $17, $f6 + swc2 $4, 16($sp) + sdc2 $4, 16($sp) + lwc2 $11, 12($ra) + ldc2 $11, 12($ra) diff --git a/test/MC/Mips/mips64-alu-instructions.s b/test/MC/Mips/mips64-alu-instructions.s index c33ba70..8262a46 100644 --- a/test/MC/Mips/mips64-alu-instructions.s +++ b/test/MC/Mips/mips64-alu-instructions.s @@ -1,4 +1,4 @@ -# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips64r2 | FileCheck %s +# RUN: llvm-mc %s -triple=mips64el-unknown-linux -show-encoding -mcpu=mips64r2 | FileCheck %s # Check that the assembler can handle the documented syntax # for arithmetic and logical instructions. #------------------------------------------------------------------------------ @@ -73,6 +73,8 @@ # CHECK: daddiu $9, $6, -15001 # encoding: [0x67,0xc5,0xc9,0x64] # CHECK: daddiu $9, $9, -15001 # encoding: [0x67,0xc5,0x29,0x65] # CHECK: daddu $9, $6, $7 # encoding: [0x2d,0x48,0xc7,0x00] +# CHECK: drotr $9, $6, 20 # encoding: [0x3a,0x4d,0x26,0x00] +# CHECK: drotr32 $9, $6, 52 # encoding: [0x3e,0x4d,0x26,0x00] # CHECK: madd $6, $7 # encoding: [0x00,0x00,0xc7,0x70] # CHECK: maddu $6, $7 # encoding: [0x01,0x00,0xc7,0x70] # CHECK: msub $6, $7 # encoding: [0x04,0x00,0xc7,0x70] @@ -94,6 +96,8 @@ daddiu $9,$6,-15001 daddiu $9,-15001 daddu $9,$6,$7 + drotr $9, $6, 20 + drotr32 $9, $6, 52 madd $6,$7 maddu $6,$7 msub $6,$7 diff --git a/test/MC/Mips/mips64-instructions.s b/test/MC/Mips/mips64-instructions.s new file mode 100644 index 0000000..74e9d13 --- /dev/null +++ b/test/MC/Mips/mips64-instructions.s @@ -0,0 +1,7 @@ +# RUN: llvm-mc %s -triple=mips64el-unknown-linux -show-encoding -mcpu=mips64r2 | FileCheck %s + +# CHECK: ldxc1 $f2, $2($10) # encoding: [0x81,0x00,0x42,0x4d] +# CHECK: sdxc1 $f8, $4($25) # encoding: [0x09,0x40,0x24,0x4f] + + ldxc1 $f2, $2($10) + sdxc1 $f8, $a0($t9) diff --git a/test/MC/Mips/mips_directives.s 
b/test/MC/Mips/mips_directives.s index bbb2616..44e707c 100644 --- a/test/MC/Mips/mips_directives.s +++ b/test/MC/Mips/mips_directives.s @@ -19,9 +19,11 @@ $BB0_2: .set noat $JTI0_0: .gpword ($BB0_2) + .word 0x77fffffc # CHECK: $JTI0_0: -# CHECK-NEXT: .4byte 2013265916 +# CHECK: .gpword ($BB0_2) +# CHECK: .4byte 2013265916 .set at=$12 .set macro # CHECK: b 1332 # encoding: [0x10,0x00,0x01,0x4d] diff --git a/test/MC/Mips/msa/test_2r.s b/test/MC/Mips/msa/test_2r.s new file mode 100644 index 0000000..67a2b6f --- /dev/null +++ b/test/MC/Mips/msa/test_2r.s @@ -0,0 +1,51 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s +# +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d -triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP +# +# CHECK: fill.b $w30, $9 # encoding: [0x7b,0x00,0x4f,0x9e] +# CHECK: fill.h $w31, $23 # encoding: [0x7b,0x01,0xbf,0xde] +# CHECK: fill.w $w16, $24 # encoding: [0x7b,0x02,0xc4,0x1e] +# CHECK: nloc.b $w21, $w0 # encoding: [0x7b,0x08,0x05,0x5e] +# CHECK: nloc.h $w18, $w31 # encoding: [0x7b,0x09,0xfc,0x9e] +# CHECK: nloc.w $w2, $w23 # encoding: [0x7b,0x0a,0xb8,0x9e] +# CHECK: nloc.d $w4, $w10 # encoding: [0x7b,0x0b,0x51,0x1e] +# CHECK: nlzc.b $w31, $w2 # encoding: [0x7b,0x0c,0x17,0xde] +# CHECK: nlzc.h $w27, $w22 # encoding: [0x7b,0x0d,0xb6,0xde] +# CHECK: nlzc.w $w10, $w29 # encoding: [0x7b,0x0e,0xea,0x9e] +# CHECK: nlzc.d $w25, $w9 # encoding: [0x7b,0x0f,0x4e,0x5e] +# CHECK: pcnt.b $w20, $w18 # encoding: [0x7b,0x04,0x95,0x1e] +# CHECK: pcnt.h $w0, $w8 # encoding: [0x7b,0x05,0x40,0x1e] +# CHECK: pcnt.w $w23, $w9 # encoding: [0x7b,0x06,0x4d,0xde] +# CHECK: pcnt.d $w21, $w24 # encoding: [0x7b,0x07,0xc5,0x5e] + +# CHECKOBJDUMP: fill.b $w30, $9 +# CHECKOBJDUMP: fill.h $w31, $23 +# CHECKOBJDUMP: fill.w $w16, $24 +# CHECKOBJDUMP: nloc.b $w21, $w0 +# CHECKOBJDUMP: nloc.h 
$w18, $w31 +# CHECKOBJDUMP: nloc.w $w2, $w23 +# CHECKOBJDUMP: nloc.d $w4, $w10 +# CHECKOBJDUMP: nlzc.b $w31, $w2 +# CHECKOBJDUMP: nlzc.h $w27, $w22 +# CHECKOBJDUMP: nlzc.w $w10, $w29 +# CHECKOBJDUMP: nlzc.d $w25, $w9 +# CHECKOBJDUMP: pcnt.b $w20, $w18 +# CHECKOBJDUMP: pcnt.h $w0, $w8 +# CHECKOBJDUMP: pcnt.w $w23, $w9 +# CHECKOBJDUMP: pcnt.d $w21, $w24 + + fill.b $w30, $9 + fill.h $w31, $23 + fill.w $w16, $24 + nloc.b $w21, $w0 + nloc.h $w18, $w31 + nloc.w $w2, $w23 + nloc.d $w4, $w10 + nlzc.b $w31, $w2 + nlzc.h $w27, $w22 + nlzc.w $w10, $w29 + nlzc.d $w25, $w9 + pcnt.b $w20, $w18 + pcnt.h $w0, $w8 + pcnt.w $w23, $w9 + pcnt.d $w21, $w24 diff --git a/test/MC/Mips/msa/test_2rf.s b/test/MC/Mips/msa/test_2rf.s new file mode 100644 index 0000000..64025a4 --- /dev/null +++ b/test/MC/Mips/msa/test_2rf.s @@ -0,0 +1,102 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s +# +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d -triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP +# +# CHECK: fclass.w $w26, $w12 # encoding: [0x7b,0x20,0x66,0x9e] +# CHECK: fclass.d $w24, $w17 # encoding: [0x7b,0x21,0x8e,0x1e] +# CHECK: fexupl.w $w8, $w0 # encoding: [0x7b,0x30,0x02,0x1e] +# CHECK: fexupl.d $w17, $w29 # encoding: [0x7b,0x31,0xec,0x5e] +# CHECK: fexupr.w $w13, $w4 # encoding: [0x7b,0x32,0x23,0x5e] +# CHECK: fexupr.d $w5, $w2 # encoding: [0x7b,0x33,0x11,0x5e] +# CHECK: ffint_s.w $w20, $w29 # encoding: [0x7b,0x3c,0xed,0x1e] +# CHECK: ffint_s.d $w12, $w15 # encoding: [0x7b,0x3d,0x7b,0x1e] +# CHECK: ffint_u.w $w7, $w27 # encoding: [0x7b,0x3e,0xd9,0xde] +# CHECK: ffint_u.d $w19, $w16 # encoding: [0x7b,0x3f,0x84,0xde] +# CHECK: ffql.w $w31, $w13 # encoding: [0x7b,0x34,0x6f,0xde] +# CHECK: ffql.d $w12, $w13 # encoding: [0x7b,0x35,0x6b,0x1e] +# CHECK: ffqr.w $w27, $w30 # encoding: [0x7b,0x36,0xf6,0xde] +# CHECK: 
ffqr.d $w30, $w15 # encoding: [0x7b,0x37,0x7f,0x9e] +# CHECK: flog2.w $w25, $w31 # encoding: [0x7b,0x2e,0xfe,0x5e] +# CHECK: flog2.d $w18, $w10 # encoding: [0x7b,0x2f,0x54,0x9e] +# CHECK: frint.w $w7, $w15 # encoding: [0x7b,0x2c,0x79,0xde] +# CHECK: frint.d $w21, $w22 # encoding: [0x7b,0x2d,0xb5,0x5e] +# CHECK: frcp.w $w19, $w0 # encoding: [0x7b,0x2a,0x04,0xde] +# CHECK: frcp.d $w4, $w14 # encoding: [0x7b,0x2b,0x71,0x1e] +# CHECK: frsqrt.w $w12, $w17 # encoding: [0x7b,0x28,0x8b,0x1e] +# CHECK: frsqrt.d $w23, $w11 # encoding: [0x7b,0x29,0x5d,0xde] +# CHECK: fsqrt.w $w0, $w11 # encoding: [0x7b,0x26,0x58,0x1e] +# CHECK: fsqrt.d $w15, $w12 # encoding: [0x7b,0x27,0x63,0xde] +# CHECK: ftint_s.w $w30, $w5 # encoding: [0x7b,0x38,0x2f,0x9e] +# CHECK: ftint_s.d $w5, $w23 # encoding: [0x7b,0x39,0xb9,0x5e] +# CHECK: ftint_u.w $w20, $w14 # encoding: [0x7b,0x3a,0x75,0x1e] +# CHECK: ftint_u.d $w23, $w21 # encoding: [0x7b,0x3b,0xad,0xde] +# CHECK: ftrunc_s.w $w29, $w17 # encoding: [0x7b,0x22,0x8f,0x5e] +# CHECK: ftrunc_s.d $w12, $w27 # encoding: [0x7b,0x23,0xdb,0x1e] +# CHECK: ftrunc_u.w $w17, $w15 # encoding: [0x7b,0x24,0x7c,0x5e] +# CHECK: ftrunc_u.d $w5, $w27 # encoding: [0x7b,0x25,0xd9,0x5e] + +# CHECKOBJDUMP: fclass.w $w26, $w12 +# CHECKOBJDUMP: fclass.d $w24, $w17 +# CHECKOBJDUMP: fexupl.w $w8, $w0 +# CHECKOBJDUMP: fexupl.d $w17, $w29 +# CHECKOBJDUMP: fexupr.w $w13, $w4 +# CHECKOBJDUMP: fexupr.d $w5, $w2 +# CHECKOBJDUMP: ffint_s.w $w20, $w29 +# CHECKOBJDUMP: ffint_s.d $w12, $w15 +# CHECKOBJDUMP: ffint_u.w $w7, $w27 +# CHECKOBJDUMP: ffint_u.d $w19, $w16 +# CHECKOBJDUMP: ffql.w $w31, $w13 +# CHECKOBJDUMP: ffql.d $w12, $w13 +# CHECKOBJDUMP: ffqr.w $w27, $w30 +# CHECKOBJDUMP: ffqr.d $w30, $w15 +# CHECKOBJDUMP: flog2.w $w25, $w31 +# CHECKOBJDUMP: flog2.d $w18, $w10 +# CHECKOBJDUMP: frint.w $w7, $w15 +# CHECKOBJDUMP: frint.d $w21, $w22 +# CHECKOBJDUMP: frcp.w $w19, $w0 +# CHECKOBJDUMP: frcp.d $w4, $w14 +# CHECKOBJDUMP: frsqrt.w $w12, $w17 +# CHECKOBJDUMP: frsqrt.d $w23, $w11 +# 
CHECKOBJDUMP: fsqrt.w $w0, $w11 +# CHECKOBJDUMP: fsqrt.d $w15, $w12 +# CHECKOBJDUMP: ftint_s.w $w30, $w5 +# CHECKOBJDUMP: ftint_s.d $w5, $w23 +# CHECKOBJDUMP: ftint_u.w $w20, $w14 +# CHECKOBJDUMP: ftint_u.d $w23, $w21 +# CHECKOBJDUMP: ftrunc_s.w $w29, $w17 +# CHECKOBJDUMP: ftrunc_s.d $w12, $w27 +# CHECKOBJDUMP: ftrunc_u.w $w17, $w15 +# CHECKOBJDUMP: ftrunc_u.d $w5, $w27 + + fclass.w $w26, $w12 + fclass.d $w24, $w17 + fexupl.w $w8, $w0 + fexupl.d $w17, $w29 + fexupr.w $w13, $w4 + fexupr.d $w5, $w2 + ffint_s.w $w20, $w29 + ffint_s.d $w12, $w15 + ffint_u.w $w7, $w27 + ffint_u.d $w19, $w16 + ffql.w $w31, $w13 + ffql.d $w12, $w13 + ffqr.w $w27, $w30 + ffqr.d $w30, $w15 + flog2.w $w25, $w31 + flog2.d $w18, $w10 + frint.w $w7, $w15 + frint.d $w21, $w22 + frcp.w $w19, $w0 + frcp.d $w4, $w14 + frsqrt.w $w12, $w17 + frsqrt.d $w23, $w11 + fsqrt.w $w0, $w11 + fsqrt.d $w15, $w12 + ftint_s.w $w30, $w5 + ftint_s.d $w5, $w23 + ftint_u.w $w20, $w14 + ftint_u.d $w23, $w21 + ftrunc_s.w $w29, $w17 + ftrunc_s.d $w12, $w27 + ftrunc_u.w $w17, $w15 + ftrunc_u.d $w5, $w27 diff --git a/test/MC/Mips/msa/test_3r.s b/test/MC/Mips/msa/test_3r.s new file mode 100644 index 0000000..3047ecb --- /dev/null +++ b/test/MC/Mips/msa/test_3r.s @@ -0,0 +1,732 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s +# +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d -triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP +# +# CHECK: add_a.b $w26, $w9, $w4 # encoding: [0x78,0x04,0x4e,0x90] +# CHECK: add_a.h $w23, $w27, $w31 # encoding: [0x78,0x3f,0xdd,0xd0] +# CHECK: add_a.w $w11, $w6, $w22 # encoding: [0x78,0x56,0x32,0xd0] +# CHECK: add_a.d $w6, $w10, $w0 # encoding: [0x78,0x60,0x51,0x90] +# CHECK: adds_a.b $w19, $w24, $w19 # encoding: [0x78,0x93,0xc4,0xd0] +# CHECK: adds_a.h $w25, $w6, $w4 # encoding: [0x78,0xa4,0x36,0x50] +# 
CHECK: adds_a.w $w25, $w17, $w27 # encoding: [0x78,0xdb,0x8e,0x50] +# CHECK: adds_a.d $w15, $w18, $w26 # encoding: [0x78,0xfa,0x93,0xd0] +# CHECK: adds_s.b $w29, $w11, $w19 # encoding: [0x79,0x13,0x5f,0x50] +# CHECK: adds_s.h $w5, $w23, $w26 # encoding: [0x79,0x3a,0xb9,0x50] +# CHECK: adds_s.w $w16, $w14, $w13 # encoding: [0x79,0x4d,0x74,0x10] +# CHECK: adds_s.d $w2, $w14, $w28 # encoding: [0x79,0x7c,0x70,0x90] +# CHECK: adds_u.b $w3, $w17, $w14 # encoding: [0x79,0x8e,0x88,0xd0] +# CHECK: adds_u.h $w10, $w30, $w4 # encoding: [0x79,0xa4,0xf2,0x90] +# CHECK: adds_u.w $w15, $w18, $w20 # encoding: [0x79,0xd4,0x93,0xd0] +# CHECK: adds_u.d $w30, $w10, $w9 # encoding: [0x79,0xe9,0x57,0x90] +# CHECK: addv.b $w24, $w20, $w21 # encoding: [0x78,0x15,0xa6,0x0e] +# CHECK: addv.h $w4, $w13, $w27 # encoding: [0x78,0x3b,0x69,0x0e] +# CHECK: addv.w $w19, $w11, $w14 # encoding: [0x78,0x4e,0x5c,0xce] +# CHECK: addv.d $w2, $w21, $w31 # encoding: [0x78,0x7f,0xa8,0x8e] +# CHECK: asub_s.b $w23, $w16, $w3 # encoding: [0x7a,0x03,0x85,0xd1] +# CHECK: asub_s.h $w22, $w17, $w25 # encoding: [0x7a,0x39,0x8d,0x91] +# CHECK: asub_s.w $w24, $w1, $w9 # encoding: [0x7a,0x49,0x0e,0x11] +# CHECK: asub_s.d $w13, $w12, $w12 # encoding: [0x7a,0x6c,0x63,0x51] +# CHECK: asub_u.b $w10, $w29, $w11 # encoding: [0x7a,0x8b,0xea,0x91] +# CHECK: asub_u.h $w18, $w9, $w15 # encoding: [0x7a,0xaf,0x4c,0x91] +# CHECK: asub_u.w $w10, $w19, $w31 # encoding: [0x7a,0xdf,0x9a,0x91] +# CHECK: asub_u.d $w17, $w10, $w0 # encoding: [0x7a,0xe0,0x54,0x51] +# CHECK: ave_s.b $w2, $w5, $w1 # encoding: [0x7a,0x01,0x28,0x90] +# CHECK: ave_s.h $w16, $w19, $w9 # encoding: [0x7a,0x29,0x9c,0x10] +# CHECK: ave_s.w $w17, $w31, $w5 # encoding: [0x7a,0x45,0xfc,0x50] +# CHECK: ave_s.d $w27, $w25, $w10 # encoding: [0x7a,0x6a,0xce,0xd0] +# CHECK: ave_u.b $w16, $w19, $w9 # encoding: [0x7a,0x89,0x9c,0x10] +# CHECK: ave_u.h $w28, $w28, $w11 # encoding: [0x7a,0xab,0xe7,0x10] +# CHECK: ave_u.w $w11, $w12, $w11 # encoding: [0x7a,0xcb,0x62,0xd0] +# 
CHECK: ave_u.d $w30, $w19, $w28 # encoding: [0x7a,0xfc,0x9f,0x90] +# CHECK: aver_s.b $w26, $w16, $w2 # encoding: [0x7b,0x02,0x86,0x90] +# CHECK: aver_s.h $w31, $w27, $w27 # encoding: [0x7b,0x3b,0xdf,0xd0] +# CHECK: aver_s.w $w28, $w18, $w25 # encoding: [0x7b,0x59,0x97,0x10] +# CHECK: aver_s.d $w29, $w21, $w27 # encoding: [0x7b,0x7b,0xaf,0x50] +# CHECK: aver_u.b $w29, $w26, $w3 # encoding: [0x7b,0x83,0xd7,0x50] +# CHECK: aver_u.h $w18, $w18, $w9 # encoding: [0x7b,0xa9,0x94,0x90] +# CHECK: aver_u.w $w17, $w25, $w29 # encoding: [0x7b,0xdd,0xcc,0x50] +# CHECK: aver_u.d $w22, $w22, $w19 # encoding: [0x7b,0xf3,0xb5,0x90] +# CHECK: bclr.b $w2, $w15, $w29 # encoding: [0x79,0x9d,0x78,0x8d] +# CHECK: bclr.h $w16, $w21, $w28 # encoding: [0x79,0xbc,0xac,0x0d] +# CHECK: bclr.w $w19, $w2, $w9 # encoding: [0x79,0xc9,0x14,0xcd] +# CHECK: bclr.d $w27, $w31, $w4 # encoding: [0x79,0xe4,0xfe,0xcd] +# CHECK: binsl.b $w5, $w16, $w24 # encoding: [0x7b,0x18,0x81,0x4d] +# CHECK: binsl.h $w30, $w5, $w10 # encoding: [0x7b,0x2a,0x2f,0x8d] +# CHECK: binsl.w $w14, $w15, $w13 # encoding: [0x7b,0x4d,0x7b,0x8d] +# CHECK: binsl.d $w23, $w20, $w12 # encoding: [0x7b,0x6c,0xa5,0xcd] +# CHECK: binsr.b $w22, $w11, $w2 # encoding: [0x7b,0x82,0x5d,0x8d] +# CHECK: binsr.h $w0, $w26, $w6 # encoding: [0x7b,0xa6,0xd0,0x0d] +# CHECK: binsr.w $w26, $w3, $w28 # encoding: [0x7b,0xdc,0x1e,0x8d] +# CHECK: binsr.d $w0, $w0, $w21 # encoding: [0x7b,0xf5,0x00,0x0d] +# CHECK: bneg.b $w0, $w11, $w24 # encoding: [0x7a,0x98,0x58,0x0d] +# CHECK: bneg.h $w28, $w16, $w4 # encoding: [0x7a,0xa4,0x87,0x0d] +# CHECK: bneg.w $w3, $w26, $w19 # encoding: [0x7a,0xd3,0xd0,0xcd] +# CHECK: bneg.d $w13, $w29, $w15 # encoding: [0x7a,0xef,0xeb,0x4d] +# CHECK: bset.b $w31, $w5, $w31 # encoding: [0x7a,0x1f,0x2f,0xcd] +# CHECK: bset.h $w14, $w12, $w6 # encoding: [0x7a,0x26,0x63,0x8d] +# CHECK: bset.w $w31, $w9, $w12 # encoding: [0x7a,0x4c,0x4f,0xcd] +# CHECK: bset.d $w5, $w22, $w5 # encoding: [0x7a,0x65,0xb1,0x4d] +# CHECK: ceq.b $w31, $w31, 
$w18 # encoding: [0x78,0x12,0xff,0xcf] +# CHECK: ceq.h $w10, $w27, $w9 # encoding: [0x78,0x29,0xda,0x8f] +# CHECK: ceq.w $w9, $w5, $w14 # encoding: [0x78,0x4e,0x2a,0x4f] +# CHECK: ceq.d $w5, $w17, $w0 # encoding: [0x78,0x60,0x89,0x4f] +# CHECK: cle_s.b $w23, $w4, $w9 # encoding: [0x7a,0x09,0x25,0xcf] +# CHECK: cle_s.h $w22, $w27, $w19 # encoding: [0x7a,0x33,0xdd,0x8f] +# CHECK: cle_s.w $w30, $w26, $w10 # encoding: [0x7a,0x4a,0xd7,0x8f] +# CHECK: cle_s.d $w18, $w5, $w10 # encoding: [0x7a,0x6a,0x2c,0x8f] +# CHECK: cle_u.b $w1, $w25, $w0 # encoding: [0x7a,0x80,0xc8,0x4f] +# CHECK: cle_u.h $w7, $w0, $w29 # encoding: [0x7a,0xbd,0x01,0xcf] +# CHECK: cle_u.w $w25, $w18, $w1 # encoding: [0x7a,0xc1,0x96,0x4f] +# CHECK: cle_u.d $w6, $w0, $w30 # encoding: [0x7a,0xfe,0x01,0x8f] +# CHECK: clt_s.b $w25, $w2, $w21 # encoding: [0x79,0x15,0x16,0x4f] +# CHECK: clt_s.h $w2, $w19, $w9 # encoding: [0x79,0x29,0x98,0x8f] +# CHECK: clt_s.w $w23, $w8, $w16 # encoding: [0x79,0x50,0x45,0xcf] +# CHECK: clt_s.d $w7, $w30, $w12 # encoding: [0x79,0x6c,0xf1,0xcf] +# CHECK: clt_u.b $w2, $w31, $w13 # encoding: [0x79,0x8d,0xf8,0x8f] +# CHECK: clt_u.h $w16, $w31, $w23 # encoding: [0x79,0xb7,0xfc,0x0f] +# CHECK: clt_u.w $w3, $w24, $w9 # encoding: [0x79,0xc9,0xc0,0xcf] +# CHECK: clt_u.d $w7, $w0, $w1 # encoding: [0x79,0xe1,0x01,0xcf] +# CHECK: div_s.b $w29, $w3, $w18 # encoding: [0x7a,0x12,0x1f,0x52] +# CHECK: div_s.h $w17, $w16, $w13 # encoding: [0x7a,0x2d,0x84,0x52] +# CHECK: div_s.w $w4, $w25, $w30 # encoding: [0x7a,0x5e,0xc9,0x12] +# CHECK: div_s.d $w31, $w9, $w20 # encoding: [0x7a,0x74,0x4f,0xd2] +# CHECK: div_u.b $w6, $w29, $w10 # encoding: [0x7a,0x8a,0xe9,0x92] +# CHECK: div_u.h $w24, $w21, $w14 # encoding: [0x7a,0xae,0xae,0x12] +# CHECK: div_u.w $w29, $w14, $w25 # encoding: [0x7a,0xd9,0x77,0x52] +# CHECK: div_u.d $w31, $w1, $w21 # encoding: [0x7a,0xf5,0x0f,0xd2] +# CHECK: dotp_s.h $w23, $w22, $w25 # encoding: [0x78,0x39,0xb5,0xd3] +# CHECK: dotp_s.w $w20, $w14, $w5 # encoding: 
[0x78,0x45,0x75,0x13] +# CHECK: dotp_s.d $w17, $w2, $w22 # encoding: [0x78,0x76,0x14,0x53] +# CHECK: dotp_u.h $w13, $w2, $w6 # encoding: [0x78,0xa6,0x13,0x53] +# CHECK: dotp_u.w $w15, $w22, $w21 # encoding: [0x78,0xd5,0xb3,0xd3] +# CHECK: dotp_u.d $w4, $w16, $w26 # encoding: [0x78,0xfa,0x81,0x13] +# CHECK: dpadd_s.h $w1, $w28, $w22 # encoding: [0x79,0x36,0xe0,0x53] +# CHECK: dpadd_s.w $w10, $w1, $w12 # encoding: [0x79,0x4c,0x0a,0x93] +# CHECK: dpadd_s.d $w3, $w21, $w27 # encoding: [0x79,0x7b,0xa8,0xd3] +# CHECK: dpadd_u.h $w17, $w5, $w20 # encoding: [0x79,0xb4,0x2c,0x53] +# CHECK: dpadd_u.w $w24, $w8, $w16 # encoding: [0x79,0xd0,0x46,0x13] +# CHECK: dpadd_u.d $w15, $w29, $w16 # encoding: [0x79,0xf0,0xeb,0xd3] +# CHECK: dpsub_s.h $w4, $w11, $w12 # encoding: [0x7a,0x2c,0x59,0x13] +# CHECK: dpsub_s.w $w4, $w7, $w6 # encoding: [0x7a,0x46,0x39,0x13] +# CHECK: dpsub_s.d $w31, $w12, $w28 # encoding: [0x7a,0x7c,0x67,0xd3] +# CHECK: dpsub_u.h $w4, $w25, $w17 # encoding: [0x7a,0xb1,0xc9,0x13] +# CHECK: dpsub_u.w $w19, $w25, $w16 # encoding: [0x7a,0xd0,0xcc,0xd3] +# CHECK: dpsub_u.d $w7, $w10, $w26 # encoding: [0x7a,0xfa,0x51,0xd3] +# CHECK: hadd_s.h $w28, $w24, $w2 # encoding: [0x7a,0x22,0xc7,0x15] +# CHECK: hadd_s.w $w24, $w17, $w11 # encoding: [0x7a,0x4b,0x8e,0x15] +# CHECK: hadd_s.d $w17, $w15, $w20 # encoding: [0x7a,0x74,0x7c,0x55] +# CHECK: hadd_u.h $w12, $w29, $w17 # encoding: [0x7a,0xb1,0xeb,0x15] +# CHECK: hadd_u.w $w9, $w5, $w6 # encoding: [0x7a,0xc6,0x2a,0x55] +# CHECK: hadd_u.d $w1, $w20, $w6 # encoding: [0x7a,0xe6,0xa0,0x55] +# CHECK: hsub_s.h $w16, $w14, $w29 # encoding: [0x7b,0x3d,0x74,0x15] +# CHECK: hsub_s.w $w9, $w13, $w11 # encoding: [0x7b,0x4b,0x6a,0x55] +# CHECK: hsub_s.d $w30, $w18, $w14 # encoding: [0x7b,0x6e,0x97,0x95] +# CHECK: hsub_u.h $w7, $w12, $w14 # encoding: [0x7b,0xae,0x61,0xd5] +# CHECK: hsub_u.w $w21, $w5, $w5 # encoding: [0x7b,0xc5,0x2d,0x55] +# CHECK: hsub_u.d $w11, $w12, $w31 # encoding: [0x7b,0xff,0x62,0xd5] +# CHECK: ilvev.b $w18, $w16, 
$w30 # encoding: [0x7b,0x1e,0x84,0x94] +# CHECK: ilvev.h $w14, $w0, $w13 # encoding: [0x7b,0x2d,0x03,0x94] +# CHECK: ilvev.w $w12, $w25, $w22 # encoding: [0x7b,0x56,0xcb,0x14] +# CHECK: ilvev.d $w30, $w27, $w3 # encoding: [0x7b,0x63,0xdf,0x94] +# CHECK: ilvl.b $w29, $w3, $w21 # encoding: [0x7a,0x15,0x1f,0x54] +# CHECK: ilvl.h $w27, $w10, $w17 # encoding: [0x7a,0x31,0x56,0xd4] +# CHECK: ilvl.w $w6, $w1, $w0 # encoding: [0x7a,0x40,0x09,0x94] +# CHECK: ilvl.d $w3, $w16, $w24 # encoding: [0x7a,0x78,0x80,0xd4] +# CHECK: ilvod.b $w11, $w5, $w20 # encoding: [0x7b,0x94,0x2a,0xd4] +# CHECK: ilvod.h $w18, $w13, $w31 # encoding: [0x7b,0xbf,0x6c,0x94] +# CHECK: ilvod.w $w29, $w16, $w24 # encoding: [0x7b,0xd8,0x87,0x54] +# CHECK: ilvod.d $w22, $w12, $w29 # encoding: [0x7b,0xfd,0x65,0x94] +# CHECK: ilvr.b $w4, $w30, $w6 # encoding: [0x7a,0x86,0xf1,0x14] +# CHECK: ilvr.h $w28, $w19, $w29 # encoding: [0x7a,0xbd,0x9f,0x14] +# CHECK: ilvr.w $w18, $w20, $w21 # encoding: [0x7a,0xd5,0xa4,0x94] +# CHECK: ilvr.d $w23, $w30, $w12 # encoding: [0x7a,0xec,0xf5,0xd4] +# CHECK: maddv.b $w17, $w31, $w29 # encoding: [0x78,0x9d,0xfc,0x52] +# CHECK: maddv.h $w7, $w24, $w9 # encoding: [0x78,0xa9,0xc1,0xd2] +# CHECK: maddv.w $w22, $w22, $w20 # encoding: [0x78,0xd4,0xb5,0x92] +# CHECK: maddv.d $w30, $w26, $w20 # encoding: [0x78,0xf4,0xd7,0x92] +# CHECK: max_a.b $w23, $w11, $w23 # encoding: [0x7b,0x17,0x5d,0xce] +# CHECK: max_a.h $w20, $w5, $w30 # encoding: [0x7b,0x3e,0x2d,0x0e] +# CHECK: max_a.w $w7, $w18, $w30 # encoding: [0x7b,0x5e,0x91,0xce] +# CHECK: max_a.d $w8, $w8, $w31 # encoding: [0x7b,0x7f,0x42,0x0e] +# CHECK: max_s.b $w10, $w1, $w19 # encoding: [0x79,0x13,0x0a,0x8e] +# CHECK: max_s.h $w15, $w29, $w17 # encoding: [0x79,0x31,0xeb,0xce] +# CHECK: max_s.w $w15, $w29, $w14 # encoding: [0x79,0x4e,0xeb,0xce] +# CHECK: max_s.d $w25, $w24, $w3 # encoding: [0x79,0x63,0xc6,0x4e] +# CHECK: max_u.b $w12, $w24, $w5 # encoding: [0x79,0x85,0xc3,0x0e] +# CHECK: max_u.h $w5, $w6, $w7 # encoding: 
[0x79,0xa7,0x31,0x4e] +# CHECK: max_u.w $w16, $w4, $w7 # encoding: [0x79,0xc7,0x24,0x0e] +# CHECK: max_u.d $w26, $w12, $w24 # encoding: [0x79,0xf8,0x66,0x8e] +# CHECK: min_a.b $w4, $w26, $w1 # encoding: [0x7b,0x81,0xd1,0x0e] +# CHECK: min_a.h $w12, $w13, $w31 # encoding: [0x7b,0xbf,0x6b,0x0e] +# CHECK: min_a.w $w28, $w20, $w0 # encoding: [0x7b,0xc0,0xa7,0x0e] +# CHECK: min_a.d $w12, $w20, $w19 # encoding: [0x7b,0xf3,0xa3,0x0e] +# CHECK: min_s.b $w19, $w3, $w14 # encoding: [0x7a,0x0e,0x1c,0xce] +# CHECK: min_s.h $w27, $w21, $w8 # encoding: [0x7a,0x28,0xae,0xce] +# CHECK: min_s.w $w0, $w14, $w30 # encoding: [0x7a,0x5e,0x70,0x0e] +# CHECK: min_s.d $w6, $w8, $w21 # encoding: [0x7a,0x75,0x41,0x8e] +# CHECK: min_u.b $w22, $w26, $w8 # encoding: [0x7a,0x88,0xd5,0x8e] +# CHECK: min_u.h $w7, $w27, $w12 # encoding: [0x7a,0xac,0xd9,0xce] +# CHECK: min_u.w $w8, $w20, $w14 # encoding: [0x7a,0xce,0xa2,0x0e] +# CHECK: min_u.d $w26, $w14, $w15 # encoding: [0x7a,0xef,0x76,0x8e] +# CHECK: mod_s.b $w18, $w1, $w26 # encoding: [0x7b,0x1a,0x0c,0x92] +# CHECK: mod_s.h $w31, $w30, $w28 # encoding: [0x7b,0x3c,0xf7,0xd2] +# CHECK: mod_s.w $w2, $w6, $w13 # encoding: [0x7b,0x4d,0x30,0x92] +# CHECK: mod_s.d $w21, $w27, $w22 # encoding: [0x7b,0x76,0xdd,0x52] +# CHECK: mod_u.b $w16, $w7, $w13 # encoding: [0x7b,0x8d,0x3c,0x12] +# CHECK: mod_u.h $w24, $w8, $w7 # encoding: [0x7b,0xa7,0x46,0x12] +# CHECK: mod_u.w $w30, $w2, $w17 # encoding: [0x7b,0xd1,0x17,0x92] +# CHECK: mod_u.d $w31, $w2, $w25 # encoding: [0x7b,0xf9,0x17,0xd2] +# CHECK: msubv.b $w14, $w5, $w12 # encoding: [0x79,0x0c,0x2b,0x92] +# CHECK: msubv.h $w6, $w7, $w30 # encoding: [0x79,0x3e,0x39,0x92] +# CHECK: msubv.w $w13, $w2, $w21 # encoding: [0x79,0x55,0x13,0x52] +# CHECK: msubv.d $w16, $w14, $w27 # encoding: [0x79,0x7b,0x74,0x12] +# CHECK: mulv.b $w20, $w3, $w13 # encoding: [0x78,0x0d,0x1d,0x12] +# CHECK: mulv.h $w27, $w26, $w14 # encoding: [0x78,0x2e,0xd6,0xd2] +# CHECK: mulv.w $w10, $w29, $w3 # encoding: [0x78,0x43,0xea,0x92] +# 
CHECK: mulv.d $w7, $w19, $w29 # encoding: [0x78,0x7d,0x99,0xd2] +# CHECK: pckev.b $w5, $w27, $w7 # encoding: [0x79,0x07,0xd9,0x54] +# CHECK: pckev.h $w1, $w4, $w27 # encoding: [0x79,0x3b,0x20,0x54] +# CHECK: pckev.w $w30, $w20, $w0 # encoding: [0x79,0x40,0xa7,0x94] +# CHECK: pckev.d $w6, $w1, $w15 # encoding: [0x79,0x6f,0x09,0x94] +# CHECK: pckod.b $w18, $w28, $w30 # encoding: [0x79,0x9e,0xe4,0x94] +# CHECK: pckod.h $w26, $w5, $w8 # encoding: [0x79,0xa8,0x2e,0x94] +# CHECK: pckod.w $w9, $w4, $w2 # encoding: [0x79,0xc2,0x22,0x54] +# CHECK: pckod.d $w30, $w22, $w20 # encoding: [0x79,0xf4,0xb7,0x94] +# CHECK: sld.b $w5, $w23[$12] # encoding: [0x78,0x0c,0xb9,0x54] +# CHECK: sld.h $w1, $w23[$3] # encoding: [0x78,0x23,0xb8,0x54] +# CHECK: sld.w $w20, $w8[$9] # encoding: [0x78,0x49,0x45,0x14] +# CHECK: sld.d $w7, $w23[$fp] # encoding: [0x78,0x7e,0xb9,0xd4] +# CHECK: sll.b $w3, $w0, $w17 # encoding: [0x78,0x11,0x00,0xcd] +# CHECK: sll.h $w17, $w27, $w3 # encoding: [0x78,0x23,0xdc,0x4d] +# CHECK: sll.w $w16, $w7, $w6 # encoding: [0x78,0x46,0x3c,0x0d] +# CHECK: sll.d $w9, $w0, $w26 # encoding: [0x78,0x7a,0x02,0x4d] +# CHECK: splat.b $w28, $w1[$1] # encoding: [0x78,0x81,0x0f,0x14] +# CHECK: splat.h $w2, $w11[$11] # encoding: [0x78,0xab,0x58,0x94] +# CHECK: splat.w $w22, $w0[$11] # encoding: [0x78,0xcb,0x05,0x94] +# CHECK: splat.d $w0, $w0[$2] # encoding: [0x78,0xe2,0x00,0x14] +# CHECK: sra.b $w28, $w4, $w17 # encoding: [0x78,0x91,0x27,0x0d] +# CHECK: sra.h $w13, $w9, $w3 # encoding: [0x78,0xa3,0x4b,0x4d] +# CHECK: sra.w $w27, $w21, $w19 # encoding: [0x78,0xd3,0xae,0xcd] +# CHECK: sra.d $w30, $w8, $w23 # encoding: [0x78,0xf7,0x47,0x8d] +# CHECK: srar.b $w19, $w18, $w18 # encoding: [0x78,0x92,0x94,0xd5] +# CHECK: srar.h $w7, $w23, $w8 # encoding: [0x78,0xa8,0xb9,0xd5] +# CHECK: srar.w $w1, $w12, $w2 # encoding: [0x78,0xc2,0x60,0x55] +# CHECK: srar.d $w21, $w7, $w14 # encoding: [0x78,0xee,0x3d,0x55] +# CHECK: srl.b $w12, $w3, $w19 # encoding: [0x79,0x13,0x1b,0x0d] +# CHECK: 
srl.h $w23, $w31, $w20 # encoding: [0x79,0x34,0xfd,0xcd] +# CHECK: srl.w $w18, $w27, $w11 # encoding: [0x79,0x4b,0xdc,0x8d] +# CHECK: srl.d $w3, $w12, $w26 # encoding: [0x79,0x7a,0x60,0xcd] +# CHECK: srlr.b $w15, $w21, $w11 # encoding: [0x79,0x0b,0xab,0xd5] +# CHECK: srlr.h $w21, $w13, $w19 # encoding: [0x79,0x33,0x6d,0x55] +# CHECK: srlr.w $w6, $w30, $w3 # encoding: [0x79,0x43,0xf1,0x95] +# CHECK: srlr.d $w1, $w2, $w14 # encoding: [0x79,0x6e,0x10,0x55] +# CHECK: subs_s.b $w25, $w15, $w1 # encoding: [0x78,0x01,0x7e,0x51] +# CHECK: subs_s.h $w28, $w25, $w22 # encoding: [0x78,0x36,0xcf,0x11] +# CHECK: subs_s.w $w10, $w12, $w21 # encoding: [0x78,0x55,0x62,0x91] +# CHECK: subs_s.d $w4, $w20, $w18 # encoding: [0x78,0x72,0xa1,0x11] +# CHECK: subs_u.b $w21, $w6, $w25 # encoding: [0x78,0x99,0x35,0x51] +# CHECK: subs_u.h $w3, $w10, $w7 # encoding: [0x78,0xa7,0x50,0xd1] +# CHECK: subs_u.w $w9, $w15, $w10 # encoding: [0x78,0xca,0x7a,0x51] +# CHECK: subs_u.d $w7, $w19, $w10 # encoding: [0x78,0xea,0x99,0xd1] +# CHECK: subsus_u.b $w6, $w7, $w12 # encoding: [0x79,0x0c,0x39,0x91] +# CHECK: subsus_u.h $w6, $w29, $w19 # encoding: [0x79,0x33,0xe9,0x91] +# CHECK: subsus_u.w $w7, $w15, $w7 # encoding: [0x79,0x47,0x79,0xd1] +# CHECK: subsus_u.d $w9, $w3, $w15 # encoding: [0x79,0x6f,0x1a,0x51] +# CHECK: subsuu_s.b $w22, $w3, $w31 # encoding: [0x79,0x9f,0x1d,0x91] +# CHECK: subsuu_s.h $w19, $w23, $w22 # encoding: [0x79,0xb6,0xbc,0xd1] +# CHECK: subsuu_s.w $w9, $w10, $w13 # encoding: [0x79,0xcd,0x52,0x51] +# CHECK: subsuu_s.d $w5, $w6, $w0 # encoding: [0x79,0xe0,0x31,0x51] +# CHECK: subv.b $w6, $w13, $w19 # encoding: [0x78,0x93,0x69,0x8e] +# CHECK: subv.h $w4, $w25, $w12 # encoding: [0x78,0xac,0xc9,0x0e] +# CHECK: subv.w $w27, $w27, $w11 # encoding: [0x78,0xcb,0xde,0xce] +# CHECK: subv.d $w9, $w24, $w10 # encoding: [0x78,0xea,0xc2,0x4e] +# CHECK: vshf.b $w3, $w16, $w5 # encoding: [0x78,0x05,0x80,0xd5] +# CHECK: vshf.h $w20, $w19, $w8 # encoding: [0x78,0x28,0x9d,0x15] +# CHECK: vshf.w $w16, 
$w30, $w25 # encoding: [0x78,0x59,0xf4,0x15] +# CHECK: vshf.d $w19, $w11, $w15 # encoding: [0x78,0x6f,0x5c,0xd5] + +# CHECKOBJDUMP: add_a.b $w26, $w9, $w4 +# CHECKOBJDUMP: add_a.h $w23, $w27, $w31 +# CHECKOBJDUMP: add_a.w $w11, $w6, $w22 +# CHECKOBJDUMP: add_a.d $w6, $w10, $w0 +# CHECKOBJDUMP: adds_a.b $w19, $w24, $w19 +# CHECKOBJDUMP: adds_a.h $w25, $w6, $w4 +# CHECKOBJDUMP: adds_a.w $w25, $w17, $w27 +# CHECKOBJDUMP: adds_a.d $w15, $w18, $w26 +# CHECKOBJDUMP: adds_s.b $w29, $w11, $w19 +# CHECKOBJDUMP: adds_s.h $w5, $w23, $w26 +# CHECKOBJDUMP: adds_s.w $w16, $w14, $w13 +# CHECKOBJDUMP: adds_s.d $w2, $w14, $w28 +# CHECKOBJDUMP: adds_u.b $w3, $w17, $w14 +# CHECKOBJDUMP: adds_u.h $w10, $w30, $w4 +# CHECKOBJDUMP: adds_u.w $w15, $w18, $w20 +# CHECKOBJDUMP: adds_u.d $w30, $w10, $w9 +# CHECKOBJDUMP: addv.b $w24, $w20, $w21 +# CHECKOBJDUMP: addv.h $w4, $w13, $w27 +# CHECKOBJDUMP: addv.w $w19, $w11, $w14 +# CHECKOBJDUMP: addv.d $w2, $w21, $w31 +# CHECKOBJDUMP: asub_s.b $w23, $w16, $w3 +# CHECKOBJDUMP: asub_s.h $w22, $w17, $w25 +# CHECKOBJDUMP: asub_s.w $w24, $w1, $w9 +# CHECKOBJDUMP: asub_s.d $w13, $w12, $w12 +# CHECKOBJDUMP: asub_u.b $w10, $w29, $w11 +# CHECKOBJDUMP: asub_u.h $w18, $w9, $w15 +# CHECKOBJDUMP: asub_u.w $w10, $w19, $w31 +# CHECKOBJDUMP: asub_u.d $w17, $w10, $w0 +# CHECKOBJDUMP: ave_s.b $w2, $w5, $w1 +# CHECKOBJDUMP: ave_s.h $w16, $w19, $w9 +# CHECKOBJDUMP: ave_s.w $w17, $w31, $w5 +# CHECKOBJDUMP: ave_s.d $w27, $w25, $w10 +# CHECKOBJDUMP: ave_u.b $w16, $w19, $w9 +# CHECKOBJDUMP: ave_u.h $w28, $w28, $w11 +# CHECKOBJDUMP: ave_u.w $w11, $w12, $w11 +# CHECKOBJDUMP: ave_u.d $w30, $w19, $w28 +# CHECKOBJDUMP: aver_s.b $w26, $w16, $w2 +# CHECKOBJDUMP: aver_s.h $w31, $w27, $w27 +# CHECKOBJDUMP: aver_s.w $w28, $w18, $w25 +# CHECKOBJDUMP: aver_s.d $w29, $w21, $w27 +# CHECKOBJDUMP: aver_u.b $w29, $w26, $w3 +# CHECKOBJDUMP: aver_u.h $w18, $w18, $w9 +# CHECKOBJDUMP: aver_u.w $w17, $w25, $w29 +# CHECKOBJDUMP: aver_u.d $w22, $w22, $w19 +# CHECKOBJDUMP: bclr.b $w2, $w15, $w29 
+# CHECKOBJDUMP: bclr.h $w16, $w21, $w28 +# CHECKOBJDUMP: bclr.w $w19, $w2, $w9 +# CHECKOBJDUMP: bclr.d $w27, $w31, $w4 +# CHECKOBJDUMP: binsl.b $w5, $w16, $w24 +# CHECKOBJDUMP: binsl.h $w30, $w5, $w10 +# CHECKOBJDUMP: binsl.w $w14, $w15, $w13 +# CHECKOBJDUMP: binsl.d $w23, $w20, $w12 +# CHECKOBJDUMP: binsr.b $w22, $w11, $w2 +# CHECKOBJDUMP: binsr.h $w0, $w26, $w6 +# CHECKOBJDUMP: binsr.w $w26, $w3, $w28 +# CHECKOBJDUMP: binsr.d $w0, $w0, $w21 +# CHECKOBJDUMP: bneg.b $w0, $w11, $w24 +# CHECKOBJDUMP: bneg.h $w28, $w16, $w4 +# CHECKOBJDUMP: bneg.w $w3, $w26, $w19 +# CHECKOBJDUMP: bneg.d $w13, $w29, $w15 +# CHECKOBJDUMP: bset.b $w31, $w5, $w31 +# CHECKOBJDUMP: bset.h $w14, $w12, $w6 +# CHECKOBJDUMP: bset.w $w31, $w9, $w12 +# CHECKOBJDUMP: bset.d $w5, $w22, $w5 +# CHECKOBJDUMP: ceq.b $w31, $w31, $w18 +# CHECKOBJDUMP: ceq.h $w10, $w27, $w9 +# CHECKOBJDUMP: ceq.w $w9, $w5, $w14 +# CHECKOBJDUMP: ceq.d $w5, $w17, $w0 +# CHECKOBJDUMP: cle_s.b $w23, $w4, $w9 +# CHECKOBJDUMP: cle_s.h $w22, $w27, $w19 +# CHECKOBJDUMP: cle_s.w $w30, $w26, $w10 +# CHECKOBJDUMP: cle_s.d $w18, $w5, $w10 +# CHECKOBJDUMP: cle_u.b $w1, $w25, $w0 +# CHECKOBJDUMP: cle_u.h $w7, $w0, $w29 +# CHECKOBJDUMP: cle_u.w $w25, $w18, $w1 +# CHECKOBJDUMP: cle_u.d $w6, $w0, $w30 +# CHECKOBJDUMP: clt_s.b $w25, $w2, $w21 +# CHECKOBJDUMP: clt_s.h $w2, $w19, $w9 +# CHECKOBJDUMP: clt_s.w $w23, $w8, $w16 +# CHECKOBJDUMP: clt_s.d $w7, $w30, $w12 +# CHECKOBJDUMP: clt_u.b $w2, $w31, $w13 +# CHECKOBJDUMP: clt_u.h $w16, $w31, $w23 +# CHECKOBJDUMP: clt_u.w $w3, $w24, $w9 +# CHECKOBJDUMP: clt_u.d $w7, $w0, $w1 +# CHECKOBJDUMP: div_s.b $w29, $w3, $w18 +# CHECKOBJDUMP: div_s.h $w17, $w16, $w13 +# CHECKOBJDUMP: div_s.w $w4, $w25, $w30 +# CHECKOBJDUMP: div_s.d $w31, $w9, $w20 +# CHECKOBJDUMP: div_u.b $w6, $w29, $w10 +# CHECKOBJDUMP: div_u.h $w24, $w21, $w14 +# CHECKOBJDUMP: div_u.w $w29, $w14, $w25 +# CHECKOBJDUMP: div_u.d $w31, $w1, $w21 +# CHECKOBJDUMP: dotp_s.h $w23, $w22, $w25 +# CHECKOBJDUMP: dotp_s.w $w20, $w14, $w5 +# 
CHECKOBJDUMP: dotp_s.d $w17, $w2, $w22 +# CHECKOBJDUMP: dotp_u.h $w13, $w2, $w6 +# CHECKOBJDUMP: dotp_u.w $w15, $w22, $w21 +# CHECKOBJDUMP: dotp_u.d $w4, $w16, $w26 +# CHECKOBJDUMP: dpadd_s.h $w1, $w28, $w22 +# CHECKOBJDUMP: dpadd_s.w $w10, $w1, $w12 +# CHECKOBJDUMP: dpadd_s.d $w3, $w21, $w27 +# CHECKOBJDUMP: dpadd_u.h $w17, $w5, $w20 +# CHECKOBJDUMP: dpadd_u.w $w24, $w8, $w16 +# CHECKOBJDUMP: dpadd_u.d $w15, $w29, $w16 +# CHECKOBJDUMP: dpsub_s.h $w4, $w11, $w12 +# CHECKOBJDUMP: dpsub_s.w $w4, $w7, $w6 +# CHECKOBJDUMP: dpsub_s.d $w31, $w12, $w28 +# CHECKOBJDUMP: dpsub_u.h $w4, $w25, $w17 +# CHECKOBJDUMP: dpsub_u.w $w19, $w25, $w16 +# CHECKOBJDUMP: dpsub_u.d $w7, $w10, $w26 +# CHECKOBJDUMP: hadd_s.h $w28, $w24, $w2 +# CHECKOBJDUMP: hadd_s.w $w24, $w17, $w11 +# CHECKOBJDUMP: hadd_s.d $w17, $w15, $w20 +# CHECKOBJDUMP: hadd_u.h $w12, $w29, $w17 +# CHECKOBJDUMP: hadd_u.w $w9, $w5, $w6 +# CHECKOBJDUMP: hadd_u.d $w1, $w20, $w6 +# CHECKOBJDUMP: hsub_s.h $w16, $w14, $w29 +# CHECKOBJDUMP: hsub_s.w $w9, $w13, $w11 +# CHECKOBJDUMP: hsub_s.d $w30, $w18, $w14 +# CHECKOBJDUMP: hsub_u.h $w7, $w12, $w14 +# CHECKOBJDUMP: hsub_u.w $w21, $w5, $w5 +# CHECKOBJDUMP: hsub_u.d $w11, $w12, $w31 +# CHECKOBJDUMP: ilvev.b $w18, $w16, $w30 +# CHECKOBJDUMP: ilvev.h $w14, $w0, $w13 +# CHECKOBJDUMP: ilvev.w $w12, $w25, $w22 +# CHECKOBJDUMP: ilvev.d $w30, $w27, $w3 +# CHECKOBJDUMP: ilvl.b $w29, $w3, $w21 +# CHECKOBJDUMP: ilvl.h $w27, $w10, $w17 +# CHECKOBJDUMP: ilvl.w $w6, $w1, $w0 +# CHECKOBJDUMP: ilvl.d $w3, $w16, $w24 +# CHECKOBJDUMP: ilvod.b $w11, $w5, $w20 +# CHECKOBJDUMP: ilvod.h $w18, $w13, $w31 +# CHECKOBJDUMP: ilvod.w $w29, $w16, $w24 +# CHECKOBJDUMP: ilvod.d $w22, $w12, $w29 +# CHECKOBJDUMP: ilvr.b $w4, $w30, $w6 +# CHECKOBJDUMP: ilvr.h $w28, $w19, $w29 +# CHECKOBJDUMP: ilvr.w $w18, $w20, $w21 +# CHECKOBJDUMP: ilvr.d $w23, $w30, $w12 +# CHECKOBJDUMP: maddv.b $w17, $w31, $w29 +# CHECKOBJDUMP: maddv.h $w7, $w24, $w9 +# CHECKOBJDUMP: maddv.w $w22, $w22, $w20 +# CHECKOBJDUMP: maddv.d $w30, 
$w26, $w20 +# CHECKOBJDUMP: max_a.b $w23, $w11, $w23 +# CHECKOBJDUMP: max_a.h $w20, $w5, $w30 +# CHECKOBJDUMP: max_a.w $w7, $w18, $w30 +# CHECKOBJDUMP: max_a.d $w8, $w8, $w31 +# CHECKOBJDUMP: max_s.b $w10, $w1, $w19 +# CHECKOBJDUMP: max_s.h $w15, $w29, $w17 +# CHECKOBJDUMP: max_s.w $w15, $w29, $w14 +# CHECKOBJDUMP: max_s.d $w25, $w24, $w3 +# CHECKOBJDUMP: max_u.b $w12, $w24, $w5 +# CHECKOBJDUMP: max_u.h $w5, $w6, $w7 +# CHECKOBJDUMP: max_u.w $w16, $w4, $w7 +# CHECKOBJDUMP: max_u.d $w26, $w12, $w24 +# CHECKOBJDUMP: min_a.b $w4, $w26, $w1 +# CHECKOBJDUMP: min_a.h $w12, $w13, $w31 +# CHECKOBJDUMP: min_a.w $w28, $w20, $w0 +# CHECKOBJDUMP: min_a.d $w12, $w20, $w19 +# CHECKOBJDUMP: min_s.b $w19, $w3, $w14 +# CHECKOBJDUMP: min_s.h $w27, $w21, $w8 +# CHECKOBJDUMP: min_s.w $w0, $w14, $w30 +# CHECKOBJDUMP: min_s.d $w6, $w8, $w21 +# CHECKOBJDUMP: min_u.b $w22, $w26, $w8 +# CHECKOBJDUMP: min_u.h $w7, $w27, $w12 +# CHECKOBJDUMP: min_u.w $w8, $w20, $w14 +# CHECKOBJDUMP: min_u.d $w26, $w14, $w15 +# CHECKOBJDUMP: mod_s.b $w18, $w1, $w26 +# CHECKOBJDUMP: mod_s.h $w31, $w30, $w28 +# CHECKOBJDUMP: mod_s.w $w2, $w6, $w13 +# CHECKOBJDUMP: mod_s.d $w21, $w27, $w22 +# CHECKOBJDUMP: mod_u.b $w16, $w7, $w13 +# CHECKOBJDUMP: mod_u.h $w24, $w8, $w7 +# CHECKOBJDUMP: mod_u.w $w30, $w2, $w17 +# CHECKOBJDUMP: mod_u.d $w31, $w2, $w25 +# CHECKOBJDUMP: msubv.b $w14, $w5, $w12 +# CHECKOBJDUMP: msubv.h $w6, $w7, $w30 +# CHECKOBJDUMP: msubv.w $w13, $w2, $w21 +# CHECKOBJDUMP: msubv.d $w16, $w14, $w27 +# CHECKOBJDUMP: mulv.b $w20, $w3, $w13 +# CHECKOBJDUMP: mulv.h $w27, $w26, $w14 +# CHECKOBJDUMP: mulv.w $w10, $w29, $w3 +# CHECKOBJDUMP: mulv.d $w7, $w19, $w29 +# CHECKOBJDUMP: pckev.b $w5, $w27, $w7 +# CHECKOBJDUMP: pckev.h $w1, $w4, $w27 +# CHECKOBJDUMP: pckev.w $w30, $w20, $w0 +# CHECKOBJDUMP: pckev.d $w6, $w1, $w15 +# CHECKOBJDUMP: pckod.b $w18, $w28, $w30 +# CHECKOBJDUMP: pckod.h $w26, $w5, $w8 +# CHECKOBJDUMP: pckod.w $w9, $w4, $w2 +# CHECKOBJDUMP: pckod.d $w30, $w22, $w20 +# CHECKOBJDUMP: sld.b 
$w5, $w23[$12] +# CHECKOBJDUMP: sld.h $w1, $w23[$3] +# CHECKOBJDUMP: sld.w $w20, $w8[$9] +# CHECKOBJDUMP: sld.d $w7, $w23[$fp] +# CHECKOBJDUMP: sll.b $w3, $w0, $w17 +# CHECKOBJDUMP: sll.h $w17, $w27, $w3 +# CHECKOBJDUMP: sll.w $w16, $w7, $w6 +# CHECKOBJDUMP: sll.d $w9, $w0, $w26 +# CHECKOBJDUMP: splat.b $w28, $w1[$1] +# CHECKOBJDUMP: splat.h $w2, $w11[$11] +# CHECKOBJDUMP: splat.w $w22, $w0[$11] +# CHECKOBJDUMP: splat.d $w0, $w0[$2] +# CHECKOBJDUMP: sra.b $w28, $w4, $w17 +# CHECKOBJDUMP: sra.h $w13, $w9, $w3 +# CHECKOBJDUMP: sra.w $w27, $w21, $w19 +# CHECKOBJDUMP: sra.d $w30, $w8, $w23 +# CHECKOBJDUMP: srar.b $w19, $w18, $w18 +# CHECKOBJDUMP: srar.h $w7, $w23, $w8 +# CHECKOBJDUMP: srar.w $w1, $w12, $w2 +# CHECKOBJDUMP: srar.d $w21, $w7, $w14 +# CHECKOBJDUMP: srl.b $w12, $w3, $w19 +# CHECKOBJDUMP: srl.h $w23, $w31, $w20 +# CHECKOBJDUMP: srl.w $w18, $w27, $w11 +# CHECKOBJDUMP: srl.d $w3, $w12, $w26 +# CHECKOBJDUMP: srlr.b $w15, $w21, $w11 +# CHECKOBJDUMP: srlr.h $w21, $w13, $w19 +# CHECKOBJDUMP: srlr.w $w6, $w30, $w3 +# CHECKOBJDUMP: srlr.d $w1, $w2, $w14 +# CHECKOBJDUMP: subs_s.b $w25, $w15, $w1 +# CHECKOBJDUMP: subs_s.h $w28, $w25, $w22 +# CHECKOBJDUMP: subs_s.w $w10, $w12, $w21 +# CHECKOBJDUMP: subs_s.d $w4, $w20, $w18 +# CHECKOBJDUMP: subs_u.b $w21, $w6, $w25 +# CHECKOBJDUMP: subs_u.h $w3, $w10, $w7 +# CHECKOBJDUMP: subs_u.w $w9, $w15, $w10 +# CHECKOBJDUMP: subs_u.d $w7, $w19, $w10 +# CHECKOBJDUMP: subsus_u.b $w6, $w7, $w12 +# CHECKOBJDUMP: subsus_u.h $w6, $w29, $w19 +# CHECKOBJDUMP: subsus_u.w $w7, $w15, $w7 +# CHECKOBJDUMP: subsus_u.d $w9, $w3, $w15 +# CHECKOBJDUMP: subsuu_s.b $w22, $w3, $w31 +# CHECKOBJDUMP: subsuu_s.h $w19, $w23, $w22 +# CHECKOBJDUMP: subsuu_s.w $w9, $w10, $w13 +# CHECKOBJDUMP: subsuu_s.d $w5, $w6, $w0 +# CHECKOBJDUMP: subv.b $w6, $w13, $w19 +# CHECKOBJDUMP: subv.h $w4, $w25, $w12 +# CHECKOBJDUMP: subv.w $w27, $w27, $w11 +# CHECKOBJDUMP: subv.d $w9, $w24, $w10 +# CHECKOBJDUMP: vshf.b $w3, $w16, $w5 +# CHECKOBJDUMP: vshf.h $w20, $w19, $w8 +# 
CHECKOBJDUMP: vshf.w $w16, $w30, $w25 +# CHECKOBJDUMP: vshf.d $w19, $w11, $w15 + + add_a.b $w26, $w9, $w4 + add_a.h $w23, $w27, $w31 + add_a.w $w11, $w6, $w22 + add_a.d $w6, $w10, $w0 + adds_a.b $w19, $w24, $w19 + adds_a.h $w25, $w6, $w4 + adds_a.w $w25, $w17, $w27 + adds_a.d $w15, $w18, $w26 + adds_s.b $w29, $w11, $w19 + adds_s.h $w5, $w23, $w26 + adds_s.w $w16, $w14, $w13 + adds_s.d $w2, $w14, $w28 + adds_u.b $w3, $w17, $w14 + adds_u.h $w10, $w30, $w4 + adds_u.w $w15, $w18, $w20 + adds_u.d $w30, $w10, $w9 + addv.b $w24, $w20, $w21 + addv.h $w4, $w13, $w27 + addv.w $w19, $w11, $w14 + addv.d $w2, $w21, $w31 + asub_s.b $w23, $w16, $w3 + asub_s.h $w22, $w17, $w25 + asub_s.w $w24, $w1, $w9 + asub_s.d $w13, $w12, $w12 + asub_u.b $w10, $w29, $w11 + asub_u.h $w18, $w9, $w15 + asub_u.w $w10, $w19, $w31 + asub_u.d $w17, $w10, $w0 + ave_s.b $w2, $w5, $w1 + ave_s.h $w16, $w19, $w9 + ave_s.w $w17, $w31, $w5 + ave_s.d $w27, $w25, $w10 + ave_u.b $w16, $w19, $w9 + ave_u.h $w28, $w28, $w11 + ave_u.w $w11, $w12, $w11 + ave_u.d $w30, $w19, $w28 + aver_s.b $w26, $w16, $w2 + aver_s.h $w31, $w27, $w27 + aver_s.w $w28, $w18, $w25 + aver_s.d $w29, $w21, $w27 + aver_u.b $w29, $w26, $w3 + aver_u.h $w18, $w18, $w9 + aver_u.w $w17, $w25, $w29 + aver_u.d $w22, $w22, $w19 + bclr.b $w2, $w15, $w29 + bclr.h $w16, $w21, $w28 + bclr.w $w19, $w2, $w9 + bclr.d $w27, $w31, $w4 + binsl.b $w5, $w16, $w24 + binsl.h $w30, $w5, $w10 + binsl.w $w14, $w15, $w13 + binsl.d $w23, $w20, $w12 + binsr.b $w22, $w11, $w2 + binsr.h $w0, $w26, $w6 + binsr.w $w26, $w3, $w28 + binsr.d $w0, $w0, $w21 + bneg.b $w0, $w11, $w24 + bneg.h $w28, $w16, $w4 + bneg.w $w3, $w26, $w19 + bneg.d $w13, $w29, $w15 + bset.b $w31, $w5, $w31 + bset.h $w14, $w12, $w6 + bset.w $w31, $w9, $w12 + bset.d $w5, $w22, $w5 + ceq.b $w31, $w31, $w18 + ceq.h $w10, $w27, $w9 + ceq.w $w9, $w5, $w14 + ceq.d $w5, $w17, $w0 + cle_s.b $w23, $w4, $w9 + cle_s.h $w22, $w27, $w19 + cle_s.w $w30, $w26, $w10 + cle_s.d $w18, $w5, $w10 + cle_u.b $w1, $w25, $w0 + 
cle_u.h $w7, $w0, $w29 + cle_u.w $w25, $w18, $w1 + cle_u.d $w6, $w0, $w30 + clt_s.b $w25, $w2, $w21 + clt_s.h $w2, $w19, $w9 + clt_s.w $w23, $w8, $w16 + clt_s.d $w7, $w30, $w12 + clt_u.b $w2, $w31, $w13 + clt_u.h $w16, $w31, $w23 + clt_u.w $w3, $w24, $w9 + clt_u.d $w7, $w0, $w1 + div_s.b $w29, $w3, $w18 + div_s.h $w17, $w16, $w13 + div_s.w $w4, $w25, $w30 + div_s.d $w31, $w9, $w20 + div_u.b $w6, $w29, $w10 + div_u.h $w24, $w21, $w14 + div_u.w $w29, $w14, $w25 + div_u.d $w31, $w1, $w21 + dotp_s.h $w23, $w22, $w25 + dotp_s.w $w20, $w14, $w5 + dotp_s.d $w17, $w2, $w22 + dotp_u.h $w13, $w2, $w6 + dotp_u.w $w15, $w22, $w21 + dotp_u.d $w4, $w16, $w26 + dpadd_s.h $w1, $w28, $w22 + dpadd_s.w $w10, $w1, $w12 + dpadd_s.d $w3, $w21, $w27 + dpadd_u.h $w17, $w5, $w20 + dpadd_u.w $w24, $w8, $w16 + dpadd_u.d $w15, $w29, $w16 + dpsub_s.h $w4, $w11, $w12 + dpsub_s.w $w4, $w7, $w6 + dpsub_s.d $w31, $w12, $w28 + dpsub_u.h $w4, $w25, $w17 + dpsub_u.w $w19, $w25, $w16 + dpsub_u.d $w7, $w10, $w26 + hadd_s.h $w28, $w24, $w2 + hadd_s.w $w24, $w17, $w11 + hadd_s.d $w17, $w15, $w20 + hadd_u.h $w12, $w29, $w17 + hadd_u.w $w9, $w5, $w6 + hadd_u.d $w1, $w20, $w6 + hsub_s.h $w16, $w14, $w29 + hsub_s.w $w9, $w13, $w11 + hsub_s.d $w30, $w18, $w14 + hsub_u.h $w7, $w12, $w14 + hsub_u.w $w21, $w5, $w5 + hsub_u.d $w11, $w12, $w31 + ilvev.b $w18, $w16, $w30 + ilvev.h $w14, $w0, $w13 + ilvev.w $w12, $w25, $w22 + ilvev.d $w30, $w27, $w3 + ilvl.b $w29, $w3, $w21 + ilvl.h $w27, $w10, $w17 + ilvl.w $w6, $w1, $w0 + ilvl.d $w3, $w16, $w24 + ilvod.b $w11, $w5, $w20 + ilvod.h $w18, $w13, $w31 + ilvod.w $w29, $w16, $w24 + ilvod.d $w22, $w12, $w29 + ilvr.b $w4, $w30, $w6 + ilvr.h $w28, $w19, $w29 + ilvr.w $w18, $w20, $w21 + ilvr.d $w23, $w30, $w12 + maddv.b $w17, $w31, $w29 + maddv.h $w7, $w24, $w9 + maddv.w $w22, $w22, $w20 + maddv.d $w30, $w26, $w20 + max_a.b $w23, $w11, $w23 + max_a.h $w20, $w5, $w30 + max_a.w $w7, $w18, $w30 + max_a.d $w8, $w8, $w31 + max_s.b $w10, $w1, $w19 + max_s.h $w15, $w29, $w17 + 
max_s.w $w15, $w29, $w14 + max_s.d $w25, $w24, $w3 + max_u.b $w12, $w24, $w5 + max_u.h $w5, $w6, $w7 + max_u.w $w16, $w4, $w7 + max_u.d $w26, $w12, $w24 + min_a.b $w4, $w26, $w1 + min_a.h $w12, $w13, $w31 + min_a.w $w28, $w20, $w0 + min_a.d $w12, $w20, $w19 + min_s.b $w19, $w3, $w14 + min_s.h $w27, $w21, $w8 + min_s.w $w0, $w14, $w30 + min_s.d $w6, $w8, $w21 + min_u.b $w22, $w26, $w8 + min_u.h $w7, $w27, $w12 + min_u.w $w8, $w20, $w14 + min_u.d $w26, $w14, $w15 + mod_s.b $w18, $w1, $w26 + mod_s.h $w31, $w30, $w28 + mod_s.w $w2, $w6, $w13 + mod_s.d $w21, $w27, $w22 + mod_u.b $w16, $w7, $w13 + mod_u.h $w24, $w8, $w7 + mod_u.w $w30, $w2, $w17 + mod_u.d $w31, $w2, $w25 + msubv.b $w14, $w5, $w12 + msubv.h $w6, $w7, $w30 + msubv.w $w13, $w2, $w21 + msubv.d $w16, $w14, $w27 + mulv.b $w20, $w3, $w13 + mulv.h $w27, $w26, $w14 + mulv.w $w10, $w29, $w3 + mulv.d $w7, $w19, $w29 + pckev.b $w5, $w27, $w7 + pckev.h $w1, $w4, $w27 + pckev.w $w30, $w20, $w0 + pckev.d $w6, $w1, $w15 + pckod.b $w18, $w28, $w30 + pckod.h $w26, $w5, $w8 + pckod.w $w9, $w4, $w2 + pckod.d $w30, $w22, $w20 + sld.b $w5, $w23[$12] + sld.h $w1, $w23[$3] + sld.w $w20, $w8[$9] + sld.d $w7, $w23[$30] + sll.b $w3, $w0, $w17 + sll.h $w17, $w27, $w3 + sll.w $w16, $w7, $w6 + sll.d $w9, $w0, $w26 + splat.b $w28, $w1[$1] + splat.h $w2, $w11[$11] + splat.w $w22, $w0[$11] + splat.d $w0, $w0[$2] + sra.b $w28, $w4, $w17 + sra.h $w13, $w9, $w3 + sra.w $w27, $w21, $w19 + sra.d $w30, $w8, $w23 + srar.b $w19, $w18, $w18 + srar.h $w7, $w23, $w8 + srar.w $w1, $w12, $w2 + srar.d $w21, $w7, $w14 + srl.b $w12, $w3, $w19 + srl.h $w23, $w31, $w20 + srl.w $w18, $w27, $w11 + srl.d $w3, $w12, $w26 + srlr.b $w15, $w21, $w11 + srlr.h $w21, $w13, $w19 + srlr.w $w6, $w30, $w3 + srlr.d $w1, $w2, $w14 + subs_s.b $w25, $w15, $w1 + subs_s.h $w28, $w25, $w22 + subs_s.w $w10, $w12, $w21 + subs_s.d $w4, $w20, $w18 + subs_u.b $w21, $w6, $w25 + subs_u.h $w3, $w10, $w7 + subs_u.w $w9, $w15, $w10 + subs_u.d $w7, $w19, $w10 + subsus_u.b $w6, $w7, 
$w12 + subsus_u.h $w6, $w29, $w19 + subsus_u.w $w7, $w15, $w7 + subsus_u.d $w9, $w3, $w15 + subsuu_s.b $w22, $w3, $w31 + subsuu_s.h $w19, $w23, $w22 + subsuu_s.w $w9, $w10, $w13 + subsuu_s.d $w5, $w6, $w0 + subv.b $w6, $w13, $w19 + subv.h $w4, $w25, $w12 + subv.w $w27, $w27, $w11 + subv.d $w9, $w24, $w10 + vshf.b $w3, $w16, $w5 + vshf.h $w20, $w19, $w8 + vshf.w $w16, $w30, $w25 + vshf.d $w19, $w11, $w15 diff --git a/test/MC/Mips/msa/test_3rf.s b/test/MC/Mips/msa/test_3rf.s new file mode 100644 index 0000000..f45557e --- /dev/null +++ b/test/MC/Mips/msa/test_3rf.s @@ -0,0 +1,252 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s +# +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d -triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP +# +# CHECK: fadd.w $w28, $w19, $w28 # encoding: [0x78,0x1c,0x9f,0x1b] +# CHECK: fadd.d $w13, $w2, $w29 # encoding: [0x78,0x3d,0x13,0x5b] +# CHECK: fcaf.w $w14, $w11, $w25 # encoding: [0x78,0x19,0x5b,0x9a] +# CHECK: fcaf.d $w1, $w1, $w19 # encoding: [0x78,0x33,0x08,0x5a] +# CHECK: fceq.w $w1, $w23, $w16 # encoding: [0x78,0x90,0xb8,0x5a] +# CHECK: fceq.d $w0, $w8, $w16 # encoding: [0x78,0xb0,0x40,0x1a] +# CHECK: fcle.w $w16, $w9, $w24 # encoding: [0x79,0x98,0x4c,0x1a] +# CHECK: fcle.d $w27, $w14, $w1 # encoding: [0x79,0xa1,0x76,0xda] +# CHECK: fclt.w $w28, $w8, $w8 # encoding: [0x79,0x08,0x47,0x1a] +# CHECK: fclt.d $w30, $w25, $w11 # encoding: [0x79,0x2b,0xcf,0x9a] +# CHECK: fcne.w $w2, $w18, $w23 # encoding: [0x78,0xd7,0x90,0x9c] +# CHECK: fcne.d $w14, $w20, $w15 # encoding: [0x78,0xef,0xa3,0x9c] +# CHECK: fcor.w $w10, $w18, $w25 # encoding: [0x78,0x59,0x92,0x9c] +# CHECK: fcor.d $w17, $w25, $w11 # encoding: [0x78,0x6b,0xcc,0x5c] +# CHECK: fcueq.w $w14, $w2, $w21 # encoding: [0x78,0xd5,0x13,0x9a] +# CHECK: fcueq.d $w29, $w3, $w7 # encoding: 
[0x78,0xe7,0x1f,0x5a] +# CHECK: fcule.w $w17, $w5, $w3 # encoding: [0x79,0xc3,0x2c,0x5a] +# CHECK: fcule.d $w31, $w1, $w30 # encoding: [0x79,0xfe,0x0f,0xda] +# CHECK: fcult.w $w6, $w25, $w9 # encoding: [0x79,0x49,0xc9,0x9a] +# CHECK: fcult.d $w27, $w8, $w17 # encoding: [0x79,0x71,0x46,0xda] +# CHECK: fcun.w $w4, $w20, $w8 # encoding: [0x78,0x48,0xa1,0x1a] +# CHECK: fcun.d $w29, $w11, $w3 # encoding: [0x78,0x63,0x5f,0x5a] +# CHECK: fcune.w $w13, $w18, $w19 # encoding: [0x78,0x93,0x93,0x5c] +# CHECK: fcune.d $w16, $w26, $w21 # encoding: [0x78,0xb5,0xd4,0x1c] +# CHECK: fdiv.w $w13, $w24, $w2 # encoding: [0x78,0xc2,0xc3,0x5b] +# CHECK: fdiv.d $w19, $w4, $w25 # encoding: [0x78,0xf9,0x24,0xdb] +# CHECK: fexdo.h $w8, $w0, $w16 # encoding: [0x7a,0x10,0x02,0x1b] +# CHECK: fexdo.w $w0, $w13, $w27 # encoding: [0x7a,0x3b,0x68,0x1b] +# CHECK: fexp2.w $w17, $w0, $w3 # encoding: [0x79,0xc3,0x04,0x5b] +# CHECK: fexp2.d $w22, $w0, $w10 # encoding: [0x79,0xea,0x05,0x9b] +# CHECK: fmadd.w $w29, $w6, $w23 # encoding: [0x79,0x17,0x37,0x5b] +# CHECK: fmadd.d $w11, $w28, $w21 # encoding: [0x79,0x35,0xe2,0xdb] +# CHECK: fmax.w $w0, $w23, $w13 # encoding: [0x7b,0x8d,0xb8,0x1b] +# CHECK: fmax.d $w26, $w18, $w8 # encoding: [0x7b,0xa8,0x96,0x9b] +# CHECK: fmax_a.w $w10, $w16, $w10 # encoding: [0x7b,0xca,0x82,0x9b] +# CHECK: fmax_a.d $w30, $w9, $w22 # encoding: [0x7b,0xf6,0x4f,0x9b] +# CHECK: fmin.w $w24, $w1, $w30 # encoding: [0x7b,0x1e,0x0e,0x1b] +# CHECK: fmin.d $w27, $w27, $w10 # encoding: [0x7b,0x2a,0xde,0xdb] +# CHECK: fmin_a.w $w10, $w29, $w20 # encoding: [0x7b,0x54,0xea,0x9b] +# CHECK: fmin_a.d $w13, $w30, $w24 # encoding: [0x7b,0x78,0xf3,0x5b] +# CHECK: fmsub.w $w17, $w25, $w0 # encoding: [0x79,0x40,0xcc,0x5b] +# CHECK: fmsub.d $w8, $w18, $w16 # encoding: [0x79,0x70,0x92,0x1b] +# CHECK: fmul.w $w3, $w15, $w15 # encoding: [0x78,0x8f,0x78,0xdb] +# CHECK: fmul.d $w9, $w30, $w10 # encoding: [0x78,0xaa,0xf2,0x5b] +# CHECK: fsaf.w $w25, $w5, $w10 # encoding: [0x7a,0x0a,0x2e,0x5a] +# CHECK: 
fsaf.d $w25, $w3, $w29 # encoding: [0x7a,0x3d,0x1e,0x5a] +# CHECK: fseq.w $w11, $w17, $w13 # encoding: [0x7a,0x8d,0x8a,0xda] +# CHECK: fseq.d $w29, $w0, $w31 # encoding: [0x7a,0xbf,0x07,0x5a] +# CHECK: fsle.w $w30, $w31, $w31 # encoding: [0x7b,0x9f,0xff,0x9a] +# CHECK: fsle.d $w18, $w23, $w24 # encoding: [0x7b,0xb8,0xbc,0x9a] +# CHECK: fslt.w $w12, $w5, $w6 # encoding: [0x7b,0x06,0x2b,0x1a] +# CHECK: fslt.d $w16, $w26, $w21 # encoding: [0x7b,0x35,0xd4,0x1a] +# CHECK: fsne.w $w30, $w1, $w12 # encoding: [0x7a,0xcc,0x0f,0x9c] +# CHECK: fsne.d $w14, $w13, $w23 # encoding: [0x7a,0xf7,0x6b,0x9c] +# CHECK: fsor.w $w27, $w13, $w27 # encoding: [0x7a,0x5b,0x6e,0xdc] +# CHECK: fsor.d $w12, $w24, $w11 # encoding: [0x7a,0x6b,0xc3,0x1c] +# CHECK: fsub.w $w31, $w26, $w1 # encoding: [0x78,0x41,0xd7,0xdb] +# CHECK: fsub.d $w19, $w17, $w27 # encoding: [0x78,0x7b,0x8c,0xdb] +# CHECK: fsueq.w $w16, $w24, $w25 # encoding: [0x7a,0xd9,0xc4,0x1a] +# CHECK: fsueq.d $w18, $w14, $w14 # encoding: [0x7a,0xee,0x74,0x9a] +# CHECK: fsule.w $w23, $w30, $w13 # encoding: [0x7b,0xcd,0xf5,0xda] +# CHECK: fsule.d $w2, $w11, $w26 # encoding: [0x7b,0xfa,0x58,0x9a] +# CHECK: fsult.w $w11, $w26, $w22 # encoding: [0x7b,0x56,0xd2,0xda] +# CHECK: fsult.d $w6, $w23, $w30 # encoding: [0x7b,0x7e,0xb9,0x9a] +# CHECK: fsun.w $w3, $w18, $w28 # encoding: [0x7a,0x5c,0x90,0xda] +# CHECK: fsun.d $w18, $w11, $w19 # encoding: [0x7a,0x73,0x5c,0x9a] +# CHECK: fsune.w $w16, $w31, $w2 # encoding: [0x7a,0x82,0xfc,0x1c] +# CHECK: fsune.d $w3, $w26, $w17 # encoding: [0x7a,0xb1,0xd0,0xdc] +# CHECK: ftq.h $w16, $w4, $w24 # encoding: [0x7a,0x98,0x24,0x1b] +# CHECK: ftq.w $w5, $w5, $w25 # encoding: [0x7a,0xb9,0x29,0x5b] +# CHECK: madd_q.h $w16, $w20, $w10 # encoding: [0x79,0x4a,0xa4,0x1c] +# CHECK: madd_q.w $w28, $w2, $w9 # encoding: [0x79,0x69,0x17,0x1c] +# CHECK: maddr_q.h $w8, $w18, $w9 # encoding: [0x7b,0x49,0x92,0x1c] +# CHECK: maddr_q.w $w29, $w12, $w16 # encoding: [0x7b,0x70,0x67,0x5c] +# CHECK: msub_q.h $w24, $w26, $w10 # 
encoding: [0x79,0x8a,0xd6,0x1c] +# CHECK: msub_q.w $w13, $w30, $w28 # encoding: [0x79,0xbc,0xf3,0x5c] +# CHECK: msubr_q.h $w12, $w21, $w11 # encoding: [0x7b,0x8b,0xab,0x1c] +# CHECK: msubr_q.w $w1, $w14, $w20 # encoding: [0x7b,0xb4,0x70,0x5c] +# CHECK: mul_q.h $w6, $w16, $w30 # encoding: [0x79,0x1e,0x81,0x9c] +# CHECK: mul_q.w $w16, $w1, $w4 # encoding: [0x79,0x24,0x0c,0x1c] +# CHECK: mulr_q.h $w6, $w20, $w19 # encoding: [0x7b,0x13,0xa1,0x9c] +# CHECK: mulr_q.w $w27, $w1, $w20 # encoding: [0x7b,0x34,0x0e,0xdc] + +# CHECKOBJDUMP: fadd.w $w28, $w19, $w28 +# CHECKOBJDUMP: fadd.d $w13, $w2, $w29 +# CHECKOBJDUMP: fcaf.w $w14, $w11, $w25 +# CHECKOBJDUMP: fcaf.d $w1, $w1, $w19 +# CHECKOBJDUMP: fceq.w $w1, $w23, $w16 +# CHECKOBJDUMP: fceq.d $w0, $w8, $w16 +# CHECKOBJDUMP: fcle.w $w16, $w9, $w24 +# CHECKOBJDUMP: fcle.d $w27, $w14, $w1 +# CHECKOBJDUMP: fclt.w $w28, $w8, $w8 +# CHECKOBJDUMP: fclt.d $w30, $w25, $w11 +# CHECKOBJDUMP: fcne.w $w2, $w18, $w23 +# CHECKOBJDUMP: fcne.d $w14, $w20, $w15 +# CHECKOBJDUMP: fcor.w $w10, $w18, $w25 +# CHECKOBJDUMP: fcor.d $w17, $w25, $w11 +# CHECKOBJDUMP: fcueq.w $w14, $w2, $w21 +# CHECKOBJDUMP: fcueq.d $w29, $w3, $w7 +# CHECKOBJDUMP: fcule.w $w17, $w5, $w3 +# CHECKOBJDUMP: fcule.d $w31, $w1, $w30 +# CHECKOBJDUMP: fcult.w $w6, $w25, $w9 +# CHECKOBJDUMP: fcult.d $w27, $w8, $w17 +# CHECKOBJDUMP: fcun.w $w4, $w20, $w8 +# CHECKOBJDUMP: fcun.d $w29, $w11, $w3 +# CHECKOBJDUMP: fcune.w $w13, $w18, $w19 +# CHECKOBJDUMP: fcune.d $w16, $w26, $w21 +# CHECKOBJDUMP: fdiv.w $w13, $w24, $w2 +# CHECKOBJDUMP: fdiv.d $w19, $w4, $w25 +# CHECKOBJDUMP: fexdo.h $w8, $w0, $w16 +# CHECKOBJDUMP: fexdo.w $w0, $w13, $w27 +# CHECKOBJDUMP: fexp2.w $w17, $w0, $w3 +# CHECKOBJDUMP: fexp2.d $w22, $w0, $w10 +# CHECKOBJDUMP: fmadd.w $w29, $w6, $w23 +# CHECKOBJDUMP: fmadd.d $w11, $w28, $w21 +# CHECKOBJDUMP: fmax.w $w0, $w23, $w13 +# CHECKOBJDUMP: fmax.d $w26, $w18, $w8 +# CHECKOBJDUMP: fmax_a.w $w10, $w16, $w10 +# CHECKOBJDUMP: fmax_a.d $w30, $w9, $w22 +# CHECKOBJDUMP: 
fmin.w $w24, $w1, $w30 +# CHECKOBJDUMP: fmin.d $w27, $w27, $w10 +# CHECKOBJDUMP: fmin_a.w $w10, $w29, $w20 +# CHECKOBJDUMP: fmin_a.d $w13, $w30, $w24 +# CHECKOBJDUMP: fmsub.w $w17, $w25, $w0 +# CHECKOBJDUMP: fmsub.d $w8, $w18, $w16 +# CHECKOBJDUMP: fmul.w $w3, $w15, $w15 +# CHECKOBJDUMP: fmul.d $w9, $w30, $w10 +# CHECKOBJDUMP: fsaf.w $w25, $w5, $w10 +# CHECKOBJDUMP: fsaf.d $w25, $w3, $w29 +# CHECKOBJDUMP: fseq.w $w11, $w17, $w13 +# CHECKOBJDUMP: fseq.d $w29, $w0, $w31 +# CHECKOBJDUMP: fsle.w $w30, $w31, $w31 +# CHECKOBJDUMP: fsle.d $w18, $w23, $w24 +# CHECKOBJDUMP: fslt.w $w12, $w5, $w6 +# CHECKOBJDUMP: fslt.d $w16, $w26, $w21 +# CHECKOBJDUMP: fsne.w $w30, $w1, $w12 +# CHECKOBJDUMP: fsne.d $w14, $w13, $w23 +# CHECKOBJDUMP: fsor.w $w27, $w13, $w27 +# CHECKOBJDUMP: fsor.d $w12, $w24, $w11 +# CHECKOBJDUMP: fsub.w $w31, $w26, $w1 +# CHECKOBJDUMP: fsub.d $w19, $w17, $w27 +# CHECKOBJDUMP: fsueq.w $w16, $w24, $w25 +# CHECKOBJDUMP: fsueq.d $w18, $w14, $w14 +# CHECKOBJDUMP: fsule.w $w23, $w30, $w13 +# CHECKOBJDUMP: fsule.d $w2, $w11, $w26 +# CHECKOBJDUMP: fsult.w $w11, $w26, $w22 +# CHECKOBJDUMP: fsult.d $w6, $w23, $w30 +# CHECKOBJDUMP: fsun.w $w3, $w18, $w28 +# CHECKOBJDUMP: fsun.d $w18, $w11, $w19 +# CHECKOBJDUMP: fsune.w $w16, $w31, $w2 +# CHECKOBJDUMP: fsune.d $w3, $w26, $w17 +# CHECKOBJDUMP: ftq.h $w16, $w4, $w24 +# CHECKOBJDUMP: ftq.w $w5, $w5, $w25 +# CHECKOBJDUMP: madd_q.h $w16, $w20, $w10 +# CHECKOBJDUMP: madd_q.w $w28, $w2, $w9 +# CHECKOBJDUMP: maddr_q.h $w8, $w18, $w9 +# CHECKOBJDUMP: maddr_q.w $w29, $w12, $w16 +# CHECKOBJDUMP: msub_q.h $w24, $w26, $w10 +# CHECKOBJDUMP: msub_q.w $w13, $w30, $w28 +# CHECKOBJDUMP: msubr_q.h $w12, $w21, $w11 +# CHECKOBJDUMP: msubr_q.w $w1, $w14, $w20 +# CHECKOBJDUMP: mul_q.h $w6, $w16, $w30 +# CHECKOBJDUMP: mul_q.w $w16, $w1, $w4 +# CHECKOBJDUMP: mulr_q.h $w6, $w20, $w19 +# CHECKOBJDUMP: mulr_q.w $w27, $w1, $w20 + + fadd.w $w28, $w19, $w28 + fadd.d $w13, $w2, $w29 + fcaf.w $w14, $w11, $w25 + fcaf.d $w1, $w1, $w19 + fceq.w $w1, $w23, 
$w16 + fceq.d $w0, $w8, $w16 + fcle.w $w16, $w9, $w24 + fcle.d $w27, $w14, $w1 + fclt.w $w28, $w8, $w8 + fclt.d $w30, $w25, $w11 + fcne.w $w2, $w18, $w23 + fcne.d $w14, $w20, $w15 + fcor.w $w10, $w18, $w25 + fcor.d $w17, $w25, $w11 + fcueq.w $w14, $w2, $w21 + fcueq.d $w29, $w3, $w7 + fcule.w $w17, $w5, $w3 + fcule.d $w31, $w1, $w30 + fcult.w $w6, $w25, $w9 + fcult.d $w27, $w8, $w17 + fcun.w $w4, $w20, $w8 + fcun.d $w29, $w11, $w3 + fcune.w $w13, $w18, $w19 + fcune.d $w16, $w26, $w21 + fdiv.w $w13, $w24, $w2 + fdiv.d $w19, $w4, $w25 + fexdo.h $w8, $w0, $w16 + fexdo.w $w0, $w13, $w27 + fexp2.w $w17, $w0, $w3 + fexp2.d $w22, $w0, $w10 + fmadd.w $w29, $w6, $w23 + fmadd.d $w11, $w28, $w21 + fmax.w $w0, $w23, $w13 + fmax.d $w26, $w18, $w8 + fmax_a.w $w10, $w16, $w10 + fmax_a.d $w30, $w9, $w22 + fmin.w $w24, $w1, $w30 + fmin.d $w27, $w27, $w10 + fmin_a.w $w10, $w29, $w20 + fmin_a.d $w13, $w30, $w24 + fmsub.w $w17, $w25, $w0 + fmsub.d $w8, $w18, $w16 + fmul.w $w3, $w15, $w15 + fmul.d $w9, $w30, $w10 + fsaf.w $w25, $w5, $w10 + fsaf.d $w25, $w3, $w29 + fseq.w $w11, $w17, $w13 + fseq.d $w29, $w0, $w31 + fsle.w $w30, $w31, $w31 + fsle.d $w18, $w23, $w24 + fslt.w $w12, $w5, $w6 + fslt.d $w16, $w26, $w21 + fsne.w $w30, $w1, $w12 + fsne.d $w14, $w13, $w23 + fsor.w $w27, $w13, $w27 + fsor.d $w12, $w24, $w11 + fsub.w $w31, $w26, $w1 + fsub.d $w19, $w17, $w27 + fsueq.w $w16, $w24, $w25 + fsueq.d $w18, $w14, $w14 + fsule.w $w23, $w30, $w13 + fsule.d $w2, $w11, $w26 + fsult.w $w11, $w26, $w22 + fsult.d $w6, $w23, $w30 + fsun.w $w3, $w18, $w28 + fsun.d $w18, $w11, $w19 + fsune.w $w16, $w31, $w2 + fsune.d $w3, $w26, $w17 + ftq.h $w16, $w4, $w24 + ftq.w $w5, $w5, $w25 + madd_q.h $w16, $w20, $w10 + madd_q.w $w28, $w2, $w9 + maddr_q.h $w8, $w18, $w9 + maddr_q.w $w29, $w12, $w16 + msub_q.h $w24, $w26, $w10 + msub_q.w $w13, $w30, $w28 + msubr_q.h $w12, $w21, $w11 + msubr_q.w $w1, $w14, $w20 + mul_q.h $w6, $w16, $w30 + mul_q.w $w16, $w1, $w4 + mulr_q.h $w6, $w20, $w19 + mulr_q.w $w27, $w1, 
$w20 diff --git a/test/MC/Mips/msa/test_bit.s b/test/MC/Mips/msa/test_bit.s new file mode 100644 index 0000000..7c23131 --- /dev/null +++ b/test/MC/Mips/msa/test_bit.s @@ -0,0 +1,150 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s +# +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d -triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP +# +# CHECK: bclri.b $w21, $w30, 2 # encoding: [0x79,0xf2,0xf5,0x49] +# CHECK: bclri.h $w24, $w21, 0 # encoding: [0x79,0xe0,0xae,0x09] +# CHECK: bclri.w $w23, $w30, 3 # encoding: [0x79,0xc3,0xf5,0xc9] +# CHECK: bclri.d $w9, $w11, 0 # encoding: [0x79,0x80,0x5a,0x49] +# CHECK: binsli.b $w25, $w12, 1 # encoding: [0x7b,0x71,0x66,0x49] +# CHECK: binsli.h $w21, $w22, 0 # encoding: [0x7b,0x60,0xb5,0x49] +# CHECK: binsli.w $w22, $w4, 0 # encoding: [0x7b,0x40,0x25,0x89] +# CHECK: binsli.d $w6, $w2, 6 # encoding: [0x7b,0x06,0x11,0x89] +# CHECK: binsri.b $w15, $w19, 0 # encoding: [0x7b,0xf0,0x9b,0xc9] +# CHECK: binsri.h $w8, $w30, 1 # encoding: [0x7b,0xe1,0xf2,0x09] +# CHECK: binsri.w $w2, $w19, 5 # encoding: [0x7b,0xc5,0x98,0x89] +# CHECK: binsri.d $w18, $w20, 1 # encoding: [0x7b,0x81,0xa4,0x89] +# CHECK: bnegi.b $w24, $w19, 0 # encoding: [0x7a,0xf0,0x9e,0x09] +# CHECK: bnegi.h $w28, $w11, 3 # encoding: [0x7a,0xe3,0x5f,0x09] +# CHECK: bnegi.w $w1, $w27, 5 # encoding: [0x7a,0xc5,0xd8,0x49] +# CHECK: bnegi.d $w4, $w21, 1 # encoding: [0x7a,0x81,0xa9,0x09] +# CHECK: bseti.b $w18, $w8, 0 # encoding: [0x7a,0x70,0x44,0x89] +# CHECK: bseti.h $w24, $w14, 2 # encoding: [0x7a,0x62,0x76,0x09] +# CHECK: bseti.w $w9, $w18, 4 # encoding: [0x7a,0x44,0x92,0x49] +# CHECK: bseti.d $w7, $w15, 1 # encoding: [0x7a,0x01,0x79,0xc9] +# CHECK: sat_s.b $w31, $w31, 2 # encoding: [0x78,0x72,0xff,0xca] +# CHECK: sat_s.h $w19, $w19, 0 # encoding: [0x78,0x60,0x9c,0xca] +# CHECK: sat_s.w $w19, 
$w29, 0 # encoding: [0x78,0x40,0xec,0xca] +# CHECK: sat_s.d $w11, $w22, 0 # encoding: [0x78,0x00,0xb2,0xca] +# CHECK: sat_u.b $w1, $w13, 3 # encoding: [0x78,0xf3,0x68,0x4a] +# CHECK: sat_u.h $w30, $w24, 4 # encoding: [0x78,0xe4,0xc7,0x8a] +# CHECK: sat_u.w $w31, $w13, 0 # encoding: [0x78,0xc0,0x6f,0xca] +# CHECK: sat_u.d $w29, $w16, 5 # encoding: [0x78,0x85,0x87,0x4a] +# CHECK: slli.b $w23, $w10, 1 # encoding: [0x78,0x71,0x55,0xc9] +# CHECK: slli.h $w9, $w18, 1 # encoding: [0x78,0x61,0x92,0x49] +# CHECK: slli.w $w11, $w29, 4 # encoding: [0x78,0x44,0xea,0xc9] +# CHECK: slli.d $w25, $w20, 1 # encoding: [0x78,0x01,0xa6,0x49] +# CHECK: srai.b $w24, $w29, 1 # encoding: [0x78,0xf1,0xee,0x09] +# CHECK: srai.h $w1, $w6, 0 # encoding: [0x78,0xe0,0x30,0x49] +# CHECK: srai.w $w7, $w26, 1 # encoding: [0x78,0xc1,0xd1,0xc9] +# CHECK: srai.d $w20, $w25, 3 # encoding: [0x78,0x83,0xcd,0x09] +# CHECK: srari.b $w5, $w25, 0 # encoding: [0x79,0x70,0xc9,0x4a] +# CHECK: srari.h $w7, $w6, 4 # encoding: [0x79,0x64,0x31,0xca] +# CHECK: srari.w $w17, $w11, 5 # encoding: [0x79,0x45,0x5c,0x4a] +# CHECK: srari.d $w21, $w25, 5 # encoding: [0x79,0x05,0xcd,0x4a] +# CHECK: srli.b $w2, $w0, 2 # encoding: [0x79,0x72,0x00,0x89] +# CHECK: srli.h $w31, $w31, 2 # encoding: [0x79,0x62,0xff,0xc9] +# CHECK: srli.w $w5, $w9, 4 # encoding: [0x79,0x44,0x49,0x49] +# CHECK: srli.d $w27, $w26, 5 # encoding: [0x79,0x05,0xd6,0xc9] +# CHECK: srlri.b $w18, $w3, 0 # encoding: [0x79,0xf0,0x1c,0x8a] +# CHECK: srlri.h $w1, $w2, 3 # encoding: [0x79,0xe3,0x10,0x4a] +# CHECK: srlri.w $w11, $w22, 2 # encoding: [0x79,0xc2,0xb2,0xca] +# CHECK: srlri.d $w24, $w10, 6 # encoding: [0x79,0x86,0x56,0x0a] + +# CHECKOBJDUMP: bclri.b $w21, $w30, 2 +# CHECKOBJDUMP: bclri.h $w24, $w21, 0 +# CHECKOBJDUMP: bclri.w $w23, $w30, 3 +# CHECKOBJDUMP: bclri.d $w9, $w11, 0 +# CHECKOBJDUMP: binsli.b $w25, $w12, 1 +# CHECKOBJDUMP: binsli.h $w21, $w22, 0 +# CHECKOBJDUMP: binsli.w $w22, $w4, 0 +# CHECKOBJDUMP: binsli.d $w6, $w2, 6 +# CHECKOBJDUMP: 
binsri.b $w15, $w19, 0 +# CHECKOBJDUMP: binsri.h $w8, $w30, 1 +# CHECKOBJDUMP: binsri.w $w2, $w19, 5 +# CHECKOBJDUMP: binsri.d $w18, $w20, 1 +# CHECKOBJDUMP: bnegi.b $w24, $w19, 0 +# CHECKOBJDUMP: bnegi.h $w28, $w11, 3 +# CHECKOBJDUMP: bnegi.w $w1, $w27, 5 +# CHECKOBJDUMP: bnegi.d $w4, $w21, 1 +# CHECKOBJDUMP: bseti.b $w18, $w8, 0 +# CHECKOBJDUMP: bseti.h $w24, $w14, 2 +# CHECKOBJDUMP: bseti.w $w9, $w18, 4 +# CHECKOBJDUMP: bseti.d $w7, $w15, 1 +# CHECKOBJDUMP: sat_s.b $w31, $w31, 2 +# CHECKOBJDUMP: sat_s.h $w19, $w19, 0 +# CHECKOBJDUMP: sat_s.w $w19, $w29, 0 +# CHECKOBJDUMP: sat_s.d $w11, $w22, 0 +# CHECKOBJDUMP: sat_u.b $w1, $w13, 3 +# CHECKOBJDUMP: sat_u.h $w30, $w24, 4 +# CHECKOBJDUMP: sat_u.w $w31, $w13, 0 +# CHECKOBJDUMP: sat_u.d $w29, $w16, 5 +# CHECKOBJDUMP: slli.b $w23, $w10, 1 +# CHECKOBJDUMP: slli.h $w9, $w18, 1 +# CHECKOBJDUMP: slli.w $w11, $w29, 4 +# CHECKOBJDUMP: slli.d $w25, $w20, 1 +# CHECKOBJDUMP: srai.b $w24, $w29, 1 +# CHECKOBJDUMP: srai.h $w1, $w6, 0 +# CHECKOBJDUMP: srai.w $w7, $w26, 1 +# CHECKOBJDUMP: srai.d $w20, $w25, 3 +# CHECKOBJDUMP: srari.b $w5, $w25, 0 +# CHECKOBJDUMP: srari.h $w7, $w6, 4 +# CHECKOBJDUMP: srari.w $w17, $w11, 5 +# CHECKOBJDUMP: srari.d $w21, $w25, 5 +# CHECKOBJDUMP: srli.b $w2, $w0, 2 +# CHECKOBJDUMP: srli.h $w31, $w31, 2 +# CHECKOBJDUMP: srli.w $w5, $w9, 4 +# CHECKOBJDUMP: srli.d $w27, $w26, 5 +# CHECKOBJDUMP: srlri.b $w18, $w3, 0 +# CHECKOBJDUMP: srlri.h $w1, $w2, 3 +# CHECKOBJDUMP: srlri.w $w11, $w22, 2 +# CHECKOBJDUMP: srlri.d $w24, $w10, 6 + + bclri.b $w21, $w30, 2 + bclri.h $w24, $w21, 0 + bclri.w $w23, $w30, 3 + bclri.d $w9, $w11, 0 + binsli.b $w25, $w12, 1 + binsli.h $w21, $w22, 0 + binsli.w $w22, $w4, 0 + binsli.d $w6, $w2, 6 + binsri.b $w15, $w19, 0 + binsri.h $w8, $w30, 1 + binsri.w $w2, $w19, 5 + binsri.d $w18, $w20, 1 + bnegi.b $w24, $w19, 0 + bnegi.h $w28, $w11, 3 + bnegi.w $w1, $w27, 5 + bnegi.d $w4, $w21, 1 + bseti.b $w18, $w8, 0 + bseti.h $w24, $w14, 2 + bseti.w $w9, $w18, 4 + bseti.d $w7, $w15, 1 + 
sat_s.b $w31, $w31, 2 + sat_s.h $w19, $w19, 0 + sat_s.w $w19, $w29, 0 + sat_s.d $w11, $w22, 0 + sat_u.b $w1, $w13, 3 + sat_u.h $w30, $w24, 4 + sat_u.w $w31, $w13, 0 + sat_u.d $w29, $w16, 5 + slli.b $w23, $w10, 1 + slli.h $w9, $w18, 1 + slli.w $w11, $w29, 4 + slli.d $w25, $w20, 1 + srai.b $w24, $w29, 1 + srai.h $w1, $w6, 0 + srai.w $w7, $w26, 1 + srai.d $w20, $w25, 3 + srari.b $w5, $w25, 0 + srari.h $w7, $w6, 4 + srari.w $w17, $w11, 5 + srari.d $w21, $w25, 5 + srli.b $w2, $w0, 2 + srli.h $w31, $w31, 2 + srli.w $w5, $w9, 4 + srli.d $w27, $w26, 5 + srlri.b $w18, $w3, 0 + srlri.h $w1, $w2, 3 + srlri.w $w11, $w22, 2 + srlri.d $w24, $w10, 6 diff --git a/test/MC/Mips/msa/test_cbranch.s b/test/MC/Mips/msa/test_cbranch.s new file mode 100644 index 0000000..2fc65af --- /dev/null +++ b/test/MC/Mips/msa/test_cbranch.s @@ -0,0 +1,78 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s +# +#CHECK: bnz.b $w0, 4 # encoding: [0x47,0x80,0x00,0x01] +#CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +#CHECK: bnz.h $w1, 16 # encoding: [0x47,0xa1,0x00,0x04] +#CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +#CHECK: bnz.w $w2, 128 # encoding: [0x47,0xc2,0x00,0x20] +#CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +#CHECK: bnz.d $w3, -128 # encoding: [0x47,0xe3,0xff,0xe0] +#CHECK: bnz.b $w0, SYMBOL0 # encoding: [0x47'A',0x80'A',0x00,0x00] + # fixup A - offset: 0, value: SYMBOL0, kind: fixup_Mips_PC16 +#CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +#CHECK: bnz.h $w1, SYMBOL1 # encoding: [0x47'A',0xa1'A',0x00,0x00] + # fixup A - offset: 0, value: SYMBOL1, kind: fixup_Mips_PC16 +#CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +#CHECK: bnz.w $w2, SYMBOL2 # encoding: [0x47'A',0xc2'A',0x00,0x00] + # fixup A - offset: 0, value: SYMBOL2, kind: fixup_Mips_PC16 +#CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +#CHECK: bnz.d $w3, SYMBOL3 # encoding: [0x47'A',0xe3'A',0x00,0x00] + # fixup A - offset: 0, value: SYMBOL3, kind: fixup_Mips_PC16 +#CHECK: 
nop # encoding: [0x00,0x00,0x00,0x00] + +#CHECK: bnz.v $w0, 4 # encoding: [0x45,0xe0,0x00,0x01] +#CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +#CHECK: bnz.v $w0, SYMBOL0 # encoding: [0x45'A',0xe0'A',0x00,0x00] + # fixup A - offset: 0, value: SYMBOL0, kind: fixup_Mips_PC16 +#CHECK: nop # encoding: [0x00,0x00,0x00,0x00] + +#CHECK: bz.b $w0, 128 # encoding: [0x47,0x00,0x00,0x20] +#CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +#CHECK: bz.h $w1, 256 # encoding: [0x47,0x21,0x00,0x40] +#CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +#CHECK: bz.w $w2, 512 # encoding: [0x47,0x42,0x00,0x80] +#CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +#CHECK: bz.d $w3, -1024 # encoding: [0x47,0x63,0xff,0x00] +#CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +#CHECK: bz.b $w0, SYMBOL0 # encoding: [0x47'A',A,0x00,0x00] + # fixup A - offset: 0, value: SYMBOL0, kind: fixup_Mips_PC16 +#CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +#CHECK: bz.h $w1, SYMBOL1 # encoding: [0x47'A',0x21'A',0x00,0x00] + # fixup A - offset: 0, value: SYMBOL1, kind: fixup_Mips_PC16 +#CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +#CHECK: bz.w $w2, SYMBOL2 # encoding: [0x47'A',0x42'A',0x00,0x00] + # fixup A - offset: 0, value: SYMBOL2, kind: fixup_Mips_PC16 +#CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +#CHECK: bz.d $w3, SYMBOL3 # encoding: [0x47'A',0x63'A',0x00,0x00] + # fixup A - offset: 0, value: SYMBOL3, kind: fixup_Mips_PC16 +#CHECK: nop # encoding: [0x00,0x00,0x00,0x00] + +#CHECK: bz.v $w0, 4 # encoding: [0x45,0x60,0x00,0x01] +#CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +#CHECK: bz.v $w0, SYMBOL0 # encoding: [0x45'A',0x60'A',0x00,0x00] + # fixup A - offset: 0, value: SYMBOL0, kind: fixup_Mips_PC16 +#CHECK: nop # encoding: [0x00,0x00,0x00,0x00] + +bnz.b $w0, 4 +bnz.h $w1, 16 +bnz.w $w2, 128 +bnz.d $w3, -128 +bnz.b $w0, SYMBOL0 +bnz.h $w1, SYMBOL1 +bnz.w $w2, SYMBOL2 +bnz.d $w3, SYMBOL3 + +bnz.v $w0, 4 +bnz.v $w0, SYMBOL0 + +bz.b $w0, 128 +bz.h $w1, 256 +bz.w $w2, 512 +bz.d $w3, -1024 +bz.b $w0, SYMBOL0 +bz.h $w1, 
SYMBOL1 +bz.w $w2, SYMBOL2 +bz.d $w3, SYMBOL3 + +bz.v $w0, 4 +bz.v $w0, SYMBOL0 diff --git a/test/MC/Mips/msa/test_ctrlregs.s b/test/MC/Mips/msa/test_ctrlregs.s new file mode 100644 index 0000000..f8f4f9e --- /dev/null +++ b/test/MC/Mips/msa/test_ctrlregs.s @@ -0,0 +1,105 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s +# +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d -triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP +# +#CHECK: cfcmsa $1, $0 # encoding: [0x78,0x7e,0x00,0x59] +#CHECK: cfcmsa $1, $0 # encoding: [0x78,0x7e,0x00,0x59] +#CHECK: cfcmsa $2, $1 # encoding: [0x78,0x7e,0x08,0x99] +#CHECK: cfcmsa $2, $1 # encoding: [0x78,0x7e,0x08,0x99] +#CHECK: cfcmsa $3, $2 # encoding: [0x78,0x7e,0x10,0xd9] +#CHECK: cfcmsa $3, $2 # encoding: [0x78,0x7e,0x10,0xd9] +#CHECK: cfcmsa $4, $3 # encoding: [0x78,0x7e,0x19,0x19] +#CHECK: cfcmsa $4, $3 # encoding: [0x78,0x7e,0x19,0x19] +#CHECK: cfcmsa $5, $4 # encoding: [0x78,0x7e,0x21,0x59] +#CHECK: cfcmsa $5, $4 # encoding: [0x78,0x7e,0x21,0x59] +#CHECK: cfcmsa $6, $5 # encoding: [0x78,0x7e,0x29,0x99] +#CHECK: cfcmsa $6, $5 # encoding: [0x78,0x7e,0x29,0x99] +#CHECK: cfcmsa $7, $6 # encoding: [0x78,0x7e,0x31,0xd9] +#CHECK: cfcmsa $7, $6 # encoding: [0x78,0x7e,0x31,0xd9] +#CHECK: cfcmsa $8, $7 # encoding: [0x78,0x7e,0x3a,0x19] +#CHECK: cfcmsa $8, $7 # encoding: [0x78,0x7e,0x3a,0x19] + +#CHECK: ctcmsa $0, $1 # encoding: [0x78,0x3e,0x08,0x19] +#CHECK: ctcmsa $0, $1 # encoding: [0x78,0x3e,0x08,0x19] +#CHECK: ctcmsa $1, $2 # encoding: [0x78,0x3e,0x10,0x59] +#CHECK: ctcmsa $1, $2 # encoding: [0x78,0x3e,0x10,0x59] +#CHECK: ctcmsa $2, $3 # encoding: [0x78,0x3e,0x18,0x99] +#CHECK: ctcmsa $2, $3 # encoding: [0x78,0x3e,0x18,0x99] +#CHECK: ctcmsa $3, $4 # encoding: [0x78,0x3e,0x20,0xd9] +#CHECK: ctcmsa $3, $4 # encoding: [0x78,0x3e,0x20,0xd9] +#CHECK: 
ctcmsa $4, $5 # encoding: [0x78,0x3e,0x29,0x19] +#CHECK: ctcmsa $4, $5 # encoding: [0x78,0x3e,0x29,0x19] +#CHECK: ctcmsa $5, $6 # encoding: [0x78,0x3e,0x31,0x59] +#CHECK: ctcmsa $5, $6 # encoding: [0x78,0x3e,0x31,0x59] +#CHECK: ctcmsa $6, $7 # encoding: [0x78,0x3e,0x39,0x99] +#CHECK: ctcmsa $6, $7 # encoding: [0x78,0x3e,0x39,0x99] +#CHECK: ctcmsa $7, $8 # encoding: [0x78,0x3e,0x41,0xd9] +#CHECK: ctcmsa $7, $8 # encoding: [0x78,0x3e,0x41,0xd9] + +#CHECKOBJDUMP: cfcmsa $1, $0 +#CHECKOBJDUMP: cfcmsa $1, $0 +#CHECKOBJDUMP: cfcmsa $2, $1 +#CHECKOBJDUMP: cfcmsa $2, $1 +#CHECKOBJDUMP: cfcmsa $3, $2 +#CHECKOBJDUMP: cfcmsa $3, $2 +#CHECKOBJDUMP: cfcmsa $4, $3 +#CHECKOBJDUMP: cfcmsa $4, $3 +#CHECKOBJDUMP: cfcmsa $5, $4 +#CHECKOBJDUMP: cfcmsa $5, $4 +#CHECKOBJDUMP: cfcmsa $6, $5 +#CHECKOBJDUMP: cfcmsa $6, $5 +#CHECKOBJDUMP: cfcmsa $7, $6 +#CHECKOBJDUMP: cfcmsa $7, $6 +#CHECKOBJDUMP: cfcmsa $8, $7 +#CHECKOBJDUMP: cfcmsa $8, $7 + +#CHECKOBJDUMP: ctcmsa $0, $1 +#CHECKOBJDUMP: ctcmsa $0, $1 +#CHECKOBJDUMP: ctcmsa $1, $2 +#CHECKOBJDUMP: ctcmsa $1, $2 +#CHECKOBJDUMP: ctcmsa $2, $3 +#CHECKOBJDUMP: ctcmsa $2, $3 +#CHECKOBJDUMP: ctcmsa $3, $4 +#CHECKOBJDUMP: ctcmsa $3, $4 +#CHECKOBJDUMP: ctcmsa $4, $5 +#CHECKOBJDUMP: ctcmsa $4, $5 +#CHECKOBJDUMP: ctcmsa $5, $6 +#CHECKOBJDUMP: ctcmsa $5, $6 +#CHECKOBJDUMP: ctcmsa $6, $7 +#CHECKOBJDUMP: ctcmsa $6, $7 +#CHECKOBJDUMP: ctcmsa $7, $8 +#CHECKOBJDUMP: ctcmsa $7, $8 + +cfcmsa $1, $msair +cfcmsa $1, $0 +cfcmsa $2, $msacsr +cfcmsa $2, $1 +cfcmsa $3, $msaaccess +cfcmsa $3, $2 +cfcmsa $4, $msasave +cfcmsa $4, $3 +cfcmsa $5, $msamodify +cfcmsa $5, $4 +cfcmsa $6, $msarequest +cfcmsa $6, $5 +cfcmsa $7, $msamap +cfcmsa $7, $6 +cfcmsa $8, $msaunmap +cfcmsa $8, $7 + +ctcmsa $msair, $1 +ctcmsa $0, $1 +ctcmsa $msacsr, $2 +ctcmsa $1, $2 +ctcmsa $msaaccess, $3 +ctcmsa $2, $3 +ctcmsa $msasave, $4 +ctcmsa $3, $4 +ctcmsa $msamodify, $5 +ctcmsa $4, $5 +ctcmsa $msarequest, $6 +ctcmsa $5, $6 +ctcmsa $msamap, $7 +ctcmsa $6, $7 +ctcmsa $msaunmap, $8 +ctcmsa $7, $8 
diff --git a/test/MC/Mips/msa/test_elm.s b/test/MC/Mips/msa/test_elm.s new file mode 100644 index 0000000..1d04838 --- /dev/null +++ b/test/MC/Mips/msa/test_elm.s @@ -0,0 +1,51 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s +# +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d -triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP +# +# CHECK: copy_s.b $13, $w8[2] # encoding: [0x78,0x82,0x43,0x59] +# CHECK: copy_s.h $1, $w25[0] # encoding: [0x78,0xa0,0xc8,0x59] +# CHECK: copy_s.w $22, $w5[1] # encoding: [0x78,0xb1,0x2d,0x99] +# CHECK: copy_u.b $22, $w20[4] # encoding: [0x78,0xc4,0xa5,0x99] +# CHECK: copy_u.h $20, $w4[0] # encoding: [0x78,0xe0,0x25,0x19] +# CHECK: copy_u.w $fp, $w13[2] # encoding: [0x78,0xf2,0x6f,0x99] +# CHECK: sldi.b $w0, $w29[4] # encoding: [0x78,0x04,0xe8,0x19] +# CHECK: sldi.h $w8, $w17[0] # encoding: [0x78,0x20,0x8a,0x19] +# CHECK: sldi.w $w20, $w27[2] # encoding: [0x78,0x32,0xdd,0x19] +# CHECK: sldi.d $w4, $w12[0] # encoding: [0x78,0x38,0x61,0x19] +# CHECK: splati.b $w25, $w3[2] # encoding: [0x78,0x42,0x1e,0x59] +# CHECK: splati.h $w24, $w28[1] # encoding: [0x78,0x61,0xe6,0x19] +# CHECK: splati.w $w13, $w18[0] # encoding: [0x78,0x70,0x93,0x59] +# CHECK: splati.d $w28, $w1[0] # encoding: [0x78,0x78,0x0f,0x19] +# CHECK: move.v $w23, $w24 # encoding: [0x78,0xbe,0xc5,0xd9] + +# CHECKOBJDUMP: copy_s.b $13, $w8[2] +# CHECKOBJDUMP: copy_s.h $1, $w25[0] +# CHECKOBJDUMP: copy_s.w $22, $w5[1] +# CHECKOBJDUMP: copy_u.b $22, $w20[4] +# CHECKOBJDUMP: copy_u.h $20, $w4[0] +# CHECKOBJDUMP: copy_u.w $fp, $w13[2] +# CHECKOBJDUMP: sldi.b $w0, $w29[4] +# CHECKOBJDUMP: sldi.h $w8, $w17[0] +# CHECKOBJDUMP: sldi.w $w20, $w27[2] +# CHECKOBJDUMP: sldi.d $w4, $w12[0] +# CHECKOBJDUMP: splati.b $w25, $w3[2] +# CHECKOBJDUMP: splati.h $w24, $w28[1] +# CHECKOBJDUMP: splati.w $w13, $w18[0] 
+# CHECKOBJDUMP: splati.d $w28, $w1[0] +# CHECKOBJDUMP: move.v $w23, $w24 + + copy_s.b $13, $w8[2] + copy_s.h $1, $w25[0] + copy_s.w $22, $w5[1] + copy_u.b $22, $w20[4] + copy_u.h $20, $w4[0] + copy_u.w $30, $w13[2] + sldi.b $w0, $w29[4] + sldi.h $w8, $w17[0] + sldi.w $w20, $w27[2] + sldi.d $w4, $w12[0] + splati.b $w25, $w3[2] + splati.h $w24, $w28[1] + splati.w $w13, $w18[0] + splati.d $w28, $w1[0] + move.v $w23, $w24 diff --git a/test/MC/Mips/msa/test_elm_insert.s b/test/MC/Mips/msa/test_elm_insert.s new file mode 100644 index 0000000..5fc55f3 --- /dev/null +++ b/test/MC/Mips/msa/test_elm_insert.s @@ -0,0 +1,15 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s +# +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d -triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP +# +# CHECK: insert.b $w23[3], $sp # encoding: [0x79,0x03,0xed,0xd9] +# CHECK: insert.h $w20[2], $5 # encoding: [0x79,0x22,0x2d,0x19] +# CHECK: insert.w $w8[2], $15 # encoding: [0x79,0x32,0x7a,0x19] + +# CHECKOBJDUMP: insert.b $w23[3], $sp +# CHECKOBJDUMP: insert.h $w20[2], $5 +# CHECKOBJDUMP: insert.w $w8[2], $15 + + insert.b $w23[3], $sp + insert.h $w20[2], $5 + insert.w $w8[2], $15 diff --git a/test/MC/Mips/msa/test_elm_insve.s b/test/MC/Mips/msa/test_elm_insve.s new file mode 100644 index 0000000..d63d687 --- /dev/null +++ b/test/MC/Mips/msa/test_elm_insve.s @@ -0,0 +1,18 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s +# +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d -triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP +# +# CHECK: insve.b $w25[3], $w9[0] # encoding: [0x79,0x43,0x4e,0x59] +# CHECK: insve.h $w24[2], $w2[0] # 
encoding: [0x79,0x62,0x16,0x19] +# CHECK: insve.w $w0[2], $w13[0] # encoding: [0x79,0x72,0x68,0x19] +# CHECK: insve.d $w3[0], $w18[0] # encoding: [0x79,0x78,0x90,0xd9] + +# CHECKOBJDUMP: insve.b $w25[3], $w9[0] +# CHECKOBJDUMP: insve.h $w24[2], $w2[0] +# CHECKOBJDUMP: insve.w $w0[2], $w13[0] +# CHECKOBJDUMP: insve.d $w3[0], $w18[0] + + insve.b $w25[3], $w9[0] + insve.h $w24[2], $w2[0] + insve.w $w0[2], $w13[0] + insve.d $w3[0], $w18[0] diff --git a/test/MC/Mips/msa/test_i10.s b/test/MC/Mips/msa/test_i10.s new file mode 100644 index 0000000..828ebb5 --- /dev/null +++ b/test/MC/Mips/msa/test_i10.s @@ -0,0 +1,19 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s +# +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d -triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP +# + +# CHECK: ldi.b $w8, 198 # encoding: [0x7b,0x06,0x32,0x07] +# CHECK: ldi.h $w20, 313 # encoding: [0x7b,0x29,0xcd,0x07] +# CHECK: ldi.w $w24, 492 # encoding: [0x7b,0x4f,0x66,0x07] +# CHECK: ldi.d $w27, -180 # encoding: [0x7b,0x7a,0x66,0xc7] + +# CHECKOBJDUMP: ldi.b $w8, 198 +# CHECKOBJDUMP: ldi.h $w20, 313 +# CHECKOBJDUMP: ldi.w $w24, 492 +# CHECKOBJDUMP: ldi.d $w27, 844 + + ldi.b $w8, 198 + ldi.h $w20, 313 + ldi.w $w24, 492 + ldi.d $w27, -180 diff --git a/test/MC/Mips/msa/test_i5.s b/test/MC/Mips/msa/test_i5.s new file mode 100644 index 0000000..992bfe1 --- /dev/null +++ b/test/MC/Mips/msa/test_i5.s @@ -0,0 +1,138 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s +# +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d -triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP +# +# CHECK: addvi.b $w3, $w31, 30 # encoding: [0x78,0x1e,0xf8,0xc6] +# 
CHECK: addvi.h $w24, $w13, 26 # encoding: [0x78,0x3a,0x6e,0x06] +# CHECK: addvi.w $w26, $w20, 26 # encoding: [0x78,0x5a,0xa6,0x86] +# CHECK: addvi.d $w16, $w1, 21 # encoding: [0x78,0x75,0x0c,0x06] +# CHECK: ceqi.b $w24, $w21, -8 # encoding: [0x78,0x18,0xae,0x07] +# CHECK: ceqi.h $w31, $w15, 2 # encoding: [0x78,0x22,0x7f,0xc7] +# CHECK: ceqi.w $w12, $w1, -1 # encoding: [0x78,0x5f,0x0b,0x07] +# CHECK: ceqi.d $w24, $w22, 7 # encoding: [0x78,0x67,0xb6,0x07] +# CHECK: clei_s.b $w12, $w16, 1 # encoding: [0x7a,0x01,0x83,0x07] +# CHECK: clei_s.h $w2, $w10, -9 # encoding: [0x7a,0x37,0x50,0x87] +# CHECK: clei_s.w $w4, $w11, -10 # encoding: [0x7a,0x56,0x59,0x07] +# CHECK: clei_s.d $w0, $w29, -10 # encoding: [0x7a,0x76,0xe8,0x07] +# CHECK: clei_u.b $w21, $w17, 3 # encoding: [0x7a,0x83,0x8d,0x47] +# CHECK: clei_u.h $w29, $w7, 17 # encoding: [0x7a,0xb1,0x3f,0x47] +# CHECK: clei_u.w $w1, $w1, 2 # encoding: [0x7a,0xc2,0x08,0x47] +# CHECK: clei_u.d $w27, $w27, 29 # encoding: [0x7a,0xfd,0xde,0xc7] +# CHECK: clti_s.b $w19, $w13, -7 # encoding: [0x79,0x19,0x6c,0xc7] +# CHECK: clti_s.h $w15, $w10, -12 # encoding: [0x79,0x34,0x53,0xc7] +# CHECK: clti_s.w $w12, $w12, 11 # encoding: [0x79,0x4b,0x63,0x07] +# CHECK: clti_s.d $w29, $w20, -15 # encoding: [0x79,0x71,0xa7,0x47] +# CHECK: clti_u.b $w14, $w9, 29 # encoding: [0x79,0x9d,0x4b,0x87] +# CHECK: clti_u.h $w24, $w25, 25 # encoding: [0x79,0xb9,0xce,0x07] +# CHECK: clti_u.w $w1, $w1, 22 # encoding: [0x79,0xd6,0x08,0x47] +# CHECK: clti_u.d $w21, $w25, 1 # encoding: [0x79,0xe1,0xcd,0x47] +# CHECK: maxi_s.b $w22, $w21, 1 # encoding: [0x79,0x01,0xad,0x86] +# CHECK: maxi_s.h $w29, $w5, -8 # encoding: [0x79,0x38,0x2f,0x46] +# CHECK: maxi_s.w $w1, $w10, -12 # encoding: [0x79,0x54,0x50,0x46] +# CHECK: maxi_s.d $w13, $w29, -16 # encoding: [0x79,0x70,0xeb,0x46] +# CHECK: maxi_u.b $w20, $w0, 12 # encoding: [0x79,0x8c,0x05,0x06] +# CHECK: maxi_u.h $w1, $w14, 3 # encoding: [0x79,0xa3,0x70,0x46] +# CHECK: maxi_u.w $w27, $w22, 11 # encoding: 
[0x79,0xcb,0xb6,0xc6] +# CHECK: maxi_u.d $w26, $w6, 4 # encoding: [0x79,0xe4,0x36,0x86] +# CHECK: mini_s.b $w4, $w1, 1 # encoding: [0x7a,0x01,0x09,0x06] +# CHECK: mini_s.h $w27, $w27, -9 # encoding: [0x7a,0x37,0xde,0xc6] +# CHECK: mini_s.w $w28, $w11, 9 # encoding: [0x7a,0x49,0x5f,0x06] +# CHECK: mini_s.d $w11, $w10, 10 # encoding: [0x7a,0x6a,0x52,0xc6] +# CHECK: mini_u.b $w18, $w23, 27 # encoding: [0x7a,0x9b,0xbc,0x86] +# CHECK: mini_u.h $w7, $w26, 18 # encoding: [0x7a,0xb2,0xd1,0xc6] +# CHECK: mini_u.w $w11, $w12, 26 # encoding: [0x7a,0xda,0x62,0xc6] +# CHECK: mini_u.d $w11, $w15, 2 # encoding: [0x7a,0xe2,0x7a,0xc6] +# CHECK: subvi.b $w24, $w20, 19 # encoding: [0x78,0x93,0xa6,0x06] +# CHECK: subvi.h $w11, $w19, 4 # encoding: [0x78,0xa4,0x9a,0xc6] +# CHECK: subvi.w $w12, $w10, 11 # encoding: [0x78,0xcb,0x53,0x06] +# CHECK: subvi.d $w19, $w16, 7 # encoding: [0x78,0xe7,0x84,0xc6] + +# CHECKOBJDUMP: addvi.b $w3, $w31, 30 +# CHECKOBJDUMP: addvi.h $w24, $w13, 26 +# CHECKOBJDUMP: addvi.w $w26, $w20, 26 +# CHECKOBJDUMP: addvi.d $w16, $w1, 21 +# CHECKOBJDUMP: ceqi.b $w24, $w21, 24 +# CHECKOBJDUMP: ceqi.h $w31, $w15, 2 +# CHECKOBJDUMP: ceqi.w $w12, $w1, 31 +# CHECKOBJDUMP: ceqi.d $w24, $w22, 7 +# CHECKOBJDUMP: clei_s.b $w12, $w16, 1 +# CHECKOBJDUMP: clei_s.h $w2, $w10, 23 +# CHECKOBJDUMP: clei_s.w $w4, $w11, 22 +# CHECKOBJDUMP: clei_s.d $w0, $w29, 22 +# CHECKOBJDUMP: clei_u.b $w21, $w17, 3 +# CHECKOBJDUMP: clei_u.h $w29, $w7, 17 +# CHECKOBJDUMP: clei_u.w $w1, $w1, 2 +# CHECKOBJDUMP: clei_u.d $w27, $w27, 29 +# CHECKOBJDUMP: clti_s.b $w19, $w13, 25 +# CHECKOBJDUMP: clti_s.h $w15, $w10, 20 +# CHECKOBJDUMP: clti_s.w $w12, $w12, 11 +# CHECKOBJDUMP: clti_s.d $w29, $w20, 17 +# CHECKOBJDUMP: clti_u.b $w14, $w9, 29 +# CHECKOBJDUMP: clti_u.h $w24, $w25, 25 +# CHECKOBJDUMP: clti_u.w $w1, $w1, 22 +# CHECKOBJDUMP: clti_u.d $w21, $w25, 1 +# CHECKOBJDUMP: maxi_s.b $w22, $w21, 1 +# CHECKOBJDUMP: maxi_s.h $w29, $w5, 24 +# CHECKOBJDUMP: maxi_s.w $w1, $w10, 20 +# CHECKOBJDUMP: maxi_s.d $w13, 
$w29, 16 +# CHECKOBJDUMP: maxi_u.b $w20, $w0, 12 +# CHECKOBJDUMP: maxi_u.h $w1, $w14, 3 +# CHECKOBJDUMP: maxi_u.w $w27, $w22, 11 +# CHECKOBJDUMP: maxi_u.d $w26, $w6, 4 +# CHECKOBJDUMP: mini_s.b $w4, $w1, 1 +# CHECKOBJDUMP: mini_s.h $w27, $w27, 23 +# CHECKOBJDUMP: mini_s.w $w28, $w11, 9 +# CHECKOBJDUMP: mini_s.d $w11, $w10, 10 +# CHECKOBJDUMP: mini_u.b $w18, $w23, 27 +# CHECKOBJDUMP: mini_u.h $w7, $w26, 18 +# CHECKOBJDUMP: mini_u.w $w11, $w12, 26 +# CHECKOBJDUMP: mini_u.d $w11, $w15, 2 +# CHECKOBJDUMP: subvi.b $w24, $w20, 19 +# CHECKOBJDUMP: subvi.h $w11, $w19, 4 +# CHECKOBJDUMP: subvi.w $w12, $w10, 11 +# CHECKOBJDUMP: subvi.d $w19, $w16, 7 + + addvi.b $w3, $w31, 30 + addvi.h $w24, $w13, 26 + addvi.w $w26, $w20, 26 + addvi.d $w16, $w1, 21 + ceqi.b $w24, $w21, -8 + ceqi.h $w31, $w15, 2 + ceqi.w $w12, $w1, -1 + ceqi.d $w24, $w22, 7 + clei_s.b $w12, $w16, 1 + clei_s.h $w2, $w10, -9 + clei_s.w $w4, $w11, -10 + clei_s.d $w0, $w29, -10 + clei_u.b $w21, $w17, 3 + clei_u.h $w29, $w7, 17 + clei_u.w $w1, $w1, 2 + clei_u.d $w27, $w27, 29 + clti_s.b $w19, $w13, -7 + clti_s.h $w15, $w10, -12 + clti_s.w $w12, $w12, 11 + clti_s.d $w29, $w20, -15 + clti_u.b $w14, $w9, 29 + clti_u.h $w24, $w25, 25 + clti_u.w $w1, $w1, 22 + clti_u.d $w21, $w25, 1 + maxi_s.b $w22, $w21, 1 + maxi_s.h $w29, $w5, -8 + maxi_s.w $w1, $w10, -12 + maxi_s.d $w13, $w29, -16 + maxi_u.b $w20, $w0, 12 + maxi_u.h $w1, $w14, 3 + maxi_u.w $w27, $w22, 11 + maxi_u.d $w26, $w6, 4 + mini_s.b $w4, $w1, 1 + mini_s.h $w27, $w27, -9 + mini_s.w $w28, $w11, 9 + mini_s.d $w11, $w10, 10 + mini_u.b $w18, $w23, 27 + mini_u.h $w7, $w26, 18 + mini_u.w $w11, $w12, 26 + mini_u.d $w11, $w15, 2 + subvi.b $w24, $w20, 19 + subvi.h $w11, $w19, 4 + subvi.w $w12, $w10, 11 + subvi.d $w19, $w16, 7 diff --git a/test/MC/Mips/msa/test_i8.s b/test/MC/Mips/msa/test_i8.s new file mode 100644 index 0000000..2604be0 --- /dev/null +++ b/test/MC/Mips/msa/test_i8.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding 
-mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s +# +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d -triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP +# +# CHECK: andi.b $w2, $w29, 48 # encoding: [0x78,0x30,0xe8,0x80] +# CHECK: bmnzi.b $w6, $w22, 126 # encoding: [0x78,0x7e,0xb1,0x81] +# CHECK: bmzi.b $w27, $w1, 88 # encoding: [0x79,0x58,0x0e,0xc1] +# CHECK: bseli.b $w29, $w3, 189 # encoding: [0x7a,0xbd,0x1f,0x41] +# CHECK: nori.b $w1, $w17, 56 # encoding: [0x7a,0x38,0x88,0x40] +# CHECK: ori.b $w26, $w20, 135 # encoding: [0x79,0x87,0xa6,0x80] +# CHECK: shf.b $w19, $w30, 105 # encoding: [0x78,0x69,0xf4,0xc2] +# CHECK: shf.h $w17, $w8, 76 # encoding: [0x79,0x4c,0x44,0x42] +# CHECK: shf.w $w14, $w3, 93 # encoding: [0x7a,0x5d,0x1b,0x82] +# CHECK: xori.b $w16, $w10, 20 # encoding: [0x7b,0x14,0x54,0x00] + +# CHECKOBJDUMP: andi.b $w2, $w29, 48 +# CHECKOBJDUMP: bmnzi.b $w6, $w22, 126 +# CHECKOBJDUMP: bmzi.b $w27, $w1, 88 +# CHECKOBJDUMP: bseli.b $w29, $w3, 189 +# CHECKOBJDUMP: nori.b $w1, $w17, 56 +# CHECKOBJDUMP: ori.b $w26, $w20, 135 +# CHECKOBJDUMP: shf.b $w19, $w30, 105 +# CHECKOBJDUMP: shf.h $w17, $w8, 76 +# CHECKOBJDUMP: shf.w $w14, $w3, 93 +# CHECKOBJDUMP: xori.b $w16, $w10, 20 + + andi.b $w2, $w29, 48 + bmnzi.b $w6, $w22, 126 + bmzi.b $w27, $w1, 88 + bseli.b $w29, $w3, 189 + nori.b $w1, $w17, 56 + ori.b $w26, $w20, 135 + shf.b $w19, $w30, 105 + shf.h $w17, $w8, 76 + shf.w $w14, $w3, 93 + xori.b $w16, $w10, 20 diff --git a/test/MC/Mips/msa/test_lsa.s b/test/MC/Mips/msa/test_lsa.s new file mode 100644 index 0000000..6d1d868 --- /dev/null +++ b/test/MC/Mips/msa/test_lsa.s @@ -0,0 +1,18 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s +# +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d 
-triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP +# +# CHECK: lsa $8, $9, $10, 1 # encoding: [0x01,0x2a,0x40,0x05] +# CHECK: lsa $8, $9, $10, 2 # encoding: [0x01,0x2a,0x40,0x45] +# CHECK: lsa $8, $9, $10, 3 # encoding: [0x01,0x2a,0x40,0x85] +# CHECK: lsa $8, $9, $10, 4 # encoding: [0x01,0x2a,0x40,0xc5] + +# CHECKOBJDUMP: lsa $8, $9, $10, 1 +# CHECKOBJDUMP: lsa $8, $9, $10, 2 +# CHECKOBJDUMP: lsa $8, $9, $10, 3 +# CHECKOBJDUMP: lsa $8, $9, $10, 4 + + lsa $8, $9, $10, 1 + lsa $8, $9, $10, 2 + lsa $8, $9, $10, 3 + lsa $8, $9, $10, 4 diff --git a/test/MC/Mips/msa/test_mi10.s b/test/MC/Mips/msa/test_mi10.s new file mode 100644 index 0000000..80257cd --- /dev/null +++ b/test/MC/Mips/msa/test_mi10.s @@ -0,0 +1,30 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s +# +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d -triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP +# +# CHECK: ld.b $w2, 1($7) # encoding: [0x78,0x01,0x38,0xa0] +# CHECK: ld.h $w16, -9($zero) # encoding: [0x7b,0xf7,0x04,0x21] +# CHECK: ld.w $w13, -6($4) # encoding: [0x7b,0xfa,0x23,0x62] +# CHECK: ld.d $w1, -5($16) # encoding: [0x7b,0xfb,0x80,0x63] +# CHECK: st.b $w29, 1($14) # encoding: [0x78,0x01,0x77,0x64] +# CHECK: st.h $w6, -1($8) # encoding: [0x7b,0xff,0x41,0xa5] +# CHECK: st.w $w18, 8($15) # encoding: [0x78,0x08,0x7c,0xa6] +# CHECK: st.d $w3, -14($18) # encoding: [0x7b,0xf2,0x90,0xe7] + +# CHECKOBJDUMP: ld.b $w2, 1($7) +# CHECKOBJDUMP: ld.h $w16, -9($zero) +# CHECKOBJDUMP: ld.w $w13, -6($4) +# CHECKOBJDUMP: ld.d $w1, -5($16) +# CHECKOBJDUMP: st.b $w29, 1($14) +# CHECKOBJDUMP: st.h $w6, -1($8) +# CHECKOBJDUMP: st.w $w18, 8($15) +# CHECKOBJDUMP: st.d $w3, -14($18) + + ld.b $w2, 1($7) + ld.h $w16, -9($zero) + ld.w $w13, -6($4) + ld.d $w1, -5($16) + st.b $w29, 1($14) + st.h 
$w6, -1($8) + st.w $w18, 8($15) + st.d $w3, -14($18) diff --git a/test/MC/Mips/msa/test_vec.s b/test/MC/Mips/msa/test_vec.s new file mode 100644 index 0000000..9294f37 --- /dev/null +++ b/test/MC/Mips/msa/test_vec.s @@ -0,0 +1,27 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s +# +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d -triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP +# +# CHECK: and.v $w25, $w20, $w27 # encoding: [0x78,0x1b,0xa6,0x5e] +# CHECK: bmnz.v $w17, $w6, $w7 # encoding: [0x78,0x87,0x34,0x5e] +# CHECK: bmz.v $w3, $w17, $w9 # encoding: [0x78,0xa9,0x88,0xde] +# CHECK: bsel.v $w8, $w0, $w14 # encoding: [0x78,0xce,0x02,0x1e] +# CHECK: nor.v $w7, $w31, $w0 # encoding: [0x78,0x40,0xf9,0xde] +# CHECK: or.v $w24, $w26, $w30 # encoding: [0x78,0x3e,0xd6,0x1e] +# CHECK: xor.v $w7, $w27, $w15 # encoding: [0x78,0x6f,0xd9,0xde] + +# CHECKOBJDUMP: and.v $w25, $w20, $w27 +# CHECKOBJDUMP: bmnz.v $w17, $w6, $w7 +# CHECKOBJDUMP: bmz.v $w3, $w17, $w9 +# CHECKOBJDUMP: bsel.v $w8, $w0, $w14 +# CHECKOBJDUMP: nor.v $w7, $w31, $w0 +# CHECKOBJDUMP: or.v $w24, $w26, $w30 +# CHECKOBJDUMP: xor.v $w7, $w27, $w15 + + and.v $w25, $w20, $w27 + bmnz.v $w17, $w6, $w7 + bmz.v $w3, $w17, $w9 + bsel.v $w8, $w0, $w14 + nor.v $w7, $w31, $w0 + or.v $w24, $w26, $w30 + xor.v $w7, $w27, $w15 diff --git a/test/MC/Mips/xgot.ll b/test/MC/Mips/xgot.ll index e2a500f..cc33678 100644 --- a/test/MC/Mips/xgot.ll +++ b/test/MC/Mips/xgot.ll @@ -14,10 +14,10 @@ entry: ; CHECK: 0x{{[0-9,A-F]+}} R_MIPS_LO16 ; CHECK: 0x{{[0-9,A-F]+}} R_MIPS_GOT_HI16 ; CHECK: 0x{{[0-9,A-F]+}} R_MIPS_GOT_LO16 -; CHECK: 0x{{[0-9,A-F]+}} R_MIPS_GOT -; CHECK: 0x{{[0-9,A-F]+}} R_MIPS_LO16 ; CHECK: 0x{{[0-9,A-F]+}} R_MIPS_CALL_HI16 ; CHECK: 0x{{[0-9,A-F]+}} R_MIPS_CALL_LO16 +; CHECK: 0x{{[0-9,A-F]+}} R_MIPS_GOT +; CHECK: 0x{{[0-9,A-F]+}} 
R_MIPS_LO16 ; CHECK: ] %0 = load i32* @ext_1, align 4 diff --git a/test/MC/PowerPC/deprecated-p7.s b/test/MC/PowerPC/deprecated-p7.s new file mode 100644 index 0000000..ded9923 --- /dev/null +++ b/test/MC/PowerPC/deprecated-p7.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc -triple powerpc64-unknown-linux-gnu -mcpu=pwr7 -show-encoding < %s 2>&1 | FileCheck %s +# RUN: llvm-mc -triple powerpc-unknown-linux-gnu -mcpu=601 -show-encoding < %s 2>&1 | FileCheck -check-prefix=CHECK-OLD %s + + mftb 3 +# CHECK: warning: deprecated +# CHECK: mftb 3 + +# CHECK-OLD-NOT: warning: deprecated +# CHECK-OLD: mftb 3 + +# FIXME: Test dst and friends once we can parse them. + diff --git a/test/MC/PowerPC/lit.local.cfg b/test/MC/PowerPC/lit.local.cfg index 88488cd..193ebeb 100644 --- a/test/MC/PowerPC/lit.local.cfg +++ b/test/MC/PowerPC/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp', '.s'] - targets = set(config.root.targets_to_build.split()) if not 'PowerPC' in targets: config.unsupported = True diff --git a/test/MC/PowerPC/ppc-nop.s b/test/MC/PowerPC/ppc-nop.s index 567943c..50afae2 100644 --- a/test/MC/PowerPC/ppc-nop.s +++ b/test/MC/PowerPC/ppc-nop.s @@ -5,5 +5,8 @@ blr .p2align 3 blr -# CHECK: 0000: 4E800020 60000000 4E800020 +.byte 0x42 +.p2align 2 + +# CHECK: 0000: 4E800020 60000000 4E800020 42000000 diff --git a/test/MC/PowerPC/ppc64-encoding-bookIII.s b/test/MC/PowerPC/ppc64-encoding-bookIII.s new file mode 100644 index 0000000..318c30b --- /dev/null +++ b/test/MC/PowerPC/ppc64-encoding-bookIII.s @@ -0,0 +1,107 @@ +# RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck %s + +# CHECK: mtmsr 4, 0 # encoding: [0x7c,0x80,0x01,0x24] + mtmsr %r4 + +# CHECK: mtmsr 4, 1 # encoding: [0x7c,0x81,0x01,0x24] + mtmsr %r4, 1 + +# CHECK: mfmsr 4 # encoding: [0x7c,0x80,0x00,0xa6] + mfmsr %r4 + +# CHECK: mtmsrd 4, 0 # encoding: [0x7c,0x80,0x01,0x64] + mtmsrd %r4 + +# CHECK: mtmsrd 4, 1 # encoding: [0x7c,0x81,0x01,0x64] + mtmsrd %r4, 1 + +# CHECK: mfspr 4, 272 # 
encoding: [0x7c,0x90,0x42,0xa6] + mfsprg %r4, 0 + +# CHECK: mfspr 4, 273 # encoding: [0x7c,0x91,0x42,0xa6] + mfsprg %r4, 1 + +# CHECK: mfspr 4, 274 # encoding: [0x7c,0x92,0x42,0xa6] + mfsprg %r4, 2 + +# CHECK: mfspr 4, 275 # encoding: [0x7c,0x93,0x42,0xa6] + mfsprg %r4, 3 + +# CHECK: mtspr 272, 4 # encoding: [0x7c,0x90,0x43,0xa6] + mtsprg 0, %r4 + +# CHECK: mtspr 273, 4 # encoding: [0x7c,0x91,0x43,0xa6] + mtsprg 1, %r4 + +# CHECK: mtspr 274, 4 # encoding: [0x7c,0x92,0x43,0xa6] + mtsprg 2, %r4 + +# CHECK: mtspr 275, 4 # encoding: [0x7c,0x93,0x43,0xa6] + mtsprg 3, %r4 + +# CHECK: mtspr 272, 4 # encoding: [0x7c,0x90,0x43,0xa6] + mtsprg0 %r4 + +# CHECK: mtspr 273, 4 # encoding: [0x7c,0x91,0x43,0xa6] + mtsprg1 %r4 + +# CHECK: mtspr 274, 4 # encoding: [0x7c,0x92,0x43,0xa6] + mtsprg2 %r4 + +# CHECK: mtspr 275, 4 # encoding: [0x7c,0x93,0x43,0xa6] + mtsprg3 %r4 + +# CHECK: mtspr 280, 4 # encoding: [0x7c,0x98,0x43,0xa6] + mtasr %r4 + +# CHECK: mfspr 4, 22 # encoding: [0x7c,0x96,0x02,0xa6] + mfdec %r4 + +# CHECK: mtspr 22, 4 # encoding: [0x7c,0x96,0x03,0xa6] + mtdec %r4 + +# CHECK: mfspr 4, 287 # encoding: [0x7c,0x9f,0x42,0xa6] + mfpvr %r4 + +# CHECK: mfspr 4, 25 # encoding: [0x7c,0x99,0x02,0xa6] + mfsdr1 %r4 + +# CHECK: mtspr 25, 4 # encoding: [0x7c,0x99,0x03,0xa6] + mtsdr1 %r4 + +# CHECK: mfspr 4, 26 # encoding: [0x7c,0x9a,0x02,0xa6] + mfsrr0 %r4 + +# CHECK: mtspr 26, 4 # encoding: [0x7c,0x9a,0x03,0xa6] + mtsrr0 %r4 + +# CHECK: mfspr 4, 27 # encoding: [0x7c,0x9b,0x02,0xa6] + mfsrr1 %r4 + +# CHECK: mtspr 27, 4 # encoding: [0x7c,0x9b,0x03,0xa6] + mtsrr1 %r4 + +# CHECK: slbie 4 # encoding: [0x7c,0x00,0x23,0x64] + slbie %r4 + +# CHECK: slbmte 4, 5 # encoding: [0x7c,0x80,0x2b,0x24] + slbmte %r4, %r5 + +# CHECK: slbmfee 4, 5 # encoding: [0x7c,0x80,0x2f,0x26] + slbmfee %r4, %r5 + +# CHECK: slbia # encoding: [0x7c,0x00,0x03,0xe4] + slbia + +# CHECK: tlbsync # encoding: [0x7c,0x00,0x04,0x6c] + tlbsync + +# CHECK: tlbiel 4 # encoding: [0x7c,0x00,0x22,0x24] + tlbiel %r4 + +# CHECK: 
tlbie 4,0 # encoding: [0x7c,0x00,0x22,0x64] + tlbie %r4, 0 + +# CHECK: tlbie 4,0 # encoding: [0x7c,0x00,0x22,0x64] + tlbie %r4 + diff --git a/test/MC/PowerPC/ppc64-encoding-fp.s b/test/MC/PowerPC/ppc64-encoding-fp.s index ae0e286..f9bdee1 100644 --- a/test/MC/PowerPC/ppc64-encoding-fp.s +++ b/test/MC/PowerPC/ppc64-encoding-fp.s @@ -65,8 +65,10 @@ fnabs 2, 3 # CHECK: fnabs. 2, 3 # encoding: [0xfc,0x40,0x19,0x11] fnabs. 2, 3 -# FIXME: fcpsgn 2, 3 -# FIXME: fcpsgn. 2, 3 +# CHECK: fcpsgn 2, 3, 4 # encoding: [0xfc,0x43,0x20,0x10] + fcpsgn 2, 3, 4 +# CHECK: fcpsgn. 2, 3, 4 # encoding: [0xfc,0x43,0x20,0x11] + fcpsgn. 2, 3, 4 # Floating-point arithmetic instructions @@ -171,8 +173,10 @@ # CHECK: frsp. 2, 3 # encoding: [0xfc,0x40,0x18,0x19] frsp. 2, 3 -# FIXME: fctid 2, 3 -# FIXME: fctid. 2, 3 +# CHECK: fctid 2, 3 # encoding: [0xfc,0x40,0x1e,0x5c] + fctid 2, 3 +# CHECK: fctid. 2, 3 # encoding: [0xfc,0x40,0x1e,0x5d] + fctid. 2, 3 # CHECK: fctidz 2, 3 # encoding: [0xfc,0x40,0x1e,0x5e] fctidz 2, 3 # CHECK: fctidz. 2, 3 # encoding: [0xfc,0x40,0x1e,0x5f] @@ -183,8 +187,10 @@ fctiduz 2, 3 # CHECK: fctiduz. 2, 3 # encoding: [0xfc,0x40,0x1f,0x5f] fctiduz. 2, 3 -# FIXME: fctiw 2, 3 -# FIXME: fctiw. 2, 3 +# CHECK: fctiw 2, 3 # encoding: [0xfc,0x40,0x18,0x1c] + fctiw 2, 3 +# CHECK: fctiw. 2, 3 # encoding: [0xfc,0x40,0x18,0x1d] + fctiw. 2, 3 # CHECK: fctiwz 2, 3 # encoding: [0xfc,0x40,0x18,0x1e] fctiwz 2, 3 # CHECK: fctiwz. 
2, 3 # encoding: [0xfc,0x40,0x18,0x1f] diff --git a/test/MC/PowerPC/ppc64-errors.s b/test/MC/PowerPC/ppc64-errors.s index bc8c95c..53197ba 100644 --- a/test/MC/PowerPC/ppc64-errors.s +++ b/test/MC/PowerPC/ppc64-errors.s @@ -96,3 +96,6 @@ # CHECK-NEXT: ld 1, 32768(2) ld 1, 32768(2) +# CHECK: error: invalid modifier 'got' (no symbols present) + addi 4, 3, 123@got +# CHECK-NEXT: addi 4, 3, 123@got diff --git a/test/MC/PowerPC/ppc64-fixup-explicit.s b/test/MC/PowerPC/ppc64-fixup-explicit.s index 217e057..7c56fe8 100644 --- a/test/MC/PowerPC/ppc64-fixup-explicit.s +++ b/test/MC/PowerPC/ppc64-fixup-explicit.s @@ -2,7 +2,7 @@ # RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck %s # RUN: llvm-mc -triple powerpc64-unknown-unknown -filetype=obj %s | \ -# RUN: llvm-readobj -r | FileCheck %s -check-prefix=REL +# RUN: llvm-readobj -r | FileCheck %s -check-prefix=CHECK-REL # GOT references must result in explicit relocations # even if the target symbol is local. diff --git a/test/MC/PowerPC/ppc64-fixups.s b/test/MC/PowerPC/ppc64-fixups.s index 56f99d8..a075066 100644 --- a/test/MC/PowerPC/ppc64-fixups.s +++ b/test/MC/PowerPC/ppc64-fixups.s @@ -2,7 +2,7 @@ # RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck %s # RUN: llvm-mc -triple powerpc64-unknown-unknown -filetype=obj %s | \ -# RUN: llvm-readobj -r | FileCheck %s -check-prefix=REL +# RUN: llvm-readobj -r | FileCheck %s -check-prefix=CHECK-REL # CHECK: b target # encoding: [0b010010AA,A,A,0bAAAAAA00] # CHECK-NEXT: # fixup A - offset: 0, value: target, kind: fixup_ppc_br24 @@ -442,3 +442,7 @@ base: # CHECK-REL: 0x{{[0-9A-F]*[08]}} R_PPC64_DTPREL64 target 0x0 .quad target@dtprel +# Constant fixup + ori 1, 2, 131071@l +# CHECK: ori 1, 2, 131071@l # encoding: [0x60,0x41,A,A] +# CHECK-NEXT: # fixup A - offset: 2, value: 131071@l, kind: fixup_ppc_half16 diff --git a/test/MC/PowerPC/ppc64-operands.s b/test/MC/PowerPC/ppc64-operands.s index cb96fd4..fc1cbeb 100644 --- 
a/test/MC/PowerPC/ppc64-operands.s +++ b/test/MC/PowerPC/ppc64-operands.s @@ -108,3 +108,8 @@ # CHECK: beqa 0, 1024 # encoding: [0x41,0x82,0x04,0x02] beqa 1024 +# CHECK: # encoding: [0x42,0x9f,A,0bAAAAAA01] + bcl 20, 31, $+4 + +# CHECK: # encoding: [0x42,0x00,A,0bAAAAAA00] + bdnz $-8 diff --git a/test/MC/PowerPC/tls-gd-obj.s b/test/MC/PowerPC/tls-gd-obj.s new file mode 100644 index 0000000..63d47ee --- /dev/null +++ b/test/MC/PowerPC/tls-gd-obj.s @@ -0,0 +1,56 @@ +// RUN: llvm-mc -triple=powerpc64-pc-linux -filetype=obj %s -o - | \ +// RUN: llvm-readobj -r | FileCheck %s + +// Test correct relocation generation for thread-local storage using +// the general dynamic model and integrated assembly. + + + .file "/home/espindola/llvm/llvm/test/CodeGen/PowerPC/tls-gd-obj.ll" + .text + .globl main + .align 2 + .type main,@function + .section .opd,"aw",@progbits +main: # @main + .align 3 + .quad .L.main + .quad .TOC.@tocbase + .quad 0 + .text +.L.main: +# BB#0: # %entry + addis 3, 2, a@got@tlsgd@ha + addi 3, 3, a@got@tlsgd@l + li 4, 0 + bl __tls_get_addr(a@tlsgd) + nop + stw 4, -4(1) + lwz 4, 0(3) + extsw 3, 4 + blr + .long 0 + .quad 0 +.Ltmp0: + .size main, .Ltmp0-.L.main + + .type a,@object # @a + .section .tbss,"awT",@nobits + .globl a + .align 2 +a: + .long 0 # 0x0 + .size a, 4 + + +// Verify generation of R_PPC64_GOT_TLSGD16_HA, R_PPC64_GOT_TLSGD16_LO, +// and R_PPC64_TLSGD for accessing external variable a, and R_PPC64_REL24 +// for the call to __tls_get_addr. 
+// +// CHECK: Relocations [ +// CHECK: Section (2) .rela.text { +// CHECK: 0x{{[0-9,A-F]+}} R_PPC64_GOT_TLSGD16_HA a +// CHECK: 0x{{[0-9,A-F]+}} R_PPC64_GOT_TLSGD16_LO a +// CHECK: 0x{{[0-9,A-F]+}} R_PPC64_TLSGD a +// CHECK: 0x{{[0-9,A-F]+}} R_PPC64_REL24 __tls_get_addr +// CHECK: } +// CHECK: ] diff --git a/test/MC/PowerPC/tls-ie-obj.s b/test/MC/PowerPC/tls-ie-obj.s new file mode 100644 index 0000000..c8c5d91 --- /dev/null +++ b/test/MC/PowerPC/tls-ie-obj.s @@ -0,0 +1,44 @@ +// RUN: llvm-mc -triple=powerpc64-pc-linux -filetype=obj %s -o - | \ +// RUN: llvm-readobj -r | FileCheck %s + +// Test correct relocation generation for thread-local storage +// using the initial-exec model and integrated assembly. + + .file "/home/espindola/llvm/llvm/test/CodeGen/PowerPC/tls-ie-obj.ll" + .text + .globl main + .align 2 + .type main,@function + .section .opd,"aw",@progbits +main: # @main + .align 3 + .quad .L.main + .quad .TOC.@tocbase + .quad 0 + .text +.L.main: +# BB#0: # %entry + li 3, 0 + addis 4, 2, a@got@tprel@ha + ld 4, a@got@tprel@l(4) + add 4, 4, a@tls + stw 3, -4(1) + lwz 3, 0(4) + extsw 3, 3 + blr + .long 0 + .quad 0 +.Ltmp0: + .size main, .Ltmp0-.L.main + + +// Verify generation of R_PPC64_GOT_TPREL16_DS and R_PPC64_TLS for +// accessing external variable a. +// +// CHECK: Relocations [ +// CHECK: Section (2) .rela.text { +// CHECK: 0x{{[0-9,A-F]+}} R_PPC64_GOT_TPREL16_HA a +// CHECK: 0x{{[0-9,A-F]+}} R_PPC64_GOT_TPREL16_LO_DS a +// CHECK: 0x{{[0-9,A-F]+}} R_PPC64_TLS a +// CHECK: } +// CHECK: ] diff --git a/test/MC/PowerPC/tls-ld-obj.s b/test/MC/PowerPC/tls-ld-obj.s new file mode 100644 index 0000000..b0c4a7a --- /dev/null +++ b/test/MC/PowerPC/tls-ld-obj.s @@ -0,0 +1,61 @@ +// RUN: llvm-mc -triple=powerpc64-pc-linux -filetype=obj %s -o - | \ +// RUN: llvm-readobj -r | FileCheck %s + +// Test correct relocation generation for thread-local storage using +// the local dynamic model. 
+ + .file "/home/espindola/llvm/llvm/test/CodeGen/PowerPC/tls-ld-obj.ll" + .text + .globl main + .align 2 + .type main,@function + .section .opd,"aw",@progbits +main: # @main + .align 3 + .quad .L.main + .quad .TOC.@tocbase + .quad 0 + .text +.L.main: +# BB#0: # %entry + addis 3, 2, a@got@tlsld@ha + addi 3, 3, a@got@tlsld@l + li 4, 0 + bl __tls_get_addr(a@tlsld) + nop + stw 4, -4(1) + addis 3, 3, a@dtprel@ha + addi 3, 3, a@dtprel@l + lwz 4, 0(3) + extsw 3, 4 + blr + .long 0 + .quad 0 +.Ltmp0: + .size main, .Ltmp0-.L.main + + .hidden a # @a + .type a,@object + .section .tbss,"awT",@nobits + .globl a + .align 2 +a: + .long 0 # 0x0 + .size a, 4 + + +// Verify generation of R_PPC64_GOT_TLSLD16_HA, R_PPC64_GOT_TLSLD16_LO, +// R_PPC64_TLSLD, R_PPC64_DTPREL16_HA, and R_PPC64_DTPREL16_LO for +// accessing external variable a, and R_PPC64_REL24 for the call to +// __tls_get_addr. +// +// CHECK: Relocations [ +// CHECK: Section (2) .rela.text { +// CHECK: 0x{{[0-9,A-F]+}} R_PPC64_GOT_TLSLD16_HA a +// CHECK: 0x{{[0-9,A-F]+}} R_PPC64_GOT_TLSLD16_LO a +// CHECK: 0x{{[0-9,A-F]+}} R_PPC64_TLSLD a +// CHECK: 0x{{[0-9,A-F]+}} R_PPC64_REL24 __tls_get_addr +// CHECK: 0x{{[0-9,A-F]+}} R_PPC64_DTPREL16_HA a +// CHECK: 0x{{[0-9,A-F]+}} R_PPC64_DTPREL16_LO a +// CHECK: } +// CHECK: ] diff --git a/test/MC/SystemZ/insn-bad-z196.s b/test/MC/SystemZ/insn-bad-z196.s index ec90c89..089d9b5 100644 --- a/test/MC/SystemZ/insn-bad-z196.s +++ b/test/MC/SystemZ/insn-bad-z196.s @@ -25,6 +25,134 @@ ahik %r0, %r1, foo #CHECK: error: invalid operand +#CHECK: aih %r0, (-1 << 31) - 1 +#CHECK: error: invalid operand +#CHECK: aih %r0, (1 << 31) + + aih %r0, (-1 << 31) - 1 + aih %r0, (1 << 31) + +#CHECK: error: invalid operand +#CHECK: chf %r0, -524289 +#CHECK: error: invalid operand +#CHECK: chf %r0, 524288 + + chf %r0, -524289 + chf %r0, 524288 + +#CHECK: error: invalid operand +#CHECK: cih %r0, (-1 << 31) - 1 +#CHECK: error: invalid operand +#CHECK: cih %r0, (1 << 31) + + cih %r0, (-1 << 31) - 1 + cih 
%r0, (1 << 31) + +#CHECK: error: invalid operand +#CHECK: clhf %r0, -524289 +#CHECK: error: invalid operand +#CHECK: clhf %r0, 524288 + + clhf %r0, -524289 + clhf %r0, 524288 + +#CHECK: error: invalid operand +#CHECK: clih %r0, -1 +#CHECK: error: invalid operand +#CHECK: clih %r0, (1 << 32) + + clih %r0, -1 + clih %r0, (1 << 32) + +#CHECK: error: invalid operand +#CHECK: fidbra %f0, 0, %f0, -1 +#CHECK: error: invalid operand +#CHECK: fidbra %f0, 0, %f0, 16 +#CHECK: error: invalid operand +#CHECK: fidbra %f0, -1, %f0, 0 +#CHECK: error: invalid operand +#CHECK: fidbra %f0, 16, %f0, 0 + + fidbra %f0, 0, %f0, -1 + fidbra %f0, 0, %f0, 16 + fidbra %f0, -1, %f0, 0 + fidbra %f0, 16, %f0, 0 + +#CHECK: error: invalid operand +#CHECK: fiebra %f0, 0, %f0, -1 +#CHECK: error: invalid operand +#CHECK: fiebra %f0, 0, %f0, 16 +#CHECK: error: invalid operand +#CHECK: fiebra %f0, -1, %f0, 0 +#CHECK: error: invalid operand +#CHECK: fiebra %f0, 16, %f0, 0 + + fiebra %f0, 0, %f0, -1 + fiebra %f0, 0, %f0, 16 + fiebra %f0, -1, %f0, 0 + fiebra %f0, 16, %f0, 0 + +#CHECK: error: invalid operand +#CHECK: fixbra %f0, 0, %f0, -1 +#CHECK: error: invalid operand +#CHECK: fixbra %f0, 0, %f0, 16 +#CHECK: error: invalid operand +#CHECK: fixbra %f0, -1, %f0, 0 +#CHECK: error: invalid operand +#CHECK: fixbra %f0, 16, %f0, 0 +#CHECK: error: invalid register pair +#CHECK: fixbra %f0, 0, %f2, 0 +#CHECK: error: invalid register pair +#CHECK: fixbra %f2, 0, %f0, 0 + + fixbra %f0, 0, %f0, -1 + fixbra %f0, 0, %f0, 16 + fixbra %f0, -1, %f0, 0 + fixbra %f0, 16, %f0, 0 + fixbra %f0, 0, %f2, 0 + fixbra %f2, 0, %f0, 0 + +#CHECK: error: invalid operand +#CHECK: lbh %r0, -524289 +#CHECK: error: invalid operand +#CHECK: lbh %r0, 524288 + + lbh %r0, -524289 + lbh %r0, 524288 + +#CHECK: error: invalid operand +#CHECK: lfh %r0, -524289 +#CHECK: error: invalid operand +#CHECK: lfh %r0, 524288 + + lfh %r0, -524289 + lfh %r0, 524288 + +#CHECK: error: invalid operand +#CHECK: lhh %r0, -524289 +#CHECK: error: invalid 
operand +#CHECK: lhh %r0, 524288 + + lhh %r0, -524289 + lhh %r0, 524288 + +#CHECK: error: invalid operand +#CHECK: llch %r0, -524289 +#CHECK: error: invalid operand +#CHECK: llch %r0, 524288 + + llch %r0, -524289 + llch %r0, 524288 + +#CHECK: error: invalid operand +#CHECK: llhh %r0, -524289 +#CHECK: error: invalid operand +#CHECK: llhh %r0, 524288 + + llhh %r0, -524289 + llhh %r0, 524288 + +#CHECK: error: invalid operand #CHECK: loc %r0,0,-1 #CHECK: error: invalid operand #CHECK: loc %r0,0,16 @@ -157,6 +285,30 @@ srlk %r0,%r0,0(%r1,%r2) #CHECK: error: invalid operand +#CHECK: stch %r0, -524289 +#CHECK: error: invalid operand +#CHECK: stch %r0, 524288 + + stch %r0, -524289 + stch %r0, 524288 + +#CHECK: error: invalid operand +#CHECK: sthh %r0, -524289 +#CHECK: error: invalid operand +#CHECK: sthh %r0, 524288 + + sthh %r0, -524289 + sthh %r0, 524288 + +#CHECK: error: invalid operand +#CHECK: stfh %r0, -524289 +#CHECK: error: invalid operand +#CHECK: stfh %r0, 524288 + + stfh %r0, -524289 + stfh %r0, 524288 + +#CHECK: error: invalid operand #CHECK: stoc %r0,0,-1 #CHECK: error: invalid operand #CHECK: stoc %r0,0,16 diff --git a/test/MC/SystemZ/insn-bad.s b/test/MC/SystemZ/insn-bad.s index b730637..2a3fb98 100644 --- a/test/MC/SystemZ/insn-bad.s +++ b/test/MC/SystemZ/insn-bad.s @@ -128,6 +128,11 @@ ahy %r0, -524289 ahy %r0, 524288 +#CHECK: error: {{(instruction requires: high-word)?}} +#CHECK: aih %r0, 0 + + aih %r0, 0 + #CHECK: error: invalid operand #CHECK: al %r0, -1 #CHECK: error: invalid operand @@ -251,6 +256,14 @@ ay %r0, -524289 ay %r0, 524288 +#CHECK: error: invalid operand +#CHECK: bcr -1, %r1 +#CHECK: error: invalid operand +#CHECK: bcr 16, %r1 + + bcr -1, %r1 + bcr 16, %r1 + #CHECK: error: offset out of range #CHECK: bras %r0, -0x100002 #CHECK: error: offset out of range @@ -607,6 +620,11 @@ ch %r0, -1 ch %r0, 4096 +#CHECK: error: {{(instruction requires: high-word)?}} +#CHECK: chf %r0, 0 + + chf %r0, 0 + #CHECK: error: invalid operand #CHECK: chhsi -1, 0 
#CHECK: error: invalid operand @@ -674,6 +692,11 @@ chy %r0, -524289 chy %r0, 524288 +#CHECK: error: {{(instruction requires: high-word)?}} +#CHECK: cih %r0, 0 + + cih %r0, 0 + #CHECK: error: invalid operand #CHECK: cij %r0, -129, 0, 0 #CHECK: error: invalid operand @@ -712,6 +735,55 @@ cl %r0, -1 cl %r0, 4096 +#CHECK: error: missing length in address +#CHECK: clc 0, 0 +#CHECK: error: missing length in address +#CHECK: clc 0(%r1), 0(%r1) +#CHECK: error: invalid use of length addressing +#CHECK: clc 0(1,%r1), 0(2,%r1) +#CHECK: error: invalid operand +#CHECK: clc 0(0,%r1), 0(%r1) +#CHECK: error: invalid operand +#CHECK: clc 0(257,%r1), 0(%r1) +#CHECK: error: invalid operand +#CHECK: clc -1(1,%r1), 0(%r1) +#CHECK: error: invalid operand +#CHECK: clc 4096(1,%r1), 0(%r1) +#CHECK: error: invalid operand +#CHECK: clc 0(1,%r1), -1(%r1) +#CHECK: error: invalid operand +#CHECK: clc 0(1,%r1), 4096(%r1) +#CHECK: error: %r0 used in an address +#CHECK: clc 0(1,%r0), 0(%r1) +#CHECK: error: %r0 used in an address +#CHECK: clc 0(1,%r1), 0(%r0) +#CHECK: error: invalid use of indexed addressing +#CHECK: clc 0(%r1,%r2), 0(%r1) +#CHECK: error: invalid use of indexed addressing +#CHECK: clc 0(1,%r2), 0(%r1,%r2) +#CHECK: error: unknown token in expression +#CHECK: clc 0(-), 0 + + clc 0, 0 + clc 0(%r1), 0(%r1) + clc 0(1,%r1), 0(2,%r1) + clc 0(0,%r1), 0(%r1) + clc 0(257,%r1), 0(%r1) + clc -1(1,%r1), 0(%r1) + clc 4096(1,%r1), 0(%r1) + clc 0(1,%r1), -1(%r1) + clc 0(1,%r1), 4096(%r1) + clc 0(1,%r0), 0(%r1) + clc 0(1,%r1), 0(%r0) + clc 0(%r1,%r2), 0(%r1) + clc 0(1,%r2), 0(%r1,%r2) + clc 0(-), 0 + +#CHECK: error: {{(instruction requires: high-word)?}} +#CHECK: clhf %r0, 0 + + clhf %r0, 0 + #CHECK: error: invalid operand #CHECK: clfhsi -1, 0 #CHECK: error: invalid operand @@ -806,6 +878,50 @@ clghsi 0, -1 clghsi 0, 65536 +#CHECK: error: invalid operand +#CHECK: clgij %r0, -1, 0, 0 +#CHECK: error: invalid operand +#CHECK: clgij %r0, 256, 0, 0 + + clgij %r0, -1, 0, 0 + clgij %r0, 256, 0, 0 + 
+#CHECK: error: offset out of range +#CHECK: clgij %r0, 0, 0, -0x100002 +#CHECK: error: offset out of range +#CHECK: clgij %r0, 0, 0, -1 +#CHECK: error: offset out of range +#CHECK: clgij %r0, 0, 0, 1 +#CHECK: error: offset out of range +#CHECK: clgij %r0, 0, 0, 0x10000 + + clgij %r0, 0, 0, -0x100002 + clgij %r0, 0, 0, -1 + clgij %r0, 0, 0, 1 + clgij %r0, 0, 0, 0x10000 + +#CHECK: error: invalid instruction +#CHECK: clgijo %r0, 0, 0, 0 +#CHECK: error: invalid instruction +#CHECK: clgijno %r0, 0, 0, 0 + + clgijo %r0, 0, 0, 0 + clgijno %r0, 0, 0, 0 + +#CHECK: error: offset out of range +#CHECK: clgrj %r0, %r0, 0, -0x100002 +#CHECK: error: offset out of range +#CHECK: clgrj %r0, %r0, 0, -1 +#CHECK: error: offset out of range +#CHECK: clgrj %r0, %r0, 0, 1 +#CHECK: error: offset out of range +#CHECK: clgrj %r0, %r0, 0, 0x10000 + + clgrj %r0, %r0, 0, -0x100002 + clgrj %r0, %r0, 0, -1 + clgrj %r0, %r0, 0, 1 + clgrj %r0, %r0, 0, 0x10000 + #CHECK: error: offset out of range #CHECK: clgrl %r0, -0x1000000002 #CHECK: error: offset out of range @@ -868,6 +984,41 @@ cli 0, -1 cli 0, 256 +#CHECK: error: {{(instruction requires: high-word)?}} +#CHECK: clih %r0, 0 + + clih %r0, 0 + +#CHECK: error: invalid operand +#CHECK: clij %r0, -1, 0, 0 +#CHECK: error: invalid operand +#CHECK: clij %r0, 256, 0, 0 + + clij %r0, -1, 0, 0 + clij %r0, 256, 0, 0 + +#CHECK: error: offset out of range +#CHECK: clij %r0, 0, 0, -0x100002 +#CHECK: error: offset out of range +#CHECK: clij %r0, 0, 0, -1 +#CHECK: error: offset out of range +#CHECK: clij %r0, 0, 0, 1 +#CHECK: error: offset out of range +#CHECK: clij %r0, 0, 0, 0x10000 + + clij %r0, 0, 0, -0x100002 + clij %r0, 0, 0, -1 + clij %r0, 0, 0, 1 + clij %r0, 0, 0, 0x10000 + +#CHECK: error: invalid instruction +#CHECK: clijo %r0, 0, 0, 0 +#CHECK: error: invalid instruction +#CHECK: clijno %r0, 0, 0, 0 + + clijo %r0, 0, 0, 0 + clijno %r0, 0, 0, 0 + #CHECK: error: invalid operand #CHECK: cliy -524289, 0 #CHECK: error: invalid operand @@ -886,6 +1037,28 
@@ cliy 0, 256 #CHECK: error: offset out of range +#CHECK: clrj %r0, %r0, 0, -0x100002 +#CHECK: error: offset out of range +#CHECK: clrj %r0, %r0, 0, -1 +#CHECK: error: offset out of range +#CHECK: clrj %r0, %r0, 0, 1 +#CHECK: error: offset out of range +#CHECK: clrj %r0, %r0, 0, 0x10000 + + clrj %r0, %r0, 0, -0x100002 + clrj %r0, %r0, 0, -1 + clrj %r0, %r0, 0, 1 + clrj %r0, %r0, 0, 0x10000 + +#CHECK: error: invalid instruction +#CHECK: clrjo %r0, %r0, 0, 0 +#CHECK: error: invalid instruction +#CHECK: clrjno %r0, %r0, 0, 0 + + clrjo %r0, %r0, 0, 0 + clrjno %r0, %r0, 0, 0 + +#CHECK: error: offset out of range #CHECK: clrl %r0, -0x1000000002 #CHECK: error: offset out of range #CHECK: clrl %r0, -1 @@ -1098,6 +1271,11 @@ fidbr %f0, -1, %f0 fidbr %f0, 16, %f0 +#CHECK: error: {{(instruction requires: fp-extension)?}} +#CHECK: fidbra %f0, 0, %f0, 0 + + fidbra %f0, 0, %f0, 0 + #CHECK: error: invalid operand #CHECK: fiebr %f0, -1, %f0 #CHECK: error: invalid operand @@ -1106,6 +1284,11 @@ fiebr %f0, -1, %f0 fiebr %f0, 16, %f0 +#CHECK: error: {{(instruction requires: fp-extension)?}} +#CHECK: fiebra %f0, 0, %f0, 0 + + fiebra %f0, 0, %f0, 0 + #CHECK: error: invalid operand #CHECK: fixbr %f0, -1, %f0 #CHECK: error: invalid operand @@ -1120,6 +1303,11 @@ fixbr %f0, 0, %f2 fixbr %f2, 0, %f0 +#CHECK: error: {{(instruction requires: fp-extension)?}} +#CHECK: fixbra %f0, 0, %f0, 0 + + fixbra %f0, 0, %f0, 0 + #CHECK: error: invalid register pair #CHECK: flogr %r1, %r0 @@ -1235,6 +1423,11 @@ lb %r0, -524289 lb %r0, 524288 +#CHECK: error: {{(instruction requires: high-word)?}} +#CHECK: lbh %r0, 0 + + lbh %r0, 0 + #CHECK: error: invalid register pair #CHECK: lcxbr %f0, %f2 #CHECK: error: invalid register pair @@ -1299,6 +1492,11 @@ ley %f0, -524289 ley %f0, 524288 +#CHECK: error: {{(instruction requires: high-word)?}} +#CHECK: lfh %r0, 0 + + lfh %r0, 0 + #CHECK: error: invalid operand #CHECK: lg %r0, -524289 #CHECK: error: invalid operand @@ -1400,6 +1598,11 @@ lh %r0, -1 lh %r0, 4096 
+#CHECK: error: {{(instruction requires: high-word)?}} +#CHECK: lhh %r0, 0 + + lhh %r0, 0 + #CHECK: error: invalid operand #CHECK: lhi %r0, -32769 #CHECK: error: invalid operand @@ -1441,6 +1644,11 @@ llc %r0, -524289 llc %r0, 524288 +#CHECK: error: {{(instruction requires: high-word)?}} +#CHECK: llch %r0, 0 + + llch %r0, 0 + #CHECK: error: invalid operand #CHECK: llgc %r0, -524289 #CHECK: error: invalid operand @@ -1501,6 +1709,11 @@ llh %r0, -524289 llh %r0, 524288 +#CHECK: error: {{(instruction requires: high-word)?}} +#CHECK: llhh %r0, 0 + + llhh %r0, 0 + #CHECK: error: offset out of range #CHECK: llhrl %r0, -0x1000000002 #CHECK: error: offset out of range @@ -1992,6 +2205,50 @@ n %r0, -1 n %r0, 4096 +#CHECK: error: missing length in address +#CHECK: nc 0, 0 +#CHECK: error: missing length in address +#CHECK: nc 0(%r1), 0(%r1) +#CHECK: error: invalid use of length addressing +#CHECK: nc 0(1,%r1), 0(2,%r1) +#CHECK: error: invalid operand +#CHECK: nc 0(0,%r1), 0(%r1) +#CHECK: error: invalid operand +#CHECK: nc 0(257,%r1), 0(%r1) +#CHECK: error: invalid operand +#CHECK: nc -1(1,%r1), 0(%r1) +#CHECK: error: invalid operand +#CHECK: nc 4096(1,%r1), 0(%r1) +#CHECK: error: invalid operand +#CHECK: nc 0(1,%r1), -1(%r1) +#CHECK: error: invalid operand +#CHECK: nc 0(1,%r1), 4096(%r1) +#CHECK: error: %r0 used in an address +#CHECK: nc 0(1,%r0), 0(%r1) +#CHECK: error: %r0 used in an address +#CHECK: nc 0(1,%r1), 0(%r0) +#CHECK: error: invalid use of indexed addressing +#CHECK: nc 0(%r1,%r2), 0(%r1) +#CHECK: error: invalid use of indexed addressing +#CHECK: nc 0(1,%r2), 0(%r1,%r2) +#CHECK: error: unknown token in expression +#CHECK: nc 0(-), 0 + + nc 0, 0 + nc 0(%r1), 0(%r1) + nc 0(1,%r1), 0(2,%r1) + nc 0(0,%r1), 0(%r1) + nc 0(257,%r1), 0(%r1) + nc -1(1,%r1), 0(%r1) + nc 4096(1,%r1), 0(%r1) + nc 0(1,%r1), -1(%r1) + nc 0(1,%r1), 4096(%r1) + nc 0(1,%r0), 0(%r1) + nc 0(1,%r1), 0(%r0) + nc 0(%r1,%r2), 0(%r1) + nc 0(1,%r2), 0(%r1,%r2) + nc 0(-), 0 + #CHECK: error: invalid operand 
#CHECK: ng %r0, -524289 #CHECK: error: invalid operand @@ -2108,6 +2365,50 @@ o %r0, -1 o %r0, 4096 +#CHECK: error: missing length in address +#CHECK: oc 0, 0 +#CHECK: error: missing length in address +#CHECK: oc 0(%r1), 0(%r1) +#CHECK: error: invalid use of length addressing +#CHECK: oc 0(1,%r1), 0(2,%r1) +#CHECK: error: invalid operand +#CHECK: oc 0(0,%r1), 0(%r1) +#CHECK: error: invalid operand +#CHECK: oc 0(257,%r1), 0(%r1) +#CHECK: error: invalid operand +#CHECK: oc -1(1,%r1), 0(%r1) +#CHECK: error: invalid operand +#CHECK: oc 4096(1,%r1), 0(%r1) +#CHECK: error: invalid operand +#CHECK: oc 0(1,%r1), -1(%r1) +#CHECK: error: invalid operand +#CHECK: oc 0(1,%r1), 4096(%r1) +#CHECK: error: %r0 used in an address +#CHECK: oc 0(1,%r0), 0(%r1) +#CHECK: error: %r0 used in an address +#CHECK: oc 0(1,%r1), 0(%r0) +#CHECK: error: invalid use of indexed addressing +#CHECK: oc 0(%r1,%r2), 0(%r1) +#CHECK: error: invalid use of indexed addressing +#CHECK: oc 0(1,%r2), 0(%r1,%r2) +#CHECK: error: unknown token in expression +#CHECK: oc 0(-), 0 + + oc 0, 0 + oc 0(%r1), 0(%r1) + oc 0(1,%r1), 0(2,%r1) + oc 0(0,%r1), 0(%r1) + oc 0(257,%r1), 0(%r1) + oc -1(1,%r1), 0(%r1) + oc 4096(1,%r1), 0(%r1) + oc 0(1,%r1), -1(%r1) + oc 0(1,%r1), 4096(%r1) + oc 0(1,%r0), 0(%r1) + oc 0(1,%r1), 0(%r0) + oc 0(%r1,%r2), 0(%r1) + oc 0(1,%r2), 0(%r1,%r2) + oc 0(-), 0 + #CHECK: error: invalid operand #CHECK: og %r0, -524289 #CHECK: error: invalid operand @@ -2217,6 +2518,40 @@ oy %r0, 524288 #CHECK: error: invalid operand +#CHECK: pfd -1, 0 +#CHECK: error: invalid operand +#CHECK: pfd 16, 0 +#CHECK: error: invalid operand +#CHECK: pfd 1, -524289 +#CHECK: error: invalid operand +#CHECK: pfd 1, 524288 + + pfd -1, 0 + pfd 16, 0 + pfd 1, -524289 + pfd 1, 524288 + +#CHECK: error: invalid operand +#CHECK: pfdrl -1, 0 +#CHECK: error: invalid operand +#CHECK: pfdrl 16, 0 +#CHECK: error: offset out of range +#CHECK: pfdrl 1, -0x1000000002 +#CHECK: error: offset out of range +#CHECK: pfdrl 1, -1 +#CHECK: error: 
offset out of range +#CHECK: pfdrl 1, 1 +#CHECK: error: offset out of range +#CHECK: pfdrl 1, 0x100000000 + + pfdrl -1, 0 + pfdrl 16, 0 + pfdrl 1, -0x1000000002 + pfdrl 1, -1 + pfdrl 1, 1 + pfdrl 1, 0x100000000 + +#CHECK: error: invalid operand #CHECK: risbg %r0,%r0,0,0,-1 #CHECK: error: invalid operand #CHECK: risbg %r0,%r0,0,0,64 @@ -2613,6 +2948,11 @@ stc %r0, -1 stc %r0, 4096 +#CHECK: error: {{(instruction requires: high-word)?}} +#CHECK: stch %r0, 0 + + stch %r0, 0 + #CHECK: error: invalid operand #CHECK: stcy %r0, -524289 #CHECK: error: invalid operand @@ -2683,6 +3023,11 @@ sth %r0, -1 sth %r0, 4096 +#CHECK: error: {{(instruction requires: high-word)?}} +#CHECK: sthh %r0, 0 + + sthh %r0, 0 + #CHECK: error: offset out of range #CHECK: sthrl %r0, -0x1000000002 #CHECK: error: offset out of range @@ -2705,6 +3050,11 @@ sthy %r0, -524289 sthy %r0, 524288 +#CHECK: error: {{(instruction requires: high-word)?}} +#CHECK: stfh %r0, 0 + + stfh %r0, 0 + #CHECK: error: invalid operand #CHECK: stmg %r0, %r0, -524289 #CHECK: error: invalid operand @@ -2771,6 +3121,72 @@ sy %r0, 524288 #CHECK: error: invalid operand +#CHECK: tm -1, 0 +#CHECK: error: invalid operand +#CHECK: tm 4096, 0 +#CHECK: error: invalid use of indexed addressing +#CHECK: tm 0(%r1,%r2), 0 +#CHECK: error: invalid operand +#CHECK: tm 0, -1 +#CHECK: error: invalid operand +#CHECK: tm 0, 256 + + tm -1, 0 + tm 4096, 0 + tm 0(%r1,%r2), 0 + tm 0, -1 + tm 0, 256 + +#CHECK: error: invalid operand +#CHECK: tmhh %r0, -1 +#CHECK: error: invalid operand +#CHECK: tmhh %r0, 0x10000 + + tmhh %r0, -1 + tmhh %r0, 0x10000 + +#CHECK: error: invalid operand +#CHECK: tmhl %r0, -1 +#CHECK: error: invalid operand +#CHECK: tmhl %r0, 0x10000 + + tmhl %r0, -1 + tmhl %r0, 0x10000 + +#CHECK: error: invalid operand +#CHECK: tmlh %r0, -1 +#CHECK: error: invalid operand +#CHECK: tmlh %r0, 0x10000 + + tmlh %r0, -1 + tmlh %r0, 0x10000 + +#CHECK: error: invalid operand +#CHECK: tmll %r0, -1 +#CHECK: error: invalid operand +#CHECK: tmll 
%r0, 0x10000 + + tmll %r0, -1 + tmll %r0, 0x10000 + +#CHECK: error: invalid operand +#CHECK: tmy -524289, 0 +#CHECK: error: invalid operand +#CHECK: tmy 524288, 0 +#CHECK: error: invalid use of indexed addressing +#CHECK: tmy 0(%r1,%r2), 0 +#CHECK: error: invalid operand +#CHECK: tmy 0, -1 +#CHECK: error: invalid operand +#CHECK: tmy 0, 256 + + tmy -524289, 0 + tmy 524288, 0 + tmy 0(%r1,%r2), 0 + tmy 0, -1 + tmy 0, 256 + +#CHECK: error: invalid operand #CHECK: x %r0, -1 #CHECK: error: invalid operand #CHECK: x %r0, 4096 @@ -2778,6 +3194,50 @@ x %r0, -1 x %r0, 4096 +#CHECK: error: missing length in address +#CHECK: xc 0, 0 +#CHECK: error: missing length in address +#CHECK: xc 0(%r1), 0(%r1) +#CHECK: error: invalid use of length addressing +#CHECK: xc 0(1,%r1), 0(2,%r1) +#CHECK: error: invalid operand +#CHECK: xc 0(0,%r1), 0(%r1) +#CHECK: error: invalid operand +#CHECK: xc 0(257,%r1), 0(%r1) +#CHECK: error: invalid operand +#CHECK: xc -1(1,%r1), 0(%r1) +#CHECK: error: invalid operand +#CHECK: xc 4096(1,%r1), 0(%r1) +#CHECK: error: invalid operand +#CHECK: xc 0(1,%r1), -1(%r1) +#CHECK: error: invalid operand +#CHECK: xc 0(1,%r1), 4096(%r1) +#CHECK: error: %r0 used in an address +#CHECK: xc 0(1,%r0), 0(%r1) +#CHECK: error: %r0 used in an address +#CHECK: xc 0(1,%r1), 0(%r0) +#CHECK: error: invalid use of indexed addressing +#CHECK: xc 0(%r1,%r2), 0(%r1) +#CHECK: error: invalid use of indexed addressing +#CHECK: xc 0(1,%r2), 0(%r1,%r2) +#CHECK: error: unknown token in expression +#CHECK: xc 0(-), 0 + + xc 0, 0 + xc 0(%r1), 0(%r1) + xc 0(1,%r1), 0(2,%r1) + xc 0(0,%r1), 0(%r1) + xc 0(257,%r1), 0(%r1) + xc -1(1,%r1), 0(%r1) + xc 4096(1,%r1), 0(%r1) + xc 0(1,%r1), -1(%r1) + xc 0(1,%r1), 4096(%r1) + xc 0(1,%r0), 0(%r1) + xc 0(1,%r1), 0(%r0) + xc 0(%r1,%r2), 0(%r1) + xc 0(1,%r2), 0(%r1,%r2) + xc 0(-), 0 + #CHECK: error: invalid operand #CHECK: xg %r0, -524289 #CHECK: error: invalid operand diff --git a/test/MC/SystemZ/insn-good-z196.s b/test/MC/SystemZ/insn-good-z196.s index 
5f7c277..258e06f 100644 --- a/test/MC/SystemZ/insn-good-z196.s +++ b/test/MC/SystemZ/insn-good-z196.s @@ -49,6 +49,20 @@ ahik %r15, %r0, 0 ahik %r7, %r8, -16 +#CHECK: aih %r0, -2147483648 # encoding: [0xcc,0x08,0x80,0x00,0x00,0x00] +#CHECK: aih %r0, -1 # encoding: [0xcc,0x08,0xff,0xff,0xff,0xff] +#CHECK: aih %r0, 0 # encoding: [0xcc,0x08,0x00,0x00,0x00,0x00] +#CHECK: aih %r0, 1 # encoding: [0xcc,0x08,0x00,0x00,0x00,0x01] +#CHECK: aih %r0, 2147483647 # encoding: [0xcc,0x08,0x7f,0xff,0xff,0xff] +#CHECK: aih %r15, 0 # encoding: [0xcc,0xf8,0x00,0x00,0x00,0x00] + + aih %r0, -1 << 31 + aih %r0, -1 + aih %r0, 0 + aih %r0, 1 + aih %r0, (1 << 31) - 1 + aih %r15, 0 + #CHECK: alghsik %r0, %r0, -32768 # encoding: [0xec,0x00,0x80,0x00,0x00,0xdb] #CHECK: alghsik %r0, %r0, -1 # encoding: [0xec,0x00,0xff,0xff,0x00,0xdb] #CHECK: alghsik %r0, %r0, 0 # encoding: [0xec,0x00,0x00,0x00,0x00,0xdb] @@ -121,6 +135,226 @@ ark %r15,%r0,%r0 ark %r7,%r8,%r9 +#CHECK: chf %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0xcd] +#CHECK: chf %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0xcd] +#CHECK: chf %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0xcd] +#CHECK: chf %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0xcd] +#CHECK: chf %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0xcd] +#CHECK: chf %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0xcd] +#CHECK: chf %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0xcd] +#CHECK: chf %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0xcd] +#CHECK: chf %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0xcd] +#CHECK: chf %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0xcd] + + chf %r0, -524288 + chf %r0, -1 + chf %r0, 0 + chf %r0, 1 + chf %r0, 524287 + chf %r0, 0(%r1) + chf %r0, 0(%r15) + chf %r0, 524287(%r1,%r15) + chf %r0, 524287(%r15,%r1) + chf %r15, 0 + +#CHECK: cih %r0, -2147483648 # encoding: [0xcc,0x0d,0x80,0x00,0x00,0x00] +#CHECK: cih %r0, -1 # encoding: [0xcc,0x0d,0xff,0xff,0xff,0xff] +#CHECK: cih %r0, 0 # encoding: 
[0xcc,0x0d,0x00,0x00,0x00,0x00] +#CHECK: cih %r0, 1 # encoding: [0xcc,0x0d,0x00,0x00,0x00,0x01] +#CHECK: cih %r0, 2147483647 # encoding: [0xcc,0x0d,0x7f,0xff,0xff,0xff] +#CHECK: cih %r15, 0 # encoding: [0xcc,0xfd,0x00,0x00,0x00,0x00] + + cih %r0, -1 << 31 + cih %r0, -1 + cih %r0, 0 + cih %r0, 1 + cih %r0, (1 << 31) - 1 + cih %r15, 0 + +#CHECK: clhf %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0xcf] +#CHECK: clhf %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0xcf] +#CHECK: clhf %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0xcf] +#CHECK: clhf %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0xcf] +#CHECK: clhf %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0xcf] +#CHECK: clhf %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0xcf] +#CHECK: clhf %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0xcf] +#CHECK: clhf %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0xcf] +#CHECK: clhf %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0xcf] +#CHECK: clhf %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0xcf] + + clhf %r0, -524288 + clhf %r0, -1 + clhf %r0, 0 + clhf %r0, 1 + clhf %r0, 524287 + clhf %r0, 0(%r1) + clhf %r0, 0(%r15) + clhf %r0, 524287(%r1,%r15) + clhf %r0, 524287(%r15,%r1) + clhf %r15, 0 + +#CHECK: clih %r0, 0 # encoding: [0xcc,0x0f,0x00,0x00,0x00,0x00] +#CHECK: clih %r0, 1 # encoding: [0xcc,0x0f,0x00,0x00,0x00,0x01] +#CHECK: clih %r0, 4294967295 # encoding: [0xcc,0x0f,0xff,0xff,0xff,0xff] +#CHECK: clih %r15, 0 # encoding: [0xcc,0xff,0x00,0x00,0x00,0x00] + + clih %r0, 0 + clih %r0, 1 + clih %r0, (1 << 32) - 1 + clih %r15, 0 + +#CHECK: fidbra %f0, 0, %f0, 0 # encoding: [0xb3,0x5f,0x00,0x00] +#CHECK: fidbra %f0, 0, %f0, 15 # encoding: [0xb3,0x5f,0x0f,0x00] +#CHECK: fidbra %f0, 0, %f15, 0 # encoding: [0xb3,0x5f,0x00,0x0f] +#CHECK: fidbra %f0, 15, %f0, 0 # encoding: [0xb3,0x5f,0xf0,0x00] +#CHECK: fidbra %f4, 5, %f6, 7 # encoding: [0xb3,0x5f,0x57,0x46] +#CHECK: fidbra %f15, 0, %f0, 0 # encoding: [0xb3,0x5f,0x00,0xf0] + + fidbra %f0, 0, %f0, 
0 + fidbra %f0, 0, %f0, 15 + fidbra %f0, 0, %f15, 0 + fidbra %f0, 15, %f0, 0 + fidbra %f4, 5, %f6, 7 + fidbra %f15, 0, %f0, 0 + +#CHECK: fiebra %f0, 0, %f0, 0 # encoding: [0xb3,0x57,0x00,0x00] +#CHECK: fiebra %f0, 0, %f0, 15 # encoding: [0xb3,0x57,0x0f,0x00] +#CHECK: fiebra %f0, 0, %f15, 0 # encoding: [0xb3,0x57,0x00,0x0f] +#CHECK: fiebra %f0, 15, %f0, 0 # encoding: [0xb3,0x57,0xf0,0x00] +#CHECK: fiebra %f4, 5, %f6, 7 # encoding: [0xb3,0x57,0x57,0x46] +#CHECK: fiebra %f15, 0, %f0, 0 # encoding: [0xb3,0x57,0x00,0xf0] + + fiebra %f0, 0, %f0, 0 + fiebra %f0, 0, %f0, 15 + fiebra %f0, 0, %f15, 0 + fiebra %f0, 15, %f0, 0 + fiebra %f4, 5, %f6, 7 + fiebra %f15, 0, %f0, 0 + +#CHECK: fixbra %f0, 0, %f0, 0 # encoding: [0xb3,0x47,0x00,0x00] +#CHECK: fixbra %f0, 0, %f0, 15 # encoding: [0xb3,0x47,0x0f,0x00] +#CHECK: fixbra %f0, 0, %f13, 0 # encoding: [0xb3,0x47,0x00,0x0d] +#CHECK: fixbra %f0, 15, %f0, 0 # encoding: [0xb3,0x47,0xf0,0x00] +#CHECK: fixbra %f4, 5, %f8, 9 # encoding: [0xb3,0x47,0x59,0x48] +#CHECK: fixbra %f13, 0, %f0, 0 # encoding: [0xb3,0x47,0x00,0xd0] + + fixbra %f0, 0, %f0, 0 + fixbra %f0, 0, %f0, 15 + fixbra %f0, 0, %f13, 0 + fixbra %f0, 15, %f0, 0 + fixbra %f4, 5, %f8, 9 + fixbra %f13, 0, %f0, 0 + +#CHECK: lbh %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0xc0] +#CHECK: lbh %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0xc0] +#CHECK: lbh %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0xc0] +#CHECK: lbh %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0xc0] +#CHECK: lbh %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0xc0] +#CHECK: lbh %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0xc0] +#CHECK: lbh %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0xc0] +#CHECK: lbh %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0xc0] +#CHECK: lbh %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0xc0] +#CHECK: lbh %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0xc0] + + lbh %r0, -524288 + lbh %r0, -1 + lbh %r0, 0 + lbh %r0, 1 + lbh %r0, 524287 + lbh 
%r0, 0(%r1) + lbh %r0, 0(%r15) + lbh %r0, 524287(%r1,%r15) + lbh %r0, 524287(%r15,%r1) + lbh %r15, 0 + +#CHECK: lfh %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0xca] +#CHECK: lfh %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0xca] +#CHECK: lfh %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0xca] +#CHECK: lfh %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0xca] +#CHECK: lfh %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0xca] +#CHECK: lfh %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0xca] +#CHECK: lfh %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0xca] +#CHECK: lfh %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0xca] +#CHECK: lfh %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0xca] +#CHECK: lfh %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0xca] + + lfh %r0, -524288 + lfh %r0, -1 + lfh %r0, 0 + lfh %r0, 1 + lfh %r0, 524287 + lfh %r0, 0(%r1) + lfh %r0, 0(%r15) + lfh %r0, 524287(%r1,%r15) + lfh %r0, 524287(%r15,%r1) + lfh %r15, 0 + +#CHECK: lhh %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0xc4] +#CHECK: lhh %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0xc4] +#CHECK: lhh %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0xc4] +#CHECK: lhh %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0xc4] +#CHECK: lhh %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0xc4] +#CHECK: lhh %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0xc4] +#CHECK: lhh %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0xc4] +#CHECK: lhh %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0xc4] +#CHECK: lhh %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0xc4] +#CHECK: lhh %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0xc4] + + lhh %r0, -524288 + lhh %r0, -1 + lhh %r0, 0 + lhh %r0, 1 + lhh %r0, 524287 + lhh %r0, 0(%r1) + lhh %r0, 0(%r15) + lhh %r0, 524287(%r1,%r15) + lhh %r0, 524287(%r15,%r1) + lhh %r15, 0 + +#CHECK: llch %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0xc2] +#CHECK: llch %r0, -1 # encoding: 
[0xe3,0x00,0x0f,0xff,0xff,0xc2] +#CHECK: llch %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0xc2] +#CHECK: llch %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0xc2] +#CHECK: llch %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0xc2] +#CHECK: llch %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0xc2] +#CHECK: llch %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0xc2] +#CHECK: llch %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0xc2] +#CHECK: llch %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0xc2] +#CHECK: llch %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0xc2] + + llch %r0, -524288 + llch %r0, -1 + llch %r0, 0 + llch %r0, 1 + llch %r0, 524287 + llch %r0, 0(%r1) + llch %r0, 0(%r15) + llch %r0, 524287(%r1,%r15) + llch %r0, 524287(%r15,%r1) + llch %r15, 0 + +#CHECK: llhh %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0xc6] +#CHECK: llhh %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0xc6] +#CHECK: llhh %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0xc6] +#CHECK: llhh %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0xc6] +#CHECK: llhh %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0xc6] +#CHECK: llhh %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0xc6] +#CHECK: llhh %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0xc6] +#CHECK: llhh %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0xc6] +#CHECK: llhh %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0xc6] +#CHECK: llhh %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0xc6] + + llhh %r0, -524288 + llhh %r0, -1 + llhh %r0, 0 + llhh %r0, 1 + llhh %r0, 524287 + llhh %r0, 0(%r1) + llhh %r0, 0(%r15) + llhh %r0, 524287(%r1,%r15) + llhh %r0, 524287(%r15,%r1) + llhh %r15, 0 + #CHECK: loc %r0, 0, 0 # encoding: [0xeb,0x00,0x00,0x00,0x00,0xf2] #CHECK: loc %r0, 0, 15 # encoding: [0xeb,0x0f,0x00,0x00,0x00,0xf2] #CHECK: loc %r0, -524288, 0 # encoding: [0xeb,0x00,0x00,0x00,0x80,0xf2] @@ -495,6 +729,72 @@ srlk %r0,%r0,524287(%r1) srlk %r0,%r0,524287(%r15) +#CHECK: stch %r0, 
-524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0xc3] +#CHECK: stch %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0xc3] +#CHECK: stch %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0xc3] +#CHECK: stch %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0xc3] +#CHECK: stch %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0xc3] +#CHECK: stch %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0xc3] +#CHECK: stch %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0xc3] +#CHECK: stch %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0xc3] +#CHECK: stch %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0xc3] +#CHECK: stch %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0xc3] + + stch %r0, -524288 + stch %r0, -1 + stch %r0, 0 + stch %r0, 1 + stch %r0, 524287 + stch %r0, 0(%r1) + stch %r0, 0(%r15) + stch %r0, 524287(%r1,%r15) + stch %r0, 524287(%r15,%r1) + stch %r15, 0 + +#CHECK: sthh %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0xc7] +#CHECK: sthh %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0xc7] +#CHECK: sthh %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0xc7] +#CHECK: sthh %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0xc7] +#CHECK: sthh %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0xc7] +#CHECK: sthh %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0xc7] +#CHECK: sthh %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0xc7] +#CHECK: sthh %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0xc7] +#CHECK: sthh %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0xc7] +#CHECK: sthh %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0xc7] + + sthh %r0, -524288 + sthh %r0, -1 + sthh %r0, 0 + sthh %r0, 1 + sthh %r0, 524287 + sthh %r0, 0(%r1) + sthh %r0, 0(%r15) + sthh %r0, 524287(%r1,%r15) + sthh %r0, 524287(%r15,%r1) + sthh %r15, 0 + +#CHECK: stfh %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0xcb] +#CHECK: stfh %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0xcb] +#CHECK: stfh %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0xcb] +#CHECK: 
stfh %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0xcb] +#CHECK: stfh %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0xcb] +#CHECK: stfh %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0xcb] +#CHECK: stfh %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0xcb] +#CHECK: stfh %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0xcb] +#CHECK: stfh %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0xcb] +#CHECK: stfh %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0xcb] + + stfh %r0, -524288 + stfh %r0, -1 + stfh %r0, 0 + stfh %r0, 1 + stfh %r0, 524287 + stfh %r0, 0(%r1) + stfh %r0, 0(%r15) + stfh %r0, 524287(%r1,%r15) + stfh %r0, 524287(%r15,%r1) + stfh %r15, 0 + #CHECK: stoc %r0, 0, 0 # encoding: [0xeb,0x00,0x00,0x00,0x00,0xf3] #CHECK: stoc %r0, 0, 15 # encoding: [0xeb,0x0f,0x00,0x00,0x00,0xf3] #CHECK: stoc %r0, -524288, 0 # encoding: [0xeb,0x00,0x00,0x00,0x80,0xf3] diff --git a/test/MC/SystemZ/insn-good.s b/test/MC/SystemZ/insn-good.s index c997271..23bd68a 100644 --- a/test/MC/SystemZ/insn-good.s +++ b/test/MC/SystemZ/insn-good.s @@ -527,11 +527,102 @@ basr %r14,%r9 basr %r15,%r1 +#CHECK: bcr 0, %r0 # encoding: [0x07,0x00] +#CHECK: bcr 0, %r15 # encoding: [0x07,0x0f] + bcr 0, %r0 + bcr 0, %r15 + +#CHECK: bcr 1, %r7 # encoding: [0x07,0x17] +#CHECK: bor %r15 # encoding: [0x07,0x1f] + + bcr 1, %r7 + bor %r15 + +#CHECK: bcr 2, %r7 # encoding: [0x07,0x27] +#CHECK: bhr %r15 # encoding: [0x07,0x2f] + + bcr 2, %r7 + bhr %r15 + +#CHECK: bcr 3, %r7 # encoding: [0x07,0x37] +#CHECK: bnler %r15 # encoding: [0x07,0x3f] + + bcr 3, %r7 + bnler %r15 + +#CHECK: bcr 4, %r7 # encoding: [0x07,0x47] +#CHECK: blr %r15 # encoding: [0x07,0x4f] + + bcr 4, %r7 + blr %r15 + +#CHECK: bcr 5, %r7 # encoding: [0x07,0x57] +#CHECK: bnher %r15 # encoding: [0x07,0x5f] + + bcr 5, %r7 + bnher %r15 + +#CHECK: bcr 6, %r7 # encoding: [0x07,0x67] +#CHECK: blhr %r15 # encoding: [0x07,0x6f] + + bcr 6, %r7 + blhr %r15 + +#CHECK: bcr 7, %r7 # encoding: [0x07,0x77] +#CHECK: bner %r15 # 
encoding: [0x07,0x7f] + + bcr 7, %r7 + bner %r15 + +#CHECK: bcr 8, %r7 # encoding: [0x07,0x87] +#CHECK: ber %r15 # encoding: [0x07,0x8f] + + bcr 8, %r7 + ber %r15 + +#CHECK: bcr 9, %r7 # encoding: [0x07,0x97] +#CHECK: bnlhr %r15 # encoding: [0x07,0x9f] + + bcr 9, %r7 + bnlhr %r15 + +#CHECK: bcr 10, %r7 # encoding: [0x07,0xa7] +#CHECK: bher %r15 # encoding: [0x07,0xaf] + + bcr 10, %r7 + bher %r15 + +#CHECK: bcr 11, %r7 # encoding: [0x07,0xb7] +#CHECK: bnlr %r15 # encoding: [0x07,0xbf] + + bcr 11, %r7 + bnlr %r15 + +#CHECK: bcr 12, %r7 # encoding: [0x07,0xc7] +#CHECK: bler %r15 # encoding: [0x07,0xcf] + + bcr 12, %r7 + bler %r15 + +#CHECK: bcr 13, %r7 # encoding: [0x07,0xd7] +#CHECK: bnhr %r15 # encoding: [0x07,0xdf] + + bcr 13, %r7 + bnhr %r15 + +#CHECK: bcr 14, %r7 # encoding: [0x07,0xe7] +#CHECK: bnor %r15 # encoding: [0x07,0xef] + + bcr 14, %r7 + bnor %r15 + +#CHECK: bcr 15, %r7 # encoding: [0x07,0xf7] #CHECK: br %r1 # encoding: [0x07,0xf1] #CHECK: br %r14 # encoding: [0x07,0xfe] #CHECK: br %r15 # encoding: [0x07,0xff] + bcr 15, %r7 br %r1 br %r14 br %r15 @@ -2454,6 +2545,32 @@ cl %r0, 4095(%r15,%r1) cl %r15, 0 +#CHECK: clc 0(1), 0 # encoding: [0xd5,0x00,0x00,0x00,0x00,0x00] +#CHECK: clc 0(1), 0(%r1) # encoding: [0xd5,0x00,0x00,0x00,0x10,0x00] +#CHECK: clc 0(1), 0(%r15) # encoding: [0xd5,0x00,0x00,0x00,0xf0,0x00] +#CHECK: clc 0(1), 4095 # encoding: [0xd5,0x00,0x00,0x00,0x0f,0xff] +#CHECK: clc 0(1), 4095(%r1) # encoding: [0xd5,0x00,0x00,0x00,0x1f,0xff] +#CHECK: clc 0(1), 4095(%r15) # encoding: [0xd5,0x00,0x00,0x00,0xff,0xff] +#CHECK: clc 0(1,%r1), 0 # encoding: [0xd5,0x00,0x10,0x00,0x00,0x00] +#CHECK: clc 0(1,%r15), 0 # encoding: [0xd5,0x00,0xf0,0x00,0x00,0x00] +#CHECK: clc 4095(1,%r1), 0 # encoding: [0xd5,0x00,0x1f,0xff,0x00,0x00] +#CHECK: clc 4095(1,%r15), 0 # encoding: [0xd5,0x00,0xff,0xff,0x00,0x00] +#CHECK: clc 0(256,%r1), 0 # encoding: [0xd5,0xff,0x10,0x00,0x00,0x00] +#CHECK: clc 0(256,%r15), 0 # encoding: [0xd5,0xff,0xf0,0x00,0x00,0x00] + + clc 0(1), 0 + 
clc 0(1), 0(%r1) + clc 0(1), 0(%r15) + clc 0(1), 4095 + clc 0(1), 4095(%r1) + clc 0(1), 4095(%r15) + clc 0(1,%r1), 0 + clc 0(1,%r15), 0 + clc 4095(1,%r1), 0 + clc 4095(1,%r15), 0 + clc 0(256,%r1), 0 + clc 0(256,%r15), 0 + #CHECK: clfhsi 0, 0 # encoding: [0xe5,0x5d,0x00,0x00,0x00,0x00] #CHECK: clfhsi 4095, 0 # encoding: [0xe5,0x5d,0x0f,0xff,0x00,0x00] #CHECK: clfhsi 0, 65535 # encoding: [0xe5,0x5d,0x00,0x00,0xff,0xff] @@ -2630,6 +2747,233 @@ clghsi 4095(%r1), 42 clghsi 4095(%r15), 42 +#CHECK: clgij %r0, 0, 0, .[[LAB:L.*]] # encoding: [0xec,0x00,A,A,0x00,0x7d] +#CHECK: fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL +#CHECK: clgij %r0, 255, 0, .[[LAB:L.*]] # encoding: [0xec,0x00,A,A,0xff,0x7d] +#CHECK: fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL +#CHECK: clgij %r15, 0, 0, .[[LAB:L.*]] # encoding: [0xec,0xf0,A,A,0x00,0x7d] +#CHECK: fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL + clgij %r0, 0, 0, 0 + clgij %r0, 255, 0, 0 + clgij %r15, 0, 0, 0 + +#CHECK: clgij %r1, 193, 0, .[[LAB:L.*]]-65536 # encoding: [0xec,0x10,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: (.[[LAB]]-65536)+2, kind: FK_390_PC16DBL + clgij %r1, 193, 0, -0x10000 +#CHECK: clgij %r1, 193, 0, .[[LAB:L.*]]-2 # encoding: [0xec,0x10,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC16DBL + clgij %r1, 193, 0, -2 +#CHECK: clgij %r1, 193, 0, .[[LAB:L.*]] # encoding: [0xec,0x10,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL + clgij %r1, 193, 0, 0 +#CHECK: clgij %r1, 193, 0, .[[LAB:L.*]]+65534 # encoding: [0xec,0x10,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: (.[[LAB]]+65534)+2, kind: FK_390_PC16DBL + clgij %r1, 193, 0, 0xfffe + +#CHECK: clgij %r1, 193, 0, foo # encoding: [0xec,0x10,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgij %r1, 193, 0, foo + +#CHECK: clgij %r1, 193, 1, foo # encoding: [0xec,0x11,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, 
value: foo+2, kind: FK_390_PC16DBL + clgij %r1, 193, 1, foo + +#CHECK: clgij %r1, 193, 2, foo # encoding: [0xec,0x12,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clgijh %r1, 193, foo # encoding: [0xec,0x12,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clgijnle %r1, 193, foo # encoding: [0xec,0x12,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgij %r1, 193, 2, foo + clgijh %r1, 193, foo + clgijnle %r1, 193, foo + +#CHECK: clgij %r1, 193, 3, foo # encoding: [0xec,0x13,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgij %r1, 193, 3, foo + +#CHECK: clgij %r1, 193, 4, foo # encoding: [0xec,0x14,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clgijl %r1, 193, foo # encoding: [0xec,0x14,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clgijnhe %r1, 193, foo # encoding: [0xec,0x14,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgij %r1, 193, 4, foo + clgijl %r1, 193, foo + clgijnhe %r1, 193, foo + +#CHECK: clgij %r1, 193, 5, foo # encoding: [0xec,0x15,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgij %r1, 193, 5, foo + +#CHECK: clgij %r1, 193, 6, foo # encoding: [0xec,0x16,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clgijlh %r1, 193, foo # encoding: [0xec,0x16,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clgijne %r1, 193, foo # encoding: [0xec,0x16,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgij %r1, 193, 6, foo + clgijlh %r1, 193, foo + clgijne %r1, 193, foo + +#CHECK: clgij %r1, 193, 7, foo # encoding: [0xec,0x17,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgij %r1, 193, 7, foo + +#CHECK: clgij 
%r1, 193, 8, foo # encoding: [0xec,0x18,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clgije %r1, 193, foo # encoding: [0xec,0x18,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clgijnlh %r1, 193, foo # encoding: [0xec,0x18,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgij %r1, 193, 8, foo + clgije %r1, 193, foo + clgijnlh %r1, 193, foo + +#CHECK: clgij %r1, 193, 9, foo # encoding: [0xec,0x19,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgij %r1, 193, 9, foo + +#CHECK: clgij %r1, 193, 10, foo # encoding: [0xec,0x1a,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clgijhe %r1, 193, foo # encoding: [0xec,0x1a,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clgijnl %r1, 193, foo # encoding: [0xec,0x1a,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgij %r1, 193, 10, foo + clgijhe %r1, 193, foo + clgijnl %r1, 193, foo + +#CHECK: clgij %r1, 193, 11, foo # encoding: [0xec,0x1b,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgij %r1, 193, 11, foo + +#CHECK: clgij %r1, 193, 12, foo # encoding: [0xec,0x1c,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clgijle %r1, 193, foo # encoding: [0xec,0x1c,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clgijnh %r1, 193, foo # encoding: [0xec,0x1c,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgij %r1, 193, 12, foo + clgijle %r1, 193, foo + clgijnh %r1, 193, foo + +#CHECK: clgij %r1, 193, 13, foo # encoding: [0xec,0x1d,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgij %r1, 193, 13, foo + +#CHECK: clgij %r1, 193, 14, foo # encoding: [0xec,0x1e,A,A,0xc1,0x7d] +#CHECK: fixup 
A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgij %r1, 193, 14, foo + +#CHECK: clgij %r1, 193, 15, foo # encoding: [0xec,0x1f,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgij %r1, 193, 15, foo + +#CHECK: clgij %r1, 193, 0, bar+100 # encoding: [0xec,0x10,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clgij %r1, 193, 0, bar+100 + +#CHECK: clgijh %r1, 193, bar+100 # encoding: [0xec,0x12,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clgijh %r1, 193, bar+100 + +#CHECK: clgijnle %r1, 193, bar+100 # encoding: [0xec,0x12,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clgijnle %r1, 193, bar+100 + +#CHECK: clgijl %r1, 193, bar+100 # encoding: [0xec,0x14,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clgijl %r1, 193, bar+100 + +#CHECK: clgijnhe %r1, 193, bar+100 # encoding: [0xec,0x14,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clgijnhe %r1, 193, bar+100 + +#CHECK: clgijlh %r1, 193, bar+100 # encoding: [0xec,0x16,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clgijlh %r1, 193, bar+100 + +#CHECK: clgijne %r1, 193, bar+100 # encoding: [0xec,0x16,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clgijne %r1, 193, bar+100 + +#CHECK: clgije %r1, 193, bar+100 # encoding: [0xec,0x18,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clgije %r1, 193, bar+100 + +#CHECK: clgijnlh %r1, 193, bar+100 # encoding: [0xec,0x18,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clgijnlh %r1, 193, bar+100 + +#CHECK: clgijhe %r1, 193, bar+100 # encoding: [0xec,0x1a,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clgijhe %r1, 193, bar+100 + +#CHECK: 
clgijnl %r1, 193, bar+100 # encoding: [0xec,0x1a,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clgijnl %r1, 193, bar+100 + +#CHECK: clgijle %r1, 193, bar+100 # encoding: [0xec,0x1c,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clgijle %r1, 193, bar+100 + +#CHECK: clgijnh %r1, 193, bar+100 # encoding: [0xec,0x1c,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clgijnh %r1, 193, bar+100 + +#CHECK: clgij %r1, 193, 0, bar@PLT # encoding: [0xec,0x10,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clgij %r1, 193, 0, bar@PLT + +#CHECK: clgijh %r1, 193, bar@PLT # encoding: [0xec,0x12,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clgijh %r1, 193, bar@PLT + +#CHECK: clgijnle %r1, 193, bar@PLT # encoding: [0xec,0x12,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clgijnle %r1, 193, bar@PLT + +#CHECK: clgijl %r1, 193, bar@PLT # encoding: [0xec,0x14,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clgijl %r1, 193, bar@PLT + +#CHECK: clgijnhe %r1, 193, bar@PLT # encoding: [0xec,0x14,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clgijnhe %r1, 193, bar@PLT + +#CHECK: clgijlh %r1, 193, bar@PLT # encoding: [0xec,0x16,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clgijlh %r1, 193, bar@PLT + +#CHECK: clgijne %r1, 193, bar@PLT # encoding: [0xec,0x16,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clgijne %r1, 193, bar@PLT + +#CHECK: clgije %r1, 193, bar@PLT # encoding: [0xec,0x18,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clgije %r1, 193, bar@PLT + +#CHECK: clgijnlh %r1, 193, bar@PLT # encoding: [0xec,0x18,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: 
bar@PLT+2, kind: FK_390_PC16DBL + clgijnlh %r1, 193, bar@PLT + +#CHECK: clgijhe %r1, 193, bar@PLT # encoding: [0xec,0x1a,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clgijhe %r1, 193, bar@PLT + +#CHECK: clgijnl %r1, 193, bar@PLT # encoding: [0xec,0x1a,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clgijnl %r1, 193, bar@PLT + +#CHECK: clgijle %r1, 193, bar@PLT # encoding: [0xec,0x1c,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clgijle %r1, 193, bar@PLT + +#CHECK: clgijnh %r1, 193, bar@PLT # encoding: [0xec,0x1c,A,A,0xc1,0x7d] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clgijnh %r1, 193, bar@PLT + #CHECK: clgr %r0, %r0 # encoding: [0xb9,0x21,0x00,0x00] #CHECK: clgr %r0, %r15 # encoding: [0xb9,0x21,0x00,0x0f] #CHECK: clgr %r15, %r0 # encoding: [0xb9,0x21,0x00,0xf0] @@ -2640,6 +2984,236 @@ clgr %r15,%r0 clgr %r7,%r8 +#CHECK: clgrj %r0, %r0, 0, .[[LAB:L.*]] # encoding: [0xec,0x00,A,A,0x00,0x65] +#CHECK: fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL +#CHECK: clgrj %r0, %r15, 0, .[[LAB:L.*]] # encoding: [0xec,0x0f,A,A,0x00,0x65] +#CHECK: fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL +#CHECK: clgrj %r15, %r0, 0, .[[LAB:L.*]] # encoding: [0xec,0xf0,A,A,0x00,0x65] +#CHECK: fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL +#CHECK: clgrj %r7, %r8, 0, .[[LAB:L.*]] # encoding: [0xec,0x78,A,A,0x00,0x65] +#CHECK: fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL + clgrj %r0,%r0,0,0 + clgrj %r0,%r15,0,0 + clgrj %r15,%r0,0,0 + clgrj %r7,%r8,0,0 + +#CHECK: clgrj %r1, %r2, 0, .[[LAB:L.*]]-65536 # encoding: [0xec,0x12,A,A,0x00,0x65] +#CHECK: fixup A - offset: 2, value: (.[[LAB]]-65536)+2, kind: FK_390_PC16DBL + clgrj %r1, %r2, 0, -0x10000 +#CHECK: clgrj %r1, %r2, 0, .[[LAB:L.*]]-2 # encoding: [0xec,0x12,A,A,0x00,0x65] +#CHECK: fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: 
FK_390_PC16DBL + clgrj %r1, %r2, 0, -2 +#CHECK: clgrj %r1, %r2, 0, .[[LAB:L.*]] # encoding: [0xec,0x12,A,A,0x00,0x65] +#CHECK: fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL + clgrj %r1, %r2, 0, 0 +#CHECK: clgrj %r1, %r2, 0, .[[LAB:L.*]]+65534 # encoding: [0xec,0x12,A,A,0x00,0x65] +#CHECK: fixup A - offset: 2, value: (.[[LAB]]+65534)+2, kind: FK_390_PC16DBL + clgrj %r1, %r2, 0, 0xfffe + +#CHECK: clgrj %r1, %r2, 0, foo # encoding: [0xec,0x12,A,A,0x00,0x65] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgrj %r1, %r2, 0, foo + +#CHECK: clgrj %r1, %r2, 1, foo # encoding: [0xec,0x12,A,A,0x10,0x65] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgrj %r1, %r2, 1, foo + +#CHECK: clgrj %r1, %r2, 2, foo # encoding: [0xec,0x12,A,A,0x20,0x65] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clgrjh %r1, %r2, foo # encoding: [0xec,0x12,A,A,0x20,0x65] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clgrjnle %r1, %r2, foo # encoding: [0xec,0x12,A,A,0x20,0x65] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgrj %r1, %r2, 2, foo + clgrjh %r1, %r2, foo + clgrjnle %r1, %r2, foo + +#CHECK: clgrj %r1, %r2, 3, foo # encoding: [0xec,0x12,A,A,0x30,0x65] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgrj %r1, %r2, 3, foo + +#CHECK: clgrj %r1, %r2, 4, foo # encoding: [0xec,0x12,A,A,0x40,0x65] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clgrjl %r1, %r2, foo # encoding: [0xec,0x12,A,A,0x40,0x65] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clgrjnhe %r1, %r2, foo # encoding: [0xec,0x12,A,A,0x40,0x65] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgrj %r1, %r2, 4, foo + clgrjl %r1, %r2, foo + clgrjnhe %r1, %r2, foo + +#CHECK: clgrj %r1, %r2, 5, foo # encoding: [0xec,0x12,A,A,0x50,0x65] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgrj %r1, %r2, 
5, foo + +#CHECK: clgrj %r1, %r2, 6, foo # encoding: [0xec,0x12,A,A,0x60,0x65] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clgrjlh %r1, %r2, foo # encoding: [0xec,0x12,A,A,0x60,0x65] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clgrjne %r1, %r2, foo # encoding: [0xec,0x12,A,A,0x60,0x65] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgrj %r1, %r2, 6, foo + clgrjlh %r1, %r2, foo + clgrjne %r1, %r2, foo + +#CHECK: clgrj %r1, %r2, 7, foo # encoding: [0xec,0x12,A,A,0x70,0x65] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgrj %r1, %r2, 7, foo + +#CHECK: clgrj %r1, %r2, 8, foo # encoding: [0xec,0x12,A,A,0x80,0x65] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clgrje %r1, %r2, foo # encoding: [0xec,0x12,A,A,0x80,0x65] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clgrjnlh %r1, %r2, foo # encoding: [0xec,0x12,A,A,0x80,0x65] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgrj %r1, %r2, 8, foo + clgrje %r1, %r2, foo + clgrjnlh %r1, %r2, foo + +#CHECK: clgrj %r1, %r2, 9, foo # encoding: [0xec,0x12,A,A,0x90,0x65] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgrj %r1, %r2, 9, foo + +#CHECK: clgrj %r1, %r2, 10, foo # encoding: [0xec,0x12,A,A,0xa0,0x65] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clgrjhe %r1, %r2, foo # encoding: [0xec,0x12,A,A,0xa0,0x65] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clgrjnl %r1, %r2, foo # encoding: [0xec,0x12,A,A,0xa0,0x65] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgrj %r1, %r2, 10, foo + clgrjhe %r1, %r2, foo + clgrjnl %r1, %r2, foo + +#CHECK: clgrj %r1, %r2, 11, foo # encoding: [0xec,0x12,A,A,0xb0,0x65] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgrj %r1, %r2, 11, foo + +#CHECK: clgrj %r1, %r2, 12, foo # encoding: 
[0xec,0x12,A,A,0xc0,0x65] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clgrjle %r1, %r2, foo # encoding: [0xec,0x12,A,A,0xc0,0x65] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clgrjnh %r1, %r2, foo # encoding: [0xec,0x12,A,A,0xc0,0x65] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgrj %r1, %r2, 12, foo + clgrjle %r1, %r2, foo + clgrjnh %r1, %r2, foo + +#CHECK: clgrj %r1, %r2, 13, foo # encoding: [0xec,0x12,A,A,0xd0,0x65] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgrj %r1, %r2, 13, foo + +#CHECK: clgrj %r1, %r2, 14, foo # encoding: [0xec,0x12,A,A,0xe0,0x65] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgrj %r1, %r2, 14, foo + +#CHECK: clgrj %r1, %r2, 15, foo # encoding: [0xec,0x12,A,A,0xf0,0x65] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clgrj %r1, %r2, 15, foo + +#CHECK: clgrj %r1, %r2, 0, bar+100 # encoding: [0xec,0x12,A,A,0x00,0x65] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clgrj %r1, %r2, 0, bar+100 + +#CHECK: clgrjh %r1, %r2, bar+100 # encoding: [0xec,0x12,A,A,0x20,0x65] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clgrjh %r1, %r2, bar+100 + +#CHECK: clgrjnle %r1, %r2, bar+100 # encoding: [0xec,0x12,A,A,0x20,0x65] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clgrjnle %r1, %r2, bar+100 + +#CHECK: clgrjl %r1, %r2, bar+100 # encoding: [0xec,0x12,A,A,0x40,0x65] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clgrjl %r1, %r2, bar+100 + +#CHECK: clgrjnhe %r1, %r2, bar+100 # encoding: [0xec,0x12,A,A,0x40,0x65] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clgrjnhe %r1, %r2, bar+100 + +#CHECK: clgrjlh %r1, %r2, bar+100 # encoding: [0xec,0x12,A,A,0x60,0x65] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clgrjlh %r1, %r2, bar+100 + +#CHECK: clgrjne %r1, %r2, 
bar+100 # encoding: [0xec,0x12,A,A,0x60,0x65] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clgrjne %r1, %r2, bar+100 + +#CHECK: clgrje %r1, %r2, bar+100 # encoding: [0xec,0x12,A,A,0x80,0x65] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clgrje %r1, %r2, bar+100 + +#CHECK: clgrjnlh %r1, %r2, bar+100 # encoding: [0xec,0x12,A,A,0x80,0x65] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clgrjnlh %r1, %r2, bar+100 + +#CHECK: clgrjhe %r1, %r2, bar+100 # encoding: [0xec,0x12,A,A,0xa0,0x65] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clgrjhe %r1, %r2, bar+100 + +#CHECK: clgrjnl %r1, %r2, bar+100 # encoding: [0xec,0x12,A,A,0xa0,0x65] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clgrjnl %r1, %r2, bar+100 + +#CHECK: clgrjle %r1, %r2, bar+100 # encoding: [0xec,0x12,A,A,0xc0,0x65] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clgrjle %r1, %r2, bar+100 + +#CHECK: clgrjnh %r1, %r2, bar+100 # encoding: [0xec,0x12,A,A,0xc0,0x65] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clgrjnh %r1, %r2, bar+100 + +#CHECK: clgrj %r1, %r2, 0, bar@PLT # encoding: [0xec,0x12,A,A,0x00,0x65] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clgrj %r1, %r2, 0, bar@PLT + +#CHECK: clgrjh %r1, %r2, bar@PLT # encoding: [0xec,0x12,A,A,0x20,0x65] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clgrjh %r1, %r2, bar@PLT + +#CHECK: clgrjnle %r1, %r2, bar@PLT # encoding: [0xec,0x12,A,A,0x20,0x65] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clgrjnle %r1, %r2, bar@PLT + +#CHECK: clgrjl %r1, %r2, bar@PLT # encoding: [0xec,0x12,A,A,0x40,0x65] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clgrjl %r1, %r2, bar@PLT + +#CHECK: clgrjnhe %r1, %r2, bar@PLT # encoding: [0xec,0x12,A,A,0x40,0x65] +#CHECK: fixup A - offset: 2, value: 
bar@PLT+2, kind: FK_390_PC16DBL + clgrjnhe %r1, %r2, bar@PLT + +#CHECK: clgrjlh %r1, %r2, bar@PLT # encoding: [0xec,0x12,A,A,0x60,0x65] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clgrjlh %r1, %r2, bar@PLT + +#CHECK: clgrjne %r1, %r2, bar@PLT # encoding: [0xec,0x12,A,A,0x60,0x65] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clgrjne %r1, %r2, bar@PLT + +#CHECK: clgrje %r1, %r2, bar@PLT # encoding: [0xec,0x12,A,A,0x80,0x65] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clgrje %r1, %r2, bar@PLT + +#CHECK: clgrjnlh %r1, %r2, bar@PLT # encoding: [0xec,0x12,A,A,0x80,0x65] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clgrjnlh %r1, %r2, bar@PLT + +#CHECK: clgrjhe %r1, %r2, bar@PLT # encoding: [0xec,0x12,A,A,0xa0,0x65] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clgrjhe %r1, %r2, bar@PLT + +#CHECK: clgrjnl %r1, %r2, bar@PLT # encoding: [0xec,0x12,A,A,0xa0,0x65] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clgrjnl %r1, %r2, bar@PLT + +#CHECK: clgrjle %r1, %r2, bar@PLT # encoding: [0xec,0x12,A,A,0xc0,0x65] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clgrjle %r1, %r2, bar@PLT + +#CHECK: clgrjnh %r1, %r2, bar@PLT # encoding: [0xec,0x12,A,A,0xc0,0x65] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clgrjnh %r1, %r2, bar@PLT + #CHECK: clgrl %r0, .[[LAB:L.*]]-4294967296 # encoding: [0xc6,0x0a,A,A,A,A] #CHECK: fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL clgrl %r0, -0x100000000 @@ -2746,6 +3320,233 @@ cli 4095(%r1), 42 cli 4095(%r15), 42 +#CHECK: clij %r0, 0, 0, .[[LAB:L.*]] # encoding: [0xec,0x00,A,A,0x00,0x7f] +#CHECK: fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL +#CHECK: clij %r0, 255, 0, .[[LAB:L.*]] # encoding: [0xec,0x00,A,A,0xff,0x7f] +#CHECK: fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL +#CHECK: clij %r15, 0, 
0, .[[LAB:L.*]] # encoding: [0xec,0xf0,A,A,0x00,0x7f] +#CHECK: fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL + clij %r0, 0, 0, 0 + clij %r0, 255, 0, 0 + clij %r15, 0, 0, 0 + +#CHECK: clij %r1, 193, 0, .[[LAB:L.*]]-65536 # encoding: [0xec,0x10,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: (.[[LAB]]-65536)+2, kind: FK_390_PC16DBL + clij %r1, 193, 0, -0x10000 +#CHECK: clij %r1, 193, 0, .[[LAB:L.*]]-2 # encoding: [0xec,0x10,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC16DBL + clij %r1, 193, 0, -2 +#CHECK: clij %r1, 193, 0, .[[LAB:L.*]] # encoding: [0xec,0x10,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL + clij %r1, 193, 0, 0 +#CHECK: clij %r1, 193, 0, .[[LAB:L.*]]+65534 # encoding: [0xec,0x10,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: (.[[LAB]]+65534)+2, kind: FK_390_PC16DBL + clij %r1, 193, 0, 0xfffe + +#CHECK: clij %r1, 193, 0, foo # encoding: [0xec,0x10,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clij %r1, 193, 0, foo + +#CHECK: clij %r1, 193, 1, foo # encoding: [0xec,0x11,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clij %r1, 193, 1, foo + +#CHECK: clij %r1, 193, 2, foo # encoding: [0xec,0x12,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clijh %r1, 193, foo # encoding: [0xec,0x12,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clijnle %r1, 193, foo # encoding: [0xec,0x12,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clij %r1, 193, 2, foo + clijh %r1, 193, foo + clijnle %r1, 193, foo + +#CHECK: clij %r1, 193, 3, foo # encoding: [0xec,0x13,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clij %r1, 193, 3, foo + +#CHECK: clij %r1, 193, 4, foo # encoding: [0xec,0x14,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: foo+2, kind: 
FK_390_PC16DBL +#CHECK: clijl %r1, 193, foo # encoding: [0xec,0x14,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clijnhe %r1, 193, foo # encoding: [0xec,0x14,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clij %r1, 193, 4, foo + clijl %r1, 193, foo + clijnhe %r1, 193, foo + +#CHECK: clij %r1, 193, 5, foo # encoding: [0xec,0x15,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clij %r1, 193, 5, foo + +#CHECK: clij %r1, 193, 6, foo # encoding: [0xec,0x16,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clijlh %r1, 193, foo # encoding: [0xec,0x16,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clijne %r1, 193, foo # encoding: [0xec,0x16,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clij %r1, 193, 6, foo + clijlh %r1, 193, foo + clijne %r1, 193, foo + +#CHECK: clij %r1, 193, 7, foo # encoding: [0xec,0x17,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clij %r1, 193, 7, foo + +#CHECK: clij %r1, 193, 8, foo # encoding: [0xec,0x18,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clije %r1, 193, foo # encoding: [0xec,0x18,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clijnlh %r1, 193, foo # encoding: [0xec,0x18,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clij %r1, 193, 8, foo + clije %r1, 193, foo + clijnlh %r1, 193, foo + +#CHECK: clij %r1, 193, 9, foo # encoding: [0xec,0x19,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clij %r1, 193, 9, foo + +#CHECK: clij %r1, 193, 10, foo # encoding: [0xec,0x1a,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clijhe %r1, 193, foo # encoding: [0xec,0x1a,A,A,0xc1,0x7f] +#CHECK: fixup A - 
offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clijnl %r1, 193, foo # encoding: [0xec,0x1a,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clij %r1, 193, 10, foo + clijhe %r1, 193, foo + clijnl %r1, 193, foo + +#CHECK: clij %r1, 193, 11, foo # encoding: [0xec,0x1b,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clij %r1, 193, 11, foo + +#CHECK: clij %r1, 193, 12, foo # encoding: [0xec,0x1c,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clijle %r1, 193, foo # encoding: [0xec,0x1c,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clijnh %r1, 193, foo # encoding: [0xec,0x1c,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clij %r1, 193, 12, foo + clijle %r1, 193, foo + clijnh %r1, 193, foo + +#CHECK: clij %r1, 193, 13, foo # encoding: [0xec,0x1d,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clij %r1, 193, 13, foo + +#CHECK: clij %r1, 193, 14, foo # encoding: [0xec,0x1e,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clij %r1, 193, 14, foo + +#CHECK: clij %r1, 193, 15, foo # encoding: [0xec,0x1f,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clij %r1, 193, 15, foo + +#CHECK: clij %r1, 193, 0, bar+100 # encoding: [0xec,0x10,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clij %r1, 193, 0, bar+100 + +#CHECK: clijh %r1, 193, bar+100 # encoding: [0xec,0x12,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clijh %r1, 193, bar+100 + +#CHECK: clijnle %r1, 193, bar+100 # encoding: [0xec,0x12,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clijnle %r1, 193, bar+100 + +#CHECK: clijl %r1, 193, bar+100 # encoding: [0xec,0x14,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: 
(bar+100)+2, kind: FK_390_PC16DBL + clijl %r1, 193, bar+100 + +#CHECK: clijnhe %r1, 193, bar+100 # encoding: [0xec,0x14,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clijnhe %r1, 193, bar+100 + +#CHECK: clijlh %r1, 193, bar+100 # encoding: [0xec,0x16,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clijlh %r1, 193, bar+100 + +#CHECK: clijne %r1, 193, bar+100 # encoding: [0xec,0x16,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clijne %r1, 193, bar+100 + +#CHECK: clije %r1, 193, bar+100 # encoding: [0xec,0x18,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clije %r1, 193, bar+100 + +#CHECK: clijnlh %r1, 193, bar+100 # encoding: [0xec,0x18,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clijnlh %r1, 193, bar+100 + +#CHECK: clijhe %r1, 193, bar+100 # encoding: [0xec,0x1a,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clijhe %r1, 193, bar+100 + +#CHECK: clijnl %r1, 193, bar+100 # encoding: [0xec,0x1a,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clijnl %r1, 193, bar+100 + +#CHECK: clijle %r1, 193, bar+100 # encoding: [0xec,0x1c,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clijle %r1, 193, bar+100 + +#CHECK: clijnh %r1, 193, bar+100 # encoding: [0xec,0x1c,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clijnh %r1, 193, bar+100 + +#CHECK: clij %r1, 193, 0, bar@PLT # encoding: [0xec,0x10,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clij %r1, 193, 0, bar@PLT + +#CHECK: clijh %r1, 193, bar@PLT # encoding: [0xec,0x12,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clijh %r1, 193, bar@PLT + +#CHECK: clijnle %r1, 193, bar@PLT # encoding: 
[0xec,0x12,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clijnle %r1, 193, bar@PLT + +#CHECK: clijl %r1, 193, bar@PLT # encoding: [0xec,0x14,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clijl %r1, 193, bar@PLT + +#CHECK: clijnhe %r1, 193, bar@PLT # encoding: [0xec,0x14,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clijnhe %r1, 193, bar@PLT + +#CHECK: clijlh %r1, 193, bar@PLT # encoding: [0xec,0x16,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clijlh %r1, 193, bar@PLT + +#CHECK: clijne %r1, 193, bar@PLT # encoding: [0xec,0x16,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clijne %r1, 193, bar@PLT + +#CHECK: clije %r1, 193, bar@PLT # encoding: [0xec,0x18,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clije %r1, 193, bar@PLT + +#CHECK: clijnlh %r1, 193, bar@PLT # encoding: [0xec,0x18,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clijnlh %r1, 193, bar@PLT + +#CHECK: clijhe %r1, 193, bar@PLT # encoding: [0xec,0x1a,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clijhe %r1, 193, bar@PLT + +#CHECK: clijnl %r1, 193, bar@PLT # encoding: [0xec,0x1a,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clijnl %r1, 193, bar@PLT + +#CHECK: clijle %r1, 193, bar@PLT # encoding: [0xec,0x1c,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clijle %r1, 193, bar@PLT + +#CHECK: clijnh %r1, 193, bar@PLT # encoding: [0xec,0x1c,A,A,0xc1,0x7f] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clijnh %r1, 193, bar@PLT + #CHECK: cliy -524288, 0 # encoding: [0xeb,0x00,0x00,0x00,0x80,0x55] #CHECK: cliy -1, 0 # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x55] #CHECK: cliy 0, 0 # encoding: 
[0xeb,0x00,0x00,0x00,0x00,0x55] @@ -2778,6 +3579,236 @@ clr %r15,%r0 clr %r7,%r8 +#CHECK: clrj %r0, %r0, 0, .[[LAB:L.*]] # encoding: [0xec,0x00,A,A,0x00,0x77] +#CHECK: fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL +#CHECK: clrj %r0, %r15, 0, .[[LAB:L.*]] # encoding: [0xec,0x0f,A,A,0x00,0x77] +#CHECK: fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL +#CHECK: clrj %r15, %r0, 0, .[[LAB:L.*]] # encoding: [0xec,0xf0,A,A,0x00,0x77] +#CHECK: fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL +#CHECK: clrj %r7, %r8, 0, .[[LAB:L.*]] # encoding: [0xec,0x78,A,A,0x00,0x77] +#CHECK: fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL + clrj %r0,%r0,0,0 + clrj %r0,%r15,0,0 + clrj %r15,%r0,0,0 + clrj %r7,%r8,0,0 + +#CHECK: clrj %r1, %r2, 0, .[[LAB:L.*]]-65536 # encoding: [0xec,0x12,A,A,0x00,0x77] +#CHECK: fixup A - offset: 2, value: (.[[LAB]]-65536)+2, kind: FK_390_PC16DBL + clrj %r1, %r2, 0, -0x10000 +#CHECK: clrj %r1, %r2, 0, .[[LAB:L.*]]-2 # encoding: [0xec,0x12,A,A,0x00,0x77] +#CHECK: fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC16DBL + clrj %r1, %r2, 0, -2 +#CHECK: clrj %r1, %r2, 0, .[[LAB:L.*]] # encoding: [0xec,0x12,A,A,0x00,0x77] +#CHECK: fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL + clrj %r1, %r2, 0, 0 +#CHECK: clrj %r1, %r2, 0, .[[LAB:L.*]]+65534 # encoding: [0xec,0x12,A,A,0x00,0x77] +#CHECK: fixup A - offset: 2, value: (.[[LAB]]+65534)+2, kind: FK_390_PC16DBL + clrj %r1, %r2, 0, 0xfffe + +#CHECK: clrj %r1, %r2, 0, foo # encoding: [0xec,0x12,A,A,0x00,0x77] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clrj %r1, %r2, 0, foo + +#CHECK: clrj %r1, %r2, 1, foo # encoding: [0xec,0x12,A,A,0x10,0x77] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clrj %r1, %r2, 1, foo + +#CHECK: clrj %r1, %r2, 2, foo # encoding: [0xec,0x12,A,A,0x20,0x77] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clrjh %r1, %r2, foo # encoding: 
[0xec,0x12,A,A,0x20,0x77] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clrjnle %r1, %r2, foo # encoding: [0xec,0x12,A,A,0x20,0x77] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clrj %r1, %r2, 2, foo + clrjh %r1, %r2, foo + clrjnle %r1, %r2, foo + +#CHECK: clrj %r1, %r2, 3, foo # encoding: [0xec,0x12,A,A,0x30,0x77] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clrj %r1, %r2, 3, foo + +#CHECK: clrj %r1, %r2, 4, foo # encoding: [0xec,0x12,A,A,0x40,0x77] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clrjl %r1, %r2, foo # encoding: [0xec,0x12,A,A,0x40,0x77] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clrjnhe %r1, %r2, foo # encoding: [0xec,0x12,A,A,0x40,0x77] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clrj %r1, %r2, 4, foo + clrjl %r1, %r2, foo + clrjnhe %r1, %r2, foo + +#CHECK: clrj %r1, %r2, 5, foo # encoding: [0xec,0x12,A,A,0x50,0x77] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clrj %r1, %r2, 5, foo + +#CHECK: clrj %r1, %r2, 6, foo # encoding: [0xec,0x12,A,A,0x60,0x77] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clrjlh %r1, %r2, foo # encoding: [0xec,0x12,A,A,0x60,0x77] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clrjne %r1, %r2, foo # encoding: [0xec,0x12,A,A,0x60,0x77] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clrj %r1, %r2, 6, foo + clrjlh %r1, %r2, foo + clrjne %r1, %r2, foo + +#CHECK: clrj %r1, %r2, 7, foo # encoding: [0xec,0x12,A,A,0x70,0x77] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clrj %r1, %r2, 7, foo + +#CHECK: clrj %r1, %r2, 8, foo # encoding: [0xec,0x12,A,A,0x80,0x77] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clrje %r1, %r2, foo # encoding: [0xec,0x12,A,A,0x80,0x77] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: 
clrjnlh %r1, %r2, foo # encoding: [0xec,0x12,A,A,0x80,0x77] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clrj %r1, %r2, 8, foo + clrje %r1, %r2, foo + clrjnlh %r1, %r2, foo + +#CHECK: clrj %r1, %r2, 9, foo # encoding: [0xec,0x12,A,A,0x90,0x77] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clrj %r1, %r2, 9, foo + +#CHECK: clrj %r1, %r2, 10, foo # encoding: [0xec,0x12,A,A,0xa0,0x77] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clrjhe %r1, %r2, foo # encoding: [0xec,0x12,A,A,0xa0,0x77] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clrjnl %r1, %r2, foo # encoding: [0xec,0x12,A,A,0xa0,0x77] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clrj %r1, %r2, 10, foo + clrjhe %r1, %r2, foo + clrjnl %r1, %r2, foo + +#CHECK: clrj %r1, %r2, 11, foo # encoding: [0xec,0x12,A,A,0xb0,0x77] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clrj %r1, %r2, 11, foo + +#CHECK: clrj %r1, %r2, 12, foo # encoding: [0xec,0x12,A,A,0xc0,0x77] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clrjle %r1, %r2, foo # encoding: [0xec,0x12,A,A,0xc0,0x77] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL +#CHECK: clrjnh %r1, %r2, foo # encoding: [0xec,0x12,A,A,0xc0,0x77] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clrj %r1, %r2, 12, foo + clrjle %r1, %r2, foo + clrjnh %r1, %r2, foo + +#CHECK: clrj %r1, %r2, 13, foo # encoding: [0xec,0x12,A,A,0xd0,0x77] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clrj %r1, %r2, 13, foo + +#CHECK: clrj %r1, %r2, 14, foo # encoding: [0xec,0x12,A,A,0xe0,0x77] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clrj %r1, %r2, 14, foo + +#CHECK: clrj %r1, %r2, 15, foo # encoding: [0xec,0x12,A,A,0xf0,0x77] +#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL + clrj %r1, %r2, 15, foo + +#CHECK: clrj %r1, %r2, 0, bar+100 # encoding: 
[0xec,0x12,A,A,0x00,0x77] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clrj %r1, %r2, 0, bar+100 + +#CHECK: clrjh %r1, %r2, bar+100 # encoding: [0xec,0x12,A,A,0x20,0x77] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clrjh %r1, %r2, bar+100 + +#CHECK: clrjnle %r1, %r2, bar+100 # encoding: [0xec,0x12,A,A,0x20,0x77] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clrjnle %r1, %r2, bar+100 + +#CHECK: clrjl %r1, %r2, bar+100 # encoding: [0xec,0x12,A,A,0x40,0x77] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clrjl %r1, %r2, bar+100 + +#CHECK: clrjnhe %r1, %r2, bar+100 # encoding: [0xec,0x12,A,A,0x40,0x77] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clrjnhe %r1, %r2, bar+100 + +#CHECK: clrjlh %r1, %r2, bar+100 # encoding: [0xec,0x12,A,A,0x60,0x77] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clrjlh %r1, %r2, bar+100 + +#CHECK: clrjne %r1, %r2, bar+100 # encoding: [0xec,0x12,A,A,0x60,0x77] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clrjne %r1, %r2, bar+100 + +#CHECK: clrje %r1, %r2, bar+100 # encoding: [0xec,0x12,A,A,0x80,0x77] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clrje %r1, %r2, bar+100 + +#CHECK: clrjnlh %r1, %r2, bar+100 # encoding: [0xec,0x12,A,A,0x80,0x77] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clrjnlh %r1, %r2, bar+100 + +#CHECK: clrjhe %r1, %r2, bar+100 # encoding: [0xec,0x12,A,A,0xa0,0x77] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clrjhe %r1, %r2, bar+100 + +#CHECK: clrjnl %r1, %r2, bar+100 # encoding: [0xec,0x12,A,A,0xa0,0x77] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clrjnl %r1, %r2, bar+100 + +#CHECK: clrjle %r1, %r2, bar+100 # encoding: [0xec,0x12,A,A,0xc0,0x77] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clrjle 
%r1, %r2, bar+100 + +#CHECK: clrjnh %r1, %r2, bar+100 # encoding: [0xec,0x12,A,A,0xc0,0x77] +#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL + clrjnh %r1, %r2, bar+100 + +#CHECK: clrj %r1, %r2, 0, bar@PLT # encoding: [0xec,0x12,A,A,0x00,0x77] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clrj %r1, %r2, 0, bar@PLT + +#CHECK: clrjh %r1, %r2, bar@PLT # encoding: [0xec,0x12,A,A,0x20,0x77] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clrjh %r1, %r2, bar@PLT + +#CHECK: clrjnle %r1, %r2, bar@PLT # encoding: [0xec,0x12,A,A,0x20,0x77] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clrjnle %r1, %r2, bar@PLT + +#CHECK: clrjl %r1, %r2, bar@PLT # encoding: [0xec,0x12,A,A,0x40,0x77] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clrjl %r1, %r2, bar@PLT + +#CHECK: clrjnhe %r1, %r2, bar@PLT # encoding: [0xec,0x12,A,A,0x40,0x77] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clrjnhe %r1, %r2, bar@PLT + +#CHECK: clrjlh %r1, %r2, bar@PLT # encoding: [0xec,0x12,A,A,0x60,0x77] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clrjlh %r1, %r2, bar@PLT + +#CHECK: clrjne %r1, %r2, bar@PLT # encoding: [0xec,0x12,A,A,0x60,0x77] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clrjne %r1, %r2, bar@PLT + +#CHECK: clrje %r1, %r2, bar@PLT # encoding: [0xec,0x12,A,A,0x80,0x77] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clrje %r1, %r2, bar@PLT + +#CHECK: clrjnlh %r1, %r2, bar@PLT # encoding: [0xec,0x12,A,A,0x80,0x77] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clrjnlh %r1, %r2, bar@PLT + +#CHECK: clrjhe %r1, %r2, bar@PLT # encoding: [0xec,0x12,A,A,0xa0,0x77] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clrjhe %r1, %r2, bar@PLT + +#CHECK: clrjnl %r1, %r2, bar@PLT # encoding: [0xec,0x12,A,A,0xa0,0x77] +#CHECK: fixup A - offset: 2, 
value: bar@PLT+2, kind: FK_390_PC16DBL + clrjnl %r1, %r2, bar@PLT + +#CHECK: clrjle %r1, %r2, bar@PLT # encoding: [0xec,0x12,A,A,0xc0,0x77] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clrjle %r1, %r2, bar@PLT + +#CHECK: clrjnh %r1, %r2, bar@PLT # encoding: [0xec,0x12,A,A,0xc0,0x77] +#CHECK: fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL + clrjnh %r1, %r2, bar@PLT + #CHECK: clrl %r0, .[[LAB:L.*]]-4294967296 # encoding: [0xc6,0x0f,A,A,A,A] #CHECK: fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL clrl %r0, -0x100000000 @@ -2815,6 +3846,16 @@ clrl %r7,frob@PLT clrl %r8,frob@PLT +#CHECK: clst %r0, %r0 # encoding: [0xb2,0x5d,0x00,0x00] +#CHECK: clst %r0, %r15 # encoding: [0xb2,0x5d,0x00,0x0f] +#CHECK: clst %r15, %r0 # encoding: [0xb2,0x5d,0x00,0xf0] +#CHECK: clst %r7, %r8 # encoding: [0xb2,0x5d,0x00,0x78] + + clst %r0,%r0 + clst %r0,%r15 + clst %r15,%r0 + clst %r7,%r8 + #CHECK: cly %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x55] #CHECK: cly %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x55] #CHECK: cly %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x55] @@ -3593,6 +4634,14 @@ iill %r0, 0xffff iill %r15, 0 +#CHECK: ipm %r0 # encoding: [0xb2,0x22,0x00,0x00] +#CHECK: ipm %r1 # encoding: [0xb2,0x22,0x00,0x10] +#CHECK: ipm %r15 # encoding: [0xb2,0x22,0x00,0xf0] + + ipm %r0 + ipm %r1 + ipm %r15 + #CHECK: l %r0, 0 # encoding: [0x58,0x00,0x00,0x00] #CHECK: l %r0, 4095 # encoding: [0x58,0x00,0x0f,0xff] #CHECK: l %r0, 0(%r1) # encoding: [0x58,0x00,0x10,0x00] @@ -4683,6 +5732,36 @@ lnebr %f15,%f0 lnebr %f15,%f9 +#CHECK: lngfr %r0, %r0 # encoding: [0xb9,0x11,0x00,0x00] +#CHECK: lngfr %r0, %r15 # encoding: [0xb9,0x11,0x00,0x0f] +#CHECK: lngfr %r15, %r0 # encoding: [0xb9,0x11,0x00,0xf0] +#CHECK: lngfr %r7, %r8 # encoding: [0xb9,0x11,0x00,0x78] + + lngfr %r0,%r0 + lngfr %r0,%r15 + lngfr %r15,%r0 + lngfr %r7,%r8 + +#CHECK: lngr %r0, %r0 # encoding: [0xb9,0x01,0x00,0x00] +#CHECK: lngr %r0, %r15 # encoding: 
[0xb9,0x01,0x00,0x0f] +#CHECK: lngr %r15, %r0 # encoding: [0xb9,0x01,0x00,0xf0] +#CHECK: lngr %r7, %r8 # encoding: [0xb9,0x01,0x00,0x78] + + lngr %r0,%r0 + lngr %r0,%r15 + lngr %r15,%r0 + lngr %r7,%r8 + +#CHECK: lnr %r0, %r0 # encoding: [0x11,0x00] +#CHECK: lnr %r0, %r15 # encoding: [0x11,0x0f] +#CHECK: lnr %r15, %r0 # encoding: [0x11,0xf0] +#CHECK: lnr %r7, %r8 # encoding: [0x11,0x78] + + lnr %r0,%r0 + lnr %r0,%r15 + lnr %r15,%r0 + lnr %r7,%r8 + #CHECK: lnxbr %f0, %f8 # encoding: [0xb3,0x41,0x00,0x08] #CHECK: lnxbr %f0, %f13 # encoding: [0xb3,0x41,0x00,0x0d] #CHECK: lnxbr %f13, %f0 # encoding: [0xb3,0x41,0x00,0xd0] @@ -4713,6 +5792,36 @@ lpebr %f15,%f0 lpebr %f15,%f9 +#CHECK: lpgfr %r0, %r0 # encoding: [0xb9,0x10,0x00,0x00] +#CHECK: lpgfr %r0, %r15 # encoding: [0xb9,0x10,0x00,0x0f] +#CHECK: lpgfr %r15, %r0 # encoding: [0xb9,0x10,0x00,0xf0] +#CHECK: lpgfr %r7, %r8 # encoding: [0xb9,0x10,0x00,0x78] + + lpgfr %r0,%r0 + lpgfr %r0,%r15 + lpgfr %r15,%r0 + lpgfr %r7,%r8 + +#CHECK: lpgr %r0, %r0 # encoding: [0xb9,0x00,0x00,0x00] +#CHECK: lpgr %r0, %r15 # encoding: [0xb9,0x00,0x00,0x0f] +#CHECK: lpgr %r15, %r0 # encoding: [0xb9,0x00,0x00,0xf0] +#CHECK: lpgr %r7, %r8 # encoding: [0xb9,0x00,0x00,0x78] + + lpgr %r0,%r0 + lpgr %r0,%r15 + lpgr %r15,%r0 + lpgr %r7,%r8 + +#CHECK: lpr %r0, %r0 # encoding: [0x10,0x00] +#CHECK: lpr %r0, %r15 # encoding: [0x10,0x0f] +#CHECK: lpr %r15, %r0 # encoding: [0x10,0xf0] +#CHECK: lpr %r7, %r8 # encoding: [0x10,0x78] + + lpr %r0,%r0 + lpr %r0,%r15 + lpr %r15,%r0 + lpr %r7,%r8 + #CHECK: lpxbr %f0, %f8 # encoding: [0xb3,0x40,0x00,0x08] #CHECK: lpxbr %f0, %f13 # encoding: [0xb3,0x40,0x00,0x0d] #CHECK: lpxbr %f13, %f0 # encoding: [0xb3,0x40,0x00,0xd0] @@ -5608,6 +6717,16 @@ mviy 524287(%r1), 42 mviy 524287(%r15), 42 +#CHECK: mvst %r0, %r0 # encoding: [0xb2,0x55,0x00,0x00] +#CHECK: mvst %r0, %r15 # encoding: [0xb2,0x55,0x00,0x0f] +#CHECK: mvst %r15, %r0 # encoding: [0xb2,0x55,0x00,0xf0] +#CHECK: mvst %r7, %r8 # encoding: [0xb2,0x55,0x00,0x78] + + 
mvst %r0,%r0 + mvst %r0,%r15 + mvst %r15,%r0 + mvst %r7,%r8 + #CHECK: mxbr %f0, %f0 # encoding: [0xb3,0x4c,0x00,0x00] #CHECK: mxbr %f0, %f13 # encoding: [0xb3,0x4c,0x00,0x0d] #CHECK: mxbr %f8, %f5 # encoding: [0xb3,0x4c,0x00,0x85] @@ -5660,6 +6779,32 @@ n %r0, 4095(%r15,%r1) n %r15, 0 +#CHECK: nc 0(1), 0 # encoding: [0xd4,0x00,0x00,0x00,0x00,0x00] +#CHECK: nc 0(1), 0(%r1) # encoding: [0xd4,0x00,0x00,0x00,0x10,0x00] +#CHECK: nc 0(1), 0(%r15) # encoding: [0xd4,0x00,0x00,0x00,0xf0,0x00] +#CHECK: nc 0(1), 4095 # encoding: [0xd4,0x00,0x00,0x00,0x0f,0xff] +#CHECK: nc 0(1), 4095(%r1) # encoding: [0xd4,0x00,0x00,0x00,0x1f,0xff] +#CHECK: nc 0(1), 4095(%r15) # encoding: [0xd4,0x00,0x00,0x00,0xff,0xff] +#CHECK: nc 0(1,%r1), 0 # encoding: [0xd4,0x00,0x10,0x00,0x00,0x00] +#CHECK: nc 0(1,%r15), 0 # encoding: [0xd4,0x00,0xf0,0x00,0x00,0x00] +#CHECK: nc 4095(1,%r1), 0 # encoding: [0xd4,0x00,0x1f,0xff,0x00,0x00] +#CHECK: nc 4095(1,%r15), 0 # encoding: [0xd4,0x00,0xff,0xff,0x00,0x00] +#CHECK: nc 0(256,%r1), 0 # encoding: [0xd4,0xff,0x10,0x00,0x00,0x00] +#CHECK: nc 0(256,%r15), 0 # encoding: [0xd4,0xff,0xf0,0x00,0x00,0x00] + + nc 0(1), 0 + nc 0(1), 0(%r1) + nc 0(1), 0(%r15) + nc 0(1), 4095 + nc 0(1), 4095(%r1) + nc 0(1), 4095(%r15) + nc 0(1,%r1), 0 + nc 0(1,%r15), 0 + nc 4095(1,%r1), 0 + nc 4095(1,%r15), 0 + nc 0(256,%r1), 0 + nc 0(256,%r15), 0 + #CHECK: ng %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x80] #CHECK: ng %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x80] #CHECK: ng %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x80] @@ -5834,6 +6979,32 @@ o %r0, 4095(%r15,%r1) o %r15, 0 +#CHECK: oc 0(1), 0 # encoding: [0xd6,0x00,0x00,0x00,0x00,0x00] +#CHECK: oc 0(1), 0(%r1) # encoding: [0xd6,0x00,0x00,0x00,0x10,0x00] +#CHECK: oc 0(1), 0(%r15) # encoding: [0xd6,0x00,0x00,0x00,0xf0,0x00] +#CHECK: oc 0(1), 4095 # encoding: [0xd6,0x00,0x00,0x00,0x0f,0xff] +#CHECK: oc 0(1), 4095(%r1) # encoding: [0xd6,0x00,0x00,0x00,0x1f,0xff] +#CHECK: oc 0(1), 4095(%r15) # encoding: 
[0xd6,0x00,0x00,0x00,0xff,0xff] +#CHECK: oc 0(1,%r1), 0 # encoding: [0xd6,0x00,0x10,0x00,0x00,0x00] +#CHECK: oc 0(1,%r15), 0 # encoding: [0xd6,0x00,0xf0,0x00,0x00,0x00] +#CHECK: oc 4095(1,%r1), 0 # encoding: [0xd6,0x00,0x1f,0xff,0x00,0x00] +#CHECK: oc 4095(1,%r15), 0 # encoding: [0xd6,0x00,0xff,0xff,0x00,0x00] +#CHECK: oc 0(256,%r1), 0 # encoding: [0xd6,0xff,0x10,0x00,0x00,0x00] +#CHECK: oc 0(256,%r15), 0 # encoding: [0xd6,0xff,0xf0,0x00,0x00,0x00] + + oc 0(1), 0 + oc 0(1), 0(%r1) + oc 0(1), 0(%r15) + oc 0(1), 4095 + oc 0(1), 4095(%r1) + oc 0(1), 4095(%r15) + oc 0(1,%r1), 0 + oc 0(1,%r15), 0 + oc 4095(1,%r1), 0 + oc 4095(1,%r15), 0 + oc 0(256,%r1), 0 + oc 0(256,%r15), 0 + #CHECK: og %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x81] #CHECK: og %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x81] #CHECK: og %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x81] @@ -5992,6 +7163,65 @@ oy %r0, 524287(%r15,%r1) oy %r15, 0 +#CHECK: pfd 0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x36] +#CHECK: pfd 0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x36] +#CHECK: pfd 0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x36] +#CHECK: pfd 0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0x36] +#CHECK: pfd 0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x36] +#CHECK: pfd 0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0x36] +#CHECK: pfd 0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x36] +#CHECK: pfd 0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x36] +#CHECK: pfd 0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x36] +#CHECK: pfd 15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x36] + + pfd 0, -524288 + pfd 0, -1 + pfd 0, 0 + pfd 0, 1 + pfd 0, 524287 + pfd 0, 0(%r1) + pfd 0, 0(%r15) + pfd 0, 524287(%r1,%r15) + pfd 0, 524287(%r15,%r1) + pfd 15, 0 + +#CHECK: pfdrl 0, .[[LAB:L.*]]-4294967296 # encoding: [0xc6,0x02,A,A,A,A] +#CHECK: fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL + pfdrl 0, -0x100000000 +#CHECK: pfdrl 0, .[[LAB:L.*]]-2 # 
encoding: [0xc6,0x02,A,A,A,A] +#CHECK: fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL + pfdrl 0, -2 +#CHECK: pfdrl 0, .[[LAB:L.*]] # encoding: [0xc6,0x02,A,A,A,A] +#CHECK: fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL + pfdrl 0, 0 +#CHECK: pfdrl 0, .[[LAB:L.*]]+4294967294 # encoding: [0xc6,0x02,A,A,A,A] +#CHECK: fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL + pfdrl 0, 0xfffffffe + +#CHECK: pfdrl 0, foo # encoding: [0xc6,0x02,A,A,A,A] +# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL +#CHECK: pfdrl 15, foo # encoding: [0xc6,0xf2,A,A,A,A] +# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL + + pfdrl 0, foo + pfdrl 15, foo + +#CHECK: pfdrl 3, bar+100 # encoding: [0xc6,0x32,A,A,A,A] +# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL +#CHECK: pfdrl 4, bar+100 # encoding: [0xc6,0x42,A,A,A,A] +# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL + + pfdrl 3, bar+100 + pfdrl 4, bar+100 + +#CHECK: pfdrl 7, frob@PLT # encoding: [0xc6,0x72,A,A,A,A] +# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL +#CHECK: pfdrl 8, frob@PLT # encoding: [0xc6,0x82,A,A,A,A] +# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL + + pfdrl 7, frob@PLT + pfdrl 8, frob@PLT + #CHECK: risbg %r0, %r0, 0, 0, 0 # encoding: [0xec,0x00,0x00,0x00,0x00,0x55] #CHECK: risbg %r0, %r0, 0, 0, 63 # encoding: [0xec,0x00,0x00,0x00,0x3f,0x55] #CHECK: risbg %r0, %r0, 0, 255, 0 # encoding: [0xec,0x00,0x00,0xff,0x00,0x55] @@ -6674,6 +7904,16 @@ srlg %r0,%r0,524287(%r1) srlg %r0,%r0,524287(%r15) +#CHECK: srst %r0, %r0 # encoding: [0xb2,0x5e,0x00,0x00] +#CHECK: srst %r0, %r15 # encoding: [0xb2,0x5e,0x00,0x0f] +#CHECK: srst %r15, %r0 # encoding: [0xb2,0x5e,0x00,0xf0] +#CHECK: srst %r7, %r8 # encoding: [0xb2,0x5e,0x00,0x78] + + srst %r0,%r0 + srst %r0,%r15 + srst %r15,%r0 + srst %r7,%r8 + #CHECK: st %r0, 0 # encoding: [0x50,0x00,0x00,0x00] #CHECK: st %r0, 4095 # encoding: [0x50,0x00,0x0f,0xff] 
#CHECK: st %r0, 0(%r1) # encoding: [0x50,0x00,0x10,0x00] @@ -7101,6 +8341,84 @@ sy %r0, 524287(%r15,%r1) sy %r15, 0 +#CHECK: tm 0, 0 # encoding: [0x91,0x00,0x00,0x00] +#CHECK: tm 4095, 0 # encoding: [0x91,0x00,0x0f,0xff] +#CHECK: tm 0, 255 # encoding: [0x91,0xff,0x00,0x00] +#CHECK: tm 0(%r1), 42 # encoding: [0x91,0x2a,0x10,0x00] +#CHECK: tm 0(%r15), 42 # encoding: [0x91,0x2a,0xf0,0x00] +#CHECK: tm 4095(%r1), 42 # encoding: [0x91,0x2a,0x1f,0xff] +#CHECK: tm 4095(%r15), 42 # encoding: [0x91,0x2a,0xff,0xff] + + tm 0, 0 + tm 4095, 0 + tm 0, 255 + tm 0(%r1), 42 + tm 0(%r15), 42 + tm 4095(%r1), 42 + tm 4095(%r15), 42 + +#CHECK: tmhh %r0, 0 # encoding: [0xa7,0x02,0x00,0x00] +#CHECK: tmhh %r0, 32768 # encoding: [0xa7,0x02,0x80,0x00] +#CHECK: tmhh %r0, 65535 # encoding: [0xa7,0x02,0xff,0xff] +#CHECK: tmhh %r15, 0 # encoding: [0xa7,0xf2,0x00,0x00] + + tmhh %r0, 0 + tmhh %r0, 0x8000 + tmhh %r0, 0xffff + tmhh %r15, 0 + +#CHECK: tmhl %r0, 0 # encoding: [0xa7,0x03,0x00,0x00] +#CHECK: tmhl %r0, 32768 # encoding: [0xa7,0x03,0x80,0x00] +#CHECK: tmhl %r0, 65535 # encoding: [0xa7,0x03,0xff,0xff] +#CHECK: tmhl %r15, 0 # encoding: [0xa7,0xf3,0x00,0x00] + + tmhl %r0, 0 + tmhl %r0, 0x8000 + tmhl %r0, 0xffff + tmhl %r15, 0 + +#CHECK: tmlh %r0, 0 # encoding: [0xa7,0x00,0x00,0x00] +#CHECK: tmlh %r0, 32768 # encoding: [0xa7,0x00,0x80,0x00] +#CHECK: tmlh %r0, 65535 # encoding: [0xa7,0x00,0xff,0xff] +#CHECK: tmlh %r15, 0 # encoding: [0xa7,0xf0,0x00,0x00] + + tmlh %r0, 0 + tmlh %r0, 0x8000 + tmlh %r0, 0xffff + tmlh %r15, 0 + +#CHECK: tmll %r0, 0 # encoding: [0xa7,0x01,0x00,0x00] +#CHECK: tmll %r0, 32768 # encoding: [0xa7,0x01,0x80,0x00] +#CHECK: tmll %r0, 65535 # encoding: [0xa7,0x01,0xff,0xff] +#CHECK: tmll %r15, 0 # encoding: [0xa7,0xf1,0x00,0x00] + + tmll %r0, 0 + tmll %r0, 0x8000 + tmll %r0, 0xffff + tmll %r15, 0 + +#CHECK: tmy -524288, 0 # encoding: [0xeb,0x00,0x00,0x00,0x80,0x51] +#CHECK: tmy -1, 0 # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x51] +#CHECK: tmy 0, 0 # encoding: 
[0xeb,0x00,0x00,0x00,0x00,0x51] +#CHECK: tmy 1, 0 # encoding: [0xeb,0x00,0x00,0x01,0x00,0x51] +#CHECK: tmy 524287, 0 # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x51] +#CHECK: tmy 0, 255 # encoding: [0xeb,0xff,0x00,0x00,0x00,0x51] +#CHECK: tmy 0(%r1), 42 # encoding: [0xeb,0x2a,0x10,0x00,0x00,0x51] +#CHECK: tmy 0(%r15), 42 # encoding: [0xeb,0x2a,0xf0,0x00,0x00,0x51] +#CHECK: tmy 524287(%r1), 42 # encoding: [0xeb,0x2a,0x1f,0xff,0x7f,0x51] +#CHECK: tmy 524287(%r15), 42 # encoding: [0xeb,0x2a,0xff,0xff,0x7f,0x51] + + tmy -524288, 0 + tmy -1, 0 + tmy 0, 0 + tmy 1, 0 + tmy 524287, 0 + tmy 0, 255 + tmy 0(%r1), 42 + tmy 0(%r15), 42 + tmy 524287(%r1), 42 + tmy 524287(%r15), 42 + #CHECK: x %r0, 0 # encoding: [0x57,0x00,0x00,0x00] #CHECK: x %r0, 4095 # encoding: [0x57,0x00,0x0f,0xff] #CHECK: x %r0, 0(%r1) # encoding: [0x57,0x00,0x10,0x00] @@ -7117,6 +8435,32 @@ x %r0, 4095(%r15,%r1) x %r15, 0 +#CHECK: xc 0(1), 0 # encoding: [0xd7,0x00,0x00,0x00,0x00,0x00] +#CHECK: xc 0(1), 0(%r1) # encoding: [0xd7,0x00,0x00,0x00,0x10,0x00] +#CHECK: xc 0(1), 0(%r15) # encoding: [0xd7,0x00,0x00,0x00,0xf0,0x00] +#CHECK: xc 0(1), 4095 # encoding: [0xd7,0x00,0x00,0x00,0x0f,0xff] +#CHECK: xc 0(1), 4095(%r1) # encoding: [0xd7,0x00,0x00,0x00,0x1f,0xff] +#CHECK: xc 0(1), 4095(%r15) # encoding: [0xd7,0x00,0x00,0x00,0xff,0xff] +#CHECK: xc 0(1,%r1), 0 # encoding: [0xd7,0x00,0x10,0x00,0x00,0x00] +#CHECK: xc 0(1,%r15), 0 # encoding: [0xd7,0x00,0xf0,0x00,0x00,0x00] +#CHECK: xc 4095(1,%r1), 0 # encoding: [0xd7,0x00,0x1f,0xff,0x00,0x00] +#CHECK: xc 4095(1,%r15), 0 # encoding: [0xd7,0x00,0xff,0xff,0x00,0x00] +#CHECK: xc 0(256,%r1), 0 # encoding: [0xd7,0xff,0x10,0x00,0x00,0x00] +#CHECK: xc 0(256,%r15), 0 # encoding: [0xd7,0xff,0xf0,0x00,0x00,0x00] + + xc 0(1), 0 + xc 0(1), 0(%r1) + xc 0(1), 0(%r15) + xc 0(1), 4095 + xc 0(1), 4095(%r1) + xc 0(1), 4095(%r15) + xc 0(1,%r1), 0 + xc 0(1,%r15), 0 + xc 4095(1,%r1), 0 + xc 4095(1,%r15), 0 + xc 0(256,%r1), 0 + xc 0(256,%r15), 0 + #CHECK: xg %r0, -524288 # encoding: 
[0xe3,0x00,0x00,0x00,0x80,0x82] #CHECK: xg %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x82] #CHECK: xg %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x82] diff --git a/test/MC/SystemZ/lit.local.cfg b/test/MC/SystemZ/lit.local.cfg index abb6974..b12af09 100644 --- a/test/MC/SystemZ/lit.local.cfg +++ b/test/MC/SystemZ/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp', '.s'] - targets = set(config.root.targets_to_build.split()) if not 'SystemZ' in targets: config.unsupported = True diff --git a/test/MC/X86/AlignedBundling/lit.local.cfg b/test/MC/X86/AlignedBundling/lit.local.cfg index 6c49f08..ba763cf 100644 --- a/test/MC/X86/AlignedBundling/lit.local.cfg +++ b/test/MC/X86/AlignedBundling/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.s'] - targets = set(config.root.targets_to_build.split()) if not 'X86' in targets: config.unsupported = True diff --git a/test/MC/X86/avx512-encodings.s b/test/MC/X86/avx512-encodings.s index 26a77c1..38f9190 100644 --- a/test/MC/X86/avx512-encodings.s +++ b/test/MC/X86/avx512-encodings.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s +// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl --show-encoding %s | FileCheck %s // CHECK: vinserti32x4 // CHECK: encoding: [0x62,0xa3,0x55,0x48,0x38,0xcd,0x01] @@ -19,3 +19,27 @@ vextracti64x4 $1, %zmm9, %ymm17 // CHECK: vextracti64x4 // CHECK: encoding: [0x62,0x73,0xfd,0x48,0x3b,0x4f,0x10,0x01] vextracti64x4 $1, %zmm9, 512(%rdi) + +// CHECK: vpsrad +// CHECK: encoding: [0x62,0xb1,0x35,0x40,0x72,0xe1,0x02] +vpsrad $2, %zmm17, %zmm25 + +// CHECK: vpsrad +// CHECK: encoding: [0x62,0xf1,0x35,0x40,0x72,0x64,0xb7,0x08,0x02] +vpsrad $2, 512(%rdi, %rsi, 4), %zmm25 + +// CHECK: vpsrad +// CHECK: encoding: [0x62,0x21,0x1d,0x48,0xe2,0xc9] +vpsrad %xmm17, %zmm12, %zmm25 + +// CHECK: vpsrad +// CHECK: encoding: [0x62,0x61,0x1d,0x48,0xe2,0x4c,0xb7,0x20] +vpsrad 512(%rdi, %rsi, 4), %zmm12, %zmm25 + +// CHECK: vpbroadcastd {{.*}} 
{%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7d,0xc9,0x58,0xc8] +vpbroadcastd %xmm0, %zmm1 {%k1} {z} + +// CHECK: vmovdqu64 {{.*}} {%k3} +// CHECK: encoding: [0x62,0xf1,0xfe,0x4b,0x6f,0xc8] +vmovdqu64 %zmm0, %zmm1 {%k3} diff --git a/test/MC/X86/cfi_def_cfa-crash.s b/test/MC/X86/cfi_def_cfa-crash.s new file mode 100644 index 0000000..9d22d6e --- /dev/null +++ b/test/MC/X86/cfi_def_cfa-crash.s @@ -0,0 +1,73 @@ +// RUN: llvm-mc -triple x86_64-apple-darwin -filetype=obj %s -o - | macho-dump | FileCheck %s + +// We were trying to generate compact unwind info for assembly like this. +// The .cfi_def_cfa directive, however, throws a wrench into that and was +// causing an llvm_unreachable() failure. Make sure the assembler can handle +// the input. The actual eh_frames created using these directives are checked +// elsewhere. This test is a simpler "does the code assemble" check. + +// rdar://15406518 + +.macro SaveRegisters + + push %rbp + .cfi_def_cfa_offset 16 + .cfi_offset rbp, -16 + + mov %rsp, %rbp + .cfi_def_cfa_register rbp + + sub $$0x80+8, %rsp + + movdqa %xmm0, -0x80(%rbp) + push %rax + movdqa %xmm1, -0x70(%rbp) + push %rdi + movdqa %xmm2, -0x60(%rbp) + push %rsi + movdqa %xmm3, -0x50(%rbp) + push %rdx + movdqa %xmm4, -0x40(%rbp) + push %rcx + movdqa %xmm5, -0x30(%rbp) + push %r8 + movdqa %xmm6, -0x20(%rbp) + push %r9 + movdqa %xmm7, -0x10(%rbp) + +.endmacro +.macro RestoreRegisters + + movdqa -0x80(%rbp), %xmm0 + pop %r9 + movdqa -0x70(%rbp), %xmm1 + pop %r8 + movdqa -0x60(%rbp), %xmm2 + pop %rcx + movdqa -0x50(%rbp), %xmm3 + pop %rdx + movdqa -0x40(%rbp), %xmm4 + pop %rsi + movdqa -0x30(%rbp), %xmm5 + pop %rdi + movdqa -0x20(%rbp), %xmm6 + pop %rax + movdqa -0x10(%rbp), %xmm7 + + leave + .cfi_def_cfa rsp, 8 + .cfi_same_value rbp + +.endmacro + +_foo: +.cfi_startproc + SaveRegisters + + RestoreRegisters + ret + .cfi_endproc + + + +// CHECK: 'section_name', '__eh_frame\x00 diff --git a/test/MC/X86/intel-syntax.s b/test/MC/X86/intel-syntax.s index 
ff86e8d..9677da7 100644 --- a/test/MC/X86/intel-syntax.s +++ b/test/MC/X86/intel-syntax.s @@ -63,6 +63,14 @@ _main: mov ECX, DWORD PTR [4*ECX + _fnan] // CHECK: movq %fs:320, %rax mov RAX, QWORD PTR FS:[320] +// CHECK: movq %fs:320, %rax + mov RAX, QWORD PTR FS:320 +// CHECK: movq %rax, %fs:320 + mov QWORD PTR FS:320, RAX +// CHECK: movq %rax, %fs:20(%rbx) + mov QWORD PTR FS:20[rbx], RAX +// CHECK: vshufpd $1, %xmm2, %xmm1, %xmm0 + vshufpd XMM0, XMM1, XMM2, 1 // CHECK: vpgatherdd %xmm8, (%r15,%xmm9,2), %xmm1 vpgatherdd XMM10, DWORD PTR [R15 + 2*XMM9], XMM8 // CHECK: movsd -8, %xmm5 diff --git a/test/MC/X86/lit.local.cfg b/test/MC/X86/lit.local.cfg index ad280c7..19840aa 100644 --- a/test/MC/X86/lit.local.cfg +++ b/test/MC/X86/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp', '.s'] - targets = set(config.root.targets_to_build.split()) if not 'X86' in targets: config.unsupported = True diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s index c0eac5e..6b41f48 100644 --- a/test/MC/X86/x86-64.s +++ b/test/MC/X86/x86-64.s @@ -549,8 +549,8 @@ cvttpd2dq 0xdeadbeef(%ebx,%ecx,8),%xmm5 // rdar://8490728 - llvm-mc rejects 'movmskpd' movmskpd %xmm6, %rax -// CHECK: movmskpd %xmm6, %rax -// CHECK: encoding: [0x66,0x48,0x0f,0x50,0xc6] +// CHECK: movmskpd %xmm6, %eax +// CHECK: encoding: [0x66,0x0f,0x50,0xc6] movmskpd %xmm6, %eax // CHECK: movmskpd %xmm6, %eax // CHECK: encoding: [0x66,0x0f,0x50,0xc6] @@ -1375,3 +1375,16 @@ fsub %st(1) fsubr %st(1) fdiv %st(1) fdivr %st(1) + +// CHECK: movd %xmm0, %eax +// CHECK: movd %xmm0, %rax +// CHECK: movd %xmm0, %rax +// CHECK: vmovd %xmm0, %eax +// CHECK: vmovq %xmm0, %rax +// CHECK: vmovq %xmm0, %rax +movd %xmm0, %eax +movd %xmm0, %rax +movq %xmm0, %rax +vmovd %xmm0, %eax +vmovd %xmm0, %rax +vmovq %xmm0, %rax diff --git a/test/MC/X86/x86_64-avx-encoding.s b/test/MC/X86/x86_64-avx-encoding.s index 6da9e21..5ba8064 100644 --- a/test/MC/X86/x86_64-avx-encoding.s +++ b/test/MC/X86/x86_64-avx-encoding.s @@ -2212,11 
+2212,11 @@ vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11 // CHECK: encoding: [0xc5,0x79,0x7e,0x30] vmovd %xmm14, (%rax) -// CHECK: vmovd %rax, %xmm14 +// CHECK: vmovq %rax, %xmm14 // CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0] vmovd %rax, %xmm14 -// CHECK: vmovd %xmm0, %rax +// CHECK: vmovq %xmm0, %rax // CHECK: encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] vmovd %xmm0, %rax @@ -4044,43 +4044,43 @@ vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11 // CHECK: encoding: [0xc4,0x43,0x79,0x17,0xc0,0x0a] vextractps $10, %xmm8, %r8 -// CHECK: vextractps $7, %xmm4, %rcx +// CHECK: vextractps $7, %xmm4, %ecx // CHECK: encoding: [0xc4,0xe3,0x79,0x17,0xe1,0x07] vextractps $7, %xmm4, %rcx -// CHECK: vmovd %xmm4, %rcx +// CHECK: vmovq %xmm4, %rcx // CHECK: encoding: [0xc4,0xe1,0xf9,0x7e,0xe1] vmovd %xmm4, %rcx -// CHECK: vmovmskpd %xmm4, %rcx +// CHECK: vmovmskpd %xmm4, %ecx // CHECK: encoding: [0xc5,0xf9,0x50,0xcc] vmovmskpd %xmm4, %rcx -// CHECK: vmovmskpd %ymm4, %rcx +// CHECK: vmovmskpd %ymm4, %ecx // CHECK: encoding: [0xc5,0xfd,0x50,0xcc] vmovmskpd %ymm4, %rcx -// CHECK: vmovmskps %xmm4, %rcx +// CHECK: vmovmskps %xmm4, %ecx // CHECK: encoding: [0xc5,0xf8,0x50,0xcc] vmovmskps %xmm4, %rcx -// CHECK: vmovmskps %ymm4, %rcx +// CHECK: vmovmskps %ymm4, %ecx // CHECK: encoding: [0xc5,0xfc,0x50,0xcc] vmovmskps %ymm4, %rcx -// CHECK: vpextrb $7, %xmm4, %rcx +// CHECK: vpextrb $7, %xmm4, %ecx // CHECK: encoding: [0xc4,0xe3,0x79,0x14,0xe1,0x07] vpextrb $7, %xmm4, %rcx -// CHECK: vpinsrw $7, %r8, %xmm15, %xmm8 +// CHECK: vpinsrw $7, %r8d, %xmm15, %xmm8 // CHECK: encoding: [0xc4,0x41,0x01,0xc4,0xc0,0x07] vpinsrw $7, %r8, %xmm15, %xmm8 -// CHECK: vpinsrw $7, %rcx, %xmm4, %xmm6 +// CHECK: vpinsrw $7, %ecx, %xmm4, %xmm6 // CHECK: encoding: [0xc5,0xd9,0xc4,0xf1,0x07] vpinsrw $7, %rcx, %xmm4, %xmm6 -// CHECK: vpmovmskb %xmm4, %rcx +// CHECK: vpmovmskb %xmm4, %ecx // CHECK: encoding: [0xc5,0xf9,0xd7,0xcc] vpmovmskb %xmm4, %rcx @@ -4185,3 +4185,59 @@ _foo2: // CHECK: vpgatherqd %xmm8, (%r15,%ymm9,2), %xmm10 // CHECK: 
encoding: [0xc4,0x02,0x3d,0x91,0x14,0x4f] vpgatherqd %xmm8, (%r15,%ymm9,2), %xmm10 + +// CHECK: vmovaps %xmm0, %xmm8 +// CHECK: encoding: [0xc5,0x78,0x28,0xc0] + vmovaps %xmm0, %xmm8 + +// CHECK: vmovaps %xmm8, %xmm0 +// CHECK: encoding: [0xc5,0x78,0x29,0xc0] + vmovaps %xmm8, %xmm0 + +// CHECK: vmovaps %ymm0, %ymm8 +// CHECK: encoding: [0xc5,0x7c,0x28,0xc0] + vmovaps %ymm0, %ymm8 + +// CHECK: vmovaps %ymm8, %ymm0 +// CHECK: encoding: [0xc5,0x7c,0x29,0xc0] + vmovaps %ymm8, %ymm0 + +// CHECK: vmovups %xmm0, %xmm8 +// CHECK: encoding: [0xc5,0x78,0x10,0xc0] + vmovups %xmm0, %xmm8 + +// CHECK: vmovups %xmm8, %xmm0 +// CHECK: encoding: [0xc5,0x78,0x11,0xc0] + vmovups %xmm8, %xmm0 + +// CHECK: vmovups %ymm0, %ymm8 +// CHECK: encoding: [0xc5,0x7c,0x10,0xc0] + vmovups %ymm0, %ymm8 + +// CHECK: vmovups %ymm8, %ymm0 +// CHECK: encoding: [0xc5,0x7c,0x11,0xc0] + vmovups %ymm8, %ymm0 + +// CHECK: vmovss %xmm0, %xmm0, %xmm8 +// CHECK: encoding: [0xc5,0x7a,0x10,0xc0] + vmovss %xmm0, %xmm0, %xmm8 + +// CHECK: vmovss %xmm0, %xmm8, %xmm0 +// CHECK: encoding: [0xc5,0xba,0x10,0xc0] + vmovss %xmm0, %xmm8, %xmm0 + +// CHECK: vmovss %xmm8, %xmm0, %xmm0 +// CHECK: encoding: [0xc5,0x7a,0x11,0xc0] + vmovss %xmm8, %xmm0, %xmm0 + +// CHECK: vmovsd %xmm0, %xmm0, %xmm8 +// CHECK: encoding: [0xc5,0x7b,0x10,0xc0] + vmovsd %xmm0, %xmm0, %xmm8 + +// CHECK: vmovsd %xmm0, %xmm8, %xmm0 +// CHECK: encoding: [0xc5,0xbb,0x10,0xc0] + vmovsd %xmm0, %xmm8, %xmm0 + +// CHECK: vmovsd %xmm8, %xmm0, %xmm0 +// CHECK: encoding: [0xc5,0x7b,0x11,0xc0] + vmovsd %xmm8, %xmm0, %xmm0 diff --git a/test/MC/X86/x86_64-encoding.s b/test/MC/X86/x86_64-encoding.s index cfdf87f..40b93f0 100644 --- a/test/MC/X86/x86_64-encoding.s +++ b/test/MC/X86/x86_64-encoding.s @@ -120,6 +120,66 @@ movd %mm1, %edx // CHECK: fixup A - offset: 5, value: CPI1_0-4 pshufb CPI1_0(%rip), %xmm1 +// CHECK: sha1rnds4 $1, %xmm1, %xmm2 +// CHECK: encoding: [0x0f,0x3a,0xcc,0xd1,0x01] +sha1rnds4 $1, %xmm1, %xmm2 + +// CHECK: sha1rnds4 $1, (%rax), %xmm2 
+// CHECK: encoding: [0x0f,0x3a,0xcc,0x10,0x01] +sha1rnds4 $1, (%rax), %xmm2 + +// CHECK: sha1nexte %xmm1, %xmm2 +// CHECK: encoding: [0x0f,0x38,0xc8,0xd1] +sha1nexte %xmm1, %xmm2 + +// CHECK: sha1msg1 %xmm1, %xmm2 +// CHECK: encoding: [0x0f,0x38,0xc9,0xd1] +sha1msg1 %xmm1, %xmm2 + +// CHECK: sha1msg1 (%rax), %xmm2 +// CHECK: encoding: [0x0f,0x38,0xc9,0x10] +sha1msg1 (%rax), %xmm2 + +// CHECK: sha1msg2 %xmm1, %xmm2 +// CHECK: encoding: [0x0f,0x38,0xca,0xd1] +sha1msg2 %xmm1, %xmm2 + +// CHECK: sha1msg2 (%rax), %xmm2 +// CHECK: encoding: [0x0f,0x38,0xca,0x10] +sha1msg2 (%rax), %xmm2 + +// CHECK: sha256rnds2 (%rax), %xmm2 +// CHECK: encoding: [0x0f,0x38,0xcb,0x10] +sha256rnds2 (%rax), %xmm2 + +// CHECK: sha256rnds2 %xmm1, %xmm2 +// CHECK: encoding: [0x0f,0x38,0xcb,0xd1] +sha256rnds2 %xmm1, %xmm2 + +// CHECK: sha256rnds2 (%rax), %xmm2 +// CHECK: encoding: [0x0f,0x38,0xcb,0x10] +sha256rnds2 %xmm0, (%rax), %xmm2 + +// CHECK: sha256rnds2 %xmm1, %xmm2 +// CHECK: encoding: [0x0f,0x38,0xcb,0xd1] +sha256rnds2 %xmm0, %xmm1, %xmm2 + +// CHECK: sha256msg1 %xmm1, %xmm2 +// CHECK: encoding: [0x0f,0x38,0xcc,0xd1] +sha256msg1 %xmm1, %xmm2 + +// CHECK: sha256msg1 (%rax), %xmm2 +// CHECK: encoding: [0x0f,0x38,0xcc,0x10] +sha256msg1 (%rax), %xmm2 + +// CHECK: sha256msg2 %xmm1, %xmm2 +// CHECK: encoding: [0x0f,0x38,0xcd,0xd1] +sha256msg2 %xmm1, %xmm2 + +// CHECK: sha256msg2 (%rax), %xmm2 +// CHECK: encoding: [0x0f,0x38,0xcd,0x10] +sha256msg2 (%rax), %xmm2 + // CHECK: movq 57005(,%riz), %rbx // CHECK: encoding: [0x48,0x8b,0x1c,0x25,0xad,0xde,0x00,0x00] movq 57005(,%riz), %rbx @@ -171,3 +231,15 @@ pshufb CPI1_0(%rip), %xmm1 // CHECK: filds (%rdi) // CHECK: encoding: [0xdf,0x07] filds (%rdi) + +// CHECK: pmovmskb %xmm5, %ecx +// CHECK: encoding: [0x66,0x0f,0xd7,0xcd] + pmovmskb %xmm5,%rcx + +// CHECK: pinsrw $3, %ecx, %xmm5 +// CHECK: encoding: [0x66,0x0f,0xc4,0xe9,0x03] + pinsrw $3, %ecx, %xmm5 + +// CHECK: pinsrw $3, %ecx, %xmm5 +// CHECK: encoding: [0x66,0x0f,0xc4,0xe9,0x03] + pinsrw 
$3, %rcx, %xmm5 diff --git a/test/MC/X86/x86_64-tbm-encoding.s b/test/MC/X86/x86_64-tbm-encoding.s new file mode 100644 index 0000000..180578b --- /dev/null +++ b/test/MC/X86/x86_64-tbm-encoding.s @@ -0,0 +1,196 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// bextri 32 reg +// CHECK: bextr $2814, %edi, %eax +// CHECK: encoding: [0x8f,0xea,0x78,0x10,0xc7,0xfe,0x0a,0x00,0x00] + bextr $2814, %edi, %eax + +// bextri 32 mem +// CHECK: bextr $2814, (%rdi), %eax +// CHECK: encoding: [0x8f,0xea,0x78,0x10,0x07,0xfe,0x0a,0x00,0x00] + bextr $2814, (%rdi), %eax + +// bextri 64 reg +// CHECK: bextr $2814, %rdi, %rax +// CHECK: encoding: [0x8f,0xea,0xf8,0x10,0xc7,0xfe,0x0a,0x00,0x00] + bextr $2814, %rdi, %rax + +// bextri 64 mem +// CHECK: bextr $2814, (%rdi), %rax +// CHECK: encoding: [0x8f,0xea,0xf8,0x10,0x07,0xfe,0x0a,0x00,0x00] + bextr $2814, (%rdi), %rax + +// blcfill 32 reg +// CHECK: blcfill %edi, %eax +// CHECK: encoding: [0x8f,0xe9,0x78,0x01,0xcf] + blcfill %edi, %eax + +// blcfill 32 mem +// CHECK: blcfill (%rdi), %eax +// CHECK: encoding: [0x8f,0xe9,0x78,0x01,0x0f] + blcfill (%rdi), %eax + +// blcfill 64 reg +// CHECK: blcfill %rdi, %rax +// CHECK: encoding: [0x8f,0xe9,0xf8,0x01,0xcf] + blcfill %rdi, %rax + +// blcfill 64 mem +// CHECK: blcfill (%rdi), %rax +// CHECK: encoding: [0x8f,0xe9,0xf8,0x01,0x0f] + blcfill (%rdi), %rax + +// blci 32 reg +// CHECK: blci %edi, %eax +// CHECK: encoding: [0x8f,0xe9,0x78,0x02,0xf7] + blci %edi, %eax + +// blci 32 mem +// CHECK: blci (%rdi), %eax +// CHECK: encoding: [0x8f,0xe9,0x78,0x02,0x37] + blci (%rdi), %eax + +// blci 64 reg +// CHECK: blci %rdi, %rax +// CHECK: encoding: [0x8f,0xe9,0xf8,0x02,0xf7] + blci %rdi, %rax + +// blci 64 mem +// CHECK: blci (%rdi), %rax +// CHECK: encoding: [0x8f,0xe9,0xf8,0x02,0x37] + blci (%rdi), %rax + +// blcic 32 reg +// CHECK: blcic %edi, %eax +// CHECK: encoding: [0x8f,0xe9,0x78,0x01,0xef] + blcic %edi, %eax + +// blcic 32 mem +// CHECK: blcic (%rdi), 
%eax +// CHECK: encoding: [0x8f,0xe9,0x78,0x01,0x2f] + blcic (%rdi), %eax + +// blcic 64 reg +// CHECK: blcic %rdi, %rax +// CHECK: encoding: [0x8f,0xe9,0xf8,0x01,0xef] + blcic %rdi, %rax + +// blcic 64 mem +// CHECK: blcic (%rdi), %rax +// CHECK: encoding: [0x8f,0xe9,0xf8,0x01,0x2f] + blcic (%rdi), %rax + +// blcmsk 32 reg +// CHECK: blcmsk %edi, %eax +// CHECK: encoding: [0x8f,0xe9,0x78,0x02,0xcf] + blcmsk %edi, %eax + +// blcmsk 32 mem +// CHECK: blcmsk (%rdi), %eax +// CHECK: encoding: [0x8f,0xe9,0x78,0x02,0x0f] + blcmsk (%rdi), %eax + +// blcmsk 64 reg +// CHECK: blcmsk %rdi, %rax +// CHECK: encoding: [0x8f,0xe9,0xf8,0x02,0xcf] + blcmsk %rdi, %rax + +// blcmsk 64 mem +// CHECK: blcmsk (%rdi), %rax +// CHECK: encoding: [0x8f,0xe9,0xf8,0x02,0x0f] + blcmsk (%rdi), %rax + +// blcs 32 reg +// CHECK: blcs %edi, %eax +// CHECK: encoding: [0x8f,0xe9,0x78,0x01,0xdf] + blcs %edi, %eax + +// blcs 32 mem +// CHECK: blcs (%rdi), %eax +// CHECK: encoding: [0x8f,0xe9,0x78,0x01,0x1f] + blcs (%rdi), %eax + +// blcs 64 reg +// CHECK: blcs %rdi, %rax +// CHECK: encoding: [0x8f,0xe9,0xf8,0x01,0xdf] + blcs %rdi, %rax + +// blcs 64 mem +// CHECK: blcs (%rdi), %rax +// CHECK: encoding: [0x8f,0xe9,0xf8,0x01,0x1f] + blcs (%rdi), %rax + +// blsfill 32 reg +// CHECK: blsfill %edi, %eax +// CHECK: encoding: [0x8f,0xe9,0x78,0x01,0xd7] + blsfill %edi, %eax + +// blsfill 32 mem +// CHECK: blsfill (%rdi), %eax +// CHECK: encoding: [0x8f,0xe9,0x78,0x01,0x17] + blsfill (%rdi), %eax + +// blsfill 64 reg +// CHECK: blsfill %rdi, %rax +// CHECK: encoding: [0x8f,0xe9,0xf8,0x01,0xd7] + blsfill %rdi, %rax + +// blsfill 64 mem +// CHECK: blsfill (%rdi), %rax +// CHECK: encoding: [0x8f,0xe9,0xf8,0x01,0x17] + blsfill (%rdi), %rax + +// blsic 32 reg +// CHECK: blsic %edi, %eax +// CHECK: encoding: [0x8f,0xe9,0x78,0x01,0xf7] + blsic %edi, %eax + +// blsic 32 mem +// CHECK: blsic (%rdi), %eax +// CHECK: encoding: [0x8f,0xe9,0x78,0x01,0x37] + blsic (%rdi), %eax + +// blsic 64 reg +// CHECK: blsic %rdi, 
%rax +// CHECK: encoding: [0x8f,0xe9,0xf8,0x01,0xf7] + blsic %rdi, %rax + +// t1mskc 32 reg +// CHECK: t1mskc %edi, %eax +// CHECK: encoding: [0x8f,0xe9,0x78,0x01,0xff] + t1mskc %edi, %eax + +// t1mskc 32 mem +// CHECK: t1mskc (%rdi), %eax +// CHECK: encoding: [0x8f,0xe9,0x78,0x01,0x3f] + t1mskc (%rdi), %eax + +// t1mskc 64 reg +// CHECK: t1mskc %rdi, %rax +// CHECK: encoding: [0x8f,0xe9,0xf8,0x01,0xff] + t1mskc %rdi, %rax + +// t1mskc 64 mem +// CHECK: t1mskc (%rdi), %rax +// CHECK: encoding: [0x8f,0xe9,0xf8,0x01,0x3f] + t1mskc (%rdi), %rax + +// tzmsk 32 reg +// CHECK: tzmsk %edi, %eax +// CHECK: encoding: [0x8f,0xe9,0x78,0x01,0xe7] + tzmsk %edi, %eax + +// tzmsk 32 mem +// CHECK: tzmsk (%rdi), %eax +// CHECK: encoding: [0x8f,0xe9,0x78,0x01,0x27] + tzmsk (%rdi), %eax + +// tzmsk 64 reg +// CHECK: tzmsk %rdi, %rax +// CHECK: encoding: [0x8f,0xe9,0xf8,0x01,0xe7] + tzmsk %rdi, %rax + +// tzmsk 64 mem +// CHECK: tzmsk (%rdi), %rax +// CHECK: encoding: [0x8f,0xe9,0xf8,0x01,0x27] + tzmsk (%rdi), %rax diff --git a/test/MC/X86/x86_nop.s b/test/MC/X86/x86_nop.s index 396e302..059f591 100644 --- a/test/MC/X86/x86_nop.s +++ b/test/MC/X86/x86_nop.s @@ -1,13 +1,36 @@ -# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=generic %s | llvm-objdump -d - | FileCheck %s -# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=i386 %s | llvm-objdump -d - | FileCheck %s -# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=i486 %s | llvm-objdump -d - | FileCheck %s -# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=i586 %s | llvm-objdump -d - | FileCheck %s -# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=pentium %s | llvm-objdump -d - | FileCheck %s -# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=pentium-mmx %s | llvm-objdump -d - | FileCheck %s -# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=geode %s | llvm-objdump -d - | FileCheck %s -# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=i686 %s | llvm-objdump -d - | not FileCheck %s - -# CHECK-NOT: nop{{[lw]}} +# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=generic 
%s | llvm-objdump -d - | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=i386 %s | llvm-objdump -d - | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=i486 %s | llvm-objdump -d - | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=i586 %s | llvm-objdump -d - | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=pentium %s | llvm-objdump -d - | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=pentium-mmx %s | llvm-objdump -d - | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=geode %s | llvm-objdump -d - | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=i686 %s | llvm-objdump -d - | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=k6 %s | llvm-objdump -d - | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=k6-2 %s | llvm-objdump -d - | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=k6-3 %s | llvm-objdump -d - | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=winchip-c6 %s | llvm-objdump -d - | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=winchip2 %s | llvm-objdump -d - | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=c3 %s | llvm-objdump -d - | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=c3-2 %s | llvm-objdump -d - | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=core2 %s | llvm-objdump -d - | FileCheck --check-prefix=NOPL %s + + inc %eax .align 8 inc %eax + +// CHECK: 0: 40 incl %eax +// CHECK: 1: 90 nop +// CHECK: 2: 90 nop +// CHECK: 3: 90 nop +// CHECK: 4: 90 nop +// CHECK: 5: 90 nop +// CHECK: 6: 90 nop +// CHECK: 7: 90 nop +// CHECK: 8: 40 incl %eax + + +// NOPL: 0: 40 incl %eax +// NOPL: 1: 0f 1f 80 00 00 00 00 nopl (%eax) +// NOPL: 8: 40 incl %eax diff --git a/test/Makefile b/test/Makefile index 7204147..d3227dd 100644 
--- a/test/Makefile +++ b/test/Makefile @@ -31,16 +31,10 @@ endif ifdef TESTSUITE LIT_TESTSUITE := $(TESTSUITE) -CLEANED_TESTSUITE := $(patsubst %/,%,$(TESTSUITE)) -CLEANED_TESTSUITE := $(patsubst test/%,%,$(CLEANED_TESTSUITE)) else LIT_TESTSUITE := . endif -ifdef VG -VALGRIND := valgrind --tool=memcheck --quiet --trace-children=yes --error-exitcode=3 --leak-check=full $(VALGRIND_EXTRA_ARGS) -endif - # Check what to run for -all. LIT_ALL_TESTSUITES := $(LIT_TESTSUITE) @@ -122,16 +116,6 @@ else ENABLE_ASSERTIONS=1 endif -# Derive whether or not LTO is enabled by checking the extra options. -LTO_IS_ENABLED := 0 -ifneq ($(findstring -flto,$(CompileCommonOpts)),) -LTO_IS_ENABLED := 1 -else -ifneq ($(findstring -O4,$(CompileCommonOpts)),) -LTO_IS_ENABLED := 1 -endif -endif - lit.site.cfg: FORCE @echo "Making LLVM 'lit.site.cfg' file..." @$(ECHOPATH) s=@LLVM_HOST_TRIPLE@=$(HOST_TRIPLE)=g > lit.tmp @@ -142,10 +126,9 @@ lit.site.cfg: FORCE @$(ECHOPATH) s=@SHLIBDIR@=$(SharedLibDir)=g >> lit.tmp @$(ECHOPATH) s=@SHLIBEXT@=$(SHLIBEXT)=g >> lit.tmp @$(ECHOPATH) s=@PYTHON_EXECUTABLE@=$(PYTHON)=g >> lit.tmp - @$(ECHOPATH) s=@OCAMLOPT@=$(OCAMLOPT) -cc $(subst *,'\\\"',*$(subst =,"\\=",$(CXX_FOR_OCAMLOPT))*) -I $(LibDir)/ocaml=g >> lit.tmp + @$(ECHOPATH) s=@OCAMLOPT@=$(OCAMLOPT) -cc $(subst *,'\\\"',*$(subst =,"\\=",$(CXX_FOR_OCAMLOPT))*) -cclib -L$(LibDir) -I $(LibDir)/ocaml=g >> lit.tmp @$(ECHOPATH) s=@ENABLE_SHARED@=$(ENABLE_SHARED)=g >> lit.tmp @$(ECHOPATH) s=@ENABLE_ASSERTIONS@=$(ENABLE_ASSERTIONS)=g >> lit.tmp - @$(ECHOPATH) s=@LTO_IS_ENABLED@=$(LTO_IS_ENABLED)=g >> lit.tmp @$(ECHOPATH) s=@TARGETS_TO_BUILD@=$(TARGETS_TO_BUILD)=g >> lit.tmp @$(ECHOPATH) s=@LLVM_BINDINGS@=$(BINDINGS_TO_BUILD)=g >> lit.tmp @$(ECHOPATH) s=@HOST_OS@=$(HOST_OS)=g >> lit.tmp diff --git a/test/Object/Inputs/corrupt-version.elf-x86_64 b/test/Object/Inputs/corrupt-version.elf-x86_64 Binary files differnew file mode 100644 index 0000000..1241a27 --- /dev/null +++ 
b/test/Object/Inputs/corrupt-version.elf-x86_64 diff --git a/test/Object/Inputs/corrupt.elf-x86-64 b/test/Object/Inputs/corrupt.elf-x86-64 Binary files differnew file mode 100644 index 0000000..8ae5f17 --- /dev/null +++ b/test/Object/Inputs/corrupt.elf-x86-64 diff --git a/test/Object/Inputs/program-headers.mips b/test/Object/Inputs/program-headers.mips Binary files differnew file mode 100755 index 0000000..54ebfea --- /dev/null +++ b/test/Object/Inputs/program-headers.mips diff --git a/test/Object/Inputs/trivial-executable-test.macho-x86-64 b/test/Object/Inputs/trivial-executable-test.macho-x86-64 Binary files differnew file mode 100755 index 0000000..50a6bab --- /dev/null +++ b/test/Object/Inputs/trivial-executable-test.macho-x86-64 diff --git a/test/Object/Inputs/weak-global-symbol.macho-i386 b/test/Object/Inputs/weak-global-symbol.macho-i386 Binary files differnew file mode 100644 index 0000000..a9c8e0c --- /dev/null +++ b/test/Object/Inputs/weak-global-symbol.macho-i386 diff --git a/test/Object/Mips/lit.local.cfg b/test/Object/Mips/lit.local.cfg index 1499317..88262fb 100644 --- a/test/Object/Mips/lit.local.cfg +++ b/test/Object/Mips/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.test'] - targets = set(config.root.targets_to_build.split()) if not 'Mips' in targets: config.unsupported = True diff --git a/test/Object/X86/lit.local.cfg b/test/Object/X86/lit.local.cfg index 6a29e92..ba763cf 100644 --- a/test/Object/X86/lit.local.cfg +++ b/test/Object/X86/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.test'] - targets = set(config.root.targets_to_build.split()) if not 'X86' in targets: config.unsupported = True diff --git a/test/Object/X86/objdump-cfg-invalid-opcode.yaml b/test/Object/X86/objdump-cfg-invalid-opcode.yaml new file mode 100644 index 0000000..56ab1d2 --- /dev/null +++ b/test/Object/X86/objdump-cfg-invalid-opcode.yaml @@ -0,0 +1,58 @@ +# RUN: yaml2obj -format=elf %s | llvm-objdump -d -yaml-cfg=%t - && FileCheck --check-prefix=CFG < %t %s +# 
REQUIRES: shell +# +# Generated from: +# main: +# .LBL0_1: +# movq 8(%rsi), %rax +# <invalid opcode: 06> +# nop + +!ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Content: "488B46080690" + +## 0000000000000000 <main>: + +#CFG: Atoms: +#CFG: - StartAddress: 0x0000000000000000 +#CFG: Size: 4 +#CFG: Type: Text + +## 0: 48 8b 46 08 mov 0x8(%rsi),%rax +#CFG: - Inst: MOV64rm +#CFG: Size: 4 +#CFG: Ops: [ RRAX, RRSI, I1, R, I8, R ] + + +#CFG: - StartAddress: 0x0000000000000004 +#CFG: Size: 1 +#CFG: Type: Data + +## 4: 06 (bad) +#CFG: Content: 06 + +#CFG: - StartAddress: 0x0000000000000005 +#CFG: Size: 1 +#CFG: Type: Text + +## 5: 90 nop +#CFG: - Inst: NOOP +#CFG: Size: 1 +#CFG: Ops: [ ] + +Symbols: + Global: + - Name: main + Type: STT_FUNC + Section: .text + Value: 0x0 + Size: 6 diff --git a/test/Object/X86/objdump-cfg-textatomsize.yaml b/test/Object/X86/objdump-cfg-textatomsize.yaml new file mode 100644 index 0000000..87cb4e1 --- /dev/null +++ b/test/Object/X86/objdump-cfg-textatomsize.yaml @@ -0,0 +1,39 @@ +# RUN: yaml2obj -format=elf %s | llvm-objdump -d -yaml-cfg=%t - && FileCheck --check-prefix=CFG < %t %s +# REQUIRES: shell +# +# Generated from: +# main: +# .LBL0_1: +# jmp .LBL0_1 +# + +!ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Content: "EBFE" + +## 0000000000000000 <main>: + +#CFG: Atoms: +#CFG: - StartAddress: 0x0000000000000000 +#CFG: Size: 2 + +## 0: eb fe jmp $-2 +#CFG: - Inst: JMP_1 +#CFG: Size: 2 +#CFG: Ops: [ I-2 ] + +Symbols: + Global: + - Name: main + Type: STT_FUNC + Section: .text + Value: 0x0 + Size: 2 diff --git a/test/Object/X86/objdump-cfg.yaml b/test/Object/X86/objdump-cfg.yaml new file mode 100644 index 0000000..c5bff03 --- /dev/null +++ b/test/Object/X86/objdump-cfg.yaml @@ -0,0 
+1,86 @@ +# RUN: yaml2obj -format=elf %s | llvm-objdump -d -yaml-cfg=%t - && FileCheck --check-prefix=CFG < %t %s +# REQUIRES: shell +# +# Generated from: +# main: +# movl $48, %eax +# cmpl $3, %edi +# jl .LBB0_2 +# movq 8(%rsi), %rax +# movsbl (%rax), %eax +# .LBB0_2: +# ret +# + +!ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Content: "B83000000083FF037C07488B46080FBE00C3" + +## 0000000000000000 <main>: + +#CFG: Atoms: +#CFG: - StartAddress: 0x0000000000000000 +#CFG: Size: 10 + +## 0: b8 30 00 00 00 mov $0x30,%eax +#CFG: - Inst: MOV32ri +#CFG: Size: 5 +#CFG: Ops: [ REAX, I48 ] + +## 5: 83 ff 03 cmp $0x3,%edi +#CFG: - Inst: CMP32ri8 +#CFG: Size: 3 +#CFG: Ops: [ REDI, I3 ] + +## 8: 7c 07 jl 11 <main+0x11> +#CFG: - Inst: JL_1 +#CFG: Size: 2 +#CFG: Ops: [ I7 ] + +#CFG: - StartAddress: 0x000000000000000A +#CFG: Size: 7 + +## a: 48 8b 46 08 mov 0x8(%rsi),%rax +#CFG: - Inst: MOV64rm +#CFG: Size: 4 +#CFG: Ops: [ RRAX, RRSI, I1, R, I8, R ] + +## e: 0f be 00 movsbl (%rax),%eax +#CFG: - Inst: MOVSX32rm8 +#CFG: Size: 3 +#CFG: Ops: [ REAX, RRAX, I1, R, I0, R ] +#CFG: - StartAddress: 0x0000000000000011 +#CFG: Size: 1 + +## 11: c3 retq +#CFG: - Inst: RET +#CFG: Size: 1 +#CFG: Ops: [ ] + +Symbols: + Global: + - Name: main + Type: STT_FUNC + Section: .text + Value: 0x0 + Size: 18 + +#CFG: Functions: +#CFG: BasicBlocks: +#CFG: - Address: 0x0000000000000000 +#CFG: Preds: [ ] +#CFG: Succs: [ 0x0000000000000011, 0x000000000000000A ] +#CFG: - Address: 0x0000000000000011 +#CFG: Preds: [ 0x0000000000000000, 0x000000000000000A ] +#CFG: Succs: [ ] +#CFG: - Address: 0x000000000000000A +#CFG: Preds: [ 0x0000000000000000 ] +#CFG: Succs: [ 0x0000000000000011 ] diff --git a/test/Object/X86/objdump-disassembly-symbolic.test b/test/Object/X86/objdump-disassembly-symbolic.test index 667bce9..858653e 100644 --- a/test/Object/X86/objdump-disassembly-symbolic.test 
+++ b/test/Object/X86/objdump-disassembly-symbolic.test @@ -3,6 +3,11 @@ RUN: | FileCheck %s -check-prefix ELF-x86-64 RUN: llvm-objdump -d -symbolize %p/../Inputs/trivial-object-test.macho-x86-64 \ RUN: | FileCheck %s -check-prefix MACHO-x86-64 +# Generate this using: +# ld trivial-object-test.macho-x86-64 -undefined dynamic_lookup +RUN: llvm-objdump -d -symbolize %p/../Inputs/trivial-executable-test.macho-x86-64 \ +RUN: | FileCheck %s -check-prefix MACHO-STUBS-x86-64 + ELF-x86-64: file format ELF64-x86-64 ELF-x86-64: Disassembly of section .text: ELF-x86-64: main: @@ -28,3 +33,16 @@ MACHO-x86-64: 1a: e8 00 00 00 00 callq _Som MACHO-x86-64: 1f: 8b 44 24 04 movl 4(%rsp), %eax MACHO-x86-64: 23: 48 83 c4 08 addq $8, %rsp MACHO-x86-64: 27: c3 ret + +MACHO-STUBS-x86-64: file format Mach-O 64-bit x86-64 +MACHO-STUBS-x86-64: Disassembly of section __TEXT,__text: +MACHO-STUBS-x86-64: _main: +MACHO-STUBS-x86-64: 1f90: 48 83 ec 08 subq $8, %rsp +MACHO-STUBS-x86-64: 1f94: c7 44 24 04 00 00 00 00 movl $0, 4(%rsp) +MACHO-STUBS-x86-64: 1f9c: 48 8d 3d 45 00 00 00 leaq 69(%rip), %rdi ## literal pool for: Hello World! +MACHO-STUBS-x86-64: 1fa3: e8 16 00 00 00 callq puts +MACHO-STUBS-x86-64: 1fa8: 30 c0 xorb %al, %al +MACHO-STUBS-x86-64: 1faa: e8 09 00 00 00 callq SomeOtherFunction +MACHO-STUBS-x86-64: 1faf: 8b 44 24 04 movl 4(%rsp), %eax +MACHO-STUBS-x86-64: 1fb3: 48 83 c4 08 addq $8, %rsp +MACHO-STUBS-x86-64: 1fb7: c3 ret diff --git a/test/Object/archive-symtab.test b/test/Object/archive-symtab.test index 0d2504d..6379504 100644 --- a/test/Object/archive-symtab.test +++ b/test/Object/archive-symtab.test @@ -48,3 +48,12 @@ CORRUPT-NEXT: 00000016 T main check that the we *don't* update the symbol table. 
RUN: llvm-ar s %t.a RUN: llvm-nm -s %t.a | FileCheck %s --check-prefix=CORRUPT + +repeate the test with llvm-ranlib + +RUN: rm -f %t.a +RUN: llvm-ar rcS %t.a %p/Inputs/trivial-object-test.elf-x86-64 %p/Inputs/trivial-object-test2.elf-x86-64 +RUN: llvm-nm -s %t.a | FileCheck %s --check-prefix=NOMAP + +RUN: llvm-ranlib %t.a +RUN: llvm-nm -s %t.a | FileCheck %s diff --git a/test/Object/corrupt.test b/test/Object/corrupt.test new file mode 100644 index 0000000..ef72a09 --- /dev/null +++ b/test/Object/corrupt.test @@ -0,0 +1,24 @@ +// Section name offset overflows section name string table. +RUN: not llvm-readobj %p/Inputs/corrupt.elf-x86-64 -sections \ +RUN: 2>&1 | FileCheck --check-prefix=SECNAME %s + +// Section data offset past end of file. +RUN: not llvm-readobj %p/Inputs/corrupt.elf-x86-64 -sections -section-data \ +RUN: 2>&1 | FileCheck --check-prefix=SECDATA %s + +// Symbol name offset overflows string table. +RUN: not llvm-readobj %p/Inputs/corrupt.elf-x86-64 -symbols \ +RUN: 2>&1 | FileCheck --check-prefix=SYMNAME %s + +// Version index in .gnu.version overflows the version map. +RUN: not llvm-readobj %p/Inputs/corrupt-version.elf-x86_64 -dt \ +RUN: 2>&1 | FileCheck --check-prefix=VER %s + +SECNAME: Error reading file: Invalid data was encountered while parsing the file. + +SECDATA: Error reading file: Invalid data was encountered while parsing the file. +SECDATA: Error reading file: Invalid data was encountered while parsing the file. + +SYMNAME: Error reading file: Invalid data was encountered while parsing the file. + +VER: Error reading file: Invalid data was encountered while parsing the file. diff --git a/test/Object/directory.ll b/test/Object/directory.ll index bf8ff32..48eefcb 100644 --- a/test/Object/directory.ll +++ b/test/Object/directory.ll @@ -2,9 +2,6 @@ ;RUN: not llvm-ar r %T/test.a . 2>&1 | FileCheck %s ;CHECK: .: Is a directory -; Opening a directory works on cygwin and freebsd. 
-;XFAIL: freebsd, cygwin - ;RUN: rm -f %T/test.a ;RUN: touch %T/a-very-long-file-name ;RUN: llvm-ar r %T/test.a %s %T/a-very-long-file-name diff --git a/test/Object/nm-trivial-object.test b/test/Object/nm-trivial-object.test index d517745..748d6f2 100644 --- a/test/Object/nm-trivial-object.test +++ b/test/Object/nm-trivial-object.test @@ -25,10 +25,10 @@ ELF: U puts macho: 00000000 U _SomeOtherFunction -macho: 00000000 s _main +macho: 00000000 T _main macho: 00000000 U _puts macho64: 00000028 s L_.str -macho64: 00000000 u _SomeOtherFunction -macho64: 00000000 s _main -macho64: 00000000 u _puts +macho64: 00000000 U _SomeOtherFunction +macho64: 00000000 T _main +macho64: 00000000 U _puts diff --git a/test/Object/nm-weak-global-macho.test b/test/Object/nm-weak-global-macho.test new file mode 100644 index 0000000..ede2609 --- /dev/null +++ b/test/Object/nm-weak-global-macho.test @@ -0,0 +1,3 @@ +RUN: llvm-nm %p/Inputs/weak-global-symbol.macho-i386 | FileCheck %s + +CHECK: 00000000 S _a diff --git a/test/Other/X86/lit.local.cfg b/test/Other/X86/lit.local.cfg index da2db5a..ba763cf 100644 --- a/test/Other/X86/lit.local.cfg +++ b/test/Other/X86/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll'] - targets = set(config.root.targets_to_build.split()) if not 'X86' in targets: config.unsupported = True diff --git a/test/Other/attribute-comment.ll b/test/Other/attribute-comment.ll index 7354e7f..d12b179 100644 --- a/test/Other/attribute-comment.ll +++ b/test/Other/attribute-comment.ll @@ -6,4 +6,4 @@ define void @test1() #0 { ret void } -attributes #0 = { nounwind ssp "less-precise-fpmad"="false" uwtable "no-frame-pointer-elim"="true" readnone "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind ssp "less-precise-fpmad"="false" uwtable "no-frame-pointer-elim"="true" readnone "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" 
"no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/Other/close-stderr.ll b/test/Other/close-stderr.ll index 6e180cd..b310bc2 100644 --- a/test/Other/close-stderr.ll +++ b/test/Other/close-stderr.ll @@ -9,8 +9,5 @@ ; XFAIL: vg_leak ; REQUIRES: shell -; opt will fail to open /dev/null on native win32. -; XFAIL: win32 - ; Test that the error handling when writing to stderr fails exits the ; program cleanly rather than aborting. diff --git a/test/Other/constant-fold-gep-address-spaces.ll b/test/Other/constant-fold-gep-address-spaces.ll new file mode 100644 index 0000000..f6abe74 --- /dev/null +++ b/test/Other/constant-fold-gep-address-spaces.ll @@ -0,0 +1,235 @@ +; "PLAIN" - No optimizations. This tests the target-independent +; constant folder. +; RUN: opt -S -o - %s | FileCheck --check-prefix=PLAIN %s + +target datalayout = "e-p:128:128:128-p1:32:32:32-p2:8:8:8-p3:16:16:16-p4:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32" + +; The automatic constant folder in opt does not have targetdata access, so +; it can't fold gep arithmetic, in general. However, the constant folder run +; from instcombine and global opt can use targetdata. 
+; PLAIN: @G8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -1) +@G8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -1) +; PLAIN: @G1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i8 1 to i1 addrspace(2)*), i8 -1) +@G1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i8 1 to i1 addrspace(2)*), i8 -1) +; PLAIN: @F8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -2) +@F8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -2) +; PLAIN: @F1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i8 1 to i1 addrspace(2)*), i8 -2) +@F1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i8 1 to i1 addrspace(2)*), i8 -2) +; PLAIN: @H8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* null, i32 -1) +@H8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 0 to i8 addrspace(1)*), i32 -1) +; PLAIN: @H1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* null, i8 -1) +@H1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i8 0 to i1 addrspace(2)*), i8 -1) + + +; The target-independent folder should be able to do some clever +; simplifications on sizeof, alignof, and offsetof expressions. The +; target-dependent folder should fold these down to constants. 
+; PLAIN-X: @a = constant i64 mul (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 2310) +@a = constant i64 mul (i64 3, i64 mul (i64 ptrtoint ({[7 x double], [7 x double]} addrspace(4)* getelementptr ({[7 x double], [7 x double]} addrspace(4)* null, i64 11) to i64), i64 5)) + +; PLAIN-X: @b = constant i64 ptrtoint (double addrspace(4)* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64) +@b = constant i64 ptrtoint ([13 x double] addrspace(4)* getelementptr ({i1, [13 x double]} addrspace(4)* null, i64 0, i32 1) to i64) + +; PLAIN-X: @c = constant i64 mul nuw (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 2) +@c = constant i64 ptrtoint (double addrspace(4)* getelementptr ({double, double, double, double} addrspace(4)* null, i64 0, i32 2) to i64) + +; PLAIN-X: @d = constant i64 mul nuw (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 11) +@d = constant i64 ptrtoint (double addrspace(4)* getelementptr ([13 x double] addrspace(4)* null, i64 0, i32 11) to i64) + +; PLAIN-X: @e = constant i64 ptrtoint (double addrspace(4)* getelementptr ({ double, float, double, double }* null, i64 0, i32 2) to i64) +@e = constant i64 ptrtoint (double addrspace(4)* getelementptr ({double, float, double, double} addrspace(4)* null, i64 0, i32 2) to i64) + +; PLAIN-X: @f = constant i64 1 +@f = constant i64 ptrtoint (<{ i16, i128 }> addrspace(4)* getelementptr ({i1, <{ i16, i128 }>} addrspace(4)* null, i64 0, i32 1) to i64) + +; PLAIN-X: @g = constant i64 ptrtoint (double addrspace(4)* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64) +@g = constant i64 ptrtoint ({double, double} addrspace(4)* getelementptr ({i1, {double, double}} addrspace(4)* null, i64 0, i32 1) to i64) + +; PLAIN-X: @h = constant i64 ptrtoint (i1 addrspace(2)* getelementptr (i1 addrspace(2)* null, i32 1) to i64) +@h = constant i64 ptrtoint (double 
addrspace(4)* getelementptr (double addrspace(4)* null, i64 1) to i64) + +; PLAIN-X: @i = constant i64 ptrtoint (i1 addrspace(2)* getelementptr ({ i1, i1 addrspace(2)* }* null, i64 0, i32 1) to i64) +@i = constant i64 ptrtoint (double addrspace(4)* getelementptr ({i1, double} addrspace(4)* null, i64 0, i32 1) to i64) + +; The target-dependent folder should cast GEP indices to integer-sized pointers. + +; PLAIN: @M = constant i64 addrspace(4)* getelementptr (i64 addrspace(4)* null, i32 1) +; PLAIN: @N = constant i64 addrspace(4)* getelementptr ({ i64, i64 } addrspace(4)* null, i32 0, i32 1) +; PLAIN: @O = constant i64 addrspace(4)* getelementptr ([2 x i64] addrspace(4)* null, i32 0, i32 1) + +@M = constant i64 addrspace(4)* getelementptr (i64 addrspace(4)* null, i32 1) +@N = constant i64 addrspace(4)* getelementptr ({ i64, i64 } addrspace(4)* null, i32 0, i32 1) +@O = constant i64 addrspace(4)* getelementptr ([2 x i64] addrspace(4)* null, i32 0, i32 1) + +; Fold GEP of a GEP. Very simple cases are folded. + +; PLAIN-X: @Y = global [3 x { i32, i32 }]addrspace(3)* getelementptr inbounds ([3 x { i32, i32 }]addrspace(3)* @ext, i64 2) +@ext = external addrspace(3) global [3 x { i32, i32 }] +@Y = global [3 x { i32, i32 }]addrspace(3)* getelementptr inbounds ([3 x { i32, i32 }]addrspace(3)* getelementptr inbounds ([3 x { i32, i32 }]addrspace(3)* @ext, i64 1), i64 1) + +; PLAIN-X: @Z = global i32addrspace(3)* getelementptr inbounds (i32addrspace(3)* getelementptr inbounds ([3 x { i32, i32 }]addrspace(3)* @ext, i64 0, i64 1, i32 0), i64 1) +@Z = global i32addrspace(3)* getelementptr inbounds (i32addrspace(3)* getelementptr inbounds ([3 x { i32, i32 }]addrspace(3)* @ext, i64 0, i64 1, i32 0), i64 1) + + +; Duplicate all of the above as function return values rather than +; global initializers. 
+ +; PLAIN: define i8 addrspace(1)* @goo8() #0 { +; PLAIN: %t = bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -1) to i8 addrspace(1)* +; PLAIN: ret i8 addrspace(1)* %t +; PLAIN: } +; PLAIN: define i1 addrspace(2)* @goo1() #0 { +; PLAIN: %t = bitcast i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i32 1 to i1 addrspace(2)*), i32 -1) to i1 addrspace(2)* +; PLAIN: ret i1 addrspace(2)* %t +; PLAIN: } +; PLAIN: define i8 addrspace(1)* @foo8() #0 { +; PLAIN: %t = bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -2) to i8 addrspace(1)* +; PLAIN: ret i8 addrspace(1)* %t +; PLAIN: } +; PLAIN: define i1 addrspace(2)* @foo1() #0 { +; PLAIN: %t = bitcast i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i32 1 to i1 addrspace(2)*), i32 -2) to i1 addrspace(2)* +; PLAIN: ret i1 addrspace(2)* %t +; PLAIN: } +; PLAIN: define i8 addrspace(1)* @hoo8() #0 { +; PLAIN: %t = bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* null, i32 -1) to i8 addrspace(1)* +; PLAIN: ret i8 addrspace(1)* %t +; PLAIN: } +; PLAIN: define i1 addrspace(2)* @hoo1() #0 { +; PLAIN: %t = bitcast i1 addrspace(2)* getelementptr (i1 addrspace(2)* null, i32 -1) to i1 addrspace(2)* +; PLAIN: ret i1 addrspace(2)* %t +; PLAIN: } +define i8 addrspace(1)* @goo8() #0 { + %t = bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -1) to i8 addrspace(1)* + ret i8 addrspace(1)* %t +} +define i1 addrspace(2)* @goo1() #0 { + %t = bitcast i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i32 1 to i1 addrspace(2)*), i32 -1) to i1 addrspace(2)* + ret i1 addrspace(2)* %t +} +define i8 addrspace(1)* @foo8() #0 { + %t = bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -2) to i8 addrspace(1)* + ret i8 addrspace(1)* %t +} +define i1 addrspace(2)* @foo1() #0 { + %t = bitcast i1 addrspace(2)* getelementptr 
(i1 addrspace(2)* inttoptr (i32 1 to i1 addrspace(2)*), i32 -2) to i1 addrspace(2)* + ret i1 addrspace(2)* %t +} +define i8 addrspace(1)* @hoo8() #0 { + %t = bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 0 to i8 addrspace(1)*), i32 -1) to i8 addrspace(1)* + ret i8 addrspace(1)* %t +} +define i1 addrspace(2)* @hoo1() #0 { + %t = bitcast i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i32 0 to i1 addrspace(2)*), i32 -1) to i1 addrspace(2)* + ret i1 addrspace(2)* %t +} + +; PLAIN-X: define i64 @fa() #0 { +; PLAIN-X: %t = bitcast i64 mul (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 2310) to i64 +; PLAIN-X: ret i64 %t +; PLAIN-X: } +; PLAIN-X: define i64 @fb() #0 { +; PLAIN-X: %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64) to i64 +; PLAIN-X: ret i64 %t +; PLAIN-X: } +; PLAIN-X: define i64 @fc() #0 { +; PLAIN-X: %t = bitcast i64 mul nuw (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 2) to i64 +; PLAIN-X: ret i64 %t +; PLAIN-X: } +; PLAIN-X: define i64 @fd() #0 { +; PLAIN-X: %t = bitcast i64 mul nuw (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 11) to i64 +; PLAIN-X: ret i64 %t +; PLAIN-X: } +; PLAIN-X: define i64 @fe() #0 { +; PLAIN-X: %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({ double, float, double, double }* null, i64 0, i32 2) to i64) to i64 +; PLAIN-X: ret i64 %t +; PLAIN-X: } +; PLAIN-X: define i64 @ff() #0 { +; PLAIN-X: %t = bitcast i64 1 to i64 +; PLAIN-X: ret i64 %t +; PLAIN-X: } +; PLAIN-X: define i64 @fg() #0 { +; PLAIN-X: %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64) to i64 +; PLAIN-X: ret i64 %t +; PLAIN-X: } +; PLAIN-X: define i64 @fh() #0 { +; PLAIN-X: %t = bitcast i64 ptrtoint (i1 addrspace(2)* getelementptr (i1 addrspace(2)* 
null, i32 1) to i64) to i64 +; PLAIN-X: ret i64 %t +; PLAIN-X: } +; PLAIN-X: define i64 @fi() #0 { +; PLAIN-X: %t = bitcast i64 ptrtoint (i1 addrspace(2)* getelementptr ({ i1, i1 addrspace(2)* }* null, i64 0, i32 1) to i64) to i64 +; PLAIN-X: ret i64 %t +; PLAIN-X: } +define i64 @fa() #0 { + %t = bitcast i64 mul (i64 3, i64 mul (i64 ptrtoint ({[7 x double], [7 x double]}* getelementptr ({[7 x double], [7 x double]}* null, i64 11) to i64), i64 5)) to i64 + ret i64 %t +} +define i64 @fb() #0 { + %t = bitcast i64 ptrtoint ([13 x double] addrspace(4)* getelementptr ({i1, [13 x double]} addrspace(4)* null, i64 0, i32 1) to i64) to i64 + ret i64 %t +} +define i64 @fc() #0 { + %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({double, double, double, double} addrspace(4)* null, i64 0, i32 2) to i64) to i64 + ret i64 %t +} +define i64 @fd() #0 { + %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ([13 x double] addrspace(4)* null, i64 0, i32 11) to i64) to i64 + ret i64 %t +} +define i64 @fe() #0 { + %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({double, float, double, double} addrspace(4)* null, i64 0, i32 2) to i64) to i64 + ret i64 %t +} +define i64 @ff() #0 { + %t = bitcast i64 ptrtoint (<{ i16, i128 }> addrspace(4)* getelementptr ({i1, <{ i16, i128 }>} addrspace(4)* null, i64 0, i32 1) to i64) to i64 + ret i64 %t +} +define i64 @fg() #0 { + %t = bitcast i64 ptrtoint ({double, double} addrspace(4)* getelementptr ({i1, {double, double}} addrspace(4)* null, i64 0, i32 1) to i64) to i64 + ret i64 %t +} +define i64 @fh() #0 { + %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64) to i64 + ret i64 %t +} +define i64 @fi() #0 { + %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({i1, double}addrspace(4)* null, i64 0, i32 1) to i64) to i64 + ret i64 %t +} + +; PLAIN: define i64* @fM() #0 { +; PLAIN: %t = bitcast i64* getelementptr (i64* null, i32 1) to i64* +; PLAIN: ret 
i64* %t +; PLAIN: } +; PLAIN: define i64* @fN() #0 { +; PLAIN: %t = bitcast i64* getelementptr ({ i64, i64 }* null, i32 0, i32 1) to i64* +; PLAIN: ret i64* %t +; PLAIN: } +; PLAIN: define i64* @fO() #0 { +; PLAIN: %t = bitcast i64* getelementptr ([2 x i64]* null, i32 0, i32 1) to i64* +; PLAIN: ret i64* %t +; PLAIN: } + +define i64* @fM() #0 { + %t = bitcast i64* getelementptr (i64* null, i32 1) to i64* + ret i64* %t +} +define i64* @fN() #0 { + %t = bitcast i64* getelementptr ({ i64, i64 }* null, i32 0, i32 1) to i64* + ret i64* %t +} +define i64* @fO() #0 { + %t = bitcast i64* getelementptr ([2 x i64]* null, i32 0, i32 1) to i64* + ret i64* %t +} + +; PLAIN: define i32 addrspace(1)* @fZ() #0 { +; PLAIN: %t = bitcast i32 addrspace(1)* getelementptr inbounds (i32 addrspace(1)* getelementptr inbounds ([3 x { i32, i32 }] addrspace(1)* @ext2, i64 0, i64 1, i32 0), i64 1) to i32 addrspace(1)* +; PLAIN: ret i32 addrspace(1)* %t +; PLAIN: } +@ext2 = external addrspace(1) global [3 x { i32, i32 }] +define i32 addrspace(1)* @fZ() #0 { + %t = bitcast i32 addrspace(1)* getelementptr inbounds (i32 addrspace(1)* getelementptr inbounds ([3 x { i32, i32 }] addrspace(1)* @ext2, i64 0, i64 1, i32 0), i64 1) to i32 addrspace(1)* + ret i32 addrspace(1)* %t +} + +attributes #0 = { nounwind } diff --git a/test/Other/constant-fold-gep.ll b/test/Other/constant-fold-gep.ll index 44b6628..aed4145 100644 --- a/test/Other/constant-fold-gep.ll +++ b/test/Other/constant-fold-gep.ll @@ -454,10 +454,10 @@ define i32* @fZ() nounwind { define i8* @different_addrspace() nounwind noinline { ; OPT: different_addrspace - %p = getelementptr inbounds i8* bitcast ([4 x i8] addrspace(12)* @p12 to i8*), + %p = getelementptr inbounds i8* addrspacecast ([4 x i8] addrspace(12)* @p12 to i8*), i32 2 ret i8* %p -; OPT: ret i8* getelementptr (i8* bitcast ([4 x i8] addrspace(12)* @p12 to i8*), i32 2) +; OPT: ret i8* getelementptr (i8* addrspacecast ([4 x i8] addrspace(12)* @p12 to i8*), i32 2) } define i8* 
@same_addrspace() nounwind noinline { diff --git a/test/Other/extract-alias.ll b/test/Other/extract-alias.ll index d5bab4b..d1e4af5 100644 --- a/test/Other/extract-alias.ll +++ b/test/Other/extract-alias.ll @@ -1,7 +1,7 @@ ; RUN: llvm-extract -func foo -S < %s | FileCheck %s ; RUN: llvm-extract -delete -func foo -S < %s | FileCheck --check-prefix=DELETE %s ; RUN: llvm-extract -alias zeda0 -S < %s | FileCheck --check-prefix=ALIAS %s -; RUN: llvm-extract -ralias .*bar -S < %s | FileCheck --check-prefix=ALIASRE %s +; RUN: llvm-extract -ralias '.*bar' -S < %s | FileCheck --check-prefix=ALIASRE %s ; Both aliases should be converted to declarations ; CHECK: @zeda0 = external global i32 diff --git a/test/Other/extract-linkonce.ll b/test/Other/extract-linkonce.ll index 31fbf3a..4c6b6b7 100644 --- a/test/Other/extract-linkonce.ll +++ b/test/Other/extract-linkonce.ll @@ -1,15 +1,16 @@ ; RUN: llvm-extract -func foo -S < %s | FileCheck %s ; RUN: llvm-extract -delete -func foo -S < %s | FileCheck --check-prefix=DELETE %s -; Test that we don't convert weak_odr to external definitions. +; Test that linkonce definitions are mapped to weak so that they are not +; dropped. 
-; CHECK: @bar = external hidden global i32 -; CHECK: define hidden i32* @foo() { +; CHECK: @bar = external global i32 +; CHECK: define weak i32* @foo() { ; CHECK-NEXT: ret i32* @bar ; CHECK-NEXT: } -; DELETE: @bar = hidden global i32 42 -; DELETE: declare hidden i32* @foo() +; DELETE: @bar = weak global i32 42 +; DELETE: declare i32* @foo() @bar = linkonce global i32 42 diff --git a/test/Other/lit.local.cfg b/test/Other/lit.local.cfg deleted file mode 100644 index 67c7ec7..0000000 --- a/test/Other/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp', '.txt', '.test'] diff --git a/test/Other/optimize-options.ll b/test/Other/optimize-options.ll index 888a78f..22dd842 100644 --- a/test/Other/optimize-options.ll +++ b/test/Other/optimize-options.ll @@ -1,8 +1,8 @@ -;RUN: opt -S -O1 -debug-pass=Arguments 2>&1 | FileCheck %s -;RUN: opt -S -O2 -debug-pass=Arguments 2>&1 | FileCheck %s -;RUN: opt -S -Os -debug-pass=Arguments 2>&1 | FileCheck %s -;RUN: opt -S -Oz -debug-pass=Arguments 2>&1 | FileCheck %s -;RUN: opt -S -O3 -debug-pass=Arguments 2>&1 | FileCheck %s +;RUN: opt -S -O1 -debug-pass=Arguments %s 2>&1 | FileCheck %s +;RUN: opt -S -O2 -debug-pass=Arguments %s 2>&1 | FileCheck %s +;RUN: opt -S -Os -debug-pass=Arguments %s 2>&1 | FileCheck %s +;RUN: opt -S -Oz -debug-pass=Arguments %s 2>&1 | FileCheck %s +;RUN: opt -S -O3 -debug-pass=Arguments %s 2>&1 | FileCheck %s -; Just check that we get a non-empty set of passes for each -O opton. +; Just check that we get a non-empty set of passes for each -O option. 
;CHECK: Pass Arguments: {{.*}} -print-module diff --git a/test/TableGen/2003-08-03-PassCode.td b/test/TableGen/2003-08-03-PassCode.td index de7d626..b851a15 100644 --- a/test/TableGen/2003-08-03-PassCode.td +++ b/test/TableGen/2003-08-03-PassCode.td @@ -1,5 +1,4 @@ // RUN: llvm-tblgen %s -// XFAIL: vg_leak class test<code C> { code Code = C; diff --git a/test/TableGen/2006-09-18-LargeInt.td b/test/TableGen/2006-09-18-LargeInt.td index 94cd1ec..5380212 100644 --- a/test/TableGen/2006-09-18-LargeInt.td +++ b/test/TableGen/2006-09-18-LargeInt.td @@ -1,4 +1,6 @@ -// RUN: llvm-tblgen %s | grep -- 4294901760 +// RUN: llvm-tblgen %s | FileCheck %s + +// CHECK: 4294901760 def X { int Y = 0xFFFF0000; diff --git a/test/TableGen/2010-03-24-PrematureDefaults.td b/test/TableGen/2010-03-24-PrematureDefaults.td index 716a1d5..24f6c93 100644 --- a/test/TableGen/2010-03-24-PrematureDefaults.td +++ b/test/TableGen/2010-03-24-PrematureDefaults.td @@ -1,4 +1,5 @@ // RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak class A<int k, bits<2> x = 1> { int K = k; diff --git a/test/TableGen/CStyleComment.td b/test/TableGen/CStyleComment.td index 55fb0e7..9c50f7e 100644 --- a/test/TableGen/CStyleComment.td +++ b/test/TableGen/CStyleComment.td @@ -1,7 +1,6 @@ // Test that multiline, nested, comments work correctly. // // RUN: llvm-tblgen < %s -// XFAIL: vg_leak /* Foo bar diff --git a/test/TableGen/Dag.td b/test/TableGen/Dag.td index 14d616b..fea3aee 100644 --- a/test/TableGen/Dag.td +++ b/test/TableGen/Dag.td @@ -1,4 +1,5 @@ // RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak //===----------------------------------------------------------------------===// // Substitution of an int. 
diff --git a/test/TableGen/DefmInherit.td b/test/TableGen/DefmInherit.td index b52a709..bfbb435 100644 --- a/test/TableGen/DefmInherit.td +++ b/test/TableGen/DefmInherit.td @@ -1,4 +1,11 @@ -// RUN: llvm-tblgen %s | grep "zing = 4" | count 4 +// RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak + +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK-NOT: zing = 4 class C1<int A, string B> { int bar = A; diff --git a/test/TableGen/DefmInsideMultiClass.td b/test/TableGen/DefmInsideMultiClass.td index 0aea212..d34974d 100644 --- a/test/TableGen/DefmInsideMultiClass.td +++ b/test/TableGen/DefmInsideMultiClass.td @@ -1,4 +1,8 @@ -// RUN: llvm-tblgen %s | grep ADDPSrr | count 1 +// RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak + +// CHECK: ADDPSrr +// CHECK-NOT: ADDPSrr class Instruction<bits<4> opc, string Name> { bits<4> opcode = opc; diff --git a/test/TableGen/ForeachList.td b/test/TableGen/ForeachList.td index 9bc76e0..99b7e14 100644 --- a/test/TableGen/ForeachList.td +++ b/test/TableGen/ForeachList.td @@ -1,4 +1,5 @@ // RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak class Register<string name, int idx> { string Name = name; diff --git a/test/TableGen/ForeachLoop.td b/test/TableGen/ForeachLoop.td index a49a60b..4aacc74 100644 --- a/test/TableGen/ForeachLoop.td +++ b/test/TableGen/ForeachLoop.td @@ -1,4 +1,5 @@ // RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak class Register<string name, int idx> { string Name = name; diff --git a/test/TableGen/Include.td b/test/TableGen/Include.td index 8783638..1cb779f 100644 --- a/test/TableGen/Include.td +++ b/test/TableGen/Include.td @@ -1,5 +1,5 @@ // RUN: llvm-tblgen -I %p %s -// XFAIL: vg_leak + def BeforeInclude; include "Include.inc" diff --git a/test/TableGen/IntBitInit.td b/test/TableGen/IntBitInit.td index 83713a3..4e150f1 100644 --- a/test/TableGen/IntBitInit.td +++ b/test/TableGen/IntBitInit.td @@ -1,5 +1,5 @@ // RUN: llvm-tblgen %s -// XFAIL: vg_leak + def { 
bit A = 1; int B = A; diff --git a/test/TableGen/LazyChange.td b/test/TableGen/LazyChange.td index 919a1a7..2ad6191 100644 --- a/test/TableGen/LazyChange.td +++ b/test/TableGen/LazyChange.td @@ -1,4 +1,6 @@ -// RUN: llvm-tblgen %s | grep "int Y = 3" +// RUN: llvm-tblgen %s | FileCheck %s + +// CHECK: int Y = 3 class C { int X = 4; diff --git a/test/TableGen/LetInsideMultiClasses.td b/test/TableGen/LetInsideMultiClasses.td index 72f48b6..095f37b 100644 --- a/test/TableGen/LetInsideMultiClasses.td +++ b/test/TableGen/LetInsideMultiClasses.td @@ -1,4 +1,10 @@ -// RUN: llvm-tblgen %s | grep "bit IsDouble = 1;" | count 3 +// RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak + +// CHECK: bit IsDouble = 1; +// CHECK: bit IsDouble = 1; +// CHECK: bit IsDouble = 1; +// CHECK-NOT: bit IsDouble = 1; class Instruction<bits<4> opc, string Name> { bits<4> opcode = opc; diff --git a/test/TableGen/ListOfList.td b/test/TableGen/ListOfList.td index adf9fe4..56f964e 100644 --- a/test/TableGen/ListOfList.td +++ b/test/TableGen/ListOfList.td @@ -1,6 +1,5 @@ -// RUN llvm-tblgen %s | FileCheck %s - -// RUN: llvm-tblgen %s | grep "foo" | count 1 +// RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak class Base<string t> { string text = t; @@ -11,3 +10,4 @@ class Derived<list<list<string>> thetext> : Base<thetext[0][0]>; def FOO : Derived<[["foo"]]>; // CHECK: text = "foo" +// CHECK-NOT: text = "foo" diff --git a/test/TableGen/LoLoL.td b/test/TableGen/LoLoL.td index f758e1b..778c960 100644 --- a/test/TableGen/LoLoL.td +++ b/test/TableGen/LoLoL.td @@ -1,4 +1,5 @@ // RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak class Base<list<int> v> { list<int> values = v; diff --git a/test/TableGen/MultiClass.td b/test/TableGen/MultiClass.td index ef320cf..9c39963 100644 --- a/test/TableGen/MultiClass.td +++ b/test/TableGen/MultiClass.td @@ -1,4 +1,9 @@ -// RUN: llvm-tblgen %s | grep "zing = 4" | count 2 +// RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak + +// CHECK: zing = 4 +// 
CHECK: zing = 4 +// CHECK-NOT: zing = 4 class C1<int A, string B> { int bar = A; diff --git a/test/TableGen/MultiClassDefName.td b/test/TableGen/MultiClassDefName.td index 75d6af5..d3c6de7 100644 --- a/test/TableGen/MultiClassDefName.td +++ b/test/TableGen/MultiClassDefName.td @@ -1,4 +1,8 @@ -// RUN: llvm-tblgen %s | grep WorldHelloCC | count 1 +// RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak + +// CHECK: WorldHelloCC +// CHECK-NOT: WorldHelloCC class C<string n> { string name = n; diff --git a/test/TableGen/MultiClassInherit.td b/test/TableGen/MultiClassInherit.td index 9d1470a..04fef2c 100644 --- a/test/TableGen/MultiClassInherit.td +++ b/test/TableGen/MultiClassInherit.td @@ -1,4 +1,36 @@ -// RUN: llvm-tblgen %s | grep "zing = 4" | count 28 +// RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak + +// "zing = 4" x 28 +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK: zing = 4 +// CHECK-NOT: zing = 4 class C1<int A, string B> { int bar = A; diff --git a/test/TableGen/MultiPat.td b/test/TableGen/MultiPat.td index b49b06c..b379277 100644 --- a/test/TableGen/MultiPat.td +++ b/test/TableGen/MultiPat.td @@ -1,4 +1,5 @@ // RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak class ValueType<int size, int value> { int Size = size; diff --git a/test/TableGen/NestedForeach.td b/test/TableGen/NestedForeach.td index 5b63175..e8c16f7 100644 --- a/test/TableGen/NestedForeach.td +++ b/test/TableGen/NestedForeach.td @@ -1,4 +1,5 @@ // RUN: llvm-tblgen %s | FileCheck %s +// 
XFAIL: vg_leak class Droid<string series, int release, string model, int patchlevel> { string Series = series; diff --git a/test/TableGen/Paste.td b/test/TableGen/Paste.td index 33d61cc..a7e2a5b 100644 --- a/test/TableGen/Paste.td +++ b/test/TableGen/Paste.td @@ -1,4 +1,5 @@ // RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak class Instr<int i> { int index = i; diff --git a/test/TableGen/SetTheory.td b/test/TableGen/SetTheory.td index f26b9e6..7613323 100644 --- a/test/TableGen/SetTheory.td +++ b/test/TableGen/SetTheory.td @@ -1,5 +1,6 @@ // Test evaluation of set operations in dags. // RUN: llvm-tblgen -print-sets %s | FileCheck %s +// XFAIL: vg_leak // // The -print-sets driver configures a primitive SetTheory instance that // understands these sets: diff --git a/test/TableGen/SiblingForeach.td b/test/TableGen/SiblingForeach.td index e4c4704..a11f6f8 100644 --- a/test/TableGen/SiblingForeach.td +++ b/test/TableGen/SiblingForeach.td @@ -1,4 +1,5 @@ // RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak class Set<int i = 0, int j = 0, int k = 0> { int I = i; diff --git a/test/TableGen/Slice.td b/test/TableGen/Slice.td index 7a35d31..89deaef 100644 --- a/test/TableGen/Slice.td +++ b/test/TableGen/Slice.td @@ -1,4 +1,5 @@ // RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak class ValueType<int size, int value> { int Size = size; diff --git a/test/TableGen/String.td b/test/TableGen/String.td index c71ed50..576ba81 100644 --- a/test/TableGen/String.td +++ b/test/TableGen/String.td @@ -1,5 +1,5 @@ // RUN: llvm-tblgen %s -// XFAIL: vg_leak + class x { string y = "missing terminating '\"' character"; } diff --git a/test/TableGen/TargetInstrSpec.td b/test/TableGen/TargetInstrSpec.td index bf2d257..32253a3 100644 --- a/test/TableGen/TargetInstrSpec.td +++ b/test/TableGen/TargetInstrSpec.td @@ -1,5 +1,11 @@ -// RUN: llvm-tblgen %s | grep '\[(set VR128:$dst, (int_x86_sse2_add_pd VR128:$src1, VR128:$src2))\]' | count 1 -// RUN: llvm-tblgen %s | grep '\[(set 
VR128:$dst, (int_x86_sse2_add_ps VR128:$src1, VR128:$src2))\]' | count 1 +// RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak + +// CHECK: [(set VR128:$dst, (int_x86_sse2_add_pd VR128:$src1, VR128:$src2))] +// CHECK-NOT: [(set VR128:$dst, (int_x86_sse2_add_pd VR128:$src1, VR128:$src2))] + +// CHECK: [(set VR128:$dst, (int_x86_sse2_add_ps VR128:$src1, VR128:$src2))] +// CHECK-NOT: [(set VR128:$dst, (int_x86_sse2_add_ps VR128:$src1, VR128:$src2))] class ValueType<int size, int value> { int Size = size; diff --git a/test/TableGen/TwoLevelName.td b/test/TableGen/TwoLevelName.td index e886962..9c502f4 100644 --- a/test/TableGen/TwoLevelName.td +++ b/test/TableGen/TwoLevelName.td @@ -1,4 +1,5 @@ // RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak class Type<string name, int length, int width> { string Name = name; diff --git a/test/TableGen/cast.td b/test/TableGen/cast.td index b9e4b37..a8bd207 100644 --- a/test/TableGen/cast.td +++ b/test/TableGen/cast.td @@ -1,4 +1,10 @@ -// RUN: llvm-tblgen %s | grep "add_ps" | count 3 +// RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak + +// CHECK: add_ps +// CHECK: add_ps +// CHECK: add_ps +// CHECK-NOT: add_ps class ValueType<int size, int value> { int Size = size; diff --git a/test/TableGen/defmclass.td b/test/TableGen/defmclass.td index 6198c00..80f03b3 100644 --- a/test/TableGen/defmclass.td +++ b/test/TableGen/defmclass.td @@ -1,4 +1,5 @@ // RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak class XD { bits<4> Prefix = 11; } // CHECK: Prefix = { 1, 1, 0, 0 }; diff --git a/test/TableGen/eq.td b/test/TableGen/eq.td index fc3ad42..f8daf88 100644 --- a/test/TableGen/eq.td +++ b/test/TableGen/eq.td @@ -1,4 +1,5 @@ // RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak // CHECK: Value = 0 // CHECK: Value = 1 diff --git a/test/TableGen/eqbit.td b/test/TableGen/eqbit.td index b77b1a2..1d58fa0 100644 --- a/test/TableGen/eqbit.td +++ b/test/TableGen/eqbit.td @@ -1,4 +1,5 @@ // RUN: llvm-tblgen %s | FileCheck %s +// 
XFAIL: vg_leak // CHECK: a = 6 // CHECK: a = 5 diff --git a/test/TableGen/foreach.td b/test/TableGen/foreach.td index 7b7c199..541da49 100644 --- a/test/TableGen/foreach.td +++ b/test/TableGen/foreach.td @@ -1,6 +1,14 @@ -// RUN: llvm-tblgen %s | grep 'Jr' | count 2 -// RUN: llvm-tblgen %s | grep 'Sr' | count 2 -// RUN: llvm-tblgen %s | grep '"NAME"' | count 1 +// RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak + +// CHECK: Classes +// CHECK: Sr +// CHECK: Jr +// CHECK: "NAME" + +// CHECK: Defs +// CHECK: Jr +// CHECK: Sr // Variables for foreach class decls { diff --git a/test/TableGen/if.td b/test/TableGen/if.td index e4df74f..1d8d623 100644 --- a/test/TableGen/if.td +++ b/test/TableGen/if.td @@ -1,4 +1,5 @@ // RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak // Support for an `!if' operator as part of a `let' statement. // CHECK: class C diff --git a/test/TableGen/ifbit.td b/test/TableGen/ifbit.td index e334121..88f575e 100644 --- a/test/TableGen/ifbit.td +++ b/test/TableGen/ifbit.td @@ -1,4 +1,5 @@ // RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak // CHECK: a = 6 // CHECK: a = 5 diff --git a/test/TableGen/intrinsic-order.td b/test/TableGen/intrinsic-order.td index 5eadf60..13c2db2 100644 --- a/test/TableGen/intrinsic-order.td +++ b/test/TableGen/intrinsic-order.td @@ -1,4 +1,5 @@ // RUN: llvm-tblgen -gen-intrinsic %s | FileCheck %s +// XFAIL: vg_leak class IntrinsicProperty; diff --git a/test/TableGen/intrinsic-varargs.td b/test/TableGen/intrinsic-varargs.td new file mode 100644 index 0000000..3e48f8d --- /dev/null +++ b/test/TableGen/intrinsic-varargs.td @@ -0,0 +1,30 @@ +// RUN: llvm-tblgen -gen-intrinsic %s | FileCheck %s +// XFAIL: vg_leak + +class IntrinsicProperty; + +class ValueType<int size, int value> { + string Namespace = "MVT"; + int Size = size; + int Value = value; +} + +class LLVMType<ValueType vt> { + ValueType VT = vt; +} + +class Intrinsic<string name, list<LLVMType> param_types = []> { + string LLVMName = name; + bit 
isTarget = 0; + string TargetPrefix = ""; + list<LLVMType> RetTypes = []; + list<LLVMType> ParamTypes = param_types; + list<IntrinsicProperty> Properties = []; +} + +// isVoid needs to match the definition in ValueTypes.td +def isVoid : ValueType<0, 56>; // Produces no value +def llvm_vararg_ty : LLVMType<isVoid>; // this means vararg here + +// CHECK: /* 0 */ 0, 27, 0, +def int_foo : Intrinsic<"llvm.foo", [llvm_vararg_ty]>; diff --git a/test/TableGen/lisp.td b/test/TableGen/lisp.td index efe0002..9e58605 100644 --- a/test/TableGen/lisp.td +++ b/test/TableGen/lisp.td @@ -1,4 +1,20 @@ -// RUN: llvm-tblgen %s | grep "" +// RUN: llvm-tblgen %s +// XFAIL: vg_leak + +// CHECK: def One { +// CHECK-NEXT: list<string> names = ["Jeffrey Sinclair"]; +// CHECK-NEXT: string element = "Jeffrey Sinclair"; +// CHECK-NEXT: list<string> rest = []; +// CHECK-NEXT: int null = 1; +// CHECK-NEXT: string NAME = ?; +// CHECK-NEXT: } +// CHECK-NEXT: def Three { +// CHECK-NEXT: list<string> names = ["Tom", "Dick", "Harry"]; +// CHECK-NEXT: string element = "Tom"; +// CHECK-NEXT: list<string> rest = ["Dick", "Harry"]; +// CHECK-NEXT: int null = 0; +// CHECK-NEXT: string NAME = ?; +// CHECK-NEXT: } class List<list<string> n> { list<string> names = n; diff --git a/test/TableGen/list-element-bitref.td b/test/TableGen/list-element-bitref.td index 7db3d31..4622f28 100644 --- a/test/TableGen/list-element-bitref.td +++ b/test/TableGen/list-element-bitref.td @@ -1,4 +1,5 @@ // RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak class C<list<bits<8>> L> { bits<2> V0 = L[0]{1-0}; diff --git a/test/TableGen/math.td b/test/TableGen/math.td index bde267a..59d16ae 100644 --- a/test/TableGen/math.td +++ b/test/TableGen/math.td @@ -1,4 +1,5 @@ // RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak class Int<int value> { int Value = value; diff --git a/test/TableGen/nested-comment.td b/test/TableGen/nested-comment.td index bf030e7..f8581ce 100644 --- a/test/TableGen/nested-comment.td +++ 
b/test/TableGen/nested-comment.td @@ -1,5 +1,4 @@ // RUN: llvm-tblgen < %s -// XFAIL: vg_leak /* foo diff --git a/test/TableGen/pr8330.td b/test/TableGen/pr8330.td index e672014..7779b63 100644 --- a/test/TableGen/pr8330.td +++ b/test/TableGen/pr8330.td @@ -1,4 +1,5 @@ // RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak class Or4<bits<8> Val> { bits<8> V = {Val{7}, Val{6}, Val{5}, Val{4}, Val{3}, 1, Val{1}, Val{0} }; diff --git a/test/TableGen/strconcat.td b/test/TableGen/strconcat.td index 0173c49..dfb1a94 100644 --- a/test/TableGen/strconcat.td +++ b/test/TableGen/strconcat.td @@ -1,4 +1,6 @@ -// RUN: llvm-tblgen %s | grep fufoo +// RUN: llvm-tblgen %s | FileCheck %s + +// CHECK: fufoo class Y<string S> { string T = !strconcat(S, "foo"); diff --git a/test/TableGen/subst.td b/test/TableGen/subst.td index e265b44..34818af 100644 --- a/test/TableGen/subst.td +++ b/test/TableGen/subst.td @@ -1,9 +1,5 @@ -// RUN: llvm-tblgen %s | grep "Smith" | count 7 -// RUN: llvm-tblgen %s | grep "Johnson" | count 2 -// RUN: llvm-tblgen %s | grep "FIRST" | count 1 -// RUN: llvm-tblgen %s | grep "LAST" | count 1 -// RUN: llvm-tblgen %s | grep "TVAR" | count 2 -// RUN: llvm-tblgen %s | grep "Bogus" | count 1 +// RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak class Honorific<string t> { string honorific = t; @@ -27,3 +23,56 @@ def JohnSmith : AName<"FIRST LAST", TVAR>; def JaneSmith : AName<"Jane LAST", Ms>; def JohnSmithJones : AName<"FIRST LAST-Jones", Mr>; def JimmyJohnson : AName<"Jimmy Johnson", Mr>; + +// CHECK: ------------- Classes ----------------- +// CHECK-NEXT: class AName<string AName:name = ?, Honorific AName:honorific = ?> { +// CHECK-NEXT: string name = !subst("FIRST", "John", !subst("LAST", "Smith", AName:name)); +// CHECK-NEXT: Honorific honorific = !subst(TVAR, Mr, AName:honorific); +// CHECK-NEXT: string NAME = ?; +// CHECK-NEXT: } +// CHECK-NEXT: class Honorific<string Honorific:t = ?> { +// CHECK-NEXT: string honorific = Honorific:t; +// CHECK-NEXT: 
string NAME = ?; +// CHECK-NEXT: } +// CHECK-NEXT: class Name<string Name:n = ?, Honorific Name:t = ?> { +// CHECK-NEXT: string name = Name:n; +// CHECK-NEXT: Honorific honorific = Name:t; +// CHECK-NEXT: string NAME = ?; +// CHECK-NEXT: } +// CHECK-NEXT: ------------- Defs ----------------- +// CHECK-NEXT: def JaneSmith { +// CHECK-NEXT: string name = "Jane Smith"; +// CHECK-NEXT: Honorific honorific = Ms; +// CHECK-NEXT: string NAME = ?; +// CHECK-NEXT: } +// CHECK-NEXT: def JimmyJohnson { +// CHECK-NEXT: string name = "Jimmy Johnson"; +// CHECK-NEXT: Honorific honorific = Mr; +// CHECK-NEXT: string NAME = ?; +// CHECK-NEXT: } +// CHECK-NEXT: def JohnSmith { +// CHECK-NEXT: string name = "John Smith"; +// CHECK-NEXT: Honorific honorific = Mr; +// CHECK-NEXT: string NAME = ?; +// CHECK-NEXT: } +// CHECK-NEXT: def JohnSmithJones { +// CHECK-NEXT: string name = "John Smith-Jones"; +// CHECK-NEXT: Honorific honorific = Mr; +// CHECK-NEXT: string NAME = ?; +// CHECK-NEXT: } +// CHECK-NEXT: def Mr +// CHECK-NEXT: string honorific = "Mr."; +// CHECK-NEXT: string NAME = ?; +// CHECK-NEXT: } +// CHECK-NEXT: def Mrs { +// CHECK-NEXT: string honorific = "Mrs."; +// CHECK-NEXT: string NAME = ?; +// CHECK-NEXT: } +// CHECK-NEXT: def Ms { +// CHECK-NEXT: string honorific = "Ms."; +// CHECK-NEXT: string NAME = ?; +// CHECK-NEXT: } +// CHECK-NEXT: def TVAR { +// CHECK-NEXT: string honorific = "Bogus"; +// CHECK-NEXT: string NAME = ?; +// CHECK-NEXT: } diff --git a/test/TableGen/subst2.td b/test/TableGen/subst2.td index ce73077..7c007f7 100644 --- a/test/TableGen/subst2.td +++ b/test/TableGen/subst2.td @@ -1,4 +1,5 @@ // RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak // CHECK: No subst // CHECK: No foo // CHECK: RECURSE foo diff --git a/test/TableGen/usevalname.td b/test/TableGen/usevalname.td index a80ba12..d85b98a 100644 --- a/test/TableGen/usevalname.td +++ b/test/TableGen/usevalname.td @@ -1,4 +1,5 @@ // RUN: llvm-tblgen %s | FileCheck %s +// XFAIL: vg_leak class 
Instr<list<dag> pat> { list<dag> Pattern = pat; diff --git a/test/Transforms/ADCE/lit.local.cfg b/test/Transforms/ADCE/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/ADCE/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/ArgumentPromotion/lit.local.cfg b/test/Transforms/ArgumentPromotion/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/ArgumentPromotion/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/ArgumentPromotion/reserve-tbaa.ll b/test/Transforms/ArgumentPromotion/reserve-tbaa.ll new file mode 100644 index 0000000..4688a83 --- /dev/null +++ b/test/Transforms/ArgumentPromotion/reserve-tbaa.ll @@ -0,0 +1,52 @@ +; RUN: opt < %s -argpromotion -S + +; PR17906 +; When we promote two arguments in a single function with different types, +; before the fix, we used the same tag for the newly-created two loads. +; This testing case makes sure that we correctly transfer the tbaa tags from the +; original loads to the newly-created loads when promoting pointer arguments. 
+ +@a = global i32* null, align 8 +@e = global i32** @a, align 8 +@g = global i32 0, align 4 +@c = global i64 0, align 8 +@d = global i8 0, align 1 + +define internal fastcc void @fn(i32* nocapture readonly %p1, i64* nocapture readonly %p2) { +entry: + %0 = load i64* %p2, align 8, !tbaa !1 + %conv = trunc i64 %0 to i32 + %1 = load i32* %p1, align 4, !tbaa !5 + %conv1 = trunc i32 %1 to i8 + store i8 %conv1, i8* @d, align 1, !tbaa !7 + ret void +} + +define i32 @main() { +entry: +; CHECK-LABEL: main +; CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa ![[I32:[0-9]+]] +; CHECK: %g.val = load i32* @g, align 4, !tbaa ![[I32]] +; CHECK: %c.val = load i64* @c, align 8, !tbaa ![[LONG:[0-9]+]] + %0 = load i32*** @e, align 8, !tbaa !8 + store i32* @g, i32** %0, align 8, !tbaa !8 + %1 = load i32** @a, align 8, !tbaa !8 + store i32 1, i32* %1, align 4, !tbaa !5 + call fastcc void @fn(i32* @g, i64* @c) + + ret i32 0 +} + +!1 = metadata !{metadata !2, metadata !2, i64 0} +!2 = metadata !{metadata !"long", metadata !3, i64 0} +!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0} +!4 = metadata !{metadata !"Simple C/C++ TBAA"} +!5 = metadata !{metadata !6, metadata !6, i64 0} +!6 = metadata !{metadata !"int", metadata !3, i64 0} +!7 = metadata !{metadata !3, metadata !3, i64 0} +!8 = metadata !{metadata !9, metadata !9, i64 0} +!9 = metadata !{metadata !"any pointer", metadata !3, i64 0} +; CHECK: ![[I32]] = metadata !{metadata ![[I32_TYPE:[0-9]+]], metadata ![[I32_TYPE]], i64 0} +; CHECK: ![[I32_TYPE]] = metadata !{metadata !"int", metadata !{{.*}}, i64 0} +; CHECK: ![[LONG]] = metadata !{metadata ![[LONG_TYPE:[0-9]+]], metadata ![[LONG_TYPE]], i64 0} +; CHECK: ![[LONG_TYPE]] = metadata !{metadata !"long", metadata !{{.*}}, i64 0} diff --git a/test/Transforms/BBVectorize/X86/pr15289.ll b/test/Transforms/BBVectorize/X86/pr15289.ll index 07cc5d8..42bd0ff 100644 --- a/test/Transforms/BBVectorize/X86/pr15289.ll +++ b/test/Transforms/BBVectorize/X86/pr15289.ll @@ -45,7 
+45,7 @@ entry: %13 = fmul double %3, %12 %14 = fmul double %3, undef %15 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 0, i32 0 - store double %13, double* %15, align 8, !tbaa !0 + store double %13, double* %15, align 8 %16 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 0, i32 1 %17 = fmul double undef, %8 %18 = fmul double %17, undef @@ -54,7 +54,7 @@ entry: %21 = fmul double %3, %19 %22 = fsub double -0.000000e+00, %21 %23 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 1, i32 0 - store double %22, double* %23, align 8, !tbaa !0 + store double %22, double* %23, align 8 %24 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 1, i32 1 %25 = fmul double undef, 0x3FE42F601A8C6794 %26 = fmul double undef, 2.000000e+00 @@ -62,7 +62,7 @@ entry: %28 = fmul double %6, undef %29 = fsub double undef, %28 %30 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 2, i32 0 - store double undef, double* %30, align 8, !tbaa !0 + store double undef, double* %30, align 8 %31 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 2, i32 1 %32 = fmul double undef, %17 %33 = fmul double undef, %17 @@ -71,7 +71,7 @@ entry: %36 = fsub double undef, %35 %37 = fmul double %3, %34 %38 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 3, i32 0 - store double %37, double* %38, align 8, !tbaa !0 + store double %37, double* %38, align 8 %39 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 3, i32 1 %40 = fmul double undef, %8 %41 = fmul double undef, %40 @@ -79,20 +79,17 @@ entry: %43 = fsub double undef, %42 %44 = fmul double %3, %43 %45 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 4, i32 0 - store double %13, double* %45, align 8, !tbaa !0 + store double %13, double* %45, align 8 %46 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 4, i32 1 %47 = fsub double -0.000000e+00, %14 - store 
double %47, double* %16, align 8, !tbaa !0 - store double undef, double* %24, align 8, !tbaa !0 - store double -0.000000e+00, double* %31, align 8, !tbaa !0 - store double undef, double* %39, align 8, !tbaa !0 - store double undef, double* %46, align 8, !tbaa !0 + store double %47, double* %16, align 8 + store double undef, double* %24, align 8 + store double -0.000000e+00, double* %31, align 8 + store double undef, double* %39, align 8 + store double undef, double* %46, align 8 ret void } attributes #0 = { nounwind uwtable } attributes #1 = { nounwind readnone } attributes #2 = { nounwind } - -!0 = metadata !{metadata !"alias set 17: real(kind=8)", metadata !1} -!1 = metadata !{metadata !1} diff --git a/test/Transforms/BBVectorize/X86/wr-aliases.ll b/test/Transforms/BBVectorize/X86/wr-aliases.ll new file mode 100644 index 0000000..34b1d4e --- /dev/null +++ b/test/Transforms/BBVectorize/X86/wr-aliases.ll @@ -0,0 +1,144 @@ +; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -bb-vectorize -S < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%class.QBezier.15 = type { double, double, double, double, double, double, double, double } + +; Function Attrs: nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0 + +; Function Attrs: uwtable +declare fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval nocapture readonly align 8) #1 + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +define void @main_arrayctor.cont([10 x %class.QBezier.15]* %beziers, %class.QBezier.15* %agg.tmp.i, %class.QBezier.15* %agg.tmp55.i, %class.QBezier.15* %agg.tmp56.i) { +newFuncRoot: + br label %arrayctor.cont + 
+arrayctor.cont.ret.exitStub: ; preds = %arrayctor.cont + ret void + +; CHECK-LABEL: @main_arrayctor.cont +; CHECK: <2 x double> +; CHECK: @_ZL12printQBezier7QBezier +; CHECK: store double %mul8.i, double* %x3.i, align 16 +; CHECK: load double* %x3.i, align 16 +; CHECK: ret + +arrayctor.cont: ; preds = %newFuncRoot + %ref.tmp.sroa.0.0.idx = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 0 + store double 1.000000e+01, double* %ref.tmp.sroa.0.0.idx, align 16 + %ref.tmp.sroa.2.0.idx1 = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 1 + store double 2.000000e+01, double* %ref.tmp.sroa.2.0.idx1, align 8 + %ref.tmp.sroa.3.0.idx2 = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 2 + store double 3.000000e+01, double* %ref.tmp.sroa.3.0.idx2, align 16 + %ref.tmp.sroa.4.0.idx3 = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 3 + store double 4.000000e+01, double* %ref.tmp.sroa.4.0.idx3, align 8 + %ref.tmp.sroa.5.0.idx4 = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 4 + store double 5.000000e+01, double* %ref.tmp.sroa.5.0.idx4, align 16 + %ref.tmp.sroa.6.0.idx5 = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 5 + store double 6.000000e+01, double* %ref.tmp.sroa.6.0.idx5, align 8 + %ref.tmp.sroa.7.0.idx6 = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 6 + store double 7.000000e+01, double* %ref.tmp.sroa.7.0.idx6, align 16 + %ref.tmp.sroa.8.0.idx7 = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 7 + store double 8.000000e+01, double* %ref.tmp.sroa.8.0.idx7, align 8 + %add.ptr = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 1 + %v0 = bitcast %class.QBezier.15* %agg.tmp.i to i8* + call void @llvm.lifetime.start(i64 64, i8* %v0) + %v1 = bitcast %class.QBezier.15* %agg.tmp55.i to i8* + call void @llvm.lifetime.start(i64 
64, i8* %v1) + %v2 = bitcast %class.QBezier.15* %agg.tmp56.i to i8* + call void @llvm.lifetime.start(i64 64, i8* %v2) + %v3 = bitcast [10 x %class.QBezier.15]* %beziers to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v0, i8* %v3, i64 64, i32 8, i1 false) + call fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval align 8 %agg.tmp.i) + %x2.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 2 + %v4 = load double* %x2.i, align 16 + %x3.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 4 + %v5 = load double* %x3.i, align 16 + %add.i = fadd double %v4, %v5 + %mul.i = fmul double 5.000000e-01, %add.i + %x1.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 0 + %v6 = load double* %x1.i, align 16 + %add3.i = fadd double %v4, %v6 + %mul4.i = fmul double 5.000000e-01, %add3.i + %x25.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 2 + store double %mul4.i, double* %x25.i, align 16 + %v7 = load double* %x3.i, align 16 + %x4.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 6 + %v8 = load double* %x4.i, align 16 + %add7.i = fadd double %v7, %v8 + %mul8.i = fmul double 5.000000e-01, %add7.i + store double %mul8.i, double* %x3.i, align 16 + %v9 = load double* %x1.i, align 16 + %x111.i = getelementptr inbounds %class.QBezier.15* %add.ptr, i64 0, i32 0 + store double %v9, double* %x111.i, align 16 + %v10 = load double* %x25.i, align 16 + %add15.i = fadd double %mul.i, %v10 + %mul16.i = fmul double 5.000000e-01, %add15.i + %x317.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 4 + store double %mul16.i, double* %x317.i, align 16 + %v11 = load double* %x3.i, align 16 + %add19.i = fadd double %mul.i, %v11 + %mul20.i = fmul double 5.000000e-01, %add19.i + store double %mul20.i, double* %x2.i, align 16 + %v12 = load double* %x317.i, align 16 + %add24.i = fadd double %v12, %mul20.i + 
%mul25.i = fmul double 5.000000e-01, %add24.i + store double %mul25.i, double* %x1.i, align 16 + %x427.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 6 + store double %mul25.i, double* %x427.i, align 16 + %y2.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 3 + %v13 = load double* %y2.i, align 8 + %y3.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 5 + %v14 = load double* %y3.i, align 8 + %add28.i = fadd double %v13, %v14 + %div.i = fmul double 5.000000e-01, %add28.i + %y1.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 1 + %v15 = load double* %y1.i, align 8 + %add30.i = fadd double %v13, %v15 + %mul31.i = fmul double 5.000000e-01, %add30.i + %y232.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 3 + store double %mul31.i, double* %y232.i, align 8 + %v16 = load double* %y3.i, align 8 + %y4.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 7 + %v17 = load double* %y4.i, align 8 + %add34.i = fadd double %v16, %v17 + %mul35.i = fmul double 5.000000e-01, %add34.i + store double %mul35.i, double* %y3.i, align 8 + %v18 = load double* %y1.i, align 8 + %y138.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 1 + store double %v18, double* %y138.i, align 8 + %v19 = load double* %y232.i, align 8 + %add42.i = fadd double %div.i, %v19 + %mul43.i = fmul double 5.000000e-01, %add42.i + %y344.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 5 + store double %mul43.i, double* %y344.i, align 8 + %v20 = load double* %y3.i, align 8 + %add46.i = fadd double %div.i, %v20 + %mul47.i = fmul double 5.000000e-01, %add46.i + store double %mul47.i, double* %y2.i, align 8 + %v21 = load double* %y344.i, align 8 + %add51.i = fadd double %v21, %mul47.i + %mul52.i = fmul double 5.000000e-01, %add51.i + store double %mul52.i, double* %y1.i, 
align 8 + %y454.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 7 + store double %mul52.i, double* %y454.i, align 8 + %v22 = bitcast %class.QBezier.15* %add.ptr to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v1, i8* %v22, i64 64, i32 8, i1 false) + call fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval align 8 %agg.tmp55.i) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v2, i8* %v3, i64 64, i32 8, i1 false) + call fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval align 8 %agg.tmp56.i) + call void @llvm.lifetime.end(i64 64, i8* %v0) + call void @llvm.lifetime.end(i64 64, i8* %v1) + call void @llvm.lifetime.end(i64 64, i8* %v2) + br label %arrayctor.cont.ret.exitStub +} + +attributes #0 = { nounwind } +attributes #1 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/Transforms/BBVectorize/lit.local.cfg b/test/Transforms/BBVectorize/lit.local.cfg index a8ad0f1..ba763cf 100644 --- a/test/Transforms/BBVectorize/lit.local.cfg +++ b/test/Transforms/BBVectorize/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - targets = set(config.root.targets_to_build.split()) if not 'X86' in targets: config.unsupported = True diff --git a/test/Transforms/BBVectorize/xcore/no-vector-registers.ll b/test/Transforms/BBVectorize/xcore/no-vector-registers.ll new file mode 100644 index 0000000..9ebdb73 --- /dev/null +++ b/test/Transforms/BBVectorize/xcore/no-vector-registers.ll @@ -0,0 +1,18 @@ +; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S -mtriple=xcore | FileCheck %s + +target datalayout = "e-p:32:32:32-a0:0:32-n32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f16:16:32-f32:32:32-f64:32:32" +target triple = "xcore" + +; Basic depth-3 chain +define double 
@test1(double %A1, double %A2, double %B1, double %B2) { +; CHECK-LABEL: @test1( +; CHECK-NOT: <2 x double> + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = fmul double %X1, %A1 + %Y2 = fmul double %X2, %A2 + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R +} diff --git a/test/Transforms/BlockPlacement/basictest.ll b/test/Transforms/BlockPlacement/basictest.ll deleted file mode 100644 index 47b5079..0000000 --- a/test/Transforms/BlockPlacement/basictest.ll +++ /dev/null @@ -1,15 +0,0 @@ -; RUN: opt < %s -block-placement -disable-output -print-function 2> /dev/null - -define i32 @test() { - br i1 true, label %X, label %Y - -A: ; preds = %Y, %X - ret i32 0 - -X: ; preds = %0 - br label %A - -Y: ; preds = %0 - br label %A -} - diff --git a/test/Transforms/BlockPlacement/lit.local.cfg b/test/Transforms/BlockPlacement/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/BlockPlacement/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/CodeExtractor/lit.local.cfg b/test/Transforms/CodeExtractor/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/CodeExtractor/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/CodeGenPrepare/lit.local.cfg b/test/Transforms/CodeGenPrepare/lit.local.cfg deleted file mode 100644 index c6106e4..0000000 --- a/test/Transforms/CodeGenPrepare/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll'] diff --git a/test/Transforms/ConstProp/lit.local.cfg b/test/Transforms/ConstProp/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/ConstProp/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/ConstProp/loads.ll b/test/Transforms/ConstProp/loads.ll index 795dc07..d05db47 100644 --- 
a/test/Transforms/ConstProp/loads.ll +++ b/test/Transforms/ConstProp/loads.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -default-data-layout="e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64" -instcombine -S | FileCheck %s --check-prefix=LE -; RUN: opt < %s -default-data-layout="E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64" -instcombine -S | FileCheck %s --check-prefix=BE +; RUN: opt < %s -default-data-layout="e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64" -instcombine -S | FileCheck %s --check-prefix=LE +; RUN: opt < %s -default-data-layout="E-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64" -instcombine -S | FileCheck %s --check-prefix=BE ; {{ 0xDEADBEEF, 0xBA }, 0xCAFEBABE} @g1 = constant {{i32,i8},i32} {{i32,i8} { i32 -559038737, i8 186 }, i32 -889275714 } @@ -155,7 +155,7 @@ entry: @test12g = private constant [6 x i8] c"a\00b\00\00\00" define i16 @test12() { - %a = load i16* getelementptr inbounds ([3 x i16]* bitcast ([6 x i8]* @test12g to [3 x i16]*), i32 0, i64 1) + %a = load i16* getelementptr inbounds ([3 x i16]* bitcast ([6 x i8]* @test12g to [3 x i16]*), i32 0, i64 1) ret i16 %a ; 0x0062 @@ -194,6 +194,20 @@ entry: ; BE: ret i64 1 } +; Check with address space pointers +@g6_as1 = constant [2 x i8 addrspace(1)*] [i8 addrspace(1)* inttoptr (i16 1 to i8 addrspace(1)*), i8 addrspace(1)* inttoptr (i16 2 to i8 addrspace(1)*)] +define i16 @test14_as1() nounwind { +entry: + %tmp = load i16* bitcast ([2 x i8 addrspace(1)*]* @g6_as1 to i16*) + ret i16 %tmp + +; LE: @test14_as1 +; LE: ret i16 1 + +; BE: @test14_as1 +; BE: ret i16 1 +} + define i64 @test15() nounwind { entry: %tmp = load i64* bitcast (i8** getelementptr inbounds ([2 x i8*]* @g6, i32 0, i64 1) to i64*) diff --git 
a/test/Transforms/ConstantMerge/align.ll b/test/Transforms/ConstantMerge/align.ll new file mode 100644 index 0000000..c1cbcb3 --- /dev/null +++ b/test/Transforms/ConstantMerge/align.ll @@ -0,0 +1,28 @@ +; RUN: opt -constmerge -S < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + + +; Test that with a TD we do merge and mark the alignment as 4 +@T1A = internal unnamed_addr constant i32 1 +@T1B = internal unnamed_addr constant i32 1, align 2 +; CHECK: @T1B = internal unnamed_addr constant i32 1, align 4 + +define void @test1(i32** %P1, i32** %P2) { + store i32* @T1A, i32** %P1 + store i32* @T1B, i32** %P2 + ret void +} + + +; Test that even with a TD we set the alignment to the maximum if both constants +; have explicit alignments. +@T2A = internal unnamed_addr constant i32 2, align 1 +@T2B = internal unnamed_addr constant i32 2, align 2 +; CHECK: @T2B = internal unnamed_addr constant i32 2, align 2 + +define void @test2(i32** %P1, i32** %P2) { + store i32* @T2A, i32** %P1 + store i32* @T2B, i32** %P2 + ret void +} diff --git a/test/Transforms/ConstantMerge/lit.local.cfg b/test/Transforms/ConstantMerge/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/ConstantMerge/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/CorrelatedValuePropagation/lit.local.cfg b/test/Transforms/CorrelatedValuePropagation/lit.local.cfg deleted file mode 100644 index c6106e4..0000000 --- a/test/Transforms/CorrelatedValuePropagation/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll'] diff --git a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll index 4cb742d..26982db 100644 --- a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll +++ b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll @@ 
-44,11 +44,12 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone ; CHECK: attributes [[NUW]] = { nounwind } !llvm.dbg.cu = !{!3} +!llvm.module.flags = !{!30} !0 = metadata !{i32 524545, metadata !1, metadata !"name", metadata !2, i32 8, metadata !6} ; [ DW_TAG_arg_variable ] !1 = metadata !{i32 524334, metadata !28, metadata !2, metadata !"vfs_addname", metadata !"vfs_addname", metadata !"vfs_addname", i32 12, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !2 = metadata !{i32 524329, metadata !28} ; [ DW_TAG_file_type ] !3 = metadata !{i32 524305, metadata !28, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", i1 true, metadata !"", i32 0, metadata !29, metadata !29, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!4 = metadata !{i32 524309, metadata !28, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{i32 524309, metadata !28, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !5 = metadata !{metadata !6, metadata !6, metadata !9, metadata !9, metadata !9} !6 = metadata !{i32 524303, metadata !28, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ] !7 = metadata !{i32 524326, metadata !28, metadata !2, metadata !"", i32 0, i64 8, i64 8, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ] @@ -61,7 +62,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !14 = metadata !{i32 524299, metadata !28, metadata !1, i32 12, i32 0, i32 0} ; [ DW_TAG_lexical_block ] !15 = metadata !{i32 524545, metadata !16, metadata !"name", metadata !2, i32 17, metadata !6} ; [ DW_TAG_arg_variable ] !16 = metadata !{i32 524334, metadata !28, metadata !2, metadata 
!"add_name_internal", metadata !"add_name_internal", metadata !"add_name_internal", i32 22, metadata !17, i1 true, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!17 = metadata !{i32 524309, metadata !28, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null} ; [ DW_TAG_subroutine_type ] +!17 = metadata !{i32 524309, metadata !28, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !18 = metadata !{metadata !6, metadata !6, metadata !9, metadata !9, metadata !19, metadata !9} !19 = metadata !{i32 524324, metadata !28, metadata !2, metadata !"unsigned char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ] !20 = metadata !{i32 524545, metadata !16, metadata !"len", metadata !2, i32 18, metadata !9} ; [ DW_TAG_arg_variable ] @@ -74,3 +75,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !27 = metadata !{i32 26, i32 0, metadata !25, null} !28 = metadata !{metadata !"tail.c", metadata !"/Users/echeng/LLVM/radars/r7927803/"} !29 = metadata !{i32 0} +!30 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Transforms/DeadArgElim/dbginfo.ll b/test/Transforms/DeadArgElim/dbginfo.ll index 21de114..7bdcbf5 100644 --- a/test/Transforms/DeadArgElim/dbginfo.ll +++ b/test/Transforms/DeadArgElim/dbginfo.ll @@ -35,13 +35,14 @@ entry: } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!21} !0 = metadata !{i32 786449, metadata !20, i32 4, metadata !"clang version 3.2 (trunk 165305)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/samsonov/tmp/clang-di/test.cc] [DW_LANG_C_plus_plus] !1 = metadata !{i32 0} !3 = metadata !{metadata !5, metadata !8, metadata !9} !5 = metadata !{i32 786478, metadata !20, metadata 
!6, metadata !"run", metadata !"run", metadata !"", i32 8, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [run] !6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !1, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !1, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{i32 786478, metadata !20, metadata !6, metadata !"dead_vararg", metadata !"dead_vararg", metadata !"", i32 5, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (...)* @_ZN12_GLOBAL__N_111dead_varargEz, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ] [line 5] [local] [def] [dead_vararg] ; CHECK: metadata !"dead_vararg"{{.*}}void ()* @_ZN12_GLOBAL__N_111dead_varargEz @@ -61,3 +62,4 @@ entry: !18 = metadata !{i32 786443, metadata !20, metadata !8, i32 5, i32 23, i32 1} ; [ DW_TAG_lexical_block ] [/home/samsonov/tmp/clang-di/test.cc] !19 = metadata !{i32 5, i32 30, metadata !18, null} !20 = metadata !{metadata !"test.cc", metadata !"/home/samsonov/tmp/clang-di"} +!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Transforms/DeadArgElim/linkage.ll b/test/Transforms/DeadArgElim/linkage.ll new file mode 100644 index 0000000..f475484 --- /dev/null +++ b/test/Transforms/DeadArgElim/linkage.ll @@ -0,0 +1,21 @@ +; RUN: opt < %s -deadargelim -S | FileCheck %s + +; rdar://11546243 +%struct.A = type { i8 } + +define available_externally void @_Z17externallyDefinedP1A(%struct.A* %a) { +entry: + call void @_Z3foov() + ret void +} + +declare void @_Z3foov() + +define void @_Z4testP1A(%struct.A* %a) { +; CHECK: @_Z4testP1A +; 
CHECK: @_Z17externallyDefinedP1A(%struct.A* %a) + +entry: + call void @_Z17externallyDefinedP1A(%struct.A* %a) + ret void +} diff --git a/test/Transforms/DeadArgElim/lit.local.cfg b/test/Transforms/DeadArgElim/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/DeadArgElim/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/DeadStoreElimination/inst-limits.ll b/test/Transforms/DeadStoreElimination/inst-limits.ll new file mode 100644 index 0000000..9df8801 --- /dev/null +++ b/test/Transforms/DeadStoreElimination/inst-limits.ll @@ -0,0 +1,261 @@ +; RUN: opt -S -dse < %s | FileCheck %s + +; If there are two stores to the same location, DSE should be able to remove +; the first store if the two stores are separated by no more than 98 +; instructions. The existence of debug intrinsics between the stores should +; not affect this instruction limit. + +@x = global i32 0, align 4 + +; Function Attrs: nounwind +define i32 @test_within_limit() { +entry: + ; The first store; later there is a second store to the same location, + ; so this store should be optimized away by DSE. 
+ ; CHECK-NOT: store i32 1, i32* @x, align 4 + store i32 1, i32* @x, align 4 + + ; Insert 98 dummy instructions between the two stores + %0 = bitcast i32 0 to i32 + %1 = bitcast i32 0 to i32 + %2 = bitcast i32 0 to i32 + %3 = bitcast i32 0 to i32 + %4 = bitcast i32 0 to i32 + %5 = bitcast i32 0 to i32 + %6 = bitcast i32 0 to i32 + %7 = bitcast i32 0 to i32 + %8 = bitcast i32 0 to i32 + %9 = bitcast i32 0 to i32 + %10 = bitcast i32 0 to i32 + %11 = bitcast i32 0 to i32 + %12 = bitcast i32 0 to i32 + %13 = bitcast i32 0 to i32 + %14 = bitcast i32 0 to i32 + %15 = bitcast i32 0 to i32 + %16 = bitcast i32 0 to i32 + %17 = bitcast i32 0 to i32 + %18 = bitcast i32 0 to i32 + %19 = bitcast i32 0 to i32 + %20 = bitcast i32 0 to i32 + %21 = bitcast i32 0 to i32 + %22 = bitcast i32 0 to i32 + %23 = bitcast i32 0 to i32 + %24 = bitcast i32 0 to i32 + %25 = bitcast i32 0 to i32 + %26 = bitcast i32 0 to i32 + %27 = bitcast i32 0 to i32 + %28 = bitcast i32 0 to i32 + %29 = bitcast i32 0 to i32 + %30 = bitcast i32 0 to i32 + %31 = bitcast i32 0 to i32 + %32 = bitcast i32 0 to i32 + %33 = bitcast i32 0 to i32 + %34 = bitcast i32 0 to i32 + %35 = bitcast i32 0 to i32 + %36 = bitcast i32 0 to i32 + %37 = bitcast i32 0 to i32 + %38 = bitcast i32 0 to i32 + %39 = bitcast i32 0 to i32 + %40 = bitcast i32 0 to i32 + %41 = bitcast i32 0 to i32 + %42 = bitcast i32 0 to i32 + %43 = bitcast i32 0 to i32 + %44 = bitcast i32 0 to i32 + %45 = bitcast i32 0 to i32 + %46 = bitcast i32 0 to i32 + %47 = bitcast i32 0 to i32 + %48 = bitcast i32 0 to i32 + %49 = bitcast i32 0 to i32 + %50 = bitcast i32 0 to i32 + %51 = bitcast i32 0 to i32 + %52 = bitcast i32 0 to i32 + %53 = bitcast i32 0 to i32 + %54 = bitcast i32 0 to i32 + %55 = bitcast i32 0 to i32 + %56 = bitcast i32 0 to i32 + %57 = bitcast i32 0 to i32 + %58 = bitcast i32 0 to i32 + %59 = bitcast i32 0 to i32 + %60 = bitcast i32 0 to i32 + %61 = bitcast i32 0 to i32 + %62 = bitcast i32 0 to i32 + %63 = bitcast i32 0 to i32 + %64 = bitcast 
i32 0 to i32 + %65 = bitcast i32 0 to i32 + %66 = bitcast i32 0 to i32 + %67 = bitcast i32 0 to i32 + %68 = bitcast i32 0 to i32 + %69 = bitcast i32 0 to i32 + %70 = bitcast i32 0 to i32 + %71 = bitcast i32 0 to i32 + %72 = bitcast i32 0 to i32 + %73 = bitcast i32 0 to i32 + %74 = bitcast i32 0 to i32 + %75 = bitcast i32 0 to i32 + %76 = bitcast i32 0 to i32 + %77 = bitcast i32 0 to i32 + %78 = bitcast i32 0 to i32 + %79 = bitcast i32 0 to i32 + %80 = bitcast i32 0 to i32 + %81 = bitcast i32 0 to i32 + %82 = bitcast i32 0 to i32 + %83 = bitcast i32 0 to i32 + %84 = bitcast i32 0 to i32 + %85 = bitcast i32 0 to i32 + %86 = bitcast i32 0 to i32 + %87 = bitcast i32 0 to i32 + %88 = bitcast i32 0 to i32 + %89 = bitcast i32 0 to i32 + %90 = bitcast i32 0 to i32 + %91 = bitcast i32 0 to i32 + %92 = bitcast i32 0 to i32 + %93 = bitcast i32 0 to i32 + %94 = bitcast i32 0 to i32 + %95 = bitcast i32 0 to i32 + %96 = bitcast i32 0 to i32 + %97 = bitcast i32 0 to i32 + + ; Insert a meaningless dbg.value intrinsic; it should have no + ; effect on the working of DSE in any way. + call void @llvm.dbg.value(metadata !12, i64 0, metadata !10) + + ; CHECK: store i32 -1, i32* @x, align 4 + store i32 -1, i32* @x, align 4 + ret i32 0 +} + +; Function Attrs: nounwind +define i32 @test_outside_limit() { +entry: + ; The first store; later there is a second store to the same location + ; CHECK: store i32 1, i32* @x, align 4 + store i32 1, i32* @x, align 4 + + ; Insert 99 dummy instructions between the two stores; this is + ; one too many instruction for the DSE to take place. 
+ %0 = bitcast i32 0 to i32 + %1 = bitcast i32 0 to i32 + %2 = bitcast i32 0 to i32 + %3 = bitcast i32 0 to i32 + %4 = bitcast i32 0 to i32 + %5 = bitcast i32 0 to i32 + %6 = bitcast i32 0 to i32 + %7 = bitcast i32 0 to i32 + %8 = bitcast i32 0 to i32 + %9 = bitcast i32 0 to i32 + %10 = bitcast i32 0 to i32 + %11 = bitcast i32 0 to i32 + %12 = bitcast i32 0 to i32 + %13 = bitcast i32 0 to i32 + %14 = bitcast i32 0 to i32 + %15 = bitcast i32 0 to i32 + %16 = bitcast i32 0 to i32 + %17 = bitcast i32 0 to i32 + %18 = bitcast i32 0 to i32 + %19 = bitcast i32 0 to i32 + %20 = bitcast i32 0 to i32 + %21 = bitcast i32 0 to i32 + %22 = bitcast i32 0 to i32 + %23 = bitcast i32 0 to i32 + %24 = bitcast i32 0 to i32 + %25 = bitcast i32 0 to i32 + %26 = bitcast i32 0 to i32 + %27 = bitcast i32 0 to i32 + %28 = bitcast i32 0 to i32 + %29 = bitcast i32 0 to i32 + %30 = bitcast i32 0 to i32 + %31 = bitcast i32 0 to i32 + %32 = bitcast i32 0 to i32 + %33 = bitcast i32 0 to i32 + %34 = bitcast i32 0 to i32 + %35 = bitcast i32 0 to i32 + %36 = bitcast i32 0 to i32 + %37 = bitcast i32 0 to i32 + %38 = bitcast i32 0 to i32 + %39 = bitcast i32 0 to i32 + %40 = bitcast i32 0 to i32 + %41 = bitcast i32 0 to i32 + %42 = bitcast i32 0 to i32 + %43 = bitcast i32 0 to i32 + %44 = bitcast i32 0 to i32 + %45 = bitcast i32 0 to i32 + %46 = bitcast i32 0 to i32 + %47 = bitcast i32 0 to i32 + %48 = bitcast i32 0 to i32 + %49 = bitcast i32 0 to i32 + %50 = bitcast i32 0 to i32 + %51 = bitcast i32 0 to i32 + %52 = bitcast i32 0 to i32 + %53 = bitcast i32 0 to i32 + %54 = bitcast i32 0 to i32 + %55 = bitcast i32 0 to i32 + %56 = bitcast i32 0 to i32 + %57 = bitcast i32 0 to i32 + %58 = bitcast i32 0 to i32 + %59 = bitcast i32 0 to i32 + %60 = bitcast i32 0 to i32 + %61 = bitcast i32 0 to i32 + %62 = bitcast i32 0 to i32 + %63 = bitcast i32 0 to i32 + %64 = bitcast i32 0 to i32 + %65 = bitcast i32 0 to i32 + %66 = bitcast i32 0 to i32 + %67 = bitcast i32 0 to i32 + %68 = bitcast i32 0 to i32 + %69 = 
bitcast i32 0 to i32 + %70 = bitcast i32 0 to i32 + %71 = bitcast i32 0 to i32 + %72 = bitcast i32 0 to i32 + %73 = bitcast i32 0 to i32 + %74 = bitcast i32 0 to i32 + %75 = bitcast i32 0 to i32 + %76 = bitcast i32 0 to i32 + %77 = bitcast i32 0 to i32 + %78 = bitcast i32 0 to i32 + %79 = bitcast i32 0 to i32 + %80 = bitcast i32 0 to i32 + %81 = bitcast i32 0 to i32 + %82 = bitcast i32 0 to i32 + %83 = bitcast i32 0 to i32 + %84 = bitcast i32 0 to i32 + %85 = bitcast i32 0 to i32 + %86 = bitcast i32 0 to i32 + %87 = bitcast i32 0 to i32 + %88 = bitcast i32 0 to i32 + %89 = bitcast i32 0 to i32 + %90 = bitcast i32 0 to i32 + %91 = bitcast i32 0 to i32 + %92 = bitcast i32 0 to i32 + %93 = bitcast i32 0 to i32 + %94 = bitcast i32 0 to i32 + %95 = bitcast i32 0 to i32 + %96 = bitcast i32 0 to i32 + %97 = bitcast i32 0 to i32 + %98 = bitcast i32 0 to i32 + + ; CHECK: store i32 -1, i32* @x, align 4 + store i32 -1, i32* @x, align 4 + ret i32 0 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.value(metadata, i64, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!11, !13} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !9, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/home/tmp/test.c] [DW_LANG_C99] +!1 = metadata !{metadata !"test.c", metadata !"/home/tmp"} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_within_limit", metadata !"test_within_limit", metadata !"", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @test_within_limit, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [test] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/home/tmp/test.c] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, 
null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{metadata !8} +!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{metadata !10} +!10 = metadata !{i32 786484, i32 0, null, metadata !"x", metadata !"x", metadata !"", metadata !5, i32 1, metadata !8, i32 0, i32 1, i32* @x, null} ; [ DW_TAG_variable ] [x] [line 1] [def] +!11 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!12 = metadata !{i32* undef} + +!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Transforms/DeadStoreElimination/lit.local.cfg b/test/Transforms/DeadStoreElimination/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/DeadStoreElimination/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/DebugIR/lit.local.cfg b/test/Transforms/DebugIR/lit.local.cfg deleted file mode 100644 index c6106e4..0000000 --- a/test/Transforms/DebugIR/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll'] diff --git a/test/Transforms/DebugIR/simple-addrspace.ll b/test/Transforms/DebugIR/simple-addrspace.ll new file mode 100644 index 0000000..6bea9b2 --- /dev/null +++ b/test/Transforms/DebugIR/simple-addrspace.ll @@ -0,0 +1,13 @@ +; RUN: opt -debug-ir -S %s -o - | FileCheck %s + +target datalayout = "e-p:64:64:64-p1:16:16:16" + +define void @foo(i32 addrspace(1)*) nounwind { + ret void +} + +; Make sure the pointer size is 16 + +; CHECK: metadata !"i32 addrspace(1)*", i32 0, i64 16, i64 2, i64 0, i32 0 + + diff --git a/test/Transforms/EarlyCSE/lit.local.cfg b/test/Transforms/EarlyCSE/lit.local.cfg deleted file mode 100644 index c6106e4..0000000 --- a/test/Transforms/EarlyCSE/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll'] diff --git 
a/test/Transforms/FunctionAttrs/annotate-1.ll b/test/Transforms/FunctionAttrs/annotate-1.ll index adb7bce..9fba7a9 100644 --- a/test/Transforms/FunctionAttrs/annotate-1.ll +++ b/test/Transforms/FunctionAttrs/annotate-1.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -functionattrs -S | FileCheck %s -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -functionattrs -S | FileCheck -check-prefix=POSIX %s +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -functionattrs -S | FileCheck -check-prefix=CHECK-POSIX %s declare i8* @fopen(i8*, i8*) ; CHECK: declare noalias i8* @fopen(i8* nocapture readonly, i8* nocapture readonly) [[G0:#[0-9]]] diff --git a/test/Transforms/FunctionAttrs/lit.local.cfg b/test/Transforms/FunctionAttrs/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/FunctionAttrs/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/GCOVProfiling/linkagename.ll b/test/Transforms/GCOVProfiling/linkagename.ll index 9453e1e..ed3a5bd 100644 --- a/test/Transforms/GCOVProfiling/linkagename.ll +++ b/test/Transforms/GCOVProfiling/linkagename.ll @@ -12,6 +12,7 @@ entry: } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!10} !llvm.gcov = !{!9} !0 = metadata !{i32 786449, metadata !2, i32 4, metadata !"clang version 3.3 (trunk 177323)", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !4, metadata !3, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/home/nlewycky/hello.cc] [DW_LANG_C_plus_plus] @@ -20,8 +21,9 @@ entry: !3 = metadata !{i32 0} !4 = metadata !{metadata !5} !5 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo", metadata !"foo", metadata !"_Z3foov", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3foov, null, null, metadata !3, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo] -!6 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ 
DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!6 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !7 = metadata !{null} !8 = metadata !{i32 1, i32 0, metadata !5, null} +!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Transforms/GCOVProfiling/lit.local.cfg b/test/Transforms/GCOVProfiling/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/GCOVProfiling/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/GCOVProfiling/version.ll b/test/Transforms/GCOVProfiling/version.ll index a90290f..2f1bd70 100644 --- a/test/Transforms/GCOVProfiling/version.ll +++ b/test/Transforms/GCOVProfiling/version.ll @@ -16,6 +16,7 @@ define void @test() { !llvm.gcov = !{!9} !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!12} !0 = metadata !{i32 786449, metadata !11, i32 4, metadata !"clang version 3.3 (trunk 176994)", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !4, metadata !3, null, metadata !""} ; [ DW_TAG_compile_unit ] [./version] [DW_LANG_C_plus_plus] !2 = metadata !{i32 786473, metadata !11} ; [ DW_TAG_file_type ] @@ -23,8 +24,9 @@ define void @test() { !4 = metadata !{metadata !5} !5 = metadata !{i32 786478, metadata !10, metadata !6, metadata !"test", metadata !"test", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @test, null, null, metadata !3, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [test] !6 = metadata !{i32 786473, metadata !10} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !3, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 
0, i32 0, null, metadata !3, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{i32 1, i32 0, metadata !5, null} ;; !9 is added through the echo line at the top. !10 = metadata !{metadata !"<stdin>", metadata !"."} !11 = metadata !{metadata !"version", metadata !"/usr/local/google/home/nlewycky"} +!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll b/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll index a1cc008..5a15f0e 100644 --- a/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll +++ b/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll @@ -2,7 +2,7 @@ @last = external global [65 x i32*] -define i32 @NextRootMove(i32 %wtm) { +define i32 @NextRootMove(i32 %wtm, i32 %x, i32 %y, i32 %z) { entry: %A = alloca i32* %tmp17618 = load i32** getelementptr ([65 x i32*]* @last, i32 0, i32 1), align 4 @@ -15,12 +15,14 @@ entry: br label %cond_true116 cond_true116: - br i1 false, label %cond_true128, label %cond_true145 + %cmp = icmp eq i32 %x, %y + br i1 %cmp, label %cond_true128, label %cond_true145 cond_true128: %tmp17625 = load i32** getelementptr ([65 x i32*]* @last, i32 0, i32 1), align 4 store i32* %tmp17625, i32** %A - br i1 false, label %bb98.backedge, label %return.loopexit + %cmp1 = icmp eq i32 %x, %z + br i1 %cmp1 , label %bb98.backedge, label %return.loopexit bb98.backedge: br label %cond_true116 diff --git a/test/Transforms/GVN/2008-07-02-Unreachable.ll b/test/Transforms/GVN/2008-07-02-Unreachable.ll index 4f07868..ce83fa4 100644 --- a/test/Transforms/GVN/2008-07-02-Unreachable.ll +++ b/test/Transforms/GVN/2008-07-02-Unreachable.ll @@ -3,10 +3,11 @@ @g_3 = external global i8 ; <i8*> [#uses=2] -define i8 @func_1() nounwind { +define i8 @func_1(i32 %x, i32 %y) nounwind { entry: %A = alloca i8 - br i1 false, label %ifelse, label %ifthen + %cmp = icmp eq i32 %x, %y + br i1 %cmp, label %ifelse, label %ifthen ifthen: ; preds = %entry br 
label %ifend @@ -14,9 +15,6 @@ ifthen: ; preds = %entry ifelse: ; preds = %entry %tmp3 = load i8* @g_3 ; <i8> [#uses=0] store i8 %tmp3, i8* %A - br label %forcond.thread - -forcond.thread: ; preds = %ifelse br label %afterfor forcond: ; preds = %forinc diff --git a/test/Transforms/GVN/2011-06-01-NonLocalMemdepMiscompile.ll b/test/Transforms/GVN/2011-06-01-NonLocalMemdepMiscompile.ll index 4613bc4..298f274 100644 --- a/test/Transforms/GVN/2011-06-01-NonLocalMemdepMiscompile.ll +++ b/test/Transforms/GVN/2011-06-01-NonLocalMemdepMiscompile.ll @@ -19,10 +19,10 @@ bb1: br i1 undef, label %bb3, label %bb15 ; CHECK: bb1: -; CHECK: %tmp16 = phi i8* [ getelementptr (i8* null, i64 undef), %bb10 ], [ null, %bb ] +; CHECK: [[TMP:%.*]] = phi i8* [ getelementptr (i8* null, i64 undef), %bb10 ], [ null, %bb ] ; CHECK: bb1.bb15_crit_edge: -; CHECK: %tmp17.pre = load i8* %tmp16, align 1 +; CHECK: %tmp17.pre = load i8* [[TMP]], align 1 bb3: call void @isalnum() diff --git a/test/Transforms/GVN/cond_br.ll b/test/Transforms/GVN/cond_br.ll new file mode 100644 index 0000000..918e7d4 --- /dev/null +++ b/test/Transforms/GVN/cond_br.ll @@ -0,0 +1,55 @@ +; RUN: opt -basicaa -gvn -S < %s | FileCheck %s +@y = external global i32 +@z = external global i32 + +; Function Attrs: nounwind ssp uwtable +define void @foo(i32 %x) { +; CHECK: @foo(i32 %x) +; CHECK: %.pre = load i32* @y +; CHECK: call void @bar(i32 %.pre) + + %t = sub i32 %x, %x + %.pre = load i32* @y, align 4 + %cmp = icmp sgt i32 %t, 2 + br i1 %cmp, label %if.then, label %entry.if.end_crit_edge + +entry.if.end_crit_edge: ; preds = %entry + br label %if.end + +if.then: ; preds = %entry + %add = add nsw i32 %x, 3 + store i32 %add, i32* @y, align 4 + br label %if.end + +if.end: ; preds = %entry.if.end_crit_edge, %if.then + %1 = phi i32 [ %.pre, %entry.if.end_crit_edge ], [ %add, %if.then ] + tail call void @bar(i32 %1) + ret void +} + +define void @foo2(i32 %x) { +; CHECK: @foo2(i32 %x) +; CHECK: %.pre = load i32* @y +; CHECK: tail call 
void @bar(i32 %.pre) +entry: + %t = sub i32 %x, %x + %.pre = load i32* @y, align 4 + %cmp = icmp sgt i32 %t, 2 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %add = add nsw i32 %x, 3 + store i32 %add, i32* @y, align 4 + br label %if.end + +if.else: ; preds = %entry + store i32 1, i32* @z, align 4 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %0 = phi i32 [ %.pre, %if.else ], [ %add, %if.then ] + tail call void @bar(i32 %0) + ret void +} + +declare void @bar(i32) diff --git a/test/Transforms/GVN/cond_br2.ll b/test/Transforms/GVN/cond_br2.ll new file mode 100644 index 0000000..27e6f75 --- /dev/null +++ b/test/Transforms/GVN/cond_br2.ll @@ -0,0 +1,140 @@ +; RUN: opt -basicaa -gvn -S < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +%"class.llvm::SmallVector" = type { %"class.llvm::SmallVectorImpl", [1 x %"union.llvm::SmallVectorBase::U"] } +%"class.llvm::SmallVectorImpl" = type { %"class.llvm::SmallVectorTemplateBase" } +%"class.llvm::SmallVectorTemplateBase" = type { %"class.llvm::SmallVectorTemplateCommon" } +%"class.llvm::SmallVectorTemplateCommon" = type { %"class.llvm::SmallVectorBase" } +%"class.llvm::SmallVectorBase" = type { i8*, i8*, i8*, %"union.llvm::SmallVectorBase::U" } +%"union.llvm::SmallVectorBase::U" = type { x86_fp80 } + +; Function Attrs: ssp uwtable +define void @_Z4testv() #0 { +; CHECK: @_Z4testv() +; CHECK: invoke.cont: +; CHECK: br i1 true, label %new.notnull.i11, label %if.end.i14 +; CHECK: Retry.i10: + +entry: + %sv = alloca %"class.llvm::SmallVector", align 16 + %0 = bitcast %"class.llvm::SmallVector"* %sv to i8* + call void @llvm.lifetime.start(i64 64, i8* %0) #1 + %BeginX.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0 + %FirstEl.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SmallVector"* 
%sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 3 + %1 = bitcast %"union.llvm::SmallVectorBase::U"* %FirstEl.i.i.i.i.i.i to i8* + store i8* %1, i8** %BeginX.i.i.i.i.i.i, align 16, !tbaa !4 + %EndX.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 1 + store i8* %1, i8** %EndX.i.i.i.i.i.i, align 8, !tbaa !4 + %CapacityX.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 2 + %add.ptr.i.i.i.i2.i.i = getelementptr inbounds %"union.llvm::SmallVectorBase::U"* %FirstEl.i.i.i.i.i.i, i64 2 + %add.ptr.i.i.i.i.i.i = bitcast %"union.llvm::SmallVectorBase::U"* %add.ptr.i.i.i.i2.i.i to i8* + store i8* %add.ptr.i.i.i.i.i.i, i8** %CapacityX.i.i.i.i.i.i, align 16, !tbaa !4 + %EndX.i = getelementptr inbounds %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 1 + %2 = load i8** %EndX.i, align 8, !tbaa !4 + %CapacityX.i = getelementptr inbounds %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 2 + %cmp.i = icmp ult i8* %2, %add.ptr.i.i.i.i.i.i + br i1 %cmp.i, label %Retry.i, label %if.end.i + +Retry.i: ; preds = %.noexc, %entry + %3 = phi i8* [ %2, %entry ], [ %.pre.i, %.noexc ] + %new.isnull.i = icmp eq i8* %3, null + br i1 %new.isnull.i, label %invoke.cont, label %new.notnull.i + +new.notnull.i: ; preds = %Retry.i + %4 = bitcast i8* %3 to i32* + store i32 1, i32* %4, align 4, !tbaa !5 + br label %invoke.cont + +if.end.i: ; preds = %entry + %5 = getelementptr inbounds %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0 + invoke void @_ZN4llvm15SmallVectorBase8grow_podEmm(%"class.llvm::SmallVectorBase"* %5, i64 0, i64 4) + to label %.noexc unwind label %lpad + +.noexc: ; preds = %if.end.i + %.pre.i = load i8** %EndX.i, align 8, !tbaa !4 + br label %Retry.i + +invoke.cont: ; preds = %new.notnull.i, %Retry.i + %add.ptr.i = getelementptr inbounds i8* %3, i64 4 + store i8* %add.ptr.i, i8** %EndX.i, align 8, !tbaa !4 + %6 
= load i8** %CapacityX.i, align 16, !tbaa !4 + %cmp.i8 = icmp ult i8* %add.ptr.i, %6 + br i1 %cmp.i8, label %new.notnull.i11, label %if.end.i14 + +Retry.i10: ; preds = %if.end.i14 + %.pre.i13 = load i8** %EndX.i, align 8, !tbaa !4 + %new.isnull.i9 = icmp eq i8* %.pre.i13, null + br i1 %new.isnull.i9, label %invoke.cont2, label %new.notnull.i11 + +new.notnull.i11: ; preds = %invoke.cont, %Retry.i10 + %7 = phi i8* [ %.pre.i13, %Retry.i10 ], [ %add.ptr.i, %invoke.cont ] + %8 = bitcast i8* %7 to i32* + store i32 2, i32* %8, align 4, !tbaa !5 + br label %invoke.cont2 + +if.end.i14: ; preds = %invoke.cont + %9 = getelementptr inbounds %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0 + invoke void @_ZN4llvm15SmallVectorBase8grow_podEmm(%"class.llvm::SmallVectorBase"* %9, i64 0, i64 4) + to label %Retry.i10 unwind label %lpad + +invoke.cont2: ; preds = %new.notnull.i11, %Retry.i10 + %10 = phi i8* [ null, %Retry.i10 ], [ %7, %new.notnull.i11 ] + %add.ptr.i12 = getelementptr inbounds i8* %10, i64 4 + store i8* %add.ptr.i12, i8** %EndX.i, align 8, !tbaa !4 + invoke void @_Z1gRN4llvm11SmallVectorIiLj8EEE(%"class.llvm::SmallVector"* %sv) + to label %invoke.cont3 unwind label %lpad + +invoke.cont3: ; preds = %invoke.cont2 + %11 = load i8** %BeginX.i.i.i.i.i.i, align 16, !tbaa !4 + %cmp.i.i.i.i19 = icmp eq i8* %11, %1 + br i1 %cmp.i.i.i.i19, label %_ZN4llvm11SmallVectorIiLj8EED1Ev.exit21, label %if.then.i.i.i20 + +if.then.i.i.i20: ; preds = %invoke.cont3 + call void @free(i8* %11) #1 + br label %_ZN4llvm11SmallVectorIiLj8EED1Ev.exit21 + +_ZN4llvm11SmallVectorIiLj8EED1Ev.exit21: ; preds = %invoke.cont3, %if.then.i.i.i20 + call void @llvm.lifetime.end(i64 64, i8* %0) #1 + ret void + +lpad: ; preds = %if.end.i14, %if.end.i, %invoke.cont2 + %12 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + cleanup + %13 = load i8** %BeginX.i.i.i.i.i.i, align 16, !tbaa !4 + %cmp.i.i.i.i = icmp eq i8* %13, %1 + br i1 %cmp.i.i.i.i, label 
%eh.resume, label %if.then.i.i.i + +if.then.i.i.i: ; preds = %lpad + call void @free(i8* %13) #1 + br label %eh.resume + +eh.resume: ; preds = %if.then.i.i.i, %lpad + resume { i8*, i32 } %12 +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #1 + +declare i32 @__gxx_personality_v0(...) + +declare void @_Z1gRN4llvm11SmallVectorIiLj8EEE(%"class.llvm::SmallVector"*) #2 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #1 + +declare void @_ZN4llvm15SmallVectorBase8grow_podEmm(%"class.llvm::SmallVectorBase"*, i64, i64) #2 + +; Function Attrs: nounwind +declare void @free(i8* nocapture) #3 + +attributes #0 = { ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind } +attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!0 = metadata !{metadata !"any pointer", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} +!3 = metadata !{metadata !"int", metadata !1} +!4 = metadata !{metadata !0, metadata !0, i64 0} +!5 = metadata !{metadata !3, metadata !3, i64 0} diff --git a/test/Transforms/GVN/lit.local.cfg b/test/Transforms/GVN/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/GVN/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = 
['.ll', '.c', '.cpp'] diff --git a/test/Transforms/GVN/local-pre.ll b/test/Transforms/GVN/local-pre.ll index 1d0dadf..2c92699 100644 --- a/test/Transforms/GVN/local-pre.ll +++ b/test/Transforms/GVN/local-pre.ll @@ -1,9 +1,9 @@ ; RUN: opt < %s -gvn -enable-pre -S | grep "b.pre" -define i32 @main(i32 %p) { +define i32 @main(i32 %p, i32 %q) { block1: - - br i1 true, label %block2, label %block3 + %cmp = icmp eq i32 %p, %q + br i1 %cmp, label %block2, label %block3 block2: %a = add i32 %p, 1 diff --git a/test/Transforms/GVN/malloc-load-removal.ll b/test/Transforms/GVN/malloc-load-removal.ll index e93a62a..d2d2fd7 100644 --- a/test/Transforms/GVN/malloc-load-removal.ll +++ b/test/Transforms/GVN/malloc-load-removal.ll @@ -7,7 +7,7 @@ target triple = "x86_64-apple-macosx10.8.0" declare i8* @malloc(i64) nounwind -define noalias i8* @test() nounwind uwtable ssp { +define noalias i8* @test1() nounwind uwtable ssp { entry: %call = tail call i8* @malloc(i64 100) nounwind %0 = load i8* %call, align 1 @@ -21,11 +21,36 @@ if.then: ; preds = %entry if.end: ; preds = %if.then, %entry ret i8* %call -; CHECK-LABEL: @test( +; CHECK-LABEL: @test1( ; CHECK-NOT: load ; CHECK-NOT: icmp -; CHECK_NO_LIBCALLS-LABEL: @test( +; CHECK_NO_LIBCALLS-LABEL: @test1( +; CHECK_NO_LIBCALLS: load +; CHECK_NO_LIBCALLS: icmp +} + +declare i8* @_Znwm(i64) nounwind + +define noalias i8* @test2() nounwind uwtable ssp { +entry: + %call = tail call i8* @_Znwm(i64 100) nounwind + %0 = load i8* %call, align 1 + %tobool = icmp eq i8 %0, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + store i8 0, i8* %call, align 1 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret i8* %call + +; CHECK-LABEL: @test2( +; CHECK-NOT: load +; CHECK-NOT: icmp + +; CHECK_NO_LIBCALLS-LABEL: @test2( ; CHECK_NO_LIBCALLS: load ; CHECK_NO_LIBCALLS: icmp } diff --git a/test/Transforms/GVN/pr17732.ll b/test/Transforms/GVN/pr17732.ll new file mode 100644 index 0000000..606a195 --- /dev/null +++ 
b/test/Transforms/GVN/pr17732.ll @@ -0,0 +1,30 @@ +; RUN: opt -gvn -S -o - < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.with_array = type { [2 x i8], i32, i8 } +%struct.with_vector = type { <2 x i8>, i32, i8 } + +@main.obj_with_array = private unnamed_addr constant { [2 x i8], i32, i8, [3 x i8] } { [2 x i8] zeroinitializer, i32 0, i8 1, [3 x i8] undef }, align 4 +@array_with_zeroinit = common global %struct.with_array zeroinitializer, align 4 + +@main.obj_with_vector = private unnamed_addr constant { <2 x i8>, i32, i8, [3 x i8] } { <2 x i8> zeroinitializer, i32 0, i8 1, [3 x i8] undef }, align 4 +@vector_with_zeroinit = common global %struct.with_vector zeroinitializer, align 4 + +define i32 @main() { +entry: + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds (%struct.with_array* @array_with_zeroinit, i64 0, i32 0, i64 0), i8* getelementptr inbounds ({ [2 x i8], i32, i8, [3 x i8] }* @main.obj_with_array, i64 0, i32 0, i64 0), i64 12, i32 4, i1 false) + %0 = load i8* getelementptr inbounds (%struct.with_array* @array_with_zeroinit, i64 0, i32 2), align 4 + + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds (%struct.with_vector* @vector_with_zeroinit, i64 0, i32 0, i64 0), i8* getelementptr inbounds ({ <2 x i8>, i32, i8, [3 x i8] }* @main.obj_with_vector, i64 0, i32 0, i64 0), i64 12, i32 4, i1 false) + %1 = load i8* getelementptr inbounds (%struct.with_vector* @vector_with_zeroinit, i64 0, i32 2), align 4 + %conv0 = sext i8 %0 to i32 + %conv1 = sext i8 %1 to i32 + %and = and i32 %conv0, %conv1 + ret i32 %and +; CHECK-LABEL: define i32 @main( +; CHECK: ret i32 1 +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) diff --git a/test/Transforms/GVN/pr17852.ll 
b/test/Transforms/GVN/pr17852.ll new file mode 100644 index 0000000..e95ff7f --- /dev/null +++ b/test/Transforms/GVN/pr17852.ll @@ -0,0 +1,66 @@ +; RUN: opt < %s -basicaa -gvn +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +%struct.S0 = type { [2 x i8], [2 x i8], [4 x i8], [2 x i8], i32, i32, i32, i32 } +define void @fn1(%struct.S0* byval align 8 %p1) { + br label %for.cond +for.cond: ; preds = %1, %0 + br label %for.end + %f2 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 2 + %f9 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 7 + br label %for.cond +for.end: ; preds = %for.cond + br i1 true, label %if.else, label %if.then +if.then: ; preds = %for.end + %f22 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 2 + %f7 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 5 + %tmp7 = load i32* %f7, align 8 + br label %if.end40 +if.else: ; preds = %for.end + br i1 false, label %for.cond18, label %if.then6 +if.then6: ; preds = %if.else + %f3 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 2 + %tmp10 = bitcast %struct.S0* %p1 to i16* + %f5 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 3 + %tmp11 = bitcast [2 x i8]* %f5 to i16* + %bf.load13 = load i16* %tmp11, align 8 + br label %if.end36 +for.cond18: ; preds = %if.else + call void @fn4() + br i1 true, label %if.end, label %if.end36 +if.end: ; preds = %for.cond18 + %f321 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 2 + %f925 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 7 + %f526 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 3 + %tmp15 = bitcast [2 x i8]* %f526 to i16* + %bf.load27 = load i16* %tmp15, align 8 + %tmp16 = bitcast %struct.S0* %p1 to i16* + br label %if.end36 +if.end36: ; preds = %if.end, %for.cond18, %if.then6 + %f537 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 3 + %tmp17 = bitcast [2 x i8]* %f537 to i16* + %bf.load38 = load i16* 
%tmp17, align 8 + %bf.clear39 = and i16 %bf.load38, -16384 + br label %if.end40 +if.end40: ; preds = %if.end36, %if.then + %f6 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 4 + %tmp18 = load i32* %f6, align 4 + call void @fn2(i32 %tmp18) + %f8 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 6 + %tmp19 = load i32* %f8, align 4 + %tobool41 = icmp eq i32 %tmp19, 0 + br i1 true, label %if.end50, label %if.then42 +if.then42: ; preds = %if.end40 + %tmp20 = bitcast %struct.S0* %p1 to i16* + %f547 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 3 + %tmp21 = bitcast [2 x i8]* %f547 to i16* + %bf.load48 = load i16* %tmp21, align 8 + br label %if.end50 +if.end50: ; preds = %if.then42, %if.end40 + %f551 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 3 + %tmp22 = bitcast [2 x i8]* %f551 to i16* + %bf.load52 = load i16* %tmp22, align 8 + %bf.clear53 = and i16 %bf.load52, -16384 + ret void +} +declare void @fn2(i32) +declare void @fn4() diff --git a/test/Transforms/GVN/preserve-tbaa.ll b/test/Transforms/GVN/preserve-tbaa.ll index e52772b..c52ed96 100644 --- a/test/Transforms/GVN/preserve-tbaa.ll +++ b/test/Transforms/GVN/preserve-tbaa.ll @@ -25,6 +25,7 @@ for.end: ; preds = %for.body, %entry ret void } -!0 = metadata !{metadata !"short", metadata !1} +!0 = metadata !{metadata !3, metadata !3, i64 0} !1 = metadata !{metadata !"omnipotent char", metadata !2} !2 = metadata !{metadata !"Simple C/C++ TBAA", null} +!3 = metadata !{metadata !"short", metadata !1} diff --git a/test/Transforms/GVN/rle-nonlocal.ll b/test/Transforms/GVN/rle-nonlocal.ll index 6b74e9a..8229aaa 100644 --- a/test/Transforms/GVN/rle-nonlocal.ll +++ b/test/Transforms/GVN/rle-nonlocal.ll @@ -1,8 +1,9 @@ ; RUN: opt < %s -basicaa -gvn -S | FileCheck %s -define i32 @main(i32** %p) { +define i32 @main(i32** %p, i32 %x, i32 %y) { block1: - br i1 true, label %block2, label %block3 + %cmp = icmp eq i32 %x, %y + br i1 %cmp , label %block2, label %block3 block2: %a = load i32** %p diff --git 
a/test/Transforms/GVN/rle-semidominated.ll b/test/Transforms/GVN/rle-semidominated.ll index 71aa548..923cd03 100644 --- a/test/Transforms/GVN/rle-semidominated.ll +++ b/test/Transforms/GVN/rle-semidominated.ll @@ -1,9 +1,10 @@ ; RUN: opt < %s -basicaa -gvn -S | grep "DEAD = phi i32 " -define i32 @main(i32* %p) { +define i32 @main(i32* %p, i32 %x, i32 %y) { block1: %z = load i32* %p - br i1 true, label %block2, label %block3 + %cmp = icmp eq i32 %x, %y + br i1 %cmp, label %block2, label %block3 block2: br label %block4 diff --git a/test/Transforms/GVN/rle.ll b/test/Transforms/GVN/rle.ll index 8787dd5..8d289b0 100644 --- a/test/Transforms/GVN/rle.ll +++ b/test/Transforms/GVN/rle.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -default-data-layout="e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-n8:16:32" -basicaa -gvn -S -die | FileCheck %s -; RUN: opt < %s -default-data-layout="E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-n32" -basicaa -gvn -S -die | FileCheck %s +; RUN: opt < %s -default-data-layout="e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-n8:16:32" -basicaa -gvn -S -die | FileCheck %s +; RUN: opt < %s -default-data-layout="E-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-n32" -basicaa -gvn -S -die | FileCheck %s ;; Trivial RLE test. define i32 @test0(i32 %V, i32* %P) { @@ -195,6 +195,7 @@ Cont: } @GCst = constant {i32, float, i32 } { i32 42, float 14., i32 97 } +@GCst_as1 = addrspace(1) constant {i32, float, i32 } { i32 42, float 14., i32 97 } ; memset -> float forwarding. 
define float @memcpy_to_float_local(float* %A) nounwind ssp { @@ -209,7 +210,18 @@ entry: ; CHECK: ret float 1.400000e+01 } - +; memcpy from address space 1 +define float @memcpy_to_float_local_as1(float* %A) nounwind ssp { +entry: + %conv = bitcast float* %A to i8* ; <i8*> [#uses=1] + tail call void @llvm.memcpy.p0i8.p1i8.i64(i8* %conv, i8 addrspace(1)* bitcast ({i32, float, i32 } addrspace(1)* @GCst_as1 to i8 addrspace(1)*), i64 12, i32 1, i1 false) + %arrayidx = getelementptr inbounds float* %A, i64 1 ; <float*> [#uses=1] + %tmp2 = load float* %arrayidx ; <float> [#uses=1] + ret float %tmp2 +; CHECK-LABEL: @memcpy_to_float_local_as1( +; CHECK-NOT: load +; CHECK: ret float 1.400000e+01 +} ;; non-local i32/float -> i8 load forwarding. define i8 @coerce_mustalias_nonlocal0(i32* %P, i1 %cond) { @@ -357,13 +369,14 @@ Cont: ; CHECK: ret i8 %A } -define i32 @chained_load(i32** %p) { +define i32 @chained_load(i32** %p, i32 %x, i32 %y) { block1: %A = alloca i32* %z = load i32** %p store i32* %z, i32** %A - br i1 true, label %block2, label %block3 + %cmp = icmp eq i32 %x, %y + br i1 %cmp, label %block2, label %block3 block2: %a = load i32** %p @@ -427,10 +440,11 @@ TY: ret i32 0 } -define i32 @phi_trans3(i32* %p) { +define i32 @phi_trans3(i32* %p, i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @phi_trans3( block1: - br i1 true, label %block2, label %block3 + %cmpxy = icmp eq i32 %x, %y + br i1 %cmpxy, label %block2, label %block3 block2: store i32 87, i32* %p @@ -443,7 +457,7 @@ block3: block4: %A = phi i32 [-1, %block2], [42, %block3] - br i1 true, label %block5, label %exit + br i1 %cmpxy, label %block5, label %exit ; CHECK: block4: ; CHECK-NEXT: %D = phi i32 [ 87, %block2 ], [ 97, %block3 ] @@ -451,11 +465,11 @@ block4: block5: %B = add i32 %A, 1 - br i1 true, label %block6, label %exit + br i1 %cmpxy, label %block6, label %exit block6: %C = getelementptr i32* %p, i32 %B - br i1 true, label %block7, label %exit + br i1 %cmpxy, label %block7, label %exit block7: %D = load 
i32* %C @@ -645,6 +659,8 @@ entry: declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p1i8.i64(i8* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind + ;;===----------------------------------------------------------------------===;; ;; Load -> Store dependency which isn't interfered with by a call that happens diff --git a/test/Transforms/GVN/tbaa.ll b/test/Transforms/GVN/tbaa.ll index 85fe39a..d6412fc 100644 --- a/test/Transforms/GVN/tbaa.ll +++ b/test/Transforms/GVN/tbaa.ll @@ -13,7 +13,7 @@ define i32 @test1(i8* %p, i8* %q) { define i32 @test2(i8* %p, i8* %q) { ; CHECK: @test2(i8* %p, i8* %q) -; CHECK: call i32 @foo(i8* %p), !tbaa !0 +; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGC:!.*]] ; CHECK: %c = add i32 %a, %a %a = call i32 @foo(i8* %p), !tbaa !0 %b = call i32 @foo(i8* %p), !tbaa !0 @@ -23,7 +23,7 @@ define i32 @test2(i8* %p, i8* %q) { define i32 @test3(i8* %p, i8* %q) { ; CHECK: @test3(i8* %p, i8* %q) -; CHECK: call i32 @foo(i8* %p), !tbaa !3 +; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGB:!.*]] ; CHECK: %c = add i32 %a, %a %a = call i32 @foo(i8* %p), !tbaa !3 %b = call i32 @foo(i8* %p), !tbaa !3 @@ -33,7 +33,7 @@ define i32 @test3(i8* %p, i8* %q) { define i32 @test4(i8* %p, i8* %q) { ; CHECK: @test4(i8* %p, i8* %q) -; CHECK: call i32 @foo(i8* %p), !tbaa !1 +; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGA:!.*]] ; CHECK: %c = add i32 %a, %a %a = call i32 @foo(i8* %p), !tbaa !1 %b = call i32 @foo(i8* %p), !tbaa !0 @@ -43,7 +43,7 @@ define i32 @test4(i8* %p, i8* %q) { define i32 @test5(i8* %p, i8* %q) { ; CHECK: @test5(i8* %p, i8* %q) -; CHECK: call i32 @foo(i8* %p), !tbaa !1 +; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGA:!.*]] ; CHECK: %c = add i32 %a, %a %a = call i32 @foo(i8* %p), !tbaa !0 %b = call i32 @foo(i8* %p), !tbaa !1 @@ -53,7 +53,7 @@ define i32 @test5(i8* %p, i8* %q) { define i32 @test6(i8* %p, 
i8* %q) { ; CHECK: @test6(i8* %p, i8* %q) -; CHECK: call i32 @foo(i8* %p), !tbaa !1 +; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGA:!.*]] ; CHECK: %c = add i32 %a, %a %a = call i32 @foo(i8* %p), !tbaa !0 %b = call i32 @foo(i8* %p), !tbaa !3 @@ -74,8 +74,18 @@ define i32 @test7(i8* %p, i8* %q) { declare i32 @foo(i8*) readonly -!0 = metadata !{metadata !"C", metadata !1} -!1 = metadata !{metadata !"A", metadata !2} +; CHECK: [[TAGC]] = metadata !{metadata [[TYPEC:!.*]], metadata [[TYPEC]], i64 0} +; CHECK: [[TYPEC]] = metadata !{metadata !"C", metadata [[TYPEA:!.*]]} +; CHECK: [[TYPEA]] = metadata !{metadata !"A", metadata !{{.*}}} +; CHECK: [[TAGB]] = metadata !{metadata [[TYPEB:!.*]], metadata [[TYPEB]], i64 0} +; CHECK: [[TYPEB]] = metadata !{metadata !"B", metadata [[TYPEA]]} +; CHECK: [[TAGA]] = metadata !{metadata [[TYPEA]], metadata [[TYPEA]], i64 0} +!0 = metadata !{metadata !5, metadata !5, i64 0} +!1 = metadata !{metadata !6, metadata !6, i64 0} !2 = metadata !{metadata !"tbaa root", null} -!3 = metadata !{metadata !"B", metadata !1} -!4 = metadata !{metadata !"another root", null} +!3 = metadata !{metadata !7, metadata !7, i64 0} +!4 = metadata !{metadata !8, metadata !8, i64 0} +!5 = metadata !{metadata !"C", metadata !6} +!6 = metadata !{metadata !"A", metadata !2} +!7 = metadata !{metadata !"B", metadata !6} +!8 = metadata !{metadata !"another root", null} diff --git a/test/Transforms/GlobalDCE/lit.local.cfg b/test/Transforms/GlobalDCE/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/GlobalDCE/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll b/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll index 390e77a..0867ca9 100644 --- a/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll +++ b/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll @@ -4,7 +4,7 @@ ; RUN: opt < %s -globalopt -S > %t ; Check that the new global values still 
have their address space -; RUN: cat %t | grep addrspace.*global +; RUN: cat %t | grep 'addrspace.*global' @struct = internal addrspace(1) global { i32, i32 } zeroinitializer @array = internal addrspace(1) global [ 2 x i32 ] zeroinitializer diff --git a/test/Transforms/GlobalOpt/2009-03-05-dbg.ll b/test/Transforms/GlobalOpt/2009-03-05-dbg.ll index e08320b..0108960 100644 --- a/test/Transforms/GlobalOpt/2009-03-05-dbg.ll +++ b/test/Transforms/GlobalOpt/2009-03-05-dbg.ll @@ -60,7 +60,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !2 = metadata !{i32 458788, null, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !3 = metadata !{i32 459009, metadata !4, metadata !"i", metadata !1, i32 4, metadata !2} ; [ DW_TAG_arg_variable ] !4 = metadata !{i32 458798, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 4, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!5 = metadata !{i32 458773, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ] +!5 = metadata !{i32 458773, metadata !1, null, metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !6 = metadata !{metadata !2, metadata !2} !7 = metadata !{i32 5, i32 0, metadata !8, null} !8 = metadata !{i32 458763, metadata !20, metadata !4, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] @@ -71,7 +71,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !13 = metadata !{i32 14, i32 0, metadata !14, null} !14 = metadata !{i32 458763, metadata !20, metadata !15, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] !15 = metadata !{i32 458798, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", i32 13, metadata !16, i1 false, i1 true, 
i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!16 = metadata !{i32 458773, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !17, i32 0, null} ; [ DW_TAG_subroutine_type ] +!16 = metadata !{i32 458773, metadata !1, null, metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !17, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !17 = metadata !{metadata !2} !18 = metadata !{i32 15, i32 0, metadata !14, null} !19 = metadata !{i32 16, i32 0, metadata !14, null} diff --git a/test/Transforms/GlobalOpt/array-elem-refs.ll b/test/Transforms/GlobalOpt/array-elem-refs.ll new file mode 100644 index 0000000..ec472b0 --- /dev/null +++ b/test/Transforms/GlobalOpt/array-elem-refs.ll @@ -0,0 +1,32 @@ +; RUN: opt < %s -S -globalopt | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.S = type { i8, i8 } + +@c = internal global i8** bitcast (i8* getelementptr (i8* bitcast ([8 x i8*]* @b to i8*), i64 48) to i8**), align 8 +@b = internal global [8 x i8*] [i8* null, i8* null, i8* null, i8* null, i8* null, i8* null, i8* getelementptr inbounds (%struct.S* @a, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S* @a, i32 0, i32 0), i64 1)], align 16 +@a = internal global %struct.S zeroinitializer, align 1 + +; Function Attrs: nounwind uwtable +define signext i8 @foo() #0 { +entry: + %0 = load i8*** @c, align 8 + %1 = load i8** %0, align 8 + %2 = load i8* %1, align 1 + ret i8 %2 + +; CHECK-LABEL: @foo +; CHECK: ret i8 0 +} + +; Function Attrs: nounwind uwtable +define i32 @main() #0 { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + ret i32 0 +} + +attributes #0 = { nounwind uwtable } + diff --git 
a/test/Transforms/GlobalOpt/atomic.ll b/test/Transforms/GlobalOpt/atomic.ll index 4c3f439..ac05bfd 100644 --- a/test/Transforms/GlobalOpt/atomic.ll +++ b/test/Transforms/GlobalOpt/atomic.ll @@ -1,10 +1,25 @@ ; RUN: opt -globalopt < %s -S -o - | FileCheck %s @GV1 = internal global i64 1 +@GV2 = internal global i32 0 + ; CHECK: @GV1 = internal unnamed_addr constant i64 1 +; CHECK: @GV2 = internal unnamed_addr global i32 0 define void @test1() { entry: %0 = load atomic i8* bitcast (i64* @GV1 to i8*) acquire, align 8 ret void } + +; PR17163 +define void @test2a() { +entry: + store atomic i32 10, i32* @GV2 seq_cst, align 4 + ret void +} +define i32 @test2b() { +entry: + %atomic-load = load atomic i32* @GV2 seq_cst, align 4 + ret i32 %atomic-load +} diff --git a/test/Transforms/GlobalOpt/lit.local.cfg b/test/Transforms/GlobalOpt/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/GlobalOpt/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/IPConstantProp/lit.local.cfg b/test/Transforms/IPConstantProp/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/IPConstantProp/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll b/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll new file mode 100644 index 0000000..e4c31d1 --- /dev/null +++ b/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll @@ -0,0 +1,69 @@ +; RUN: opt -S -indvars -o - %s | FileCheck %s +target datalayout = "e-p:32:32:32-p1:64:64:64-p2:8:8:8-p3:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-n8:16:32:64" + +; Derived from ptriv in lftr-reuse.ll +define void @ptriv_as2(i8 addrspace(2)* %base, i32 %n) nounwind { +; CHECK-LABEL: @ptriv_as2( +entry: + %idx.trunc = trunc i32 %n to i8 + %add.ptr = getelementptr inbounds i8 addrspace(2)* %base, i8 %idx.trunc + %cmp1 
= icmp ult i8 addrspace(2)* %base, %add.ptr + br i1 %cmp1, label %for.body, label %for.end + +; Make sure the added GEP has the right index type +; CHECK: %lftr.limit = getelementptr i8 addrspace(2)* %base, i8 %0 + +; CHECK: for.body: +; CHECK: phi i8 addrspace(2)* +; CHECK-NOT: phi +; CHECK-NOT: add{{^rspace}} +; CHECK: icmp ne i8 addrspace(2)* +; CHECK: br i1 +for.body: + %p.02 = phi i8 addrspace(2)* [ %base, %entry ], [ %incdec.ptr, %for.body ] + ; cruft to make the IV useful + %sub.ptr.lhs.cast = ptrtoint i8 addrspace(2)* %p.02 to i8 + %sub.ptr.rhs.cast = ptrtoint i8 addrspace(2)* %base to i8 + %sub.ptr.sub = sub i8 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + store i8 %sub.ptr.sub, i8 addrspace(2)* %p.02 + %incdec.ptr = getelementptr inbounds i8 addrspace(2)* %p.02, i32 1 + %cmp = icmp ult i8 addrspace(2)* %incdec.ptr, %add.ptr + br i1 %cmp, label %for.body, label %for.end + +for.end: + ret void +} + +define void @ptriv_as3(i8 addrspace(3)* %base, i32 %n) nounwind { +; CHECK-LABEL: @ptriv_as3( +entry: + %idx.trunc = trunc i32 %n to i16 + %add.ptr = getelementptr inbounds i8 addrspace(3)* %base, i16 %idx.trunc + %cmp1 = icmp ult i8 addrspace(3)* %base, %add.ptr + br i1 %cmp1, label %for.body, label %for.end + +; Make sure the added GEP has the right index type +; CHECK: %lftr.limit = getelementptr i8 addrspace(3)* %base, i16 %0 + +; CHECK: for.body: +; CHECK: phi i8 addrspace(3)* +; CHECK-NOT: phi +; CHECK-NOT: add{{^rspace}} +; CHECK: icmp ne i8 addrspace(3)* +; CHECK: br i1 +for.body: + %p.02 = phi i8 addrspace(3)* [ %base, %entry ], [ %incdec.ptr, %for.body ] + ; cruft to make the IV useful + %sub.ptr.lhs.cast = ptrtoint i8 addrspace(3)* %p.02 to i16 + %sub.ptr.rhs.cast = ptrtoint i8 addrspace(3)* %base to i16 + %sub.ptr.sub = sub i16 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + %conv = trunc i16 %sub.ptr.sub to i8 + store i8 %conv, i8 addrspace(3)* %p.02 + %incdec.ptr = getelementptr inbounds i8 addrspace(3)* %p.02, i32 1 + %cmp = icmp ult i8 addrspace(3)* %incdec.ptr, 
%add.ptr + br i1 %cmp, label %for.body, label %for.end + +for.end: + ret void +} + diff --git a/test/Transforms/IndVarSimplify/lftr-zext.ll b/test/Transforms/IndVarSimplify/lftr-zext.ll new file mode 100644 index 0000000..32fa61a --- /dev/null +++ b/test/Transforms/IndVarSimplify/lftr-zext.ll @@ -0,0 +1,26 @@ +; RUN: opt < %s -indvars -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + +@data = common global [240 x i8] zeroinitializer, align 16 + +define void @foo(i8* %a) nounwind uwtable ssp { +; CHECK: %exitcond +; CHECK-NOT: ([240 x i8]* @data, i64 0, i64 -16) + br label %1 + +; <label>:1 ; preds = %0, %1 + %i.0 = phi i8 [ 0, %0 ], [ %5, %1 ] + %p.0 = phi i8* [ getelementptr inbounds ([240 x i8]* @data, i64 0, i64 0), %0 ], [ %4, %1 ] + %.0 = phi i8* [ %a, %0 ], [ %2, %1 ] + %2 = getelementptr inbounds i8* %.0, i64 1 + %3 = load i8* %.0, align 1 + %4 = getelementptr inbounds i8* %p.0, i64 1 + store i8 %3, i8* %p.0, align 1 + %5 = add i8 %i.0, 1 + %6 = icmp ult i8 %5, -16 + br i1 %6, label %1, label %7 + +; <label>:7 ; preds = %1 + ret void +} diff --git a/test/Transforms/IndVarSimplify/lit.local.cfg b/test/Transforms/IndVarSimplify/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/IndVarSimplify/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/IndVarSimplify/loop_evaluate_1.ll b/test/Transforms/IndVarSimplify/loop_evaluate_1.ll index abf1bc3..5d2c8c7 100644 --- a/test/Transforms/IndVarSimplify/loop_evaluate_1.ll +++ b/test/Transforms/IndVarSimplify/loop_evaluate_1.ll @@ -1,8 +1,9 @@ -; RUN: opt < %s -indvars -loop-deletion -simplifycfg -S | not grep br -; -; Testcase distilled from 256.bzip2 +; RUN: opt < %s -indvars -loop-deletion -simplifycfg -S | FileCheck %s -define i32 @main() { +; 
Testcase distilled from 256.bzip2 +; CHECK-LABEL: @test1 +; CHECK-NOT: br +define i32 @test1() { entry: br label %loopentry @@ -19,3 +20,28 @@ loopexit: ; preds = %loopentry ret i32 %tmp.2 } + +; PR12377 +; CHECK-LABEL: @test2 +; CHECK: [[VAR1:%.+]] = add i32 %arg, -11 +; CHECK: [[VAR2:%.+]] = lshr i32 [[VAR1]], 1 +; CHECK: [[VAR3:%.+]] = add i32 [[VAR2]], 1 +; CHECK: [[VAR4:%.+]] = phi i32 [ 0, %bb ], [ [[VAR3]], %bb1.preheader ] +; CHECK: ret i32 [[VAR4]] +define i32 @test2(i32 %arg) { +bb: + %tmp = icmp ugt i32 %arg, 10 + br i1 %tmp, label %bb1, label %bb7 + +bb1: ; preds = %bb1, %bb + %tmp2 = phi i32 [ %tmp5, %bb1 ], [ 0, %bb ] + %tmp3 = phi i32 [ %tmp4, %bb1 ], [ %arg, %bb ] + %tmp4 = add i32 %tmp3, -2 + %tmp5 = add i32 %tmp2, 1 + %tmp6 = icmp ugt i32 %tmp4, 10 + br i1 %tmp6, label %bb1, label %bb7 + +bb7: ; preds = %bb1, %bb + %tmp8 = phi i32 [ 0, %bb ], [ %tmp5, %bb1 ] + ret i32 %tmp8 +} diff --git a/test/Transforms/IndVarSimplify/loop_evaluate_6.ll b/test/Transforms/IndVarSimplify/loop_evaluate_6.ll index da38de5..af01fe5 100644 --- a/test/Transforms/IndVarSimplify/loop_evaluate_6.ll +++ b/test/Transforms/IndVarSimplify/loop_evaluate_6.ll @@ -1,9 +1,4 @@ ; RUN: opt < %s -indvars -loop-deletion -S | grep phi | count 1 -; XFAIL: * - -; Indvars can't evaluate this loop, because ScalarEvolution can't compute -; an exact trip count, because it doesn't know if dividing by the stride will -; have a remainder. It could be done with more aggressive VRP though. define i32 @test(i32 %x_offs) nounwind readnone { entry: diff --git a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll index 507f695..0576692 100644 --- a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll +++ b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll @@ -223,13 +223,18 @@ entry: %halfLim = ashr i32 %limit, 2 br label %loop -; Test cloning an or, which is not an OverflowBinaryOperator. +; This test originally checked that the OR instruction was cloned. 
Now the +; ScalarEvolution is able to understand the loop evolution and that '%iv' at the +; end of the loop is an even value. Thus '%val' is computed at the end of the +; loop and the OR instruction is replaced by an ADD keeping the result +; equivalent. ; ; CHECK: loop: ; CHECK: phi i64 ; CHECK-NOT: sext -; CHECK: or i64 +; CHECK: icmp slt i32 ; CHECK: exit: +; CHECK: add i64 loop: %iv = phi i32 [ 0, %entry], [ %iv.next, %loop ] %t1 = sext i32 %iv to i64 diff --git a/test/Transforms/Inline/alloca-merge-align-nodl.ll b/test/Transforms/Inline/alloca-merge-align-nodl.ll index 203f52b..301505f 100644 --- a/test/Transforms/Inline/alloca-merge-align-nodl.ll +++ b/test/Transforms/Inline/alloca-merge-align-nodl.ll @@ -8,13 +8,13 @@ define void @foo(%struct.s* byval nocapture readonly %a) { entry: %x = alloca [2 x i32], align 4 %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0 - %0 = load i32* %a1, align 4, !tbaa !0 + %0 = load i32* %a1, align 4 %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0 - store i32 %0, i32* %arrayidx, align 4, !tbaa !0 + store i32 %0, i32* %arrayidx, align 4 %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1 - %1 = load i32* %b, align 4, !tbaa !0 + %1 = load i32* %b, align 4 %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1 - store i32 %1, i32* %arrayidx2, align 4, !tbaa !0 + store i32 %1, i32* %arrayidx2, align 4 call void @bar(i32* %arrayidx) #2 ret void } @@ -23,13 +23,13 @@ define void @foo0(%struct.s* byval nocapture readonly %a) { entry: %x = alloca [2 x i32] %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0 - %0 = load i32* %a1, align 4, !tbaa !0 + %0 = load i32* %a1, align 4 %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0 - store i32 %0, i32* %arrayidx, align 4, !tbaa !0 + store i32 %0, i32* %arrayidx, align 4 %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1 - %1 = load i32* %b, align 4, !tbaa !0 + %1 = load i32* %b, align 4 %arrayidx2 = getelementptr inbounds [2 x i32]* 
%x, i64 0, i64 1 - store i32 %1, i32* %arrayidx2, align 4, !tbaa !0 + store i32 %1, i32* %arrayidx2, align 4 call void @bar(i32* %arrayidx) #2 ret void } @@ -40,13 +40,13 @@ define void @goo(%struct.s* byval nocapture readonly %a) { entry: %x = alloca [2 x i32], align 32 %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0 - %0 = load i32* %a1, align 4, !tbaa !0 + %0 = load i32* %a1, align 4 %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0 - store i32 %0, i32* %arrayidx, align 32, !tbaa !0 + store i32 %0, i32* %arrayidx, align 32 %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1 - %1 = load i32* %b, align 4, !tbaa !0 + %1 = load i32* %b, align 4 %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1 - store i32 %1, i32* %arrayidx2, align 4, !tbaa !0 + store i32 %1, i32* %arrayidx2, align 4 call void @bar(i32* %arrayidx) #2 ret void } @@ -62,9 +62,9 @@ entry: %tmpcast = bitcast i64* %a to %struct.s* store i64 0, i64* %a, align 8 %a1 = bitcast i64* %a to i32* - store i32 1, i32* %a1, align 8, !tbaa !0 + store i32 1, i32* %a1, align 8 call void @foo(%struct.s* byval %tmpcast) - store i32 2, i32* %a1, align 8, !tbaa !0 + store i32 2, i32* %a1, align 8 call void @goo(%struct.s* byval %tmpcast) ret i32 0 } @@ -80,14 +80,9 @@ entry: %tmpcast = bitcast i64* %a to %struct.s* store i64 0, i64* %a, align 8 %a1 = bitcast i64* %a to i32* - store i32 1, i32* %a1, align 8, !tbaa !0 + store i32 1, i32* %a1, align 8 call void @foo0(%struct.s* byval %tmpcast) - store i32 2, i32* %a1, align 8, !tbaa !0 + store i32 2, i32* %a1, align 8 call void @goo(%struct.s* byval %tmpcast) ret i32 0 } - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} - diff --git a/test/Transforms/Inline/alloca-merge-align.ll b/test/Transforms/Inline/alloca-merge-align.ll index d789c79..d357b3c 100644 --- a/test/Transforms/Inline/alloca-merge-align.ll +++ 
b/test/Transforms/Inline/alloca-merge-align.ll @@ -9,13 +9,13 @@ define void @foo(%struct.s* byval nocapture readonly %a) { entry: %x = alloca [2 x i32], align 4 %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0 - %0 = load i32* %a1, align 4, !tbaa !0 + %0 = load i32* %a1, align 4 %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0 - store i32 %0, i32* %arrayidx, align 4, !tbaa !0 + store i32 %0, i32* %arrayidx, align 4 %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1 - %1 = load i32* %b, align 4, !tbaa !0 + %1 = load i32* %b, align 4 %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1 - store i32 %1, i32* %arrayidx2, align 4, !tbaa !0 + store i32 %1, i32* %arrayidx2, align 4 call void @bar(i32* %arrayidx) #2 ret void } @@ -24,13 +24,13 @@ define void @foo0(%struct.s* byval nocapture readonly %a) { entry: %x = alloca [2 x i32] %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0 - %0 = load i32* %a1, align 4, !tbaa !0 + %0 = load i32* %a1, align 4 %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0 - store i32 %0, i32* %arrayidx, align 4, !tbaa !0 + store i32 %0, i32* %arrayidx, align 4 %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1 - %1 = load i32* %b, align 4, !tbaa !0 + %1 = load i32* %b, align 4 %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1 - store i32 %1, i32* %arrayidx2, align 4, !tbaa !0 + store i32 %1, i32* %arrayidx2, align 4 call void @bar(i32* %arrayidx) #2 ret void } @@ -39,13 +39,13 @@ define void @foo1(%struct.s* byval nocapture readonly %a) { entry: %x = alloca [2 x i32], align 1 %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0 - %0 = load i32* %a1, align 4, !tbaa !0 + %0 = load i32* %a1, align 4 %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0 - store i32 %0, i32* %arrayidx, align 4, !tbaa !0 + store i32 %0, i32* %arrayidx, align 4 %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1 - %1 = load i32* %b, align 4, !tbaa !0 + %1 = load i32* %b, 
align 4 %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1 - store i32 %1, i32* %arrayidx2, align 4, !tbaa !0 + store i32 %1, i32* %arrayidx2, align 4 call void @bar(i32* %arrayidx) #2 ret void } @@ -56,13 +56,13 @@ define void @goo(%struct.s* byval nocapture readonly %a) { entry: %x = alloca [2 x i32], align 32 %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0 - %0 = load i32* %a1, align 4, !tbaa !0 + %0 = load i32* %a1, align 4 %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0 - store i32 %0, i32* %arrayidx, align 32, !tbaa !0 + store i32 %0, i32* %arrayidx, align 32 %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1 - %1 = load i32* %b, align 4, !tbaa !0 + %1 = load i32* %b, align 4 %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1 - store i32 %1, i32* %arrayidx2, align 4, !tbaa !0 + store i32 %1, i32* %arrayidx2, align 4 call void @bar(i32* %arrayidx) #2 ret void } @@ -78,9 +78,9 @@ entry: %tmpcast = bitcast i64* %a to %struct.s* store i64 0, i64* %a, align 8 %a1 = bitcast i64* %a to i32* - store i32 1, i32* %a1, align 8, !tbaa !0 + store i32 1, i32* %a1, align 8 call void @foo(%struct.s* byval %tmpcast) - store i32 2, i32* %a1, align 8, !tbaa !0 + store i32 2, i32* %a1, align 8 call void @goo(%struct.s* byval %tmpcast) ret i32 0 } @@ -96,9 +96,9 @@ entry: %tmpcast = bitcast i64* %a to %struct.s* store i64 0, i64* %a, align 8 %a1 = bitcast i64* %a to i32* - store i32 1, i32* %a1, align 8, !tbaa !0 + store i32 1, i32* %a1, align 8 call void @foo0(%struct.s* byval %tmpcast) - store i32 2, i32* %a1, align 8, !tbaa !0 + store i32 2, i32* %a1, align 8 call void @goo(%struct.s* byval %tmpcast) ret i32 0 } @@ -114,14 +114,9 @@ entry: %tmpcast = bitcast i64* %a to %struct.s* store i64 0, i64* %a, align 8 %a1 = bitcast i64* %a to i32* - store i32 1, i32* %a1, align 8, !tbaa !0 + store i32 1, i32* %a1, align 8 call void @foo0(%struct.s* byval %tmpcast) - store i32 2, i32* %a1, align 8, !tbaa !0 + store i32 2, i32* %a1, 
align 8 call void @foo1(%struct.s* byval %tmpcast) ret i32 0 } - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} - diff --git a/test/Transforms/Inline/attributes.ll b/test/Transforms/Inline/attributes.ll new file mode 100644 index 0000000..53fb13f --- /dev/null +++ b/test/Transforms/Inline/attributes.ll @@ -0,0 +1,112 @@ +; RUN: opt < %s -inline -S | FileCheck %s +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + +define i32 @noattr_callee(i32 %i) { + ret i32 %i +} + +define i32 @sanitize_address_callee(i32 %i) sanitize_address { + ret i32 %i +} + +define i32 @sanitize_thread_callee(i32 %i) sanitize_thread { + ret i32 %i +} + +define i32 @sanitize_memory_callee(i32 %i) sanitize_memory { + ret i32 %i +} + +define i32 @alwaysinline_callee(i32 %i) alwaysinline { + ret i32 %i +} + +define i32 @alwaysinline_sanitize_address_callee(i32 %i) alwaysinline sanitize_address { + ret i32 %i +} + +define i32 @alwaysinline_sanitize_thread_callee(i32 %i) alwaysinline sanitize_thread { + ret i32 %i +} + +define i32 @alwaysinline_sanitize_memory_callee(i32 %i) alwaysinline sanitize_memory { + ret i32 %i +} + + +; Check that: +; * noattr callee is inlined into noattr caller, +; * sanitize_(address|memory|thread) callee is not inlined into noattr caller, +; * alwaysinline callee is always inlined no matter what sanitize_* attributes are present. 
+ +define i32 @test_no_sanitize_address(i32 %arg) { + %x1 = call i32 @noattr_callee(i32 %arg) + %x2 = call i32 @sanitize_address_callee(i32 %x1) + %x3 = call i32 @alwaysinline_callee(i32 %x2) + %x4 = call i32 @alwaysinline_sanitize_address_callee(i32 %x3) + ret i32 %x4 +; CHECK-LABEL: @test_no_sanitize_address( +; CHECK-NEXT: @sanitize_address_callee +; CHECK-NEXT: ret i32 +} + +define i32 @test_no_sanitize_memory(i32 %arg) { + %x1 = call i32 @noattr_callee(i32 %arg) + %x2 = call i32 @sanitize_memory_callee(i32 %x1) + %x3 = call i32 @alwaysinline_callee(i32 %x2) + %x4 = call i32 @alwaysinline_sanitize_memory_callee(i32 %x3) + ret i32 %x4 +; CHECK-LABEL: @test_no_sanitize_memory( +; CHECK-NEXT: @sanitize_memory_callee +; CHECK-NEXT: ret i32 +} + +define i32 @test_no_sanitize_thread(i32 %arg) { + %x1 = call i32 @noattr_callee(i32 %arg) + %x2 = call i32 @sanitize_thread_callee(i32 %x1) + %x3 = call i32 @alwaysinline_callee(i32 %x2) + %x4 = call i32 @alwaysinline_sanitize_thread_callee(i32 %x3) + ret i32 %x4 +; CHECK-LABEL: @test_no_sanitize_thread( +; CHECK-NEXT: @sanitize_thread_callee +; CHECK-NEXT: ret i32 +} + + +; Check that: +; * noattr callee is not inlined into sanitize_(address|memory|thread) caller, +; * sanitize_(address|memory|thread) callee is inlined into the caller with the same attribute, +; * alwaysinline callee is always inlined no matter what sanitize_* attributes are present. 
+ +define i32 @test_sanitize_address(i32 %arg) sanitize_address { + %x1 = call i32 @noattr_callee(i32 %arg) + %x2 = call i32 @sanitize_address_callee(i32 %x1) + %x3 = call i32 @alwaysinline_callee(i32 %x2) + %x4 = call i32 @alwaysinline_sanitize_address_callee(i32 %x3) + ret i32 %x4 +; CHECK-LABEL: @test_sanitize_address( +; CHECK-NEXT: @noattr_callee +; CHECK-NEXT: ret i32 +} + +define i32 @test_sanitize_memory(i32 %arg) sanitize_memory { + %x1 = call i32 @noattr_callee(i32 %arg) + %x2 = call i32 @sanitize_memory_callee(i32 %x1) + %x3 = call i32 @alwaysinline_callee(i32 %x2) + %x4 = call i32 @alwaysinline_sanitize_memory_callee(i32 %x3) + ret i32 %x4 +; CHECK-LABEL: @test_sanitize_memory( +; CHECK-NEXT: @noattr_callee +; CHECK-NEXT: ret i32 +} + +define i32 @test_sanitize_thread(i32 %arg) sanitize_thread { + %x1 = call i32 @noattr_callee(i32 %arg) + %x2 = call i32 @sanitize_thread_callee(i32 %x1) + %x3 = call i32 @alwaysinline_callee(i32 %x2) + %x4 = call i32 @alwaysinline_sanitize_thread_callee(i32 %x3) + ret i32 %x4 +; CHECK-LABEL: @test_sanitize_thread( +; CHECK-NEXT: @noattr_callee +; CHECK-NEXT: ret i32 +} diff --git a/test/Transforms/Inline/byval.ll b/test/Transforms/Inline/byval.ll index e601faf..d7597ad 100644 --- a/test/Transforms/Inline/byval.ll +++ b/test/Transforms/Inline/byval.ll @@ -104,3 +104,26 @@ entry: ; CHECK: ret i32 4 } +%struct.S0 = type { i32 } + +@b = global %struct.S0 { i32 1 }, align 4 +@a = common global i32 0, align 4 + +define internal void @f5(%struct.S0* byval nocapture readonly align 4 %p) { +entry: + store i32 0, i32* getelementptr inbounds (%struct.S0* @b, i64 0, i32 0), align 4 + %f2 = getelementptr inbounds %struct.S0* %p, i64 0, i32 0 + %0 = load i32* %f2, align 4 + store i32 %0, i32* @a, align 4 + ret void +} + +define i32 @test5() { +entry: + tail call void @f5(%struct.S0* byval align 4 @b) + %0 = load i32* @a, align 4 + ret i32 %0 +; CHECK: @test5() +; CHECK: store i32 0, i32* getelementptr inbounds (%struct.S0* @b, i64 0, 
i32 0), align 4 +; CHECK-NOT: load i32* getelementptr inbounds (%struct.S0* @b, i64 0, i32 0), align 4 +} diff --git a/test/Transforms/Inline/delete-call.ll b/test/Transforms/Inline/delete-call.ll index 97c52af..7f30ffb 100644 --- a/test/Transforms/Inline/delete-call.ll +++ b/test/Transforms/Inline/delete-call.ll @@ -2,7 +2,7 @@ ; RUN: opt -S -inline -stats < %s 2>&1 | FileCheck %s ; CHECK: Number of functions inlined -; RUN: opt -S -inline -functionattrs -stats < %s 2>&1 | FileCheck -check-prefix=FUNCTIONATTRS %s +; RUN: opt -S -inline -functionattrs -stats < %s 2>&1 | FileCheck -check-prefix=CHECK-FUNCTIONATTRS %s ; CHECK-FUNCTIONATTRS: Number of call sites deleted, not inlined target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" diff --git a/test/Transforms/Inline/inline-invoke-with-asm-call.ll b/test/Transforms/Inline/inline-invoke-with-asm-call.ll new file mode 100644 index 0000000..876f8d7 --- /dev/null +++ b/test/Transforms/Inline/inline-invoke-with-asm-call.ll @@ -0,0 +1,32 @@ +; RUN: opt < %s -inline -S | FileCheck %s +target triple = "x86_64-apple-darwin" + +; In inliner, we assume that inline asm does not throw. This testing case makes +; sure that the inliner does not convert "call asm" to "invoke asm". +; rdar://15317907 +; CHECK-LABEL: @caller +; Make sure we are generating "call asm" instead of "invoke asm". 
+; CHECK: call void asm +; CHECK-LABEL: @callee_with_asm +define void @caller() { + br i1 undef, label %1, label %4 + +; <label>:1 + invoke void @callee_with_asm() + to label %4 unwind label %2 + +; <label>:2 + %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*) + cleanup + resume { i8*, i32 } undef + +; <label>:4 + ret void +} + +define void @callee_with_asm() { + call void asm sideeffect "mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue", ""() + ret void +} + +declare i32 @__objc_personality_v0(...) diff --git a/test/Transforms/Inline/inline-optnone.ll b/test/Transforms/Inline/inline-optnone.ll new file mode 100644 index 0000000..9b99c45 --- /dev/null +++ b/test/Transforms/Inline/inline-optnone.ll @@ -0,0 +1,52 @@ +; RUN: opt < %s -inline -S | FileCheck %s + +; Test that functions with attribute optnone are not inlined. +; Also test that only functions with attribute alwaysinline are +; valid candidates for inlining if the caller has the optnone attribute. 
+ +; Function Attrs: alwaysinline nounwind readnone uwtable +define i32 @alwaysInlineFunction(i32 %a) #0 { +entry: + %mul = mul i32 %a, %a + ret i32 %mul +} + +; Function Attrs: nounwind readnone uwtable +define i32 @simpleFunction(i32 %a) #1 { +entry: + %add = add i32 %a, %a + ret i32 %add +} + +; Function Attrs: nounwind noinline optnone readnone uwtable +define i32 @OptnoneFunction(i32 %a) #2 { +entry: + %0 = tail call i32 @alwaysInlineFunction(i32 %a) + %1 = tail call i32 @simpleFunction(i32 %a) + %add = add i32 %0, %1 + ret i32 %add +} + +; CHECK-LABEL: @OptnoneFunction +; CHECK-NOT: call i32 @alwaysInlineFunction(i32 %a) +; CHECK: call i32 @simpleFunction(i32 %a) +; CHECK: ret + +; Function Attrs: nounwind readnone uwtable +define i32 @bar(i32 %a) #1 { +entry: + %0 = tail call i32 @OptnoneFunction(i32 5) + %1 = tail call i32 @simpleFunction(i32 6) + %add = add i32 %0, %1 + ret i32 %add +} + +; CHECK-LABEL: @bar +; CHECK: call i32 @OptnoneFunction(i32 5) +; CHECK-NOT: call i32 @simpleFunction(i32 6) +; CHECK: ret + + +attributes #0 = { alwaysinline nounwind readnone uwtable } +attributes #1 = { nounwind readnone uwtable } +attributes #2 = { nounwind noinline optnone readnone uwtable } diff --git a/test/Transforms/Inline/inline_returns_twice.ll b/test/Transforms/Inline/inline_returns_twice.ll index f316c91..678ee82 100644 --- a/test/Transforms/Inline/inline_returns_twice.ll +++ b/test/Transforms/Inline/inline_returns_twice.ll @@ -4,38 +4,81 @@ ; if they are themselve marked as such. 
declare i32 @a() returns_twice -declare i32 @b() returns_twice -define i32 @f() { +define i32 @inner1() { entry: %call = call i32 @a() returns_twice %add = add nsw i32 1, %call ret i32 %add } -define i32 @g() { +define i32 @outer1() { entry: -; CHECK-LABEL: define i32 @g( -; CHECK: call i32 @f() -; CHECK-NOT: call i32 @a() - %call = call i32 @f() +; CHECK-LABEL: define i32 @outer1( +; CHECK: call i32 @inner1() + %call = call i32 @inner1() %add = add nsw i32 1, %call ret i32 %add } -define i32 @h() returns_twice { +define i32 @inner2() returns_twice { entry: - %call = call i32 @b() returns_twice + %call = call i32 @a() returns_twice + %add = add nsw i32 1, %call + ret i32 %add +} + +define i32 @outer2() { +entry: +; CHECK-LABEL: define i32 @outer2( +; CHECK: call i32 @a() + %call = call i32 @inner2() returns_twice %add = add nsw i32 1, %call ret i32 %add } -define i32 @i() { +define i32 @inner3() { +entry: + %invoke = invoke i32 @a() returns_twice + to label %cont unwind label %lpad + +cont: + %add = add nsw i32 1, %invoke + ret i32 %add + +lpad: + %lp = landingpad i32 personality i8* null cleanup + resume i32 %lp +} + +define i32 @outer3() { +entry: +; CHECK-LABEL: define i32 @outer3( +; CHECK: call i32 @inner3() + %call = call i32 @inner3() + %add = add nsw i32 1, %call + ret i32 %add +} + +define i32 @inner4() returns_twice { +entry: + %invoke = invoke i32 @a() returns_twice + to label %cont unwind label %lpad + +cont: + %add = add nsw i32 1, %invoke + ret i32 %add + +lpad: + %lp = landingpad i32 personality i8* null cleanup + resume i32 %lp +} + +define i32 @outer4() { entry: -; CHECK-LABEL: define i32 @i( -; CHECK: call i32 @b() -; CHECK-NOT: call i32 @h() - %call = call i32 @h() returns_twice +; CHECK-LABEL: define i32 @outer4( +; CHECK: invoke i32 @a() + %call = call i32 @inner4() returns_twice %add = add nsw i32 1, %call ret i32 %add } diff --git a/test/Transforms/Inline/invoke-cost.ll b/test/Transforms/Inline/invoke-cost.ll new file mode 100644 index 
0000000..84d33ad --- /dev/null +++ b/test/Transforms/Inline/invoke-cost.ll @@ -0,0 +1,45 @@ +; RUN: opt -inline < %s -S -o - -inline-threshold=100 | FileCheck %s + +target datalayout = "p:32:32" + +@glbl = external global i32 + +declare void @f() +declare i32 @__gxx_personality_v0(...) +declare i8* @__cxa_begin_catch(i8*) +declare void @__cxa_end_catch() +declare void @_ZSt9terminatev() + +define void @inner1() { +entry: + invoke void @f() to label %cont1 unwind label %terminate.lpad + +cont1: + invoke void @f() to label %cont2 unwind label %terminate.lpad + +cont2: + invoke void @f() to label %cont3 unwind label %terminate.lpad + +cont3: + invoke void @f() to label %cont4 unwind label %terminate.lpad + +cont4: + ret void + +terminate.lpad: + landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + catch i8* null + call void @_ZSt9terminatev() noreturn nounwind + unreachable +} + +define void @outer1() { +; CHECK-LABEL: @outer1( +; +; This call should not get inlined because inner1 actually calls a function +; many times, but it only does so through invoke as opposed to call. 
+; +; CHECK: call void @inner1 + call void @inner1() + ret void +} diff --git a/test/Transforms/Inline/lit.local.cfg b/test/Transforms/Inline/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/Inline/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/Inline/ptr-diff.ll b/test/Transforms/Inline/ptr-diff.ll index 01b42da..af42bc7 100644 --- a/test/Transforms/Inline/ptr-diff.ll +++ b/test/Transforms/Inline/ptr-diff.ll @@ -1,6 +1,6 @@ ; RUN: opt -inline < %s -S -o - -inline-threshold=10 | FileCheck %s -target datalayout = "p:32:32" +target datalayout = "p:32:32-p1:64:64-p2:16:16-n16:32:64" define i32 @outer1() { ; CHECK-LABEL: @outer1( @@ -56,3 +56,46 @@ else: %t = load i32* %begin ret i32 %t } + +; The inttoptrs are free since it is a smaller integer to a larger +; pointer size +define i32 @inttoptr_free_cost(i32 %a, i32 %b, i32 %c) { + %p1 = inttoptr i32 %a to i32 addrspace(1)* + %p2 = inttoptr i32 %b to i32 addrspace(1)* + %p3 = inttoptr i32 %c to i32 addrspace(1)* + %t1 = load i32 addrspace(1)* %p1 + %t2 = load i32 addrspace(1)* %p2 + %t3 = load i32 addrspace(1)* %p3 + %s = add i32 %t1, %t2 + %s1 = add i32 %s, %t3 + ret i32 %s1 +} + +define i32 @inttoptr_free_cost_user(i32 %begin, i32 %end) { +; CHECK-LABEL: @inttoptr_free_cost_user( +; CHECK-NOT: call + %x = call i32 @inttoptr_free_cost(i32 %begin, i32 %end, i32 9) + ret i32 %x +} + +; The inttoptrs have a cost since it is a larger integer to a smaller +; pointer size +define i32 @inttoptr_cost_smaller_ptr(i32 %a, i32 %b, i32 %c) { + %p1 = inttoptr i32 %a to i32 addrspace(2)* + %p2 = inttoptr i32 %b to i32 addrspace(2)* + %p3 = inttoptr i32 %c to i32 addrspace(2)* + %t1 = load i32 addrspace(2)* %p1 + %t2 = load i32 addrspace(2)* %p2 + %t3 = load i32 addrspace(2)* %p3 + %s = add i32 %t1, %t2 + %s1 = add i32 %s, %t3 + ret i32 %s1 +} + +define i32 @inttoptr_cost_smaller_ptr_user(i32 %begin, i32 %end) { +; CHECK-LABEL: 
@inttoptr_cost_smaller_ptr_user( +; CHECK: call + %x = call i32 @inttoptr_cost_smaller_ptr(i32 %begin, i32 %end, i32 9) + ret i32 %x +} + diff --git a/test/Transforms/InstCombine/2002-05-14-SubFailure.ll b/test/Transforms/InstCombine/2002-05-14-SubFailure.ll index d2b2b00..854ec60 100644 --- a/test/Transforms/InstCombine/2002-05-14-SubFailure.ll +++ b/test/Transforms/InstCombine/2002-05-14-SubFailure.ll @@ -1,7 +1,8 @@ ; Instcombine was missing a test that caused it to make illegal transformations ; sometimes. In this case, it transforms the sub into an add: -; RUN: opt < %s -instcombine -S | grep sub -; +; RUN: opt < %s -instcombine -S | FileCheck %s +; CHECK: sub + define i32 @test(i32 %i, i32 %j) { %A = mul i32 %i, %j %B = sub i32 2, %A diff --git a/test/Transforms/InstCombine/2002-12-05-MissedConstProp.ll b/test/Transforms/InstCombine/2002-12-05-MissedConstProp.ll index 22574f7..49e55c6 100644 --- a/test/Transforms/InstCombine/2002-12-05-MissedConstProp.ll +++ b/test/Transforms/InstCombine/2002-12-05-MissedConstProp.ll @@ -1,4 +1,6 @@ -; RUN: opt < %s -instcombine -S | not grep add +; RUN: opt < %s -instcombine -S | FileCheck %s + +; CHECK-NOT: add define i32 @test(i32 %A) { %A.neg = sub i32 0, %A ; <i32> [#uses=1] diff --git a/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll b/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll index c02d33c..bb9a818 100644 --- a/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll +++ b/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll @@ -1,6 +1,7 @@ -; This testcase can be simplified by "realizing" that alloca can never return +; This testcase can be simplified by "realizing" that alloca can never return ; null. -; RUN: opt < %s -instcombine -simplifycfg -S | not grep br +; RUN: opt < %s -instcombine -simplifycfg -S | FileCheck %s +; CHECK-NOT: br declare i32 @bitmap_clear(...) 
diff --git a/test/Transforms/InstCombine/2006-10-20-mask.ll b/test/Transforms/InstCombine/2006-10-20-mask.ll index 0aaa5e8..e9797ae 100644 --- a/test/Transforms/InstCombine/2006-10-20-mask.ll +++ b/test/Transforms/InstCombine/2006-10-20-mask.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -instcombine -S | \ -; RUN: grep and +; RUN: opt < %s -instcombine -S | FileCheck %s +; CHECK: and define i64 @foo(i64 %tmp, i64 %tmp2) { %tmp.upgrd.1 = trunc i64 %tmp to i32 ; <i32> [#uses=1] diff --git a/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll b/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll index d3ba1e2..8ab50e2 100644 --- a/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll +++ b/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll @@ -1,5 +1,6 @@ -; RUN: opt < %s -instcombine -S | \ -; RUN: grep mul | count 2 +; RUN: opt < %s -instcombine -S | FileCheck %s +; CHECK: mul +; CHECK: mul define <4 x float> @test(<4 x float> %V) { %Y = fmul <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > ; <<4 x float>> [#uses=1] diff --git a/test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll b/test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll index 2665791..272753c 100644 --- a/test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll +++ b/test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -instcombine -S | grep select +; RUN: opt < %s -instcombine -S | FileCheck %s +; CHECK: select ; END. 
target datalayout = "e-p:32:32" diff --git a/test/Transforms/InstCombine/2008-01-27-FloatSelect.ll b/test/Transforms/InstCombine/2008-01-27-FloatSelect.ll index c161bcc..6b4e89d 100644 --- a/test/Transforms/InstCombine/2008-01-27-FloatSelect.ll +++ b/test/Transforms/InstCombine/2008-01-27-FloatSelect.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -instcombine -S | grep select +; RUN: opt < %s -instcombine -S | FileCheck %s +; CHECK: select define double @fold(i1 %a, double %b) { %s = select i1 %a, double 0., double 1. diff --git a/test/Transforms/InstCombine/2008-02-13-MulURem.ll b/test/Transforms/InstCombine/2008-02-13-MulURem.ll index a88c510..d85ef97 100644 --- a/test/Transforms/InstCombine/2008-02-13-MulURem.ll +++ b/test/Transforms/InstCombine/2008-02-13-MulURem.ll @@ -1,6 +1,8 @@ -; RUN: opt < %s -instcombine -S | grep rem +; RUN: opt < %s -instcombine -S | FileCheck %s ; PR1933 +; CHECK: rem + define i32 @fold(i32 %a) { %s = mul i32 %a, 3 %c = urem i32 %s, 3 diff --git a/test/Transforms/InstCombine/2008-05-31-AddBool.ll b/test/Transforms/InstCombine/2008-05-31-AddBool.ll index ed20690..31b1719 100644 --- a/test/Transforms/InstCombine/2008-05-31-AddBool.ll +++ b/test/Transforms/InstCombine/2008-05-31-AddBool.ll @@ -1,6 +1,8 @@ -; RUN: opt < %s -instcombine -S | grep "xor" +; RUN: opt < %s -instcombine -S | FileCheck %s ; PR2389 +; CHECK: xor + define i1 @test(i1 %a, i1 %b) { %A = add i1 %a, %b ret i1 %A diff --git a/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll b/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll index 949fc59..e354311 100644 --- a/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll +++ b/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll @@ -5,7 +5,7 @@ target triple = "i386-apple-darwin9.6" define i32 @test(i32* %P) nounwind { entry: - %Q = bitcast i32* %P to i32 addrspace(1)* + %Q = addrspacecast i32* %P to i32 addrspace(1)* store i32 0, i32 addrspace(1)* %Q, align 4 ret i32 0 } diff --git 
a/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll b/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll index 6f3df5b..4d185bf 100644 --- a/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll +++ b/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll @@ -1,10 +1,10 @@ ; RUN: opt < %s -instcombine -S | FileCheck %s -; CHECK: bitcast +; CHECK: addrspacecast @base = internal addrspace(3) unnamed_addr global [16 x i32] zeroinitializer, align 16 declare void @foo(i32*) define void @test() nounwind { - call void @foo(i32* getelementptr (i32* bitcast ([16 x i32] addrspace(3)* @base to i32*), i64 2147483647)) nounwind + call void @foo(i32* getelementptr (i32* addrspacecast ([16 x i32] addrspace(3)* @base to i32*), i64 2147483647)) nounwind ret void } diff --git a/test/Transforms/InstCombine/addrspacecast.ll b/test/Transforms/InstCombine/addrspacecast.ll new file mode 100644 index 0000000..d908b55 --- /dev/null +++ b/test/Transforms/InstCombine/addrspacecast.ll @@ -0,0 +1,69 @@ +; RUN: opt -instcombine -S < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-p1:32:32:32-p2:16:16:16-n8:16:32:64" + + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p1i8.i32(i8*, i8 addrspace(1)*, i32, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p2i8.i32(i8*, i8 addrspace(2)*, i32, i32, i1) nounwind + + +define i32* @combine_redundant_addrspacecast(i32 addrspace(1)* %x) nounwind { +; CHECK-LABEL: @combine_redundant_addrspacecast( +; CHECK: addrspacecast i32 addrspace(1)* %x to i32* +; CHECK-NEXT: ret + %y = addrspacecast i32 addrspace(1)* %x to i32 addrspace(3)* + %z = addrspacecast i32 addrspace(3)* %y to i32* + ret i32* %z +} + +define <4 x i32*> @combine_redundant_addrspacecast_vector(<4 x i32 addrspace(1)*> %x) nounwind { +; CHECK-LABEL: @combine_redundant_addrspacecast_vector( +; CHECK: addrspacecast <4 x i32 addrspace(1)*> %x to <4 x i32*> +; CHECK-NEXT: ret + %y = addrspacecast <4 x i32 
addrspace(1)*> %x to <4 x i32 addrspace(3)*> + %z = addrspacecast <4 x i32 addrspace(3)*> %y to <4 x i32*> + ret <4 x i32*> %z +} + +define float* @combine_redundant_addrspacecast_types(i32 addrspace(1)* %x) nounwind { +; CHECK-LABEL: @combine_redundant_addrspacecast_types( +; CHECK: addrspacecast i32 addrspace(1)* %x to float* +; CHECK-NEXT: ret + %y = addrspacecast i32 addrspace(1)* %x to i32 addrspace(3)* + %z = addrspacecast i32 addrspace(3)* %y to float* + ret float* %z +} + +@const_array = addrspace(2) constant [60 x i8] [i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, + i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, + i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, + i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, + i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22 ] + +declare void @foo(i8*) nounwind + +; A copy from a constant addrspacecast'ed global +; CHECK-LABEL: @memcpy_addrspacecast( +; CHECK-NOT: call void @llvm.memcpy +define i32 @memcpy_addrspacecast() nounwind { +entry: + %alloca = alloca i8, i32 48 + call void @llvm.memcpy.p0i8.p1i8.i32(i8* %alloca, i8 addrspace(1)* addrspacecast (i8 addrspace(2)* getelementptr inbounds ([60 x i8] addrspace(2)* @const_array, i16 0, i16 4) to i8 addrspace(1)*), i32 48, i32 4, i1 false) nounwind + br label %loop.body + +loop.body: + %i = phi i32 [ 0, %entry ], [ %i.inc, %loop.body ] + %sum = phi i32 [ 0, %entry ], [ %sum.inc, %loop.body] + %ptr = getelementptr i8* %alloca, i32 %i + %load = load i8* %ptr + %ext = zext i8 %load to i32 + %sum.inc = add i32 %sum, %ext + %i.inc = add i32 %i, 1 + %cmp = icmp ne i32 %i, 48 + br i1 %cmp, label %loop.body, label %end + +end: + ret i32 %sum.inc +} + diff --git a/test/Transforms/InstCombine/align-addr.ll b/test/Transforms/InstCombine/align-addr.ll index e33ee9f..4d22c2c 100644 --- a/test/Transforms/InstCombine/align-addr.ll +++ 
b/test/Transforms/InstCombine/align-addr.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -instcombine -S | FileCheck %s -target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" +target datalayout = "E-p:64:64:64-p1:32:32:32-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" ; Instcombine should be able to prove vector alignment in the ; presence of a few mild address computation tricks. @@ -47,6 +47,27 @@ entry: ret <16 x i8> %tmp } +@GLOBAL_as1 = internal addrspace(1) global [4 x i32] zeroinitializer + +define <16 x i8> @test1_as1(<2 x i64> %x) { +; CHECK-LABEL: @test1_as1( +; CHECK: tmp = load +; CHECK: GLOBAL_as1{{.*}}align 16 + %tmp = load <16 x i8> addrspace(1)* bitcast ([4 x i32] addrspace(1)* @GLOBAL_as1 to <16 x i8> addrspace(1)*), align 1 + ret <16 x i8> %tmp +} + +@GLOBAL_as1_gep = internal addrspace(1) global [8 x i32] zeroinitializer + +define <16 x i8> @test1_as1_gep(<2 x i64> %x) { +; CHECK-LABEL: @test1_as1_gep( +; CHECK: tmp = load +; CHECK: GLOBAL_as1_gep{{.*}}align 16 + %tmp = load <16 x i8> addrspace(1)* bitcast (i32 addrspace(1)* getelementptr ([8 x i32] addrspace(1)* @GLOBAL_as1_gep, i16 0, i16 4) to <16 x i8> addrspace(1)*), align 1 + ret <16 x i8> %tmp +} + + ; When a load or store lacks an explicit alignment, add one. 
; CHECK-LABEL: @test2( diff --git a/test/Transforms/InstCombine/alloca.ll b/test/Transforms/InstCombine/alloca.ll index 9a80ad9..ae1cfa1 100644 --- a/test/Transforms/InstCombine/alloca.ll +++ b/test/Transforms/InstCombine/alloca.ll @@ -1,7 +1,7 @@ -target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" +; RUN: opt < %s -instcombine -S -default-data-layout="E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" | FileCheck %s +; RUN: opt < %s -instcombine -S -default-data-layout="E-p:32:32:32-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" | FileCheck %s -check-prefix=P32 +; RUN: opt < %s -instcombine -S | FileCheck %s -check-prefix=NODL -; RUN: opt < %s -instcombine -S | FileCheck %s -; END. declare void @use(...) @@ -110,3 +110,22 @@ entry: } declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind + + +; Check that the GEP indices use the pointer size, or 64 if unknown +define void @test8() { +; CHECK-LABEL: @test8( +; CHECK: alloca [100 x i32] +; CHECK: getelementptr inbounds [100 x i32]* %x1, i64 0, i64 0 + +; P32-LABEL: @test8( +; P32: alloca [100 x i32] +; P32: getelementptr inbounds [100 x i32]* %x1, i32 0, i32 0 + +; NODL-LABEL: @test8( +; NODL: alloca [100 x i32] +; NODL: getelementptr inbounds [100 x i32]* %x1, i64 0, i64 0 + %x = alloca i32, i32 100 + call void (...)* @use(i32* %x) + ret void +} diff --git a/test/Transforms/InstCombine/and2.ll b/test/Transforms/InstCombine/and2.ll index 504391a..e88fd59 100644 --- a/test/Transforms/InstCombine/and2.ll +++ b/test/Transforms/InstCombine/and2.ll @@ -42,3 +42,15 @@ define <4 x i32> @test5(<4 x i32> %A) { %2 = and <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %1 ret <4 x i32> %2 } + +; Check that we combine "if x!=0 && x!=-1" into "if x+1u>1" +define i32 @test6(i64 %x) nounwind { +; CHECK: @test6 +; 
CHECK-NEXT: add i64 %x, 1 +; CHECK-NEXT: icmp ugt i64 %x.off, 1 + %cmp1 = icmp ne i64 %x, -1 + %not.cmp = icmp ne i64 %x, 0 + %.cmp1 = and i1 %cmp1, %not.cmp + %land.ext = zext i1 %.cmp1 to i32 + ret i32 %land.ext +} diff --git a/test/Transforms/InstCombine/apint-select.ll b/test/Transforms/InstCombine/apint-select.ll index f2ea601..cf24a44 100644 --- a/test/Transforms/InstCombine/apint-select.ll +++ b/test/Transforms/InstCombine/apint-select.ll @@ -1,6 +1,7 @@ ; This test makes sure that these instructions are properly eliminated. -; RUN: opt < %s -instcombine -S | not grep select +; RUN: opt < %s -instcombine -S | FileCheck %s +; CHECK-NOT: select define i41 @test1(i1 %C) { @@ -37,7 +38,7 @@ define i41 @test5(i41 %X) { define i1023 @test6(i1023 %X) { ;; ((X & 27) ? 27 : 0) - %Y = and i1023 %X, 64 + %Y = and i1023 %X, 64 %t = icmp ne i1023 %Y, 0 %V = select i1 %t, i1023 64, i1023 0 ret i1023 %V diff --git a/test/Transforms/InstCombine/bitcast-bigendian.ll b/test/Transforms/InstCombine/bitcast-bigendian.ll index 28b0e9a..ed812e1 100644 --- a/test/Transforms/InstCombine/bitcast-bigendian.ll +++ b/test/Transforms/InstCombine/bitcast-bigendian.ll @@ -48,3 +48,44 @@ define float @test3(<2 x float> %A, <2 x i64> %B) { ; CHECK-NEXT: ret float %add } +define <2 x i32> @test4(i32 %A, i32 %B){ + %tmp38 = zext i32 %A to i64 + %tmp32 = zext i32 %B to i64 + %tmp33 = shl i64 %tmp32, 32 + %ins35 = or i64 %tmp33, %tmp38 + %tmp43 = bitcast i64 %ins35 to <2 x i32> + ret <2 x i32> %tmp43 + ; CHECK-LABEL: @test4( + ; CHECK-NEXT: insertelement <2 x i32> undef, i32 %B, i32 0 + ; CHECK-NEXT: insertelement <2 x i32> {{.*}}, i32 %A, i32 1 + ; CHECK-NEXT: ret <2 x i32> + +} + +define <2 x float> @test5(float %A, float %B) { + %tmp37 = bitcast float %A to i32 + %tmp38 = zext i32 %tmp37 to i64 + %tmp31 = bitcast float %B to i32 + %tmp32 = zext i32 %tmp31 to i64 + %tmp33 = shl i64 %tmp32, 32 + %ins35 = or i64 %tmp33, %tmp38 + %tmp43 = bitcast i64 %ins35 to <2 x float> + ret <2 x float> %tmp43 
+ ; CHECK-LABEL: @test5( + ; CHECK-NEXT: insertelement <2 x float> undef, float %B, i32 0 + ; CHECK-NEXT: insertelement <2 x float> {{.*}}, float %A, i32 1 + ; CHECK-NEXT: ret <2 x float> +} + +define <2 x float> @test6(float %A){ + %tmp23 = bitcast float %A to i32 ; <i32> [#uses=1] + %tmp24 = zext i32 %tmp23 to i64 ; <i64> [#uses=1] + %tmp25 = shl i64 %tmp24, 32 ; <i64> [#uses=1] + %mask20 = or i64 %tmp25, 1109917696 ; <i64> [#uses=1] + %tmp35 = bitcast i64 %mask20 to <2 x float> ; <<2 x float>> [#uses=1] + ret <2 x float> %tmp35 +; CHECK-LABEL: @test6( +; CHECK-NEXT: insertelement <2 x float> undef, float %A, i32 0 +; CHECK-NEXT: insertelement <2 x float> {{.*}}, float 4.200000e+01, i32 1 +; CHECK: ret +} diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll index 4ef8790..c7a520b 100644 --- a/test/Transforms/InstCombine/bitcast.ll +++ b/test/Transforms/InstCombine/bitcast.ll @@ -144,3 +144,13 @@ define <2 x i16> @BitcastInsert(i32 %a) { ; CHECK-LABEL: @BitcastInsert( ; CHECK: bitcast i32 %a to <2 x i16> } + +; PR17293 +define <2 x i64> @test7(<2 x i8*>* %arg) nounwind { + %cast = bitcast <2 x i8*>* %arg to <2 x i64>* + %load = load <2 x i64>* %cast, align 16 + ret <2 x i64> %load +; CHECK: @test7 +; CHECK: bitcast +; CHECK: load +} diff --git a/test/Transforms/InstCombine/call.ll b/test/Transforms/InstCombine/call.ll index 55833fb..e68c0ad 100644 --- a/test/Transforms/InstCombine/call.ll +++ b/test/Transforms/InstCombine/call.ll @@ -1,7 +1,7 @@ ; Ignore stderr, we expect warnings there ; RUN: opt < %s -instcombine 2> /dev/null -S | FileCheck %s -target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" +target datalayout = "E-p:64:64:64-p1:16:16:16-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" ; Simple case, argument translatable without changing the value declare void @test1a(i8*) @@ -15,6 +15,28 @@ 
define void @test1(i32* %A) { ret void } + +; Should not do because of change in address space of the parameter +define void @test1_as1_illegal(i32 addrspace(1)* %A) { +; CHECK-LABEL: @test1_as1_illegal( +; CHECK: call void bitcast + call void bitcast (void (i8*)* @test1a to void (i32 addrspace(1)*)*)(i32 addrspace(1)* %A) + ret void +} + +; Test1, but the argument has a different sized address-space +declare void @test1a_as1(i8 addrspace(1)*) + +; This one is OK to perform +define void @test1_as1(i32 addrspace(1)* %A) { +; CHECK-LABEL: @test1_as1( +; CHECK: %1 = bitcast i32 addrspace(1)* %A to i8 addrspace(1)* +; CHECK: call void @test1a_as1(i8 addrspace(1)* %1) +; CHECK: ret void + call void bitcast (void (i8 addrspace(1)*)* @test1a_as1 to void (i32 addrspace(1)*)*)(i32 addrspace(1)* %A ) + ret void +} + ; More complex case, translate argument because of resolution. This is safe ; because we have the body of the function define void @test2a(i8 %A) { @@ -135,3 +157,122 @@ entry: ; CHECK: call i8* bitcast } + +; Parameter that's a vector of pointers +declare void @test10a(<2 x i8*>) + +define void @test10(<2 x i32*> %A) { +; CHECK-LABEL: @test10( +; CHECK: %1 = bitcast <2 x i32*> %A to <2 x i8*> +; CHECK: call void @test10a(<2 x i8*> %1) +; CHECK: ret void + call void bitcast (void (<2 x i8*>)* @test10a to void (<2 x i32*>)*)(<2 x i32*> %A) + ret void +} + +; Don't transform because different address spaces +declare void @test10a_mixed_as(<2 x i8 addrspace(1)*>) + +define void @test10_mixed_as(<2 x i8*> %A) { +; CHECK-LABEL: @test10_mixed_as( +; CHECK: call void bitcast + call void bitcast (void (<2 x i8 addrspace(1)*>)* @test10a_mixed_as to void (<2 x i8*>)*)(<2 x i8*> %A) + ret void +} + +; Return type that's a pointer +define i8* @test11a() { + ret i8* zeroinitializer +} + +define i32* @test11() { +; CHECK-LABEL: @test11( +; CHECK: %X = call i8* @test11a() +; CHECK: %1 = bitcast i8* %X to i32* + %X = call i32* bitcast (i8* ()* @test11a to i32* ()*)() + ret i32* 
%X +} + +; Return type that's a pointer with a different address space +define i8 addrspace(1)* @test11a_mixed_as() { + ret i8 addrspace(1)* zeroinitializer +} + +define i8* @test11_mixed_as() { +; CHECK-LABEL: @test11_mixed_as( +; CHECK: call i8* bitcast + %X = call i8* bitcast (i8 addrspace(1)* ()* @test11a_mixed_as to i8* ()*)() + ret i8* %X +} + +; Return type that's a vector of pointers +define <2 x i8*> @test12a() { + ret <2 x i8*> zeroinitializer +} + +define <2 x i32*> @test12() { +; CHECK-LABEL: @test12( +; CHECK: %X = call <2 x i8*> @test12a() +; CHECK: %1 = bitcast <2 x i8*> %X to <2 x i32*> + %X = call <2 x i32*> bitcast (<2 x i8*> ()* @test12a to <2 x i32*> ()*)() + ret <2 x i32*> %X +} + +define <2 x i8 addrspace(1)*> @test12a_mixed_as() { + ret <2 x i8 addrspace(1)*> zeroinitializer +} + +define <2 x i8*> @test12_mixed_as() { +; CHECK-LABEL: @test12_mixed_as( +; CHECK: call <2 x i8*> bitcast + %X = call <2 x i8*> bitcast (<2 x i8 addrspace(1)*> ()* @test12a_mixed_as to <2 x i8*> ()*)() + ret <2 x i8*> %X +} + + +; Mix parameter that's a vector of integers and pointers of the same size +declare void @test13a(<2 x i64>) + +define void @test13(<2 x i32*> %A) { +; CHECK-LABEL: @test13( +; CHECK: call void bitcast + call void bitcast (void (<2 x i64>)* @test13a to void (<2 x i32*>)*)(<2 x i32*> %A) + ret void +} + +; Mix parameter that's a vector of integers and pointers of the same +; size, but the other way around +declare void @test14a(<2 x i8*>) + +define void @test14(<2 x i64> %A) { +; CHECK-LABEL: @test14( +; CHECK: call void bitcast + call void bitcast (void (<2 x i8*>)* @test14a to void (<2 x i64>)*)(<2 x i64> %A) + ret void +} + + +; Return type that's a vector +define <2 x i16> @test15a() { + ret <2 x i16> zeroinitializer +} + +define i32 @test15() { +; CHECK-LABEL: @test15( +; CHECK: %X = call <2 x i16> @test15a() +; CHECK: %1 = bitcast <2 x i16> %X to i32 + %X = call i32 bitcast (<2 x i16> ()* @test15a to i32 ()*)( ) + ret i32 %X +} + +define 
i32 @test16a() { + ret i32 0 +} + +define <2 x i16> @test16() { +; CHECK-LABEL: @test16( +; CHECK: %X = call i32 @test16a() +; CHECK: %1 = bitcast i32 %X to <2 x i16> + %X = call <2 x i16> bitcast (i32 ()* @test16a to <2 x i16> ()*)( ) + ret <2 x i16> %X +} diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll index 52ea7b9..cac0ec1 100644 --- a/test/Transforms/InstCombine/cast.ll +++ b/test/Transforms/InstCombine/cast.ll @@ -1,6 +1,6 @@ ; Tests to make sure elimination of casts is working correctly ; RUN: opt < %s -instcombine -S | FileCheck %s -target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n8:16:32:64" +target datalayout = "E-p:64:64:64-p1:32:32:32-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n8:16:32:64" @inbuf = external global [32832 x i8] ; <[32832 x i8]*> [#uses=1] @@ -708,6 +708,19 @@ define %s @test68(%s *%p, i64 %i) { ; CHECK-NEXT: ret %s } +define %s @test68_as1(%s addrspace(1)* %p, i32 %i) { +; CHECK-LABEL: @test68_as1( + %o = mul i32 %i, 12 + %q = bitcast %s addrspace(1)* %p to i8 addrspace(1)* + %pp = getelementptr inbounds i8 addrspace(1)* %q, i32 %o +; CHECK-NEXT: getelementptr %s addrspace(1)* + %r = bitcast i8 addrspace(1)* %pp to %s addrspace(1)* + %l = load %s addrspace(1)* %r +; CHECK-NEXT: load %s addrspace(1)* + ret %s %l +; CHECK-NEXT: ret %s +} + define double @test69(double *%p, i64 %i) { ; CHECK-LABEL: @test69( %o = shl nsw i64 %i, 3 @@ -890,6 +903,20 @@ define double @test80([100 x double]* %p, i32 %i) { ; CHECK-NEXT: ret double } +define double @test80_as1([100 x double] addrspace(1)* %p, i16 %i) { +; CHECK-LABEL: @test80_as1( + %tmp = mul nsw i16 %i, 8 +; CHECK-NEXT: sext i16 %i to i32 + %q = bitcast [100 x double] addrspace(1)* %p to i8 addrspace(1)* + %pp = getelementptr i8 addrspace(1)* %q, i16 %tmp +; CHECK-NEXT: getelementptr [100 x double] addrspace(1)* + %r = bitcast 
i8 addrspace(1)* %pp to double addrspace(1)* + %l = load double addrspace(1)* %r +; CHECK-NEXT: load double addrspace(1)* + ret double %l +; CHECK-NEXT: ret double +} + define double @test81(double *%p, float %f) { %i = fptosi float %f to i64 %q = bitcast double* %p to i8* diff --git a/test/Transforms/InstCombine/cast_ptr.ll b/test/Transforms/InstCombine/cast_ptr.ll index 7910ea3..23006a8 100644 --- a/test/Transforms/InstCombine/cast_ptr.ll +++ b/test/Transforms/InstCombine/cast_ptr.ll @@ -1,7 +1,7 @@ ; Tests to make sure elimination of casts is working correctly ; RUN: opt < %s -instcombine -S | FileCheck %s -target datalayout = "p:32:32" +target datalayout = "p:32:32-p1:32:32-p2:16:16" ; This shouldn't convert to getelementptr because the relationship ; between the arithmetic and the layout of allocated memory is @@ -27,6 +27,26 @@ define i1 @test2(i8* %a, i8* %b) { ret i1 %r } +; These casts should be folded away. +; CHECK-LABEL: @test2_as2_same_int( +; CHECK: icmp eq i8 addrspace(2)* %a, %b +define i1 @test2_as2_same_int(i8 addrspace(2)* %a, i8 addrspace(2)* %b) { + %tmpa = ptrtoint i8 addrspace(2)* %a to i16 + %tmpb = ptrtoint i8 addrspace(2)* %b to i16 + %r = icmp eq i16 %tmpa, %tmpb + ret i1 %r +} + +; These casts should be folded away. +; CHECK-LABEL: @test2_as2_larger( +; CHECK: icmp eq i8 addrspace(2)* %a, %b +define i1 @test2_as2_larger(i8 addrspace(2)* %a, i8 addrspace(2)* %b) { + %tmpa = ptrtoint i8 addrspace(2)* %a to i32 + %tmpb = ptrtoint i8 addrspace(2)* %b to i32 + %r = icmp eq i32 %tmpa, %tmpb + ret i1 %r +} + ; These casts should also be folded away. 
; CHECK-LABEL: @test3( ; CHECK: icmp eq i8* %a, @global @@ -43,11 +63,20 @@ define i1 @test4(i32 %A) { ret i1 %C ; CHECK-LABEL: @test4( ; CHECK-NEXT: %C = icmp eq i32 %A, 0 -; CHECK-NEXT: ret i1 %C +; CHECK-NEXT: ret i1 %C } +define i1 @test4_as2(i16 %A) { +; CHECK-LABEL: @test4_as2( +; CHECK-NEXT: %C = icmp eq i16 %A, 0 +; CHECK-NEXT: ret i1 %C + %B = inttoptr i16 %A to i8 addrspace(2)* + %C = icmp eq i8 addrspace(2)* %B, null + ret i1 %C +} -; Pulling the cast out of the load allows us to eliminate the load, and then + +; Pulling the cast out of the load allows us to eliminate the load, and then ; the whole array. %op = type { float } @@ -69,11 +98,11 @@ define %unop* @test5(%op* %O) { ; InstCombine can not 'load (cast P)' -> cast (load P)' if the cast changes ; the address space. -define i8 @test6(i8 addrspace(1)* %source) { -entry: - %arrayidx223 = bitcast i8 addrspace(1)* %source to i8* +define i8 @test6(i8 addrspace(1)* %source) { +entry: + %arrayidx223 = addrspacecast i8 addrspace(1)* %source to i8* %tmp4 = load i8* %arrayidx223 ret i8 %tmp4 ; CHECK-LABEL: @test6( ; CHECK: load i8* %arrayidx223 -} +} diff --git a/test/Transforms/InstCombine/compare-signs.ll b/test/Transforms/InstCombine/compare-signs.ll index cdf95ab..62cd5b3 100644 --- a/test/Transforms/InstCombine/compare-signs.ll +++ b/test/Transforms/InstCombine/compare-signs.ll @@ -24,9 +24,9 @@ define i32 @test3(i32 %a, i32 %b) nounwind readnone { ; CHECK-LABEL: @test3( entry: -; CHECK: xor i32 %a, %b -; CHECK: lshr i32 %0, 31 -; CHECK: xor i32 %1, 1 +; CHECK: [[XOR1:%.*]] = xor i32 %a, %b +; CHECK: [[SHIFT:%.*]] = lshr i32 [[XOR1]], 31 +; CHECK: [[XOR2:%.*]] = xor i32 [[SHIFT]], 1 %0 = lshr i32 %a, 31 ; <i32> [#uses=1] %1 = lshr i32 %b, 31 ; <i32> [#uses=1] %2 = icmp eq i32 %0, %1 ; <i1> [#uses=1] @@ -34,7 +34,7 @@ entry: ret i32 %3 ; CHECK-NOT: icmp ; CHECK-NOT: zext -; CHECK: ret i32 %2 +; CHECK: ret i32 [[XOR2]] } ; Variation on @test3: checking the 2nd bit in a situation where the 5th bit diff 
--git a/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll b/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll new file mode 100644 index 0000000..9f21d54 --- /dev/null +++ b/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll @@ -0,0 +1,232 @@ +; RUN: opt -S -instcombine %s -o - | FileCheck %s +target datalayout = "e-p:32:32:32-p1:64:64:64-p2:8:8:8-p3:16:16:16-p4:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32" + +@g = addrspace(3) global i32 89 + +@const_zero_i8_as1 = addrspace(1) constant i8 0 +@const_zero_i32_as1 = addrspace(1) constant i32 0 + +@const_zero_i8_as2 = addrspace(2) constant i8 0 +@const_zero_i32_as2 = addrspace(2) constant i32 0 + +@const_zero_i8_as3 = addrspace(3) constant i8 0 +@const_zero_i32_as3 = addrspace(3) constant i32 0 + +; Test constant folding of inttoptr (ptrtoint constantexpr) +; The intermediate integer size is the same as the pointer size +define i32 addrspace(3)* @test_constant_fold_inttoptr_as_pointer_same_size() { +; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_same_size( +; CHECK-NEXT: ret i32 addrspace(3)* @const_zero_i32_as3 + %x = ptrtoint i32 addrspace(3)* @const_zero_i32_as3 to i32 + %y = inttoptr i32 %x to i32 addrspace(3)* + ret i32 addrspace(3)* %y +} + +; The intermediate integer size is larger than the pointer size +define i32 addrspace(2)* @test_constant_fold_inttoptr_as_pointer_smaller() { +; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_smaller( +; CHECK-NEXT: ret i32 addrspace(2)* @const_zero_i32_as2 + %x = ptrtoint i32 addrspace(2)* @const_zero_i32_as2 to i16 + %y = inttoptr i16 %x to i32 addrspace(2)* + ret i32 addrspace(2)* %y +} + +; Different address spaces that are the same size, but they are +; different so nothing should happen +define i32 addrspace(4)* @test_constant_fold_inttoptr_as_pointer_smaller_different_as() { +; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_smaller_different_as( +; CHECK-NEXT: ret i32 addrspace(4)* inttoptr 
(i16 ptrtoint (i32 addrspace(3)* @const_zero_i32_as3 to i16) to i32 addrspace(4)*) + %x = ptrtoint i32 addrspace(3)* @const_zero_i32_as3 to i16 + %y = inttoptr i16 %x to i32 addrspace(4)* + ret i32 addrspace(4)* %y +} + +; Make sure we don't introduce a bitcast between different sized +; address spaces when folding this +define i32 addrspace(2)* @test_constant_fold_inttoptr_as_pointer_smaller_different_size_as() { +; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_smaller_different_size_as( +; CHECK-NEXT: ret i32 addrspace(2)* inttoptr (i32 ptrtoint (i32 addrspace(3)* @const_zero_i32_as3 to i32) to i32 addrspace(2)*) + %x = ptrtoint i32 addrspace(3)* @const_zero_i32_as3 to i32 + %y = inttoptr i32 %x to i32 addrspace(2)* + ret i32 addrspace(2)* %y +} + +; The intermediate integer size is too small, nothing should happen +define i32 addrspace(3)* @test_constant_fold_inttoptr_as_pointer_larger() { +; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_larger( +; CHECK-NEXT: ret i32 addrspace(3)* inttoptr (i8 ptrtoint (i32 addrspace(3)* @const_zero_i32_as3 to i8) to i32 addrspace(3)*) + %x = ptrtoint i32 addrspace(3)* @const_zero_i32_as3 to i8 + %y = inttoptr i8 %x to i32 addrspace(3)* + ret i32 addrspace(3)* %y +} + +define i8 @const_fold_ptrtoint() { +; CHECK-LABEL: @const_fold_ptrtoint( +; CHECK-NEXT: ret i8 4 + ret i8 ptrtoint (i32 addrspace(2)* inttoptr (i4 4 to i32 addrspace(2)*) to i8) +} + +; Test that mask happens when the destination pointer is smaller than +; the original +define i8 @const_fold_ptrtoint_mask() { +; CHECK-LABEL: @const_fold_ptrtoint_mask( +; CHECK-NEXT: ret i8 1 + ret i8 ptrtoint (i32 addrspace(3)* inttoptr (i32 257 to i32 addrspace(3)*) to i8) +} + +; Address space 0 is too small for the correct mask, should mask with +; 64-bits instead of 32 +define i64 @const_fold_ptrtoint_mask_small_as0() { +; CHECK-LABEL: @const_fold_ptrtoint_mask_small_as0( +; CHECK: ret i64 -1 + ret i64 ptrtoint (i32 addrspace(1)* inttoptr (i128 -1 to i32 
addrspace(1)*) to i64) +} + +define i32 addrspace(3)* @const_inttoptr() { +; CHECK-LABEL: @const_inttoptr( +; CHECK-NEXT: ret i32 addrspace(3)* inttoptr (i16 4 to i32 addrspace(3)*) + %p = inttoptr i16 4 to i32 addrspace(3)* + ret i32 addrspace(3)* %p +} + +define i16 @const_ptrtoint() { +; CHECK-LABEL: @const_ptrtoint( +; CHECK-NEXT: ret i16 ptrtoint (i32 addrspace(3)* @g to i16) + %i = ptrtoint i32 addrspace(3)* @g to i16 + ret i16 %i +} + +define i16 @const_inttoptr_ptrtoint() { +; CHECK-LABEL: @const_inttoptr_ptrtoint( +; CHECK-NEXT: ret i16 9 + ret i16 ptrtoint (i32 addrspace(3)* inttoptr (i16 9 to i32 addrspace(3)*) to i16) +} + +define i1 @constant_fold_cmp_constantexpr_inttoptr() { +; CHECK-LABEL: @constant_fold_cmp_constantexpr_inttoptr( +; CHECK-NEXT: ret i1 true + %x = icmp eq i32 addrspace(3)* inttoptr (i16 0 to i32 addrspace(3)*), null + ret i1 %x +} + +define i1 @constant_fold_inttoptr_null(i16 %i) { +; CHECK-LABEL: @constant_fold_inttoptr_null( +; CHECK-NEXT: ret i1 false + %x = icmp eq i32 addrspace(3)* inttoptr (i16 99 to i32 addrspace(3)*), inttoptr (i16 0 to i32 addrspace(3)*) + ret i1 %x +} + +define i1 @constant_fold_ptrtoint_null() { +; CHECK-LABEL: @constant_fold_ptrtoint_null( +; CHECK-NEXT: ret i1 false + %x = icmp eq i16 ptrtoint (i32 addrspace(3)* @g to i16), ptrtoint (i32 addrspace(3)* null to i16) + ret i1 %x +} + +define i1 @constant_fold_ptrtoint_null_2() { +; CHECK-LABEL: @constant_fold_ptrtoint_null_2( +; CHECK-NEXT: ret i1 false + %x = icmp eq i16 ptrtoint (i32 addrspace(3)* null to i16), ptrtoint (i32 addrspace(3)* @g to i16) + ret i1 %x +} + +define i1 @constant_fold_ptrtoint() { +; CHECK-LABEL: @constant_fold_ptrtoint( +; CHECK-NEXT: ret i1 true + %x = icmp eq i16 ptrtoint (i32 addrspace(3)* @g to i16), ptrtoint (i32 addrspace(3)* @g to i16) + ret i1 %x +} + +define i1 @constant_fold_inttoptr() { +; CHECK-LABEL: @constant_fold_inttoptr( +; CHECK-NEXT: ret i1 false + %x = icmp eq i32 addrspace(3)* inttoptr (i16 99 to i32 
addrspace(3)*), inttoptr (i16 27 to i32 addrspace(3)*) + ret i1 %x +} + +@g_float_as3 = addrspace(3) global float zeroinitializer +@g_v4f_as3 = addrspace(3) global <4 x float> zeroinitializer + +define float @constant_fold_bitcast_ftoi_load() { +; CHECK-LABEL: @constant_fold_bitcast_ftoi_load( +; CHECK: load float addrspace(3)* bitcast (i32 addrspace(3)* @g to float addrspace(3)*), align 4 + %a = load float addrspace(3)* bitcast (i32 addrspace(3)* @g to float addrspace(3)*), align 4 + ret float %a +} + +define i32 @constant_fold_bitcast_itof_load() { +; CHECK-LABEL: @constant_fold_bitcast_itof_load( +; CHECK: load i32 addrspace(3)* bitcast (float addrspace(3)* @g_float_as3 to i32 addrspace(3)*), align 4 + %a = load i32 addrspace(3)* bitcast (float addrspace(3)* @g_float_as3 to i32 addrspace(3)*), align 4 + ret i32 %a +} + +define <4 x i32> @constant_fold_bitcast_vector_as() { +; CHECK-LABEL: @constant_fold_bitcast_vector_as( +; CHECK: load <4 x float> addrspace(3)* @g_v4f_as3, align 16 +; CHECK: bitcast <4 x float> %1 to <4 x i32> + %a = load <4 x i32> addrspace(3)* bitcast (<4 x float> addrspace(3)* @g_v4f_as3 to <4 x i32> addrspace(3)*), align 4 + ret <4 x i32> %a +} + +@i32_array_as3 = addrspace(3) global [10 x i32] zeroinitializer + +define i32 @test_cast_gep_small_indices_as() { +; CHECK-LABEL: @test_cast_gep_small_indices_as( +; CHECK: load i32 addrspace(3)* getelementptr inbounds ([10 x i32] addrspace(3)* @i32_array_as3, i16 0, i16 0), align 16 + %p = getelementptr [10 x i32] addrspace(3)* @i32_array_as3, i7 0, i7 0 + %x = load i32 addrspace(3)* %p, align 4 + ret i32 %x +} + +%struct.foo = type { float, float, [4 x i32], i32 addrspace(3)* } + +@constant_fold_global_ptr = addrspace(3) global %struct.foo { + float 0.0, + float 0.0, + [4 x i32] zeroinitializer, + i32 addrspace(3)* getelementptr ([10 x i32] addrspace(3)* @i32_array_as3, i64 0, i64 0) +} + +define i32 @test_cast_gep_large_indices_as() { +; CHECK-LABEL: @test_cast_gep_large_indices_as( +; CHECK: 
load i32 addrspace(3)* getelementptr inbounds ([10 x i32] addrspace(3)* @i32_array_as3, i16 0, i16 0), align 16 + %p = getelementptr [10 x i32] addrspace(3)* @i32_array_as3, i64 0, i64 0 + %x = load i32 addrspace(3)* %p, align 4 + ret i32 %x +} + +define i32 @test_constant_cast_gep_struct_indices_as() { +; CHECK-LABEL: @test_constant_cast_gep_struct_indices_as( +; CHECK: load i32 addrspace(3)* getelementptr inbounds (%struct.foo addrspace(3)* @constant_fold_global_ptr, i16 0, i32 2, i16 2), align 8 + %x = getelementptr %struct.foo addrspace(3)* @constant_fold_global_ptr, i18 0, i32 2, i12 2 + %y = load i32 addrspace(3)* %x, align 4 + ret i32 %y +} + +@constant_data_as3 = addrspace(3) constant [5 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5] + +define i32 @test_read_data_from_global_as3() { +; CHECK-LABEL: @test_read_data_from_global_as3( +; CHECK-NEXT: ret i32 2 + %x = getelementptr [5 x i32] addrspace(3)* @constant_data_as3, i32 0, i32 1 + %y = load i32 addrspace(3)* %x, align 4 + ret i32 %y +} + +@a = addrspace(1) constant i32 9 +@b = addrspace(1) constant i32 23 +@c = addrspace(1) constant i32 34 +@d = addrspace(1) constant i32 99 + +@ptr_array = addrspace(2) constant [4 x i32 addrspace(1)*] [ i32 addrspace(1)* @a, i32 addrspace(1)* @b, i32 addrspace(1)* @c, i32 addrspace(1)* @d] +@indirect = addrspace(0) constant i32 addrspace(1)* addrspace(2)* getelementptr inbounds ([4 x i32 addrspace(1)*] addrspace(2)* @ptr_array, i1 0, i32 2) + +define i32 @constant_through_array_as_ptrs() { +; CHECK-LABEL: @constant_through_array_as_ptrs( +; CHECK-NEXT: ret i32 34 + %p = load i32 addrspace(1)* addrspace(2)* addrspace(0)* @indirect, align 4 + %a = load i32 addrspace(1)* addrspace(2)* %p, align 4 + %b = load i32 addrspace(1)* %a, align 4 + ret i32 %b +} diff --git a/test/Transforms/InstCombine/constant-fold-gep.ll b/test/Transforms/InstCombine/constant-fold-gep.ll index 9f82e66..5fb5602 100644 --- a/test/Transforms/InstCombine/constant-fold-gep.ll +++ 
b/test/Transforms/InstCombine/constant-fold-gep.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -instcombine -S | FileCheck %s -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target datalayout = "E-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" ; Constant folding should fix notionally out-of-bounds indices ; and add inbounds keywords. @@ -72,3 +72,21 @@ entry: ret i64 %E ; CHECK: ret i64 1000 } + +@X_as1 = addrspace(1) global [1000 x i8] zeroinitializer, align 16 + +define i16 @test2_as1() { +; CHECK-LABEL: @test2_as1( + ; CHECK: ret i16 1000 + +entry: + %A = bitcast i8 addrspace(1)* getelementptr inbounds ([1000 x i8] addrspace(1)* @X_as1, i64 1, i64 0) to i8 addrspace(1)* + %B = bitcast i8 addrspace(1)* getelementptr inbounds ([1000 x i8] addrspace(1)* @X_as1, i64 0, i64 0) to i8 addrspace(1)* + + %B2 = ptrtoint i8 addrspace(1)* %B to i16 + %C = sub i16 0, %B2 + %D = getelementptr i8 addrspace(1)* %A, i16 %C + %E = ptrtoint i8 addrspace(1)* %D to i16 + + ret i16 %E +} diff --git a/test/Transforms/InstCombine/debug-line.ll b/test/Transforms/InstCombine/debug-line.ll index a76c353..2e3785f 100644 --- a/test/Transforms/InstCombine/debug-line.ll +++ b/test/Transforms/InstCombine/debug-line.ll @@ -12,15 +12,17 @@ define void @foo() nounwind ssp { declare i32 @printf(i8*, ...) 
!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!10} !llvm.dbg.sp = !{!0} !0 = metadata !{i32 589870, metadata !8, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 589865, metadata !8} ; [ DW_TAG_file_type ] !2 = metadata !{i32 589841, metadata !8, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !9, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !8, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 589845, metadata !8, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{null} !5 = metadata !{i32 5, i32 2, metadata !6, null} !6 = metadata !{i32 589835, metadata !8, metadata !0, i32 4, i32 12, i32 0} ; [ DW_TAG_lexical_block ] !7 = metadata !{i32 6, i32 1, metadata !6, null} !8 = metadata !{metadata !"m.c", metadata !"/private/tmp"} !9 = metadata !{metadata !0} +!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Transforms/InstCombine/debuginfo.ll b/test/Transforms/InstCombine/debuginfo.ll index 2f080bf..75082dc 100644 --- a/test/Transforms/InstCombine/debuginfo.ll +++ b/test/Transforms/InstCombine/debuginfo.ll @@ -2,7 +2,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone -declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readnone +declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) nounwind readnone declare i8* @foo(i8*, i32, i64, i64) nounwind @@ -23,18 +23,19 @@ entry: %tmp1 = load i32* %__val.addr, align 4, !dbg !21 %tmp2 = load i64* %__len.addr, align 8, !dbg !21 %tmp3 = load i8** %__dest.addr, align 8, !dbg !21 - %0 = call i64 
@llvm.objectsize.i64(i8* %tmp3, i1 false), !dbg !21 + %0 = call i64 @llvm.objectsize.i64.p0i8(i8* %tmp3, i1 false), !dbg !21 %call = call i8* @foo(i8* %tmp, i32 %tmp1, i64 %tmp2, i64 %0), !dbg !21 ret i8* %call, !dbg !21 } !llvm.dbg.cu = !{!3} +!llvm.module.flags = !{!30} !0 = metadata !{i32 786689, metadata !1, metadata !"__dest", metadata !2, i32 16777294, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] -!1 = metadata !{i32 786478, metadata !27, metadata !2, metadata !"foobar", metadata !"foobar", metadata !"", i32 79, metadata !4, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i8* (i8*, i32, i64)* @foobar, null, null, metadata !25, i32 79} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786478, metadata !27, metadata !2, metadata !"foobar", metadata !"foobar", metadata !"", i32 79, metadata !4, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8* (i8*, i32, i64)* @foobar, null, null, metadata !25, i32 79} ; [ DW_TAG_subprogram ] [line 79] [local] [def] [foobar] !2 = metadata !{i32 786473, metadata !27} ; [ DW_TAG_file_type ] !3 = metadata !{i32 786449, metadata !28, i32 12, metadata !"clang version 3.0 (trunk 127710)", i1 true, metadata !"", i32 0, metadata !29, metadata !29, metadata !24, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!4 = metadata !{i32 786453, metadata !27, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !5, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{i32 786453, metadata !27, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !5 = metadata !{metadata !6} !6 = metadata !{i32 786447, null, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] !7 = metadata !{i32 786689, metadata !1, metadata !"__val", metadata !2, i32 33554510, metadata !8, i32 0, null} ; [ DW_TAG_arg_variable ] @@ -55,3 +56,4 @@ entry: !27 
= metadata !{metadata !"string.h", metadata !"Game"} !28 = metadata !{metadata !"bits.c", metadata !"Game"} !29 = metadata !{i32 0} +!30 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Transforms/InstCombine/double-float-shrink-1.ll b/test/Transforms/InstCombine/double-float-shrink-1.ll index e5448ee..5cacb59 100644 --- a/test/Transforms/InstCombine/double-float-shrink-1.ll +++ b/test/Transforms/InstCombine/double-float-shrink-1.ll @@ -263,6 +263,7 @@ define double @sin_test2(float %f) nounwind readnone { ret double %call ; CHECK: call double @sin(double %conv) } + define float @sqrt_test(float %f) nounwind readnone { ; CHECK: sqrt_test %conv = fpext float %f to double @@ -272,6 +273,15 @@ define float @sqrt_test(float %f) nounwind readnone { ; CHECK: call float @sqrtf(float %f) } +define float @sqrt_int_test(float %f) nounwind readnone { +; CHECK: sqrt_int_test + %conv = fpext float %f to double + %call = call double @llvm.sqrt.f64(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @llvm.sqrt.f32(float %f) +} + define double @sqrt_test2(float %f) nounwind readnone { ; CHECK: sqrt_test2 %conv = fpext float %f to double @@ -331,3 +341,6 @@ declare double @acos(double) nounwind readnone declare double @acosh(double) nounwind readnone declare double @asin(double) nounwind readnone declare double @asinh(double) nounwind readnone + +declare double @llvm.sqrt.f64(double) nounwind readnone + diff --git a/test/Transforms/InstCombine/enforce-known-alignment.ll b/test/Transforms/InstCombine/enforce-known-alignment.ll index 6645d99..46bb605 100644 --- a/test/Transforms/InstCombine/enforce-known-alignment.ll +++ b/test/Transforms/InstCombine/enforce-known-alignment.ll @@ -1,8 +1,12 @@ -; RUN: opt < %s -instcombine -S | grep alloca | grep "align 16" -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +; RUN: opt 
-instcombine -S %s | FileCheck %s + +target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin9.6" define void @foo(i32) { +; CHECK-LABEL: @foo( +; CHECK: alloca +; CHECK: align 16 %2 = alloca [3 x <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>], align 16 ; <[3 x <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>]*> [#uses=1] %3 = getelementptr [3 x <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>]* %2, i32 0, i32 0 ; <<{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>*> [#uses=1] %4 = getelementptr <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>* %3, i32 0, i32 0 ; <{ { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } }*> [#uses=1] @@ -11,8 +15,24 @@ define void @foo(i32) { %7 = getelementptr { [8 x i16] }* %6, i32 0, i32 0 ; <[8 x i16]*> [#uses=1] %8 = getelementptr [8 x i16]* %7, i32 0, i32 0 ; <i16*> [#uses=1] store i16 0, i16* %8, align 16 - call void @bar(i16* %8) + call void @bar(i16* %8) ret void } declare void @bar(i16*) + +define void @foo_as1(i32 %a, [3 x <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>] addrspace(1)* %b) { +; CHECK-LABEL: @foo_as1( +; CHECK: align 16 + %1 = getelementptr [3 x <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>] addrspace(1)* %b, i32 0, i32 0 ; <<{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>*> [#uses=1] + %2 = getelementptr <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }> addrspace(1)* %1, i32 0, i32 0 ; <{ { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } }*> [#uses=1] + %3 = getelementptr { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } addrspace(1)* %2, i32 0, i32 0 ; <{ [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 }*> [#uses=1] + %4 = bitcast { [2 x { { i32 } }], [2 x i8], 
{ i16 }, [2 x i8], i8, i8 } addrspace(1)* %3 to { [8 x i16] } addrspace(1)* ; <{ [8 x i16] }*> [#uses=1] + %5 = getelementptr { [8 x i16] } addrspace(1)* %4, i32 0, i32 0 ; <[8 x i16]*> [#uses=1] + %6 = getelementptr [8 x i16] addrspace(1)* %5, i32 0, i32 0 ; <i16*> [#uses=1] + store i16 0, i16 addrspace(1)* %6, align 16 + call void @bar_as1(i16 addrspace(1)* %6) + ret void +} + +declare void @bar_as1(i16 addrspace(1)*) diff --git a/test/Transforms/InstCombine/err-rep-cold.ll b/test/Transforms/InstCombine/err-rep-cold.ll new file mode 100644 index 0000000..0cbafc4 --- /dev/null +++ b/test/Transforms/InstCombine/err-rep-cold.ll @@ -0,0 +1,77 @@ +; Test the static branch probability heuristics for error-reporting functions. +; RUN: opt < %s -instcombine -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@stdout = external global %struct._IO_FILE* +@stderr = external global %struct._IO_FILE* +@.str = private unnamed_addr constant [13 x i8] c"an error: %d\00", align 1 +@.str1 = private unnamed_addr constant [9 x i8] c"an error\00", align 1 + +define i32 @test1(i32 %a) #0 { +; CHECK-LABEL: @test1 +entry: + %cmp = icmp sgt i32 %a, 8 + br i1 %cmp, label %if.then, label %return + +if.then: ; preds = %entry + %0 = load %struct._IO_FILE** @stderr, align 8 + %call = tail call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([13 x i8]* @.str, i64 0, i64 0), i32 %a) #1 + br label %return + +; CHECK: %call = tail call i32 (%struct._IO_FILE*, i8*, ...)* 
@fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([13 x i8]* @.str, i64 0, i64 0), i32 %a) #[[AT1:[0-9]+]] + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ 1, %if.then ], [ 0, %entry ] + ret i32 %retval.0 +} + +declare i32 @fprintf(%struct._IO_FILE* nocapture, i8* nocapture readonly, ...) #1 + +define i32 @test2(i32 %a) #0 { +; CHECK-LABEL: @test2 +entry: + %cmp = icmp sgt i32 %a, 8 + br i1 %cmp, label %if.then, label %return + +if.then: ; preds = %entry + %0 = load %struct._IO_FILE** @stderr, align 8 + %1 = tail call i64 @fwrite(i8* getelementptr inbounds ([9 x i8]* @.str1, i64 0, i64 0), i64 8, i64 1, %struct._IO_FILE* %0) + br label %return + +; CHECK: tail call i64 @fwrite(i8* getelementptr inbounds ([9 x i8]* @.str1, i64 0, i64 0), i64 8, i64 1, %struct._IO_FILE* %0) #[[AT2:[0-9]+]] + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ 1, %if.then ], [ 0, %entry ] + ret i32 %retval.0 +} + +declare i64 @fwrite(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) #1 + +define i32 @test3(i32 %a) #0 { +; CHECK-LABEL: @test3 +entry: + %cmp = icmp sgt i32 %a, 8 + br i1 %cmp, label %if.then, label %return + +if.then: ; preds = %entry + %0 = load %struct._IO_FILE** @stdout, align 8 + %1 = tail call i64 @fwrite(i8* getelementptr inbounds ([9 x i8]* @.str1, i64 0, i64 0), i64 8, i64 1, %struct._IO_FILE* %0) + br label %return + +; CHECK-NOT: tail call i64 @fwrite(i8* getelementptr inbounds ([9 x i8]* @.str1, i64 0, i64 0), i64 8, i64 1, %struct._IO_FILE* %0) #[[AT2]] + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ 1, %if.then ], [ 0, %entry ] + ret i32 %retval.0 +} + +attributes #0 = { nounwind uwtable } +attributes #1 = { nounwind } + +; CHECK: attributes #[[AT1]] = { cold nounwind } +; CHECK: attributes #[[AT2]] = { cold } + diff --git a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll index a9a7015..d8ba2a5 100644 --- a/test/Transforms/InstCombine/fast-math.ll +++ 
b/test/Transforms/InstCombine/fast-math.ll @@ -202,6 +202,18 @@ define float @fmul2(float %f1) { ; CHECK: fdiv fast float 1.200000e+07, %f1 } +; X/C1 * C2 => X * (C2/C1) is disabled if X/C1 has multiple uses +@fmul2_external = external global float +define float @fmul2_disable(float %f1) { + %div = fdiv fast float 1.000000e+00, %f1 + store float %div, float* @fmul2_external + %mul = fmul fast float %div, 2.000000e+00 + ret float %mul +; CHECK-LABEL: @fmul2_disable +; CHECK: store +; CHECK: fmul fast +} + ; X/C1 * C2 => X * (C2/C1) (if C2/C1 is normal Fp) define float @fmul3(float %f1, float %f2) { %t1 = fdiv float %f1, 2.0e+3 diff --git a/test/Transforms/InstCombine/ffs-1.ll b/test/Transforms/InstCombine/ffs-1.ll index 8f0b38f..1dec11d 100644 --- a/test/Transforms/InstCombine/ffs-1.ll +++ b/test/Transforms/InstCombine/ffs-1.ll @@ -1,7 +1,7 @@ ; Test that the ffs* library call simplifier works correctly. ; ; RUN: opt < %s -instcombine -S | FileCheck %s -; RUN: opt < %s -mtriple i386-pc-linux -instcombine -S | FileCheck %s -check-prefix=LINUX +; RUN: opt < %s -mtriple i386-pc-linux -instcombine -S | FileCheck %s -check-prefix=CHECK-LINUX target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" diff --git a/test/Transforms/InstCombine/fmul.ll b/test/Transforms/InstCombine/fmul.ll index cf57bed..402ee52 100644 --- a/test/Transforms/InstCombine/fmul.ll +++ b/test/Transforms/InstCombine/fmul.ll @@ -70,3 +70,26 @@ define float @test7(float %x, float %y) { ; CHECK-LABEL: @test7( ; CHECK: fsub float -0.000000e+00, %x } + +; Don't crash when attempting to cast a constant FMul to an instruction. 
+define void @test8(i32* %inout) { +entry: + %0 = load i32* %inout, align 4 + %conv = uitofp i32 %0 to float + %vecinit = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, float %conv, i32 3 + %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %vecinit + %1 = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %mul = fmul <4 x float> zeroinitializer, %1 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %local_var_7.0 = phi <4 x float> [ %mul, %entry ], [ %2, %for.body ] + br i1 undef, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %2 = insertelement <4 x float> %local_var_7.0, float 0.000000e+00, i32 2 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} diff --git a/test/Transforms/InstCombine/fold-vector-select.ll b/test/Transforms/InstCombine/fold-vector-select.ll index 2cb970b..b58d9dc 100644 --- a/test/Transforms/InstCombine/fold-vector-select.ll +++ b/test/Transforms/InstCombine/fold-vector-select.ll @@ -1,4 +1,6 @@ -; RUN: opt < %s -instcombine -S | not grep select +; RUN: opt < %s -instcombine -S | FileCheck %s + +; CHECK-NOT: select define void @foo(<4 x i32> *%A, <4 x i32> *%B, <4 x i32> *%C, <4 x i32> *%D, <4 x i32> *%E, <4 x i32> *%F, <4 x i32> *%G, <4 x i32> *%H, diff --git a/test/Transforms/InstCombine/fpcast.ll b/test/Transforms/InstCombine/fpcast.ll index 09f0532..05d1b48 100644 --- a/test/Transforms/InstCombine/fpcast.ll +++ b/test/Transforms/InstCombine/fpcast.ll @@ -31,4 +31,16 @@ define half @test4(float %a) { ret half %c } +; CHECK: test5 +define half @test5(float %a, float %b, float %c) { +; CHECK: fcmp ogt +; CHECK: fptrunc +; CHECK: select +; CHECK: half 0xH3C00 + %d = fcmp ogt float %a, %b + %e = select i1 %d, float %c, float 1.0 + %f = fptrunc float %e to half + ret half %f +} + declare float @llvm.fabs.f32(float) nounwind readonly diff --git 
a/test/Transforms/InstCombine/fprintf-1.ll b/test/Transforms/InstCombine/fprintf-1.ll index 1b7c104..3f6a314 100644 --- a/test/Transforms/InstCombine/fprintf-1.ll +++ b/test/Transforms/InstCombine/fprintf-1.ll @@ -1,7 +1,7 @@ ; Test that the fprintf library call simplifier works correctly. ; ; RUN: opt < %s -instcombine -S | FileCheck %s -; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=IPRINTF +; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=CHECK-IPRINTF target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" diff --git a/test/Transforms/InstCombine/gep-addrspace.ll b/test/Transforms/InstCombine/gep-addrspace.ll index dfe12db..24c355d 100644 --- a/test/Transforms/InstCombine/gep-addrspace.ll +++ b/test/Transforms/InstCombine/gep-addrspace.ll @@ -9,7 +9,7 @@ target triple = "x86_64-pc-win32" define void @func(%myStruct addrspace(1)* nocapture %p) nounwind { ST: %A = getelementptr inbounds %myStruct addrspace(1)* %p, i64 0 - %B = bitcast %myStruct addrspace(1)* %A to %myStruct* + %B = addrspacecast %myStruct addrspace(1)* %A to %myStruct* %C = getelementptr inbounds %myStruct* %B, i32 0, i32 1 %D = getelementptr inbounds [3 x float]* %C, i32 0, i32 2 %E = load float* %D, align 4 diff --git a/test/Transforms/InstCombine/getelementptr.ll b/test/Transforms/InstCombine/getelementptr.ll index 90f144a..c29a7dc 100644 --- a/test/Transforms/InstCombine/getelementptr.ll +++ b/test/Transforms/InstCombine/getelementptr.ll @@ -1,6 +1,7 @@ ; RUN: opt < %s -instcombine -S | FileCheck %s -target datalayout = "e-p:64:64" +target datalayout = "e-p:64:64-p1:16:16-p2:32:32:32" + %intstruct = type { i32 } %pair = type { i32, i32 } %struct.B = type { double } @@ -8,15 +9,23 @@ target datalayout = "e-p:64:64" @Global = constant [10 x i8] c"helloworld" +@Global_as1 = addrspace(1) constant [10 x i8] c"helloworld" ; Test noop 
elimination define i32* @test1(i32* %I) { - %A = getelementptr i32* %I, i64 0 + %A = getelementptr i32* %I, i64 0 ret i32* %A ; CHECK-LABEL: @test1( ; CHECK: ret i32* %I } +define i32 addrspace(1)* @test1_as1(i32 addrspace(1)* %I) { + %A = getelementptr i32 addrspace(1)* %I, i64 0 + ret i32 addrspace(1)* %A +; CHECK-LABEL: @test1_as1( +; CHECK: ret i32 addrspace(1)* %I +} + ; Test noop elimination define i32* @test2(i32* %I) { %A = getelementptr i32* %I @@ -36,7 +45,7 @@ define i32* @test3(i32* %I) { ; Test that two getelementptr insts fold define i32* @test4({ i32 }* %I) { - %A = getelementptr { i32 }* %I, i64 1 + %A = getelementptr { i32 }* %I, i64 1 %B = getelementptr { i32 }* %A, i64 0, i32 0 ret i32* %B ; CHECK-LABEL: @test4( @@ -45,17 +54,53 @@ define i32* @test4({ i32 }* %I) { define void @test5(i8 %B) { ; This should be turned into a constexpr instead of being an instruction - %A = getelementptr [10 x i8]* @Global, i64 0, i64 4 + %A = getelementptr [10 x i8]* @Global, i64 0, i64 4 store i8 %B, i8* %A ret void ; CHECK-LABEL: @test5( ; CHECK: store i8 %B, i8* getelementptr inbounds ([10 x i8]* @Global, i64 0, i64 4) } +define void @test5_as1(i8 %B) { + ; This should be turned into a constexpr instead of being an instruction + %A = getelementptr [10 x i8] addrspace(1)* @Global_as1, i16 0, i16 4 + store i8 %B, i8 addrspace(1)* %A + ret void +; CHECK-LABEL: @test5_as1( +; CHECK: store i8 %B, i8 addrspace(1)* getelementptr inbounds ([10 x i8] addrspace(1)* @Global_as1, i16 0, i16 4) +} + +%as1_ptr_struct = type { i32 addrspace(1)* } +%as2_ptr_struct = type { i32 addrspace(2)* } + +@global_as2 = addrspace(2) global i32 zeroinitializer +@global_as1_as2_ptr = addrspace(1) global %as2_ptr_struct { i32 addrspace(2)* @global_as2 } + +; This should be turned into a constexpr instead of being an instruction +define void @test_evaluate_gep_nested_as_ptrs(i32 addrspace(2)* %B) { +; CHECK-LABEL: @test_evaluate_gep_nested_as_ptrs( +; CHECK-NEXT: store i32 addrspace(2)* %B, 
i32 addrspace(2)* addrspace(1)* getelementptr inbounds (%as2_ptr_struct addrspace(1)* @global_as1_as2_ptr, i16 0, i32 0), align 8 +; CHECK-NEXT: ret void + %A = getelementptr %as2_ptr_struct addrspace(1)* @global_as1_as2_ptr, i16 0, i32 0 + store i32 addrspace(2)* %B, i32 addrspace(2)* addrspace(1)* %A + ret void +} + +@arst = addrspace(1) global [4 x i8 addrspace(2)*] zeroinitializer + +define void @test_evaluate_gep_as_ptrs_array(i8 addrspace(2)* %B) { +; CHECK-LABEL: @test_evaluate_gep_as_ptrs_array( +; CHECK-NEXT: store i8 addrspace(2)* %B, i8 addrspace(2)* addrspace(1)* getelementptr inbounds ([4 x i8 addrspace(2)*] addrspace(1)* @arst, i16 0, i16 2), align 4 + +; CHECK-NEXT: ret void + %A = getelementptr [4 x i8 addrspace(2)*] addrspace(1)* @arst, i16 0, i16 2 + store i8 addrspace(2)* %B, i8 addrspace(2)* addrspace(1)* %A + ret void +} define i32* @test7(i32* %I, i64 %C, i64 %D) { - %A = getelementptr i32* %I, i64 %C - %B = getelementptr i32* %A, i64 %D + %A = getelementptr i32* %I, i64 %C + %B = getelementptr i32* %A, i64 %D ret i32* %B ; CHECK-LABEL: @test7( ; CHECK: %A.sum = add i64 %C, %D @@ -64,8 +109,8 @@ define i32* @test7(i32* %I, i64 %C, i64 %D) { define i8* @test8([10 x i32]* %X) { ;; Fold into the cast. 
- %A = getelementptr [10 x i32]* %X, i64 0, i64 0 - %B = bitcast i32* %A to i8* + %A = getelementptr [10 x i32]* %X, i64 0, i64 0 + %B = bitcast i32* %A to i8* ret i8* %B ; CHECK-LABEL: @test8( ; CHECK: bitcast [10 x i32]* %X to i8* @@ -73,7 +118,7 @@ define i8* @test8([10 x i32]* %X) { define i32 @test9() { %A = getelementptr { i32, double }* null, i32 0, i32 1 - %B = ptrtoint double* %A to i32 + %B = ptrtoint double* %A to i32 ret i32 %B ; CHECK-LABEL: @test9( ; CHECK: ret i32 8 @@ -83,15 +128,15 @@ define i1 @test10({ i32, i32 }* %x, { i32, i32 }* %y) { %tmp.1 = getelementptr { i32, i32 }* %x, i32 0, i32 1 %tmp.3 = getelementptr { i32, i32 }* %y, i32 0, i32 1 ;; seteq x, y - %tmp.4 = icmp eq i32* %tmp.1, %tmp.3 + %tmp.4 = icmp eq i32* %tmp.1, %tmp.3 ret i1 %tmp.4 ; CHECK-LABEL: @test10( ; CHECK: icmp eq { i32, i32 }* %x, %y } define i1 @test11({ i32, i32 }* %X) { - %P = getelementptr { i32, i32 }* %X, i32 0, i32 0 - %Q = icmp eq i32* %P, null + %P = getelementptr { i32, i32 }* %X, i32 0, i32 0 + %Q = icmp eq i32* %P, null ret i1 %Q ; CHECK-LABEL: @test11( ; CHECK: icmp eq { i32, i32 }* %X, null @@ -105,11 +150,11 @@ entry: store i32 10, i32* %g3, align 4 %g4 = getelementptr %struct.A* %a, i32 0, i32 0 - + %new_a = bitcast %struct.B* %g4 to %struct.A* - %g5 = getelementptr %struct.A* %new_a, i32 0, i32 1 - %a_a = load i32* %g5, align 4 + %g5 = getelementptr %struct.A* %new_a, i32 0, i32 1 + %a_a = load i32* %g5, align 4 ret i32 %a_a ; CHECK-LABEL: @test12( ; CHECK: getelementptr %struct.A* %a, i64 0, i32 1 @@ -129,8 +174,68 @@ define i1 @test13(i64 %X, %S* %P) { ; CHECK: %C = icmp eq i64 %X, -1 } - -@G = external global [3 x i8] +define <2 x i1> @test13_vector(<2 x i64> %X, <2 x %S*> %P) nounwind { +; CHECK-LABEL: @test13_vector( +; CHECK-NEXT: shl nuw <2 x i64> %X, <i64 2, i64 2> +; CHECK-NEXT: add <2 x i64> %A.idx, <i64 4, i64 4> +; CHECK-NEXT: icmp eq <2 x i64> %A.offs, zeroinitializer + %A = getelementptr inbounds <2 x %S*> %P, <2 x i64> zeroinitializer, <2 x 
i32> <i32 1, i32 1>, <2 x i64> %X + %B = getelementptr inbounds <2 x %S*> %P, <2 x i64> <i64 0, i64 0>, <2 x i32> <i32 0, i32 0> + %C = icmp eq <2 x i32*> %A, %B + ret <2 x i1> %C +} + +define i1 @test13_as1(i16 %X, %S addrspace(1)* %P) { +; CHECK-LABEL: @test13_as1( +; CHECK-NEXT: %C = icmp eq i16 %X, -1 +; CHECK-NEXT: ret i1 %C + %A = getelementptr inbounds %S addrspace(1)* %P, i16 0, i32 1, i16 %X + %B = getelementptr inbounds %S addrspace(1)* %P, i16 0, i32 0 + %C = icmp eq i32 addrspace(1)* %A, %B + ret i1 %C +} + +define <2 x i1> @test13_vector_as1(<2 x i16> %X, <2 x %S addrspace(1)*> %P) { +; CHECK-LABEL: @test13_vector_as1( +; CHECK-NEXT: shl nuw <2 x i16> %X, <i16 2, i16 2> +; CHECK-NEXT: add <2 x i16> %A.idx, <i16 4, i16 4> +; CHECK-NEXT: icmp eq <2 x i16> %A.offs, zeroinitializer +; CHECK-NEXT: ret <2 x i1> + %A = getelementptr inbounds <2 x %S addrspace(1)*> %P, <2 x i16> <i16 0, i16 0>, <2 x i32> <i32 1, i32 1>, <2 x i16> %X + %B = getelementptr inbounds <2 x %S addrspace(1)*> %P, <2 x i16> <i16 0, i16 0>, <2 x i32> <i32 0, i32 0> + %C = icmp eq <2 x i32 addrspace(1)*> %A, %B + ret <2 x i1> %C +} + +define i1 @test13_i32(i32 %X, %S* %P) { +; CHECK-LABEL: @test13_i32( +; CHECK: %C = icmp eq i32 %X, -1 + %A = getelementptr inbounds %S* %P, i32 0, i32 1, i32 %X + %B = getelementptr inbounds %S* %P, i32 0, i32 0 + %C = icmp eq i32* %A, %B + ret i1 %C +} + +define i1 @test13_i16(i16 %X, %S* %P) { +; CHECK-LABEL: @test13_i16( +; CHECK: %C = icmp eq i16 %X, -1 + %A = getelementptr inbounds %S* %P, i16 0, i32 1, i16 %X + %B = getelementptr inbounds %S* %P, i16 0, i32 0 + %C = icmp eq i32* %A, %B + ret i1 %C +} + +define i1 @test13_i128(i128 %X, %S* %P) { +; CHECK-LABEL: @test13_i128( +; CHECK: %C = icmp eq i64 %1, -1 + %A = getelementptr inbounds %S* %P, i128 0, i32 1, i128 %X + %B = getelementptr inbounds %S* %P, i128 0, i32 0 + %C = icmp eq i32* %A, %B + ret i1 %C +} + + +@G = external global [3 x i8] define i8* @test14(i32 %Idx) { %idx = zext i32 %Idx to 
i64 %tmp = getelementptr i8* getelementptr ([3 x i8]* @G, i32 0, i32 0), i64 %idx @@ -151,7 +256,7 @@ define i32 *@test15(i64 %X) { define i32* @test16(i32* %X, i32 %Idx) { - %R = getelementptr i32* %X, i32 %Idx + %R = getelementptr i32* %X, i32 %Idx ret i32* %R ; CHECK-LABEL: @test16( ; CHECK: sext i32 %Idx to i64 @@ -164,7 +269,7 @@ define i1 @test17(i16* %P, i32 %I, i32 %J) { %C = icmp ult i16* %X, %Y ret i1 %C ; CHECK-LABEL: @test17( -; CHECK: %C = icmp slt i32 %I, %J +; CHECK: %C = icmp slt i32 %I, %J } define i1 @test18(i16* %P, i32 %I) { @@ -175,6 +280,55 @@ define i1 @test18(i16* %P, i32 %I) { ; CHECK: %C = icmp slt i32 %I, 0 } +; Larger than the pointer size for a non-zero address space +define i1 @test18_as1(i16 addrspace(1)* %P, i32 %I) { +; CHECK-LABEL: @test18_as1( +; CHECK-NEXT: %1 = trunc i32 %I to i16 +; CHECK-NEXT: %C = icmp slt i16 %1, 0 +; CHECK-NEXT: ret i1 %C + %X = getelementptr inbounds i16 addrspace(1)* %P, i32 %I + %C = icmp ult i16 addrspace(1)* %X, %P + ret i1 %C +} + +; Smaller than the pointer size for a non-zero address space +define i1 @test18_as1_i32(i16 addrspace(1)* %P, i32 %I) { +; CHECK-LABEL: @test18_as1_i32( +; CHECK-NEXT: %1 = trunc i32 %I to i16 +; CHECK-NEXT: %C = icmp slt i16 %1, 0 +; CHECK-NEXT: ret i1 %C + %X = getelementptr inbounds i16 addrspace(1)* %P, i32 %I + %C = icmp ult i16 addrspace(1)* %X, %P + ret i1 %C +} + +; Smaller than pointer size +define i1 @test18_i16(i16* %P, i16 %I) { +; CHECK-LABEL: @test18_i16( +; CHECK: %C = icmp slt i16 %I, 0 + %X = getelementptr inbounds i16* %P, i16 %I + %C = icmp ult i16* %X, %P + ret i1 %C +} + +; Same as pointer size +define i1 @test18_i64(i16* %P, i64 %I) { +; CHECK-LABEL: @test18_i64( +; CHECK: %C = icmp slt i64 %I, 0 + %X = getelementptr inbounds i16* %P, i64 %I + %C = icmp ult i16* %X, %P + ret i1 %C +} + +; Larger than the pointer size +define i1 @test18_i128(i16* %P, i128 %I) { +; CHECK-LABEL: @test18_i128( +; CHECK: %C = icmp slt i64 %1, 0 + %X = getelementptr inbounds 
i16* %P, i128 %I + %C = icmp ult i16* %X, %P + ret i1 %C +} + define i32 @test19(i32* %P, i32 %A, i32 %B) { %tmp.4 = getelementptr inbounds i32* %P, i32 %A %tmp.9 = getelementptr inbounds i32* %P, i32 %B @@ -194,6 +348,15 @@ define i32 @test20(i32* %P, i32 %A, i32 %B) { ; CHECK: icmp eq i32 %A, 0 } +define i32 @test20_as1(i32 addrspace(1)* %P, i32 %A, i32 %B) { + %tmp.4 = getelementptr inbounds i32 addrspace(1)* %P, i32 %A + %tmp.6 = icmp eq i32 addrspace(1)* %tmp.4, %P + %tmp.7 = zext i1 %tmp.6 to i32 + ret i32 %tmp.7 +; CHECK-LABEL: @test20_as1( +; CHECK: icmp eq i16 %1, 0 +} + define i32 @test21() { %pbob1 = alloca %intstruct @@ -210,8 +373,8 @@ define i32 @test21() { @B = global i32 2 ; <i32*> [#uses=1] define i1 @test22() { - %C = icmp ult i32* getelementptr (i32* @A, i64 1), - getelementptr (i32* @B, i64 2) + %C = icmp ult i32* getelementptr (i32* @A, i64 1), + getelementptr (i32* @B, i64 2) ret i1 %C ; CHECK-LABEL: @test22( ; CHECK: icmp ult (i32* getelementptr inbounds (i32* @A, i64 1), i32* getelementptr (i32* @B, i64 2)) @@ -262,15 +425,15 @@ define i1 @test26(i8* %arr) { define i32 @test27(%struct.compat_siginfo* %to, %struct.siginfo_t* %from) { entry: - %from_addr = alloca %struct.siginfo_t* - %tmp344 = load %struct.siginfo_t** %from_addr, align 8 + %from_addr = alloca %struct.siginfo_t* + %tmp344 = load %struct.siginfo_t** %from_addr, align 8 %tmp345 = getelementptr %struct.siginfo_t* %tmp344, i32 0, i32 3 %tmp346 = getelementptr { { i32, i32, [0 x i8], %struct.sigval_t, i32 }, [88 x i8] }* %tmp345, i32 0, i32 0 - %tmp346347 = bitcast { i32, i32, [0 x i8], %struct.sigval_t, i32 }* %tmp346 to { i32, i32, %struct.sigval_t }* + %tmp346347 = bitcast { i32, i32, [0 x i8], %struct.sigval_t, i32 }* %tmp346 to { i32, i32, %struct.sigval_t }* %tmp348 = getelementptr { i32, i32, %struct.sigval_t }* %tmp346347, i32 0, i32 2 %tmp349 = getelementptr %struct.sigval_t* %tmp348, i32 0, i32 0 %tmp349350 = bitcast i8** %tmp349 to i32* - %tmp351 = load i32* %tmp349350, 
align 8 + %tmp351 = load i32* %tmp349350, align 8 %tmp360 = call i32 asm sideeffect "...", "=r,ir,*m,i,0,~{dirflag},~{fpsr},~{flags}"( i32 %tmp351, %struct.__large_struct* null, i32 -14, i32 0 ) @@ -280,28 +443,28 @@ entry: ; PR1978 %struct.x = type <{ i8 }> -@.str = internal constant [6 x i8] c"Main!\00" -@.str1 = internal constant [12 x i8] c"destroy %p\0A\00" +@.str = internal constant [6 x i8] c"Main!\00" +@.str1 = internal constant [12 x i8] c"destroy %p\0A\00" define i32 @test28() nounwind { entry: %orientations = alloca [1 x [1 x %struct.x]] - %tmp3 = call i32 @puts( i8* getelementptr ([6 x i8]* @.str, i32 0, i32 0) ) nounwind + %tmp3 = call i32 @puts( i8* getelementptr ([6 x i8]* @.str, i32 0, i32 0) ) nounwind %tmp45 = getelementptr inbounds [1 x [1 x %struct.x]]* %orientations, i32 1, i32 0, i32 0 %orientations62 = getelementptr [1 x [1 x %struct.x]]* %orientations, i32 0, i32 0, i32 0 br label %bb10 bb10: %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb10 ] - %tmp.0.reg2mem.0.rec = mul i32 %indvar, -1 - %tmp12.rec = add i32 %tmp.0.reg2mem.0.rec, -1 + %tmp.0.reg2mem.0.rec = mul i32 %indvar, -1 + %tmp12.rec = add i32 %tmp.0.reg2mem.0.rec, -1 %tmp12 = getelementptr inbounds %struct.x* %tmp45, i32 %tmp12.rec %tmp16 = call i32 (i8*, ...)* @printf( i8* getelementptr ([12 x i8]* @.str1, i32 0, i32 0), %struct.x* %tmp12 ) nounwind %tmp84 = icmp eq %struct.x* %tmp12, %orientations62 %indvar.next = add i32 %indvar, 1 br i1 %tmp84, label %bb17, label %bb10 -bb17: +bb17: ret i32 0 ; CHECK-LABEL: @test28( ; CHECK: icmp eq i32 %indvar, 0 @@ -318,7 +481,7 @@ declare i32 @printf(i8*, ...) 
%T = type <{ i64, i64, i64 }> define i32 @test29(i8* %start, i32 %X) nounwind { entry: - %tmp3 = load i64* null + %tmp3 = load i64* null %add.ptr = getelementptr i8* %start, i64 %tmp3 %tmp158 = load i32* null %add.ptr159 = getelementptr %T* null, i32 %tmp158 @@ -356,7 +519,7 @@ declare void @test30f(i32*) define i1 @test31(i32* %A) { %B = getelementptr i32* %A, i32 1 %C = getelementptr i32* %A, i64 1 - %V = icmp eq i32* %B, %C + %V = icmp eq i32* %B, %C ret i1 %V ; CHECK-LABEL: @test31( ; CHECK: ret i1 true @@ -372,7 +535,7 @@ define i8* @test32(i8* %v) { %D = getelementptr { [16 x i8] }* %C, i32 0, i32 0, i32 8 %E = bitcast i8* %D to i8** store i8* %v, i8** %E - %F = getelementptr [4 x i8*]* %A, i32 0, i32 2 + %F = getelementptr [4 x i8*]* %A, i32 0, i32 2 %G = load i8** %F ret i8* %G ; CHECK-LABEL: @test32( @@ -384,23 +547,46 @@ define i8* @test32(i8* %v) { %struct.Key = type { { i32, i32 } } %struct.anon = type <{ i8, [3 x i8], i32 }> -define i32 *@test33(%struct.Key *%A) { - %B = bitcast %struct.Key* %A to %struct.anon* - %C = getelementptr %struct.anon* %B, i32 0, i32 2 - ret i32 *%C +define i32* @test33(%struct.Key* %A) { ; CHECK-LABEL: @test33( ; CHECK: getelementptr %struct.Key* %A, i64 0, i32 0, i32 1 + %B = bitcast %struct.Key* %A to %struct.anon* + %C = getelementptr %struct.anon* %B, i32 0, i32 2 + ret i32* %C } +define i32 addrspace(1)* @test33_as1(%struct.Key addrspace(1)* %A) { +; CHECK-LABEL: @test33_as1( +; CHECK: getelementptr %struct.Key addrspace(1)* %A, i16 0, i32 0, i32 1 + %B = bitcast %struct.Key addrspace(1)* %A to %struct.anon addrspace(1)* + %C = getelementptr %struct.anon addrspace(1)* %B, i32 0, i32 2 + ret i32 addrspace(1)* %C +} +define i32 addrspace(1)* @test33_array_as1([10 x i32] addrspace(1)* %A) { +; CHECK-LABEL: @test33_array_as1( +; CHECK: getelementptr [10 x i32] addrspace(1)* %A, i16 0, i16 2 + %B = bitcast [10 x i32] addrspace(1)* %A to [5 x i32] addrspace(1)* + %C = getelementptr [5 x i32] addrspace(1)* %B, i32 0, i32 2 + 
ret i32 addrspace(1)* %C +} + +; Make sure the GEP indices use the right pointer sized integer +define i32 addrspace(1)* @test33_array_struct_as1([10 x %struct.Key] addrspace(1)* %A) { +; CHECK-LABEL: @test33_array_struct_as1( +; CHECK: getelementptr [10 x %struct.Key] addrspace(1)* %A, i16 0, i16 1, i32 0, i32 0 + %B = bitcast [10 x %struct.Key] addrspace(1)* %A to [20 x i32] addrspace(1)* + %C = getelementptr [20 x i32] addrspace(1)* %B, i32 0, i32 2 + ret i32 addrspace(1)* %C +} %T2 = type { i8*, i8 } define i8* @test34(i8* %Val, i64 %V) nounwind { entry: - %A = alloca %T2, align 8 + %A = alloca %T2, align 8 %mrv_gep = bitcast %T2* %A to i64* %B = getelementptr %T2* %A, i64 0, i32 0 - + store i64 %V, i64* %mrv_gep %C = load i8** %B, align 8 ret i8* %C @@ -519,4 +705,88 @@ define i1 @pr16483([1 x i8]* %a, [1 x i8]* %b) { ; CHECK-NEXT: icmp ult [1 x i8]* %a, %b } +define i8 @test_gep_bitcast_as1(i32 addrspace(1)* %arr, i16 %N) { +; CHECK-LABEL: @test_gep_bitcast_as1( +; CHECK: getelementptr i32 addrspace(1)* %arr, i16 %N +; CHECK: bitcast + %cast = bitcast i32 addrspace(1)* %arr to i8 addrspace(1)* + %V = mul i16 %N, 4 + %t = getelementptr i8 addrspace(1)* %cast, i16 %V + %x = load i8 addrspace(1)* %t + ret i8 %x +} + +; The element size of the array matches the element size of the pointer +define i64 @test_gep_bitcast_array_same_size_element([100 x double]* %arr, i64 %N) { +; CHECK-LABEL: @test_gep_bitcast_array_same_size_element( +; CHECK: getelementptr [100 x double]* %arr, i64 0, i64 %V +; CHECK: bitcast + %cast = bitcast [100 x double]* %arr to i64* + %V = mul i64 %N, 8 + %t = getelementptr i64* %cast, i64 %V + %x = load i64* %t + ret i64 %x +} + +; The element size of the array is different the element size of the pointer +define i8 @test_gep_bitcast_array_different_size_element([100 x double]* %arr, i64 %N) { +; CHECK-LABEL: @test_gep_bitcast_array_different_size_element( +; CHECK: getelementptr [100 x double]* %arr, i64 0, i64 %N +; CHECK: bitcast + %cast 
= bitcast [100 x double]* %arr to i8* + %V = mul i64 %N, 8 + %t = getelementptr i8* %cast, i64 %V + %x = load i8* %t + ret i8 %x +} + +define i64 @test_gep_bitcast_array_same_size_element_as1([100 x double] addrspace(1)* %arr, i16 %N) { +; CHECK-LABEL: @test_gep_bitcast_array_same_size_element_as1( +; CHECK: getelementptr [100 x double] addrspace(1)* %arr, i16 0, i16 %V +; CHECK: bitcast + %cast = bitcast [100 x double] addrspace(1)* %arr to i64 addrspace(1)* + %V = mul i16 %N, 8 + %t = getelementptr i64 addrspace(1)* %cast, i16 %V + %x = load i64 addrspace(1)* %t + ret i64 %x +} + +define i8 @test_gep_bitcast_array_different_size_element_as1([100 x double] addrspace(1)* %arr, i16 %N) { +; CHECK-LABEL: @test_gep_bitcast_array_different_size_element_as1( +; CHECK: getelementptr [100 x double] addrspace(1)* %arr, i16 0, i16 %N +; CHECK: bitcast + %cast = bitcast [100 x double] addrspace(1)* %arr to i8 addrspace(1)* + %V = mul i16 %N, 8 + %t = getelementptr i8 addrspace(1)* %cast, i16 %V + %x = load i8 addrspace(1)* %t + ret i8 %x +} + +define i64 @test40() { + %array = alloca [3 x i32], align 4 + %gep = getelementptr inbounds [3 x i32]* %array, i64 0, i64 2 + %gepi8 = bitcast i32* %gep to i8* + %p = ptrtoint [3 x i32]* %array to i64 + %np = sub i64 0, %p + %gep2 = getelementptr i8* %gepi8, i64 %np + %ret = ptrtoint i8* %gep2 to i64 + ret i64 %ret + +; CHECK-LABEL: @test40 +; CHECK-NEXT: ret i64 8 +} + +define i16 @test41([3 x i32] addrspace(1)* %array) { + %gep = getelementptr inbounds [3 x i32] addrspace(1)* %array, i16 0, i16 2 + %gepi8 = bitcast i32 addrspace(1)* %gep to i8 addrspace(1)* + %p = ptrtoint [3 x i32] addrspace(1)* %array to i16 + %np = sub i16 0, %p + %gep2 = getelementptr i8 addrspace(1)* %gepi8, i16 %np + %ret = ptrtoint i8 addrspace(1)* %gep2 to i16 + ret i16 %ret + +; CHECK-LABEL: @test41( +; CHECK-NEXT: ret i16 8 +} + ; CHECK: attributes [[NUW]] = { nounwind } diff --git a/test/Transforms/InstCombine/icmp-logical.ll 
b/test/Transforms/InstCombine/icmp-logical.ll new file mode 100644 index 0000000..d5d8cbc --- /dev/null +++ b/test/Transforms/InstCombine/icmp-logical.ll @@ -0,0 +1,152 @@ +; RUN: opt -instcombine -S -o - %s | FileCheck %s + +define i1 @masked_and_notallzeroes(i32 %A) { +; CHECK-LABEL: @masked_and_notallzeroes +; CHECK: [[MASK:%.*]] = and i32 %A, 7 +; CHECK: icmp ne i32 [[MASK]], 0 +; CHECK-NOT: and i32 %A, 39 +; CHECK: ret i1 + + %mask1 = and i32 %A, 7 + %tst1 = icmp ne i32 %mask1, 0 + + %mask2 = and i32 %A, 39 + %tst2 = icmp ne i32 %mask2, 0 + + %res = and i1 %tst1, %tst2 + ret i1 %res +} + +define i1 @masked_or_allzeroes(i32 %A) { +; CHECK-LABEL: @masked_or_allzeroes +; CHECK: [[MASK:%.*]] = and i32 %A, 7 +; CHECK: icmp eq i32 [[MASK]], 0 +; CHECK-NOT: and i32 %A, 39 +; CHECK: ret i1 + + %mask1 = and i32 %A, 7 + %tst1 = icmp eq i32 %mask1, 0 + + %mask2 = and i32 %A, 39 + %tst2 = icmp eq i32 %mask2, 0 + + %res = or i1 %tst1, %tst2 + ret i1 %res +} + +define i1 @masked_and_notallones(i32 %A) { +; CHECK-LABEL: @masked_and_notallones +; CHECK: [[MASK:%.*]] = and i32 %A, 7 +; CHECK: icmp ne i32 [[MASK]], 7 +; CHECK-NOT: and i32 %A, 39 +; CHECK: ret i1 + + %mask1 = and i32 %A, 7 + %tst1 = icmp ne i32 %mask1, 7 + + %mask2 = and i32 %A, 39 + %tst2 = icmp ne i32 %mask2, 39 + + %res = and i1 %tst1, %tst2 + ret i1 %res +} + +define i1 @masked_or_allones(i32 %A) { +; CHECK-LABEL: @masked_or_allones +; CHECK: [[MASK:%.*]] = and i32 %A, 7 +; CHECK: icmp eq i32 [[MASK]], 7 +; CHECK-NOT: and i32 %A, 39 +; CHECK: ret i1 + + %mask1 = and i32 %A, 7 + %tst1 = icmp eq i32 %mask1, 7 + + %mask2 = and i32 %A, 39 + %tst2 = icmp eq i32 %mask2, 39 + + %res = or i1 %tst1, %tst2 + ret i1 %res +} + +define i1 @masked_and_notA(i32 %A) { +; CHECK-LABEL: @masked_and_notA +; CHECK: [[MASK:%.*]] = and i32 %A, 39 +; CHECK: icmp ne i32 [[MASK]], %A +; CHECK-NOT: and i32 %A, 7 +; CHECK: ret i1 + + %mask1 = and i32 %A, 7 + %tst1 = icmp ne i32 %mask1, %A + + %mask2 = and i32 %A, 39 + %tst2 = icmp ne 
i32 %mask2, %A + + %res = and i1 %tst1, %tst2 + ret i1 %res +} + +define i1 @masked_or_A(i32 %A) { +; CHECK-LABEL: @masked_or_A +; CHECK: [[MASK:%.*]] = and i32 %A, 39 +; CHECK: icmp eq i32 [[MASK]], %A +; CHECK-NOT: and i32 %A, 7 +; CHECK: ret i1 + + %mask1 = and i32 %A, 7 + %tst1 = icmp eq i32 %mask1, %A + + %mask2 = and i32 %A, 39 + %tst2 = icmp eq i32 %mask2, %A + + %res = or i1 %tst1, %tst2 + ret i1 %res +} + +define i1 @masked_or_allzeroes_notoptimised(i32 %A) { +; CHECK-LABEL: @masked_or_allzeroes_notoptimised +; CHECK: [[MASK:%.*]] = and i32 %A, 15 +; CHECK: icmp eq i32 [[MASK]], 0 +; CHECK: [[MASK:%.*]] = and i32 %A, 39 +; CHECK: icmp eq i32 [[MASK]], 0 +; CHECK: ret i1 + + %mask1 = and i32 %A, 15 + %tst1 = icmp eq i32 %mask1, 0 + + %mask2 = and i32 %A, 39 + %tst2 = icmp eq i32 %mask2, 0 + + %res = or i1 %tst1, %tst2 + ret i1 %res +} + +define i1 @nomask_lhs(i32 %in) { +; CHECK-LABEL: @nomask_lhs +; CHECK: [[MASK:%.*]] = and i32 %in, 1 +; CHECK: icmp eq i32 [[MASK]], 0 +; CHECK-NOT: icmp +; CHECK: ret i1 + %tst1 = icmp eq i32 %in, 0 + + %masked = and i32 %in, 1 + %tst2 = icmp eq i32 %masked, 0 + + %val = or i1 %tst1, %tst2 + ret i1 %val +} + + +define i1 @nomask_rhs(i32 %in) { +; CHECK-LABEL: @nomask_rhs +; CHECK: [[MASK:%.*]] = and i32 %in, 1 +; CHECK: icmp eq i32 [[MASK]], 0 +; CHECK-NOT: icmp +; CHECK: ret i1 + %masked = and i32 %in, 1 + %tst1 = icmp eq i32 %masked, 0 + + %tst2 = icmp eq i32 %in, 0 + + %val = or i1 %tst1, %tst2 + ret i1 %val +} diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll index dfeac67..12a4744 100644 --- a/test/Transforms/InstCombine/icmp.ll +++ b/test/Transforms/InstCombine/icmp.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -instcombine -S | FileCheck %s target datalayout = -"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" 
+"e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" define i32 @test1(i32 %X) { entry: @@ -79,7 +79,7 @@ entry: define i1 @test8(i32 %x){ entry: - %a = add i32 %x, -1 + %a = add i32 %x, -1 %b = icmp eq i32 %a, %x ret i1 %b ; CHECK-LABEL: @test8( @@ -89,7 +89,7 @@ entry: define i1 @test9(i32 %x) { entry: %a = add i32 %x, -2 - %b = icmp ugt i32 %x, %a + %b = icmp ugt i32 %x, %a ret i1 %b ; CHECK-LABEL: @test9( ; CHECK: icmp ugt i32 %x, 1 @@ -98,10 +98,9 @@ entry: define i1 @test10(i32 %x){ entry: - %a = add i32 %x, -1 - %b = icmp slt i32 %a, %x + %a = add i32 %x, -1 + %b = icmp slt i32 %a, %x ret i1 %b - ; CHECK-LABEL: @test10( ; CHECK: %b = icmp ne i32 %x, -2147483648 ; CHECK: ret i1 %b @@ -234,6 +233,18 @@ define i1 @test24(i64 %i) { ret i1 %cmp } +@X_as1 = addrspace(1) global [1000 x i32] zeroinitializer + +; CHECK: @test24_as1 +; CHECK: trunc i64 %i to i16 +; CHECK: %cmp = icmp eq i16 %1, 1000 +; CHECK: ret i1 %cmp +define i1 @test24_as1(i64 %i) { + %p1 = getelementptr inbounds i32 addrspace(1)* getelementptr inbounds ([1000 x i32] addrspace(1)* @X_as1, i64 0, i64 0), i64 %i + %cmp = icmp eq i32 addrspace(1)* %p1, getelementptr inbounds ([1000 x i32] addrspace(1)* @X_as1, i64 1, i64 0) + ret i1 %cmp +} + ; CHECK-LABEL: @test25( ; X + Z > Y + Z -> X > Y if there is no overflow. 
; CHECK: %c = icmp sgt i32 %x, %y @@ -473,7 +484,7 @@ define <2 x i1> @test49(<2 x i32> %tmp3) { entry: %tmp11 = and <2 x i32> %tmp3, <i32 3, i32 3> %cmp = icmp ult <2 x i32> %tmp11, <i32 4, i32 4> - ret <2 x i1> %cmp + ret <2 x i1> %cmp } ; PR9343 #7 @@ -512,12 +523,12 @@ define i1 @test52(i32 %x1) nounwind { ; PR9838 ; CHECK-LABEL: @test53( -; CHECK-NEXT: ashr exact -; CHECK-NEXT: ashr +; CHECK-NEXT: sdiv exact +; CHECK-NEXT: sdiv ; CHECK-NEXT: icmp define i1 @test53(i32 %a, i32 %b) nounwind { - %x = ashr exact i32 %a, 30 - %y = ashr i32 %b, 30 + %x = sdiv exact i32 %a, 30 + %y = sdiv i32 %b, 30 %z = icmp eq i32 %x, %y ret i1 %z } @@ -603,6 +614,21 @@ define i1 @test59(i8* %foo) { ; CHECK: ret i1 true } +define i1 @test59_as1(i8 addrspace(1)* %foo) { + %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)* + %gep1 = getelementptr inbounds i32 addrspace(1)* %bit, i64 2 + %gep2 = getelementptr inbounds i8 addrspace(1)* %foo, i64 10 + %cast1 = bitcast i32 addrspace(1)* %gep1 to i8 addrspace(1)* + %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2 + %use = ptrtoint i8 addrspace(1)* %cast1 to i64 + %call = call i32 @test58_d(i64 %use) nounwind + ret i1 %cmp +; CHECK: @test59_as1 +; CHECK: %[[GEP:.+]] = getelementptr inbounds i8 addrspace(1)* %foo, i16 8 +; CHECK: ptrtoint i8 addrspace(1)* %[[GEP]] to i16 +; CHECK: ret i1 true +} + define i1 @test60(i8* %foo, i64 %i, i64 %j) { %bit = bitcast i8* %foo to i32* %gep1 = getelementptr inbounds i32* %bit, i64 %i @@ -616,6 +642,21 @@ define i1 @test60(i8* %foo, i64 %i, i64 %j) { ; CHECK-NEXT: ret i1 } +define i1 @test60_as1(i8 addrspace(1)* %foo, i64 %i, i64 %j) { + %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)* + %gep1 = getelementptr inbounds i32 addrspace(1)* %bit, i64 %i + %gep2 = getelementptr inbounds i8 addrspace(1)* %foo, i64 %j + %cast1 = bitcast i32 addrspace(1)* %gep1 to i8 addrspace(1)* + %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2 + ret i1 %cmp +; CHECK: @test60_as1 +; CHECK: trunc i64 %i to i16 +; 
CHECK: trunc i64 %j to i16 +; CHECK: %gep1.idx = shl nuw i16 %{{.+}}, 2 +; CHECK-NEXT: icmp sgt i16 %{{.+}}, %gep1.idx +; CHECK-NEXT: ret i1 +} + define i1 @test61(i8* %foo, i64 %i, i64 %j) { %bit = bitcast i8* %foo to i32* %gep1 = getelementptr i32* %bit, i64 %i @@ -629,6 +670,19 @@ define i1 @test61(i8* %foo, i64 %i, i64 %j) { ; CHECK-NEXT: ret i1 } +define i1 @test61_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) { + %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)* + %gep1 = getelementptr i32 addrspace(1)* %bit, i16 %i + %gep2 = getelementptr i8 addrspace(1)* %foo, i16 %j + %cast1 = bitcast i32 addrspace(1)* %gep1 to i8 addrspace(1)* + %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2 + ret i1 %cmp +; Don't transform non-inbounds GEPs. +; CHECK: @test61_as1 +; CHECK: icmp ult i8 addrspace(1)* %cast1, %gep2 +; CHECK-NEXT: ret i1 +} + define i1 @test62(i8* %a) { %arrayidx1 = getelementptr inbounds i8* %a, i64 1 %arrayidx2 = getelementptr inbounds i8* %a, i64 10 @@ -638,6 +692,15 @@ define i1 @test62(i8* %a) { ; CHECK-NEXT: ret i1 true } +define i1 @test62_as1(i8 addrspace(1)* %a) { +; CHECK-LABEL: @test62_as1( +; CHECK-NEXT: ret i1 true + %arrayidx1 = getelementptr inbounds i8 addrspace(1)* %a, i64 1 + %arrayidx2 = getelementptr inbounds i8 addrspace(1)* %a, i64 10 + %cmp = icmp slt i8 addrspace(1)* %arrayidx1, %arrayidx2 + ret i1 %cmp +} + define i1 @test63(i8 %a, i32 %b) nounwind { %z = zext i8 %a to i32 %t = and i32 %b, 255 @@ -999,6 +1062,15 @@ define i1 @test71(i8* %x) { ret i1 %c } +define i1 @test71_as1(i8 addrspace(1)* %x) { +; CHECK-LABEL: @test71_as1( +; CHECK-NEXT: ret i1 false + %a = getelementptr i8 addrspace(1)* %x, i64 8 + %b = getelementptr inbounds i8 addrspace(1)* %x, i64 8 + %c = icmp ugt i8 addrspace(1)* %a, %b + ret i1 %c +} + ; CHECK-LABEL: @icmp_shl_1_V_ult_32( ; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ult i32 %V, 5 ; CHECK-NEXT: ret i1 [[CMP]] @@ -1199,3 +1271,88 @@ define i1 @icmp_sub_-1_X_uge_4(i32 %X) { %cmp = icmp uge i32 %sub, 4 ret i1 
%cmp } + +; CHECK-LABEL: @icmp_swap_operands_for_cse +; CHECK: [[CMP:%[a-z0-9]+]] = icmp ult i32 %X, %Y +; CHECK-NEXT: br i1 [[CMP]], label %true, label %false +; CHECK: ret i1 +define i1 @icmp_swap_operands_for_cse(i32 %X, i32 %Y) { +entry: + %sub = sub i32 %X, %Y + %cmp = icmp ugt i32 %Y, %X + br i1 %cmp, label %true, label %false +true: + %restrue = trunc i32 %sub to i1 + br label %end +false: + %shift = lshr i32 %sub, 4 + %resfalse = trunc i32 %shift to i1 + br label %end +end: + %res = phi i1 [%restrue, %true], [%resfalse, %false] + ret i1 %res +} + +; CHECK-LABEL: @icmp_swap_operands_for_cse2 +; CHECK: [[CMP:%[a-z0-9]+]] = icmp ult i32 %X, %Y +; CHECK-NEXT: br i1 [[CMP]], label %true, label %false +; CHECK: ret i1 +define i1 @icmp_swap_operands_for_cse2(i32 %X, i32 %Y) { +entry: + %cmp = icmp ugt i32 %Y, %X + br i1 %cmp, label %true, label %false +true: + %sub = sub i32 %X, %Y + %sub1 = sub i32 %X, %Y + %add = add i32 %sub, %sub1 + %restrue = trunc i32 %add to i1 + br label %end +false: + %sub2 = sub i32 %Y, %X + %resfalse = trunc i32 %sub2 to i1 + br label %end +end: + %res = phi i1 [%restrue, %true], [%resfalse, %false] + ret i1 %res +} + +; CHECK-LABEL: @icmp_do_not_swap_operands_for_cse +; CHECK: [[CMP:%[a-z0-9]+]] = icmp ugt i32 %Y, %X +; CHECK-NEXT: br i1 [[CMP]], label %true, label %false +; CHECK: ret i1 +define i1 @icmp_do_not_swap_operands_for_cse(i32 %X, i32 %Y) { +entry: + %cmp = icmp ugt i32 %Y, %X + br i1 %cmp, label %true, label %false +true: + %sub = sub i32 %X, %Y + %restrue = trunc i32 %sub to i1 + br label %end +false: + %sub2 = sub i32 %Y, %X + %resfalse = trunc i32 %sub2 to i1 + br label %end +end: + %res = phi i1 [%restrue, %true], [%resfalse, %false] + ret i1 %res +} + +; CHECK-LABEL: @icmp_lshr_lshr_eq +; CHECK: %z.unshifted = xor i32 %a, %b +; CHECK: %z = icmp ult i32 %z.unshifted, 1073741824 +define i1 @icmp_lshr_lshr_eq(i32 %a, i32 %b) nounwind { + %x = lshr i32 %a, 30 + %y = lshr i32 %b, 30 + %z = icmp eq i32 %x, %y + ret i1 %z +} 
+ +; CHECK-LABEL: @icmp_ashr_ashr_ne +; CHECK: %z.unshifted = xor i32 %a, %b +; CHECK: %z = icmp ugt i32 %z.unshifted, 255 +define i1 @icmp_ashr_ashr_ne(i32 %a, i32 %b) nounwind { + %x = ashr i32 %a, 8 + %y = ashr i32 %b, 8 + %z = icmp ne i32 %x, %y + ret i1 %z +} diff --git a/test/Transforms/InstCombine/lit.local.cfg b/test/Transforms/InstCombine/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/InstCombine/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/InstCombine/load-cmp.ll b/test/Transforms/InstCombine/load-cmp.ll index 95dc48c..9810026 100644 --- a/test/Transforms/InstCombine/load-cmp.ll +++ b/test/Transforms/InstCombine/load-cmp.ll @@ -1,18 +1,75 @@ -; RUN: opt < %s -instcombine -S | FileCheck %s +; RUN: opt -instcombine -S < %s | FileCheck -check-prefix=NODL %s +; RUN: opt -instcombine -S -default-data-layout="p:32:32:32-p1:16:16:16-n8:16:32:64" < %s | FileCheck -check-prefix=P32 %s -@G16 = internal constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85, +@G16 = internal constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85, i16 73, i16 82, i16 69, i16 68, i16 0] + +@G16_as1 = internal addrspace(1) constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85, + i16 73, i16 82, i16 69, i16 68, i16 0] + @GD = internal constant [6 x double] [double -10.0, double 1.0, double 4.0, double 2.0, double -20.0, double -40.0] +%Foo = type { i32, i32, i32, i32 } + +@GS = internal constant %Foo { i32 1, i32 4, i32 9, i32 14 } + +@GStructArr = internal constant [4 x %Foo] [ %Foo { i32 1, i32 4, i32 9, i32 14 }, + %Foo { i32 5, i32 4, i32 6, i32 11 }, + %Foo { i32 6, i32 5, i32 9, i32 20 }, + %Foo { i32 12, i32 3, i32 9, i32 8 } ] + + define i1 @test1(i32 %X) { %P = getelementptr inbounds [10 x i16]* @G16, i32 0, i32 %X %Q = load i16* %P %R = icmp eq i16 %Q, 0 ret i1 %R -; CHECK-LABEL: @test1( -; CHECK-NEXT: %R = icmp eq i32 %X, 9 -; CHECK-NEXT: ret i1 %R +; 
NODL-LABEL: @test1( +; NODL-NEXT: %R = icmp eq i32 %X, 9 +; NODL-NEXT: ret i1 %R + +; P32-LABEL: @test1( +; P32-NEXT: %R = icmp eq i32 %X, 9 +; P32-NEXT: ret i1 %R +} + +define i1 @test1_noinbounds(i32 %X) { + %P = getelementptr [10 x i16]* @G16, i32 0, i32 %X + %Q = load i16* %P + %R = icmp eq i16 %Q, 0 + ret i1 %R +; NODL-LABEL: @test1_noinbounds( +; NODL-NEXT: %P = getelementptr [10 x i16]* @G16, i32 0, i32 %X + +; P32-LABEL: @test1_noinbounds( +; P32-NEXT: %R = icmp eq i32 %X, 9 +; P32-NEXT: ret i1 %R +} + +define i1 @test1_noinbounds_i64(i64 %X) { + %P = getelementptr [10 x i16]* @G16, i64 0, i64 %X + %Q = load i16* %P + %R = icmp eq i16 %Q, 0 + ret i1 %R +; NODL-LABEL: @test1_noinbounds_i64( +; NODL-NEXT: %P = getelementptr [10 x i16]* @G16, i64 0, i64 %X + +; P32-LABEL: @test1_noinbounds_i64( +; P32: %R = icmp eq i32 %1, 9 +; P32-NEXT: ret i1 %R +} + +define i1 @test1_noinbounds_as1(i32 %x) { + %p = getelementptr [10 x i16] addrspace(1)* @G16_as1, i16 0, i32 %x + %q = load i16 addrspace(1)* %p + %r = icmp eq i16 %q, 0 + ret i1 %r + +; P32-LABEL: @test1_noinbounds_as1( +; P32-NEXT: trunc i32 %x to i16 +; P32-NEXT: %r = icmp eq i16 %1, 9 +; P32-NEXT: ret i1 %r } define i1 @test2(i32 %X) { @@ -20,9 +77,9 @@ define i1 @test2(i32 %X) { %Q = load i16* %P %R = icmp slt i16 %Q, 85 ret i1 %R -; CHECK-LABEL: @test2( -; CHECK-NEXT: %R = icmp ne i32 %X, 4 -; CHECK-NEXT: ret i1 %R +; NODL-LABEL: @test2( +; NODL-NEXT: %R = icmp ne i32 %X, 4 +; NODL-NEXT: ret i1 %R } define i1 @test3(i32 %X) { @@ -30,9 +87,14 @@ define i1 @test3(i32 %X) { %Q = load double* %P %R = fcmp oeq double %Q, 1.0 ret i1 %R -; CHECK-LABEL: @test3( -; CHECK-NEXT: %R = icmp eq i32 %X, 1 -; CHECK-NEXT: ret i1 %R +; NODL-LABEL: @test3( +; NODL-NEXT: %R = icmp eq i32 %X, 1 +; NODL-NEXT: ret i1 %R + +; P32-LABEL: @test3( +; P32-NEXT: %R = icmp eq i32 %X, 1 +; P32-NEXT: ret i1 %R + } define i1 @test4(i32 %X) { @@ -40,11 +102,17 @@ define i1 @test4(i32 %X) { %Q = load i16* %P %R = icmp sle i16 %Q, 73 ret i1 
%R -; CHECK-LABEL: @test4( -; CHECK-NEXT: lshr i32 933, %X -; CHECK-NEXT: and i32 {{.*}}, 1 -; CHECK-NEXT: %R = icmp ne i32 {{.*}}, 0 -; CHECK-NEXT: ret i1 %R +; NODL-LABEL: @test4( +; NODL-NEXT: lshr i32 933, %X +; NODL-NEXT: and i32 {{.*}}, 1 +; NODL-NEXT: %R = icmp ne i32 {{.*}}, 0 +; NODL-NEXT: ret i1 %R + +; P32-LABEL: @test4( +; P32-NEXT: lshr i32 933, %X +; P32-NEXT: and i32 {{.*}}, 1 +; P32-NEXT: %R = icmp ne i32 {{.*}}, 0 +; P32-NEXT: ret i1 %R } define i1 @test4_i16(i16 %X) { @@ -52,11 +120,19 @@ define i1 @test4_i16(i16 %X) { %Q = load i16* %P %R = icmp sle i16 %Q, 73 ret i1 %R -; CHECK-LABEL: @test4_i16( -; CHECK-NEXT: lshr i16 933, %X -; CHECK-NEXT: and i16 {{.*}}, 1 -; CHECK-NEXT: %R = icmp ne i16 {{.*}}, 0 -; CHECK-NEXT: ret i1 %R + +; NODL-LABEL: @test4_i16( +; NODL-NEXT: lshr i16 933, %X +; NODL-NEXT: and i16 {{.*}}, 1 +; NODL-NEXT: %R = icmp ne i16 {{.*}}, 0 +; NODL-NEXT: ret i1 %R + +; P32-LABEL: @test4_i16( +; P32-NEXT: sext i16 %X to i32 +; P32-NEXT: lshr i32 933, %1 +; P32-NEXT: and i32 {{.*}}, 1 +; P32-NEXT: %R = icmp ne i32 {{.*}}, 0 +; P32-NEXT: ret i1 %R } define i1 @test5(i32 %X) { @@ -64,11 +140,17 @@ define i1 @test5(i32 %X) { %Q = load i16* %P %R = icmp eq i16 %Q, 69 ret i1 %R -; CHECK-LABEL: @test5( -; CHECK-NEXT: icmp eq i32 %X, 2 -; CHECK-NEXT: icmp eq i32 %X, 7 -; CHECK-NEXT: %R = or i1 -; CHECK-NEXT: ret i1 %R +; NODL-LABEL: @test5( +; NODL-NEXT: icmp eq i32 %X, 2 +; NODL-NEXT: icmp eq i32 %X, 7 +; NODL-NEXT: %R = or i1 +; NODL-NEXT: ret i1 %R + +; P32-LABEL: @test5( +; P32-NEXT: icmp eq i32 %X, 2 +; P32-NEXT: icmp eq i32 %X, 7 +; P32-NEXT: %R = or i1 +; P32-NEXT: ret i1 %R } define i1 @test6(i32 %X) { @@ -76,10 +158,15 @@ define i1 @test6(i32 %X) { %Q = load double* %P %R = fcmp ogt double %Q, 0.0 ret i1 %R -; CHECK-LABEL: @test6( -; CHECK-NEXT: add i32 %X, -1 -; CHECK-NEXT: %R = icmp ult i32 {{.*}}, 3 -; CHECK-NEXT: ret i1 %R +; NODL-LABEL: @test6( +; NODL-NEXT: add i32 %X, -1 +; NODL-NEXT: %R = icmp ult i32 {{.*}}, 3 +; 
NODL-NEXT: ret i1 %R + +; P32-LABEL: @test6( +; P32-NEXT: add i32 %X, -1 +; P32-NEXT: %R = icmp ult i32 {{.*}}, 3 +; P32-NEXT: ret i1 %R } define i1 @test7(i32 %X) { @@ -87,10 +174,15 @@ define i1 @test7(i32 %X) { %Q = load double* %P %R = fcmp olt double %Q, 0.0 ret i1 %R -; CHECK-LABEL: @test7( -; CHECK-NEXT: add i32 %X, -1 -; CHECK-NEXT: %R = icmp ugt i32 {{.*}}, 2 -; CHECK-NEXT: ret i1 %R +; NODL-LABEL: @test7( +; NODL-NEXT: add i32 %X, -1 +; NODL-NEXT: %R = icmp ugt i32 {{.*}}, 2 +; NODL-NEXT: ret i1 %R + +; P32-LABEL: @test7( +; P32-NEXT: add i32 %X, -1 +; P32-NEXT: %R = icmp ugt i32 {{.*}}, 2 +; P32-NEXT: ret i1 %R } define i1 @test8(i32 %X) { @@ -99,10 +191,15 @@ define i1 @test8(i32 %X) { %R = and i16 %Q, 3 %S = icmp eq i16 %R, 0 ret i1 %S -; CHECK-LABEL: @test8( -; CHECK-NEXT: and i32 %X, -2 -; CHECK-NEXT: icmp eq i32 {{.*}}, 8 -; CHECK-NEXT: ret i1 +; NODL-LABEL: @test8( +; NODL-NEXT: and i32 %X, -2 +; NODL-NEXT: icmp eq i32 {{.*}}, 8 +; NODL-NEXT: ret i1 + +; P32-LABEL: @test8( +; P32-NEXT: and i32 %X, -2 +; P32-NEXT: icmp eq i32 {{.*}}, 8 +; P32-NEXT: ret i1 } @GA = internal constant [4 x { i32, i32 } ] [ @@ -117,8 +214,161 @@ define i1 @test9(i32 %X) { %Q = load i32* %P %R = icmp eq i32 %Q, 1 ret i1 %R -; CHECK-LABEL: @test9( -; CHECK-NEXT: add i32 %X, -1 -; CHECK-NEXT: icmp ult i32 {{.*}}, 2 -; CHECK-NEXT: ret i1 +; NODL-LABEL: @test9( +; NODL-NEXT: add i32 %X, -1 +; NODL-NEXT: icmp ult i32 {{.*}}, 2 +; NODL-NEXT: ret i1 + +; P32-LABEL: @test9( +; P32-NEXT: add i32 %X, -1 +; P32-NEXT: icmp ult i32 {{.*}}, 2 +; P32-NEXT: ret i1 +} + +define i1 @test10_struct(i32 %x) { +; NODL-LABEL: @test10_struct( +; NODL: getelementptr inbounds %Foo* @GS, i32 %x, i32 0 + +; P32-LABEL: @test10_struct( +; P32: getelementptr inbounds %Foo* @GS, i32 %x, i32 0 + %p = getelementptr inbounds %Foo* @GS, i32 %x, i32 0 + %q = load i32* %p + %r = icmp eq i32 %q, 9 + ret i1 %r +} + +define i1 @test10_struct_noinbounds(i32 %x) { +; NODL-LABEL: @test10_struct_noinbounds( +; NODL: 
getelementptr %Foo* @GS, i32 %x, i32 0 + +; P32-LABEL: @test10_struct_noinbounds( +; P32: getelementptr %Foo* @GS, i32 %x, i32 0 + %p = getelementptr %Foo* @GS, i32 %x, i32 0 + %q = load i32* %p + %r = icmp eq i32 %q, 9 + ret i1 %r +} + +; Test that the GEP indices are converted before we ever get here +; Index < ptr size +define i1 @test10_struct_i16(i16 %x){ +; NODL-LABEL: @test10_struct_i16( +; NODL: getelementptr inbounds %Foo* @GS, i16 %x, i32 0 + +; P32-LABEL: @test10_struct_i16( +; P32: %1 = sext i16 %x to i32 +; P32: getelementptr inbounds %Foo* @GS, i32 %1, i32 0 + %p = getelementptr inbounds %Foo* @GS, i16 %x, i32 0 + %q = load i32* %p + %r = icmp eq i32 %q, 0 + ret i1 %r +} + +; Test that the GEP indices are converted before we ever get here +; Index > ptr size +define i1 @test10_struct_i64(i64 %x){ +; NODL-LABEL: @test10_struct_i64( +; NODL: getelementptr inbounds %Foo* @GS, i64 %x, i32 0 + +; P32-LABEL: @test10_struct_i64( +; P32: %1 = trunc i64 %x to i32 +; P32: getelementptr inbounds %Foo* @GS, i32 %1, i32 0 + %p = getelementptr inbounds %Foo* @GS, i64 %x, i32 0 + %q = load i32* %p + %r = icmp eq i32 %q, 0 + ret i1 %r +} + +define i1 @test10_struct_noinbounds_i16(i16 %x) { +; NODL-LABEL: @test10_struct_noinbounds_i16( +; NODL: getelementptr %Foo* @GS, i16 %x, i32 0 + +; P32-LABEL: @test10_struct_noinbounds_i16( +; P32: %1 = sext i16 %x to i32 +; P32: getelementptr %Foo* @GS, i32 %1, i32 0 + %p = getelementptr %Foo* @GS, i16 %x, i32 0 + %q = load i32* %p + %r = icmp eq i32 %q, 0 + ret i1 %r +} + +define i1 @test10_struct_arr(i32 %x) { +; NODL-LABEL: @test10_struct_arr( +; NODL-NEXT: %r = icmp ne i32 %x, 1 +; NODL-NEXT: ret i1 %r + +; P32-LABEL: @test10_struct_arr( +; P32-NEXT: %r = icmp ne i32 %x, 1 +; P32-NEXT: ret i1 %r + %p = getelementptr inbounds [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2 + %q = load i32* %p + %r = icmp eq i32 %q, 9 + ret i1 %r +} + +define i1 @test10_struct_arr_noinbounds(i32 %x) { +; NODL-LABEL: 
@test10_struct_arr_noinbounds( +; NODL-NEXT %p = getelementptr [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2 + +; P32-LABEL: @test10_struct_arr_noinbounds( +; P32-NEXT %p = getelementptr [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2 + %p = getelementptr [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2 + %q = load i32* %p + %r = icmp eq i32 %q, 9 + ret i1 %r +} + +define i1 @test10_struct_arr_i16(i16 %x) { +; NODL-LABEL: @test10_struct_arr_i16( +; NODL-NEXT: %r = icmp ne i16 %x, 1 +; NODL-NEXT: ret i1 %r + +; P32-LABEL: @test10_struct_arr_i16( +; P32-NEXT: %r = icmp ne i16 %x, 1 +; P32-NEXT: ret i1 %r + %p = getelementptr inbounds [4 x %Foo]* @GStructArr, i16 0, i16 %x, i32 2 + %q = load i32* %p + %r = icmp eq i32 %q, 9 + ret i1 %r +} + +define i1 @test10_struct_arr_i64(i64 %x) { +; NODL-LABEL: @test10_struct_arr_i64( +; NODL-NEXT: %r = icmp ne i64 %x, 1 +; NODL-NEXT: ret i1 %r + +; P32-LABEL: @test10_struct_arr_i64( +; P32-NEXT: trunc i64 %x to i32 +; P32-NEXT: %r = icmp ne i32 %1, 1 +; P32-NEXT: ret i1 %r + %p = getelementptr inbounds [4 x %Foo]* @GStructArr, i64 0, i64 %x, i32 2 + %q = load i32* %p + %r = icmp eq i32 %q, 9 + ret i1 %r +} + +define i1 @test10_struct_arr_noinbounds_i16(i16 %x) { +; NODL-LABEL: @test10_struct_arr_noinbounds_i16( +; NODL-NEXT: %p = getelementptr [4 x %Foo]* @GStructArr, i32 0, i16 %x, i32 2 + +; P32-LABEL: @test10_struct_arr_noinbounds_i16( +; P32-NEXT: %r = icmp ne i16 %x, 1 + %p = getelementptr [4 x %Foo]* @GStructArr, i32 0, i16 %x, i32 2 + %q = load i32* %p + %r = icmp eq i32 %q, 9 + ret i1 %r +} + +define i1 @test10_struct_arr_noinbounds_i64(i64 %x) { +; FIXME: Should be no trunc? 
+; NODL-LABEL: @test10_struct_arr_noinbounds_i64( +; NODL-NEXT: %p = getelementptr [4 x %Foo]* @GStructArr, i32 0, i64 %x, i32 2 + +; P32-LABEL: @test10_struct_arr_noinbounds_i64( +; P32: %r = icmp ne i32 %1, 1 +; P32-NEXT: ret i1 %r + %p = getelementptr [4 x %Foo]* @GStructArr, i32 0, i64 %x, i32 2 + %q = load i32* %p + %r = icmp eq i32 %q, 9 + ret i1 %r } diff --git a/test/Transforms/InstCombine/multi-size-address-space-pointer.ll b/test/Transforms/InstCombine/multi-size-address-space-pointer.ll new file mode 100644 index 0000000..2d88bed --- /dev/null +++ b/test/Transforms/InstCombine/multi-size-address-space-pointer.ll @@ -0,0 +1,112 @@ +; RUN: opt -S -instcombine %s -o - | FileCheck %s +target datalayout = "e-p:32:32:32-p1:64:64:64-p2:8:8:8-p3:16:16:16-p4:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32" + + +define i32 @test_as0(i32 addrspace(0)* %a) { +; CHECK-LABEL: @test_as0( +; CHECK: %arrayidx = getelementptr i32* %a, i32 1 + %arrayidx = getelementptr i32 addrspace(0)* %a, i64 1 + %y = load i32 addrspace(0)* %arrayidx, align 4 + ret i32 %y +} + +define i32 @test_as1(i32 addrspace(1)* %a) { +; CHECK-LABEL: @test_as1( +; CHECK: %arrayidx = getelementptr i32 addrspace(1)* %a, i64 1 + %arrayidx = getelementptr i32 addrspace(1)* %a, i32 1 + %y = load i32 addrspace(1)* %arrayidx, align 4 + ret i32 %y +} + +define i32 @test_as2(i32 addrspace(2)* %a) { +; CHECK-LABEL: @test_as2( +; CHECK: %arrayidx = getelementptr i32 addrspace(2)* %a, i8 1 + %arrayidx = getelementptr i32 addrspace(2)* %a, i32 1 + %y = load i32 addrspace(2)* %arrayidx, align 4 + ret i32 %y +} + +define i32 @test_as3(i32 addrspace(3)* %a) { +; CHECK-LABEL: @test_as3( +; CHECK: %arrayidx = getelementptr i32 addrspace(3)* %a, i16 1 + %arrayidx = getelementptr i32 addrspace(3)* %a, i32 1 + %y = load i32 addrspace(3)* %arrayidx, align 4 + ret i32 %y +} + +define i32 @test_combine_ptrtoint(i32 addrspace(2)* %a) { +; CHECK-LABEL: @test_combine_ptrtoint( +; CHECK-NEXT: %y = load i32 addrspace(2)* %a 
+; CHECK-NEXT: ret i32 %y + %cast = ptrtoint i32 addrspace(2)* %a to i8 + %castback = inttoptr i8 %cast to i32 addrspace(2)* + %y = load i32 addrspace(2)* %castback, align 4 + ret i32 %y +} + +define i8 @test_combine_inttoptr(i8 %a) { +; CHECK-LABEL: @test_combine_inttoptr( +; CHECK-NEXT: ret i8 %a + %cast = inttoptr i8 %a to i32 addrspace(2)* + %castback = ptrtoint i32 addrspace(2)* %cast to i8 + ret i8 %castback +} + +define i32 @test_combine_vector_ptrtoint(<2 x i32 addrspace(2)*> %a) { +; CHECK-LABEL: @test_combine_vector_ptrtoint( +; CHECK-NEXT: %p = extractelement <2 x i32 addrspace(2)*> %a, i32 0 +; CHECK-NEXT: %y = load i32 addrspace(2)* %p, align 4 +; CHECK-NEXT: ret i32 %y + %cast = ptrtoint <2 x i32 addrspace(2)*> %a to <2 x i8> + %castback = inttoptr <2 x i8> %cast to <2 x i32 addrspace(2)*> + %p = extractelement <2 x i32 addrspace(2)*> %castback, i32 0 + %y = load i32 addrspace(2)* %p, align 4 + ret i32 %y +} + +define <2 x i8> @test_combine_vector_inttoptr(<2 x i8> %a) { +; CHECK-LABEL: @test_combine_vector_inttoptr( +; CHECK-NEXT: ret <2 x i8> %a + %cast = inttoptr <2 x i8> %a to <2 x i32 addrspace(2)*> + %castback = ptrtoint <2 x i32 addrspace(2)*> %cast to <2 x i8> + ret <2 x i8> %castback +} + +; Check that the GEP index is changed to the address space integer type (i64 -> i8) +define i32 addrspace(2)* @shrink_gep_constant_index_64_as2(i32 addrspace(2)* %p) { +; CHECK-LABEL: @shrink_gep_constant_index_64_as2( +; CHECK-NEXT: getelementptr i32 addrspace(2)* %p, i8 1 + %ret = getelementptr i32 addrspace(2)* %p, i64 1 + ret i32 addrspace(2)* %ret +} + +define i32 addrspace(2)* @shrink_gep_constant_index_32_as2(i32 addrspace(2)* %p) { +; CHECK-LABEL: @shrink_gep_constant_index_32_as2( +; CHECK-NEXT: getelementptr i32 addrspace(2)* %p, i8 1 + %ret = getelementptr i32 addrspace(2)* %p, i32 1 + ret i32 addrspace(2)* %ret +} + +define i32 addrspace(3)* @shrink_gep_constant_index_64_as3(i32 addrspace(3)* %p) { +; CHECK-LABEL: 
@shrink_gep_constant_index_64_as3( +; CHECK-NEXT: getelementptr i32 addrspace(3)* %p, i16 1 + %ret = getelementptr i32 addrspace(3)* %p, i64 1 + ret i32 addrspace(3)* %ret +} + +define i32 addrspace(2)* @shrink_gep_variable_index_64_as2(i32 addrspace(2)* %p, i64 %idx) { +; CHECK-LABEL: @shrink_gep_variable_index_64_as2( +; CHECK-NEXT: %1 = trunc i64 %idx to i8 +; CHECK-NEXT: getelementptr i32 addrspace(2)* %p, i8 %1 + %ret = getelementptr i32 addrspace(2)* %p, i64 %idx + ret i32 addrspace(2)* %ret +} + +define i32 addrspace(1)* @grow_gep_variable_index_8_as1(i32 addrspace(1)* %p, i8 %idx) { +; CHECK-LABEL: @grow_gep_variable_index_8_as1( +; CHECK-NEXT: %1 = sext i8 %idx to i64 +; CHECK-NEXT: getelementptr i32 addrspace(1)* %p, i64 %1 + %ret = getelementptr i32 addrspace(1)* %p, i8 %idx + ret i32 addrspace(1)* %ret +} + diff --git a/test/Transforms/InstCombine/objsize-address-space.ll b/test/Transforms/InstCombine/objsize-address-space.ll new file mode 100644 index 0000000..9cb6884 --- /dev/null +++ b/test/Transforms/InstCombine/objsize-address-space.ll @@ -0,0 +1,80 @@ +; RUN: opt -S -instcombine -o - %s | FileCheck %s +target datalayout = "e-p:32:32:32-p1:64:64:64-p2:8:8:8-p3:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32" + +declare i32 @llvm.objectsize.i32.p0i8(i8*, i1) nounwind readonly +declare i32 @llvm.objectsize.i32.p1i8(i8 addrspace(1)*, i1) nounwind readonly +declare i32 @llvm.objectsize.i32.p2i8(i8 addrspace(2)*, i1) nounwind readonly +declare i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)*, i1) nounwind readonly +declare i16 @llvm.objectsize.i16.p3i8(i8 addrspace(3)*, i1) nounwind readonly + +@array_as2 = private addrspace(2) global [60 x i8] zeroinitializer, align 4 + +@array_as1_pointers = private global [10 x i32 addrspace(1)*] zeroinitializer, align 4 +@array_as2_pointers = private global [24 x i32 addrspace(2)*] zeroinitializer, align 4 +@array_as3_pointers = private global [42 x i32 addrspace(3)*] zeroinitializer, align 4 + 
+@array_as2_as1_pointer_pointers = private global [16 x i32 addrspace(2)* addrspace(1)*] zeroinitializer, align 4 + + +@a_as3 = private addrspace(3) global [60 x i8] zeroinitializer, align 1 + +define i32 @foo_as3() nounwind { +; CHECK-LABEL: @foo_as3( +; CHECK-NEXT: ret i32 60 + %1 = call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* getelementptr inbounds ([60 x i8] addrspace(3)* @a_as3, i32 0, i32 0), i1 false) + ret i32 %1 +} + +define i16 @foo_as3_i16() nounwind { +; CHECK-LABEL: @foo_as3_i16( +; CHECK-NEXT: ret i16 60 + %1 = call i16 @llvm.objectsize.i16.p3i8(i8 addrspace(3)* getelementptr inbounds ([60 x i8] addrspace(3)* @a_as3, i32 0, i32 0), i1 false) + ret i16 %1 +} + +@a_alias = alias weak [60 x i8] addrspace(3)* @a_as3 +define i32 @foo_alias() nounwind { + %1 = call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* getelementptr inbounds ([60 x i8] addrspace(3)* @a_alias, i32 0, i32 0), i1 false) + ret i32 %1 +} + +define i32 @array_as2_size() { +; CHECK-LABEL: @array_as2_size( +; CHECK-NEXT: ret i32 60 + %bc = bitcast [60 x i8] addrspace(2)* @array_as2 to i8 addrspace(2)* + %1 = call i32 @llvm.objectsize.i32.p2i8(i8 addrspace(2)* %bc, i1 false) + ret i32 %1 +} + +define i32 @pointer_array_as1() { +; CHECK-LABEL: @pointer_array_as1( +; CHECK-NEXT: ret i32 80 + %bc = addrspacecast [10 x i32 addrspace(1)*]* @array_as1_pointers to i8 addrspace(1)* + %1 = call i32 @llvm.objectsize.i32.p1i8(i8 addrspace(1)* %bc, i1 false) + ret i32 %1 +} + +define i32 @pointer_array_as2() { +; CHECK-LABEL: @pointer_array_as2( +; CHECK-NEXT: ret i32 24 + %bc = bitcast [24 x i32 addrspace(2)*]* @array_as2_pointers to i8* + %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false) + ret i32 %1 +} + +define i32 @pointer_array_as3() { +; CHECK-LABEL: @pointer_array_as3( +; CHECK-NEXT: ret i32 84 + %bc = bitcast [42 x i32 addrspace(3)*]* @array_as3_pointers to i8* + %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false) + ret i32 %1 +} + +define i32 
@pointer_pointer_array_as2_as1() { +; CHECK-LABEL: @pointer_pointer_array_as2_as1( +; CHECK-NEXT: ret i32 128 + %bc = bitcast [16 x i32 addrspace(2)* addrspace(1)*]* @array_as2_as1_pointer_pointers to i8* + %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false) + ret i32 %1 +} + diff --git a/test/Transforms/InstCombine/objsize.ll b/test/Transforms/InstCombine/objsize.ll index b5351e9..6459032 100644 --- a/test/Transforms/InstCombine/objsize.ll +++ b/test/Transforms/InstCombine/objsize.ll @@ -5,11 +5,10 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 @a = private global [60 x i8] zeroinitializer, align 1 ; <[60 x i8]*> @.str = private constant [8 x i8] c"abcdefg\00" ; <[8 x i8]*> - define i32 @foo() nounwind { ; CHECK-LABEL: @foo( ; CHECK-NEXT: ret i32 60 - %1 = call i32 @llvm.objectsize.i32(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i1 false) + %1 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i1 false) ret i32 %1 } @@ -17,7 +16,7 @@ define i8* @bar() nounwind { ; CHECK-LABEL: @bar( entry: %retval = alloca i8* - %0 = call i32 @llvm.objectsize.i32(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i1 false) + %0 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i1 false) %cmp = icmp ne i32 %0, -1 ; CHECK: br i1 true br i1 %cmp, label %cond.true, label %cond.false @@ -34,7 +33,7 @@ cond.false: define i32 @f() nounwind { ; CHECK-LABEL: @f( ; CHECK-NEXT: ret i32 0 - %1 = call i32 @llvm.objectsize.i32(i8* getelementptr ([60 x i8]* @a, i32 1, i32 0), i1 false) + %1 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr ([60 x i8]* @a, i32 1, i32 0), i1 false) ret i32 %1 } @@ -43,16 +42,16 @@ define i32 @f() nounwind { define i1 @baz() nounwind { ; CHECK-LABEL: @baz( ; CHECK-NEXT: objectsize - %1 = tail call i32 @llvm.objectsize.i32(i8* getelementptr inbounds ([0 x i8]* @window, i32 0, i32 0), i1 false) + %1 = tail call 
i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([0 x i8]* @window, i32 0, i32 0), i1 false) %2 = icmp eq i32 %1, -1 ret i1 %2 } define void @test1(i8* %q, i32 %x) nounwind noinline { ; CHECK-LABEL: @test1( -; CHECK: objectsize.i32 +; CHECK: objectsize.i32.p0i8 entry: - %0 = call i32 @llvm.objectsize.i32(i8* getelementptr inbounds ([0 x i8]* @window, i32 0, i32 10), i1 false) ; <i64> [#uses=1] + %0 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([0 x i8]* @window, i32 0, i32 10), i1 false) ; <i64> [#uses=1] %1 = icmp eq i32 %0, -1 ; <i1> [#uses=1] br i1 %1, label %"47", label %"46" @@ -68,7 +67,7 @@ entry: define i32 @test2() nounwind { ; CHECK-LABEL: @test2( ; CHECK-NEXT: ret i32 34 - %1 = call i32 @llvm.objectsize.i32(i8* getelementptr (i8* bitcast ([9 x i32]* @.str5 to i8*), i32 2), i1 false) + %1 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr (i8* bitcast ([9 x i32]* @.str5 to i8*), i32 2), i1 false) ret i32 %1 } @@ -77,7 +76,7 @@ define i32 @test2() nounwind { declare i8* @__memcpy_chk(i8*, i8*, i32, i32) nounwind -declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly +declare i32 @llvm.objectsize.i32.p0i8(i8*, i1) nounwind readonly declare i8* @__inline_memcpy_chk(i8*, i8*, i32) nounwind inlinehint @@ -89,7 +88,7 @@ entry: bb11: %0 = getelementptr inbounds float* getelementptr inbounds ([480 x float]* @array, i32 0, i32 128), i32 -127 ; <float*> [#uses=1] %1 = bitcast float* %0 to i8* ; <i8*> [#uses=1] - %2 = call i32 @llvm.objectsize.i32(i8* %1, i1 false) ; <i32> [#uses=1] + %2 = call i32 @llvm.objectsize.i32.p0i8(i8* %1, i1 false) ; <i32> [#uses=1] %3 = call i8* @__memcpy_chk(i8* undef, i8* undef, i32 512, i32 %2) nounwind ; <i8*> [#uses=0] ; CHECK: unreachable unreachable @@ -111,7 +110,7 @@ define i32 @test4(i8** %esc) nounwind ssp { entry: %0 = alloca %struct.data, align 8 %1 = bitcast %struct.data* %0 to i8* - %2 = call i32 @llvm.objectsize.i32(i8* %1, i1 false) nounwind + %2 = call i32 
@llvm.objectsize.i32.p0i8(i8* %1, i1 false) nounwind ; CHECK-NOT: @llvm.objectsize ; CHECK: @llvm.memset.p0i8.i32(i8* %1, i8 0, i32 1824, i32 8, i1 false) %3 = call i8* @__memset_chk(i8* %1, i32 0, i32 1824, i32 %2) nounwind @@ -126,7 +125,7 @@ define i8* @test5(i32 %n) nounwind ssp { ; CHECK-LABEL: @test5( entry: %0 = tail call noalias i8* @malloc(i32 20) nounwind - %1 = tail call i32 @llvm.objectsize.i32(i8* %0, i1 false) + %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %0, i1 false) %2 = load i8** @s, align 8 ; CHECK-NOT: @llvm.objectsize ; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 10, i32 1, i1 false) @@ -138,7 +137,7 @@ define void @test6(i32 %n) nounwind ssp { ; CHECK-LABEL: @test6( entry: %0 = tail call noalias i8* @malloc(i32 20) nounwind - %1 = tail call i32 @llvm.objectsize.i32(i8* %0, i1 false) + %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %0, i1 false) %2 = load i8** @s, align 8 ; CHECK-NOT: @llvm.objectsize ; CHECK: @__memcpy_chk(i8* %0, i8* %1, i32 30, i32 20) @@ -155,7 +154,7 @@ define i32 @test7(i8** %esc) { %alloc = call noalias i8* @malloc(i32 48) nounwind store i8* %alloc, i8** %esc %gep = getelementptr inbounds i8* %alloc, i32 16 - %objsize = call i32 @llvm.objectsize.i32(i8* %gep, i1 false) nounwind readonly + %objsize = call i32 @llvm.objectsize.i32.p0i8(i8* %gep, i1 false) nounwind readonly ; CHECK: ret i32 32 ret i32 %objsize } @@ -167,7 +166,7 @@ define i32 @test8(i8** %esc) { %alloc = call noalias i8* @calloc(i32 5, i32 7) nounwind store i8* %alloc, i8** %esc %gep = getelementptr inbounds i8* %alloc, i32 5 - %objsize = call i32 @llvm.objectsize.i32(i8* %gep, i1 false) nounwind readonly + %objsize = call i32 @llvm.objectsize.i32.p0i8(i8* %gep, i1 false) nounwind readonly ; CHECK: ret i32 30 ret i32 %objsize } @@ -179,7 +178,7 @@ declare noalias i8* @strndup(i8* nocapture, i32) nounwind define i32 @test9(i8** %esc) { %call = tail call i8* @strdup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0)) nounwind store 
i8* %call, i8** %esc, align 8 - %1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true) + %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true) ; CHECK: ret i32 8 ret i32 %1 } @@ -188,7 +187,7 @@ define i32 @test9(i8** %esc) { define i32 @test10(i8** %esc) { %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0), i32 3) nounwind store i8* %call, i8** %esc, align 8 - %1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true) + %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true) ; CHECK: ret i32 4 ret i32 %1 } @@ -197,7 +196,7 @@ define i32 @test10(i8** %esc) { define i32 @test11(i8** %esc) { %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0), i32 7) nounwind store i8* %call, i8** %esc, align 8 - %1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true) + %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true) ; CHECK: ret i32 8 ret i32 %1 } @@ -206,7 +205,7 @@ define i32 @test11(i8** %esc) { define i32 @test12(i8** %esc) { %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0), i32 8) nounwind store i8* %call, i8** %esc, align 8 - %1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true) + %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true) ; CHECK: ret i32 8 ret i32 %1 } @@ -215,7 +214,7 @@ define i32 @test12(i8** %esc) { define i32 @test13(i8** %esc) { %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0), i32 57) nounwind store i8* %call, i8** %esc, align 8 - %1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true) + %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true) ; CHECK: ret i32 8 ret i32 %1 } @@ -229,8 +228,8 @@ entry: xpto: %select = select i1 %bool, i8* %select, i8* %a %select2 = select i1 %bool, i8* %a, i8* %select2 - %0 = tail call i32 @llvm.objectsize.i32(i8* %select, i1 true) - %1 = tail call i32 @llvm.objectsize.i32(i8* %select2, i1 
true) + %0 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %select, i1 true) + %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %select2, i1 true) %2 = add i32 %0, %1 ; CHECK: ret i32 undef ret i32 %2 @@ -249,7 +248,7 @@ entry: xpto: %gep2 = getelementptr i8* %gep, i32 1 %gep = getelementptr i8* %gep2, i32 1 - %o = call i32 @llvm.objectsize.i32(i8* %gep, i1 true) + %o = call i32 @llvm.objectsize.i32.p0i8(i8* %gep, i1 true) ; CHECK: ret i32 undef ret i32 %o @@ -263,7 +262,7 @@ return: ; CHECK-NEXT: ret i32 60 define i32 @test18() { %bc = bitcast [60 x i8]* @globalalias to i8* - %1 = call i32 @llvm.objectsize.i32(i8* %bc, i1 false) + %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false) ret i32 %1 } @@ -273,6 +272,7 @@ define i32 @test18() { ; CHECK: llvm.objectsize define i32 @test19() { %bc = bitcast [60 x i8]* @globalalias2 to i8* - %1 = call i32 @llvm.objectsize.i32(i8* %bc, i1 false) + %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false) ret i32 %1 } + diff --git a/test/Transforms/InstCombine/onehot_merge.ll b/test/Transforms/InstCombine/onehot_merge.ll new file mode 100644 index 0000000..51f955c --- /dev/null +++ b/test/Transforms/InstCombine/onehot_merge.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +;CHECK: @and_consts +;CHECK: and i32 %k, 12 +;CHECK: icmp ne i32 %0, 12 +;CHECK: ret +define i1 @and_consts(i32 %k, i32 %c1, i32 %c2) { +bb: + %tmp1 = and i32 4, %k + %tmp2 = icmp eq i32 %tmp1, 0 + %tmp5 = and i32 8, %k + %tmp6 = icmp eq i32 %tmp5, 0 + %or = or i1 %tmp2, %tmp6 + ret i1 %or +} + +;CHECK: @foo1_and +;CHECK: shl i32 1, %c1 +;CHECK-NEXT: shl i32 1, %c2 +;CHECK-NEXT: or i32 +;CHECK-NEXT: and i32 +;CHECK-NEXT: icmp ne i32 %1, %0 +;CHECK: ret +define i1 @foo1_and(i32 %k, i32 %c1, i32 %c2) { +bb: + %tmp = shl i32 1, %c1 + %tmp4 = shl i32 1, %c2 + %tmp1 = and i32 %tmp, %k + %tmp2 = icmp eq i32 %tmp1, 0 + %tmp5 = and i32 %tmp4, %k + %tmp6 = icmp eq i32 %tmp5, 0 + %or = or i1 %tmp2, %tmp6 + ret i1 %or +} + diff --git 
a/test/Transforms/InstCombine/phi-select-constexpr.ll b/test/Transforms/InstCombine/phi-select-constexpr.ll new file mode 100644 index 0000000..054e069 --- /dev/null +++ b/test/Transforms/InstCombine/phi-select-constexpr.ll @@ -0,0 +1,19 @@ +; RUN: opt < %s -S -instcombine | FileCheck %s +@A = extern_weak global i32, align 4 +@B = extern_weak global i32, align 4 + +define i32 @foo(i1 %which) { +entry: + br i1 %which, label %final, label %delay + +delay: + br label %final + +; CHECK-LABEL: final: +; CHECK: phi i32 [ 1, %entry ], [ select (i1 icmp eq (i32* @A, i32* @B), i32 2, i32 1), %delay ] +final: + %use2 = phi i1 [ false, %entry ], [ icmp eq (i32* @A, i32* @B), %delay ] + %value = select i1 %use2, i32 2, i32 1 + ret i32 %value +} + diff --git a/test/Transforms/InstCombine/pow-1.ll b/test/Transforms/InstCombine/pow-1.ll index 0fdafeb..9f1d073 100644 --- a/test/Transforms/InstCombine/pow-1.ll +++ b/test/Transforms/InstCombine/pow-1.ll @@ -151,4 +151,17 @@ define double @test_simplify16(double %x) { ; CHECK-NEXT: ret double [[RECIPROCAL]] } +declare double @llvm.pow.f64(double %Val, double %Power) +define double @test_simplify17(double %x) { +; CHECK-LABEL: @test_simplify17( + %retval = call double @llvm.pow.f64(double %x, double 0.5) +; CHECK-NEXT: [[SQRT:%[a-z0-9]+]] = call double @sqrt(double %x) [[NUW_RO]] +; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @fabs(double [[SQRT]]) [[NUW_RO]] +; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq double %x, 0xFFF0000000000000 +; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], double 0x7FF0000000000000, double [[FABS]] + ret double %retval +; CHECK-NEXT: ret double [[SELECT]] +} + ; CHECK: attributes [[NUW_RO]] = { nounwind readonly } + diff --git a/test/Transforms/InstCombine/pow-3.ll b/test/Transforms/InstCombine/pow-3.ll new file mode 100644 index 0000000..1c5cf91 --- /dev/null +++ b/test/Transforms/InstCombine/pow-3.ll @@ -0,0 +1,12 @@ +; Test that the pow won't get simplified to sqrt(fabs) when they are not 
available. +; +; RUN: opt < %s -disable-simplify-libcalls -instcombine -S | FileCheck %s + +declare double @llvm.pow.f64(double %Val, double %Power) + +define double @test_simplify_unavailable(double %x) { +; CHECK-LABEL: @test_simplify_unavailable( + %retval = call double @llvm.pow.f64(double %x, double 0.5) +; CHECK-NEXT: call double @llvm.pow.f64(double %x, double 5.000000e-01) + ret double %retval +} diff --git a/test/Transforms/InstCombine/pr17827.ll b/test/Transforms/InstCombine/pr17827.ll new file mode 100644 index 0000000..a8b5926 --- /dev/null +++ b/test/Transforms/InstCombine/pr17827.ll @@ -0,0 +1,74 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +; With left shift, the comparison should not be modified. +; CHECK-LABEL: @test_shift_and_cmp_not_changed1( +; CHECK: icmp slt i8 %andp, 32 +define i1 @test_shift_and_cmp_not_changed1(i8 %p) #0 { +entry: + %shlp = shl i8 %p, 5 + %andp = and i8 %shlp, -64 + %cmp = icmp slt i8 %andp, 32 + ret i1 %cmp +} + +; With arithmetic right shift, the comparison should not be modified. +; CHECK-LABEL: @test_shift_and_cmp_not_changed2( +; CHECK: icmp slt i8 %andp, 32 +define i1 @test_shift_and_cmp_not_changed2(i8 %p) #0 { +entry: + %shlp = ashr i8 %p, 5 + %andp = and i8 %shlp, -64 + %cmp = icmp slt i8 %andp, 32 + ret i1 %cmp +} + +; This should simplify functionally to the left shift case. +; The extra input parameter should be optimized away. +; CHECK-LABEL: @test_shift_and_cmp_changed1( +; CHECK: %andp = shl i8 %p, 5 +; CHECK-NEXT: %shl = and i8 %andp, -64 +; CHECK-NEXT: %cmp = icmp slt i8 %shl, 32 +define i1 @test_shift_and_cmp_changed1(i8 %p, i8 %q) #0 { +entry: + %andp = and i8 %p, 6 + %andq = and i8 %q, 8 + %or = or i8 %andq, %andp + %shl = shl i8 %or, 5 + %ashr = ashr i8 %shl, 5 + %cmp = icmp slt i8 %ashr, 1 + ret i1 %cmp +} + +; Unsigned compare allows a transformation to compare against 0. 
+; CHECK-LABEL: @test_shift_and_cmp_changed2( +; CHECK: icmp eq i8 %andp, 0 +define i1 @test_shift_and_cmp_changed2(i8 %p) #0 { +entry: + %shlp = shl i8 %p, 5 + %andp = and i8 %shlp, -64 + %cmp = icmp ult i8 %andp, 32 + ret i1 %cmp +} + +; nsw on the shift should not affect the comparison. +; CHECK-LABEL: @test_shift_and_cmp_changed3( +; CHECK: icmp slt i8 %andp, 32 +define i1 @test_shift_and_cmp_changed3(i8 %p) #0 { +entry: + %shlp = shl nsw i8 %p, 5 + %andp = and i8 %shlp, -64 + %cmp = icmp slt i8 %andp, 32 + ret i1 %cmp +} + +; Logical shift right allows a return true because the 'and' guarantees no bits are set. +; CHECK-LABEL: @test_shift_and_cmp_changed4( +; CHECK: ret i1 true +define i1 @test_shift_and_cmp_changed4(i8 %p) #0 { +entry: + %shlp = lshr i8 %p, 5 + %andp = and i8 %shlp, -64 + %cmp = icmp slt i8 %andp, 32 + ret i1 %cmp +} + diff --git a/test/Transforms/InstCombine/printf-1.ll b/test/Transforms/InstCombine/printf-1.ll index 59d0f16..c98ddd5 100644 --- a/test/Transforms/InstCombine/printf-1.ll +++ b/test/Transforms/InstCombine/printf-1.ll @@ -1,7 +1,7 @@ ; Test that the printf library call simplifier works correctly. ; ; RUN: opt < %s -instcombine -S | FileCheck %s -; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=IPRINTF +; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=CHECK-IPRINTF target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" diff --git a/test/Transforms/InstCombine/select-2.ll b/test/Transforms/InstCombine/select-2.ll index a76addc..5b9deb4 100644 --- a/test/Transforms/InstCombine/select-2.ll +++ b/test/Transforms/InstCombine/select-2.ll @@ -1,4 +1,7 @@ -; RUN: opt < %s -instcombine -S | grep select | count 2 +; RUN: opt < %s -instcombine -S | FileCheck %s + +; CHECK: select +; CHECK: select ; Make sure instcombine don't fold select into operands. 
We don't want to emit ; select of two integers unless it's selecting 0 / 1. diff --git a/test/Transforms/InstCombine/select-extractelement.ll b/test/Transforms/InstCombine/select-extractelement.ll new file mode 100644 index 0000000..e7ea851 --- /dev/null +++ b/test/Transforms/InstCombine/select-extractelement.ll @@ -0,0 +1,102 @@ +; RUN: opt -S -instcombine < %s | FileCheck %s + +declare void @v4float_user(<4 x float>) #0 + + + +define float @extract_one_select(<4 x float> %a, <4 x float> %b, i32 %c) #0 { +; CHECK-LABEL: @extract_one_select( +; CHECK-NOT: select i1 {{.*}}, <4 x float> + %cmp = icmp ne i32 %c, 0 + %sel = select i1 %cmp, <4 x float> %a, <4 x float> %b + %extract = extractelement <4 x float> %sel, i32 2 + ret float %extract +} + +; Multiple extractelements +define <2 x float> @extract_two_select(<4 x float> %a, <4 x float> %b, i32 %c) #0 { +; CHECK-LABEL: @extract_two_select( +; CHECK: select i1 {{.*}}, <4 x float> + %cmp = icmp ne i32 %c, 0 + %sel = select i1 %cmp, <4 x float> %a, <4 x float> %b + %extract1 = extractelement <4 x float> %sel, i32 1 + %extract2 = extractelement <4 x float> %sel, i32 2 + %build1 = insertelement <2 x float> undef, float %extract1, i32 0 + %build2 = insertelement <2 x float> %build1, float %extract2, i32 1 + ret <2 x float> %build2 +} + +; Select has an extra non-extractelement user, don't change it +define float @extract_one_select_user(<4 x float> %a, <4 x float> %b, i32 %c) #0 { +; CHECK-LABEL: @extract_one_select_user( +; CHECK: select i1 {{.*}}, <4 x float> + %cmp = icmp ne i32 %c, 0 + %sel = select i1 %cmp, <4 x float> %a, <4 x float> %b + %extract = extractelement <4 x float> %sel, i32 2 + call void @v4float_user(<4 x float> %sel) + ret float %extract +} + +define float @extract_one_vselect_user(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { +; CHECK-LABEL: @extract_one_vselect_user( +; CHECK: select <4 x i1> {{.*}}, <4 x float> + %cmp = icmp ne <4 x i32> %c, zeroinitializer + %sel = select <4 x i1> %cmp, <4 x 
float> %a, <4 x float> %b + %extract = extractelement <4 x float> %sel, i32 2 + call void @v4float_user(<4 x float> %sel) + ret float %extract +} + +; Extract from a vector select +define float @extract_one_vselect(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { +; CHECK-LABEL: @extract_one_vselect( +; CHECK-NOT: select <4 x i1> + %cmp = icmp ne <4 x i32> %c, zeroinitializer + %select = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b + %extract = extractelement <4 x float> %select, i32 0 + ret float %extract +} + +; Multiple extractelements from a vector select +define <2 x float> @extract_two_vselect(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { +; CHECK-LABEL: @extract_two_vselect( +; CHECK-NOT: select i1 {{.*}}, <4 x float> + %cmp = icmp ne <4 x i32> %c, zeroinitializer + %sel = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b + %extract1 = extractelement <4 x float> %sel, i32 1 + %extract2 = extractelement <4 x float> %sel, i32 2 + %build1 = insertelement <2 x float> undef, float %extract1, i32 0 + %build2 = insertelement <2 x float> %build1, float %extract2, i32 1 + ret <2 x float> %build2 +} + +; All the vector selects should be decomposed into scalar selects +; Test multiple extractelements +define <4 x float> @simple_vector_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { +; CHECK-LABEL: @simple_vector_select( +; CHECK-NOT: select i1 {{.*}}, <4 x float> +entry: + %0 = extractelement <4 x i32> %c, i32 0 + %tobool = icmp ne i32 %0, 0 + %a.sink = select i1 %tobool, <4 x float> %a, <4 x float> %b + %1 = extractelement <4 x float> %a.sink, i32 0 + %2 = insertelement <4 x float> undef, float %1, i32 0 + %3 = extractelement <4 x i32> %c, i32 1 + %tobool1 = icmp ne i32 %3, 0 + %a.sink1 = select i1 %tobool1, <4 x float> %a, <4 x float> %b + %4 = extractelement <4 x float> %a.sink1, i32 1 + %5 = insertelement <4 x float> %2, float %4, i32 1 + %6 = extractelement <4 x i32> %c, i32 2 + %tobool6 = icmp ne i32 %6, 0 + %a.sink2 = select i1 
%tobool6, <4 x float> %a, <4 x float> %b + %7 = extractelement <4 x float> %a.sink2, i32 2 + %8 = insertelement <4 x float> %5, float %7, i32 2 + %9 = extractelement <4 x i32> %c, i32 3 + %tobool11 = icmp ne i32 %9, 0 + %a.sink3 = select i1 %tobool11, <4 x float> %a, <4 x float> %b + %10 = extractelement <4 x float> %a.sink3, i32 3 + %11 = insertelement <4 x float> %8, float %10, i32 3 + ret <4 x float> %11 +} + +attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll index c7809f7..1458bde 100644 --- a/test/Transforms/InstCombine/select.ll +++ b/test/Transforms/InstCombine/select.ll @@ -986,6 +986,16 @@ define i32 @select_icmp_ne_0_and_8_or_1073741824(i8 %x, i32 %y) { ret i32 %select } +; We can't combine here, because the cmp is scalar and the or vector. +; Just make sure we don't assert. 
+define <2 x i32> @select_icmp_eq_and_1_0_or_vector_of_2s(i32 %x, <2 x i32> %y) { + %and = and i32 %x, 1 + %cmp = icmp eq i32 %and, 0 + %or = or <2 x i32> %y, <i32 2, i32 2> + %select = select i1 %cmp, <2 x i32> %y, <2 x i32> %or + ret <2 x i32> %select +} + define i32 @test65(i64 %x) { %1 = and i64 %x, 16 %2 = icmp ne i64 %1, 0 diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll index 0bdab13..b1082f0 100644 --- a/test/Transforms/InstCombine/shift.ll +++ b/test/Transforms/InstCombine/shift.ll @@ -744,3 +744,39 @@ define i32 @test62(i32 %x) { ; CHECK-LABEL: @test62( ; CHECK: ashr exact i32 %x, 3 } + +; PR17026 +; CHECK-LABEL: @test63( +; CHECK-NOT: sh +; CHECK: ret +define void @test63(i128 %arg) { +bb: + br i1 undef, label %bb1, label %bb12 + +bb1: ; preds = %bb11, %bb + br label %bb2 + +bb2: ; preds = %bb7, %bb1 + br i1 undef, label %bb3, label %bb7 + +bb3: ; preds = %bb2 + %tmp = lshr i128 %arg, 36893488147419103232 + %tmp4 = shl i128 %tmp, 0 + %tmp5 = or i128 %tmp4, undef + %tmp6 = trunc i128 %tmp5 to i16 + br label %bb8 + +bb7: ; preds = %bb2 + br i1 undef, label %bb8, label %bb2 + +bb8: ; preds = %bb7, %bb3 + %tmp9 = phi i16 [ %tmp6, %bb3 ], [ undef, %bb7 ] + %tmp10 = icmp eq i16 %tmp9, 0 + br i1 %tmp10, label %bb11, label %bb12 + +bb11: ; preds = %bb8 + br i1 undef, label %bb1, label %bb12 + +bb12: ; preds = %bb11, %bb8, %bb + ret void +} diff --git a/test/Transforms/InstCombine/sincospi.ll b/test/Transforms/InstCombine/sincospi.ll new file mode 100644 index 0000000..0d1a602 --- /dev/null +++ b/test/Transforms/InstCombine/sincospi.ll @@ -0,0 +1,91 @@ +; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.9 | FileCheck %s --check-prefix=CHECK-FLOAT-IN-VEC +; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios7.0 | FileCheck %s +; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.8 | FileCheck %s --check-prefix=CHECK-NO-SINCOS +; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios6.0 | FileCheck %s 
--check-prefix=CHECK-NO-SINCOS +; RUN: opt -instcombine -S < %s -mtriple=x86_64-none-linux-gnu | FileCheck %s --check-prefix=CHECK-NO-SINCOS + + +attributes #0 = { readnone nounwind } + +declare float @__sinpif(float %x) #0 +declare float @__cospif(float %x) #0 + +declare double @__sinpi(double %x) #0 +declare double @__cospi(double %x) #0 + +@var32 = global float 0.0 +@var64 = global double 0.0 + +define float @test_instbased_f32() { + %val = load float* @var32 + %sin = call float @__sinpif(float %val) #0 + %cos = call float @__cospif(float %val) #0 + %res = fadd float %sin, %cos + ret float %res +; CHECK-FLOAT-IN-VEC: [[VAL:%[a-z0-9]+]] = load float* @var32 +; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call <2 x float> @__sincospi_stretf(float [[VAL]]) +; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 0 +; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 1 + +; CHECK: [[VAL:%[a-z0-9]+]] = load float* @var32 +; CHECK: [[SINCOS:%[a-z0-9]+]] = call { float, float } @__sincospi_stretf(float [[VAL]]) +; CHECK: extractvalue { float, float } [[SINCOS]], 0 +; CHECK: extractvalue { float, float } [[SINCOS]], 1 + +; CHECK-NO-SINCOS: call float @__sinpif +; CHECK-NO-SINCOS: call float @__cospif +} + +define float @test_constant_f32() { + %sin = call float @__sinpif(float 1.0) #0 + %cos = call float @__cospif(float 1.0) #0 + %res = fadd float %sin, %cos + ret float %res +; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call <2 x float> @__sincospi_stretf(float 1.000000e+00) +; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 0 +; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 1 + +; CHECK: [[SINCOS:%[a-z0-9]+]] = call { float, float } @__sincospi_stretf(float 1.000000e+00) +; CHECK: extractvalue { float, float } [[SINCOS]], 0 +; CHECK: extractvalue { float, float } [[SINCOS]], 1 + +; CHECK-NO-SINCOS: call float @__sinpif +; CHECK-NO-SINCOS: call float @__cospif +} + +define double @test_instbased_f64() { + %val = load 
double* @var64 + %sin = call double @__sinpi(double %val) #0 + %cos = call double @__cospi(double %val) #0 + %res = fadd double %sin, %cos + ret double %res +; CHECK-FLOAT-IN-VEC: [[VAL:%[a-z0-9]+]] = load double* @var64 +; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double [[VAL]]) +; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 0 +; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 1 + +; CHECK: [[VAL:%[a-z0-9]+]] = load double* @var64 +; CHECK: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double [[VAL]]) +; CHECK: extractvalue { double, double } [[SINCOS]], 0 +; CHECK: extractvalue { double, double } [[SINCOS]], 1 + +; CHECK-NO-SINCOS: call double @__sinpi +; CHECK-NO-SINCOS: call double @__cospi +} + +define double @test_constant_f64() { + %sin = call double @__sinpi(double 1.0) #0 + %cos = call double @__cospi(double 1.0) #0 + %res = fadd double %sin, %cos + ret double %res +; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double 1.000000e+00) +; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 0 +; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 1 + +; CHECK: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double 1.000000e+00) +; CHECK: extractvalue { double, double } [[SINCOS]], 0 +; CHECK: extractvalue { double, double } [[SINCOS]], 1 + +; CHECK-NO-SINCOS: call double @__sinpi +; CHECK-NO-SINCOS: call double @__cospi +} diff --git a/test/Transforms/InstCombine/sprintf-1.ll b/test/Transforms/InstCombine/sprintf-1.ll index 6d0ab13..78dd7aa 100644 --- a/test/Transforms/InstCombine/sprintf-1.ll +++ b/test/Transforms/InstCombine/sprintf-1.ll @@ -1,7 +1,7 @@ ; Test that the sprintf library call simplifier works correctly. 
; ; RUN: opt < %s -instcombine -S | FileCheck %s -; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=IPRINTF +; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=CHECK-IPRINTF target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" diff --git a/test/Transforms/InstCombine/store.ll b/test/Transforms/InstCombine/store.ll index 9b666b9..b64c800 100644 --- a/test/Transforms/InstCombine/store.ll +++ b/test/Transforms/InstCombine/store.ll @@ -113,7 +113,8 @@ for.end: ; preds = %for.cond ; CHECK-NEXT: store i32 %storemerge, i32* %gi, align 4, !tbaa !0 } -!0 = metadata !{metadata !"int", metadata !1} +!0 = metadata !{metadata !4, metadata !4, i64 0} !1 = metadata !{metadata !"omnipotent char", metadata !2} !2 = metadata !{metadata !"Simple C/C++ TBAA"} !3 = metadata !{metadata !"float", metadata !1} +!4 = metadata !{metadata !"int", metadata !1} diff --git a/test/Transforms/InstCombine/stpcpy_chk-1.ll b/test/Transforms/InstCombine/stpcpy_chk-1.ll index a6d5585..8a02529 100644 --- a/test/Transforms/InstCombine/stpcpy_chk-1.ll +++ b/test/Transforms/InstCombine/stpcpy_chk-1.ll @@ -61,7 +61,7 @@ define i8* @test_simplify5() { %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0 ; CHECK: @__memcpy_chk - %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false) + %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false) %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 %len) ; CHECK: ret i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 11) ret i8* %ret @@ -75,7 +75,7 @@ define i8* @test_simplify6() { ; CHECK: [[LEN:%[a-z]+]] = call i32 @strlen ; CHECK-NEXT: getelementptr inbounds [60 x i8]* @a, i32 0, i32 [[LEN]] - %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false) + %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false) %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %dst, i32 %len) ret i8* %ret } 
@@ -93,4 +93,4 @@ define void @test_no_simplify1() { } declare i8* @__stpcpy_chk(i8*, i8*, i32) nounwind -declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly +declare i32 @llvm.objectsize.i32.p0i8(i8*, i1) nounwind readonly diff --git a/test/Transforms/InstCombine/strchr-1.ll b/test/Transforms/InstCombine/strchr-1.ll index 5efab9e..d2c9894 100644 --- a/test/Transforms/InstCombine/strchr-1.ll +++ b/test/Transforms/InstCombine/strchr-1.ll @@ -52,3 +52,14 @@ define void @test_simplify4(i32 %chr) { store i8* %dst, i8** @chp ret void } + +define void @test_simplify5() { +; CHECK: store i8* getelementptr inbounds ([14 x i8]* @hello, i32 0, i32 13) +; CHECK-NOT: call i8* @strchr +; CHECK: ret void + + %src = getelementptr [14 x i8]* @hello, i32 0, i32 0 + %dst = call i8* @strchr(i8* %src, i32 65280) + store i8* %dst, i8** @chp + ret void +} diff --git a/test/Transforms/InstCombine/strcpy_chk-1.ll b/test/Transforms/InstCombine/strcpy_chk-1.ll index 5b98cf8..8e7fec7 100644 --- a/test/Transforms/InstCombine/strcpy_chk-1.ll +++ b/test/Transforms/InstCombine/strcpy_chk-1.ll @@ -61,7 +61,7 @@ define void @test_simplify5() { %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0 ; CHECK: @__memcpy_chk - %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false) + %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false) call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 %len) ret void } @@ -73,7 +73,7 @@ define i8* @test_simplify6() { %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 ; CHECK: getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0) - %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false) + %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false) %ret = call i8* @__strcpy_chk(i8* %dst, i8* %dst, i32 %len) ret i8* %ret } @@ -91,4 +91,4 @@ define void @test_no_simplify1() { } declare i8* @__strcpy_chk(i8*, i8*, i32) nounwind -declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly +declare i32 @llvm.objectsize.i32.p0i8(i8*, i1) nounwind 
readonly diff --git a/test/Transforms/InstCombine/strrchr-1.ll b/test/Transforms/InstCombine/strrchr-1.ll index a0bdb22..4615f5f 100644 --- a/test/Transforms/InstCombine/strrchr-1.ll +++ b/test/Transforms/InstCombine/strrchr-1.ll @@ -42,6 +42,17 @@ define void @test_simplify3() { ret void } +define void @test_simplify4() { +; CHECK: store i8* getelementptr inbounds ([14 x i8]* @hello, i32 0, i32 13) +; CHECK-NOT: call i8* @strrchr +; CHECK: ret void + + %src = getelementptr [14 x i8]* @hello, i32 0, i32 0 + %dst = call i8* @strrchr(i8* %src, i32 65280) + store i8* %dst, i8** @chp + ret void +} + define void @test_nosimplify1(i32 %chr) { ; CHECK-LABEL: @test_nosimplify1( ; CHECK: call i8* @strrchr diff --git a/test/Transforms/InstCombine/struct-assign-tbaa.ll b/test/Transforms/InstCombine/struct-assign-tbaa.ll index d7a26fa..c80e31a 100644 --- a/test/Transforms/InstCombine/struct-assign-tbaa.ll +++ b/test/Transforms/InstCombine/struct-assign-tbaa.ll @@ -35,10 +35,12 @@ define i32 (i8*, i32*, double*)*** @test2() { ret i32 (i8*, i32*, double*)*** %tmp2 } -; CHECK: !0 = metadata !{metadata !"float", metadata !1} +; CHECK: !0 = metadata !{metadata !1, metadata !1, i64 0} +; CHECK: !1 = metadata !{metadata !"float", metadata !2} !0 = metadata !{metadata !"Simple C/C++ TBAA"} !1 = metadata !{metadata !"omnipotent char", metadata !0} -!2 = metadata !{metadata !"float", metadata !0} +!2 = metadata !{metadata !5, metadata !5, i64 0} !3 = metadata !{i64 0, i64 4, metadata !2} !4 = metadata !{i64 0, i64 8, null} +!5 = metadata !{metadata !"float", metadata !0} diff --git a/test/Transforms/InstCombine/sub.ll b/test/Transforms/InstCombine/sub.ll index 5449656..36c523b 100644 --- a/test/Transforms/InstCombine/sub.ll +++ b/test/Transforms/InstCombine/sub.ll @@ -1,34 +1,34 @@ -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target datalayout = 
"e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" ; Optimize subtracts. ; ; RUN: opt < %s -instcombine -S | FileCheck %s define i32 @test1(i32 %A) { - %B = sub i32 %A, %A + %B = sub i32 %A, %A ret i32 %B ; CHECK-LABEL: @test1( ; CHECK: ret i32 0 } define i32 @test2(i32 %A) { - %B = sub i32 %A, 0 + %B = sub i32 %A, 0 ret i32 %B ; CHECK-LABEL: @test2( ; CHECK: ret i32 %A } define i32 @test3(i32 %A) { - %B = sub i32 0, %A - %C = sub i32 0, %B + %B = sub i32 0, %A + %C = sub i32 0, %B ret i32 %C ; CHECK-LABEL: @test3( ; CHECK: ret i32 %A } define i32 @test4(i32 %A, i32 %x) { - %B = sub i32 0, %A - %C = sub i32 %x, %B + %B = sub i32 0, %A + %C = sub i32 %x, %B ret i32 %C ; CHECK-LABEL: @test4( ; CHECK: %C = add i32 %x, %A @@ -36,8 +36,8 @@ define i32 @test4(i32 %A, i32 %x) { } define i32 @test5(i32 %A, i32 %B, i32 %C) { - %D = sub i32 %B, %C - %E = sub i32 %A, %D + %D = sub i32 %B, %C + %E = sub i32 %A, %D ret i32 %E ; CHECK-LABEL: @test5( ; CHECK: %D1 = sub i32 %C, %B @@ -46,17 +46,17 @@ define i32 @test5(i32 %A, i32 %B, i32 %C) { } define i32 @test6(i32 %A, i32 %B) { - %C = and i32 %A, %B - %D = sub i32 %A, %C + %C = and i32 %A, %B + %D = sub i32 %A, %C ret i32 %D ; CHECK-LABEL: @test6( ; CHECK-NEXT: xor i32 %B, -1 -; CHECK-NEXT: %D = and i32 +; CHECK-NEXT: %D = and i32 ; CHECK-NEXT: ret i32 %D } define i32 @test7(i32 %A) { - %B = sub i32 -1, %A + %B = sub i32 -1, %A ret i32 %B ; CHECK-LABEL: @test7( ; CHECK: %B = xor i32 %A, -1 @@ -64,8 +64,8 @@ define i32 @test7(i32 %A) { } define i32 @test8(i32 %A) { - %B = mul i32 9, %A - %C = sub i32 %B, %A + %B = mul i32 9, %A + %C = sub i32 %B, %A ret i32 %C ; CHECK-LABEL: @test8( ; CHECK: %C = shl i32 %A, 3 @@ -73,8 +73,8 @@ define i32 @test8(i32 %A) { } define i32 @test9(i32 %A) { - %B = mul i32 3, %A - %C = sub i32 %A, %B + %B = mul i32 3, %A + %C = sub i32 %A, %B ret i32 %C ; CHECK-LABEL: @test9( ; CHECK: %C = mul i32 %A, -2 @@ 
-82,9 +82,9 @@ define i32 @test9(i32 %A) { } define i32 @test10(i32 %A, i32 %B) { - %C = sub i32 0, %A - %D = sub i32 0, %B - %E = mul i32 %C, %D + %C = sub i32 0, %A + %D = sub i32 0, %B + %E = mul i32 %C, %D ret i32 %E ; CHECK-LABEL: @test10( ; CHECK: %E = mul i32 %A, %B @@ -92,8 +92,8 @@ define i32 @test10(i32 %A, i32 %B) { } define i32 @test10a(i32 %A) { - %C = sub i32 0, %A - %E = mul i32 %C, 7 + %C = sub i32 0, %A + %E = mul i32 %C, 7 ret i32 %E ; CHECK-LABEL: @test10a( ; CHECK: %E = mul i32 %A, -7 @@ -101,8 +101,8 @@ define i32 @test10a(i32 %A) { } define i1 @test11(i8 %A, i8 %B) { - %C = sub i8 %A, %B - %cD = icmp ne i8 %C, 0 + %C = sub i8 %A, %B + %cD = icmp ne i8 %C, 0 ret i1 %cD ; CHECK-LABEL: @test11( ; CHECK: %cD = icmp ne i8 %A, %B @@ -110,8 +110,8 @@ define i1 @test11(i8 %A, i8 %B) { } define i32 @test12(i32 %A) { - %B = ashr i32 %A, 31 - %C = sub i32 0, %B + %B = ashr i32 %A, 31 + %C = sub i32 0, %B ret i32 %C ; CHECK-LABEL: @test12( ; CHECK: %C = lshr i32 %A, 31 @@ -119,8 +119,8 @@ define i32 @test12(i32 %A) { } define i32 @test13(i32 %A) { - %B = lshr i32 %A, 31 - %C = sub i32 0, %B + %B = lshr i32 %A, 31 + %C = sub i32 0, %B ret i32 %C ; CHECK-LABEL: @test13( ; CHECK: %C = ashr i32 %A, 31 @@ -128,9 +128,9 @@ define i32 @test13(i32 %A) { } define i32 @test14(i32 %A) { - %B = lshr i32 %A, 31 - %C = bitcast i32 %B to i32 - %D = sub i32 0, %C + %B = lshr i32 %A, 31 + %C = bitcast i32 %B to i32 + %D = sub i32 0, %C ret i32 %D ; CHECK-LABEL: @test14( ; CHECK: %D = ashr i32 %A, 31 @@ -138,17 +138,17 @@ define i32 @test14(i32 %A) { } define i32 @test15(i32 %A, i32 %B) { - %C = sub i32 0, %A - %D = srem i32 %B, %C + %C = sub i32 0, %A + %D = srem i32 %B, %C ret i32 %D ; CHECK-LABEL: @test15( -; CHECK: %D = srem i32 %B, %A +; CHECK: %D = srem i32 %B, %A ; CHECK: ret i32 %D } define i32 @test16(i32 %A) { - %X = sdiv i32 %A, 1123 - %Y = sub i32 0, %X + %X = sdiv i32 %A, 1123 + %Y = sub i32 0, %X ret i32 %Y ; CHECK-LABEL: @test16( ; CHECK: %Y = sdiv i32 %A, 
-1123 @@ -158,8 +158,8 @@ define i32 @test16(i32 %A) { ; Can't fold subtract here because negation it might oveflow. ; PR3142 define i32 @test17(i32 %A) { - %B = sub i32 0, %A - %C = sdiv i32 %B, 1234 + %B = sub i32 0, %A + %C = sdiv i32 %B, 1234 ret i32 %C ; CHECK-LABEL: @test17( ; CHECK: %B = sub i32 0, %A @@ -168,25 +168,25 @@ define i32 @test17(i32 %A) { } define i64 @test18(i64 %Y) { - %tmp.4 = shl i64 %Y, 2 - %tmp.12 = shl i64 %Y, 2 - %tmp.8 = sub i64 %tmp.4, %tmp.12 + %tmp.4 = shl i64 %Y, 2 + %tmp.12 = shl i64 %Y, 2 + %tmp.8 = sub i64 %tmp.4, %tmp.12 ret i64 %tmp.8 ; CHECK-LABEL: @test18( ; CHECK: ret i64 0 } define i32 @test19(i32 %X, i32 %Y) { - %Z = sub i32 %X, %Y - %Q = add i32 %Z, %Y + %Z = sub i32 %X, %Y + %Q = add i32 %Z, %Y ret i32 %Q ; CHECK-LABEL: @test19( ; CHECK: ret i32 %X } define i1 @test20(i32 %g, i32 %h) { - %tmp.2 = sub i32 %g, %h - %tmp.4 = icmp ne i32 %tmp.2, %g + %tmp.2 = sub i32 %g, %h + %tmp.4 = icmp ne i32 %tmp.2, %g ret i1 %tmp.4 ; CHECK-LABEL: @test20( ; CHECK: %tmp.4 = icmp ne i32 %h, 0 @@ -194,8 +194,8 @@ define i1 @test20(i32 %g, i32 %h) { } define i1 @test21(i32 %g, i32 %h) { - %tmp.2 = sub i32 %g, %h - %tmp.4 = icmp ne i32 %tmp.2, %g + %tmp.2 = sub i32 %g, %h + %tmp.4 = icmp ne i32 %tmp.2, %g ret i1 %tmp.4 ; CHECK-LABEL: @test21( ; CHECK: %tmp.4 = icmp ne i32 %h, 0 @@ -204,9 +204,9 @@ define i1 @test21(i32 %g, i32 %h) { ; PR2298 define zeroext i1 @test22(i32 %a, i32 %b) nounwind { - %tmp2 = sub i32 0, %a - %tmp4 = sub i32 0, %b - %tmp5 = icmp eq i32 %tmp2, %tmp4 + %tmp2 = sub i32 0, %a + %tmp4 = sub i32 0, %b + %tmp5 = icmp eq i32 %tmp2, %tmp4 ret i1 %tmp5 ; CHECK-LABEL: @test22( ; CHECK: %tmp5 = icmp eq i32 %b, %a @@ -227,6 +227,19 @@ define i32 @test23(i8* %P, i64 %A){ ; CHECK-NEXT: ret i32 } +define i8 @test23_as1(i8 addrspace(1)* %P, i16 %A) { +; CHECK: @test23_as1 +; CHECK-NEXT: = trunc i16 %A to i8 +; CHECK-NEXT: ret i8 + %B = getelementptr inbounds i8 addrspace(1)* %P, i16 %A + %C = ptrtoint i8 addrspace(1)* %B to i16 + 
%D = trunc i16 %C to i8 + %E = ptrtoint i8 addrspace(1)* %P to i16 + %F = trunc i16 %E to i8 + %G = sub i8 %D, %F + ret i8 %G +} + define i64 @test24(i8* %P, i64 %A){ %B = getelementptr inbounds i8* %P, i64 %A %C = ptrtoint i8* %B to i64 @@ -237,6 +250,16 @@ define i64 @test24(i8* %P, i64 %A){ ; CHECK-NEXT: ret i64 %A } +define i16 @test24_as1(i8 addrspace(1)* %P, i16 %A) { +; CHECK: @test24_as1 +; CHECK-NEXT: ret i16 %A + %B = getelementptr inbounds i8 addrspace(1)* %P, i16 %A + %C = ptrtoint i8 addrspace(1)* %B to i16 + %E = ptrtoint i8 addrspace(1)* %P to i16 + %G = sub i16 %C, %E + ret i16 %G +} + define i64 @test24a(i8* %P, i64 %A){ %B = getelementptr inbounds i8* %P, i64 %A %C = ptrtoint i8* %B to i64 @@ -245,9 +268,21 @@ define i64 @test24a(i8* %P, i64 %A){ ret i64 %G ; CHECK-LABEL: @test24a( ; CHECK-NEXT: sub i64 0, %A -; CHECK-NEXT: ret i64 +; CHECK-NEXT: ret i64 } +define i16 @test24a_as1(i8 addrspace(1)* %P, i16 %A) { +; CHECK: @test24a_as1 +; CHECK-NEXT: sub i16 0, %A +; CHECK-NEXT: ret i16 + %B = getelementptr inbounds i8 addrspace(1)* %P, i16 %A + %C = ptrtoint i8 addrspace(1)* %B to i16 + %E = ptrtoint i8 addrspace(1)* %P to i16 + %G = sub i16 %E, %C + ret i16 %G +} + + @Arr = external global [42 x i16] define i64 @test24b(i8* %P, i64 %A){ @@ -257,7 +292,7 @@ define i64 @test24b(i8* %P, i64 %A){ ret i64 %G ; CHECK-LABEL: @test24b( ; CHECK-NEXT: shl nuw i64 %A, 1 -; CHECK-NEXT: ret i64 +; CHECK-NEXT: ret i64 } @@ -269,7 +304,21 @@ define i64 @test25(i8* %P, i64 %A){ ; CHECK-LABEL: @test25( ; CHECK-NEXT: shl nuw i64 %A, 1 ; CHECK-NEXT: add i64 {{.*}}, -84 -; CHECK-NEXT: ret i64 +; CHECK-NEXT: ret i64 +} + +@Arr_as1 = external addrspace(1) global [42 x i16] + +define i16 @test25_as1(i8 addrspace(1)* %P, i64 %A) { +; CHECK: @test25_as1 +; CHECK-NEXT: %1 = trunc i64 %A to i16 +; CHECK-NEXT: shl nuw i16 %1, 1 +; CHECK-NEXT: add i16 {{.*}}, -84 +; CHECK-NEXT: ret i16 + %B = getelementptr inbounds [42 x i16] addrspace(1)* @Arr_as1, i64 0, i64 %A + %C = 
ptrtoint i16 addrspace(1)* %B to i16 + %G = sub i16 %C, ptrtoint (i16 addrspace(1)* getelementptr ([42 x i16] addrspace(1)* @Arr_as1, i64 1, i64 0) to i16) + ret i16 %G } define i32 @test26(i32 %x) { @@ -327,3 +376,19 @@ define i64 @test30(i8* %foo, i64 %i, i64 %j) { ; CHECK-NEXT: sub i64 %gep1.idx, %j ; CHECK-NEXT: ret i64 } + +define i16 @test30_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) { +; CHECK-LABEL: @test30_as1( +; CHECK-NEXT: %gep1.idx = shl nuw i16 %i, 2 +; CHECK-NEXT: sub i16 %gep1.idx, %j +; CHECK-NEXT: ret i16 + %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)* + %gep1 = getelementptr inbounds i32 addrspace(1)* %bit, i16 %i + %gep2 = getelementptr inbounds i8 addrspace(1)* %foo, i16 %j + %cast1 = ptrtoint i32 addrspace(1)* %gep1 to i16 + %cast2 = ptrtoint i8 addrspace(1)* %gep2 to i16 + %sub = sub i16 %cast1, %cast2 + ret i16 %sub +} + + diff --git a/test/Transforms/InstCombine/vec_extract_elt.ll b/test/Transforms/InstCombine/vec_extract_elt.ll index 166066a..3daf72e 100644 --- a/test/Transforms/InstCombine/vec_extract_elt.ll +++ b/test/Transforms/InstCombine/vec_extract_elt.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -instcombine -S | not grep extractelement +; RUN: opt < %s -instcombine -S | FileCheck %s +; CHECK-NOT: extractelement define i32 @test(float %f) { %tmp7 = insertelement <4 x float> undef, float %f, i32 0 ; <<4 x float>> [#uses=1] diff --git a/test/Transforms/InstCombine/vec_insertelt.ll b/test/Transforms/InstCombine/vec_insertelt.ll index e35fa5e..3b94920 100644 --- a/test/Transforms/InstCombine/vec_insertelt.ll +++ b/test/Transforms/InstCombine/vec_insertelt.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -instcombine -S | grep "ret <4 x i32> %A" +; RUN: opt < %s -instcombine -S | FileCheck %s +; CHECK: ret <4 x i32> %A ; PR1286 define <4 x i32> @test1(<4 x i32> %A) { diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll index 738e05b..3ee43dc 100644 --- a/test/Transforms/InstCombine/vec_shuffle.ll +++ 
b/test/Transforms/InstCombine/vec_shuffle.ll @@ -200,3 +200,31 @@ define void @test14(i16 %conv10) { %sext = sext <2 x i1> %cmp to <2 x i16> ret void } + +; Check that sequences of insert/extract element are +; collapsed into valid shuffle instruction with correct shuffle indexes. + +define <4 x float> @test15a(<4 x float> %LHS, <4 x float> %RHS) { +; CHECK-LABEL: @test15a +; CHECK-NEXT: shufflevector <4 x float> %LHS, <4 x float> %RHS, <4 x i32> <i32 4, i32 0, i32 6, i32 6> +; CHECK-NEXT: ret <4 x float> %tmp4 + %tmp1 = extractelement <4 x float> %LHS, i32 0 + %tmp2 = insertelement <4 x float> %RHS, float %tmp1, i32 1 + %tmp3 = extractelement <4 x float> %RHS, i32 2 + %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 3 + ret <4 x float> %tmp4 +} + +define <4 x float> @test15b(<4 x float> %LHS, <4 x float> %RHS) { +; CHECK-LABEL: @test15b +; CHECK-NEXT: shufflevector <4 x float> %LHS, <4 x float> %RHS, <4 x i32> <i32 4, i32 3, i32 6, i32 6> +; CHECK-NEXT: ret <4 x float> %tmp5 + %tmp0 = extractelement <4 x float> %LHS, i32 3 + %tmp1 = insertelement <4 x float> %RHS, float %tmp0, i32 0 + %tmp2 = extractelement <4 x float> %tmp1, i32 0 + %tmp3 = insertelement <4 x float> %RHS, float %tmp2, i32 1 + %tmp4 = extractelement <4 x float> %RHS, i32 2 + %tmp5 = insertelement <4 x float> %tmp3, float %tmp4, i32 3 + ret <4 x float> %tmp5 +} + diff --git a/test/Transforms/InstCombine/win-math.ll b/test/Transforms/InstCombine/win-math.ll index df3ac93..e6e79e2 100644 --- a/test/Transforms/InstCombine/win-math.ll +++ b/test/Transforms/InstCombine/win-math.ll @@ -273,3 +273,23 @@ define float @float_round(float %x) nounwind readnone { ret float %3 } +declare float @powf(float, float) +; win32 lacks sqrtf&fabsf, win64 lacks fabsf +define float @float_powsqrt(float %x) nounwind readnone { +; WIN32-LABEL: @float_powsqrt( +; WIN32-NOT: float @sqrtf +; WIN32: float @powf +; WIN64-LABEL: @float_powsqrt( +; WIN64-NOT: float @sqrtf +; WIN64: float @powf +; MINGW32-LABEL: 
@float_powsqrt( +; MINGW32: float @sqrtf +; MINGW32: float @fabsf +; MINGW32-NOT: float @powf +; MINGW64-LABEL: @float_powsqrt( +; MINGW64: float @sqrtf +; MINGW64: float @fabsf +; MINGW64-NOT: float @powf + %1 = call float @powf(float %x, float 0.5) + ret float %1 +} diff --git a/test/Transforms/InstSimplify/call.ll b/test/Transforms/InstSimplify/call.ll index 3e1621c..fd854c5 100644 --- a/test/Transforms/InstSimplify/call.ll +++ b/test/Transforms/InstSimplify/call.ll @@ -101,3 +101,66 @@ define float @test_idempotence(float %a) { ret float %r4 } + +define i8* @operator_new() { +entry: + %call = tail call noalias i8* @_Znwm(i64 8) + %cmp = icmp eq i8* %call, null + br i1 %cmp, label %cast.end, label %cast.notnull + +cast.notnull: ; preds = %entry + %add.ptr = getelementptr inbounds i8* %call, i64 4 + br label %cast.end + +cast.end: ; preds = %cast.notnull, %entry + %cast.result = phi i8* [ %add.ptr, %cast.notnull ], [ null, %entry ] + ret i8* %cast.result + +; CHECK-LABEL: @operator_new +; CHECK: br i1 false, label %cast.end, label %cast.notnull +} + +declare noalias i8* @_Znwm(i64) + +%"struct.std::nothrow_t" = type { i8 } +@_ZSt7nothrow = external global %"struct.std::nothrow_t" + +define i8* @operator_new_nothrow_t() { +entry: + %call = tail call noalias i8* @_ZnamRKSt9nothrow_t(i64 8, %"struct.std::nothrow_t"* @_ZSt7nothrow) + %cmp = icmp eq i8* %call, null + br i1 %cmp, label %cast.end, label %cast.notnull + +cast.notnull: ; preds = %entry + %add.ptr = getelementptr inbounds i8* %call, i64 4 + br label %cast.end + +cast.end: ; preds = %cast.notnull, %entry + %cast.result = phi i8* [ %add.ptr, %cast.notnull ], [ null, %entry ] + ret i8* %cast.result + +; CHECK-LABEL: @operator_new_nothrow_t +; CHECK: br i1 %cmp, label %cast.end, label %cast.notnull +} + +declare i8* @_ZnamRKSt9nothrow_t(i64, %"struct.std::nothrow_t"*) nounwind + +define i8* @malloc_can_return_null() { +entry: + %call = tail call noalias i8* @malloc(i64 8) + %cmp = icmp eq i8* %call, null + br 
i1 %cmp, label %cast.end, label %cast.notnull + +cast.notnull: ; preds = %entry + %add.ptr = getelementptr inbounds i8* %call, i64 4 + br label %cast.end + +cast.end: ; preds = %cast.notnull, %entry + %cast.result = phi i8* [ %add.ptr, %cast.notnull ], [ null, %entry ] + ret i8* %cast.result + +; CHECK-LABEL: @malloc_can_return_null +; CHECK: br i1 %cmp, label %cast.end, label %cast.notnull +} + +declare noalias i8* @malloc(i64) diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll index 0957949..abb3869 100644 --- a/test/Transforms/InstSimplify/compare.ll +++ b/test/Transforms/InstSimplify/compare.ll @@ -717,3 +717,25 @@ define i1 @alloca_gep(i64 %a, i64 %b) { ret i1 %cmp ; CHECK-NEXT: ret i1 false } + +define i1 @non_inbounds_gep_compare(i64* %a) { +; CHECK-LABEL: @non_inbounds_gep_compare( +; Equality compares with non-inbounds GEPs can be folded. + %x = getelementptr i64* %a, i64 42 + %y = getelementptr inbounds i64* %x, i64 -42 + %z = getelementptr i64* %a, i64 -42 + %w = getelementptr inbounds i64* %z, i64 42 + %cmp = icmp eq i64* %y, %w + ret i1 %cmp +; CHECK-NEXT: ret i1 true +} + +define i1 @non_inbounds_gep_compare2(i64* %a) { +; CHECK-LABEL: @non_inbounds_gep_compare2( +; Equality compares with non-inbounds GEPs can be folded. 
+ %x = getelementptr i64* %a, i64 4294967297 + %y = getelementptr i64* %a, i64 1 + %cmp = icmp eq i64* %y, %y + ret i1 %cmp +; CHECK-NEXT: ret i1 true +} diff --git a/test/Transforms/InstSimplify/lit.local.cfg b/test/Transforms/InstSimplify/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/InstSimplify/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/Internalize/2008-05-09-AllButMain.ll b/test/Transforms/Internalize/2008-05-09-AllButMain.ll deleted file mode 100644 index f75e80d..0000000 --- a/test/Transforms/Internalize/2008-05-09-AllButMain.ll +++ /dev/null @@ -1,55 +0,0 @@ -; No arguments means internalize everything -; RUN: opt < %s -internalize -S | FileCheck --check-prefix=NOARGS %s - -; Internalize all but foo and j -; RUN: opt < %s -internalize -internalize-public-api-list foo -internalize-public-api-list j -S | FileCheck --check-prefix=LIST %s - -; Non existent files should be treated as if they were empty (so internalize -; everything) -; RUN: opt < %s -internalize -internalize-public-api-file /nonexistent/file 2> /dev/null -S | FileCheck --check-prefix=EMPTYFILE %s - -; RUN: opt < %s -S -internalize -internalize-public-api-list bar -internalize-public-api-list foo -internalize-public-api-file /nonexistent/file 2> /dev/null | FileCheck --check-prefix=LIST2 %s - -; -file and -list options should be merged, the .apifile contains foo and j -; RUN: opt < %s -internalize -internalize-public-api-list bar -internalize-public-api-file %s.apifile -S | FileCheck --check-prefix=MERGE %s - -; NOARGS: @i = internal global -; LIST: @i = internal global -; EMPTYFILE: @i = internal global -; LIST2: @i = internal global -; MERGE: @i = internal global -@i = global i32 0 - -; NOARGS: @j = internal global -; LIST: @j = global -; EMPTYFILE: @j = internal global -; LIST2: @j = internal global -; MERGE: @j = global -@j = global i32 0 - -; NOARGS-LABEL: define internal void @main( -; 
LIST-LABEL: define internal void @main( -; EMPTYFILE-LABEL: define internal void @main( -; LIST2-LABEL: define internal void @main( -; MERGE-LABEL: define internal void @main( -define void @main() { - ret void -} - -; NOARGS-LABEL: define internal void @foo( -; LIST-LABEL: define void @foo( -; EMPTYFILE-LABEL: define internal void @foo( -; LIST2-LABEL: define void @foo( -; MERGE-LABEL: define void @foo( -define void @foo() { - ret void -} - -; NOARGS-LABEL: define internal void @bar( -; LIST-LABEL: define internal void @bar( -; EMPTYFILE-LABEL: define internal void @bar( -; LIST2-LABEL: define void @bar( -; MERGE-LABEL: define void @bar( -define void @bar() { - ret void -} diff --git a/test/Transforms/Internalize/2008-05-09-AllButMain.ll.apifile b/test/Transforms/Internalize/apifile index f6c58b8..f6c58b8 100644 --- a/test/Transforms/Internalize/2008-05-09-AllButMain.ll.apifile +++ b/test/Transforms/Internalize/apifile diff --git a/test/Transforms/Internalize/available_externally.ll b/test/Transforms/Internalize/available_externally.ll deleted file mode 100644 index bb89603..0000000 --- a/test/Transforms/Internalize/available_externally.ll +++ /dev/null @@ -1,16 +0,0 @@ -; RUN: opt < %s -internalize -internalize-public-api-list foo -S | FileCheck %s - -; CHECK-LABEL: define void @foo( -define void @foo() { - ret void -} - -; CHECK-LABEL: define internal void @zed( -define void @zed() { - ret void -} - -; CHECK-LABEL: define available_externally void @bar( -define available_externally void @bar() { - ret void -} diff --git a/test/Transforms/Internalize/lists.ll b/test/Transforms/Internalize/lists.ll new file mode 100644 index 0000000..83e441a2 --- /dev/null +++ b/test/Transforms/Internalize/lists.ll @@ -0,0 +1,50 @@ +; No arguments means internalize everything +; RUN: opt < %s -internalize -S | FileCheck --check-prefix=ALL %s + +; Non existent files should be treated as if they were empty (so internalize +; everything) +; RUN: opt < %s -internalize 
-internalize-public-api-file /nonexistent/file 2> /dev/null -S | FileCheck --check-prefix=ALL %s + +; Internalize all but foo and j +; RUN: opt < %s -internalize -internalize-public-api-list foo -internalize-public-api-list j -S | FileCheck --check-prefix=FOO_AND_J %s + +; RUN: opt < %s -S -internalize -internalize-public-api-list bar -internalize-public-api-list foo -internalize-public-api-file /nonexistent/file 2> /dev/null | FileCheck --check-prefix=FOO_AND_BAR %s + +; -file and -list options should be merged, the apifile contains foo and j +; RUN: opt < %s -internalize -internalize-public-api-list bar -internalize-public-api-file %S/apifile -S | FileCheck --check-prefix=FOO_J_AND_BAR %s + +; ALL: @i = internal global +; FOO_AND_J: @i = internal global +; FOO_AND_BAR: @i = internal global +; FOO_J_AND_BAR: @i = internal global +@i = global i32 0 + +; ALL: @j = internal global +; FOO_AND_J: @j = global +; FOO_AND_BAR: @j = internal global +; FOO_J_AND_BAR: @j = global +@j = global i32 0 + +; ALL: define internal void @main() { +; FOO_AND_J: define internal void @main() { +; FOO_AND_BAR: define internal void @main() { +; FOO_J_AND_BAR: define internal void @main() { +define void @main() { + ret void +} + +; ALL: define internal void @foo() { +; FOO_AND_J: define void @foo() { +; FOO_AND_BAR: define void @foo() { +; FOO_J_AND_BAR: define void @foo() { +define void @foo() { + ret void +} + +; ALL: define available_externally void @bar() { +; FOO_AND_J: define available_externally void @bar() { +; FOO_AND_BAR: define available_externally void @bar() { +; FOO_J_AND_BAR: define available_externally void @bar() { +define available_externally void @bar() { + ret void +} diff --git a/test/Transforms/Internalize/lit.local.cfg b/test/Transforms/Internalize/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/Internalize/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git 
a/test/Transforms/JumpThreading/landing-pad.ll b/test/Transforms/JumpThreading/landing-pad.ll new file mode 100644 index 0000000..9ee0526 --- /dev/null +++ b/test/Transforms/JumpThreading/landing-pad.ll @@ -0,0 +1,203 @@ +; RUN: opt < %s -disable-output -jump-threading + +%class.E = type { i32 (...)**, %class.C } +%class.C = type { %class.A } +%class.A = type { i32 } +%class.D = type { %class.F } +%class.F = type { %class.E } +%class.B = type { %class.D* } + +@_ZTV1D = unnamed_addr constant [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8*, i8* }* @_ZTI1D to i8*), i8* bitcast (void (%class.D*)* @_ZN1D7doApplyEv to i8*)] +@_ZTI1D = external unnamed_addr constant { i8*, i8*, i8* } + +define void @_ZN15EditCommandImpl5applyEv(%class.E* %this) uwtable align 2 { +entry: + %0 = bitcast %class.E* %this to void (%class.E*)*** + %vtable = load void (%class.E*)*** %0, align 8 + %1 = load void (%class.E*)** %vtable, align 8 + call void %1(%class.E* %this) + ret void +} + +define void @_ZN1DC1Ev(%class.D* nocapture %this) unnamed_addr uwtable align 2 { +entry: + call void @_ZN24CompositeEditCommandImplC2Ev() + %0 = getelementptr inbounds %class.D* %this, i64 0, i32 0, i32 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*]* @_ZTV1D, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +define void @_ZN1DC2Ev(%class.D* nocapture %this) unnamed_addr uwtable align 2 { +entry: + call void @_ZN24CompositeEditCommandImplC2Ev() + %0 = getelementptr inbounds %class.D* %this, i64 0, i32 0, i32 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*]* @_ZTV1D, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +declare void @_ZN24CompositeEditCommandImplC2Ev() #1 + +define void @_ZN1D7doApplyEv(%class.D* nocapture %this) unnamed_addr nounwind readnone uwtable align 2 { +entry: + ret void +} + +define void @_Z3fn1v() uwtable { +entry: + %call = call noalias i8* @_Znwm() #8 + invoke void 
@_ZN24CompositeEditCommandImplC2Ev() + to label %_ZN1DC1Ev.exit unwind label %lpad + +_ZN1DC1Ev.exit: ; preds = %entry + %0 = bitcast i8* %call to i32 (...)*** + store i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*]* @_ZTV1D, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + %_ref.i.i.i = getelementptr inbounds i8* %call, i64 8 + %1 = bitcast i8* %_ref.i.i.i to i32* + %2 = load i32* %1, align 4 + %inc.i.i.i = add nsw i32 %2, 1 + store i32 %inc.i.i.i, i32* %1, align 4 + %3 = bitcast i8* %call to %class.D* + invoke void @_ZN1D7doApplyEv(%class.D* %3) + to label %_ZN15EditCommandImpl5applyEv.exit unwind label %lpad1 + +_ZN15EditCommandImpl5applyEv.exit: ; preds = %_ZN1DC1Ev.exit + invoke void @_ZN1D16deleteKeyPressedEv() + to label %invoke.cont7 unwind label %lpad1 + +invoke.cont7: ; preds = %_ZN15EditCommandImpl5applyEv.exit + ret void + +lpad: ; preds = %entry + %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + cleanup + call void @_ZdlPv() #9 + unreachable + +lpad1: ; preds = %_ZN1DC1Ev.exit, %_ZN15EditCommandImpl5applyEv.exit + %5 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + cleanup + %6 = load i32* %1, align 4 + %tobool.i.i.i = icmp eq i32 %6, 0 + br i1 %tobool.i.i.i, label %_ZN1BI1DED1Ev.exit, label %if.then.i.i.i + +if.then.i.i.i: ; preds = %lpad1 + br i1 undef, label %_ZN1BI1DED1Ev.exit, label %delete.notnull.i.i.i + +delete.notnull.i.i.i: ; preds = %if.then.i.i.i + call void @_ZdlPv() #9 + unreachable + +_ZN1BI1DED1Ev.exit: ; preds = %lpad1, %if.then.i.i.i + resume { i8*, i32 } undef + +terminate.lpad: ; No predecessors! 
+ %7 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* null + unreachable +} + +define void @_ZN1BI1DEC1EPS0_(%class.B* nocapture %this, %class.D* %p1) unnamed_addr uwtable align 2 { +entry: + %m_ptr.i = getelementptr inbounds %class.B* %this, i64 0, i32 0 + store %class.D* %p1, %class.D** %m_ptr.i, align 8 + %_ref.i.i = getelementptr inbounds %class.D* %p1, i64 0, i32 0, i32 0, i32 1, i32 0, i32 0 + %0 = load i32* %_ref.i.i, align 4 + %inc.i.i = add nsw i32 %0, 1 + store i32 %inc.i.i, i32* %_ref.i.i, align 4 + ret void +} + +declare noalias i8* @_Znwm() + +declare i32 @__gxx_personality_v0(...) + +declare void @_ZdlPv() + +define %class.D* @_ZN1BI1DEptEv(%class.B* nocapture readonly %this) nounwind readonly uwtable align 2 { +entry: + %m_ptr = getelementptr inbounds %class.B* %this, i64 0, i32 0 + %0 = load %class.D** %m_ptr, align 8 + ret %class.D* %0 +} + +declare void @_ZN1D16deleteKeyPressedEv() + +define void @_ZN1BI1DED1Ev(%class.B* nocapture readonly %this) unnamed_addr uwtable align 2 { +entry: + %m_ptr.i = getelementptr inbounds %class.B* %this, i64 0, i32 0 + %0 = load %class.D** %m_ptr.i, align 8 + %_ref.i.i = getelementptr inbounds %class.D* %0, i64 0, i32 0, i32 0, i32 1, i32 0, i32 0 + %1 = load i32* %_ref.i.i, align 4 + %tobool.i.i = icmp eq i32 %1, 0 + br i1 %tobool.i.i, label %_ZN1BI1DED2Ev.exit, label %if.then.i.i + +if.then.i.i: ; preds = %entry + br i1 undef, label %_ZN1BI1DED2Ev.exit, label %delete.notnull.i.i + +delete.notnull.i.i: ; preds = %if.then.i.i + call void @_ZdlPv() #9 + unreachable + +_ZN1BI1DED2Ev.exit: ; preds = %entry, %if.then.i.i + ret void +} + +declare hidden void @__clang_call_terminate() + +define void @_ZN1BI1DED2Ev(%class.B* nocapture readonly %this) unnamed_addr uwtable align 2 { +entry: + %m_ptr = getelementptr inbounds %class.B* %this, i64 0, i32 0 + %0 = load %class.D** %m_ptr, align 8 + %_ref.i = getelementptr inbounds %class.D* %0, i64 0, i32 0, i32 0, i32 1, 
i32 0, i32 0 + %1 = load i32* %_ref.i, align 4 + %tobool.i = icmp eq i32 %1, 0 + br i1 %tobool.i, label %_ZN1AI1CE5derefEv.exit, label %if.then.i + +if.then.i: ; preds = %entry + br i1 undef, label %_ZN1AI1CE5derefEv.exit, label %delete.notnull.i + +delete.notnull.i: ; preds = %if.then.i + call void @_ZdlPv() #9 + unreachable + +_ZN1AI1CE5derefEv.exit: ; preds = %entry, %if.then.i + ret void +} + +define void @_ZN1AI1CE5derefEv(%class.A* nocapture readonly %this) nounwind uwtable align 2 { +entry: + %_ref = getelementptr inbounds %class.A* %this, i64 0, i32 0 + %0 = load i32* %_ref, align 4 + %tobool = icmp eq i32 %0, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + br i1 undef, label %if.end, label %delete.notnull + +delete.notnull: ; preds = %if.then + call void @_ZdlPv() #9 + unreachable + +if.end: ; preds = %entry, %if.then + ret void +} + +define void @_ZN1BI1DEC2EPS0_(%class.B* nocapture %this, %class.D* %p1) unnamed_addr uwtable align 2 { +entry: + %m_ptr = getelementptr inbounds %class.B* %this, i64 0, i32 0 + store %class.D* %p1, %class.D** %m_ptr, align 8 + %_ref.i = getelementptr inbounds %class.D* %p1, i64 0, i32 0, i32 0, i32 1, i32 0, i32 0 + %0 = load i32* %_ref.i, align 4 + %inc.i = add nsw i32 %0, 1 + store i32 %inc.i, i32* %_ref.i, align 4 + ret void +} + +define void @_ZN1AI1CE3refEv(%class.A* nocapture %this) nounwind uwtable align 2 { +entry: + %_ref = getelementptr inbounds %class.A* %this, i64 0, i32 0 + %0 = load i32* %_ref, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %_ref, align 4 + ret void +} diff --git a/test/Transforms/JumpThreading/lit.local.cfg b/test/Transforms/JumpThreading/lit.local.cfg deleted file mode 100644 index c6106e4..0000000 --- a/test/Transforms/JumpThreading/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll'] diff --git a/test/Transforms/JumpThreading/thread-loads.ll b/test/Transforms/JumpThreading/thread-loads.ll index e651f9a..e5bf64b 100644 --- 
a/test/Transforms/JumpThreading/thread-loads.ll +++ b/test/Transforms/JumpThreading/thread-loads.ll @@ -75,7 +75,7 @@ bb3: ; preds = %bb1 ret i32 %res.0 } -!0 = metadata !{metadata !"int", metadata !1} +!0 = metadata !{metadata !3, metadata !3, i64 0} !1 = metadata !{metadata !"omnipotent char", metadata !2} !2 = metadata !{metadata !"Simple C/C++ TBAA", null} - +!3 = metadata !{metadata !"int", metadata !1} diff --git a/test/Transforms/LCSSA/lit.local.cfg b/test/Transforms/LCSSA/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/LCSSA/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll b/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll index 86c2679..7cf7a32 100644 --- a/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll +++ b/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll @@ -30,8 +30,10 @@ for.end: ; preds = %for.inc ret void } -!0 = metadata !{metadata !"any pointer", metadata !1} +!0 = metadata !{metadata !5, metadata !5, i64 0} !1 = metadata !{metadata !"omnipotent char", metadata !2} !2 = metadata !{metadata !"Simple C/C++ TBAA", null} !3 = metadata !{metadata !"short", metadata !1} -!4 = metadata !{metadata !"int", metadata !1} +!4 = metadata !{metadata !6, metadata !6, i64 0} +!5 = metadata !{metadata !"any pointer", metadata !1} +!6 = metadata !{metadata !"int", metadata !1} diff --git a/test/Transforms/LICM/debug-value.ll b/test/Transforms/LICM/debug-value.ll index 3c70064..e5c774f 100644 --- a/test/Transforms/LICM/debug-value.ll +++ b/test/Transforms/LICM/debug-value.ll @@ -33,19 +33,20 @@ for.end104: ; preds = %for.cond.backedge declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone +!llvm.module.flags = !{!26} !llvm.dbg.sp = !{!0, !6, !9, !10} !0 = metadata !{i32 589870, metadata !25, metadata !1, metadata !"idamax", metadata !"idamax", metadata !"", i32 112, metadata 
!3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 589865, metadata !25} ; [ DW_TAG_file_type ] !2 = metadata !{i32 589841, metadata !25, i32 12, metadata !"clang version 2.9 (trunk 127169)", i1 true, metadata !"", i32 0, metadata !8, metadata !8, metadata !8, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !25, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 589845, metadata !25, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} !5 = metadata !{i32 589860, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !6 = metadata !{i32 589870, metadata !25, metadata !1, metadata !"dscal", metadata !"dscal", metadata !"", i32 206, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!7 = metadata !{i32 589845, metadata !25, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 589845, metadata !25, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{null} !9 = metadata !{i32 589870, metadata !25, metadata !1, metadata !"daxpy", metadata !"daxpy", metadata !"", i32 230, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!10 = metadata !{i32 589870, metadata !25, metadata !1, metadata !"dgefa", metadata !"dgefa", metadata !"", i32 267, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null, null, 
null, null, i32 0} ; [ DW_TAG_subprogram ] +!10 = metadata !{i32 589870, metadata !25, metadata !1, metadata !"dgefa", metadata !"dgefa", metadata !"", i32 267, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 267] [def] [scope 0] [dgefa] !11 = metadata !{i32 281, i32 9, metadata !12, null} !12 = metadata !{i32 589835, metadata !25, metadata !13, i32 272, i32 5, i32 32} ; [ DW_TAG_lexical_block ] !13 = metadata !{i32 589835, metadata !25, metadata !14, i32 271, i32 5, i32 31} ; [ DW_TAG_lexical_block ] @@ -61,3 +62,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !23 = metadata !{i32 296, i32 13, metadata !17, null} !24 = metadata !{i32 313, i32 1, metadata !14, null} !25 = metadata !{metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/Benchmarks/CoyoteBench/lpbench.c", metadata !"/private/tmp"} +!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Transforms/LICM/lit.local.cfg b/test/Transforms/LICM/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/LICM/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/LICM/promote-order.ll b/test/Transforms/LICM/promote-order.ll index b016265..86f11fe 100644 --- a/test/Transforms/LICM/promote-order.ll +++ b/test/Transforms/LICM/promote-order.ll @@ -37,5 +37,7 @@ for.end: ; preds = %for.cond.for.end_cr } !0 = metadata !{metadata !"minimal TBAA"} -!1 = metadata !{metadata !"float", metadata !0} -!2 = metadata !{metadata !"int", metadata !0} +!1 = metadata !{metadata !3, metadata !3, i64 0} +!2 = metadata !{metadata !4, metadata !4, i64 0} +!3 = metadata !{metadata !"float", metadata !0} +!4 = metadata !{metadata !"int", metadata !0} diff --git a/test/Transforms/LICM/scalar_promote.ll b/test/Transforms/LICM/scalar_promote.ll index b3e45c5..92ef155 100644 --- 
a/test/Transforms/LICM/scalar_promote.ll +++ b/test/Transforms/LICM/scalar_promote.ll @@ -181,7 +181,9 @@ for.end: ; preds = %for.cond.for.end_cr ; CHECK-NEXT: store i32 %inc, i32* %gi, align 4, !tbaa !0 } -!0 = metadata !{metadata !"int", metadata !1} +!0 = metadata !{metadata !4, metadata !4, i64 0} !1 = metadata !{metadata !"omnipotent char", metadata !2} !2 = metadata !{metadata !"Simple C/C++ TBAA"} -!3 = metadata !{metadata !"float", metadata !1} +!3 = metadata !{metadata !5, metadata !5, i64 0} +!4 = metadata !{metadata !"int", metadata !1} +!5 = metadata !{metadata !"float", metadata !1} diff --git a/test/Transforms/LICM/volatile-alias.ll b/test/Transforms/LICM/volatile-alias.ll new file mode 100644 index 0000000..886d7f2 --- /dev/null +++ b/test/Transforms/LICM/volatile-alias.ll @@ -0,0 +1,54 @@ +; RUN: opt -basicaa -sroa -loop-rotate -licm -S < %s | FileCheck %s +; The objects *p and *q are aliased to each other, but even though *q is +; volatile, *p can be considered invariant in the loop. Check if it is moved +; out of the loop. 
+; CHECK: load i32* %p +; CHECK: for.body: +; CHECK; load volatile i32* %q + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; Function Attrs: nounwind uwtable +define i32 @foo(i32* %p, i32* %q, i32 %n) #0 { +entry: + %p.addr = alloca i32*, align 8 + %q.addr = alloca i32*, align 8 + %n.addr = alloca i32, align 4 + %i = alloca i32, align 4 + %s = alloca i32, align 4 + store i32* %p, i32** %p.addr, align 8 + store i32* %q, i32** %q.addr, align 8 + store i32 %n, i32* %n.addr, align 4 + store i32 0, i32* %s, align 4 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32* %i, align 4 + %1 = load i32* %n.addr, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %2 = load i32** %p.addr, align 8 + %3 = load i32* %2, align 4 + %4 = load i32** %q.addr, align 8 + %5 = load volatile i32* %4, align 4 + %add = add nsw i32 %3, %5 + %6 = load i32* %s, align 4 + %add1 = add nsw i32 %6, %add + store i32 %add1, i32* %s, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %7 = load i32* %i, align 4 + %inc = add nsw i32 %7, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + %8 = load i32* %s, align 4 + ret i32 %8 +} + +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/Transforms/LoopDeletion/lit.local.cfg b/test/Transforms/LoopDeletion/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/LoopDeletion/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git 
a/test/Transforms/LoopIdiom/X86/lit.local.cfg b/test/Transforms/LoopIdiom/X86/lit.local.cfg index a8ad0f1..ba763cf 100644 --- a/test/Transforms/LoopIdiom/X86/lit.local.cfg +++ b/test/Transforms/LoopIdiom/X86/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - targets = set(config.root.targets_to_build.split()) if not 'X86' in targets: config.unsupported = True diff --git a/test/Transforms/LoopIdiom/basic-address-space.ll b/test/Transforms/LoopIdiom/basic-address-space.ll new file mode 100644 index 0000000..697ab37 --- /dev/null +++ b/test/Transforms/LoopIdiom/basic-address-space.ll @@ -0,0 +1,91 @@ +; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s + +target datalayout = "e-p:32:32:32-p1:64:64:64-p2:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +; Two dimensional nested loop should be promoted to one big memset. +define void @test10(i8 addrspace(2)* %X) nounwind ssp { +; CHECK-LABEL: @test10( +; CHECK: entry: +; CHECK-NEXT: call void @llvm.memset.p2i8.i16(i8 addrspace(2)* %X, i8 0, i16 10000, i32 1, i1 false) +; CHECK-NOT: store +; CHECK: ret void + +entry: + br label %bb.nph + +bb.nph: ; preds = %entry, %for.inc10 + %i.04 = phi i16 [ 0, %entry ], [ %inc12, %for.inc10 ] + br label %for.body5 + +for.body5: ; preds = %for.body5, %bb.nph + %j.02 = phi i16 [ 0, %bb.nph ], [ %inc, %for.body5 ] + %mul = mul nsw i16 %i.04, 100 + %add = add nsw i16 %j.02, %mul + %arrayidx = getelementptr inbounds i8 addrspace(2)* %X, i16 %add + store i8 0, i8 addrspace(2)* %arrayidx, align 1 + %inc = add nsw i16 %j.02, 1 + %cmp4 = icmp eq i16 %inc, 100 + br i1 %cmp4, label %for.inc10, label %for.body5 + +for.inc10: ; preds = %for.body5 + %inc12 = add nsw i16 %i.04, 1 + %cmp = icmp eq i16 %inc12, 100 + br i1 %cmp, label %for.end13, label %bb.nph + +for.end13: ; preds = %for.inc10 + ret void +} + +define void @test11_pattern(i32 addrspace(2)* nocapture %P) nounwind ssp { +; CHECK-LABEL: 
@test11_pattern( +; CHECK-NOT: memset_pattern +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ] + %arrayidx = getelementptr i32 addrspace(2)* %P, i64 %indvar + store i32 1, i32 addrspace(2)* %arrayidx, align 4 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 10000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; PR9815 - This is a partial overlap case that cannot be safely transformed +; into a memcpy. +@g_50 = addrspace(2) global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16 + + +define i32 @test14() nounwind { +; CHECK-LABEL: @test14( +; CHECK: for.body: +; CHECK: load i32 +; CHECK: store i32 +; CHECK: br i1 %cmp + +entry: + br label %for.body + +for.body: ; preds = %for.inc, %for.body.lr.ph + %tmp5 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %add = add nsw i32 %tmp5, 4 + %idxprom = sext i32 %add to i64 + %arrayidx = getelementptr inbounds [7 x i32] addrspace(2)* @g_50, i32 0, i64 %idxprom + %tmp2 = load i32 addrspace(2)* %arrayidx, align 4 + %add4 = add nsw i32 %tmp5, 5 + %idxprom5 = sext i32 %add4 to i64 + %arrayidx6 = getelementptr inbounds [7 x i32] addrspace(2)* @g_50, i32 0, i64 %idxprom5 + store i32 %tmp2, i32 addrspace(2)* %arrayidx6, align 4 + %inc = add nsw i32 %tmp5, 1 + %cmp = icmp slt i32 %inc, 2 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.inc + %tmp8 = load i32 addrspace(2)* getelementptr inbounds ([7 x i32] addrspace(2)* @g_50, i32 0, i64 6), align 4 + ret i32 %tmp8 +} + diff --git a/test/Transforms/LoopIdiom/debug-line.ll b/test/Transforms/LoopIdiom/debug-line.ll index 2337590..ef4a478 100644 --- a/test/Transforms/LoopIdiom/debug-line.ll +++ b/test/Transforms/LoopIdiom/debug-line.ll @@ -27,12 +27,13 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone declare void @llvm.dbg.value(metadata, i64, metadata) nounwind 
readnone +!llvm.module.flags = !{!19} !llvm.dbg.sp = !{!0} -!0 = metadata !{i32 589870, metadata !18, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (double*)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!0 = metadata !{i32 589870, metadata !18, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (double*)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [foo] !1 = metadata !{i32 589865, metadata !18} ; [ DW_TAG_file_type ] !2 = metadata !{i32 589841, metadata !18, i32 12, metadata !"clang version 2.9 (trunk 127165:127174)", i1 true, metadata !"", i32 0, metadata !9, metadata !9, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !18, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 589845, metadata !18, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{null} !5 = metadata !{i32 590081, metadata !0, metadata !"a", metadata !1, i32 16777218, metadata !6, i32 0} ; [ DW_TAG_arg_variable ] !6 = metadata !{i32 589839, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ] @@ -48,3 +49,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !16 = metadata !{i32 3, i32 29, metadata !11, null} !17 = metadata !{i32 5, i32 1, metadata !12, null} !18 = metadata !{metadata !"li.c", metadata !"/private/tmp"} +!19 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Transforms/LoopIdiom/lit.local.cfg b/test/Transforms/LoopIdiom/lit.local.cfg deleted file mode 100644 
index 19eebc0..0000000 --- a/test/Transforms/LoopIdiom/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/LoopReroll/basic.ll b/test/Transforms/LoopReroll/basic.ll new file mode 100644 index 0000000..314a149 --- /dev/null +++ b/test/Transforms/LoopReroll/basic.ll @@ -0,0 +1,327 @@ +; RUN: opt < %s -loop-reroll -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; int foo(int a); +; void bar(int *x) { +; for (int i = 0; i < 500; i += 3) { +; foo(i); +; foo(i+1); +; foo(i+2); +; } +; } + +; Function Attrs: nounwind uwtable +define void @bar(i32* nocapture readnone %x) #0 { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.08 = phi i32 [ 0, %entry ], [ %add3, %for.body ] + %call = tail call i32 @foo(i32 %i.08) #1 + %add = add nsw i32 %i.08, 1 + %call1 = tail call i32 @foo(i32 %add) #1 + %add2 = add nsw i32 %i.08, 2 + %call3 = tail call i32 @foo(i32 %add2) #1 + %add3 = add nsw i32 %i.08, 3 + %exitcond = icmp eq i32 %add3, 500 + br i1 %exitcond, label %for.end, label %for.body + +; CHECK-LABEL: @bar + +; CHECK: for.body: +; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ] +; CHECK: %call = tail call i32 @foo(i32 %indvar) #1 +; CHECK: %indvar.next = add i32 %indvar, 1 +; CHECK: %exitcond1 = icmp eq i32 %indvar.next, 498 +; CHECK: br i1 %exitcond1, label %for.end, label %for.body + +; CHECK: ret + +for.end: ; preds = %for.body + ret void +} + +declare i32 @foo(i32) + +; void hi1(int *x) { +; for (int i = 0; i < 1500; i += 3) { +; x[i] = foo(0); +; x[i+1] = foo(0); +; x[i+2] = foo(0); +; } +; } + +; Function Attrs: nounwind uwtable +define void @hi1(i32* nocapture %x) #0 { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ 
%indvars.iv.next, %for.body ] + %call = tail call i32 @foo(i32 0) #1 + %arrayidx = getelementptr inbounds i32* %x, i64 %indvars.iv + store i32 %call, i32* %arrayidx, align 4 + %call1 = tail call i32 @foo(i32 0) #1 + %0 = add nsw i64 %indvars.iv, 1 + %arrayidx3 = getelementptr inbounds i32* %x, i64 %0 + store i32 %call1, i32* %arrayidx3, align 4 + %call4 = tail call i32 @foo(i32 0) #1 + %1 = add nsw i64 %indvars.iv, 2 + %arrayidx7 = getelementptr inbounds i32* %x, i64 %1 + store i32 %call4, i32* %arrayidx7, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 3 + %2 = trunc i64 %indvars.iv.next to i32 + %cmp = icmp slt i32 %2, 1500 + br i1 %cmp, label %for.body, label %for.end + +; CHECK-LABEL: @hi1 + +; CHECK: for.body: +; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ] +; CHECK: %call = tail call i32 @foo(i32 0) #1 +; CHECK: %arrayidx = getelementptr inbounds i32* %x, i64 %indvar +; CHECK: store i32 %call, i32* %arrayidx, align 4 +; CHECK: %indvar.next = add i64 %indvar, 1 +; CHECK: %exitcond = icmp eq i64 %indvar.next, 1500 +; CHECK: br i1 %exitcond, label %for.end, label %for.body + +; CHECK: ret + +for.end: ; preds = %for.body + ret void +} + +; void hi2(int *x) { +; for (int i = 0; i < 500; ++i) { +; x[3*i] = foo(0); +; x[3*i+1] = foo(0); +; x[3*i+2] = foo(0); +; } +; } + +; Function Attrs: nounwind uwtable +define void @hi2(i32* nocapture %x) #0 { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %call = tail call i32 @foo(i32 0) #1 + %0 = mul nsw i64 %indvars.iv, 3 + %arrayidx = getelementptr inbounds i32* %x, i64 %0 + store i32 %call, i32* %arrayidx, align 4 + %call1 = tail call i32 @foo(i32 0) #1 + %1 = add nsw i64 %0, 1 + %arrayidx4 = getelementptr inbounds i32* %x, i64 %1 + store i32 %call1, i32* %arrayidx4, align 4 + %call5 = tail call i32 @foo(i32 0) #1 + %2 = add nsw i64 %0, 2 + %arrayidx9 = getelementptr inbounds i32* %x, i64 %2 + 
store i32 %call5, i32* %arrayidx9, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 500 + br i1 %exitcond, label %for.end, label %for.body + +; CHECK-LABEL: @hi2 + +; CHECK: for.body: +; CHECK: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] +; CHECK: %call = tail call i32 @foo(i32 0) #1 +; CHECK: %arrayidx = getelementptr inbounds i32* %x, i64 %indvars.iv +; CHECK: store i32 %call, i32* %arrayidx, align 4 +; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 +; CHECK: %exitcond1 = icmp eq i64 %indvars.iv.next, 1500 +; CHECK: br i1 %exitcond1, label %for.end, label %for.body + +; CHECK: ret + +for.end: ; preds = %for.body + ret void +} + +; void goo(float alpha, float *a, float *b) { +; for (int i = 0; i < 3200; i += 5) { +; a[i] += alpha * b[i]; +; a[i + 1] += alpha * b[i + 1]; +; a[i + 2] += alpha * b[i + 2]; +; a[i + 3] += alpha * b[i + 3]; +; a[i + 4] += alpha * b[i + 4]; +; } +; } + +; Function Attrs: nounwind uwtable +define void @goo(float %alpha, float* nocapture %a, float* nocapture readonly %b) #0 { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %mul = fmul float %0, %alpha + %arrayidx2 = getelementptr inbounds float* %a, i64 %indvars.iv + %1 = load float* %arrayidx2, align 4 + %add = fadd float %1, %mul + store float %add, float* %arrayidx2, align 4 + %2 = add nsw i64 %indvars.iv, 1 + %arrayidx5 = getelementptr inbounds float* %b, i64 %2 + %3 = load float* %arrayidx5, align 4 + %mul6 = fmul float %3, %alpha + %arrayidx9 = getelementptr inbounds float* %a, i64 %2 + %4 = load float* %arrayidx9, align 4 + %add10 = fadd float %4, %mul6 + store float %add10, float* %arrayidx9, align 4 + %5 = add nsw i64 %indvars.iv, 2 + %arrayidx13 = getelementptr inbounds float* %b, i64 %5 + %6 = 
load float* %arrayidx13, align 4 + %mul14 = fmul float %6, %alpha + %arrayidx17 = getelementptr inbounds float* %a, i64 %5 + %7 = load float* %arrayidx17, align 4 + %add18 = fadd float %7, %mul14 + store float %add18, float* %arrayidx17, align 4 + %8 = add nsw i64 %indvars.iv, 3 + %arrayidx21 = getelementptr inbounds float* %b, i64 %8 + %9 = load float* %arrayidx21, align 4 + %mul22 = fmul float %9, %alpha + %arrayidx25 = getelementptr inbounds float* %a, i64 %8 + %10 = load float* %arrayidx25, align 4 + %add26 = fadd float %10, %mul22 + store float %add26, float* %arrayidx25, align 4 + %11 = add nsw i64 %indvars.iv, 4 + %arrayidx29 = getelementptr inbounds float* %b, i64 %11 + %12 = load float* %arrayidx29, align 4 + %mul30 = fmul float %12, %alpha + %arrayidx33 = getelementptr inbounds float* %a, i64 %11 + %13 = load float* %arrayidx33, align 4 + %add34 = fadd float %13, %mul30 + store float %add34, float* %arrayidx33, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5 + %14 = trunc i64 %indvars.iv.next to i32 + %cmp = icmp slt i32 %14, 3200 + br i1 %cmp, label %for.body, label %for.end + +; CHECK-LABEL: @goo + +; CHECK: for.body: +; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ] +; CHECK: %arrayidx = getelementptr inbounds float* %b, i64 %indvar +; CHECK: %0 = load float* %arrayidx, align 4 +; CHECK: %mul = fmul float %0, %alpha +; CHECK: %arrayidx2 = getelementptr inbounds float* %a, i64 %indvar +; CHECK: %1 = load float* %arrayidx2, align 4 +; CHECK: %add = fadd float %1, %mul +; CHECK: store float %add, float* %arrayidx2, align 4 +; CHECK: %indvar.next = add i64 %indvar, 1 +; CHECK: %exitcond = icmp eq i64 %indvar.next, 3200 +; CHECK: br i1 %exitcond, label %for.end, label %for.body + +; CHECK: ret + +for.end: ; preds = %for.body + ret void +} + +; void hoo(float alpha, float *a, float *b, int *ip) { +; for (int i = 0; i < 3200; i += 5) { +; a[i] += alpha * b[ip[i]]; +; a[i + 1] += alpha * b[ip[i + 1]]; +; a[i + 2] += alpha * 
b[ip[i + 2]]; +; a[i + 3] += alpha * b[ip[i + 3]]; +; a[i + 4] += alpha * b[ip[i + 4]]; +; } +; } + +; Function Attrs: nounwind uwtable +define void @hoo(float %alpha, float* nocapture %a, float* nocapture readonly %b, i32* nocapture readonly %ip) #0 { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32* %ip, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %idxprom1 = sext i32 %0 to i64 + %arrayidx2 = getelementptr inbounds float* %b, i64 %idxprom1 + %1 = load float* %arrayidx2, align 4 + %mul = fmul float %1, %alpha + %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv + %2 = load float* %arrayidx4, align 4 + %add = fadd float %2, %mul + store float %add, float* %arrayidx4, align 4 + %3 = add nsw i64 %indvars.iv, 1 + %arrayidx7 = getelementptr inbounds i32* %ip, i64 %3 + %4 = load i32* %arrayidx7, align 4 + %idxprom8 = sext i32 %4 to i64 + %arrayidx9 = getelementptr inbounds float* %b, i64 %idxprom8 + %5 = load float* %arrayidx9, align 4 + %mul10 = fmul float %5, %alpha + %arrayidx13 = getelementptr inbounds float* %a, i64 %3 + %6 = load float* %arrayidx13, align 4 + %add14 = fadd float %6, %mul10 + store float %add14, float* %arrayidx13, align 4 + %7 = add nsw i64 %indvars.iv, 2 + %arrayidx17 = getelementptr inbounds i32* %ip, i64 %7 + %8 = load i32* %arrayidx17, align 4 + %idxprom18 = sext i32 %8 to i64 + %arrayidx19 = getelementptr inbounds float* %b, i64 %idxprom18 + %9 = load float* %arrayidx19, align 4 + %mul20 = fmul float %9, %alpha + %arrayidx23 = getelementptr inbounds float* %a, i64 %7 + %10 = load float* %arrayidx23, align 4 + %add24 = fadd float %10, %mul20 + store float %add24, float* %arrayidx23, align 4 + %11 = add nsw i64 %indvars.iv, 3 + %arrayidx27 = getelementptr inbounds i32* %ip, i64 %11 + %12 = load i32* %arrayidx27, align 4 + %idxprom28 = sext i32 %12 to i64 + %arrayidx29 = getelementptr inbounds 
float* %b, i64 %idxprom28 + %13 = load float* %arrayidx29, align 4 + %mul30 = fmul float %13, %alpha + %arrayidx33 = getelementptr inbounds float* %a, i64 %11 + %14 = load float* %arrayidx33, align 4 + %add34 = fadd float %14, %mul30 + store float %add34, float* %arrayidx33, align 4 + %15 = add nsw i64 %indvars.iv, 4 + %arrayidx37 = getelementptr inbounds i32* %ip, i64 %15 + %16 = load i32* %arrayidx37, align 4 + %idxprom38 = sext i32 %16 to i64 + %arrayidx39 = getelementptr inbounds float* %b, i64 %idxprom38 + %17 = load float* %arrayidx39, align 4 + %mul40 = fmul float %17, %alpha + %arrayidx43 = getelementptr inbounds float* %a, i64 %15 + %18 = load float* %arrayidx43, align 4 + %add44 = fadd float %18, %mul40 + store float %add44, float* %arrayidx43, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5 + %19 = trunc i64 %indvars.iv.next to i32 + %cmp = icmp slt i32 %19, 3200 + br i1 %cmp, label %for.body, label %for.end + +; CHECK-LABEL: @hoo + +; CHECK: for.body: +; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ] +; CHECK: %arrayidx = getelementptr inbounds i32* %ip, i64 %indvar +; CHECK: %0 = load i32* %arrayidx, align 4 +; CHECK: %idxprom1 = sext i32 %0 to i64 +; CHECK: %arrayidx2 = getelementptr inbounds float* %b, i64 %idxprom1 +; CHECK: %1 = load float* %arrayidx2, align 4 +; CHECK: %mul = fmul float %1, %alpha +; CHECK: %arrayidx4 = getelementptr inbounds float* %a, i64 %indvar +; CHECK: %2 = load float* %arrayidx4, align 4 +; CHECK: %add = fadd float %2, %mul +; CHECK: store float %add, float* %arrayidx4, align 4 +; CHECK: %indvar.next = add i64 %indvar, 1 +; CHECK: %exitcond = icmp eq i64 %indvar.next, 3200 +; CHECK: br i1 %exitcond, label %for.end, label %for.body + +; CHECK: ret + +for.end: ; preds = %for.body + ret void +} + +attributes #0 = { nounwind uwtable } +attributes #1 = { nounwind } + diff --git a/test/Transforms/LoopReroll/reduction.ll b/test/Transforms/LoopReroll/reduction.ll new file mode 100644 index 
0000000..aed7670 --- /dev/null +++ b/test/Transforms/LoopReroll/reduction.ll @@ -0,0 +1,96 @@ +; RUN: opt < %s -loop-reroll -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @foo(i32* nocapture readonly %x) #0 { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %r.029 = phi i32 [ 0, %entry ], [ %add12, %for.body ] + %arrayidx = getelementptr inbounds i32* %x, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %add = add nsw i32 %0, %r.029 + %1 = or i64 %indvars.iv, 1 + %arrayidx3 = getelementptr inbounds i32* %x, i64 %1 + %2 = load i32* %arrayidx3, align 4 + %add4 = add nsw i32 %add, %2 + %3 = or i64 %indvars.iv, 2 + %arrayidx7 = getelementptr inbounds i32* %x, i64 %3 + %4 = load i32* %arrayidx7, align 4 + %add8 = add nsw i32 %add4, %4 + %5 = or i64 %indvars.iv, 3 + %arrayidx11 = getelementptr inbounds i32* %x, i64 %5 + %6 = load i32* %arrayidx11, align 4 + %add12 = add nsw i32 %add8, %6 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4 + %7 = trunc i64 %indvars.iv.next to i32 + %cmp = icmp slt i32 %7, 400 + br i1 %cmp, label %for.body, label %for.end + +; CHECK-LABEL: @foo + +; CHECK: for.body: +; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ] +; CHECK: %r.029 = phi i32 [ 0, %entry ], [ %add, %for.body ] +; CHECK: %arrayidx = getelementptr inbounds i32* %x, i64 %indvar +; CHECK: %0 = load i32* %arrayidx, align 4 +; CHECK: %add = add nsw i32 %0, %r.029 +; CHECK: %indvar.next = add i64 %indvar, 1 +; CHECK: %exitcond = icmp eq i64 %indvar.next, 400 +; CHECK: br i1 %exitcond, label %for.end, label %for.body + +; CHECK: ret + +for.end: ; preds = %for.body + ret i32 %add12 +} + +define float @bar(float* nocapture readonly %x) #0 { +entry: + br label %for.body + 
+for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %r.029 = phi float [ 0.0, %entry ], [ %add12, %for.body ] + %arrayidx = getelementptr inbounds float* %x, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %add = fadd float %0, %r.029 + %1 = or i64 %indvars.iv, 1 + %arrayidx3 = getelementptr inbounds float* %x, i64 %1 + %2 = load float* %arrayidx3, align 4 + %add4 = fadd float %add, %2 + %3 = or i64 %indvars.iv, 2 + %arrayidx7 = getelementptr inbounds float* %x, i64 %3 + %4 = load float* %arrayidx7, align 4 + %add8 = fadd float %add4, %4 + %5 = or i64 %indvars.iv, 3 + %arrayidx11 = getelementptr inbounds float* %x, i64 %5 + %6 = load float* %arrayidx11, align 4 + %add12 = fadd float %add8, %6 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4 + %7 = trunc i64 %indvars.iv.next to i32 + %cmp = icmp slt i32 %7, 400 + br i1 %cmp, label %for.body, label %for.end + +; CHECK-LABEL: @bar + +; CHECK: for.body: +; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ] +; CHECK: %r.029 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ] +; CHECK: %arrayidx = getelementptr inbounds float* %x, i64 %indvar +; CHECK: %0 = load float* %arrayidx, align 4 +; CHECK: %add = fadd float %0, %r.029 +; CHECK: %indvar.next = add i64 %indvar, 1 +; CHECK: %exitcond = icmp eq i64 %indvar.next, 400 +; CHECK: br i1 %exitcond, label %for.end, label %for.body + +; CHECK: ret + +for.end: ; preds = %for.body + ret float %add12 +} + +attributes #0 = { nounwind readonly uwtable } + diff --git a/test/Transforms/LoopRotate/dbgvalue.ll b/test/Transforms/LoopRotate/dbgvalue.ll index 3434cdc..9461980 100644 --- a/test/Transforms/LoopRotate/dbgvalue.ll +++ b/test/Transforms/LoopRotate/dbgvalue.ll @@ -77,12 +77,13 @@ for.end: ret void } +!llvm.module.flags = !{!20} !llvm.dbg.sp = !{!0} -!0 = metadata !{i32 589870, metadata !18, metadata !1, metadata !"tak", metadata !"tak", metadata !"", i32 32, metadata !3, i1 false, i1 
true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i32, i32)* @tak, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!0 = metadata !{i32 589870, metadata !18, metadata !1, metadata !"tak", metadata !"tak", metadata !"", i32 32, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i32, i32)* @tak, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 32] [def] [scope 0] [tak] !1 = metadata !{i32 589865, metadata !18} ; [ DW_TAG_file_type ] !2 = metadata !{i32 589841, metadata !18, i32 12, metadata !"clang version 2.9 (trunk 125492)", i1 true, metadata !"", i32 0, metadata !19, metadata !19, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !18, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 589845, metadata !18, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5} !5 = metadata !{i32 589860, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !6 = metadata !{i32 590081, metadata !0, metadata !"x", metadata !1, i32 32, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] @@ -99,3 +100,4 @@ for.end: !17 = metadata !{i32 37, i32 1, metadata !13, null} !18 = metadata !{metadata !"/Volumes/Lalgate/cj/llvm/projects/llvm-test/SingleSource/Benchmarks/BenchmarkGame/recursive.c", metadata !"/Volumes/Lalgate/cj/D/projects/llvm-test/SingleSource/Benchmarks/BenchmarkGame"} !19 = metadata !{i32 0} +!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Transforms/LoopRotate/lit.local.cfg b/test/Transforms/LoopRotate/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/LoopRotate/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', 
'.cpp'] diff --git a/test/Transforms/LoopSimplify/dup-preds.ll b/test/Transforms/LoopSimplify/dup-preds.ll new file mode 100644 index 0000000..3d1f149 --- /dev/null +++ b/test/Transforms/LoopSimplify/dup-preds.ll @@ -0,0 +1,46 @@ +; RUN: opt -loop-simplify -S %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-bgq-linux" + +define fastcc void @do_update_md([3 x float]* nocapture readonly %x) #0 { +entry: + br i1 undef, label %if.end365, label %lor.lhs.false134 + +lor.lhs.false134: ; preds = %entry + br i1 undef, label %lor.lhs.false138, label %if.end365 + +lor.lhs.false138: ; preds = %lor.lhs.false134 + br i1 undef, label %lor.lhs.false142, label %if.end365 + +lor.lhs.false142: ; preds = %lor.lhs.false138 + br i1 undef, label %for.body276.lr.ph, label %if.end365 + +for.body276.lr.ph: ; preds = %lor.lhs.false142 + switch i16 undef, label %if.then288 [ + i16 4, label %for.body344 + i16 2, label %for.body344 + ] + +if.then288: ; preds = %for.body276.lr.ph + br label %for.body305 + +for.body305: ; preds = %for.body305, %if.then288 + br label %for.body305 + +for.body344: ; preds = %for.body344, %for.body276.lr.ph, %for.body276.lr.ph + %indvar = phi i64 [ %indvar.next, %for.body344 ], [ 0, %for.body276.lr.ph ] + %indvars.iv552 = phi i64 [ %indvars.iv.next553, %for.body344 ], [ 0, %for.body276.lr.ph ], [ 0, %for.body276.lr.ph ] + %indvars.iv.next553 = add nuw nsw i64 %indvars.iv552, 1 + %indvar.next = add i64 %indvar, 1 + br label %for.body344 + +; CHECK-LABEL: @do_update_md +; CHECK: %indvars.iv552 = phi i64 [ %indvars.iv.next553, %for.body344 ], [ 0, %for.body344.preheader ] +; CHECK: ret + +if.end365: ; preds = %lor.lhs.false142, %lor.lhs.false138, %lor.lhs.false134, %entry + ret void +} + +attributes #0 = { nounwind } + diff --git a/test/Transforms/LoopSimplify/lit.local.cfg b/test/Transforms/LoopSimplify/lit.local.cfg deleted file mode 100644 
index 19eebc0..0000000 --- a/test/Transforms/LoopSimplify/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg b/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg index bac2ffa..8a3ba96 100644 --- a/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg +++ b/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll'] - targets = set(config.root.targets_to_build.split()) if not 'ARM' in targets: config.unsupported = True diff --git a/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll b/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll index fad5241..001a1d6 100644 --- a/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll +++ b/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -loop-reduce -S | FileCheck %s ; -; Test LSR's ability to prune formulae that refer to nonexistant +; Test LSR's ability to prune formulae that refer to nonexistent ; AddRecs in other loops. 
; ; Unable to reduce this case further because it requires LSR to exceed diff --git a/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg b/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg index da2db5a..ba763cf 100644 --- a/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg +++ b/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll'] - targets = set(config.root.targets_to_build.split()) if not 'X86' in targets: config.unsupported = True diff --git a/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll b/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll new file mode 100644 index 0000000..6333291 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll @@ -0,0 +1,88 @@ +; RUN: opt < %s -loop-reduce -S | FileCheck %s +; CHECK: bb1: +; CHECK: load double addrspace(1)* [[IV:%[^,]+]] +; CHECK: store double {{.*}}, double addrspace(1)* [[IV]] + +; CHECK-NOT: cast +; Make sure the GEP has the right index type +; CHECK: getelementptr double addrspace(1)* [[IV]], i16 1 +; CHECK: br {{.*}} label %bb1 + +; Make sure the GEP has the right index type +; CHECK: getelementptr double addrspace(1)* {{.*}}, i16 + + +; This test tests several things. The load and store should use the +; same address instead of having it computed twice, and SCEVExpander should +; be able to reconstruct the full getelementptr, despite it having a few +; obstacles set in its way. +; We only check that the inner loop (bb1-bb2) is "reduced" because LSR +; currently only operates on inner loops. 
+ +target datalayout = "e-p:64:64:64-p1:16:16:16-n16:32:64" + +define void @foo(i64 %n, i64 %m, i64 %o, i64 %q, double addrspace(1)* nocapture %p) nounwind { +entry: + %tmp = icmp sgt i64 %n, 0 ; <i1> [#uses=1] + br i1 %tmp, label %bb.nph3, label %return + +bb.nph: ; preds = %bb2.preheader + %tmp1 = mul i64 %tmp16, %i.02 ; <i64> [#uses=1] + %tmp2 = mul i64 %tmp19, %i.02 ; <i64> [#uses=1] + br label %bb1 + +bb1: ; preds = %bb2, %bb.nph + %j.01 = phi i64 [ %tmp9, %bb2 ], [ 0, %bb.nph ] ; <i64> [#uses=3] + %tmp3 = add i64 %j.01, %tmp1 ; <i64> [#uses=1] + %tmp4 = add i64 %j.01, %tmp2 ; <i64> [#uses=1] + %z0 = add i64 %tmp3, 5203 + %tmp5 = getelementptr double addrspace(1)* %p, i64 %z0 ; <double addrspace(1)*> [#uses=1] + %tmp6 = load double addrspace(1)* %tmp5, align 8 ; <double> [#uses=1] + %tmp7 = fdiv double %tmp6, 2.100000e+00 ; <double> [#uses=1] + %z1 = add i64 %tmp4, 5203 + %tmp8 = getelementptr double addrspace(1)* %p, i64 %z1 ; <double addrspace(1)*> [#uses=1] + store double %tmp7, double addrspace(1)* %tmp8, align 8 + %tmp9 = add i64 %j.01, 1 ; <i64> [#uses=2] + br label %bb2 + +bb2: ; preds = %bb1 + %tmp10 = icmp slt i64 %tmp9, %m ; <i1> [#uses=1] + br i1 %tmp10, label %bb1, label %bb2.bb3_crit_edge + +bb2.bb3_crit_edge: ; preds = %bb2 + br label %bb3 + +bb3: ; preds = %bb2.preheader, %bb2.bb3_crit_edge + %tmp11 = add i64 %i.02, 1 ; <i64> [#uses=2] + br label %bb4 + +bb4: ; preds = %bb3 + %tmp12 = icmp slt i64 %tmp11, %n ; <i1> [#uses=1] + br i1 %tmp12, label %bb2.preheader, label %bb4.return_crit_edge + +bb4.return_crit_edge: ; preds = %bb4 + br label %bb4.return_crit_edge.split + +bb4.return_crit_edge.split: ; preds = %bb.nph3, %bb4.return_crit_edge + br label %return + +bb.nph3: ; preds = %entry + %tmp13 = icmp sgt i64 %m, 0 ; <i1> [#uses=1] + %tmp14 = mul i64 %n, 37 ; <i64> [#uses=1] + %tmp15 = mul i64 %tmp14, %o ; <i64> [#uses=1] + %tmp16 = mul i64 %tmp15, %q ; <i64> [#uses=1] + %tmp17 = mul i64 %n, 37 ; <i64> [#uses=1] + %tmp18 = mul i64 %tmp17, %o ; 
<i64> [#uses=1] + %tmp19 = mul i64 %tmp18, %q ; <i64> [#uses=1] + br i1 %tmp13, label %bb.nph3.split, label %bb4.return_crit_edge.split + +bb.nph3.split: ; preds = %bb.nph3 + br label %bb2.preheader + +bb2.preheader: ; preds = %bb.nph3.split, %bb4 + %i.02 = phi i64 [ %tmp11, %bb4 ], [ 0, %bb.nph3.split ] ; <i64> [#uses=3] + br i1 true, label %bb.nph, label %bb3 + +return: ; preds = %bb4.return_crit_edge.split, %entry + ret void +} diff --git a/test/Transforms/LoopStrengthReduce/address-space-loop.ll b/test/Transforms/LoopStrengthReduce/address-space-loop.ll new file mode 100644 index 0000000..9c1b213 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/address-space-loop.ll @@ -0,0 +1,56 @@ +; RUN: opt -S -loop-reduce < %s | FileCheck %s + +; LSR shouldn't consider %t8 to be an interesting user of %t6, and it +; should be able to form pretty GEPs. + +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +; Copy of uglygep with a different address space +; This tests expandAddToGEP uses the right smaller integer type for +; another address space +define void @Z4() nounwind { +; CHECK-LABEL: @Z4( +bb: + br label %bb3 + +bb1: ; preds = %bb3 + br i1 undef, label %bb10, label %bb2 + +bb2: ; preds = %bb1 + %t = add i16 %t4, 1 ; <i16> [#uses=1] + br label %bb3 + +bb3: ; preds = %bb2, %bb + %t4 = phi i16 [ %t, %bb2 ], [ 0, %bb ] ; <i16> [#uses=3] + br label %bb1 + +; CHECK: bb10: +; CHECK-NEXT: %t7 = icmp eq i16 %t4, 0 +; Host %t2 computation outside the loop. 
+; CHECK-NEXT: [[SCEVGEP:%[^ ]+]] = getelementptr i8 addrspace(1)* undef, i16 %t4 +; CHECK-NEXT: br label %bb14 +bb10: ; preds = %bb9 + %t7 = icmp eq i16 %t4, 0 ; <i1> [#uses=1] + %t3 = add i16 %t4, 16 ; <i16> [#uses=1] + br label %bb14 + +; CHECK: bb14: +; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[SCEVGEP]] +; CHECK-NEXT: %t6 = load float addrspace(1)* addrspace(1)* undef +; Fold %t3's add within the address. +; CHECK-NEXT: [[SCEVGEP1:%[^ ]+]] = getelementptr float addrspace(1)* %t6, i16 4 +; CHECK-NEXT: [[SCEVGEP2:%[^ ]+]] = bitcast float addrspace(1)* [[SCEVGEP1]] to i8 addrspace(1)* +; Use the induction variable (%t4) to access the right element +; CHECK-NEXT: [[ADDRESS:%[^ ]+]] = getelementptr i8 addrspace(1)* [[SCEVGEP2]], i16 %t4 +; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[ADDRESS]] +; CHECK-NEXT: br label %bb14 +bb14: ; preds = %bb14, %bb10 + %t2 = getelementptr inbounds i8 addrspace(1)* undef, i16 %t4 ; <i8*> [#uses=1] + store i8 undef, i8 addrspace(1)* %t2 + %t6 = load float addrspace(1)* addrspace(1)* undef + %t8 = bitcast float addrspace(1)* %t6 to i8 addrspace(1)* ; <i8*> [#uses=1] + %t9 = getelementptr inbounds i8 addrspace(1)* %t8, i16 %t3 ; <i8*> [#uses=1] + store i8 undef, i8 addrspace(1)* %t9 + br label %bb14 +} + diff --git a/test/Transforms/LoopStrengthReduce/dominate-assert.ll b/test/Transforms/LoopStrengthReduce/dominate-assert.ll index ff8cab8..3ba93ff 100644 --- a/test/Transforms/LoopStrengthReduce/dominate-assert.ll +++ b/test/Transforms/LoopStrengthReduce/dominate-assert.ll @@ -68,3 +68,46 @@ bb7: catch i8* null ret void } + +; PR17425 +define void @i() { +entry: + br label %while.cond + +while.cond: ; preds = %while.cond, %entry + %c.0 = phi i16* [ undef, %entry ], [ %incdec.ptr, %while.cond ] + %incdec.ptr = getelementptr inbounds i16* %c.0, i64 1 + br i1 undef, label %while.cond1, label %while.cond + +while.cond1: ; preds = %while.cond1, %while.cond + %c.1 = phi i16* [ %incdec.ptr5, %while.cond1 ], [ %c.0, %while.cond ] + 
%incdec.ptr5 = getelementptr inbounds i16* %c.1, i64 1 + br i1 undef, label %while.cond7, label %while.cond1 + +while.cond7: ; preds = %while.cond7, %while.cond1 + %0 = phi i16* [ %incdec.ptr10, %while.cond7 ], [ %c.1, %while.cond1 ] + %incdec.ptr10 = getelementptr inbounds i16* %0, i64 1 + br i1 undef, label %while.cond12.preheader, label %while.cond7 + +while.cond12.preheader: ; preds = %while.cond7 + br i1 undef, label %while.end16, label %while.body13.lr.ph + +while.body13: ; preds = %if.else, %while.body13.lr.ph + %1 = phi i16* [ %2, %while.body13.lr.ph ], [ %incdec.ptr15, %if.else ] + br i1 undef, label %while.cond12.outer.loopexit, label %if.else + +while.cond12.outer.loopexit: ; preds = %while.body13 + br i1 undef, label %while.end16, label %while.body13.lr.ph + +while.body13.lr.ph: ; preds = %while.cond12.outer.loopexit, %while.cond12.preheader + %2 = phi i16* [ %1, %while.cond12.outer.loopexit ], [ undef, %while.cond12.preheader ] + br label %while.body13 + +if.else: ; preds = %while.body13 + %incdec.ptr15 = getelementptr inbounds i16* %1, i64 1 + %cmp = icmp eq i16* %incdec.ptr15, %0 + br i1 %cmp, label %while.end16, label %while.body13 + +while.end16: ; preds = %if.else, %while.cond12.outer.loopexit, %while.cond12.preheader + ret void +} diff --git a/test/Transforms/LoopStrengthReduce/lit.local.cfg b/test/Transforms/LoopStrengthReduce/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/LoopStrengthReduce/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll b/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll new file mode 100644 index 0000000..255cf41 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll @@ -0,0 +1,42 @@ +; RUN: opt -loop-reduce -S < %s | FileCheck %s + +target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx" + +; PR15470: LSR miscompile. The test2 function should return '1'. +; +; SCEV expander cannot expand quadratic recurrences outside of the +; loop. This recurrence depends on %sub.us, so can't be expanded. +; +; CHECK-LABEL: @test2 +; CHECK-LABEL: test2.loop: +; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -16777216, %entry ] +; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, 16777216 +; +; CHECK=LABEL: for.end: +; CHECK: %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us +; CHECK: %sext.us = mul i32 %lsr.iv.next, %sub.cond.us +; CHECK: %f = ashr i32 %sext.us, 24 +; CHECK: ret i32 %f +define i32 @test2() { +entry: + br label %test2.loop + +test2.loop: + %inc1115.us = phi i32 [ 0, %entry ], [ %inc11.us, %test2.loop ] + %inc11.us = add nsw i32 %inc1115.us, 1 + %cmp.us = icmp slt i32 %inc11.us, 2 + br i1 %cmp.us, label %test2.loop, label %for.end + +for.end: + %tobool.us = icmp eq i32 %inc1115.us, 0 + %sub.us = select i1 %tobool.us, i32 0, i32 0 + %mul.us = shl i32 %inc1115.us, 24 + %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us + %sext.us = mul i32 %mul.us, %sub.cond.us + %f = ashr i32 %sext.us, 24 + br label %exit + +exit: + ret i32 %f +} diff --git a/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll b/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll index 0118241..65aa61f 100644 --- a/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll +++ b/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll @@ -1,18 +1,50 @@ -; RUN: opt < %s -analyze -iv-users | grep "{1,+,3,+,2}<%loop> (post-inc with loop %loop)" +; RUN: opt < %s -analyze -iv-users | FileCheck %s ; The value of %r is dependent on a polynomial iteration expression. 
- +; +; CHECK-LABEL: IV Users for loop %foo.loop +; CHECK: {1,+,3,+,2}<%foo.loop> define i64 @foo(i64 %n) { entry: - br label %loop + br label %foo.loop -loop: - %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] +foo.loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %foo.loop ] %indvar.next = add i64 %indvar, 1 %c = icmp eq i64 %indvar.next, %n - br i1 %c, label %exit, label %loop + br i1 %c, label %exit, label %foo.loop exit: %r = mul i64 %indvar.next, %indvar.next ret i64 %r } + +; PR15470: LSR miscompile. The test2 function should return '1'. +; +; SCEV does not know how to denormalize chained recurrences, so make +; sure they aren't marked as post-inc users. +; +; CHECK-LABEL: IV Users for loop %test2.loop +; CHECK: %sext.us = {0,+,(16777216 + (-16777216 * %sub.us)),+,33554432}<%test2.loop> in %f = ashr i32 %sext.us, 24 +define i32 @test2() { +entry: + br label %test2.loop + +test2.loop: + %inc1115.us = phi i32 [ 0, %entry ], [ %inc11.us, %test2.loop ] + %inc11.us = add nsw i32 %inc1115.us, 1 + %cmp.us = icmp slt i32 %inc11.us, 2 + br i1 %cmp.us, label %test2.loop, label %for.end + +for.end: + %tobool.us = icmp eq i32 %inc1115.us, 0 + %sub.us = select i1 %tobool.us, i32 0, i32 0 + %mul.us = shl i32 %inc1115.us, 24 + %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us + %sext.us = mul i32 %mul.us, %sub.cond.us + %f = ashr i32 %sext.us, 24 + br label %exit + +exit: + ret i32 %f +} diff --git a/test/Transforms/LoopStrengthReduce/uglygep-address-space.ll b/test/Transforms/LoopStrengthReduce/uglygep-address-space.ll new file mode 100644 index 0000000..2c65261 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/uglygep-address-space.ll @@ -0,0 +1,56 @@ +; RUN: opt < %s -loop-reduce -S | FileCheck %s + +; LSR shouldn't consider %t8 to be an interesting user of %t6, and it +; should be able to form pretty GEPs. 
+ +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +; Copy of uglygep with a different address space +; This tests expandAddToGEP uses the right smaller integer type for +; another address space +define void @Z4() nounwind { +; CHECK: define void @Z4 +bb: + br label %bb3 + +bb1: ; preds = %bb3 + br i1 undef, label %bb10, label %bb2 + +bb2: ; preds = %bb1 + %t = add i16 %t4, 1 ; <i16> [#uses=1] + br label %bb3 + +bb3: ; preds = %bb2, %bb + %t4 = phi i16 [ %t, %bb2 ], [ 0, %bb ] ; <i16> [#uses=3] + br label %bb1 + +; CHECK: bb10: +; CHECK-NEXT: %t7 = icmp eq i16 %t4, 0 +; Host %t2 computation outside the loop. +; CHECK-NEXT: [[SCEVGEP:%[^ ]+]] = getelementptr i8 addrspace(1)* undef, i16 %t4 +; CHECK-NEXT: br label %bb14 +bb10: ; preds = %bb9 + %t7 = icmp eq i16 %t4, 0 ; <i1> [#uses=1] + %t3 = add i16 %t4, 16 ; <i16> [#uses=1] + br label %bb14 + +; CHECK: bb14: +; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[SCEVGEP]] +; CHECK-NEXT: %t6 = load float addrspace(1)* addrspace(1)* undef +; Fold %t3's add within the address. 
+; CHECK-NEXT: [[SCEVGEP1:%[^ ]+]] = getelementptr float addrspace(1)* %t6, i16 4 +; CHECK-NEXT: [[SCEVGEP2:%[^ ]+]] = bitcast float addrspace(1)* [[SCEVGEP1]] to i8 addrspace(1)* +; Use the induction variable (%t4) to access the right element +; CHECK-NEXT: [[ADDRESS:%[^ ]+]] = getelementptr i8 addrspace(1)* [[SCEVGEP2]], i16 %t4 +; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[ADDRESS]] +; CHECK-NEXT: br label %bb14 +bb14: ; preds = %bb14, %bb10 + %t2 = getelementptr inbounds i8 addrspace(1)* undef, i16 %t4 ; <i8*> [#uses=1] + store i8 undef, i8 addrspace(1)* %t2 + %t6 = load float addrspace(1)* addrspace(1)* undef + %t8 = bitcast float addrspace(1)* %t6 to i8 addrspace(1)* ; <i8*> [#uses=1] + %t9 = getelementptr inbounds i8 addrspace(1)* %t8, i16 %t3 ; <i8*> [#uses=1] + store i8 undef, i8 addrspace(1)* %t9 + br label %bb14 +} + diff --git a/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll b/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll new file mode 100644 index 0000000..17c91e5 --- /dev/null +++ b/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll @@ -0,0 +1,48 @@ +; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll | FileCheck %s +define void @unroll_opt_for_size() nounwind optsize { +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %inc, %loop ] + %inc = add i32 %iv, 1 + %exitcnd = icmp uge i32 %inc, 1024 + br i1 %exitcnd, label %exit, label %loop + +exit: + ret void +} + +; CHECK-LABEL: @unroll_opt_for_size +; CHECK: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK: icmp + +define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly { +entry: + %cmp1 = icmp eq i32 %n, 0 + br i1 %cmp1, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %add = add nsw i32 
%0, %sum.02 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + ret i32 %sum.0.lcssa +} + +; CHECK-LABEL: @test +; CHECK: unr.cmp{{.*}}: +; CHECK: for.body.unr{{.*}}: +; CHECK: for.body: +; CHECK: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body + diff --git a/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg b/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg new file mode 100644 index 0000000..2e46300 --- /dev/null +++ b/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg @@ -0,0 +1,4 @@ +targets = set(config.root.targets_to_build.split()) +if not 'PowerPC' in targets: + config.unsupported = True + diff --git a/test/Transforms/LoopUnroll/lit.local.cfg b/test/Transforms/LoopUnroll/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/LoopUnroll/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/LoopUnswitch/lit.local.cfg b/test/Transforms/LoopUnswitch/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/LoopUnswitch/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/LoopVectorize/ARM/lit.local.cfg b/test/Transforms/LoopVectorize/ARM/lit.local.cfg index cb77b09..8a3ba96 100644 --- a/test/Transforms/LoopVectorize/ARM/lit.local.cfg +++ b/test/Transforms/LoopVectorize/ARM/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - targets = set(config.root.targets_to_build.split()) if not 'ARM' in targets: config.unsupported = True diff --git a/test/Transforms/LoopVectorize/ARM/width-detect.ll b/test/Transforms/LoopVectorize/ARM/width-detect.ll index c0795b6..99d7fa7 100644 --- a/test/Transforms/LoopVectorize/ARM/width-detect.ll 
+++ b/test/Transforms/LoopVectorize/ARM/width-detect.ll @@ -3,27 +3,27 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" target triple = "thumbv7-apple-ios3.0.0" -;CHECK:foo_F64 -;CHECK: <2 x double> +;CHECK:foo_F32 +;CHECK: <4 x float> ;CHECK:ret -define double @foo_F64(double* nocapture %A, i32 %n) nounwind uwtable readonly ssp { +define float @foo_F32(float* nocapture %A, i32 %n) nounwind uwtable readonly ssp { %1 = icmp sgt i32 %n, 0 br i1 %1, label %.lr.ph, label %._crit_edge .lr.ph: ; preds = %0, %.lr.ph %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] - %prod.01 = phi double [ %4, %.lr.ph ], [ 0.000000e+00, %0 ] - %2 = getelementptr inbounds double* %A, i64 %indvars.iv - %3 = load double* %2, align 8 - %4 = fmul fast double %prod.01, %3 + %prod.01 = phi float [ %4, %.lr.ph ], [ 0.000000e+00, %0 ] + %2 = getelementptr inbounds float* %A, i64 %indvars.iv + %3 = load float* %2, align 8 + %4 = fmul fast float %prod.01, %3 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n br i1 %exitcond, label %._crit_edge, label %.lr.ph ._crit_edge: ; preds = %.lr.ph, %0 - %prod.0.lcssa = phi double [ 0.000000e+00, %0 ], [ %4, %.lr.ph ] - ret double %prod.0.lcssa + %prod.0.lcssa = phi float [ 0.000000e+00, %0 ], [ %4, %.lr.ph ] + ret float %prod.0.lcssa } ;CHECK:foo_I8 diff --git a/test/Transforms/LoopVectorize/X86/already-vectorized.ll b/test/Transforms/LoopVectorize/X86/already-vectorized.ll new file mode 100644 index 0000000..885418c --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/already-vectorized.ll @@ -0,0 +1,46 @@ +; RUN: opt < %s -debug-only=loop-vectorize -O3 -S 2>&1 | FileCheck %s +; REQUIRES: asserts +; We want to make sure that we don't even try to vectorize loops again +; The vectorizer used to mark the un-vectorized loop only as already vectorized +; thus, trying to 
vectorize the vectorized loop again + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@a = external global [255 x i32] + +; Function Attrs: nounwind readonly uwtable +define i32 @vect() { +; CHECK: LV: Checking a loop in "vect" +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry +; We need to make sure we did vectorize the loop +; CHECK: LV: Found a loop: for.body +; CHECK: LV: We can vectorize this loop! + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %red.05 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds [255 x i32]* @a, i64 0, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %add = add nsw i32 %0, %red.05 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 255 + br i1 %exitcond, label %for.end, label %for.body + +; If it did, we have two loops: +; CHECK: vector.body: +; CHECK: br {{.*}} label %vector.body, !llvm.loop [[vect:![0-9]+]] +; CHECK: for.body: +; CHECK: br {{.*}} label %for.body, !llvm.loop [[scalar:![0-9]+]] + +for.end: ; preds = %for.body + ret i32 %add +} + +; Now, we check for the Hint metadata +; CHECK: [[vect]] = metadata !{metadata [[vect]], metadata [[width:![0-9]+]], metadata [[unroll:![0-9]+]]} +; CHECK: [[width]] = metadata !{metadata !"llvm.vectorizer.width", i32 1} +; CHECK: [[unroll]] = metadata !{metadata !"llvm.vectorizer.unroll", i32 1} +; CHECK: [[scalar]] = metadata !{metadata [[scalar]], metadata [[width]], metadata [[unroll]]} + diff --git a/test/Transforms/LoopVectorize/X86/cost-model.ll b/test/Transforms/LoopVectorize/X86/cost-model.ll index b7f479a..98718e1 100644 --- a/test/Transforms/LoopVectorize/X86/cost-model.ll +++ b/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -9,7 +9,7 @@ target triple = "x86_64-apple-macosx10.8.0" @a = 
common global [2048 x i32] zeroinitializer, align 16 ; The program below gathers and scatters data. We better not vectorize it. -;CHECK: cost_model_1 +;CHECK-LABEL: @cost_model_1( ;CHECK-NOT: <2 x i32> ;CHECK-NOT: <4 x i32> ;CHECK-NOT: <8 x i32> diff --git a/test/Transforms/LoopVectorize/X86/lit.local.cfg b/test/Transforms/LoopVectorize/X86/lit.local.cfg index a8ad0f1..ba763cf 100644 --- a/test/Transforms/LoopVectorize/X86/lit.local.cfg +++ b/test/Transforms/LoopVectorize/X86/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - targets = set(config.root.targets_to_build.split()) if not 'X86' in targets: config.unsupported = True diff --git a/test/Transforms/LoopVectorize/X86/rauw-bug.ll b/test/Transforms/LoopVectorize/X86/rauw-bug.ll new file mode 100644 index 0000000..4284fba --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/rauw-bug.ll @@ -0,0 +1,33 @@ +; RUN: opt -slp-vectorizer -S %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128" +target triple = "x86_64-apple-macosx" + +; This test used to fail under libgmalloc. Because we would try to access a +; pointer that was already deleted. 
+; +; llvm-lit -v --param use_gmalloc=1 --param +; gmalloc_path=/usr/lib/libgmalloc.dylib +; test/Transforms/LoopVectorize/X86/rauw-bug.ll +; +; radar://15498655 + +; CHECK: reduced +define void @reduced() { +entry: + br i1 undef, label %while.body, label %while.cond63.preheader.while.end76_crit_edge + +while.cond63.preheader.while.end76_crit_edge: + ret void + +while.body: + %d2_fx.015 = phi double [ %sub52, %while.body ], [ undef, %entry ] + %d2_fy.014 = phi double [ %sub58, %while.body ], [ undef, %entry ] + %d3_fy.013 = phi double [ %div56, %while.body ], [ undef, %entry ] + %d3_fx.012 = phi double [ %div50, %while.body ], [ undef, %entry ] + %div50 = fmul double %d3_fx.012, 1.250000e-01 + %sub52 = fsub double 0.000000e+00, %div50 + %div56 = fmul double %d3_fy.013, 1.250000e-01 + %sub58 = fsub double 0.000000e+00, %div56 + br label %while.body +} diff --git a/test/Transforms/LoopVectorize/X86/tripcount.ll b/test/Transforms/LoopVectorize/X86/tripcount.ll new file mode 100644 index 0000000..6b38bac --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/tripcount.ll @@ -0,0 +1,39 @@ +; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -mcpu=prescott < %s | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" +target triple = "i386-unknown-freebsd11.0" + +@big = external global [0 x i32] + +; PR18049 +; We need to truncate the exit count to i32. This is legal because the +; arithmetic is signed (%inc is nsw). 
+ +; CHECK-LABEL: tripcount +; CHECK: trunc i64 %count to i32 + +define void @tripcount(i64 %count) { +entry: + %cmp6 = icmp sgt i64 %count, 0 + br i1 %cmp6, label %for.body.preheader, label %for.end + +for.body.preheader: + br label %for.body + +for.body: + %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds [0 x i32]* @big, i32 0, i32 %i.07 + %0 = load i32* %arrayidx, align 4 + %neg = xor i32 %0, -1 + store i32 %neg, i32* %arrayidx, align 4 + %inc = add nsw i32 %i.07, 1 + %conv = sext i32 %inc to i64 + %cmp = icmp slt i64 %conv, %count + br i1 %cmp, label %for.body, label %for.end.loopexit + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} diff --git a/test/Transforms/LoopVectorize/X86/unroll-pm.ll b/test/Transforms/LoopVectorize/X86/unroll-pm.ll new file mode 100644 index 0000000..5064fec --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/unroll-pm.ll @@ -0,0 +1,31 @@ +; RUN: opt < %s -O2 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -S | FileCheck %s +; RUN: opt < %s -O2 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -disable-loop-unrolling -S | FileCheck %s -check-prefix=CHECK-NOUNRL + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" +;CHECK-LABEL: @bar( +;CHECK: store <4 x i32> +;CHECK: store <4 x i32> +;CHECK: ret +;CHECK-NOUNRL-LABEL: @bar( +;CHECK-NOUNRL: store <4 x i32> +;CHECK-NOUNRL-NOT: store <4 x i32> +;CHECK-NOUNRL: ret +define i32 @bar(i32* nocapture %A, i32 %n) nounwind uwtable ssp { + %1 = icmp sgt i32 %n, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] + %2 = getelementptr inbounds i32* %A, i64 %indvars.iv + %3 = load i32* %2, align 4 + %4 = add nsw i32 %3, 6 + store 
i32 %4, i32* %2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + ret i32 undef +} diff --git a/test/Transforms/LoopVectorize/XCore/lit.local.cfg b/test/Transforms/LoopVectorize/XCore/lit.local.cfg new file mode 100644 index 0000000..4d17d46 --- /dev/null +++ b/test/Transforms/LoopVectorize/XCore/lit.local.cfg @@ -0,0 +1,3 @@ +targets = set(config.root.targets_to_build.split()) +if not 'XCore' in targets: + config.unsupported = True diff --git a/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll b/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll new file mode 100644 index 0000000..a099daa --- /dev/null +++ b/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll @@ -0,0 +1,23 @@ +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=2 -S -mtriple=xcore | FileCheck %s + +target datalayout = "e-p:32:32:32-a0:0:32-n32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f16:16:32-f32:32:32-f64:32:32" +target triple = "xcore" +; The xcore target has no vector registers, so loop should not be vectorized. 
+;CHECK-LABEL: @f( +;CHECK: entry: +;CHECK-NOT: vector.body +;CHECK-NEXT: br label %do.body +define void @f(i8* nocapture %ptr, i32 %len) { +entry: + br label %do.body +do.body: + %ptr.addr.0 = phi i8* [ %ptr, %entry ], [ %incdec.ptr, %do.body ] + %len.addr.0 = phi i32 [ %len, %entry ], [ %dec, %do.body ] + %incdec.ptr = getelementptr inbounds i8* %ptr.addr.0, i32 1 + store i8 0, i8* %ptr.addr.0, align 1 + %dec = add nsw i32 %len.addr.0, -1 + %tobool = icmp eq i32 %len.addr.0, 0 + br i1 %tobool, label %do.end, label %do.body +do.end: + ret void +} diff --git a/test/Transforms/LoopVectorize/align.ll b/test/Transforms/LoopVectorize/align.ll new file mode 100644 index 0000000..84b0361 --- /dev/null +++ b/test/Transforms/LoopVectorize/align.ll @@ -0,0 +1,33 @@ +; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +; Make sure we output the abi alignment if no alignment is specified. 
+ +;CHECK-LABEL: @align +;CHECK: load <4 x i32>* {{.*}} align 4 +;CHECK: load <4 x i32>* {{.*}} align 4 +;CHECK: store <4 x i32> {{.*}} align 4 + +define void @align(i32* %a, i32* %b, i32* %c) nounwind uwtable ssp { + br label %1 + +; <label>:1 ; preds = %1, %0 + %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] + %2 = getelementptr inbounds i32* %b, i64 %indvars.iv + %3 = load i32* %2 + %4 = getelementptr inbounds i32* %c, i64 %indvars.iv + %5 = load i32* %4 + %6 = add nsw i32 %5, %3 + %7 = getelementptr inbounds i32* %a, i64 %indvars.iv + store i32 %6, i32* %7 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 128 + br i1 %exitcond, label %8, label %1 + +; <label>:8 ; preds = %1 + ret void +} + diff --git a/test/Transforms/LoopVectorize/bsd_regex.ll b/test/Transforms/LoopVectorize/bsd_regex.ll index a14b92d..7b71272 100644 --- a/test/Transforms/LoopVectorize/bsd_regex.ll +++ b/test/Transforms/LoopVectorize/bsd_regex.ll @@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ;PR 15830. -;CHECK: foo +;CHECK-LABEL: @foo( ; When scalarizing stores we need to preserve the original order. ; Make sure that we are extracting in the correct order (0101, and not 0011). 
;CHECK: extractelement <2 x i64> {{.*}}, i32 0 diff --git a/test/Transforms/LoopVectorize/dbg.value.ll b/test/Transforms/LoopVectorize/dbg.value.ll index b69e72f..2497b25 100644 --- a/test/Transforms/LoopVectorize/dbg.value.ll +++ b/test/Transforms/LoopVectorize/dbg.value.ll @@ -42,13 +42,14 @@ attributes #0 = { nounwind ssp uwtable "fp-contract-model"="standard" "no-frame- attributes #1 = { nounwind readnone } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!26} !0 = metadata !{i32 786449, metadata !25, i32 4, metadata !"clang", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !11, null, metadata !""} !1 = metadata !{i32 0} !2 = metadata !{metadata !3} !3 = metadata !{i32 786478, metadata !25, metadata !4, metadata !"test", metadata !"test", metadata !"test", i32 5, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @test, null, null, metadata !8, i32 5} !4 = metadata !{i32 786473, metadata !25} -!5 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, i32 0} +!5 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !6 = metadata !{metadata !7} !7 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} !8 = metadata !{metadata !9} @@ -56,7 +57,7 @@ attributes #1 = { nounwind readnone } !10 = metadata !{i32 786443, metadata !25, metadata !3, i32 6, i32 0, i32 0} !11 = metadata !{metadata !12, metadata !16, metadata !17} !12 = metadata !{i32 786484, i32 0, null, metadata !"A", metadata !"A", metadata !"", metadata !4, i32 1, metadata !13, i32 0, i32 1, [1024 x i32]* @A, null} -!13 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 32768, i64 32, i32 0, i32 0, metadata !7, metadata !14, i32 0, i32 0} +!13 = metadata !{i32 786433, null, null, null, i32 0, i64 
32768, i64 32, i32 0, i32 0, metadata !7, metadata !14, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 32768, align 32, offset 0] [from int] !14 = metadata !{metadata !15} !15 = metadata !{i32 786465, i64 0, i64 1024} !16 = metadata !{i32 786484, i32 0, null, metadata !"B", metadata !"B", metadata !"", metadata !4, i32 2, metadata !13, i32 0, i32 1, [1024 x i32]* @B, null} @@ -66,3 +67,4 @@ attributes #1 = { nounwind readnone } !20 = metadata !{i32 786443, metadata !25, metadata !10, i32 6, i32 0, i32 1} !24 = metadata !{i32 9, i32 0, metadata !3, null} !25 = metadata !{metadata !"test", metadata !"/path/to/somewhere"} +!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Transforms/LoopVectorize/debugloc.ll b/test/Transforms/LoopVectorize/debugloc.ll index 0a6fc4e..bf0b418 100644 --- a/test/Transforms/LoopVectorize/debugloc.ll +++ b/test/Transforms/LoopVectorize/debugloc.ll @@ -5,17 +5,17 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; Make sure we are preserving debug info in the vectorized code. 
; CHECK: for.body.lr.ph -; CHECK: cmp.zero = icmp eq i64 {{.*}}, 0, !dbg !21 +; CHECK: cmp.zero = icmp eq i64 {{.*}}, 0, !dbg ![[LOC:[0-9]+]] ; CHECK: vector.body -; CHECK: index {{.*}}, !dbg !21 -; CHECK: getelementptr inbounds i32* %a, {{.*}}, !dbg !22 -; CHECK: load <2 x i32>* {{.*}}, !dbg !22 -; CHECK: add <2 x i32> {{.*}}, !dbg !22 -; CHECK: add i64 %index, 2, !dbg !21 -; CHECK: icmp eq i64 %index.next, %end.idx.rnd.down, !dbg !21 +; CHECK: index {{.*}}, !dbg ![[LOC]] +; CHECK: getelementptr inbounds i32* %a, {{.*}}, !dbg ![[LOC2:[0-9]+]] +; CHECK: load <2 x i32>* {{.*}}, !dbg ![[LOC2]] +; CHECK: add <2 x i32> {{.*}}, !dbg ![[LOC2]] +; CHECK: add i64 %index, 2, !dbg ![[LOC]] +; CHECK: icmp eq i64 %index.next, %end.idx.rnd.down, !dbg ![[LOC]] ; CHECK: middle.block -; CHECK: add <2 x i32> %rdx.vec.exit.phi, %rdx.shuf, !dbg !22 -; CHECK: extractelement <2 x i32> %bin.rdx, i32 0, !dbg !22 +; CHECK: add <2 x i32> %rdx.vec.exit.phi, %rdx.shuf, !dbg ![[LOC2]] +; CHECK: extractelement <2 x i32> %bin.rdx, i32 0, !dbg ![[LOC2]] define i32 @f(i32* nocapture %a, i32 %size) #0 { entry: @@ -33,7 +33,7 @@ for.body: ; preds = %for.body.lr.ph, %fo %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] %sum.05 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv, !dbg !22 - %0 = load i32* %arrayidx, align 4, !dbg !22, !tbaa !23 + %0 = load i32* %arrayidx, align 4, !dbg !22 %add = add i32 %0, %sum.05, !dbg !22 tail call void @llvm.dbg.value(metadata !{i32 %add.lcssa}, i64 0, metadata !15), !dbg !22 %indvars.iv.next = add i64 %indvars.iv, 1, !dbg !21 @@ -61,7 +61,7 @@ attributes #0 = { nounwind readonly ssp uwtable "less-precise-fpmad"="false" "no attributes #1 = { nounwind readnone } !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!18} +!llvm.module.flags = !{!18, !27} !0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 185038) (llvm/trunk 185097)", i1 true, metadata 
!"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Data/backedup/dev/os/llvm/debug/-] [DW_LANG_C99] !1 = metadata !{metadata !"-", metadata !"/Volumes/Data/backedup/dev/os/llvm/debug"} @@ -70,7 +70,7 @@ attributes #1 = { nounwind readnone } !4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"f", metadata !"f", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32*, i32)* @f, null, null, metadata !12, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f] !5 = metadata !{metadata !"<stdin>", metadata !"/Volumes/Data/backedup/dev/os/llvm/debug"} !6 = metadata !{i32 786473, metadata !5} ; [ DW_TAG_file_type ] [/Volumes/Data/backedup/dev/os/llvm/debug/<stdin>] -!7 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9, metadata !10, metadata !11} !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int] @@ -86,7 +86,5 @@ attributes #1 = { nounwind readnone } !20 = metadata !{i32 4, i32 0, metadata !4, null} !21 = metadata !{i32 5, i32 0, metadata !17, null} !22 = metadata !{i32 6, i32 0, metadata !17, null} -!23 = metadata !{metadata !"int", metadata !24} -!24 = metadata !{metadata !"omnipotent char", metadata !25} -!25 = metadata !{metadata !"Simple C/C++ TBAA"} !26 = 
metadata !{i32 7, i32 0, metadata !4, null} +!27 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Transforms/LoopVectorize/ee-crash.ll b/test/Transforms/LoopVectorize/ee-crash.ll new file mode 100644 index 0000000..8a4f8ce --- /dev/null +++ b/test/Transforms/LoopVectorize/ee-crash.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +; This test checks that we deal with an in-loop extractelement (for now, this +; means not crashing by not vectorizing). +; CHECK-LABEL: @_Z4foo1Pii( +; CHECK-NOT: <4 x i32> +; CHECK: ret +define i32 @_Z4foo1Pii(i32* %A, i32 %n, <2 x i32> %q) #0 { +entry: + %idx.ext = sext i32 %n to i64 + %add.ptr = getelementptr inbounds i32* %A, i64 %idx.ext + %cmp3.i = icmp eq i32 %n, 0 + br i1 %cmp3.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i + +for.body.i: ; preds = %entry, %for.body.i + %__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ] + %__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ] + %0 = load i32* %__first.addr.04.i, align 4 + %q1 = extractelement <2 x i32> %q, i32 %n + %q2 = add nsw i32 %0, %q1 + %add.i = add nsw i32 %q2, %__init.addr.05.i + %incdec.ptr.i = getelementptr inbounds i32* %__first.addr.04.i, i64 1 + %cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr + br i1 %cmp.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i + +_ZSt10accumulateIPiiET0_T_S2_S1_.exit: ; preds = %for.body.i, %entry + %__init.addr.0.lcssa.i = phi i32 [ 0, %entry ], [ %add.i, %for.body.i ] + ret i32 %__init.addr.0.lcssa.i +} + +attributes #0 = { nounwind readonly ssp uwtable } + diff --git a/test/Transforms/LoopVectorize/funcall.ll b/test/Transforms/LoopVectorize/funcall.ll index 
0fb929f..f1f068c 100644 --- a/test/Transforms/LoopVectorize/funcall.ll +++ b/test/Transforms/LoopVectorize/funcall.ll @@ -7,7 +7,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; contain a limited set of function calls and none of them sets the rounding ; mode, so vectorizing them is safe. -; CHECK: test +; CHECK-LABEL: @test( ; CHECK: <2 x double> define void @test(double* %d, double %t) { diff --git a/test/Transforms/LoopVectorize/global_alias.ll b/test/Transforms/LoopVectorize/global_alias.ll index ae72d3c..0118fb4 100644 --- a/test/Transforms/LoopVectorize/global_alias.ll +++ b/test/Transforms/LoopVectorize/global_alias.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -O3 -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -O1 -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" @@ -336,9 +336,8 @@ for.end: ; preds = %for.cond ; return Foo.A[a]; ; } ; CHECK-LABEL: define i32 @noAlias07( -; CHECK: sub nsw <4 x i32> +; CHECK: store <4 x i32> ; CHECK: ret - define i32 @noAlias07(i32 %a) #0 { entry: %a.addr = alloca i32, align 4 @@ -552,7 +551,7 @@ for.end: ; preds = %for.cond ; return Bar.A[N][a]; ; } ; CHECK-LABEL: define i32 @noAlias11( -; CHECK: sub nsw <4 x i32> +; CHECK: store <4 x i32> ; CHECK: ret define i32 @noAlias11(i32 %a) #0 { @@ -612,7 +611,7 @@ for.end: ; preds = %for.cond ; return Bar.A[N][a]; ; } ; CHECK-LABEL: define i32 @noAlias12( -; CHECK: sub nsw <4 x i32> +; CHECK: store <4 x i32> ; CHECK: ret define i32 @noAlias12(i32 %a) #0 { diff --git a/test/Transforms/LoopVectorize/hoist-loads.ll b/test/Transforms/LoopVectorize/hoist-loads.ll index fad1735..765e14d 100644 --- a/test/Transforms/LoopVectorize/hoist-loads.ll +++ b/test/Transforms/LoopVectorize/hoist-loads.ll @@ -6,7 +6,7 @@ 
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 @B = common global [1024 x float] zeroinitializer, align 16 ; Make sure we can vectorize in the presence of hoistable conditional loads. -; CHECK: hoist_cond_load +; CHECK-LABEL: @hoist_cond_load( ; CHECK: load <2 x float> define void @hoist_cond_load() { @@ -38,7 +38,7 @@ for.end: ; However, we can't hoist loads whose address we have not seen unconditionally ; accessed. -; CHECK: dont_hoist_cond_load +; CHECK-LABEL: @dont_hoist_cond_load( ; CHECK-NOT: load <2 x float> define void @dont_hoist_cond_load() { diff --git a/test/Transforms/LoopVectorize/if-conversion.ll b/test/Transforms/LoopVectorize/if-conversion.ll index 88e56b2..dbe0243 100644 --- a/test/Transforms/LoopVectorize/if-conversion.ll +++ b/test/Transforms/LoopVectorize/if-conversion.ll @@ -106,3 +106,66 @@ for.end: ; preds = %for.inc, %entry ret i32 %sum.0.lcssa } +@a = common global [1 x i32*] zeroinitializer, align 8 +@c = common global i32* null, align 8 + +; We use to if convert this loop. This is not safe because there is a trapping +; constant expression. 
+; PR16729 + +; CHECK-LABEL: trapping_constant_expression +; CHECK-NOT: or <4 x i32> + +define i32 @trapping_constant_expression() { +entry: + br label %for.body + +for.body: + %inc3 = phi i32 [ 0, %entry ], [ %inc, %cond.end ] + %or2 = phi i32 [ 0, %entry ], [ %or, %cond.end ] + br i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 0), i32** @c), label %cond.false, label %cond.end + +cond.false: + br label %cond.end + +cond.end: + %cond = phi i32 [ sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 0), i32** @c) to i32)), %cond.false ], [ 0, %for.body ] + %or = or i32 %or2, %cond + %inc = add nsw i32 %inc3, 1 + %cmp = icmp slt i32 %inc, 128 + br i1 %cmp, label %for.body, label %for.end + +for.end: + ret i32 %or +} + +; Neither should we if-convert if there is an instruction operand that is a +; trapping constant expression. +; PR16729 + +; CHECK-LABEL: trapping_constant_expression2 +; CHECK-NOT: or <4 x i32> + +define i32 @trapping_constant_expression2() { +entry: + br label %for.body + +for.body: + %inc3 = phi i32 [ 0, %entry ], [ %inc, %cond.end ] + %or2 = phi i32 [ 0, %entry ], [ %or, %cond.end ] + br i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 0), i32** @c), label %cond.false, label %cond.end + +cond.false: + %cond.1 = or i32 %inc3, sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 0), i32** @c) to i32)) + br label %cond.end + +cond.end: + %cond = phi i32 [ %cond.1, %cond.false ], [ %inc3, %for.body ] + %or = or i32 %or2, %cond + %inc = add nsw i32 %inc3, 1 + %cmp = icmp slt i32 %inc, 128 + br i1 %cmp, label %for.body, label %for.end + +for.end: + ret i32 %or +} diff --git a/test/Transforms/LoopVectorize/induction.ll b/test/Transforms/LoopVectorize/induction.ll index 48bb438..50c3b6b 100644 --- a/test/Transforms/LoopVectorize/induction.ll +++ b/test/Transforms/LoopVectorize/induction.ll @@ -3,7 +3,7 @@ target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" ; Make sure that we can handle multiple integer induction variables. -; CHECK: multi_int_induction +; CHECK-LABEL: @multi_int_induction( ; CHECK: vector.body: ; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; CHECK: %normalized.idx = sub i64 %index, 0 @@ -28,3 +28,83 @@ for.end: ret void } +; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND + +; Make sure we remove unneeded vectorization of induction variables. +; In order for instcombine to cleanup the vectorized induction variables that we +; create in the loop vectorizer we need to perform some form of redundancy +; elimination to get rid of multiple uses. + +; IND-LABEL: scalar_use + +; IND: br label %vector.body +; IND: vector.body: +; Vectorized induction variable. +; IND-NOT: insertelement <2 x i64> +; IND-NOT: shufflevector <2 x i64> +; IND: br {{.*}}, label %vector.body + +define void @scalar_use(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n) { +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %ind.sum = add i64 %iv, %offset + %arr.idx = getelementptr inbounds float* %a, i64 %ind.sum + %l1 = load float* %arr.idx, align 4 + %ind.sum2 = add i64 %iv, %offset2 + %arr.idx2 = getelementptr inbounds float* %a, i64 %ind.sum2 + %l2 = load float* %arr.idx2, align 4 + %m = fmul fast float %b, %l2 + %ad = fadd fast float %l1, %m + store float %ad, float* %arr.idx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %loopexit, label %for.body + +loopexit: + ret void +} + + +; Make sure that the loop exit count computation does not overflow for i8 and +; i16. The exit count of these loops is i8/i16 max + 1. 
If we don't cast the +; induction variable to a bigger type the exit count computation will overflow +; to 0. +; PR17532 + +; CHECK-LABEL: i8_loop +; CHECK; icmp eq i32 {{.*}}, 256 +define i32 @i8_loop() nounwind readnone ssp uwtable { + br label %1 + +; <label>:1 ; preds = %1, %0 + %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ] + %b.0 = phi i8 [ 0, %0 ], [ %3, %1 ] + %2 = and i32 %a.0, 4 + %3 = add i8 %b.0, -1 + %4 = icmp eq i8 %3, 0 + br i1 %4, label %5, label %1 + +; <label>:5 ; preds = %1 + ret i32 %2 +} + +; CHECK-LABEL: i16_loop +; CHECK; icmp eq i32 {{.*}}, 65536 + +define i32 @i16_loop() nounwind readnone ssp uwtable { + br label %1 + +; <label>:1 ; preds = %1, %0 + %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ] + %b.0 = phi i16 [ 0, %0 ], [ %3, %1 ] + %2 = and i32 %a.0, 4 + %3 = add i16 %b.0, -1 + %4 = icmp eq i16 %3, 0 + br i1 %4, label %5, label %1 + +; <label>:5 ; preds = %1 + ret i32 %2 +} diff --git a/test/Transforms/LoopVectorize/induction_plus.ll b/test/Transforms/LoopVectorize/induction_plus.ll index 6141c39..9c8201a 100644 --- a/test/Transforms/LoopVectorize/induction_plus.ll +++ b/test/Transforms/LoopVectorize/induction_plus.ll @@ -6,8 +6,8 @@ target triple = "x86_64-apple-macosx10.8.0" @array = common global [1024 x i32] zeroinitializer, align 16 ;CHECK-LABEL: @array_at_plus_one( -;CHECK: trunc i64 ;CHECK: add i64 %index, 12 +;CHECK: trunc i64 ;CHECK: ret i32 define i32 @array_at_plus_one(i32 %n) nounwind uwtable ssp { %1 = icmp sgt i32 %n, 0 diff --git a/test/Transforms/LoopVectorize/infiniteloop.ll b/test/Transforms/LoopVectorize/infiniteloop.ll index f6ab564..5c5e1a3 100644 --- a/test/Transforms/LoopVectorize/infiniteloop.ll +++ b/test/Transforms/LoopVectorize/infiniteloop.ll @@ -14,7 +14,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 ; // return SCEVCouldNotCompute. ; For an infinite loop SE can return any number. 
-; CHECK: fn1 +; CHECK-LABEL: @fn1( define void @fn1() { entry: store i64 0, i64* @a, align 8 diff --git a/test/Transforms/LoopVectorize/intrinsic.ll b/test/Transforms/LoopVectorize/intrinsic.ll index 95b53b7..c3d570c 100644 --- a/test/Transforms/LoopVectorize/intrinsic.ll +++ b/test/Transforms/LoopVectorize/intrinsic.ll @@ -468,6 +468,59 @@ for.end: ; preds = %for.body, %entry declare double @llvm.fabs(double) nounwind readnone +;CHECK-LABEL: @copysign_f32( +;CHECK: llvm.copysign.v4f32 +;CHECK: ret void +define void @copysign_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable { +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float* %z, i64 %indvars.iv + %1 = load float* %arrayidx1, align 4 + %call = tail call float @llvm.copysign.f32(float %0, float %1) nounwind readnone + %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv + store float %call, float* %arrayidx2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare float @llvm.copysign.f32(float, float) nounwind readnone + +define void @copysign_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z) nounwind uwtable { +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv + %0 = load double* %arrayidx, align 8 + %arrayidx1 = getelementptr inbounds double* %z, i64 
%indvars.iv + %1 = load double* %arrayidx, align 8 + %call = tail call double @llvm.copysign(double %0, double %1) nounwind readnone + %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv + store double %call, double* %arrayidx2, align 8 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare double @llvm.copysign(double, double) nounwind readnone + ;CHECK-LABEL: @floor_f32( ;CHECK: llvm.floor.v4f32 ;CHECK: ret void @@ -728,6 +781,58 @@ for.end: ; preds = %for.body, %entry declare double @llvm.nearbyint.f64(double) nounwind readnone +;CHECK-LABEL: @round_f32( +;CHECK: llvm.round.v4f32 +;CHECK: ret void +define void @round_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable { +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %call = tail call float @llvm.round.f32(float %0) nounwind readnone + %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv + store float %call, float* %arrayidx2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare float @llvm.round.f32(float) nounwind readnone + +;CHECK-LABEL: @round_f64( +;CHECK: llvm.round.v4f64 +;CHECK: ret void +define void @round_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable { +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi 
i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv + %0 = load double* %arrayidx, align 8 + %call = tail call double @llvm.round.f64(double %0) nounwind readnone + %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv + store double %call, double* %arrayidx2, align 8 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare double @llvm.round.f64(double) nounwind readnone + ;CHECK-LABEL: @fma_f32( ;CHECK: llvm.fma.v4f32 ;CHECK: ret void @@ -927,3 +1032,61 @@ for.end: ; preds = %for.body declare float @fabsf(float) nounwind readnone declare double @llvm.pow.f64(double, double) nounwind readnone + + + +; Make sure we don't replace calls to functions with standard library function +; signatures but defined with internal linkage. + +define internal float @roundf(float %x) nounwind readnone { + ret float 0.00000000 +} +; CHECK-LABEL: internal_round +; CHECK-NOT: load <4 x float> + +define void @internal_round(float* nocapture %x) nounwind { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds float* %x, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %call = tail call float @roundf(float %0) nounwind readnone + store float %call, float* %arrayidx, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; Make sure we don't replace calls to functions with standard library names but +; different signatures. 
+ +declare void @round(double %f) + +; CHECK-LABEL: wrong_signature +; CHECK-NOT: load <4 x double> + +define void @wrong_signature(double* nocapture %x) nounwind { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds double* %x, i64 %indvars.iv + %0 = load double* %arrayidx, align 4 + store double %0, double* %arrayidx, align 4 + tail call void @round(double %0) nounwind readnone + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + diff --git a/test/Transforms/LoopVectorize/lifetime.ll b/test/Transforms/LoopVectorize/lifetime.ll index 87006ed..4f6f3b8 100644 --- a/test/Transforms/LoopVectorize/lifetime.ll +++ b/test/Transforms/LoopVectorize/lifetime.ll @@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; Make sure we can vectorize loops which contain lifetime markers. 
-; CHECK-LABEL: test +; CHECK-LABEL: @test( ; CHECK: call void @llvm.lifetime.end ; CHECK: store <2 x i32> ; CHECK: call void @llvm.lifetime.start @@ -33,7 +33,7 @@ for.end: ret void } -; CHECK-LABEL: testbitcast +; CHECK-LABEL: @testbitcast( ; CHECK: call void @llvm.lifetime.end ; CHECK: store <2 x i32> ; CHECK: call void @llvm.lifetime.start @@ -63,7 +63,7 @@ for.end: ret void } -; CHECK-LABEL: testloopvariant +; CHECK-LABEL: @testloopvariant( ; CHECK: call void @llvm.lifetime.end ; CHECK: store <2 x i32> ; CHECK: call void @llvm.lifetime.start diff --git a/test/Transforms/LoopVectorize/lit.local.cfg b/test/Transforms/LoopVectorize/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/LoopVectorize/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/LoopVectorize/memdep.ll b/test/Transforms/LoopVectorize/memdep.ll index b6d9e2e..21cb703 100644 --- a/test/Transforms/LoopVectorize/memdep.ll +++ b/test/Transforms/LoopVectorize/memdep.ll @@ -9,7 +9,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; for (i = 0; i < 1024; ++i) ; A[i] = A[i + 1] + 1; -; CHECK: f1_vec +; CHECK-LABEL: @f1_vec( ; CHECK: <2 x i32> define void @f1_vec(i32* %A) { @@ -35,7 +35,7 @@ for.end: ; for (i = 0; i < 1024; ++i) ; A[i+1] = A[i] + 1; -; CHECK: f2_novec +; CHECK-LABEL: @f2_novec( ; CHECK-NOT: <2 x i32> define void @f2_novec(i32* %A) { @@ -61,7 +61,7 @@ for.end: ; for (i = 0; i < 1024; ++i) ; A[i+2] = A[i] + 1; -; CHECK: f3_vec_len +; CHECK-LABEL: @f3_vec_len( ; CHECK: <2 x i32> ; WIDTH: f3_vec_len @@ -96,7 +96,7 @@ for.end: ; A[i] = B[i + 1]; ; } -; CHECK: f5 +; CHECK-LABEL: @f5( ; CHECK-NOT: <2 x i32> define void @f5(i32* %A, i32* %B) { @@ -127,7 +127,7 @@ for.end: ; tmp = a[i]; ; } -; CHECK: f6 +; CHECK-LABEL: @f6 ; CHECK-NOT: <2 x i32> define i32 @f6(i32* %a, i32 %tmp) { diff --git a/test/Transforms/LoopVectorize/minmax_reduction.ll 
b/test/Transforms/LoopVectorize/minmax_reduction.ll index bade561..0e47260 100644 --- a/test/Transforms/LoopVectorize/minmax_reduction.ll +++ b/test/Transforms/LoopVectorize/minmax_reduction.ll @@ -17,7 +17,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: icmp sgt <2 x i32> -; CHECK: select <2 x i1> +; CHECK: select i1 define i32 @max_red(i32 %max) { entry: @@ -46,7 +46,7 @@ for.end: ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: icmp sgt <2 x i32> -; CHECK: select <2 x i1> +; CHECK: select i1 define i32 @max_red_inverse_select(i32 %max) { entry: @@ -74,7 +74,7 @@ for.end: ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: icmp slt <2 x i32> -; CHECK: select <2 x i1> +; CHECK: select i1 define i32 @min_red(i32 %max) { entry: @@ -103,7 +103,7 @@ for.end: ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: icmp slt <2 x i32> -; CHECK: select <2 x i1> +; CHECK: select i1 define i32 @min_red_inverse_select(i32 %max) { entry: @@ -133,7 +133,7 @@ for.end: ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: icmp ugt <2 x i32> -; CHECK: select <2 x i1> +; CHECK: select i1 define i32 @umax_red(i32 %max) { entry: @@ -162,7 +162,7 @@ for.end: ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: icmp ugt <2 x i32> -; CHECK: select <2 x i1> +; CHECK: select i1 define i32 @umax_red_inverse_select(i32 %max) { entry: @@ -190,7 +190,7 @@ for.end: ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: icmp ult <2 x i32> -; CHECK: select <2 x i1> +; CHECK: select i1 define i32 @umin_red(i32 %max) { entry: @@ -219,7 +219,7 @@ for.end: ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: icmp ult <2 x i32> -; CHECK: select <2 x i1> +; CHECK: select i1 define i32 @umin_red_inverse_select(i32 %max) { entry: @@ -248,7 +248,7 @@ for.end: ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: icmp slt <2 x i32> -; CHECK: select <2 x i1> +; CHECK: select i1 define i32 
@sge_min_red(i32 %max) { entry: @@ -277,7 +277,7 @@ for.end: ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: icmp sgt <2 x i32> -; CHECK: select <2 x i1> +; CHECK: select i1 define i32 @sle_min_red(i32 %max) { entry: @@ -306,7 +306,7 @@ for.end: ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: icmp ult <2 x i32> -; CHECK: select <2 x i1> +; CHECK: select i1 define i32 @uge_min_red(i32 %max) { entry: @@ -335,7 +335,7 @@ for.end: ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: icmp ugt <2 x i32> -; CHECK: select <2 x i1> +; CHECK: select i1 define i32 @ule_min_red(i32 %max) { entry: @@ -416,7 +416,7 @@ for.end: ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: fcmp ogt <2 x float> -; CHECK: select <2 x i1> +; CHECK: select i1 define float @max_red_float(float %max) #0 { entry: @@ -442,7 +442,7 @@ for.end: ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: fcmp ogt <2 x float> -; CHECK: select <2 x i1> +; CHECK: select i1 define float @max_red_float_ge(float %max) #0 { entry: @@ -468,7 +468,7 @@ for.end: ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: fcmp ogt <2 x float> -; CHECK: select <2 x i1> +; CHECK: select i1 define float @inverted_max_red_float(float %max) #0 { entry: @@ -494,7 +494,7 @@ for.end: ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: fcmp ogt <2 x float> -; CHECK: select <2 x i1> +; CHECK: select i1 define float @inverted_max_red_float_le(float %max) #0 { entry: @@ -515,12 +515,12 @@ for.end: ret float %max.red.0 } -; CHECK: @unordered_max_red +; CHECK-LABEL: @unordered_max_red_float( ; CHECK: fcmp ugt <2 x float> ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: fcmp ogt <2 x float> -; CHECK: select <2 x i1> +; CHECK: select i1 define float @unordered_max_red_float(float %max) #0 { entry: @@ -546,7 +546,7 @@ for.end: ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: fcmp ogt <2 x float> -; CHECK: select <2 x i1> +; CHECK: select i1 define float @unordered_max_red_float_ge(float %max) #0 
{ entry: @@ -567,12 +567,12 @@ for.end: ret float %max.red.0 } -; CHECK: @inverted_unordered_max_red +; CHECK-LABEL: @inverted_unordered_max_red_float( ; CHECK: fcmp ult <2 x float> ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: fcmp ogt <2 x float> -; CHECK: select <2 x i1> +; CHECK: select i1 define float @inverted_unordered_max_red_float(float %max) #0 { entry: @@ -598,7 +598,7 @@ for.end: ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: fcmp ogt <2 x float> -; CHECK: select <2 x i1> +; CHECK: select i1 define float @inverted_unordered_max_red_float_le(float %max) #0 { entry: @@ -627,7 +627,7 @@ for.end: ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: fcmp olt <2 x float> -; CHECK: select <2 x i1> +; CHECK: select i1 define float @min_red_float(float %min) #0 { entry: @@ -653,7 +653,7 @@ for.end: ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: fcmp olt <2 x float> -; CHECK: select <2 x i1> +; CHECK: select i1 define float @min_red_float_le(float %min) #0 { entry: @@ -679,7 +679,7 @@ for.end: ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: fcmp olt <2 x float> -; CHECK: select <2 x i1> +; CHECK: select i1 define float @inverted_min_red_float(float %min) #0 { entry: @@ -705,7 +705,7 @@ for.end: ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: fcmp olt <2 x float> -; CHECK: select <2 x i1> +; CHECK: select i1 define float @inverted_min_red_float_ge(float %min) #0 { entry: @@ -726,12 +726,12 @@ for.end: ret float %min.red.0 } -; CHECK: @unordered_min_red +; CHECK-LABEL: @unordered_min_red_float( ; CHECK: fcmp ult <2 x float> ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: fcmp olt <2 x float> -; CHECK: select <2 x i1> +; CHECK: select i1 define float @unordered_min_red_float(float %min) #0 { entry: @@ -757,7 +757,7 @@ for.end: ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: fcmp olt <2 x float> -; CHECK: select <2 x i1> +; CHECK: select i1 define float @unordered_min_red_float_le(float %min) #0 { entry: 
@@ -778,12 +778,12 @@ for.end: ret float %min.red.0 } -; CHECK: @inverted_unordered_min_red +; CHECK-LABEL: @inverted_unordered_min_red_float( ; CHECK: fcmp ugt <2 x float> ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: fcmp olt <2 x float> -; CHECK: select <2 x i1> +; CHECK: select i1 define float @inverted_unordered_min_red_float(float %min) #0 { entry: @@ -809,7 +809,7 @@ for.end: ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: fcmp olt <2 x float> -; CHECK: select <2 x i1> +; CHECK: select i1 define float @inverted_unordered_min_red_float_ge(float %min) #0 { entry: @@ -836,7 +836,7 @@ for.end: ; CHECK: select <2 x i1> ; CHECK: middle.block ; CHECK: fcmp olt <2 x double> -; CHECK: select <2 x i1> +; CHECK: select i1 define double @min_red_double(double %min) #0 { entry: @@ -882,4 +882,4 @@ for.end: } -attributes #0 = { "no-nans-fp-math"="true" } +attributes #0 = { "no-nans-fp-math"="true" } diff --git a/test/Transforms/LoopVectorize/multiple-address-spaces.ll b/test/Transforms/LoopVectorize/multiple-address-spaces.ll index 6906195..7d836de 100644 --- a/test/Transforms/LoopVectorize/multiple-address-spaces.ll +++ b/test/Transforms/LoopVectorize/multiple-address-spaces.ll @@ -28,10 +28,10 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %arrayidx = getelementptr inbounds [40000 x i8] addrspace(1)* @Y, i64 0, i64 %indvars.iv - %0 = load i8 addrspace(1)* %arrayidx, align 1, !tbaa !0 + %0 = load i8 addrspace(1)* %arrayidx, align 1 %add = add i8 %0, 1 %arrayidx3 = getelementptr inbounds [40000 x i8]* @X, i64 0, i64 %indvars.iv - store i8 %add, i8* %arrayidx3, align 1, !tbaa !0 + store i8 %add, i8* %arrayidx3, align 1 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, 40000 @@ -42,6 +42,3 @@ for.end: ; preds = %for.body } attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" 
"no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } - -!0 = metadata !{metadata !"omnipotent char", metadata !1} -!1 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/Transforms/LoopVectorize/no_int_induction.ll b/test/Transforms/LoopVectorize/no_int_induction.ll index 66d5301..e572d1a 100644 --- a/test/Transforms/LoopVectorize/no_int_induction.ll +++ b/test/Transforms/LoopVectorize/no_int_induction.ll @@ -4,10 +4,10 @@ ; return std::accumulate(A, A + n, 0); ; } -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.8.0" +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-n8:16:32:64-S128" ;CHECK-LABEL: @sum_array( +;CHECK: phi i64 ;CHECK: phi <4 x i32> ;CHECK: load <4 x i32> ;CHECK: add nsw <4 x i32> @@ -31,3 +31,30 @@ _ZSt10accumulateIPiiET0_T_S2_S1_.exit: ; preds = %.lr.ph.i, %0 %.01.lcssa.i = phi i32 [ 0, %0 ], [ %5, %.lr.ph.i ] ret i32 %.01.lcssa.i } + +; Same, but use a pointer with a different size. 
+;CHECK-LABEL: @sum_array_as1( +;CHECK: phi i16 +;CHECK: phi <4 x i32> +;CHECK: load <4 x i32> +;CHECK: add nsw <4 x i32> +;CHECK: ret i32 +define i32 @sum_array_as1(i32 addrspace(1)* %A, i32 %n) nounwind uwtable readonly noinline ssp { + %1 = sext i32 %n to i64 + %2 = getelementptr inbounds i32 addrspace(1)* %A, i64 %1 + %3 = icmp eq i32 %n, 0 + br i1 %3, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %.lr.ph.i + +.lr.ph.i: ; preds = %0, %.lr.ph.i + %.03.i = phi i32 addrspace(1)* [ %6, %.lr.ph.i ], [ %A, %0 ] + %.012.i = phi i32 [ %5, %.lr.ph.i ], [ 0, %0 ] + %4 = load i32 addrspace(1)* %.03.i, align 4 + %5 = add nsw i32 %4, %.012.i + %6 = getelementptr inbounds i32 addrspace(1)* %.03.i, i64 1 + %7 = icmp eq i32 addrspace(1)* %6, %2 + br i1 %7, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %.lr.ph.i + +_ZSt10accumulateIPiiET0_T_S2_S1_.exit: ; preds = %.lr.ph.i, %0 + %.01.lcssa.i = phi i32 [ 0, %0 ], [ %5, %.lr.ph.i ] + ret i32 %.01.lcssa.i +} diff --git a/test/Transforms/LoopVectorize/no_outside_user.ll b/test/Transforms/LoopVectorize/no_outside_user.ll index 6f0357c..1f891ad 100644 --- a/test/Transforms/LoopVectorize/no_outside_user.ll +++ b/test/Transforms/LoopVectorize/no_outside_user.ll @@ -12,6 +12,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 ; We used to vectorize this loop. But it has a value that is used outside of the ; and is not a recognized reduction variable "tmp17". +; CHECK-LABEL: @main( ; CHECK-NOT: <2 x i32> define i32 @main() { @@ -38,4 +39,33 @@ f1.exit.loopexit: ret i32 %.lcssa } +; Don't vectorize this loop. Its phi node (induction variable) has an outside +; loop user. We currently don't handle this case. 
+; PR17179 +; CHECK-LABEL: @test2( +; CHECK-NOT: <2 x + +@x1 = common global i32 0, align 4 +@x2 = common global i32 0, align 4 +@x0 = common global i32 0, align 4 + +define i32 @test2() { +entry: + store i32 0, i32* @x1, align 4 + %0 = load i32* @x0, align 4 + br label %for.cond1.preheader + +for.cond1.preheader: + %inc7 = phi i32 [ 0, %entry ], [ %inc, %for.cond1.preheader ] + %inc = add nsw i32 %inc7, 1 + %cmp = icmp eq i32 %inc, 52 + br i1 %cmp, label %for.end5, label %for.cond1.preheader + +for.end5: + %inc7.lcssa = phi i32 [ %inc7, %for.cond1.preheader ] + %xor = xor i32 %inc7.lcssa, %0 + store i32 52, i32* @x1, align 4 + store i32 1, i32* @x2, align 4 + ret i32 %xor +} diff --git a/test/Transforms/LoopVectorize/opt.ll b/test/Transforms/LoopVectorize/opt.ll new file mode 100644 index 0000000..27030a2 --- /dev/null +++ b/test/Transforms/LoopVectorize/opt.ll @@ -0,0 +1,28 @@ +; RUN: opt -S -O3 -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck --check-prefix=LOOPVEC %s +; RUN: opt -S -O3 -disable-loop-vectorization -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck --check-prefix=NOLOOPVEC %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +; Make sure we can disable vectorization in opt. 
+ +; LOOPVEC: add <2 x i32> +; NOLOOPVEC-NOT: add <2 x i32> + +define i32 @vect(i32* %a) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %red.05 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %add = add nsw i32 %0, %red.05 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 255 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 %add +} diff --git a/test/Transforms/LoopVectorize/ptr_loops.ll b/test/Transforms/LoopVectorize/ptr_loops.ll index 25599f8..15983f0 100644 --- a/test/Transforms/LoopVectorize/ptr_loops.ll +++ b/test/Transforms/LoopVectorize/ptr_loops.ll @@ -6,7 +6,7 @@ target triple = "x86_64-apple-macosx10.8.0" @A = global [36 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35], align 16 @B = global [36 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35], align 16 -;CHECK:_Z5test1v +;CHECK-LABEL:@_Z5test1v( ;CHECK: load <4 x i32> ;CHECK: shufflevector <4 x i32> ;CHECK: store <4 x i32> @@ -29,7 +29,7 @@ define i32 @_Z5test1v() nounwind uwtable ssp { ret i32 0 } -;CHECK:_Z5test2v +;CHECK-LABEL: @_Z5test2v( ;CHECK: load <4 x i32> ;CHECK: shufflevector <4 x i32> ;CHECK: store <4 x i32> diff --git a/test/Transforms/LoopVectorize/reduction.ll b/test/Transforms/LoopVectorize/reduction.ll index 18a0a93..791fce1 100644 --- a/test/Transforms/LoopVectorize/reduction.ll +++ 
b/test/Transforms/LoopVectorize/reduction.ll @@ -467,3 +467,30 @@ for.body: for.end: ret i32 %p.addr.02 } + +; Don't vectorize a reduction value that is not the last in a reduction cyle. We +; would loose iterations (VF-1) on the operations after that use. +; PR17498 + +; CHECK-LABEL: not_last_operation +; CHECK-NOT: x i32> +define i32 @not_last_operation(i32 %p, i32 %val) { +entry: + %tobool = icmp eq i32 %p, 0 + br label %for.body + +for.body: + %inc613.1 = phi i32 [ 0, %entry ], [ %inc6.1, %for.body ] + %inc511.1 = phi i32 [ %val, %entry ], [ %inc5.1, %for.body ] + %0 = zext i1 %tobool to i32 + %inc4.1 = xor i32 %0, 1 + %inc511.1.inc4.1 = add nsw i32 %inc511.1, %inc4.1 + %inc5.1 = add nsw i32 %inc511.1.inc4.1, 1 + %inc6.1 = add nsw i32 %inc613.1, 1 + %exitcond.1 = icmp eq i32 %inc6.1, 22 + br i1 %exitcond.1, label %exit, label %for.body + +exit: + %inc.2 = add nsw i32 %inc511.1.inc4.1, 2 + ret i32 %inc.2 +} diff --git a/test/Transforms/LoopVectorize/reverse_induction.ll b/test/Transforms/LoopVectorize/reverse_induction.ll index 9e8c1b1..65ef95d 100644 --- a/test/Transforms/LoopVectorize/reverse_induction.ll +++ b/test/Transforms/LoopVectorize/reverse_induction.ll @@ -5,7 +5,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; Make sure consecutive vector generates correct negative indices. 
; PR15882 -; CHECK: reverse_induction_i64 +; CHECK-LABEL: @reverse_induction_i64( ; CHECK: add <4 x i64> %[[SPLAT:.*]], <i64 0, i64 -1, i64 -2, i64 -3> ; CHECK: add <4 x i64> %[[SPLAT]], <i64 -4, i64 -5, i64 -6, i64 -7> @@ -29,7 +29,7 @@ loopend: ret i32 %inc.redux } -; CHECK: reverse_induction_i128 +; CHECK-LABEL: @reverse_induction_i128( ; CHECK: add <4 x i128> %[[SPLAT:.*]], <i128 0, i128 -1, i128 -2, i128 -3> ; CHECK: add <4 x i128> %[[SPLAT]], <i128 -4, i128 -5, i128 -6, i128 -7> define i32 @reverse_induction_i128(i128 %startval, i32 * %ptr) { @@ -52,7 +52,7 @@ loopend: ret i32 %inc.redux } -; CHECK: reverse_induction_i16 +; CHECK-LABEL: @reverse_induction_i16( ; CHECK: add <4 x i16> %[[SPLAT:.*]], <i16 0, i16 -1, i16 -2, i16 -3> ; CHECK: add <4 x i16> %[[SPLAT]], <i16 -4, i16 -5, i16 -6, i16 -7> @@ -93,7 +93,7 @@ loopend: ; } ; } -; CHECK: reverse_forward_induction_i64_i8 +; CHECK-LABEL: @reverse_forward_induction_i64_i8( ; CHECK: vector.body ; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; CHECK: %normalized.idx = sub i64 %index, 0 @@ -120,7 +120,7 @@ while.end: ret void } -; CHECK: reverse_forward_induction_i64_i8_signed +; CHECK-LABEL: @reverse_forward_induction_i64_i8_signed( ; CHECK: vector.body: ; CHECK: %index = phi i64 [ 129, %vector.ph ], [ %index.next, %vector.body ] ; CHECK: %normalized.idx = sub i64 %index, 129 diff --git a/test/Transforms/LoopVectorize/runtime-check-address-space.ll b/test/Transforms/LoopVectorize/runtime-check-address-space.ll new file mode 100644 index 0000000..6c86561 --- /dev/null +++ b/test/Transforms/LoopVectorize/runtime-check-address-space.ll @@ -0,0 +1,235 @@ +; RUN: opt -S -march=r600 -mcpu=cayman -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s + +; Check vectorization that would ordinarily require a runtime bounds +; check on the pointers when mixing address spaces. 
For now we cannot +; assume address spaces do not alias, and we can't assume that +; different pointers are directly comparable. +; +; These all test this basic loop for different combinations of address +; spaces, and swapping in globals or adding noalias. +; +;void foo(int addrspace(N)* [noalias] a, int addrspace(M)* [noalias] b, int n) +;{ +; for (int i = 0; i < n; ++i) +; { +; a[i] = 3 * b[i]; +; } +;} + +; Artificial datalayout +target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64" + + +@g_as1 = common addrspace(1) global [1024 x i32] zeroinitializer, align 16 +@q_as2 = common addrspace(2) global [1024 x i32] zeroinitializer, align 16 + +; Both parameters are unidentified objects with the same address +; space, so this should vectorize normally. +define void @foo(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %n) #0 { +; CHECK-LABEL: @foo( +; CHECK: <4 x i32> +; CHECK: ret + +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp slt i32 %i.0, %n + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %idxprom = sext i32 %i.0 to i64 + %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %idxprom + %0 = load i32 addrspace(1)* %arrayidx, align 4 + %mul = mul nsw i32 %0, 3 + %idxprom1 = sext i32 %i.0 to i64 + %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %idxprom1 + store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +; Parameters are unidentified and different address spaces, so cannot vectorize. 
+define void @bar0(i32* %a, i32 addrspace(1)* %b, i32 %n) #0 { +; CHECK-LABEL: @bar0( +; CHECK-NOT: <4 x i32> +; CHECK: ret + +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp slt i32 %i.0, %n + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %idxprom = sext i32 %i.0 to i64 + %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %idxprom + %0 = load i32 addrspace(1)* %arrayidx, align 4 + %mul = mul nsw i32 %0, 3 + %idxprom1 = sext i32 %i.0 to i64 + %arrayidx2 = getelementptr inbounds i32* %a, i64 %idxprom1 + store i32 %mul, i32* %arrayidx2, align 4 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +; Swapped arguments should be the same +define void @bar1(i32 addrspace(1)* %a, i32* %b, i32 %n) #0 { +; CHECK-LABEL: @bar1( +; CHECK-NOT: <4 x i32> +; CHECK: ret + +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp slt i32 %i.0, %n + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %idxprom = sext i32 %i.0 to i64 + %arrayidx = getelementptr inbounds i32* %b, i64 %idxprom + %0 = load i32* %arrayidx, align 4 + %mul = mul nsw i32 %0, 3 + %idxprom1 = sext i32 %i.0 to i64 + %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %idxprom1 + store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +; We should still be able to vectorize with noalias even if the +; address spaces are different. 
+define void @bar2(i32* noalias %a, i32 addrspace(1)* noalias %b, i32 %n) #0 { +; CHECK-LABEL: @bar2( +; CHECK: <4 x i32> +; CHECK: ret + +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp slt i32 %i.0, %n + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %idxprom = sext i32 %i.0 to i64 + %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %idxprom + %0 = load i32 addrspace(1)* %arrayidx, align 4 + %mul = mul nsw i32 %0, 3 + %idxprom1 = sext i32 %i.0 to i64 + %arrayidx2 = getelementptr inbounds i32* %a, i64 %idxprom1 + store i32 %mul, i32* %arrayidx2, align 4 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +; Store to identified global with different address space. This isn't +; generally safe and shouldn't be vectorized. +define void @arst0(i32* %b, i32 %n) #0 { +; CHECK-LABEL: @arst0( +; CHECK-NOT: <4 x i32> +; CHECK: ret + +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp slt i32 %i.0, %n + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %idxprom = sext i32 %i.0 to i64 + %arrayidx = getelementptr inbounds i32* %b, i64 %idxprom + %0 = load i32* %arrayidx, align 4 + %mul = mul nsw i32 %0, 3 + %idxprom1 = sext i32 %i.0 to i64 + %arrayidx2 = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom1 + store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + + +; Load from identified global with different address space. +; This isn't generally safe and shouldn't be vectorized. 
+define void @arst1(i32* %b, i32 %n) #0 { +; CHECK-LABEL: @arst1( +; CHECK-NOT: <4 x i32> +; CHECK: ret + +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp slt i32 %i.0, %n + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %idxprom = sext i32 %i.0 to i64 + %arrayidx = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom + %0 = load i32 addrspace(1)* %arrayidx, align 4 + %mul = mul nsw i32 %0, 3 + %idxprom1 = sext i32 %i.0 to i64 + %arrayidx2 = getelementptr inbounds i32* %b, i64 %idxprom1 + store i32 %mul, i32* %arrayidx2, align 4 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +; Read and write to 2 identified globals in different address +; spaces. This should be vectorized. +define void @aoeu(i32 %n) #0 { +; CHECK-LABEL: @aoeu( +; CHECK: <4 x i32> +; CHECK: ret + +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp slt i32 %i.0, %n + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %idxprom = sext i32 %i.0 to i64 + %arrayidx = getelementptr inbounds [1024 x i32] addrspace(2)* @q_as2, i64 0, i64 %idxprom + %0 = load i32 addrspace(2)* %arrayidx, align 4 + %mul = mul nsw i32 %0, 3 + %idxprom1 = sext i32 %i.0 to i64 + %arrayidx2 = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom1 + store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git 
a/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll b/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll new file mode 100644 index 0000000..212b37c --- /dev/null +++ b/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll @@ -0,0 +1,142 @@ +; RUN: opt -S -march=r600 -mcpu=cayman -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s + +; Artificial datalayout +target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64" + + +define void @add_ints_1_1_1(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #0 { +; CHECK-LABEL: @add_ints_1_1_1( +; CHECK: <4 x i32> +; CHECK: ret +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp ult i64 %i.0, 200 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0 + %0 = load i32 addrspace(1)* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %c, i64 %i.0 + %1 = load i32 addrspace(1)* %arrayidx1, align 4 + %add = add nsw i32 %0, %1 + %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %i.0 + store i32 %add, i32 addrspace(1)* %arrayidx2, align 4 + %inc = add i64 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +define void @add_ints_as_1_0_0(i32 addrspace(1)* %a, i32* %b, i32* %c) #0 { +; CHECK-LABEL: @add_ints_as_1_0_0( +; CHECK-NOT: <4 x i32> +; CHECK: ret +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp ult i64 %i.0, 200 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx 
= getelementptr inbounds i32* %b, i64 %i.0 + %0 = load i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32* %c, i64 %i.0 + %1 = load i32* %arrayidx1, align 4 + %add = add nsw i32 %0, %1 + %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %i.0 + store i32 %add, i32 addrspace(1)* %arrayidx2, align 4 + %inc = add i64 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +define void @add_ints_as_0_1_0(i32* %a, i32 addrspace(1)* %b, i32* %c) #0 { +; CHECK-LABEL: @add_ints_as_0_1_0( +; CHECK-NOT: <4 x i32> +; CHECK: ret +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp ult i64 %i.0, 200 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0 + %0 = load i32 addrspace(1)* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32* %c, i64 %i.0 + %1 = load i32* %arrayidx1, align 4 + %add = add nsw i32 %0, %1 + %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.0 + store i32 %add, i32* %arrayidx2, align 4 + %inc = add i64 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +define void @add_ints_as_0_1_1(i32* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #0 { +; CHECK-LABEL: @add_ints_as_0_1_1( +; CHECK-NOT: <4 x i32> +; CHECK: ret +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp ult i64 %i.0, 200 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0 + %0 = load i32 addrspace(1)* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %c, i64 %i.0 + %1 = load i32 addrspace(1)* %arrayidx1, align 4 + %add = add nsw i32 %0, %1 + %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.0 + store i32 %add, i32* %arrayidx2, align 4 + 
%inc = add i64 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +define void @add_ints_as_0_1_2(i32* %a, i32 addrspace(1)* %b, i32 addrspace(2)* %c) #0 { +; CHECK-LABEL: @add_ints_as_0_1_2( +; CHECK-NOT: <4 x i32> +; CHECK: ret +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp ult i64 %i.0, 200 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0 + %0 = load i32 addrspace(1)* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32 addrspace(2)* %c, i64 %i.0 + %1 = load i32 addrspace(2)* %arrayidx1, align 4 + %add = add nsw i32 %0, %1 + %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.0 + store i32 %add, i32* %arrayidx2, align 4 + %inc = add i64 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/Transforms/LoopVectorize/runtime-check-readonly.ll b/test/Transforms/LoopVectorize/runtime-check-readonly.ll index 4145d13..a2b9ad9 100644 --- a/test/Transforms/LoopVectorize/runtime-check-readonly.ll +++ b/test/Transforms/LoopVectorize/runtime-check-readonly.ll @@ -3,7 +3,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" -;CHECK: add_ints +;CHECK-LABEL: @add_ints( ;CHECK: br ;CHECK: getelementptr ;CHECK-NEXT: getelementptr diff --git a/test/Transforms/LoopVectorize/runtime-check.ll b/test/Transforms/LoopVectorize/runtime-check.ll index 4772256..d15479d 100644 --- 
a/test/Transforms/LoopVectorize/runtime-check.ll +++ b/test/Transforms/LoopVectorize/runtime-check.ll @@ -34,3 +34,31 @@ for.body: ; preds = %entry, %for.body for.end: ; preds = %for.body, %entry ret i32 undef } + +; Make sure that we try to vectorize loops with a runtime check if the +; dependency check fails. + +; CHECK-LABEL: test_runtime_check +; CHECK: <4 x float> +define void @test_runtime_check(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n) { +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %ind.sum = add i64 %iv, %offset + %arr.idx = getelementptr inbounds float* %a, i64 %ind.sum + %l1 = load float* %arr.idx, align 4 + %ind.sum2 = add i64 %iv, %offset2 + %arr.idx2 = getelementptr inbounds float* %a, i64 %ind.sum2 + %l2 = load float* %arr.idx2, align 4 + %m = fmul fast float %b, %l2 + %ad = fadd fast float %l1, %m + store float %ad, float* %arr.idx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %loopexit, label %for.body + +loopexit: + ret void +} diff --git a/test/Transforms/LoopVectorize/runtime-limit.ll b/test/Transforms/LoopVectorize/runtime-limit.ll index d783974..7370a6f 100644 --- a/test/Transforms/LoopVectorize/runtime-limit.ll +++ b/test/Transforms/LoopVectorize/runtime-limit.ll @@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 target triple = "x86_64-apple-macosx10.8.0" ; We are vectorizing with 6 runtime checks. -;CHECK: func1x6 +;CHECK-LABEL: func1x6( ;CHECK: <4 x i32> ;CHECK: ret define i32 @func1x6(i32* nocapture %out, i32* nocapture %A, i32* nocapture %B, i32* nocapture %C, i32* nocapture %D, i32* nocapture %E, i32* nocapture %F) { @@ -38,7 +38,7 @@ for.end: ; preds = %for.body } ; We are not vectorizing with 12 runtime checks. 
-;CHECK: func2x6 +;CHECK-LABEL: func2x6( ;CHECK-NOT: <4 x i32> ;CHECK: ret define i32 @func2x6(i32* nocapture %out, i32* nocapture %out2, i32* nocapture %A, i32* nocapture %B, i32* nocapture %C, i32* nocapture %D, i32* nocapture %E, i32* nocapture %F) { diff --git a/test/Transforms/LoopVectorize/safegep.ll b/test/Transforms/LoopVectorize/safegep.ll index 46ec28b..c950860 100644 --- a/test/Transforms/LoopVectorize/safegep.ll +++ b/test/Transforms/LoopVectorize/safegep.ll @@ -8,7 +8,7 @@ target datalayout = "e-p:32:32:32-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32: ; PR16592 -; CHECK: safe +; CHECK-LABEL: @safe( ; CHECK: <4 x float> define void @safe(float* %A, float* %B, float %K) { @@ -34,7 +34,7 @@ return: ; In a non-default address space we don't have this rule. -; CHECK: notsafe +; CHECK-LABEL: @notsafe( ; CHECK-NOT: <4 x float> define void @notsafe(float addrspace(5) * %A, float* %B, float %K) { diff --git a/test/Transforms/LoopVectorize/scev-exitlim-crash.ll b/test/Transforms/LoopVectorize/scev-exitlim-crash.ll index 7687738..683621a 100644 --- a/test/Transforms/LoopVectorize/scev-exitlim-crash.ll +++ b/test/Transforms/LoopVectorize/scev-exitlim-crash.ll @@ -10,7 +10,7 @@ target triple = "x86_64-apple-macosx" @e = common global i32* null, align 8 @c = common global i32 0, align 4 -; CHECK-LABEL-LABEL: @fn1( +; CHECK-LABEL: @fn1( ; CHECK: vector.body define void @fn1() #0 { entry: @@ -29,14 +29,14 @@ for.cond4.preheader: ; preds = %for.cond br i1 %cmp514, label %for.cond7.preheader.lr.ph, label %for.end26 for.cond7.preheader.lr.ph: ; preds = %for.cond4.preheader - %0 = load i32** @e, align 8, !tbaa !0 + %0 = load i32** @e, align 8, !tbaa !4 br label %for.cond7.preheader for.cond7.preheader: ; preds = %for.cond7.preheader.lr.ph, %for.inc23 %y.017 = phi i32 [ 0, %for.cond7.preheader.lr.ph ], [ %inc24, %for.inc23 ] %i.116 = phi i32 [ 0, %for.cond7.preheader.lr.ph ], [ %i.2.lcssa, %for.inc23 ] %n.015 = phi i32 [ undef, %for.cond7.preheader.lr.ph ], [ %inc25, 
%for.inc23 ] - %1 = load i32* @b, align 4, !tbaa !3 + %1 = load i32* @b, align 4, !tbaa !5 %tobool11 = icmp eq i32 %1, 0 br i1 %tobool11, label %for.inc23, label %for.body8.lr.ph @@ -49,7 +49,7 @@ for.body8: ; preds = %for.body8.lr.ph, %f %i.213 = phi i32 [ %i.116, %for.body8.lr.ph ], [ 0, %for.inc19 ] %2 = trunc i64 %indvars.iv19 to i32 %add10 = add i32 %add9, %2 - store i32 %add10, i32* @f, align 4, !tbaa !3 + store i32 %add10, i32* @f, align 4, !tbaa !5 %idx.ext = sext i32 %add10 to i64 %add.ptr = getelementptr inbounds i32* @a, i64 %idx.ext %tobool129 = icmp eq i32 %i.213, 0 @@ -63,9 +63,9 @@ for.body13: ; preds = %for.body13.lr.ph, % %indvars.iv = phi i64 [ %3, %for.body13.lr.ph ], [ %indvars.iv.next, %for.body13 ] %add.ptr.sum = add i64 %idx.ext, %indvars.iv %arrayidx = getelementptr inbounds i32* @a, i64 %add.ptr.sum - %4 = load i32* %arrayidx, align 4, !tbaa !3 + %4 = load i32* %arrayidx, align 4, !tbaa !5 %arrayidx15 = getelementptr inbounds i32* %0, i64 %indvars.iv - store i32 %4, i32* %arrayidx15, align 4, !tbaa !3 + store i32 %4, i32* %arrayidx15, align 4, !tbaa !5 %indvars.iv.next = add i64 %indvars.iv, 1 %5 = trunc i64 %indvars.iv.next to i32 %tobool12 = icmp eq i32 %5, 0 @@ -75,17 +75,17 @@ for.cond11.for.inc19_crit_edge: ; preds = %for.body13 br label %for.inc19 for.inc19: ; preds = %for.cond11.for.inc19_crit_edge, %for.body8 - %6 = load i32* @c, align 4, !tbaa !3 + %6 = load i32* @c, align 4, !tbaa !5 %inc20 = add nsw i32 %6, 1 - store i32 %inc20, i32* @c, align 4, !tbaa !3 + store i32 %inc20, i32* @c, align 4, !tbaa !5 %indvars.iv.next20 = add i64 %indvars.iv19, 1 - %7 = load i32* @b, align 4, !tbaa !3 + %7 = load i32* @b, align 4, !tbaa !5 %tobool = icmp eq i32 %7, 0 br i1 %tobool, label %for.cond7.for.inc23_crit_edge, label %for.body8 for.cond7.for.inc23_crit_edge: ; preds = %for.inc19 %add.ptr.lcssa = phi i32* [ %add.ptr, %for.inc19 ] - store i32* %add.ptr.lcssa, i32** @d, align 8, !tbaa !0 + store i32* %add.ptr.lcssa, i32** @d, align 8, !tbaa 
!4 br label %for.inc23 for.inc23: ; preds = %for.cond7.for.inc23_crit_edge, %for.cond7.preheader @@ -110,4 +110,5 @@ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n !1 = metadata !{metadata !"omnipotent char", metadata !2} !2 = metadata !{metadata !"Simple C/C++ TBAA"} !3 = metadata !{metadata !"double", metadata !1} -!4 = metadata !{metadata !"any pointer", metadata !1} +!4 = metadata !{metadata !0, metadata !0, i64 0} +!5 = metadata !{metadata !3, metadata !3, i64 0} diff --git a/test/Transforms/LoopVectorize/struct_access.ll b/test/Transforms/LoopVectorize/struct_access.ll index 0cfaabe..75beae8 100644 --- a/test/Transforms/LoopVectorize/struct_access.ll +++ b/test/Transforms/LoopVectorize/struct_access.ll @@ -44,3 +44,45 @@ for.end: ; preds = %for.body, %entry %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] ret i32 %sum.0.lcssa } + +%struct.lit = type { i32 } + +; Verify that we still vectorize the access if the struct has the same size as +; the loaded element. 
+; struct lit { +; int x; +; }; +; +; +; int bar(struct lit *A, int n) { +; +; int sum = 0; +; for (int i = 0; i < n; ++i) +; sum += A[i].x; +; +; return sum; +; } + +;CHECK-LABEL: @bar( +;CHECK: load <4 x i32> +;CHECK: ret +define i32 @bar(%struct.lit* nocapture %A, i32 %n) nounwind uwtable readonly ssp { +entry: + %cmp4 = icmp sgt i32 %n, 0 + br i1 %cmp4, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %sum.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %x = getelementptr inbounds %struct.lit* %A, i64 %indvars.iv, i32 0 + %0 = load i32* %x, align 4 + %add = add nsw i32 %0, %sum.05 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + ret i32 %sum.0.lcssa +} diff --git a/test/Transforms/LoopVectorize/unroll_novec.ll b/test/Transforms/LoopVectorize/unroll_novec.ll new file mode 100644 index 0000000..33f128d --- /dev/null +++ b/test/Transforms/LoopVectorize/unroll_novec.ll @@ -0,0 +1,39 @@ +; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-unroll=2 -dce -instcombine -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +@a = common global [2048 x i32] zeroinitializer, align 16 + +; This is the loop. 
+; for (i=0; i<n; i++){ +; a[i] += i; +; } +;CHECK-LABEL: @inc( +;CHECK: load i32* +;CHECK: load i32* +;CHECK: add nsw i32 +;CHECK: add nsw i32 +;CHECK: store i32 +;CHECK: store i32 +;CHECK: ret void +define void @inc(i32 %n) nounwind uwtable noinline ssp { + %1 = icmp sgt i32 %n, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] + %2 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv + %3 = load i32* %2, align 4 + %4 = trunc i64 %indvars.iv to i32 + %5 = add nsw i32 %3, %4 + store i32 %5, i32* %2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + ret void +} + diff --git a/test/Transforms/LoopVectorize/value-ptr-bug.ll b/test/Transforms/LoopVectorize/value-ptr-bug.ll index f376656..e8d3728 100644 --- a/test/Transforms/LoopVectorize/value-ptr-bug.ll +++ b/test/Transforms/LoopVectorize/value-ptr-bug.ll @@ -9,7 +9,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; SCEVExpander::expandCodeFor would change a value (the start value of an ; induction) that we cached in the induction variable list. 
-; CHECK: test_vh +; CHECK-LABEL: @test_vh( ; CHECK-NOT: store <4 x i8> undef define void @test_vh(i32* %ptr265, i32* %ptr266, i32 %sub267) { diff --git a/test/Transforms/LoopVectorize/vectorize-once.ll b/test/Transforms/LoopVectorize/vectorize-once.ll index 2b8f3fd..7800469 100644 --- a/test/Transforms/LoopVectorize/vectorize-once.ll +++ b/test/Transforms/LoopVectorize/vectorize-once.ll @@ -9,7 +9,7 @@ target triple = "x86_64-apple-macosx10.8.0" ; This test checks that we add metadata to vectorized loops -; CHECK: _Z4foo1Pii +; CHECK-LABEL: @_Z4foo1Pii( ; CHECK: <4 x i32> ; CHECK: llvm.loop ; CHECK: ret @@ -41,7 +41,7 @@ _ZSt10accumulateIPiiET0_T_S2_S1_.exit: ; preds = %for.body.i, %entry } ; This test checks that we don't vectorize loops that are marked with the "width" == 1 metadata. -; CHECK: _Z4foo2Pii +; CHECK-LABEL: @_Z4foo2Pii( ; CHECK-NOT: <4 x i32> ; CHECK: llvm.loop ; CHECK: ret @@ -68,9 +68,10 @@ _ZSt10accumulateIPiiET0_T_S2_S1_.exit: ; preds = %for.body.i, %entry attributes #0 = { nounwind readonly ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" } -; CHECK: !0 = metadata !{metadata !0, metadata !1} +; CHECK: !0 = metadata !{metadata !0, metadata !1, metadata !2} ; CHECK: !1 = metadata !{metadata !"llvm.vectorizer.width", i32 1} -; CHECK: !2 = metadata !{metadata !2, metadata !1} +; CHECK: !2 = metadata !{metadata !"llvm.vectorizer.unroll", i32 1} +; CHECK: !3 = metadata !{metadata !3, metadata !1, metadata !2} !0 = metadata !{metadata !0, metadata !1} !1 = metadata !{metadata !"llvm.vectorizer.width", i32 1} diff --git a/test/Transforms/LowerAtomic/lit.local.cfg b/test/Transforms/LowerAtomic/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/LowerAtomic/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/LowerExpectIntrinsic/lit.local.cfg 
b/test/Transforms/LowerExpectIntrinsic/lit.local.cfg deleted file mode 100644 index c6106e4..0000000 --- a/test/Transforms/LowerExpectIntrinsic/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll'] diff --git a/test/Transforms/LowerInvoke/lit.local.cfg b/test/Transforms/LowerInvoke/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/LowerInvoke/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/LowerSwitch/feature.ll b/test/Transforms/LowerSwitch/feature.ll index cc77d3c..e85f03e 100644 --- a/test/Transforms/LowerSwitch/feature.ll +++ b/test/Transforms/LowerSwitch/feature.ll @@ -7,88 +7,88 @@ ;CHECK-NEXT: br label %NodeBlock37 ;CHECK: NodeBlock37: ; preds = %entry -;CHECK-NEXT: %Pivot38 = icmp ult i32 %tmp158, 11 +;CHECK-NEXT: %Pivot38 = icmp slt i32 %tmp158, 10 ;CHECK-NEXT: br i1 %Pivot38, label %NodeBlock13, label %NodeBlock35 ;CHECK: NodeBlock35: ; preds = %NodeBlock37 -;CHECK-NEXT: %Pivot36 = icmp ult i32 %tmp158, 14 +;CHECK-NEXT: %Pivot36 = icmp slt i32 %tmp158, 13 ;CHECK-NEXT: br i1 %Pivot36, label %NodeBlock23, label %NodeBlock33 ;CHECK: NodeBlock33: ; preds = %NodeBlock35 -;CHECK-NEXT: %Pivot34 = icmp ult i32 %tmp158, 15 +;CHECK-NEXT: %Pivot34 = icmp slt i32 %tmp158, 14 ;CHECK-NEXT: br i1 %Pivot34, label %LeafBlock25, label %NodeBlock31 ;CHECK: NodeBlock31: ; preds = %NodeBlock33 -;CHECK-NEXT: %Pivot32 = icmp ult i32 %tmp158, -6 +;CHECK-NEXT: %Pivot32 = icmp slt i32 %tmp158, 15 ;CHECK-NEXT: br i1 %Pivot32, label %LeafBlock27, label %LeafBlock29 ;CHECK: LeafBlock29: ; preds = %NodeBlock31 -;CHECK-NEXT: %tmp158.off = add i32 %tmp158, 6 -;CHECK-NEXT: %SwitchLeaf30 = icmp ule i32 %tmp158.off, 4 -;CHECK-NEXT: br i1 %SwitchLeaf30, label %bb338, label %NewDefault +;CHECK-NEXT: %SwitchLeaf30 = icmp eq i32 %tmp158, 15 +;CHECK-NEXT: br i1 %SwitchLeaf30, label %bb334, label %NewDefault ;CHECK: LeafBlock27: ; preds = %NodeBlock31 -;CHECK-NEXT: %SwitchLeaf28 = 
icmp eq i32 %tmp158, 15 -;CHECK-NEXT: br i1 %SwitchLeaf28, label %bb334, label %NewDefault +;CHECK-NEXT: %SwitchLeaf28 = icmp eq i32 %tmp158, 14 +;CHECK-NEXT: br i1 %SwitchLeaf28, label %bb332, label %NewDefault ;CHECK: LeafBlock25: ; preds = %NodeBlock33 -;CHECK-NEXT: %SwitchLeaf26 = icmp eq i32 %tmp158, 14 -;CHECK-NEXT: br i1 %SwitchLeaf26, label %bb332, label %NewDefault +;CHECK-NEXT: %SwitchLeaf26 = icmp eq i32 %tmp158, 13 +;CHECK-NEXT: br i1 %SwitchLeaf26, label %bb330, label %NewDefault ;CHECK: NodeBlock23: ; preds = %NodeBlock35 -;CHECK-NEXT: %Pivot24 = icmp ult i32 %tmp158, 12 +;CHECK-NEXT: %Pivot24 = icmp slt i32 %tmp158, 11 ;CHECK-NEXT: br i1 %Pivot24, label %LeafBlock15, label %NodeBlock21 ;CHECK: NodeBlock21: ; preds = %NodeBlock23 -;CHECK-NEXT: %Pivot22 = icmp ult i32 %tmp158, 13 +;CHECK-NEXT: %Pivot22 = icmp slt i32 %tmp158, 12 ;CHECK-NEXT: br i1 %Pivot22, label %LeafBlock17, label %LeafBlock19 ;CHECK: LeafBlock19: ; preds = %NodeBlock21 -;CHECK-NEXT: %SwitchLeaf20 = icmp eq i32 %tmp158, 13 -;CHECK-NEXT: br i1 %SwitchLeaf20, label %bb330, label %NewDefault +;CHECK-NEXT: %SwitchLeaf20 = icmp eq i32 %tmp158, 12 +;CHECK-NEXT: br i1 %SwitchLeaf20, label %bb328, label %NewDefault ;CHECK: LeafBlock17: ; preds = %NodeBlock21 -;CHECK-NEXT: %SwitchLeaf18 = icmp eq i32 %tmp158, 12 -;CHECK-NEXT: br i1 %SwitchLeaf18, label %bb328, label %NewDefault +;CHECK-NEXT: %SwitchLeaf18 = icmp eq i32 %tmp158, 11 +;CHECK-NEXT: br i1 %SwitchLeaf18, label %bb326, label %NewDefault ;CHECK: LeafBlock15: ; preds = %NodeBlock23 -;CHECK-NEXT: %SwitchLeaf16 = icmp eq i32 %tmp158, 11 -;CHECK-NEXT: br i1 %SwitchLeaf16, label %bb326, label %NewDefault +;CHECK-NEXT: %SwitchLeaf16 = icmp eq i32 %tmp158, 10 +;CHECK-NEXT: br i1 %SwitchLeaf16, label %bb324, label %NewDefault ;CHECK: NodeBlock13: ; preds = %NodeBlock37 -;CHECK-NEXT: %Pivot14 = icmp ult i32 %tmp158, 8 +;CHECK-NEXT: %Pivot14 = icmp slt i32 %tmp158, 7 ;CHECK-NEXT: br i1 %Pivot14, label %NodeBlock, label %NodeBlock11 ;CHECK: 
NodeBlock11: ; preds = %NodeBlock13 -;CHECK-NEXT: %Pivot12 = icmp ult i32 %tmp158, 9 +;CHECK-NEXT: %Pivot12 = icmp slt i32 %tmp158, 8 ;CHECK-NEXT: br i1 %Pivot12, label %LeafBlock3, label %NodeBlock9 ;CHECK: NodeBlock9: ; preds = %NodeBlock11 -;CHECK-NEXT: %Pivot10 = icmp ult i32 %tmp158, 10 +;CHECK-NEXT: %Pivot10 = icmp slt i32 %tmp158, 9 ;CHECK-NEXT: br i1 %Pivot10, label %LeafBlock5, label %LeafBlock7 ;CHECK: LeafBlock7: ; preds = %NodeBlock9 -;CHECK-NEXT: %SwitchLeaf8 = icmp eq i32 %tmp158, 10 -;CHECK-NEXT: br i1 %SwitchLeaf8, label %bb324, label %NewDefault +;CHECK-NEXT: %SwitchLeaf8 = icmp eq i32 %tmp158, 9 +;CHECK-NEXT: br i1 %SwitchLeaf8, label %bb322, label %NewDefault ;CHECK: LeafBlock5: ; preds = %NodeBlock9 -;CHECK-NEXT: %SwitchLeaf6 = icmp eq i32 %tmp158, 9 -;CHECK-NEXT: br i1 %SwitchLeaf6, label %bb322, label %NewDefault +;CHECK-NEXT: %SwitchLeaf6 = icmp eq i32 %tmp158, 8 +;CHECK-NEXT: br i1 %SwitchLeaf6, label %bb338, label %NewDefault ;CHECK: LeafBlock3: ; preds = %NodeBlock11 -;CHECK-NEXT: %SwitchLeaf4 = icmp eq i32 %tmp158, 8 -;CHECK-NEXT: br i1 %SwitchLeaf4, label %bb338, label %NewDefault +;CHECK-NEXT: %SwitchLeaf4 = icmp eq i32 %tmp158, 7 +;CHECK-NEXT: br i1 %SwitchLeaf4, label %bb, label %NewDefault ;CHECK: NodeBlock: ; preds = %NodeBlock13 -;CHECK-NEXT: %Pivot = icmp ult i32 %tmp158, 7 +;CHECK-NEXT: %Pivot = icmp slt i32 %tmp158, 0 ;CHECK-NEXT: br i1 %Pivot, label %LeafBlock, label %LeafBlock1 ;CHECK: LeafBlock1: ; preds = %NodeBlock -;CHECK-NEXT: %SwitchLeaf2 = icmp eq i32 %tmp158, 7 -;CHECK-NEXT: br i1 %SwitchLeaf2, label %bb, label %NewDefault +;CHECK-NEXT: %SwitchLeaf2 = icmp ule i32 %tmp158, 6 +;CHECK-NEXT: br i1 %SwitchLeaf2, label %bb338, label %NewDefault ;CHECK: LeafBlock: ; preds = %NodeBlock -;CHECK-NEXT: %SwitchLeaf = icmp ule i32 %tmp158, 6 +;CHECK-NEXT: %tmp158.off = add i32 %tmp158, 6 +;CHECK-NEXT: %SwitchLeaf = icmp ule i32 %tmp158.off, 4 ;CHECK-NEXT: br i1 %SwitchLeaf, label %bb338, label %NewDefault define i32 @main(i32 
%tmp158) { diff --git a/test/Transforms/LowerSwitch/lit.local.cfg b/test/Transforms/LowerSwitch/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/LowerSwitch/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll index 5754fcd..33eaed6 100644 --- a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll +++ b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll @@ -33,12 +33,13 @@ return: ; preds = %entry declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!3} +!llvm.module.flags = !{!14} !0 = metadata !{i32 786689, metadata !1, metadata !"i", metadata !2, i32 2, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ] !1 = metadata !{i32 786478, metadata !12, metadata !2, metadata !"testfunc", metadata !"testfunc", metadata !"testfunc", i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (i32, double)* @testfunc, null, null, null, i32 2} ; [ DW_TAG_subprogram ] !2 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ] !3 = metadata !{i32 786449, metadata !12, i32 1, metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !13, metadata !13, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!4 = metadata !{i32 786453, metadata !12, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{i32 786453, metadata !12, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !5 = metadata !{metadata !6, metadata !7, metadata !6} !6 = metadata !{i32 786468, metadata !12, metadata !2, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] !7 = metadata !{i32 786468, metadata !12, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] @@ -48,3 +49,4 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !11 = metadata !{i32 786443, metadata !12, metadata !1, i32 2, i32 0, i32 0} ; [ DW_TAG_lexical_block ] !12 = metadata !{metadata !"testfunc.c", metadata !"/tmp"} !13 = metadata !{i32 0} +!14 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll b/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll index 49dcb04..32acdd6 100644 --- a/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll +++ b/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll @@ -31,18 +31,19 @@ return: ; preds = %entry } !llvm.dbg.cu = !{!3} +!llvm.module.flags = !{!22} !0 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 8, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] !1 = metadata !{i32 786478, metadata !20, metadata !2, metadata !"baz", metadata !"baz", metadata !"baz", i32 8, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i32)* @baz, null, null, null, i32 8} ; [ DW_TAG_subprogram ] !2 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ] !3 = metadata 
!{i32 786449, metadata !20, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !21, metadata !21, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!4 = metadata !{i32 786453, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{i32 786453, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !5 = metadata !{null, metadata !6} !6 = metadata !{i32 786468, metadata !20, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !7 = metadata !{i32 8, i32 0, metadata !1, null} !8 = metadata !{i32 9, i32 0, metadata !1, null} !9 = metadata !{i32 786689, metadata !10, metadata !"x", metadata !2, i32 4, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] !10 = metadata !{i32 786478, metadata !20, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", i32 4, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 4} ; [ DW_TAG_subprogram ] -!11 = metadata !{i32 786453, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ] +!11 = metadata !{i32 786453, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !12 = metadata !{null, metadata !6, metadata !13, metadata !14} !13 = metadata !{i32 786468, metadata !20, metadata !2, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !14 = metadata !{i32 786447, metadata !20, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] @@ -53,3 
+54,4 @@ return: ; preds = %entry !19 = metadata !{i32 10, i32 0, metadata !1, null} !20 = metadata !{metadata !"bar.c", metadata !"/tmp/"} !21 = metadata !{i32 0} +!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Transforms/Mem2Reg/ignore-lifetime.ll b/test/Transforms/Mem2Reg/ignore-lifetime.ll new file mode 100644 index 0000000..5e4f9bf --- /dev/null +++ b/test/Transforms/Mem2Reg/ignore-lifetime.ll @@ -0,0 +1,26 @@ +; RUN: opt -mem2reg -S -o - < %s | FileCheck %s + +declare void @llvm.lifetime.start(i64 %size, i8* nocapture %ptr) +declare void @llvm.lifetime.end(i64 %size, i8* nocapture %ptr) + +define void @test1() { +; CHECK: test1 +; CHECK-NOT: alloca + %A = alloca i32 + %B = bitcast i32* %A to i8* + call void @llvm.lifetime.start(i64 2, i8* %B) + store i32 1, i32* %A + call void @llvm.lifetime.end(i64 2, i8* %B) + ret void +} + +define void @test2() { +; CHECK: test2 +; CHECK-NOT: alloca + %A = alloca {i8, i16} + %B = getelementptr {i8, i16}* %A, i32 0, i32 0 + call void @llvm.lifetime.start(i64 2, i8* %B) + store {i8, i16} zeroinitializer, {i8, i16}* %A + call void @llvm.lifetime.end(i64 2, i8* %B) + ret void +} diff --git a/test/Transforms/Mem2Reg/lit.local.cfg b/test/Transforms/Mem2Reg/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/Mem2Reg/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/Mem2Reg/use-analysis.ll b/test/Transforms/Mem2Reg/use-analysis.ll deleted file mode 100644 index b08b1f1..0000000 --- a/test/Transforms/Mem2Reg/use-analysis.ll +++ /dev/null @@ -1,70 +0,0 @@ -; RUN: opt -mem2reg -S -o - < %s | FileCheck %s - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" - -declare void @llvm.lifetime.start(i64 %size, i8* nocapture %ptr) -declare void @llvm.lifetime.end(i64 %size, i8* nocapture %ptr) - -define void @test1() { -; 
Ensure we can look through a bitcast to i8* and the addition of lifetime -; markers. -; -; CHECK-LABEL: @test1( -; CHECK-NOT: alloca -; CHECK: ret void - - %A = alloca i32 - %B = bitcast i32* %A to i8* - call void @llvm.lifetime.start(i64 2, i8* %B) - store i32 1, i32* %A - call void @llvm.lifetime.end(i64 2, i8* %B) - ret void -} - -define void @test2() { -; Ensure we can look through a GEP to i8* and the addition of lifetime -; markers. -; -; CHECK-LABEL: @test2( -; CHECK-NOT: alloca -; CHECK: ret void - - %A = alloca {i8, i16} - %B = getelementptr {i8, i16}* %A, i32 0, i32 0 - call void @llvm.lifetime.start(i64 2, i8* %B) - store {i8, i16} zeroinitializer, {i8, i16}* %A - call void @llvm.lifetime.end(i64 2, i8* %B) - ret void -} - -define i32 @test3(i32 %x) { -; CHECK-LABEL: @test3( -; -; Check that we recursively walk the uses of the alloca and thus can see -; through round trip bitcasts, dead bitcasts, GEPs, multiple GEPs, and lifetime -; markers. -entry: - %a = alloca i32 -; CHECK-NOT: alloca - - %b = bitcast i32* %a to i8* - %b2 = getelementptr inbounds i8* %b, i32 0 - %b3 = getelementptr inbounds i8* %b2, i32 0 - call void @llvm.lifetime.start(i64 -1, i8* %b3) -; CHECK-NOT: call void @llvm.lifetime.start - - store i32 %x, i32* %a -; CHECK-NOT: store - - %dead = bitcast i32* %a to i4096* - %dead1 = bitcast i4096* %dead to i42* - %dead2 = getelementptr inbounds i32* %a, i32 %x -; CHECK-NOT: bitcast -; CHECK-NOT: getelementptr - - %ret = load i32* %a -; CHECK-NOT: load - - ret i32 %ret -; CHECK: ret i32 %x -} diff --git a/test/Transforms/MemCpyOpt/lit.local.cfg b/test/Transforms/MemCpyOpt/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/MemCpyOpt/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/MergeFunc/address-spaces.ll b/test/Transforms/MergeFunc/address-spaces.ll new file mode 100644 index 0000000..0d66b82 --- /dev/null +++ 
b/test/Transforms/MergeFunc/address-spaces.ll @@ -0,0 +1,35 @@ +; RUN: opt -S -mergefunc < %s | FileCheck %s + +target datalayout = "p:32:32:32-p1:32:32:32-p2:16:16:16" + +declare void @foo(i32) nounwind + +; None of these functions should be merged + +define i32 @store_as0(i32* %x) { +; CHECK-LABEL: @store_as0( +; CHECK: call void @foo( + %gep = getelementptr i32* %x, i32 4 + %y = load i32* %gep + call void @foo(i32 %y) nounwind + ret i32 %y +} + +define i32 @store_as1(i32 addrspace(1)* %x) { +; CHECK-LABEL: @store_as1( +; CHECK: call void @foo( + %gep = getelementptr i32 addrspace(1)* %x, i32 4 + %y = load i32 addrspace(1)* %gep + call void @foo(i32 %y) nounwind + ret i32 %y +} + +define i32 @store_as2(i32 addrspace(2)* %x) { +; CHECK-LABEL: @store_as2( +; CHECK: call void @foo( + %gep = getelementptr i32 addrspace(2)* %x, i32 4 + %y = load i32 addrspace(2)* %gep + call void @foo(i32 %y) nounwind + ret i32 %y +} + diff --git a/test/Transforms/MergeFunc/inttoptr-address-space.ll b/test/Transforms/MergeFunc/inttoptr-address-space.ll new file mode 100644 index 0000000..0d834bc --- /dev/null +++ b/test/Transforms/MergeFunc/inttoptr-address-space.ll @@ -0,0 +1,29 @@ +; RUN: opt -mergefunc -S < %s | FileCheck %s +target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-n8:16:32-S128" + +%.qux.2496 = type { i32, %.qux.2497 } +%.qux.2497 = type { i8, i32 } +%.qux.2585 = type { i32, i32, i8* } + +@g2 = external addrspace(1) constant [9 x i8], align 1 +@g3 = internal hidden unnamed_addr constant [1 x i8*] [i8* bitcast (i8* (%.qux.2585 addrspace(1)*)* @func35 to i8*)] + + +define internal hidden i32 @func10(%.qux.2496 addrspace(1)* nocapture %this) align 2 { +bb: + %tmp = getelementptr inbounds %.qux.2496 addrspace(1)* %this, i32 0, i32 1, i32 1 + %tmp1 = load i32 addrspace(1)* %tmp, align 4 + ret i32 %tmp1 +} + +; Check for pointer bitwidth equal assertion failure +define internal hidden i8* @func35(%.qux.2585 
addrspace(1)* nocapture %this) align 2 { +bb: +; CHECK-LABEL: @func35( +; CHECK: %[[V2:.+]] = bitcast %.qux.2585 addrspace(1)* %{{.*}} to %.qux.2496 addrspace(1)* +; CHECK: %[[V3:.+]] = tail call i32 @func10(%.qux.2496 addrspace(1)* %[[V2]]) +; CHECK: %{{.*}} = inttoptr i32 %[[V3]] to i8* + %tmp = getelementptr inbounds %.qux.2585 addrspace(1)* %this, i32 0, i32 2 + %tmp1 = load i8* addrspace(1)* %tmp, align 4 + ret i8* %tmp1 +} diff --git a/test/Transforms/MergeFunc/inttoptr.ll b/test/Transforms/MergeFunc/inttoptr.ll index 93250fa..6a69e3f 100644 --- a/test/Transforms/MergeFunc/inttoptr.ll +++ b/test/Transforms/MergeFunc/inttoptr.ll @@ -46,6 +46,7 @@ bb: define internal hidden i8* @func35(%.qux.2585* nocapture %this) align 2 { bb: +; CHECK-LABEL: @func35( ; CHECK: %[[V2:.+]] = bitcast %.qux.2585* %{{.*}} to %.qux.2496* ; CHECK: %[[V3:.+]] = tail call i32 @func10(%.qux.2496* %[[V2]]) ; CHECK: %{{.*}} = inttoptr i32 %[[V3]] to i8* diff --git a/test/Transforms/MergeFunc/lit.local.cfg b/test/Transforms/MergeFunc/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/MergeFunc/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/MergeFunc/merge-ptr-and-int.ll b/test/Transforms/MergeFunc/merge-ptr-and-int.ll new file mode 100644 index 0000000..4e887ce --- /dev/null +++ b/test/Transforms/MergeFunc/merge-ptr-and-int.ll @@ -0,0 +1,27 @@ +; RUN: opt -S -mergefunc < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + + +declare void @stuff() + +; CHECK-LABEL: @f0( +define void @f0(i64 %p0) { +entry: + call void @stuff() + call void @stuff() + call void @stuff() + ret void +} + +; CHECK-LABEL: @f1( +; CHECK: ptrtoint i64* +; CHECK: tail call void @f0(i64 + +define void @f1(i64* %p0) { +entry: + call void @stuff() + call void @stuff() + call void @stuff() 
+ ret void +} + diff --git a/test/Transforms/MergeFunc/ptr-int-transitivity-1.ll b/test/Transforms/MergeFunc/ptr-int-transitivity-1.ll new file mode 100644 index 0000000..d6ff10f --- /dev/null +++ b/test/Transforms/MergeFunc/ptr-int-transitivity-1.ll @@ -0,0 +1,21 @@ +; RUN: opt -S -mergefunc < %s | not grep "functions merged" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +declare void @stuff() + +define void @f0(i64 %p0) { +entry: + call void @stuff() + call void @stuff() + call void @stuff() + ret void +} + +define void @f2(i64 addrspace(1)* %p0) { +entry: + call void @stuff() + call void @stuff() + call void @stuff() + ret void +} + diff --git a/test/Transforms/MergeFunc/ptr-int-transitivity-2.ll b/test/Transforms/MergeFunc/ptr-int-transitivity-2.ll new file mode 100644 index 0000000..c9fb6a6 --- /dev/null +++ b/test/Transforms/MergeFunc/ptr-int-transitivity-2.ll @@ -0,0 +1,25 @@ +; RUN: opt -S -mergefunc < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +declare void @stuff() + +define void @f0(i64 %p0) { +entry: + call void @stuff() + call void @stuff() + call void @stuff() + ret void +} + +; CHECK-LABEL: @f0 +; CHECK: %2 = ptrtoint i64* %0 to i64 +; CHECK: tail call void @f0(i64 %2) +; CHECK: ret void +define void @f1(i64 addrspace(0)* %p0) { +entry: + call void @stuff() + call void @stuff() + call void @stuff() + ret void +} + diff --git a/test/Transforms/MergeFunc/ptr-int-transitivity-3.ll b/test/Transforms/MergeFunc/ptr-int-transitivity-3.ll new file mode 100644 index 0000000..8f00f03 --- /dev/null +++ b/test/Transforms/MergeFunc/ptr-int-transitivity-3.ll @@ -0,0 +1,21 @@ +; RUN: opt -S -mergefunc < %s | not grep "functions merged" +target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +declare void @stuff() + +define void @f0(i64 addrspace(0)* %p0) { +entry: + call void @stuff() + call void @stuff() + call void @stuff() + ret void +} + +define void @f2(i64 addrspace(1)* %p0) { +entry: + call void @stuff() + call void @stuff() + call void @stuff() + ret void +} + diff --git a/test/Transforms/MergeFunc/too-small.ll b/test/Transforms/MergeFunc/too-small.ll new file mode 100644 index 0000000..1a526ff --- /dev/null +++ b/test/Transforms/MergeFunc/too-small.ll @@ -0,0 +1,14 @@ +; RUN: opt -S -mergefunc < %s | FileCheck %s + +define void @foo(i32 %x) { +; CHECK-LABEL: @foo( +; CHECK-NOT: call + ret void +} + +define void @bar(i32 %x) { +; CHECK-LABEL: @bar( +; CHECK-NOT: call + ret void +} + diff --git a/test/Transforms/MetaRenamer/lit.local.cfg b/test/Transforms/MetaRenamer/lit.local.cfg deleted file mode 100644 index c6106e4..0000000 --- a/test/Transforms/MetaRenamer/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll'] diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll index 12af354..885935c 100644 --- a/test/Transforms/ObjCARC/basic.ll +++ b/test/Transforms/ObjCARC/basic.ll @@ -182,7 +182,7 @@ if.end5: ; preds = %if.then3, %if.end ; CHECK: tail call i8* @objc_retain(i8* %x) [[NUW:#[0-9]+]] ; CHECK-NOT: @objc_ ; CHECK: if.end5: -; CHECK: tail call void @objc_release(i8* %x) [[NUW]], !clang.imprecise_release !0 +; CHECK: tail call void @objc_release(i8* %x) [[NUW]], !clang.imprecise_release ![[RELEASE:[0-9]+]] ; CHECK-NOT: @objc_ ; CHECK: } define void @test1b_imprecise(i8* %x, i1 %p, i1 %q) { @@ -1357,55 +1357,6 @@ C: ret void } -; Optimize objc_retainBlock. 
- -; CHECK-LABEL: define void @test23( -; CHECK-NOT: @objc_ -; CHECK: } -%block0 = type { i64, i64, i8*, i8* } -%block1 = type { i8**, i32, i32, i32 (%struct.__block_literal_1*)*, %block0* } -%struct.__block_descriptor = type { i64, i64 } -%struct.__block_literal_1 = type { i8**, i32, i32, i8**, %struct.__block_descriptor* } -@__block_holder_tmp_1 = external constant %block1 -define void @test23() { -entry: - %0 = call i8* @objc_retainBlock(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind, !clang.arc.copy_on_escape !0 - call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*)) - call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*)) - call void @objc_release(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind - ret void -} - -; Don't optimize objc_retainBlock, but do strength reduce it. - -; CHECK: define void @test23b(i8* %p) { -; CHECK: @objc_retain -; CHECK: @objc_release -; CHECK: } -define void @test23b(i8* %p) { -entry: - %0 = call i8* @objc_retainBlock(i8* %p) nounwind, !clang.arc.copy_on_escape !0 - call void @callee() - call void @use_pointer(i8* %p) - call void @objc_release(i8* %p) nounwind - ret void -} - -; Don't optimize objc_retainBlock, because there's no copy_on_escape metadata. - -; CHECK-LABEL: define void @test23c( -; CHECK: @objc_retainBlock -; CHECK: @objc_release -; CHECK: } -define void @test23c() { -entry: - %0 = call i8* @objc_retainBlock(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind - call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*)) - call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*)) - call void @objc_release(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind - ret void -} - ; Any call can decrement a retain count. 
; CHECK-LABEL: define void @test24( @@ -2251,7 +2202,7 @@ define void @test53(void ()** %zz, i8** %pp) { ; CHECK-LABEL: define void @test54( ; CHECK: call i8* @returner() -; CHECK-NEXT: call void @objc_release(i8* %t) [[NUW]], !clang.imprecise_release !0 +; CHECK-NEXT: call void @objc_release(i8* %t) [[NUW]], !clang.imprecise_release ![[RELEASE]] ; CHECK-NEXT: ret void ; CHECK: } define void @test54() { @@ -2285,7 +2236,7 @@ entry: ; CHECK-NEXT: %0 = tail call i8* @objc_retain(i8* %x) [[NUW]] ; CHECK-NEXT: tail call void @use_pointer(i8* %x) ; CHECK-NEXT: tail call void @use_pointer(i8* %x) -; CHECK-NEXT: tail call void @objc_release(i8* %x) [[NUW]], !clang.imprecise_release !0 +; CHECK-NEXT: tail call void @objc_release(i8* %x) [[NUW]], !clang.imprecise_release ![[RELEASE]] ; CHECK-NEXT: br label %if.end ; CHECK-NOT: @objc ; CHECK: } @@ -3058,7 +3009,11 @@ define void @test67(i8* %x) { ret void } +!llvm.module.flags = !{!1} + !0 = metadata !{} +!1 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} ; CHECK: attributes #0 = { nounwind readnone } ; CHECK: attributes [[NUW]] = { nounwind } +; CHECK: ![[RELEASE]] = metadata !{} diff --git a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll index 96a7d3e..0728617 100644 --- a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll +++ b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll @@ -111,14 +111,14 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone ; CHECK: attributes [[NUW]] = { nounwind } !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!33, !34, !35, !36} +!llvm.module.flags = !{!33, !34, !35, !36, !61} !0 = metadata !{i32 786449, metadata !60, i32 16, metadata !"clang version 3.3 ", i1 true, metadata !"", i32 2, metadata !1, metadata !1, metadata !3, metadata !1, null, metadata !""} ; [ DW_TAG_compile_unit ] 
[/Volumes/Files/gottesmmcab/Radar/12906997/test.m] [DW_LANG_ObjC] !1 = metadata !{i32 0} !3 = metadata !{metadata !5, metadata !27} !5 = metadata !{i32 786478, metadata !60, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 9, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @main, null, null, metadata !10, i32 10} ; [ DW_TAG_subprogram ] [line 9] [def] [scope 10] [main] !6 = metadata !{i32 786473, metadata !60} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9} !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !10 = metadata !{metadata !11} @@ -127,11 +127,11 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !13 = metadata !{i32 786443, metadata !60, metadata !5, i32 10, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m] !14 = metadata !{i32 786454, metadata !60, null, metadata !"id", i32 11, i64 0, i64 0, i64 0, i32 0, metadata !15} ; [ DW_TAG_typedef ] [id] [line 11, size 0, align 0, offset 0] [from ] !15 = metadata !{i32 786447, metadata !60, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_object] -!16 = metadata !{i32 786451, metadata !60, null, metadata !"objc_object", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !17, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [from ] 
+!16 = metadata !{i32 786451, metadata !60, null, metadata !"objc_object", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !17, i32 0, null, i32 0, null} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [def] [from ] !17 = metadata !{metadata !18} !18 = metadata !{i32 786445, metadata !60, metadata !16, metadata !"isa", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !19} ; [ DW_TAG_member ] [isa] [line 0, size 64, align 0, offset 0] [from ] !19 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from objc_class] -!20 = metadata !{i32 786451, metadata !60, null, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [fwd] [from ] +!20 = metadata !{i32 786451, metadata !60, null, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [decl] [from ] !21 = metadata !{i32 786688, metadata !22, metadata !"ok", metadata !6, i32 13, metadata !23, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [ok] [line 13] !22 = metadata !{i32 786443, metadata !60, metadata !13, i32 12, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m] !23 = metadata !{i32 786454, metadata !60, null, metadata !"BOOL", i32 62, i64 0, i64 0, i64 0, i32 0, metadata !24} ; [ DW_TAG_typedef ] [BOOL] [line 62, size 0, align 0, offset 0] [from signed char] @@ -139,7 +139,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !25 = metadata !{i32 786688, metadata !26, metadata !"obj2", metadata !6, i32 15, metadata !14, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [obj2] [line 15] !26 = metadata !{i32 786443, metadata !60, metadata !22, i32 14, i32 0, i32 2} ; [ DW_TAG_lexical_block ] 
[/Volumes/Files/gottesmmcab/Radar/12906997/test.m] !27 = metadata !{i32 786478, metadata !60, metadata !6, metadata !"ThrowFunc", metadata !"ThrowFunc", metadata !"", i32 4, metadata !28, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i8*)* @ThrowFunc, null, null, metadata !30, i32 5} ; [ DW_TAG_subprogram ] [line 4] [local] [def] [scope 5] [ThrowFunc] -!28 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !29, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!28 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !29, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !29 = metadata !{null, metadata !14} !30 = metadata !{metadata !31} !31 = metadata !{metadata !32} @@ -171,3 +171,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !57 = metadata !{i32 786443, metadata !60, metadata !27, i32 5, i32 0, i32 7} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m] !58 = metadata !{i32 7, i32 0, metadata !57, null} !60 = metadata !{metadata !"test.m", metadata !"/Volumes/Files/gottesmmcab/Radar/12906997"} +!61 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Transforms/ObjCARC/lit.local.cfg b/test/Transforms/ObjCARC/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/ObjCARC/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll b/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll deleted file mode 100644 index 2a56371..0000000 --- a/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll +++ /dev/null @@ -1,123 +0,0 @@ -; RUN: opt -S -objc-arc < %s | FileCheck %s - -target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -%struct.__block_byref_x = type { i8*, %struct.__block_byref_x*, i32, i32, i32 } -%struct.__block_descriptor = type { i64, i64 } -@_NSConcreteStackBlock = external global i8* -@__block_descriptor_tmp = external hidden constant { i64, i64, i8*, i8*, i8*, i8* } - -; The optimizer should make use of the !clang.arc.no_objc_arc_exceptions -; metadata and eliminate the retainBlock+release pair here. -; rdar://10803830. - -; CHECK-LABEL: define void @test0( -; CHECK-NOT: @objc -; CHECK: } -define void @test0() { -entry: - %x = alloca %struct.__block_byref_x, align 8 - %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>, align 8 - %byref.isa = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 0 - store i8* null, i8** %byref.isa, align 8 - %byref.forwarding = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 1 - store %struct.__block_byref_x* %x, %struct.__block_byref_x** %byref.forwarding, align 8 - %byref.flags = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 2 - store i32 0, i32* %byref.flags, align 8 - %byref.size = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 3 - store i32 32, i32* %byref.size, align 4 - %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 0 - store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8 - %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 1 - store i32 1107296256, i32* %block.flags, align 8 - %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 2 - store i32 0, i32* %block.reserved, align 4 - %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, 
i64 0, i32 3 - store i8* bitcast (void (i8*)* @__foo_block_invoke_0 to i8*), i8** %block.invoke, align 8 - %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 4 - store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8*, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8 - %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 5 - %t1 = bitcast %struct.__block_byref_x* %x to i8* - store i8* %t1, i8** %block.captured, align 8 - %t2 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block to i8* - %t3 = call i8* @objc_retainBlock(i8* %t2) nounwind, !clang.arc.copy_on_escape !4 - %t4 = getelementptr inbounds i8* %t3, i64 16 - %t5 = bitcast i8* %t4 to i8** - %t6 = load i8** %t5, align 8 - %t7 = bitcast i8* %t6 to void (i8*)* - invoke void %t7(i8* %t3) - to label %invoke.cont unwind label %lpad, !clang.arc.no_objc_arc_exceptions !4 - -invoke.cont: ; preds = %entry - call void @objc_release(i8* %t3) nounwind, !clang.imprecise_release !4 - call void @_Block_object_dispose(i8* %t1, i32 8) - ret void - -lpad: ; preds = %entry - %t8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*) - cleanup - call void @_Block_object_dispose(i8* %t1, i32 8) - resume { i8*, i32 } %t8 -} - -; There is no !clang.arc.no_objc_arc_exceptions metadata here, so the optimizer -; shouldn't eliminate anything, but *CAN* strength reduce the objc_retainBlock -; to an objc_retain. 
- -; CHECK-LABEL: define void @test0_no_metadata( -; CHECK: call i8* @objc_retain( -; CHECK: invoke -; CHECK: call void @objc_release( -; CHECK: } -define void @test0_no_metadata() { -entry: - %x = alloca %struct.__block_byref_x, align 8 - %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>, align 8 - %byref.isa = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 0 - store i8* null, i8** %byref.isa, align 8 - %byref.forwarding = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 1 - store %struct.__block_byref_x* %x, %struct.__block_byref_x** %byref.forwarding, align 8 - %byref.flags = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 2 - store i32 0, i32* %byref.flags, align 8 - %byref.size = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 3 - store i32 32, i32* %byref.size, align 4 - %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 0 - store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8 - %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 1 - store i32 1107296256, i32* %block.flags, align 8 - %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 2 - store i32 0, i32* %block.reserved, align 4 - %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 3 - store i8* bitcast (void (i8*)* @__foo_block_invoke_0 to i8*), i8** %block.invoke, align 8 - %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 4 - store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8*, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8 - %block.captured = getelementptr inbounds <{ i8*, 
i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 5 - %t1 = bitcast %struct.__block_byref_x* %x to i8* - store i8* %t1, i8** %block.captured, align 8 - %t2 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block to i8* - %t3 = call i8* @objc_retainBlock(i8* %t2) nounwind, !clang.arc.copy_on_escape !4 - %t4 = getelementptr inbounds i8* %t3, i64 16 - %t5 = bitcast i8* %t4 to i8** - %t6 = load i8** %t5, align 8 - %t7 = bitcast i8* %t6 to void (i8*)* - invoke void %t7(i8* %t3) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - call void @objc_release(i8* %t3) nounwind, !clang.imprecise_release !4 - call void @_Block_object_dispose(i8* %t1, i32 8) - ret void - -lpad: ; preds = %entry - %t8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*) - cleanup - call void @_Block_object_dispose(i8* %t1, i32 8) - resume { i8*, i32 } %t8 -} - -declare i8* @objc_retainBlock(i8*) -declare void @objc_release(i8*) -declare void @_Block_object_dispose(i8*, i32) -declare i32 @__objc_personality_v0(...) 
-declare void @__foo_block_invoke_0(i8* nocapture) uwtable ssp - -!4 = metadata !{} diff --git a/test/Transforms/ObjCARC/path-overflow.ll b/test/Transforms/ObjCARC/path-overflow.ll index 605e860..3c14353 100644 --- a/test/Transforms/ObjCARC/path-overflow.ll +++ b/test/Transforms/ObjCARC/path-overflow.ll @@ -1,6 +1,8 @@ ; RUN: opt -objc-arc -S < %s ; rdar://12277446 ; rdar://12480535 +; rdar://14590914 +; rdar://15377890 ; The total number of paths grows exponentially with the number of branches, and a ; computation of this number can overflow any reasonable fixed-sized @@ -10,14 +12,22 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" target triple = "thumbv7-apple-ios5.0.0" -%struct.NSConstantString.11.33.55.77.99.121.143.332.1130.1340.2768 = type { i32*, i32, i8*, i32 } +%struct.NSConstantString = type { i32*, i32, i8*, i32 } +%struct.CGPoint = type { float, float } -@_unnamed_cfstring_591 = external constant %struct.NSConstantString.11.33.55.77.99.121.143.332.1130.1340.2768, section "__DATA,__cfstring" +@_unnamed_cfstring = external constant %struct.NSConstantString, section "__DATA,__cfstring" +@_unnamed_cfstring_2 = external constant %struct.NSConstantString, section "__DATA,__cfstring" declare i8* @objc_retain(i8*) nonlazybind declare i8* @objc_retainAutoreleasedReturnValue(i8*) nonlazybind declare void @objc_release(i8*) nonlazybind declare i8* @returner() +declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind +declare void @NSLog(i8*, ...) +declare void @objc_msgSend_stret(i8*, i8*, ...) +declare i32 @__gxx_personality_sj0(...) +declare i32 @__objc_personality_v0(...) 
+ define hidden void @test1() { entry: @@ -30,7 +40,7 @@ msgSend.nullinit: ; preds = %entry br label %msgSend.cont msgSend.cont: ; preds = %msgSend.nullinit, %msgSend.call - %0 = bitcast %struct.NSConstantString.11.33.55.77.99.121.143.332.1130.1340.2768* @_unnamed_cfstring_591 to i8* + %0 = bitcast %struct.NSConstantString* @_unnamed_cfstring to i8* %1 = call i8* @objc_retain(i8* %0) nounwind br i1 undef, label %msgSend.nullinit33, label %msgSend.call32 @@ -853,5 +863,1331 @@ bb222: ; preds = %bb20, %bb19 ret void } +; Function Attrs: ssp +define void @test3() #1 { +entry: + %call2 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont unwind label %lpad + +invoke.cont: ; preds = %entry + %call5 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*)*)(i8* undef, i8* undef) + to label %invoke.cont4 unwind label %lpad3 + +invoke.cont4: ; preds = %invoke.cont + br i1 undef, label %land.end, label %land.rhs + +land.rhs: ; preds = %invoke.cont4 + %call7 = invoke i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)(i8* undef, i8* undef) + to label %land.end unwind label %lpad3 + +land.end: ; preds = %land.rhs, %invoke.cont4 + invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont.i unwind label %lpad.i + +invoke.cont.i: ; preds = %land.end + br i1 undef, label %invoke.cont8, label %if.then.i + +if.then.i: ; preds = %invoke.cont.i + br label %invoke.cont8 + +lpad.i: ; preds = %land.end + %tmp13 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +invoke.cont8: ; preds = %if.then.i, %invoke.cont.i + %call18 = invoke i8* (i8*, i8*, i8*, ...)* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to 
i8* (i8*, i8*, i8*, ...)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef) + to label %invoke.cont17 unwind label %lpad16 + +invoke.cont17: ; preds = %invoke.cont8 + %call22 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont21 unwind label %lpad20 + +invoke.cont21: ; preds = %invoke.cont17 + invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont.i1980 unwind label %lpad.i1982 + +invoke.cont.i1980: ; preds = %invoke.cont21 + br i1 undef, label %invoke.cont24, label %if.then.i1981 + +if.then.i1981: ; preds = %invoke.cont.i1980 + br label %invoke.cont24 + +lpad.i1982: ; preds = %invoke.cont21 + %tmp28 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +invoke.cont24: ; preds = %if.then.i1981, %invoke.cont.i1980 + %call37 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*)*)(i8* undef, i8* undef) + to label %invoke.cont36 unwind label %lpad35 + +invoke.cont36: ; preds = %invoke.cont24 + br i1 undef, label %land.end43, label %land.rhs39 + +land.rhs39: ; preds = %invoke.cont36 + %call41 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %land.end43 unwind label %lpad35 + +land.end43: ; preds = %land.rhs39, %invoke.cont36 + invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont.i1986 unwind label %lpad.i1988 + +invoke.cont.i1986: ; preds = %land.end43 + br i1 
undef, label %invoke.cont44, label %if.then.i1987 + +if.then.i1987: ; preds = %invoke.cont.i1986 + br label %invoke.cont44 + +lpad.i1988: ; preds = %land.end43 + %tmp42 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +invoke.cont44: ; preds = %if.then.i1987, %invoke.cont.i1986 + %call53 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont52 unwind label %lpad51 + +invoke.cont52: ; preds = %invoke.cont44 + br i1 undef, label %land.end70, label %land.rhs58 + +land.rhs58: ; preds = %invoke.cont52 + %call63 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 42) + to label %invoke.cont62 unwind label %lpad61 + +invoke.cont62: ; preds = %land.rhs58 + %call68 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef) + to label %land.end70 unwind label %lpad66.body.thread + +land.end70: ; preds = %invoke.cont62, %invoke.cont52 + invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont.i1992 unwind label %lpad66.body + +invoke.cont.i1992: ; preds = %land.end70 + br i1 undef, label %invoke.cont71, label %if.then.i1993 + +if.then.i1993: ; preds = %invoke.cont.i1992 + br label %invoke.cont71 + +invoke.cont71: ; preds = %if.then.i1993, %invoke.cont.i1992 + invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont.i1998 unwind label %lpad.i2000 + +invoke.cont.i1998: ; preds = %invoke.cont71 + br i1 undef, label %invoke.cont91, label %if.then.i1999 + +if.then.i1999: ; preds 
= %invoke.cont.i1998 + br label %invoke.cont91 + +lpad.i2000: ; preds = %invoke.cont71 + %tmp74 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + br label %ehcleanup102 + +invoke.cont91: ; preds = %if.then.i1999, %invoke.cont.i1998 + %call96 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* undef, i8* undef) + to label %invoke.cont95 unwind label %lpad94 + +invoke.cont95: ; preds = %invoke.cont91 + %call98 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* %call96) + to label %invoke.cont97 unwind label %lpad94 + +invoke.cont97: ; preds = %invoke.cont95 + invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont.i2004 unwind label %lpad.i2006 + +invoke.cont.i2004: ; preds = %invoke.cont97 + br i1 undef, label %invoke.cont100, label %if.then.i2005 + +if.then.i2005: ; preds = %invoke.cont.i2004 + br label %invoke.cont100 + +lpad.i2006: ; preds = %invoke.cont97 + %tmp82 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +invoke.cont100: ; preds = %if.then.i2005, %invoke.cont.i2004 + invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont110 unwind label %lpad109 + +invoke.cont110: ; preds = %invoke.cont100 + invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont.i2010 unwind label %lpad.i2012 + +invoke.cont.i2010: ; preds = %invoke.cont110 + br i1 undef, label %invoke.cont117, label %if.then.i2011 + +if.then.i2011: ; 
preds = %invoke.cont.i2010 + br label %invoke.cont117 + +lpad.i2012: ; preds = %invoke.cont110 + %tmp98 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +invoke.cont117: ; preds = %if.then.i2011, %invoke.cont.i2010 + invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont.i2022 unwind label %lpad156.body + +lpad: ; preds = %entry + %tmp118 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + br label %ehcleanup + +lpad3: ; preds = %land.rhs, %invoke.cont + %tmp119 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + br label %ehcleanup + +ehcleanup: ; preds = %lpad3, %lpad + unreachable + +lpad16: ; preds = %invoke.cont8 + %tmp121 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + br label %ehcleanup26 + +lpad20: ; preds = %invoke.cont17 + %tmp122 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + br label %ehcleanup26 + +ehcleanup26: ; preds = %lpad20, %lpad16 + unreachable + +lpad35: ; preds = %land.rhs39, %invoke.cont24 + %tmp124 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad51: ; preds = %invoke.cont44 + %tmp125 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad61: ; preds = %land.rhs58 + %tmp127 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad66.body.thread: ; preds = %invoke.cont62 + %tmp128 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + 
+lpad66.body: ; preds = %land.end70 + %tmp129 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad94: ; preds = %invoke.cont95, %invoke.cont91 + %tmp133 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + br label %ehcleanup102 + +ehcleanup102: ; preds = %lpad94, %lpad.i2000 + unreachable + +lpad109: ; preds = %invoke.cont100 + %tmp134 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +invoke.cont.i2022: ; preds = %invoke.cont117 + br i1 undef, label %invoke.cont157, label %if.then.i2023 + +if.then.i2023: ; preds = %invoke.cont.i2022 + br label %invoke.cont157 + +invoke.cont157: ; preds = %if.then.i2023, %invoke.cont.i2022 + invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont.i2028 unwind label %lpad164.body + +invoke.cont.i2028: ; preds = %invoke.cont157 + br i1 undef, label %invoke.cont165, label %if.then.i2029 + +if.then.i2029: ; preds = %invoke.cont.i2028 + br label %invoke.cont165 + +invoke.cont165: ; preds = %if.then.i2029, %invoke.cont.i2028 + invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, void (i8*, i8*)*)*)(i8* undef, i8* undef, void (i8*, i8*)* undef) + to label %invoke.cont184 unwind label %lpad183 + +invoke.cont184: ; preds = %invoke.cont165 + %call186 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont185 unwind label %lpad183 + +invoke.cont185: ; preds = %invoke.cont184 + invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + 
to label %invoke.cont.i2034 unwind label %lpad.i2036 + +invoke.cont.i2034: ; preds = %invoke.cont185 + br i1 undef, label %invoke.cont190, label %if.then.i2035 + +if.then.i2035: ; preds = %invoke.cont.i2034 + br label %invoke.cont190 + +lpad.i2036: ; preds = %invoke.cont185 + %tmp168 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + br label %lpad183.body + +invoke.cont190: ; preds = %if.then.i2035, %invoke.cont.i2034 + invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont197 unwind label %lpad196 + +invoke.cont197: ; preds = %invoke.cont190 + %call202 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont201 unwind label %lpad200 + +invoke.cont201: ; preds = %invoke.cont197 + %call205 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont204 unwind label %lpad203 + +invoke.cont204: ; preds = %invoke.cont201 + invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont.i2040 unwind label %lpad.i2042 + +invoke.cont.i2040: ; preds = %invoke.cont204 + br i1 undef, label %invoke.cont207, label %if.then.i2041 + +if.then.i2041: ; preds = %invoke.cont.i2040 + br label %invoke.cont207 + +lpad.i2042: ; preds = %invoke.cont204 + %tmp181 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +invoke.cont207: ; preds = %if.then.i2041, %invoke.cont.i2040 + %call209 = invoke i8* bitcast 
(i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* undef, i8* undef) + to label %invoke.cont208 unwind label %lpad203 + +invoke.cont208: ; preds = %invoke.cont207 + invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont.i2046 unwind label %lpad212.body + +invoke.cont.i2046: ; preds = %invoke.cont208 + br i1 undef, label %invoke.cont213, label %if.then.i2047 + +if.then.i2047: ; preds = %invoke.cont.i2046 + br label %invoke.cont213 + +invoke.cont213: ; preds = %if.then.i2047, %invoke.cont.i2046 + invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont221 unwind label %lpad220 + +invoke.cont221: ; preds = %invoke.cont213 + %call229 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont228 unwind label %lpad227 + +invoke.cont228: ; preds = %invoke.cont221 + invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont.i2052 unwind label %lpad.i2054 + +invoke.cont.i2052: ; preds = %invoke.cont228 + br i1 undef, label %invoke.cont231, label %if.then.i2053 + +if.then.i2053: ; preds = %invoke.cont.i2052 + br label %invoke.cont231 + +lpad.i2054: ; preds = %invoke.cont228 + %tmp198 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +invoke.cont231: ; preds = %if.then.i2053, %invoke.cont.i2052 + %call233 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* undef, i8* undef) + to label %invoke.cont232 
unwind label %lpad227 + +invoke.cont232: ; preds = %invoke.cont231 + invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont.i2058 unwind label %lpad236.body + +invoke.cont.i2058: ; preds = %invoke.cont232 + br i1 undef, label %invoke.cont237, label %if.then.i2059 + +if.then.i2059: ; preds = %invoke.cont.i2058 + br label %invoke.cont237 + +invoke.cont237: ; preds = %if.then.i2059, %invoke.cont.i2058 + %call246 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont245 unwind label %lpad244 + +invoke.cont245: ; preds = %invoke.cont237 + %call248 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 13) + to label %invoke.cont247 unwind label %lpad244 + +invoke.cont247: ; preds = %invoke.cont245 + %call251 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 2) + to label %invoke.cont250 unwind label %lpad249 + +invoke.cont250: ; preds = %invoke.cont247 + %call254 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 7) + to label %invoke.cont253 unwind label %lpad252 + +invoke.cont253: ; preds = %invoke.cont250 + %call257 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8**, i32)*)(i8* undef, i8* undef, i8** undef, i32 3) + to label %invoke.cont256 unwind label %lpad255 + +invoke.cont256: ; preds = %invoke.cont253 + %call260 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef, i8* undef) + to label %invoke.cont259 unwind label %lpad258 + +invoke.cont259: ; preds = %invoke.cont256 + %call267 = invoke i8* bitcast (i8* (i8*, i8*, ...)* 
@objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont266 unwind label %lpad265 + +invoke.cont266: ; preds = %invoke.cont259 + %call275 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef) + to label %invoke.cont274 unwind label %lpad273 + +invoke.cont274: ; preds = %invoke.cont266 + %call279 = invoke i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)(i8* undef, i8* undef) + to label %invoke.cont278 unwind label %lpad277 + +invoke.cont278: ; preds = %invoke.cont274 + invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont.i2064 unwind label %lpad.i2066 + +invoke.cont.i2064: ; preds = %invoke.cont278 + br i1 undef, label %invoke.cont281, label %if.then.i2065 + +if.then.i2065: ; preds = %invoke.cont.i2064 + br label %invoke.cont281 + +lpad.i2066: ; preds = %invoke.cont278 + %tmp253 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +invoke.cont281: ; preds = %if.then.i2065, %invoke.cont.i2064 + %call291 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont290 unwind label %lpad289 + +invoke.cont290: ; preds = %invoke.cont281 + %call303 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 8) + to label %invoke.cont302 unwind label %lpad301 + +invoke.cont302: ; preds = %invoke.cont290 + %call310 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, double)*)(i8* undef, i8* undef, double 5.000000e-01) + to label %invoke.cont309 unwind label %lpad308 + +invoke.cont309: ; preds 
= %invoke.cont302 + %call313 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 42) + to label %invoke.cont312 unwind label %lpad311 + +invoke.cont312: ; preds = %invoke.cont309 + %call316 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8**, i8**, i32)*)(i8* undef, i8* undef, i8** undef, i8** undef, i32 2) + to label %invoke.cont315 unwind label %lpad314 + +invoke.cont315: ; preds = %invoke.cont312 + %call322 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef) + to label %invoke.cont321 unwind label %lpad320 + +invoke.cont321: ; preds = %invoke.cont315 + br i1 undef, label %land.end344, label %land.rhs335 + +land.rhs335: ; preds = %invoke.cont321 + %call342 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %land.end344 unwind label %lpad340.body.thread + +land.end344: ; preds = %land.rhs335, %invoke.cont321 + invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont.i2070 unwind label %lpad340.body + +invoke.cont.i2070: ; preds = %land.end344 + br i1 undef, label %invoke.cont345, label %if.then.i2071 + +if.then.i2071: ; preds = %invoke.cont.i2070 + br label %invoke.cont345 + +invoke.cont345: ; preds = %if.then.i2071, %invoke.cont.i2070 + %call362 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef) + to label %invoke.cont361 unwind label %lpad360 + +invoke.cont361: ; preds = %invoke.cont345 + %call365 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label 
%invoke.cont364 unwind label %lpad363 + +invoke.cont364: ; preds = %invoke.cont361 + %call371 = invoke i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)(i8* undef, i8* undef) + to label %invoke.cont370 unwind label %lpad369 + +invoke.cont370: ; preds = %invoke.cont364 + invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont.i2076 unwind label %lpad.i2078 + +invoke.cont.i2076: ; preds = %invoke.cont370 + br i1 undef, label %invoke.cont373, label %if.then.i2077 + +if.then.i2077: ; preds = %invoke.cont.i2076 + br label %invoke.cont373 + +lpad.i2078: ; preds = %invoke.cont370 + %tmp340 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +invoke.cont373: ; preds = %if.then.i2077, %invoke.cont.i2076 + %call377 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32, i8*)*)(i8* undef, i8* undef, i32 42, i8* undef) + to label %invoke.cont376 unwind label %lpad363 + +invoke.cont376: ; preds = %invoke.cont373 + invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i32)*)(i8* undef, i8* undef, i8* undef, i32 5) + to label %invoke.cont382 unwind label %lpad381 + +invoke.cont382: ; preds = %invoke.cont376 + %call384 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont383 unwind label %lpad381 + +invoke.cont383: ; preds = %invoke.cont382 + invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont.i2082 unwind label %lpad.i2084 + +invoke.cont.i2082: ; preds = %invoke.cont383 + br i1 undef, label %invoke.cont392, label 
%if.then.i2083 + +if.then.i2083: ; preds = %invoke.cont.i2082 + br label %invoke.cont392 + +lpad.i2084: ; preds = %invoke.cont383 + %tmp360 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +invoke.cont392: ; preds = %if.then.i2083, %invoke.cont.i2082 + invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i32)*)(i8* undef, i8* undef, i8* undef, i32 -2) + to label %invoke.cont395 unwind label %lpad381 + +invoke.cont395: ; preds = %invoke.cont392 + %call397 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont396 unwind label %lpad381 + +invoke.cont396: ; preds = %invoke.cont395 + %call400 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* undef, i8* undef) + to label %invoke.cont399 unwind label %lpad398 + +invoke.cont399: ; preds = %invoke.cont396 + %call403 = invoke i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)(i8* undef, i8* undef) + to label %invoke.cont402 unwind label %lpad401 + +invoke.cont402: ; preds = %invoke.cont399 + invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont.i2088 unwind label %lpad.i2090 + +invoke.cont.i2088: ; preds = %invoke.cont402 + br i1 undef, label %invoke.cont405, label %if.then.i2089 + +if.then.i2089: ; preds = %invoke.cont.i2088 + br label %invoke.cont405 + +lpad.i2090: ; preds = %invoke.cont402 + %tmp370 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +invoke.cont405: ; preds = %if.then.i2089, %invoke.cont.i2088 + invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i32)*)(i8* undef, i8* undef, i8* undef, 
i32 -1) + to label %invoke.cont408 unwind label %lpad381 + +invoke.cont408: ; preds = %invoke.cont405 + %call410 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont409 unwind label %lpad381 + +invoke.cont409: ; preds = %invoke.cont408 + %call413 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* undef, i8* undef) + to label %invoke.cont412 unwind label %lpad411 + +invoke.cont412: ; preds = %invoke.cont409 + invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont.i2094 unwind label %lpad.i2096 + +invoke.cont.i2094: ; preds = %invoke.cont412 + br i1 undef, label %invoke.cont418, label %if.then.i2095 + +if.then.i2095: ; preds = %invoke.cont.i2094 + br label %invoke.cont418 + +lpad.i2096: ; preds = %invoke.cont412 + %tmp380 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +invoke.cont418: ; preds = %if.then.i2095, %invoke.cont.i2094 + invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i32)*)(i8* undef, i8* undef, i8* undef, i32 0) + to label %invoke.cont422 unwind label %lpad381 + +invoke.cont422: ; preds = %invoke.cont418 + %call424 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont423 unwind label %lpad381 + +invoke.cont423: ; preds = %invoke.cont422 + %call427 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* undef, i8* undef) + to label %invoke.cont426 unwind label %lpad425 + +invoke.cont426: ; preds = %invoke.cont423 + %call430 = invoke i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend 
to i32 (i8*, i8*)*)(i8* undef, i8* undef) + to label %invoke.cont429 unwind label %lpad428 + +invoke.cont429: ; preds = %invoke.cont426 + invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont.i2100 unwind label %lpad.i2102 + +invoke.cont.i2100: ; preds = %invoke.cont429 + br i1 undef, label %invoke.cont432, label %if.then.i2101 + +if.then.i2101: ; preds = %invoke.cont.i2100 + br label %invoke.cont432 + +lpad.i2102: ; preds = %invoke.cont429 + %tmp390 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +invoke.cont432: ; preds = %if.then.i2101, %invoke.cont.i2100 + %call436 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 0) + to label %invoke.cont435 unwind label %lpad381 + +invoke.cont435: ; preds = %invoke.cont432 + invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont.i2106 unwind label %lpad.i2108 + +invoke.cont.i2106: ; preds = %invoke.cont435 + %call444 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 5) + to label %invoke.cont443 unwind label %lpad381 + +lpad.i2108: ; preds = %invoke.cont435 + %tmp396 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +invoke.cont443: ; preds = %invoke.cont.i2106 + invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont.i2112 unwind label %lpad.i2114 + +invoke.cont.i2112: ; preds = %invoke.cont443 + br i1 undef, label %invoke.cont449, label 
%if.then.i2113 + +if.then.i2113: ; preds = %invoke.cont.i2112 + br label %invoke.cont449 + +lpad.i2114: ; preds = %invoke.cont443 + %tmp402 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +invoke.cont449: ; preds = %if.then.i2113, %invoke.cont.i2112 + %call453 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 -2) + to label %invoke.cont452 unwind label %lpad381 + +invoke.cont452: ; preds = %invoke.cont449 + %call456 = invoke i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)(i8* undef, i8* undef) + to label %invoke.cont455 unwind label %lpad454 + +invoke.cont455: ; preds = %invoke.cont452 + invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont.i2118 unwind label %lpad.i2120 + +invoke.cont.i2118: ; preds = %invoke.cont455 + br i1 undef, label %invoke.cont458, label %if.then.i2119 + +if.then.i2119: ; preds = %invoke.cont.i2118 + br label %invoke.cont458 + +lpad.i2120: ; preds = %invoke.cont455 + %tmp408 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +invoke.cont458: ; preds = %if.then.i2119, %invoke.cont.i2118 + %call461 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 -1) + to label %invoke.cont460 unwind label %lpad381 + +invoke.cont460: ; preds = %invoke.cont458 + invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont.i2124 unwind label %lpad.i2126 + +invoke.cont.i2124: ; preds = %invoke.cont460 + br i1 undef, label %invoke.cont466, label %if.then.i2125 + +if.then.i2125: ; preds = %invoke.cont.i2124 + br 
label %invoke.cont466 + +lpad.i2126: ; preds = %invoke.cont460 + %tmp414 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + br label %ehcleanup477 + +invoke.cont466: ; preds = %if.then.i2125, %invoke.cont.i2124 + %call470 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 0) + to label %invoke.cont469 unwind label %lpad381 + +invoke.cont469: ; preds = %invoke.cont466 + invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont.i2130 unwind label %lpad.i2132 + +invoke.cont.i2130: ; preds = %invoke.cont469 + br i1 undef, label %invoke.cont475, label %if.then.i2131 + +if.then.i2131: ; preds = %invoke.cont.i2130 + br label %invoke.cont475 + +lpad.i2132: ; preds = %invoke.cont469 + %tmp420 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + br label %ehcleanup477 + +invoke.cont475: ; preds = %if.then.i2131, %invoke.cont.i2130 + %call491 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 1) + to label %invoke.cont490 unwind label %lpad489 + +invoke.cont490: ; preds = %invoke.cont475 + invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont499 unwind label %lpad498 + +invoke.cont499: ; preds = %invoke.cont490 + %call504 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont503 unwind label %lpad489 + +invoke.cont503: ; preds = %invoke.cont499 + %call507 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, 
i32)*)(i8* undef, i8* undef, i32 3) + to label %invoke.cont506 unwind label %lpad505 + +invoke.cont506: ; preds = %invoke.cont503 + invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont509 unwind label %lpad508 + +invoke.cont509: ; preds = %invoke.cont506 + %call513 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont512 unwind label %lpad489 + +invoke.cont512: ; preds = %invoke.cont509 + br i1 undef, label %msgSend.null-receiver, label %msgSend.call + +msgSend.call: ; preds = %invoke.cont512 + invoke void bitcast (void (i8*, i8*, ...)* @objc_msgSend_stret to void (%struct.CGPoint*, i8*, i8*)*)(%struct.CGPoint* sret undef, i8* undef, i8* undef) + to label %msgSend.cont unwind label %lpad514 + +msgSend.null-receiver: ; preds = %invoke.cont512 + br label %msgSend.cont + +msgSend.cont: ; preds = %msgSend.null-receiver, %msgSend.call + invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont.i2136 unwind label %lpad.i2138 + +invoke.cont.i2136: ; preds = %msgSend.cont + br i1 undef, label %invoke.cont521, label %if.then.i2137 + +if.then.i2137: ; preds = %invoke.cont.i2136 + br label %invoke.cont521 + +lpad.i2138: ; preds = %msgSend.cont + %tmp468 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +invoke.cont521: ; preds = %if.then.i2137, %invoke.cont.i2136 + invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef) + to label %invoke.cont528 
unwind label %lpad527 + +invoke.cont528: ; preds = %invoke.cont521 + %call532 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont531 unwind label %lpad489 + +invoke.cont531: ; preds = %invoke.cont528 + %call535 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* undef, i8* undef) + to label %invoke.cont534 unwind label %lpad533 + +invoke.cont534: ; preds = %invoke.cont531 + invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont.i2142 unwind label %lpad.i2144 + +invoke.cont.i2142: ; preds = %invoke.cont534 + br i1 undef, label %invoke.cont540, label %if.then.i2143 + +if.then.i2143: ; preds = %invoke.cont.i2142 + br label %invoke.cont540 + +lpad.i2144: ; preds = %invoke.cont534 + %tmp486 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +invoke.cont540: ; preds = %if.then.i2143, %invoke.cont.i2142 + %call544 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i32 3) + to label %invoke.cont543 unwind label %lpad489 + +invoke.cont543: ; preds = %invoke.cont540 + invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef) + to label %invoke.cont546 unwind label %lpad545 + +invoke.cont546: ; preds = %invoke.cont543 + %call549 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont548 unwind label %lpad489 + +invoke.cont548: ; 
preds = %invoke.cont546 + %call555 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont554 unwind label %lpad553 + +invoke.cont554: ; preds = %invoke.cont548 + %tmp499 = call i8* @objc_retain(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) #3 + invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* %tmp499, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont.i2148 unwind label %lpad.i2150 + +invoke.cont.i2148: ; preds = %invoke.cont554 + call void @objc_release(i8* %tmp499) #3, !clang.imprecise_release !0 + invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont566 unwind label %lpad565 + +lpad.i2150: ; preds = %invoke.cont554 + %tmp500 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + call void @objc_release(i8* %tmp499) #3, !clang.imprecise_release !0 + unreachable + +invoke.cont566: ; preds = %invoke.cont.i2148 + invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) + to label %invoke.cont572 unwind label %lpad571 + +invoke.cont572: ; preds = %invoke.cont566 + %call582 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* undef, i8* undef) + to label %invoke.cont581 unwind label %lpad580 + +invoke.cont581: ; preds = %invoke.cont572 + unreachable + +lpad156.body: ; preds = %invoke.cont117 + %tmp1157 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad164.body: ; preds = 
%invoke.cont157 + %tmp1158 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad183: ; preds = %invoke.cont184, %invoke.cont165 + %tmp1159 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + br label %lpad183.body + +lpad183.body: ; preds = %lpad183, %lpad.i2036 + unreachable + +lpad196: ; preds = %invoke.cont190 + %tmp1160 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad200: ; preds = %invoke.cont197 + %tmp1161 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad203: ; preds = %invoke.cont207, %invoke.cont201 + %tmp1162 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad212.body: ; preds = %invoke.cont208 + %tmp1163 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad220: ; preds = %invoke.cont213 + %tmp1164 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + br label %eh.resume + +lpad227: ; preds = %invoke.cont231, %invoke.cont221 + %tmp1166 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + br label %ehcleanup239 + +lpad236.body: ; preds = %invoke.cont232 + %tmp1167 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + br label %ehcleanup239 + +ehcleanup239: ; preds = %lpad236.body, %lpad227 + unreachable + +lpad244: ; preds = %invoke.cont245, %invoke.cont237 + %tmp1168 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad249: ; preds = %invoke.cont247 + %tmp1169 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* 
@__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad252: ; preds = %invoke.cont250 + %tmp1170 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + br label %ehcleanup263 + +lpad255: ; preds = %invoke.cont253 + %tmp1171 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + br label %ehcleanup263 + +lpad258: ; preds = %invoke.cont256 + %tmp1172 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +ehcleanup263: ; preds = %lpad255, %lpad252 + unreachable + +lpad265: ; preds = %invoke.cont259 + %tmp1173 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad273: ; preds = %invoke.cont266 + %tmp1175 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad277: ; preds = %invoke.cont274 + %tmp1176 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad289: ; preds = %invoke.cont281 + %tmp1177 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad301: ; preds = %invoke.cont290 + %tmp1180 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad308: ; preds = %invoke.cont302 + %tmp1182 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad311: ; preds = %invoke.cont309 + %tmp1183 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad314: ; preds = %invoke.cont312 + %tmp1184 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad320: ; preds = 
%invoke.cont315 + %tmp1186 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad340.body.thread: ; preds = %land.rhs335 + %tmp1188 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad340.body: ; preds = %land.end344 + %tmp1189 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad360: ; preds = %invoke.cont345 + %tmp1191 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + br label %eh.resume + +lpad363: ; preds = %invoke.cont373, %invoke.cont361 + %tmp1192 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad369: ; preds = %invoke.cont364 + %tmp1194 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad381: ; preds = %invoke.cont466, %invoke.cont458, %invoke.cont449, %invoke.cont.i2106, %invoke.cont432, %invoke.cont422, %invoke.cont418, %invoke.cont408, %invoke.cont405, %invoke.cont395, %invoke.cont392, %invoke.cont382, %invoke.cont376 + %tmp1196 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + br label %ehcleanup477 + +lpad398: ; preds = %invoke.cont396 + %tmp1199 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad401: ; preds = %invoke.cont399 + %tmp1200 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad411: ; preds = %invoke.cont409 + %tmp1201 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad425: ; preds = %invoke.cont423 + %tmp1203 = landingpad { i8*, i32 } personality i8* bitcast (i32 
(...)* @__gxx_personality_sj0 to i8*) + cleanup + br label %ehcleanup477 + +lpad428: ; preds = %invoke.cont426 + %tmp1204 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad454: ; preds = %invoke.cont452 + %tmp1207 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +ehcleanup477: ; preds = %lpad425, %lpad381, %lpad.i2132, %lpad.i2126 + unreachable + +lpad489: ; preds = %invoke.cont546, %invoke.cont540, %invoke.cont528, %invoke.cont509, %invoke.cont499, %invoke.cont475 + %tmp1211 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + br label %ehcleanup560 + +lpad498: ; preds = %invoke.cont490 + %tmp1214 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad505: ; preds = %invoke.cont503 + %tmp1215 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad508: ; preds = %invoke.cont506 + %tmp1216 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad514: ; preds = %msgSend.call + %tmp1217 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad527: ; preds = %invoke.cont521 + %tmp1219 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + br label %ehcleanup560 + +lpad533: ; preds = %invoke.cont531 + %tmp1220 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad545: ; preds = %invoke.cont543 + %tmp1222 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad553: ; preds = %invoke.cont548 + %tmp1224 = landingpad { i8*, i32 } 
personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +ehcleanup560: ; preds = %lpad527, %lpad489 + br label %eh.resume + +lpad565: ; preds = %invoke.cont.i2148 + %tmp1225 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad571: ; preds = %invoke.cont566 + %tmp1227 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + unreachable + +lpad580: ; preds = %invoke.cont572 + %tmp1228 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + br label %eh.resume + +eh.resume: ; preds = %lpad580, %ehcleanup560, %lpad360, %lpad220 + resume { i8*, i32 } undef +} + +@"OBJC_EHTYPE_$_NSException" = external global i8 + +define void @test4() { +entry: + br i1 undef, label %if.end13, label %if.then10 + +if.then10: ; preds = %entry + br label %if.end13 + +if.end13: ; preds = %if.then10, %entry + %0 = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*, i64, i8*, i8)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i64 2, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring_2 to i8*), i8 signext 0), !clang.arc.no_objc_arc_exceptions !0 + br i1 undef, label %if.then17, label %if.end18 + +if.then17: ; preds = %if.end13 + br label %if.end18 + +if.end18: ; preds = %if.then17, %if.end13 + br i1 undef, label %if.then64, label %if.end73 + +if.then64: ; preds = %if.end18 + br i1 undef, label %cond.end71, label %cond.true68 + +cond.true68: ; preds = %if.then64 + br label %cond.end71 + +cond.end71: ; preds = %cond.true68, %if.then64 + br i1 undef, label %cleanup.action, label %cleanup.done + +cleanup.action: ; preds = %cond.end71 + br label %cleanup.done + +cleanup.done: ; preds = %cleanup.action, %cond.end71 + br label %if.end73 + +if.end73: ; preds = %cleanup.done, %if.end18 + br i1 undef, label %forcoll.empty, label 
%forcoll.loopinit + +forcoll.loopinit: ; preds = %if.end73 + br label %forcoll.loopbody.outer + +forcoll.loopbody.outer: ; preds = %forcoll.refetch, %forcoll.loopinit + br label %forcoll.loopbody + +forcoll.loopbody: ; preds = %forcoll.notmutated, %forcoll.loopbody.outer + br i1 undef, label %forcoll.notmutated, label %forcoll.mutated + +forcoll.mutated: ; preds = %forcoll.loopbody + br label %forcoll.notmutated + +forcoll.notmutated: ; preds = %forcoll.mutated, %forcoll.loopbody + br i1 undef, label %forcoll.loopbody, label %forcoll.refetch + +forcoll.refetch: ; preds = %forcoll.notmutated + br i1 undef, label %forcoll.empty, label %forcoll.loopbody.outer + +forcoll.empty: ; preds = %forcoll.refetch, %if.end73 + br i1 undef, label %if.end85, label %if.then82 + +if.then82: ; preds = %forcoll.empty + br label %if.end85 + +if.end85: ; preds = %if.then82, %forcoll.empty + br i1 undef, label %if.then87, label %if.end102 + +if.then87: ; preds = %if.end85 + br i1 undef, label %if.end94, label %if.then91 + +if.then91: ; preds = %if.then87 + br label %if.end94 + +if.end94: ; preds = %if.then91, %if.then87 + br i1 undef, label %if.end101, label %if.then98 + +if.then98: ; preds = %if.end94 + br label %if.end101 + +if.end101: ; preds = %if.then98, %if.end94 + br label %if.end102 + +if.end102: ; preds = %if.end101, %if.end85 + br i1 undef, label %do.body113, label %if.then107 + +if.then107: ; preds = %if.end102 + br label %do.body113 + +do.body113: ; preds = %if.then107, %if.end102 + br i1 undef, label %if.then116, label %if.end117 + +if.then116: ; preds = %do.body113 + br label %if.end117 + +if.end117: ; preds = %if.then116, %do.body113 + br i1 undef, label %if.then125, label %if.end126 + +if.then125: ; preds = %if.end117 + br label %if.end126 + +if.end126: ; preds = %if.then125, %if.end117 + br i1 undef, label %do.end166, label %cond.true132 + +cond.true132: ; preds = %if.end126 + br i1 undef, label %do.body148, label %cond.true151 + +do.body148: ; preds = %cond.true132 + br 
i1 undef, label %do.end166, label %cond.true151 + +cond.true151: ; preds = %do.body148, %cond.true132 + br i1 undef, label %if.then162, label %do.end166 + +if.then162: ; preds = %cond.true151 + br label %do.end166 + +do.end166: ; preds = %if.then162, %cond.true151, %do.body148, %if.end126 + br i1 undef, label %if.then304, label %if.then170 + +if.then170: ; preds = %do.end166 + br i1 undef, label %do.end193, label %cond.true179 + +cond.true179: ; preds = %if.then170 + br i1 undef, label %if.then190, label %do.end193 + +if.then190: ; preds = %cond.true179 + br label %do.end193 + +do.end193: ; preds = %if.then190, %cond.true179, %if.then170 + br i1 undef, label %do.body200, label %do.body283 + +do.body200: ; preds = %do.end193 + br i1 undef, label %do.end254, label %cond.true203 + +cond.true203: ; preds = %do.body200 + br i1 undef, label %do.body218, label %cond.true221 + +do.body218: ; preds = %cond.true203 + br i1 undef, label %do.end254, label %cond.true221 + +cond.true221: ; preds = %do.body218, %cond.true203 + br i1 undef, label %if.then232, label %do.body236 + +if.then232: ; preds = %cond.true221 + br label %do.body236 + +do.body236: ; preds = %if.then232, %cond.true221 + br i1 undef, label %do.end254, label %cond.true239 + +cond.true239: ; preds = %do.body236 + br i1 undef, label %if.then250, label %do.end254 + +if.then250: ; preds = %cond.true239 + br label %do.end254 + +do.end254: ; preds = %if.then250, %cond.true239, %do.body236, %do.body218, %do.body200 + br i1 undef, label %do.end277, label %cond.true263 + +cond.true263: ; preds = %do.end254 + br i1 undef, label %if.then274, label %do.end277 + +if.then274: ; preds = %cond.true263 + unreachable + +do.end277: ; preds = %cond.true263, %do.end254 + br i1 undef, label %if.then280, label %do.body283 + +if.then280: ; preds = %do.end277 + br label %do.body283 + +do.body283: ; preds = %if.then280, %do.end277, %do.end193 + br i1 undef, label %if.end301, label %cond.true286 + +cond.true286: ; preds = %do.body283 + br 
i1 undef, label %if.then297, label %if.end301 + +if.then297: ; preds = %cond.true286 + br label %if.end301 + +if.end301: ; preds = %if.then297, %cond.true286, %do.body283 + br i1 undef, label %if.then304, label %do.body351 + +if.then304: ; preds = %if.end301, %do.end166 + br i1 undef, label %do.body309.lr.ph, label %do.body351 + +do.body309.lr.ph: ; preds = %if.then304 + br label %do.body309 + +do.body309: ; preds = %for.cond.backedge, %do.body309.lr.ph + br i1 undef, label %do.end328, label %cond.true312 + +cond.true312: ; preds = %do.body309 + br i1 undef, label %if.then323, label %do.end328 + +if.then323: ; preds = %cond.true312 + br label %do.end328 + +do.end328: ; preds = %if.then323, %cond.true312, %do.body309 + br i1 undef, label %for.cond.backedge, label %cond.true335 + +for.cond.backedge: ; preds = %if.then346, %cond.true335, %do.end328 + br i1 undef, label %do.body309, label %do.body351 + +cond.true335: ; preds = %do.end328 + br i1 undef, label %if.then346, label %for.cond.backedge + +if.then346: ; preds = %cond.true335 + br label %for.cond.backedge + +do.body351: ; preds = %for.cond.backedge, %if.then304, %if.end301 + br i1 undef, label %if.then354, label %if.end355 + +if.then354: ; preds = %do.body351 + br label %if.end355 + +if.end355: ; preds = %if.then354, %do.body351 + br i1 undef, label %if.else, label %if.then364 + +if.then364: ; preds = %if.end355 + br label %do.body366 + +if.else: ; preds = %if.end355 + br label %do.body366 + +do.body366: ; preds = %if.else, %if.then364 + br i1 undef, label %if.then369, label %if.end377.critedge + +if.then369: ; preds = %do.body366 + br label %if.end377 + +if.end377.critedge: ; preds = %do.body366 + br label %if.end377 + +if.end377: ; preds = %if.end377.critedge, %if.then369 + br i1 undef, label %if.then383, label %if.end392.critedge + +if.then383: ; preds = %if.end377 + br label %if.end392 + +if.end392.critedge: ; preds = %if.end377 + br label %if.end392 + +if.end392: ; preds = %if.end392.critedge, %if.then383 
+ br i1 undef, label %if.then398, label %if.end399 + +if.then398: ; preds = %if.end392 + br label %if.end399 + +if.end399: ; preds = %if.then398, %if.end392 + invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*)*)(i8* undef, i8* undef) + to label %eh.cont unwind label %lpad, !clang.arc.no_objc_arc_exceptions !0 + +eh.cont: ; preds = %if.end399 + br i1 undef, label %if.then430, label %if.end439.critedge + +if.then430: ; preds = %eh.cont + %1 = call i8* @objc_retain(i8* %0) + br label %if.end439 + +lpad: ; preds = %if.end399 + %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*) + catch i8* @"OBJC_EHTYPE_$_NSException" + unreachable + +if.end439.critedge: ; preds = %eh.cont + %3 = call i8* @objc_retain(i8* %0) + br label %if.end439 + +if.end439: ; preds = %if.end439.critedge, %if.then430 + call void @objc_release(i8* %0), !clang.imprecise_release !0 + unreachable + +return: ; No predecessors! + ret void +} + !0 = metadata !{} diff --git a/test/Transforms/ObjCARC/retain-block-alloca.ll b/test/Transforms/ObjCARC/retain-block-alloca.ll deleted file mode 100644 index 6b1578a..0000000 --- a/test/Transforms/ObjCARC/retain-block-alloca.ll +++ /dev/null @@ -1,94 +0,0 @@ -; RUN: opt -S -objc-arc < %s | FileCheck %s -; rdar://10209613 - -%0 = type opaque -%struct.__block_descriptor = type { i64, i64 } - -@_NSConcreteStackBlock = external global i8* -@__block_descriptor_tmp = external hidden constant { i64, i64, i8*, i8*, i8*, i8* } -@"\01L_OBJC_SELECTOR_REFERENCES_" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip" - -; CHECK-LABEL: define void @test( -; CHECK: %3 = call i8* @objc_retainBlock(i8* %2) [[NUW:#[0-9]+]] -; CHECK: @objc_msgSend -; CHECK-NEXT: @objc_release(i8* %3) -define void @test(%0* %array) uwtable { -entry: - %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>, align 8 - %0 = bitcast %0* %array to i8* - %1 = tail call i8* 
@objc_retain(i8* %0) nounwind - %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 0 - store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8 - %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 1 - store i32 1107296256, i32* %block.flags, align 8 - %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 2 - store i32 0, i32* %block.reserved, align 4 - %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 3 - store i8* bitcast (void (i8*)* @__test_block_invoke_0 to i8*), i8** %block.invoke, align 8 - %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 4 - store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8*, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8 - %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 5 - store %0* %array, %0** %block.captured, align 8 - %2 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block to i8* - %3 = call i8* @objc_retainBlock(i8* %2) nounwind - %tmp2 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8 - call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*)*)(i8* %0, i8* %tmp2, i8* %3) - call void @objc_release(i8* %3) nounwind - %strongdestroy = load %0** %block.captured, align 8 - %4 = bitcast %0* %strongdestroy to i8* - call void @objc_release(i8* %4) nounwind, !clang.imprecise_release !0 - ret void -} - -; Same as test, but the objc_retainBlock has a clang.arc.copy_on_escape -; tag so it's safe to delete. 
- -; CHECK-LABEL: define void @test_with_COE( -; CHECK-NOT: @objc_retainBlock -; CHECK: @objc_msgSend -; CHECK: @objc_release -; CHECK-NOT: @objc_release -; CHECK: } -define void @test_with_COE(%0* %array) uwtable { -entry: - %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>, align 8 - %0 = bitcast %0* %array to i8* - %1 = tail call i8* @objc_retain(i8* %0) nounwind - %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 0 - store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8 - %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 1 - store i32 1107296256, i32* %block.flags, align 8 - %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 2 - store i32 0, i32* %block.reserved, align 4 - %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 3 - store i8* bitcast (void (i8*)* @__test_block_invoke_0 to i8*), i8** %block.invoke, align 8 - %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 4 - store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8*, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8 - %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 5 - store %0* %array, %0** %block.captured, align 8 - %2 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block to i8* - %3 = call i8* @objc_retainBlock(i8* %2) nounwind, !clang.arc.copy_on_escape !0 - %tmp2 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8 - call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*)*)(i8* %0, i8* %tmp2, i8* %3) - call void 
@objc_release(i8* %3) nounwind - %strongdestroy = load %0** %block.captured, align 8 - %4 = bitcast %0* %strongdestroy to i8* - call void @objc_release(i8* %4) nounwind, !clang.imprecise_release !0 - ret void -} - -declare i8* @objc_retain(i8*) - -declare void @__test_block_invoke_0(i8* nocapture) uwtable - -declare i8* @objc_retainBlock(i8*) - -declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind - -declare void @objc_release(i8*) - -; CHECK: attributes #0 = { uwtable } -; CHECK: attributes #1 = { nonlazybind } -; CHECK: attributes [[NUW]] = { nounwind } - -!0 = metadata !{} diff --git a/test/Transforms/ObjCARC/retain-block-escape-analysis.ll b/test/Transforms/ObjCARC/retain-block-escape-analysis.ll deleted file mode 100644 index 7914bb8..0000000 --- a/test/Transforms/ObjCARC/retain-block-escape-analysis.ll +++ /dev/null @@ -1,215 +0,0 @@ -; RUN: opt -S -objc-arc < %s | FileCheck %s - -declare i8* @objc_retain(i8*) nonlazybind -declare void @objc_release(i8*) nonlazybind -declare i8* @objc_retainBlock(i8*) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; Use by an instruction which copies the value is an escape if the ; -; result is an escape. The current instructions with this property are: ; -; ; -; 1. BitCast. ; -; 2. GEP. ; -; 3. PhiNode. ; -; 4. SelectInst. ; -; ; -; Make sure that such instructions do not confuse the optimizer into removing ; -; an objc_retainBlock that is needed. ; -; ; -; rdar://13273675. (With extra test cases to handle bitcast, phi, and select. 
; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -define void @bitcasttest(i8* %storage, void (...)* %block) { -; CHECK-LABEL: define void @bitcasttest( -entry: - %t1 = bitcast void (...)* %block to i8* -; CHECK: tail call i8* @objc_retain - %t2 = tail call i8* @objc_retain(i8* %t1) -; CHECK: tail call i8* @objc_retainBlock - %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0 - %t4 = bitcast i8* %storage to void (...)** - %t5 = bitcast i8* %t3 to void (...)* - store void (...)* %t5, void (...)** %t4, align 8 -; CHECK: call void @objc_release - call void @objc_release(i8* %t1) - ret void -; CHECK: } -} - -define void @bitcasttest_a(i8* %storage, void (...)* %block) { -; CHECK-LABEL: define void @bitcasttest_a( -entry: - %t1 = bitcast void (...)* %block to i8* -; CHECK-NOT: tail call i8* @objc_retain - %t2 = tail call i8* @objc_retain(i8* %t1) -; CHECK: tail call i8* @objc_retainBlock - %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0 - %t4 = bitcast i8* %storage to void (...)** - %t5 = bitcast i8* %t3 to void (...)* - store void (...)* %t5, void (...)** %t4, align 8 -; CHECK-NOT: call void @objc_release - call void @objc_release(i8* %t1), !clang.imprecise_release !0 - ret void -; CHECK: } -} - -define void @geptest(void (...)** %storage_array, void (...)* %block) { -; CHECK-LABEL: define void @geptest( -entry: - %t1 = bitcast void (...)* %block to i8* -; CHECK: tail call i8* @objc_retain - %t2 = tail call i8* @objc_retain(i8* %t1) -; CHECK: tail call i8* @objc_retainBlock - %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0 - %t4 = bitcast i8* %t3 to void (...)* - - %storage = getelementptr inbounds void (...)** %storage_array, i64 0 - - store void (...)* %t4, void (...)** %storage, align 8 -; CHECK: call void @objc_release - call void @objc_release(i8* %t1) - ret void -; CHECK: } -} - -define void @geptest_a(void (...)** %storage_array, void (...)* 
%block) { -; CHECK-LABEL: define void @geptest_a( -entry: - %t1 = bitcast void (...)* %block to i8* -; CHECK-NOT: tail call i8* @objc_retain - %t2 = tail call i8* @objc_retain(i8* %t1) -; CHECK: tail call i8* @objc_retainBlock - %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0 - %t4 = bitcast i8* %t3 to void (...)* - - %storage = getelementptr inbounds void (...)** %storage_array, i64 0 - - store void (...)* %t4, void (...)** %storage, align 8 -; CHECK-NOT: call void @objc_release - call void @objc_release(i8* %t1), !clang.imprecise_release !0 - ret void -; CHECK: } -} - -define void @selecttest(void (...)** %store1, void (...)** %store2, - void (...)* %block) { -; CHECK-LABEL: define void @selecttest( -entry: - %t1 = bitcast void (...)* %block to i8* -; CHECK: tail call i8* @objc_retain - %t2 = tail call i8* @objc_retain(i8* %t1) -; CHECK: tail call i8* @objc_retainBlock - %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0 - %t4 = bitcast i8* %t3 to void (...)* - %store = select i1 undef, void (...)** %store1, void (...)** %store2 - store void (...)* %t4, void (...)** %store, align 8 -; CHECK: call void @objc_release - call void @objc_release(i8* %t1) - ret void -; CHECK: } -} - -define void @selecttest_a(void (...)** %store1, void (...)** %store2, - void (...)* %block) { -; CHECK-LABEL: define void @selecttest_a( -entry: - %t1 = bitcast void (...)* %block to i8* -; CHECK-NOT: tail call i8* @objc_retain - %t2 = tail call i8* @objc_retain(i8* %t1) -; CHECK: tail call i8* @objc_retainBlock - %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0 - %t4 = bitcast i8* %t3 to void (...)* - %store = select i1 undef, void (...)** %store1, void (...)** %store2 - store void (...)* %t4, void (...)** %store, align 8 -; CHECK-NOT: call void @objc_release - call void @objc_release(i8* %t1), !clang.imprecise_release !0 - ret void -; CHECK: } -} - -define void @phinodetest(void (...)** %storage1, - void 
(...)** %storage2, - void (...)* %block) { -; CHECK-LABEL: define void @phinodetest( -entry: - %t1 = bitcast void (...)* %block to i8* -; CHECK: tail call i8* @objc_retain - %t2 = tail call i8* @objc_retain(i8* %t1) -; CHECK: tail call i8* @objc_retainBlock - %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0 - %t4 = bitcast i8* %t3 to void (...)* - br i1 undef, label %store1_set, label %store2_set -; CHECK: store1_set: - -store1_set: - br label %end - -store2_set: - br label %end - -end: -; CHECK: end: - %storage = phi void (...)** [ %storage1, %store1_set ], [ %storage2, %store2_set] - store void (...)* %t4, void (...)** %storage, align 8 -; CHECK: call void @objc_release - call void @objc_release(i8* %t1) - ret void -; CHECK: } -} - -define void @phinodetest_a(void (...)** %storage1, - void (...)** %storage2, - void (...)* %block) { -; CHECK-LABEL: define void @phinodetest_a( -entry: - %t1 = bitcast void (...)* %block to i8* -; CHECK-NOT: tail call i8* @objc_retain - %t2 = tail call i8* @objc_retain(i8* %t1) -; CHECK: tail call i8* @objc_retainBlock - %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0 - %t4 = bitcast i8* %t3 to void (...)* - br i1 undef, label %store1_set, label %store2_set - -store1_set: - br label %end - -store2_set: - br label %end - -end: - %storage = phi void (...)** [ %storage1, %store1_set ], [ %storage2, %store2_set] - store void (...)* %t4, void (...)** %storage, align 8 -; CHECK-NOT: call void @objc_release - call void @objc_release(i8* %t1), !clang.imprecise_release !0 - ret void -} - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; This test makes sure that we do not hang clang when visiting a use ; -; cycle caused by phi nodes during objc-arc analysis. *NOTE* This ; -; test case looks a little convoluted since it was produced by ; -; bugpoint. 
; -; ; -; bugzilla://14551 ; -; rdar://12851911 ; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -define void @phinode_use_cycle(i8* %block) uwtable optsize ssp { -; CHECK: define void @phinode_use_cycle(i8* %block) -entry: - br label %for.body - -for.body: ; preds = %if.then, %for.body, %entry - %block.05 = phi void (...)* [ null, %entry ], [ %1, %if.then ], [ %block.05, %for.body ] - br i1 undef, label %for.body, label %if.then - -if.then: ; preds = %for.body - %0 = call i8* @objc_retainBlock(i8* %block), !clang.arc.copy_on_escape !0 - %1 = bitcast i8* %0 to void (...)* - %2 = bitcast void (...)* %block.05 to i8* - call void @objc_release(i8* %2) nounwind, !clang.imprecise_release !0 - br label %for.body -} - -!0 = metadata !{} diff --git a/test/Transforms/ObjCARC/retain-block-load.ll b/test/Transforms/ObjCARC/retain-block-load.ll deleted file mode 100644 index a5170e3..0000000 --- a/test/Transforms/ObjCARC/retain-block-load.ll +++ /dev/null @@ -1,51 +0,0 @@ -; RUN: opt -objc-arc -S < %s | FileCheck %s - -; rdar://10803830 -; The optimizer should be able to prove that the block does not -; "escape", so the retainBlock+release pair can be eliminated. 
- -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" - -%struct.__block_descriptor = type { i64, i64 } - -@_NSConcreteStackBlock = external global i8* -@__block_descriptor_tmp = external global { i64, i64, i8*, i8* } - -; CHECK: define void @test() { -; CHECK-NOT: @objc -; CHECK: declare i8* @objc_retainBlock(i8*) -; CHECK: declare void @objc_release(i8*) - -define void @test() { -entry: - %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>, align 8 - %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 0 - store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8 - %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 1 - store i32 1073741824, i32* %block.flags, align 8 - %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 2 - store i32 0, i32* %block.reserved, align 4 - %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 3 - store i8* bitcast (i32 (i8*)* @__test_block_invoke_0 to i8*), i8** %block.invoke, align 8 - %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 4 - store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8 - %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 5 - store i32 4, i32* %block.captured, align 8 - %tmp = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block to i8* - %tmp1 = call i8* @objc_retainBlock(i8* %tmp) nounwind, 
!clang.arc.copy_on_escape !0 - %tmp2 = getelementptr inbounds i8* %tmp1, i64 16 - %tmp3 = bitcast i8* %tmp2 to i8** - %tmp4 = load i8** %tmp3, align 8 - %tmp5 = bitcast i8* %tmp4 to i32 (i8*)* - %call = call i32 %tmp5(i8* %tmp1) - call void @objc_release(i8* %tmp1) nounwind, !clang.imprecise_release !0 - ret void -} - -declare i32 @__test_block_invoke_0(i8* nocapture %.block_descriptor) nounwind readonly - -declare i8* @objc_retainBlock(i8*) - -declare void @objc_release(i8*) - -!0 = metadata !{} diff --git a/test/Transforms/ObjCARC/retain-block.ll b/test/Transforms/ObjCARC/retain-block.ll deleted file mode 100644 index 1bb3f02..0000000 --- a/test/Transforms/ObjCARC/retain-block.ll +++ /dev/null @@ -1,140 +0,0 @@ -; RUN: opt -objc-arc -S < %s | FileCheck %s - -target datalayout = "e-p:64:64:64" - -!0 = metadata !{} - -declare i8* @objc_retain(i8*) -declare void @callee(i8) -declare void @use_pointer(i8*) -declare void @objc_release(i8*) -declare i8* @objc_retainBlock(i8*) -declare i8* @objc_autorelease(i8*) - -; Basic retainBlock+release elimination. - -; CHECK: define void @test0(i8* %tmp) { -; CHECK-NOT: @objc -; CHECK: } -define void @test0(i8* %tmp) { -entry: - %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0 - tail call void @use_pointer(i8* %tmp2) - tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0 - ret void -} - -; Same as test0, but there's no copy_on_escape metadata, so there's no -; optimization possible. 
- -; CHECK: define void @test0_no_metadata(i8* %tmp) { -; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) [[NUW:#[0-9]+]] -; CHECK: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0 -; CHECK: } -define void @test0_no_metadata(i8* %tmp) { -entry: - %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind - tail call void @use_pointer(i8* %tmp2) - tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0 - ret void -} - -; Same as test0, but the pointer escapes, so there's no -; optimization possible. - -; CHECK: define void @test0_escape(i8* %tmp, i8** %z) { -; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]], !clang.arc.copy_on_escape !0 -; CHECK: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0 -; CHECK: } -define void @test0_escape(i8* %tmp, i8** %z) { -entry: - %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0 - store i8* %tmp2, i8** %z - tail call void @use_pointer(i8* %tmp2) - tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0 - ret void -} - -; Same as test0_escape, but there's no intervening call. - -; CHECK: define void @test0_just_escape(i8* %tmp, i8** %z) { -; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]], !clang.arc.copy_on_escape !0 -; CHECK: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0 -; CHECK: } -define void @test0_just_escape(i8* %tmp, i8** %z) { -entry: - %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0 - store i8* %tmp2, i8** %z - tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0 - ret void -} - -; Basic nested retainBlock+release elimination. 
- -; CHECK: define void @test1(i8* %tmp) { -; CHECK-NOT: @objc -; CHECK: tail call i8* @objc_retain(i8* %tmp) [[NUW]] -; CHECK-NOT: @objc -; CHECK: tail call void @objc_release(i8* %tmp) [[NUW]], !clang.imprecise_release !0 -; CHECK-NOT: @objc -; CHECK: } -define void @test1(i8* %tmp) { -entry: - %tmp1 = tail call i8* @objc_retain(i8* %tmp) nounwind - %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0 - tail call void @use_pointer(i8* %tmp2) - tail call void @use_pointer(i8* %tmp2) - tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0 - tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0 - ret void -} - -; Same as test1, but there's no copy_on_escape metadata, so there's no -; retainBlock+release optimization possible. But we can still eliminate -; the outer retain+release. - -; CHECK: define void @test1_no_metadata(i8* %tmp) { -; CHECK-NEXT: entry: -; CHECK-NEXT: tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]] -; CHECK-NEXT: @use_pointer(i8* %tmp2) -; CHECK-NEXT: @use_pointer(i8* %tmp2) -; CHECK-NEXT: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0 -; CHECK-NOT: @objc -; CHECK: } -define void @test1_no_metadata(i8* %tmp) { -entry: - %tmp1 = tail call i8* @objc_retain(i8* %tmp) nounwind - %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind - tail call void @use_pointer(i8* %tmp2) - tail call void @use_pointer(i8* %tmp2) - tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0 - tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0 - ret void -} - -; Same as test1, but the pointer escapes, so there's no -; retainBlock+release optimization possible. 
But we can still eliminate -; the outer retain+release - -; CHECK: define void @test1_escape(i8* %tmp, i8** %z) { -; CHECK-NEXT: entry: -; CHECK-NEXT: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]], !clang.arc.copy_on_escape !0 -; CHECK-NEXT: store i8* %tmp2, i8** %z -; CHECK-NEXT: @use_pointer(i8* %tmp2) -; CHECK-NEXT: @use_pointer(i8* %tmp2) -; CHECK-NEXT: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0 -; CHECK-NOT: @objc -; CHECK: } -define void @test1_escape(i8* %tmp, i8** %z) { -entry: - %tmp1 = tail call i8* @objc_retain(i8* %tmp) nounwind - %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0 - store i8* %tmp2, i8** %z - tail call void @use_pointer(i8* %tmp2) - tail call void @use_pointer(i8* %tmp2) - tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0 - tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0 - ret void -} - -; CHECK: attributes [[NUW]] = { nounwind } diff --git a/test/Transforms/PhaseOrdering/lit.local.cfg b/test/Transforms/PhaseOrdering/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/PhaseOrdering/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/PruneEH/lit.local.cfg b/test/Transforms/PruneEH/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/PruneEH/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/Reassociate/lit.local.cfg b/test/Transforms/Reassociate/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/Reassociate/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/Reg2Mem/lit.local.cfg b/test/Transforms/Reg2Mem/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/Reg2Mem/lit.local.cfg +++ /dev/null @@ 
-1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/SCCP/lit.local.cfg b/test/Transforms/SCCP/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/SCCP/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/SLPVectorizer/ARM/lit.local.cfg b/test/Transforms/SLPVectorizer/ARM/lit.local.cfg new file mode 100644 index 0000000..5fc35d8 --- /dev/null +++ b/test/Transforms/SLPVectorizer/ARM/lit.local.cfg @@ -0,0 +1,3 @@ +targets = set(config.root.targets_to_build.split()) +if not 'ARM' in targets: + config.unsupported = True diff --git a/test/Transforms/SLPVectorizer/ARM/memory.ll b/test/Transforms/SLPVectorizer/ARM/memory.ll new file mode 100644 index 0000000..383c808 --- /dev/null +++ b/test/Transforms/SLPVectorizer/ARM/memory.ll @@ -0,0 +1,20 @@ +; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=thumbv7-apple-ios3.0.0 -mcpu=swift | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" + +; On swift unaligned <2 x double> stores need 4uops and it is there for cheaper +; to do this scalar. 
+ +; CHECK-LABEL: expensive_double_store +; CHECK-NOT: load <2 x double> +; CHECK-NOT: store <2 x double> +define void @expensive_double_store(double* noalias %dst, double* noalias %src, i64 %count) { +entry: + %0 = load double* %src, align 8 + store double %0, double* %dst, align 8 + %arrayidx2 = getelementptr inbounds double* %src, i64 1 + %1 = load double* %arrayidx2, align 8 + %arrayidx3 = getelementptr inbounds double* %dst, i64 1 + store double %1, double* %arrayidx3, align 8 + ret void +} diff --git a/test/Transforms/SLPVectorizer/ARM/sroa.ll b/test/Transforms/SLPVectorizer/ARM/sroa.ll new file mode 100644 index 0000000..e0c75b1 --- /dev/null +++ b/test/Transforms/SLPVectorizer/ARM/sroa.ll @@ -0,0 +1,52 @@ +; RUN: opt -S -mcpu=swift -mtriple=thumbv7-apple-ios -basicaa -slp-vectorizer < %s | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" + +%class.Complex = type { double, double } + +; Code like this is the result of SROA. Make sure we don't vectorize this +; because the in the scalar version of this the shl/or are handled by the +; backend and disappear, the vectorized code stays. 
+ +; CHECK-LABEL: SROAed +; CHECK-NOT: shl <2 x i64> +; CHECK-NOT: or <2 x i64> + +define void @SROAed(%class.Complex* noalias nocapture sret %agg.result, [4 x i32] %a.coerce, [4 x i32] %b.coerce) { +entry: + %a.coerce.fca.0.extract = extractvalue [4 x i32] %a.coerce, 0 + %a.sroa.0.0.insert.ext = zext i32 %a.coerce.fca.0.extract to i64 + %a.coerce.fca.1.extract = extractvalue [4 x i32] %a.coerce, 1 + %a.sroa.0.4.insert.ext = zext i32 %a.coerce.fca.1.extract to i64 + %a.sroa.0.4.insert.shift = shl nuw i64 %a.sroa.0.4.insert.ext, 32 + %a.sroa.0.4.insert.insert = or i64 %a.sroa.0.4.insert.shift, %a.sroa.0.0.insert.ext + %0 = bitcast i64 %a.sroa.0.4.insert.insert to double + %a.coerce.fca.2.extract = extractvalue [4 x i32] %a.coerce, 2 + %a.sroa.3.8.insert.ext = zext i32 %a.coerce.fca.2.extract to i64 + %a.coerce.fca.3.extract = extractvalue [4 x i32] %a.coerce, 3 + %a.sroa.3.12.insert.ext = zext i32 %a.coerce.fca.3.extract to i64 + %a.sroa.3.12.insert.shift = shl nuw i64 %a.sroa.3.12.insert.ext, 32 + %a.sroa.3.12.insert.insert = or i64 %a.sroa.3.12.insert.shift, %a.sroa.3.8.insert.ext + %1 = bitcast i64 %a.sroa.3.12.insert.insert to double + %b.coerce.fca.0.extract = extractvalue [4 x i32] %b.coerce, 0 + %b.sroa.0.0.insert.ext = zext i32 %b.coerce.fca.0.extract to i64 + %b.coerce.fca.1.extract = extractvalue [4 x i32] %b.coerce, 1 + %b.sroa.0.4.insert.ext = zext i32 %b.coerce.fca.1.extract to i64 + %b.sroa.0.4.insert.shift = shl nuw i64 %b.sroa.0.4.insert.ext, 32 + %b.sroa.0.4.insert.insert = or i64 %b.sroa.0.4.insert.shift, %b.sroa.0.0.insert.ext + %2 = bitcast i64 %b.sroa.0.4.insert.insert to double + %b.coerce.fca.2.extract = extractvalue [4 x i32] %b.coerce, 2 + %b.sroa.3.8.insert.ext = zext i32 %b.coerce.fca.2.extract to i64 + %b.coerce.fca.3.extract = extractvalue [4 x i32] %b.coerce, 3 + %b.sroa.3.12.insert.ext = zext i32 %b.coerce.fca.3.extract to i64 + %b.sroa.3.12.insert.shift = shl nuw i64 %b.sroa.3.12.insert.ext, 32 + %b.sroa.3.12.insert.insert = or i64 
%b.sroa.3.12.insert.shift, %b.sroa.3.8.insert.ext + %3 = bitcast i64 %b.sroa.3.12.insert.insert to double + %add = fadd double %0, %2 + %add3 = fadd double %1, %3 + %re.i.i = getelementptr inbounds %class.Complex* %agg.result, i32 0, i32 0 + store double %add, double* %re.i.i, align 4 + %im.i.i = getelementptr inbounds %class.Complex* %agg.result, i32 0, i32 1 + store double %add3, double* %im.i.i, align 4 + ret void +} diff --git a/test/Transforms/SLPVectorizer/R600/lit.local.cfg b/test/Transforms/SLPVectorizer/R600/lit.local.cfg new file mode 100644 index 0000000..9e0ab99 --- /dev/null +++ b/test/Transforms/SLPVectorizer/R600/lit.local.cfg @@ -0,0 +1,4 @@ +targets = set(config.root.targets_to_build.split()) +if not 'R600' in targets: + config.unsupported = True + diff --git a/test/Transforms/SLPVectorizer/R600/simplebb.ll b/test/Transforms/SLPVectorizer/R600/simplebb.ll new file mode 100644 index 0000000..b6d794b --- /dev/null +++ b/test/Transforms/SLPVectorizer/R600/simplebb.ll @@ -0,0 +1,65 @@ +; RUN: opt -S -march=r600 -mcpu=cayman -basicaa -slp-vectorizer -dce < %s | FileCheck %s + +target datalayout = "e-p:32:32:32-p3:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64" + + +; Simple 3-pair chain with loads and stores +define void @test1_as_3_3_3(double addrspace(3)* %a, double addrspace(3)* %b, double addrspace(3)* %c) { +; CHECK-LABEL: @test1_as_3_3_3( +; CHECK: load <2 x double> addrspace(3)* +; CHECK: load <2 x double> addrspace(3)* +; CHECK: store <2 x double> %{{.*}}, <2 x double> addrspace(3)* % +; CHECK: ret + %i0 = load double addrspace(3)* %a, align 8 + %i1 = load double addrspace(3)* %b, align 8 + %mul = fmul double %i0, %i1 + %arrayidx3 = getelementptr inbounds double addrspace(3)* %a, i64 1 + %i3 = load double addrspace(3)* %arrayidx3, align 8 + %arrayidx4 = getelementptr inbounds 
double addrspace(3)* %b, i64 1 + %i4 = load double addrspace(3)* %arrayidx4, align 8 + %mul5 = fmul double %i3, %i4 + store double %mul, double addrspace(3)* %c, align 8 + %arrayidx5 = getelementptr inbounds double addrspace(3)* %c, i64 1 + store double %mul5, double addrspace(3)* %arrayidx5, align 8 + ret void +} + +define void @test1_as_3_0_0(double addrspace(3)* %a, double* %b, double* %c) { +; CHECK-LABEL: @test1_as_3_0_0( +; CHECK: load <2 x double> addrspace(3)* +; CHECK: load <2 x double>* +; CHECK: store <2 x double> %{{.*}}, <2 x double>* % +; CHECK: ret + %i0 = load double addrspace(3)* %a, align 8 + %i1 = load double* %b, align 8 + %mul = fmul double %i0, %i1 + %arrayidx3 = getelementptr inbounds double addrspace(3)* %a, i64 1 + %i3 = load double addrspace(3)* %arrayidx3, align 8 + %arrayidx4 = getelementptr inbounds double* %b, i64 1 + %i4 = load double* %arrayidx4, align 8 + %mul5 = fmul double %i3, %i4 + store double %mul, double* %c, align 8 + %arrayidx5 = getelementptr inbounds double* %c, i64 1 + store double %mul5, double* %arrayidx5, align 8 + ret void +} + +define void @test1_as_0_0_3(double* %a, double* %b, double addrspace(3)* %c) { +; CHECK-LABEL: @test1_as_0_0_3( +; CHECK: load <2 x double>* +; CHECK: load <2 x double>* +; CHECK: store <2 x double> %{{.*}}, <2 x double> addrspace(3)* % +; CHECK: ret + %i0 = load double* %a, align 8 + %i1 = load double* %b, align 8 + %mul = fmul double %i0, %i1 + %arrayidx3 = getelementptr inbounds double* %a, i64 1 + %i3 = load double* %arrayidx3, align 8 + %arrayidx4 = getelementptr inbounds double* %b, i64 1 + %i4 = load double* %arrayidx4, align 8 + %mul5 = fmul double %i3, %i4 + store double %mul, double addrspace(3)* %c, align 8 + %arrayidx5 = getelementptr inbounds double addrspace(3)* %c, i64 1 + store double %mul5, double addrspace(3)* %arrayidx5, align 8 + ret void +} diff --git a/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll b/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll index 
931195e..25c6545 100644 --- a/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll +++ b/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll @@ -60,9 +60,9 @@ if.end332: ; preds = %if.then329, %if.end %sub334 = fsub float %add294, %dx272.1 %sub338 = fsub float %add297, %dy276.1 %arrayidx.i.i606 = getelementptr inbounds %class.btVector3.23.221.463.485.507.573.595.683.727.749.815.837.991.1585.1607.1629.1651.1849.2047.2069.2091.2113* %vertices, i64 0, i32 0, i64 0 - store float %sub334, float* %arrayidx.i.i606, align 4, !tbaa !0 + store float %sub334, float* %arrayidx.i.i606, align 4 %arrayidx3.i607 = getelementptr inbounds %class.btVector3.23.221.463.485.507.573.595.683.727.749.815.837.991.1585.1607.1629.1651.1849.2047.2069.2091.2113* %vertices, i64 0, i32 0, i64 1 - store float %sub338, float* %arrayidx3.i607, align 4, !tbaa !0 + store float %sub338, float* %arrayidx3.i607, align 4 br label %return return: ; preds = %if.end332, %for.end271, %entry @@ -82,7 +82,3 @@ if.end22.2: ; preds = %if.then17.2, %if.en } attributes #0 = { ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } - -!0 = metadata !{metadata !"float", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll b/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll new file mode 100644 index 0000000..8da3c34 --- /dev/null +++ b/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll @@ -0,0 +1,41 @@ +; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = 
"x86_64-apple-macosx10.8.0" + +%struct.DState = type { i32, i32 } + +@b = common global %struct.DState zeroinitializer, align 4 +@d = common global i32 0, align 4 +@c = common global i32 0, align 4 +@a = common global i32 0, align 4 +@e = common global i32 0, align 4 + +define i32 @fn1() { +entry: + %0 = load i32* getelementptr inbounds (%struct.DState* @b, i32 0, i32 0), align 4 + %1 = load i32* getelementptr inbounds (%struct.DState* @b, i32 0, i32 1), align 4 + %2 = load i32* @d, align 4 + %cond = icmp eq i32 %2, 0 + br i1 %cond, label %sw.bb, label %save_state_and_return + +sw.bb: ; preds = %entry + %3 = load i32* @c, align 4 + %and = and i32 %3, 7 + store i32 %and, i32* @a, align 4 + switch i32 %and, label %if.end [ + i32 7, label %save_state_and_return + i32 0, label %save_state_and_return + ] + +if.end: ; preds = %sw.bb + br label %save_state_and_return + +save_state_and_return: ; preds = %sw.bb, %sw.bb, %if.end, %entry + %t.0 = phi i32 [ 0, %if.end ], [ %0, %entry ], [ %0, %sw.bb ], [ %0, %sw.bb ] + %f.0 = phi i32 [ 0, %if.end ], [ %1, %entry ], [ 0, %sw.bb ], [ 0, %sw.bb ] + store i32 %t.0, i32* getelementptr inbounds (%struct.DState* @b, i32 0, i32 0), align 4 + store i32 %f.0, i32* getelementptr inbounds (%struct.DState* @b, i32 0, i32 1), align 4 + ret i32 undef +} + diff --git a/test/Transforms/SLPVectorizer/X86/debug_info.ll b/test/Transforms/SLPVectorizer/X86/debug_info.ll index b408913..f4e68f2 100644 --- a/test/Transforms/SLPVectorizer/X86/debug_info.ll +++ b/test/Transforms/SLPVectorizer/X86/debug_info.ll @@ -13,13 +13,13 @@ target triple = "x86_64-apple-macosx10.7.0" ; } ;CHECK: @depth -;CHECK: getelementptr inbounds {{.*}}, !dbg !24 -;CHECK: bitcast double* {{.*}}, !dbg !24 -;CHECK: load <2 x double>* {{.*}}, !dbg !24 -;CHECK: store <2 x double> {{.*}}, !dbg !26 +;CHECK: getelementptr inbounds {{.*}}, !dbg ![[LOC:[0-9]+]] +;CHECK: bitcast double* {{.*}}, !dbg ![[LOC]] +;CHECK: load <2 x double>* {{.*}}, !dbg ![[LOC]] +;CHECK: store <2 x double> 
{{.*}}, !dbg ![[LOC2:[0-9]+]] ;CHECK: ret -;CHECK: !24 = metadata !{i32 4, i32 0, -;CHECK: !26 = metadata !{i32 7, i32 0, +;CHECK: ![[LOC]] = metadata !{i32 4, i32 0, +;CHECK: ![[LOC2]] = metadata !{i32 7, i32 0, define i32 @depth(double* nocapture %A, i32 %m) #0 { entry: @@ -33,18 +33,18 @@ entry: for.body.lr.ph: ; preds = %entry %arrayidx = getelementptr inbounds double* %A, i64 4, !dbg !24 - %0 = load double* %arrayidx, align 8, !dbg !24, !tbaa !26 + %0 = load double* %arrayidx, align 8, !dbg !24 %arrayidx1 = getelementptr inbounds double* %A, i64 5, !dbg !29 - %1 = load double* %arrayidx1, align 8, !dbg !29, !tbaa !26 + %1 = load double* %arrayidx1, align 8, !dbg !29 br label %for.end, !dbg !23 for.end: ; preds = %for.body.lr.ph, %entry %y1.0.lcssa = phi double [ %1, %for.body.lr.ph ], [ 1.000000e+00, %entry ] %y0.0.lcssa = phi double [ %0, %for.body.lr.ph ], [ 0.000000e+00, %entry ] %arrayidx2 = getelementptr inbounds double* %A, i64 8, !dbg !30 - store double %y0.0.lcssa, double* %arrayidx2, align 8, !dbg !30, !tbaa !26 + store double %y0.0.lcssa, double* %arrayidx2, align 8, !dbg !30 %arrayidx3 = getelementptr inbounds double* %A, i64 9, !dbg !30 - store double %y1.0.lcssa, double* %arrayidx3, align 8, !dbg !30, !tbaa !26 + store double %y1.0.lcssa, double* %arrayidx3, align 8, !dbg !30 ret i32 undef, !dbg !31 } @@ -55,7 +55,7 @@ attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-po attributes #1 = { nounwind readnone } !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!18} +!llvm.module.flags = !{!18, !32} !0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 187335) (llvm/trunk 187335:187340M)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/nadav/file.c] [DW_LANG_C99] !1 = metadata !{metadata !"file.c", metadata !"/Users/nadav"} @@ -63,7 +63,7 @@ attributes #1 = { nounwind readnone } !3 = metadata !{metadata 
!4} !4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"depth", metadata !"depth", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (double*, i32)* @depth, null, null, metadata !11, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [depth] !5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/Users/nadav/file.c] -!6 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !7 = metadata !{metadata !8, metadata !9, metadata !8} !8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from double] @@ -83,9 +83,7 @@ attributes #1 = { nounwind readnone } !23 = metadata !{i32 3, i32 0, metadata !17, null} !24 = metadata !{i32 4, i32 0, metadata !25, null} !25 = metadata !{i32 786443, metadata !1, metadata !17, i32 3, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/Users/nadav/file.c] -!26 = metadata !{metadata !"double", metadata !27} -!27 = metadata !{metadata !"omnipotent char", metadata !28} -!28 = metadata !{metadata !"Simple C/C++ TBAA"} !29 = metadata !{i32 5, i32 0, metadata !25, null} !30 = metadata !{i32 7, i32 0, metadata !4, null} !31 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ] +!32 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Transforms/SLPVectorizer/X86/external_user.ll 
b/test/Transforms/SLPVectorizer/X86/external_user.ll index 22f0e64..6d09aa6 100644 --- a/test/Transforms/SLPVectorizer/X86/external_user.ll +++ b/test/Transforms/SLPVectorizer/X86/external_user.ll @@ -59,3 +59,38 @@ for.end: ; preds = %for.body ret double %mul3 } +; A need-to-gather entry cannot be an external use of the scalar element. +; Instead the insertelement instructions of the need-to-gather entry are the +; external users. +; This test would assert because we would keep the scalar fpext and fadd alive. +; PR18129 + +; CHECK-LABEL: needtogather +define i32 @needtogather(double *noalias %a, i32 *noalias %b, float * noalias %c, + i32 * noalias %d) { +entry: + %0 = load i32* %d, align 4 + %conv = sitofp i32 %0 to float + %1 = load float* %c + %sub = fsub float 0.000000e+00, %1 + %mul = fmul float %sub, 0.000000e+00 + %add = fadd float %conv, %mul + %conv1 = fpext float %add to double + %sub3 = fsub float 1.000000e+00, %1 + %mul4 = fmul float %sub3, 0.000000e+00 + %add5 = fadd float %conv, %mul4 + %conv6 = fpext float %add5 to double + %tobool = fcmp une float %add, 0.000000e+00 + br i1 %tobool, label %if.then, label %if.end + +if.then: + br label %if.end + +if.end: + %storemerge = phi double [ %conv6, %if.then ], [ %conv1, %entry ] + %e.0 = phi double [ %conv1, %if.then ], [ %conv6, %entry ] + store double %storemerge, double* %a, align 8 + %conv7 = fptosi double %e.0 to i32 + store i32 %conv7, i32* %b, align 4 + ret i32 undef +} diff --git a/test/Transforms/SLPVectorizer/X86/horizontal.ll b/test/Transforms/SLPVectorizer/X86/horizontal.ll new file mode 100644 index 0000000..8f91951 --- /dev/null +++ b/test/Transforms/SLPVectorizer/X86/horizontal.ll @@ -0,0 +1,417 @@ +; RUN: opt -slp-vectorizer -slp-vectorize-hor -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s --check-prefix=NOSTORE + +target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; #include <stdint.h> +; +; int foo(float *A, int n) { +; float sum = 0; +; for (intptr_t i=0; i < n; ++i) { +; sum += 7*A[i*4 ] + +; 7*A[i*4+1] + +; 7*A[i*4+2] + +; 7*A[i*4+3]; +; } +; return sum; +; } + +; NOSTORE-LABEL: add_red +; NOSTORE: fmul <4 x float> +; NOSTORE: shufflevector <4 x float> + +define i32 @add_red(float* %A, i32 %n) { +entry: + %cmp31 = icmp sgt i32 %n, 0 + br i1 %cmp31, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: + %0 = sext i32 %n to i64 + br label %for.body + +for.body: + %i.033 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %sum.032 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add17, %for.body ] + %mul = shl nsw i64 %i.033, 2 + %arrayidx = getelementptr inbounds float* %A, i64 %mul + %1 = load float* %arrayidx, align 4 + %mul2 = fmul float %1, 7.000000e+00 + %add28 = or i64 %mul, 1 + %arrayidx4 = getelementptr inbounds float* %A, i64 %add28 + %2 = load float* %arrayidx4, align 4 + %mul5 = fmul float %2, 7.000000e+00 + %add6 = fadd fast float %mul2, %mul5 + %add829 = or i64 %mul, 2 + %arrayidx9 = getelementptr inbounds float* %A, i64 %add829 + %3 = load float* %arrayidx9, align 4 + %mul10 = fmul float %3, 7.000000e+00 + %add11 = fadd fast float %add6, %mul10 + %add1330 = or i64 %mul, 3 + %arrayidx14 = getelementptr inbounds float* %A, i64 %add1330 + %4 = load float* %arrayidx14, align 4 + %mul15 = fmul float %4, 7.000000e+00 + %add16 = fadd fast float %add11, %mul15 + %add17 = fadd fast float %sum.032, %add16 + %inc = add nsw i64 %i.033, 1 + %exitcond = icmp eq i64 %inc, %0 + br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body + +for.cond.for.end_crit_edge: + %phitmp = fptosi float %add17 to i32 + br label %for.end + +for.end: + %sum.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ] + ret i32 %sum.0.lcssa +} + +; int foo(float 
* restrict A, float * restrict B, int n) { +; float sum = 0; +; for (intptr_t i=0; i < n; ++i) { +; sum *= B[0]*A[i*4 ] + +; B[1]*A[i*4+1] + +; B[2]*A[i*4+2] + +; B[3]*A[i*4+3]; +; } +; return sum; +; } + +; CHECK-LABEL: mul_red +; CHECK: fmul <4 x float> +; CHECK: shufflevector <4 x float> + +define i32 @mul_red(float* noalias %A, float* noalias %B, i32 %n) { +entry: + %cmp38 = icmp sgt i32 %n, 0 + br i1 %cmp38, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: + %0 = load float* %B, align 4 + %arrayidx4 = getelementptr inbounds float* %B, i64 1 + %1 = load float* %arrayidx4, align 4 + %arrayidx9 = getelementptr inbounds float* %B, i64 2 + %2 = load float* %arrayidx9, align 4 + %arrayidx15 = getelementptr inbounds float* %B, i64 3 + %3 = load float* %arrayidx15, align 4 + %4 = sext i32 %n to i64 + br label %for.body + +for.body: + %i.040 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %sum.039 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %mul21, %for.body ] + %mul = shl nsw i64 %i.040, 2 + %arrayidx2 = getelementptr inbounds float* %A, i64 %mul + %5 = load float* %arrayidx2, align 4 + %mul3 = fmul float %0, %5 + %add35 = or i64 %mul, 1 + %arrayidx6 = getelementptr inbounds float* %A, i64 %add35 + %6 = load float* %arrayidx6, align 4 + %mul7 = fmul float %1, %6 + %add8 = fadd fast float %mul3, %mul7 + %add1136 = or i64 %mul, 2 + %arrayidx12 = getelementptr inbounds float* %A, i64 %add1136 + %7 = load float* %arrayidx12, align 4 + %mul13 = fmul float %2, %7 + %add14 = fadd fast float %add8, %mul13 + %add1737 = or i64 %mul, 3 + %arrayidx18 = getelementptr inbounds float* %A, i64 %add1737 + %8 = load float* %arrayidx18, align 4 + %mul19 = fmul float %3, %8 + %add20 = fadd fast float %add14, %mul19 + %mul21 = fmul float %sum.039, %add20 + %inc = add nsw i64 %i.040, 1 + %exitcond = icmp eq i64 %inc, %4 + br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body + +for.cond.for.end_crit_edge: + %phitmp = fptosi float %mul21 to i32 + br label 
%for.end + +for.end: + %sum.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ] + ret i32 %sum.0.lcssa +} + +; int foo(float * restrict A, float * restrict B, int n) { +; float sum = 0; +; for (intptr_t i=0; i < n; ++i) { +; sum += B[0]*A[i*6 ] + +; B[1]*A[i*6+1] + +; B[2]*A[i*6+2] + +; B[3]*A[i*6+3] + +; B[4]*A[i*6+4] + +; B[5]*A[i*6+5] + +; B[6]*A[i*6+6] + +; B[7]*A[i*6+7] + +; B[8]*A[i*6+8]; +; } +; return sum; +; } + +; CHECK-LABEL: long_red +; CHECK: fmul <4 x float> +; CHECK: shufflevector <4 x float> + +define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) { +entry: + %cmp81 = icmp sgt i32 %n, 0 + br i1 %cmp81, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: + %0 = load float* %B, align 4 + %arrayidx4 = getelementptr inbounds float* %B, i64 1 + %1 = load float* %arrayidx4, align 4 + %arrayidx9 = getelementptr inbounds float* %B, i64 2 + %2 = load float* %arrayidx9, align 4 + %arrayidx15 = getelementptr inbounds float* %B, i64 3 + %3 = load float* %arrayidx15, align 4 + %arrayidx21 = getelementptr inbounds float* %B, i64 4 + %4 = load float* %arrayidx21, align 4 + %arrayidx27 = getelementptr inbounds float* %B, i64 5 + %5 = load float* %arrayidx27, align 4 + %arrayidx33 = getelementptr inbounds float* %B, i64 6 + %6 = load float* %arrayidx33, align 4 + %arrayidx39 = getelementptr inbounds float* %B, i64 7 + %7 = load float* %arrayidx39, align 4 + %arrayidx45 = getelementptr inbounds float* %B, i64 8 + %8 = load float* %arrayidx45, align 4 + %9 = sext i32 %n to i64 + br label %for.body + +for.body: + %i.083 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %sum.082 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add51, %for.body ] + %mul = mul nsw i64 %i.083, 6 + %arrayidx2 = getelementptr inbounds float* %A, i64 %mul + %10 = load float* %arrayidx2, align 4 + %mul3 = fmul fast float %0, %10 + %add80 = or i64 %mul, 1 + %arrayidx6 = getelementptr inbounds float* %A, i64 %add80 + %11 = load float* %arrayidx6, 
align 4 + %mul7 = fmul fast float %1, %11 + %add8 = fadd fast float %mul3, %mul7 + %add11 = add nsw i64 %mul, 2 + %arrayidx12 = getelementptr inbounds float* %A, i64 %add11 + %12 = load float* %arrayidx12, align 4 + %mul13 = fmul fast float %2, %12 + %add14 = fadd fast float %add8, %mul13 + %add17 = add nsw i64 %mul, 3 + %arrayidx18 = getelementptr inbounds float* %A, i64 %add17 + %13 = load float* %arrayidx18, align 4 + %mul19 = fmul fast float %3, %13 + %add20 = fadd fast float %add14, %mul19 + %add23 = add nsw i64 %mul, 4 + %arrayidx24 = getelementptr inbounds float* %A, i64 %add23 + %14 = load float* %arrayidx24, align 4 + %mul25 = fmul fast float %4, %14 + %add26 = fadd fast float %add20, %mul25 + %add29 = add nsw i64 %mul, 5 + %arrayidx30 = getelementptr inbounds float* %A, i64 %add29 + %15 = load float* %arrayidx30, align 4 + %mul31 = fmul fast float %5, %15 + %add32 = fadd fast float %add26, %mul31 + %add35 = add nsw i64 %mul, 6 + %arrayidx36 = getelementptr inbounds float* %A, i64 %add35 + %16 = load float* %arrayidx36, align 4 + %mul37 = fmul fast float %6, %16 + %add38 = fadd fast float %add32, %mul37 + %add41 = add nsw i64 %mul, 7 + %arrayidx42 = getelementptr inbounds float* %A, i64 %add41 + %17 = load float* %arrayidx42, align 4 + %mul43 = fmul fast float %7, %17 + %add44 = fadd fast float %add38, %mul43 + %add47 = add nsw i64 %mul, 8 + %arrayidx48 = getelementptr inbounds float* %A, i64 %add47 + %18 = load float* %arrayidx48, align 4 + %mul49 = fmul fast float %8, %18 + %add50 = fadd fast float %add44, %mul49 + %add51 = fadd fast float %sum.082, %add50 + %inc = add nsw i64 %i.083, 1 + %exitcond = icmp eq i64 %inc, %9 + br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body + +for.cond.for.end_crit_edge: + %phitmp = fptosi float %add51 to i32 + br label %for.end + +for.end: + %sum.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ] + ret i32 %sum.0.lcssa +} + +; int foo(float * restrict A, float * restrict B, int n) 
{ +; float sum = 0; +; for (intptr_t i=0; i < n; ++i) { +; sum += B[0]*A[i*4 ]; +; sum += B[1]*A[i*4+1]; +; sum += B[2]*A[i*4+2]; +; sum += B[3]*A[i*4+3]; +; } +; return sum; +; } + +; CHECK-LABEL: chain_red +; CHECK: fmul <4 x float> +; CHECK: shufflevector <4 x float> + +define i32 @chain_red(float* noalias %A, float* noalias %B, i32 %n) { +entry: + %cmp41 = icmp sgt i32 %n, 0 + br i1 %cmp41, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: + %0 = load float* %B, align 4 + %arrayidx4 = getelementptr inbounds float* %B, i64 1 + %1 = load float* %arrayidx4, align 4 + %arrayidx10 = getelementptr inbounds float* %B, i64 2 + %2 = load float* %arrayidx10, align 4 + %arrayidx16 = getelementptr inbounds float* %B, i64 3 + %3 = load float* %arrayidx16, align 4 + %4 = sext i32 %n to i64 + br label %for.body + +for.body: + %i.043 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %sum.042 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add21, %for.body ] + %mul = shl nsw i64 %i.043, 2 + %arrayidx2 = getelementptr inbounds float* %A, i64 %mul + %5 = load float* %arrayidx2, align 4 + %mul3 = fmul fast float %0, %5 + %add = fadd fast float %sum.042, %mul3 + %add638 = or i64 %mul, 1 + %arrayidx7 = getelementptr inbounds float* %A, i64 %add638 + %6 = load float* %arrayidx7, align 4 + %mul8 = fmul fast float %1, %6 + %add9 = fadd fast float %add, %mul8 + %add1239 = or i64 %mul, 2 + %arrayidx13 = getelementptr inbounds float* %A, i64 %add1239 + %7 = load float* %arrayidx13, align 4 + %mul14 = fmul fast float %2, %7 + %add15 = fadd fast float %add9, %mul14 + %add1840 = or i64 %mul, 3 + %arrayidx19 = getelementptr inbounds float* %A, i64 %add1840 + %8 = load float* %arrayidx19, align 4 + %mul20 = fmul fast float %3, %8 + %add21 = fadd fast float %add15, %mul20 + %inc = add nsw i64 %i.043, 1 + %exitcond = icmp eq i64 %inc, %4 + br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body + +for.cond.for.end_crit_edge: + %phitmp = fptosi float %add21 to i32 + br 
label %for.end + +for.end: + %sum.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ] + ret i32 %sum.0.lcssa +} + +; int foo(float * restrict A, float * restrict B, float * restrict C, int n) { +; float sum = 0; +; for (intptr_t i=0; i < n; ++i) { +; C[i] = B[0] *A[i*4 ] + +; B[1] *A[i*4+1] + +; B[2] *A[i*4+2] + +; B[3] *A[i*4+3]; +; } +; return sum; +; } + +; CHECK-LABEL: store_red +; CHECK: fmul <4 x float> +; CHECK: shufflevector <4 x float> + +define i32 @store_red(float* noalias %A, float* noalias %B, float* noalias %C, i32 %n) { +entry: + %cmp37 = icmp sgt i32 %n, 0 + br i1 %cmp37, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: + %arrayidx4 = getelementptr inbounds float* %B, i64 1 + %arrayidx9 = getelementptr inbounds float* %B, i64 2 + %arrayidx15 = getelementptr inbounds float* %B, i64 3 + %0 = sext i32 %n to i64 + br label %for.body + +for.body: + %i.039 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %C.addr.038 = phi float* [ %C, %for.body.lr.ph ], [ %incdec.ptr, %for.body ] + %1 = load float* %B, align 4 + %mul = shl nsw i64 %i.039, 2 + %arrayidx2 = getelementptr inbounds float* %A, i64 %mul + %2 = load float* %arrayidx2, align 4 + %mul3 = fmul fast float %1, %2 + %3 = load float* %arrayidx4, align 4 + %add34 = or i64 %mul, 1 + %arrayidx6 = getelementptr inbounds float* %A, i64 %add34 + %4 = load float* %arrayidx6, align 4 + %mul7 = fmul fast float %3, %4 + %add8 = fadd fast float %mul3, %mul7 + %5 = load float* %arrayidx9, align 4 + %add1135 = or i64 %mul, 2 + %arrayidx12 = getelementptr inbounds float* %A, i64 %add1135 + %6 = load float* %arrayidx12, align 4 + %mul13 = fmul fast float %5, %6 + %add14 = fadd fast float %add8, %mul13 + %7 = load float* %arrayidx15, align 4 + %add1736 = or i64 %mul, 3 + %arrayidx18 = getelementptr inbounds float* %A, i64 %add1736 + %8 = load float* %arrayidx18, align 4 + %mul19 = fmul fast float %7, %8 + %add20 = fadd fast float %add14, %mul19 + store float %add20, float* 
%C.addr.038, align 4 + %incdec.ptr = getelementptr inbounds float* %C.addr.038, i64 1 + %inc = add nsw i64 %i.039, 1 + %exitcond = icmp eq i64 %inc, %0 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 0 +} + + +; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s --check-prefix=STORE + +; void foo(double * restrict A, double * restrict B, double * restrict C, +; int n) { +; for (intptr_t i=0; i < n; ++i) { +; C[i] = B[0] *A[i*4 ] + B[1] *A[i*4+1]; +; } +; } + +; STORE-LABEL: store_red_double +; STORE: fmul <2 x double> +; STORE: extractelement <2 x double> +; STORE: extractelement <2 x double> + +define void @store_red_double(double* noalias %A, double* noalias %B, double* noalias %C, i32 %n) { +entry: + %cmp17 = icmp sgt i32 %n, 0 + br i1 %cmp17, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: + %0 = load double* %B, align 8 + %arrayidx4 = getelementptr inbounds double* %B, i64 1 + %1 = load double* %arrayidx4, align 8 + %2 = sext i32 %n to i64 + br label %for.body + +for.body: + %i.018 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %mul = shl nsw i64 %i.018, 2 + %arrayidx2 = getelementptr inbounds double* %A, i64 %mul + %3 = load double* %arrayidx2, align 8 + %mul3 = fmul fast double %0, %3 + %add16 = or i64 %mul, 1 + %arrayidx6 = getelementptr inbounds double* %A, i64 %add16 + %4 = load double* %arrayidx6, align 8 + %mul7 = fmul fast double %1, %4 + %add8 = fadd fast double %mul3, %mul7 + %arrayidx9 = getelementptr inbounds double* %C, i64 %i.018 + store double %add8, double* %arrayidx9, align 8 + %inc = add nsw i64 %i.018, 1 + %exitcond = icmp eq i64 %inc, %2 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} diff --git a/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll new file mode 100644 index 0000000..43f7aed --- /dev/null 
+++ b/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll @@ -0,0 +1,197 @@ +; RUN: opt -S -slp-vectorizer -slp-threshold=-10000 < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-n8:16:32:64-S128" + +target triple = "x86_64-apple-macosx10.8.0" + +define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { +; CHECK-LABEL: @simple_select( +; CHECK-NEXT: %1 = icmp ne <4 x i32> %c, zeroinitializer +; CHECK-NEXT: select <4 x i1> %1, <4 x float> %a, <4 x float> %b + %c0 = extractelement <4 x i32> %c, i32 0 + %c1 = extractelement <4 x i32> %c, i32 1 + %c2 = extractelement <4 x i32> %c, i32 2 + %c3 = extractelement <4 x i32> %c, i32 3 + %a0 = extractelement <4 x float> %a, i32 0 + %a1 = extractelement <4 x float> %a, i32 1 + %a2 = extractelement <4 x float> %a, i32 2 + %a3 = extractelement <4 x float> %a, i32 3 + %b0 = extractelement <4 x float> %b, i32 0 + %b1 = extractelement <4 x float> %b, i32 1 + %b2 = extractelement <4 x float> %b, i32 2 + %b3 = extractelement <4 x float> %b, i32 3 + %cmp0 = icmp ne i32 %c0, 0 + %cmp1 = icmp ne i32 %c1, 0 + %cmp2 = icmp ne i32 %c2, 0 + %cmp3 = icmp ne i32 %c3, 0 + %s0 = select i1 %cmp0, float %a0, float %b0 + %s1 = select i1 %cmp1, float %a1, float %b1 + %s2 = select i1 %cmp2, float %a2, float %b2 + %s3 = select i1 %cmp3, float %a3, float %b3 + %ra = insertelement <4 x float> undef, float %s0, i32 0 + %rb = insertelement <4 x float> %ra, float %s1, i32 1 + %rc = insertelement <4 x float> %rb, float %s2, i32 2 + %rd = insertelement <4 x float> %rc, float %s3, i32 3 + ret <4 x float> %rd +} + +; Insert in an order different from the vector indices to make sure it +; doesn't matter +define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { +; CHECK-LABEL: @simple_select_insert_out_of_order( +; CHECK-NEXT: %1 = icmp ne <4 x i32> %c, 
zeroinitializer +; CHECK-NEXT: select <4 x i1> %1, <4 x float> %a, <4 x float> %b + %c0 = extractelement <4 x i32> %c, i32 0 + %c1 = extractelement <4 x i32> %c, i32 1 + %c2 = extractelement <4 x i32> %c, i32 2 + %c3 = extractelement <4 x i32> %c, i32 3 + %a0 = extractelement <4 x float> %a, i32 0 + %a1 = extractelement <4 x float> %a, i32 1 + %a2 = extractelement <4 x float> %a, i32 2 + %a3 = extractelement <4 x float> %a, i32 3 + %b0 = extractelement <4 x float> %b, i32 0 + %b1 = extractelement <4 x float> %b, i32 1 + %b2 = extractelement <4 x float> %b, i32 2 + %b3 = extractelement <4 x float> %b, i32 3 + %cmp0 = icmp ne i32 %c0, 0 + %cmp1 = icmp ne i32 %c1, 0 + %cmp2 = icmp ne i32 %c2, 0 + %cmp3 = icmp ne i32 %c3, 0 + %s0 = select i1 %cmp0, float %a0, float %b0 + %s1 = select i1 %cmp1, float %a1, float %b1 + %s2 = select i1 %cmp2, float %a2, float %b2 + %s3 = select i1 %cmp3, float %a3, float %b3 + %ra = insertelement <4 x float> undef, float %s0, i32 2 + %rb = insertelement <4 x float> %ra, float %s1, i32 1 + %rc = insertelement <4 x float> %rb, float %s2, i32 0 + %rd = insertelement <4 x float> %rc, float %s3, i32 3 + ret <4 x float> %rd +} + +declare void @v4f32_user(<4 x float>) #0 +declare void @f32_user(float) #0 + +; Multiple users of the final constructed vector +define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { +; CHECK-LABEL: @simple_select_users( +; CHECK-NEXT: %1 = icmp ne <4 x i32> %c, zeroinitializer +; CHECK-NEXT: select <4 x i1> %1, <4 x float> %a, <4 x float> %b + %c0 = extractelement <4 x i32> %c, i32 0 + %c1 = extractelement <4 x i32> %c, i32 1 + %c2 = extractelement <4 x i32> %c, i32 2 + %c3 = extractelement <4 x i32> %c, i32 3 + %a0 = extractelement <4 x float> %a, i32 0 + %a1 = extractelement <4 x float> %a, i32 1 + %a2 = extractelement <4 x float> %a, i32 2 + %a3 = extractelement <4 x float> %a, i32 3 + %b0 = extractelement <4 x float> %b, i32 0 + %b1 = extractelement <4 x float> %b, i32 1 + %b2 = 
extractelement <4 x float> %b, i32 2 + %b3 = extractelement <4 x float> %b, i32 3 + %cmp0 = icmp ne i32 %c0, 0 + %cmp1 = icmp ne i32 %c1, 0 + %cmp2 = icmp ne i32 %c2, 0 + %cmp3 = icmp ne i32 %c3, 0 + %s0 = select i1 %cmp0, float %a0, float %b0 + %s1 = select i1 %cmp1, float %a1, float %b1 + %s2 = select i1 %cmp2, float %a2, float %b2 + %s3 = select i1 %cmp3, float %a3, float %b3 + %ra = insertelement <4 x float> undef, float %s0, i32 0 + %rb = insertelement <4 x float> %ra, float %s1, i32 1 + %rc = insertelement <4 x float> %rb, float %s2, i32 2 + %rd = insertelement <4 x float> %rc, float %s3, i32 3 + call void @v4f32_user(<4 x float> %rd) #0 + ret <4 x float> %rd +} + +; Unused insertelement +define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { +; CHECK-LABEL: @simple_select_no_users( +; CHECK-NOT: icmp ne <4 x i32> +; CHECK-NOT: select <4 x i1> + %c0 = extractelement <4 x i32> %c, i32 0 + %c1 = extractelement <4 x i32> %c, i32 1 + %c2 = extractelement <4 x i32> %c, i32 2 + %c3 = extractelement <4 x i32> %c, i32 3 + %a0 = extractelement <4 x float> %a, i32 0 + %a1 = extractelement <4 x float> %a, i32 1 + %a2 = extractelement <4 x float> %a, i32 2 + %a3 = extractelement <4 x float> %a, i32 3 + %b0 = extractelement <4 x float> %b, i32 0 + %b1 = extractelement <4 x float> %b, i32 1 + %b2 = extractelement <4 x float> %b, i32 2 + %b3 = extractelement <4 x float> %b, i32 3 + %cmp0 = icmp ne i32 %c0, 0 + %cmp1 = icmp ne i32 %c1, 0 + %cmp2 = icmp ne i32 %c2, 0 + %cmp3 = icmp ne i32 %c3, 0 + %s0 = select i1 %cmp0, float %a0, float %b0 + %s1 = select i1 %cmp1, float %a1, float %b1 + %s2 = select i1 %cmp2, float %a2, float %b2 + %s3 = select i1 %cmp3, float %a3, float %b3 + %ra = insertelement <4 x float> undef, float %s0, i32 0 + %rb = insertelement <4 x float> %ra, float %s1, i32 1 + %rc = insertelement <4 x float> undef, float %s2, i32 2 + %rd = insertelement <4 x float> %rc, float %s3, i32 3 + ret <4 x float> %rd +} + +; Make 
sure infinite loop doesn't happen which I ran into when trying +; to do this backwards this backwards +define <4 x i32> @reconstruct(<4 x i32> %c) #0 { +; CHECK-LABEL: @reconstruct( + %c0 = extractelement <4 x i32> %c, i32 0 + %c1 = extractelement <4 x i32> %c, i32 1 + %c2 = extractelement <4 x i32> %c, i32 2 + %c3 = extractelement <4 x i32> %c, i32 3 + %ra = insertelement <4 x i32> undef, i32 %c0, i32 0 + %rb = insertelement <4 x i32> %ra, i32 %c1, i32 1 + %rc = insertelement <4 x i32> %rb, i32 %c2, i32 2 + %rd = insertelement <4 x i32> %rc, i32 %c3, i32 3 + ret <4 x i32> %rd +} + +define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> %c) #0 { +; CHECK-LABEL: @simple_select_v2( +; CHECK: icmp ne <2 x i32> +; CHECK: select <2 x i1> + %c0 = extractelement <2 x i32> %c, i32 0 + %c1 = extractelement <2 x i32> %c, i32 1 + %a0 = extractelement <2 x float> %a, i32 0 + %a1 = extractelement <2 x float> %a, i32 1 + %b0 = extractelement <2 x float> %b, i32 0 + %b1 = extractelement <2 x float> %b, i32 1 + %cmp0 = icmp ne i32 %c0, 0 + %cmp1 = icmp ne i32 %c1, 0 + %s0 = select i1 %cmp0, float %a0, float %b0 + %s1 = select i1 %cmp1, float %a1, float %b1 + %ra = insertelement <2 x float> undef, float %s0, i32 0 + %rb = insertelement <2 x float> %ra, float %s1, i32 1 + ret <2 x float> %rb +} + +; Make sure when we construct partial vectors, we don't keep +; re-visiting the insertelement chains starting with undef +; (low cost threshold needed to force this to happen) +define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { + %c0 = extractelement <4 x i32> %c, i32 0 + %c1 = extractelement <4 x i32> %c, i32 1 + %a0 = extractelement <4 x float> %a, i32 0 + %a1 = extractelement <4 x float> %a, i32 1 + %b0 = extractelement <4 x float> %b, i32 0 + %b1 = extractelement <4 x float> %b, i32 1 + %1 = insertelement <2 x i32> undef, i32 %c0, i32 0 + %2 = insertelement <2 x i32> %1, i32 %c1, i32 1 + %3 = icmp ne <2 x i32> %2, 
zeroinitializer + %4 = insertelement <2 x float> undef, float %a0, i32 0 + %5 = insertelement <2 x float> %4, float %a1, i32 1 + %6 = insertelement <2 x float> undef, float %b0, i32 0 + %7 = insertelement <2 x float> %6, float %b1, i32 1 + %8 = select <2 x i1> %3, <2 x float> %5, <2 x float> %7 + %9 = extractelement <2 x float> %8, i32 0 + %ra = insertelement <4 x float> undef, float %9, i32 0 + %10 = extractelement <2 x float> %8, i32 1 + %rb = insertelement <4 x float> %ra, float %10, i32 1 + ret <4 x float> %rb +} + +attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/Transforms/SLPVectorizer/X86/lit.local.cfg b/test/Transforms/SLPVectorizer/X86/lit.local.cfg index a8ad0f1..ba763cf 100644 --- a/test/Transforms/SLPVectorizer/X86/lit.local.cfg +++ b/test/Transforms/SLPVectorizer/X86/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - targets = set(config.root.targets_to_build.split()) if not 'X86' in targets: config.unsupported = True diff --git a/test/Transforms/SLPVectorizer/X86/operandorder.ll b/test/Transforms/SLPVectorizer/X86/operandorder.ll new file mode 100644 index 0000000..c5322a8 --- /dev/null +++ b/test/Transforms/SLPVectorizer/X86/operandorder.ll @@ -0,0 +1,234 @@ +; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=-100 -instcombine -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128" + + + +; Make sure we order the operands of commutative operations so that we get +; bigger vectorizable trees. 
+ +; CHECK-LABEL: shuffle_operands1 +; CHECK: load <2 x double> +; CHECK: fadd <2 x double> + +define void @shuffle_operands1(double * noalias %from, double * noalias %to, + double %v1, double %v2) { + %from_1 = getelementptr double *%from, i64 1 + %v0_1 = load double * %from + %v0_2 = load double * %from_1 + %v1_1 = fadd double %v0_1, %v1 + %v1_2 = fadd double %v2, %v0_2 + %to_2 = getelementptr double * %to, i64 1 + store double %v1_1, double *%to + store double %v1_2, double *%to_2 + ret void +} + +; CHECK-LABEL: shuffle_preserve_broadcast +; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1 +; CHECK: = insertelement <2 x double> %[[BCAST]], double %v0_1 +define void @shuffle_preserve_broadcast(double * noalias %from, + double * noalias %to, + double %v1, double %v2) { +entry: +br label %lp + +lp: + %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ] + %from_1 = getelementptr double *%from, i64 1 + %v0_1 = load double * %from + %v0_2 = load double * %from_1 + %v1_1 = fadd double %v0_1, %p + %v1_2 = fadd double %v0_1, %v0_2 + %to_2 = getelementptr double * %to, i64 1 + store double %v1_1, double *%to + store double %v1_2, double *%to_2 +br i1 undef, label %lp, label %ext + +ext: + ret void +} + +; CHECK-LABEL: shuffle_preserve_broadcast2 +; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1 +; CHECK: = insertelement <2 x double> %[[BCAST]], double %v0_1 +define void @shuffle_preserve_broadcast2(double * noalias %from, + double * noalias %to, + double %v1, double %v2) { +entry: +br label %lp + +lp: + %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ] + %from_1 = getelementptr double *%from, i64 1 + %v0_1 = load double * %from + %v0_2 = load double * %from_1 + %v1_1 = fadd double %p, %v0_1 + %v1_2 = fadd double %v0_2, %v0_1 + %to_2 = getelementptr double * %to, i64 1 + store double %v1_1, double *%to + store double %v1_2, double *%to_2 +br i1 undef, label %lp, label %ext + +ext: + ret void 
+} + +; CHECK-LABEL: shuffle_preserve_broadcast3 +; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1 +; CHECK: = insertelement <2 x double> %[[BCAST]], double %v0_1 +define void @shuffle_preserve_broadcast3(double * noalias %from, + double * noalias %to, + double %v1, double %v2) { +entry: +br label %lp + +lp: + %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ] + %from_1 = getelementptr double *%from, i64 1 + %v0_1 = load double * %from + %v0_2 = load double * %from_1 + %v1_1 = fadd double %p, %v0_1 + %v1_2 = fadd double %v0_1, %v0_2 + %to_2 = getelementptr double * %to, i64 1 + store double %v1_1, double *%to + store double %v1_2, double *%to_2 +br i1 undef, label %lp, label %ext + +ext: + ret void +} + + +; CHECK-LABEL: shuffle_preserve_broadcast4 +; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1 +; CHECK: = insertelement <2 x double> %[[BCAST]], double %v0_1 +define void @shuffle_preserve_broadcast4(double * noalias %from, + double * noalias %to, + double %v1, double %v2) { +entry: +br label %lp + +lp: + %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ] + %from_1 = getelementptr double *%from, i64 1 + %v0_1 = load double * %from + %v0_2 = load double * %from_1 + %v1_1 = fadd double %v0_2, %v0_1 + %v1_2 = fadd double %p, %v0_1 + %to_2 = getelementptr double * %to, i64 1 + store double %v1_1, double *%to + store double %v1_2, double *%to_2 +br i1 undef, label %lp, label %ext + +ext: + ret void +} + +; CHECK-LABEL: shuffle_preserve_broadcast5 +; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1 +; CHECK: = insertelement <2 x double> %[[BCAST]], double %v0_1 +define void @shuffle_preserve_broadcast5(double * noalias %from, + double * noalias %to, + double %v1, double %v2) { +entry: +br label %lp + +lp: + %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ] + %from_1 = getelementptr double *%from, i64 1 + %v0_1 = load double * %from + %v0_2 = 
load double * %from_1 + %v1_1 = fadd double %v0_1, %v0_2 + %v1_2 = fadd double %p, %v0_1 + %to_2 = getelementptr double * %to, i64 1 + store double %v1_1, double *%to + store double %v1_2, double *%to_2 +br i1 undef, label %lp, label %ext + +ext: + ret void +} + + +; CHECK-LABEL: shuffle_preserve_broadcast6 +; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1 +; CHECK: = insertelement <2 x double> %[[BCAST]], double %v0_1 +define void @shuffle_preserve_broadcast6(double * noalias %from, + double * noalias %to, + double %v1, double %v2) { +entry: +br label %lp + +lp: + %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ] + %from_1 = getelementptr double *%from, i64 1 + %v0_1 = load double * %from + %v0_2 = load double * %from_1 + %v1_1 = fadd double %v0_1, %v0_2 + %v1_2 = fadd double %v0_1, %p + %to_2 = getelementptr double * %to, i64 1 + store double %v1_1, double *%to + store double %v1_2, double *%to_2 +br i1 undef, label %lp, label %ext + +ext: + ret void +} + +; Make sure we don't scramble operands when we reorder them and destroy +; 'good' source order. 
+ +; CHECK-LABEL: good_load_order + +; CHECK: %[[V1:[0-9]+]] = load <4 x float>* +; CHECK: %[[V2:[0-9]+]] = insertelement <4 x float> undef, float %1, i32 0 +; CHECK: %[[V3:[0-9]+]] = shufflevector <4 x float> %[[V2]], <4 x float> %[[V1]], <4 x i32> <i32 0, i32 4, i32 5, i32 6> +; CHECK: = fmul <4 x float> %[[V1]], %[[V3]] + +@a = common global [32000 x float] zeroinitializer, align 16 + +define void @good_load_order() { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: + %0 = load float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), align 16 + br label %for.body3 + +for.body3: + %1 = phi float [ %0, %for.cond1.preheader ], [ %10, %for.body3 ] + %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ] + %2 = add nsw i64 %indvars.iv, 1 + %arrayidx = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %2 + %3 = load float* %arrayidx, align 4 + %arrayidx5 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv + %mul6 = fmul float %3, %1 + store float %mul6, float* %arrayidx5, align 4 + %4 = add nsw i64 %indvars.iv, 2 + %arrayidx11 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %4 + %5 = load float* %arrayidx11, align 4 + %mul15 = fmul float %5, %3 + store float %mul15, float* %arrayidx, align 4 + %6 = add nsw i64 %indvars.iv, 3 + %arrayidx21 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %6 + %7 = load float* %arrayidx21, align 4 + %mul25 = fmul float %7, %5 + store float %mul25, float* %arrayidx11, align 4 + %8 = add nsw i64 %indvars.iv, 4 + %arrayidx31 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %8 + %9 = load float* %arrayidx31, align 4 + %mul35 = fmul float %9, %7 + store float %mul35, float* %arrayidx21, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5 + %arrayidx41 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.next + %10 = load float* %arrayidx41, align 4 + %mul45 = fmul float %10, %9 + store float %mul45, float* %arrayidx31, 
align 4 + %11 = trunc i64 %indvars.iv.next to i32 + %cmp2 = icmp slt i32 %11, 31995 + br i1 %cmp2, label %for.body3, label %for.end + +for.end: + ret void +} diff --git a/test/Transforms/SLPVectorizer/X86/opt.ll b/test/Transforms/SLPVectorizer/X86/opt.ll new file mode 100644 index 0000000..14137c1 --- /dev/null +++ b/test/Transforms/SLPVectorizer/X86/opt.ll @@ -0,0 +1,30 @@ +; RUN: opt < %s -O3 -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=SLP +; RUN: opt < %s -O3 -disable-slp-vectorization -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=NOSLP + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +; Make sure we can disable slp vectorization in opt. + +; SLP-LABEL: test1 +; SLP: store <2 x double> + +; NOSLP-LABEL: test1 +; NOSLP-NOT: store <2 x double> + + +define void @test1(double* %a, double* %b, double* %c) { +entry: + %i0 = load double* %a, align 8 + %i1 = load double* %b, align 8 + %mul = fmul double %i0, %i1 + %arrayidx3 = getelementptr inbounds double* %a, i64 1 + %i3 = load double* %arrayidx3, align 8 + %arrayidx4 = getelementptr inbounds double* %b, i64 1 + %i4 = load double* %arrayidx4, align 8 + %mul5 = fmul double %i3, %i4 + store double %mul, double* %c, align 8 + %arrayidx5 = getelementptr inbounds double* %c, i64 1 + store double %mul5, double* %arrayidx5, align 8 + ret void +} diff --git a/test/Transforms/SLPVectorizer/X86/ordering.ll b/test/Transforms/SLPVectorizer/X86/ordering.ll index 588e115..d2ecd45 100644 --- a/test/Transforms/SLPVectorizer/X86/ordering.ll +++ b/test/Transforms/SLPVectorizer/X86/ordering.ll @@ -17,3 +17,65 @@ entry: %cmp11 = fcmp olt double %add, 0.000000e+00 ret void } + +declare i8* @objc_msgSend(i8*, i8*, ...) +declare i32 @personality_v0(...) 
+ +define void @invoketest() { +entry: + br i1 undef, label %cond.true, label %cond.false + +cond.true: + %call49 = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef) + to label %cond.true54 unwind label %lpad + +cond.false: + %call51 = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef) + to label %cond.false57 unwind label %lpad + +cond.true54: + %call56 = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef) + to label %cond.end60 unwind label %lpad + +cond.false57: + %call59 = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef) + to label %cond.end60 unwind label %lpad + +; Make sure we don't vectorize these phis - they have invokes as inputs. + +; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s + +; CHECK-LABEL: invoketest + +; CHECK-LABEL: cond.end60 +; CHECK-NEXT-NOT: phi <2 x double> +; CHECK: insertelement +; CHECK-LABEL: if.then63 + +cond.end60: + %cond126 = phi double [ %call49, %cond.true54 ], [ %call51, %cond.false57 ] + %cond61 = phi double [ %call56, %cond.true54 ], [ %call59, %cond.false57 ] + br i1 undef, label %if.end98, label %if.then63 + +if.then63: + %conv69 = fptrunc double undef to float + %conv70 = fpext float %conv69 to double + %div71 = fdiv double %cond126, %conv70 + %conv78 = fptrunc double undef to float + %conv79 = fpext float %conv78 to double + %div80 = fdiv double %cond61, %conv79 + br label %if.end98 + +lpad: + %l = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @personality_v0 to i8*) + cleanup + resume { i8*, i32 } %l + +if.end98: + %dimensionsResult.sroa.0.0 = phi double [ %div71, %if.then63 ], [ %cond126, %cond.end60 ] + %dimensionsResult.sroa.6.0 = phi double [ %div80, %if.then63 ], [ %cond61, %cond.end60 ] + br label %if.end99 + +if.end99: + ret void +} diff --git 
a/test/Transforms/SLPVectorizer/X86/phi.ll b/test/Transforms/SLPVectorizer/X86/phi.ll index 1c7f9cc..964e0e4 100644 --- a/test/Transforms/SLPVectorizer/X86/phi.ll +++ b/test/Transforms/SLPVectorizer/X86/phi.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s +; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=-100 -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128" target triple = "i386-apple-macosx10.9.0" @@ -95,3 +95,154 @@ for.end: ; preds = %for.body ret i32 0 } +; float foo3(float *A) { +; +; float R = A[0]; +; float G = A[1]; +; float B = A[2]; +; float Y = A[3]; +; float P = A[4]; +; for (int i=0; i < 121; i+=3) { +; R+=A[i+0]*7; +; G+=A[i+1]*8; +; B+=A[i+2]*9; +; Y+=A[i+3]*10; +; P+=A[i+4]*11; +; } +; +; return R+G+B+Y+P; +; } + +;CHECK: foo3 +;CHECK: phi <4 x float> +;CHECK: fmul <4 x float> +;CHECK: fadd <4 x float> +;CHECK-NOT: phi <5 x float> +;CHECK-NOT: fmul <5 x float> +;CHECK-NOT: fadd <5 x float> + +define float @foo3(float* nocapture readonly %A) #0 { +entry: + %0 = load float* %A, align 4 + %arrayidx1 = getelementptr inbounds float* %A, i64 1 + %1 = load float* %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds float* %A, i64 2 + %2 = load float* %arrayidx2, align 4 + %arrayidx3 = getelementptr inbounds float* %A, i64 3 + %3 = load float* %arrayidx3, align 4 + %arrayidx4 = getelementptr inbounds float* %A, i64 4 + %4 = load float* %arrayidx4, align 4 + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %P.056 = phi float [ %4, %entry ], [ %add26, %for.body ] + %Y.055 = phi float [ %3, %entry ], [ %add21, %for.body ] + %B.054 = phi float [ %2, %entry ], [ %add16, %for.body ] + %G.053 = phi float [ %1, 
%entry ], [ %add11, %for.body ] + %R.052 = phi float [ %0, %entry ], [ %add6, %for.body ] + %5 = phi float [ %1, %entry ], [ %11, %for.body ] + %6 = phi float [ %0, %entry ], [ %9, %for.body ] + %mul = fmul float %6, 7.000000e+00 + %add6 = fadd float %R.052, %mul + %mul10 = fmul float %5, 8.000000e+00 + %add11 = fadd float %G.053, %mul10 + %7 = add nsw i64 %indvars.iv, 2 + %arrayidx14 = getelementptr inbounds float* %A, i64 %7 + %8 = load float* %arrayidx14, align 4 + %mul15 = fmul float %8, 9.000000e+00 + %add16 = fadd float %B.054, %mul15 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 3 + %arrayidx19 = getelementptr inbounds float* %A, i64 %indvars.iv.next + %9 = load float* %arrayidx19, align 4 + %mul20 = fmul float %9, 1.000000e+01 + %add21 = fadd float %Y.055, %mul20 + %10 = add nsw i64 %indvars.iv, 4 + %arrayidx24 = getelementptr inbounds float* %A, i64 %10 + %11 = load float* %arrayidx24, align 4 + %mul25 = fmul float %11, 1.100000e+01 + %add26 = fadd float %P.056, %mul25 + %12 = trunc i64 %indvars.iv.next to i32 + %cmp = icmp slt i32 %12, 121 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + %add28 = fadd float %add6, %add11 + %add29 = fadd float %add28, %add16 + %add30 = fadd float %add29, %add21 + %add31 = fadd float %add30, %add26 + ret float %add31 +} + +; Make sure the order of phi nodes of different types does not prevent +; vectorization of same typed phi nodes. 
+; CHECK-LABEL: sort_phi_type +; CHECK: phi <4 x float> +; CHECK: fmul <4 x float> + +define float @sort_phi_type(float* nocapture readonly %A) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %Y = phi float [ 1.000000e+01, %entry ], [ %mul10, %for.body ] + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %B = phi float [ 1.000000e+01, %entry ], [ %mul15, %for.body ] + %G = phi float [ 1.000000e+01, %entry ], [ %mul20, %for.body ] + %R = phi float [ 1.000000e+01, %entry ], [ %mul25, %for.body ] + %mul10 = fmul float %Y, 8.000000e+00 + %mul15 = fmul float %B, 9.000000e+00 + %mul20 = fmul float %R, 10.000000e+01 + %mul25 = fmul float %G, 11.100000e+01 + %indvars.iv.next = add nsw i64 %indvars.iv, 4 + %cmp = icmp slt i64 %indvars.iv.next, 128 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + %add28 = fadd float 1.000000e+01, %mul10 + %add29 = fadd float %mul10, %mul15 + %add30 = fadd float %add29, %mul20 + %add31 = fadd float %add30, %mul25 + ret float %add31 +} + +define void @test(x86_fp80* %i1, x86_fp80* %i2, x86_fp80* %o) { +; CHECK-LABEL: @test( +; +; Test that we correctly recognize the discontiguous memory in arrays where the +; size is less than the alignment, and through various different GEP formations. +; +; We disable the vectorization of x86_fp80 for now. 
+ +entry: + %i1.0 = load x86_fp80* %i1, align 16 + %i1.gep1 = getelementptr x86_fp80* %i1, i64 1 + %i1.1 = load x86_fp80* %i1.gep1, align 16 +; CHECK: load x86_fp80* +; CHECK: load x86_fp80* +; CHECK-NOT: insertelement <2 x x86_fp80> +; CHECK_NOT: insertelement <2 x x86_fp80> + br i1 undef, label %then, label %end + +then: + %i2.gep0 = getelementptr inbounds x86_fp80* %i2, i64 0 + %i2.0 = load x86_fp80* %i2.gep0, align 16 + %i2.gep1 = getelementptr inbounds x86_fp80* %i2, i64 1 + %i2.1 = load x86_fp80* %i2.gep1, align 16 +; CHECK: load x86_fp80* +; CHECK: load x86_fp80* +; CHECK-NOT: insertelement <2 x x86_fp80> +; CHECK-NOT: insertelement <2 x x86_fp80> + br label %end + +end: + %phi0 = phi x86_fp80 [ %i1.0, %entry ], [ %i2.0, %then ] + %phi1 = phi x86_fp80 [ %i1.1, %entry ], [ %i2.1, %then ] +; CHECK-NOT: phi <2 x x86_fp80> +; CHECK-NOT: extractelement <2 x x86_fp80> +; CHECK-NOT: extractelement <2 x x86_fp80> + store x86_fp80 %phi0, x86_fp80* %o, align 16 + %o.gep1 = getelementptr inbounds x86_fp80* %o, i64 1 + store x86_fp80 %phi1, x86_fp80* %o.gep1, align 16 + ret void +} diff --git a/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll b/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll new file mode 100644 index 0000000..6d2d5e3 --- /dev/null +++ b/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll @@ -0,0 +1,31 @@ +; RUN: opt < %s -slp-vectorizer -mtriple=x86_64-apple-macosx10.9.0 -disable-output + +target datalayout = "f64:64:64-v64:64:64" + +define void @test_phi_in_landingpad() { +entry: + invoke void @foo() + to label %inner unwind label %lpad + +inner: + %x0 = fsub double undef, undef + %y0 = fsub double undef, undef + invoke void @foo() + to label %done unwind label %lpad + +lpad: + %x1 = phi double [ undef, %entry ], [ undef, %inner ] + %y1 = phi double [ undef, %entry ], [ undef, %inner ] + landingpad { i8*, i32 } personality i8* + bitcast (i32 (...)* @__gxx_personality_v0 to i8*) catch i8* null + br label %done + +done: + phi double [ %x0, %inner 
], [ %x1, %lpad ] + phi double [ %y0, %inner ], [ %y1, %lpad ] + ret void +} + +declare void @foo() + +declare i32 @__gxx_personality_v0(...) diff --git a/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll b/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll new file mode 100644 index 0000000..520e672 --- /dev/null +++ b/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll @@ -0,0 +1,45 @@ +; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=-100 -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s + +; We purposely over-align f64 to 128bit here. +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:128:128-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128" +target triple = "i386-apple-macosx10.9.0" + + +define void @test(double* %i1, double* %i2, double* %o) { +; CHECK-LABEL: @test( +; +; Test that we correctly recognize the discontiguous memory in arrays where the +; size is less than the alignment, and through various different GEP formations. 
+ +entry: + %i1.0 = load double* %i1, align 16 + %i1.gep1 = getelementptr double* %i1, i64 1 + %i1.1 = load double* %i1.gep1, align 16 +; CHECK: load double* +; CHECK: load double* +; CHECK: insertelement <2 x double> +; CHECK: insertelement <2 x double> + br i1 undef, label %then, label %end + +then: + %i2.gep0 = getelementptr inbounds double* %i2, i64 0 + %i2.0 = load double* %i2.gep0, align 16 + %i2.gep1 = getelementptr inbounds double* %i2, i64 1 + %i2.1 = load double* %i2.gep1, align 16 +; CHECK: load double* +; CHECK: load double* +; CHECK: insertelement <2 x double> +; CHECK: insertelement <2 x double> + br label %end + +end: + %phi0 = phi double [ %i1.0, %entry ], [ %i2.0, %then ] + %phi1 = phi double [ %i1.1, %entry ], [ %i2.1, %then ] +; CHECK: phi <2 x double> +; CHECK: extractelement <2 x double> +; CHECK: extractelement <2 x double> + store double %phi0, double* %o, align 16 + %o.gep1 = getelementptr inbounds double* %o, i64 1 + store double %phi1, double* %o.gep1, align 16 + ret void +} diff --git a/test/Transforms/SLPVectorizer/X86/pr16899.ll b/test/Transforms/SLPVectorizer/X86/pr16899.ll new file mode 100644 index 0000000..8631bc9 --- /dev/null +++ b/test/Transforms/SLPVectorizer/X86/pr16899.ll @@ -0,0 +1,31 @@ +; RUN: opt < %s -slp-vectorizer -S -mtriple=i386--netbsd -mcpu=i486 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" +target triple = "i386--netbsd" + +@a = common global i32* null, align 4 + +; Function Attrs: noreturn nounwind readonly +define i32 @fn1() #0 { +entry: + %0 = load i32** @a, align 4, !tbaa !4 + %1 = load i32* %0, align 4, !tbaa !5 + %arrayidx1 = getelementptr inbounds i32* %0, i32 1 + %2 = load i32* %arrayidx1, align 4, !tbaa !5 + br label %do.body + +do.body: ; preds = %do.body, %entry + %c.0 = phi i32 [ %2, %entry ], [ %add2, %do.body ] + %b.0 = phi i32 [ %1, %entry ], [ %add, %do.body ] + %add = add nsw i32 %b.0, %c.0 
+ %add2 = add nsw i32 %add, 1 + br label %do.body +} + +attributes #0 = { noreturn nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!0 = metadata !{metadata !"any pointer", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} +!3 = metadata !{metadata !"int", metadata !1} +!4 = metadata !{metadata !0, metadata !0, i64 0} +!5 = metadata !{metadata !3, metadata !3, i64 0} diff --git a/test/Transforms/SLPVectorizer/X86/pr18060.ll b/test/Transforms/SLPVectorizer/X86/pr18060.ll new file mode 100644 index 0000000..e6813f3 --- /dev/null +++ b/test/Transforms/SLPVectorizer/X86/pr18060.ll @@ -0,0 +1,47 @@ +; RUN: opt < %s -slp-vectorizer -S -mtriple=i386-pc-linux + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" +target triple = "i386-pc-linux" + +; Function Attrs: nounwind +define i32 @_Z16adjustFixupValueyj(i64 %Value, i32 %Kind) { +entry: + %extract.t = trunc i64 %Value to i32 + %extract = lshr i64 %Value, 12 + %extract.t6 = trunc i64 %extract to i32 + switch i32 %Kind, label %sw.default [ + i32 0, label %return + i32 1, label %return + i32 129, label %sw.bb1 + i32 130, label %sw.bb2 + ] + +sw.default: ; preds = %entry + call void @_Z25llvm_unreachable_internalv() + unreachable + +sw.bb1: ; preds = %entry + %shr = lshr i64 %Value, 16 + %extract.t5 = trunc i64 %shr to i32 + %extract7 = lshr i64 %Value, 28 + %extract.t8 = trunc i64 %extract7 to i32 + br label %sw.bb2 + +sw.bb2: ; preds = %sw.bb1, %entry + %Value.addr.0.off0 = phi i32 [ %extract.t, %entry ], [ %extract.t5, %sw.bb1 ] + %Value.addr.0.off12 = phi i32 [ %extract.t6, %entry ], [ %extract.t8, %sw.bb1 ] + %conv6 = and i32 
%Value.addr.0.off0, 4095 + %conv4 = shl i32 %Value.addr.0.off12, 16 + %shl = and i32 %conv4, 983040 + %or = or i32 %shl, %conv6 + %or11 = or i32 %or, 8388608 + br label %return + +return: ; preds = %sw.bb2, %entry, %entry + %retval.0 = phi i32 [ %or11, %sw.bb2 ], [ %extract.t, %entry ], [ %extract.t, %entry ] + ret i32 %retval.0 +} + +; Function Attrs: noreturn +declare void @_Z25llvm_unreachable_internalv() + diff --git a/test/Transforms/SLPVectorizer/X86/rgb_phi.ll b/test/Transforms/SLPVectorizer/X86/rgb_phi.ll index 3235fd9..6aea5d3 100644 --- a/test/Transforms/SLPVectorizer/X86/rgb_phi.ll +++ b/test/Transforms/SLPVectorizer/X86/rgb_phi.ll @@ -3,6 +3,8 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128" target triple = "i386-apple-macosx10.9.0" +; We disable the vectorization of <3 x float> for now + ; float foo(float *A) { ; ; float R = A[0]; @@ -19,14 +21,14 @@ target triple = "i386-apple-macosx10.9.0" ;CHECK-LABEL: @foo( ;CHECK: br -;CHECK: phi <3 x float> -;CHECK: fmul <3 x float> -;CHECK: fadd <3 x float> +;CHECK-NOT: phi <3 x float> +;CHECK-NOT: fmul <3 x float> +;CHECK-NOT: fadd <3 x float> ; At the moment we don't sink extractelements. 
;CHECK: br -;CHECK: extractelement -;CHECK: extractelement -;CHECK: extractelement +;CHECK-NOT: extractelement +;CHECK-NOT: extractelement +;CHECK-NOT: extractelement ;CHECK: ret define float @foo(float* nocapture readonly %A) { diff --git a/test/Transforms/SLPVectorizer/X86/simplebb.ll b/test/Transforms/SLPVectorizer/X86/simplebb.ll index cd0b99e..7d682e5 100644 --- a/test/Transforms/SLPVectorizer/X86/simplebb.ll +++ b/test/Transforms/SLPVectorizer/X86/simplebb.ll @@ -23,3 +23,67 @@ entry: ret void } +; Simple 3-pair chain with loads and stores, obfuscated with bitcasts +; CHECK: test2 +; CHECK: store <2 x double> +; CHECK: ret +define void @test2(double* %a, double* %b, i8* %e) { +entry: + %i0 = load double* %a, align 8 + %i1 = load double* %b, align 8 + %mul = fmul double %i0, %i1 + %arrayidx3 = getelementptr inbounds double* %a, i64 1 + %i3 = load double* %arrayidx3, align 8 + %arrayidx4 = getelementptr inbounds double* %b, i64 1 + %i4 = load double* %arrayidx4, align 8 + %mul5 = fmul double %i3, %i4 + %c = bitcast i8* %e to double* + store double %mul, double* %c, align 8 + %carrayidx5 = getelementptr inbounds i8* %e, i64 8 + %arrayidx5 = bitcast i8* %carrayidx5 to double* + store double %mul5, double* %arrayidx5, align 8 + ret void +} + +; Don't vectorize volatile loads. 
+; CHECK: test_volatile_load +; CHECK-NOT: load <2 x double> +; CHECK: store <2 x double> +; CHECK: ret +define void @test_volatile_load(double* %a, double* %b, double* %c) { +entry: + %i0 = load volatile double* %a, align 8 + %i1 = load volatile double* %b, align 8 + %mul = fmul double %i0, %i1 + %arrayidx3 = getelementptr inbounds double* %a, i64 1 + %i3 = load double* %arrayidx3, align 8 + %arrayidx4 = getelementptr inbounds double* %b, i64 1 + %i4 = load double* %arrayidx4, align 8 + %mul5 = fmul double %i3, %i4 + store double %mul, double* %c, align 8 + %arrayidx5 = getelementptr inbounds double* %c, i64 1 + store double %mul5, double* %arrayidx5, align 8 + ret void +} + +; Don't vectorize volatile stores. +; CHECK: test_volatile_store +; CHECK-NOT: store <2 x double> +; CHECK: ret +define void @test_volatile_store(double* %a, double* %b, double* %c) { +entry: + %i0 = load double* %a, align 8 + %i1 = load double* %b, align 8 + %mul = fmul double %i0, %i1 + %arrayidx3 = getelementptr inbounds double* %a, i64 1 + %i3 = load double* %arrayidx3, align 8 + %arrayidx4 = getelementptr inbounds double* %b, i64 1 + %i4 = load double* %arrayidx4, align 8 + %mul5 = fmul double %i3, %i4 + store volatile double %mul, double* %c, align 8 + %arrayidx5 = getelementptr inbounds double* %c, i64 1 + store volatile double %mul5, double* %arrayidx5, align 8 + ret void +} + + diff --git a/test/Transforms/SLPVectorizer/X86/tiny-tree.ll b/test/Transforms/SLPVectorizer/X86/tiny-tree.ll new file mode 100644 index 0000000..2747a1f --- /dev/null +++ b/test/Transforms/SLPVectorizer/X86/tiny-tree.ll @@ -0,0 +1,140 @@ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" +; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s + + +; CHECK: tiny_tree_fully_vectorizable +; CHECK: load 
<2 x double> +; CHECK: store <2 x double> +; CHECK: ret + +define void @tiny_tree_fully_vectorizable(double* noalias nocapture %dst, double* noalias nocapture readonly %src, i64 %count) #0 { +entry: + %cmp12 = icmp eq i64 %count, 0 + br i1 %cmp12, label %for.end, label %for.body + +for.body: ; preds = %entry, %for.body + %i.015 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %dst.addr.014 = phi double* [ %add.ptr4, %for.body ], [ %dst, %entry ] + %src.addr.013 = phi double* [ %add.ptr, %for.body ], [ %src, %entry ] + %0 = load double* %src.addr.013, align 8 + store double %0, double* %dst.addr.014, align 8 + %arrayidx2 = getelementptr inbounds double* %src.addr.013, i64 1 + %1 = load double* %arrayidx2, align 8 + %arrayidx3 = getelementptr inbounds double* %dst.addr.014, i64 1 + store double %1, double* %arrayidx3, align 8 + %add.ptr = getelementptr inbounds double* %src.addr.013, i64 %i.015 + %add.ptr4 = getelementptr inbounds double* %dst.addr.014, i64 %i.015 + %inc = add i64 %i.015, 1 + %exitcond = icmp eq i64 %inc, %count + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +; CHECK: tiny_tree_fully_vectorizable2 +; CHECK: load <4 x float> +; CHECK: store <4 x float> +; CHECK: ret + +define void @tiny_tree_fully_vectorizable2(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %count) #0 { +entry: + %cmp20 = icmp eq i64 %count, 0 + br i1 %cmp20, label %for.end, label %for.body + +for.body: ; preds = %entry, %for.body + %i.023 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %dst.addr.022 = phi float* [ %add.ptr8, %for.body ], [ %dst, %entry ] + %src.addr.021 = phi float* [ %add.ptr, %for.body ], [ %src, %entry ] + %0 = load float* %src.addr.021, align 4 + store float %0, float* %dst.addr.022, align 4 + %arrayidx2 = getelementptr inbounds float* %src.addr.021, i64 1 + %1 = load float* %arrayidx2, align 4 + %arrayidx3 = getelementptr inbounds float* %dst.addr.022, i64 1 + store float %1, 
float* %arrayidx3, align 4 + %arrayidx4 = getelementptr inbounds float* %src.addr.021, i64 2 + %2 = load float* %arrayidx4, align 4 + %arrayidx5 = getelementptr inbounds float* %dst.addr.022, i64 2 + store float %2, float* %arrayidx5, align 4 + %arrayidx6 = getelementptr inbounds float* %src.addr.021, i64 3 + %3 = load float* %arrayidx6, align 4 + %arrayidx7 = getelementptr inbounds float* %dst.addr.022, i64 3 + store float %3, float* %arrayidx7, align 4 + %add.ptr = getelementptr inbounds float* %src.addr.021, i64 %i.023 + %add.ptr8 = getelementptr inbounds float* %dst.addr.022, i64 %i.023 + %inc = add i64 %i.023, 1 + %exitcond = icmp eq i64 %inc, %count + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +; We do not vectorize the tiny tree which is not fully vectorizable. +; CHECK: tiny_tree_not_fully_vectorizable +; CHECK-NOT: <2 x double> +; CHECK: ret + +define void @tiny_tree_not_fully_vectorizable(double* noalias nocapture %dst, double* noalias nocapture readonly %src, i64 %count) #0 { +entry: + %cmp12 = icmp eq i64 %count, 0 + br i1 %cmp12, label %for.end, label %for.body + +for.body: ; preds = %entry, %for.body + %i.015 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %dst.addr.014 = phi double* [ %add.ptr4, %for.body ], [ %dst, %entry ] + %src.addr.013 = phi double* [ %add.ptr, %for.body ], [ %src, %entry ] + %0 = load double* %src.addr.013, align 8 + store double %0, double* %dst.addr.014, align 8 + %arrayidx2 = getelementptr inbounds double* %src.addr.013, i64 2 + %1 = load double* %arrayidx2, align 8 + %arrayidx3 = getelementptr inbounds double* %dst.addr.014, i64 1 + store double %1, double* %arrayidx3, align 8 + %add.ptr = getelementptr inbounds double* %src.addr.013, i64 %i.015 + %add.ptr4 = getelementptr inbounds double* %dst.addr.014, i64 %i.015 + %inc = add i64 %i.015, 1 + %exitcond = icmp eq i64 %inc, %count + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, 
%entry + ret void +} + + +; CHECK: tiny_tree_not_fully_vectorizable2 +; CHECK-NOT: <2 x double> +; CHECK: ret + +define void @tiny_tree_not_fully_vectorizable2(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %count) #0 { +entry: + %cmp20 = icmp eq i64 %count, 0 + br i1 %cmp20, label %for.end, label %for.body + +for.body: ; preds = %entry, %for.body + %i.023 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %dst.addr.022 = phi float* [ %add.ptr8, %for.body ], [ %dst, %entry ] + %src.addr.021 = phi float* [ %add.ptr, %for.body ], [ %src, %entry ] + %0 = load float* %src.addr.021, align 4 + store float %0, float* %dst.addr.022, align 4 + %arrayidx2 = getelementptr inbounds float* %src.addr.021, i64 4 + %1 = load float* %arrayidx2, align 4 + %arrayidx3 = getelementptr inbounds float* %dst.addr.022, i64 1 + store float %1, float* %arrayidx3, align 4 + %arrayidx4 = getelementptr inbounds float* %src.addr.021, i64 2 + %2 = load float* %arrayidx4, align 4 + %arrayidx5 = getelementptr inbounds float* %dst.addr.022, i64 2 + store float %2, float* %arrayidx5, align 4 + %arrayidx6 = getelementptr inbounds float* %src.addr.021, i64 3 + %3 = load float* %arrayidx6, align 4 + %arrayidx7 = getelementptr inbounds float* %dst.addr.022, i64 3 + store float %3, float* %arrayidx7, align 4 + %add.ptr = getelementptr inbounds float* %src.addr.021, i64 %i.023 + %add.ptr8 = getelementptr inbounds float* %dst.addr.022, i64 %i.023 + %inc = add i64 %i.023, 1 + %exitcond = icmp eq i64 %inc, %count + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} diff --git a/test/Transforms/SLPVectorizer/XCore/lit.local.cfg b/test/Transforms/SLPVectorizer/XCore/lit.local.cfg new file mode 100644 index 0000000..4d17d46 --- /dev/null +++ b/test/Transforms/SLPVectorizer/XCore/lit.local.cfg @@ -0,0 +1,3 @@ +targets = set(config.root.targets_to_build.split()) +if not 'XCore' in targets: + config.unsupported = True diff --git 
a/test/Transforms/SLPVectorizer/XCore/no-vector-registers.ll b/test/Transforms/SLPVectorizer/XCore/no-vector-registers.ll new file mode 100644 index 0000000..66392e7 --- /dev/null +++ b/test/Transforms/SLPVectorizer/XCore/no-vector-registers.ll @@ -0,0 +1,24 @@ +; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=xcore | FileCheck %s + +target datalayout = "e-p:32:32:32-a0:0:32-n32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f16:16:32-f32:32:32-f64:32:32" +target triple = "xcore" + +; Simple 3-pair chain with loads and stores +; CHECK: test1 +; CHECK-NOT: <2 x double> +define void @test1(double* %a, double* %b, double* %c) { +entry: + %i0 = load double* %a, align 8 + %i1 = load double* %b, align 8 + %mul = fmul double %i0, %i1 + %arrayidx3 = getelementptr inbounds double* %a, i64 1 + %i3 = load double* %arrayidx3, align 8 + %arrayidx4 = getelementptr inbounds double* %b, i64 1 + %i4 = load double* %arrayidx4, align 8 + %mul5 = fmul double %i3, %i4 + store double %mul, double* %c, align 8 + %arrayidx5 = getelementptr inbounds double* %c, i64 1 + store double %mul5, double* %arrayidx5, align 8 + ret void +} + diff --git a/test/Transforms/SLPVectorizer/lit.local.cfg b/test/Transforms/SLPVectorizer/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/SLPVectorizer/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll index 458b0df..5d3e4b5 100644 --- a/test/Transforms/SROA/basictest.ll +++ b/test/Transforms/SROA/basictest.ll @@ -1181,7 +1181,6 @@ entry: store i1 %x, i1* %b.i1, align 8 %b.i8 = bitcast <{ i1 }>* %b to i8* %foo = load i8* %b.i8, align 1 -; CHECK-NEXT: {{.*}} = zext i1 %x to i8 ; CHECK-NEXT: %[[ext:.*]] = zext i1 %x to i8 ; CHECK-NEXT: store i8 %[[ext]], i8* %[[a]], align 8 ; CHECK-NEXT: {{.*}} = load i8* %[[a]], align 8 diff --git a/test/Transforms/SROA/lit.local.cfg b/test/Transforms/SROA/lit.local.cfg 
deleted file mode 100644 index c6106e4..0000000 --- a/test/Transforms/SROA/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll'] diff --git a/test/Transforms/SROA/vector-conversion.ll b/test/Transforms/SROA/vector-conversion.ll new file mode 100644 index 0000000..08d7960 --- /dev/null +++ b/test/Transforms/SROA/vector-conversion.ll @@ -0,0 +1,53 @@ +; RUN: opt < %s -sroa -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" + +define <4 x i64> @vector_ptrtoint({<2 x i32*>, <2 x i32*>} %x) { +; CHECK-LABEL: @vector_ptrtoint + %a = alloca {<2 x i32*>, <2 x i32*>} +; CHECK-NOT: alloca + + store {<2 x i32*>, <2 x i32*>} %x, {<2 x i32*>, <2 x i32*>}* %a +; CHECK-NOT: store + + %cast = bitcast {<2 x i32*>, <2 x i32*>}* %a to <4 x i64>* + %vec = load <4 x i64>* %cast +; CHECK-NOT: load +; CHECK: ptrtoint + + ret <4 x i64> %vec +} + +define <4 x i32*> @vector_inttoptr({<2 x i64>, <2 x i64>} %x) { +; CHECK-LABEL: @vector_inttoptr + %a = alloca {<2 x i64>, <2 x i64>} +; CHECK-NOT: alloca + + store {<2 x i64>, <2 x i64>} %x, {<2 x i64>, <2 x i64>}* %a +; CHECK-NOT: store + + %cast = bitcast {<2 x i64>, <2 x i64>}* %a to <4 x i32*>* + %vec = load <4 x i32*>* %cast +; CHECK-NOT: load +; CHECK: inttoptr + + ret <4 x i32*> %vec +} + +define <2 x i64> @vector_ptrtointbitcast({<1 x i32*>, <1 x i32*>} %x) { +; CHECK-LABEL: @vector_ptrtointbitcast + %a = alloca {<1 x i32*>, <1 x i32*>} +; CHECK-NOT: alloca + + store {<1 x i32*>, <1 x i32*>} %x, {<1 x i32*>, <1 x i32*>}* %a +; CHECK-NOT: store + + %cast = bitcast {<1 x i32*>, <1 x i32*>}* %a to <2 x i64>* + %vec = load <2 x i64>* %cast +; CHECK-NOT: load +; CHECK: ptrtoint +; CHECK: bitcast +; CHECK: ptrtoint +; CHECK: bitcast + + ret <2 x i64> %vec +} diff --git a/test/Transforms/SampleProfile/Inputs/branch.prof b/test/Transforms/SampleProfile/Inputs/branch.prof new file mode 100644 index 0000000..d19894d --- 
/dev/null +++ b/test/Transforms/SampleProfile/Inputs/branch.prof @@ -0,0 +1,11 @@ +symbol table +1 +main +main:15680:0:7 +0: 0 +4: 0 +7: 0 +9: 10226 +10: 2243 +16: 0 +18: 0 diff --git a/test/Transforms/SampleProfile/branch.ll b/test/Transforms/SampleProfile/branch.ll new file mode 100644 index 0000000..5167627 --- /dev/null +++ b/test/Transforms/SampleProfile/branch.ll @@ -0,0 +1,143 @@ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/branch.prof | opt -analyze -branch-prob | FileCheck %s + +; Original C++ code for this test case: +; +; #include <stdio.h> +; #include <stdlib.h> +; +; int main(int argc, char *argv[]) { +; if (argc < 2) +; return 1; +; double result; +; int limit = atoi(argv[1]); +; if (limit > 100) { +; double s = 23.041968; +; for (int u = 0; u < limit; u++) { +; double x = s; +; s = x + 3.049 + (double)u; +; s -= s + 3.94 / x * 0.32; +; } +; result = s; +; } else { +; result = 0; +; } +; printf("result is %lf\n", result); +; return 0; +; } + +@.str = private unnamed_addr constant [15 x i8] c"result is %lf\0A\00", align 1 + +; Function Attrs: nounwind uwtable +define i32 @main(i32 %argc, i8** nocapture readonly %argv) #0 { +; CHECK: Printing analysis 'Branch Probability Analysis' for function 'main': + +entry: + tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !13), !dbg !27 + tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !14), !dbg !27 + %cmp = icmp slt i32 %argc, 2, !dbg !28 + br i1 %cmp, label %return, label %if.end, !dbg !28 +; CHECK: edge entry -> return probability is 1 / 2 = 50% +; CHECK: edge entry -> if.end probability is 1 / 2 = 50% + +if.end: ; preds = %entry + %arrayidx = getelementptr inbounds i8** %argv, i64 1, !dbg !30 + %0 = load i8** %arrayidx, align 8, !dbg !30, !tbaa !31 + %call = tail call i32 @atoi(i8* %0) #4, !dbg !30 + tail call void @llvm.dbg.value(metadata !{i32 %call}, i64 0, metadata !17), !dbg !30 + %cmp1 = icmp sgt i32 %call, 100, !dbg !35 + br i1 %cmp1, label 
%for.body, label %if.end6, !dbg !35 +; CHECK: edge if.end -> for.body probability is 2243 / 2244 = 99.9554% [HOT edge] +; CHECK: edge if.end -> if.end6 probability is 1 / 2244 = 0.0445633% + +for.body: ; preds = %if.end, %for.body + %u.016 = phi i32 [ %inc, %for.body ], [ 0, %if.end ] + %s.015 = phi double [ %sub, %for.body ], [ 0x40370ABE6A337A81, %if.end ] + %add = fadd double %s.015, 3.049000e+00, !dbg !36 + %conv = sitofp i32 %u.016 to double, !dbg !36 + %add4 = fadd double %add, %conv, !dbg !36 + tail call void @llvm.dbg.value(metadata !{double %add4}, i64 0, metadata !18), !dbg !36 + %div = fdiv double 3.940000e+00, %s.015, !dbg !37 + %mul = fmul double %div, 3.200000e-01, !dbg !37 + %add5 = fadd double %add4, %mul, !dbg !37 + %sub = fsub double %add4, %add5, !dbg !37 + tail call void @llvm.dbg.value(metadata !{double %sub}, i64 0, metadata !18), !dbg !37 + %inc = add nsw i32 %u.016, 1, !dbg !38 + tail call void @llvm.dbg.value(metadata !{i32 %inc}, i64 0, metadata !21), !dbg !38 + %exitcond = icmp eq i32 %inc, %call, !dbg !38 + br i1 %exitcond, label %if.end6, label %for.body, !dbg !38 +; CHECK: edge for.body -> if.end6 probability is 1 / 2244 = 0.0445633% +; CHECK: edge for.body -> for.body probability is 2243 / 2244 = 99.9554% [HOT edge] + +if.end6: ; preds = %for.body, %if.end + %result.0 = phi double [ 0.000000e+00, %if.end ], [ %sub, %for.body ] + %call7 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str, i64 0, i64 0), double %result.0), !dbg !39 + br label %return, !dbg !40 +; CHECK: edge if.end6 -> return probability is 16 / 16 = 100% [HOT edge] + +return: ; preds = %entry, %if.end6 + %retval.0 = phi i32 [ 0, %if.end6 ], [ 1, %entry ] + ret i32 %retval.0, !dbg !41 +} + +; Function Attrs: nounwind readonly +declare i32 @atoi(i8* nocapture) #1 + +; Function Attrs: nounwind +declare i32 @printf(i8* nocapture readonly, ...) 
#2 + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.value(metadata, i64, metadata) #3 + +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind readnone } +attributes #4 = { nounwind readonly } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!25, !42} +!llvm.ident = !{!26} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (trunk 192896) (llvm/trunk 192895)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [./branch.cc] [DW_LANG_C_plus_plus] +!1 = metadata !{metadata !"branch.cc", metadata !"."} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 4, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !12, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [main] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [./branch.cc] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{metadata !8, metadata !8, metadata !9} +!8 = metadata 
!{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ] +!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char] +!11 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char] +!12 = metadata !{metadata !13, metadata !14, metadata !15, metadata !17, metadata !18, metadata !21, metadata !23} +!13 = metadata !{i32 786689, metadata !4, metadata !"argc", metadata !5, i32 16777220, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argc] [line 4] +!14 = metadata !{i32 786689, metadata !4, metadata !"argv", metadata !5, i32 33554436, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argv] [line 4] +!15 = metadata !{i32 786688, metadata !4, metadata !"result", metadata !5, i32 7, metadata !16, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [result] [line 7] +!16 = metadata !{i32 786468, null, null, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float] +!17 = metadata !{i32 786688, metadata !4, metadata !"limit", metadata !5, i32 8, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [limit] [line 8] +!18 = metadata !{i32 786688, metadata !19, metadata !"s", metadata !5, i32 10, metadata !16, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [s] [line 10] +!19 = metadata !{i32 786443, metadata !1, metadata !20, i32 9, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [./branch.cc] +!20 = metadata !{i32 786443, metadata !1, metadata !4, i32 9, i32 0, 
i32 1} ; [ DW_TAG_lexical_block ] [./branch.cc] +!21 = metadata !{i32 786688, metadata !22, metadata !"u", metadata !5, i32 11, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [u] [line 11] +!22 = metadata !{i32 786443, metadata !1, metadata !19, i32 11, i32 0, i32 3} ; [ DW_TAG_lexical_block ] [./branch.cc] +!23 = metadata !{i32 786688, metadata !24, metadata !"x", metadata !5, i32 12, metadata !16, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [x] [line 12] +!24 = metadata !{i32 786443, metadata !1, metadata !22, i32 11, i32 0, i32 4} ; [ DW_TAG_lexical_block ] [./branch.cc] +!25 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!26 = metadata !{metadata !"clang version 3.4 (trunk 192896) (llvm/trunk 192895)"} +!27 = metadata !{i32 4, i32 0, metadata !4, null} +!28 = metadata !{i32 5, i32 0, metadata !29, null} +!29 = metadata !{i32 786443, metadata !1, metadata !4, i32 5, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [./branch.cc] +!30 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ] +!31 = metadata !{metadata !32, metadata !32, i64 0} +!32 = metadata !{metadata !"any pointer", metadata !33, i64 0} +!33 = metadata !{metadata !"omnipotent char", metadata !34, i64 0} +!34 = metadata !{metadata !"Simple C/C++ TBAA"} +!35 = metadata !{i32 9, i32 0, metadata !20, null} +!36 = metadata !{i32 13, i32 0, metadata !24, null} +!37 = metadata !{i32 14, i32 0, metadata !24, null} +!38 = metadata !{i32 11, i32 0, metadata !22, null} +!39 = metadata !{i32 20, i32 0, metadata !4, null} +!40 = metadata !{i32 21, i32 0, metadata !4, null} +!41 = metadata !{i32 22, i32 0, metadata !4, null} +!42 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Transforms/ScalarRepl/debuginfo-preserved.ll b/test/Transforms/ScalarRepl/debuginfo-preserved.ll index 27e6670..71bf22a 100644 --- a/test/Transforms/ScalarRepl/debuginfo-preserved.ll +++ b/test/Transforms/ScalarRepl/debuginfo-preserved.ll @@ -40,11 +40,12 @@ entry: declare void 
@llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!20} !0 = metadata !{i32 786449, metadata !18, i32 12, metadata !"clang version 3.0 (trunk 131941)", i1 false, metadata !"", i32 0, metadata !19, metadata !19, metadata !17, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!1 = metadata !{i32 786478, metadata !18, metadata !2, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i32)* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786478, metadata !18, metadata !2, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i32)* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [f] !2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 786453, metadata !18, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786453, metadata !18, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !6 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 16777217, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] @@ -61,3 +62,4 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !17 = metadata !{metadata !1} !18 = metadata !{metadata !"/d/j/debug-test.c", metadata !"/Volumes/Data/b"} !19 = metadata !{i32 0} +!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Transforms/ScalarRepl/lit.local.cfg b/test/Transforms/ScalarRepl/lit.local.cfg deleted file mode 100644 
index c6106e4..0000000 --- a/test/Transforms/ScalarRepl/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll'] diff --git a/test/Transforms/ScalarRepl/union-pointer.ll b/test/Transforms/ScalarRepl/union-pointer.ll index 03d25ac..f0dc141 100644 --- a/test/Transforms/ScalarRepl/union-pointer.ll +++ b/test/Transforms/ScalarRepl/union-pointer.ll @@ -1,13 +1,16 @@ ; PR892 -; RUN: opt < %s -scalarrepl -S | \ -; RUN: not grep alloca -; RUN: opt < %s -scalarrepl -S | grep "ret i8" +; RUN: opt < %s -scalarrepl -S | FileCheck %s -target datalayout = "e-p:32:32-n8:16:32" + +target datalayout = "e-p:32:32-p1:16:16-n8:16:32" target triple = "i686-apple-darwin8.7.2" - %struct.Val = type { i32*, i32 } + +%struct.Val = type { i32*, i32 } define i8* @test(i16* %X) { +; CHECK-LABEL: @test( +; CHECK-NOT: alloca +; CHECK: ret i8* %X_addr = alloca i16* ; <i16**> [#uses=2] store i16* %X, i16** %X_addr %X_addr.upgrd.1 = bitcast i16** %X_addr to i8** ; <i8**> [#uses=1] @@ -15,7 +18,37 @@ define i8* @test(i16* %X) { ret i8* %tmp } +define i8 addrspace(1)* @test_as1(i16 addrspace(1)* %x) { +; CHECK-LABEL: @test_as1( +; CHECK-NEXT: %1 = ptrtoint i16 addrspace(1)* %x to i16 +; CHECK-NEXT: %2 = inttoptr i16 %1 to i8 addrspace(1)* +; CHECK-NEXT: ret i8 addrspace(1)* %2 + %x_addr = alloca i16 addrspace(1)* + store i16 addrspace(1)* %x, i16 addrspace(1)** %x_addr + %x_addr.upgrd.1 = bitcast i16 addrspace(1)** %x_addr to i8 addrspace(1)** + %tmp = load i8 addrspace(1)** %x_addr.upgrd.1 + ret i8 addrspace(1)* %tmp +} + +define i8 addrspace(1)* @test_as1_array(i16 addrspace(1)* %x) { +; CHECK-LABEL: @test_as1_array( +; CHECK-NEXT: %1 = ptrtoint i16 addrspace(1)* %x to i16 +; CHECK-NEXT: %2 = inttoptr i16 %1 to i8 addrspace(1)* +; CHECK-NEXT: ret i8 addrspace(1)* %2 + %as_ptr_array = alloca [4 x i16 addrspace(1)*] + %elem1 = getelementptr [4 x i16 addrspace(1)*]* %as_ptr_array, i32 0, i32 1 + store i16 addrspace(1)* %x, i16 addrspace(1)** %elem1 + %elem1.cast = bitcast i16 
addrspace(1)** %elem1 to i8 addrspace(1)** + %tmp = load i8 addrspace(1)** %elem1.cast + ret i8 addrspace(1)* %tmp +} + + define void @test2(i64 %Op.0) { +; CHECK-LABEL: @test2( +; CHECK-NOT: alloca +; CHECK: ret void + %tmp = alloca %struct.Val, align 8 ; <%struct.Val*> [#uses=3] %tmp1 = alloca %struct.Val, align 8 ; <%struct.Val*> [#uses=3] %tmp.upgrd.2 = call i64 @_Z3foov( ) ; <i64> [#uses=1] diff --git a/test/Transforms/SimplifyCFG/CoveredLookupTable.ll b/test/Transforms/SimplifyCFG/CoveredLookupTable.ll new file mode 100644 index 0000000..8b45a59 --- /dev/null +++ b/test/Transforms/SimplifyCFG/CoveredLookupTable.ll @@ -0,0 +1,48 @@ +; RUN: opt -simplifycfg -S %s | FileCheck %s +; rdar://15268442 + +target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin12.0.0" + +; CHECK-LABEL: define i3 @coveredswitch_test( +; CHECK: entry: +; CHECK-NEXT: sub i3 %input, -4 +; CHECK-NEXT: zext i3 %switch.tableidx to i24 +; CHECK-NEXT: mul i24 %switch.cast, 3 +; CHECK-NEXT: lshr i24 7507338, %switch.shiftamt +; CHECK-NEXT: trunc i24 %switch.downshift to i3 +; CHECK-NEXT: ret i3 %switch.masked + +define i3 @coveredswitch_test(i3 %input) { +entry: + switch i3 %input, label %bb8 [ + i3 0, label %bb7 + i3 1, label %bb + i3 2, label %bb3 + i3 3, label %bb4 + i3 4, label %bb5 + i3 5, label %bb6 + ] + +bb: ; preds = %entry + br label %bb8 + +bb3: ; preds = %entry + br label %bb8 + +bb4: ; preds = %entry + br label %bb8 + +bb5: ; preds = %entry + br label %bb8 + +bb6: ; preds = %entry + br label %bb8 + +bb7: ; preds = %entry + br label %bb8 + +bb8: ; preds = %bb7, %bb6, %bb5, %bb4, %bb3, %bb, %entry + %result = phi i3 [ 0, %bb7 ], [ 1, %bb6 ], [ 2, %bb5 ], [ 3, %bb4 ], [ 4, %bb3 ], [ 5, %bb ], [ 6, %entry ] + ret i3 %result +} diff --git a/test/Transforms/SimplifyCFG/MagicPointer.ll 
b/test/Transforms/SimplifyCFG/MagicPointer.ll index 93b9a27..b8b8cbd 100644 --- a/test/Transforms/SimplifyCFG/MagicPointer.ll +++ b/test/Transforms/SimplifyCFG/MagicPointer.ll @@ -2,15 +2,7 @@ ; ; RUN: opt < %s -simplifycfg -S | FileCheck %s -; CHECK: switch i64 %magicptr -; CHECK: i64 0, label -; CHECK: i64 1, label -; CHECK: i64 2, label -; CHECK: i64 3, label -; CHECK: i64 4, label -; CHECK: } - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin10.0.0" @.str = private constant [5 x i8] c"null\00" ; <[5 x i8]*> [#uses=2] @@ -18,7 +10,24 @@ target triple = "x86_64-apple-darwin10.0.0" @.str2 = private constant [4 x i8] c"two\00" ; <[4 x i8]*> [#uses=2] @.str3 = private constant [5 x i8] c"four\00" ; <[5 x i8]*> [#uses=2] +@.str_as1 = private addrspace(1) constant [5 x i8] c"null\00" ; <[5 x i8]*> [#uses=2] +@.str1_as1 = private addrspace(1) constant [4 x i8] c"one\00" ; <[4 x i8]*> [#uses=2] +@.str2_as1 = private addrspace(1) constant [4 x i8] c"two\00" ; <[4 x i8]*> [#uses=2] +@.str3_as1 = private addrspace(1) constant [5 x i8] c"four\00" ; <[5 x i8]*> [#uses=2] + +declare i32 @puts(i8*) +declare i32 @puts_as1(i8 addrspace(1)*) + define void @f(i8* %x) nounwind ssp { +; CHECK-LABEL: @f( +; CHECK: switch i64 %magicptr +; CHECK: i64 0, label +; CHECK: i64 1, label +; CHECK: i64 2, label +; CHECK: i64 3, label +; CHECK: i64 4, label +; CHECK: } + entry: %tobool = icmp eq i8* %x, null ; <i1> [#uses=1] br i1 %tobool, label %if.then, label %if.else @@ -72,4 +81,69 @@ if.end21: ; preds = %if.end20, %if.then ret void } -declare i32 @puts(i8*) +; Is it useful to test a version where the ptrtoints are to the same +; size? 
+define void @f_as1(i8 addrspace(1)* %x) nounwind ssp { +; CHECK-LABEL: @f_as1( +; CHECK: ptrtoint i8 addrspace(1)* %x to i16 +; CHECK: switch i16 %magicptr +; CHECK: i16 0, label +; CHECK: i16 1, label +; CHECK: i16 2, label +; CHECK: i16 3, label +; CHECK: i16 4, label +; CHECK: } + +entry: + %tobool = icmp eq i8 addrspace(1)* %x, null ; <i1> [#uses=1] + br i1 %tobool, label %if.then, label %if.else + +if.then: ; preds = %entry + %call = call i32 @puts_as1(i8 addrspace(1)* getelementptr inbounds ([5 x i8] addrspace(1)* @.str_as1, i64 0, i64 0)) nounwind ; <i32> [#uses=0] + br label %if.end21 + +if.else: ; preds = %entry + %cmp = icmp eq i8 addrspace(1)* %x, inttoptr (i64 1 to i8 addrspace(1)*) ; <i1> [#uses=1] + br i1 %cmp, label %if.then2, label %if.else4 + +if.then2: ; preds = %if.else + %call3 = call i32 @puts_as1(i8 addrspace(1)* getelementptr inbounds ([4 x i8] addrspace(1)* @.str1_as1, i64 0, i64 0)) nounwind ; <i32> [#uses=0] + br label %if.end20 + +if.else4: ; preds = %if.else + %cmp6 = icmp eq i8 addrspace(1)* %x, inttoptr (i64 2 to i8 addrspace(1)*) ; <i1> [#uses=1] + br i1 %cmp6, label %if.then9, label %lor.lhs.false + +lor.lhs.false: ; preds = %if.else4 + %cmp8 = icmp eq i8 addrspace(1)* %x, inttoptr (i64 3 to i8 addrspace(1)*) ; <i1> [#uses=1] + br i1 %cmp8, label %if.then9, label %if.else11 + +if.then9: ; preds = %lor.lhs.false, %if.else4 + %call10 = call i32 @puts_as1(i8 addrspace(1)* getelementptr inbounds ([4 x i8] addrspace(1)* @.str2_as1, i64 0, i64 0)) nounwind ; <i32> [#uses=0] + br label %if.end19 + +if.else11: ; preds = %lor.lhs.false + %cmp13 = icmp eq i8 addrspace(1)* %x, inttoptr (i64 4 to i8 addrspace(1)*) ; <i1> [#uses=1] + br i1 %cmp13, label %if.then14, label %if.else16 + +if.then14: ; preds = %if.else11 + %call15 = call i32 @puts_as1(i8 addrspace(1)* getelementptr inbounds ([5 x i8] addrspace(1)* @.str3_as1, i64 0, i64 0)) nounwind ; <i32> [#uses=0] + br label %if.end + +if.else16: ; preds = %if.else11 + %call18 = call i32 
@puts_as1(i8 addrspace(1)* %x) nounwind ; <i32> [#uses=0] + br label %if.end + +if.end: ; preds = %if.else16, %if.then14 + br label %if.end19 + +if.end19: ; preds = %if.end, %if.then9 + br label %if.end20 + +if.end20: ; preds = %if.end19, %if.then2 + br label %if.end21 + +if.end21: ; preds = %if.end20, %if.then + ret void +} + diff --git a/test/Transforms/SimplifyCFG/R600/lit.local.cfg b/test/Transforms/SimplifyCFG/R600/lit.local.cfg deleted file mode 100644 index e69de29..0000000 --- a/test/Transforms/SimplifyCFG/R600/lit.local.cfg +++ /dev/null diff --git a/test/Transforms/SimplifyCFG/R600/parallelandifcollapse.ll b/test/Transforms/SimplifyCFG/R600/parallelandifcollapse.ll deleted file mode 100644 index e69de29..0000000 --- a/test/Transforms/SimplifyCFG/R600/parallelandifcollapse.ll +++ /dev/null diff --git a/test/Transforms/SimplifyCFG/R600/parallelorifcollapse.ll b/test/Transforms/SimplifyCFG/R600/parallelorifcollapse.ll deleted file mode 100644 index e69de29..0000000 --- a/test/Transforms/SimplifyCFG/R600/parallelorifcollapse.ll +++ /dev/null diff --git a/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg b/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg index 786fee9..4d344fa 100644 --- a/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg +++ b/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - targets = set(config.root.targets_to_build.split()) if not 'Sparc' in targets: config.unsupported = True diff --git a/test/Transforms/SimplifyCFG/X86/lit.local.cfg b/test/Transforms/SimplifyCFG/X86/lit.local.cfg index a8ad0f1..ba763cf 100644 --- a/test/Transforms/SimplifyCFG/X86/lit.local.cfg +++ b/test/Transforms/SimplifyCFG/X86/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - targets = set(config.root.targets_to_build.split()) if not 'X86' in targets: config.unsupported = True diff --git a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll 
b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll index 71259c9..3687327 100644 --- a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll +++ b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll @@ -711,7 +711,7 @@ return: ret i32 %retval.0 } -define i32 @cprop(i32 %x) { +define i32 @cprop(i32 %x, i32 %y) { entry: switch i32 %x, label %sw.default [ i32 1, label %return @@ -727,7 +727,8 @@ sw.bb1: br label %return sw.bb2: %and = and i32 %x, 1 - %tobool = icmp ne i32 %and, 0 + %and.ptr = inttoptr i32 %and to i8* + %tobool = icmp ne i8* %and.ptr, null %cond = select i1 %tobool, i32 -123, i32 456 %sub = sub nsw i32 %x, %cond br label %return @@ -735,13 +736,15 @@ sw.bb2: sw.bb3: %trunc = trunc i32 %x to i8 %sext = sext i8 %trunc to i32 + %select.i = icmp sgt i32 %sext, 0 + %select = select i1 %select.i, i32 %sext, i32 %y br label %return sw.default: br label %return return: - %retval.0 = phi i32 [ 123, %sw.default ], [ %sext, %sw.bb3 ], [ %sub, %sw.bb2 ], [ 42, %sw.bb1 ], [ 5, %entry ] + %retval.0 = phi i32 [ 123, %sw.default ], [ %select, %sw.bb3 ], [ %sub, %sw.bb2 ], [ 42, %sw.bb1 ], [ 5, %entry ] ret i32 %retval.0 ; CHECK-LABEL: @cprop( diff --git a/test/Transforms/SimplifyCFG/attr-noduplicate.ll b/test/Transforms/SimplifyCFG/attr-noduplicate.ll new file mode 100644 index 0000000..523aa51 --- /dev/null +++ b/test/Transforms/SimplifyCFG/attr-noduplicate.ll @@ -0,0 +1,37 @@ +; RUN: opt < %s -simplifycfg -S | FileCheck %s + +; This test checks that the SimplifyCFG pass won't duplicate a call to a +; function marked noduplicate. 
+; +; CHECK-LABEL: @noduplicate +; CHECK: call void @barrier +; CHECK-NOT: call void @barrier +define void @noduplicate(i32 %cond, i32* %out) { +entry: + %out1 = getelementptr i32* %out, i32 1 + %out2 = getelementptr i32* %out, i32 2 + %cmp = icmp eq i32 %cond, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + store i32 5, i32* %out + br label %if.end + +if.end: + call void @barrier() #0 + br i1 %cmp, label %cond.end, label %cond.false + +cond.false: + store i32 5, i32* %out1 + br label %cond.end + +cond.end: + %value = phi i32 [ 1, %cond.false ], [ 0, %if.end ] + store i32 %value, i32* %out2 + ret void +} + +; Function Attrs: noduplicate nounwind +declare void @barrier() #0 + +attributes #0 = { noduplicate nounwind } diff --git a/test/Transforms/SimplifyCFG/branch-fold-dbg.ll b/test/Transforms/SimplifyCFG/branch-fold-dbg.ll index 7fc0cbd..9d8086c 100644 --- a/test/Transforms/SimplifyCFG/branch-fold-dbg.ll +++ b/test/Transforms/SimplifyCFG/branch-fold-dbg.ll @@ -41,10 +41,10 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.sp = !{!0} -!0 = metadata !{i32 589870, metadata !15, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 231, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (i32)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!0 = metadata !{i32 589870, metadata !15, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 231, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 231] [def] [scope 0] [foo] !1 = metadata !{i32 589865, metadata !15} ; [ DW_TAG_file_type ] !2 = metadata !{i32 589841, metadata !15, i32 12, metadata !"clang (trunk 129006)", i1 true, metadata !"", i32 0, metadata !4, metadata !4, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !15, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, 
i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 589845, metadata !15, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{null} !5 = metadata !{i32 131, i32 2, metadata !0, null} !6 = metadata !{i32 134, i32 2, metadata !0, null} diff --git a/test/Transforms/SimplifyCFG/common-dest-folding.ll b/test/Transforms/SimplifyCFG/common-dest-folding.ll new file mode 100644 index 0000000..0aa3b2c --- /dev/null +++ b/test/Transforms/SimplifyCFG/common-dest-folding.ll @@ -0,0 +1,57 @@ +; RUN: opt < %s -simplifycfg -S | FileCheck %s + +;CHECK: @foo +;CHECK: and i32 %c1, %k +;CHECK: icmp eq i32 +;CHECK: and i32 %c2, %k +;CHECK: icmp eq i32 +;CHECK: or i1 +;CHECK: ret +define i32 @foo(i32 %k, i32 %c1, i32 %c2) { + %1 = and i32 %c1, %k + %2 = icmp eq i32 %1, 0 + br i1 %2, label %8, label %3 + +; <label>:3 ; preds = %0 + %4 = and i32 %c2, %k + %5 = icmp eq i32 %4, 0 + br i1 %5, label %8, label %6 + +; <label>:6 ; preds = %3 + %7 = tail call i32 (...)* @bar() nounwind + br label %8 + +; <label>:8 ; preds = %3, %0, %6 + ret i32 undef +} + +;CHECK: @conduse +;CHECK: shl i32 1, %c1 +;CHECK-NEXT: shl i32 1, %c2 +;CHECK-NEXT: and i32 +;CHECK-NEXT: icmp eq i32 +;CHECK-NEXT: and i32 +;CHECK-NEXT: icmp eq i32 +;CHECK: ret +define i32 @conduse(i32 %k, i32 %c1, i32 %c2) #0 { +bb: + %tmp = shl i32 1, %c1 + %tmp4 = shl i32 1, %c2 + %tmp1 = and i32 %tmp, %k + %tmp2 = icmp eq i32 %tmp1, 0 + br i1 %tmp2, label %bb9, label %bb3 + +bb3: ; preds = %bb + %tmp5 = and i32 %tmp4, %k + %tmp6 = icmp eq i32 %tmp5, 0 + br i1 %tmp6, label %bb9, label %bb7 + +bb7: ; preds = %bb3 + %tmp8 = tail call i32 (...)* @bar() #1 + br label %bb9 + +bb9: ; preds = %bb7, %bb3, %bb + ret i32 undef +} + +declare i32 @bar(...) 
diff --git a/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll b/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll index 0e36066..0547fa9 100644 --- a/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll +++ b/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll @@ -29,12 +29,13 @@ declare i32 @bar(...) declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone +!llvm.module.flags = !{!21} !llvm.dbg.sp = !{!0} -!0 = metadata !{i32 589870, metadata !20, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!0 = metadata !{i32 589870, metadata !20, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [foo] !1 = metadata !{i32 589865, metadata !20} ; [ DW_TAG_file_type ] !2 = metadata !{i32 589841, metadata !20, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, metadata !8, metadata !8, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !20, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 589845, metadata !20, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5} !5 = metadata !{i32 589860, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !6 = metadata !{i32 590081, metadata !0, metadata !"i", metadata !1, i32 16777218, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] @@ -52,3 +53,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !18 = metadata !{i32 8, i32 3, metadata !17, 
null} !19 = metadata !{i32 9, i32 3, metadata !10, null} !20 = metadata !{metadata !"b.c", metadata !"/private/tmp"} +!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Transforms/SimplifyCFG/lit.local.cfg b/test/Transforms/SimplifyCFG/lit.local.cfg deleted file mode 100644 index e69de29..0000000 --- a/test/Transforms/SimplifyCFG/lit.local.cfg +++ /dev/null diff --git a/test/Transforms/SimplifyCFG/switch_create.ll b/test/Transforms/SimplifyCFG/switch_create.ll index 5500ba2..e1e9157 100644 --- a/test/Transforms/SimplifyCFG/switch_create.ll +++ b/test/Transforms/SimplifyCFG/switch_create.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -simplifycfg -S | FileCheck %s +; RUN: opt -S -simplifycfg < %s | FileCheck -check-prefix=CHECK %s +; RUN: opt -S -default-data-layout="p:32:32-p1:16:16" -simplifycfg < %s | FileCheck -check-prefix=CHECK -check-prefix=DL %s declare void @foo1() @@ -22,6 +23,44 @@ F: ; preds = %0 ; CHECK: ] } +define void @test1_ptr(i32* %V) { + %C1 = icmp eq i32* %V, inttoptr (i32 4 to i32*) + %C2 = icmp eq i32* %V, inttoptr (i32 17 to i32*) + %CN = or i1 %C1, %C2 ; <i1> [#uses=1] + br i1 %CN, label %T, label %F +T: ; preds = %0 + call void @foo1( ) + ret void +F: ; preds = %0 + call void @foo2( ) + ret void +; CHECK-LABEL: @test1_ptr( +; DL: %magicptr = ptrtoint i32* %V to i32 +; DL: switch i32 %magicptr, label %F [ +; DL: i32 17, label %T +; DL: i32 4, label %T +; DL: ] +} + +define void @test1_ptr_as1(i32 addrspace(1)* %V) { + %C1 = icmp eq i32 addrspace(1)* %V, inttoptr (i32 4 to i32 addrspace(1)*) + %C2 = icmp eq i32 addrspace(1)* %V, inttoptr (i32 17 to i32 addrspace(1)*) + %CN = or i1 %C1, %C2 ; <i1> [#uses=1] + br i1 %CN, label %T, label %F +T: ; preds = %0 + call void @foo1( ) + ret void +F: ; preds = %0 + call void @foo2( ) + ret void +; CHECK-LABEL: @test1_ptr_as1( +; DL: %magicptr = ptrtoint i32 addrspace(1)* %V to i16 +; DL: switch i16 %magicptr, label %F [ +; DL: i16 17, label %T +; DL: i16 4, label %T +; DL: ] +} + 
define void @test2(i32 %V) { %C1 = icmp ne i32 %V, 4 ; <i1> [#uses=1] %C2 = icmp ne i32 %V, 17 ; <i1> [#uses=1] @@ -79,7 +118,7 @@ lor.end: ; preds = %lor.rhs, %lor.lhs.f %0 = phi i1 [ true, %lor.lhs.false ], [ true, %entry ], [ %cmp8, %lor.rhs ] %lor.ext = zext i1 %0 to i32 ret i32 %lor.ext - + ; CHECK-LABEL: @test4( ; CHECK: switch i8 %c, label %lor.rhs [ ; CHECK: i8 62, label %lor.end @@ -139,7 +178,7 @@ shortcirc_done.4: ; preds = %shortcirc_next.3, %shortcirc_next.2, UnifiedReturnBlock: ; preds = %shortcirc_done.4, %shortcirc_next.4 %UnifiedRetVal = phi i1 [ %tmp.26, %shortcirc_next.4 ], [ true, %shortcirc_done.4 ] ; <i1> [#uses=1] ret i1 %UnifiedRetVal - + ; CHECK-LABEL: @test6( ; CHECK: %tmp.2.i.off = add i32 %tmp.2.i, -14 ; CHECK: %switch = icmp ult i32 %tmp.2.i.off, 6 @@ -160,7 +199,7 @@ if.then: ; preds = %entry if.end: ; preds = %entry ret void - + ; CHECK-LABEL: @test7( ; CHECK: %cmp = icmp ult i32 %x, 32 ; CHECK: br i1 %cmp, label %if.then, label %switch.early.test @@ -189,7 +228,7 @@ if.then: ; preds = %entry if.end: ; preds = %entry ret i32 0 - + ; CHECK-LABEL: @test8( ; CHECK: switch.early.test: ; CHECK: switch i8 %c, label %if.end [ @@ -245,7 +284,7 @@ lor.end: ; preds = %lor.rhs, %lor.lhs.f %0 = phi i1 [ true, %lor.lhs.false36 ], [ true, %lor.lhs.false31 ], [ true, %lor.lhs.false26 ], [ true, %lor.lhs.false21 ], [ true, %lor.lhs.false16 ], [ true, %lor.lhs.false11 ], [ true, %lor.lhs.false6 ], [ true, %lor.lhs.false ], [ true, %entry ], [ %cmp43, %lor.rhs ] %conv46 = zext i1 %0 to i32 ret i32 %conv46 - + ; CHECK-LABEL: @test9( ; CHECK: %cmp = icmp ult i8 %c, 33 ; CHECK: br i1 %cmp, label %lor.end, label %switch.early.test diff --git a/test/Transforms/SimplifyCFG/trap-debugloc.ll b/test/Transforms/SimplifyCFG/trap-debugloc.ll index 953557ff..3b449cb 100644 --- a/test/Transforms/SimplifyCFG/trap-debugloc.ll +++ b/test/Transforms/SimplifyCFG/trap-debugloc.ll @@ -8,15 +8,17 @@ define void @foo() nounwind ssp { } !llvm.dbg.cu = !{!2} 
+!llvm.module.flags = !{!10} !llvm.dbg.sp = !{!0} -!0 = metadata !{i32 589870, metadata !8, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!0 = metadata !{i32 589870, metadata !8, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 0] [foo] !1 = metadata !{i32 589865, metadata !8} ; [ DW_TAG_file_type ] !2 = metadata !{i32 589841, metadata !8, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-206.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !9, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !8, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 589845, metadata !8, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{null} !5 = metadata !{i32 4, i32 2, metadata !6, null} !6 = metadata !{i32 589835, metadata !8, metadata !0, i32 3, i32 12, i32 0} ; [ DW_TAG_lexical_block ] !7 = metadata !{i32 5, i32 1, metadata !6, null} !8 = metadata !{metadata !"foo.c", metadata !"/private/tmp"} !9 = metadata !{metadata !0} +!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Transforms/Sink/lit.local.cfg b/test/Transforms/Sink/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/Sink/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/StripSymbols/2007-01-15-llvm.used.ll 
b/test/Transforms/StripSymbols/2007-01-15-llvm.used.ll index 69febc3..438fa96 100644 --- a/test/Transforms/StripSymbols/2007-01-15-llvm.used.ll +++ b/test/Transforms/StripSymbols/2007-01-15-llvm.used.ll @@ -1,5 +1,10 @@ -; RUN: opt < %s -strip -S | grep foo | count 2 -; RUN: opt < %s -strip -S | grep bar | count 2 +; RUN: opt < %s -strip -S | FileCheck %s + +; CHECK: foo +; CHECK: bar +; CHECK: foo +; CHECK: bar + @llvm.used = appending global [2 x i8*] [ i8* bitcast (i32* @foo to i8*), i8* bitcast (i32 ()* @bar to i8*) ], section "llvm.metadata" ; <[2 x i8*]*> [#uses=0] @foo = internal constant i32 41 ; <i32*> [#uses=1] diff --git a/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll b/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll index 0181c9b..5353744 100644 --- a/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll +++ b/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll @@ -1,4 +1,6 @@ -; RUN: opt -strip-debug < %s | llvm-dis | grep -v llvm.dbg +; RUN: opt -strip-debug < %s -S | FileCheck %s + +; CHECK-NOT: llvm.dbg @x = common global i32 0 ; <i32*> [#uses=0] @@ -11,6 +13,7 @@ entry: declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!13} !llvm.dbg.sp = !{!0} !llvm.dbg.lv.foo = !{!5} !llvm.dbg.gv = !{!8} @@ -18,7 +21,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !0 = metadata !{i32 524334, metadata !12, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 524329, metadata !12} ; [ DW_TAG_file_type ] !2 = metadata !{i32 524305, metadata !12, i32 1, metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !4, metadata !4, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 524309, metadata !12, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 524309, metadata !12, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{null} !5 = metadata !{i32 524544, metadata !6, metadata !"y", metadata !1, i32 3, metadata !7} ; [ DW_TAG_auto_variable ] !6 = metadata !{i32 524299, metadata !12, metadata !0, i32 2, i32 0, i32 0} ; [ DW_TAG_lexical_block ] @@ -28,3 +31,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !10 = metadata !{i32 3, i32 0, metadata !6, null} !11 = metadata !{i32 4, i32 0, metadata !6, null} !12 = metadata !{metadata !"b.c", metadata !"/tmp"} +!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Transforms/StripSymbols/2010-07-01-DeadDbgInfo.ll b/test/Transforms/StripSymbols/2010-07-01-DeadDbgInfo.ll deleted file mode 100644 index b893410..0000000 --- a/test/Transforms/StripSymbols/2010-07-01-DeadDbgInfo.ll +++ /dev/null @@ -1,49 +0,0 @@ -; RUN: opt -strip-dead-debug-info < %s | llvm-dis -o %t.ll -; RUN: grep -v bar %t.ll -; RUN: grep -v abcd %t.ll - -@xyz = global i32 2 ; <i32*> [#uses=1] - -declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone - -define i32 @fn() nounwind readnone ssp { -entry: - ret i32 0, !dbg !17 -} - -define i32 @foo(i32 %i) nounwind readonly ssp { -entry: - tail call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !14), !dbg !19 - %.0 = load i32* @xyz, align 4 ; <i32> [#uses=1] - ret i32 %.0, !dbg !20 -} - -!llvm.dbg.cu = !{!2} -!llvm.dbg.sp = !{!0, !5, !9} -!llvm.dbg.lv.bar = !{!12} -!llvm.dbg.lv.foo = !{!14} -!llvm.dbg.gv = !{!15, 
!16} - -!0 = metadata !{i32 524334, metadata !22, null, metadata !"bar", metadata !"bar", metadata !"", i32 5, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 524329, metadata !22} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 524305, metadata !22, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !4, metadata !4, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 524309, metadata !22, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] -!4 = metadata !{null} -!5 = metadata !{i32 524334, metadata !22, null, metadata !"fn", metadata !"fn", metadata !"fn", i32 6, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @fn, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!6 = metadata !{i32 524309, metadata !22, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ] -!7 = metadata !{metadata !8} -!8 = metadata !{i32 524324, metadata !22, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!9 = metadata !{i32 524334, metadata !22, null, metadata !"foo", metadata !"foo", metadata !"foo", i32 7, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!10 = metadata !{i32 524309, metadata !22, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ] -!11 = metadata !{metadata !8, metadata !8} -!12 = metadata !{i32 524544, metadata !13, metadata !"bb", metadata !1, i32 5, metadata !8} ; [ DW_TAG_auto_variable ] -!13 = metadata !{i32 524299, metadata !22, metadata !0, i32 5, i32 0, i32 0} ; [ DW_TAG_lexical_block ] -!14 = metadata !{i32 
524545, metadata !9, metadata !"i", metadata !1, i32 7, metadata !8} ; [ DW_TAG_arg_variable ] -!15 = metadata !{i32 524340, i32 0, metadata !1, metadata !"abcd", metadata !"abcd", metadata !"", metadata !1, i32 2, metadata !8, i1 true, i1 true, null} ; [ DW_TAG_variable ] -!16 = metadata !{i32 524340, i32 0, metadata !1, metadata !"xyz", metadata !"xyz", metadata !"", metadata !1, i32 3, metadata !8, i1 false, i1 true, i32* @xyz} ; [ DW_TAG_variable ] -!17 = metadata !{i32 6, i32 0, metadata !18, null} -!18 = metadata !{i32 524299, metadata !22, metadata !5, i32 6, i32 0, i32 0} ; [ DW_TAG_lexical_block ] -!19 = metadata !{i32 7, i32 0, metadata !9, null} -!20 = metadata !{i32 10, i32 0, metadata !21, null} -!21 = metadata !{i32 524299, metadata !22, metadata !9, i32 7, i32 0, i32 0} ; [ DW_TAG_lexical_block ] -!22 = metadata !{metadata !"g.c", metadata !"/tmp/"} diff --git a/test/Transforms/StripSymbols/2010-08-25-crash.ll b/test/Transforms/StripSymbols/2010-08-25-crash.ll index e480f43..2878468 100644 --- a/test/Transforms/StripSymbols/2010-08-25-crash.ll +++ b/test/Transforms/StripSymbols/2010-08-25-crash.ll @@ -5,18 +5,20 @@ entry: } !llvm.dbg.cu = !{!2} -!llvm.dbg.sp = !{!0} -!llvm.dbg.gv = !{!6} +!llvm.module.flags = !{!14} !0 = metadata !{i32 524334, metadata !10, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 524329, metadata !10} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 524305, metadata !10, i32 12, metadata !"clang version 2.8 (trunk 112062)", i1 true, metadata !"", i32 0, metadata !11, metadata !11, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 524309, metadata !10, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!2 = metadata !{i32 524305, metadata !10, i32 12, 
metadata !"clang version 2.8 (trunk 112062)", i1 true, metadata !"", i32 0, metadata !11, metadata !11, metadata !12, metadata !13, null, metadata !""} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 524309, metadata !10, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !4 = metadata !{metadata !5} !5 = metadata !{i32 524324, metadata !10, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 524340, i32 0, metadata !1, metadata !"i", metadata !"i", metadata !"i", metadata !1, i32 2, metadata !7, i1 true, i1 true, i32 0} ; [ DW_TAG_variable ] +!6 = metadata !{i32 524340, i32 0, metadata !1, metadata !"i", metadata !"i", metadata !"i", metadata !1, i32 2, metadata !7, i1 true, i1 true, i32 0, null} ; [ DW_TAG_variable ] !7 = metadata !{i32 524326, metadata !10, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !5} ; [ DW_TAG_const_type ] !8 = metadata !{i32 3, i32 13, metadata !9, null} !9 = metadata !{i32 524299, metadata !10, metadata !0, i32 3, i32 11, i32 0} ; [ DW_TAG_lexical_block ] !10 = metadata !{metadata !"/tmp/a.c", metadata !"/Volumes/Lalgate/clean/D.CW"} !11 = metadata !{i32 0} +!12 = metadata !{metadata !0} +!13 = metadata !{metadata !6} +!14 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Transforms/StripSymbols/lit.local.cfg b/test/Transforms/StripSymbols/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/StripSymbols/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/StripSymbols/strip-dead-debug-info.ll b/test/Transforms/StripSymbols/strip-dead-debug-info.ll new file mode 100644 index 0000000..2d687ae --- /dev/null +++ b/test/Transforms/StripSymbols/strip-dead-debug-info.ll @@ -0,0 +1,58 @@ +; RUN: opt 
-strip-dead-debug-info -verify %s -S | FileCheck %s + +; CHECK: ModuleID = '{{.*}}' +; CHECK-NOT: bar +; CHECK-NOT: abcd + +@xyz = global i32 2 + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.value(metadata, i64, metadata) #0 + +; Function Attrs: nounwind readnone ssp +define i32 @fn() #1 { +entry: + ret i32 0, !dbg !18 +} + +; Function Attrs: nounwind readonly ssp +define i32 @foo(i32 %i) #2 { +entry: + tail call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !15), !dbg !20 + %.0 = load i32* @xyz, align 4 + ret i32 %.0, !dbg !21 +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind readnone ssp } +attributes #2 = { nounwind readonly ssp } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!25} + +!0 = metadata !{i32 524305, metadata !1, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !23, metadata !24, null, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp//g.c] [DW_LANG_C89] +!1 = metadata !{metadata !"g.c", metadata !"/tmp/"} +!2 = metadata !{null} +!3 = metadata !{i32 524334, metadata !1, null, metadata !"bar", metadata !"bar", metadata !"", i32 5, metadata !4, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 5] [local] [def] [scope 0] [bar] +!4 = metadata !{i32 524309, metadata !1, metadata !5, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!5 = metadata !{i32 524329, metadata !1} ; [ DW_TAG_file_type ] [/tmp//g.c] +!6 = metadata !{i32 524334, metadata !1, null, metadata !"fn", metadata !"fn", metadata !"fn", i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @fn, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 6] [def] [scope 0] [fn] +!7 = metadata !{i32 524309, metadata !1, metadata !5, metadata !"", i32 0, 
i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!8 = metadata !{metadata !9} +!9 = metadata !{i32 524324, metadata !1, metadata !5, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!10 = metadata !{i32 524334, metadata !1, null, metadata !"foo", metadata !"foo", metadata !"foo", i32 7, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 7] [def] [scope 0] [foo] +!11 = metadata !{i32 524309, metadata !1, metadata !5, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!12 = metadata !{metadata !9, metadata !9} +!13 = metadata !{i32 524544, metadata !14, metadata !"bb", metadata !5, i32 5, metadata !9} +!14 = metadata !{i32 524299, metadata !1, metadata !3, i32 5, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp//g.c] +!15 = metadata !{i32 524545, metadata !10, metadata !"i", metadata !5, i32 7, metadata !9} +!16 = metadata !{i32 524340, i32 0, metadata !5, metadata !"abcd", metadata !"abcd", metadata !"", metadata !5, i32 2, metadata !9, i1 true, i1 true, null, null} +!17 = metadata !{i32 524340, i32 0, metadata !5, metadata !"xyz", metadata !"xyz", metadata !"", metadata !5, i32 3, metadata !9, i1 false, i1 true, i32* @xyz, null} +!18 = metadata !{i32 6, i32 0, metadata !19, null} +!19 = metadata !{i32 524299, metadata !1, metadata !6, i32 6, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp//g.c] +!20 = metadata !{i32 7, i32 0, metadata !10, null} +!21 = metadata !{i32 10, i32 0, metadata !22, null} +!22 = metadata !{i32 524299, metadata !1, metadata !10, i32 7, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp//g.c] +!23 = metadata !{metadata !3, metadata !6, metadata !10} 
+!24 = metadata !{metadata !16, metadata !17} +!25 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/Transforms/StructurizeCFG/branch-on-argument.ll b/test/Transforms/StructurizeCFG/branch-on-argument.ll new file mode 100644 index 0000000..4eba0cd --- /dev/null +++ b/test/Transforms/StructurizeCFG/branch-on-argument.ll @@ -0,0 +1,47 @@ +; RUN: opt -S -o - -structurizecfg < %s | FileCheck %s + +; CHECK-LABEL: @invert_branch_on_arg_inf_loop( +; CHECK: entry: +; CHECK: %arg.inv = xor i1 %arg, true +; CHECK: phi i1 [ false, %Flow1 ], [ %arg.inv, %entry ] +define void @invert_branch_on_arg_inf_loop(i32 addrspace(1)* %out, i1 %arg) { +entry: + br i1 %arg, label %for.end, label %for.body + +for.body: ; preds = %entry, %for.body + store i32 999, i32 addrspace(1)* %out, align 4 + br label %for.body + +for.end: ; preds = %Flow + ret void +} + + +; CHECK-LABEL: @invert_branch_on_arg_jump_into_loop( +; CHECK: entry: +; CHECK: %arg.inv = xor i1 %arg, true +; CHECK: Flow: +; CHECK: Flow1: +define void @invert_branch_on_arg_jump_into_loop(i32 addrspace(1)* %out, i32 %n, i1 %arg) { +entry: + br label %for.body + +for.body: + %i = phi i32 [0, %entry], [%i.inc, %end.loop] + %ptr = getelementptr i32 addrspace(1)* %out, i32 %i + store i32 %i, i32 addrspace(1)* %ptr, align 4 + br i1 %arg, label %mid.loop, label %end.loop + +mid.loop: + store i32 333, i32 addrspace(1)* %out, align 4 + br label %for.end + +end.loop: + %i.inc = add i32 %i, 1 + %cmp = icmp ne i32 %i.inc, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: + ret void +} + diff --git a/test/Transforms/StructurizeCFG/lit.local.cfg b/test/Transforms/StructurizeCFG/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/StructurizeCFG/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/StructurizeCFG/no-branch-to-entry.ll b/test/Transforms/StructurizeCFG/no-branch-to-entry.ll new file mode 100644 index 
0000000..2e22c87 --- /dev/null +++ b/test/Transforms/StructurizeCFG/no-branch-to-entry.ll @@ -0,0 +1,31 @@ +; RUN: opt -S -o - -structurizecfg < %s | FileCheck %s + +; CHECK-LABEL: @no_branch_to_entry_undef( +; CHECK: entry: +; CHECK-NEXT: br label %entry.orig +define void @no_branch_to_entry_undef(i32 addrspace(1)* %out) { +entry: + br i1 undef, label %for.end, label %for.body + +for.body: ; preds = %entry, %for.body + store i32 999, i32 addrspace(1)* %out, align 4 + br label %for.body + +for.end: ; preds = %Flow + ret void +} + +; CHECK-LABEL: @no_branch_to_entry_true( +; CHECK: entry: +; CHECK-NEXT: br label %entry.orig +define void @no_branch_to_entry_true(i32 addrspace(1)* %out) { +entry: + br i1 true, label %for.end, label %for.body + +for.body: ; preds = %entry, %for.body + store i32 999, i32 addrspace(1)* %out, align 4 + br label %for.body + +for.end: ; preds = %Flow + ret void +} diff --git a/test/Transforms/StructurizeCFG/switch.ll b/test/Transforms/StructurizeCFG/switch.ll new file mode 100644 index 0000000..316df57 --- /dev/null +++ b/test/Transforms/StructurizeCFG/switch.ll @@ -0,0 +1,23 @@ +; RUN: opt -S -structurizecfg %s -o - | FileCheck %s + +; The structurizecfg pass cannot handle switch instructions, so we need to +; make sure the lower switch pass is always run before structurizecfg. 
+ +; CHECK-LABEL: @switch +define void @switch(i32 addrspace(1)* %out, i32 %cond) nounwind { +entry: +; CHECK: icmp + switch i32 %cond, label %done [ i32 0, label %zero] + +; CHECK: zero: +zero: +; CHECK: store i32 7, i32 addrspace(1)* %out + store i32 7, i32 addrspace(1)* %out +; CHECK: br label %done + br label %done + +; CHECK: done: +done: +; CHECK: ret void + ret void +} diff --git a/test/Transforms/TailCallElim/lit.local.cfg b/test/Transforms/TailCallElim/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Transforms/TailCallElim/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/TailDup/X86/lit.local.cfg b/test/Transforms/TailDup/X86/lit.local.cfg index da2db5a..ba763cf 100644 --- a/test/Transforms/TailDup/X86/lit.local.cfg +++ b/test/Transforms/TailDup/X86/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll'] - targets = set(config.root.targets_to_build.split()) if not 'X86' in targets: config.unsupported = True diff --git a/test/Transforms/TailDup/lit.local.cfg b/test/Transforms/TailDup/lit.local.cfg index 18c604a..19840aa 100644 --- a/test/Transforms/TailDup/lit.local.cfg +++ b/test/Transforms/TailDup/lit.local.cfg @@ -1,5 +1,3 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - targets = set(config.root.targets_to_build.split()) if not 'X86' in targets: config.unsupported = True diff --git a/test/Unit/lit.cfg b/test/Unit/lit.cfg index 15cf626..68ba0b3 100644 --- a/test/Unit/lit.cfg +++ b/test/Unit/lit.cfg @@ -4,6 +4,8 @@ import os +import lit.formats + # name: The name of this test suite. config.name = 'LLVM-Unit' @@ -43,9 +45,9 @@ if config.test_exec_root is None: # out-of-tree build situation). # Check for 'llvm_unit_site_config' user parameter, and use that if available. 
- site_cfg = lit.params.get('llvm_unit_site_config', None) + site_cfg = lit_config.params.get('llvm_unit_site_config', None) if site_cfg and os.path.exists(site_cfg): - lit.load_config(config, site_cfg) + lit_config.load_config(config, site_cfg) raise SystemExit # Try to detect the situation where we are using an out-of-tree build by @@ -58,7 +60,7 @@ if config.test_exec_root is None: llvm_config = lit.util.which('llvm-config', config.environment['PATH']) if not llvm_config: - lit.fatal('No site specific configuration available!') + lit_config.fatal('No site specific configuration available!') # Get the source and object roots. llvm_src_root = lit.util.capture(['llvm-config', '--src-root']).strip() @@ -67,16 +69,16 @@ if config.test_exec_root is None: # Validate that we got a tree which points to here. this_src_root = os.path.join(os.path.dirname(__file__),'..','..') if os.path.realpath(llvm_src_root) != os.path.realpath(this_src_root): - lit.fatal('No site specific configuration available!') + lit_config.fatal('No site specific configuration available!') # Check that the site specific configuration exists. site_cfg = os.path.join(llvm_obj_root, 'test', 'Unit', 'lit.site.cfg') if not os.path.exists(site_cfg): - lit.fatal('No site specific configuration available!') + lit_config.fatal('No site specific configuration available!') # Okay, that worked. Notify the user of the automagic, and reconfigure. - lit.note('using out-of-tree build at %r' % llvm_obj_root) - lit.load_config(config, site_cfg) + lit_config.note('using out-of-tree build at %r' % llvm_obj_root) + lit_config.load_config(config, site_cfg) raise SystemExit # If necessary, point the dynamic loader at libLLVM.so. diff --git a/test/Unit/lit.site.cfg.in b/test/Unit/lit.site.cfg.in index 65e98d0..7ff8155 100644 --- a/test/Unit/lit.site.cfg.in +++ b/test/Unit/lit.site.cfg.in @@ -1,3 +1,5 @@ +import sys + ## Autogenerated by LLVM/Clang configuration. # Do not edit! 
config.llvm_src_root = "@LLVM_SOURCE_DIR@" @@ -11,11 +13,12 @@ config.shlibpath_var = "@SHLIBPATH_VAR@" # Support substitution of the tools_dir and build_mode with user parameters. # This is used when we can't determine the tool dir at configuration time. try: - config.llvm_tools_dir = config.llvm_tools_dir % lit.params - config.llvm_build_mode = config.llvm_build_mode % lit.params -except KeyError,e: + config.llvm_tools_dir = config.llvm_tools_dir % lit_config.params + config.llvm_build_mode = config.llvm_build_mode % lit_config.params +except KeyError: + e = sys.exc_info()[1] key, = e.args - lit.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key)) + lit_config.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key)) # Let the main config do the real work. -lit.load_config(config, "@LLVM_SOURCE_DIR@/test/Unit/lit.cfg") +lit_config.load_config(config, "@LLVM_SOURCE_DIR@/test/Unit/lit.cfg") diff --git a/test/Verifier/ident-meta1.ll b/test/Verifier/ident-meta1.ll new file mode 100644 index 0000000..fb247a8 --- /dev/null +++ b/test/Verifier/ident-meta1.ll @@ -0,0 +1,12 @@ +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s +; Verify that llvm.ident is properly structured. +; llvm.ident takes a list of metadata entries. +; Each metadata entry can have only one string. + +!llvm.ident = !{!0, !1} +!0 = metadata !{metadata !"version string"} +!1 = metadata !{metadata !"string1", metadata !"string2"} +; CHECK: assembly parsed, but does not verify as correct! +; CHECK-NEXT: incorrect number of operands in llvm.ident metadata +; CHECK-NEXT: metadata !1 + diff --git a/test/Verifier/ident-meta2.ll b/test/Verifier/ident-meta2.ll new file mode 100644 index 0000000..e86f18a --- /dev/null +++ b/test/Verifier/ident-meta2.ll @@ -0,0 +1,13 @@ +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s +; Verify that llvm.ident is properly structured. +; llvm.ident takes a list of metadata entries. 
+; Each metadata entry can contain one string only. + +!llvm.ident = !{!0, !1, !2, !3} +!0 = metadata !{metadata !"str1"} +!1 = metadata !{metadata !"str2"} +!2 = metadata !{metadata !"str3"} +!3 = metadata !{i32 1} +; CHECK: assembly parsed, but does not verify as correct! +; CHECK-NEXT: invalid value for llvm.ident metadata entry operand(the operand should be a string) +; CHECK-NEXT: i32 1 diff --git a/test/Verifier/ident-meta3.ll b/test/Verifier/ident-meta3.ll new file mode 100644 index 0000000..a847b46 --- /dev/null +++ b/test/Verifier/ident-meta3.ll @@ -0,0 +1,10 @@ +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s +; Verify that llvm.ident is properly structured. +; llvm.ident takes a list of metadata entries. +; Each metadata entry can contain one string only. + +!llvm.ident = !{!0} +!0 = metadata !{metadata !{metadata !"nested metadata"}} +; CHECK: assembly parsed, but does not verify as correct! +; CHECK-NEXT: invalid value for llvm.ident metadata entry operand(the operand should be a string) +; CHECK-NEXT: metadata !1 diff --git a/test/Verifier/lit.local.cfg b/test/Verifier/lit.local.cfg deleted file mode 100644 index 19eebc0..0000000 --- a/test/Verifier/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Verifier/varargs-intrinsic.ll b/test/Verifier/varargs-intrinsic.ll new file mode 100644 index 0000000..f6d0a70 --- /dev/null +++ b/test/Verifier/varargs-intrinsic.ll @@ -0,0 +1,16 @@ +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s + +declare void @llvm.experimental.stackmap(i32, i32) +declare void @llvm.donothing(...) + +define void @foo1() { + call void @llvm.experimental.stackmap(i32 0, i32 12) +; CHECK: Callsite was not defined with variable arguments! + ret void +} + +define void @foo2() { + call void (...)* @llvm.donothing(i32 0, i64 1) +; CHECK: Intrinsic was not defined with variable arguments! 
+ ret void +} diff --git a/test/YAMLParser/spec-02-24.data b/test/YAMLParser/spec-02-24.data index 01ca7f5..56b25cb 100644 --- a/test/YAMLParser/spec-02-24.data +++ b/test/YAMLParser/spec-02-24.data @@ -1,4 +1,4 @@ -# RUN: yaml-bench -canonical %s +# RUN: yaml-bench -canonical %s | FileCheck %s %TAG ! tag:clarkevans.com,2002: --- !shape @@ -14,3 +14,8 @@ start: *ORIGIN color: 0xFFEEBB text: Pretty vector drawing. + +#CHECK: !<tag:clarkevans.com,2002:shape> +#CHECK: !<tag:clarkevans.com,2002:circle> +#CHECK: !<tag:clarkevans.com,2002:line> +#CHECK: !<tag:clarkevans.com,2002:label> diff --git a/test/YAMLParser/spec-07-04.data b/test/YAMLParser/spec-07-04.data index beba7d0..2c8b2ec 100644 --- a/test/YAMLParser/spec-07-04.data +++ b/test/YAMLParser/spec-07-04.data @@ -1,5 +1,7 @@ -# RUN: yaml-bench -canonical %s +# RUN: yaml-bench -canonical %s | FileCheck %s %TAG !yaml! tag:yaml.org,2002: --- !yaml!str "foo" + +#CHECK: !!str "foo" diff --git a/test/YAMLParser/yaml.data b/test/YAMLParser/yaml.data index 3ce5e4b..4f9b294 100644 --- a/test/YAMLParser/yaml.data +++ b/test/YAMLParser/yaml.data @@ -1,5 +1,11 @@ -# RUN: yaml-bench -canonical %s +# RUN: yaml-bench -canonical %s | FileCheck %s - !!yaml '!' - !!yaml '&' - !!yaml '*' + +# CHECK: !!seq [ +# CHECK: !!yaml "!", +# CHECK: !!yaml "&", +# CHECK: !!yaml "*", +# CHECK: ] diff --git a/test/lit.cfg b/test/lit.cfg index c018674..df1f4a1 100644 --- a/test/lit.cfg +++ b/test/lit.cfg @@ -7,6 +7,9 @@ import sys import re import platform +import lit.util +import lit.formats + # name: The name of this test suite. config.name = 'LLVM' @@ -14,9 +17,9 @@ config.name = 'LLVM' if sys.platform in ['win32']: # Seek sane tools in directories and set to $PATH. 
path = getattr(config, 'lit_tools_dir', None) - path = lit.getToolsPath(path, - config.environment['PATH'], - ['cmp.exe', 'grep.exe', 'sed.exe']) + path = lit_config.getToolsPath(path, + config.environment['PATH'], + ['cmp.exe', 'grep.exe', 'sed.exe']) if path is not None: path = os.path.pathsep.join((path, config.environment['PATH'])) @@ -36,17 +39,14 @@ else: # testFormat: The test format to use to interpret tests. config.test_format = lit.formats.ShTest(execute_external) -# To ignore test output on stderr so it doesn't trigger failures uncomment this: -#config.test_format = lit.formats.TclTest(ignoreStdErr=True) - -# suffixes: A list of file extensions to treat as test files, this is actually -# set by on_clone(). -config.suffixes = [] +# suffixes: A list of file extensions to treat as test files. This is overriden +# by individual lit.local.cfg files in the test subdirectories. +config.suffixes = ['.ll', '.c', '.cpp', '.test', '.txt', '.s'] # excludes: A list of directories to exclude from the testsuite. The 'Inputs' # subdirectories contain auxiliary inputs for various tests in their parent # directories. -config.excludes = ['Inputs'] +config.excludes = ['Inputs', 'CMakeLists.txt', 'README.txt', 'LICENSE.txt'] # test_source_root: The root path where tests are located. config.test_source_root = os.path.dirname(__file__) @@ -60,7 +60,7 @@ if llvm_obj_root is not None: if llvm_obj_root is not None: llvm_tools_dir = getattr(config, 'llvm_tools_dir', None) if not llvm_tools_dir: - lit.fatal('No LLVM tools dir set!') + lit_config.fatal('No LLVM tools dir set!') path = os.path.pathsep.join((llvm_tools_dir, config.environment['PATH'])) config.environment['PATH'] = path @@ -107,9 +107,9 @@ if config.test_exec_root is None: # out-of-tree build situation). # Check for 'llvm_site_config' user parameter, and use that if available. 
- site_cfg = lit.params.get('llvm_site_config', None) + site_cfg = lit_config.params.get('llvm_site_config', None) if site_cfg and os.path.exists(site_cfg): - lit.load_config(config, site_cfg) + lit_config.load_config(config, site_cfg) raise SystemExit # Try to detect the situation where we are using an out-of-tree build by @@ -122,7 +122,7 @@ if config.test_exec_root is None: llvm_config = lit.util.which('llvm-config', config.environment['PATH']) if not llvm_config: - lit.fatal('No site specific configuration available!') + lit_config.fatal('No site specific configuration available!') # Get the source and object roots. llvm_src_root = lit.util.capture(['llvm-config', '--src-root']).strip() @@ -131,16 +131,16 @@ if config.test_exec_root is None: # Validate that we got a tree which points to here. this_src_root = os.path.dirname(config.test_source_root) if os.path.realpath(llvm_src_root) != os.path.realpath(this_src_root): - lit.fatal('No site specific configuration available!') + lit_config.fatal('No site specific configuration available!') # Check that the site specific configuration exists. site_cfg = os.path.join(llvm_obj_root, 'test', 'lit.site.cfg') if not os.path.exists(site_cfg): - lit.fatal('No site specific configuration available!') + lit_config.fatal('No site specific configuration available!') # Okay, that worked. Notify the user of the automagic, and reconfigure. 
- lit.note('using out-of-tree build at %r' % llvm_obj_root) - lit.load_config(config, site_cfg) + lit_config.note('using out-of-tree build at %r' % llvm_obj_root) + lit_config.load_config(config, site_cfg) raise SystemExit ### @@ -169,11 +169,11 @@ else: config.substitutions.append( ('%defaultjit', '-use-mcjit='+defaultIsMCJIT) ) # Process jit implementation option -jit_impl_cfg = lit.params.get('jit_impl', None) +jit_impl_cfg = lit_config.params.get('jit_impl', None) if jit_impl_cfg == 'mcjit': # When running with mcjit, mangle -mcjit into target triple # and add -use-mcjit flag to lli invocation - if 'i686' in config.target_triple: + if 'i386' in config.target_triple or 'i686' in config.target_triple: config.target_triple += jit_impl_cfg + '-ia32' elif 'x86_64' in config.target_triple: config.target_triple += jit_impl_cfg + '-ia64' @@ -204,32 +204,53 @@ else: # Regex to reject matching a hyphen NOHYPHEN = r"(?<!-)" -for pattern in [r"\bbugpoint\b(?!-)", r"(?<!/|-)\bclang\b(?!-)", +for pattern in [r"\bbugpoint\b(?!-)", + r"(?<!/|-)\bclang\b(?!-)", r"\bgold\b", # Match llc but not -llc NOHYPHEN + r"\bllc\b", r"\blli\b", - r"\bllvm-ar\b", r"\bllvm-as\b", - r"\bllvm-bcanalyzer\b", r"\bllvm-config\b", - r"\bllvm-cov\b", r"\bllvm-diff\b", - r"\bllvm-dis\b", r"\bllvm-dwarfdump\b", - r"\bllvm-extract\b", r"\bllvm-jistlistener\b", - r"\bllvm-link\b", r"\bllvm-mc\b", - r"\bllvm-nm\b", r"\bllvm-objdump\b", - r"\bllvm-prof\b", r"\bllvm-size\b", - r"\bllvm-rtdyld\b", r"\bllvm-shlib\b", + r"\bllvm-PerfectShuffle\b", + r"\bllvm-ar\b", + r"\bllvm-as\b", + r"\bllvm-bcanalyzer\b", + r"\bllvm-config\b", + r"\bllvm-cov\b", + r"\bllvm-diff\b", + r"\bllvm-dis\b", + r"\bllvm-dwarfdump\b", + r"\bllvm-extract\b", + r"\bllvm-jistlistener\b", + r"\bllvm-link\b", + r"\bllvm-lto\b", + r"\bllvm-mc\b", + r"\bllvm-mcmarkup\b", + r"\bllvm-nm\b", + r"\bllvm-objdump\b", + r"\bllvm-ranlib\b", + r"\bllvm-readobj\b", + r"\bllvm-rtdyld\b", + r"\bllvm-shlib\b", + r"\bllvm-size\b", + r"\bllvm-tblgen\b", 
+ r"\bllvm-c-test\b", # Match llvmc but not -llvmc NOHYPHEN + r"\bllvmc\b", - r"\blto\b", - # Don't match '.opt', '-opt', - # '^opt' or '/opt'. - r"\bmacho-dump\b", r"(?<!\.|-|\^|/)\bopt\b", - r"\bllvm-tblgen\b", r"\bFileCheck\b", - r"\bFileUpdate\b", r"\bc-index-test\b", - r"\bfpcmp\b", r"\bllvm-PerfectShuffle\b", + # Match lto but not -lto + NOHYPHEN + r"\blto\b", + r"\bmacho-dump\b", + # Don't match '.opt', '-opt', '^opt' or '/opt'. + r"(?<!\.|-|\^|/)\bopt\b", + r"\bFileCheck\b", + r"\bFileUpdate\b", + r"\bc-index-test\b", + r"\bfpcmp\b", + r"\bobj2yaml\b", + r"\byaml2obj\b", # Handle these specially as they are strings searched # for during testing. - r"\| \bcount\b", r"\| \bnot\b"]: + r"\| \bcount\b", + r"\| \bnot\b"]: # Extract the tool name from the pattern. This relies on the tool # name being surrounded by \b word match operators. If the # pattern starts with "| ", include it in the string to be @@ -278,35 +299,53 @@ if not 'hexagon' in config.target_triple: if config.have_zlib == "1": config.available_features.add("zlib") +# Native compilation: host arch == target arch +# FIXME: Consider cases that target can be executed +# even if host_triple were different from target_triple. +if config.host_triple == config.target_triple: + config.available_features.add("native") + # llc knows whether he is compiled with -DNDEBUG. 
import subprocess try: llc_cmd = subprocess.Popen([os.path.join(llvm_tools_dir, 'llc'), '-version'], stdout = subprocess.PIPE) -except OSError, why: - print "Could not find llc in " + llvm_tools_dir +except OSError: + print("Could not find llc in " + llvm_tools_dir) exit(42) -if re.search(r'with assertions', llc_cmd.stdout.read()): +if re.search(r'with assertions', llc_cmd.stdout.read().decode('ascii')): config.available_features.add('asserts') llc_cmd.wait() +if 'darwin' == sys.platform: + try: + sysctl_cmd = subprocess.Popen(['sysctl', 'hw.optional.fma'], + stdout = subprocess.PIPE) + except OSError: + print("Could not exec sysctl") + result = sysctl_cmd.stdout.read().decode('ascii') + if -1 != result.find("hw.optional.fma: 1"): + config.available_features.add('fma3') + sysctl_cmd.wait() + # Check if we should use gmalloc. -use_gmalloc_str = lit.params.get('use_gmalloc', None) +use_gmalloc_str = lit_config.params.get('use_gmalloc', None) if use_gmalloc_str is not None: if use_gmalloc_str.lower() in ('1', 'true'): use_gmalloc = True elif use_gmalloc_str.lower() in ('', '0', 'false'): use_gmalloc = False else: - lit.fatal('user parameter use_gmalloc should be 0 or 1') + lit_config.fatal('user parameter use_gmalloc should be 0 or 1') else: # Default to not using gmalloc use_gmalloc = False # Allow use of an explicit path for gmalloc library. # Will default to '/usr/lib/libgmalloc.dylib' if not set. -gmalloc_path_str = lit.params.get('gmalloc_path', '/usr/lib/libgmalloc.dylib') +gmalloc_path_str = lit_config.params.get('gmalloc_path', + '/usr/lib/libgmalloc.dylib') if use_gmalloc: config.environment.update({'DYLD_INSERT_LIBRARIES' : gmalloc_path_str}) diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in index 1ae99eb..72fd9c9 100644 --- a/test/lit.site.cfg.in +++ b/test/lit.site.cfg.in @@ -1,3 +1,5 @@ +import sys + ## Autogenerated by LLVM/Clang configuration. # Do not edit! 
config.host_triple = "@LLVM_HOST_TRIPLE@" @@ -12,7 +14,6 @@ config.python_executable = "@PYTHON_EXECUTABLE@" config.ocamlopt_executable = "@OCAMLOPT@" config.enable_shared = @ENABLE_SHARED@ config.enable_assertions = @ENABLE_ASSERTIONS@ -config.lto_is_enabled = "@LTO_IS_ENABLED@" config.targets_to_build = "@TARGETS_TO_BUILD@" config.llvm_bindings = "@LLVM_BINDINGS@" config.host_os = "@HOST_OS@" @@ -24,10 +25,11 @@ config.have_zlib = "@HAVE_LIBZ@" # Support substitution of the tools_dir with user parameters. This is # used when we can't determine the tool dir at configuration time. try: - config.llvm_tools_dir = config.llvm_tools_dir % lit.params -except KeyError,e: + config.llvm_tools_dir = config.llvm_tools_dir % lit_config.params +except KeyError: + e = sys.exc_info()[1] key, = e.args - lit.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key)) + lit_config.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key)) # Let the main config do the real work. -lit.load_config(config, "@LLVM_SOURCE_DIR@/test/lit.cfg") +lit_config.load_config(config, "@LLVM_SOURCE_DIR@/test/lit.cfg") diff --git a/test/tools/llvm-cov/Inputs/README b/test/tools/llvm-cov/Inputs/README new file mode 100644 index 0000000..2cfb191 --- /dev/null +++ b/test/tools/llvm-cov/Inputs/README @@ -0,0 +1,7 @@ +These inputs were pre-generated to allow for easier testing of llvm-cov. 
+ +test.gcno and test.gcda were create by running clang: + clang++ -g -ftest-coverage -fprofile-arcs test.cpp + +test.cpp.gcov was created by running gcov 4.2.1: + gcov test.cpp diff --git a/test/tools/llvm-cov/Inputs/test.cpp b/test/tools/llvm-cov/Inputs/test.cpp new file mode 100644 index 0000000..07bc3f2 --- /dev/null +++ b/test/tools/llvm-cov/Inputs/test.cpp @@ -0,0 +1,77 @@ +#include <cstdlib> + +bool on = false; +int len = 42; +double grid[10][10] = {0}; +const char * hello = "world"; +const char * world = "hello"; + +struct A { + virtual void B(); +}; + +void A::B() {} + +void useless() {} + +double more_useless() { + return 0; +} + +int foo() { + on = true; + return 3; +} + +int bar() { + len--; + return foo() + 45; +} + +void assign(int ii, int jj) { + grid[ii][jj] = (ii+1) * (jj+1); +} + +void initialize_grid() { + for (int ii = 0; ii < 2; ii++) + for (int jj = 0; jj < 2; jj++) + assign(ii, jj); +} + +int main() { + initialize_grid(); + + int a = 2; + on = rand() % 2; + if (on) { + foo(); + ++a; + } else { + bar(); + a += rand(); + } + + for (int ii = 0; ii < 10; ++ii) { + switch (rand() % 5) { + case 0: + a += rand(); + break; + case 1: + case 2: + a += rand() / rand(); + break; + case 3: + a -= rand(); + break; + default: + a = -1; + } + } + + A thing; + for (uint64_t ii = 0; ii < 4294967296; ++ii) + thing.B(); + + return a + 8 + grid[2][3] + len; + return more_useless(); +} diff --git a/test/tools/llvm-cov/Inputs/test.cpp.gcov b/test/tools/llvm-cov/Inputs/test.cpp.gcov new file mode 100644 index 0000000..a3dacc2 --- /dev/null +++ b/test/tools/llvm-cov/Inputs/test.cpp.gcov @@ -0,0 +1,82 @@ + -: 0:Source:test.cpp + -: 0:Graph:test.gcno + -: 0:Data:test.gcda + -: 0:Runs:2 + -: 0:Programs:1 + -: 1:#include <cstdlib> + -: 2: + -: 3:bool on = false; + -: 4:int len = 42; + -: 5:double grid[10][10] = {0}; + -: 6:const char * hello = "world"; + -: 7:const char * world = "hello"; + -: 8: + 4: 9:struct A { + -: 10: virtual void B(); + -: 11:}; + -: 12: 
+8589934592: 13:void A::B() {} + -: 14: + #####: 15:void useless() {} + -: 16: + -: 17:double more_useless() { + #####: 18: return 0; + -: 19:} + -: 20: + -: 21:int foo() { + 2: 22: on = true; + 2: 23: return 3; + -: 24:} + -: 25: + -: 26:int bar() { + #####: 27: len--; + #####: 28: return foo() + 45; + -: 29:} + -: 30: + 8: 31:void assign(int ii, int jj) { + 8: 32: grid[ii][jj] = (ii+1) * (jj+1); + 8: 33:} + -: 34: + -: 35:void initialize_grid() { + 12: 36: for (int ii = 0; ii < 2; ii++) + 24: 37: for (int jj = 0; jj < 2; jj++) + 12: 38: assign(ii, jj); + 2: 39:} + -: 40: + -: 41:int main() { + 2: 42: initialize_grid(); + -: 43: + 2: 44: int a = 2; + 2: 45: on = rand() % 2; + 2: 46: if (on) { + 2: 47: foo(); + 2: 48: ++a; + 2: 49: } else { + #####: 50: bar(); + #####: 51: a += rand(); + -: 52: } + -: 53: + 44: 54: for (int ii = 0; ii < 10; ++ii) { + 20: 55: switch (rand() % 5) { + -: 56: case 0: + 4: 57: a += rand(); + 4: 58: break; + -: 59: case 1: + -: 60: case 2: + 2: 61: a += rand() / rand(); + 2: 62: break; + -: 63: case 3: + 6: 64: a -= rand(); + 6: 65: break; + -: 66: default: + 8: 67: a = -1; + 8: 68: } + 20: 69: } + -: 70: + 2: 71: A thing; +17179869188: 72: for (uint64_t ii = 0; ii < 4294967296; ++ii) +8589934592: 73: thing.B(); + -: 74: + 2: 75: return a + 8 + grid[2][3] + len; + -: 76: return more_useless(); + -: 77:} diff --git a/test/tools/llvm-cov/Inputs/test.gcda b/test/tools/llvm-cov/Inputs/test.gcda Binary files differnew file mode 100644 index 0000000..23d03bd --- /dev/null +++ b/test/tools/llvm-cov/Inputs/test.gcda diff --git a/test/tools/llvm-cov/Inputs/test.gcno b/test/tools/llvm-cov/Inputs/test.gcno Binary files differnew file mode 100644 index 0000000..6162604 --- /dev/null +++ b/test/tools/llvm-cov/Inputs/test.gcno diff --git a/test/tools/llvm-cov/Inputs/test_read_fail.gcno b/test/tools/llvm-cov/Inputs/test_read_fail.gcno Binary files differnew file mode 100644 index 0000000..63b5d71 --- /dev/null +++ 
b/test/tools/llvm-cov/Inputs/test_read_fail.gcno diff --git a/test/tools/llvm-readobj/lit.local.cfg b/test/tools/llvm-cov/lit.local.cfg index df9b335..df9b335 100644 --- a/test/tools/llvm-readobj/lit.local.cfg +++ b/test/tools/llvm-cov/lit.local.cfg diff --git a/test/tools/llvm-cov/llvm-cov.test b/test/tools/llvm-cov/llvm-cov.test new file mode 100644 index 0000000..28738a7 --- /dev/null +++ b/test/tools/llvm-cov/llvm-cov.test @@ -0,0 +1,10 @@ +RUN: cd %p/Inputs +# "cd" is unsupported in lit internal runner. +REQUIRES: shell + +RUN: llvm-cov -gcno=test.gcno -gcda=test.gcda \ +RUN: | diff -aub test.cpp.gcov - + +RUN: not llvm-cov -gcno=test_read_fail.gcno -gcda=test.gcda + +XFAIL: powerpc64, s390x diff --git a/test/tools/llvm-lit/lit.local.cfg b/test/tools/llvm-lit/lit.local.cfg deleted file mode 100644 index 856a549..0000000 --- a/test/tools/llvm-lit/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes = ['.c'] diff --git a/test/tools/llvm-objdump/Inputs/nop.exe.coff-i386 b/test/tools/llvm-objdump/Inputs/nop.exe.coff-i386 Binary files differnew file mode 100644 index 0000000..68c9d3d --- /dev/null +++ b/test/tools/llvm-objdump/Inputs/nop.exe.coff-i386 diff --git a/test/tools/llvm-objdump/Inputs/trivial.obj.elf-i386 b/test/tools/llvm-objdump/Inputs/trivial.obj.elf-i386 Binary files differnew file mode 100644 index 0000000..fdc4874 --- /dev/null +++ b/test/tools/llvm-objdump/Inputs/trivial.obj.elf-i386 diff --git a/test/tools/llvm-objdump/Inputs/win64-unwind.exe.coff-x86_64 b/test/tools/llvm-objdump/Inputs/win64-unwind.exe.coff-x86_64 Binary files differnew file mode 100644 index 0000000..63460e7 --- /dev/null +++ b/test/tools/llvm-objdump/Inputs/win64-unwind.exe.coff-x86_64 diff --git a/test/tools/llvm-objdump/Inputs/win64-unwind.exe.coff-x86_64.asm b/test/tools/llvm-objdump/Inputs/win64-unwind.exe.coff-x86_64.asm new file mode 100644 index 0000000..4d47fa4 --- /dev/null +++ b/test/tools/llvm-objdump/Inputs/win64-unwind.exe.coff-x86_64.asm @@ -0,0 +1,53 @@ + 
.text + .globl func + .def func; .scl 2; .type 32; .endef + .seh_proc func +func: + .seh_pushframe @code + subq $24, %rsp + .seh_stackalloc 24 + movq %rsi, 16(%rsp) + .seh_savereg %rsi, 16 + movups %xmm8, (%rsp) + .seh_savexmm %xmm8, 0 + pushq %rbx + .seh_pushreg 3 + mov %rsp, %rbx + .seh_setframe 3, 0 + .seh_endprologue + .seh_handler __C_specific_handler, @except + .seh_handlerdata + .long 0 + .text + .seh_startchained + .seh_endprologue + .seh_endchained + lea (%rbx), %rsp + pop %rbx + addq $24, %rsp + ret + .seh_endproc + +// Test emission of small functions. + .globl smallFunc + .def smallFunc; .scl 2; .type 32; .endef + .seh_proc smallFunc +smallFunc: + ret + .seh_endproc + +// Function with big stack allocation. + .globl allocFunc + .def allocFunc; .scl 2; .type 32; .endef + .seh_proc allocFunc +allocFunc: + .seh_pushframe @code + subq $65520, %rsp + .seh_stackalloc 65520 + sub $8454128, %rsp + .seh_stackalloc 8454128 + .seh_endprologue + add $8454128, %rsp + addq $65520, %rsp + ret + .seh_endproc diff --git a/test/tools/llvm-objdump/coff-private-headers.test b/test/tools/llvm-objdump/coff-private-headers.test new file mode 100644 index 0000000..d36c148 --- /dev/null +++ b/test/tools/llvm-objdump/coff-private-headers.test @@ -0,0 +1,9 @@ +// RUN: llvm-objdump -p %p/Inputs/nop.exe.coff-i386 | FileCheck %s + +CHECK: The Import Tables: +CHECK-NEXT: lookup 00005028 time 00000000 fwd 00000000 name 00005096 addr 00005058 +CHECK: DLL Name: KERNEL32.dll +CHECK-NEXT: Hint/Ord Name +CHECK-NEXT: 365 ExitProcess + + diff --git a/test/tools/llvm-objdump/disassembly-show-raw.s b/test/tools/llvm-objdump/disassembly-show-raw.s deleted file mode 100644 index 32fcad4..0000000 --- a/test/tools/llvm-objdump/disassembly-show-raw.s +++ /dev/null @@ -1,15 +0,0 @@ -// RUN: llvm-mc -filetype=obj -arch=x86 %s | llvm-objdump -d - \ -// RUN: | FileCheck %s -check-prefix=WITHRAW -// RUN: llvm-mc -filetype=obj -arch=x86 %s | llvm-objdump -d -no-show-raw-insn - \ -// RUN: | FileCheck %s 
-check-prefix=NORAW - -// Expect to find the raw incoding when run with raw output (default), but not -// when run explicitly with -no-show-raw-insn - -movl 0, %eax -// WITHRAW: a1 00 00 00 00 movl - -// NORAW: movl -// NORAW-NOT: a1 00 - - diff --git a/test/tools/llvm-objdump/disassembly-show-raw.test b/test/tools/llvm-objdump/disassembly-show-raw.test new file mode 100644 index 0000000..e9956a5 --- /dev/null +++ b/test/tools/llvm-objdump/disassembly-show-raw.test @@ -0,0 +1,14 @@ +// RUN: llvm-objdump -d %p/Inputs/trivial.obj.elf-i386 \ +// RUN: | FileCheck %s -check-prefix=WITHRAW +// RUN: llvm-objdump -d -no-show-raw-insn %p/Inputs/trivial.obj.elf-i386 \ +// RUN: | FileCheck %s -check-prefix=NORAW + +// Expect to find the raw incoding when run with raw output (default), but not +// when run explicitly with -no-show-raw-insn + +WITHRAW: a1 00 00 00 00 movl + +NORAW: movl +NORAW-NOT: a1 00 + + diff --git a/test/tools/llvm-objdump/lit.local.cfg b/test/tools/llvm-objdump/lit.local.cfg index 56bf008..19840aa 100644 --- a/test/tools/llvm-objdump/lit.local.cfg +++ b/test/tools/llvm-objdump/lit.local.cfg @@ -1,6 +1,3 @@ -config.suffixes = ['.ll', '.s'] - targets = set(config.root.targets_to_build.split()) if not 'X86' in targets: config.unsupported = True - diff --git a/test/tools/llvm-objdump/win64-unwind-data.s b/test/tools/llvm-objdump/win64-unwind-data.s deleted file mode 100644 index 1e4c742..0000000 --- a/test/tools/llvm-objdump/win64-unwind-data.s +++ /dev/null @@ -1,106 +0,0 @@ -// This test checks that the unwind data is dumped by llvm-objdump. 
-// RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | llvm-objdump -u - | FileCheck %s - -// CHECK: Unwind info: -// CHECK: Function Table: -// CHECK-NEXT: Start Address: .text -// CHECK-NEXT: End Address: .text + 0x001b -// CHECK-NEXT: Unwind Info Address: .xdata -// CHECK-NEXT: Version: 1 -// CHECK-NEXT: Flags: 1 UNW_ExceptionHandler -// CHECK-NEXT: Size of prolog: 18 -// CHECK-NEXT: Number of Codes: 8 -// CHECK-NEXT: Frame register: RBX -// CHECK-NEXT: Frame offset: 0 -// CHECK-NEXT: Unwind Codes: -// CHECK-NEXT: 0x00: UOP_SetFPReg -// CHECK-NEXT: 0x0f: UOP_PushNonVol RBX -// CHECK-NEXT: 0x0e: UOP_SaveXMM128 XMM8 [0x0000] -// CHECK-NEXT: 0x09: UOP_SaveNonVol RSI [0x0010] -// CHECK-NEXT: 0x04: UOP_AllocSmall 24 -// CHECK-NEXT: 0x00: UOP_PushMachFrame w/o error code -// CHECK: Function Table: -// CHECK-NEXT: Start Address: .text + 0x0012 -// CHECK-NEXT: End Address: .text + 0x0012 -// CHECK-NEXT: Unwind Info Address: .xdata + 0x001c -// CHECK-NEXT: Version: 1 -// CHECK-NEXT: Flags: 4 UNW_ChainInfo -// CHECK-NEXT: Size of prolog: 0 -// CHECK-NEXT: Number of Codes: 0 -// CHECK-NEXT: No frame pointer used -// CHECK: Function Table: -// CHECK-NEXT: Start Address: .text + 0x001b -// CHECK-NEXT: End Address: .text + 0x001c -// CHECK-NEXT: Unwind Info Address: .xdata + 0x002c -// CHECK-NEXT: Version: 1 -// CHECK-NEXT: Flags: 0 -// CHECK-NEXT: Size of prolog: 0 -// CHECK-NEXT: Number of Codes: 0 -// CHECK-NEXT: No frame pointer used -// CHECK: Function Table: -// CHECK-NEXT: Start Address: .text + 0x001c -// CHECK-NEXT: End Address: .text + 0x0039 -// CHECK-NEXT: Unwind Info Address: .xdata + 0x0034 -// CHECK-NEXT: Version: 1 -// CHECK-NEXT: Flags: 0 -// CHECK-NEXT: Size of prolog: 14 -// CHECK-NEXT: Number of Codes: 6 -// CHECK-NEXT: No frame pointer used -// CHECK-NEXT: Unwind Codes: -// CHECK-NEXT: 0x0e: UOP_AllocLarge 8454128 -// CHECK-NEXT: 0x07: UOP_AllocLarge 8190 -// CHECK-NEXT: 0x00: UOP_PushMachFrame w/o error code - - .text - .globl func - .def func; .scl 
2; .type 32; .endef - .seh_proc func -func: - .seh_pushframe @code - subq $24, %rsp - .seh_stackalloc 24 - movq %rsi, 16(%rsp) - .seh_savereg %rsi, 16 - movups %xmm8, (%rsp) - .seh_savexmm %xmm8, 0 - pushq %rbx - .seh_pushreg 3 - mov %rsp, %rbx - .seh_setframe 3, 0 - .seh_endprologue - .seh_handler __C_specific_handler, @except - .seh_handlerdata - .long 0 - .text - .seh_startchained - .seh_endprologue - .seh_endchained - lea (%rbx), %rsp - pop %rbx - addq $24, %rsp - ret - .seh_endproc - -// Test emission of small functions. - .globl smallFunc - .def smallFunc; .scl 2; .type 32; .endef - .seh_proc smallFunc -smallFunc: - ret - .seh_endproc - -// Function with big stack allocation. - .globl smallFunc - .def allocFunc; .scl 2; .type 32; .endef - .seh_proc smallFunc -allocFunc: - .seh_pushframe @code - subq $65520, %rsp - .seh_stackalloc 65520 - sub $8454128, %rsp - .seh_stackalloc 8454128 - .seh_endprologue - add $8454128, %rsp - addq $65520, %rsp - ret - .seh_endproc diff --git a/test/tools/llvm-objdump/win64-unwind-data.test b/test/tools/llvm-objdump/win64-unwind-data.test new file mode 100644 index 0000000..a723ffe --- /dev/null +++ b/test/tools/llvm-objdump/win64-unwind-data.test @@ -0,0 +1,52 @@ +// This test checks that the unwind data is dumped by llvm-objdump. 
+// RUN: llvm-objdump -u %p/Inputs/win64-unwind.exe.coff-x86_64 | FileCheck %s + +CHECK: Unwind info: +CHECK: Function Table: +CHECK-NEXT: Start Address: func +CHECK-NEXT: End Address: func + 0x001b +CHECK-NEXT: Unwind Info Address: .xdata +CHECK-NEXT: Version: 1 +CHECK-NEXT: Flags: 1 UNW_ExceptionHandler +CHECK-NEXT: Size of prolog: 18 +CHECK-NEXT: Number of Codes: 8 +CHECK-NEXT: Frame register: RBX +CHECK-NEXT: Frame offset: 0 +CHECK-NEXT: Unwind Codes: +CHECK-NEXT: 0x12: UOP_SetFPReg +CHECK-NEXT: 0x0f: UOP_PushNonVol RBX +CHECK-NEXT: 0x0e: UOP_SaveXMM128 XMM8 [0x0000] +CHECK-NEXT: 0x09: UOP_SaveNonVol RSI [0x0010] +CHECK-NEXT: 0x04: UOP_AllocSmall 24 +CHECK-NEXT: 0x00: UOP_PushMachFrame w/o error code +CHECK: Function Table: +CHECK-NEXT: Start Address: func + 0x0012 +CHECK-NEXT: End Address: func + 0x0012 +CHECK-NEXT: Unwind Info Address: .xdata + 0x001c +CHECK-NEXT: Version: 1 +CHECK-NEXT: Flags: 4 UNW_ChainInfo +CHECK-NEXT: Size of prolog: 0 +CHECK-NEXT: Number of Codes: 0 +CHECK-NEXT: No frame pointer used +CHECK: Function Table: +CHECK-NEXT: Start Address: smallFunc +CHECK-NEXT: End Address: smallFunc + 0x0001 +CHECK-NEXT: Unwind Info Address: .xdata + 0x002c +CHECK-NEXT: Version: 1 +CHECK-NEXT: Flags: 0 +CHECK-NEXT: Size of prolog: 0 +CHECK-NEXT: Number of Codes: 0 +CHECK-NEXT: No frame pointer used +CHECK: Function Table: +CHECK-NEXT: Start Address: allocFunc +CHECK-NEXT: End Address: allocFunc + 0x001d +CHECK-NEXT: Unwind Info Address: .xdata + 0x0034 +CHECK-NEXT: Version: 1 +CHECK-NEXT: Flags: 0 +CHECK-NEXT: Size of prolog: 14 +CHECK-NEXT: Number of Codes: 6 +CHECK-NEXT: No frame pointer used +CHECK-NEXT: Unwind Codes: +CHECK-NEXT: 0x0e: UOP_AllocLarge 8454128 +CHECK-NEXT: 0x07: UOP_AllocLarge 8190 +CHECK-NEXT: 0x00: UOP_PushMachFrame w/o error code diff --git a/test/tools/llvm-readobj/Inputs/dynamic-table.c b/test/tools/llvm-readobj/Inputs/dynamic-table.c new file mode 100644 index 0000000..6d36e8a --- /dev/null +++ 
b/test/tools/llvm-readobj/Inputs/dynamic-table.c @@ -0,0 +1,7 @@ +// clang -target mipsel-linux-gnu -shared -fPIC -lc dynamic-table.c \ +// -o dynamic-table.mips +int puts(const char *); + +void foo(void) { + puts("Hello, World"); +} diff --git a/test/tools/llvm-readobj/Inputs/dynamic-table.mips b/test/tools/llvm-readobj/Inputs/dynamic-table.mips Binary files differnew file mode 100644 index 0000000..ab36cee --- /dev/null +++ b/test/tools/llvm-readobj/Inputs/dynamic-table.mips diff --git a/test/tools/llvm-readobj/Inputs/magic.coff-importlib b/test/tools/llvm-readobj/Inputs/magic.coff-importlib Binary files differnew file mode 100644 index 0000000..b934afb --- /dev/null +++ b/test/tools/llvm-readobj/Inputs/magic.coff-importlib diff --git a/test/tools/llvm-readobj/Inputs/magic.coff-unknown b/test/tools/llvm-readobj/Inputs/magic.coff-unknown Binary files differnew file mode 100644 index 0000000..7b3b461 --- /dev/null +++ b/test/tools/llvm-readobj/Inputs/magic.coff-unknown diff --git a/test/tools/llvm-readobj/Inputs/rpath.exe.elf-x86_64 b/test/tools/llvm-readobj/Inputs/rpath.exe.elf-x86_64 Binary files differnew file mode 100644 index 0000000..8c01c50 --- /dev/null +++ b/test/tools/llvm-readobj/Inputs/rpath.exe.elf-x86_64 diff --git a/test/tools/llvm-readobj/dynamic.test b/test/tools/llvm-readobj/dynamic.test new file mode 100644 index 0000000..78a9b3b --- /dev/null +++ b/test/tools/llvm-readobj/dynamic.test @@ -0,0 +1,33 @@ +RUN: llvm-readobj -dynamic-table %p/Inputs/dynamic-table.mips \ +RUN: | FileCheck %s -check-prefix ELF-MIPS + +ELF-MIPS: Format: ELF32-mips +ELF-MIPS: Arch: mipsel +ELF-MIPS: AddressSize: 32bit +ELF-MIPS: LoadName: +ELF-MIPS: DynamicSection [ (23 entries) +ELF-MIPS: Tag Type Name/Value +ELF-MIPS: 0x00000001 NEEDED SharedLibrary (libc.so.6) +ELF-MIPS: 0x0000000C INIT 0x528 +ELF-MIPS: 0x0000000D FINI 0x860 +ELF-MIPS: 0x00000004 HASH 0x210 +ELF-MIPS: 0x00000005 STRTAB 0x3D8 +ELF-MIPS: 0x00000006 SYMTAB 0x2A8 +ELF-MIPS: 0x0000000A STRSZ 231 (bytes) 
+ELF-MIPS: 0x0000000B SYMENT 16 (bytes) +ELF-MIPS: 0x00000003 PLTGOT 0x108E0 +ELF-MIPS: 0x00000011 REL 0x518 +ELF-MIPS: 0x00000012 RELSZ 16 (bytes) +ELF-MIPS: 0x00000013 RELENT 8 (bytes) +ELF-MIPS: 0x70000001 MIPS_RLD_VERSION 1 +ELF-MIPS: 0x70000005 MIPS_FLAGS 0x2 +ELF-MIPS: 0x70000006 MIPS_BASE_ADDRESS 0x0 +ELF-MIPS: 0x7000000A MIPS_LOCAL_GOTNO 10 +ELF-MIPS: 0x70000011 MIPS_SYMTABNO 19 +ELF-MIPS: 0x70000012 MIPS_UNREFEXTNO 26 +ELF-MIPS: 0x70000013 MIPS_GOTSYM 0xD +ELF-MIPS: 0x6FFFFFFE VERNEED 0x4E8 +ELF-MIPS: 0x6FFFFFFF VERNEEDNUM 1 +ELF-MIPS: 0x6FFFFFF0 VERSYM 0x4C0 +ELF-MIPS: 0x00000000 NULL 0x0 +ELF-MIPS: ] diff --git a/test/tools/llvm-readobj/file-headers.test b/test/tools/llvm-readobj/file-headers.test index b900e36..b2b4547 100644 --- a/test/tools/llvm-readobj/file-headers.test +++ b/test/tools/llvm-readobj/file-headers.test @@ -8,6 +8,10 @@ RUN: llvm-readobj -h %p/Inputs/trivial.obj.elf-i386 \ RUN: | FileCheck %s -check-prefix ELF32 RUN: llvm-readobj -h %p/Inputs/trivial.obj.elf-x86-64 \ RUN: | FileCheck %s -check-prefix ELF64 +RUN: llvm-readobj -h %p/Inputs/magic.coff-unknown \ +RUN: | FileCheck %s -check-prefix COFF-UNKNOWN +RUN: llvm-readobj -h %p/Inputs/magic.coff-importlib \ +RUN: | FileCheck %s -check-prefix COFF-IMPORTLIB COFF32: File: {{(.*[/\\])?}}trivial.obj.coff-i386 COFF32-NEXT: Format: COFF-i386 @@ -183,3 +187,32 @@ PE32-NEXT: ReservedRVA: 0x0 PE32-NEXT: ReservedSize: 0x0 PE32-NEXT: } PE32-NEXT: } + +COFF-UNKNOWN: Format: COFF-<unknown arch> +COFF-UNKNOWN-NEXT: Arch: unknown +COFF-UNKNOWN-NEXT: AddressSize: 32bit +COFF-UNKNOWN-NEXT: ImageFileHeader { +COFF-UNKNOWN-NEXT: Machine: IMAGE_FILE_MACHINE_UNKNOWN (0x0) +COFF-UNKNOWN-NEXT: SectionCount: 3 +COFF-UNKNOWN-NEXT: TimeDateStamp: 2013-11-14 21:19:28 (0x52853E60) +COFF-UNKNOWN-NEXT: PointerToSymbolTable: 0xF8 +COFF-UNKNOWN-NEXT: SymbolCount: 11 +COFF-UNKNOWN-NEXT: OptionalHeaderSize: 0 +COFF-UNKNOWN-NEXT: Characteristics [ (0x0) +COFF-UNKNOWN-NEXT: ] +COFF-UNKNOWN-NEXT: } + +COFF-IMPORTLIB: 
Format: COFF-<unknown arch> +COFF-IMPORTLIB-NEXT: Arch: unknown +COFF-IMPORTLIB-NEXT: AddressSize: 32bit +COFF-IMPORTLIB-NEXT: ImageFileHeader { +COFF-IMPORTLIB-NEXT: Machine: IMAGE_FILE_MACHINE_UNKNOWN (0x0) +COFF-IMPORTLIB-NEXT: SectionCount: 65535 +COFF-IMPORTLIB-NEXT: TimeDateStamp: 1970-09-09 19:52:32 (0x14C0000) +COFF-IMPORTLIB-NEXT: PointerToSymbolTable: 0x528542EB +COFF-IMPORTLIB-NEXT: SymbolCount: 20 +COFF-IMPORTLIB-NEXT: OptionalHeaderSize: 0 +COFF-IMPORTLIB-NEXT: Characteristics [ (0x8) +COFF-IMPORTLIB-NEXT: IMAGE_FILE_LOCAL_SYMS_STRIPPED (0x8) +COFF-IMPORTLIB-NEXT: ] +COFF-IMPORTLIB-NEXT: } diff --git a/test/tools/llvm-readobj/program-headers.test b/test/tools/llvm-readobj/program-headers.test index 2a574bb..7c22f2b 100644 --- a/test/tools/llvm-readobj/program-headers.test +++ b/test/tools/llvm-readobj/program-headers.test @@ -2,6 +2,8 @@ RUN: llvm-readobj -program-headers %p/../../Object/Inputs/program-headers.elf-i3 RUN: | FileCheck %s -check-prefix ELF-I386 RUN: llvm-readobj -program-headers %p/../../Object/Inputs/program-headers.elf-x86-64 \ RUN: | FileCheck %s -check-prefix ELF-X86-64 +RUN: llvm-readobj -program-headers %p/../../Object/Inputs/program-headers.mips \ +RUN: | FileCheck %s -check-prefix ELF-MIPS ELF-I386: ProgramHeaders [ ELF-I386-NEXT: ProgramHeader { @@ -72,3 +74,31 @@ ELF-X86-64-NEXT: ] ELF-X86-64-NEXT: Alignment: 8 ELF-X86-64-NEXT: } ELF-X86-64-NEXT: ] + +ELF-MIPS: ProgramHeaders [ +ELF-MIPS-NEXT: ProgramHeader { +ELF-MIPS-NEXT: Type: PT_MIPS_REGINFO (0x70000000) +ELF-MIPS-NEXT: Offset: 0x74 +ELF-MIPS-NEXT: VirtualAddress: 0x400074 +ELF-MIPS-NEXT: PhysicalAddress: 0x400074 +ELF-MIPS-NEXT: FileSize: 24 +ELF-MIPS-NEXT: MemSize: 24 +ELF-MIPS-NEXT: Flags [ (0x4) +ELF-MIPS-NEXT: PF_R (0x4) +ELF-MIPS-NEXT: ] +ELF-MIPS-NEXT: Alignment: 4 +ELF-MIPS-NEXT: } +ELF-MIPS-NEXT: ProgramHeader { +ELF-MIPS-NEXT: Type: PT_LOAD (0x1) +ELF-MIPS-NEXT: Offset: 0x0 +ELF-MIPS-NEXT: VirtualAddress: 0x400000 +ELF-MIPS-NEXT: PhysicalAddress: 0x400000 
+ELF-MIPS-NEXT: FileSize: 160 +ELF-MIPS-NEXT: MemSize: 160 +ELF-MIPS-NEXT: Flags [ (0x5) +ELF-MIPS-NEXT: PF_R (0x4) +ELF-MIPS-NEXT: PF_X (0x1) +ELF-MIPS-NEXT: ] +ELF-MIPS-NEXT: Alignment: 65536 +ELF-MIPS-NEXT: } +ELF-MIPS-NEXT: ] diff --git a/test/tools/llvm-readobj/rpath.test b/test/tools/llvm-readobj/rpath.test new file mode 100644 index 0000000..600938e --- /dev/null +++ b/test/tools/llvm-readobj/rpath.test @@ -0,0 +1,4 @@ +RUN: llvm-readobj -dynamic-table %p/Inputs/rpath.exe.elf-x86_64 \ +RUN: | FileCheck %s + +CHECK: 0x000000000000000F RPATH /usr/local/lib |